Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second set of KVM updates from Paolo Bonzini:
"ARM:
- Support for Group0 interrupts in guests
- Cache management optimizations for ARMv8.4 systems
- Userspace interface for RAS
- Fault path optimization
- Emulated physical timer fixes
- Random cleanups
x86:
- fixes for L1TF
- a new test case
- non-support for SGX (inject the right exception in the guest)
- fix lockdep false positive"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
KVM: VMX: fixes for vmentry_l1d_flush module parameter
kvm: selftest: add dirty logging test
kvm: selftest: pass in extra memory when create vm
kvm: selftest: include the tools headers
kvm: selftest: unify the guest port macros
tools: introduce test_and_clear_bit
KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled
KVM: vmx: Inject #UD for SGX ENCLS instruction in guest
KVM: vmx: Add defines for SGX ENCLS exiting
x86/kvm/vmx: Fix coding style in vmx_setup_l1d_flush()
x86: kvm: avoid unused variable warning
KVM: Documentation: rename the capability of KVM_CAP_ARM_SET_SERROR_ESR
KVM: arm/arm64: Skip updating PTE entry if no change
KVM: arm/arm64: Skip updating PMD entry if no change
KVM: arm: Use true and false for boolean values
KVM: arm/arm64: vgic: Do not use spin_lock_irqsave/restore with irq disabled
KVM: arm/arm64: vgic: Move DEBUG_SPINLOCK_BUG_ON to vgic.h
KVM: arm: vgic-v3: Add support for ICC_SGI0R and ICC_ASGI1R accesses
KVM: arm64: vgic-v3: Add support for ICC_SGI0R_EL1 and ICC_ASGI1R_EL1 accesses
KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs
...
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 9b01233..94a24ae 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -51,6 +51,14 @@
Controls the dirty page count condition for the in-place-update
policies.
+What: /sys/fs/f2fs/<disk>/min_seq_blocks
+Date: August 2018
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for batched sequential
+ writes in ->writepages.
+
+
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 1746131..184193b 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1072,6 +1072,24 @@
high limit is used and monitored properly, this limit's
utility is limited to providing the final safety net.
+ memory.oom.group
+ A read-write single value file which exists on non-root
+ cgroups. The default value is "0".
+
+ Determines whether the cgroup should be treated as
+ an indivisible workload by the OOM killer. If set,
+ all tasks belonging to the cgroup or to its descendants
+ (if the memory cgroup is not a leaf cgroup) are killed
+ together or not at all. This can be used to avoid
+ partial kills to guarantee workload integrity.
+
+ Tasks with the OOM protection (oom_score_adj set to -1000)
+ are treated as an exception and are never killed.
+
+ If the OOM killer is invoked in a cgroup, it's not going
+ to kill any tasks outside of this cgroup, regardless
+ memory.oom.group values of ancestor cgroups.
+
memory.events
A read-only flat-keyed file which exists on non-root cgroups.
The following entries are defined. Unless specified
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bffb0ca..970d837 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3041,8 +3041,9 @@
on: enable the feature
page_poison= [KNL] Boot-time parameter changing the state of
- poisoning on the buddy allocator.
- off: turn off poisoning
+ poisoning on the buddy allocator, available with
+ CONFIG_PAGE_POISONING=y.
+ off: turn off poisoning (default)
on: turn on poisoning
panic= [KNL] Kernel behaviour on panic: delay <timeout>
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 69f8de9..e5edd29 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -157,6 +157,24 @@
persist data of regular and symlink.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
+fault_type=%d Support configuring fault injection type, should be
+ enabled with fault_injection option, fault type value
+ is shown below, it supports single or combined type.
+ Type_Name Type_Value
+ FAULT_KMALLOC 0x000000001
+ FAULT_KVMALLOC 0x000000002
+ FAULT_PAGE_ALLOC 0x000000004
+ FAULT_PAGE_GET 0x000000008
+ FAULT_ALLOC_BIO 0x000000010
+ FAULT_ALLOC_NID 0x000000020
+ FAULT_ORPHAN 0x000000040
+ FAULT_BLOCK 0x000000080
+ FAULT_DIR_DEPTH 0x000000100
+ FAULT_EVICT_INODE 0x000000200
+ FAULT_TRUNCATE 0x000000400
+ FAULT_IO 0x000000800
+ FAULT_CHECKPOINT 0x000001000
+ FAULT_DISCARD 0x000002000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 1605acbb..22b4b00 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -870,6 +870,7 @@
VmallocTotal: 112216 kB
VmallocUsed: 428 kB
VmallocChunk: 111088 kB
+Percpu: 62080 kB
HardwareCorrupted: 0 kB
AnonHugePages: 49152 kB
ShmemHugePages: 0 kB
@@ -962,6 +963,8 @@
VmallocTotal: total size of vmalloc memory area
VmallocUsed: amount of vmalloc area which is used
VmallocChunk: largest contiguous block of vmalloc area which is free
+ Percpu: Memory allocated to the percpu allocator used to back percpu
+ allocations. This stat excludes the cost of metadata.
..............................................................................
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 5958503..37a6795 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -38,6 +38,7 @@
- hung_task_panic
- hung_task_check_count
- hung_task_timeout_secs
+- hung_task_check_interval_secs
- hung_task_warnings
- hyperv_record_panic_msg
- kexec_load_disabled
@@ -355,7 +356,7 @@
hung_task_timeout_secs:
-Check interval. When a task in D state did not get scheduled
+When a task in D state did not get scheduled
for more than this value report a warning.
This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
@@ -364,6 +365,18 @@
==============================================================
+hung_task_check_interval_secs:
+
+Hung task check interval. If hung task checking is enabled
+(see hung_task_timeout_secs), the check is done every
+hung_task_check_interval_secs seconds.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0 (default): means use hung_task_timeout_secs as checking interval.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+==============================================================
+
hung_task_warnings:
The maximum number of warnings to report. During a check interval
@@ -451,7 +464,8 @@
1) kernel doesn't guarantee, that new object will have desired id. So,
it's up to userspace, how to handle an object with "wrong" id.
2) Toggle with non-default value will be set back to -1 by kernel after
-successful IPC object allocation.
+successful IPC object allocation. If an IPC object allocation syscall
+fails, it is undefined if the value remains unmodified or is reset to -1.
==============================================================
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index e72853b..7d73882 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -691,7 +691,7 @@
The default value is 0.
See Documentation/vm/overcommit-accounting.rst and
-mm/mmap.c::__vm_enough_memory() for more information.
+mm/util.c::__vm_enough_memory() for more information.
==============================================================
diff --git a/arch/Kconfig b/arch/Kconfig
index c614816..4426e96 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -841,6 +841,16 @@
against various use-after-free conditions that can be used in
security flaw exploits.
+config HAVE_ARCH_PREL32_RELOCATIONS
+ bool
+ help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+ for PCI fixup, initcalls etc which are only half the size on 64 bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d9c2991..82ab015 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -330,16 +330,15 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
* atomic helpers. Insert it into the gate_vma so that it is visible
* through ptrace and /proc/<pid>/mem.
*/
-static struct vm_area_struct gate_vma = {
- .vm_start = 0xffff0000,
- .vm_end = 0xffff0000 + PAGE_SIZE,
- .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC,
-};
+static struct vm_area_struct gate_vma;
static int __init gate_vma_init(void)
{
vma_init(&gate_vma, NULL);
gate_vma.vm_page_prot = PAGE_READONLY_EXEC;
+ gate_vma.vm_start = 0xffff0000;
+ gate_vma.vm_end = 0xffff0000 + PAGE_SIZE;
+ gate_vma.vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC;
return 0;
}
arch_initcall(gate_vma_init);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d0a53cc..29e75b4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -108,6 +108,7 @@
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e89d40..0b334b6 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -16,6 +16,7 @@
select OF_IRQ
select OF_EARLY_FLATTREE
select HAVE_MEMBLOCK
+ select NO_BOOTMEM
select TIMER_OF
select H8300_TMR8
select HAVE_KERNEL_GZIP
diff --git a/arch/h8300/Makefile b/arch/h8300/Makefile
index e1c02ca..cc12b16 100644
--- a/arch/h8300/Makefile
+++ b/arch/h8300/Makefile
@@ -8,6 +8,8 @@
# (C) Copyright 2002-2015 Yoshinori Sato <ysato@users.sourceforge.jp>
#
+KBUILD_DEFCONFIG := edosk2674_defconfig
+
cflags-$(CONFIG_CPU_H8300H) := -mh
aflags-$(CONFIG_CPU_H8300H) := -mh -Wa,--mach=h8300h
ldflags-$(CONFIG_CPU_H8300H) := -mh8300helf_linux
@@ -22,6 +24,8 @@
KBUILD_AFLAGS += $(aflags-y)
LDFLAGS += $(ldflags-y)
+CHECKFLAGS += -msize-long
+
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := h8300-unknown-linux-
endif
diff --git a/arch/h8300/boot/dts/h8300h_sim.dts b/arch/h8300/boot/dts/h8300h_sim.dts
index f1c31ce..595398b 100644
--- a/arch/h8300/boot/dts/h8300h_sim.dts
+++ b/arch/h8300/boot/dts/h8300h_sim.dts
@@ -73,7 +73,7 @@
timer16: timer@ffff68 {
compatible = "renesas,16bit-timer";
reg = <0xffff68 8>, <0xffff60 8>;
- interrupts = <24 0>;
+ interrupts = <26 0>;
renesas,channel = <0>;
clocks = <&fclk>;
clock-names = "fck";
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index ea0cb0c..647a83b 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -29,11 +29,11 @@ static inline unsigned long ffz(unsigned long word)
result = -1;
__asm__("1:\n\t"
- "shlr.l %2\n\t"
+ "shlr.l %1\n\t"
"adds #1,%0\n\t"
"bcs 1b"
- : "=r"(result)
- : "0"(result), "r"(word));
+ : "=r"(result),"=r"(word)
+ : "0"(result), "1"(word));
return result;
}
@@ -66,7 +66,7 @@ H8300_GEN_BITOP(change_bit, "bnot")
#undef H8300_GEN_BITOP
-static inline int test_bit(int nr, const unsigned long *addr)
+static inline int test_bit(int nr, const volatile unsigned long *addr)
{
int ret = 0;
unsigned char *b_addr;
@@ -162,11 +162,11 @@ static inline unsigned long __ffs(unsigned long word)
result = -1;
__asm__("1:\n\t"
- "shlr.l %2\n\t"
+ "shlr.l %1\n\t"
"adds #1,%0\n\t"
"bcc 1b"
- : "=r" (result)
- : "0"(result), "r"(word));
+ : "=r" (result),"=r"(word)
+ : "0"(result), "1"(word));
return result;
}
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index 313cafa..66d3838 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -4,6 +4,8 @@
#include <uapi/asm/ptrace.h>
+struct task_struct;
+
#ifndef __ASSEMBLY__
#ifndef PS_S
#define PS_S (0x10)
diff --git a/arch/h8300/kernel/kgdb.c b/arch/h8300/kernel/kgdb.c
index 602e478..1a1d30c 100644
--- a/arch/h8300/kernel/kgdb.c
+++ b/arch/h8300/kernel/kgdb.c
@@ -129,7 +129,7 @@ void kgdb_arch_exit(void)
/* Nothing to do */
}
-const struct kgdb_arch arch_kgdb_ops = {
+struct kgdb_arch arch_kgdb_ops = {
/* Breakpoint instruction: trapa #2 */
.gdb_bpt_instr = { 0x57, 0x20 },
};
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index a4d0470..34e2df5 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -23,7 +23,6 @@
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/clk-provider.h>
#include <linux/memblock.h>
@@ -71,10 +70,6 @@ void __init h8300_fdt_init(void *fdt, char *bootargs)
static void __init bootmem_init(void)
{
- int bootmap_size;
- unsigned long ram_start_pfn;
- unsigned long free_ram_start_pfn;
- unsigned long ram_end_pfn;
struct memblock_region *region;
memory_end = memory_start = 0;
@@ -88,33 +83,17 @@ static void __init bootmem_init(void)
if (!memory_end)
panic("No memory!");
- ram_start_pfn = PFN_UP(memory_start);
- /* free_ram_start_pfn is first page after kernel */
- free_ram_start_pfn = PFN_UP(__pa(_end));
- ram_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
+ min_low_pfn = PFN_UP(memory_start);
+ max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ max_pfn = max_low_pfn;
- max_pfn = ram_end_pfn;
+ memblock_reserve(__pa(_stext), _end - _stext);
- /*
- * give all the memory to the bootmap allocator, tell it to put the
- * boot mem_map at the start of memory
- */
- bootmap_size = init_bootmem_node(NODE_DATA(0),
- free_ram_start_pfn,
- 0,
- ram_end_pfn);
- /*
- * free the usable memory, we have to make sure we do not free
- * the bootmem bitmap so we then reserve it after freeing it :-)
- */
- free_bootmem(PFN_PHYS(free_ram_start_pfn),
- (ram_end_pfn - free_ram_start_pfn) << PAGE_SHIFT);
- reserve_bootmem(PFN_PHYS(free_ram_start_pfn), bootmap_size,
- BOOTMEM_DEFAULT);
+ early_init_fdt_reserve_self();
+ early_init_fdt_scan_reserved_mem();
- for_each_memblock(reserved, region) {
- reserve_bootmem(region->base, region->size, BOOTMEM_DEFAULT);
- }
+ memblock_dump_all();
}
void __init setup_arch(char **cmdline_p)
@@ -188,15 +167,6 @@ const struct seq_operations cpuinfo_op = {
.show = show_cpuinfo,
};
-static int __init device_probe(void)
-{
- of_platform_populate(NULL, NULL, NULL, NULL);
-
- return 0;
-}
-
-device_initcall(device_probe);
-
#if defined(CONFIG_CPU_H8300H)
#define get_wait(base, addr) ({ \
int baddr; \
diff --git a/arch/h8300/kernel/sim-console.c b/arch/h8300/kernel/sim-console.c
index 46138f5..03aa35b 100644
--- a/arch/h8300/kernel/sim-console.c
+++ b/arch/h8300/kernel/sim-console.c
@@ -13,12 +13,13 @@
static void sim_write(struct console *con, const char *s, unsigned n)
{
- register const int fd __asm__("er0") = 1; /* stdout */
register const char *_ptr __asm__("er1") = s;
register const unsigned _len __asm__("er2") = n;
- __asm__(".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
- : : "g"(fd), "g"(_ptr), "g"(_len));
+ __asm__("sub.l er0,er0\n\t" /* er0 = 1 (stdout) */
+ "inc.l #1,er0\n\t"
+ ".byte 0x5e,0x00,0x00,0xc7\n\t" /* jsr @0xc7 (sys_write) */
+ : : "g"(_ptr), "g"(_len):"er0");
}
static int __init sim_setup(struct earlycon_device *device, const char *opt)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a806692..db0b6ee 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 2d58c26..e6f2a38 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -45,9 +45,13 @@
select LOCKDEP_SMALL if LOCKDEP
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
+ select HAVE_MEMBLOCK
+ select NO_BOOTMEM
config SPARC32
def_bool !64BIT
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_NONCOHERENT_OPS
select GENERIC_ATOMIC64
select CLZ_TAB
select HAVE_UID16
@@ -60,7 +64,6 @@
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
@@ -79,7 +82,6 @@
select IRQ_PREFLOW_FASTEOI
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select HAVE_C_RECORDMCOUNT
- select NO_BOOTMEM
select HAVE_ARCH_AUDITSYSCALL
select ARCH_SUPPORTS_ATOMIC_RMW
select HAVE_NMI
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index 966a13d..e32ef20 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -9,10 +9,10 @@
# Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
# We are not yet configured - so test on arch
-ifeq ($(ARCH),sparc)
- KBUILD_DEFCONFIG := sparc32_defconfig
-else
+ifeq ($(ARCH),sparc64)
KBUILD_DEFCONFIG := sparc64_defconfig
+else
+ KBUILD_DEFCONFIG := sparc32_defconfig
endif
ifeq ($(CONFIG_SPARC32),y)
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index 12ae33d..e175663 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -7,7 +7,6 @@
#include <linux/dma-debug.h>
extern const struct dma_map_ops *dma_ops;
-extern const struct dma_map_ops pci32_dma_ops;
extern struct bus_type pci_bus_type;
@@ -15,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
{
#ifdef CONFIG_SPARC_LEON
if (sparc_cpu_model == sparc_leon)
- return &pci32_dma_ops;
+ return &dma_noncoherent_ops;
#endif
#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
if (bus == &pci_bus_type)
- return &pci32_dma_ops;
+ return &dma_noncoherent_ops;
#endif
return dma_ops;
}
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index cca9134..6799c93 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -38,6 +38,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/scatterlist.h>
+#include <linux/dma-noncoherent.h>
#include <linux/of_device.h>
#include <asm/io.h>
@@ -434,42 +435,41 @@ arch_initcall(sparc_register_ioport);
/* Allocate and map kernel buffer using consistent mode DMA for a device.
* hwdev should be valid struct pci_dev pointer for PCI devices.
*/
-static void *pci32_alloc_coherent(struct device *dev, size_t len,
- dma_addr_t *pba, gfp_t gfp,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t gfp, unsigned long attrs)
{
- unsigned long len_total = PAGE_ALIGN(len);
+ unsigned long len_total = PAGE_ALIGN(size);
void *va;
struct resource *res;
int order;
- if (len == 0) {
+ if (size == 0) {
return NULL;
}
- if (len > 256*1024) { /* __get_free_pages() limit */
+ if (size > 256*1024) { /* __get_free_pages() limit */
return NULL;
}
order = get_order(len_total);
va = (void *) __get_free_pages(gfp, order);
if (va == NULL) {
- printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
+ printk("%s: no %ld pages\n", __func__, len_total>>PAGE_SHIFT);
goto err_nopages;
}
if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
- printk("pci_alloc_consistent: no core\n");
+ printk("%s: no core\n", __func__);
goto err_nomem;
}
if (allocate_resource(&_sparc_dvma, res, len_total,
_sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
- printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
+ printk("%s: cannot occupy 0x%lx", __func__, len_total);
goto err_nova;
}
srmmu_mapiorange(0, virt_to_phys(va), res->start, len_total);
- *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
+ *dma_handle = virt_to_phys(va);
return (void *) res->start;
err_nova:
@@ -481,184 +481,53 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
}
/* Free and unmap a consistent DMA buffer.
- * cpu_addr is what was returned from pci_alloc_consistent,
- * size must be the same as what as passed into pci_alloc_consistent,
- * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ * cpu_addr is what was returned arch_dma_alloc, size must be the same as what
+ * was passed into arch_dma_alloc, and likewise dma_addr must be the same as
+ * what *dma_ndler was set to.
*
* References to the memory and mappings associated with cpu_addr/dma_addr
* past this call are illegal.
*/
-static void pci32_free_coherent(struct device *dev, size_t n, void *p,
- dma_addr_t ba, unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_addr, unsigned long attrs)
{
struct resource *res;
if ((res = lookup_resource(&_sparc_dvma,
- (unsigned long)p)) == NULL) {
- printk("pci_free_consistent: cannot free %p\n", p);
+ (unsigned long)cpu_addr)) == NULL) {
+ printk("%s: cannot free %p\n", __func__, cpu_addr);
return;
}
- if (((unsigned long)p & (PAGE_SIZE-1)) != 0) {
- printk("pci_free_consistent: unaligned va %p\n", p);
+ if (((unsigned long)cpu_addr & (PAGE_SIZE-1)) != 0) {
+ printk("%s: unaligned va %p\n", __func__, cpu_addr);
return;
}
- n = PAGE_ALIGN(n);
- if (resource_size(res) != n) {
- printk("pci_free_consistent: region 0x%lx asked 0x%lx\n",
- (long)resource_size(res), (long)n);
+ size = PAGE_ALIGN(size);
+ if (resource_size(res) != size) {
+ printk("%s: region 0x%lx asked 0x%zx\n", __func__,
+ (long)resource_size(res), size);
return;
}
- dma_make_coherent(ba, n);
- srmmu_unmapiorange((unsigned long)p, n);
+ dma_make_coherent(dma_addr, size);
+ srmmu_unmapiorange((unsigned long)cpu_addr, size);
release_resource(res);
kfree(res);
- free_pages((unsigned long)phys_to_virt(ba), get_order(n));
+ free_pages((unsigned long)phys_to_virt(dma_addr), get_order(size));
}
-/*
- * Same as pci_map_single, but with pages.
- */
-static dma_addr_t pci32_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
+/* IIep is write-through, not flushing on cpu to device transfer. */
+
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- /* IIep is write-through, not flushing. */
- return page_to_phys(page) + offset;
+ if (dir != PCI_DMA_TODEVICE)
+ dma_make_coherent(paddr, PAGE_ALIGN(size));
}
-static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_make_coherent(ba, PAGE_ALIGN(size));
-}
-
-/* Map a set of buffers described by scatterlist in streaming
- * mode for DMA. This is the scatter-gather version of the
- * above pci_map_single interface. Here the scatter gather list
- * elements are each tagged with the appropriate dma address
- * and length. They are obtained via sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
- * Device ownership issues as mentioned above for pci_map_single are
- * the same here.
- */
-static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int n;
-
- /* IIep is write-through, not flushing. */
- for_each_sg(sgl, sg, nents, n) {
- sg->dma_address = sg_phys(sg);
- sg->dma_length = sg->length;
- }
- return nents;
-}
-
-/* Unmap a set of streaming mode DMA translations.
- * Again, cpu read rules concerning calls here are the same as for
- * pci_unmap_single() above.
- */
-static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
-}
-
-/* Make physical memory consistent for a single
- * streaming mode DMA translation before or after a transfer.
- *
- * If you perform a pci_map_single() but wish to interrogate the
- * buffer using the cpu, yet do not wish to teardown the PCI dma
- * mapping, you must call this function before doing so. At the
- * next point you give the PCI dma address back to the card, you
- * must first perform a pci_dma_sync_for_device, and then the
- * device again owns the buffer.
- */
-static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
- size_t size, enum dma_data_direction dir)
-{
- if (dir != PCI_DMA_TODEVICE) {
- dma_make_coherent(ba, PAGE_ALIGN(size));
- }
-}
-
-static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
- size_t size, enum dma_data_direction dir)
-{
- if (dir != PCI_DMA_TODEVICE) {
- dma_make_coherent(ba, PAGE_ALIGN(size));
- }
-}
-
-/* Make physical memory consistent for a set of streaming
- * mode DMA translations after a transfer.
- *
- * The same as pci_dma_sync_single_* but for a scatter-gather list,
- * same rules and usage.
- */
-static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
-}
-
-static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int n;
-
- if (dir != PCI_DMA_TODEVICE) {
- for_each_sg(sgl, sg, nents, n) {
- dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
- }
- }
-}
-
-/* note: leon re-uses pci32_dma_ops */
-const struct dma_map_ops pci32_dma_ops = {
- .alloc = pci32_alloc_coherent,
- .free = pci32_free_coherent,
- .map_page = pci32_map_page,
- .unmap_page = pci32_unmap_page,
- .map_sg = pci32_map_sg,
- .unmap_sg = pci32_unmap_sg,
- .sync_single_for_cpu = pci32_sync_single_for_cpu,
- .sync_single_for_device = pci32_sync_single_for_device,
- .sync_sg_for_cpu = pci32_sync_sg_for_cpu,
- .sync_sg_for_device = pci32_sync_sg_for_device,
-};
-EXPORT_SYMBOL(pci32_dma_ops);
-
const struct dma_map_ops *dma_ops = &sbus_dma_ops;
EXPORT_SYMBOL(dma_ops);
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 95fe4f0..92634d4 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
#include <linux/gfp.h>
@@ -101,13 +102,46 @@ static unsigned long calc_max_low_pfn(void)
return tmp;
}
+static void __init find_ramdisk(unsigned long end_of_phys_memory)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+ unsigned long size;
+
+ /* Now have to check initial ramdisk, so that it won't pass
+ * the end of memory
+ */
+ if (sparc_ramdisk_image) {
+ if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
+ sparc_ramdisk_image -= KERNBASE;
+ initrd_start = sparc_ramdisk_image + phys_base;
+ initrd_end = initrd_start + sparc_ramdisk_size;
+ if (initrd_end > end_of_phys_memory) {
+ printk(KERN_CRIT "initrd extends beyond end of memory "
+ "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
+ initrd_end, end_of_phys_memory);
+ initrd_start = 0;
+ } else {
+ /* Reserve the initrd image area. */
+ size = initrd_end - initrd_start;
+ memblock_reserve(initrd_start, size);
+
+ initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
+ initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
+ }
+ }
+#endif
+}
+
unsigned long __init bootmem_init(unsigned long *pages_avail)
{
- unsigned long bootmap_size, start_pfn;
- unsigned long end_of_phys_memory = 0UL;
- unsigned long bootmap_pfn, bytes_avail, size;
+ unsigned long start_pfn, bytes_avail, size;
+ unsigned long end_of_phys_memory = 0;
+ unsigned long high_pages = 0;
int i;
+ memblock_set_bottom_up(true);
+ memblock_allow_resize();
+
bytes_avail = 0UL;
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
end_of_phys_memory = sp_banks[i].base_addr +
@@ -124,24 +158,25 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
if (sp_banks[i].num_bytes == 0) {
sp_banks[i].base_addr = 0xdeadbeef;
} else {
+ memblock_add(sp_banks[i].base_addr,
+ sp_banks[i].num_bytes);
sp_banks[i+1].num_bytes = 0;
sp_banks[i+1].base_addr = 0xdeadbeef;
}
break;
}
}
+ memblock_add(sp_banks[i].base_addr, sp_banks[i].num_bytes);
}
/* Start with page aligned address of last symbol in kernel
- * image.
+ * image.
*/
start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end));
/* Now shift down to get the real physical page frame number. */
start_pfn >>= PAGE_SHIFT;
- bootmap_pfn = start_pfn;
-
max_pfn = end_of_phys_memory >> PAGE_SHIFT;
max_low_pfn = max_pfn;
@@ -150,85 +185,19 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) {
highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
max_low_pfn = calc_max_low_pfn();
+ high_pages = calc_highpages();
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
- calc_highpages() >> (20 - PAGE_SHIFT));
+ high_pages >> (20 - PAGE_SHIFT));
}
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Now have to check initial ramdisk, so that bootmap does not overwrite it */
- if (sparc_ramdisk_image) {
- if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
- sparc_ramdisk_image -= KERNBASE;
- initrd_start = sparc_ramdisk_image + phys_base;
- initrd_end = initrd_start + sparc_ramdisk_size;
- if (initrd_end > end_of_phys_memory) {
- printk(KERN_CRIT "initrd extends beyond end of memory "
- "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
- initrd_end, end_of_phys_memory);
- initrd_start = 0;
- }
- if (initrd_start) {
- if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
- initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
- bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
- }
- }
-#endif
- /* Initialize the boot-time allocator. */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base,
- max_low_pfn);
+ find_ramdisk(end_of_phys_memory);
- /* Now register the available physical memory with the
- * allocator.
- */
- *pages_avail = 0;
- for (i = 0; sp_banks[i].num_bytes != 0; i++) {
- unsigned long curr_pfn, last_pfn;
-
- curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
- if (curr_pfn >= max_low_pfn)
- break;
-
- last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
- if (last_pfn > max_low_pfn)
- last_pfn = max_low_pfn;
-
- /*
- * .. finally, did all the rounding and playing
- * around just make the area go away?
- */
- if (last_pfn <= curr_pfn)
- continue;
-
- size = (last_pfn - curr_pfn) << PAGE_SHIFT;
- *pages_avail += last_pfn - curr_pfn;
-
- free_bootmem(sp_banks[i].base_addr, size);
- }
-
-#ifdef CONFIG_BLK_DEV_INITRD
- if (initrd_start) {
- /* Reserve the initrd image area. */
- size = initrd_end - initrd_start;
- reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
- initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
- }
-#endif
/* Reserve the kernel text/data/bss. */
size = (start_pfn << PAGE_SHIFT) - phys_base;
- reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ memblock_reserve(phys_base, size);
- /* Reserve the bootmem map. We do not account for it
- * in pages_avail because we will release that memory
- * in free_all_bootmem.
- */
- size = bootmap_size;
- reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
- *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ size = memblock_phys_mem_size() - memblock_reserved_size();
+ *pages_avail = (size >> PAGE_SHIFT) - high_pages;
return max_pfn;
}
@@ -322,7 +291,7 @@ void __init mem_init(void)
map_high_region(start_pfn, end_pfn);
}
-
+
mem_init_print_info(NULL);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b0312f8..512003f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -124,6 +124,7 @@
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
+ select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 3025179..d1e19f3 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
* _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
* While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
* which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
*/
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
#include "misc.h"
#include "error.h"
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index de690c2..a0ab9ab 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,5 +8,6 @@
generic-y += dma-contiguous.h
generic-y += early_ioremap.h
+generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66..0000000
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index dde437f..158ad14 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -108,7 +108,7 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
cx->type);
}
snprintf(cx->desc,
- ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+ ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x",
cx->address);
out:
return retval;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 14ee9a8..506bd2b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7303,8 +7303,9 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
}
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
unsigned long apic_address;
@@ -7315,6 +7316,8 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (start <= apic_address && apic_address < end)
kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+
+ return 0;
}
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index a9e8633..58c6efa 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -913,7 +913,8 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
if (ret)
return ret;
- return bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+ ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+ return ret ?: nbytes;
}
#ifdef CONFIG_DEBUG_BLK_CGROUP
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 41d9036..653100f 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -187,11 +187,25 @@ static const int bfq_stats_min_budgets = 194;
static const int bfq_default_max_budget = 16 * 1024;
/*
- * Async to sync throughput distribution is controlled as follows:
- * when an async request is served, the entity is charged the number
- * of sectors of the request, multiplied by the factor below
+ * When a sync request is dispatched, the queue that contains that
+ * request, and all the ancestor entities of that queue, are charged
+ * with the number of sectors of the request. In constrast, if the
+ * request is async, then the queue and its ancestor entities are
+ * charged with the number of sectors of the request, multiplied by
+ * the factor below. This throttles the bandwidth for async I/O,
+ * w.r.t. to sync I/O, and it is done to counter the tendency of async
+ * writes to steal I/O throughput to reads.
+ *
+ * The current value of this parameter is the result of a tuning with
+ * several hardware and software configurations. We tried to find the
+ * lowest value for which writes do not cause noticeable problems to
+ * reads. In fact, the lower this parameter, the stabler I/O control,
+ * in the following respect. The lower this parameter is, the less
+ * the bandwidth enjoyed by a group decreases
+ * - when the group does writes, w.r.t. to when it does reads;
+ * - when other groups do reads, w.r.t. to when they do writes.
*/
-static const int bfq_async_charge_factor = 10;
+static const int bfq_async_charge_factor = 3;
/* Default timeout values, in jiffies, approximating CFQ defaults. */
const int bfq_timeout = HZ / 8;
@@ -853,16 +867,7 @@ static unsigned long bfq_serv_to_charge(struct request *rq,
if (bfq_bfqq_sync(bfqq) || bfqq->wr_coeff > 1)
return blk_rq_sectors(rq);
- /*
- * If there are no weight-raised queues, then amplify service
- * by just the async charge factor; otherwise amplify service
- * by twice the async charge factor, to further reduce latency
- * for weight-raised queues.
- */
- if (bfqq->bfqd->wr_busy_queues == 0)
- return blk_rq_sectors(rq) * bfq_async_charge_factor;
-
- return blk_rq_sectors(rq) * 2 * bfq_async_charge_factor;
+ return blk_rq_sectors(rq) * bfq_async_charge_factor;
}
/**
@@ -3298,6 +3303,27 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
*/
} else
entity->service = 0;
+
+ /*
+ * Reset the received-service counter for every parent entity.
+ * Differently from what happens with bfqq->entity.service,
+ * the resetting of this counter never needs to be postponed
+ * for parent entities. In fact, in case bfqq may have a
+ * chance to go on being served using the last, partially
+ * consumed budget, bfqq->entity.service needs to be kept,
+ * because if bfqq then actually goes on being served using
+ * the same budget, the last value of bfqq->entity.service is
+ * needed to properly decrement bfqq->entity.budget by the
+ * portion already consumed. In contrast, it is not necessary
+ * to keep entity->service for parent entities too, because
+ * the bubble up of the new value of bfqq->entity.budget will
+ * make sure that the budgets of parent entities are correct,
+ * even in case bfqq and thus parent entities go on receiving
+ * service with the same budget.
+ */
+ entity = entity->parent;
+ for_each_entity(entity)
+ entity->service = 0;
}
/*
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index dbc07b4..ae52bff 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -130,10 +130,14 @@ static bool bfq_update_next_in_service(struct bfq_sched_data *sd,
if (!change_without_lookup) /* lookup needed */
next_in_service = bfq_lookup_next_entity(sd, expiration);
- if (next_in_service)
- parent_sched_may_change = !sd->next_in_service ||
+ if (next_in_service) {
+ bool new_budget_triggers_change =
bfq_update_parent_budget(next_in_service);
+ parent_sched_may_change = !sd->next_in_service ||
+ new_budget_triggers_change;
+ }
+
sd->next_in_service = next_in_service;
if (!next_in_service)
@@ -877,15 +881,11 @@ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq,
unsigned long time_ms)
{
struct bfq_entity *entity = &bfqq->entity;
- int tot_serv_to_charge = entity->service;
- unsigned int timeout_ms = jiffies_to_msecs(bfq_timeout);
-
- if (time_ms > 0 && time_ms < timeout_ms)
- tot_serv_to_charge =
- (bfqd->bfq_max_budget * time_ms) / timeout_ms;
-
- if (tot_serv_to_charge < entity->service)
- tot_serv_to_charge = entity->service;
+ unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout);
+ unsigned long bounded_time_ms = min(time_ms, timeout_ms);
+ int serv_to_charge_for_time =
+ (bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms;
+ int tot_serv_to_charge = max(serv_to_charge_for_time, entity->service);
/* Increase budget to avoid inconsistencies */
if (tot_serv_to_charge > entity->budget)
diff --git a/block/blk-core.c b/block/blk-core.c
index 1255034..dee56c2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1036,7 +1036,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
INIT_WORK(&q->timeout_work, NULL);
- INIT_LIST_HEAD(&q->queue_head);
INIT_LIST_HEAD(&q->timeout_list);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
@@ -2162,7 +2161,9 @@ static inline bool should_fail_request(struct hd_struct *part,
static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
{
- if (part->policy && op_is_write(bio_op(bio))) {
+ const int op = bio_op(bio);
+
+ if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
char b[BDEVNAME_SIZE];
WARN_ONCE(1,
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index cf9c66c..29bfe80 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -462,50 +462,6 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q)
blk_mq_sched_free_tags(set, hctx, i);
}
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
-{
- struct elevator_queue *e = q->elevator;
- int ret;
-
- if (!e)
- return 0;
-
- ret = blk_mq_sched_alloc_tags(q, hctx, hctx_idx);
- if (ret)
- return ret;
-
- if (e->type->ops.mq.init_hctx) {
- ret = e->type->ops.mq.init_hctx(hctx, hctx_idx);
- if (ret) {
- blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
- return ret;
- }
- }
-
- blk_mq_debugfs_register_sched_hctx(q, hctx);
-
- return 0;
-}
-
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx)
-{
- struct elevator_queue *e = q->elevator;
-
- if (!e)
- return;
-
- blk_mq_debugfs_unregister_sched_hctx(hctx);
-
- if (e->type->ops.mq.exit_hctx && hctx->sched_data) {
- e->type->ops.mq.exit_hctx(hctx, hctx_idx);
- hctx->sched_data = NULL;
- }
-
- blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
-}
-
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
{
struct blk_mq_hw_ctx *hctx;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 0cb8f93..4e028ee 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -28,11 +28,6 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
-int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx);
-void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
- unsigned int hctx_idx);
-
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 816923b..94e1ed6 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -320,6 +320,18 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
struct blk_mq_hw_ctx *hctx;
int i;
+ /*
+ * __blk_mq_update_nr_hw_queues will update the nr_hw_queues and
+ * queue_hw_ctx after freeze the queue. So we could use q_usage_counter
+ * to avoid race with it. __blk_mq_update_nr_hw_queues will users
+ * synchronize_rcu to ensure all of the users go out of the critical
+ * section below and see zeroed q_usage_counter.
+ */
+ rcu_read_lock();
+ if (percpu_ref_is_zero(&q->q_usage_counter)) {
+ rcu_read_unlock();
+ return;
+ }
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
@@ -335,7 +347,7 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
}
-
+ rcu_read_unlock();
}
static int bt_alloc(struct sbitmap_queue *bt, unsigned int depth,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 72a0033..85a1c1a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2145,8 +2145,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_request)
set->ops->exit_request(set, hctx->fq->flush_rq, hctx_idx);
- blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
-
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
@@ -2214,12 +2212,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
goto free_bitmap;
- if (blk_mq_sched_init_hctx(q, hctx, hctx_idx))
- goto exit_hctx;
-
hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
if (!hctx->fq)
- goto sched_exit_hctx;
+ goto exit_hctx;
if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
goto free_fq;
@@ -2233,8 +2228,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
free_fq:
kfree(hctx->fq);
- sched_exit_hctx:
- blk_mq_sched_exit_hctx(q, hctx, hctx_idx);
exit_hctx:
if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx);
@@ -2896,10 +2889,81 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
return ret;
}
+/*
+ * request_queue and elevator_type pair.
+ * It is just used by __blk_mq_update_nr_hw_queues to cache
+ * the elevator_type associated with a request_queue.
+ */
+struct blk_mq_qe_pair {
+ struct list_head node;
+ struct request_queue *q;
+ struct elevator_type *type;
+};
+
+/*
+ * Cache the elevator_type in qe pair list and switch the
+ * io scheduler to 'none'
+ */
+static bool blk_mq_elv_switch_none(struct list_head *head,
+ struct request_queue *q)
+{
+ struct blk_mq_qe_pair *qe;
+
+ if (!q->elevator)
+ return true;
+
+ qe = kmalloc(sizeof(*qe), GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+ if (!qe)
+ return false;
+
+ INIT_LIST_HEAD(&qe->node);
+ qe->q = q;
+ qe->type = q->elevator->type;
+ list_add(&qe->node, head);
+
+ mutex_lock(&q->sysfs_lock);
+ /*
+ * After elevator_switch_mq, the previous elevator_queue will be
+ * released by elevator_release. The reference of the io scheduler
+ * module get by elevator_get will also be put. So we need to get
+ * a reference of the io scheduler module here to prevent it to be
+ * removed.
+ */
+ __module_get(qe->type->elevator_owner);
+ elevator_switch_mq(q, NULL);
+ mutex_unlock(&q->sysfs_lock);
+
+ return true;
+}
+
+static void blk_mq_elv_switch_back(struct list_head *head,
+ struct request_queue *q)
+{
+ struct blk_mq_qe_pair *qe;
+ struct elevator_type *t = NULL;
+
+ list_for_each_entry(qe, head, node)
+ if (qe->q == q) {
+ t = qe->type;
+ break;
+ }
+
+ if (!t)
+ return;
+
+ list_del(&qe->node);
+ kfree(qe);
+
+ mutex_lock(&q->sysfs_lock);
+ elevator_switch_mq(q, t);
+ mutex_unlock(&q->sysfs_lock);
+}
+
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
int nr_hw_queues)
{
struct request_queue *q;
+ LIST_HEAD(head);
lockdep_assert_held(&set->tag_list_lock);
@@ -2910,6 +2974,18 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
+ /*
+ * Sync with blk_mq_queue_tag_busy_iter.
+ */
+ synchronize_rcu();
+ /*
+ * Switch IO scheduler to 'none', cleaning up the data associated
+ * with the previous scheduler. We will switch back once we are done
+ * updating the new sw to hw queue mappings.
+ */
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ if (!blk_mq_elv_switch_none(&head, q))
+ goto switch_back;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
@@ -2918,6 +2994,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_queue_reinit(q);
}
+switch_back:
+ list_for_each_entry(q, &set->tag_list, tag_set_list)
+ blk_mq_elv_switch_back(&head, q);
+
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_unfreeze_queue(q);
}
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1d94a20..bb93c7c 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -576,12 +576,8 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio, spinlock_t *lock)
struct rq_wb *rwb = RQWB(rqos);
enum wbt_flags flags;
- if (!rwb_enabled(rwb))
- return;
-
flags = bio_to_wbt_flags(rwb, bio);
-
- if (!wbt_should_throttle(rwb, bio)) {
+ if (!(flags & WBT_TRACKED)) {
if (flags & WBT_READ)
wb_timestamp(rwb, &rwb->last_issue);
return;
diff --git a/block/blk.h b/block/blk.h
index d4d67e9..9db4e38 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -234,6 +234,8 @@ static inline void elv_deactivate_rq(struct request_queue *q, struct request *rq
int elevator_init(struct request_queue *);
int elevator_init_mq(struct request_queue *q);
+int elevator_switch_mq(struct request_queue *q,
+ struct elevator_type *new_e);
void elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q);
void elv_unregister_queue(struct request_queue *q);
@@ -297,7 +299,7 @@ extern int blk_update_nr_requests(struct request_queue *, unsigned int);
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request
*/
-static inline int blk_do_io_stat(struct request *rq)
+static inline bool blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->rq_flags & RQF_IO_STAT) &&
diff --git a/block/elevator.c b/block/elevator.c
index fa828b5..5ea6e7d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -933,16 +933,13 @@ void elv_unregister(struct elevator_type *e)
}
EXPORT_SYMBOL_GPL(elv_unregister);
-static int elevator_switch_mq(struct request_queue *q,
+int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e)
{
int ret;
lockdep_assert_held(&q->sysfs_lock);
- blk_mq_freeze_queue(q);
- blk_mq_quiesce_queue(q);
-
if (q->elevator) {
if (q->elevator->registered)
elv_unregister_queue(q);
@@ -968,8 +965,6 @@ static int elevator_switch_mq(struct request_queue *q,
blk_add_trace_msg(q, "elv switch: none");
out:
- blk_mq_unquiesce_queue(q);
- blk_mq_unfreeze_queue(q);
return ret;
}
@@ -1021,8 +1016,17 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
lockdep_assert_held(&q->sysfs_lock);
- if (q->mq_ops)
- return elevator_switch_mq(q, new_e);
+ if (q->mq_ops) {
+ blk_mq_freeze_queue(q);
+ blk_mq_quiesce_queue(q);
+
+ err = elevator_switch_mq(q, new_e);
+
+ blk_mq_unquiesce_queue(q);
+ blk_mq_unfreeze_queue(q);
+
+ return err;
+ }
/*
* Turn on BYPASS and drain all requests w/ elevator private data.
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index c5367bf..0f28a38 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -164,6 +164,7 @@ struct acpi_namespace_node {
#define ANOBJ_SUBTREE_HAS_INI 0x10 /* Used to optimize device initialization */
#define ANOBJ_EVALUATED 0x20 /* Set on first evaluation of node */
#define ANOBJ_ALLOCATED_BUFFER 0x40 /* Method AML buffer is dynamic (install_method) */
+#define ANOBJ_NODE_EARLY_INIT 0x80 /* acpi_exec only: Node was create via init file (-fi) */
#define ANOBJ_IS_EXTERNAL 0x08 /* iASL only: This object created via External() */
#define ANOBJ_METHOD_NO_RETVAL 0x10 /* iASL only: Method has no return value */
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 3825df9..bbb3b4d 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -25,14 +25,15 @@
/* Flags for acpi_ns_lookup, acpi_ns_search_and_enter */
#define ACPI_NS_NO_UPSEARCH 0
-#define ACPI_NS_SEARCH_PARENT 0x01
-#define ACPI_NS_DONT_OPEN_SCOPE 0x02
-#define ACPI_NS_NO_PEER_SEARCH 0x04
-#define ACPI_NS_ERROR_IF_FOUND 0x08
-#define ACPI_NS_PREFIX_IS_SCOPE 0x10
-#define ACPI_NS_EXTERNAL 0x20
-#define ACPI_NS_TEMPORARY 0x40
-#define ACPI_NS_OVERRIDE_IF_FOUND 0x80
+#define ACPI_NS_SEARCH_PARENT 0x0001
+#define ACPI_NS_DONT_OPEN_SCOPE 0x0002
+#define ACPI_NS_NO_PEER_SEARCH 0x0004
+#define ACPI_NS_ERROR_IF_FOUND 0x0008
+#define ACPI_NS_PREFIX_IS_SCOPE 0x0010
+#define ACPI_NS_EXTERNAL 0x0020
+#define ACPI_NS_TEMPORARY 0x0040
+#define ACPI_NS_OVERRIDE_IF_FOUND 0x0080
+#define ACPI_NS_EARLY_INIT 0x0100
/* Flags for acpi_ns_walk_namespace */
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index 2733cd4..3374d41 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -180,6 +180,8 @@ char acpi_ut_remove_leading_zeros(char **string);
u8 acpi_ut_detect_hex_prefix(char **string);
+void acpi_ut_remove_hex_prefix(char **string);
+
u8 acpi_ut_detect_octal_prefix(char **string);
/*
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 556ff59..3e5f953 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -763,7 +763,12 @@ acpi_db_command_dispatch(char *input_buffer,
case CMD_DISASSEMBLE:
case CMD_DISASM:
+#ifdef ACPI_DISASSEMBLER
(void)acpi_db_disassemble_method(acpi_gbl_db_args[1]);
+#else
+ acpi_os_printf
+ ("The AML Disassembler is not configured/present\n");
+#endif
break;
case CMD_DUMP:
@@ -872,7 +877,12 @@ acpi_db_command_dispatch(char *input_buffer,
case CMD_LIST:
+#ifdef ACPI_DISASSEMBLER
acpi_db_disassemble_aml(acpi_gbl_db_args[1], op);
+#else
+ acpi_os_printf
+ ("The AML Disassembler is not configured/present\n");
+#endif
break;
case CMD_LOCKS:
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index 9fcecf1..d8b7a0f 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -216,6 +216,7 @@ void acpi_db_set_method_data(char *type_arg, char *index_arg, char *value_arg)
acpi_ut_remove_reference(obj_desc);
}
+#ifdef ACPI_DISASSEMBLER
/*******************************************************************************
*
* FUNCTION: acpi_db_disassemble_aml
@@ -242,9 +243,8 @@ void acpi_db_disassemble_aml(char *statements, union acpi_parse_object *op)
if (statements) {
num_statements = strtoul(statements, NULL, 0);
}
-#ifdef ACPI_DISASSEMBLER
+
acpi_dm_disassemble(NULL, op, num_statements);
-#endif
}
/*******************************************************************************
@@ -317,8 +317,6 @@ acpi_status acpi_db_disassemble_method(char *name)
walk_state->parse_flags |= ACPI_PARSE_DISASSEMBLE;
status = acpi_ps_parse_aml(walk_state);
-
-#ifdef ACPI_DISASSEMBLER
(void)acpi_dm_parse_deferred_ops(op);
/* Now we can disassemble the method */
@@ -326,7 +324,6 @@ acpi_status acpi_db_disassemble_method(char *name)
acpi_gbl_dm_opt_verbose = FALSE;
acpi_dm_disassemble(NULL, op, 0);
acpi_gbl_dm_opt_verbose = TRUE;
-#endif
acpi_ps_delete_parse_tree(op);
@@ -337,6 +334,7 @@ acpi_status acpi_db_disassemble_method(char *name)
acpi_ut_release_owner_id(&obj_desc->method.owner_id);
return (AE_OK);
}
+#endif
/*******************************************************************************
*
diff --git a/drivers/acpi/acpica/dbxface.c b/drivers/acpi/acpica/dbxface.c
index 4647aa8..f252672 100644
--- a/drivers/acpi/acpica/dbxface.c
+++ b/drivers/acpi/acpica/dbxface.c
@@ -10,6 +10,7 @@
#include "amlcode.h"
#include "acdebug.h"
#include "acinterp.h"
+#include "acparser.h"
#define _COMPONENT ACPI_CA_DEBUGGER
ACPI_MODULE_NAME("dbxface")
@@ -262,10 +263,17 @@ acpi_db_single_step(struct acpi_walk_state *walk_state,
}
}
- /* Now we can display it */
+ /* Now we can disassemble and display it */
#ifdef ACPI_DISASSEMBLER
acpi_dm_disassemble(walk_state, display_op, ACPI_UINT32_MAX);
+#else
+ /*
+ * The AML Disassembler is not configured - at least we can
+ * display the opcode value and name
+ */
+ acpi_os_printf("AML Opcode: %4.4X %s\n", op->common.aml_opcode,
+ acpi_ps_get_opcode_name(op->common.aml_opcode));
#endif
if ((op->common.aml_opcode == AML_IF_OP) ||
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index 7c93759..30fe895 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -15,6 +15,10 @@
#include "acnamesp.h"
#include "acparser.h"
+#ifdef ACPI_EXEC_APP
+#include "aecommon.h"
+#endif
+
#define _COMPONENT ACPI_DISPATCHER
ACPI_MODULE_NAME("dsfield")
@@ -259,6 +263,13 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
u64 position;
union acpi_parse_object *child;
+#ifdef ACPI_EXEC_APP
+ u64 value = 0;
+ union acpi_operand_object *result_desc;
+ union acpi_operand_object *obj_desc;
+ char *name_path;
+#endif
+
ACPI_FUNCTION_TRACE_PTR(ds_get_field_names, info);
/* First field starts at bit zero */
@@ -391,6 +402,25 @@ acpi_ds_get_field_names(struct acpi_create_field_info *info,
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
+#ifdef ACPI_EXEC_APP
+ name_path =
+ acpi_ns_get_external_pathname(info->
+ field_node);
+ obj_desc =
+ acpi_ut_create_integer_object
+ (value);
+ if (ACPI_SUCCESS
+ (ae_lookup_init_file_entry
+ (name_path, &value))) {
+ acpi_ex_write_data_to_field
+ (obj_desc,
+ acpi_ns_get_attached_object
+ (info->field_node),
+ &result_desc);
+ }
+ acpi_ut_remove_reference(obj_desc);
+ ACPI_FREE(name_path);
+#endif
}
}
@@ -573,7 +603,9 @@ acpi_ds_init_field_objects(union acpi_parse_object *op,
!(walk_state->parse_flags & ACPI_PARSE_MODULE_LEVEL)) {
flags |= ACPI_NS_TEMPORARY;
}
-
+#ifdef ACPI_EXEC_APP
+ flags |= ACPI_NS_OVERRIDE_IF_FOUND;
+#endif
/*
* Walk the list of entries in the field_list
* Note: field_list can be of zero length. In this case, Arg will be NULL.
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index 3de794b..69603ba 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -528,13 +528,18 @@ acpi_status acpi_hw_register_read(u32 register_id, u32 *return_value)
status =
acpi_hw_read(&value64, &acpi_gbl_FADT.xpm2_control_block);
- value = (u32)value64;
+ if (ACPI_SUCCESS(status)) {
+ value = (u32)value64;
+ }
break;
case ACPI_REGISTER_PM_TIMER: /* 32-bit access */
status = acpi_hw_read(&value64, &acpi_gbl_FADT.xpm_timer_block);
- value = (u32)value64;
+ if (ACPI_SUCCESS(status)) {
+ value = (u32)value64;
+ }
+
break;
case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index fe9d46d8..d8b8fc2f 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -56,14 +56,9 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state)
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
- /*
- * If the target sleep state is S5, clear all GPEs and fixed events too
- */
- if (sleep_state == ACPI_STATE_S5) {
- status = acpi_hw_clear_acpi_status();
- if (ACPI_FAILURE(status)) {
- return_ACPI_STATUS(status);
- }
+ status = acpi_hw_clear_acpi_status();
+ if (ACPI_FAILURE(status)) {
+ return_ACPI_STATUS(status);
}
acpi_gbl_system_awake_and_running = FALSE;
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 83a593e..e3f10af 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -558,6 +558,14 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
(char *)¤t_node->name,
current_node));
}
+#ifdef ACPI_EXEC_APP
+ if ((status == AE_ALREADY_EXISTS) &&
+ (this_node->flags & ANOBJ_NODE_EARLY_INIT)) {
+ this_node->flags &= ~ANOBJ_NODE_EARLY_INIT;
+ status = AE_OK;
+ }
+#endif
+
#ifdef ACPI_ASL_COMPILER
/*
* If this ACPI name already exists within the namespace as an
@@ -676,6 +684,11 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
}
}
}
+#ifdef ACPI_EXEC_APP
+ if (flags & ACPI_NS_EARLY_INIT) {
+ this_node->flags |= ANOBJ_NODE_EARLY_INIT;
+ }
+#endif
*return_node = this_node;
return_ACPI_STATUS(AE_OK);
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 44f35ab..34fc2f7 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -22,6 +22,7 @@
#include "acdispat.h"
#include "amlcode.h"
#include "acconvert.h"
+#include "acnamesp.h"
#define _COMPONENT ACPI_PARSER
ACPI_MODULE_NAME("psloop")
@@ -527,12 +528,18 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
if (ACPI_FAILURE(status)) {
return_ACPI_STATUS(status);
}
- if (walk_state->opcode == AML_SCOPE_OP) {
+ if (acpi_ns_opens_scope
+ (acpi_ps_get_opcode_info
+ (walk_state->opcode)->object_type)) {
/*
- * If the scope op fails to parse, skip the body of the
- * scope op because the parse failure indicates that the
- * device may not exist.
+ * If the scope/device op fails to parse, skip the body of
+ * the scope op because the parse failure indicates that
+ * the device may not exist.
*/
+ ACPI_ERROR((AE_INFO,
+ "Skip parsing opcode %s",
+ acpi_ps_get_opcode_name
+ (walk_state->opcode)));
walk_state->parser_state.aml =
walk_state->aml + 1;
walk_state->parser_state.aml =
@@ -540,8 +547,6 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
(&walk_state->parser_state);
walk_state->aml =
walk_state->parser_state.aml;
- ACPI_ERROR((AE_INFO,
- "Skipping Scope block"));
}
continue;
@@ -709,20 +714,20 @@ acpi_status acpi_ps_parse_loop(struct acpi_walk_state *walk_state)
} else
if ((walk_state->
parse_flags & ACPI_PARSE_MODULE_LEVEL)
- && status != AE_CTRL_TRANSFER
- && ACPI_FAILURE(status)) {
+ && (ACPI_AML_EXCEPTION(status)
+ || status == AE_ALREADY_EXISTS
+ || status == AE_NOT_FOUND)) {
/*
- * ACPI_PARSE_MODULE_LEVEL flag means that we are currently
- * loading a table by executing it as a control method.
- * However, if we encounter an error while loading the table,
- * we need to keep trying to load the table rather than
- * aborting the table load (setting the status to AE_OK
- * continues the table load). If we get a failure at this
- * point, it means that the dispatcher got an error while
- * processing Op (most likely an AML operand error) or a
- * control method was called from module level and the
- * dispatcher returned AE_CTRL_TRANSFER. In the latter case,
- * leave the status alone, there's nothing wrong with it.
+ * ACPI_PARSE_MODULE_LEVEL flag means that we
+ * are currently loading a table by executing
+ * it as a control method. However, if we
+ * encounter an error while loading the table,
+ * we need to keep trying to load the table
+ * rather than aborting the table load (setting
+ * the status to AE_OK continues the table
+ * load). If we get a failure at this point, it
+ * means that the dispatcher got an error while
+ * trying to execute the Op.
*/
status = AE_OK;
}
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 51891f9..862149c 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -516,9 +516,9 @@ acpi_tb_verify_temp_table(struct acpi_table_desc *table_desc,
acpi_tb_check_duplication(table_desc, table_index);
if (ACPI_FAILURE(status)) {
if (status != AE_CTRL_TERMINATE) {
- ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY,
+ ACPI_EXCEPTION((AE_INFO, status,
"%4.4s 0x%8.8X%8.8X"
- " Table is duplicated",
+ " Table is already loaded",
acpi_ut_valid_nameseg
(table_desc->signature.
ascii) ? table_desc->
diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c
index 118f3ff..8cc4392 100644
--- a/drivers/acpi/acpica/utdelete.c
+++ b/drivers/acpi/acpica/utdelete.c
@@ -355,6 +355,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
u16 original_count;
u16 new_count = 0;
acpi_cpu_flags lock_flags;
+ char *message;
ACPI_FUNCTION_NAME(ut_update_ref_count);
@@ -391,6 +392,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
object, object->common.type,
acpi_ut_get_object_type_name(object),
new_count));
+ message = "Incremement";
break;
case REF_DECREMENT:
@@ -420,6 +422,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
if (new_count == 0) {
acpi_ut_delete_internal_obj(object);
}
+ message = "Decrement";
break;
default:
@@ -436,8 +439,8 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action)
*/
if (new_count > ACPI_MAX_REFERENCE_COUNT) {
ACPI_WARNING((AE_INFO,
- "Large Reference Count (0x%X) in object %p, Type=0x%.2X",
- new_count, object, object->common.type));
+ "Large Reference Count (0x%X) in object %p, Type=0x%.2X Operation=%s",
+ new_count, object, object->common.type, message));
}
}
diff --git a/drivers/acpi/acpica/utstrsuppt.c b/drivers/acpi/acpica/utstrsuppt.c
index 954f8e3..05ff200 100644
--- a/drivers/acpi/acpica/utstrsuppt.c
+++ b/drivers/acpi/acpica/utstrsuppt.c
@@ -231,11 +231,11 @@ char acpi_ut_remove_whitespace(char **string)
u8 acpi_ut_detect_hex_prefix(char **string)
{
+ char *initial_position = *string;
- if ((**string == ACPI_ASCII_ZERO) &&
- (tolower((int)*(*string + 1)) == 'x')) {
- *string += 2; /* Go past the leading 0x */
- return (TRUE);
+ acpi_ut_remove_hex_prefix(string);
+ if (*string != initial_position) {
+ return (TRUE); /* String is past leading 0x */
}
return (FALSE); /* Not a hex string */
@@ -243,6 +243,26 @@ u8 acpi_ut_detect_hex_prefix(char **string)
/*******************************************************************************
*
+ * FUNCTION: acpi_ut_remove_hex_prefix
+ *
+ * PARAMETERS: string - Pointer to input ASCII string
+ *
+ * RETURN: none
+ *
+ * DESCRIPTION: Remove a hex "0x" prefix
+ *
+ ******************************************************************************/
+
+void acpi_ut_remove_hex_prefix(char **string)
+{
+ if ((**string == ACPI_ASCII_ZERO) &&
+ (tolower((int)*(*string + 1)) == 'x')) {
+ *string += 2; /* Go past the leading 0x */
+ }
+}
+
+/*******************************************************************************
+ *
* FUNCTION: acpi_ut_detect_octal_prefix
*
* PARAMETERS: string - Pointer to input ASCII string
diff --git a/drivers/acpi/acpica/utstrtoul64.c b/drivers/acpi/acpica/utstrtoul64.c
index 8fadad2..5fde619 100644
--- a/drivers/acpi/acpica/utstrtoul64.c
+++ b/drivers/acpi/acpica/utstrtoul64.c
@@ -218,7 +218,7 @@ u64 acpi_ut_implicit_strtoul64(char *string)
* implicit conversions, and the "0x" prefix is "not allowed".
* However, allow a "0x" prefix as an ACPI extension.
*/
- acpi_ut_detect_hex_prefix(&string);
+ acpi_ut_remove_hex_prefix(&string);
if (!acpi_ut_remove_leading_zeros(&string)) {
return_VALUE(0);
diff --git a/drivers/acpi/pmic/intel_pmic_crc.c b/drivers/acpi/pmic/intel_pmic_crc.c
index 7ffa740..22c9e37 100644
--- a/drivers/acpi/pmic/intel_pmic_crc.c
+++ b/drivers/acpi/pmic/intel_pmic_crc.c
@@ -25,16 +25,121 @@
#define PMIC_A0LOCK_REG 0xc5
static struct pmic_table power_table[] = {
+/* {
+ .address = 0x00,
+ .reg = ??,
+ .bit = ??,
+ }, ** VSYS */
+ {
+ .address = 0x04,
+ .reg = 0x63,
+ .bit = 0x00,
+ }, /* SYSX -> VSYS_SX */
+ {
+ .address = 0x08,
+ .reg = 0x62,
+ .bit = 0x00,
+ }, /* SYSU -> VSYS_U */
+ {
+ .address = 0x0c,
+ .reg = 0x64,
+ .bit = 0x00,
+ }, /* SYSS -> VSYS_S */
+ {
+ .address = 0x10,
+ .reg = 0x6a,
+ .bit = 0x00,
+ }, /* V50S -> V5P0S */
+ {
+ .address = 0x14,
+ .reg = 0x6b,
+ .bit = 0x00,
+ }, /* HOST -> VHOST, USB2/3 host */
+ {
+ .address = 0x18,
+ .reg = 0x6c,
+ .bit = 0x00,
+ }, /* VBUS -> VBUS, USB2/3 OTG */
+ {
+ .address = 0x1c,
+ .reg = 0x6d,
+ .bit = 0x00,
+ }, /* HDMI -> VHDMI */
+/* {
+ .address = 0x20,
+ .reg = ??,
+ .bit = ??,
+ }, ** S285 */
{
.address = 0x24,
.reg = 0x66,
.bit = 0x00,
- },
+ }, /* X285 -> V2P85SX, camera */
+/* {
+ .address = 0x28,
+ .reg = ??,
+ .bit = ??,
+ }, ** V33A */
+ {
+ .address = 0x2c,
+ .reg = 0x69,
+ .bit = 0x00,
+ }, /* V33S -> V3P3S, display/ssd/audio */
+ {
+ .address = 0x30,
+ .reg = 0x68,
+ .bit = 0x00,
+ }, /* V33U -> V3P3U, SDIO wifi&bt */
+/* {
+ .address = 0x34 .. 0x40,
+ .reg = ??,
+ .bit = ??,
+ }, ** V33I, V18A, REFQ, V12A */
+ {
+ .address = 0x44,
+ .reg = 0x5c,
+ .bit = 0x00,
+ }, /* V18S -> V1P8S, SOC/USB PHY/SIM */
{
.address = 0x48,
.reg = 0x5d,
.bit = 0x00,
- },
+ }, /* V18X -> V1P8SX, eMMC/camara/audio */
+ {
+ .address = 0x4c,
+ .reg = 0x5b,
+ .bit = 0x00,
+ }, /* V18U -> V1P8U, LPDDR */
+ {
+ .address = 0x50,
+ .reg = 0x61,
+ .bit = 0x00,
+ }, /* V12X -> V1P2SX, SOC SFR */
+ {
+ .address = 0x54,
+ .reg = 0x60,
+ .bit = 0x00,
+ }, /* V12S -> V1P2S, MIPI */
+/* {
+ .address = 0x58,
+ .reg = ??,
+ .bit = ??,
+ }, ** V10A */
+ {
+ .address = 0x5c,
+ .reg = 0x56,
+ .bit = 0x00,
+ }, /* V10S -> V1P0S, SOC GFX */
+ {
+ .address = 0x60,
+ .reg = 0x57,
+ .bit = 0x00,
+ }, /* V10X -> V1P0SX, SOC display/DDR IO/PCIe */
+ {
+ .address = 0x64,
+ .reg = 0x59,
+ .bit = 0x00,
+ }, /* V105 -> V1P05S, L2 SRAM */
};
static struct pmic_table thermal_table[] = {
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index f99e5c8..581312a 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2428,16 +2428,20 @@ static bool DAC960_V2_ReportDeviceConfiguration(DAC960_Controller_T
{
DAC960_V2_LogicalDeviceInfo_T *LogicalDeviceInfo =
Controller->V2.LogicalDeviceInformation[LogicalDriveNumber];
- unsigned char *ReadCacheStatus[] = { "Read Cache Disabled",
- "Read Cache Enabled",
- "Read Ahead Enabled",
- "Intelligent Read Ahead Enabled",
- "-", "-", "-", "-" };
- unsigned char *WriteCacheStatus[] = { "Write Cache Disabled",
- "Logical Device Read Only",
- "Write Cache Enabled",
- "Intelligent Write Cache Enabled",
- "-", "-", "-", "-" };
+ static const unsigned char *ReadCacheStatus[] = {
+ "Read Cache Disabled",
+ "Read Cache Enabled",
+ "Read Ahead Enabled",
+ "Intelligent Read Ahead Enabled",
+ "-", "-", "-", "-"
+ };
+ static const unsigned char *WriteCacheStatus[] = {
+ "Write Cache Disabled",
+ "Logical Device Read Only",
+ "Write Cache Enabled",
+ "Intelligent Write Cache Enabled",
+ "-", "-", "-", "-"
+ };
unsigned char *GeometryTranslation;
if (LogicalDeviceInfo == NULL) continue;
switch (LogicalDeviceInfo->DriveGeometry)
@@ -4339,14 +4343,16 @@ static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command)
static void DAC960_V2_ReadWriteError(DAC960_Command_T *Command)
{
DAC960_Controller_T *Controller = Command->Controller;
- unsigned char *SenseErrors[] = { "NO SENSE", "RECOVERED ERROR",
- "NOT READY", "MEDIUM ERROR",
- "HARDWARE ERROR", "ILLEGAL REQUEST",
- "UNIT ATTENTION", "DATA PROTECT",
- "BLANK CHECK", "VENDOR-SPECIFIC",
- "COPY ABORTED", "ABORTED COMMAND",
- "EQUAL", "VOLUME OVERFLOW",
- "MISCOMPARE", "RESERVED" };
+ static const unsigned char *SenseErrors[] = {
+ "NO SENSE", "RECOVERED ERROR",
+ "NOT READY", "MEDIUM ERROR",
+ "HARDWARE ERROR", "ILLEGAL REQUEST",
+ "UNIT ATTENTION", "DATA PROTECT",
+ "BLANK CHECK", "VENDOR-SPECIFIC",
+ "COPY ABORTED", "ABORTED COMMAND",
+ "EQUAL", "VOLUME OVERFLOW",
+ "MISCOMPARE", "RESERVED"
+ };
unsigned char *CommandName = "UNKNOWN";
switch (Command->CommandType)
{
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index e285413..6f1d25c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2740,6 +2740,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
pd->write_congestion_on = write_congestion_on;
pd->write_congestion_off = write_congestion_off;
+ ret = -ENOMEM;
disk = alloc_disk(1);
if (!disk)
goto out_mem;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index c7acf74..a1d6b55 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -337,6 +337,7 @@ static ssize_t backing_dev_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
char *file_name;
+ size_t sz;
struct file *backing_dev = NULL;
struct inode *inode;
struct address_space *mapping;
@@ -357,7 +358,11 @@ static ssize_t backing_dev_store(struct device *dev,
goto out;
}
- strlcpy(file_name, buf, len);
+ strlcpy(file_name, buf, PATH_MAX);
+ /* ignore trailing newline */
+ sz = strlen(file_name);
+ if (sz > 0 && file_name[sz - 1] == '\n')
+ file_name[sz - 1] = 0x00;
backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
if (IS_ERR(backing_dev)) {
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 1d50e97..6d53f7d 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -555,12 +555,20 @@ EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop);
void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
{
- struct policy_dbs_info *policy_dbs = policy->governor_data;
+ struct policy_dbs_info *policy_dbs;
+
+ /* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */
+ mutex_lock(&gov_dbs_data_mutex);
+ policy_dbs = policy->governor_data;
+ if (!policy_dbs)
+ goto out;
mutex_lock(&policy_dbs->update_mutex);
cpufreq_policy_apply_limits(policy);
gov_update_sample_delay(policy_dbs, 0);
-
mutex_unlock(&policy_dbs->update_mutex);
+
+out:
+ mutex_unlock(&gov_dbs_data_mutex);
}
EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 1aef60d..110483f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -328,9 +328,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
unsigned int polling_threshold;
/*
- * We want to default to C1 (hlt), not to busy polling
- * unless the timer is happening really really soon, or
- * C1's exit latency exceeds the user configured limit.
+ * Default to a physical idle state, not to busy polling, unless
+ * a timer is going to trigger really really soon.
*/
polling_threshold = max_t(unsigned int, 20, s->target_residency);
if (data->next_timer_us > polling_threshold &&
@@ -349,14 +348,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* If the tick is already stopped, the cost of possible short
* idle duration misprediction is much higher, because the CPU
* may be stuck in a shallow idle state for a long time as a
- * result of it. In that case say we might mispredict and try
- * to force the CPU into a state for which we would have stopped
- * the tick, unless a timer is going to expire really soon
- * anyway.
+ * result of it. In that case say we might mispredict and use
+ * the known time till the closest timer event for the idle
+ * state selection.
*/
if (data->predicted_us < TICK_USEC)
- data->predicted_us = min_t(unsigned int, TICK_USEC,
- ktime_to_us(delta_next));
+ data->predicted_us = ktime_to_us(delta_next);
} else {
/*
* Use the performance multiplier and the user-configurable
@@ -381,8 +378,22 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
continue;
if (idx == -1)
idx = i; /* first enabled state */
- if (s->target_residency > data->predicted_us)
- break;
+ if (s->target_residency > data->predicted_us) {
+ if (!tick_nohz_tick_stopped())
+ break;
+
+ /*
+ * If the state selected so far is shallow and this
+ * state's target residency matches the time till the
+ * closest timer event, select this one to avoid getting
+ * stuck in the shallow one for too long.
+ */
+ if (drv->states[idx].target_residency < TICK_USEC &&
+ s->target_residency <= ktime_to_us(delta_next))
+ idx = i;
+
+ goto out;
+ }
if (s->exit_latency > latency_req) {
/*
* If we break out of the loop for latency reasons, use
@@ -403,14 +414,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* Don't stop the tick if the selected state is a polling one or if the
* expected idle duration is shorter than the tick period length.
*/
- if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- expected_interval < TICK_USEC) {
+ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+ expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) {
unsigned int delta_next_us = ktime_to_us(delta_next);
*stop_tick = false;
- if (!tick_nohz_tick_stopped() && idx > 0 &&
- drv->states[idx].target_residency > delta_next_us) {
+ if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
/*
* The tick is not going to be stopped and the target
* residency of the state to be returned is not within
@@ -418,8 +428,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick, so try to correct that.
*/
for (i = idx - 1; i >= 0; i--) {
- if (drv->states[i].disabled ||
- dev->states_usage[i].disable)
+ if (drv->states[i].disabled ||
+ dev->states_usage[i].disable)
continue;
idx = i;
@@ -429,6 +439,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
}
+out:
data->last_state_idx = idx;
return data->last_state_idx;
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 88c322d..14c40a7 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -24,6 +24,7 @@
-D__NO_FORTIFY \
$(call cc-option,-ffreestanding) \
$(call cc-option,-fno-stack-protector) \
+ -D__DISABLE_EXPORTS
GCOV_PROFILE := n
KASAN_SANITIZE := n
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index a365ea2..e55508b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -178,12 +178,18 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
*
* @amn: our notifier
*/
-static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
- mutex_lock(&amn->read_lock);
+ if (blockable)
+ mutex_lock(&amn->read_lock);
+ else if (!mutex_trylock(&amn->read_lock))
+ return -EAGAIN;
+
if (atomic_inc_return(&amn->recursion) == 1)
down_read_non_owner(&amn->lock);
mutex_unlock(&amn->read_lock);
+
+ return 0;
}
/**
@@ -239,10 +245,11 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@@ -250,17 +257,28 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(amn);
+ /* TODO we should be able to split locking for interval tree and
+ * amdgpu_mn_invalidate_node
+ */
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
amdgpu_mn_invalidate_node(node, start, end);
}
+
+ return 0;
}
/**
@@ -275,10 +293,11 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
* necessitates evicting all user-mode queues of the process. The BOs
* are restorted in amdgpu_mn_invalidate_range_end_hsa.
*/
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@@ -286,13 +305,19 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
/* notification is exclusive, but interval is inclusive */
end -= 1;
- amdgpu_mn_read_lock(amn);
+ if (amdgpu_mn_read_lock(amn, blockable))
+ return -EAGAIN;
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
+ if (!blockable) {
+ amdgpu_mn_read_unlock(amn);
+ return -EAGAIN;
+ }
+
node = container_of(it, struct amdgpu_mn_node, it);
it = interval_tree_iter_next(it, start, end);
@@ -304,6 +329,8 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
amdgpu_amdkfd_evict_userptr(mem, mm);
}
}
+
+ return 0;
}
/**
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index dcd6e23..2c9b284 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -112,10 +112,11 @@ static void del_object(struct i915_mmu_object *mo)
mo->attached = false;
}
-static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
+static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct i915_mmu_notifier *mn =
container_of(_mn, struct i915_mmu_notifier, mn);
@@ -124,7 +125,7 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
LIST_HEAD(cancelled);
if (RB_EMPTY_ROOT(&mn->objects.rb_root))
- return;
+ return 0;
/* interval ranges are inclusive, but invalidate range is exclusive */
end--;
@@ -132,6 +133,10 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
spin_lock(&mn->lock);
it = interval_tree_iter_first(&mn->objects, start, end);
while (it) {
+ if (!blockable) {
+ spin_unlock(&mn->lock);
+ return -EAGAIN;
+ }
/* The mmu_object is released late when destroying the
* GEM object so it is entirely possible to gain a
* reference on an object in the process of being freed
@@ -154,6 +159,8 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
if (!list_empty(&cancelled))
flush_workqueue(mn->wq);
+
+ return 0;
}
static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index abd2497..f8b35df 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -118,19 +118,27 @@ static void radeon_mn_release(struct mmu_notifier *mn,
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
-static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
struct ttm_operation_ctx ctx = { false, false };
struct interval_tree_node *it;
+ int ret = 0;
/* notification is exclusive, but interval is inclusive */
end -= 1;
- mutex_lock(&rmn->lock);
+ /* TODO we should be able to split locking for interval tree and
+ * the tear down.
+ */
+ if (blockable)
+ mutex_lock(&rmn->lock);
+ else if (!mutex_trylock(&rmn->lock))
+ return -EAGAIN;
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
@@ -138,6 +146,11 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
struct radeon_bo *bo;
long r;
+ if (!blockable) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
node = container_of(it, struct radeon_mn_node, it);
it = interval_tree_iter_next(it, start, end);
@@ -166,7 +179,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
}
}
+out_unlock:
mutex_unlock(&rmn->lock);
+
+ return ret;
}
static const struct mmu_notifier_ops radeon_mn_ops = {
diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c
index e52c58c..4d565b0 100644
--- a/drivers/ide/hpt366.c
+++ b/drivers/ide/hpt366.c
@@ -574,13 +574,14 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive)
if (!HPT370_ALLOW_ATA100_5 ||
check_in_drive_list(drive, bad_ata100_5))
return ATA_UDMA4;
+ /* else: fall through */
case HPT372 :
case HPT372A:
case HPT372N:
case HPT374 :
if (ata_id_is_sata(drive->id))
mask &= ~0x0e;
- /* Fall thru */
+ /* fall through */
default:
return mask;
}
@@ -600,7 +601,7 @@ static u8 hpt3xx_mdma_filter(ide_drive_t *drive)
case HPT374 :
if (ata_id_is_sata(drive->id))
return 0x00;
- /* Fall thru */
+ /* else: fall through */
default:
return 0x07;
}
diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 5bd2aaf..a8df300 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c
@@ -427,6 +427,7 @@ static int ide_floppy_get_capacity(ide_drive_t *drive)
* (maintains previous driver behaviour)
*/
break;
+ /* else: fall through */
case CAPACITY_CURRENT:
/* Normal Zip/LS-120 disks */
if (memcmp(cap_desc, &floppy->cap_desc, 8))
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index a444bad..0d93e0c 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -460,7 +460,6 @@ void do_ide_request(struct request_queue *q)
struct ide_host *host = hwif->host;
struct request *rq = NULL;
ide_startstop_t startstop;
- unsigned long queue_run_ms = 3; /* old plug delay */
spin_unlock_irq(q->queue_lock);
@@ -480,9 +479,6 @@ void do_ide_request(struct request_queue *q)
prev_port = hwif->host->cur_port;
if (drive->dev_flags & IDE_DFLAG_SLEEPING &&
time_after(drive->sleep, jiffies)) {
- unsigned long left = jiffies - drive->sleep;
-
- queue_run_ms = jiffies_to_msecs(left + 1);
ide_unlock_port(hwif);
goto plug_device;
}
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 416a2f3..3b75a7b 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -142,6 +142,7 @@ static void ide_classify_atapi_dev(ide_drive_t *drive)
}
/* Early cdrom models used zero */
type = ide_cdrom;
+ /* fall through */
case ide_cdrom:
drive->dev_flags |= IDE_DFLAG_REMOVABLE;
#ifdef CONFIG_PPC
diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index aee7b46..34c1165 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c
@@ -1746,7 +1746,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
{
unsigned long t;
int speed;
- int buffer_size;
u16 *ctl = (u16 *)&tape->caps[12];
ide_debug_log(IDE_DBG_FUNC, "minor: %d", minor);
@@ -1781,7 +1780,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
*ctl /= 2;
tape->buffer_size = *ctl * tape->blk_size;
}
- buffer_size = tape->buffer_size;
/* select the "best" DSC read/write polling freq */
speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c
index 89b2902..c21d5c5 100644
--- a/drivers/ide/ide-taskfile.c
+++ b/drivers/ide/ide-taskfile.c
@@ -128,7 +128,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
return pre_task_out_intr(drive, cmd);
}
handler = task_pio_intr;
- /* fall-through */
+ /* fall through */
case ATA_PROT_NODATA:
if (handler == NULL)
handler = task_no_data_intr;
@@ -140,6 +140,7 @@ ide_startstop_t do_rw_taskfile(ide_drive_t *drive, struct ide_cmd *orig_cmd)
hwif->expiry = dma_ops->dma_timer_expiry;
ide_execute_command(drive, cmd, ide_dma_intr, 2 * WAIT_CMD);
dma_ops->dma_start(drive);
+ /* fall through */
default:
return ide_started;
}
diff --git a/drivers/ide/sis5513.c b/drivers/ide/sis5513.c
index c3062b5..024bc7b 100644
--- a/drivers/ide/sis5513.c
+++ b/drivers/ide/sis5513.c
@@ -494,6 +494,7 @@ static int init_chipset_sis5513(struct pci_dev *dev)
pci_read_config_byte(dev, 0x09, ®);
if ((reg & 0x0f) != 0x00)
pci_write_config_byte(dev, 0x09, reg&0xf0);
+ /* fall through */
case ATA_16:
/* force per drive recovery and active timings
needed on ATA_33 and below chips */
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 182436b..6ec748e 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -186,6 +186,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
rbt_ib_umem_for_each_in_range(&context->umem_tree, 0,
ULLONG_MAX,
ib_umem_notifier_release_trampoline,
+ true,
NULL);
up_read(&context->umem_rwsem);
}
@@ -207,22 +208,31 @@ static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
return 0;
}
-static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct ib_ucontext *context = container_of(mn, struct ib_ucontext, mn);
+ int ret;
if (!context->invalidate_range)
- return;
+ return 0;
+
+ if (blockable)
+ down_read(&context->umem_rwsem);
+ else if (!down_read_trylock(&context->umem_rwsem))
+ return -EAGAIN;
ib_ucontext_notifier_start_account(context);
- down_read(&context->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
+ ret = rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_start_trampoline, NULL);
+ invalidate_range_start_trampoline,
+ blockable, NULL);
up_read(&context->umem_rwsem);
+
+ return ret;
}
static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
@@ -242,10 +252,15 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (!context->invalidate_range)
return;
+ /*
+ * TODO: we currently bail out if there is any sleepable work to be done
+ * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
+ * here. But this is ugly and fragile.
+ */
down_read(&context->umem_rwsem);
rbt_ib_umem_for_each_in_range(&context->umem_tree, start,
end,
- invalidate_range_end_trampoline, NULL);
+ invalidate_range_end_trampoline, true, NULL);
up_read(&context->umem_rwsem);
ib_ucontext_notifier_end_account(context);
}
@@ -798,6 +813,7 @@ EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
u64 start, u64 last,
umem_call_back cb,
+ bool blockable,
void *cookie)
{
int ret_val = 0;
@@ -809,6 +825,9 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
for (node = rbt_ib_umem_iter_first(root, start, last - 1);
node; node = next) {
+ /* TODO move the blockable decision up to the callback */
+ if (!blockable)
+ return -EAGAIN;
next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 70aceef..e1c7996 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -67,9 +67,9 @@ struct mmu_rb_handler {
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
-static void mmu_notifier_range_start(struct mmu_notifier *,
+static int mmu_notifier_range_start(struct mmu_notifier *,
struct mm_struct *,
- unsigned long, unsigned long);
+ unsigned long, unsigned long, bool);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
static void do_remove(struct mmu_rb_handler *handler,
@@ -284,10 +284,11 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
handler->ops->remove(handler->ops_arg, node);
}
-static void mmu_notifier_range_start(struct mmu_notifier *mn,
+static int mmu_notifier_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
@@ -313,6 +314,8 @@ static void mmu_notifier_range_start(struct mmu_notifier *mn,
if (added)
queue_work(handler->wq, &handler->del_work);
+
+ return 0;
}
/*
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index f1a87a6..d216e0d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -488,7 +488,7 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
down_read(&ctx->umem_rwsem);
rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, imr);
+ mr_leaf_free, true, imr);
up_read(&ctx->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
diff --git a/drivers/irqchip/irq-renesas-h8s.c b/drivers/irqchip/irq-renesas-h8s.c
index aed31af..85234d4 100644
--- a/drivers/irqchip/irq-renesas-h8s.c
+++ b/drivers/irqchip/irq-renesas-h8s.c
@@ -12,7 +12,7 @@
#include <asm/io.h>
static void *intc_baseaddr;
-#define IPRA ((unsigned long)intc_baseaddr)
+#define IPRA (intc_baseaddr)
static const unsigned char ipr_table[] = {
0x03, 0x02, 0x01, 0x00, 0x13, 0x12, 0x11, 0x10, /* 16 - 23 */
@@ -34,7 +34,7 @@ static const unsigned char ipr_table[] = {
static void h8s_disable_irq(struct irq_data *data)
{
int pos;
- unsigned int addr;
+ void __iomem *addr;
unsigned short pri;
int irq = data->irq;
@@ -48,7 +48,7 @@ static void h8s_disable_irq(struct irq_data *data)
static void h8s_enable_irq(struct irq_data *data)
{
int pos;
- unsigned int addr;
+ void __iomem *addr;
unsigned short pri;
int irq = data->irq;
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 17bf109..f6e0a8b 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -1,7 +1,8 @@
config BCACHE
tristate "Block device as cache"
- ---help---
+ select CRC64
+ help
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
@@ -10,7 +11,7 @@
config BCACHE_DEBUG
bool "Bcache debugging"
depends on BCACHE
- ---help---
+ help
Don't select this option unless you're a developer
Enables extra debugging tools, allows expensive runtime checks to be
@@ -20,7 +21,7 @@
bool "Debug closures"
depends on BCACHE
select DEBUG_FS
- ---help---
+ help
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 7fa2631..7a28232 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -87,8 +87,8 @@ void bch_rescale_priorities(struct cache_set *c, int sectors)
{
struct cache *ca;
struct bucket *b;
- unsigned next = c->nbuckets * c->sb.bucket_size / 1024;
- unsigned i;
+ unsigned int next = c->nbuckets * c->sb.bucket_size / 1024;
+ unsigned int i;
int r;
atomic_sub(sectors, &c->rescale);
@@ -169,7 +169,7 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
#define bucket_prio(b) \
({ \
- unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
+ unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
\
(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b); \
})
@@ -244,6 +244,7 @@ static void invalidate_buckets_random(struct cache *ca)
while (!fifo_full(&ca->free_inc)) {
size_t n;
+
get_random_bytes(&n, sizeof(n));
n %= (size_t) (ca->sb.nbuckets - ca->sb.first_bucket);
@@ -301,7 +302,7 @@ do { \
static int bch_allocator_push(struct cache *ca, long bucket)
{
- unsigned i;
+ unsigned int i;
/* Prios/gens are actually the most important reserve */
if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
@@ -385,7 +386,7 @@ static int bch_allocator_thread(void *arg)
/* Allocation */
-long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait)
{
DEFINE_WAIT(w);
struct bucket *b;
@@ -421,7 +422,7 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
if (expensive_debug_checks(ca->set)) {
size_t iter;
long i;
- unsigned j;
+ unsigned int j;
for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
@@ -470,14 +471,14 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
void bch_bucket_free(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
__bch_bucket_free(PTR_CACHE(c, k, i),
PTR_BUCKET(c, k, i));
}
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
{
int i;
@@ -510,10 +511,11 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
return -1;
}
-int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
struct bkey *k, int n, bool wait)
{
int ret;
+
mutex_lock(&c->bucket_lock);
ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
mutex_unlock(&c->bucket_lock);
@@ -524,8 +526,8 @@ int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
struct open_bucket {
struct list_head list;
- unsigned last_write_point;
- unsigned sectors_free;
+ unsigned int last_write_point;
+ unsigned int sectors_free;
BKEY_PADDED(key);
};
@@ -556,7 +558,7 @@ struct open_bucket {
*/
static struct open_bucket *pick_data_bucket(struct cache_set *c,
const struct bkey *search,
- unsigned write_point,
+ unsigned int write_point,
struct bkey *alloc)
{
struct open_bucket *ret, *ret_task = NULL;
@@ -595,12 +597,16 @@ static struct open_bucket *pick_data_bucket(struct cache_set *c,
*
* If s->writeback is true, will not fail.
*/
-bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
- unsigned write_point, unsigned write_prio, bool wait)
+bool bch_alloc_sectors(struct cache_set *c,
+ struct bkey *k,
+ unsigned int sectors,
+ unsigned int write_point,
+ unsigned int write_prio,
+ bool wait)
{
struct open_bucket *b;
BKEY_PADDED(key) alloc;
- unsigned i;
+ unsigned int i;
/*
* We might have to allocate a new bucket, which we can't do with a
@@ -613,7 +619,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
spin_lock(&c->data_bucket_lock);
while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
- unsigned watermark = write_prio
+ unsigned int watermark = write_prio
? RESERVE_MOVINGGC
: RESERVE_NONE;
@@ -702,6 +708,7 @@ int bch_open_buckets_alloc(struct cache_set *c)
for (i = 0; i < MAX_OPEN_BUCKETS; i++) {
struct open_bucket *b = kzalloc(sizeof(*b), GFP_KERNEL);
+
if (!b)
return -ENOMEM;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 05f82ff6..83504dd 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -252,7 +252,7 @@ struct bcache_device {
struct kobject kobj;
struct cache_set *c;
- unsigned id;
+ unsigned int id;
#define BCACHEDEVNAME_SIZE 12
char name[BCACHEDEVNAME_SIZE];
@@ -264,18 +264,19 @@ struct bcache_device {
#define BCACHE_DEV_UNLINK_DONE 2
#define BCACHE_DEV_WB_RUNNING 3
#define BCACHE_DEV_RATE_DW_RUNNING 4
- unsigned nr_stripes;
- unsigned stripe_size;
+ unsigned int nr_stripes;
+ unsigned int stripe_size;
atomic_t *stripe_sectors_dirty;
unsigned long *full_dirty_stripes;
struct bio_set bio_split;
- unsigned data_csum:1;
+ unsigned int data_csum:1;
- int (*cache_miss)(struct btree *, struct search *,
- struct bio *, unsigned);
- int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long);
+ int (*cache_miss)(struct btree *b, struct search *s,
+ struct bio *bio, unsigned int sectors);
+ int (*ioctl)(struct bcache_device *d, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
};
struct io {
@@ -284,7 +285,7 @@ struct io {
struct list_head lru;
unsigned long jiffies;
- unsigned sequential;
+ unsigned int sequential;
sector_t last;
};
@@ -358,18 +359,18 @@ struct cached_dev {
struct cache_accounting accounting;
/* The rest of this all shows up in sysfs */
- unsigned sequential_cutoff;
- unsigned readahead;
+ unsigned int sequential_cutoff;
+ unsigned int readahead;
- unsigned io_disable:1;
- unsigned verify:1;
- unsigned bypass_torture_test:1;
+ unsigned int io_disable:1;
+ unsigned int verify:1;
+ unsigned int bypass_torture_test:1;
- unsigned partial_stripes_expensive:1;
- unsigned writeback_metadata:1;
- unsigned writeback_running:1;
+ unsigned int partial_stripes_expensive:1;
+ unsigned int writeback_metadata:1;
+ unsigned int writeback_running:1;
unsigned char writeback_percent;
- unsigned writeback_delay;
+ unsigned int writeback_delay;
uint64_t writeback_rate_target;
int64_t writeback_rate_proportional;
@@ -377,16 +378,16 @@ struct cached_dev {
int64_t writeback_rate_integral_scaled;
int32_t writeback_rate_change;
- unsigned writeback_rate_update_seconds;
- unsigned writeback_rate_i_term_inverse;
- unsigned writeback_rate_p_term_inverse;
- unsigned writeback_rate_minimum;
+ unsigned int writeback_rate_update_seconds;
+ unsigned int writeback_rate_i_term_inverse;
+ unsigned int writeback_rate_p_term_inverse;
+ unsigned int writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
atomic_t io_errors;
- unsigned error_limit;
- unsigned offline_seconds;
+ unsigned int error_limit;
+ unsigned int offline_seconds;
char backing_dev_name[BDEVNAME_SIZE];
};
@@ -447,7 +448,7 @@ struct cache {
* until a gc finishes - otherwise we could pointlessly burn a ton of
* cpu
*/
- unsigned invalidate_needs_gc;
+ unsigned int invalidate_needs_gc;
bool discard; /* Get rid of? */
@@ -472,7 +473,7 @@ struct gc_stat {
size_t nkeys;
uint64_t data; /* sectors */
- unsigned in_use; /* percent */
+ unsigned int in_use; /* percent */
};
/*
@@ -518,7 +519,7 @@ struct cache_set {
int caches_loaded;
struct bcache_device **devices;
- unsigned devices_max_used;
+ unsigned int devices_max_used;
atomic_t attached_dev_nr;
struct list_head cached_devs;
uint64_t cached_dev_sectors;
@@ -548,7 +549,7 @@ struct cache_set {
* Default number of pages for a new btree node - may be less than a
* full bucket
*/
- unsigned btree_pages;
+ unsigned int btree_pages;
/*
* Lists of struct btrees; lru is the list for structs that have memory
@@ -571,7 +572,7 @@ struct cache_set {
struct list_head btree_cache_freed;
/* Number of elements in btree_cache + btree_cache_freeable lists */
- unsigned btree_cache_used;
+ unsigned int btree_cache_used;
/*
* If we need to allocate memory for a new btree node and that
@@ -613,8 +614,8 @@ struct cache_set {
uint16_t min_prio;
/*
- * max(gen - last_gc) for all buckets. When it gets too big we have to gc
- * to keep gens from wrapping around.
+ * max(gen - last_gc) for all buckets. When it gets too big we have to
+ * gc to keep gens from wrapping around.
*/
uint8_t need_gc;
struct gc_stat gc_stats;
@@ -649,7 +650,7 @@ struct cache_set {
struct mutex verify_lock;
#endif
- unsigned nr_uuids;
+ unsigned int nr_uuids;
struct uuid_entry *uuids;
BKEY_PADDED(uuid_bucket);
struct closure uuid_write;
@@ -670,12 +671,12 @@ struct cache_set {
struct journal journal;
#define CONGESTED_MAX 1024
- unsigned congested_last_us;
+ unsigned int congested_last_us;
atomic_t congested;
/* The rest of this all shows up in sysfs */
- unsigned congested_read_threshold_us;
- unsigned congested_write_threshold_us;
+ unsigned int congested_read_threshold_us;
+ unsigned int congested_write_threshold_us;
struct time_stats btree_gc_time;
struct time_stats btree_split_time;
@@ -694,16 +695,16 @@ struct cache_set {
ON_ERROR_PANIC,
} on_error;
#define DEFAULT_IO_ERROR_LIMIT 8
- unsigned error_limit;
- unsigned error_decay;
+ unsigned int error_limit;
+ unsigned int error_decay;
unsigned short journal_delay_ms;
bool expensive_debug_checks;
- unsigned verify:1;
- unsigned key_merging_disabled:1;
- unsigned gc_always_rewrite:1;
- unsigned shrinker_disabled:1;
- unsigned copy_gc_enabled:1;
+ unsigned int verify:1;
+ unsigned int key_merging_disabled:1;
+ unsigned int gc_always_rewrite:1;
+ unsigned int shrinker_disabled:1;
+ unsigned int copy_gc_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
@@ -712,7 +713,7 @@ struct cache_set {
};
struct bbio {
- unsigned submit_time_us;
+ unsigned int submit_time_us;
union {
struct bkey key;
uint64_t _pad[3];
@@ -729,10 +730,10 @@ struct bbio {
#define btree_bytes(c) ((c)->btree_pages * PAGE_SIZE)
#define btree_blocks(b) \
- ((unsigned) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
+ ((unsigned int) (KEY_SIZE(&b->key) >> (b)->c->block_bits))
#define btree_default_blocks(c) \
- ((unsigned) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
+ ((unsigned int) ((PAGE_SECTORS * (c)->btree_pages) >> (c)->block_bits))
#define bucket_pages(c) ((c)->sb.bucket_size / PAGE_SECTORS)
#define bucket_bytes(c) ((c)->sb.bucket_size << 9)
@@ -761,21 +762,21 @@ static inline sector_t bucket_remainder(struct cache_set *c, sector_t s)
static inline struct cache *PTR_CACHE(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return c->cache[PTR_DEV(k, ptr)];
}
static inline size_t PTR_BUCKET_NR(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return sector_to_bucket(c, PTR_OFFSET(k, ptr));
}
static inline struct bucket *PTR_BUCKET(struct cache_set *c,
const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
}
@@ -783,17 +784,18 @@ static inline struct bucket *PTR_BUCKET(struct cache_set *c,
static inline uint8_t gen_after(uint8_t a, uint8_t b)
{
uint8_t r = a - b;
+
return r > 128U ? 0 : r;
}
static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
- unsigned i)
+ unsigned int i)
{
return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
}
static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
- unsigned i)
+ unsigned int i)
{
return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
}
@@ -879,16 +881,16 @@ static inline uint8_t bucket_gc_gen(struct bucket *b)
#define BUCKET_GC_GEN_MAX 96U
#define kobj_attribute_write(n, fn) \
- static struct kobj_attribute ksysfs_##n = __ATTR(n, S_IWUSR, NULL, fn)
+ static struct kobj_attribute ksysfs_##n = __ATTR(n, 0200, NULL, fn)
#define kobj_attribute_rw(n, show, store) \
static struct kobj_attribute ksysfs_##n = \
- __ATTR(n, S_IWUSR|S_IRUSR, show, store)
+ __ATTR(n, 0600, show, store)
static inline void wake_up_allocators(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned int i;
for_each_cache(ca, c, i)
wake_up_process(ca->alloc_thread);
@@ -924,40 +926,43 @@ static inline void wait_for_kthread_stop(void)
/* Forward declarations */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
-void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
-void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
- blk_status_t, const char *);
-void bch_bbio_endio(struct cache_set *, struct bio *, blk_status_t,
- const char *);
-void bch_bbio_free(struct bio *, struct cache_set *);
-struct bio *bch_bbio_alloc(struct cache_set *);
+void bch_count_io_errors(struct cache *ca, blk_status_t error,
+ int is_read, const char *m);
+void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_endio(struct cache_set *c, struct bio *bio,
+ blk_status_t error, const char *m);
+void bch_bbio_free(struct bio *bio, struct cache_set *c);
+struct bio *bch_bbio_alloc(struct cache_set *c);
-void __bch_submit_bbio(struct bio *, struct cache_set *);
-void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
+void __bch_submit_bbio(struct bio *bio, struct cache_set *c);
+void bch_submit_bbio(struct bio *bio, struct cache_set *c,
+ struct bkey *k, unsigned int ptr);
-uint8_t bch_inc_gen(struct cache *, struct bucket *);
-void bch_rescale_priorities(struct cache_set *, int);
+uint8_t bch_inc_gen(struct cache *ca, struct bucket *b);
+void bch_rescale_priorities(struct cache_set *c, int sectors);
-bool bch_can_invalidate_bucket(struct cache *, struct bucket *);
-void __bch_invalidate_one_bucket(struct cache *, struct bucket *);
+bool bch_can_invalidate_bucket(struct cache *ca, struct bucket *b);
+void __bch_invalidate_one_bucket(struct cache *ca, struct bucket *b);
-void __bch_bucket_free(struct cache *, struct bucket *);
-void bch_bucket_free(struct cache_set *, struct bkey *);
+void __bch_bucket_free(struct cache *ca, struct bucket *b);
+void bch_bucket_free(struct cache_set *c, struct bkey *k);
-long bch_bucket_alloc(struct cache *, unsigned, bool);
-int __bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
-int bch_bucket_alloc_set(struct cache_set *, unsigned,
- struct bkey *, int, bool);
-bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
- unsigned, unsigned, bool);
+long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait);
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, int n, bool wait);
+int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve,
+ struct bkey *k, int n, bool wait);
+bool bch_alloc_sectors(struct cache_set *c, struct bkey *k,
+ unsigned int sectors, unsigned int write_point,
+ unsigned int write_prio, bool wait);
bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
-bool bch_cache_set_error(struct cache_set *, const char *, ...);
+bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *);
-void bch_write_bdev_super(struct cached_dev *, struct closure *);
+void bch_prio_write(struct cache *ca);
+void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
extern struct mutex bch_register_lock;
@@ -969,30 +974,31 @@ extern struct kobj_type bch_cache_set_ktype;
extern struct kobj_type bch_cache_set_internal_ktype;
extern struct kobj_type bch_cache_ktype;
-void bch_cached_dev_release(struct kobject *);
-void bch_flash_dev_release(struct kobject *);
-void bch_cache_set_release(struct kobject *);
-void bch_cache_release(struct kobject *);
+void bch_cached_dev_release(struct kobject *kobj);
+void bch_flash_dev_release(struct kobject *kobj);
+void bch_cache_set_release(struct kobject *kobj);
+void bch_cache_release(struct kobject *kobj);
-int bch_uuid_write(struct cache_set *);
-void bcache_write_super(struct cache_set *);
+int bch_uuid_write(struct cache_set *c);
+void bcache_write_super(struct cache_set *c);
int bch_flash_dev_create(struct cache_set *c, uint64_t size);
-int bch_cached_dev_attach(struct cached_dev *, struct cache_set *, uint8_t *);
-void bch_cached_dev_detach(struct cached_dev *);
-void bch_cached_dev_run(struct cached_dev *);
-void bcache_device_stop(struct bcache_device *);
+int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
+ uint8_t *set_uuid);
+void bch_cached_dev_detach(struct cached_dev *dc);
+void bch_cached_dev_run(struct cached_dev *dc);
+void bcache_device_stop(struct bcache_device *d);
-void bch_cache_set_unregister(struct cache_set *);
-void bch_cache_set_stop(struct cache_set *);
+void bch_cache_set_unregister(struct cache_set *c);
+void bch_cache_set_stop(struct cache_set *c);
-struct cache_set *bch_cache_set_alloc(struct cache_sb *);
-void bch_btree_cache_free(struct cache_set *);
-int bch_btree_cache_alloc(struct cache_set *);
-void bch_moving_init_cache_set(struct cache_set *);
-int bch_open_buckets_alloc(struct cache_set *);
-void bch_open_buckets_free(struct cache_set *);
+struct cache_set *bch_cache_set_alloc(struct cache_sb *sb);
+void bch_btree_cache_free(struct cache_set *c);
+int bch_btree_cache_alloc(struct cache_set *c);
+void bch_moving_init_cache_set(struct cache_set *c);
+int bch_open_buckets_alloc(struct cache_set *c);
+void bch_open_buckets_free(struct cache_set *c);
int bch_cache_allocator_start(struct cache *ca);
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 596c93b4..8f07fa6 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -18,31 +18,31 @@
#ifdef CONFIG_BCACHE_DEBUG
-void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
{
struct bkey *k, *next;
for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k);
- printk(KERN_ERR "block %u key %u/%u: ", set,
- (unsigned) ((u64 *) k - i->d), i->keys);
+ pr_err("block %u key %u/%u: ", set,
+ (unsigned int) ((u64 *) k - i->d), i->keys);
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
- printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+ pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
&START_KEY(next) : next) > 0)
- printk(KERN_ERR "Key skipped backwards\n");
+ pr_err("Key skipped backwards\n");
}
}
void bch_dump_bucket(struct btree_keys *b)
{
- unsigned i;
+ unsigned int i;
console_lock();
for (i = 0; i <= b->nsets; i++)
@@ -53,7 +53,7 @@ void bch_dump_bucket(struct btree_keys *b)
int __bch_count_data(struct btree_keys *b)
{
- unsigned ret = 0;
+ unsigned int ret = 0;
struct btree_iter iter;
struct bkey *k;
@@ -128,7 +128,7 @@ static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
/* Keylists */
-int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
{
size_t oldsize = bch_keylist_nkeys(l);
size_t newsize = oldsize + u64s;
@@ -180,7 +180,7 @@ void bch_keylist_pop_front(struct keylist *l)
/* Key/pointer manipulation */
void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
- unsigned i)
+ unsigned int i)
{
BUG_ON(i > KEY_PTRS(src));
@@ -194,7 +194,7 @@ void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
bool __bch_cut_front(const struct bkey *where, struct bkey *k)
{
- unsigned i, len = 0;
+ unsigned int i, len = 0;
if (bkey_cmp(where, &START_KEY(k)) <= 0)
return false;
@@ -214,7 +214,7 @@ bool __bch_cut_front(const struct bkey *where, struct bkey *k)
bool __bch_cut_back(const struct bkey *where, struct bkey *k)
{
- unsigned len = 0;
+ unsigned int len = 0;
if (bkey_cmp(where, k) >= 0)
return false;
@@ -240,9 +240,9 @@ bool __bch_cut_back(const struct bkey *where, struct bkey *k)
#define BKEY_MANTISSA_MASK ((1 << BKEY_MANTISSA_BITS) - 1)
struct bkey_float {
- unsigned exponent:BKEY_EXPONENT_BITS;
- unsigned m:BKEY_MID_BITS;
- unsigned mantissa:BKEY_MANTISSA_BITS;
+ unsigned int exponent:BKEY_EXPONENT_BITS;
+ unsigned int m:BKEY_MID_BITS;
+ unsigned int mantissa:BKEY_MANTISSA_BITS;
} __packed;
/*
@@ -311,7 +311,9 @@ void bch_btree_keys_free(struct btree_keys *b)
}
EXPORT_SYMBOL(bch_btree_keys_free);
-int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
+int bch_btree_keys_alloc(struct btree_keys *b,
+ unsigned int page_order,
+ gfp_t gfp)
{
struct bset_tree *t = b->set;
@@ -345,7 +347,7 @@ EXPORT_SYMBOL(bch_btree_keys_alloc);
void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bool *expensive_debug_checks)
{
- unsigned i;
+ unsigned int i;
b->ops = ops;
b->expensive_debug_checks = expensive_debug_checks;
@@ -370,7 +372,7 @@ EXPORT_SYMBOL(bch_btree_keys_init);
* return array index next to j when does in-order traverse
* of a binary tree which is stored in a linear array
*/
-static unsigned inorder_next(unsigned j, unsigned size)
+static unsigned int inorder_next(unsigned int j, unsigned int size)
{
if (j * 2 + 1 < size) {
j = j * 2 + 1;
@@ -387,7 +389,7 @@ static unsigned inorder_next(unsigned j, unsigned size)
* return array index previous to j when does in-order traverse
* of a binary tree which is stored in a linear array
*/
-static unsigned inorder_prev(unsigned j, unsigned size)
+static unsigned int inorder_prev(unsigned int j, unsigned int size)
{
if (j * 2 < size) {
j = j * 2;
@@ -400,7 +402,8 @@ static unsigned inorder_prev(unsigned j, unsigned size)
return j;
}
-/* I have no idea why this code works... and I'm the one who wrote it
+/*
+ * I have no idea why this code works... and I'm the one who wrote it
*
* However, I do know what it does:
* Given a binary tree constructed in an array (i.e. how you normally implement
@@ -413,10 +416,12 @@ static unsigned inorder_prev(unsigned j, unsigned size)
* extra is a function of size:
* extra = (size - rounddown_pow_of_two(size - 1)) << 1;
*/
-static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
+static unsigned int __to_inorder(unsigned int j,
+ unsigned int size,
+ unsigned int extra)
{
- unsigned b = fls(j);
- unsigned shift = fls(size - 1) - b;
+ unsigned int b = fls(j);
+ unsigned int shift = fls(size - 1) - b;
j ^= 1U << (b - 1);
j <<= 1;
@@ -433,14 +438,16 @@ static unsigned __to_inorder(unsigned j, unsigned size, unsigned extra)
* Return the cacheline index in bset_tree->data, where j is index
* from a linear array which stores the auxiliar binary tree
*/
-static unsigned to_inorder(unsigned j, struct bset_tree *t)
+static unsigned int to_inorder(unsigned int j, struct bset_tree *t)
{
return __to_inorder(j, t->size, t->extra);
}
-static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
+static unsigned int __inorder_to_tree(unsigned int j,
+ unsigned int size,
+ unsigned int extra)
{
- unsigned shift;
+ unsigned int shift;
if (j > extra)
j += j - extra;
@@ -457,7 +464,7 @@ static unsigned __inorder_to_tree(unsigned j, unsigned size, unsigned extra)
* Return an index from a linear array which stores the auxiliar binary
* tree, j is the cacheline index of t->data.
*/
-static unsigned inorder_to_tree(unsigned j, struct bset_tree *t)
+static unsigned int inorder_to_tree(unsigned int j, struct bset_tree *t)
{
return __inorder_to_tree(j, t->size, t->extra);
}
@@ -468,14 +475,15 @@ void inorder_test(void)
unsigned long done = 0;
ktime_t start = ktime_get();
- for (unsigned size = 2;
+ for (unsigned int size = 2;
size < 65536000;
size++) {
- unsigned extra = (size - rounddown_pow_of_two(size - 1)) << 1;
- unsigned i = 1, j = rounddown_pow_of_two(size - 1);
+ unsigned int extra =
+ (size - rounddown_pow_of_two(size - 1)) << 1;
+ unsigned int i = 1, j = rounddown_pow_of_two(size - 1);
if (!(size % 4096))
- printk(KERN_NOTICE "loop %u, %llu per us\n", size,
+ pr_notice("loop %u, %llu per us\n", size,
done / ktime_us_delta(ktime_get(), start));
while (1) {
@@ -518,30 +526,31 @@ void inorder_test(void)
* of the previous key so we can walk backwards to it from t->tree[j]'s key.
*/
-static struct bkey *cacheline_to_bkey(struct bset_tree *t, unsigned cacheline,
- unsigned offset)
+static struct bkey *cacheline_to_bkey(struct bset_tree *t,
+ unsigned int cacheline,
+ unsigned int offset)
{
return ((void *) t->data) + cacheline * BSET_CACHELINE + offset * 8;
}
-static unsigned bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
+static unsigned int bkey_to_cacheline(struct bset_tree *t, struct bkey *k)
{
return ((void *) k - (void *) t->data) / BSET_CACHELINE;
}
-static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
- unsigned cacheline,
+static unsigned int bkey_to_cacheline_offset(struct bset_tree *t,
+ unsigned int cacheline,
struct bkey *k)
{
return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
}
-static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned int j)
{
return cacheline_to_bkey(t, to_inorder(j, t), t->tree[j].m);
}
-static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
+static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned int j)
{
return (void *) (((uint64_t *) tree_to_bkey(t, j)) - t->prev[j]);
}
@@ -550,7 +559,7 @@ static struct bkey *tree_to_prev_bkey(struct bset_tree *t, unsigned j)
* For the write set - the one we're currently inserting keys into - we don't
* maintain a full search tree, we just keep a simple lookup table in t->prev.
*/
-static struct bkey *table_to_bkey(struct bset_tree *t, unsigned cacheline)
+static struct bkey *table_to_bkey(struct bset_tree *t, unsigned int cacheline)
{
return cacheline_to_bkey(t, cacheline, t->prev[cacheline]);
}
@@ -576,14 +585,15 @@ static inline uint64_t shrd128(uint64_t high, uint64_t low, uint8_t shift)
* See make_bfloat() to check when most significant bit of f->exponent
* is set or not.
*/
-static inline unsigned bfloat_mantissa(const struct bkey *k,
+static inline unsigned int bfloat_mantissa(const struct bkey *k,
struct bkey_float *f)
{
const uint64_t *p = &k->low - (f->exponent >> 6);
+
return shrd128(p[-1], p[0], f->exponent & 63) & BKEY_MANTISSA_MASK;
}
-static void make_bfloat(struct bset_tree *t, unsigned j)
+static void make_bfloat(struct bset_tree *t, unsigned int j)
{
struct bkey_float *f = &t->tree[j];
struct bkey *m = tree_to_bkey(t, j);
@@ -631,7 +641,7 @@ static void make_bfloat(struct bset_tree *t, unsigned j)
static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
{
if (t != b->set) {
- unsigned j = roundup(t[-1].size,
+ unsigned int j = roundup(t[-1].size,
64 / sizeof(struct bkey_float));
t->tree = t[-1].tree + j;
@@ -686,13 +696,13 @@ void bch_bset_build_written_tree(struct btree_keys *b)
{
struct bset_tree *t = bset_tree_last(b);
struct bkey *prev = NULL, *k = t->data->start;
- unsigned j, cacheline = 1;
+ unsigned int j, cacheline = 1;
b->last_set_unwritten = 0;
bset_alloc_tree(b, t);
- t->size = min_t(unsigned,
+ t->size = min_t(unsigned int,
bkey_to_cacheline(t, bset_bkey_last(t->data)),
b->set->tree + btree_keys_cachelines(b) - t->tree);
@@ -732,7 +742,7 @@ EXPORT_SYMBOL(bch_bset_build_written_tree);
void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
{
struct bset_tree *t;
- unsigned inorder, j = 1;
+ unsigned int inorder, j = 1;
for (t = b->set; t <= bset_tree_last(b); t++)
if (k < bset_bkey_last(t->data))
@@ -779,14 +789,15 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
struct bkey *k)
{
- unsigned shift = bkey_u64s(k);
- unsigned j = bkey_to_cacheline(t, k);
+ unsigned int shift = bkey_u64s(k);
+ unsigned int j = bkey_to_cacheline(t, k);
/* We're getting called from btree_split() or btree_gc, just bail out */
if (!t->size)
return;
- /* k is the key we just inserted; we need to find the entry in the
+ /*
+ * k is the key we just inserted; we need to find the entry in the
* lookup table for the first key that is strictly greater than k:
* it's either k's cacheline or the next one
*/
@@ -794,7 +805,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
table_to_bkey(t, j) <= k)
j++;
- /* Adjust all the lookup table entries, and find a new key for any that
+ /*
+ * Adjust all the lookup table entries, and find a new key for any that
* have gotten too big
*/
for (; j < t->size; j++) {
@@ -819,7 +831,8 @@ static void bch_bset_fix_lookup_table(struct btree_keys *b,
k != bset_bkey_last(t->data);
k = bkey_next(k))
if (t->size == bkey_to_cacheline(t, k)) {
- t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
+ t->prev[t->size] =
+ bkey_to_cacheline_offset(t, t->size, k);
t->size++;
}
}
@@ -867,10 +880,10 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
}
EXPORT_SYMBOL(bch_bset_insert);
-unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bkey *replace_key)
{
- unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
+ unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
struct bset *i = bset_tree_last(b)->data;
struct bkey *m, *prev = NULL;
struct btree_iter iter;
@@ -922,10 +935,10 @@ struct bset_search_iter {
static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
const struct bkey *search)
{
- unsigned li = 0, ri = t->size;
+ unsigned int li = 0, ri = t->size;
while (li + 1 != ri) {
- unsigned m = (li + ri) >> 1;
+ unsigned int m = (li + ri) >> 1;
if (bkey_cmp(table_to_bkey(t, m), search) > 0)
ri = m;
@@ -944,7 +957,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
{
struct bkey *l, *r;
struct bkey_float *f;
- unsigned inorder, j, n = 1;
+ unsigned int inorder, j, n = 1;
do {
/*
@@ -958,7 +971,8 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
* p = 0;
* but a branch instruction is avoided.
*/
- unsigned p = n << 4;
+ unsigned int p = n << 4;
+
p &= ((int) (p - t->size)) >> 31;
prefetch(&t->tree[p]);
@@ -978,7 +992,7 @@ static struct bset_search_iter bset_search_tree(struct bset_tree *t,
* to work - that's done in make_bfloat()
*/
if (likely(f->exponent != 127))
- n = j * 2 + (((unsigned)
+ n = j * 2 + (((unsigned int)
(f->mantissa -
bfloat_mantissa(search, f))) >> 31);
else
@@ -1109,6 +1123,7 @@ static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
struct bset_tree *start)
{
struct bkey *ret = NULL;
+
iter->size = ARRAY_SIZE(iter->data);
iter->used = 0;
@@ -1184,7 +1199,8 @@ void bch_bset_sort_state_free(struct bset_sort_state *state)
mempool_exit(&state->pool);
}
-int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+ unsigned int page_order)
{
spin_lock_init(&state->time.lock);
@@ -1237,7 +1253,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
- unsigned start, unsigned order, bool fixup,
+ unsigned int start, unsigned int order, bool fixup,
struct bset_sort_state *state)
{
uint64_t start_time;
@@ -1288,7 +1304,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
bch_time_stats_update(&state->time, start_time);
}
-void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
struct bset_sort_state *state)
{
size_t order = b->page_order, keys = 0;
@@ -1298,7 +1314,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
if (start) {
- unsigned i;
+ unsigned int i;
for (i = start; i <= b->nsets; i++)
keys += b->set[i].data->keys;
@@ -1323,8 +1339,8 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
struct bset_sort_state *state)
{
uint64_t start_time = local_clock();
-
struct btree_iter iter;
+
bch_btree_iter_init(b, &iter, NULL);
btree_mergesort(b, new->set->data, &iter, false, true);
@@ -1338,7 +1354,7 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
{
- unsigned crit = SORT_CRIT;
+ unsigned int crit = SORT_CRIT;
int i;
/* Don't sort if nothing to do */
@@ -1367,7 +1383,7 @@ EXPORT_SYMBOL(bch_btree_sort_lazy);
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i <= b->nsets; i++) {
struct bset_tree *t = &b->set[i];
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index b867f22..bac76aa 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -163,10 +163,10 @@ struct bset_tree {
*/
/* size of the binary tree and prev array */
- unsigned size;
+ unsigned int size;
/* function of size - precalculated for to_inorder() */
- unsigned extra;
+ unsigned int extra;
/* copy of the last key in the set */
struct bkey end;
@@ -187,18 +187,25 @@ struct bset_tree {
};
struct btree_keys_ops {
- bool (*sort_cmp)(struct btree_iter_set,
- struct btree_iter_set);
- struct bkey *(*sort_fixup)(struct btree_iter *, struct bkey *);
- bool (*insert_fixup)(struct btree_keys *, struct bkey *,
- struct btree_iter *, struct bkey *);
- bool (*key_invalid)(struct btree_keys *,
- const struct bkey *);
- bool (*key_bad)(struct btree_keys *, const struct bkey *);
- bool (*key_merge)(struct btree_keys *,
- struct bkey *, struct bkey *);
- void (*key_to_text)(char *, size_t, const struct bkey *);
- void (*key_dump)(struct btree_keys *, const struct bkey *);
+ bool (*sort_cmp)(struct btree_iter_set l,
+ struct btree_iter_set r);
+ struct bkey *(*sort_fixup)(struct btree_iter *iter,
+ struct bkey *tmp);
+ bool (*insert_fixup)(struct btree_keys *b,
+ struct bkey *insert,
+ struct btree_iter *iter,
+ struct bkey *replace_key);
+ bool (*key_invalid)(struct btree_keys *bk,
+ const struct bkey *k);
+ bool (*key_bad)(struct btree_keys *bk,
+ const struct bkey *k);
+ bool (*key_merge)(struct btree_keys *bk,
+ struct bkey *l, struct bkey *r);
+ void (*key_to_text)(char *buf,
+ size_t size,
+ const struct bkey *k);
+ void (*key_dump)(struct btree_keys *keys,
+ const struct bkey *k);
/*
* Only used for deciding whether to use START_KEY(k) or just the key
@@ -211,7 +218,7 @@ struct btree_keys {
const struct btree_keys_ops *ops;
uint8_t page_order;
uint8_t nsets;
- unsigned last_set_unwritten:1;
+ unsigned int last_set_unwritten:1;
bool *expensive_debug_checks;
/*
@@ -239,12 +246,14 @@ static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
}
-static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_byte_offset(struct btree_keys *b,
+ struct bset *i)
{
return ((size_t) i) - ((size_t) b->set->data);
}
-static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
+static inline unsigned int bset_sector_offset(struct btree_keys *b,
+ struct bset *i)
{
return bset_byte_offset(b, i) >> 9;
}
@@ -273,25 +282,27 @@ static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
}
static inline struct bset *bset_next_set(struct btree_keys *b,
- unsigned block_bytes)
+ unsigned int block_bytes)
{
struct bset *i = bset_tree_last(b)->data;
return ((void *) i) + roundup(set_bytes(i), block_bytes);
}
-void bch_btree_keys_free(struct btree_keys *);
-int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
-void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
- bool *);
+void bch_btree_keys_free(struct btree_keys *b);
+int bch_btree_keys_alloc(struct btree_keys *b, unsigned int page_order,
+ gfp_t gfp);
+void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
+ bool *expensive_debug_checks);
-void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t);
-void bch_bset_build_written_tree(struct btree_keys *);
-void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
-bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
-void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
-unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
- struct bkey *);
+void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic);
+void bch_bset_build_written_tree(struct btree_keys *b);
+void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k);
+bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r);
+void bch_bset_insert(struct btree_keys *b, struct bkey *where,
+ struct bkey *insert);
+unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+ struct bkey *replace_key);
enum {
BTREE_INSERT_STATUS_NO_INSERT = 0,
@@ -313,18 +324,21 @@ struct btree_iter {
} data[MAX_BSETS];
};
-typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *);
+typedef bool (*ptr_filter_fn)(struct btree_keys *b, const struct bkey *k);
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
- struct btree_keys *, ptr_filter_fn);
+struct bkey *bch_btree_iter_next(struct btree_iter *iter);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
+ struct btree_keys *b,
+ ptr_filter_fn fn);
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *,
- struct bkey *);
+void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
+ struct bkey *end);
+struct bkey *bch_btree_iter_init(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bkey *search);
-struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *,
- const struct bkey *);
+struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
+ const struct bkey *search);
/*
* Returns the first key that is strictly greater than search
@@ -349,21 +363,23 @@ static inline struct bkey *bch_bset_search(struct btree_keys *b,
struct bset_sort_state {
mempool_t pool;
- unsigned page_order;
- unsigned crit_factor;
+ unsigned int page_order;
+ unsigned int crit_factor;
struct time_stats time;
};
-void bch_bset_sort_state_free(struct bset_sort_state *);
-int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
-void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
-void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
- struct bset_sort_state *);
-void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
- struct bset_sort_state *);
-void bch_btree_sort_partial(struct btree_keys *, unsigned,
- struct bset_sort_state *);
+void bch_bset_sort_state_free(struct bset_sort_state *state);
+int bch_bset_sort_state_init(struct bset_sort_state *state,
+ unsigned int page_order);
+void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state);
+void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
+ struct bset_sort_state *state);
+void bch_btree_sort_and_fix_extents(struct btree_keys *b,
+ struct btree_iter *iter,
+ struct bset_sort_state *state);
+void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
+ struct bset_sort_state *state);
static inline void bch_btree_sort(struct btree_keys *b,
struct bset_sort_state *state)
@@ -377,13 +393,13 @@ struct bset_stats {
size_t floats, failed;
};
-void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
+void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
/* Bkey utility code */
#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
-static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
{
return bkey_idx(i->start, idx);
}
@@ -401,10 +417,10 @@ static __always_inline int64_t bkey_cmp(const struct bkey *l,
: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
}
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
- unsigned);
-bool __bch_cut_front(const struct bkey *, struct bkey *);
-bool __bch_cut_back(const struct bkey *, struct bkey *);
+void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
+ unsigned int i);
+bool __bch_cut_front(const struct bkey *where, struct bkey *k);
+bool __bch_cut_back(const struct bkey *where, struct bkey *k);
static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
{
@@ -522,18 +538,20 @@ static inline size_t bch_keylist_bytes(struct keylist *l)
return bch_keylist_nkeys(l) * sizeof(uint64_t);
}
-struct bkey *bch_keylist_pop(struct keylist *);
-void bch_keylist_pop_front(struct keylist *);
-int __bch_keylist_realloc(struct keylist *, unsigned);
+struct bkey *bch_keylist_pop(struct keylist *l);
+void bch_keylist_pop_front(struct keylist *l);
+int __bch_keylist_realloc(struct keylist *l, unsigned int u64s);
/* Debug stuff */
#ifdef CONFIG_BCACHE_DEBUG
-int __bch_count_data(struct btree_keys *);
-void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
-void bch_dump_bucket(struct btree_keys *);
+int __bch_count_data(struct btree_keys *b);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *b,
+ const char *fmt,
+ ...);
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
+void bch_dump_bucket(struct btree_keys *b);
#else
@@ -541,7 +559,7 @@ static inline int __bch_count_data(struct btree_keys *b) { return -1; }
static inline void __printf(2, 3)
__bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
-void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set);
#endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index c19f771..e7d4817 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -183,7 +183,7 @@ static void bch_btree_init_next(struct btree *b)
void bkey_put(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i))
@@ -287,6 +287,7 @@ void bch_btree_node_read_done(struct btree *b)
static void btree_node_read_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
+
closure_put(cl);
}
@@ -435,7 +436,10 @@ static void do_btree_node_write(struct btree *b)
continue_at(cl, btree_node_write_done, NULL);
} else {
- /* No problem for multipage bvec since the bio is just allocated */
+ /*
+ * No problem for multipage bvec since the bio is
+ * just allocated
+ */
b->bio->bi_vcnt = 0;
bch_bio_map(b->bio, i);
@@ -479,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
void bch_btree_node_write(struct btree *b, struct closure *parent)
{
- unsigned nsets = b->keys.nsets;
+ unsigned int nsets = b->keys.nsets;
lockdep_assert_held(&b->lock);
@@ -581,7 +585,7 @@ static void mca_bucket_free(struct btree *b)
list_move(&b->list, &b->c->btree_cache_freeable);
}
-static unsigned btree_order(struct bkey *k)
+static unsigned int btree_order(struct bkey *k)
{
return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
}
@@ -589,7 +593,7 @@ static unsigned btree_order(struct bkey *k)
static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
{
if (!bch_btree_keys_alloc(&b->keys,
- max_t(unsigned,
+ max_t(unsigned int,
ilog2(b->c->btree_pages),
btree_order(k)),
gfp)) {
@@ -604,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
struct bkey *k, gfp_t gfp)
{
struct btree *b = kzalloc(sizeof(struct btree), gfp);
+
if (!b)
return NULL;
@@ -620,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
return b;
}
-static int mca_reap(struct btree *b, unsigned min_order, bool flush)
+static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
{
struct closure cl;
@@ -746,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c)
{
struct btree *b;
struct closure cl;
+
closure_init_stack(&cl);
if (c->shrink.list.next)
@@ -786,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c)
int bch_btree_cache_alloc(struct cache_set *c)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < mca_reserve(c); i++)
if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
@@ -1124,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1136,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
static void make_btree_freeing_key(struct btree *b, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
mutex_lock(&b->c->bucket_lock);
@@ -1157,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
struct cache_set *c = b->c;
struct cache *ca;
- unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
+ unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
mutex_lock(&c->bucket_lock);
@@ -1181,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
struct bkey *k)
{
uint8_t stale = 0;
- unsigned i;
+ unsigned int i;
struct bucket *g;
/*
@@ -1219,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
/* guard against overflow */
- SET_GC_SECTORS_USED(g, min_t(unsigned,
+ SET_GC_SECTORS_USED(g, min_t(unsigned int,
GC_SECTORS_USED(g) + KEY_SIZE(k),
MAX_GC_SECTORS_USED));
@@ -1233,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
@@ -1259,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
{
uint8_t stale = 0;
- unsigned keys = 0, good_keys = 0;
+ unsigned int keys = 0, good_keys = 0;
struct bkey *k;
struct btree_iter iter;
struct bset_tree *t;
@@ -1302,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
struct gc_merge_info {
struct btree *b;
- unsigned keys;
+ unsigned int keys;
};
-static int bch_btree_insert_node(struct btree *, struct btree_op *,
- struct keylist *, atomic_t *, struct bkey *);
+static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
+ struct keylist *insert_keys,
+ atomic_t *journal_ref,
+ struct bkey *replace_key);
static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
struct gc_stat *gc, struct gc_merge_info *r)
{
- unsigned i, nodes = 0, keys = 0, blocks;
+ unsigned int i, nodes = 0, keys = 0, blocks;
struct btree *new_nodes[GC_MERGE_NODES];
struct keylist keylist;
struct closure cl;
@@ -1511,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
return -EINTR;
}
-static unsigned btree_gc_count_keys(struct btree *b)
+static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
struct btree_iter iter;
- unsigned ret = 0;
+ unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
ret += bkey_u64s(k);
@@ -1678,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
if (!c->gc_mark_valid)
return;
@@ -1704,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
{
struct bucket *b;
struct cache *ca;
- unsigned i;
+ unsigned int i;
mutex_lock(&c->bucket_lock);
@@ -1722,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
struct bcache_device *d = c->devices[i];
struct cached_dev *dc;
struct keybuf_key *w, *n;
- unsigned j;
+ unsigned int j;
if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
continue;
@@ -1814,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c)
static bool gc_should_run(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned int i;
for_each_cache(ca, c, i)
if (ca->invalidate_needs_gc)
@@ -1905,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
bch_btree_gc_finish(c);
@@ -1945,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c)
static bool btree_insert_key(struct btree *b, struct bkey *k,
struct bkey *replace_key)
{
- unsigned status;
+ unsigned int status;
BUG_ON(bkey_cmp(k, &b->key) > 0);
@@ -2044,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
if (split) {
- unsigned keys = 0;
+ unsigned int keys = 0;
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
@@ -2222,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr ||
- b->seq != seq + 1) {
+ b->seq != seq + 1) {
op->lock = b->level;
goto out;
- }
+ }
}
SET_KEY_PTRS(check_key, 1);
@@ -2300,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
void bch_btree_set_root(struct btree *b)
{
- unsigned i;
+ unsigned int i;
struct closure cl;
closure_init_stack(&cl);
@@ -2412,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
struct refill {
struct btree_op op;
- unsigned nr_found;
+ unsigned int nr_found;
struct keybuf *buf;
struct bkey *end;
keybuf_pred_fn *pred;
@@ -2488,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
if (!RB_EMPTY_ROOT(&buf->keys)) {
struct keybuf_key *w;
+
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
buf->start = START_KEY(&w->key);
@@ -2519,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
{
bool ret = false;
struct keybuf_key *p, *w, s;
+
s.key = *start;
if (bkey_cmp(end, &buf->start) <= 0 ||
@@ -2545,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
{
struct keybuf_key *w;
+
spin_lock(&buf->lock);
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 68e9d92..a68d6c5 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -184,7 +184,7 @@ static inline struct bset *btree_bset_last(struct btree *b)
return bset_tree_last(&b->keys)->data;
}
-static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
+static inline unsigned int bset_block_offset(struct btree *b, struct bset *i)
{
return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
}
@@ -213,7 +213,7 @@ struct btree_op {
/* Btree level at which we start taking write locks */
short lock;
- unsigned insert_collision:1;
+ unsigned int insert_collision:1;
};
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
@@ -238,26 +238,28 @@ static inline void rw_unlock(bool w, struct btree *b)
(w ? up_write : up_read)(&b->lock);
}
-void bch_btree_node_read_done(struct btree *);
-void __bch_btree_node_write(struct btree *, struct closure *);
-void bch_btree_node_write(struct btree *, struct closure *);
+void bch_btree_node_read_done(struct btree *b);
+void __bch_btree_node_write(struct btree *b, struct closure *parent);
+void bch_btree_node_write(struct btree *b, struct closure *parent);
-void bch_btree_set_root(struct btree *);
-struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
- int, bool, struct btree *);
-struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
- struct bkey *, int, bool, struct btree *);
+void bch_btree_set_root(struct btree *b);
+struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
+ int level, bool wait,
+ struct btree *parent);
+struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
+ struct bkey *k, int level, bool write,
+ struct btree *parent);
-int bch_btree_insert_check_key(struct btree *, struct btree_op *,
- struct bkey *);
-int bch_btree_insert(struct cache_set *, struct keylist *,
- atomic_t *, struct bkey *);
+int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
+ struct bkey *check_key);
+int bch_btree_insert(struct cache_set *c, struct keylist *keys,
+ atomic_t *journal_ref, struct bkey *replace_key);
-int bch_gc_thread_start(struct cache_set *);
-void bch_initial_gc_finish(struct cache_set *);
-void bch_moving_gc(struct cache_set *);
-int bch_btree_check(struct cache_set *);
-void bch_initial_mark_key(struct cache_set *, int, struct bkey *);
+int bch_gc_thread_start(struct cache_set *c);
+void bch_initial_gc_finish(struct cache_set *c);
+void bch_moving_gc(struct cache_set *c);
+int bch_btree_check(struct cache_set *c);
+void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
static inline void wake_up_gc(struct cache_set *c)
{
@@ -272,9 +274,9 @@ static inline void wake_up_gc(struct cache_set *c)
#define MAP_END_KEY 1
-typedef int (btree_map_nodes_fn)(struct btree_op *, struct btree *);
-int __bch_btree_map_nodes(struct btree_op *, struct cache_set *,
- struct bkey *, btree_map_nodes_fn *, int);
+typedef int (btree_map_nodes_fn)(struct btree_op *b_op, struct btree *b);
+int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
+ struct bkey *from, btree_map_nodes_fn *fn, int flags);
static inline int bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
struct bkey *from, btree_map_nodes_fn *fn)
@@ -290,21 +292,23 @@ static inline int bch_btree_map_leaf_nodes(struct btree_op *op,
return __bch_btree_map_nodes(op, c, from, fn, MAP_LEAF_NODES);
}
-typedef int (btree_map_keys_fn)(struct btree_op *, struct btree *,
- struct bkey *);
-int bch_btree_map_keys(struct btree_op *, struct cache_set *,
- struct bkey *, btree_map_keys_fn *, int);
+typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
+ struct bkey *k);
+int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
+ struct bkey *from, btree_map_keys_fn *fn, int flags);
-typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
+typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
-void bch_keybuf_init(struct keybuf *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *,
- struct bkey *, keybuf_pred_fn *);
-bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
- struct bkey *);
-void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
-struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
- struct bkey *, keybuf_pred_fn *);
+void bch_keybuf_init(struct keybuf *buf);
+void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
+ struct bkey *end, keybuf_pred_fn *pred);
+bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
+ struct bkey *end);
+void bch_keybuf_del(struct keybuf *buf, struct keybuf_key *w);
+struct keybuf_key *bch_keybuf_next(struct keybuf *buf);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
+ struct keybuf *buf,
+ struct bkey *end,
+ keybuf_pred_fn *pred);
void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
#endif
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 6182536..73f5319 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Asynchronous refcounty things
*
@@ -162,12 +163,13 @@ static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
struct closure *cl;
+
spin_lock_irq(&closure_list_lock);
list_for_each_entry(cl, &closure_list, all) {
int r = atomic_read(&cl->remaining);
- seq_printf(f, "%p: %pF -> %pf p %p r %i ",
+ seq_printf(f, "%p: %pS -> %pS p %p r %i ",
cl, (void *) cl->ip, cl->fn, cl->parent,
r & CLOSURE_REMAINING_MASK);
@@ -177,7 +179,7 @@ static int debug_seq_show(struct seq_file *f, void *data)
r & CLOSURE_RUNNING ? "R" : "");
if (r & CLOSURE_WAITING)
- seq_printf(f, " W %pF\n",
+ seq_printf(f, " W %pS\n",
(void *) cl->waiting_on);
seq_printf(f, "\n");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 7c2c5bc..eca0d496 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -159,7 +159,7 @@ struct closure {
#define CLOSURE_MAGIC_DEAD 0xc054dead
#define CLOSURE_MAGIC_ALIVE 0xc054a11e
- unsigned magic;
+ unsigned int magic;
struct list_head all;
unsigned long ip;
unsigned long waiting_on;
@@ -289,10 +289,12 @@ static inline void closure_init_stack(struct closure *cl)
}
/**
- * closure_wake_up - wake up all closures on a wait list.
+ * closure_wake_up - wake up all closures on a wait list,
+ * with memory barrier
*/
static inline void closure_wake_up(struct closure_waitlist *list)
{
+ /* Memory barrier for the wait list */
smp_mb();
__closure_wake_up(list);
}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 12034c0..06da66b 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -67,34 +67,35 @@ void bch_btree_verify(struct btree *b)
if (inmemory->keys != sorted->keys ||
memcmp(inmemory->start,
sorted->start,
- (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
+ (void *) bset_bkey_last(inmemory) -
+ (void *) inmemory->start)) {
struct bset *i;
- unsigned j;
+ unsigned int j;
console_lock();
- printk(KERN_ERR "*** in memory:\n");
+ pr_err("*** in memory:\n");
bch_dump_bset(&b->keys, inmemory, 0);
- printk(KERN_ERR "*** read back in:\n");
+ pr_err("*** read back in:\n");
bch_dump_bset(&v->keys, sorted, 0);
for_each_written_bset(b, ondisk, i) {
- unsigned block = ((void *) i - (void *) ondisk) /
+ unsigned int block = ((void *) i - (void *) ondisk) /
block_bytes(b->c);
- printk(KERN_ERR "*** on disk block %u:\n", block);
+ pr_err("*** on disk block %u:\n", block);
bch_dump_bset(&b->keys, i, block);
}
- printk(KERN_ERR "*** block %zu not written\n",
+ pr_err("*** block %zu not written\n",
((void *) i - (void *) ondisk) / block_bytes(b->c));
for (j = 0; j < inmemory->keys; j++)
if (inmemory->d[j] != sorted->d[j])
break;
- printk(KERN_ERR "b->written %u\n", b->written);
+ pr_err("b->written %u\n", b->written);
console_unlock();
panic("verify failed at %u\n", j);
@@ -176,9 +177,9 @@ static ssize_t bch_dump_read(struct file *file, char __user *buf,
while (size) {
struct keybuf_key *w;
- unsigned bytes = min(i->bytes, size);
-
+ unsigned int bytes = min(i->bytes, size);
int err = copy_to_user(buf, i->buf, bytes);
+
if (err)
return err;
@@ -237,8 +238,8 @@ void bch_debug_init_cache_set(struct cache_set *c)
{
if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
- snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
+ snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index acc48d3..fb3d4df 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -8,8 +8,8 @@ struct cache_set;
#ifdef CONFIG_BCACHE_DEBUG
-void bch_btree_verify(struct btree *);
-void bch_data_verify(struct cached_dev *, struct bio *);
+void bch_btree_verify(struct btree *b);
+void bch_data_verify(struct cached_dev *dc, struct bio *bio);
#define expensive_debug_checks(c) ((c)->expensive_debug_checks)
#define key_merging_disabled(c) ((c)->key_merging_disabled)
@@ -27,7 +27,7 @@ static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
#endif
#ifdef CONFIG_DEBUG_FS
-void bch_debug_init_cache_set(struct cache_set *);
+void bch_debug_init_cache_set(struct cache_set *c);
#else
static inline void bch_debug_init_cache_set(struct cache_set *c) {}
#endif
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 1d09674..c809724 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -46,7 +46,7 @@ static bool bch_key_sort_cmp(struct btree_iter_set l,
static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
@@ -67,7 +67,7 @@ static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i)) {
@@ -96,7 +96,7 @@ static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
{
- unsigned i = 0;
+ unsigned int i = 0;
char *out = buf, *end = buf + size;
#define p(...) (out += scnprintf(out, end - out, __VA_ARGS__))
@@ -126,22 +126,22 @@ void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
{
struct btree *b = container_of(keys, struct btree, keys);
- unsigned j;
+ unsigned int j;
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
- printk(" %s", buf);
+ pr_err(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
- printk(" bucket %zu", n);
+ pr_err(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
- printk(" prio %i",
+ pr_err(" prio %i",
PTR_BUCKET(b->c, k, j)->prio);
}
- printk(" %s\n", bch_ptr_status(b->c, k));
+ pr_err(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@@ -166,12 +166,13 @@ bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
+
return __bch_btree_ptr_invalid(b->c, k);
}
static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
{
- unsigned i;
+ unsigned int i;
char buf[80];
struct bucket *g;
@@ -204,7 +205,7 @@ static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i;
+ unsigned int i;
if (!bkey_cmp(k, &ZERO_KEY) ||
!KEY_PTRS(k) ||
@@ -327,13 +328,14 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
struct cache_set *c = container_of(b, struct btree, keys)->c;
uint64_t old_offset;
- unsigned old_size, sectors_found = 0;
+ unsigned int old_size, sectors_found = 0;
BUG_ON(!KEY_OFFSET(insert));
BUG_ON(!KEY_SIZE(insert));
while (1) {
struct bkey *k = bch_btree_iter_next(iter);
+
if (!k)
break;
@@ -363,7 +365,7 @@ static bool bch_extent_insert_fixup(struct btree_keys *b,
* k might have been split since we inserted/found the
* key we're replacing
*/
- unsigned i;
+ unsigned int i;
uint64_t offset = KEY_START(k) -
KEY_START(replace_key);
@@ -498,11 +500,12 @@ bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
+
return __bch_extent_invalid(b->c, k);
}
static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
- unsigned ptr)
+ unsigned int ptr)
{
struct bucket *g = PTR_BUCKET(b->c, k, ptr);
char buf[80];
@@ -534,7 +537,7 @@ static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i, stale;
+ unsigned int i, stale;
if (!KEY_PTRS(k) ||
bch_extent_invalid(bk, k))
@@ -574,10 +577,12 @@ static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
~((uint64_t)1 << 63);
}
-static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
+static bool bch_extent_merge(struct btree_keys *bk,
+ struct bkey *l,
+ struct bkey *r)
{
struct btree *b = container_of(bk, struct btree, keys);
- unsigned i;
+ unsigned int i;
if (key_merging_disabled(b->c))
return false;
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index 0cd3575..4d667e0 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -8,8 +8,8 @@ extern const struct btree_keys_ops bch_extent_keys_ops;
struct bkey;
struct cache_set;
-void bch_extent_to_text(char *, size_t, const struct bkey *);
-bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
-bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
+void bch_extent_to_text(char *buf, size_t size, const struct bkey *k);
+bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k);
+bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k);
#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9612873..c250979 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -17,6 +17,7 @@
void bch_bbio_free(struct bio *bio, struct cache_set *c)
{
struct bbio *b = container_of(bio, struct bbio, bio);
+
mempool_free(b, &c->bio_meta);
}
@@ -42,9 +43,10 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
- struct bkey *k, unsigned ptr)
+ struct bkey *k, unsigned int ptr)
{
struct bbio *b = container_of(bio, struct bbio, bio);
+
bch_bkey_copy_single_ptr(&b->key, k, ptr);
__bch_submit_bbio(bio, c);
}
@@ -52,7 +54,7 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
/* IO errors */
void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
{
- unsigned errors;
+ unsigned int errors;
WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
@@ -75,16 +77,16 @@ void bch_count_io_errors(struct cache *ca,
*/
if (ca->set->error_decay) {
- unsigned count = atomic_inc_return(&ca->io_count);
+ unsigned int count = atomic_inc_return(&ca->io_count);
while (count > ca->set->error_decay) {
- unsigned errors;
- unsigned old = count;
- unsigned new = count - ca->set->error_decay;
+ unsigned int errors;
+ unsigned int old = count;
+ unsigned int new = count - ca->set->error_decay;
/*
* First we subtract refresh from count; each time we
- * succesfully do so, we rescale the errors once:
+ * successfully do so, we rescale the errors once:
*/
count = atomic_cmpxchg(&ca->io_count, old, new);
@@ -104,7 +106,7 @@ void bch_count_io_errors(struct cache *ca,
}
if (error) {
- unsigned errors = atomic_add_return(1 << IO_ERROR_SHIFT,
+ unsigned int errors = atomic_add_return(1 << IO_ERROR_SHIFT,
&ca->io_errors);
errors >>= IO_ERROR_SHIFT;
@@ -126,18 +128,18 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
struct cache *ca = PTR_CACHE(c, &b->key, 0);
int is_read = (bio_data_dir(bio) == READ ? 1 : 0);
- unsigned threshold = op_is_write(bio_op(bio))
+ unsigned int threshold = op_is_write(bio_op(bio))
? c->congested_write_threshold_us
: c->congested_read_threshold_us;
if (threshold) {
- unsigned t = local_clock_us();
-
+ unsigned int t = local_clock_us();
int us = t - b->submit_time_us;
int congested = atomic_read(&c->congested);
if (us > (int) threshold) {
int ms = us / 1024;
+
c->congested_last_us = t;
ms = min(ms, CONGESTED_MAX + congested);
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 10748c6..6116bbf 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -28,11 +28,12 @@
static void journal_read_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
+
closure_put(cl);
}
static int journal_read_bucket(struct cache *ca, struct list_head *list,
- unsigned bucket_index)
+ unsigned int bucket_index)
{
struct journal_device *ja = &ca->journal;
struct bio *bio = &ja->bio;
@@ -40,7 +41,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
struct journal_replay *i;
struct jset *j, *data = ca->set->journal.w[0].data;
struct closure cl;
- unsigned len, left, offset = 0;
+ unsigned int len, left, offset = 0;
int ret = 0;
sector_t bucket = bucket_to_sector(ca->set, ca->sb.d[bucket_index]);
@@ -50,7 +51,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
- len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
+ len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS);
bio_reset(bio);
bio->bi_iter.bi_sector = bucket + offset;
@@ -154,12 +155,12 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
})
struct cache *ca;
- unsigned iter;
+ unsigned int iter;
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS);
- unsigned i, l, r, m;
+ unsigned int i, l, r, m;
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
@@ -192,7 +193,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
- l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
+ l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets,
+ l + 1))
if (read_bucket(l))
goto bsearch;
@@ -304,7 +306,7 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
k < bset_bkey_last(&i->j);
k = bkey_next(k))
if (!__bch_extent_invalid(c, k)) {
- unsigned j;
+ unsigned int j;
for (j = 0; j < KEY_PTRS(k); j++)
if (ptr_available(c, k, j))
@@ -492,7 +494,7 @@ static void journal_reclaim(struct cache_set *c)
struct bkey *k = &c->journal.key;
struct cache *ca;
uint64_t last_seq;
- unsigned iter, n = 0;
+ unsigned int iter, n = 0;
atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -526,7 +528,7 @@ static void journal_reclaim(struct cache_set *c)
for_each_cache(ca, c, iter) {
struct journal_device *ja = &ca->journal;
- unsigned next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
+ unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
/* No space available on this device */
if (next == ja->discard_idx)
@@ -580,7 +582,7 @@ static void journal_write_endio(struct bio *bio)
closure_put(&w->c->journal.io);
}
-static void journal_write(struct closure *);
+static void journal_write(struct closure *cl);
static void journal_write_done(struct closure *cl)
{
@@ -609,11 +611,12 @@ static void journal_write_unlocked(struct closure *cl)
struct cache *ca;
struct journal_write *w = c->journal.cur;
struct bkey *k = &c->journal.key;
- unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
+ unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) *
c->sb.block_size;
struct bio *bio;
struct bio_list list;
+
bio_list_init(&list);
if (!w->need_write) {
@@ -705,7 +708,7 @@ static void journal_try_write(struct cache_set *c)
}
static struct journal_write *journal_wait_for_write(struct cache_set *c,
- unsigned nkeys)
+ unsigned int nkeys)
__acquires(&c->journal.lock)
{
size_t sectors;
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index b578819..66f0fac 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -110,7 +110,7 @@ struct journal {
struct delayed_work work;
/* Number of blocks free in the bucket(s) we're currently writing to */
- unsigned blocks_free;
+ unsigned int blocks_free;
uint64_t seq;
DECLARE_FIFO(atomic_t, pin);
@@ -131,13 +131,13 @@ struct journal_device {
uint64_t seq[SB_JOURNAL_BUCKETS];
/* Journal bucket we're currently writing to */
- unsigned cur_idx;
+ unsigned int cur_idx;
/* Last journal bucket that still contains an open journal entry */
- unsigned last_idx;
+ unsigned int last_idx;
/* Next journal bucket to be discarded */
- unsigned discard_idx;
+ unsigned int discard_idx;
#define DISCARD_READY 0
#define DISCARD_IN_FLIGHT 1
@@ -167,14 +167,16 @@ struct cache_set;
struct btree_op;
struct keylist;
-atomic_t *bch_journal(struct cache_set *, struct keylist *, struct closure *);
-void bch_journal_next(struct journal *);
-void bch_journal_mark(struct cache_set *, struct list_head *);
-void bch_journal_meta(struct cache_set *, struct closure *);
-int bch_journal_read(struct cache_set *, struct list_head *);
-int bch_journal_replay(struct cache_set *, struct list_head *);
+atomic_t *bch_journal(struct cache_set *c,
+ struct keylist *keys,
+ struct closure *parent);
+void bch_journal_next(struct journal *j);
+void bch_journal_mark(struct cache_set *c, struct list_head *list);
+void bch_journal_meta(struct cache_set *c, struct closure *cl);
+int bch_journal_read(struct cache_set *c, struct list_head *list);
+int bch_journal_replay(struct cache_set *c, struct list_head *list);
-void bch_journal_free(struct cache_set *);
-int bch_journal_alloc(struct cache_set *);
+void bch_journal_free(struct cache_set *c);
+int bch_journal_alloc(struct cache_set *c);
#endif /* _BCACHE_JOURNAL_H */
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index a24c3a9..7891fb5 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -23,7 +23,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
{
struct cache_set *c = container_of(buf, struct cache_set,
moving_gc_keys);
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
@@ -38,6 +38,7 @@ static bool moving_pred(struct keybuf *buf, struct bkey *k)
static void moving_io_destructor(struct closure *cl)
{
struct moving_io *io = container_of(cl, struct moving_io, cl);
+
kfree(io);
}
@@ -186,9 +187,10 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r)
return GC_SECTORS_USED(l) < GC_SECTORS_USED(r);
}
-static unsigned bucket_heap_top(struct cache *ca)
+static unsigned int bucket_heap_top(struct cache *ca)
{
struct bucket *b;
+
return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0;
}
@@ -196,7 +198,7 @@ void bch_moving_gc(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
if (!c->copy_gc_enabled)
return;
@@ -204,9 +206,9 @@ void bch_moving_gc(struct cache_set *c)
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i) {
- unsigned sectors_to_move = 0;
- unsigned reserve_sectors = ca->sb.bucket_size *
- fifo_used(&ca->free[RESERVE_MOVINGGC]);
+ unsigned int sectors_to_move = 0;
+ unsigned int reserve_sectors = ca->sb.bucket_size *
+ fifo_used(&ca->free[RESERVE_MOVINGGC]);
ca->heap.used = 0;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 7dbe8b6..51be355 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -25,9 +25,9 @@
struct kmem_cache *bch_search_cache;
-static void bch_data_insert_start(struct closure *);
+static void bch_data_insert_start(struct closure *cl);
-static unsigned cache_mode(struct cached_dev *dc)
+static unsigned int cache_mode(struct cached_dev *dc)
{
return BDEV_CACHE_MODE(&dc->sb);
}
@@ -45,6 +45,7 @@ static void bio_csum(struct bio *bio, struct bkey *k)
bio_for_each_segment(bv, bio, iter) {
void *d = kmap(bv.bv_page) + bv.bv_offset;
+
csum = bch_crc64_update(csum, d, bv.bv_len);
kunmap(bv.bv_page);
}
@@ -98,7 +99,7 @@ static void bch_data_insert_keys(struct closure *cl)
closure_return(cl);
}
-static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+static int bch_keylist_realloc(struct keylist *l, unsigned int u64s,
struct cache_set *c)
{
size_t oldsize = bch_keylist_nkeys(l);
@@ -125,7 +126,7 @@ static void bch_data_invalidate(struct closure *cl)
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
- unsigned sectors = min(bio_sectors(bio),
+ unsigned int sectors = min(bio_sectors(bio),
1U << (KEY_SIZE_BITS - 1));
if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
@@ -135,7 +136,9 @@ static void bch_data_invalidate(struct closure *cl)
bio->bi_iter.bi_size -= sectors << 9;
bch_keylist_add(&op->insert_keys,
- &KEY(op->inode, bio->bi_iter.bi_sector, sectors));
+ &KEY(op->inode,
+ bio->bi_iter.bi_sector,
+ sectors));
}
op->insert_data_done = true;
@@ -151,7 +154,7 @@ static void bch_data_insert_error(struct closure *cl)
/*
* Our data write just errored, which means we've got a bunch of keys to
- * insert that point to data that wasn't succesfully written.
+ * insert that point to data that wasn't successfully written.
*
* We don't have to insert those keys but we still have to invalidate
* that region of the cache - so, if we just strip off all the pointers
@@ -211,7 +214,7 @@ static void bch_data_insert_start(struct closure *cl)
bio->bi_opf &= ~(REQ_PREFLUSH|REQ_FUA);
do {
- unsigned i;
+ unsigned int i;
struct bkey *k;
struct bio_set *split = &op->c->bio_split;
@@ -328,7 +331,7 @@ void bch_data_insert(struct closure *cl)
/* Congested? */
-unsigned bch_get_congested(struct cache_set *c)
+unsigned int bch_get_congested(struct cache_set *c)
{
int i;
long rand;
@@ -372,8 +375,8 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k)
static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
{
struct cache_set *c = dc->disk.c;
- unsigned mode = cache_mode(dc);
- unsigned sectors, congested = bch_get_congested(c);
+ unsigned int mode = cache_mode(dc);
+ unsigned int sectors, congested = bch_get_congested(c);
struct task_struct *task = current;
struct io *i;
@@ -469,11 +472,11 @@ struct search {
struct bio *cache_miss;
struct bcache_device *d;
- unsigned insert_bio_sectors;
- unsigned recoverable:1;
- unsigned write:1;
- unsigned read_dirty_data:1;
- unsigned cache_missed:1;
+ unsigned int insert_bio_sectors;
+ unsigned int recoverable:1;
+ unsigned int write:1;
+ unsigned int read_dirty_data:1;
+ unsigned int cache_missed:1;
unsigned long start_time;
@@ -514,20 +517,20 @@ static int cache_lookup_fn(struct btree_op *op, struct btree *b, struct bkey *k)
struct search *s = container_of(op, struct search, op);
struct bio *n, *bio = &s->bio.bio;
struct bkey *bio_key;
- unsigned ptr;
+ unsigned int ptr;
if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
return MAP_CONTINUE;
if (KEY_INODE(k) != s->iop.inode ||
KEY_START(k) > bio->bi_iter.bi_sector) {
- unsigned bio_sectors = bio_sectors(bio);
- unsigned sectors = KEY_INODE(k) == s->iop.inode
+ unsigned int bio_sectors = bio_sectors(bio);
+ unsigned int sectors = KEY_INODE(k) == s->iop.inode
? min_t(uint64_t, INT_MAX,
KEY_START(k) - bio->bi_iter.bi_sector)
: INT_MAX;
-
int ret = s->d->cache_miss(b, s, bio, sectors);
+
if (ret != MAP_CONTINUE)
return ret;
@@ -623,6 +626,7 @@ static void request_endio(struct bio *bio)
if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
+
s->iop.status = bio->bi_status;
/* Only cache read errors are recoverable */
s->recoverable = false;
@@ -813,7 +817,8 @@ static void cached_dev_read_done(struct closure *cl)
if (s->iop.bio) {
bio_reset(s->iop.bio);
- s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
+ s->iop.bio->bi_iter.bi_sector =
+ s->cache_miss->bi_iter.bi_sector;
bio_copy_dev(s->iop.bio, s->cache_miss);
s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
bch_bio_map(s->iop.bio, NULL);
@@ -856,10 +861,10 @@ static void cached_dev_read_done_bh(struct closure *cl)
}
static int cached_dev_cache_miss(struct btree *b, struct search *s,
- struct bio *bio, unsigned sectors)
+ struct bio *bio, unsigned int sectors)
{
int ret = MAP_CONTINUE;
- unsigned reada = 0;
+ unsigned int reada = 0;
struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
struct bio *miss, *cache_bio;
@@ -1212,6 +1217,7 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
}
@@ -1226,7 +1232,7 @@ static int cached_dev_congested(void *data, int bits)
return 1;
if (cached_dev_get(dc)) {
- unsigned i;
+ unsigned int i;
struct cache *ca;
for_each_cache(ca, d->c, i) {
@@ -1253,9 +1259,9 @@ void bch_cached_dev_request_init(struct cached_dev *dc)
/* Flash backed devices */
static int flash_dev_cache_miss(struct btree *b, struct search *s,
- struct bio *bio, unsigned sectors)
+ struct bio *bio, unsigned int sectors)
{
- unsigned bytes = min(sectors, bio_sectors(bio)) << 9;
+ unsigned int bytes = min(sectors, bio_sectors(bio)) << 9;
swap(bio->bi_iter.bi_size, bytes);
zero_fill_bio(bio);
@@ -1338,7 +1344,7 @@ static int flash_dev_congested(void *data, int bits)
struct bcache_device *d = data;
struct request_queue *q;
struct cache *ca;
- unsigned i;
+ unsigned int i;
int ret = 0;
for_each_cache(ca, d->c, i) {
@@ -1361,8 +1367,7 @@ void bch_flash_dev_request_init(struct bcache_device *d)
void bch_request_exit(void)
{
- if (bch_search_cache)
- kmem_cache_destroy(bch_search_cache);
+ kmem_cache_destroy(bch_search_cache);
}
int __init bch_request_init(void)
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index dea0886..aa055cf 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -8,7 +8,7 @@ struct data_insert_op {
struct bio *bio;
struct workqueue_struct *wq;
- unsigned inode;
+ unsigned int inode;
uint16_t write_point;
uint16_t write_prio;
blk_status_t status;
@@ -17,15 +17,15 @@ struct data_insert_op {
uint16_t flags;
struct {
- unsigned bypass:1;
- unsigned writeback:1;
- unsigned flush_journal:1;
- unsigned csum:1;
+ unsigned int bypass:1;
+ unsigned int writeback:1;
+ unsigned int flush_journal:1;
+ unsigned int csum:1;
- unsigned replace:1;
- unsigned replace_collision:1;
+ unsigned int replace:1;
+ unsigned int replace_collision:1;
- unsigned insert_data_done:1;
+ unsigned int insert_data_done:1;
};
};
@@ -33,7 +33,7 @@ struct data_insert_op {
BKEY_PADDED(replace_key);
};
-unsigned bch_get_congested(struct cache_set *);
+unsigned int bch_get_congested(struct cache_set *c);
void bch_data_insert(struct closure *cl);
void bch_cached_dev_request_init(struct cached_dev *dc);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index be11932..894410f 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -33,11 +33,11 @@
* stored left shifted by 16, and scaled back in the sysfs show() function.
*/
-static const unsigned DAY_RESCALE = 288;
-static const unsigned HOUR_RESCALE = 12;
-static const unsigned FIVE_MINUTE_RESCALE = 1;
-static const unsigned accounting_delay = (HZ * 300) / 22;
-static const unsigned accounting_weight = 32;
+static const unsigned int DAY_RESCALE = 288;
+static const unsigned int HOUR_RESCALE = 12;
+static const unsigned int FIVE_MINUTE_RESCALE = 1;
+static const unsigned int accounting_delay = (HZ * 300) / 22;
+static const unsigned int accounting_weight = 32;
/* sysfs reading/writing */
@@ -152,7 +152,7 @@ static void scale_accounting(struct timer_list *t)
struct cache_accounting *acc = from_timer(acc, t, timer);
#define move_stat(name) do { \
- unsigned t = atomic_xchg(&acc->collector.name, 0); \
+ unsigned int t = atomic_xchg(&acc->collector.name, 0); \
t <<= 16; \
acc->five_minute.name += t; \
acc->hour.name += t; \
@@ -200,6 +200,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
bool hit, bool bypass)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
mark_cache_stats(&dc->accounting.collector, hit, bypass);
mark_cache_stats(&c->accounting.collector, hit, bypass);
}
@@ -207,6 +208,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
atomic_inc(&dc->accounting.collector.cache_readaheads);
atomic_inc(&c->accounting.collector.cache_readaheads);
}
@@ -214,6 +216,7 @@ void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d)
void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d)
{
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
atomic_inc(&dc->accounting.collector.cache_miss_collisions);
atomic_inc(&c->accounting.collector.cache_miss_collisions);
}
diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h
index 0b70f9d..abfaabf7 100644
--- a/drivers/md/bcache/stats.h
+++ b/drivers/md/bcache/stats.h
@@ -23,7 +23,7 @@ struct cache_stats {
unsigned long cache_miss_collisions;
unsigned long sectors_bypassed;
- unsigned rescale;
+ unsigned int rescale;
};
struct cache_accounting {
@@ -53,10 +53,13 @@ void bch_cache_accounting_clear(struct cache_accounting *acc);
void bch_cache_accounting_destroy(struct cache_accounting *acc);
-void bch_mark_cache_accounting(struct cache_set *, struct bcache_device *,
- bool, bool);
-void bch_mark_cache_readahead(struct cache_set *, struct bcache_device *);
-void bch_mark_cache_miss_collision(struct cache_set *, struct bcache_device *);
-void bch_mark_sectors_bypassed(struct cache_set *, struct cached_dev *, int);
+void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d,
+ bool hit, bool bypass);
+void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d);
+void bch_mark_cache_miss_collision(struct cache_set *c,
+ struct bcache_device *d);
+void bch_mark_sectors_bypassed(struct cache_set *c,
+ struct cached_dev *dc,
+ int sectors);
#endif /* _BCACHE_STATS_H_ */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 55a3764..94c756c 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bcache setup/teardown code, and some metadata io - read a superblock and
* figure out what to do with it.
@@ -61,7 +62,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
const char *err;
struct cache_sb *s;
struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
- unsigned i;
+ unsigned int i;
if (!bh)
return "IO error";
@@ -149,7 +150,8 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
goto err;
err = "Invalid superblock: device too small";
- if (get_capacity(bdev->bd_disk) < sb->bucket_size * sb->nbuckets)
+ if (get_capacity(bdev->bd_disk) <
+ sb->bucket_size * sb->nbuckets)
goto err;
err = "Bad UUID";
@@ -202,7 +204,7 @@ static void write_bdev_super_endio(struct bio *bio)
static void __write_super(struct cache_sb *sb, struct bio *bio)
{
struct cache_sb *out = page_address(bio_first_page_all(bio));
- unsigned i;
+ unsigned int i;
bio->bi_iter.bi_sector = SB_SECTOR;
bio->bi_iter.bi_size = SB_SIZE;
@@ -282,7 +284,7 @@ void bcache_write_super(struct cache_set *c)
{
struct closure *cl = &c->sb_write;
struct cache *ca;
- unsigned i;
+ unsigned int i;
down(&c->sb_write_mutex);
closure_init(cl, &c->cl);
@@ -334,7 +336,7 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
{
struct closure *cl = &c->uuid_write;
struct uuid_entry *u;
- unsigned i;
+ unsigned int i;
char buf[80];
BUG_ON(!parent);
@@ -415,8 +417,8 @@ static int __uuid_write(struct cache_set *c)
{
BKEY_PADDED(key) k;
struct closure cl;
- closure_init_stack(&cl);
+ closure_init_stack(&cl);
lockdep_assert_held(&bch_register_lock);
if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
@@ -456,6 +458,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
return uuid_find(c, zero_uuid);
}
@@ -463,8 +466,8 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c)
* Bucket priorities/gens:
*
* For each bucket, we store on disk its
- * 8 bit gen
- * 16 bit priority
+ * 8 bit gen
+ * 16 bit priority
*
* See alloc.c for an explanation of the gen. The priority is used to implement
* lru (and in the future other) cache replacement policies; for most purposes
@@ -587,7 +590,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket *b;
- unsigned bucket_nr = 0;
+ unsigned int bucket_nr = 0;
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets;
@@ -599,7 +602,8 @@ static void prio_read(struct cache *ca, uint64_t bucket)
prio_io(ca, bucket, REQ_OP_READ, 0);
- if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8))
+ if (p->csum !=
+ bch_crc64(&p->magic, bucket_bytes(ca) - 8))
pr_warn("bad csum reading priorities");
if (p->magic != pset_magic(&ca->sb))
@@ -619,6 +623,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
static int open_dev(struct block_device *b, fmode_t mode)
{
struct bcache_device *d = b->bd_disk->private_data;
+
if (test_bit(BCACHE_DEV_CLOSING, &d->flags))
return -ENXIO;
@@ -629,6 +634,7 @@ static int open_dev(struct block_device *b, fmode_t mode)
static void release_dev(struct gendisk *b, fmode_t mode)
{
struct bcache_device *d = b->private_data;
+
closure_put(&d->cl);
}
@@ -662,7 +668,7 @@ static void bcache_device_unlink(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (d->c && !test_and_set_bit(BCACHE_DEV_UNLINK_DONE, &d->flags)) {
- unsigned i;
+ unsigned int i;
struct cache *ca;
sysfs_remove_link(&d->c->kobj, d->name);
@@ -676,7 +682,7 @@ static void bcache_device_unlink(struct bcache_device *d)
static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
const char *name)
{
- unsigned i;
+ unsigned int i;
struct cache *ca;
for_each_cache(ca, d->c, i)
@@ -715,7 +721,7 @@ static void bcache_device_detach(struct bcache_device *d)
}
static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
- unsigned id)
+ unsigned int id)
{
d->id = id;
d->c = c;
@@ -762,7 +768,7 @@ static void bcache_device_free(struct bcache_device *d)
closure_debug_destroy(&d->cl);
}
-static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
sector_t sectors)
{
struct request_queue *q;
@@ -778,7 +784,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size,
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
- (unsigned)d->nr_stripes);
+ (unsigned int)d->nr_stripes);
return -ENOMEM;
}
@@ -919,6 +925,7 @@ void bch_cached_dev_run(struct cached_dev *dc)
if (!d->c &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
struct closure cl;
+
closure_init_stack(&cl);
SET_BDEV_STATE(&dc->sb, BDEV_STATE_STALE);
@@ -928,8 +935,10 @@ void bch_cached_dev_run(struct cached_dev *dc)
add_disk(d->disk);
bd_link_disk_holder(dc->bdev, dc->disk.disk);
- /* won't show up in the uevent file, use udevadm monitor -e instead
- * only class / kset properties are persistent */
+ /*
+ * won't show up in the uevent file, use udevadm monitor -e instead
+ * only class / kset properties are persistent
+ */
kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
kfree(env[1]);
kfree(env[2]);
@@ -976,6 +985,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
struct closure cl;
+
closure_init_stack(&cl);
BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
@@ -1097,12 +1107,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
}
}
- /* Deadlocks since we're called via sysfs...
- sysfs_remove_file(&dc->kobj, &sysfs_attach);
+ /*
+ * Deadlocks since we're called via sysfs...
+ * sysfs_remove_file(&dc->kobj, &sysfs_attach);
*/
if (bch_is_zero(u->uuid, 16)) {
struct closure cl;
+
closure_init_stack(&cl);
memcpy(u->uuid, dc->sb.uuid, 16);
@@ -1124,11 +1136,11 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
list_move(&dc->list, &c->cached_devs);
calc_cached_dev_sectors(c);
- smp_wmb();
/*
* dc->c must be set before dc->count != 0 - paired with the mb in
* cached_dev_get()
*/
+ smp_wmb();
refcount_set(&dc->count, 1);
/* Block writeback thread, but spawn it */
@@ -1212,7 +1224,7 @@ static void cached_dev_flush(struct closure *cl)
continue_at(cl, cached_dev_free, system_wq);
}
-static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
+static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
{
int ret;
struct io *io;
@@ -1320,6 +1332,7 @@ void bch_flash_dev_release(struct kobject *kobj)
static void flash_dev_free(struct closure *cl)
{
struct bcache_device *d = container_of(cl, struct bcache_device, cl);
+
mutex_lock(&bch_register_lock);
atomic_long_sub(bcache_dev_sectors_dirty(d),
&d->c->flash_dev_dirty_sectors);
@@ -1459,17 +1472,18 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
pr_info("CACHE_SET_IO_DISABLE already set");
- /* XXX: we can be called from atomic context
- acquire_console_sem();
- */
+ /*
+ * XXX: we can be called from atomic context
+ * acquire_console_sem();
+ */
- printk(KERN_ERR "bcache: error on %pU: ", c->sb.set_uuid);
+ pr_err("bcache: error on %pU: ", c->sb.set_uuid);
va_start(args, fmt);
vprintk(fmt, args);
va_end(args);
- printk(", disabling caching\n");
+ pr_err(", disabling caching\n");
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@@ -1481,6 +1495,7 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
void bch_cache_set_release(struct kobject *kobj)
{
struct cache_set *c = container_of(kobj, struct cache_set, kobj);
+
kfree(c);
module_put(THIS_MODULE);
}
@@ -1489,7 +1504,7 @@ static void cache_set_free(struct closure *cl)
{
struct cache_set *c = container_of(cl, struct cache_set, cl);
struct cache *ca;
- unsigned i;
+ unsigned int i;
if (!IS_ERR_OR_NULL(c->debug))
debugfs_remove(c->debug);
@@ -1532,7 +1547,7 @@ static void cache_set_flush(struct closure *cl)
struct cache_set *c = container_of(cl, struct cache_set, caching);
struct cache *ca;
struct btree *b;
- unsigned i;
+ unsigned int i;
bch_cache_accounting_destroy(&c->accounting);
@@ -1671,6 +1686,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
{
int iter_size;
struct cache_set *c = kzalloc(sizeof(struct cache_set), GFP_KERNEL);
+
if (!c)
return NULL;
@@ -1731,8 +1747,8 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
if (!(c->devices = kcalloc(c->nr_uuids, sizeof(void *), GFP_KERNEL)) ||
mempool_init_slab_pool(&c->search, 32, bch_search_cache) ||
mempool_init_kmalloc_pool(&c->bio_meta, 2,
- sizeof(struct bbio) + sizeof(struct bio_vec) *
- bucket_pages(c)) ||
+ sizeof(struct bbio) + sizeof(struct bio_vec) *
+ bucket_pages(c)) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
bioset_init(&c->bio_split, 4, offsetof(struct bbio, bio),
BIOSET_NEED_BVECS|BIOSET_NEED_RESCUER) ||
@@ -1762,7 +1778,7 @@ static void run_cache_set(struct cache_set *c)
struct cached_dev *dc, *t;
struct cache *ca;
struct closure cl;
- unsigned i;
+ unsigned int i;
closure_init_stack(&cl);
@@ -1804,7 +1820,9 @@ static void run_cache_set(struct cache_set *c)
goto err;
err = "error reading btree root";
- c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
+ c->root = bch_btree_node_get(c, NULL, k,
+ j->btree_level,
+ true, NULL);
if (IS_ERR_OR_NULL(c->root))
goto err;
@@ -1853,7 +1871,7 @@ static void run_cache_set(struct cache_set *c)
pr_notice("invalidating existing data");
for_each_cache(ca, c, i) {
- unsigned j;
+ unsigned int j;
ca->sb.keys = clamp_t(int, ca->sb.nbuckets >> 7,
2, SB_JOURNAL_BUCKETS);
@@ -1998,7 +2016,7 @@ static const char *register_cache_set(struct cache *ca)
void bch_cache_release(struct kobject *kobj)
{
struct cache *ca = container_of(kobj, struct cache, kobj);
- unsigned i;
+ unsigned int i;
if (ca->set) {
BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
@@ -2098,7 +2116,9 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
}
- if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
+ if (kobject_add(&ca->kobj,
+ &part_to_dev(bdev->bd_part)->kobj,
+ "bcache")) {
err = "error calling kobject_add";
ret = -ENOMEM;
goto out;
@@ -2127,13 +2147,14 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
/* Global interfaces/init */
-static ssize_t register_bcache(struct kobject *, struct kobj_attribute *,
- const char *, size_t);
+static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
+ const char *buffer, size_t size);
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
-static bool bch_is_open_backing(struct block_device *bdev) {
+static bool bch_is_open_backing(struct block_device *bdev)
+{
struct cache_set *c, *tc;
struct cached_dev *dc, *t;
@@ -2147,10 +2168,11 @@ static bool bch_is_open_backing(struct block_device *bdev) {
return false;
}
-static bool bch_is_open_cache(struct block_device *bdev) {
+static bool bch_is_open_cache(struct block_device *bdev)
+{
struct cache_set *c, *tc;
struct cache *ca;
- unsigned i;
+ unsigned int i;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
for_each_cache(ca, c, i)
@@ -2159,7 +2181,8 @@ static bool bch_is_open_cache(struct block_device *bdev) {
return false;
}
-static bool bch_is_open(struct block_device *bdev) {
+static bool bch_is_open(struct block_device *bdev)
+{
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
@@ -2216,6 +2239,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+
if (!dc)
goto err_close;
@@ -2224,6 +2248,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
mutex_unlock(&bch_register_lock);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+
if (!ca)
goto err_close;
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 81d3520..150cf4f 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -130,8 +130,10 @@ rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
rw_attribute(size);
-static ssize_t bch_snprint_string_list(char *buf, size_t size, const char * const list[],
- size_t selected)
+static ssize_t bch_snprint_string_list(char *buf,
+ size_t size,
+ const char * const list[],
+ size_t selected)
{
char *out = buf;
size_t i;
@@ -148,7 +150,7 @@ SHOW(__bch_cached_dev)
{
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
- const char *states[] = { "no cache", "clean", "dirty", "inconsistent" };
+ char const *states[] = { "no cache", "clean", "dirty", "inconsistent" };
int wb = dc->writeback_running;
#define var(stat) (dc->stat)
@@ -307,7 +309,7 @@ STORE(__cached_dev)
if (v < 0)
return v;
- if ((unsigned) v != BDEV_CACHE_MODE(&dc->sb)) {
+ if ((unsigned int) v != BDEV_CACHE_MODE(&dc->sb)) {
SET_BDEV_CACHE_MODE(&dc->sb, v);
bch_write_bdev_super(dc, NULL);
}
@@ -341,8 +343,9 @@ STORE(__cached_dev)
add_uevent_var(env, "DRIVER=bcache");
add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
add_uevent_var(env, "CACHED_LABEL=%s", buf);
- kobject_uevent_env(
- &disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
+ kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
+ KOBJ_CHANGE,
+ env->envp);
kfree(env);
}
@@ -459,6 +462,7 @@ STORE(__bch_flash_dev)
if (attr == &sysfs_size) {
uint64_t v;
+
strtoi_h_or_return(buf, v);
u->sectors = v >> 9;
@@ -533,9 +537,9 @@ static int bch_bset_print_stats(struct cache_set *c, char *buf)
op.stats.floats, op.stats.failed);
}
-static unsigned bch_root_usage(struct cache_set *c)
+static unsigned int bch_root_usage(struct cache_set *c)
{
- unsigned bytes = 0;
+ unsigned int bytes = 0;
struct bkey *k;
struct btree *b;
struct btree_iter iter;
@@ -570,9 +574,9 @@ static size_t bch_cache_size(struct cache_set *c)
return ret;
}
-static unsigned bch_cache_max_chain(struct cache_set *c)
+static unsigned int bch_cache_max_chain(struct cache_set *c)
{
- unsigned ret = 0;
+ unsigned int ret = 0;
struct hlist_head *h;
mutex_lock(&c->bucket_lock);
@@ -580,7 +584,7 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
for (h = c->bucket_hash;
h < c->bucket_hash + (1 << BUCKET_HASH_BITS);
h++) {
- unsigned i = 0;
+ unsigned int i = 0;
struct hlist_node *p;
hlist_for_each(p, h)
@@ -593,13 +597,13 @@ static unsigned bch_cache_max_chain(struct cache_set *c)
return ret;
}
-static unsigned bch_btree_used(struct cache_set *c)
+static unsigned int bch_btree_used(struct cache_set *c)
{
return div64_u64(c->gc_stats.key_bytes * 100,
(c->gc_stats.nodes ?: 1) * btree_bytes(c));
}
-static unsigned bch_average_key_size(struct cache_set *c)
+static unsigned int bch_average_key_size(struct cache_set *c)
{
return c->gc_stats.nkeys
? div64_u64(c->gc_stats.data, c->gc_stats.nkeys)
@@ -703,6 +707,7 @@ STORE(__bch_cache_set)
if (attr == &sysfs_flash_vol_create) {
int r;
uint64_t v;
+
strtoi_h_or_return(buf, v);
r = bch_flash_dev_create(c, v);
@@ -736,6 +741,7 @@ STORE(__bch_cache_set)
if (attr == &sysfs_prune_cache) {
struct shrink_control sc;
+
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
c->shrink.scan_objects(&c->shrink, &sc);
@@ -789,12 +795,14 @@ STORE_LOCKED(bch_cache_set)
SHOW(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
return bch_cache_set_show(&c->kobj, attr, buf);
}
STORE(bch_cache_set_internal)
{
struct cache_set *c = container_of(kobj, struct cache_set, internal);
+
return bch_cache_set_store(&c->kobj, attr, buf, size);
}
@@ -996,7 +1004,7 @@ STORE(__bch_cache)
if (v < 0)
return v;
- if ((unsigned) v != CACHE_REPLACEMENT(&ca->sb)) {
+ if ((unsigned int) v != CACHE_REPLACEMENT(&ca->sb)) {
mutex_lock(&ca->set->bucket_lock);
SET_CACHE_REPLACEMENT(&ca->sb, v);
mutex_unlock(&ca->set->bucket_lock);
diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h
index b54fe96..3fe8242 100644
--- a/drivers/md/bcache/sysfs.h
+++ b/drivers/md/bcache/sysfs.h
@@ -44,9 +44,9 @@ STORE(fn) \
static struct attribute sysfs_##_name = \
{ .name = #_name, .mode = _mode }
-#define write_attribute(n) __sysfs_attribute(n, S_IWUSR)
-#define read_attribute(n) __sysfs_attribute(n, S_IRUGO)
-#define rw_attribute(n) __sysfs_attribute(n, S_IRUGO|S_IWUSR)
+#define write_attribute(n) __sysfs_attribute(n, 0200)
+#define read_attribute(n) __sysfs_attribute(n, 0444)
+#define rw_attribute(n) __sysfs_attribute(n, 0644)
#define sysfs_printf(file, fmt, ...) \
do { \
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index b15256b..20eddea 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* random utiility code, for bcache but in theory not specific to bcache
*
@@ -133,6 +134,7 @@ bool bch_is_zero(const char *p, size_t n)
int bch_parse_uuid(const char *s, char *uuid)
{
size_t i, j, x;
+
memset(uuid, 0, 16);
for (i = 0, j = 0;
@@ -279,134 +281,3 @@ int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
return 0;
}
-
-/*
- * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
- * use permitted, subject to terms of PostgreSQL license; see.)
-
- * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
- * usual sort of implementation. (See Ross Williams' excellent introduction
- * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
- * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
- * If we have no working 64-bit type, then fake it with two 32-bit registers.
- *
- * The present implementation is a normal (not "reflected", in Williams'
- * terms) 64-bit CRC, using initial all-ones register contents and a final
- * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
- * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
- *
- * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
- * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
- * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
- * x^7 + x^4 + x + 1
-*/
-
-static const uint64_t crc_table[256] = {
- 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
- 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
- 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
- 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
- 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
- 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
- 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
- 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
- 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
- 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
- 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
- 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
- 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
- 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
- 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
- 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
- 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
- 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
- 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
- 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
- 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
- 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
- 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
- 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
- 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
- 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
- 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
- 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
- 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
- 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
- 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
- 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
- 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
- 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
- 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
- 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
- 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
- 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
- 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
- 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
- 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
- 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
- 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
- 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
- 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
- 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
- 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
- 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
- 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
- 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
- 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
- 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
- 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
- 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
- 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
- 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
- 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
- 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
- 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
- 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
- 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
- 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
- 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
- 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
- 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
- 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
- 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
- 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
- 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
- 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
- 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
- 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
- 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
- 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
- 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
- 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
- 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
- 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
- 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
- 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
- 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
- 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
- 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
- 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
- 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
- 0x9AFCE626CE85B507ULL,
-};
-
-uint64_t bch_crc64_update(uint64_t crc, const void *_data, size_t len)
-{
- const unsigned char *data = _data;
-
- while (len--) {
- int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
- crc = crc_table[i] ^ (crc << 8);
- }
-
- return crc;
-}
-
-uint64_t bch_crc64(const void *data, size_t len)
-{
- uint64_t crc = 0xffffffffffffffffULL;
-
- crc = bch_crc64_update(crc, data, len);
-
- return crc ^ 0xffffffffffffffffULL;
-}
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index f7b0133..00aab6a 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -11,6 +11,7 @@
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/crc64.h>
#include "closure.h"
@@ -288,10 +289,10 @@ do { \
#define ANYSINT_MAX(t) \
((((t) 1 << (sizeof(t) * 8 - 2)) - (t) 1) * (t) 2 + (t) 1)
-int bch_strtoint_h(const char *, int *);
-int bch_strtouint_h(const char *, unsigned int *);
-int bch_strtoll_h(const char *, long long *);
-int bch_strtoull_h(const char *, unsigned long long *);
+int bch_strtoint_h(const char *cp, int *res);
+int bch_strtouint_h(const char *cp, unsigned int *res);
+int bch_strtoll_h(const char *cp, long long *res);
+int bch_strtoull_h(const char *cp, unsigned long long *res);
static inline int bch_strtol_h(const char *cp, long *res)
{
@@ -347,7 +348,7 @@ static inline int bch_strtoul_h(const char *cp, long *res)
snprintf(buf, size, \
__builtin_types_compatible_p(typeof(var), int) \
? "%i\n" : \
- __builtin_types_compatible_p(typeof(var), unsigned) \
+ __builtin_types_compatible_p(typeof(var), unsigned int) \
? "%u\n" : \
__builtin_types_compatible_p(typeof(var), long) \
? "%li\n" : \
@@ -379,7 +380,7 @@ struct time_stats {
void bch_time_stats_update(struct time_stats *stats, uint64_t time);
-static inline unsigned local_clock_us(void)
+static inline unsigned int local_clock_us(void)
{
return local_clock() >> 10;
}
@@ -402,7 +403,8 @@ do { \
__print_time_stat(stats, name, \
average_duration, duration_units); \
sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
- div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
+ div_u64((stats)->max_duration, \
+ NSEC_PER_ ## duration_units)); \
\
sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
? div_s64(local_clock() - (stats)->last, \
@@ -542,10 +544,27 @@ dup: \
#define RB_PREV(ptr, member) \
container_of_or_null(rb_prev(&(ptr)->member), typeof(*ptr), member)
-/* Does linear interpolation between powers of two */
-static inline unsigned fract_exp_two(unsigned x, unsigned fract_bits)
+static inline uint64_t bch_crc64(const void *p, size_t len)
{
- unsigned fract = x & ~(~0 << fract_bits);
+ uint64_t crc = 0xffffffffffffffffULL;
+
+ crc = crc64_be(crc, p, len);
+ return crc ^ 0xffffffffffffffffULL;
+}
+
+static inline uint64_t bch_crc64_update(uint64_t crc,
+ const void *p,
+ size_t len)
+{
+ crc = crc64_be(crc, p, len);
+ return crc;
+}
+
+/* Does linear interpolation between powers of two */
+static inline unsigned int fract_exp_two(unsigned int x,
+ unsigned int fract_bits)
+{
+ unsigned int fract = x & ~(~0 << fract_bits);
x >>= fract_bits;
x = 1 << x;
@@ -561,8 +580,4 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> 9;
}
-
-uint64_t bch_crc64_update(uint64_t, const void *, size_t);
-uint64_t bch_crc64(const void *, size_t);
-
#endif /* _BCACHE_UTIL_H */
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 481d4cf..6be05bd 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -215,7 +215,8 @@ static void update_writeback_rate(struct work_struct *work)
smp_mb();
}
-static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
+static unsigned int writeback_delay(struct cached_dev *dc,
+ unsigned int sectors)
{
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
!dc->writeback_percent)
@@ -249,6 +250,7 @@ static void dirty_init(struct keybuf_key *w)
static void dirty_io_destructor(struct closure *cl)
{
struct dirty_io *io = container_of(cl, struct dirty_io, cl);
+
kfree(io);
}
@@ -263,7 +265,7 @@ static void write_dirty_finish(struct closure *cl)
/* This is kind of a dumb way of signalling errors. */
if (KEY_DIRTY(&w->key)) {
int ret;
- unsigned i;
+ unsigned int i;
struct keylist keys;
bch_keylist_init(&keys);
@@ -377,7 +379,7 @@ static void read_dirty_submit(struct closure *cl)
static void read_dirty(struct cached_dev *dc)
{
- unsigned delay = 0;
+ unsigned int delay = 0;
struct keybuf_key *next, *keys[MAX_WRITEBACKS_IN_PASS], *w;
size_t size;
int nk, i;
@@ -442,7 +444,8 @@ static void read_dirty(struct cached_dev *dc)
io = kzalloc(sizeof(struct dirty_io) +
sizeof(struct bio_vec) *
- DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS),
+ DIV_ROUND_UP(KEY_SIZE(&w->key),
+ PAGE_SECTORS),
GFP_KERNEL);
if (!io)
goto err;
@@ -465,7 +468,8 @@ static void read_dirty(struct cached_dev *dc)
down(&dc->in_flight);
- /* We've acquired a semaphore for the maximum
+ /*
+ * We've acquired a semaphore for the maximum
* simultaneous number of writebacks; from here
* everything happens asynchronously.
*/
@@ -498,11 +502,11 @@ static void read_dirty(struct cached_dev *dc)
/* Scan for dirty data */
-void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
uint64_t offset, int nr_sectors)
{
struct bcache_device *d = c->devices[inode];
- unsigned stripe_offset, stripe, sectors_dirty;
+ unsigned int stripe_offset, stripe, sectors_dirty;
if (!d)
return;
@@ -514,7 +518,7 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
stripe_offset = offset & (d->stripe_size - 1);
while (nr_sectors) {
- int s = min_t(unsigned, abs(nr_sectors),
+ int s = min_t(unsigned int, abs(nr_sectors),
d->stripe_size - stripe_offset);
if (nr_sectors < 0)
@@ -538,7 +542,9 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
- struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
+ struct cached_dev *dc = container_of(buf,
+ struct cached_dev,
+ writeback_keys);
BUG_ON(KEY_INODE(k) != dc->disk.id);
@@ -548,7 +554,7 @@ static bool dirty_pred(struct keybuf *buf, struct bkey *k)
static void refill_full_stripes(struct cached_dev *dc)
{
struct keybuf *buf = &dc->writeback_keys;
- unsigned start_stripe, stripe, next_stripe;
+ unsigned int start_stripe, stripe, next_stripe;
bool wrapped = false;
stripe = offset_to_stripe(&dc->disk, KEY_OFFSET(&buf->last_scanned));
@@ -688,7 +694,7 @@ static int bch_writeback_thread(void *arg)
read_dirty(dc);
if (searched_full_index) {
- unsigned delay = dc->writeback_delay * HZ;
+ unsigned int delay = dc->writeback_delay * HZ;
while (delay &&
!kthread_should_stop() &&
@@ -712,7 +718,7 @@ static int bch_writeback_thread(void *arg)
struct sectors_dirty_init {
struct btree_op op;
- unsigned inode;
+ unsigned int inode;
size_t count;
struct bkey start;
};
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 3745d70..d2b9fdb 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -28,7 +28,7 @@ static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
return ret;
}
-static inline unsigned offset_to_stripe(struct bcache_device *d,
+static inline unsigned int offset_to_stripe(struct bcache_device *d,
uint64_t offset)
{
do_div(offset, d->stripe_size);
@@ -37,9 +37,9 @@ static inline unsigned offset_to_stripe(struct bcache_device *d,
static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
uint64_t offset,
- unsigned nr_sectors)
+ unsigned int nr_sectors)
{
- unsigned stripe = offset_to_stripe(&dc->disk, offset);
+ unsigned int stripe = offset_to_stripe(&dc->disk, offset);
while (1) {
if (atomic_read(dc->disk.stripe_sectors_dirty + stripe))
@@ -54,9 +54,9 @@ static inline bool bcache_dev_stripe_dirty(struct cached_dev *dc,
}
static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
- unsigned cache_mode, bool would_skip)
+ unsigned int cache_mode, bool would_skip)
{
- unsigned in_use = dc->disk.c->gc_stats.in_use;
+ unsigned int in_use = dc->disk.c->gc_stats.in_use;
if (cache_mode != CACHE_MODE_WRITEBACK ||
test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
@@ -96,10 +96,11 @@ static inline void bch_writeback_add(struct cached_dev *dc)
}
}
-void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned int inode,
+ uint64_t offset, int nr_sectors);
-void bch_sectors_dirty_init(struct bcache_device *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
-int bch_cached_dev_writeback_start(struct cached_dev *);
+void bch_sectors_dirty_init(struct bcache_device *d);
+void bch_cached_dev_writeback_init(struct cached_dev *dc);
+int bch_cached_dev_writeback_start(struct cached_dev *dc);
#endif
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index 63d6246..6369aea 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -200,15 +200,18 @@ static void scif_mmu_notifier_release(struct mmu_notifier *mn,
schedule_work(&scif_info.misc_work);
}
-static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct scif_mmu_notif *mmn;
mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
scif_rma_destroy_tcw(mmn, start, end - start);
+
+ return 0;
}
static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index a3454eb..be28f05 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -219,9 +219,10 @@ void gru_flush_all_tlb(struct gru_state *gru)
/*
* MMUOPS notifier callout functions
*/
-static void gru_invalidate_range_start(struct mmu_notifier *mn,
+static int gru_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
@@ -231,6 +232,8 @@ static void gru_invalidate_range_start(struct mmu_notifier *mn,
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx, act %d\n", gms,
start, end, atomic_read(&gms->ms_range_active));
gru_flush_tlb_range(gms, start, end - start);
+
+ return 0;
}
static void gru_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 738e354..c2ab577 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -615,13 +615,11 @@ static bool acpi_pci_need_resume(struct pci_dev *dev)
/*
* In some cases (eg. Samsung 305V4A) leaving a bridge in suspend over
* system-wide suspend/resume confuses the platform firmware, so avoid
- * doing that, unless the bridge has a driver that should take care of
- * the PM handling. According to Section 16.1.6 of ACPI 6.2, endpoint
+ * doing that. According to Section 16.1.6 of ACPI 6.2, endpoint
* devices are expected to be in D3 before invoking the S3 entry path
* from the firmware, so they should not be affected by this issue.
*/
- if (pci_is_bridge(dev) && !dev->driver &&
- acpi_target_system_state() != ACPI_STATE_S0)
+ if (pci_is_bridge(dev) && acpi_target_system_state() != ACPI_STATE_S0)
return true;
if (!adev || !acpi_device_power_manageable(adev))
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 46f58a9..ef7143a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -66,9 +66,15 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
f->device == (u16) PCI_ANY_ID)) {
- calltime = fixup_debug_start(dev, f->hook);
- f->hook(dev);
- fixup_debug_report(dev, calltime, f->hook);
+ void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ hook = offset_to_ptr(&f->hook_offset);
+#else
+ hook = f->hook;
+#endif
+ calltime = fixup_debug_start(dev, hook);
+ hook(dev);
+ fixup_debug_report(dev, calltime, hook);
}
}
diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c
index a8cb8d2..cbe467f 100644
--- a/drivers/rapidio/devices/rio_mport_cdev.c
+++ b/drivers/rapidio/devices/rio_mport_cdev.c
@@ -1006,7 +1006,6 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)
static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
{
struct mport_cdev_priv *priv;
- struct mport_dev *md;
struct rio_async_tx_wait w_param;
struct mport_dma_req *req;
dma_cookie_t cookie;
@@ -1016,7 +1015,6 @@ static int rio_mport_wait_for_async_dma(struct file *filp, void __user *arg)
int ret;
priv = (struct mport_cdev_priv *)filp->private_data;
- md = priv->md;
if (unlikely(copy_from_user(&w_param, arg, sizeof(w_param))))
return -EFAULT;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index c866a62..57390c7 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -479,18 +479,25 @@ static const struct vm_operations_struct gntdev_vmops = {
/* ------------------------------------------------------------------ */
+static bool in_range(struct gntdev_grant_map *map,
+ unsigned long start, unsigned long end)
+{
+ if (!map->vma)
+ return false;
+ if (map->vma->vm_start >= end)
+ return false;
+ if (map->vma->vm_end <= start)
+ return false;
+
+ return true;
+}
+
static void unmap_if_in_range(struct gntdev_grant_map *map,
unsigned long start, unsigned long end)
{
unsigned long mstart, mend;
int err;
- if (!map->vma)
- return;
- if (map->vma->vm_start >= end)
- return;
- if (map->vma->vm_end <= start)
- return;
mstart = max(start, map->vma->vm_start);
mend = min(end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
@@ -503,21 +510,40 @@ static void unmap_if_in_range(struct gntdev_grant_map *map,
WARN_ON(err);
}
-static void mn_invl_range_start(struct mmu_notifier *mn,
+static int mn_invl_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
struct gntdev_grant_map *map;
+ int ret = 0;
- mutex_lock(&priv->lock);
+ /* TODO do we really need a mutex here? */
+ if (blockable)
+ mutex_lock(&priv->lock);
+ else if (!mutex_trylock(&priv->lock))
+ return -EAGAIN;
+
list_for_each_entry(map, &priv->maps, next) {
+ if (in_range(map, start, end)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
unmap_if_in_range(map, start, end);
}
list_for_each_entry(map, &priv->freeable_maps, next) {
+ if (in_range(map, start, end)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
unmap_if_in_range(map, start, end);
}
+
+out_unlock:
mutex_unlock(&priv->lock);
+
+ return ret;
}
static void mn_release(struct mmu_notifier *mn,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e91028d..66621e9 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -167,7 +167,7 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode)
* of time to convert from RISC OS epoch to Unix epoch.
*/
static void
-adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
+adfs_adfs2unix_time(struct timespec64 *tv, struct inode *inode)
{
unsigned int high, low;
/* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
@@ -195,11 +195,11 @@ adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
/* convert from RISC OS to Unix epoch */
nsec -= nsec_unix_epoch_diff_risc_os_epoch;
- *tv = ns_to_timespec(nsec);
+ *tv = ns_to_timespec64(nsec);
return;
cur_time:
- *tv = timespec64_to_timespec(current_time(inode));
+ *tv = current_time(inode);
return;
too_early:
@@ -242,7 +242,6 @@ adfs_unix2adfs_time(struct inode *inode, unsigned int secs)
struct inode *
adfs_iget(struct super_block *sb, struct object_info *obj)
{
- struct timespec ts;
struct inode *inode;
inode = new_inode(sb);
@@ -271,9 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
inode->i_mode = adfs_atts2mode(sb, inode);
- ts = timespec64_to_timespec(inode->i_mtime);
- adfs_adfs2unix_time(&ts, inode);
- inode->i_mtime = timespec_to_timespec64(ts);
+ adfs_adfs2unix_time(&inode->i_mtime, inode);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 5028122..9f9cadb 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -27,6 +27,7 @@
#include <linux/list.h>
#include <linux/completion.h>
#include <linux/file.h>
+#include <linux/magic.h>
/* This is the range of ioctl() numbers we claim as ours */
#define AUTOFS_IOC_FIRST AUTOFS_IOC_READY
@@ -125,7 +126,8 @@ struct autofs_sb_info {
static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
{
- return (struct autofs_sb_info *)(sb->s_fs_info);
+ return sb->s_magic != AUTOFS_SUPER_MAGIC ?
+ NULL : (struct autofs_sb_info *)(sb->s_fs_info);
}
static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry)
@@ -152,15 +154,9 @@ int autofs_expire_run(struct super_block *, struct vfsmount *,
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when);
+ struct autofs_sb_info *sbi, unsigned int how);
int autofs_expire_multi(struct super_block *, struct vfsmount *,
struct autofs_sb_info *, int __user *);
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int how);
/* Device node initialization */
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index b332d3f..d441244 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -10,11 +10,9 @@
#include "autofs_i.h"
-static unsigned long now;
-
/* Check if a dentry can be expired */
static inline int autofs_can_expire(struct dentry *dentry,
- unsigned long timeout, int do_now)
+ unsigned long timeout, unsigned int how)
{
struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -22,16 +20,17 @@ static inline int autofs_can_expire(struct dentry *dentry,
if (ino == NULL)
return 0;
- if (!do_now) {
+ if (!(how & AUTOFS_EXP_IMMEDIATE)) {
/* Too young to die */
- if (!timeout || time_after(ino->last_used + timeout, now))
+ if (!timeout || time_after(ino->last_used + timeout, jiffies))
return 0;
}
return 1;
}
/* Check a mount point for busyness */
-static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
+static int autofs_mount_busy(struct vfsmount *mnt,
+ struct dentry *dentry, unsigned int how)
{
struct dentry *top = dentry;
struct path path = {.mnt = mnt, .dentry = dentry};
@@ -52,6 +51,12 @@ static int autofs_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
goto done;
}
+ /* Not a submount, has a forced expire been requested */
+ if (how & AUTOFS_EXP_FORCED) {
+ status = 0;
+ goto done;
+ }
+
/* Update the expiry counter if fs is busy */
if (!may_umount_tree(path.mnt)) {
struct autofs_info *ino;
@@ -187,10 +192,14 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev,
static int autofs_direct_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
pr_debug("top %p %pd\n", top, top);
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* If it's busy update the expiry counters */
if (!may_umount_tree(mnt)) {
struct autofs_info *ino;
@@ -202,7 +211,7 @@ static int autofs_direct_busy(struct vfsmount *mnt,
}
/* Timeout of a direct mount is determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
@@ -215,7 +224,7 @@ static int autofs_direct_busy(struct vfsmount *mnt,
static int autofs_tree_busy(struct vfsmount *mnt,
struct dentry *top,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct autofs_info *top_ino = autofs_dentry_ino(top);
struct dentry *p;
@@ -237,7 +246,7 @@ static int autofs_tree_busy(struct vfsmount *mnt,
* If the fs is busy update the expiry counter.
*/
if (d_mountpoint(p)) {
- if (autofs_mount_busy(mnt, p)) {
+ if (autofs_mount_busy(mnt, p, how)) {
top_ino->last_used = jiffies;
dput(p);
return 1;
@@ -260,8 +269,12 @@ static int autofs_tree_busy(struct vfsmount *mnt,
}
}
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return 0;
+
/* Timeout of a tree mount is ultimately determined by its top dentry */
- if (!autofs_can_expire(top, timeout, do_now))
+ if (!autofs_can_expire(top, timeout, how))
return 1;
return 0;
@@ -270,7 +283,7 @@ static int autofs_tree_busy(struct vfsmount *mnt,
static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
struct dentry *parent,
unsigned long timeout,
- int do_now)
+ unsigned int how)
{
struct dentry *p;
@@ -282,11 +295,17 @@ static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
if (d_mountpoint(p)) {
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, p))
+ if (autofs_mount_busy(mnt, p, how))
continue;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return p;
+
/* Can we expire this guy */
- if (autofs_can_expire(p, timeout, do_now))
+ if (autofs_can_expire(p, timeout, how))
return p;
}
}
@@ -294,23 +313,21 @@ static struct dentry *autofs_check_leaves(struct vfsmount *mnt,
}
/* Check if we can expire a direct mount (possibly a tree) */
-struct dentry *autofs_expire_direct(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_direct(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
- unsigned long timeout;
struct dentry *root = dget(sb->s_root);
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
struct autofs_info *ino;
+ unsigned long timeout;
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(root);
/* No point expiring a pending mount */
@@ -321,7 +338,7 @@ struct dentry *autofs_expire_direct(struct super_block *sb,
ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- if (!autofs_direct_busy(mnt, root, timeout, do_now)) {
+ if (!autofs_direct_busy(mnt, root, timeout, how)) {
spin_lock(&sbi->fs_lock);
ino->flags |= AUTOFS_INF_EXPIRING;
init_completion(&ino->expire_complete);
@@ -346,10 +363,8 @@ struct dentry *autofs_expire_direct(struct super_block *sb,
static struct dentry *should_expire(struct dentry *dentry,
struct vfsmount *mnt,
unsigned long timeout,
- int how)
+ unsigned int how)
{
- int do_now = how & AUTOFS_EXP_IMMEDIATE;
- int exp_leaves = how & AUTOFS_EXP_LEAVES;
struct autofs_info *ino = autofs_dentry_ino(dentry);
unsigned int ino_count;
@@ -367,22 +382,33 @@ static struct dentry *should_expire(struct dentry *dentry,
pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
/* Can we umount this guy */
- if (autofs_mount_busy(mnt, dentry))
+ if (autofs_mount_busy(mnt, dentry, how))
return NULL;
+ /* This isn't a submount so if a forced expire
+ * has been requested, user space handles busy
+ * mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/* Can we expire this guy */
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
pr_debug("checking symlink %p %pd\n", dentry, dentry);
+
+ /* Forced expire, user space handles busy mounts */
+ if (how & AUTOFS_EXP_FORCED)
+ return dentry;
+
/*
* A symlink can't be "busy" in the usual sense so
* just check last used for expire timeout.
*/
- if (autofs_can_expire(dentry, timeout, do_now))
+ if (autofs_can_expire(dentry, timeout, how))
return dentry;
return NULL;
}
@@ -391,27 +417,33 @@ static struct dentry *should_expire(struct dentry *dentry,
return NULL;
/* Case 2: tree mount, expire iff entire tree is not busy */
- if (!exp_leaves) {
- /* Path walk currently on this dentry? */
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ if (!(how & AUTOFS_EXP_LEAVES)) {
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- if (!autofs_tree_busy(mnt, dentry, timeout, do_now))
+ if (!autofs_tree_busy(mnt, dentry, timeout, how))
return dentry;
/*
* Case 3: pseudo direct mount, expire individual leaves
* (autofs-4.1).
*/
} else {
- /* Path walk currently on this dentry? */
struct dentry *expired;
- ino_count = atomic_read(&ino->count) + 1;
- if (d_count(dentry) > ino_count)
- return NULL;
+ /* Not a forced expire? */
+ if (!(how & AUTOFS_EXP_FORCED)) {
+ /* ref-walk currently on this dentry? */
+ ino_count = atomic_read(&ino->count) + 1;
+ if (d_count(dentry) > ino_count)
+ return NULL;
+ }
- expired = autofs_check_leaves(mnt, dentry, timeout, do_now);
+ expired = autofs_check_leaves(mnt, dentry, timeout, how);
if (expired) {
if (expired == dentry)
dput(dentry);
@@ -427,10 +459,10 @@ static struct dentry *should_expire(struct dentry *dentry,
* - it is unused by any user process
* - it has been unused for exp_timeout time
*/
-struct dentry *autofs_expire_indirect(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- int how)
+static struct dentry *autofs_expire_indirect(struct super_block *sb,
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ unsigned int how)
{
unsigned long timeout;
struct dentry *root = sb->s_root;
@@ -442,13 +474,10 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
if (!root)
return NULL;
- now = jiffies;
timeout = sbi->exp_timeout;
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
- int flags = how;
-
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
@@ -457,7 +486,7 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
}
spin_unlock(&sbi->fs_lock);
- expired = should_expire(dentry, mnt, timeout, flags);
+ expired = should_expire(dentry, mnt, timeout, how);
if (!expired)
continue;
@@ -470,7 +499,7 @@ struct dentry *autofs_expire_indirect(struct super_block *sb,
/* Make sure a reference is not taken on found if
* things have changed.
*/
- flags &= ~AUTOFS_EXP_LEAVES;
+ how &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
if (!found || found != expired)
/* Something has changed, continue */
@@ -575,7 +604,7 @@ int autofs_expire_run(struct super_block *sb,
spin_lock(&sbi->fs_lock);
ino = autofs_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -584,15 +613,15 @@ int autofs_expire_run(struct super_block *sb,
}
int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
- struct autofs_sb_info *sbi, int when)
+ struct autofs_sb_info *sbi, unsigned int how)
{
struct dentry *dentry;
int ret = -EAGAIN;
if (autofs_type_trigger(sbi->type))
- dentry = autofs_expire_direct(sb, mnt, sbi, when);
+ dentry = autofs_expire_direct(sb, mnt, sbi, how);
else
- dentry = autofs_expire_indirect(sb, mnt, sbi, when);
+ dentry = autofs_expire_indirect(sb, mnt, sbi, how);
if (dentry) {
struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -605,7 +634,7 @@ int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
- ino->last_used = now;
+ ino->last_used = jiffies;
ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -622,10 +651,10 @@ int autofs_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
int autofs_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_sb_info *sbi, int __user *arg)
{
- int do_now = 0;
+ unsigned int how = 0;
- if (arg && get_user(do_now, arg))
+ if (arg && get_user(how, arg))
return -EFAULT;
- return autofs_do_expire_multi(sb, mnt, sbi, do_now);
+ return autofs_do_expire_multi(sb, mnt, sbi, how);
}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index b51980f..846c052 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -10,7 +10,6 @@
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/parser.h>
-#include <linux/magic.h>
#include "autofs_i.h"
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a3d4141..782e57b 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
BUG_ON(!ino);
autofs_clean_ino(ino);
@@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
- /* This allows root to remove symlinks */
- if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!autofs_oz_mode(sbi))
+ return -EACCES;
+
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs_dentry_ino(dentry->d_parent);
@@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
spin_lock(&sbi->lookup_lock);
if (!simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
@@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
+ /* autofs_oz_mode() needs to allow path walks when the
+ * autofs mount is catatonic but the state of an autofs
+ * file system needs to be preserved over restarts.
+ */
+ if (sbi->catatonic)
+ return -EACCES;
+
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
BUG_ON(!ino);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 67db22f..42bbe68 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -50,10 +50,10 @@
*
* 1) epmutex (mutex)
* 2) ep->mtx (mutex)
- * 3) ep->lock (spinlock)
+ * 3) ep->wq.lock (spinlock)
*
* The acquire order is the one listed above, from 1 to 3.
- * We need a spinlock (ep->lock) because we manipulate objects
+ * We need a spinlock (ep->wq.lock) because we manipulate objects
* from inside the poll callback, that might be triggered from
* a wake_up() that in turn might be called from IRQ context.
* So we can't sleep inside the poll callback and hence we need
@@ -85,7 +85,7 @@
* of epoll file descriptors, we use the current recursion depth as
* the lockdep subkey.
* It is possible to drop the "ep->mtx" and to use the global
- * mutex "epmutex" (together with "ep->lock") to have it working,
+ * mutex "epmutex" (together with "ep->wq.lock") to have it working,
* but having "ep->mtx" will make the interface more scalable.
* Events that require holding "epmutex" are very rare, while for
* normal operations the epoll private "ep->mtx" will guarantee
@@ -182,11 +182,10 @@ struct epitem {
* This structure is stored inside the "private_data" member of the file
* structure and represents the main data structure for the eventpoll
* interface.
+ *
+ * Access to it is protected by the lock inside wq.
*/
struct eventpoll {
- /* Protect the access to this structure */
- spinlock_t lock;
-
/*
* This mutex is used to ensure that files are not removed
* while epoll is using them. This is held during the event
@@ -210,7 +209,7 @@ struct eventpoll {
/*
* This is a single linked list that chains all the "struct epitem" that
* happened while transferring ready events to userspace w/out
- * holding ->lock.
+ * holding ->wq.lock.
*/
struct epitem *ovflist;
@@ -337,9 +336,9 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
}
/* Tells us if the item is currently linked */
-static inline int ep_is_linked(struct list_head *p)
+static inline int ep_is_linked(struct epitem *epi)
{
- return !list_empty(p);
+ return !list_empty(&epi->rdllink);
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
@@ -392,7 +391,6 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
return ep_events_available(ep) || busy_loop_timeout(start_time);
}
-#endif /* CONFIG_NET_RX_BUSY_POLL */
/*
* Busy poll if globally on and supporting sockets found && no events,
@@ -402,20 +400,16 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
*/
static void ep_busy_loop(struct eventpoll *ep, int nonblock)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_id = READ_ONCE(ep->napi_id);
if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep);
-#endif
}
static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
if (ep->napi_id)
ep->napi_id = 0;
-#endif
}
/*
@@ -423,7 +417,6 @@ static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
*/
static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
{
-#ifdef CONFIG_NET_RX_BUSY_POLL
struct eventpoll *ep;
unsigned int napi_id;
struct socket *sock;
@@ -453,9 +446,24 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
/* record NAPI ID for use in next busy poll */
ep->napi_id = napi_id;
-#endif
}
+#else
+
+static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
+{
+}
+
+static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+{
+}
+
+static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+{
+}
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
/**
* ep_call_nested - Perform a bound (possibly) nested call, by checking
* that the recursion limit is not exceeded, and that
@@ -668,10 +676,11 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
{
__poll_t res;
int pwake = 0;
- unsigned long flags;
struct epitem *epi, *nepi;
LIST_HEAD(txlist);
+ lockdep_assert_irqs_enabled();
+
/*
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
@@ -688,17 +697,17 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* because we want the "sproc" callback to be able to do it
* in a lockless way.
*/
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
list_splice_init(&ep->rdllist, &txlist);
ep->ovflist = NULL;
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Now call the callback function.
*/
res = (*sproc)(ep, &txlist, priv);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
@@ -712,7 +721,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
* queued into ->ovflist but the "txlist" might already
* contain them, and the list_splice() below takes care of them.
*/
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
@@ -740,7 +749,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!ep_locked)
mutex_unlock(&ep->mtx);
@@ -764,16 +773,12 @@ static void epi_rcu_free(struct rcu_head *head)
*/
static int ep_remove(struct eventpoll *ep, struct epitem *epi)
{
- unsigned long flags;
struct file *file = epi->ffd.file;
+ lockdep_assert_irqs_enabled();
+
/*
- * Removes poll wait queue hooks. We _have_ to do this without holding
- * the "ep->lock" otherwise a deadlock might occur. This because of the
- * sequence of the lock acquisition. Here we do "ep->lock" then the wait
- * queue head lock when unregistering the wait queue. The wakeup callback
- * will run by holding the wait queue head lock and will call our callback
- * that will try to get "ep->lock".
+ * Removes poll wait queue hooks.
*/
ep_unregister_pollwait(ep, epi);
@@ -784,10 +789,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
rb_erase_cached(&epi->rbn, &ep->rbr);
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@@ -837,7 +842,7 @@ static void ep_free(struct eventpoll *ep)
* Walks through the whole tree by freeing each "struct epitem". At this
* point we are sure no poll callbacks will be lingering around, and also by
* holding "epmutex" we can be sure that no file cleanup code will hit
- * us during this operation. So we can avoid the lock on "ep->lock".
+ * us during this operation. So we can avoid the lock on "ep->wq.lock".
* We do not need to lock ep->mtx, either, we only do it to prevent
* a lockdep warning.
*/
@@ -1017,7 +1022,6 @@ static int ep_alloc(struct eventpoll **pep)
if (unlikely(!ep))
goto free_uid;
- spin_lock_init(&ep->lock);
mutex_init(&ep->mtx);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
@@ -1122,7 +1126,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
__poll_t pollflags = key_to_poll(key);
int ewake = 0;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irqsave(&ep->wq.lock, flags);
ep_set_busy_poll_napi_id(epi);
@@ -1167,7 +1171,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
}
/* If this file is already in the ready list we exit soon */
- if (!ep_is_linked(&epi->rdllink)) {
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake_rcu(epi);
}
@@ -1199,7 +1203,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
pwake++;
out_unlock:
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irqrestore(&ep->wq.lock, flags);
/* We have to call this outside the lock */
if (pwake)
@@ -1417,11 +1421,12 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
{
int error, pwake = 0;
__poll_t revents;
- unsigned long flags;
long user_watches;
struct epitem *epi;
struct ep_pqueue epq;
+ lockdep_assert_irqs_enabled();
+
user_watches = atomic_long_read(&ep->user->epoll_watches);
if (unlikely(user_watches >= max_user_watches))
return -ENOSPC;
@@ -1484,13 +1489,13 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
- if (revents && !ep_is_linked(&epi->rdllink)) {
+ if (revents && !ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1501,7 +1506,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
atomic_long_inc(&ep->user->epoll_watches);
@@ -1527,10 +1532,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
* list, since that is used/cleaned only inside a section bound by "mtx".
* And ep_insert() is called with "mtx" held.
*/
- spin_lock_irqsave(&ep->lock, flags);
- if (ep_is_linked(&epi->rdllink))
+ spin_lock_irq(&ep->wq.lock);
+ if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
wakeup_source_unregister(ep_wakeup_source(epi));
@@ -1550,6 +1555,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
int pwake = 0;
poll_table pt;
+ lockdep_assert_irqs_enabled();
+
init_poll_funcptr(&pt, NULL);
/*
@@ -1572,9 +1579,9 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* 1) Flush epi changes above to other CPUs. This ensures
* we do not miss events from ep_poll_callback if an
* event occurs immediately after we call f_op->poll().
- * We need this because we did not take ep->lock while
+ * We need this because we did not take ep->wq.lock while
* changing epi above (but ep_poll_callback does take
- * ep->lock).
+ * ep->wq.lock).
*
* 2) We also need to ensure we do not miss _past_ events
* when calling f_op->poll(). This barrier also
@@ -1593,8 +1600,8 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
- spin_lock_irq(&ep->lock);
- if (!ep_is_linked(&epi->rdllink)) {
+ spin_lock_irq(&ep->wq.lock);
+ if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
@@ -1604,7 +1611,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
- spin_unlock_irq(&ep->lock);
+ spin_unlock_irq(&ep->wq.lock);
}
/* We have to call this outside the lock */
@@ -1739,11 +1746,12 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
int maxevents, long timeout)
{
int res = 0, eavail, timed_out = 0;
- unsigned long flags;
u64 slack = 0;
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
+ lockdep_assert_irqs_enabled();
+
if (timeout > 0) {
struct timespec64 end_time = ep_set_mstimeout(timeout);
@@ -1756,7 +1764,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
* caller specified a non blocking operation.
*/
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
goto check_events;
}
@@ -1765,7 +1773,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
if (!ep_events_available(ep))
ep_busy_loop(ep, timed_out);
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
if (!ep_events_available(ep)) {
/*
@@ -1807,11 +1815,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
break;
}
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
timed_out = 1;
- spin_lock_irqsave(&ep->lock, flags);
+ spin_lock_irq(&ep->wq.lock);
}
__remove_wait_queue(&ep->wq, &wait);
@@ -1821,7 +1829,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
/* Is it worth to try to dig for events ? */
eavail = ep_events_available(ep);
- spin_unlock_irqrestore(&ep->lock, flags);
+ spin_unlock_irq(&ep->wq.lock);
/*
* Try to transfer events to user space. In case we get 0 events and
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 9f1c96c..e8b6b89 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,6 +28,7 @@ struct kmem_cache *f2fs_inode_entry_slab;
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
+ f2fs_build_fault_attr(sbi, 0, 0);
set_ckpt_flags(sbi, CP_ERROR_FLAG);
if (!end_io)
f2fs_flush_merged_writes(sbi);
@@ -70,6 +71,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.encrypted_page = NULL,
.is_meta = is_meta,
};
+ int err;
if (unlikely(!is_meta))
fio.op_flags &= ~REQ_META;
@@ -84,9 +86,10 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
fio.page = page;
- if (f2fs_submit_page_bio(&fio)) {
+ err = f2fs_submit_page_bio(&fio);
+ if (err) {
f2fs_put_page(page, 1);
- goto repeat;
+ return ERR_PTR(err);
}
lock_page(page);
@@ -95,14 +98,9 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
goto repeat;
}
- /*
- * if there is any IO error when accessing device, make our filesystem
- * readonly and make sure do not write checkpoint with non-uptodate
- * meta page.
- */
if (unlikely(!PageUptodate(page))) {
- memset(page_address(page), 0, PAGE_SIZE);
- f2fs_stop_checkpoint(sbi, false);
+ f2fs_put_page(page, 1);
+ return ERR_PTR(-EIO);
}
out:
return page;
@@ -113,13 +111,32 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+{
+ struct page *page;
+ int count = 0;
+
+retry:
+ page = __get_meta_page(sbi, index, true);
+ if (IS_ERR(page)) {
+ if (PTR_ERR(page) == -EIO &&
+ ++count <= DEFAULT_RETRY_IO_COUNT)
+ goto retry;
+
+ f2fs_stop_checkpoint(sbi, false);
+ f2fs_bug_on(sbi, 1);
+ }
+
+ return page;
+}
+
/* for POR only */
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
return __get_meta_page(sbi, index, false);
}
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
switch (type) {
@@ -140,8 +157,20 @@ bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
return false;
break;
case META_POR:
+ case DATA_GENERIC:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
- blkaddr < MAIN_BLKADDR(sbi)))
+ blkaddr < MAIN_BLKADDR(sbi))) {
+ if (type == DATA_GENERIC) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "access invalid blkaddr:%u", blkaddr);
+ WARN_ON(1);
+ }
+ return false;
+ }
+ break;
+ case META_GENERIC:
+ if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
+ blkaddr >= MAIN_BLKADDR(sbi)))
return false;
break;
default:
@@ -176,7 +205,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkno, type))
+ if (!f2fs_is_valid_blkaddr(sbi, blkno, type))
goto out;
switch (type) {
@@ -242,11 +271,8 @@ static int __f2fs_write_meta_page(struct page *page,
trace_f2fs_writepage(page, META);
- if (unlikely(f2fs_cp_error(sbi))) {
- dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
- return 0;
- }
+ if (unlikely(f2fs_cp_error(sbi)))
+ goto redirty_out;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
@@ -529,13 +555,12 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
spin_lock(&im->ino_lock);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
-#endif
+
if (unlikely(im->ino_num >= sbi->max_orphans))
err = -ENOSPC;
else
@@ -572,12 +597,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
struct node_info ni;
- int err = f2fs_acquire_orphan_inode(sbi);
-
- if (err)
- goto err_out;
-
- __add_ino_entry(sbi, ino, 0, ORPHAN_INO);
+ int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode)) {
@@ -600,14 +620,15 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
/* truncate all the data during iput */
iput(inode);
- f2fs_get_node_info(sbi, ino, &ni);
+ err = f2fs_get_node_info(sbi, ino, &ni);
+ if (err)
+ goto err_out;
/* ENOMEM was fully retried in f2fs_evict_inode. */
if (ni.blk_addr != NULL_ADDR) {
err = -EIO;
goto err_out;
}
- __remove_ino_entry(sbi, ino, ORPHAN_INO);
return 0;
err_out:
@@ -639,7 +660,10 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= SB_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif
@@ -649,9 +673,15 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
for (i = 0; i < orphan_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, start_blk + i);
+ struct page *page;
struct f2fs_orphan_block *orphan_blk;
+ page = f2fs_get_meta_page(sbi, start_blk + i);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ goto out;
+ }
+
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
@@ -742,10 +772,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
__u32 crc = 0;
*cp_page = f2fs_get_meta_page(sbi, cp_addr);
+ if (IS_ERR(*cp_page))
+ return PTR_ERR(*cp_page);
+
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset > (blk_size - sizeof(__le32))) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
@@ -753,6 +787,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
+ f2fs_put_page(*cp_page, 1);
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
}
@@ -772,14 +807,22 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_1, version);
if (err)
- goto invalid_cp1;
+ return NULL;
+
+ if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
+ sbi->blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
+ goto invalid_cp;
+ }
pre_version = *version;
cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
err = get_checkpoint_version(sbi, cp_addr, &cp_block,
&cp_page_2, version);
if (err)
- goto invalid_cp2;
+ goto invalid_cp;
cur_version = *version;
if (cur_version == pre_version) {
@@ -787,9 +830,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
f2fs_put_page(cp_page_2, 1);
return cp_page_1;
}
-invalid_cp2:
f2fs_put_page(cp_page_2, 1);
-invalid_cp1:
+invalid_cp:
f2fs_put_page(cp_page_1, 1);
return NULL;
}
@@ -838,15 +880,15 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
- /* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
- goto free_fail_no_cp;
-
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
else
sbi->cur_cp_pack = 2;
+ /* Sanity checking of checkpoint */
+ if (f2fs_sanity_check_ckpt(sbi))
+ goto free_fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -859,6 +901,8 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
+ if (IS_ERR(cur_page))
+ goto free_fail_no_cp;
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -980,12 +1024,10 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
iput(inode);
/* We need to give cpu to another writers. */
- if (ino == cur_ino) {
- congestion_wait(BLK_RW_ASYNC, HZ/50);
+ if (ino == cur_ino)
cond_resched();
- } else {
+ else
ino = cur_ino;
- }
} else {
/*
* We should submit bio, since it exists several
@@ -1119,7 +1161,7 @@ static void unblock_operations(struct f2fs_sb_info *sbi)
f2fs_unlock_all(sbi);
}
-static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
DEFINE_WAIT(wait);
@@ -1129,6 +1171,9 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
+ if (unlikely(f2fs_cp_error(sbi)))
+ break;
+
io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
@@ -1202,8 +1247,12 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
/* writeout cp pack 2 page */
err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
- f2fs_bug_on(sbi, err);
+ if (unlikely(err && f2fs_cp_error(sbi))) {
+ f2fs_put_page(page, 1);
+ return;
+ }
+ f2fs_bug_on(sbi, err);
f2fs_put_page(page, 0);
/* submit checkpoint (with barrier if NOBARRIER is not set) */
@@ -1229,7 +1278,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
while (get_pages(sbi, F2FS_DIRTY_META)) {
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
/*
@@ -1309,7 +1358,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX,
FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ break;
}
}
@@ -1348,10 +1397,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
- wait_on_all_pages_writeback(sbi);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_wait_on_all_pages_writeback(sbi);
/* flush all device cache */
err = f2fs_flush_device_cache(sbi);
@@ -1360,12 +1406,19 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* barrier and flush checkpoint cp pack 2 page if it can */
commit_checkpoint(sbi, ckpt, start_blk);
- wait_on_all_pages_writeback(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ /*
+ * invalidate intermediate page cache borrowed from meta inode
+ * which are used for migration of encrypted inode's blocks.
+ */
+ if (f2fs_sb_has_encrypt(sbi->sb))
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1);
f2fs_release_ino_entry(sbi, false);
- if (unlikely(f2fs_cp_error(sbi)))
- return -EIO;
+ f2fs_reset_fsync_node_info(sbi);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
@@ -1381,7 +1434,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));
- return 0;
+ return unlikely(f2fs_cp_error(sbi)) ? -EIO : 0;
}
/*
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b7c9b58..382c1ef 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -126,12 +126,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
f2fs_show_injection_info(FAULT_IO);
bio->bi_status = BLK_STS_IOERR;
}
-#endif
if (f2fs_bio_post_read_required(bio)) {
struct bio_post_read_ctx *ctx = bio->bi_private;
@@ -177,6 +175,8 @@ static void f2fs_write_end_io(struct bio *bio)
page->index != nid_of_node(page));
dec_page_count(sbi, type);
+ if (f2fs_in_warm_node_list(sbi, page))
+ f2fs_del_fsync_node_entry(sbi, page);
clear_cold_data(page);
end_page_writeback(page);
}
@@ -264,7 +264,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
if (type != DATA && type != NODE)
goto submit_io;
- if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
+ if (test_opt(sbi, LFS) && current->plug)
blk_finish_plug(current->plug);
start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
@@ -441,7 +441,10 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
struct page *page = fio->encrypted_page ?
fio->encrypted_page : fio->page;
- verify_block_addr(fio, fio->new_blkaddr);
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFAULT;
+
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -485,7 +488,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
spin_unlock(&io->io_lock);
}
- if (is_valid_blkaddr(fio->old_blkaddr))
+ if (__is_valid_data_blkaddr(fio->old_blkaddr))
verify_block_addr(fio, fio->old_blkaddr);
verify_block_addr(fio, fio->new_blkaddr);
@@ -534,19 +537,22 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
}
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
- unsigned nr_pages)
+ unsigned nr_pages, unsigned op_flag)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct bio *bio;
struct bio_post_read_ctx *ctx;
unsigned int post_read_steps = 0;
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ return ERR_PTR(-EFAULT);
+
bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
if (!bio)
return ERR_PTR(-ENOMEM);
f2fs_target_device(sbi, blkaddr, bio);
bio->bi_end_io = f2fs_read_end_io;
- bio_set_op_attrs(bio, REQ_OP_READ, 0);
+ bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
if (f2fs_encrypted_file(inode))
post_read_steps |= 1 << STEP_DECRYPT;
@@ -571,7 +577,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
block_t blkaddr)
{
- struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
+ struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
if (IS_ERR(bio))
return PTR_ERR(bio);
@@ -869,6 +875,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
+ block_t old_blkaddr;
pgoff_t fofs;
blkcnt_t count = 1;
int err;
@@ -876,6 +883,10 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return -EPERM;
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
+
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
if (dn->data_blkaddr == NEW_ADDR)
@@ -885,11 +896,13 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
return err;
alloc:
- f2fs_get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
-
- f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ old_blkaddr = dn->data_blkaddr;
+ f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
&sum, seg_type, NULL, false);
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
f2fs_set_data_blkaddr(dn);
/* update i_size */
@@ -1045,7 +1058,13 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
next_block:
blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
- if (!is_valid_blkaddr(blkaddr)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
+ err = -EFAULT;
+ goto sync_out;
+ }
+
+ if (!is_valid_data_blkaddr(sbi, blkaddr)) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1282,7 +1301,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
@@ -1309,7 +1332,11 @@ static int f2fs_xattr_fiemap(struct inode *inode,
if (!page)
return -ENOMEM;
- f2fs_get_node_info(sbi, xnid, &ni);
+ err = f2fs_get_node_info(sbi, xnid, &ni);
+ if (err) {
+ f2fs_put_page(page, 1);
+ return err;
+ }
phys = (__u64)blk_to_logical(inode, ni.blk_addr);
len = inode->i_sb->s_blocksize;
@@ -1425,11 +1452,11 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* Note that the aops->readpages() function is ONLY used for read-ahead. If
* this function ever deviates from doing just read-ahead, it should either
* use ->readpage() or do the necessary surgery to decouple ->readpages()
- * readom read-ahead.
+ * from read-ahead.
*/
static int f2fs_mpage_readpages(struct address_space *mapping,
struct list_head *pages, struct page *page,
- unsigned nr_pages)
+ unsigned nr_pages, bool is_readahead)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -1500,6 +1527,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
SetPageUptodate(page);
goto confused;
}
+
+ if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
+ DATA_GENERIC))
+ goto set_error_page;
} else {
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
@@ -1519,7 +1550,8 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
bio = NULL;
}
if (bio == NULL) {
- bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
+ bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
+ is_readahead ? REQ_RAHEAD : 0);
if (IS_ERR(bio)) {
bio = NULL;
goto set_error_page;
@@ -1563,7 +1595,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
return ret;
}
@@ -1580,12 +1612,13 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}
static int encrypt_one_page(struct f2fs_io_info *fio)
{
struct inode *inode = fio->page->mapping->host;
+ struct page *mpage;
gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
@@ -1597,17 +1630,25 @@ static int encrypt_one_page(struct f2fs_io_info *fio)
retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
PAGE_SIZE, 0, fio->page->index, gfp_flags);
- if (!IS_ERR(fio->encrypted_page))
- return 0;
-
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
+ if (IS_ERR(fio->encrypted_page)) {
+ /* flush pending IOs and wait for a while in the ENOMEM case */
+ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
+ f2fs_flush_merged_writes(fio->sbi);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
+ return PTR_ERR(fio->encrypted_page);
}
- return PTR_ERR(fio->encrypted_page);
+
+ mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
+ if (mpage) {
+ if (PageUptodate(mpage))
+ memcpy(page_address(mpage),
+ page_address(fio->encrypted_page), PAGE_SIZE);
+ f2fs_put_page(mpage, 1);
+ }
+ return 0;
}
static inline bool check_inplace_update_policy(struct inode *inode,
@@ -1691,6 +1732,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
struct extent_info ei = {0,0,0};
+ struct node_info ni;
bool ipu_force = false;
int err = 0;
@@ -1699,11 +1741,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
- if (is_valid_blkaddr(fio->old_blkaddr)) {
- ipu_force = true;
- fio->need_lock = LOCK_DONE;
- goto got_it;
- }
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC))
+ return -EFAULT;
+
+ ipu_force = true;
+ fio->need_lock = LOCK_DONE;
+ goto got_it;
}
/* Deadlock due to between page->lock and f2fs_lock_op */
@@ -1722,11 +1766,17 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
goto out_writepage;
}
got_it:
+ if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
+ DATA_GENERIC)) {
+ err = -EFAULT;
+ goto out_writepage;
+ }
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (ipu_force || (is_valid_blkaddr(fio->old_blkaddr) &&
+ if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
need_inplace_update(fio))) {
err = encrypt_one_page(fio);
if (err)
@@ -1751,6 +1801,12 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
fio->need_lock = LOCK_REQ;
}
+ err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
+ if (err)
+ goto out_writepage;
+
+ fio->version = ni.version;
+
err = encrypt_one_page(fio);
if (err)
goto out_writepage;
@@ -2079,6 +2135,18 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
return ret;
}
+static inline bool __should_serialize_io(struct inode *inode,
+ struct writeback_control *wbc)
+{
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ return true;
+ if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
+ return true;
+ return false;
+}
+
static int __f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc,
enum iostat_type io_type)
@@ -2087,6 +2155,7 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct blk_plug plug;
int ret;
+ bool locked = false;
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
@@ -2117,10 +2186,18 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
else if (atomic_read(&sbi->wb_sync_req[DATA]))
goto skip_write;
+ if (__should_serialize_io(inode, wbc)) {
+ mutex_lock(&sbi->writepages);
+ locked = true;
+ }
+
blk_start_plug(&plug);
ret = f2fs_write_cache_pages(mapping, wbc, io_type);
blk_finish_plug(&plug);
+ if (locked)
+ mutex_unlock(&sbi->writepages);
+
if (wbc->sync_mode == WB_SYNC_ALL)
atomic_dec(&sbi->wb_sync_req[DATA]);
/*
@@ -2153,10 +2230,14 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode);
if (to > i_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -2251,8 +2332,9 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
trace_f2fs_write_begin(inode, pos, len, flags);
- if (f2fs_is_atomic_file(inode) &&
- !f2fs_available_free_memory(sbi, INMEM_PAGES)) {
+ if ((f2fs_is_atomic_file(inode) &&
+ !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
+ is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
@@ -2376,14 +2458,20 @@ static int f2fs_write_end(struct file *file,
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
loff_t offset)
{
- unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
+ unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+ unsigned blkbits = i_blkbits;
+ unsigned blocksize_mask = (1 << blkbits) - 1;
+ unsigned long align = offset | iov_iter_alignment(iter);
+ struct block_device *bdev = inode->i_sb->s_bdev;
- if (offset & blocksize_mask)
- return -EINVAL;
-
- if (iov_iter_alignment(iter) & blocksize_mask)
- return -EINVAL;
-
+ if (align & blocksize_mask) {
+ if (bdev)
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ blocksize_mask = (1 << blkbits) - 1;
+ if (align & blocksize_mask)
+ return -EINVAL;
+ return 1;
+ }
return 0;
}
@@ -2401,7 +2489,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
err = check_direct_IO(inode, iter, offset);
if (err)
- return err;
+ return err < 0 ? err : 0;
if (f2fs_force_buffered_io(inode, rw))
return 0;
@@ -2495,6 +2583,10 @@ static int f2fs_set_data_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+ /* don't remain PG_checked flag which was set during GC */
+ if (is_cold_data(page))
+ clear_cold_data(page);
+
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
f2fs_register_inmem_page(inode, page);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 2d65e77..214a968 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
- si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+ si->base_mem += NM_I(sbi)->nat_blocks *
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7f955c4..ecc3a4e2 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
-#endif
+
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6799c3f..abf9256 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -41,7 +41,6 @@
} while (0)
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
enum {
FAULT_KMALLOC,
FAULT_KVMALLOC,
@@ -56,16 +55,20 @@ enum {
FAULT_TRUNCATE,
FAULT_IO,
FAULT_CHECKPOINT,
+ FAULT_DISCARD,
FAULT_MAX,
};
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+#define F2FS_ALL_FAULT_TYPE ((1 << FAULT_MAX) - 1)
+
struct f2fs_fault_info {
atomic_t inject_ops;
unsigned int inject_rate;
unsigned int inject_type;
};
-extern char *fault_name[FAULT_MAX];
+extern char *f2fs_fault_name[FAULT_MAX];
#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
#endif
@@ -178,7 +181,6 @@ enum {
#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi)
#define DEF_MAX_DISCARD_REQUEST 8 /* issue 8 discards per round */
-#define DEF_MAX_DISCARD_LEN 512 /* Max. 2MB per discard */
#define DEF_MIN_DISCARD_ISSUE_TIME 50 /* 50 ms, if exists */
#define DEF_MID_DISCARD_ISSUE_TIME 500 /* 500 ms, if device busy */
#define DEF_MAX_DISCARD_ISSUE_TIME 60000 /* 60 s, if no candidates */
@@ -194,7 +196,7 @@ struct cp_control {
};
/*
- * For CP/NAT/SIT/SSA readahead
+ * indicate meta/data type
*/
enum {
META_CP,
@@ -202,6 +204,8 @@ enum {
META_SIT,
META_SSA,
META_POR,
+ DATA_GENERIC,
+ META_GENERIC,
};
/* for the list of ino */
@@ -226,6 +230,12 @@ struct inode_entry {
struct inode *inode; /* vfs inode pointer */
};
+struct fsync_node_entry {
+ struct list_head list; /* list head */
+ struct page *page; /* warm node page pointer */
+ unsigned int seq_id; /* sequence id */
+};
+
/* for the bitmap indicate blocks to be discarded */
struct discard_entry {
struct list_head list; /* list head */
@@ -242,9 +252,10 @@ struct discard_entry {
(MAX_PLIST_NUM - 1) : (blk_num - 1))
enum {
- D_PREP,
- D_SUBMIT,
- D_DONE,
+ D_PREP, /* initial */
+ D_PARTIAL, /* partially submitted */
+ D_SUBMIT, /* all submitted */
+ D_DONE, /* finished */
};
struct discard_info {
@@ -269,7 +280,10 @@ struct discard_cmd {
struct block_device *bdev; /* bdev */
unsigned short ref; /* reference count */
unsigned char state; /* state */
+ unsigned char issuing; /* issuing discard */
int error; /* bio error */
+ spinlock_t lock; /* for state/bio_ref updating */
+ unsigned short bio_ref; /* bio reference count */
};
enum {
@@ -289,6 +303,7 @@ struct discard_policy {
unsigned int io_aware_gran; /* minimum granularity discard not be aware of I/O */
bool io_aware; /* issue discard in idle time */
bool sync; /* submit discard with REQ_SYNC flag */
+ bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
};
@@ -305,10 +320,12 @@ struct discard_cmd_control {
unsigned int max_discards; /* max. discards to be issued */
unsigned int discard_granularity; /* discard granularity */
unsigned int undiscard_blks; /* # of undiscard blocks */
+ unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
atomic_t issing_discard; /* # of issing discard */
atomic_t discard_cmd_cnt; /* # of cached cmd count */
struct rb_root root; /* root of discard rb-tree */
+ bool rbtree_check; /* config for consistence check */
};
/* for the list of fsync inodes, used only during recovery */
@@ -508,13 +525,12 @@ enum {
*/
};
+#define DEFAULT_RETRY_IO_COUNT 8 /* maximum retry read IO count */
+
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
-/* vector size for gang look-up from extent cache that consists of radix tree */
-#define EXT_TREE_VEC_SIZE 64
-
/* for in-memory extent cache entry */
#define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */
@@ -600,6 +616,8 @@ enum {
#define FADVISE_HOT_BIT 0x20
#define FADVISE_VERITY_BIT 0x40 /* reserved */
+#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
+
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
@@ -669,8 +687,8 @@ struct f2fs_inode_info {
int i_extra_isize; /* size of extra space located in i_addr */
kprojid_t i_projid; /* id for project quota */
int i_inline_xattr_size; /* inline xattr size */
- struct timespec i_crtime; /* inode creation time */
- struct timespec i_disk_time[4]; /* inode disk times */
+ struct timespec64 i_crtime; /* inode creation time */
+ struct timespec64 i_disk_time[4];/* inode disk times */
};
static inline void get_extent_info(struct extent_info *ext,
@@ -698,22 +716,22 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
}
static inline bool __is_discard_mergeable(struct discard_info *back,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
return (back->lstart + back->len == front->lstart) &&
- (back->len + front->len < DEF_MAX_DISCARD_LEN);
+ (back->len + front->len <= max_len);
}
static inline bool __is_discard_back_mergeable(struct discard_info *cur,
- struct discard_info *back)
+ struct discard_info *back, unsigned int max_len)
{
- return __is_discard_mergeable(back, cur);
+ return __is_discard_mergeable(back, cur, max_len);
}
static inline bool __is_discard_front_mergeable(struct discard_info *cur,
- struct discard_info *front)
+ struct discard_info *front, unsigned int max_len)
{
- return __is_discard_mergeable(cur, front);
+ return __is_discard_mergeable(cur, front, max_len);
}
static inline bool __is_extent_mergeable(struct extent_info *back,
@@ -768,6 +786,7 @@ struct f2fs_nm_info {
struct radix_tree_root nat_set_root;/* root of the nat set cache */
struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
+ spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
unsigned int nat_blocks; /* # of nat blocks */
@@ -894,6 +913,7 @@ struct f2fs_sm_info {
unsigned int ipu_policy; /* in-place-update policy */
unsigned int min_ipu_util; /* in-place-update threshold */
unsigned int min_fsync_blocks; /* threshold for fsync */
+ unsigned int min_seq_blocks; /* threshold for sequential blocks */
unsigned int min_hot_blocks; /* threshold for hot block allocation */
unsigned int min_ssr_sections; /* threshold to trigger SSR allocation */
@@ -1015,6 +1035,7 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ unsigned char version; /* version of the node */
};
#define is_read_io(rw) ((rw) == READ)
@@ -1066,6 +1087,7 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
SBI_NEED_SB_WRITE, /* need to recover superblock */
SBI_NEED_CP, /* need to checkpoint */
+ SBI_IS_SHUTDOWN, /* shutdown by ioctl */
};
enum {
@@ -1112,6 +1134,7 @@ struct f2fs_sb_info {
struct rw_semaphore sb_lock; /* lock for raw super block */
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
+ struct mutex writepages; /* mutex for writepages() */
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1148,6 +1171,11 @@ struct f2fs_sb_info {
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
+ spinlock_t fsync_node_lock; /* for node entry lock */
+ struct list_head fsync_node_list; /* node list head */
+ unsigned int fsync_seg_id; /* sequence id */
+ unsigned int fsync_node_num; /* number of node entries */
+
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
@@ -1215,6 +1243,7 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
/* for skip statistic */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
+ unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
@@ -1279,7 +1308,7 @@ struct f2fs_sb_info {
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk("%sF2FS-fs : inject %s in %s of %pF\n", \
- KERN_INFO, fault_name[type], \
+ KERN_INFO, f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
@@ -1298,6 +1327,12 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
}
return false;
}
+#else
+#define f2fs_show_injection_info(type) do { } while (0)
+static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
+{
+ return false;
+}
#endif
/* For write statistics. Suppose sector size is 512 bytes,
@@ -1326,7 +1361,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
- return 0;
+ return false;
return f2fs_time_over(sbi, REQ_TIME);
}
@@ -1650,13 +1685,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (ret)
return ret;
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
release = *count;
goto enospc;
}
-#endif
+
/*
* let's increase this in prior to actual block count change in order
* for f2fs_sync_file to avoid data races when deciding checkpoint.
@@ -1680,18 +1714,20 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
- percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
goto enospc;
}
}
spin_unlock(&sbi->stat_lock);
- if (unlikely(release))
+ if (unlikely(release)) {
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
+ }
f2fs_i_blocks_write(inode, *count, true, true);
return 0;
enospc:
+ percpu_counter_sub(&sbi->alloc_valid_block_count, release);
dquot_release_reservation_block(inode, release);
return -ENOSPC;
}
@@ -1863,12 +1899,10 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
return ret;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
goto enospc;
}
-#endif
spin_lock(&sbi->stat_lock);
@@ -1953,17 +1987,23 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi)
static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
pgoff_t index, bool for_write)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- struct page *page = find_lock_page(mapping, index);
+ struct page *page;
- if (page)
- return page;
+ if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) {
+ if (!for_write)
+ page = find_get_page_flags(mapping, index,
+ FGP_LOCK | FGP_ACCESSED);
+ else
+ page = find_lock_page(mapping, index);
+ if (page)
+ return page;
- if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
- f2fs_show_injection_info(FAULT_PAGE_ALLOC);
- return NULL;
+ if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
+ f2fs_show_injection_info(FAULT_PAGE_ALLOC);
+ return NULL;
+ }
}
-#endif
+
if (!for_write)
return grab_cache_page(mapping, index);
return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS);
@@ -1973,12 +2013,11 @@ static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
int fgp_flags, gfp_t gfp_mask)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
return NULL;
}
-#endif
+
return pagecache_get_page(mapping, index, fgp_flags, gfp_mask);
}
@@ -2043,12 +2082,11 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
return NULL;
}
-#endif
+
return bio_alloc(GFP_KERNEL, npages);
}
@@ -2518,7 +2556,6 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
- struct timespec ts;
bool ret;
if (dsync) {
@@ -2534,16 +2571,13 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
i_size_read(inode) & ~PAGE_MASK)
return false;
- ts = timespec64_to_timespec(inode->i_atime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
- ts = timespec64_to_timespec(inode->i_ctime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
return false;
- ts = timespec64_to_timespec(inode->i_mtime);
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &ts))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
- if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3,
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
&F2FS_I(inode)->i_crtime))
return false;
@@ -2587,12 +2621,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
return NULL;
}
-#endif
+
return kmalloc(size, flags);
}
@@ -2605,12 +2638,11 @@ static inline void *f2fs_kzalloc(struct f2fs_sb_info *sbi,
static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
return NULL;
}
-#endif
+
return kvmalloc(size, flags);
}
@@ -2669,13 +2701,39 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
spin_unlock(&sbi->iostat_lock);
}
-static inline bool is_valid_blkaddr(block_t blkaddr)
+#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
+ (!is_read_io(fio->op) || fio->is_meta))
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
+void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
+ f2fs_bug_on(sbi, 1);
+ }
+}
+
+static inline bool __is_valid_data_blkaddr(block_t blkaddr)
{
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR)
return false;
return true;
}
+static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr)
+{
+ if (!__is_valid_data_blkaddr(blkaddr))
+ return false;
+ verify_blkaddr(sbi, blkaddr, DATA_GENERIC);
+ return true;
+}
+
/*
* file.c
*/
@@ -2790,16 +2848,21 @@ struct node_info;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page);
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni);
pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs);
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode);
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino);
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -2808,11 +2871,12 @@ struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid);
struct page *f2fs_get_node_page_ra(struct page *parent, int start);
void f2fs_move_node_page(struct page *node_page, int gc_type);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic);
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id);
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type);
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
@@ -2820,7 +2884,7 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_recover_inline_xattr(struct inode *inode, struct page *page);
int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
@@ -2898,9 +2962,10 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
-bool f2fs_is_valid_meta_blkaddr(struct f2fs_sb_info *sbi,
- block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index);
@@ -2924,6 +2989,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi);
void f2fs_update_dirty_page(struct inode *inode, struct page *page);
void f2fs_remove_dirty_inode(struct inode *inode);
int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
+void f2fs_wait_on_all_pages_writeback(struct f2fs_sb_info *sbi);
int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
int __init f2fs_create_checkpoint_caches(void);
@@ -3362,7 +3428,7 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
#else
- return 0;
+ return false;
#endif
}
@@ -3373,4 +3439,11 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, int rw)
F2FS_I_SB(inode)->s_ndevs);
}
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type);
+#else
+#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
+#endif
+
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 6880c6f..5474aaa 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -213,6 +213,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
.nr_to_write = LONG_MAX,
.for_reclaim = 0,
};
+ unsigned int seq_id = 0;
if (unlikely(f2fs_readonly(inode->i_sb)))
return 0;
@@ -275,7 +276,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
sync_nodes:
atomic_inc(&sbi->wb_sync_req[NODE]);
- ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic);
+ ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
atomic_dec(&sbi->wb_sync_req[NODE]);
if (ret)
goto out;
@@ -301,7 +302,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
* given fsync mark.
*/
if (!atomic) {
- ret = f2fs_wait_on_node_pages_writeback(sbi, ino);
+ ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
if (ret)
goto out;
}
@@ -350,13 +351,13 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
return pgofs;
}
-static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
- int whence)
+static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
+ pgoff_t dirty, pgoff_t pgofs, int whence)
{
switch (whence) {
case SEEK_DATA:
if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- is_valid_blkaddr(blkaddr))
+ is_valid_data_blkaddr(sbi, blkaddr))
return true;
break;
case SEEK_HOLE:
@@ -420,7 +421,15 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
blkaddr = datablock_addr(dn.inode,
dn.node_page, dn.ofs_in_node);
- if (__found_offset(blkaddr, dirty, pgofs, whence)) {
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
+ blkaddr, DATA_GENERIC)) {
+ f2fs_put_dnode(&dn);
+ goto fail;
+ }
+
+ if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
}
@@ -513,6 +522,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
f2fs_set_data_blkaddr(dn);
+
+ if (__is_valid_data_blkaddr(blkaddr) &&
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
+ continue;
+
f2fs_invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
@@ -654,12 +668,11 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
-#endif
+
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -782,22 +795,26 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size <= i_size_read(inode)) {
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- err = f2fs_truncate(inode);
- up_write(&F2FS_I(inode)->i_mmap_sem);
- if (err)
- return err;
- } else {
- /*
- * do not trim all blocks after i_size if target size is
- * larger than i_size.
- */
- down_write(&F2FS_I(inode)->i_mmap_sem);
- truncate_setsize(inode, attr->ia_size);
- up_write(&F2FS_I(inode)->i_mmap_sem);
+ bool to_smaller = (attr->ia_size <= i_size_read(inode));
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_setsize(inode, attr->ia_size);
+
+ if (to_smaller)
+ err = f2fs_truncate(inode);
+ /*
+ * do not trim all blocks after i_size if target size is
+ * larger than i_size.
+ */
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
+ if (err)
+ return err;
+
+ if (!to_smaller) {
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -944,14 +961,19 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT;
+
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
down_write(&F2FS_I(inode)->i_mmap_sem);
+
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
f2fs_lock_op(sbi);
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi);
+
up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
}
}
@@ -1054,7 +1076,12 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
if (ret)
return ret;
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ ret = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (ret) {
+ f2fs_put_dnode(&dn);
+ return ret;
+ }
+
ilen = min((pgoff_t)
ADDRS_PER_PAGE(dn.node_page, dst_inode) -
dn.ofs_in_node, len - i);
@@ -1161,25 +1188,33 @@ static int __exchange_data_block(struct inode *src_inode,
return ret;
}
-static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
+static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t start = offset >> PAGE_SHIFT;
+ pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
f2fs_balance_fs(sbi, true);
+
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
f2fs_lock_op(sbi);
-
f2fs_drop_extent_tree(inode);
-
+ truncate_pagecache(inode, offset);
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
f2fs_unlock_op(sbi);
+
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
{
- pgoff_t pg_start, pg_end;
loff_t new_size;
int ret;
@@ -1194,25 +1229,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = offset >> PAGE_SHIFT;
- pg_end = (offset + len) >> PAGE_SHIFT;
-
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out_unlock;
+ return ret;
- truncate_pagecache(inode, offset);
-
- ret = f2fs_do_collapse(inode, pg_start, pg_end);
+ ret = f2fs_do_collapse(inode, offset, len);
if (ret)
- goto out_unlock;
+ return ret;
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
@@ -1220,11 +1247,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache(inode, new_size);
ret = f2fs_truncate_blocks(inode, new_size, true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out_unlock:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1290,12 +1315,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
- down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
- goto out_sem;
-
- truncate_pagecache_range(inode, offset, offset + len - 1);
+ return ret;
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1307,7 +1329,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size, offset + len);
} else {
@@ -1315,7 +1337,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start);
if (ret)
- goto out_sem;
+ return ret;
new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT);
@@ -1326,12 +1348,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
unsigned int end_offset;
pgoff_t end;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+
+ truncate_pagecache_range(inode,
+ (loff_t)index << PAGE_SHIFT,
+ ((loff_t)pg_end << PAGE_SHIFT) - 1);
+
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
if (ret) {
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
@@ -1340,7 +1371,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
+
f2fs_unlock_op(sbi);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
f2fs_balance_fs(sbi, dn.node_changed);
@@ -1368,9 +1402,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
else
f2fs_i_size_write(inode, new_size);
}
-out_sem:
- up_write(&F2FS_I(inode)->i_mmap_sem);
-
return ret;
}
@@ -1399,26 +1430,27 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- /* avoid gc operation during block exchange */
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
- goto out;
+ return ret;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret)
- goto out;
-
- truncate_pagecache(inode, offset);
+ return ret;
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ /* avoid gc operation during block exchange */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
+ truncate_pagecache(inode, offset);
+
while (!ret && idx > pg_start) {
nr = idx - pg_start;
if (nr > delta)
@@ -1432,16 +1464,17 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
idx + delta, nr, false);
f2fs_unlock_op(sbi);
}
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
/* write out all moved pages, if possible */
+ down_write(&F2FS_I(inode)->i_mmap_sem);
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
-out:
- up_write(&F2FS_I(inode)->i_mmap_sem);
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
return ret;
}
@@ -1597,7 +1630,7 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned int flags = fi->i_flags;
- if (file_is_encrypt(inode))
+ if (f2fs_encrypted_inode(inode))
flags |= F2FS_ENCRYPT_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
flags |= F2FS_INLINE_DATA_FL;
@@ -1688,15 +1721,18 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
inode_lock(inode);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
-
- if (f2fs_is_atomic_file(inode))
+ if (f2fs_is_atomic_file(inode)) {
+ if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST))
+ ret = -EINVAL;
goto out;
+ }
ret = f2fs_convert_inline_inode(inode);
if (ret)
goto out;
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+
if (!get_dirty_pages(inode))
goto skip_flush;
@@ -1704,18 +1740,20 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
- if (ret)
+ if (ret) {
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
+ }
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
- f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
F2FS_I(inode)->inmem_task = current;
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1733,9 +1771,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (ret)
return ret;
- inode_lock(inode);
+ f2fs_balance_fs(F2FS_I_SB(inode), true);
- down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ inode_lock(inode);
if (f2fs_is_volatile_file(inode)) {
ret = -EINVAL;
@@ -1761,7 +1799,6 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
ret = -EINVAL;
}
- up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1853,6 +1890,8 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
+ clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
+
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1866,7 +1905,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct super_block *sb = sbi->sb;
__u32 in;
- int ret;
+ int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -1889,6 +1928,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
}
if (sb) {
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
thaw_bdev(sb->s_bdev, sb);
}
break;
@@ -1898,13 +1938,16 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
case F2FS_GOING_DOWN_METAFLUSH:
f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
f2fs_stop_checkpoint(sbi, false);
+ set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
break;
default:
ret = -EINVAL;
@@ -2107,7 +2150,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
return ret;
}
-static int f2fs_ioc_f2fs_write_checkpoint(struct file *filp, unsigned long arg)
+static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -2351,15 +2394,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
inode_lock(src);
- down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
if (src != dst) {
ret = -EBUSY;
if (!inode_trylock(dst))
goto out;
- if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) {
- inode_unlock(dst);
- goto out;
- }
}
ret = -EINVAL;
@@ -2404,6 +2442,14 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
goto out_unlock;
f2fs_balance_fs(sbi, true);
+
+ down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+ if (src != dst) {
+ ret = -EBUSY;
+ if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
+ goto out_src;
+ }
+
f2fs_lock_op(sbi);
ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
pos_out >> F2FS_BLKSIZE_BITS,
@@ -2416,13 +2462,15 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
f2fs_i_size_write(dst, dst_osize);
}
f2fs_unlock_op(sbi);
-out_unlock:
- if (src != dst) {
+
+ if (src != dst)
up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
- inode_unlock(dst);
- }
-out:
+out_src:
up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
+out_unlock:
+ if (src != dst)
+ inode_unlock(dst);
+out:
inode_unlock(src);
return ret;
}
@@ -2782,7 +2830,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (!pin) {
clear_inode_flag(inode, FI_PIN_FILE);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = 1;
+ f2fs_i_gc_failures_write(inode, 0);
goto done;
}
@@ -2888,7 +2936,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GARBAGE_COLLECT_RANGE:
return f2fs_ioc_gc_range(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
- return f2fs_ioc_f2fs_write_checkpoint(filp, arg);
+ return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 9093be6..5c8d004 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -53,12 +53,10 @@ static int gc_thread_func(void *data)
continue;
}
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
@@ -517,7 +515,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
continue;
}
- f2fs_get_node_info(sbi, nid, &ni);
+ if (f2fs_get_node_info(sbi, nid, &ni)) {
+ f2fs_put_page(node_page, 1);
+ continue;
+ }
+
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
@@ -576,7 +578,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
- f2fs_get_node_info(sbi, nid, dni);
+ if (f2fs_get_node_info(sbi, nid, dni)) {
+ f2fs_put_page(node_page, 1);
+ return false;
+ }
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -594,6 +599,72 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
+static int ra_data_block(struct inode *inode, pgoff_t index)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct address_space *mapping = inode->i_mapping;
+ struct dnode_of_data dn;
+ struct page *page;
+ struct extent_info ei = {0, 0, 0};
+ struct f2fs_io_info fio = {
+ .sbi = sbi,
+ .ino = inode->i_ino,
+ .type = DATA,
+ .temp = COLD,
+ .op = REQ_OP_READ,
+ .op_flags = 0,
+ .encrypted_page = NULL,
+ .in_list = false,
+ .retry = false,
+ };
+ int err;
+
+ page = f2fs_grab_cache_page(mapping, index, true);
+ if (!page)
+ return -ENOMEM;
+
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
+ if (err)
+ goto put_page;
+ f2fs_put_dnode(&dn);
+
+ if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
+ DATA_GENERIC))) {
+ err = -EFAULT;
+ goto put_page;
+ }
+got_it:
+ /* read page */
+ fio.page = page;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
+
+ fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
+ dn.data_blkaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto put_page;
+ }
+
+ err = f2fs_submit_page_bio(&fio);
+ if (err)
+ goto put_encrypted_page;
+ f2fs_put_page(fio.encrypted_page, 0);
+ f2fs_put_page(page, 1);
+ return 0;
+put_encrypted_page:
+ f2fs_put_page(fio.encrypted_page, 1);
+put_page:
+ f2fs_put_page(page, 1);
+ return err;
+}
+
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
@@ -615,7 +686,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
- struct page *page;
+ struct page *page, *mpage;
block_t newaddr;
int err;
bool lfs_mode = test_opt(fio.sbi, LFS);
@@ -655,7 +726,10 @@ static void move_data_block(struct inode *inode, block_t bidx,
*/
f2fs_wait_on_page_writeback(page, DATA, true);
- f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
+ if (err)
+ goto put_out;
+
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
@@ -675,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto recover_block;
}
+ mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
+ if (mpage) {
+ bool updated = false;
+
+ if (PageUptodate(mpage)) {
+ memcpy(page_address(fio.encrypted_page),
+ page_address(mpage), PAGE_SIZE);
+ updated = true;
+ }
+ f2fs_put_page(mpage, 1);
+ invalidate_mapping_pages(META_MAPPING(fio.sbi),
+ fio.old_blkaddr, fio.old_blkaddr);
+ if (updated)
+ goto write_page;
+ }
+
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
@@ -691,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
+write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
@@ -865,22 +957,30 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
- /* if inode uses special I/O path, let's go phase 3 */
+ if (!down_write_trylock(
+ &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
+ iput(inode);
+ sbi->skipped_gc_rwsem++;
+ continue;
+ }
+
+ start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
+ ofs_in_node;
+
if (f2fs_post_read_required(inode)) {
+ int err = ra_data_block(inode, start_bidx);
+
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (err) {
+ iput(inode);
+ continue;
+ }
add_gc_inode(gc_list, inode);
continue;
}
- if (!down_write_trylock(
- &F2FS_I(inode)->i_gc_rwsem[WRITE])) {
- iput(inode);
- continue;
- }
-
- start_bidx = f2fs_start_bidx_of_node(nofs, inode);
data_page = f2fs_get_read_data_page(inode,
- start_bidx + ofs_in_node, REQ_RAHEAD,
- true);
+ start_bidx, REQ_RAHEAD, true);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
@@ -903,6 +1003,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
continue;
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
+ sbi->skipped_gc_rwsem++;
up_write(&fi->i_gc_rwsem[READ]);
continue;
}
@@ -986,7 +1087,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
goto next;
sum = page_address(sum_page);
- f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+ if (type != GET_SUM_TYPE((&sum->footer))) {
+ f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
+ "type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ goto next;
+ }
/*
* this is to avoid deadlock:
@@ -1034,6 +1141,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
+ unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
@@ -1046,6 +1154,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
+ sbi->skipped_gc_rwsem = 0;
+ first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
@@ -1087,7 +1197,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
total_freed += seg_freed;
if (gc_type == FG_GC) {
- if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
+ if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
+ sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
@@ -1096,15 +1207,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
- if (!sync) {
- if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
- if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
- skipped_round * 2 >= round)
- f2fs_drop_inmem_pages_all(sbi, true);
+ if (sync)
+ goto stop;
+
+ if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
+ if (skipped_round <= MAX_SKIP_GC_COUNT ||
+ skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
+ if (first_skipped < last_skipped &&
+ (last_skipped - first_skipped) >
+ sbi->skipped_gc_rwsem) {
+ f2fs_drop_inmem_pages_all(sbi, true);
+ segno = NULL_SEGNO;
+ goto gc_more;
+ }
if (gc_type == FG_GC)
ret = f2fs_write_checkpoint(sbi, &cpc);
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 043830be..115dc21 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -121,6 +121,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
+ struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
@@ -130,6 +131,24 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
+ err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
+ if (err) {
+ f2fs_put_dnode(dn);
+ return err;
+ }
+
+ fio.version = ni.version;
+
+ if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(dn);
+ set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
+ f2fs_msg(fio.sbi->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EINVAL;
+ }
+
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
f2fs_do_read_inline_data(page, dn->inode_page);
@@ -363,6 +382,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
+ if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
+ f2fs_put_dnode(&dn);
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
+ "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
+ "run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EINVAL;
+ goto out;
+ }
+
f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
@@ -477,6 +507,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
@@ -668,7 +699,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
- f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
+ if (err)
+ goto out;
+
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index f121c86..959df22 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -68,13 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
-static bool __written_first_block(struct f2fs_inode *ri)
+static int __written_first_block(struct f2fs_sb_info *sbi,
+ struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
- if (is_valid_blkaddr(addr))
- return true;
- return false;
+ if (!__is_valid_data_blkaddr(addr))
+ return 1;
+ if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
+ return -EFAULT;
+ return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -121,7 +124,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
- if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
+ if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
@@ -159,8 +162,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
+ return true;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (!f2fs_enable_inode_chksum(sbi, page))
+#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
+#endif
return true;
ri = &F2FS_NODE(page)->i;
@@ -185,9 +195,31 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
-static bool sanity_check_inode(struct inode *inode)
+static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ unsigned long long iblocks;
+
+ iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
+ if (!iblocks) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
+ "run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
+ return false;
+ }
+
+ if (ino_of_node(node_page) != nid_of_node(node_page)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: corrupted inode footer i_ino=%lx, ino,nid: "
+ "[%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
+ return false;
+ }
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
&& !f2fs_has_extra_attr(inode)) {
@@ -197,6 +229,64 @@ static bool sanity_check_inode(struct inode *inode)
__func__, inode->i_ino);
return false;
}
+
+ if (f2fs_has_extra_attr(inode) &&
+ !f2fs_sb_has_extra_attr(sbi->sb)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) is with extra_attr, "
+ "but extra_attr feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
+ if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
+ fi->i_extra_isize % sizeof(__le32)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
+ "max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ return false;
+ }
+
+ if (F2FS_I(inode)->extent_tree) {
+ struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
+
+ if (ei->len &&
+ (!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
+ !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
+ DATA_GENERIC))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx) extent info [%u, %u, %u] "
+ "is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
+ return false;
+ }
+ }
+
+ if (f2fs_has_inline_data(inode) &&
+ (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
+ if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "%s: inode (ino=%lx, mode=%u) should not have "
+ "inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
+ return false;
+ }
+
return true;
}
@@ -207,6 +297,7 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
+ int err;
/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
@@ -268,6 +359,11 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
+ if (!sanity_check_inode(inode, node_page)) {
+ f2fs_put_page(node_page, 1);
+ return -EINVAL;
+ }
+
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
@@ -275,8 +371,15 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
- if (__written_first_block(ri))
- set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ if (S_ISREG(inode->i_mode)) {
+ err = __written_first_block(sbi, ri);
+ if (err < 0) {
+ f2fs_put_page(node_page, 1);
+ return err;
+ }
+ if (!err)
+ set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
+ }
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
@@ -297,9 +400,9 @@ static int do_read_inode(struct inode *inode)
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
- F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
- F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
- F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
f2fs_put_page(node_page, 1);
@@ -330,10 +433,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
- if (!sanity_check_inode(inode)) {
- ret = -EINVAL;
- goto bad_inode;
- }
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
@@ -470,10 +569,14 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (inode->i_nlink == 0)
clear_inline_node(node_page);
- F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
- F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
- F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
+ F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
+ F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
+#endif
}
void f2fs_update_inode_page(struct inode *inode)
@@ -558,12 +661,11 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
-#endif
+
if (!err) {
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
@@ -626,6 +728,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
+ int err;
/*
* clear nlink of inode in order to release resource of inode
@@ -648,10 +751,16 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
- f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_msg(sbi->sb, KERN_WARNING,
+ "May loss orphan inode, run fsck to fix.");
+ goto out;
+ }
if (ni.blk_addr != NULL_ADDR) {
- int err = f2fs_acquire_orphan_inode(sbi);
+ err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -664,6 +773,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
set_inode_flag(inode, FI_FREE_NID);
}
+out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 231b7f3..1f67e38 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -51,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
- F2FS_I(inode)->i_crtime = timespec64_to_timespec(inode->i_mtime);
+ F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = sbi->s_next_generation++;
if (S_ISDIR(inode->i_mode))
@@ -246,7 +246,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
- strncpy(extlist[count], name, strlen(name));
+ memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
@@ -254,7 +254,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
- strncpy(extlist[cold_count], name, strlen(name));
+ memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 10643b1..dd2e45a 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -28,6 +28,7 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
+static struct kmem_cache *fsync_node_entry_slab;
/*
* Check whether the given nid is within node id range.
@@ -112,25 +113,22 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- pgoff_t index = current_nat_addr(sbi, nid);
- return f2fs_get_meta_page(sbi, index);
+ return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
struct page *src_page;
struct page *dst_page;
- pgoff_t src_off;
pgoff_t dst_off;
void *src_addr;
void *dst_addr;
struct f2fs_nm_info *nm_i = NM_I(sbi);
- src_off = current_nat_addr(sbi, nid);
- dst_off = next_nat_addr(sbi, src_off);
+ dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
/* get current nat block page with lock */
- src_page = f2fs_get_meta_page(sbi, src_off);
+ src_page = get_current_nat_page(sbi, nid);
dst_page = f2fs_grab_meta_page(sbi, dst_off);
f2fs_bug_on(sbi, PageDirty(src_page));
@@ -176,14 +174,30 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
if (raw_ne)
node_info_from_raw_nat(&ne->ni, raw_ne);
+
+ spin_lock(&nm_i->nat_list_lock);
list_add_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
nm_i->nat_cnt++;
return ne;
}
static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
- return radix_tree_lookup(&nm_i->nat_root, n);
+ struct nat_entry *ne;
+
+ ne = radix_tree_lookup(&nm_i->nat_root, n);
+
+ /* for recent accessed nat entry, move it to tail of lru list */
+ if (ne && !get_nat_flag(ne, IS_DIRTY)) {
+ spin_lock(&nm_i->nat_list_lock);
+ if (!list_empty(&ne->list))
+ list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+ }
+
+ return ne;
}
static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
@@ -194,7 +208,6 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
- list_del(&e->list);
radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
nm_i->nat_cnt--;
__free_nat_entry(e);
@@ -245,16 +258,21 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
nm_i->dirty_nat_cnt++;
set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
+ spin_lock(&nm_i->nat_list_lock);
if (new_ne)
list_del_init(&ne->list);
else
list_move_tail(&ne->list, &head->entry_list);
+ spin_unlock(&nm_i->nat_list_lock);
}
static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
struct nat_entry_set *set, struct nat_entry *ne)
{
+ spin_lock(&nm_i->nat_list_lock);
list_move_tail(&ne->list, &nm_i->nat_entries);
+ spin_unlock(&nm_i->nat_list_lock);
+
set_nat_flag(ne, IS_DIRTY, false);
set->entry_cnt--;
nm_i->dirty_nat_cnt--;
@@ -267,6 +285,72 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
start, nr);
}
+bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
+{
+ return NODE_MAPPING(sbi) == page->mapping &&
+ IS_DNODE(page) && is_cold_node(page);
+}
+
+void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ spin_lock_init(&sbi->fsync_node_lock);
+ INIT_LIST_HEAD(&sbi->fsync_node_list);
+ sbi->fsync_seg_id = 0;
+ sbi->fsync_node_num = 0;
+}
+
+static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
+ struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+ unsigned int seq_id;
+
+ fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);
+
+ get_page(page);
+ fn->page = page;
+ INIT_LIST_HEAD(&fn->list);
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_add_tail(&fn->list, &sbi->fsync_node_list);
+ fn->seq_id = sbi->fsync_seg_id++;
+ seq_id = fn->seq_id;
+ sbi->fsync_node_num++;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ return seq_id;
+}
+
+void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
+{
+ struct fsync_node_entry *fn;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ list_for_each_entry(fn, &sbi->fsync_node_list, list) {
+ if (fn->page == page) {
+ list_del(&fn->list);
+ sbi->fsync_node_num--;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ kmem_cache_free(fsync_node_entry_slab, fn);
+ put_page(page);
+ return;
+ }
+ }
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ f2fs_bug_on(sbi, 1);
+}
+
+void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ sbi->fsync_seg_id = 0;
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+}
+
int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -371,7 +455,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
new_blkaddr == NULL_ADDR);
f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
new_blkaddr == NEW_ADDR);
- f2fs_bug_on(sbi, is_valid_blkaddr(nat_get_blkaddr(e)) &&
+ f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
new_blkaddr == NEW_ADDR);
/* increment version no as node is removed */
@@ -382,7 +466,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
/* change address */
nat_set_blkaddr(e, new_blkaddr);
- if (!is_valid_blkaddr(new_blkaddr))
+ if (!is_valid_data_blkaddr(sbi, new_blkaddr))
set_nat_flag(e, IS_CHECKPOINTED, false);
__set_nat_cache_dirty(nm_i, e);
@@ -405,13 +489,25 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
- while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
+ spin_lock(&nm_i->nat_list_lock);
+ while (nr_shrink) {
struct nat_entry *ne;
+
+ if (list_empty(&nm_i->nat_entries))
+ break;
+
ne = list_first_entry(&nm_i->nat_entries,
struct nat_entry, list);
+ list_del(&ne->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
+
+ spin_lock(&nm_i->nat_list_lock);
}
+ spin_unlock(&nm_i->nat_list_lock);
+
up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -419,7 +515,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
/*
* This function always returns success
*/
-void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
+int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -443,7 +539,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
- return;
+ return 0;
}
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
@@ -466,6 +562,9 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
up_read(&nm_i->nat_tree_lock);
page = f2fs_get_meta_page(sbi, index);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
nat_blk = (struct f2fs_nat_block *)page_address(page);
ne = nat_blk->entries[nid - start_nid];
node_info_from_raw_nat(ni, &ne);
@@ -473,6 +572,7 @@ void f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
cache:
/* cache nat entry */
cache_nat_entry(sbi, nid, &ne);
+ return 0;
}
/*
@@ -722,12 +822,15 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
return err;
}
-static void truncate_node(struct dnode_of_data *dn)
+static int truncate_node(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info ni;
+ int err;
- f2fs_get_node_info(sbi, dn->nid, &ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &ni);
+ if (err)
+ return err;
/* Deallocate node address */
f2fs_invalidate_blocks(sbi, ni.blk_addr);
@@ -750,11 +853,14 @@ static void truncate_node(struct dnode_of_data *dn)
dn->node_page = NULL;
trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
+
+ return 0;
}
static int truncate_dnode(struct dnode_of_data *dn)
{
struct page *page;
+ int err;
if (dn->nid == 0)
return 1;
@@ -770,7 +876,10 @@ static int truncate_dnode(struct dnode_of_data *dn)
dn->node_page = page;
dn->ofs_in_node = 0;
f2fs_truncate_data_blocks(dn);
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ return err;
+
return 1;
}
@@ -835,7 +944,9 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
if (!ofs) {
/* remove current indirect node */
dn->node_page = page;
- truncate_node(dn);
+ ret = truncate_node(dn);
+ if (ret)
+ goto out_err;
freed++;
} else {
f2fs_put_page(page, 1);
@@ -893,7 +1004,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
if (offset[idx + 1] == 0) {
dn->node_page = pages[idx];
dn->nid = nid[idx];
- truncate_node(dn);
+ err = truncate_node(dn);
+ if (err)
+ goto fail;
} else {
f2fs_put_page(pages[idx], 1);
}
@@ -1014,6 +1127,7 @@ int f2fs_truncate_xattr_node(struct inode *inode)
nid_t nid = F2FS_I(inode)->i_xattr_nid;
struct dnode_of_data dn;
struct page *npage;
+ int err;
if (!nid)
return 0;
@@ -1022,10 +1136,15 @@ int f2fs_truncate_xattr_node(struct inode *inode)
if (IS_ERR(npage))
return PTR_ERR(npage);
+ set_new_dnode(&dn, inode, NULL, npage, nid);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_page(npage, 1);
+ return err;
+ }
+
f2fs_i_xnid_write(inode, 0);
- set_new_dnode(&dn, inode, NULL, npage, nid);
- truncate_node(&dn);
return 0;
}
@@ -1055,11 +1174,19 @@ int f2fs_remove_inode_page(struct inode *inode)
f2fs_truncate_data_blocks_range(&dn, 1);
/* 0 is possible, after f2fs_new_inode() has failed */
+ if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
+ f2fs_put_dnode(&dn);
+ return -EIO;
+ }
f2fs_bug_on(F2FS_I_SB(inode),
inode->i_blocks != 0 && inode->i_blocks != 8);
/* will put inode & node pages */
- truncate_node(&dn);
+ err = truncate_node(&dn);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
return 0;
}
@@ -1092,7 +1219,11 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
goto fail;
#ifdef CONFIG_F2FS_CHECK_FS
- f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
+ if (err) {
+ dec_valid_node_count(sbi, dn->inode, !ofs);
+ goto fail;
+ }
f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
new_ni.nid = dn->nid;
@@ -1140,13 +1271,21 @@ static int read_node_page(struct page *page, int op_flags)
.page = page,
.encrypted_page = NULL,
};
+ int err;
- if (PageUptodate(page))
+ if (PageUptodate(page)) {
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
+#endif
return LOCKED_PAGE;
+ }
- f2fs_get_node_info(sbi, page->index, &ni);
+ err = f2fs_get_node_info(sbi, page->index, &ni);
+ if (err)
+ return err;
- if (unlikely(ni.blk_addr == NULL_ADDR)) {
+ if (unlikely(ni.blk_addr == NULL_ADDR) ||
+ is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
ClearPageUptodate(page);
return -ENOENT;
}
@@ -1348,7 +1487,7 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc, bool do_balance,
- enum iostat_type io_type)
+ enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
@@ -1365,6 +1504,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.io_type = io_type,
.io_wbc = wbc,
};
+ unsigned int seq;
trace_f2fs_writepage(page, NODE);
@@ -1374,10 +1514,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
+ if (wbc->sync_mode == WB_SYNC_NONE &&
+ IS_DNODE(page) && is_cold_node(page))
+ goto redirty_out;
+
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
+ if (f2fs_get_node_info(sbi, nid, &ni))
+ goto redirty_out;
+
if (wbc->for_reclaim) {
if (!down_read_trylock(&sbi->node_write))
goto redirty_out;
@@ -1385,8 +1532,6 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
down_read(&sbi->node_write);
}
- f2fs_get_node_info(sbi, nid, &ni);
-
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
@@ -1396,11 +1541,22 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
}
+ if (__is_valid_data_blkaddr(ni.blk_addr) &&
+ !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
+ goto redirty_out;
+
if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
set_page_writeback(page);
ClearPageError(page);
+
+ if (f2fs_in_warm_node_list(sbi, page)) {
+ seq = f2fs_add_fsync_node_entry(sbi, page);
+ if (seq_id)
+ *seq_id = seq;
+ }
+
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
@@ -1448,7 +1604,7 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
goto out_page;
if (__write_node_page(node_page, false, NULL,
- &wbc, false, FS_GC_NODE_IO))
+ &wbc, false, FS_GC_NODE_IO, NULL))
unlock_page(node_page);
goto release_page;
} else {
@@ -1465,11 +1621,13 @@ void f2fs_move_node_page(struct page *node_page, int gc_type)
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
+ return __write_node_page(page, false, NULL, wbc, false,
+ FS_NODE_IO, NULL);
}
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
- struct writeback_control *wbc, bool atomic)
+ struct writeback_control *wbc, bool atomic,
+ unsigned int *seq_id)
{
pgoff_t index;
pgoff_t last_idx = ULONG_MAX;
@@ -1550,7 +1708,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc, true,
- FS_NODE_IO);
+ FS_NODE_IO, seq_id);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
@@ -1633,7 +1791,9 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
!is_cold_node(page)))
continue;
lock_node:
- if (!trylock_page(page))
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ lock_page(page);
+ else if (!trylock_page(page))
continue;
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
@@ -1665,7 +1825,7 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted,
- wbc, do_balance, io_type);
+ wbc, do_balance, io_type, NULL);
if (ret)
unlock_page(page);
else if (submitted)
@@ -1684,10 +1844,12 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
}
if (step < 2) {
+ if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
+ goto out;
step++;
goto next_step;
}
-
+out:
if (nwritten)
f2fs_submit_merged_write(sbi, NODE);
@@ -1696,35 +1858,46 @@ int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
return ret;
}
-int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
+int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
+ unsigned int seq_id)
{
- pgoff_t index = 0;
- struct pagevec pvec;
+ struct fsync_node_entry *fn;
+ struct page *page;
+ struct list_head *head = &sbi->fsync_node_list;
+ unsigned long flags;
+ unsigned int cur_seq_id = 0;
int ret2, ret = 0;
- int nr_pages;
- pagevec_init(&pvec);
-
- while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
- PAGECACHE_TAG_WRITEBACK))) {
- int i;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE, true);
- if (TestClearPageError(page))
- ret = -EIO;
- }
+ while (seq_id && cur_seq_id < seq_id) {
+ spin_lock_irqsave(&sbi->fsync_node_lock, flags);
+ if (list_empty(head)) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
}
- pagevec_release(&pvec);
- cond_resched();
+ fn = list_first_entry(head, struct fsync_node_entry, list);
+ if (fn->seq_id > seq_id) {
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+ break;
+ }
+ cur_seq_id = fn->seq_id;
+ page = fn->page;
+ get_page(page);
+ spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
+
+ f2fs_wait_on_page_writeback(page, NODE, true);
+ if (TestClearPageError(page))
+ ret = -EIO;
+
+ put_page(page);
+
+ if (ret)
+ break;
}
ret2 = filemap_check_errors(NODE_MAPPING(sbi));
if (!ret)
ret = ret2;
+
return ret;
}
@@ -1774,6 +1947,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageUptodate(page))
SetPageUptodate(page);
+#ifdef CONFIG_F2FS_CHECK_FS
+ if (IS_INODE(page))
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
@@ -1968,7 +2145,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
-static void scan_nat_page(struct f2fs_sb_info *sbi,
+static int scan_nat_page(struct f2fs_sb_info *sbi,
struct page *nat_page, nid_t start_nid)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1986,7 +2163,10 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
break;
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
- f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
+
+ if (blk_addr == NEW_ADDR)
+ return -EINVAL;
+
if (blk_addr == NULL_ADDR) {
add_free_nid(sbi, start_nid, true, true);
} else {
@@ -1995,6 +2175,8 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
spin_unlock(&NM_I(sbi)->nid_list_lock);
}
}
+
+ return 0;
}
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
@@ -2050,11 +2232,11 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
up_read(&nm_i->nat_tree_lock);
}
-static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
+static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
bool sync, bool mount)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- int i = 0;
+ int i = 0, ret;
nid_t nid = nm_i->next_scan_nid;
if (unlikely(nid >= nm_i->max_nid))
@@ -2062,17 +2244,17 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
/* Enough entries */
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
- return;
+ return 0;
if (!mount) {
/* try to find free nids in free_nid_bitmap */
scan_free_nid_bits(sbi);
if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
- return;
+ return 0;
}
/* readahead nat pages to be scanned */
@@ -2086,8 +2268,16 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
nm_i->nat_block_bitmap)) {
struct page *page = get_current_nat_page(sbi, nid);
- scan_nat_page(sbi, page, nid);
+ ret = scan_nat_page(sbi, page, nid);
f2fs_put_page(page, 1);
+
+ if (ret) {
+ up_read(&nm_i->nat_tree_lock);
+ f2fs_bug_on(sbi, !mount);
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "NAT is corrupt, run fsck to fix it");
+ return -EINVAL;
+ }
}
nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
@@ -2108,13 +2298,19 @@ static void __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
+
+ return 0;
}
-void f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
+int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
+ int ret;
+
mutex_lock(&NM_I(sbi)->build_lock);
- __f2fs_build_free_nids(sbi, sync, mount);
+ ret = __f2fs_build_free_nids(sbi, sync, mount);
mutex_unlock(&NM_I(sbi)->build_lock);
+
+ return ret;
}
/*
@@ -2127,12 +2323,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i = NULL;
retry:
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
return false;
}
-#endif
+
spin_lock(&nm_i->nid_list_lock);
if (unlikely(nm_i->available_nids == 0)) {
@@ -2277,12 +2472,16 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
+ int err;
if (!prev_xnid)
goto recover_xnid;
/* 1: invalidate the previous xattr nid */
- f2fs_get_node_info(sbi, prev_xnid, &ni);
+ err = f2fs_get_node_info(sbi, prev_xnid, &ni);
+ if (err)
+ return err;
+
f2fs_invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode, false);
set_node_addr(sbi, &ni, NULL_ADDR, false);
@@ -2317,8 +2516,11 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
nid_t ino = ino_of_node(page);
struct node_info old_ni, new_ni;
struct page *ipage;
+ int err;
- f2fs_get_node_info(sbi, ino, &old_ni);
+ err = f2fs_get_node_info(sbi, ino, &old_ni);
+ if (err)
+ return err;
if (unlikely(old_ni.blk_addr != NULL_ADDR))
return -EINVAL;
@@ -2372,7 +2574,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
return 0;
}
-void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
+int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum)
{
struct f2fs_node *rn;
@@ -2394,6 +2596,9 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
for (idx = addr; idx < addr + nrpages; idx++) {
struct page *page = f2fs_get_tmp_page(sbi, idx);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
rn = F2FS_NODE(page);
sum_entry->nid = rn->footer.nid;
sum_entry->version = 0;
@@ -2405,6 +2610,7 @@ void f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
invalidate_mapping_pages(META_MAPPING(sbi), addr,
addr + nrpages);
}
+ return 0;
}
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
@@ -2582,6 +2788,13 @@ void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
nid_t set_idx = 0;
LIST_HEAD(sets);
+ /* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+ if (enabled_nat_bits(sbi, cpc)) {
+ down_write(&nm_i->nat_tree_lock);
+ remove_nats_in_journal(sbi);
+ up_write(&nm_i->nat_tree_lock);
+ }
+
if (!nm_i->dirty_nat_cnt)
return;
@@ -2634,7 +2847,13 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
- struct page *page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ struct page *page;
+
+ page = f2fs_get_meta_page(sbi, nat_bits_addr++);
+ if (IS_ERR(page)) {
+ disable_nat_bits(sbi, true);
+ return PTR_ERR(page);
+ }
memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
page_address(page), F2FS_BLKSIZE);
@@ -2718,6 +2937,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
+ spin_lock_init(&nm_i->nat_list_lock);
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->nid_list_lock);
@@ -2762,8 +2982,8 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi)
for (i = 0; i < nm_i->nat_blocks; i++) {
nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
- NAT_ENTRY_BITMAP_SIZE_ALIGNED, GFP_KERNEL);
- if (!nm_i->free_nid_bitmap)
+ f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
+ if (!nm_i->free_nid_bitmap[i])
return -ENOMEM;
}
@@ -2801,8 +3021,7 @@ int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
/* load free nid status from nat_bits table */
load_free_nid_bitmap(sbi);
- f2fs_build_free_nids(sbi, true, true);
- return 0;
+ return f2fs_build_free_nids(sbi, true, true);
}
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
@@ -2837,8 +3056,13 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
unsigned idx;
nid = nat_get_nid(natvec[found - 1]) + 1;
- for (idx = 0; idx < found; idx++)
+ for (idx = 0; idx < found; idx++) {
+ spin_lock(&nm_i->nat_list_lock);
+ list_del(&natvec[idx]->list);
+ spin_unlock(&nm_i->nat_list_lock);
+
__del_from_nat_cache(nm_i, natvec[idx]);
+ }
}
f2fs_bug_on(sbi, nm_i->nat_cnt);
@@ -2893,8 +3117,15 @@ int __init f2fs_create_node_manager_caches(void)
sizeof(struct nat_entry_set));
if (!nat_entry_set_slab)
goto destroy_free_nid;
+
+ fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
+ sizeof(struct fsync_node_entry));
+ if (!fsync_node_entry_slab)
+ goto destroy_nat_entry_set;
return 0;
+destroy_nat_entry_set:
+ kmem_cache_destroy(nat_entry_set_slab);
destroy_free_nid:
kmem_cache_destroy(free_nid_slab);
destroy_nat_entry:
@@ -2905,6 +3136,7 @@ int __init f2fs_create_node_manager_caches(void)
void f2fs_destroy_node_manager_caches(void)
{
+ kmem_cache_destroy(fsync_node_entry_slab);
kmem_cache_destroy(nat_entry_set_slab);
kmem_cache_destroy(free_nid_slab);
kmem_cache_destroy(nat_entry_slab);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e..0f4db7a 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
+static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
+}
+
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -444,6 +449,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+ f2fs_inode_chksum_set(F2FS_P_SB(page), page);
+#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 38f25f0..95511ed 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -241,8 +241,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
- unsigned int free_blocks = sbi->user_block_count -
- valid_user_blocks(sbi);
+ unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
+ valid_user_blocks(sbi);
int err = 0;
/* get node pages in the current segment */
@@ -252,10 +252,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page))
break;
@@ -471,7 +475,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
- f2fs_get_node_info(sbi, dn.nid, &ni);
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err)
+ goto err;
+
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
@@ -507,14 +514,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* dest is valid block, try to recover from src to dest */
- if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) {
+ if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = f2fs_reserve_new_block(&dn);
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- while (err)
+ while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
err = f2fs_reserve_new_block(&dn);
-#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
@@ -568,12 +574,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
- if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
+ if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
+ if (IS_ERR(page)) {
+ err = PTR_ERR(page);
+ break;
+ }
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
@@ -628,7 +638,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9efce17..30779aa 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -250,7 +250,13 @@ static int __revoke_inmem_pages(struct inode *inode,
err = -EAGAIN;
goto next;
}
- f2fs_get_node_info(sbi, dn.nid, &ni);
+
+ err = f2fs_get_node_info(sbi, dn.nid, &ni);
+ if (err) {
+ f2fs_put_dnode(&dn);
+ return err;
+ }
+
if (cur->old_addr == NEW_ADDR) {
f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
f2fs_update_data_blkaddr(&dn, NEW_ADDR);
@@ -439,8 +445,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
int err;
f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
+ down_write(&fi->i_gc_rwsem[WRITE]);
+
+ f2fs_lock_op(sbi);
set_inode_flag(inode, FI_ATOMIC_COMMIT);
mutex_lock(&fi->inmem_lock);
@@ -455,6 +463,8 @@ int f2fs_commit_inmem_pages(struct inode *inode)
clear_inode_flag(inode, FI_ATOMIC_COMMIT);
f2fs_unlock_op(sbi);
+ up_write(&fi->i_gc_rwsem[WRITE]);
+
return err;
}
@@ -464,12 +474,10 @@ int f2fs_commit_inmem_pages(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
-#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
-#endif
/* balance_fs_bg is able to be pending */
if (need && excess_cached_nats(sbi))
@@ -503,7 +511,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi) && !excess_dirty_nats(sbi))
+ if (!is_idle(sbi) &&
+ (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
/* checkpoint is the only way to shrink partial cached entries */
@@ -511,6 +520,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
excess_prefree_segs(sbi) ||
excess_dirty_nats(sbi) ||
+ excess_dirty_nodes(sbi) ||
f2fs_time_over(sbi, CP_TIME)) {
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
@@ -831,9 +841,12 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
dc->len = len;
dc->ref = 0;
dc->state = D_PREP;
+ dc->issuing = 0;
dc->error = 0;
init_completion(&dc->wait);
list_add_tail(&dc->list, pend_list);
+ spin_lock_init(&dc->lock);
+ dc->bio_ref = 0;
atomic_inc(&dcc->discard_cmd_cnt);
dcc->undiscard_blks += len;
@@ -860,7 +873,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
struct discard_cmd *dc)
{
if (dc->state == D_DONE)
- atomic_dec(&dcc->issing_discard);
+ atomic_sub(dc->issuing, &dcc->issing_discard);
list_del(&dc->list);
rb_erase(&dc->rb_node, &dcc->root);
@@ -875,9 +888,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_cmd *dc)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ unsigned long flags;
trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->bio_ref) {
+ spin_unlock_irqrestore(&dc->lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
+
f2fs_bug_on(sbi, dc->ref);
if (dc->error == -EOPNOTSUPP)
@@ -893,10 +914,17 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
static void f2fs_submit_discard_endio(struct bio *bio)
{
struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
+ unsigned long flags;
dc->error = blk_status_to_errno(bio->bi_status);
- dc->state = D_DONE;
- complete_all(&dc->wait);
+
+ spin_lock_irqsave(&dc->lock, flags);
+ dc->bio_ref--;
+ if (!dc->bio_ref && dc->state == D_SUBMIT) {
+ dc->state = D_DONE;
+ complete_all(&dc->wait);
+ }
+ spin_unlock_irqrestore(&dc->lock, flags);
bio_put(bio);
}
@@ -934,6 +962,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
+ dpolicy->ordered = false;
dpolicy->granularity = granularity;
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
@@ -945,6 +974,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
dpolicy->io_aware = true;
dpolicy->sync = false;
+ dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
@@ -962,48 +992,115 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
}
}
-
+static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
+ struct block_device *bdev, block_t lstart,
+ block_t start, block_t len);
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
-static void __submit_discard_cmd(struct f2fs_sb_info *sbi,
+static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
- struct discard_cmd *dc)
+ struct discard_cmd *dc,
+ unsigned int *issued)
{
+ struct block_device *bdev = dc->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
&(dcc->fstrim_list) : &(dcc->wait_list);
- struct bio *bio = NULL;
int flag = dpolicy->sync ? REQ_SYNC : 0;
+ block_t lstart, start, len, total_len;
+ int err = 0;
if (dc->state != D_PREP)
- return;
+ return 0;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
- return;
+ return 0;
- trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len);
+ trace_f2fs_issue_discard(bdev, dc->start, dc->len);
- dc->error = __blkdev_issue_discard(dc->bdev,
- SECTOR_FROM_BLOCK(dc->start),
- SECTOR_FROM_BLOCK(dc->len),
- GFP_NOFS, 0, &bio);
- if (!dc->error) {
- /* should keep before submission to avoid D_DONE right away */
- dc->state = D_SUBMIT;
- atomic_inc(&dcc->issued_discard);
- atomic_inc(&dcc->issing_discard);
- if (bio) {
- bio->bi_private = dc;
- bio->bi_end_io = f2fs_submit_discard_endio;
- bio->bi_opf |= flag;
- submit_bio(bio);
- list_move_tail(&dc->list, wait_list);
- __check_sit_bitmap(sbi, dc->start, dc->start + dc->len);
+ lstart = dc->lstart;
+ start = dc->start;
+ len = dc->len;
+ total_len = len;
- f2fs_update_iostat(sbi, FS_DISCARD, 1);
+ dc->len = 0;
+
+ while (total_len && *issued < dpolicy->max_requests && !err) {
+ struct bio *bio = NULL;
+ unsigned long flags;
+ bool last = true;
+
+ if (len > max_discard_blocks) {
+ len = max_discard_blocks;
+ last = false;
}
- } else {
- __remove_discard_cmd(sbi, dc);
+
+ (*issued)++;
+ if (*issued == dpolicy->max_requests)
+ last = true;
+
+ dc->len += len;
+
+ if (time_to_inject(sbi, FAULT_DISCARD)) {
+ f2fs_show_injection_info(FAULT_DISCARD);
+ err = -EIO;
+ goto submit;
+ }
+ err = __blkdev_issue_discard(bdev,
+ SECTOR_FROM_BLOCK(start),
+ SECTOR_FROM_BLOCK(len),
+ GFP_NOFS, 0, &bio);
+submit:
+ if (err) {
+ spin_lock_irqsave(&dc->lock, flags);
+ if (dc->state == D_PARTIAL)
+ dc->state = D_SUBMIT;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ break;
+ }
+
+ f2fs_bug_on(sbi, !bio);
+
+ /*
+ * should keep before submission to avoid D_DONE
+ * right away
+ */
+ spin_lock_irqsave(&dc->lock, flags);
+ if (last)
+ dc->state = D_SUBMIT;
+ else
+ dc->state = D_PARTIAL;
+ dc->bio_ref++;
+ spin_unlock_irqrestore(&dc->lock, flags);
+
+ atomic_inc(&dcc->issing_discard);
+ dc->issuing++;
+ list_move_tail(&dc->list, wait_list);
+
+ /* sanity check on discard range */
+ __check_sit_bitmap(sbi, start, start + len);
+
+ bio->bi_private = dc;
+ bio->bi_end_io = f2fs_submit_discard_endio;
+ bio->bi_opf |= flag;
+ submit_bio(bio);
+
+ atomic_inc(&dcc->issued_discard);
+
+ f2fs_update_iostat(sbi, FS_DISCARD, 1);
+
+ lstart += len;
+ start += len;
+ total_len -= len;
+ len = total_len;
}
+
+ if (!err && len)
+ __update_discard_tree_range(sbi, bdev, lstart, start, len);
+ return err;
}
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
@@ -1084,10 +1181,11 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct discard_info di = {0};
struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_discard_blocks =
+ SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
block_t end = lstart + len;
- mutex_lock(&dcc->cmd_lock);
-
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, lstart,
(struct rb_entry **)&prev_dc,
@@ -1127,7 +1225,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (prev_dc && prev_dc->state == D_PREP &&
prev_dc->bdev == bdev &&
- __is_discard_back_mergeable(&di, &prev_dc->di)) {
+ __is_discard_back_mergeable(&di, &prev_dc->di,
+ max_discard_blocks)) {
prev_dc->di.len += di.len;
dcc->undiscard_blks += di.len;
__relocate_discard_cmd(dcc, prev_dc);
@@ -1138,7 +1237,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
if (next_dc && next_dc->state == D_PREP &&
next_dc->bdev == bdev &&
- __is_discard_front_mergeable(&di, &next_dc->di)) {
+ __is_discard_front_mergeable(&di, &next_dc->di,
+ max_discard_blocks)) {
next_dc->di.lstart = di.lstart;
next_dc->di.len += di.len;
next_dc->di.start = di.start;
@@ -1161,8 +1261,6 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
node = rb_next(&prev_dc->rb_node);
next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
}
-
- mutex_unlock(&dcc->cmd_lock);
}
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
@@ -1177,10 +1275,72 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
blkstart -= FDEV(devi).start_blk;
}
+ mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
+ mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
return 0;
}
+static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
+ struct discard_policy *dpolicy)
+{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+ struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
+ struct rb_node **insert_p = NULL, *insert_parent = NULL;
+ struct discard_cmd *dc;
+ struct blk_plug plug;
+ unsigned int pos = dcc->next_pos;
+ unsigned int issued = 0;
+ bool io_interrupted = false;
+
+ mutex_lock(&dcc->cmd_lock);
+ dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
+ NULL, pos,
+ (struct rb_entry **)&prev_dc,
+ (struct rb_entry **)&next_dc,
+ &insert_p, &insert_parent, true);
+ if (!dc)
+ dc = next_dc;
+
+ blk_start_plug(&plug);
+
+ while (dc) {
+ struct rb_node *node;
+ int err = 0;
+
+ if (dc->state != D_PREP)
+ goto next;
+
+ if (dpolicy->io_aware && !is_idle(sbi)) {
+ io_interrupted = true;
+ break;
+ }
+
+ dcc->next_pos = dc->lstart + dc->len;
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
+ break;
+next:
+ node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+ dc = rb_entry_safe(node, struct discard_cmd, rb_node);
+ }
+
+ blk_finish_plug(&plug);
+
+ if (!dc)
+ dcc->next_pos = 0;
+
+ mutex_unlock(&dcc->cmd_lock);
+
+ if (!issued && io_interrupted)
+ issued = -1;
+
+ return issued;
+}
+
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
@@ -1188,19 +1348,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
struct list_head *pend_list;
struct discard_cmd *dc, *tmp;
struct blk_plug plug;
- int i, iter = 0, issued = 0;
+ int i, issued = 0;
bool io_interrupted = false;
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
+
+ if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+ return __issue_discard_cmd_orderly(sbi, dpolicy);
+
pend_list = &dcc->pend_list[i];
mutex_lock(&dcc->cmd_lock);
if (list_empty(pend_list))
goto next;
- f2fs_bug_on(sbi,
- !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1208,20 +1373,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi)) {
io_interrupted = true;
- goto skip;
+ break;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
- issued++;
-skip:
- if (++iter >= dpolicy->max_requests)
+ __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+
+ if (issued >= dpolicy->max_requests)
break;
}
blk_finish_plug(&plug);
next:
mutex_unlock(&dcc->cmd_lock);
- if (iter >= dpolicy->max_requests)
+ if (issued >= dpolicy->max_requests || io_interrupted)
break;
}
@@ -1319,21 +1483,22 @@ static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
return trimmed;
}
-static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
+static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy)
{
struct discard_policy dp;
+ unsigned int discard_blks;
- if (dpolicy) {
- __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
- return;
- }
+ if (dpolicy)
+ return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
/* wait all */
__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
- __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+ discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
+
+ return discard_blks;
}
/* This should be covered by global mutex, &sit_i->sentry_lock */
@@ -1386,6 +1551,8 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
/* just to make sure there is no pending discard commands */
__wait_all_discard_cmd(sbi, NULL);
+
+ f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
return dropped;
}
@@ -1643,21 +1810,30 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
unsigned int start = 0, end = -1;
unsigned int secno, start_segno;
bool force = (cpc->reason & CP_DISCARD);
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
mutex_lock(&dirty_i->seglist_lock);
while (1) {
int i;
+
+ if (need_align && end != -1)
+ end--;
start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
if (start >= MAIN_SEGS(sbi))
break;
end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
start + 1);
- for (i = start; i < end; i++)
- clear_bit(i, prefree_map);
+ if (need_align) {
+ start = rounddown(start, sbi->segs_per_sec);
+ end = roundup(end, sbi->segs_per_sec);
+ }
- dirty_i->nr_dirty[PRE] -= end - start;
+ for (i = start; i < end; i++) {
+ if (test_and_clear_bit(i, prefree_map))
+ dirty_i->nr_dirty[PRE]--;
+ }
if (!test_opt(sbi, DISCARD))
continue;
@@ -1751,7 +1927,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->nr_discards = 0;
dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
dcc->undiscard_blks = 0;
+ dcc->next_pos = 0;
dcc->root = RB_ROOT;
+ dcc->rbtree_check = false;
init_waitqueue_head(&dcc->discard_wait_queue);
SM_I(sbi)->dcc_info = dcc;
@@ -1901,6 +2079,8 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
if (addr == NEW_ADDR)
return;
+ invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
+
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
@@ -1919,7 +2099,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
struct seg_entry *se;
bool is_cp = false;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return true;
down_read(&sit_i->sentry_lock);
@@ -1983,7 +2163,7 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2366,7 +2546,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
return has_candidate;
}
-static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
+static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_policy *dpolicy,
unsigned int start, unsigned int end)
{
@@ -2376,12 +2556,15 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
struct discard_cmd *dc;
struct blk_plug plug;
int issued;
+ unsigned int trimmed = 0;
next:
issued = 0;
mutex_lock(&dcc->cmd_lock);
- f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root));
+ if (unlikely(dcc->rbtree_check))
+ f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
+ &dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2395,6 +2578,7 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
while (dc && dc->lstart <= end) {
struct rb_node *node;
+ int err = 0;
if (dc->len < dpolicy->granularity)
goto skip;
@@ -2404,19 +2588,24 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
goto skip;
}
- __submit_discard_cmd(sbi, dpolicy, dc);
+ err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
- if (++issued >= dpolicy->max_requests) {
+ if (issued >= dpolicy->max_requests) {
start = dc->lstart + dc->len;
+ if (err)
+ __remove_discard_cmd(sbi, dc);
+
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
- __wait_all_discard_cmd(sbi, NULL);
+ trimmed += __wait_all_discard_cmd(sbi, NULL);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto next;
}
skip:
node = rb_next(&dc->rb_node);
+ if (err)
+ __remove_discard_cmd(sbi, dc);
dc = rb_entry_safe(node, struct discard_cmd, rb_node);
if (fatal_signal_pending(current))
@@ -2425,6 +2614,8 @@ static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
blk_finish_plug(&plug);
mutex_unlock(&dcc->cmd_lock);
+
+ return trimmed;
}
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
@@ -2437,12 +2628,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
struct discard_policy dpolicy;
unsigned long long trimmed = 0;
int err = 0;
+ bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
- if (end <= MAIN_BLKADDR(sbi))
- return -EINVAL;
+ if (end < MAIN_BLKADDR(sbi))
+ goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
f2fs_msg(sbi->sb, KERN_WARNING,
@@ -2454,6 +2646,10 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
GET_SEGNO(sbi, end);
+ if (need_align) {
+ start_segno = rounddown(start_segno, sbi->segs_per_sec);
+ end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
+ }
cpc.reason = CP_DISCARD;
cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
@@ -2469,24 +2665,27 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
if (err)
goto out;
- start_block = START_BLOCK(sbi, start_segno);
- end_block = START_BLOCK(sbi, end_segno + 1);
-
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
- __issue_discard_cmd_range(sbi, &dpolicy, start_block, end_block);
-
/*
* We filed discard candidates, but actually we don't need to wait for
* all of them, since they'll be issued in idle time along with runtime
* discard option. User configuration looks like using runtime discard
* or periodic fstrim instead of it.
*/
- if (!test_opt(sbi, DISCARD)) {
- trimmed = __wait_discard_cmd_range(sbi, &dpolicy,
+ if (test_opt(sbi, DISCARD))
+ goto out;
+
+ start_block = START_BLOCK(sbi, start_segno);
+ end_block = START_BLOCK(sbi, end_segno + 1);
+
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
+ trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
start_block, end_block);
- range->len = F2FS_BLK_TO_BYTES(trimmed);
- }
+
+ trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
+ start_block, end_block);
out:
+ if (!err)
+ range->len = F2FS_BLK_TO_BYTES(trimmed);
return err;
}
@@ -2639,8 +2838,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
- is_inode_flag_set(inode, FI_ATOMIC_FILE) ||
- is_inode_flag_set(inode, FI_VOLATILE_FILE))
+ f2fs_is_atomic_file(inode) ||
+ f2fs_is_volatile_file(inode))
return CURSEG_HOT_DATA;
return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
} else {
@@ -2781,6 +2980,9 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
reallocate:
f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
&fio->new_blkaddr, sum, type, fio, true);
+ if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
+ invalidate_mapping_pages(META_MAPPING(fio->sbi),
+ fio->old_blkaddr, fio->old_blkaddr);
/* writeout dirty page into bdev */
f2fs_submit_page_write(fio);
@@ -2836,11 +3038,9 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
{
struct f2fs_sb_info *sbi = fio->sbi;
struct f2fs_summary sum;
- struct node_info ni;
f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
- f2fs_get_node_info(sbi, dn->nid, &ni);
- set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
+ set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
do_write_page(&sum, fio);
f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
@@ -2937,8 +3137,11 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
- if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
+ if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
+ invalidate_mapping_pages(META_MAPPING(sbi),
+ old_blkaddr, old_blkaddr);
update_sit_entry(sbi, old_blkaddr, -1);
+ }
locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
@@ -2992,7 +3195,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
{
struct page *cpage;
- if (!is_valid_blkaddr(blkaddr))
+ if (!is_valid_data_blkaddr(sbi, blkaddr))
return;
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
@@ -3002,7 +3205,7 @@ void f2fs_wait_on_block_writeback(struct f2fs_sb_info *sbi, block_t blkaddr)
}
}
-static void read_compacted_summaries(struct f2fs_sb_info *sbi)
+static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *seg_i;
@@ -3014,6 +3217,8 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
start = start_sum_block(sbi);
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
/* Step 1: restore nat cache */
@@ -3054,11 +3259,14 @@ static void read_compacted_summaries(struct f2fs_sb_info *sbi)
page = NULL;
page = f2fs_get_meta_page(sbi, start++);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
kaddr = (unsigned char *)page_address(page);
offset = 0;
}
}
f2fs_put_page(page, 1);
+ return 0;
}
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
@@ -3070,6 +3278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
unsigned short blk_off;
unsigned int segno = 0;
block_t blk_addr = 0;
+ int err = 0;
/* get segment number and block addr */
if (IS_DATASEG(type)) {
@@ -3093,6 +3302,8 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
}
new = f2fs_get_meta_page(sbi, blk_addr);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
sum = (struct f2fs_summary_block *)page_address(new);
if (IS_NODESEG(type)) {
@@ -3104,7 +3315,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
ns->ofs_in_node = 0;
}
} else {
- f2fs_restore_node_summary(sbi, segno, sum);
+ err = f2fs_restore_node_summary(sbi, segno, sum);
+ if (err)
+ goto out;
}
}
@@ -3124,8 +3337,9 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
curseg->alloc_type = ckpt->alloc_type[type];
curseg->next_blkoff = blk_off;
mutex_unlock(&curseg->curseg_mutex);
+out:
f2fs_put_page(new, 1);
- return 0;
+ return err;
}
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
@@ -3143,7 +3357,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
META_CP, true);
/* restore for compacted data summary */
- read_compacted_summaries(sbi);
+ err = read_compacted_summaries(sbi);
+ if (err)
+ return err;
type = CURSEG_HOT_NODE;
}
@@ -3274,7 +3490,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -3923,6 +4139,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
+ sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
sm_info->min_ssr_sections = reserved_sections(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index f18fc82..b3d9e31 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
- ((!is_valid_blkaddr(blk_addr)) ? \
+ ((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
@@ -215,7 +215,7 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
-#define MAX_SKIP_ATOMIC_COUNT 16
+#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct list_head list;
@@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
+ if (IS_CURSEC(sbi, secno))
+ goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
@@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
+skip_free:
spin_unlock(&free_i->segmap_lock);
}
@@ -645,13 +648,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
- if (PAGE_TYPE_OF_BIO(fio->type) == META &&
- (!is_read_io(fio->op) || fio->is_meta))
- BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
- blk_addr >= MAIN_BLKADDR(sbi));
+ if (__is_meta_io(fio))
+ verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
- BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
- blk_addr >= MAX_BLKADDR(sbi));
+ verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 17bcff7..896b885 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -41,7 +41,7 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
-char *fault_name[FAULT_MAX] = {
+char *f2fs_fault_name[FAULT_MAX] = {
[FAULT_KMALLOC] = "kmalloc",
[FAULT_KVMALLOC] = "kvmalloc",
[FAULT_PAGE_ALLOC] = "page alloc",
@@ -55,20 +55,24 @@ char *fault_name[FAULT_MAX] = {
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
};
-static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi,
- unsigned int rate)
+void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
+ unsigned int type)
{
struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info;
if (rate) {
atomic_set(&ffi->inject_ops, 0);
ffi->inject_rate = rate;
- ffi->inject_type = (1 << FAULT_MAX) - 1;
- } else {
- memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
+
+ if (type)
+ ffi->inject_type = type;
+
+ if (!rate && !type)
+ memset(ffi, 0, sizeof(struct f2fs_fault_info));
}
#endif
@@ -113,6 +117,7 @@ enum {
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
+ Opt_fault_type,
Opt_lazytime,
Opt_nolazytime,
Opt_quota,
@@ -170,6 +175,7 @@ static match_table_t f2fs_tokens = {
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
+ {Opt_fault_type, "fault_type=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_quota, "quota"},
@@ -347,12 +353,6 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
"QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
- if (f2fs_sb_has_quota_ino(sbi->sb) && f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
- return -1;
- }
return 0;
}
#endif
@@ -606,7 +606,18 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, arg);
+ f2fs_build_fault_attr(sbi, arg, F2FS_ALL_FAULT_TYPE);
+ set_opt(sbi, FAULT_INJECTION);
+#else
+ f2fs_msg(sb, KERN_INFO,
+ "FAULT_INJECTION was not selected");
+#endif
+ break;
+ case Opt_fault_type:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+ f2fs_build_fault_attr(sbi, 0, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
@@ -775,6 +786,19 @@ static int parse_options(struct super_block *sb, char *options)
#ifdef CONFIG_QUOTA
if (f2fs_check_quota_options(sbi))
return -EINVAL;
+#else
+ if (f2fs_sb_has_quota_ino(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sbi->sb, KERN_INFO,
+ "Filesystem with quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return -EINVAL;
+ }
+ if (f2fs_sb_has_project_quota(sbi->sb) && !f2fs_readonly(sbi->sb)) {
+ f2fs_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be "
+ "mounted RDWR without CONFIG_QUOTA");
+ return -EINVAL;
+ }
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
@@ -1030,6 +1054,10 @@ static void f2fs_put_super(struct super_block *sb)
/* our cp_error case, we can wait for any writeback page */
f2fs_flush_merged_writes(sbi);
+ f2fs_wait_on_all_pages_writeback(sbi);
+
+ f2fs_bug_on(sbi, sbi->fsync_node_num);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -1118,7 +1146,7 @@ static int f2fs_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
@@ -1141,7 +1169,7 @@ static int f2fs_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
@@ -1310,9 +1338,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
- if (test_opt(sbi, FAULT_INJECTION))
+ if (test_opt(sbi, FAULT_INJECTION)) {
seq_printf(seq, ",fault_injection=%u",
F2FS_OPTION(sbi).fault_info.inject_rate);
+ seq_printf(seq, ",fault_type=%u",
+ F2FS_OPTION(sbi).fault_info.inject_type);
+ }
#endif
#ifdef CONFIG_QUOTA
if (test_opt(sbi, QUOTA))
@@ -1343,6 +1374,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
seq_printf(seq, ",fsync_mode=%s", "strict");
+ else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER)
+ seq_printf(seq, ",fsync_mode=%s", "nobarrier");
return 0;
}
@@ -1355,7 +1388,8 @@ static void default_options(struct f2fs_sb_info *sbi)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
F2FS_OPTION(sbi).test_dummy_encryption = false;
- sbi->readdir_ra = 1;
+ F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
+ F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
@@ -1365,12 +1399,12 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
- if (f2fs_sb_has_blkzoned(sbi->sb)) {
- set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ if (blk_queue_discard(bdev_get_queue(sbi->sb->s_bdev)))
set_opt(sbi, DISCARD);
- } else {
+ if (f2fs_sb_has_blkzoned(sbi->sb))
+ set_opt_mode(sbi, F2FS_MOUNT_LFS);
+ else
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
- }
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
@@ -1379,9 +1413,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, POSIX_ACL);
#endif
-#ifdef CONFIG_F2FS_FAULT_INJECTION
- f2fs_build_fault_attr(sbi, 0);
-#endif
+ f2fs_build_fault_attr(sbi, 0, 0);
}
#ifdef CONFIG_QUOTA
@@ -2229,9 +2261,9 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return 1;
}
- if (secs_per_zone > total_sections) {
+ if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone (%u > %u)",
+ "Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
return 1;
}
@@ -2282,12 +2314,20 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
unsigned int main_segs, blocks_per_seg;
+ unsigned int sit_segs, nat_segs;
+ unsigned int sit_bitmap_size, nat_bitmap_size;
+ unsigned int log_blocks_per_seg;
+ unsigned int segment_count_main;
+ unsigned int cp_pack_start_sum, cp_payload;
+ block_t user_block_count;
int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
- fsmeta += le32_to_cpu(raw_super->segment_count_sit);
- fsmeta += le32_to_cpu(raw_super->segment_count_nat);
+ sit_segs = le32_to_cpu(raw_super->segment_count_sit);
+ fsmeta += sit_segs;
+ nat_segs = le32_to_cpu(raw_super->segment_count_nat);
+ fsmeta += nat_segs;
fsmeta += le32_to_cpu(ckpt->rsvd_segment_count);
fsmeta += le32_to_cpu(raw_super->segment_count_ssa);
@@ -2304,6 +2344,16 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ user_block_count = le64_to_cpu(ckpt->user_block_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+ if (!user_block_count || user_block_count >=
+ segment_count_main << log_blocks_per_seg) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong user_block_count: %u", user_block_count);
+ return 1;
+ }
+
main_segs = le32_to_cpu(raw_super->segment_count_main);
blocks_per_seg = sbi->blocks_per_seg;
@@ -2318,6 +2368,28 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
return 1;
}
+ sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize);
+ nat_bitmap_size = le32_to_cpu(ckpt->nat_ver_bitmap_bytesize);
+
+ if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
+ nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
+ return 1;
+ }
+
+ cp_pack_start_sum = __start_sum_addr(sbi);
+ cp_payload = __cp_payload(sbi);
+ if (cp_pack_start_sum < cp_payload + 1 ||
+ cp_pack_start_sum > blocks_per_seg - 1 -
+ NR_CURSEG_TYPE) {
+ f2fs_msg(sbi->sb, KERN_ERR,
+ "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -2651,6 +2723,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
sm_i->dcc_info->discard_granularity = 1;
sm_i->ipu_policy = 1 << F2FS_IPU_FORCE;
}
+
+ sbi->readdir_ra = 1;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
@@ -2700,9 +2774,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
- F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
- F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
-
/* precompute checksum seed for metadata */
if (f2fs_sb_has_inode_chksum(sb))
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -2771,6 +2842,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
+ mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -2865,6 +2937,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_init_ino_entry_info(sbi);
+ f2fs_init_fsync_node_info(sbi);
+
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
@@ -2912,10 +2986,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
err = PTR_ERR(root);
goto free_stats;
}
- if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
+ if (!S_ISDIR(root->i_mode) || !root->i_blocks ||
+ !root->i_size || !root->i_nlink) {
iput(root);
err = -EINVAL;
- goto free_node_inode;
+ goto free_stats;
}
sb->s_root = d_make_root(root); /* allocate root dentry */
@@ -2929,10 +3004,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_root_inode;
#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
+ /* Enable quota usage during mount */
if (f2fs_sb_has_quota_ino(sb) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err) {
@@ -3090,9 +3162,19 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
if (sb->s_root) {
- set_sbi_flag(F2FS_SB(sb), SBI_IS_CLOSE);
- f2fs_stop_gc_thread(F2FS_SB(sb));
- f2fs_stop_discard_thread(F2FS_SB(sb));
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+
+ set_sbi_flag(sbi, SBI_IS_CLOSE);
+ f2fs_stop_gc_thread(sbi);
+ f2fs_stop_discard_thread(sbi);
+
+ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+ !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ struct cp_control cpc = {
+ .reason = CP_UMOUNT,
+ };
+ f2fs_write_checkpoint(sbi, &cpc);
+ }
}
kill_block_super(sb);
}
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2e7e611..81c0e53 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
@@ -252,6 +253,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (t >= 1) {
sbi->gc_mode = GC_URGENT;
if (sbi->gc_thread) {
+ sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
@@ -286,8 +288,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
a->struct_type == GC_THREAD);
- if (gc_entry)
- down_read(&sbi->sb->s_umount);
+ if (gc_entry) {
+ if (!down_read_trylock(&sbi->sb->s_umount))
+ return -EAGAIN;
+ }
ret = __sbi_store(a, sbi, buf, count);
if (gc_entry)
up_read(&sbi->sb->s_umount);
@@ -393,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
+F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
@@ -445,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
+ ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
@@ -516,7 +522,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
-static int segment_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -543,7 +550,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int segment_bits_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -567,7 +575,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
-static int iostat_info_seq_show(struct seq_file *seq, void *offset)
+static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
+ void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -609,6 +618,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
+ void *offset)
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+ int i;
+
+ seq_puts(seq, "format: victim_secmap bitmaps\n");
+
+ for (i = 0; i < MAIN_SECS(sbi); i++) {
+ if ((i % 10) == 0)
+ seq_printf(seq, "%-10d", i);
+ seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
+ if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
+ seq_putc(seq, '\n');
+ else
+ seq_putc(seq, ' ');
+ }
+ return 0;
+}
+
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -658,6 +689,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
segment_bits_seq_show, sb);
proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
iostat_info_seq_show, sb);
+ proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
+ victim_bits_seq_show, sb);
}
return 0;
}
@@ -668,6 +701,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
+ remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 7082718..77a010e 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -37,9 +37,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -62,9 +59,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
@@ -100,12 +94,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
+ unsigned char old_advise = F2FS_I(inode)->i_advise;
+ unsigned char new_advise;
+
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
- F2FS_I(inode)->i_advise |= *(char *)value;
+ new_advise = *(char *)value;
+ if (new_advise & ~FADVISE_MODIFIABLE_BITS)
+ return -EINVAL;
+
+ new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
+ new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
+
+ F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index e9bed49..78d501c 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -225,7 +225,8 @@ static inline void cache_init(struct fat_cache_id *cid, int fclus, int dclus)
int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
{
struct super_block *sb = inode->i_sb;
- const int limit = sb->s_maxbytes >> MSDOS_SB(sb)->cluster_bits;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const int limit = sb->s_maxbytes >> sbi->cluster_bits;
struct fat_entry fatent;
struct fat_cache_id cid;
int nr;
@@ -234,6 +235,12 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
*fclus = 0;
*dclus = MSDOS_I(inode)->i_start;
+ if (!fat_valid_entry(sbi, *dclus)) {
+ fat_fs_error_ratelimit(sb,
+ "%s: invalid start cluster (i_pos %lld, start %08x)",
+ __func__, MSDOS_I(inode)->i_pos, *dclus);
+ return -EIO;
+ }
if (cluster == 0)
return 0;
@@ -250,9 +257,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
fat_fs_error_ratelimit(sb,
- "%s: detected the cluster chain loop"
- " (i_pos %lld)", __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: detected the cluster chain loop (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
}
@@ -262,9 +268,8 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
goto out;
else if (nr == FAT_ENT_FREE) {
fat_fs_error_ratelimit(sb,
- "%s: invalid cluster chain (i_pos %lld)",
- __func__,
- MSDOS_I(inode)->i_pos);
+ "%s: invalid cluster chain (i_pos %lld)",
+ __func__, MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
} else if (nr == FAT_ENT_EOF) {
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 8e100c3bf7..7f5f369 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -1130,7 +1130,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
return err;
}
-int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
+int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts)
{
struct super_block *sb = dir->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8fc1093..9d7d2d5 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -304,7 +304,7 @@ extern int fat_scan_logstart(struct inode *dir, int i_logstart,
struct fat_slot_info *sinfo);
extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
struct msdos_dir_entry **de);
-extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
+extern int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts);
extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots,
struct fat_slot_info *sinfo);
extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo);
@@ -348,6 +348,11 @@ static inline void fatent_brelse(struct fat_entry *fatent)
fatent->fat_inode = NULL;
}
+static inline bool fat_valid_entry(struct msdos_sb_info *sbi, int entry)
+{
+ return FAT_START_ENT <= entry && entry < sbi->max_cluster;
+}
+
extern void fat_ent_access_init(struct super_block *sb);
extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent,
int entry);
@@ -357,6 +362,7 @@ extern int fat_alloc_clusters(struct inode *inode, int *cluster,
int nr_cluster);
extern int fat_free_clusters(struct inode *inode, int cluster);
extern int fat_count_free_clusters(struct super_block *sb);
+extern int fat_trim_fs(struct inode *inode, struct fstrim_range *range);
/* fat/file.c */
extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
@@ -406,9 +412,9 @@ void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...);
} while (0)
extern int fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
-extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs);
-extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index bac10de..defc216 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -4,6 +4,7 @@
*/
#include <linux/blkdev.h>
+#include <linux/sched/signal.h>
#include "fat.h"
struct fatent_operations {
@@ -23,7 +24,7 @@ static void fat12_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = entry + (entry >> 1);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -33,7 +34,7 @@ static void fat_ent_blocknr(struct super_block *sb, int entry,
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int bytes = (entry << sbi->fatent_shift);
- WARN_ON(entry < FAT_START_ENT || sbi->max_cluster <= entry);
+ WARN_ON(!fat_valid_entry(sbi, entry));
*offset = bytes & (sb->s_blocksize - 1);
*blocknr = sbi->fat_start + (bytes >> sb->s_blocksize_bits);
}
@@ -353,7 +354,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
int err, offset;
sector_t blocknr;
- if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
+ if (!fat_valid_entry(sbi, entry)) {
fatent_brelse(fatent);
fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
@@ -690,3 +691,104 @@ int fat_count_free_clusters(struct super_block *sb)
unlock_fat(sbi);
return err;
}
+
+static int fat_trim_clusters(struct super_block *sb, u32 clus, u32 nr_clus)
+{
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ return sb_issue_discard(sb, fat_clus_to_blknr(sbi, clus),
+ nr_clus * sbi->sec_per_clus, GFP_NOFS, 0);
+}
+
+int fat_trim_fs(struct inode *inode, struct fstrim_range *range)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ const struct fatent_operations *ops = sbi->fatent_ops;
+ struct fat_entry fatent;
+ u64 ent_start, ent_end, minlen, trimmed = 0;
+ u32 free = 0;
+ unsigned long reada_blocks, reada_mask, cur_block = 0;
+ int err = 0;
+
+ /*
+ * FAT data is organized as clusters, trim at the granulary of cluster.
+ *
+ * fstrim_range is in byte, convert vaules to cluster index.
+ * Treat sectors before data region as all used, not to trim them.
+ */
+ ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT);
+ ent_end = ent_start + (range->len >> sbi->cluster_bits) - 1;
+ minlen = range->minlen >> sbi->cluster_bits;
+
+ if (ent_start >= sbi->max_cluster || range->len < sbi->cluster_size)
+ return -EINVAL;
+ if (ent_end >= sbi->max_cluster)
+ ent_end = sbi->max_cluster - 1;
+
+ reada_blocks = FAT_READA_SIZE >> sb->s_blocksize_bits;
+ reada_mask = reada_blocks - 1;
+
+ fatent_init(&fatent);
+ lock_fat(sbi);
+ fatent_set_entry(&fatent, ent_start);
+ while (fatent.entry <= ent_end) {
+ /* readahead of fat blocks */
+ if ((cur_block & reada_mask) == 0) {
+ unsigned long rest = sbi->fat_length - cur_block;
+ fat_ent_reada(sb, &fatent, min(reada_blocks, rest));
+ }
+ cur_block++;
+
+ err = fat_ent_read_block(sb, &fatent);
+ if (err)
+ goto error;
+ do {
+ if (ops->ent_get(&fatent) == FAT_ENT_FREE) {
+ free++;
+ } else if (free) {
+ if (free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+ free = 0;
+ }
+ } while (fat_ent_next(sbi, &fatent) && fatent.entry <= ent_end);
+
+ if (fatal_signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto error;
+ }
+
+ if (need_resched()) {
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+ cond_resched();
+ lock_fat(sbi);
+ }
+ }
+ /* handle scenario when tail entries are all free */
+ if (free && free >= minlen) {
+ u32 clus = fatent.entry - free;
+
+ err = fat_trim_clusters(sb, clus, free);
+ if (err && err != -EOPNOTSUPP)
+ goto error;
+ if (!err)
+ trimmed += free;
+ err = 0;
+ }
+
+error:
+ fatent_brelse(&fatent);
+ unlock_fat(sbi);
+
+ range->len = trimmed << sbi->cluster_bits;
+
+ return err;
+}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 4724cc9..4f3d72f 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -121,6 +121,37 @@ static int fat_ioctl_get_volume_id(struct inode *inode, u32 __user *user_attr)
return put_user(sbi->vol_id, user_attr);
}
+static int fat_ioctl_fitrim(struct inode *inode, unsigned long arg)
+{
+ struct super_block *sb = inode->i_sb;
+ struct fstrim_range __user *user_range;
+ struct fstrim_range range;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
+ int err;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
+ user_range = (struct fstrim_range __user *)arg;
+ if (copy_from_user(&range, user_range, sizeof(range)))
+ return -EFAULT;
+
+ range.minlen = max_t(unsigned int, range.minlen,
+ q->limits.discard_granularity);
+
+ err = fat_trim_fs(inode, &range);
+ if (err < 0)
+ return err;
+
+ if (copy_to_user(user_range, &range, sizeof(range)))
+ return -EFAULT;
+
+ return 0;
+}
+
long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -133,6 +164,8 @@ long fat_generic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return fat_ioctl_set_attributes(filp, user_attr);
case FAT_IOCTL_GET_VOLUME_ID:
return fat_ioctl_get_volume_id(inode, user_attr);
+ case FITRIM:
+ return fat_ioctl_fitrim(inode, arg);
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bfd589e..d6b81e3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -508,7 +508,6 @@ static int fat_validate_dir(struct inode *dir)
/* doesn't deal with root inode */
int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
- struct timespec ts;
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
@@ -559,14 +558,11 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
- fat_time_fat2unix(sbi, &ts, de->time, de->date, 0);
- inode->i_mtime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
if (sbi->options.isvfat) {
- fat_time_fat2unix(sbi, &ts, de->ctime,
+ fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime,
de->cdate, de->ctime_cs);
- inode->i_ctime = timespec_to_timespec64(ts);
- fat_time_fat2unix(sbi, &ts, 0, de->adate, 0);
- inode->i_atime = timespec_to_timespec64(ts);
+ fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
} else
inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -843,7 +839,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
static int __fat_write_inode(struct inode *inode, int wait)
{
- struct timespec ts;
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
@@ -881,16 +876,13 @@ static int __fat_write_inode(struct inode *inode, int wait)
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
- ts = timespec64_to_timespec(inode->i_mtime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->time,
+ fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
__le16 atime;
- ts = timespec64_to_timespec(inode->i_ctime);
- fat_time_unix2fat(sbi, &ts, &raw_entry->ctime,
+ fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime,
&raw_entry->cdate, &raw_entry->ctime_cs);
- ts = timespec64_to_timespec(inode->i_atime);
- fat_time_unix2fat(sbi, &ts, &atime,
+ fat_time_unix2fat(sbi, &inode->i_atime, &atime,
&raw_entry->adate, NULL);
}
spin_unlock(&sbi->inode_hash_lock);
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index f9bdc1e..573836d 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -180,17 +180,18 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100)
/* Linear day numbers of the respective 1sts in non-leap years. */
-static time_t days_in_year[] = {
+static long days_in_year[] = {
/* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */
0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0,
};
/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */
-void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 __time, __le16 __date, u8 time_cs)
{
u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date);
- time_t second, day, leap_day, month, year;
+ time64_t second;
+ long day, leap_day, month, year;
year = date >> 9;
month = max(1, (date >> 5) & 0xf);
@@ -205,7 +206,7 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
second = (time & 0x1f) << 1;
second += ((time >> 5) & 0x3f) * SECS_PER_MIN;
second += (time >> 11) * SECS_PER_HOUR;
- second += (year * 365 + leap_day
+ second += (time64_t)(year * 365 + leap_day
+ days_in_year[month] + day
+ DAYS_DELTA) * SECS_PER_DAY;
@@ -224,11 +225,11 @@ void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts,
}
/* Convert linear UNIX date to a FAT time/date pair. */
-void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts,
+void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec64 *ts,
__le16 *time, __le16 *date, u8 *time_cs)
{
struct tm tm;
- time_to_tm(ts->tv_sec,
+ time64_to_tm(ts->tv_sec,
(sbi->options.tz_set ? sbi->options.time_offset :
-sys_tz.tz_minuteswest) * SECS_PER_MIN, &tm);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 16a832c..efb8c40 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -225,7 +225,7 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
/***** Creates a directory entry (name is already formatted). */
static int msdos_add_entry(struct inode *dir, const unsigned char *name,
int is_dir, int is_hid, int cluster,
- struct timespec *ts, struct fat_slot_info *sinfo)
+ struct timespec64 *ts, struct fat_slot_info *sinfo)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
struct msdos_dir_entry de;
@@ -250,7 +250,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
if (err)
return err;
- dir->i_ctime = dir->i_mtime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
@@ -267,7 +267,6 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode = NULL;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
unsigned char msdos_name[MSDOS_NAME];
int err, is_hid;
@@ -286,8 +285,7 @@ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo);
if (err)
goto out;
inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
@@ -347,7 +345,6 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
unsigned char msdos_name[MSDOS_NAME];
struct timespec64 ts;
- struct timespec t;
int err, is_hid, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
@@ -365,13 +362,12 @@ static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &t, &sinfo);
+ err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo);
if (err)
goto out_free;
inc_nlink(dir);
@@ -503,9 +499,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- struct timespec t = timespec64_to_timespec(ts);
err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 9a54691..82cd1e6 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -577,7 +577,7 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
static int vfat_build_slots(struct inode *dir, const unsigned char *name,
int len, int is_dir, int cluster,
- struct timespec *ts,
+ struct timespec64 *ts,
struct msdos_dir_slot *slots, int *nr_slots)
{
struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
@@ -653,7 +653,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name,
}
static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
- int is_dir, int cluster, struct timespec *ts,
+ int is_dir, int cluster, struct timespec64 *ts,
struct fat_slot_info *sinfo)
{
struct msdos_dir_slot *slots;
@@ -678,7 +678,7 @@ static int vfat_add_entry(struct inode *dir, const struct qstr *qname,
goto cleanup;
/* update timestamp */
- dir->i_ctime = dir->i_mtime = dir->i_atime = timespec_to_timespec64(*ts);
+ dir->i_ctime = dir->i_mtime = dir->i_atime = *ts;
if (IS_DIRSYNC(dir))
(void)fat_sync_inode(dir);
else
@@ -762,14 +762,12 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo);
if (err)
goto out;
inode_inc_iversion(dir);
@@ -853,19 +851,17 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
struct fat_slot_info sinfo;
struct timespec64 ts;
- struct timespec t;
int err, cluster;
mutex_lock(&MSDOS_SB(sb)->s_lock);
ts = current_time(dir);
- t = timespec64_to_timespec(ts);
- cluster = fat_alloc_new_dir(dir, &t);
+ cluster = fat_alloc_new_dir(dir, &ts);
if (cluster < 0) {
err = cluster;
goto out;
}
- err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &t, &sinfo);
+ err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo);
if (err)
goto out_free;
inode_inc_iversion(dir);
@@ -904,7 +900,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *old_inode, *new_inode;
struct fat_slot_info old_sinfo, sinfo;
struct timespec64 ts;
- struct timespec t;
loff_t new_i_pos;
int err, is_dir, update_dotdot, corrupt = 0;
struct super_block *sb = old_dir->i_sb;
@@ -939,9 +934,8 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
new_i_pos = MSDOS_I(new_inode)->i_pos;
fat_detach(new_inode);
} else {
- t = timespec64_to_timespec(ts);
err = vfat_add_entry(new_dir, &new_dentry->d_name, is_dir, 0,
- &t, &sinfo);
+ &ts, &sinfo);
if (err)
goto out;
new_i_pos = sinfo.i_pos;
diff --git a/fs/hfsplus/Kconfig b/fs/hfsplus/Kconfig
index 7cc8b4a..a633718 100644
--- a/fs/hfsplus/Kconfig
+++ b/fs/hfsplus/Kconfig
@@ -11,18 +11,3 @@
MacOS 8. It includes all Mac specific filesystem data such as
data forks and creator codes, but it also has several UNIX
style features such as file ownership and permissions.
-
-config HFSPLUS_FS_POSIX_ACL
- bool "HFS+ POSIX Access Control Lists"
- depends on HFSPLUS_FS
- select FS_POSIX_ACL
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- It needs to understand that POSIX ACLs are treated only under
- Linux. POSIX ACLs doesn't mean something under Mac OS X.
- Mac OS X beginning with version 10.4 ("Tiger") support NFSv4 ACLs,
- which are part of the NFSv4 standard.
-
- If you don't know what Access Control Lists are, say N
diff --git a/fs/hfsplus/Makefile b/fs/hfsplus/Makefile
index f6a5654..9ed20e6 100644
--- a/fs/hfsplus/Makefile
+++ b/fs/hfsplus/Makefile
@@ -8,5 +8,3 @@
hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
-
-hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o
diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h
deleted file mode 100644
index 488c2b7..0000000
--- a/fs/hfsplus/acl.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/fs/hfsplus/acl.h
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include <linux/posix_acl_xattr.h>
-
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
-
-/* posix_acl.c */
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type);
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type);
-extern int hfsplus_init_posix_acl(struct inode *, struct inode *);
-
-#else /* CONFIG_HFSPLUS_FS_POSIX_ACL */
-#define hfsplus_get_posix_acl NULL
-#define hfsplus_set_posix_acl NULL
-
-static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- return 0;
-}
-#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index b525437..c5a70f8 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -18,7 +18,6 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static inline void hfsplus_instantiate(struct dentry *dentry,
struct inode *inode, u32 cnid)
@@ -455,7 +454,7 @@ static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
if (res)
goto out_err;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
@@ -496,7 +495,7 @@ static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
if (res)
goto failed_mknod;
- res = hfsplus_init_inode_security(inode, dir, &dentry->d_name);
+ res = hfsplus_init_security(inode, dir, &dentry->d_name);
if (res == -EOPNOTSUPP)
res = 0; /* Operation is not supported. */
else if (res) {
@@ -567,10 +566,6 @@ const struct inode_operations hfsplus_dir_inode_operations = {
.mknod = hfsplus_mknod,
.rename = hfsplus_rename,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
const struct file_operations hfsplus_dir_operations = {
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index e877093..8e0f597 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -336,6 +336,9 @@ static int hfsplus_free_extents(struct super_block *sb,
int i;
int err = 0;
+ /* Mapping the allocation file may lock the extent tree */
+ WARN_ON(mutex_is_locked(&HFSPLUS_SB(sb)->ext_tree->tree_lock));
+
hfsplus_dump_extent(extent);
for (i = 0; i < 8; extent++, i++) {
count = be32_to_cpu(extent->block_count);
@@ -415,11 +418,13 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
if (res)
break;
start = be32_to_cpu(fd.key->ext.start_block);
- hfsplus_free_extents(sb, ext_entry,
- total_blocks - start,
- total_blocks);
hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
+ hfsplus_free_extents(sb, ext_entry, total_blocks - start,
+ total_blocks);
total_blocks = start;
+ mutex_lock(&fd.tree->tree_lock);
} while (total_blocks > blocks);
hfs_find_exit(&fd);
@@ -576,15 +581,20 @@ void hfsplus_file_truncate(struct inode *inode)
}
while (1) {
if (alloc_cnt == hip->first_blocks) {
+ mutex_unlock(&fd.tree->tree_lock);
hfsplus_free_extents(sb, hip->first_extents,
alloc_cnt, alloc_cnt - blk_cnt);
hfsplus_dump_extent(hip->first_extents);
hip->first_blocks = blk_cnt;
+ mutex_lock(&fd.tree->tree_lock);
break;
}
res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
if (res)
break;
+ hfs_brec_remove(&fd);
+
+ mutex_unlock(&fd.tree->tree_lock);
start = hip->cached_start;
hfsplus_free_extents(sb, hip->cached_extents,
alloc_cnt - start, alloc_cnt - blk_cnt);
@@ -596,7 +606,7 @@ void hfsplus_file_truncate(struct inode *inode)
alloc_cnt = start;
hip->cached_start = hip->cached_blocks = 0;
hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
- hfs_brec_remove(&fd);
+ mutex_lock(&fd.tree->tree_lock);
}
hfs_find_exit(&fd);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d9255ab..8e03943 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -31,7 +31,6 @@
#define DBG_EXTENT 0x00000020
#define DBG_BITMAP 0x00000040
#define DBG_ATTR_MOD 0x00000080
-#define DBG_ACL_MOD 0x00000100
#if 0
#define DBG_MASK (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index c824f70..8e9427a 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -21,7 +21,6 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_readpage(struct file *file, struct page *page)
{
@@ -267,12 +266,6 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
setattr_copy(inode, attr);
mark_inode_dirty(inode);
- if (attr->ia_valid & ATTR_MODE) {
- error = posix_acl_chmod(inode, inode->i_mode);
- if (unlikely(error))
- return error;
- }
-
return 0;
}
@@ -336,10 +329,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
static const struct inode_operations hfsplus_file_inode_operations = {
.setattr = hfsplus_setattr,
.listxattr = hfsplus_listxattr,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- .get_acl = hfsplus_get_posix_acl,
- .set_acl = hfsplus_set_posix_acl,
-#endif
};
static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
deleted file mode 100644
index 066114d..0000000
--- a/fs/hfsplus/posix_acl.c
+++ /dev/null
@@ -1,144 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/fs/hfsplus/posix_acl.c
- *
- * Vyacheslav Dubeyko <slava@dubeyko.com>
- *
- * Handler for Posix Access Control Lists (ACLs) support.
- */
-
-#include "hfsplus_fs.h"
-#include "xattr.h"
-#include "acl.h"
-
-struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
-{
- struct posix_acl *acl;
- char *xattr_name;
- char *value = NULL;
- ssize_t size;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- break;
- default:
- return ERR_PTR(-EINVAL);
- }
-
- size = __hfsplus_getxattr(inode, xattr_name, NULL, 0);
-
- if (size > 0) {
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return ERR_PTR(-ENOMEM);
- size = __hfsplus_getxattr(inode, xattr_name, value, size);
- }
-
- if (size > 0)
- acl = posix_acl_from_xattr(&init_user_ns, value, size);
- else if (size == -ENODATA)
- acl = NULL;
- else
- acl = ERR_PTR(size);
-
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- return acl;
-}
-
-static int __hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
- int type)
-{
- int err;
- char *xattr_name;
- size_t size = 0;
- char *value = NULL;
-
- hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- break;
-
- default:
- return -EINVAL;
- }
-
- if (acl) {
- size = posix_acl_xattr_size(acl->a_count);
- if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE))
- return -ENOMEM;
- value = (char *)hfsplus_alloc_attr_entry();
- if (unlikely(!value))
- return -ENOMEM;
- err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
- if (unlikely(err < 0))
- goto end_set_acl;
- }
-
- err = __hfsplus_setxattr(inode, xattr_name, value, size, 0);
-
-end_set_acl:
- hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
-
- if (!err)
- set_cached_acl(inode, type, acl);
-
- return err;
-}
-
-int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type)
-{
- int err;
-
- if (type == ACL_TYPE_ACCESS && acl) {
- err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (err)
- return err;
- }
- return __hfsplus_set_posix_acl(inode, acl, type);
-}
-
-int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
-{
- int err = 0;
- struct posix_acl *default_acl, *acl;
-
- hfs_dbg(ACL_MOD,
- "[%s]: ino %lu, dir->ino %lu\n",
- __func__, inode->i_ino, dir->i_ino);
-
- if (S_ISLNK(inode->i_mode))
- return 0;
-
- err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
- if (err)
- return err;
-
- if (default_acl) {
- err = __hfsplus_set_posix_acl(inode, default_acl,
- ACL_TYPE_DEFAULT);
- posix_acl_release(default_acl);
- }
-
- if (acl) {
- if (!err)
- err = __hfsplus_set_posix_acl(inode, acl,
- ACL_TYPE_ACCESS);
- posix_acl_release(acl);
- }
- return err;
-}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a6c0f54..eb4535e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -524,8 +524,10 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
- if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
+ if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) {
+ err = -EINVAL;
goto out_put_root;
+ }
inode = hfsplus_iget(sb, be32_to_cpu(entry.folder.id));
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -562,8 +564,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
goto out_put_hidden_dir;
}
- err = hfsplus_init_inode_security(sbi->hidden_dir,
- root, &str);
+ err = hfsplus_init_security(sbi->hidden_dir,
+ root, &str);
if (err == -EOPNOTSUPP)
err = 0; /* Operation is not supported. */
else if (err) {
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index dfa90c2..c8d1b2be7 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -272,8 +272,8 @@ static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
return size;
}
-/* Decomposes a single unicode character. */
-static inline u16 *decompose_unichar(wchar_t uc, int *size)
+/* Decomposes a non-Hangul unicode character. */
+static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size)
{
int off;
@@ -296,6 +296,51 @@ static inline u16 *decompose_unichar(wchar_t uc, int *size)
return hfsplus_decompose_table + (off / 4);
}
+/*
+ * Try to decompose a unicode character as Hangul. Return 0 if @uc is not
+ * precomposed Hangul, otherwise return the length of the decomposition.
+ *
+ * This function was adapted from sample code from the Unicode Standard
+ * Annex #15: Unicode Normalization Forms, version 3.2.0.
+ *
+ * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed
+ * under the Terms of Use in http://www.unicode.org/copyright.html.
+ */
+static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result)
+{
+ int index;
+ int l, v, t;
+
+ index = uc - Hangul_SBase;
+ if (index < 0 || index >= Hangul_SCount)
+ return 0;
+
+ l = Hangul_LBase + index / Hangul_NCount;
+ v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount;
+ t = Hangul_TBase + index % Hangul_TCount;
+
+ result[0] = l;
+ result[1] = v;
+ if (t != Hangul_TBase) {
+ result[2] = t;
+ return 3;
+ }
+ return 2;
+}
+
+/* Decomposes a single unicode character. */
+static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer)
+{
+ u16 *result;
+
+ /* Hangul is handled separately */
+ result = hangul_buffer;
+ *size = hfsplus_try_decompose_hangul(uc, result);
+ if (*size == 0)
+ result = hfsplus_decompose_nonhangul(uc, size);
+ return result;
+}
+
int hfsplus_asc2uni(struct super_block *sb,
struct hfsplus_unistr *ustr, int max_unistr_len,
const char *astr, int len)
@@ -303,13 +348,14 @@ int hfsplus_asc2uni(struct super_block *sb,
int size, dsize, decompose;
u16 *dstr, outlen = 0;
wchar_t c;
+ u16 dhangul[3];
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
while (outlen < max_unistr_len && len > 0) {
size = asc2unichar(sb, astr, len, &c);
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
@@ -344,6 +390,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
unsigned long hash;
wchar_t c;
u16 c2;
+ u16 dhangul[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -357,7 +404,7 @@ int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
len -= size;
if (decompose)
- dstr = decompose_unichar(c, &dsize);
+ dstr = decompose_unichar(c, &dsize, dhangul);
else
dstr = NULL;
if (dstr) {
@@ -396,6 +443,7 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
const char *astr1, *astr2;
u16 c1, c2;
wchar_t c;
+ u16 dhangul_1[3], dhangul_2[3];
casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
@@ -413,7 +461,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
len1 -= size;
if (decompose)
- dstr1 = decompose_unichar(c, &dsize1);
+ dstr1 = decompose_unichar(c, &dsize1,
+ dhangul_1);
if (!decompose || !dstr1) {
c1 = c;
dstr1 = &c1;
@@ -427,7 +476,8 @@ int hfsplus_compare_dentry(const struct dentry *dentry,
len2 -= size;
if (decompose)
- dstr2 = decompose_unichar(c, &dsize2);
+ dstr2 = decompose_unichar(c, &dsize2,
+ dhangul_2);
if (!decompose || !dstr2) {
c2 = c;
dstr2 = &c2;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index e538b75..d5403b4 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -8,10 +8,8 @@
*/
#include "hfsplus_fs.h"
-#include <linux/posix_acl_xattr.h>
#include <linux/nls.h>
#include "xattr.h"
-#include "acl.h"
static int hfsplus_removexattr(struct inode *inode, const char *name);
@@ -19,10 +17,6 @@ const struct xattr_handler *hfsplus_xattr_handlers[] = {
&hfsplus_xattr_osx_handler,
&hfsplus_xattr_user_handler,
&hfsplus_xattr_trusted_handler,
-#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
- &posix_acl_access_xattr_handler,
- &posix_acl_default_xattr_handler,
-#endif
&hfsplus_xattr_security_handler,
NULL
};
diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h
index a4e611d..d14e362 100644
--- a/fs/hfsplus/xattr.h
+++ b/fs/hfsplus/xattr.h
@@ -38,7 +38,4 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
int hfsplus_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr);
-int hfsplus_init_inode_security(struct inode *inode, struct inode *dir,
- const struct qstr *qstr);
-
#endif
diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c
index f5550b0..cfbe6a3 100644
--- a/fs/hfsplus/xattr_security.c
+++ b/fs/hfsplus/xattr_security.c
@@ -12,7 +12,6 @@
#include "hfsplus_fs.h"
#include "xattr.h"
-#include "acl.h"
static int hfsplus_security_getxattr(const struct xattr_handler *handler,
struct dentry *unused, struct inode *inode,
@@ -72,18 +71,6 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir,
&hfsplus_initxattrs, NULL);
}
-int hfsplus_init_inode_security(struct inode *inode,
- struct inode *dir,
- const struct qstr *qstr)
-{
- int err;
-
- err = hfsplus_init_posix_acl(inode, dir);
- if (!err)
- err = hfsplus_init_security(inode, dir, qstr);
- return err;
-}
-
const struct xattr_handler hfsplus_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.get = hfsplus_security_getxattr,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 346a146..32920a1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -410,7 +410,6 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, current->mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
@@ -595,7 +594,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* allocation routines. If NUMA is configured, use page index
* as input to create an allocation policy.
*/
- memset(&pseudo_vma, 0, sizeof(struct vm_area_struct));
vma_init(&pseudo_vma, mm);
pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pseudo_vma.vm_file = file;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index c5fa3de..7da0fac 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -51,7 +51,7 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return err;
}
-static int nilfs_page_mkwrite(struct vm_fault *vmf)
+static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct page *page = vmf->page;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6ffeca8..1b9067c 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -834,7 +834,7 @@ static int nilfs_setup_super(struct super_block *sb, int is_mount)
sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT);
sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1);
- sbp[0]->s_mtime = cpu_to_le64(get_seconds());
+ sbp[0]->s_mtime = cpu_to_le64(ktime_get_real_seconds());
skip_mount_setup:
sbp[0]->s_state =
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ec350d4..276914a 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -603,6 +603,10 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
if (!inode)
goto out_drop_write;
+ spin_lock(&inode->i_lock);
+ inode->i_state |= I_CREATING;
+ spin_unlock(&inode->i_lock);
+
inode_init_owner(inode, dentry->d_parent->d_inode, mode);
attr.mode = inode->i_mode;
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 0eaeb41..817c02b 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -31,6 +31,7 @@
config PROC_KCORE
bool "/proc/kcore support" if !ARM
depends on PROC_FS && MMU
+ select CRASH_CORE
help
Provides a virtual ELF core file of the live kernel. This can
be read with gdb and other ELF tools. No modifications can be
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aaffc0c..ccf86f1 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -463,7 +463,7 @@ static int lstats_show_proc(struct seq_file *m, void *v)
if (!task)
return -ESRCH;
seq_puts(m, "Latency Top version : v0.1\n");
- for (i = 0; i < 32; i++) {
+ for (i = 0; i < LT_SAVECOUNT; i++) {
struct latency_record *lr = &task->latency_record[i];
if (lr->backtrace[0]) {
int q;
@@ -1366,10 +1366,8 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
if (!task)
return -ESRCH;
len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
- len = simple_read_from_buffer(buf, count, ppos, numbuf, len);
put_task_struct(task);
-
- return len;
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
}
static const struct file_operations proc_fail_nth_operations = {
@@ -2519,47 +2517,47 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
+ struct task_struct *task;
void *page;
- ssize_t length;
- struct task_struct *task = get_proc_task(inode);
+ int rv;
- length = -ESRCH;
- if (!task)
- goto out_no_task;
-
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
+ if (!task) {
+ rcu_read_unlock();
+ return -ESRCH;
+ }
/* A task may only write its own attributes. */
- length = -EACCES;
- if (current != task)
- goto out;
+ if (current != task) {
+ rcu_read_unlock();
+ return -EACCES;
+ }
+ rcu_read_unlock();
if (count > PAGE_SIZE)
count = PAGE_SIZE;
/* No partial writes. */
- length = -EINVAL;
if (*ppos != 0)
- goto out;
+ return -EINVAL;
page = memdup_user(buf, count);
if (IS_ERR(page)) {
- length = PTR_ERR(page);
+ rv = PTR_ERR(page);
goto out;
}
/* Guard against adverse ptrace interaction */
- length = mutex_lock_interruptible(¤t->signal->cred_guard_mutex);
- if (length < 0)
+ rv = mutex_lock_interruptible(¤t->signal->cred_guard_mutex);
+ if (rv < 0)
goto out_free;
- length = security_setprocattr(file->f_path.dentry->d_name.name,
- page, count);
+ rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
mutex_unlock(¤t->signal->cred_guard_mutex);
out_free:
kfree(page);
out:
- put_task_struct(task);
-out_no_task:
- return length;
+ return rv;
}
static const struct file_operations proc_pid_attr_operations = {
@@ -3309,12 +3307,12 @@ static const struct pid_entry tid_base_stuff[] = {
REG("cmdline", S_IRUGO, proc_pid_cmdline_ops),
ONE("stat", S_IRUGO, proc_tid_stat),
ONE("statm", S_IRUGO, proc_pid_statm),
- REG("maps", S_IRUGO, proc_tid_maps_operations),
+ REG("maps", S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_PROC_CHILDREN
REG("children", S_IRUGO, proc_tid_children_operations),
#endif
#ifdef CONFIG_NUMA
- REG("numa_maps", S_IRUGO, proc_tid_numa_maps_operations),
+ REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
#endif
REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations),
LNK("cwd", proc_cwd_link),
@@ -3324,7 +3322,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("mountinfo", S_IRUGO, proc_mountinfo_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
- REG("smaps", S_IRUGO, proc_tid_smaps_operations),
+ REG("smaps", S_IRUGO, proc_pid_smaps_operations),
REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
REG("pagemap", S_IRUSR, proc_pagemap_operations),
#endif
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index bb1c162..8ae1094 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -286,9 +286,9 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
if (!dir_emit_dots(file, ctx))
return 0;
+ i = ctx->pos - 2;
read_lock(&proc_subdir_lock);
de = pde_subdir_first(de);
- i = ctx->pos - 2;
for (;;) {
if (!de) {
read_unlock(&proc_subdir_lock);
@@ -309,8 +309,8 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
pde_put(de);
return 0;
}
- read_lock(&proc_subdir_lock);
ctx->pos++;
+ read_lock(&proc_subdir_lock);
next = pde_subdir_next(de);
pde_put(de);
de = next;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 85ffbd2..fc5306a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -105,8 +105,10 @@ void __init proc_init_kmemcache(void)
kmem_cache_create("pde_opener", sizeof(struct pde_opener), 0,
SLAB_ACCOUNT|SLAB_PANIC, NULL);
proc_dir_entry_cache = kmem_cache_create_usercopy(
- "proc_dir_entry", SIZEOF_PDE_SLOT, 0, SLAB_PANIC,
- OFFSETOF_PDE_NAME, SIZEOF_PDE_INLINE_NAME, NULL);
+ "proc_dir_entry", SIZEOF_PDE, 0, SLAB_PANIC,
+ offsetof(struct proc_dir_entry, inline_name),
+ SIZEOF_PDE_INLINE_NAME, NULL);
+ BUILD_BUG_ON(sizeof(struct proc_dir_entry) >= SIZEOF_PDE);
}
static int proc_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index da3dbfa..5185d7f 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -65,16 +65,13 @@ struct proc_dir_entry {
char inline_name[];
} __randomize_layout;
-#define OFFSETOF_PDE_NAME offsetof(struct proc_dir_entry, inline_name)
-#define SIZEOF_PDE_SLOT \
- (OFFSETOF_PDE_NAME + 34 <= 64 ? 64 : \
- OFFSETOF_PDE_NAME + 34 <= 128 ? 128 : \
- OFFSETOF_PDE_NAME + 34 <= 192 ? 192 : \
- OFFSETOF_PDE_NAME + 34 <= 256 ? 256 : \
- OFFSETOF_PDE_NAME + 34 <= 512 ? 512 : \
- 0)
-
-#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE_SLOT - OFFSETOF_PDE_NAME)
+#define SIZEOF_PDE ( \
+ sizeof(struct proc_dir_entry) < 128 ? 128 : \
+ sizeof(struct proc_dir_entry) < 192 ? 192 : \
+ sizeof(struct proc_dir_entry) < 256 ? 256 : \
+ sizeof(struct proc_dir_entry) < 512 ? 512 : \
+ 0)
+#define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry))
extern struct kmem_cache *proc_dir_entry_cache;
void pde_free(struct proc_dir_entry *pde);
@@ -116,12 +113,12 @@ static inline void *__PDE_DATA(const struct inode *inode)
return PDE(inode)->data;
}
-static inline struct pid *proc_pid(struct inode *inode)
+static inline struct pid *proc_pid(const struct inode *inode)
{
return PROC_I(inode)->pid;
}
-static inline struct task_struct *get_proc_task(struct inode *inode)
+static inline struct task_struct *get_proc_task(const struct inode *inode)
{
return get_pid_task(proc_pid(inode), PIDTYPE_PID);
}
@@ -285,7 +282,6 @@ struct proc_maps_private {
struct inode *inode;
struct task_struct *task;
struct mm_struct *mm;
- struct mem_size_stats *rollup;
#ifdef CONFIG_MMU
struct vm_area_struct *tail_vma;
#endif
@@ -297,12 +293,9 @@ struct proc_maps_private {
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode);
extern const struct file_operations proc_pid_maps_operations;
-extern const struct file_operations proc_tid_maps_operations;
extern const struct file_operations proc_pid_numa_maps_operations;
-extern const struct file_operations proc_tid_numa_maps_operations;
extern const struct file_operations proc_pid_smaps_operations;
extern const struct file_operations proc_pid_smaps_rollup_operations;
-extern const struct file_operations proc_tid_smaps_operations;
extern const struct file_operations proc_clear_refs_operations;
extern const struct file_operations proc_pagemap_operations;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e64ecb9..8046443 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -10,6 +10,7 @@
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
+#include <linux/crash_core.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kcore.h>
@@ -49,32 +50,23 @@ static struct proc_dir_entry *proc_root_kcore;
#define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
#endif
-/* An ELF note in memory */
-struct memelfnote
-{
- const char *name;
- int type;
- unsigned int datasz;
- void *data;
-};
-
static LIST_HEAD(kclist_head);
-static DEFINE_RWLOCK(kclist_lock);
+static DECLARE_RWSEM(kclist_lock);
static int kcore_need_update = 1;
-void
-kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
+/* This doesn't grab kclist_lock, so it should only be used at init time. */
+void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
+ int type)
{
new->addr = (unsigned long)addr;
new->size = size;
new->type = type;
- write_lock(&kclist_lock);
list_add_tail(&new->list, &kclist_head);
- write_unlock(&kclist_lock);
}
-static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
+static size_t get_kcore_size(int *nphdr, size_t *phdrs_len, size_t *notes_len,
+ size_t *data_offset)
{
size_t try, size;
struct kcore_list *m;
@@ -88,65 +80,29 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen)
size = try;
*nphdr = *nphdr + 1;
}
- *elf_buflen = sizeof(struct elfhdr) +
- (*nphdr + 2)*sizeof(struct elf_phdr) +
- 3 * ((sizeof(struct elf_note)) +
- roundup(sizeof(CORE_STR), 4)) +
- roundup(sizeof(struct elf_prstatus), 4) +
- roundup(sizeof(struct elf_prpsinfo), 4) +
- roundup(arch_task_struct_size, 4);
- *elf_buflen = PAGE_ALIGN(*elf_buflen);
- return size + *elf_buflen;
+
+ *phdrs_len = *nphdr * sizeof(struct elf_phdr);
+ *notes_len = (4 * sizeof(struct elf_note) +
+ 3 * ALIGN(sizeof(CORE_STR), 4) +
+ VMCOREINFO_NOTE_NAME_BYTES +
+ ALIGN(sizeof(struct elf_prstatus), 4) +
+ ALIGN(sizeof(struct elf_prpsinfo), 4) +
+ ALIGN(arch_task_struct_size, 4) +
+ ALIGN(vmcoreinfo_size, 4));
+ *data_offset = PAGE_ALIGN(sizeof(struct elfhdr) + *phdrs_len +
+ *notes_len);
+ return *data_offset + size;
}
-static void free_kclist_ents(struct list_head *head)
-{
- struct kcore_list *tmp, *pos;
-
- list_for_each_entry_safe(pos, tmp, head, list) {
- list_del(&pos->list);
- kfree(pos);
- }
-}
-/*
- * Replace all KCORE_RAM/KCORE_VMEMMAP information with passed list.
- */
-static void __kcore_update_ram(struct list_head *list)
-{
- int nphdr;
- size_t size;
- struct kcore_list *tmp, *pos;
- LIST_HEAD(garbage);
-
- write_lock(&kclist_lock);
- if (kcore_need_update) {
- list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
- if (pos->type == KCORE_RAM
- || pos->type == KCORE_VMEMMAP)
- list_move(&pos->list, &garbage);
- }
- list_splice_tail(list, &kclist_head);
- } else
- list_splice(list, &garbage);
- kcore_need_update = 0;
- proc_root_kcore->size = get_kcore_size(&nphdr, &size);
- write_unlock(&kclist_lock);
-
- free_kclist_ents(&garbage);
-}
-
-
#ifdef CONFIG_HIGHMEM
/*
* If no highmem, we can assume [0...max_low_pfn) continuous range of memory
* because memory hole is not as big as !HIGHMEM case.
* (HIGHMEM is special because part of memory is _invisible_ from the kernel.)
*/
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *head)
{
- LIST_HEAD(head);
struct kcore_list *ent;
- int ret = 0;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
@@ -154,9 +110,8 @@ static int kcore_update_ram(void)
ent->addr = (unsigned long)__va(0);
ent->size = max_low_pfn << PAGE_SHIFT;
ent->type = KCORE_RAM;
- list_add(&ent->list, &head);
- __kcore_update_ram(&head);
- return ret;
+ list_add(&ent->list, head);
+ return 0;
}
#else /* !CONFIG_HIGHMEM */
@@ -255,11 +210,10 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
return 1;
}
-static int kcore_update_ram(void)
+static int kcore_ram_list(struct list_head *list)
{
int nid, ret;
unsigned long end_pfn;
- LIST_HEAD(head);
/* Not inialized....update now */
/* find out "max pfn" */
@@ -271,258 +225,255 @@ static int kcore_update_ram(void)
end_pfn = node_end;
}
/* scan 0 to max_pfn */
- ret = walk_system_ram_range(0, end_pfn, &head, kclist_add_private);
- if (ret) {
- free_kclist_ents(&head);
+ ret = walk_system_ram_range(0, end_pfn, list, kclist_add_private);
+ if (ret)
return -ENOMEM;
- }
- __kcore_update_ram(&head);
- return ret;
+ return 0;
}
#endif /* CONFIG_HIGHMEM */
-/*****************************************************************************/
-/*
- * determine size of ELF note
- */
-static int notesize(struct memelfnote *en)
+static int kcore_update_ram(void)
{
- int sz;
+ LIST_HEAD(list);
+ LIST_HEAD(garbage);
+ int nphdr;
+ size_t phdrs_len, notes_len, data_offset;
+ struct kcore_list *tmp, *pos;
+ int ret = 0;
- sz = sizeof(struct elf_note);
- sz += roundup((strlen(en->name) + 1), 4);
- sz += roundup(en->datasz, 4);
+ down_write(&kclist_lock);
+ if (!xchg(&kcore_need_update, 0))
+ goto out;
- return sz;
-} /* end notesize() */
-
-/*****************************************************************************/
-/*
- * store a note in the header buffer
- */
-static char *storenote(struct memelfnote *men, char *bufp)
-{
- struct elf_note en;
-
-#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
-
- en.n_namesz = strlen(men->name) + 1;
- en.n_descsz = men->datasz;
- en.n_type = men->type;
-
- DUMP_WRITE(&en, sizeof(en));
- DUMP_WRITE(men->name, en.n_namesz);
-
- /* XXX - cast from long long to long to avoid need for libgcc.a */
- bufp = (char*) roundup((unsigned long)bufp,4);
- DUMP_WRITE(men->data, men->datasz);
- bufp = (char*) roundup((unsigned long)bufp,4);
-
-#undef DUMP_WRITE
-
- return bufp;
-} /* end storenote() */
-
-/*
- * store an ELF coredump header in the supplied buffer
- * nphdr is the number of elf_phdr to insert
- */
-static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
-{
- struct elf_prstatus prstatus; /* NT_PRSTATUS */
- struct elf_prpsinfo prpsinfo; /* NT_PRPSINFO */
- struct elf_phdr *nhdr, *phdr;
- struct elfhdr *elf;
- struct memelfnote notes[3];
- off_t offset = 0;
- struct kcore_list *m;
-
- /* setup ELF header */
- elf = (struct elfhdr *) bufp;
- bufp += sizeof(struct elfhdr);
- offset += sizeof(struct elfhdr);
- memcpy(elf->e_ident, ELFMAG, SELFMAG);
- elf->e_ident[EI_CLASS] = ELF_CLASS;
- elf->e_ident[EI_DATA] = ELF_DATA;
- elf->e_ident[EI_VERSION]= EV_CURRENT;
- elf->e_ident[EI_OSABI] = ELF_OSABI;
- memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
- elf->e_type = ET_CORE;
- elf->e_machine = ELF_ARCH;
- elf->e_version = EV_CURRENT;
- elf->e_entry = 0;
- elf->e_phoff = sizeof(struct elfhdr);
- elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
- elf->e_ehsize = sizeof(struct elfhdr);
- elf->e_phentsize= sizeof(struct elf_phdr);
- elf->e_phnum = nphdr;
- elf->e_shentsize= 0;
- elf->e_shnum = 0;
- elf->e_shstrndx = 0;
-
- /* setup ELF PT_NOTE program header */
- nhdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
- nhdr->p_type = PT_NOTE;
- nhdr->p_offset = 0;
- nhdr->p_vaddr = 0;
- nhdr->p_paddr = 0;
- nhdr->p_filesz = 0;
- nhdr->p_memsz = 0;
- nhdr->p_flags = 0;
- nhdr->p_align = 0;
-
- /* setup ELF PT_LOAD program header for every area */
- list_for_each_entry(m, &kclist_head, list) {
- phdr = (struct elf_phdr *) bufp;
- bufp += sizeof(struct elf_phdr);
- offset += sizeof(struct elf_phdr);
-
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = kc_vaddr_to_offset(m->addr) + dataoff;
- phdr->p_vaddr = (size_t)m->addr;
- if (m->type == KCORE_RAM || m->type == KCORE_TEXT)
- phdr->p_paddr = __pa(m->addr);
- else
- phdr->p_paddr = (elf_addr_t)-1;
- phdr->p_filesz = phdr->p_memsz = m->size;
- phdr->p_align = PAGE_SIZE;
+ ret = kcore_ram_list(&list);
+ if (ret) {
+ /* Couldn't get the RAM list, try again next time. */
+ WRITE_ONCE(kcore_need_update, 1);
+ list_splice_tail(&list, &garbage);
+ goto out;
}
- /*
- * Set up the notes in similar form to SVR4 core dumps made
- * with info from their /proc.
- */
- nhdr->p_offset = offset;
+ list_for_each_entry_safe(pos, tmp, &kclist_head, list) {
+ if (pos->type == KCORE_RAM || pos->type == KCORE_VMEMMAP)
+ list_move(&pos->list, &garbage);
+ }
+ list_splice_tail(&list, &kclist_head);
- /* set up the process status */
- notes[0].name = CORE_STR;
- notes[0].type = NT_PRSTATUS;
- notes[0].datasz = sizeof(struct elf_prstatus);
- notes[0].data = &prstatus;
+ proc_root_kcore->size = get_kcore_size(&nphdr, &phdrs_len, ¬es_len,
+ &data_offset);
- memset(&prstatus, 0, sizeof(struct elf_prstatus));
+out:
+ up_write(&kclist_lock);
+ list_for_each_entry_safe(pos, tmp, &garbage, list) {
+ list_del(&pos->list);
+ kfree(pos);
+ }
+ return ret;
+}
- nhdr->p_filesz = notesize(¬es[0]);
- bufp = storenote(¬es[0], bufp);
+static void append_kcore_note(char *notes, size_t *i, const char *name,
+ unsigned int type, const void *desc,
+ size_t descsz)
+{
+ struct elf_note *note = (struct elf_note *)¬es[*i];
- /* set up the process info */
- notes[1].name = CORE_STR;
- notes[1].type = NT_PRPSINFO;
- notes[1].datasz = sizeof(struct elf_prpsinfo);
- notes[1].data = &prpsinfo;
+ note->n_namesz = strlen(name) + 1;
+ note->n_descsz = descsz;
+ note->n_type = type;
+ *i += sizeof(*note);
+ memcpy(¬es[*i], name, note->n_namesz);
+ *i = ALIGN(*i + note->n_namesz, 4);
+ memcpy(¬es[*i], desc, descsz);
+ *i = ALIGN(*i + descsz, 4);
+}
- memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
- prpsinfo.pr_state = 0;
- prpsinfo.pr_sname = 'R';
- prpsinfo.pr_zomb = 0;
-
- strcpy(prpsinfo.pr_fname, "vmlinux");
- strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
-
- nhdr->p_filesz += notesize(¬es[1]);
- bufp = storenote(¬es[1], bufp);
-
- /* set up the task structure */
- notes[2].name = CORE_STR;
- notes[2].type = NT_TASKSTRUCT;
- notes[2].datasz = arch_task_struct_size;
- notes[2].data = current;
-
- nhdr->p_filesz += notesize(¬es[2]);
- bufp = storenote(¬es[2], bufp);
-
-} /* end elf_kcore_store_hdr() */
-
-/*****************************************************************************/
-/*
- * read from the ELF header and then kernel memory
- */
static ssize_t
read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
{
char *buf = file->private_data;
- ssize_t acc = 0;
- size_t size, tsz;
- size_t elf_buflen;
+ size_t phdrs_offset, notes_offset, data_offset;
+ size_t phdrs_len, notes_len;
+ struct kcore_list *m;
+ size_t tsz;
int nphdr;
unsigned long start;
+ size_t orig_buflen = buflen;
+ int ret = 0;
- read_lock(&kclist_lock);
- size = get_kcore_size(&nphdr, &elf_buflen);
+ down_read(&kclist_lock);
- if (buflen == 0 || *fpos >= size) {
- read_unlock(&kclist_lock);
- return 0;
- }
+ get_kcore_size(&nphdr, &phdrs_len, ¬es_len, &data_offset);
+ phdrs_offset = sizeof(struct elfhdr);
+ notes_offset = phdrs_offset + phdrs_len;
- /* trim buflen to not go beyond EOF */
- if (buflen > size - *fpos)
- buflen = size - *fpos;
+ /* ELF file header. */
+ if (buflen && *fpos < sizeof(struct elfhdr)) {
+ struct elfhdr ehdr = {
+ .e_ident = {
+ [EI_MAG0] = ELFMAG0,
+ [EI_MAG1] = ELFMAG1,
+ [EI_MAG2] = ELFMAG2,
+ [EI_MAG3] = ELFMAG3,
+ [EI_CLASS] = ELF_CLASS,
+ [EI_DATA] = ELF_DATA,
+ [EI_VERSION] = EV_CURRENT,
+ [EI_OSABI] = ELF_OSABI,
+ },
+ .e_type = ET_CORE,
+ .e_machine = ELF_ARCH,
+ .e_version = EV_CURRENT,
+ .e_phoff = sizeof(struct elfhdr),
+ .e_flags = ELF_CORE_EFLAGS,
+ .e_ehsize = sizeof(struct elfhdr),
+ .e_phentsize = sizeof(struct elf_phdr),
+ .e_phnum = nphdr,
+ };
- /* construct an ELF core header if we'll need some of it */
- if (*fpos < elf_buflen) {
- char * elf_buf;
-
- tsz = elf_buflen - *fpos;
- if (buflen < tsz)
- tsz = buflen;
- elf_buf = kzalloc(elf_buflen, GFP_ATOMIC);
- if (!elf_buf) {
- read_unlock(&kclist_lock);
- return -ENOMEM;
+ tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
+ if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
+ ret = -EFAULT;
+ goto out;
}
- elf_kcore_store_hdr(elf_buf, nphdr, elf_buflen);
- read_unlock(&kclist_lock);
- if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
- kfree(elf_buf);
- return -EFAULT;
- }
- kfree(elf_buf);
+
+ buffer += tsz;
buflen -= tsz;
*fpos += tsz;
- buffer += tsz;
- acc += tsz;
+ }
- /* leave now if filled buffer already */
- if (buflen == 0)
- return acc;
- } else
- read_unlock(&kclist_lock);
+ /* ELF program headers. */
+ if (buflen && *fpos < phdrs_offset + phdrs_len) {
+ struct elf_phdr *phdrs, *phdr;
+
+ phdrs = kzalloc(phdrs_len, GFP_KERNEL);
+ if (!phdrs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ phdrs[0].p_type = PT_NOTE;
+ phdrs[0].p_offset = notes_offset;
+ phdrs[0].p_filesz = notes_len;
+
+ phdr = &phdrs[1];
+ list_for_each_entry(m, &kclist_head, list) {
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_offset = kc_vaddr_to_offset(m->addr) + data_offset;
+ phdr->p_vaddr = (size_t)m->addr;
+ if (m->type == KCORE_RAM)
+ phdr->p_paddr = __pa(m->addr);
+ else if (m->type == KCORE_TEXT)
+ phdr->p_paddr = __pa_symbol(m->addr);
+ else
+ phdr->p_paddr = (elf_addr_t)-1;
+ phdr->p_filesz = phdr->p_memsz = m->size;
+ phdr->p_align = PAGE_SIZE;
+ phdr++;
+ }
+
+ tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
+ if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
+ tsz)) {
+ kfree(phdrs);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(phdrs);
+
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
+
+ /* ELF note segment. */
+ if (buflen && *fpos < notes_offset + notes_len) {
+ struct elf_prstatus prstatus = {};
+ struct elf_prpsinfo prpsinfo = {
+ .pr_sname = 'R',
+ .pr_fname = "vmlinux",
+ };
+ char *notes;
+ size_t i = 0;
+
+ strlcpy(prpsinfo.pr_psargs, saved_command_line,
+ sizeof(prpsinfo.pr_psargs));
+
+ notes = kzalloc(notes_len, GFP_KERNEL);
+ if (!notes) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ append_kcore_note(notes, &i, CORE_STR, NT_PRSTATUS, &prstatus,
+ sizeof(prstatus));
+ append_kcore_note(notes, &i, CORE_STR, NT_PRPSINFO, &prpsinfo,
+ sizeof(prpsinfo));
+ append_kcore_note(notes, &i, CORE_STR, NT_TASKSTRUCT, current,
+ arch_task_struct_size);
+ /*
+ * vmcoreinfo_size is mostly constant after init time, but it
+ * can be changed by crash_save_vmcoreinfo(). Racing here with a
+ * panic on another CPU before the machine goes down is insanely
+ * unlikely, but it's better to not leave potential buffer
+ * overflows lying around, regardless.
+ */
+ append_kcore_note(notes, &i, VMCOREINFO_NOTE_NAME, 0,
+ vmcoreinfo_data,
+ min(vmcoreinfo_size, notes_len - i));
+
+ tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
+ if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
+ kfree(notes);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(notes);
+
+ buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
/*
* Check to see if our file offset matches with any of
* the addresses in the elf_phdr on our list.
*/
- start = kc_offset_to_vaddr(*fpos - elf_buflen);
+ start = kc_offset_to_vaddr(*fpos - data_offset);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
-
- while (buflen) {
- struct kcore_list *m;
- read_lock(&kclist_lock);
- list_for_each_entry(m, &kclist_head, list) {
- if (start >= m->addr && start < (m->addr+m->size))
- break;
+ m = NULL;
+ while (buflen) {
+ /*
+ * If this is the first iteration or the address is not within
+ * the previous entry, search for a matching entry.
+ */
+ if (!m || start < m->addr || start >= m->addr + m->size) {
+ list_for_each_entry(m, &kclist_head, list) {
+ if (start >= m->addr &&
+ start < m->addr + m->size)
+ break;
+ }
}
- read_unlock(&kclist_lock);
if (&m->list == &kclist_head) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_VMALLOC) {
vread(buf, (char *)start, tsz);
/* we have to zero-fill user buffer even if no read */
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else if (m->type == KCORE_USER) {
/* User page is handled prior to normal kernel page: */
- if (copy_to_user(buffer, (char *)start, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, (char *)start, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
if (kern_addr_valid(start)) {
/*
@@ -530,29 +481,37 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
* hardened user copy kernel text checks.
*/
if (probe_kernel_read(buf, (void *) start, tsz)) {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
} else {
- if (copy_to_user(buffer, buf, tsz))
- return -EFAULT;
+ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
} else {
- if (clear_user(buffer, tsz))
- return -EFAULT;
+ if (clear_user(buffer, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
}
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
- acc += tsz;
start += tsz;
tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
}
- return acc;
+out:
+ up_read(&kclist_lock);
+ if (ret)
+ return ret;
+ return orig_buflen - buflen;
}
-
static int open_kcore(struct inode *inode, struct file *filp)
{
if (!capable(CAP_SYS_RAWIO))
@@ -592,9 +551,8 @@ static int __meminit kcore_callback(struct notifier_block *self,
switch (action) {
case MEM_ONLINE:
case MEM_OFFLINE:
- write_lock(&kclist_lock);
kcore_need_update = 1;
- write_unlock(&kclist_lock);
+ break;
}
return NOTIFY_OK;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 2fb0484..edda898 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -7,6 +7,7 @@
#include <linux/mman.h>
#include <linux/mmzone.h>
#include <linux/proc_fs.h>
+#include <linux/percpu.h>
#include <linux/quicklist.h>
#include <linux/seq_file.h>
#include <linux/swap.h>
@@ -121,6 +122,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
(unsigned long)VMALLOC_TOTAL >> 10);
show_val_kb(m, "VmallocUsed: ", 0ul);
show_val_kb(m, "VmallocChunk: ", 0ul);
+ show_val_kb(m, "Percpu: ", pcpu_nr_pages());
#ifdef CONFIG_MEMORY_FAILURE
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 59749df..535eda7 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -183,7 +183,7 @@ static int show_stat(struct seq_file *p, void *v)
static int stat_open(struct inode *inode, struct file *file)
{
- size_t size = 1024 + 128 * num_online_cpus();
+ unsigned int size = 1024 + 128 * num_online_cpus();
/* minimum size to display an interrupt count : 2 bytes */
size += 2 * nr_irqs;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dfd73a4..5ea1d64 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,7 +247,6 @@ static int proc_map_release(struct inode *inode, struct file *file)
if (priv->mm)
mmdrop(priv->mm);
- kfree(priv->rollup);
return seq_release_private(inode, file);
}
@@ -294,7 +293,7 @@ static void show_vma_header_prefix(struct seq_file *m,
}
static void
-show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
+show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file;
@@ -357,35 +356,18 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
seq_putc(m, '\n');
}
-static int show_map(struct seq_file *m, void *v, int is_pid)
+static int show_map(struct seq_file *m, void *v)
{
- show_map_vma(m, v, is_pid);
+ show_map_vma(m, v);
m_cache_vma(m, v);
return 0;
}
-static int show_pid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *v)
-{
- return show_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int pid_maps_open(struct inode *inode, struct file *file)
@@ -393,11 +375,6 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return do_maps_open(inode, file, &proc_tid_maps_op);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
@@ -405,13 +382,6 @@ const struct file_operations proc_pid_maps_operations = {
.release = proc_map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
-
/*
* Proportional Set Size(PSS): my share of RSS.
*
@@ -433,7 +403,6 @@ const struct file_operations proc_tid_maps_operations = {
#ifdef CONFIG_PROC_PAGE_MONITOR
struct mem_size_stats {
- bool first;
unsigned long resident;
unsigned long shared_clean;
unsigned long shared_dirty;
@@ -447,7 +416,6 @@ struct mem_size_stats {
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
- unsigned long first_vma_start;
u64 pss;
u64 pss_locked;
u64 swap_pss;
@@ -731,14 +699,9 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
}
#endif /* HUGETLB_PAGE */
-#define SEQ_PUT_DEC(str, val) \
- seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
-static int show_smap(struct seq_file *m, void *v, int is_pid)
+static void smap_gather_stats(struct vm_area_struct *vma,
+ struct mem_size_stats *mss)
{
- struct proc_maps_private *priv = m->private;
- struct vm_area_struct *vma = v;
- struct mem_size_stats mss_stack;
- struct mem_size_stats *mss;
struct mm_walk smaps_walk = {
.pmd_entry = smaps_pte_range,
#ifdef CONFIG_HUGETLB_PAGE
@@ -746,23 +709,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
#endif
.mm = vma->vm_mm,
};
- int ret = 0;
- bool rollup_mode;
- bool last_vma;
-
- if (priv->rollup) {
- rollup_mode = true;
- mss = priv->rollup;
- if (mss->first) {
- mss->first_vma_start = vma->vm_start;
- mss->first = false;
- }
- last_vma = !m_next_vma(priv, vma);
- } else {
- rollup_mode = false;
- memset(&mss_stack, 0, sizeof(mss_stack));
- mss = &mss_stack;
- }
smaps_walk.private = mss;
@@ -794,79 +740,116 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
walk_page_vma(vma, &smaps_walk);
if (vma->vm_flags & VM_LOCKED)
mss->pss_locked += mss->pss;
+}
- if (!rollup_mode) {
- show_map_vma(m, vma, is_pid);
- } else if (last_vma) {
- show_vma_header_prefix(
- m, mss->first_vma_start, vma->vm_end, 0, 0, 0, 0);
- seq_pad(m, ' ');
- seq_puts(m, "[rollup]\n");
- } else {
- ret = SEQ_SKIP;
- }
+#define SEQ_PUT_DEC(str, val) \
+ seq_put_decimal_ull_width(m, str, (val) >> 10, 8)
- if (!rollup_mode) {
- SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
- SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
- SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
- seq_puts(m, " kB\n");
- }
+/* Show the contents common for smaps and smaps_rollup */
+static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss)
+{
+ SEQ_PUT_DEC("Rss: ", mss->resident);
+ SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
+ SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
+ SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
+ SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
+ SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
+ SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
+ SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
+ SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
+ SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+ SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
+ seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
+ mss->private_hugetlb >> 10, 7);
+ SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
+ SEQ_PUT_DEC(" kB\nSwapPss: ",
+ mss->swap_pss >> PSS_SHIFT);
+ SEQ_PUT_DEC(" kB\nLocked: ",
+ mss->pss_locked >> PSS_SHIFT);
+ seq_puts(m, " kB\n");
+}
- if (!rollup_mode || last_vma) {
- SEQ_PUT_DEC("Rss: ", mss->resident);
- SEQ_PUT_DEC(" kB\nPss: ", mss->pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nShared_Clean: ", mss->shared_clean);
- SEQ_PUT_DEC(" kB\nShared_Dirty: ", mss->shared_dirty);
- SEQ_PUT_DEC(" kB\nPrivate_Clean: ", mss->private_clean);
- SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
- SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
- SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
- SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
- SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
- SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
- SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
- seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
- mss->private_hugetlb >> 10, 7);
- SEQ_PUT_DEC(" kB\nSwap: ", mss->swap);
- SEQ_PUT_DEC(" kB\nSwapPss: ",
- mss->swap_pss >> PSS_SHIFT);
- SEQ_PUT_DEC(" kB\nLocked: ",
- mss->pss_locked >> PSS_SHIFT);
- seq_puts(m, " kB\n");
- }
- if (!rollup_mode) {
- if (arch_pkeys_enabled())
- seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
- show_smap_vma_flags(m, vma);
- }
+static int show_smap(struct seq_file *m, void *v)
+{
+ struct vm_area_struct *vma = v;
+ struct mem_size_stats mss;
+
+ memset(&mss, 0, sizeof(mss));
+
+ smap_gather_stats(vma, &mss);
+
+ show_map_vma(m, vma);
+
+ SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start);
+ SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma));
+ SEQ_PUT_DEC(" kB\nMMUPageSize: ", vma_mmu_pagesize(vma));
+ seq_puts(m, " kB\n");
+
+ __show_smap(m, &mss);
+
+ if (arch_pkeys_enabled())
+ seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
+ show_smap_vma_flags(m, vma);
+
m_cache_vma(m, vma);
+
+ return 0;
+}
+
+static int show_smaps_rollup(struct seq_file *m, void *v)
+{
+ struct proc_maps_private *priv = m->private;
+ struct mem_size_stats mss;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long last_vma_end = 0;
+ int ret = 0;
+
+ priv->task = get_proc_task(priv->inode);
+ if (!priv->task)
+ return -ESRCH;
+
+ mm = priv->mm;
+ if (!mm || !mmget_not_zero(mm)) {
+ ret = -ESRCH;
+ goto out_put_task;
+ }
+
+ memset(&mss, 0, sizeof(mss));
+
+ down_read(&mm->mmap_sem);
+ hold_task_mempolicy(priv);
+
+ for (vma = priv->mm->mmap; vma; vma = vma->vm_next) {
+ smap_gather_stats(vma, &mss);
+ last_vma_end = vma->vm_end;
+ }
+
+ show_vma_header_prefix(m, priv->mm->mmap->vm_start,
+ last_vma_end, 0, 0, 0, 0);
+ seq_pad(m, ' ');
+ seq_puts(m, "[rollup]\n");
+
+ __show_smap(m, &mss);
+
+ release_task_mempolicy(priv);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+
+out_put_task:
+ put_task_struct(priv->task);
+ priv->task = NULL;
+
return ret;
}
#undef SEQ_PUT_DEC
-static int show_pid_smap(struct seq_file *m, void *v)
-{
- return show_smap(m, v, 1);
-}
-
-static int show_tid_smap(struct seq_file *m, void *v)
-{
- return show_smap(m, v, 0);
-}
-
static const struct seq_operations proc_pid_smaps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_smap
-};
-
-static const struct seq_operations proc_tid_smaps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_smap
+ .show = show_smap
};
static int pid_smaps_open(struct inode *inode, struct file *file)
@@ -874,28 +857,45 @@ static int pid_smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-static int pid_smaps_rollup_open(struct inode *inode, struct file *file)
+static int smaps_rollup_open(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
+ int ret;
struct proc_maps_private *priv;
- int ret = do_maps_open(inode, file, &proc_pid_smaps_op);
- if (ret < 0)
- return ret;
- seq = file->private_data;
- priv = seq->private;
- priv->rollup = kzalloc(sizeof(*priv->rollup), GFP_KERNEL);
- if (!priv->rollup) {
- proc_map_release(inode, file);
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL_ACCOUNT);
+ if (!priv)
return -ENOMEM;
+
+ ret = single_open(file, show_smaps_rollup, priv);
+ if (ret)
+ goto out_free;
+
+ priv->inode = inode;
+ priv->mm = proc_mem_open(inode, PTRACE_MODE_READ);
+ if (IS_ERR(priv->mm)) {
+ ret = PTR_ERR(priv->mm);
+
+ single_release(inode, file);
+ goto out_free;
}
- priv->rollup->first = true;
+
return 0;
+
+out_free:
+ kfree(priv);
+ return ret;
}
-static int tid_smaps_open(struct inode *inode, struct file *file)
+static int smaps_rollup_release(struct inode *inode, struct file *file)
{
- return do_maps_open(inode, file, &proc_tid_smaps_op);
+ struct seq_file *seq = file->private_data;
+ struct proc_maps_private *priv = seq->private;
+
+ if (priv->mm)
+ mmdrop(priv->mm);
+
+ kfree(priv);
+ return single_release(inode, file);
}
const struct file_operations proc_pid_smaps_operations = {
@@ -906,17 +906,10 @@ const struct file_operations proc_pid_smaps_operations = {
};
const struct file_operations proc_pid_smaps_rollup_operations = {
- .open = pid_smaps_rollup_open,
+ .open = smaps_rollup_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = proc_map_release,
-};
-
-const struct file_operations proc_tid_smaps_operations = {
- .open = tid_smaps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
+ .release = smaps_rollup_release,
};
enum clear_refs_types {
@@ -1728,7 +1721,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
/*
* Display pages allocated per node and memory policy via /proc.
*/
-static int show_numa_map(struct seq_file *m, void *v, int is_pid)
+static int show_numa_map(struct seq_file *m, void *v)
{
struct numa_maps_private *numa_priv = m->private;
struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
@@ -1812,45 +1805,17 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
return 0;
}
-static int show_pid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 1);
-}
-
-static int show_tid_numa_map(struct seq_file *m, void *v)
-{
- return show_numa_map(m, v, 0);
-}
-
static const struct seq_operations proc_pid_numa_maps_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_numa_map,
+ .show = show_numa_map,
};
-static const struct seq_operations proc_tid_numa_maps_op = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_numa_map,
-};
-
-static int numa_maps_open(struct inode *inode, struct file *file,
- const struct seq_operations *ops)
-{
- return proc_maps_open(inode, file, ops,
- sizeof(struct numa_maps_private));
-}
-
static int pid_numa_maps_open(struct inode *inode, struct file *file)
{
- return numa_maps_open(inode, file, &proc_pid_numa_maps_op);
-}
-
-static int tid_numa_maps_open(struct inode *inode, struct file *file)
-{
- return numa_maps_open(inode, file, &proc_tid_numa_maps_op);
+ return proc_maps_open(inode, file, &proc_pid_numa_maps_op,
+ sizeof(struct numa_maps_private));
}
const struct file_operations proc_pid_numa_maps_operations = {
@@ -1860,10 +1825,4 @@ const struct file_operations proc_pid_numa_maps_operations = {
.release = proc_map_release,
};
-const struct file_operations proc_tid_numa_maps_operations = {
- .open = tid_numa_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = proc_map_release,
-};
#endif /* CONFIG_NUMA */
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 5b62f57..0b63d68 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -142,8 +142,7 @@ static int is_stack(struct vm_area_struct *vma)
/*
* display a single VMA to a sequenced file
*/
-static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
- int is_pid)
+static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long ino = 0;
@@ -189,22 +188,11 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
-static int show_map(struct seq_file *m, void *_p, int is_pid)
+static int show_map(struct seq_file *m, void *_p)
{
struct rb_node *p = _p;
- return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb),
- is_pid);
-}
-
-static int show_pid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 1);
-}
-
-static int show_tid_map(struct seq_file *m, void *_p)
-{
- return show_map(m, _p, 0);
+ return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
@@ -260,14 +248,7 @@ static const struct seq_operations proc_pid_maps_ops = {
.start = m_start,
.next = m_next,
.stop = m_stop,
- .show = show_pid_map
-};
-
-static const struct seq_operations proc_tid_maps_ops = {
- .start = m_start,
- .next = m_next,
- .stop = m_stop,
- .show = show_tid_map
+ .show = show_map
};
static int maps_open(struct inode *inode, struct file *file,
@@ -308,11 +289,6 @@ static int pid_maps_open(struct inode *inode, struct file *file)
return maps_open(inode, file, &proc_pid_maps_ops);
}
-static int tid_maps_open(struct inode *inode, struct file *file)
-{
- return maps_open(inode, file, &proc_tid_maps_ops);
-}
-
const struct file_operations proc_pid_maps_operations = {
.open = pid_maps_open,
.read = seq_read,
@@ -320,10 +296,3 @@ const struct file_operations proc_pid_maps_operations = {
.release = map_release,
};
-const struct file_operations proc_tid_maps_operations = {
- .open = tid_maps_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = map_release,
-};
-
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 3f723cb..a4c2791 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -9,7 +9,7 @@
static int uptime_proc_show(struct seq_file *m, void *v)
{
- struct timespec uptime;
+ struct timespec64 uptime;
struct timespec64 idle;
u64 nsec;
u32 rem;
@@ -19,7 +19,7 @@ static int uptime_proc_show(struct seq_file *m, void *v)
for_each_possible_cpu(i)
nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
- get_monotonic_boottime(&uptime);
+ ktime_get_boottime_ts64(&uptime);
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cfb6674..6c1c260 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -379,7 +379,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
* On s390 the fault handler is used for memory regions that can't be mapped
* directly with remap_pfn_range().
*/
-static int mmap_vmcore_fault(struct vm_fault *vmf)
+static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
{
#ifdef CONFIG_S390
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index e3c558d..3a5a752 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -33,30 +33,22 @@ static int sd_is_left_mergeable(struct reiserfs_key *key, unsigned long bsize)
return 0;
}
-static char *print_time(time_t t)
-{
- static char timebuf[256];
-
- sprintf(timebuf, "%ld", t);
- return timebuf;
-}
-
static void sd_print_item(struct item_head *ih, char *item)
{
printk("\tmode | size | nlinks | first direct | mtime\n");
if (stat_data_v1(ih)) {
struct stat_data_v1 *sd = (struct stat_data_v1 *)item;
- printk("\t0%-6o | %6u | %2u | %d | %s\n", sd_v1_mode(sd),
+ printk("\t0%-6o | %6u | %2u | %d | %u\n", sd_v1_mode(sd),
sd_v1_size(sd), sd_v1_nlink(sd),
sd_v1_first_direct_byte(sd),
- print_time(sd_v1_mtime(sd)));
+ sd_v1_mtime(sd));
} else {
struct stat_data *sd = (struct stat_data *)item;
- printk("\t0%-6o | %6llu | %2u | %d | %s\n", sd_v2_mode(sd),
+ printk("\t0%-6o | %6llu | %2u | %d | %u\n", sd_v2_mode(sd),
(unsigned long long)sd_v2_size(sd), sd_v2_nlink(sd),
- sd_v2_rdev(sd), print_time(sd_v2_mtime(sd)));
+ sd_v2_rdev(sd), sd_v2_mtime(sd));
}
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 52eb5d2..8a76f9d 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2381,7 +2381,7 @@ static int journal_read(struct super_block *sb)
struct reiserfs_journal_desc *desc;
unsigned int oldest_trans_id = 0;
unsigned int oldest_invalid_trans_id = 0;
- time_t start;
+ time64_t start;
unsigned long oldest_start = 0;
unsigned long cur_dblock = 0;
unsigned long newest_mount_id = 9;
@@ -2395,7 +2395,7 @@ static int journal_read(struct super_block *sb)
cur_dblock = SB_ONDISK_JOURNAL_1st_BLOCK(sb);
reiserfs_info(sb, "checking transaction log (%pg)\n",
journal->j_dev_bd);
- start = get_seconds();
+ start = ktime_get_seconds();
/*
* step 1, read in the journal header block. Check the transaction
@@ -2556,7 +2556,7 @@ static int journal_read(struct super_block *sb)
if (replay_count > 0) {
reiserfs_info(sb,
"replayed %d transactions in %lu seconds\n",
- replay_count, get_seconds() - start);
+ replay_count, ktime_get_seconds() - start);
}
/* needed to satisfy the locking in _update_journal_header_block */
reiserfs_write_lock(sb);
@@ -2914,7 +2914,7 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
int new_alloc)
{
struct reiserfs_journal *journal = SB_JOURNAL(th->t_super);
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
/* cannot restart while nested */
BUG_ON(!th->t_trans_id);
if (th->t_refcount > 1)
@@ -3023,7 +3023,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
struct super_block *sb, unsigned long nblocks,
int join)
{
- time_t now = get_seconds();
+ time64_t now = ktime_get_seconds();
unsigned int old_trans_id;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
struct reiserfs_transaction_handle myth;
@@ -3056,7 +3056,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
PROC_INFO_INC(sb, journal.journal_relock_writers);
goto relock;
}
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* if there is no room in the journal OR
@@ -3119,7 +3119,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th,
}
/* we are the first writer, set trans_id */
if (journal->j_trans_start_time == 0) {
- journal->j_trans_start_time = get_seconds();
+ journal->j_trans_start_time = ktime_get_seconds();
}
atomic_inc(&journal->j_wcount);
journal->j_len_alloc += nblocks;
@@ -3559,11 +3559,11 @@ static void flush_async_commits(struct work_struct *work)
*/
void reiserfs_flush_old_commits(struct super_block *sb)
{
- time_t now;
+ time64_t now;
struct reiserfs_transaction_handle th;
struct reiserfs_journal *journal = SB_JOURNAL(sb);
- now = get_seconds();
+ now = ktime_get_seconds();
/*
* safety check so we don't flush while we are replaying the log during
* mount
@@ -3613,7 +3613,7 @@ void reiserfs_flush_old_commits(struct super_block *sb)
static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
{
- time_t now;
+ time64_t now;
int flush = flags & FLUSH_ALL;
int commit_now = flags & COMMIT_NOW;
int wait_on_commit = flags & WAIT;
@@ -3694,7 +3694,7 @@ static int check_journal_end(struct reiserfs_transaction_handle *th, int flags)
}
/* deal with old transactions where we are the last writers */
- now = get_seconds();
+ now = ktime_get_seconds();
if ((now - journal->j_trans_start_time) > journal->j_max_trans_age) {
commit_now = 1;
journal->j_next_async_flush = 1;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index e39b391..f2cf344 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -297,6 +297,13 @@ static int show_oidmap(struct seq_file *m, void *unused)
return 0;
}
+static time64_t ktime_mono_to_real_seconds(time64_t mono)
+{
+ ktime_t kt = ktime_set(mono, NSEC_PER_SEC/2);
+
+ return ktime_divns(ktime_mono_to_real(kt), NSEC_PER_SEC);
+}
+
static int show_journal(struct seq_file *m, void *unused)
{
struct super_block *sb = m->private;
@@ -325,7 +332,7 @@ static int show_journal(struct seq_file *m, void *unused)
"j_bcount: \t%lu\n"
"j_first_unflushed_offset: \t%lu\n"
"j_last_flush_trans_id: \t%u\n"
- "j_trans_start_time: \t%li\n"
+ "j_trans_start_time: \t%lli\n"
"j_list_bitmap_index: \t%i\n"
"j_must_wait: \t%i\n"
"j_next_full_flush: \t%i\n"
@@ -366,7 +373,7 @@ static int show_journal(struct seq_file *m, void *unused)
JF(j_bcount),
JF(j_first_unflushed_offset),
JF(j_last_flush_trans_id),
- JF(j_trans_start_time),
+ ktime_mono_to_real_seconds(JF(j_trans_start_time)),
JF(j_list_bitmap_index),
JF(j_must_wait),
JF(j_next_full_flush),
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index ae4811f..e5ca9ed 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -271,7 +271,7 @@ struct reiserfs_journal_list {
struct mutex j_commit_mutex;
unsigned int j_trans_id;
- time_t j_timestamp;
+ time64_t j_timestamp; /* write-only but useful for crash dump analysis */
struct reiserfs_list_bitmap *j_list_bitmap;
struct buffer_head *j_commit_bh; /* commit buffer head */
struct reiserfs_journal_cnode *j_realblock;
@@ -331,7 +331,7 @@ struct reiserfs_journal {
struct buffer_head *j_header_bh;
- time_t j_trans_start_time; /* time this transaction started */
+ time64_t j_trans_start_time; /* time this transaction started */
struct mutex j_mutex;
struct mutex j_flush_mutex;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index ff94fad4..48cdfc8 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -792,8 +792,10 @@ static int listxattr_filler(struct dir_context *ctx, const char *name,
return 0;
size = namelen + 1;
if (b->buf) {
- if (size > b->size)
+ if (b->pos + size > b->size) {
+ b->pos = -ERANGE;
return -ERANGE;
+ }
memcpy(b->buf + b->pos, name, namelen);
b->buf[b->pos + namelen] = 0;
}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index bec9f79..499a20a 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -35,7 +35,7 @@
static int sysv_sync_fs(struct super_block *sb, int wait)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
- unsigned long time = get_seconds(), old_time;
+ u32 time = (u32)ktime_get_real_seconds(), old_time;
mutex_lock(&sbi->s_lock);
@@ -46,8 +46,8 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
*/
old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
if (sbi->s_type == FSTYPE_SYSV4) {
- if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
- *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38 - time);
+ if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38u - old_time))
+ *sbi->s_sb_state = cpu_to_fs32(sbi, 0x7c269d38u - time);
*sbi->s_sb_time = cpu_to_fs32(sbi, time);
mark_buffer_dirty(sbi->s_bh2);
}
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 15c265d..f649023 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -910,7 +910,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
*/
spin_lock(&ctx->fault_pending_wqh.lock);
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);
spin_unlock(&ctx->fault_pending_wqh.lock);
/* Flush pending events that may still wait on event_wqh */
@@ -1066,7 +1066,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
* anyway.
*/
list_del(&uwq->wq.entry);
- __add_wait_queue(&ctx->fault_wqh, &uwq->wq);
+ add_wait_queue(&ctx->fault_wqh, &uwq->wq);
write_seqcount_end(&ctx->refile_seq);
@@ -1215,7 +1215,7 @@ static void __wake_userfault(struct userfaultfd_ctx *ctx,
__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,
range);
if (waitqueue_active(&ctx->fault_wqh))
- __wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, range);
+ __wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);
spin_unlock(&ctx->fault_pending_wqh.lock);
}
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 012c55c..e6964e9 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -89,7 +89,7 @@
/* Maximum object reference count (detects object deletion issues) */
-#define ACPI_MAX_REFERENCE_COUNT 0x1000
+#define ACPI_MAX_REFERENCE_COUNT 0x4000
/* Default page size for use in mapping memory for operation regions */
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 226e5aeb..856c56e 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -59,6 +59,12 @@ struct acpi_exception_info {
#define AE_OK (acpi_status) 0x0000
+#define ACPI_ENV_EXCEPTION(status) (status & AE_CODE_ENVIRONMENTAL)
+#define ACPI_AML_EXCEPTION(status) (status & AE_CODE_AML)
+#define ACPI_PROG_EXCEPTION(status) (status & AE_CODE_PROGRAMMER)
+#define ACPI_TABLE_EXCEPTION(status) (status & AE_CODE_ACPI_TABLES)
+#define ACPI_CNTL_EXCEPTION(status) (status & AE_CODE_CONTROL)
+
/*
* Environmental exceptions
*/
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 88072c9..9566f99 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
/* Current ACPICA subsystem version in YYYYMMDD format */
-#define ACPI_CA_VERSION 0x20180629
+#define ACPI_CA_VERSION 0x20180810
#include <acpi/acconfig.h>
#include <acpi/actypes.h>
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a7613e1..20561a6 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -75,9 +75,19 @@ struct bug_entry {
/*
* WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
- * significant issues that need prompt attention if they should ever
- * appear at runtime. Use the versions with printk format strings
- * to provide better diagnostics.
+ * significant kernel issues that need prompt attention if they should ever
+ * appear at runtime.
+ *
+ * Do not use these macros when checking for invalid external inputs
+ * (e.g. invalid system call arguments, or invalid data coming from
+ * network/devices), and on transient conditions like ENOMEM or EAGAIN.
+ * These macros should be used for recoverable kernel issues only.
+ * For invalid external inputs, transient conditions, etc use
+ * pr_err[_once/_ratelimited]() followed by dump_stack(), if necessary.
+ * Do not include "BUG"/"WARNING" in format strings manually to make these
+ * conditions distinguishable from kernel issues.
+ *
+ * Use the versions with printk format strings to provide better diagnostics.
*/
#ifndef __WARN_TAINT
extern __printf(3, 4)
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 68efb95..4d73e6e 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
#define KSYM_FUNC(x) x
#endif
#ifdef CONFIG_64BIT
-#define __put .quad
#ifndef KSYM_ALIGN
#define KSYM_ALIGN 8
#endif
#else
-#define __put .long
#ifndef KSYM_ALIGN
#define KSYM_ALIGN 4
#endif
@@ -19,6 +17,16 @@
#define KCRC_ALIGN 4
#endif
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ .long \val - ., \name - .
+#elif defined(CONFIG_64BIT)
+ .quad \val, \name
+#else
+ .long \val, \name
+#endif
+.endm
+
/*
* note on .section use: @progbits vs %progbits nastiness doesn't matter,
* since we immediately emit into those sections anyway.
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 2425176..9a6bc09 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -12,6 +12,7 @@
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
+#include <linux/refcount.h>
struct page;
struct device;
@@ -75,7 +76,7 @@ enum wb_reason {
*/
struct bdi_writeback_congested {
unsigned long state; /* WB_[a]sync_congested flags */
- atomic_t refcnt; /* nr of attached wb's and blkg */
+ refcount_t refcnt; /* nr of attached wb's and blkg */
#ifdef CONFIG_CGROUP_WRITEBACK
struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 72ca0f3..c28a47c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -404,13 +404,13 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
- atomic_inc(&bdi->wb_congested->refcnt);
+ refcount_inc(&bdi->wb_congested->refcnt);
return bdi->wb_congested;
}
static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
- if (atomic_dec_and_test(&congested->refcnt))
+ if (refcount_dec_and_test(&congested->refcnt))
kfree(congested);
}
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index af41901..7ddb134 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -4,7 +4,8 @@
#include <asm/types.h>
#include <linux/bits.h>
-#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
extern unsigned int __sw_hweight8(unsigned int w);
extern unsigned int __sw_hweight16(unsigned int w);
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 42506e4..681d866 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -280,6 +280,25 @@ unsigned long read_word_at_a_time(const void *addr)
#endif /* __KERNEL__ */
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+ static void * __attribute__((section(".discard.addressable"), used)) \
+ __PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
+
+/**
+ * offset_to_ptr - convert a relative memory offset to an absolute pointer
+ * @off: the address of the 32-bit offset value
+ */
+static inline void *offset_to_ptr(const int *off)
+{
+ return (void *)((unsigned long)off + *off);
+}
+
#endif /* __ASSEMBLY__ */
#ifndef __optimize
@@ -313,7 +332,7 @@ unsigned long read_word_at_a_time(const void *addr)
#ifdef __OPTIMIZE__
# define __compiletime_assert(condition, msg, prefix, suffix) \
do { \
- bool __cond = !(condition); \
+ int __cond = !(condition); \
extern void prefix ## suffix(void) __compiletime_error(msg); \
if (__cond) \
prefix ## suffix(); \
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index b511f6d..525510a 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -60,6 +60,8 @@ phys_addr_t paddr_vmcoreinfo_note(void);
#define VMCOREINFO_CONFIG(name) \
vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+extern unsigned char *vmcoreinfo_data;
+extern size_t vmcoreinfo_size;
extern u32 *vmcoreinfo_note;
Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
diff --git a/include/linux/crc64.h b/include/linux/crc64.h
new file mode 100644
index 0000000..c756e65
--- /dev/null
+++ b/include/linux/crc64.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * See lib/crc64.c for the related specification and polynomial arithmetic.
+ */
+#ifndef _LINUX_CRC64_H
+#define _LINUX_CRC64_H
+
+#include <linux/types.h>
+
+u64 __pure crc64_be(u64 crc, const void *p, size_t len);
+#endif /* _LINUX_CRC64_H */
diff --git a/include/linux/export.h b/include/linux/export.h
index b768d6d..ae072bc 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,12 +18,6 @@
#define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
#ifndef __ASSEMBLY__
-struct kernel_symbol
-{
- unsigned long value;
- const char *name;
-};
-
#ifdef MODULE
extern struct module __this_module;
#define THIS_MODULE (&__this_module)
@@ -54,19 +48,58 @@ extern struct module __this_module;
#define __CRC_SYMBOL(sym, sec)
#endif
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit architectures, and eliminates the need for
+ * absolute relocations that require runtime processing on relocatable
+ * kernels.
+ */
+#define __KSYMTAB_ENTRY(sym, sec) \
+ __ADDRESSABLE(sym) \
+ asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \
+ " .balign 8 \n" \
+ "__ksymtab_" #sym ": \n" \
+ " .long " #sym "- . \n" \
+ " .long __kstrtab_" #sym "- . \n" \
+ " .previous \n")
+
+struct kernel_symbol {
+ int value_offset;
+ int name_offset;
+};
+#else
+#define __KSYMTAB_ENTRY(sym, sec) \
+ static const struct kernel_symbol __ksymtab_##sym \
+ __attribute__((section("___ksymtab" sec "+" #sym), used)) \
+ = { (unsigned long)&sym, __kstrtab_##sym }
+
+struct kernel_symbol {
+ unsigned long value;
+ const char *name;
+};
+#endif
+
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL(sym, sec) \
extern typeof(sym) sym; \
__CRC_SYMBOL(sym, sec) \
static const char __kstrtab_##sym[] \
- __attribute__((section("__ksymtab_strings"), aligned(1))) \
+ __attribute__((section("__ksymtab_strings"), used, aligned(1))) \
= #sym; \
- static const struct kernel_symbol __ksymtab_##sym \
- __used \
- __attribute__((section("___ksymtab" sec "+" #sym), used)) \
- = { (unsigned long)&sym, __kstrtab_##sym }
+ __KSYMTAB_ENTRY(sym, sec)
-#if defined(__KSYM_DEPS__)
+#if defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
+#elif defined(__KSYM_DEPS__)
/*
* For fine grained build dependencies, we want to tell the build system
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index aa5db8b..f70f8ac 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -304,11 +304,6 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
-#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
-#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \
- ((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \
- BITS_PER_LONG * BITS_PER_LONG)
-
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
diff --git a/include/linux/init.h b/include/linux/init.h
index bc27cf0..2538d17 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -116,8 +116,24 @@
typedef int (*initcall_t)(void);
typedef void (*exitcall_t)(void);
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+ return offset_to_ptr(entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+ return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
/* Used for contructor calls. */
typedef void (*ctor_fn_t)(void);
@@ -167,9 +183,20 @@ extern bool initcall_debug;
* as KEEP() in the linker script.
*/
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec) \
+ __ADDRESSABLE(fn) \
+ asm(".section \"" #__sec ".init\", \"a\" \n" \
+ "__initcall_" #fn #id ": \n" \
+ ".long " #fn " - . \n" \
+ ".previous \n");
+#else
+#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
- __attribute__((__section__(".initcall" #id ".init"))) = fn;
+ __attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
/*
* Early initcalls run before initializing SMP.
@@ -208,13 +235,8 @@ extern bool initcall_debug;
#define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
-#define console_initcall(fn) \
- static initcall_t __initcall_##fn \
- __used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn) \
- static initcall_t __initcall_##fn \
- __used __section(.security_initcall.init) = fn
+#define console_initcall(fn) ___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn) ___define_initcall(fn,, .security_initcall)
struct obs_kernel_param {
const char *str;
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6cea726..6ab8c1b 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -16,10 +16,9 @@ struct user_namespace;
struct ipc_ids {
int in_use;
unsigned short seq;
- bool tables_initialized;
struct rw_semaphore rwsem;
struct idr ipcs_idr;
- int max_id;
+ int max_idx;
#ifdef CONFIG_CHECKPOINT_RESTORE
int next_id;
#endif
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8de55e4..c20f296 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -35,7 +35,7 @@ struct vmcoredd_node {
};
#ifdef CONFIG_PROC_KCORE
-extern void kclist_add(struct kcore_list *, void *, size_t, int type);
+void __init kclist_add(struct kcore_list *, void *, size_t, int type);
#else
static inline
void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 941dc0a..d6aac75 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,23 @@
* arguments just once each.
*/
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+/**
+ * round_up - round up to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round up to (must be a power of 2)
+ *
+ * Rounds @x up to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding up, use roundup() below.
+ */
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+/**
+ * round_down - round down to next specified power of 2
+ * @x: the value to round
+ * @y: multiple to round down to (must be a power of 2)
+ *
+ * Rounds @x down to next multiple of @y (which must be a power of 2).
+ * To perform arbitrary rounding down, use rounddown() below.
+ */
#define round_down(x, y) ((x) & ~__round_mask(x, y))
/**
@@ -110,13 +126,30 @@
# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d)
#endif
-/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */
+/**
+ * roundup - round up to the next specified multiple
+ * @x: the value to up
+ * @y: multiple to round up to
+ *
+ * Rounds @x up to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_up().
+ *
+ * The `const' here prevents gcc-3.3 from calling __divdi3
+ */
#define roundup(x, y) ( \
{ \
const typeof(y) __y = y; \
(((x) + (__y - 1)) / __y) * __y; \
} \
)
+/**
+ * rounddown - round down to next specified multiple
+ * @x: the value to round
+ * @y: multiple to round down to
+ *
+ * Rounds @x down to next multiple of @y. If @y will always be a power
+ * of 2, consider using the faster round_down().
+ */
#define rounddown(x, y) ( \
{ \
typeof(x) __x = (x); \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7c7362d..0205aee 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1289,8 +1289,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */
-void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
+int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable);
#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0e6c515..652f602 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -225,6 +225,11 @@ struct mem_cgroup {
*/
bool use_hierarchy;
+ /*
+ * Should the OOM killer kill all belonging tasks, had it kill one?
+ */
+ bool oom_group;
+
/* protected by memcg_oom_lock */
bool oom_lock;
int under_oom;
@@ -542,6 +547,9 @@ static inline bool task_in_memcg_oom(struct task_struct *p)
}
bool mem_cgroup_oom_synchronize(bool wait);
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+ struct mem_cgroup *oom_domain);
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
#ifdef CONFIG_MEMCG_SWAP
extern int do_swap_account;
@@ -1001,6 +1009,16 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}
+static inline struct mem_cgroup *mem_cgroup_get_oom_group(
+ struct task_struct *victim, struct mem_cgroup *oom_domain)
+{
+ return NULL;
+}
+
+static inline void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+}
+
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
int idx)
{
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4e9828c..34a2822 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -319,6 +319,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
static inline void remove_memory(int nid, u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
+extern void __ref free_area_init_core_hotplug(int nid);
extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
void *arg, int (*func)(struct memory_block *, void *));
extern int add_memory(int nid, u64 start, u64 size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a3cae49..a9e733b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -456,6 +456,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
{
static const struct vm_operations_struct dummy_vm_ops = {};
+ memset(vma, 0, sizeof(*vma));
vma->vm_mm = mm;
vma->vm_ops = &dummy_vm_ops;
INIT_LIST_HEAD(&vma->anon_vma_chain);
@@ -959,15 +960,6 @@ static inline int page_zone_id(struct page *page)
return (page->flags >> ZONEID_PGSHIFT) & ZONEID_MASK;
}
-static inline int zone_to_nid(struct zone *zone)
-{
-#ifdef CONFIG_NUMA
- return zone->node;
-#else
- return 0;
-#endif
-}
-
#ifdef NODE_NOT_IN_PAGE_FLAGS
extern int page_to_nid(const struct page *page);
#else
@@ -2023,7 +2015,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
extern void __init pagecache_init(void);
extern void free_area_init(unsigned long * zones_size);
-extern void free_area_init_node(int nid, unsigned long * zones_size,
+extern void __init free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
extern void free_initmem(void);
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 392e6af..133ba78 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -151,13 +151,15 @@ struct mmu_notifier_ops {
* address space but may still be referenced by sptes until
* the last refcount is dropped.
*
- * If both of these callbacks cannot block, and invalidate_range
- * cannot block, mmu_notifier_ops.flags should have
- * MMU_INVALIDATE_DOES_NOT_BLOCK set.
+ * If blockable argument is set to false then the callback cannot
+ * sleep and has to return with -EAGAIN. 0 should be returned
+ * otherwise.
+ *
*/
- void (*invalidate_range_start)(struct mmu_notifier *mn,
+ int (*invalidate_range_start)(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long start, unsigned long end);
+ unsigned long start, unsigned long end,
+ bool blockable);
void (*invalidate_range_end)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end);
@@ -229,8 +231,9 @@ extern int __mmu_notifier_test_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_change_pte(struct mm_struct *mm,
unsigned long address, pte_t pte);
-extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
- unsigned long start, unsigned long end);
+extern int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool blockable);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end,
bool only_end);
@@ -281,7 +284,15 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
- __mmu_notifier_invalidate_range_start(mm, start, end);
+ __mmu_notifier_invalidate_range_start(mm, start, end, true);
+}
+
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ if (mm_has_notifiers(mm))
+ return __mmu_notifier_invalidate_range_start(mm, start, end, false);
+ return 0;
}
static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
@@ -461,6 +472,12 @@ static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
{
}
+static inline int mmu_notifier_invalidate_range_start_nonblock(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ return 0;
+}
+
static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 32699b2..1e22d96 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -755,25 +755,6 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
}
-static inline int zone_id(const struct zone *zone)
-{
- struct pglist_data *pgdat = zone->zone_pgdat;
-
- return zone - pgdat->node_zones;
-}
-
-#ifdef CONFIG_ZONE_DEVICE
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return zone_id(zone) == ZONE_DEVICE;
-}
-#else
-static inline bool is_dev_zone(const struct zone *zone)
-{
- return false;
-}
-#endif
-
#include <linux/memory_hotplug.h>
void build_all_zonelists(pg_data_t *pgdat);
@@ -824,6 +805,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
*/
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool is_dev_zone(const struct zone *zone)
+{
+ return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool is_dev_zone(const struct zone *zone)
+{
+ return false;
+}
+#endif
+
/*
* Returns true if a zone has pages managed by the buddy allocator.
* All the reclaim decisions have to use this function rather than
@@ -841,6 +834,25 @@ static inline bool populated_zone(struct zone *zone)
return zone->present_pages;
}
+#ifdef CONFIG_NUMA
+static inline int zone_to_nid(struct zone *zone)
+{
+ return zone->node;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid)
+{
+ zone->node = nid;
+}
+#else
+static inline int zone_to_nid(struct zone *zone)
+{
+ return 0;
+}
+
+static inline void zone_set_nid(struct zone *zone, int nid) {}
+#endif
+
extern int movable_zone;
#ifdef CONFIG_HIGHMEM
@@ -956,12 +968,7 @@ static inline int zonelist_zone_idx(struct zoneref *zoneref)
static inline int zonelist_node_idx(struct zoneref *zoneref)
{
-#ifdef CONFIG_NUMA
- /* zone_to_nid not available in this context */
- return zoneref->zone->node;
-#else
- return 0;
-#endif /* CONFIG_NUMA */
+ return zone_to_nid(zoneref->zone);
}
struct zoneref *__next_zones_zonelist(struct zoneref *z,
diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index db99240..c79e859 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -363,7 +363,6 @@ static inline void net_dim_sample(u16 event_ctr,
}
#define NET_DIM_NEVENTS 64
-#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
static inline void net_dim_calc_stats(struct net_dim_sample *start,
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 1fbde8a..5a30ad5 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -518,7 +518,7 @@ static inline int node_random(const nodemask_t *mask)
* NODEMASK_ALLOC(type, name) allocates an object with a specified type and
* name.
*/
-#if NODES_SHIFT > 8 /* nodemask_t > 256 bytes */
+#if NODES_SHIFT > 8 /* nodemask_t > 32 bytes */
#define NODEMASK_ALLOC(type, name, gfp_flags) \
type *name = kmalloc(sizeof(*name), gfp_flags)
#define NODEMASK_FREE(m) kfree(m)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 6adac11..92f70e4 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,7 +95,7 @@ static inline int check_stable_address_space(struct mm_struct *mm)
return 0;
}
-void __oom_reap_task_mm(struct mm_struct *mm);
+bool __oom_reap_task_mm(struct mm_struct *mm);
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9b87f19..e72ca8d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1809,7 +1809,11 @@ struct pci_fixup {
u16 device; /* Or PCI_ANY_ID */
u32 class; /* Or PCI_ANY_ID */
unsigned int class_shift; /* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ int hook_offset;
+#else
void (*hook)(struct pci_dev *dev);
+#endif
};
enum pci_fixup_pass {
@@ -1823,12 +1827,28 @@ enum pci_fixup_pass {
pci_fixup_suspend_late, /* pci_device_suspend_late() */
};
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook) \
+ __ADDRESSABLE(hook) \
+ asm(".section " #sec ", \"a\" \n" \
+ ".balign 16 \n" \
+ ".short " #vendor ", " #device " \n" \
+ ".long " #class ", " #class_shift " \n" \
+ ".long " #hook " - . \n" \
+ ".previous \n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook) \
+ __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
+ class_shift, hook)
+#else
/* Anonymous variables would be nice... */
#define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \
class_shift, hook) \
static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used \
__attribute__((__section__(#section), aligned((sizeof(void *))))) \
= { vendor, device, class, class_shift, hook };
+#endif
#define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class, \
class_shift, hook) \
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 296bbe4..70b7123 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -149,4 +149,6 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
(typeof(type) __percpu *)__alloc_percpu(sizeof(type), \
__alignof__(type))
+extern unsigned long pcpu_nr_pages(void);
+
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 626fc65..d0e1f15 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -129,7 +129,7 @@ int open_related_ns(struct ns_common *ns,
struct ns_common *(*get_ns)(struct ns_common *ns));
/* get the associated pid namespace for a file in procfs */
-static inline struct pid_namespace *proc_pid_ns(struct inode *inode)
+static inline struct pid_namespace *proc_pid_ns(const struct inode *inode)
{
return inode->i_sb->s_fs_info;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 00de3e9..977cb57 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,6 +854,7 @@ struct task_struct {
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
unsigned long last_switch_count;
+ unsigned long last_switch_time;
#endif
/* Filesystem information: */
struct fs_struct *fs;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 4e9b77f..1be3572 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -323,7 +323,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
int force_sig_ptrace_errno_trap(int errno, void __user *addr);
extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int force_sigsegv(int, struct task_struct *);
+extern void force_sigsegv(int sig, struct task_struct *p);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 913488d..a9c32da 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -10,6 +10,7 @@ struct ctl_table;
extern int sysctl_hung_task_check_count;
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_check_interval_secs;
extern int sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
void __user *buffer,
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 96fe289..39ad98c 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/ratelimit.h>
struct key;
@@ -12,7 +13,7 @@ struct key;
* Some day this will be a full-fledged user tracking system..
*/
struct user_struct {
- atomic_t __count; /* reference count */
+ refcount_t __count; /* reference count */
atomic_t processes; /* How many processes does this user have? */
atomic_t sigpending; /* How many pending signals does this user have? */
#ifdef CONFIG_FANOTIFY
@@ -59,7 +60,7 @@ extern struct user_struct root_user;
extern struct user_struct * alloc_uid(kuid_t);
static inline struct user_struct *get_uid(struct user_struct *u)
{
- atomic_inc(&u->__count);
+ refcount_inc(&u->__count);
return u;
}
extern void free_uid(struct user_struct *);
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index b154fd2..9443caf 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -12,6 +12,9 @@
struct shrink_control {
gfp_t gfp_mask;
+ /* current node being shrunk (for NUMA aware shrinkers) */
+ int nid;
+
/*
* How many objects scan_objects should scan and try to reclaim.
* This is reset before every call, so it is safe for callees
@@ -26,9 +29,6 @@ struct shrink_control {
*/
unsigned long nr_scanned;
- /* current node being shrunk (for NUMA aware shrinkers) */
- int nid;
-
/* current memcg being shrunk (for memcg aware shrinkers) */
struct mem_cgroup *memcg;
};
@@ -63,9 +63,9 @@ struct shrinker {
unsigned long (*scan_objects)(struct shrinker *,
struct shrink_control *sc);
- int seeks; /* seeks to recreate an obj */
long batch; /* reclaim batch size, 0 = default */
- unsigned long flags;
+ int seeks; /* seeks to recreate an obj */
+ unsigned flags;
/* These are for internal use */
struct list_head list;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index fe125b0..3d4cd5d 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -267,7 +267,7 @@ extern void set_current_blocked(sigset_t *);
extern void __set_current_blocked(const sigset_t *);
extern int show_unhandled_signals;
-extern int get_signal(struct ksignal *ksig);
+extern bool get_signal(struct ksignal *ksig);
extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
extern void exit_signals(struct task_struct *tsk);
extern void kernel_sigaction(int, __sighandler_t);
@@ -289,7 +289,7 @@ static inline void disallow_signal(int sig)
extern struct kmem_cache *sighand_cachep;
-int unhandled_signal(struct task_struct *tsk, int sig);
+extern bool unhandled_signal(struct task_struct *tsk, int sig);
/*
* In POSIX a signal is sent either to a specific thread (Linux task)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1a8bd05..8e2c11e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -447,7 +447,7 @@ extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(struct page *page);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, bool cluster, swp_entry_t swp_entries[]);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index d9a084c..7f2e16e 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -249,6 +249,19 @@ extern void syscall_unregfunc(void);
return static_key_false(&__tracepoint_##name.key); \
}
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name) \
+ asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \
+ " .balign 4 \n" \
+ " .long __tracepoint_" #name " - . \n" \
+ " .previous \n")
+#else
+#define __TRACEPOINT_ENTRY(name) \
+ static struct tracepoint * const __tracepoint_ptr_##name __used \
+ __attribute__((section("__tracepoints_ptrs"))) = \
+ &__tracepoint_##name
+#endif
+
/*
* We have no guarantee that gcc and the linker won't up-align the tracepoint
* structures, so we create an array of pointers that will be used for iteration
@@ -258,11 +271,9 @@ extern void syscall_unregfunc(void);
static const char __tpstrtab_##name[] \
__attribute__((section("__tracepoints_strings"))) = #name; \
struct tracepoint __tracepoint_##name \
- __attribute__((section("__tracepoints"))) = \
+ __attribute__((section("__tracepoints"), used)) = \
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
- static struct tracepoint * const __tracepoint_ptr_##name __used \
- __attribute__((section("__tracepoints_ptrs"))) = \
- &__tracepoint_##name;
+ __TRACEPOINT_ENTRY(name);
#define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 6a17f85..381cdf5 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -119,7 +119,8 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
*/
int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
u64 start, u64 end,
- umem_call_back cb, void *cookie);
+ umem_call_back cb,
+ bool blockable, void *cookie);
/*
* Find first region intersecting with address range.
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index e13eec3..df31aa9 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -23,7 +23,7 @@
#define AUTOFS_MIN_PROTO_VERSION 3
#define AUTOFS_MAX_PROTO_VERSION 5
-#define AUTOFS_PROTO_SUBVERSION 2
+#define AUTOFS_PROTO_SUBVERSION 3
/*
* The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
@@ -90,8 +90,10 @@ enum {
/* autofs version 4 and later definitions */
/* Mask for expire behaviour */
-#define AUTOFS_EXP_IMMEDIATE 1
-#define AUTOFS_EXP_LEAVES 2
+#define AUTOFS_EXP_NORMAL 0x00
+#define AUTOFS_EXP_IMMEDIATE 0x01
+#define AUTOFS_EXP_LEAVES 0x02
+#define AUTOFS_EXP_FORCED 0x04
#define AUTOFS_TYPE_ANY 0U
#define AUTOFS_TYPE_INDIRECT 1U
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 8d19e02..5d4f58e 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -30,10 +30,10 @@ struct bkey {
BITMASK(name, struct bkey, field, offset, size)
#define PTR_FIELD(name, offset, size) \
-static inline __u64 name(const struct bkey *k, unsigned i) \
+static inline __u64 name(const struct bkey *k, unsigned int i) \
{ return (k->ptr[i] >> offset) & ~(~0ULL << size); } \
\
-static inline void SET_##name(struct bkey *k, unsigned i, __u64 v) \
+static inline void SET_##name(struct bkey *k, unsigned int i, __u64 v) \
{ \
k->ptr[i] &= ~(~(~0ULL << size) << offset); \
k->ptr[i] |= (v & ~(~0ULL << size)) << offset; \
@@ -117,12 +117,14 @@ static inline void bkey_copy_key(struct bkey *dest, const struct bkey *src)
static inline struct bkey *bkey_next(const struct bkey *k)
{
__u64 *d = (void *) k;
+
return (struct bkey *) (d + bkey_u64s(k));
}
-static inline struct bkey *bkey_idx(const struct bkey *k, unsigned nr_keys)
+static inline struct bkey *bkey_idx(const struct bkey *k, unsigned int nr_keys)
{
__u64 *d = (void *) k;
+
return (struct bkey *) (d + nr_keys);
}
/* Enough for a key with 6 pointers */
diff --git a/init/Kconfig b/init/Kconfig
index 9bd50ba..641dd7d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -436,7 +436,7 @@
help
If you say Y here, the process accounting information is written
in a new file format that also logs the process IDs of each
- process and it's parent. Note that this file format is incompatible
+ process and its parent. Note that this file format is incompatible
with previous v0/v1/v2 file formats, so you will need updated tools
for processing it. A preliminary version of these tools is available
at <http://www.gnu.org/software/acct/>.
@@ -967,6 +967,18 @@
endif # NAMESPACES
+config CHECKPOINT_RESTORE
+ bool "Checkpoint/restore support"
+ select PROC_CHILDREN
+ default n
+ help
+ Enables additional kernel features in a sake of checkpoint/restore.
+ In particular it adds auxiliary prctl codes to setup process text,
+ data and heap segment sizes, and a few additional /proc filesystem
+ entries.
+
+ If unsure, say N here.
+
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select CGROUPS
@@ -1383,18 +1395,6 @@
If unsure, say Y.
-config CHECKPOINT_RESTORE
- bool "Checkpoint/restore support" if EXPERT
- select PROC_CHILDREN
- default n
- help
- Enables additional kernel features in a sake of checkpoint/restore.
- In particular it adds auxiliary prctl codes to setup process text,
- data and heap segment sizes, and a few additional /proc filesystem
- entries.
-
- If unsure, say N here.
-
config KALLSYMS
bool "Load all symbols for debugging/ksymoops" if EXPERT
default y
@@ -1702,7 +1702,7 @@
default n
help
Normally, and according to the Linux spec, anonymous memory obtained
- from mmap() has it's contents cleared before it is passed to
+ from mmap() has its contents cleared before it is passed to
userspace. Enabling this config option allows you to request that
mmap() skip that if it is given an MAP_UNINITIALIZED flag, thus
providing a huge performance boost. If this option is not enabled,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 2c71dab..e1c9afa 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -1,13 +1,3 @@
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/ctype.h>
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 5a91aef..d1a5d88 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -1,14 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/unistd.h>
#include <linux/kernel.h>
#include <linux/fs.h>
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 7d85d17..b840315 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -1,14 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/delay.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index 035a5f0..32fb049 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -1,14 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/minix_fs.h>
diff --git a/init/initramfs.c b/init/initramfs.c
index 13643c4..6405577 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -1,14 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Many of the syscalls used in this file expect some of the arguments
- * to be __user pointers not __kernel pointers. To limit the sparse
- * noise, turn off sparse checking for this file.
- */
-#ifdef __CHECKER__
-#undef __CHECKER__
-#warning "Sparse checking disabled for this file"
-#endif
-
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
diff --git a/init/main.c b/init/main.c
index b729e1f..18f8f01 100644
--- a/init/main.c
+++ b/init/main.c
@@ -902,18 +902,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
}
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
-static initcall_t *initcall_levels[] __initdata = {
+static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
@@ -939,7 +939,7 @@ static char *initcall_level_names[] __initdata = {
static void __init do_initcall_level(int level)
{
- initcall_t *fn;
+ initcall_entry_t *fn;
strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
@@ -950,7 +950,7 @@ static void __init do_initcall_level(int level)
trace_initcall_level(initcall_level_names[level]);
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
- do_one_initcall(*fn);
+ do_one_initcall(initcall_from_entry(fn));
}
static void __init do_initcalls(void)
@@ -981,11 +981,11 @@ static void __init do_basic_setup(void)
static void __init do_pre_smp_initcalls(void)
{
- initcall_t *fn;
+ initcall_entry_t *fn;
trace_initcall_level("early");
for (fn = __initcall_start; fn < __initcall0_start; fn++)
- do_one_initcall(*fn);
+ do_one_initcall(initcall_from_entry(fn));
}
/*
@@ -1002,6 +1002,7 @@ void __init load_default_modules(void)
static int run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
+ pr_info("Run %s as init process\n", init_filename);
return do_execve(getname_kernel(init_filename),
(const char __user *const __user *)argv_init,
(const char __user *const __user *)envp_init);
diff --git a/ipc/msg.c b/ipc/msg.c
index 2032811..883642c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -163,7 +163,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
/* ipc_addid() locks msq upon success. */
retval = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
if (retval < 0) {
- call_rcu(&msq->q_perm.rcu, msg_rcu_free);
+ ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
return retval;
}
@@ -386,7 +386,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
down_write(&msg_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
&msqid64->msg_perm, msqid64->msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
@@ -456,7 +456,7 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
int cmd, struct msginfo *msginfo)
{
int err;
- int max_id;
+ int max_idx;
/*
* We must not return kernel stack data.
@@ -483,16 +483,15 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_id = ipc_get_maxid(&msg_ids(ns));
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
up_read(&msg_ids(ns).rwsem);
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int msgctl_stat(struct ipc_namespace *ns, int msqid,
int cmd, struct msqid64_ds *p)
{
struct msg_queue *msq;
- int id = 0;
int err;
memset(p, 0, sizeof(*p));
@@ -504,7 +503,6 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
err = PTR_ERR(msq);
goto out_unlock;
}
- id = msq->q_perm.id;
} else { /* IPC_STAT */
msq = msq_obtain_object_check(ns, msqid);
if (IS_ERR(msq)) {
@@ -549,10 +547,21 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid,
p->msg_lspid = pid_vnr(msq->q_lspid);
p->msg_lrpid = pid_vnr(msq->q_lrpid);
- ipc_unlock_object(&msq->q_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * MSG_STAT and MSG_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = msq->q_perm.id;
+ }
+ ipc_unlock_object(&msq->q_perm);
out_unlock:
rcu_read_unlock();
return err;
@@ -1229,7 +1238,7 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif
-int msg_init_ns(struct ipc_namespace *ns)
+void msg_init_ns(struct ipc_namespace *ns)
{
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
@@ -1237,7 +1246,7 @@ int msg_init_ns(struct ipc_namespace *ns)
atomic_set(&ns->msg_bytes, 0);
atomic_set(&ns->msg_hdrs, 0);
- return ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
}
#ifdef CONFIG_IPC_NS
@@ -1278,12 +1287,11 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
}
#endif
-int __init msg_init(void)
+void __init msg_init(void)
{
- const int err = msg_init_ns(&init_ipc_ns);
+ msg_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
IPC_MSG_IDS, sysvipc_msg_proc_show);
- return err;
}
diff --git a/ipc/namespace.c b/ipc/namespace.c
index f59a899..2160779 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -55,28 +55,16 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
- err = sem_init_ns(ns);
- if (err)
- goto fail_put;
- err = msg_init_ns(ns);
- if (err)
- goto fail_destroy_sem;
- err = shm_init_ns(ns);
- if (err)
- goto fail_destroy_msg;
-
err = mq_init_ns(ns);
if (err)
- goto fail_destroy_shm;
+ goto fail_put;
+
+ sem_init_ns(ns);
+ msg_init_ns(ns);
+ shm_init_ns(ns);
return ns;
-fail_destroy_shm:
- shm_exit_ns(ns);
-fail_destroy_msg:
- msg_exit_ns(ns);
-fail_destroy_sem:
- sem_exit_ns(ns);
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
diff --git a/ipc/sem.c b/ipc/sem.c
index 00ef2f7..26f8e37 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -221,14 +221,14 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define sc_semopm sem_ctls[2]
#define sc_semmni sem_ctls[3]
-int sem_init_ns(struct ipc_namespace *ns)
+void sem_init_ns(struct ipc_namespace *ns)
{
ns->sc_semmsl = SEMMSL;
ns->sc_semmns = SEMMNS;
ns->sc_semopm = SEMOPM;
ns->sc_semmni = SEMMNI;
ns->used_sems = 0;
- return ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
+ ipc_init_ids(&ns->ids[IPC_SEM_IDS]);
}
#ifdef CONFIG_IPC_NS
@@ -240,14 +240,12 @@ void sem_exit_ns(struct ipc_namespace *ns)
}
#endif
-int __init sem_init(void)
+void __init sem_init(void)
{
- const int err = sem_init_ns(&init_ipc_ns);
-
+ sem_init_ns(&init_ipc_ns);
ipc_init_proc_interface("sysvipc/sem",
" key semid perms nsems uid gid cuid cgid otime ctime\n",
IPC_SEM_IDS, sysvipc_sem_proc_show);
- return err;
}
/**
@@ -557,7 +555,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
/* ipc_addid() locks sma upon success. */
retval = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
if (retval < 0) {
- call_rcu(&sma->sem_perm.rcu, sem_rcu_free);
+ ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
return retval;
}
ns->used_sems += nsems;
@@ -1223,7 +1221,6 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
{
struct sem_array *sma;
time64_t semotime;
- int id = 0;
int err;
memset(semid64, 0, sizeof(*semid64));
@@ -1235,7 +1232,6 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
err = PTR_ERR(sma);
goto out_unlock;
}
- id = sma->sem_perm.id;
} else { /* IPC_STAT */
sma = sem_obtain_object_check(ns, semid);
if (IS_ERR(sma)) {
@@ -1275,10 +1271,20 @@ static int semctl_stat(struct ipc_namespace *ns, int semid,
#endif
semid64->sem_nsems = sma->sem_nsems;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SEM_STAT and SEM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = sma->sem_perm.id;
+ }
ipc_unlock_object(&sma->sem_perm);
- rcu_read_unlock();
- return id;
-
out_unlock:
rcu_read_unlock();
return err;
@@ -1288,7 +1294,7 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
int cmd, void __user *p)
{
struct seminfo seminfo;
- int max_id;
+ int max_idx;
int err;
err = security_sem_semctl(NULL, cmd);
@@ -1312,11 +1318,11 @@ static int semctl_info(struct ipc_namespace *ns, int semid,
seminfo.semusz = SEMUSZ;
seminfo.semaem = SEMAEM;
}
- max_id = ipc_get_maxid(&sem_ids(ns));
+ max_idx = ipc_get_maxidx(&sem_ids(ns));
up_read(&sem_ids(ns).rwsem);
if (copy_to_user(p, &seminfo, sizeof(struct seminfo)))
return -EFAULT;
- return (max_id < 0) ? 0 : max_id;
+ return (max_idx < 0) ? 0 : max_idx;
}
static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum,
@@ -1588,7 +1594,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
down_write(&sem_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &sem_ids(ns), semid, cmd,
&semid64->sem_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
diff --git a/ipc/shm.c b/ipc/shm.c
index b204feb..b0eb375 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -96,14 +96,14 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
-int shm_init_ns(struct ipc_namespace *ns)
+void shm_init_ns(struct ipc_namespace *ns)
{
ns->shm_ctlmax = SHMMAX;
ns->shm_ctlall = SHMALL;
ns->shm_ctlmni = SHMMNI;
ns->shm_rmid_forced = 0;
ns->shm_tot = 0;
- return ipc_init_ids(&shm_ids(ns));
+ ipc_init_ids(&shm_ids(ns));
}
/*
@@ -136,9 +136,8 @@ void shm_exit_ns(struct ipc_namespace *ns)
static int __init ipc_ns_init(void)
{
- const int err = shm_init_ns(&init_ipc_ns);
- WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
- return err;
+ shm_init_ns(&init_ipc_ns);
+ return 0;
}
pure_initcall(ipc_ns_init);
@@ -180,16 +179,33 @@ static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace
*/
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
- struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+ struct kern_ipc_perm *ipcp;
+ rcu_read_lock();
+ ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
+ if (IS_ERR(ipcp))
+ goto err;
+
+ ipc_lock_object(ipcp);
+ /*
+ * ipc_rmid() may have already freed the ID while ipc_lock_object()
+ * was spinning: here verify that the structure is still valid.
+ * Upon races with RMID, return -EIDRM, thus indicating that
+ * the ID points to a removed identifier.
+ */
+ if (ipc_valid_object(ipcp)) {
+ /* return a locked ipc object upon success */
+ return container_of(ipcp, struct shmid_kernel, shm_perm);
+ }
+
+ ipc_unlock_object(ipcp);
+err:
+ rcu_read_unlock();
/*
* Callers of shm_lock() must validate the status of the returned ipc
- * object pointer (as returned by ipc_lock()), and error out as
- * appropriate.
+ * object pointer and error out as appropriate.
*/
- if (IS_ERR(ipcp))
- return (void *)ipcp;
- return container_of(ipcp, struct shmid_kernel, shm_perm);
+ return (void *)ipcp;
}
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
@@ -684,6 +700,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if (is_file_hugepages(file) && shp->mlock_user)
user_shm_unlock(size, shp->mlock_user);
fput(file);
+ ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+ return error;
no_file:
call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
return error;
@@ -879,7 +897,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
down_write(&shm_ids(ns).rwsem);
rcu_read_lock();
- ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+ ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
&shmid64->shm_perm, 0);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
@@ -930,7 +948,7 @@ static int shmctl_ipc_info(struct ipc_namespace *ns,
shminfo->shmall = ns->shm_ctlall;
shminfo->shmmin = SHMMIN;
down_read(&shm_ids(ns).rwsem);
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
@@ -950,7 +968,7 @@ static int shmctl_shm_info(struct ipc_namespace *ns,
shm_info->shm_tot = ns->shm_tot;
shm_info->swap_attempts = 0;
shm_info->swap_successes = 0;
- err = ipc_get_maxid(&shm_ids(ns));
+ err = ipc_get_maxidx(&shm_ids(ns));
up_read(&shm_ids(ns).rwsem);
if (err < 0)
err = 0;
@@ -962,7 +980,6 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
int cmd, struct shmid64_ds *tbuf)
{
struct shmid_kernel *shp;
- int id = 0;
int err;
memset(tbuf, 0, sizeof(*tbuf));
@@ -974,7 +991,6 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
err = PTR_ERR(shp);
goto out_unlock;
}
- id = shp->shm_perm.id;
} else { /* IPC_STAT */
shp = shm_obtain_object_check(ns, shmid);
if (IS_ERR(shp)) {
@@ -1024,10 +1040,21 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid,
tbuf->shm_lpid = pid_vnr(shp->shm_lprid);
tbuf->shm_nattch = shp->shm_nattch;
- ipc_unlock_object(&shp->shm_perm);
- rcu_read_unlock();
- return id;
+ if (cmd == IPC_STAT) {
+ /*
+ * As defined in SUS:
+ * Return 0 on success
+ */
+ err = 0;
+ } else {
+ /*
+ * SHM_STAT and SHM_STAT_ANY (both Linux specific)
+ * Return the full id, including the sequence number
+ */
+ err = shp->shm_perm.id;
+ }
+ ipc_unlock_object(&shp->shm_perm);
out_unlock:
rcu_read_unlock();
return err;
diff --git a/ipc/util.c b/ipc/util.c
index fdffff4..0af0575 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -88,16 +88,12 @@ struct ipc_proc_iface {
*/
static int __init ipc_init(void)
{
- int err_sem, err_msg;
-
proc_mkdir("sysvipc", NULL);
- err_sem = sem_init();
- WARN(err_sem, "ipc: sysv sem_init failed: %d\n", err_sem);
- err_msg = msg_init();
- WARN(err_msg, "ipc: sysv msg_init failed: %d\n", err_msg);
+ sem_init();
+ msg_init();
shm_init();
- return err_msg ? err_msg : err_sem;
+ return 0;
}
device_initcall(ipc_init);
@@ -116,22 +112,17 @@ static const struct rhashtable_params ipc_kht_params = {
* Set up the sequence range to use for the ipc identifier range (limited
* below IPCMNI) then initialise the keys hashtable and ids idr.
*/
-int ipc_init_ids(struct ipc_ids *ids)
+void ipc_init_ids(struct ipc_ids *ids)
{
- int err;
ids->in_use = 0;
ids->seq = 0;
init_rwsem(&ids->rwsem);
- err = rhashtable_init(&ids->key_ht, &ipc_kht_params);
- if (err)
- return err;
+ rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
- ids->tables_initialized = true;
- ids->max_id = -1;
+ ids->max_idx = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
- return 0;
}
#ifdef CONFIG_PROC_FS
@@ -179,61 +170,66 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
*/
static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
{
- struct kern_ipc_perm *ipcp = NULL;
+ struct kern_ipc_perm *ipcp;
- if (likely(ids->tables_initialized))
- ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
+ ipcp = rhashtable_lookup_fast(&ids->key_ht, &key,
ipc_kht_params);
+ if (!ipcp)
+ return NULL;
- if (ipcp) {
- rcu_read_lock();
- ipc_lock_object(ipcp);
- return ipcp;
- }
-
- return NULL;
+ rcu_read_lock();
+ ipc_lock_object(ipcp);
+ return ipcp;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
/*
- * Specify desired id for next allocated IPC object.
+ * Insert new IPC object into idr tree, and set sequence number and id
+ * in the correct order.
+ * Especially:
+ * - the sequence number must be set before inserting the object into the idr,
+ * because the sequence number is accessed without a lock.
+ * - the id can/must be set after inserting the object into the idr.
+ * All accesses must be done after getting kern_ipc_perm.lock.
+ *
+ * The caller must own kern_ipc_perm.lock.of the new object.
+ * On error, the function returns a (negative) error code.
*/
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), \
- (ids)->next_id < 0 ? 0 : ipcid_to_idx((ids)->next_id),\
- 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
+static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
{
- if (ids->next_id < 0) { /* default, behave as !CHECKPOINT_RESTORE */
+ int idx, next_id = -1;
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ next_id = ids->next_id;
+ ids->next_id = -1;
+#endif
+
+ /*
+ * As soon as a new object is inserted into the idr,
+ * ipc_obtain_object_idr() or ipc_obtain_object_check() can find it,
+ * and the lockless preparations for ipc operations can start.
+ * This means especially: permission checks, audit calls, allocation
+ * of undo structures, ...
+ *
+ * Thus the object must be fully initialized, and if something fails,
+ * then the full tear-down sequence must be followed.
+ * (i.e.: set new->deleted, reduce refcount, call_rcu())
+ */
+
+ if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
new->seq = ids->seq++;
if (ids->seq > IPCID_SEQ_MAX)
ids->seq = 0;
+ idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
} else {
- new->seq = ipcid_to_seqx(ids->next_id);
- ids->next_id = -1;
+ new->seq = ipcid_to_seqx(next_id);
+ idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
+ 0, GFP_NOWAIT);
}
-
- return SEQ_MULTIPLIER * new->seq + id;
+ if (idx >= 0)
+ new->id = SEQ_MULTIPLIER * new->seq + idx;
+ return idx;
}
-#else
-#define ipc_idr_alloc(ids, new) \
- idr_alloc(&(ids)->ipcs_idr, (new), 0, 0, GFP_NOWAIT)
-
-static inline int ipc_buildid(int id, struct ipc_ids *ids,
- struct kern_ipc_perm *new)
-{
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
-
- return SEQ_MULTIPLIER * new->seq + id;
-}
-
-#endif /* CONFIG_CHECKPOINT_RESTORE */
-
/**
* ipc_addid - add an ipc identifier
* @ids: ipc identifier set
@@ -241,9 +237,11 @@ static inline int ipc_buildid(int id, struct ipc_ids *ids,
* @limit: limit for the number of used ids
*
* Add an entry 'new' to the ipc ids idr. The permissions object is
- * initialised and the first free entry is set up and the id assigned
+ * initialised and the first free entry is set up and the index assigned
* is returned. The 'new' entry is returned in a locked state on success.
+ *
* On failure the entry is not locked and a negative err-code is returned.
+ * The caller must use ipc_rcu_putref() to free the identifier.
*
* Called with writer ipc_ids.rwsem held.
*/
@@ -251,19 +249,20 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
{
kuid_t euid;
kgid_t egid;
- int id, err;
+ int idx, err;
+
+ /* 1) Initialize the refcount so that ipc_rcu_putref works */
+ refcount_set(&new->refcount, 1);
if (limit > IPCMNI)
limit = IPCMNI;
- if (!ids->tables_initialized || ids->in_use >= limit)
+ if (ids->in_use >= limit)
return -ENOSPC;
idr_preload(GFP_KERNEL);
- refcount_set(&new->refcount, 1);
spin_lock_init(&new->lock);
- new->deleted = false;
rcu_read_lock();
spin_lock(&new->lock);
@@ -271,30 +270,30 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
new->cuid = new->uid = euid;
new->gid = new->cgid = egid;
- id = ipc_idr_alloc(ids, new);
+ new->deleted = false;
+
+ idx = ipc_idr_alloc(ids, new);
idr_preload_end();
- if (id >= 0 && new->key != IPC_PRIVATE) {
+ if (idx >= 0 && new->key != IPC_PRIVATE) {
err = rhashtable_insert_fast(&ids->key_ht, &new->khtnode,
ipc_kht_params);
if (err < 0) {
- idr_remove(&ids->ipcs_idr, id);
- id = err;
+ idr_remove(&ids->ipcs_idr, idx);
+ idx = err;
}
}
- if (id < 0) {
+ if (idx < 0) {
+ new->deleted = true;
spin_unlock(&new->lock);
rcu_read_unlock();
- return id;
+ return idx;
}
ids->in_use++;
- if (id > ids->max_id)
- ids->max_id = id;
-
- new->id = ipc_buildid(id, ids, new);
-
- return id;
+ if (idx > ids->max_idx)
+ ids->max_idx = idx;
+ return idx;
}
/**
@@ -432,20 +431,20 @@ static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
*/
void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
{
- int lid = ipcid_to_idx(ipcp->id);
+ int idx = ipcid_to_idx(ipcp->id);
- idr_remove(&ids->ipcs_idr, lid);
+ idr_remove(&ids->ipcs_idr, idx);
ipc_kht_remove(ids, ipcp);
ids->in_use--;
ipcp->deleted = true;
- if (unlikely(lid == ids->max_id)) {
+ if (unlikely(idx == ids->max_idx)) {
do {
- lid--;
- if (lid == -1)
+ idx--;
+ if (idx == -1)
break;
- } while (!idr_find(&ids->ipcs_idr, lid));
- ids->max_id = lid;
+ } while (!idr_find(&ids->ipcs_idr, idx));
+ ids->max_idx = idx;
}
}
@@ -463,7 +462,7 @@ void ipc_set_key_private(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
ipcp->key = IPC_PRIVATE;
}
-int ipc_rcu_getref(struct kern_ipc_perm *ptr)
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr)
{
return refcount_inc_not_zero(&ptr->refcount);
}
@@ -565,12 +564,9 @@ void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out)
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
{
struct kern_ipc_perm *out;
- int lid = ipcid_to_idx(id);
+ int idx = ipcid_to_idx(id);
- if (unlikely(!ids->tables_initialized))
- return ERR_PTR(-EINVAL);
-
- out = idr_find(&ids->ipcs_idr, lid);
+ out = idr_find(&ids->ipcs_idr, idx);
if (!out)
return ERR_PTR(-EINVAL);
@@ -578,48 +574,12 @@ struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id)
}
/**
- * ipc_lock - lock an ipc structure without rwsem held
- * @ids: ipc identifier set
- * @id: ipc id to look for
- *
- * Look for an id in the ipc ids idr and lock the associated ipc object.
- *
- * The ipc object is locked on successful exit.
- */
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
-{
- struct kern_ipc_perm *out;
-
- rcu_read_lock();
- out = ipc_obtain_object_idr(ids, id);
- if (IS_ERR(out))
- goto err;
-
- spin_lock(&out->lock);
-
- /*
- * ipc_rmid() may have already freed the ID while ipc_lock()
- * was spinning: here verify that the structure is still valid.
- * Upon races with RMID, return -EIDRM, thus indicating that
- * the ID points to a removed identifier.
- */
- if (ipc_valid_object(out))
- return out;
-
- spin_unlock(&out->lock);
- out = ERR_PTR(-EIDRM);
-err:
- rcu_read_unlock();
- return out;
-}
-
-/**
* ipc_obtain_object_check
* @ids: ipc identifier set
* @id: ipc id to look for
*
- * Similar to ipc_obtain_object_idr() but also checks
- * the ipc object reference counter.
+ * Similar to ipc_obtain_object_idr() but also checks the ipc object
+ * sequence number.
*
* Call inside the RCU critical section.
* The ipc object is *not* locked on exit.
@@ -677,7 +637,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
}
/**
- * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * ipcctl_obtain_check - retrieve an ipc object and check permissions
* @ns: ipc namespace
* @ids: the table of ids where to look for the ipc
* @id: the id of the ipc to retrieve
@@ -687,16 +647,16 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
*
* This function does some common audit and permissions check for some IPC_XXX
* cmd and is called from semctl_down, shmctl_down and msgctl_down.
- * It must be called without any lock held and:
*
- * - retrieves the ipc with the given id in the given table.
+ * It:
+ * - retrieves the ipc object with the given id in the given table.
* - performs some audit and permission check, depending on the given cmd
* - returns a pointer to the ipc object or otherwise, the corresponding
* error.
*
* Call holding the both the rwsem and the rcu read lock.
*/
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm)
{
diff --git a/ipc/util.h b/ipc/util.h
index 0aba323..0a159f6 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -18,8 +18,8 @@
#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
#define SEQ_MULTIPLIER (IPCMNI)
-int sem_init(void);
-int msg_init(void);
+void sem_init(void);
+void msg_init(void);
void shm_init(void);
struct ipc_namespace;
@@ -34,17 +34,17 @@ static inline void mq_put_mnt(struct ipc_namespace *ns) { }
#endif
#ifdef CONFIG_SYSVIPC
-int sem_init_ns(struct ipc_namespace *ns);
-int msg_init_ns(struct ipc_namespace *ns);
-int shm_init_ns(struct ipc_namespace *ns);
+void sem_init_ns(struct ipc_namespace *ns);
+void msg_init_ns(struct ipc_namespace *ns);
+void shm_init_ns(struct ipc_namespace *ns);
void sem_exit_ns(struct ipc_namespace *ns);
void msg_exit_ns(struct ipc_namespace *ns);
void shm_exit_ns(struct ipc_namespace *ns);
#else
-static inline int sem_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int msg_init_ns(struct ipc_namespace *ns) { return 0; }
-static inline int shm_init_ns(struct ipc_namespace *ns) { return 0; }
+static inline void sem_init_ns(struct ipc_namespace *ns) { }
+static inline void msg_init_ns(struct ipc_namespace *ns) { }
+static inline void shm_init_ns(struct ipc_namespace *ns) { }
static inline void sem_exit_ns(struct ipc_namespace *ns) { }
static inline void msg_exit_ns(struct ipc_namespace *ns) { }
@@ -83,7 +83,7 @@ struct ipc_ops {
struct seq_file;
struct ipc_ids;
-int ipc_init_ids(struct ipc_ids *);
+void ipc_init_ids(struct ipc_ids *ids);
#ifdef CONFIG_PROC_FS
void __init ipc_init_proc_interface(const char *path, const char *header,
int ids, int (*show)(struct seq_file *, void *));
@@ -113,12 +113,12 @@ void ipc_set_key_private(struct ipc_ids *, struct kern_ipc_perm *);
int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
/**
- * ipc_get_maxid - get the last assigned id
+ * ipc_get_maxidx - get the highest assigned index
* @ids: ipc identifier set
*
* Called with ipc_ids.rwsem held for reading.
*/
-static inline int ipc_get_maxid(struct ipc_ids *ids)
+static inline int ipc_get_maxidx(struct ipc_ids *ids)
{
if (ids->in_use == 0)
return -1;
@@ -126,7 +126,7 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
if (ids->in_use == IPCMNI)
return IPCMNI - 1;
- return ids->max_id;
+ return ids->max_idx;
}
/*
@@ -138,17 +138,16 @@ static inline int ipc_get_maxid(struct ipc_ids *ids)
* refcount is initialized by ipc_addid(), before that point call_rcu()
* must be used.
*/
-int ipc_rcu_getref(struct kern_ipc_perm *ptr);
+bool ipc_rcu_getref(struct kern_ipc_perm *ptr);
void ipc_rcu_putref(struct kern_ipc_perm *ptr,
void (*func)(struct rcu_head *head));
-struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
struct kern_ipc_perm *ipc_obtain_object_idr(struct ipc_ids *ids, int id);
void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
-struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
+struct kern_ipc_perm *ipcctl_obtain_check(struct ipc_namespace *ns,
struct ipc_ids *ids, int id, int cmd,
struct ipc64_perm *perm, int extra_perm);
@@ -173,9 +172,9 @@ extern struct msg_msg *load_msg(const void __user *src, size_t len);
extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst);
extern int store_msg(void __user *dest, struct msg_msg *msg, size_t len);
-static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int id)
{
- return uid / SEQ_MULTIPLIER != ipcp->seq;
+ return ipcid_to_seqx(id) != ipcp->seq;
}
static inline void ipc_lock_object(struct kern_ipc_perm *perm)
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index b66aced..933cb3e 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,8 +14,8 @@
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char *vmcoreinfo_data;
-static size_t vmcoreinfo_size;
+unsigned char *vmcoreinfo_data;
+size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;
/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
@@ -344,7 +344,7 @@ void crash_save_vmcoreinfo(void)
if (vmcoreinfo_data_safecopy)
vmcoreinfo_data = vmcoreinfo_data_safecopy;
- vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
+ vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
update_vmcoreinfo_note();
}
@@ -401,7 +401,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(init_uts_ns);
VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
- VMCOREINFO_SYMBOL(swapper_pg_dir);
+ VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
VMCOREINFO_SYMBOL(_stext);
VMCOREINFO_SYMBOL(vmap_area_list);
diff --git a/kernel/fork.c b/kernel/fork.c
index ff5037b..d896e9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -310,8 +310,9 @@ static struct kmem_cache *mm_cachep;
struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
{
- struct vm_area_struct *vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ struct vm_area_struct *vma;
+ vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (vma)
vma_init(vma, mm);
return vma;
@@ -1301,6 +1302,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+ tsk->last_switch_time = 0;
#endif
tsk->mm = NULL;
@@ -1425,7 +1427,9 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
return -ENOMEM;
atomic_set(&sig->count, 1);
+ spin_lock_irq(¤t->sighand->siglock);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
+ spin_unlock_irq(¤t->sighand->siglock);
return 0;
}
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 32b4794..b9132d1 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -40,6 +40,11 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
+/*
+ * Zero (default value) means use sysctl_hung_task_timeout_secs:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_interval_secs;
+
int __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
@@ -98,8 +103,11 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
+ t->last_switch_time = jiffies;
return;
}
+ if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
+ return;
trace_sched_process_hang(t);
@@ -245,8 +253,13 @@ static int watchdog(void *dummy)
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
- long t = hung_timeout_jiffies(hung_last_checked, timeout);
+ unsigned long interval = sysctl_hung_task_check_interval_secs;
+ long t;
+ if (interval == 0)
+ interval = timeout;
+ interval = min_t(unsigned long, interval, timeout);
+ t = hung_timeout_jiffies(hung_last_checked, interval);
if (t <= 0) {
if (!atomic_xchg(&reset_hung_task, 0))
check_hung_uninterruptible_tasks(timeout);
diff --git a/kernel/module.c b/kernel/module.c
index b046a32..6746c85 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -529,12 +529,30 @@ static bool check_symbol(const struct symsearch *syms,
return true;
}
+static unsigned long kernel_symbol_value(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return (unsigned long)offset_to_ptr(&sym->value_offset);
+#else
+ return sym->value;
+#endif
+}
+
+static const char *kernel_symbol_name(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+ return offset_to_ptr(&sym->name_offset);
+#else
+ return sym->name;
+#endif
+}
+
static int cmp_name(const void *va, const void *vb)
{
const char *a;
const struct kernel_symbol *b;
a = va; b = vb;
- return strcmp(a, b->name);
+ return strcmp(a, kernel_symbol_name(b));
}
static bool find_symbol_in_section(const struct symsearch *syms,
@@ -2170,7 +2188,7 @@ void *__symbol_get(const char *symbol)
sym = NULL;
preempt_enable();
- return sym ? (void *)sym->value : NULL;
+ return sym ? (void *)kernel_symbol_value(sym) : NULL;
}
EXPORT_SYMBOL_GPL(__symbol_get);
@@ -2200,10 +2218,12 @@ static int verify_export_symbols(struct module *mod)
for (i = 0; i < ARRAY_SIZE(arr); i++) {
for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
- if (find_symbol(s->name, &owner, NULL, true, false)) {
+ if (find_symbol(kernel_symbol_name(s), &owner, NULL,
+ true, false)) {
pr_err("%s: exports duplicate symbol %s"
" (owned by %s)\n",
- mod->name, s->name, module_name(owner));
+ mod->name, kernel_symbol_name(s),
+ module_name(owner));
return -ENOEXEC;
}
}
@@ -2252,7 +2272,7 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
- sym[i].st_value = ksym->value;
+ sym[i].st_value = kernel_symbol_value(ksym);
break;
}
@@ -2516,7 +2536,7 @@ static int is_exported(const char *name, unsigned long value,
ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
else
ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
- return ks != NULL && ks->value == value;
+ return ks != NULL && kernel_symbol_value(ks) == value;
}
/* As per nm */
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index e880ca2..3a6c2f8 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -105,6 +105,7 @@
def_bool y
depends on SUSPEND || HIBERNATE_CALLBACKS
select PM
+ select SRCU
config PM_SLEEP_SMP
def_bool y
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 90b6ab0..918f386 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2788,7 +2788,8 @@ EXPORT_SYMBOL(unregister_console);
void __init console_init(void)
{
int ret;
- initcall_t *call;
+ initcall_t call;
+ initcall_entry_t *ce;
/* Setup the default TTY line discipline. */
n_tty_init();
@@ -2797,13 +2798,14 @@ void __init console_init(void)
* set up the console device so that later boot sequences can
* inform about problems etc..
*/
- call = __con_initcall_start;
+ ce = __con_initcall_start;
trace_initcall_level("console");
- while (call < __con_initcall_end) {
- trace_initcall_start((*call));
- ret = (*call)();
- trace_initcall_finish((*call), ret);
- call++;
+ while (ce < __con_initcall_end) {
+ call = initcall_from_entry(ce);
+ trace_initcall_start(call);
+ ret = call();
+ trace_initcall_finish(call, ret);
+ ce++;
}
}
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 1a3e9bd..16f8414 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -190,7 +190,7 @@ static void cpuidle_idle_call(void)
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
- if (stop_tick)
+ if (stop_tick || tick_nohz_tick_stopped())
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 870f97b..5dd47f1 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -69,6 +69,8 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
wait_queue_entry_t *curr, *next;
int cnt = 0;
+ lockdep_assert_held(&wq_head->lock);
+
if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
curr = list_next_entry(bookmark, entry);
diff --git a/kernel/signal.c b/kernel/signal.c
index cfa9d10..5843c54 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -65,14 +65,14 @@ static void __user *sig_handler(struct task_struct *t, int sig)
return t->sighand->action[sig - 1].sa.sa_handler;
}
-static int sig_handler_ignored(void __user *handler, int sig)
+static inline bool sig_handler_ignored(void __user *handler, int sig)
{
/* Is it explicitly or implicitly ignored? */
return handler == SIG_IGN ||
- (handler == SIG_DFL && sig_kernel_ignore(sig));
+ (handler == SIG_DFL && sig_kernel_ignore(sig));
}
-static int sig_task_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
{
void __user *handler;
@@ -80,12 +80,12 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force)
if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) &&
handler == SIG_DFL && !(force && sig_kernel_only(sig)))
- return 1;
+ return true;
return sig_handler_ignored(handler, sig);
}
-static int sig_ignored(struct task_struct *t, int sig, bool force)
+static bool sig_ignored(struct task_struct *t, int sig, bool force)
{
/*
* Blocked signals are never ignored, since the
@@ -93,7 +93,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* unblocked.
*/
if (sigismember(&t->blocked, sig) || sigismember(&t->real_blocked, sig))
- return 0;
+ return false;
/*
* Tracers may want to know about even ignored signal unless it
@@ -101,7 +101,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* by SIGNAL_UNKILLABLE task.
*/
if (t->ptrace && sig != SIGKILL)
- return 0;
+ return false;
return sig_task_ignored(t, sig, force);
}
@@ -110,7 +110,7 @@ static int sig_ignored(struct task_struct *t, int sig, bool force)
* Re-calculate pending state from the set of locally pending
* signals, globally pending signals, and blocked signals.
*/
-static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
+static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
{
unsigned long ready;
long i;
@@ -138,20 +138,21 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
#define PENDING(p,b) has_pending_signals(&(p)->signal, (b))
-static int recalc_sigpending_tsk(struct task_struct *t)
+static bool recalc_sigpending_tsk(struct task_struct *t)
{
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
- return 1;
+ return true;
}
+
/*
* We must never clear the flag in another thread, or in current
* when it's possible the current syscall is returning -ERESTART*.
* So we don't clear it here, and only callers who know they should do.
*/
- return 0;
+ return false;
}
/*
@@ -529,13 +530,15 @@ flush_signal_handlers(struct task_struct *t, int force_default)
}
}
-int unhandled_signal(struct task_struct *tsk, int sig)
+bool unhandled_signal(struct task_struct *tsk, int sig)
{
void __user *handler = tsk->sighand->action[sig-1].sa.sa_handler;
if (is_global_init(tsk))
- return 1;
+ return true;
+
if (handler != SIG_IGN && handler != SIG_DFL)
- return 0;
+ return false;
+
/* if ptraced, let the tracer determine */
return !tsk->ptrace;
}
@@ -709,14 +712,14 @@ void signal_wake_up_state(struct task_struct *t, unsigned int state)
*
* All callers must be holding the siglock.
*/
-static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
+static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
{
struct sigqueue *q, *n;
sigset_t m;
sigandsets(&m, mask, &s->signal);
if (sigisemptyset(&m))
- return 0;
+ return;
sigandnsets(&s->signal, &s->signal, mask);
list_for_each_entry_safe(q, n, &s->list, list) {
@@ -725,7 +728,6 @@ static int flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
__sigqueue_free(q);
}
}
- return 1;
}
static inline int is_si_special(const struct siginfo *info)
@@ -742,21 +744,16 @@ static inline bool si_fromuser(const struct siginfo *info)
/*
* called with RCU read lock from check_kill_permission()
*/
-static int kill_ok_by_cred(struct task_struct *t)
+static bool kill_ok_by_cred(struct task_struct *t)
{
const struct cred *cred = current_cred();
const struct cred *tcred = __task_cred(t);
- if (uid_eq(cred->euid, tcred->suid) ||
- uid_eq(cred->euid, tcred->uid) ||
- uid_eq(cred->uid, tcred->suid) ||
- uid_eq(cred->uid, tcred->uid))
- return 1;
-
- if (ns_capable(tcred->user_ns, CAP_KILL))
- return 1;
-
- return 0;
+ return uid_eq(cred->euid, tcred->suid) ||
+ uid_eq(cred->euid, tcred->uid) ||
+ uid_eq(cred->uid, tcred->suid) ||
+ uid_eq(cred->uid, tcred->uid) ||
+ ns_capable(tcred->user_ns, CAP_KILL);
}
/*
@@ -907,16 +904,20 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
* as soon as they're available, so putting the signal on the shared queue
* will be equivalent to sending it to one such thread.
*/
-static inline int wants_signal(int sig, struct task_struct *p)
+static inline bool wants_signal(int sig, struct task_struct *p)
{
if (sigismember(&p->blocked, sig))
- return 0;
+ return false;
+
if (p->flags & PF_EXITING)
- return 0;
+ return false;
+
if (sig == SIGKILL)
- return 1;
+ return true;
+
if (task_is_stopped_or_traced(p))
- return 0;
+ return false;
+
return task_curr(p) || !signal_pending(p);
}
@@ -996,7 +997,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
return;
}
-static inline int legacy_queue(struct sigpending *signals, int sig)
+static inline bool legacy_queue(struct sigpending *signals, int sig)
{
return (sig < SIGRTMIN) && sigismember(&signals->signal, sig);
}
@@ -1380,14 +1381,15 @@ static int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
return error;
}
-static int kill_as_cred_perm(const struct cred *cred,
- struct task_struct *target)
+static inline bool kill_as_cred_perm(const struct cred *cred,
+ struct task_struct *target)
{
const struct cred *pcred = __task_cred(target);
- if (!uid_eq(cred->euid, pcred->suid) && !uid_eq(cred->euid, pcred->uid) &&
- !uid_eq(cred->uid, pcred->suid) && !uid_eq(cred->uid, pcred->uid))
- return 0;
- return 1;
+
+ return uid_eq(cred->euid, pcred->suid) ||
+ uid_eq(cred->euid, pcred->uid) ||
+ uid_eq(cred->uid, pcred->suid) ||
+ uid_eq(cred->uid, pcred->uid);
}
/* like kill_pid_info(), but doesn't use uid/euid of "current" */
@@ -1500,8 +1502,7 @@ send_sig(int sig, struct task_struct *p, int priv)
return send_sig_info(sig, __si_special(priv), p);
}
-void
-force_sig(int sig, struct task_struct *p)
+void force_sig(int sig, struct task_struct *p)
{
force_sig_info(sig, SEND_SIG_PRIV, p);
}
@@ -1512,8 +1513,7 @@ force_sig(int sig, struct task_struct *p)
* the problem was already a SIGSEGV, we'll want to
* make sure we don't even try to deliver the signal..
*/
-int
-force_sigsegv(int sig, struct task_struct *p)
+void force_sigsegv(int sig, struct task_struct *p)
{
if (sig == SIGSEGV) {
unsigned long flags;
@@ -1522,7 +1522,6 @@ force_sigsegv(int sig, struct task_struct *p)
spin_unlock_irqrestore(&p->sighand->siglock, flags);
}
force_sig(SIGSEGV, p);
- return 0;
}
int force_sig_fault(int sig, int code, void __user *addr
@@ -1923,10 +1922,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
spin_unlock_irqrestore(&sighand->siglock, flags);
}
-static inline int may_ptrace_stop(void)
+static inline bool may_ptrace_stop(void)
{
if (!likely(current->ptrace))
- return 0;
+ return false;
/*
* Are we in the middle of do_coredump?
* If so and our tracer is also part of the coredump stopping
@@ -1942,19 +1941,19 @@ static inline int may_ptrace_stop(void)
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
- return 0;
+ return false;
- return 1;
+ return true;
}
/*
* Return non-zero if there is a SIGKILL that should be waking us up.
* Called with the siglock held.
*/
-static int sigkill_pending(struct task_struct *tsk)
+static bool sigkill_pending(struct task_struct *tsk)
{
- return sigismember(&tsk->pending.signal, SIGKILL) ||
- sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
+ return sigismember(&tsk->pending.signal, SIGKILL) ||
+ sigismember(&tsk->signal->shared_pending.signal, SIGKILL);
}
/*
@@ -2334,7 +2333,7 @@ static int ptrace_signal(int signr, siginfo_t *info)
return signr;
}
-int get_signal(struct ksignal *ksig)
+bool get_signal(struct ksignal *ksig)
{
struct sighand_struct *sighand = current->sighand;
struct signal_struct *signal = current->signal;
@@ -2344,7 +2343,7 @@ int get_signal(struct ksignal *ksig)
task_work_run();
if (unlikely(uprobe_deny_signal()))
- return 0;
+ return false;
/*
* Do this once, we can't return to user-mode if freezing() == T.
@@ -2801,7 +2800,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
}
#endif
-static int do_sigpending(sigset_t *set)
+static void do_sigpending(sigset_t *set)
{
spin_lock_irq(¤t->sighand->siglock);
sigorsets(set, ¤t->pending.signal,
@@ -2810,7 +2809,6 @@ static int do_sigpending(sigset_t *set)
/* Outside the lock because only this thread touches it. */
sigandsets(set, ¤t->blocked, set);
- return 0;
}
/**
@@ -2822,15 +2820,16 @@ static int do_sigpending(sigset_t *set)
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sigsetsize))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sigsetsize))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
@@ -2838,15 +2837,13 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
compat_size_t, sigsetsize)
{
sigset_t set;
- int err;
if (sigsetsize > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err)
- err = put_compat_sigset(uset, &set, sigsetsize);
- return err;
+ do_sigpending(&set);
+
+ return put_compat_sigset(uset, &set, sigsetsize);
}
#endif
@@ -3608,25 +3605,26 @@ int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, uset)
{
sigset_t set;
- int err;
if (sizeof(old_sigset_t) > sizeof(*uset))
return -EINVAL;
- err = do_sigpending(&set);
- if (!err && copy_to_user(uset, &set, sizeof(old_sigset_t)))
- err = -EFAULT;
- return err;
+ do_sigpending(&set);
+
+ if (copy_to_user(uset, &set, sizeof(old_sigset_t)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set32)
{
sigset_t set;
- int err = do_sigpending(&set);
- if (!err)
- err = put_user(set.sig[0], set32);
- return err;
+
+ do_sigpending(&set);
+
+ return put_user(set.sig[0], set32);
}
#endif
@@ -3697,25 +3695,23 @@ SYSCALL_DEFINE4(rt_sigaction, int, sig,
size_t, sigsetsize)
{
struct k_sigaction new_sa, old_sa;
- int ret = -EINVAL;
+ int ret;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(sigset_t))
- goto out;
+ return -EINVAL;
- if (act) {
- if (copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
- return -EFAULT;
- }
+ if (act && copy_from_user(&new_sa.sa, act, sizeof(new_sa.sa)))
+ return -EFAULT;
ret = do_sigaction(sig, act ? &new_sa : NULL, oact ? &old_sa : NULL);
+ if (ret)
+ return ret;
- if (!ret && oact) {
- if (copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
- return -EFAULT;
- }
-out:
- return ret;
+ if (oact && copy_to_user(oact, &old_sa.sa, sizeof(old_sa.sa)))
+ return -EFAULT;
+
+ return 0;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index f22f76b..71ceb6c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -145,7 +145,10 @@ static int minolduid;
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
-/*this is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs */
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
#ifdef CONFIG_DETECT_HUNG_TASK
static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#endif
@@ -222,7 +225,7 @@ static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#ifdef CONFIG_MAGIC_SYSRQ
-/* Note: sysrq code uses it's own private copy */
+/* Note: sysrq code uses its own private copy */
static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
static int sysrq_sysctl_handler(struct ctl_table *table, int write,
@@ -1091,6 +1094,14 @@ static struct ctl_table kern_table[] = {
.extra2 = &hung_task_timeout_max,
},
{
+ .procname = "hung_task_check_interval_secs",
+ .data = &sysctl_hung_task_check_interval_secs,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_dohung_task_timeout_secs,
+ .extra2 = &hung_task_timeout_max,
+ },
+ {
.procname = "hung_task_warnings",
.data = &sysctl_hung_task_warnings,
.maxlen = sizeof(int),
@@ -1965,13 +1976,13 @@ static void warn_sysctl_write(struct ctl_table *table)
}
/**
- * proc_first_pos_non_zero_ignore - check if firs position is allowed
+ * proc_first_pos_non_zero_ignore - check if first position is allowed
* @ppos: file position
* @table: the sysctl table
*
* Returns true if the first position is non-zero and the sysctl_writes_strict
* mode indicates this is not allowed for numeric input types. String proc
- * hadlers can ignore the return value.
+ * handlers can ignore the return value.
*/
static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
struct ctl_table *table)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2a24a59..2868d85 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1829,6 +1829,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
mutex_lock(&q->blk_trace_mutex);
if (attr == &dev_attr_enable) {
+ if (!!value == !!q->blk_trace) {
+ ret = 0;
+ goto out_unlock_bdev;
+ }
if (value)
ret = blk_trace_setup_queue(q, bdev);
else
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 96db841..bf2c06e 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -371,6 +371,27 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
}
EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+ struct tracepoint * const *end,
+ void (*fct)(struct tracepoint *tp, void *priv),
+ void *priv)
+{
+ if (!begin)
+ return;
+
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+ const int *iter;
+
+ for (iter = (const int *)begin; iter < (const int *)end; iter++)
+ fct(offset_to_ptr(iter), priv);
+ } else {
+ struct tracepoint * const *iter;
+
+ for (iter = begin; iter < end; iter++)
+ fct(*iter, priv);
+ }
+}
+
#ifdef CONFIG_MODULES
bool trace_module_has_bad_taint(struct module *mod)
{
@@ -435,15 +456,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
* Ensure the tracer unregistered the module's probes before the module
* teardown is performed. Prevents leaks of probe and data pointers.
*/
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
- struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- WARN_ON_ONCE((*iter)->funcs);
+ WARN_ON_ONCE(tp->funcs);
}
static int tracepoint_module_coming(struct module *mod)
@@ -494,8 +509,9 @@ static void tracepoint_module_going(struct module *mod)
* Called the going notifier before checking for
* quiescence.
*/
- tp_module_going_check_quiescent(mod->tracepoints_ptrs,
- mod->tracepoints_ptrs + mod->num_tracepoints);
+ for_each_tracepoint_range(mod->tracepoints_ptrs,
+ mod->tracepoints_ptrs + mod->num_tracepoints,
+ tp_module_going_check_quiescent, NULL);
break;
}
}
@@ -547,19 +563,6 @@ static __init int init_tracepoints(void)
__initcall(init_tracepoints);
#endif /* CONFIG_MODULES */
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
- struct tracepoint * const *end,
- void (*fct)(struct tracepoint *tp, void *priv),
- void *priv)
-{
- struct tracepoint * const *iter;
-
- if (!begin)
- return;
- for (iter = begin; iter < end; iter++)
- fct(*iter, priv);
-}
-
/**
* for_each_kernel_tracepoint - iteration on all kernel tracepoints
* @fct: callback
diff --git a/kernel/user.c b/kernel/user.c
index 36288d8..0df9b16 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -96,7 +96,7 @@ static DEFINE_SPINLOCK(uidhash_lock);
/* root_user.__count is 1, for init task cred */
struct user_struct root_user = {
- .__count = ATOMIC_INIT(1),
+ .__count = REFCOUNT_INIT(1),
.processes = ATOMIC_INIT(1),
.sigpending = ATOMIC_INIT(0),
.locked_shm = 0,
@@ -123,7 +123,7 @@ static struct user_struct *uid_hash_find(kuid_t uid, struct hlist_head *hashent)
hlist_for_each_entry(user, hashent, uidhash_node) {
if (uid_eq(user->uid, uid)) {
- atomic_inc(&user->__count);
+ refcount_inc(&user->__count);
return user;
}
}
@@ -169,11 +169,8 @@ void free_uid(struct user_struct *up)
if (!up)
return;
- local_irq_save(flags);
- if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+ if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
free_user(up, flags);
- else
- local_irq_restore(flags);
}
struct user_struct *alloc_uid(kuid_t uid)
@@ -191,7 +188,7 @@ struct user_struct *alloc_uid(kuid_t uid)
goto out_unlock;
new->uid = uid;
- atomic_set(&new->__count, 1);
+ refcount_set(&new->__count, 1);
ratelimit_state_init(&new->ratelimit, HZ, 100);
ratelimit_set_flags(&new->ratelimit, RATELIMIT_MSG_ON_RELEASE);
diff --git a/lib/.gitignore b/lib/.gitignore
index 09aae85..f2a39c9 100644
--- a/lib/.gitignore
+++ b/lib/.gitignore
@@ -2,5 +2,7 @@
# Generated files
#
gen_crc32table
+gen_crc64table
crc32table.h
+crc64table.h
oid_registry_data.c
diff --git a/lib/Kconfig b/lib/Kconfig
index 706836e..a3928d4 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -170,6 +170,14 @@
endchoice
+config CRC64
+ tristate "CRC64 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC64 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC64
+ functions require M here.
+
config CRC4
tristate "CRC4 functions"
help
@@ -223,7 +231,6 @@
config RANDOM32_SELFTEST
bool "PRNG perform self test on init"
- default n
help
This option enables the 32 bit PRNG library functions to perform a
self test on initialization.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ab1b599..3589765 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1220,7 +1220,6 @@
tristate "torture tests for locking"
depends on DEBUG_KERNEL
select TORTURE_TEST
- default n
help
This option provides a kernel module that runs torture tests
on kernel locking primitives. The kernel module may be built
@@ -1687,7 +1686,6 @@
tristate "Linux Kernel Dump Test Tool Module"
depends on DEBUG_FS
depends on BLOCK
- default n
help
This module enables testing of the different dumping mechanisms by
inducing system failures at predefined crash points.
@@ -1721,7 +1719,6 @@
bool "Kprobes sanity tests"
depends on DEBUG_KERNEL
depends on KPROBES
- default n
help
This option provides for testing basic kprobes functionality on
boot. Samples of kprobe and kretprobe are inserted and
@@ -1732,7 +1729,6 @@
config BACKTRACE_SELF_TEST
tristate "Self test for the backtrace code"
depends on DEBUG_KERNEL
- default n
help
This option provides a kernel module that can be used to test
the kernel stack backtrace code. This option is not useful
@@ -1802,7 +1798,6 @@
config TEST_BITMAP
tristate "Test bitmap_*() family of functions at runtime"
- default n
help
Enable this option to test the bitmap functions at boot.
@@ -1823,7 +1818,6 @@
config TEST_RHASHTABLE
tristate "Perform selftest on resizable hash table"
- default n
help
Enable this option to test the rhashtable functions at boot.
@@ -1831,7 +1825,6 @@
config TEST_HASH
tristate "Perform selftest on hash functions"
- default n
help
Enable this option to test the kernel's integer (<linux/hash.h>),
string (<linux/stringhash.h>), and siphash (<linux/siphash.h>)
@@ -1842,7 +1835,6 @@
config TEST_PARMAN
tristate "Perform selftest on priority array manager"
- default n
depends on PARMAN
help
Enable this option to test priority array manager on boot
@@ -1852,7 +1844,6 @@
config TEST_LKM
tristate "Test module loading with 'hello world' module"
- default n
depends on m
help
This builds the "test_module" module that emits "Hello, world"
@@ -1866,7 +1857,6 @@
config TEST_USER_COPY
tristate "Test user/kernel boundary protections"
- default n
depends on m
help
This builds the "test_user_copy" module that runs sanity checks
@@ -1879,7 +1869,6 @@
config TEST_BPF
tristate "Test BPF filter functionality"
- default n
depends on m && NET
help
This builds the "test_bpf" module that runs various test vectors
@@ -1893,7 +1882,6 @@
config FIND_BIT_BENCHMARK
tristate "Test find_bit functions"
- default n
help
This builds the "test_find_bit" module that measure find_*_bit()
functions performance.
@@ -1902,7 +1890,6 @@
config TEST_FIRMWARE
tristate "Test firmware loading via userspace interface"
- default n
depends on FW_LOADER
help
This builds the "test_firmware" module that creates a userspace
@@ -1915,7 +1902,6 @@
config TEST_SYSCTL
tristate "sysctl test driver"
- default n
depends on PROC_SYSCTL
help
This builds the "test_sysctl" module. This driver enables to test the
@@ -1926,7 +1912,6 @@
config TEST_UDELAY
tristate "udelay test driver"
- default n
help
This builds the "udelay_test" module that helps to make sure
that udelay() is working properly.
@@ -1935,7 +1920,6 @@
config TEST_STATIC_KEYS
tristate "Test static keys"
- default n
depends on m
help
Test the static key interfaces.
@@ -1944,7 +1928,6 @@
config TEST_KMOD
tristate "kmod stress tester"
- default n
depends on m
depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS
depends on NETDEVICES && NET_CORE && INET # for TUN
diff --git a/lib/Makefile b/lib/Makefile
index d95bb25..9baefb6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -103,6 +103,7 @@
obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_CRC64) += crc64.o
obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o
obj-$(CONFIG_CRC4) += crc4.o
obj-$(CONFIG_CRC7) += crc7.o
@@ -217,7 +218,9 @@
obj-$(CONFIG_PRIME_NUMBERS) += prime_numbers.o
hostprogs-y := gen_crc32table
+hostprogs-y += gen_crc64table
clean-files := crc32table.h
+clean-files += crc64table.h
$(obj)/crc32.o: $(obj)/crc32table.h
@@ -227,6 +230,14 @@
$(obj)/crc32table.h: $(obj)/gen_crc32table
$(call cmd,crc32)
+$(obj)/crc64.o: $(obj)/crc64table.h
+
+quiet_cmd_crc64 = GEN $@
+ cmd_crc64 = $< > $@
+
+$(obj)/crc64table.h: $(obj)/gen_crc64table
+ $(call cmd,crc64)
+
#
# Build a fast OID lookip registry from include/linux/oid_registry.h
#
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 730969c..2fd07f6 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -1152,14 +1152,10 @@ EXPORT_SYMBOL(bitmap_free);
* @buf: array of u32 (in host byte order), the source bitmap
* @nbits: number of bits in @bitmap
*/
-void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
- unsigned int nbits)
+void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits)
{
unsigned int i, halfwords;
- if (!nbits)
- return;
-
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
bitmap[i/2] = (unsigned long) buf[i];
@@ -1183,9 +1179,6 @@ void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits)
{
unsigned int i, halfwords;
- if (!nbits)
- return;
-
halfwords = DIV_ROUND_UP(nbits, 32);
for (i = 0; i < halfwords; i++) {
buf[i] = (u32) (bitmap[i/2] & UINT_MAX);
diff --git a/lib/crc64.c b/lib/crc64.c
new file mode 100644
index 0000000..0ef8ae6
--- /dev/null
+++ b/lib/crc64.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Normal 64-bit CRC calculation.
+ *
+ * This is a basic crc64 implementation following ECMA-182 specification,
+ * which can be found from,
+ * http://www.ecma-international.org/publications/standards/Ecma-182.htm
+ *
+ * Dr. Ross N. Williams has a great document to introduce the idea of CRC
+ * algorithm, here the CRC64 code is also inspired by the table-driven
+ * algorithm and detail example from this paper. This paper can be found
+ * from,
+ * http://www.ross.net/crc/download/crc_v3.txt
+ *
+ * crc64table[256] is the lookup table of a table-driven 64-bit CRC
+ * calculation, which is generated by gen_crc64table.c in kernel build
+ * time. The polynomial of crc64 arithmetic is from ECMA-182 specification
+ * as well, which is defined as,
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include "crc64table.h"
+
+MODULE_DESCRIPTION("CRC64 calculations");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+{
+ size_t i, t;
+
+ const unsigned char *_p = p;
+
+ for (i = 0; i < len; i++) {
+ t = ((crc >> 56) ^ (*_p++)) & 0xFF;
+ crc = crc64table[t] ^ (crc << 8);
+ }
+
+ return crc;
+}
+EXPORT_SYMBOL_GPL(crc64_be);
diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
new file mode 100644
index 0000000..9011926
--- /dev/null
+++ b/lib/gen_crc64table.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate lookup table for the table-driven CRC64 calculation.
+ *
+ * gen_crc64table is executed in kernel build time and generates
+ * lib/crc64table.h. This header is included by lib/crc64.c for
+ * the table-driven CRC64 calculation.
+ *
+ * See lib/crc64.c for more information about which specification
+ * and polynomial arithmetic that gen_crc64table.c follows to
+ * generate the lookup table.
+ *
+ * Copyright 2018 SUSE Linux.
+ * Author: Coly Li <colyli@suse.de>
+ */
+#include <inttypes.h>
+#include <stdio.h>
+
+#include <linux/swab.h>
+
+#define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
+
+static uint64_t crc64_table[256] = {0};
+
+static void generate_crc64_table(void)
+{
+ uint64_t i, j, c, crc;
+
+ for (i = 0; i < 256; i++) {
+ crc = 0;
+ c = i << 56;
+
+ for (j = 0; j < 8; j++) {
+ if ((crc ^ c) & 0x8000000000000000ULL)
+ crc = (crc << 1) ^ CRC64_ECMA182_POLY;
+ else
+ crc <<= 1;
+ c <<= 1;
+ }
+
+ crc64_table[i] = crc;
+ }
+}
+
+static void print_crc64_table(void)
+{
+ int i;
+
+ printf("/* this file is generated - do not edit */\n\n");
+ printf("#include <linux/types.h>\n");
+ printf("#include <linux/cache.h>\n\n");
+ printf("static const u64 ____cacheline_aligned crc64table[256] = {\n");
+ for (i = 0; i < 256; i++) {
+ printf("\t0x%016" PRIx64 "ULL", crc64_table[i]);
+ if (i & 0x1)
+ printf(",\n");
+ else
+ printf(", ");
+ }
+ printf("};\n");
+}
+
+int main(int argc, char *argv[])
+{
+ generate_crc64_table();
+ print_crc64_table();
+ return 0;
+}
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index ae4223e..310e29b5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -174,17 +174,15 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
int i;
size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
- if (gfp != GFP_KERNEL)
- tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
- else
- tbl = kvzalloc(size, gfp);
+ tbl = kvzalloc(size, gfp);
size = nbuckets;
- if (tbl == NULL && gfp != GFP_KERNEL) {
+ if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
nbuckets = 0;
}
+
if (tbl == NULL)
return NULL;
@@ -450,7 +448,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
err = -ENOMEM;
- new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
+ new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
if (new_tbl == NULL)
goto fail;
@@ -1060,9 +1058,16 @@ int rhashtable_init(struct rhashtable *ht,
}
}
+ /*
+ * This is api initialization and thus we need to guarantee the
+ * initial rhashtable allocation. Upon failure, retry with the
+ * smallest possible size with __GFP_NOFAIL semantics.
+ */
tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
- if (tbl == NULL)
- return -ENOMEM;
+ if (unlikely(tbl == NULL)) {
+ size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
+ tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
+ }
atomic_set(&ht->nelems, 0);
diff --git a/lib/test_debug_virtual.c b/lib/test_debug_virtual.c
index b9cdeec..d5a06ad 100644
--- a/lib/test_debug_virtual.c
+++ b/lib/test_debug_virtual.c
@@ -15,7 +15,7 @@ struct foo {
unsigned int bar;
};
-struct foo *foo;
+static struct foo *foo;
static int __init test_debug_virtual_init(void)
{
diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c
index 3f415d8..626f580 100644
--- a/lib/test_hexdump.c
+++ b/lib/test_hexdump.c
@@ -18,7 +18,7 @@ static const unsigned char data_b[] = {
static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C...";
-static const char * const test_data_1_le[] __initconst = {
+static const char * const test_data_1[] __initconst = {
"be", "32", "db", "7b", "0a", "18", "93", "b2",
"70", "ba", "c4", "24", "7d", "83", "34", "9b",
"a6", "9c", "31", "ad", "9c", "0f", "ac", "e9",
@@ -32,16 +32,33 @@ static const char * const test_data_2_le[] __initconst = {
"d14c", "9919", "b143", "0caf",
};
+static const char * const test_data_2_be[] __initconst = {
+ "be32", "db7b", "0a18", "93b2",
+ "70ba", "c424", "7d83", "349b",
+ "a69c", "31ad", "9c0f", "ace9",
+ "4cd1", "1999", "43b1", "af0c",
+};
+
static const char * const test_data_4_le[] __initconst = {
"7bdb32be", "b293180a", "24c4ba70", "9b34837d",
"ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143",
};
+static const char * const test_data_4_be[] __initconst = {
+ "be32db7b", "0a1893b2", "70bac424", "7d83349b",
+ "a69c31ad", "9c0face9", "4cd11999", "43b1af0c",
+};
+
static const char * const test_data_8_le[] __initconst = {
"b293180a7bdb32be", "9b34837d24c4ba70",
"e9ac0f9cad319ca6", "0cafb1439919d14c",
};
+static const char * const test_data_8_be[] __initconst = {
+ "be32db7b0a1893b2", "70bac4247d83349b",
+ "a69c31ad9c0face9", "4cd1199943b1af0c",
+};
+
#define FILL_CHAR '#'
static unsigned total_tests __initdata;
@@ -56,6 +73,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize,
size_t l = len;
int gs = groupsize, rs = rowsize;
unsigned int i;
+ const bool is_be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
if (rs != 16 && rs != 32)
rs = 16;
@@ -67,13 +85,13 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize,
gs = 1;
if (gs == 8)
- result = test_data_8_le;
+ result = is_be ? test_data_8_be : test_data_8_le;
else if (gs == 4)
- result = test_data_4_le;
+ result = is_be ? test_data_4_be : test_data_4_le;
else if (gs == 2)
- result = test_data_2_le;
+ result = is_be ? test_data_2_be : test_data_2_le;
else
- result = test_data_1_le;
+ result = test_data_1;
/* hex dump */
p = test;
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index e5e606e..9a7b8b0 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -46,7 +46,8 @@
Fill the pages with poison patterns after free_pages() and verify
the patterns before alloc_pages. The filling of the memory helps
reduce the risk of information leaks from freed data. This does
- have a potential performance impact.
+ have a potential performance impact if enabled with the
+ "page_poison=1" kernel boot option.
Note that "poison" here is not the same thing as the "HWPoison"
for CONFIG_MEMORY_FAILURE. This is software poisoning only.
@@ -65,7 +66,7 @@
say N.
config PAGE_POISONING_ZERO
- bool "Use zero for poisoning instead of random data"
+ bool "Use zero for poisoning instead of debugging value"
depends on PAGE_POISONING
---help---
Instead of using the existing poison value, fill the pages with
@@ -75,7 +76,6 @@
allocation.
If unsure, say N
- bool
config DEBUG_PAGE_REF
bool "Enable tracepoint to track down page reference manipulation"
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 2e5d3df..f5981e9 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -438,10 +438,10 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
if (new_congested) {
/* !found and storage for new one already allocated, insert */
congested = new_congested;
- new_congested = NULL;
rb_link_node(&congested->rb_node, parent, node);
rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- goto found;
+ spin_unlock_irqrestore(&cgwb_lock, flags);
+ return congested;
}
spin_unlock_irqrestore(&cgwb_lock, flags);
@@ -451,13 +451,13 @@ wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
if (!new_congested)
return NULL;
- atomic_set(&new_congested->refcnt, 0);
+ refcount_set(&new_congested->refcnt, 1);
new_congested->__bdi = bdi;
new_congested->blkcg_id = blkcg_id;
goto retry;
found:
- atomic_inc(&congested->refcnt);
+ refcount_inc(&congested->refcnt);
spin_unlock_irqrestore(&cgwb_lock, flags);
kfree(new_congested);
return congested;
@@ -473,11 +473,8 @@ void wb_congested_put(struct bdi_writeback_congested *congested)
{
unsigned long flags;
- local_irq_save(flags);
- if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) {
- local_irq_restore(flags);
+ if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
return;
- }
/* bdi might already have been destroyed leaving @congested unlinked */
if (congested->__bdi) {
@@ -804,7 +801,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
if (!bdi->wb_congested)
return -ENOMEM;
- atomic_set(&bdi->wb_congested->refcnt, 1);
+ refcount_set(&bdi->wb_congested->refcnt, 1);
err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
if (err) {
diff --git a/mm/hmm.c b/mm/hmm.c
index 76e7a05..0b05545 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -177,16 +177,19 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
up_write(&hmm->mirrors_sem);
}
-static void hmm_invalidate_range_start(struct mmu_notifier *mn,
+static int hmm_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct hmm *hmm = mm->hmm;
VM_BUG_ON(!hmm);
atomic_inc(&hmm->sequence);
+
+ return 0;
}
static void hmm_invalidate_range_end(struct mmu_notifier *mn,
diff --git a/mm/internal.h b/mm/internal.h
index 9e3654d..dab088c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -389,18 +389,6 @@ static inline struct page *mem_map_next(struct page *iter,
return iter + 1;
}
-/*
- * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
- * so all functions starting at paging_init should be marked __init
- * in those cases. SPARSEMEM, however, allows for memory hotplug,
- * and alloc_bootmem_node is not used.
- */
-#ifdef CONFIG_SPARSEMEM
-#define __paginginit __meminit
-#else
-#define __paginginit __init
-#endif
-
/* Memory initialisation debug and verification */
enum mminit_level {
MMINIT_WARNING,
diff --git a/mm/ksm.c b/mm/ksm.c
index 2621be5..1bd514c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -703,7 +703,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
* We cannot do anything with the page while its refcount is 0.
* Usually 0 means free, or tail of a higher-order page: in which
* case this node is no longer referenced, and should be freed;
- * however, it might mean that the page is under page_freeze_refs().
+ * however, it might mean that the page is under page_ref_freeze().
* The __remove_mapping() case is easy, again the node is now stale;
* but if page is swapcache in migrate_page_move_mapping(), it might
* still be our page, in which case it's essential to keep the node.
@@ -714,7 +714,7 @@ static struct page *get_ksm_page(struct stable_node *stable_node, bool lock_it)
* work here too. We have chosen the !PageSwapCache test to
* optimize the common case, when the page is or is about to
* be freed: PageSwapCache is cleared (under spin_lock_irq)
- * in the freeze_refs section of __remove_mapping(); but Anon
+ * in the ref_freeze section of __remove_mapping(); but Anon
* page->mapping reset to NULL later, in free_pages_prepare().
*/
if (!PageSwapCache(page))
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6a92189..4ead5a4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1777,6 +1777,62 @@ bool mem_cgroup_oom_synchronize(bool handle)
}
/**
+ * mem_cgroup_get_oom_group - get a memory cgroup to clean up after OOM
+ * @victim: task to be killed by the OOM killer
+ * @oom_domain: memcg in case of memcg OOM, NULL in case of system-wide OOM
+ *
+ * Returns a pointer to a memory cgroup, which has to be cleaned up
+ * by killing all belonging OOM-killable tasks.
+ *
+ * Caller has to call mem_cgroup_put() on the returned non-NULL memcg.
+ */
+struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
+ struct mem_cgroup *oom_domain)
+{
+ struct mem_cgroup *oom_group = NULL;
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return NULL;
+
+ if (!oom_domain)
+ oom_domain = root_mem_cgroup;
+
+ rcu_read_lock();
+
+ memcg = mem_cgroup_from_task(victim);
+ if (memcg == root_mem_cgroup)
+ goto out;
+
+ /*
+ * Traverse the memory cgroup hierarchy from the victim task's
+ * cgroup up to the OOMing cgroup (or root) to find the
+ * highest-level memory cgroup with oom.group set.
+ */
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ if (memcg->oom_group)
+ oom_group = memcg;
+
+ if (memcg == oom_domain)
+ break;
+ }
+
+ if (oom_group)
+ css_get(&oom_group->css);
+out:
+ rcu_read_unlock();
+
+ return oom_group;
+}
+
+void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
+{
+ pr_info("Tasks in ");
+ pr_cont_cgroup_path(memcg->css.cgroup);
+ pr_cont(" are going to be killed due to memory.oom.group set\n");
+}
+
+/**
* lock_page_memcg - lock a page->mem_cgroup binding
* @page: the page
*
@@ -2899,29 +2955,34 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
return retval;
}
-static void tree_stat(struct mem_cgroup *memcg, unsigned long *stat)
+struct accumulated_stats {
+ unsigned long stat[MEMCG_NR_STAT];
+ unsigned long events[NR_VM_EVENT_ITEMS];
+ unsigned long lru_pages[NR_LRU_LISTS];
+ const unsigned int *stats_array;
+ const unsigned int *events_array;
+ int stats_size;
+ int events_size;
+};
+
+static void accumulate_memcg_tree(struct mem_cgroup *memcg,
+ struct accumulated_stats *acc)
{
- struct mem_cgroup *iter;
+ struct mem_cgroup *mi;
int i;
- memset(stat, 0, sizeof(*stat) * MEMCG_NR_STAT);
+ for_each_mem_cgroup_tree(mi, memcg) {
+ for (i = 0; i < acc->stats_size; i++)
+ acc->stat[i] += memcg_page_state(mi,
+ acc->stats_array ? acc->stats_array[i] : i);
- for_each_mem_cgroup_tree(iter, memcg) {
- for (i = 0; i < MEMCG_NR_STAT; i++)
- stat[i] += memcg_page_state(iter, i);
- }
-}
+ for (i = 0; i < acc->events_size; i++)
+ acc->events[i] += memcg_sum_events(mi,
+ acc->events_array ? acc->events_array[i] : i);
-static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
-{
- struct mem_cgroup *iter;
- int i;
-
- memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
-
- for_each_mem_cgroup_tree(iter, memcg) {
- for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
- events[i] += memcg_sum_events(iter, i);
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ acc->lru_pages[i] +=
+ mem_cgroup_nr_lru_pages(mi, BIT(i));
}
}
@@ -3332,6 +3393,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
unsigned long memory, memsw;
struct mem_cgroup *mi;
unsigned int i;
+ struct accumulated_stats acc;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
@@ -3364,32 +3426,27 @@ static int memcg_stat_show(struct seq_file *m, void *v)
seq_printf(m, "hierarchical_memsw_limit %llu\n",
(u64)memsw * PAGE_SIZE);
- for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
- unsigned long long val = 0;
+ memset(&acc, 0, sizeof(acc));
+ acc.stats_size = ARRAY_SIZE(memcg1_stats);
+ acc.stats_array = memcg1_stats;
+ acc.events_size = ARRAY_SIZE(memcg1_events);
+ acc.events_array = memcg1_events;
+ accumulate_memcg_tree(memcg, &acc);
+ for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
continue;
- for_each_mem_cgroup_tree(mi, memcg)
- val += memcg_page_state(mi, memcg1_stats[i]) *
- PAGE_SIZE;
- seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], val);
+ seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
+ (u64)acc.stat[i] * PAGE_SIZE);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
- unsigned long long val = 0;
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
+ seq_printf(m, "total_%s %llu\n", memcg1_event_names[i],
+ (u64)acc.events[i]);
- for_each_mem_cgroup_tree(mi, memcg)
- val += memcg_sum_events(mi, memcg1_events[i]);
- seq_printf(m, "total_%s %llu\n", memcg1_event_names[i], val);
- }
-
- for (i = 0; i < NR_LRU_LISTS; i++) {
- unsigned long long val = 0;
-
- for_each_mem_cgroup_tree(mi, memcg)
- val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
- seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i], val);
- }
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_lru_names[i],
+ (u64)acc.lru_pages[i] * PAGE_SIZE);
#ifdef CONFIG_DEBUG_VM
{
@@ -5486,8 +5543,7 @@ static int memory_events_show(struct seq_file *m, void *v)
static int memory_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- unsigned long stat[MEMCG_NR_STAT];
- unsigned long events[NR_VM_EVENT_ITEMS];
+ struct accumulated_stats acc;
int i;
/*
@@ -5501,70 +5557,97 @@ static int memory_stat_show(struct seq_file *m, void *v)
* Current memory state:
*/
- tree_stat(memcg, stat);
- tree_events(memcg, events);
+ memset(&acc, 0, sizeof(acc));
+ acc.stats_size = MEMCG_NR_STAT;
+ acc.events_size = NR_VM_EVENT_ITEMS;
+ accumulate_memcg_tree(memcg, &acc);
seq_printf(m, "anon %llu\n",
- (u64)stat[MEMCG_RSS] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_RSS] * PAGE_SIZE);
seq_printf(m, "file %llu\n",
- (u64)stat[MEMCG_CACHE] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_CACHE] * PAGE_SIZE);
seq_printf(m, "kernel_stack %llu\n",
- (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024);
+ (u64)acc.stat[MEMCG_KERNEL_STACK_KB] * 1024);
seq_printf(m, "slab %llu\n",
- (u64)(stat[NR_SLAB_RECLAIMABLE] +
- stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
+ (u64)(acc.stat[NR_SLAB_RECLAIMABLE] +
+ acc.stat[NR_SLAB_UNRECLAIMABLE]) * PAGE_SIZE);
seq_printf(m, "sock %llu\n",
- (u64)stat[MEMCG_SOCK] * PAGE_SIZE);
+ (u64)acc.stat[MEMCG_SOCK] * PAGE_SIZE);
seq_printf(m, "shmem %llu\n",
- (u64)stat[NR_SHMEM] * PAGE_SIZE);
+ (u64)acc.stat[NR_SHMEM] * PAGE_SIZE);
seq_printf(m, "file_mapped %llu\n",
- (u64)stat[NR_FILE_MAPPED] * PAGE_SIZE);
+ (u64)acc.stat[NR_FILE_MAPPED] * PAGE_SIZE);
seq_printf(m, "file_dirty %llu\n",
- (u64)stat[NR_FILE_DIRTY] * PAGE_SIZE);
+ (u64)acc.stat[NR_FILE_DIRTY] * PAGE_SIZE);
seq_printf(m, "file_writeback %llu\n",
- (u64)stat[NR_WRITEBACK] * PAGE_SIZE);
+ (u64)acc.stat[NR_WRITEBACK] * PAGE_SIZE);
- for (i = 0; i < NR_LRU_LISTS; i++) {
- struct mem_cgroup *mi;
- unsigned long val = 0;
-
- for_each_mem_cgroup_tree(mi, memcg)
- val += mem_cgroup_nr_lru_pages(mi, BIT(i));
- seq_printf(m, "%s %llu\n",
- mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE);
- }
+ for (i = 0; i < NR_LRU_LISTS; i++)
+ seq_printf(m, "%s %llu\n", mem_cgroup_lru_names[i],
+ (u64)acc.lru_pages[i] * PAGE_SIZE);
seq_printf(m, "slab_reclaimable %llu\n",
- (u64)stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
+ (u64)acc.stat[NR_SLAB_RECLAIMABLE] * PAGE_SIZE);
seq_printf(m, "slab_unreclaimable %llu\n",
- (u64)stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
+ (u64)acc.stat[NR_SLAB_UNRECLAIMABLE] * PAGE_SIZE);
/* Accumulated memory events */
- seq_printf(m, "pgfault %lu\n", events[PGFAULT]);
- seq_printf(m, "pgmajfault %lu\n", events[PGMAJFAULT]);
+ seq_printf(m, "pgfault %lu\n", acc.events[PGFAULT]);
+ seq_printf(m, "pgmajfault %lu\n", acc.events[PGMAJFAULT]);
- seq_printf(m, "pgrefill %lu\n", events[PGREFILL]);
- seq_printf(m, "pgscan %lu\n", events[PGSCAN_KSWAPD] +
- events[PGSCAN_DIRECT]);
- seq_printf(m, "pgsteal %lu\n", events[PGSTEAL_KSWAPD] +
- events[PGSTEAL_DIRECT]);
- seq_printf(m, "pgactivate %lu\n", events[PGACTIVATE]);
- seq_printf(m, "pgdeactivate %lu\n", events[PGDEACTIVATE]);
- seq_printf(m, "pglazyfree %lu\n", events[PGLAZYFREE]);
- seq_printf(m, "pglazyfreed %lu\n", events[PGLAZYFREED]);
+ seq_printf(m, "pgrefill %lu\n", acc.events[PGREFILL]);
+ seq_printf(m, "pgscan %lu\n", acc.events[PGSCAN_KSWAPD] +
+ acc.events[PGSCAN_DIRECT]);
+ seq_printf(m, "pgsteal %lu\n", acc.events[PGSTEAL_KSWAPD] +
+ acc.events[PGSTEAL_DIRECT]);
+ seq_printf(m, "pgactivate %lu\n", acc.events[PGACTIVATE]);
+ seq_printf(m, "pgdeactivate %lu\n", acc.events[PGDEACTIVATE]);
+ seq_printf(m, "pglazyfree %lu\n", acc.events[PGLAZYFREE]);
+ seq_printf(m, "pglazyfreed %lu\n", acc.events[PGLAZYFREED]);
seq_printf(m, "workingset_refault %lu\n",
- stat[WORKINGSET_REFAULT]);
+ acc.stat[WORKINGSET_REFAULT]);
seq_printf(m, "workingset_activate %lu\n",
- stat[WORKINGSET_ACTIVATE]);
+ acc.stat[WORKINGSET_ACTIVATE]);
seq_printf(m, "workingset_nodereclaim %lu\n",
- stat[WORKINGSET_NODERECLAIM]);
+ acc.stat[WORKINGSET_NODERECLAIM]);
return 0;
}
+static int memory_oom_group_show(struct seq_file *m, void *v)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+ seq_printf(m, "%d\n", memcg->oom_group);
+
+ return 0;
+}
+
+static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+ int ret, oom_group;
+
+ buf = strstrip(buf);
+ if (!buf)
+ return -EINVAL;
+
+ ret = kstrtoint(buf, 0, &oom_group);
+ if (ret)
+ return ret;
+
+ if (oom_group != 0 && oom_group != 1)
+ return -EINVAL;
+
+ memcg->oom_group = oom_group;
+
+ return nbytes;
+}
+
static struct cftype memory_files[] = {
{
.name = "current",
@@ -5606,6 +5689,12 @@ static struct cftype memory_files[] = {
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = memory_stat_show,
},
+ {
+ .name = "oom.group",
+ .flags = CFTYPE_NOT_ON_ROOT | CFTYPE_NS_DELEGATABLE,
+ .seq_show = memory_oom_group_show,
+ .write = memory_oom_group_write,
+ },
{ } /* terminate */
};
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9d142b9..c83a174 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1167,7 +1167,7 @@ int memory_failure(unsigned long pfn, int flags)
* R/W the page; let's pray that the page has been
* used and will be freed some time later.
* In fact it's dangerous to directly bump up page count from 0,
- * that may make page_freeze_refs()/page_unfreeze_refs() mismatch.
+ * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) {
if (is_free_buddy_page(p)) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4eb6e82..9eea6e8 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -982,8 +982,6 @@ static void reset_node_present_pages(pg_data_t *pgdat)
static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
{
struct pglist_data *pgdat;
- unsigned long zones_size[MAX_NR_ZONES] = {0};
- unsigned long zholes_size[MAX_NR_ZONES] = {0};
unsigned long start_pfn = PFN_DOWN(start);
pgdat = NODE_DATA(nid);
@@ -1006,8 +1004,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
/* we can use NODE_DATA(nid) from here */
+ pgdat->node_id = nid;
+ pgdat->node_start_pfn = start_pfn;
+
/* init node's zones as empty zones, we don't have any present pages.*/
- free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+ free_area_init_core_hotplug(nid);
pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
/*
@@ -1017,18 +1018,11 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
build_all_zonelists(pgdat);
/*
- * zone->managed_pages is set to an approximate value in
- * free_area_init_core(), which will cause
- * /sys/device/system/node/nodeX/meminfo has wrong data.
- * So reset it to 0 before any memory is onlined.
- */
- reset_node_managed_pages(pgdat);
-
- /*
* When memory is hot-added, all the memory is in offline state. So
* clear all zones' present_pages because they will be updated in
* online_pages() and offline_pages().
*/
+ reset_node_managed_pages(pgdat);
reset_node_present_pages(pgdat);
return pgdat;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 01f1a14..da858f7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1784,7 +1784,7 @@ unsigned int mempolicy_slab_node(void)
zonelist = &NODE_DATA(node)->node_zonelists[ZONELIST_FALLBACK];
z = first_zones_zonelist(zonelist, highest_zoneidx,
&policy->v.nodes);
- return z->zone ? z->zone->node : node;
+ return z->zone ? zone_to_nid(z->zone) : node;
}
default:
@@ -2326,7 +2326,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
node_zonelist(numa_node_id(), GFP_HIGHUSER),
gfp_zone(GFP_HIGHUSER),
&pol->v.nodes);
- polnid = z->zone->node;
+ polnid = zone_to_nid(z->zone);
break;
default:
@@ -2504,7 +2504,6 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
goto put_new;
/* Create pseudo-vma that contains just the policy */
- memset(&pvma, 0, sizeof(struct vm_area_struct));
vma_init(&pvma, NULL);
pvma.vm_end = TASK_SIZE; /* policy covers entire file */
mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
diff --git a/mm/mempool.c b/mm/mempool.c
index 44f5fa9..0ef8cc8 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -213,6 +213,7 @@ EXPORT_SYMBOL(mempool_init_node);
/**
* mempool_init - initialize a memory pool
+ * @pool: pointer to the memory pool that should be initialized
* @min_nr: the minimum number of elements guaranteed to be
* allocated for this pool.
* @alloc_fn: user-defined element-allocation function.
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5b72266..6838a53 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -53,13 +53,8 @@ void __init mminit_verify_zonelist(void)
zone->name);
/* Iterate the zonelist */
- for_each_zone_zonelist(zone, z, zonelist, zoneid) {
-#ifdef CONFIG_NUMA
- pr_cont("%d:%s ", zone->node, zone->name);
-#else
- pr_cont("0:%s ", zone->name);
-#endif /* CONFIG_NUMA */
- }
+ for_each_zone_zonelist(zone, z, zonelist, zoneid)
+ pr_cont("%d:%s ", zone_to_nid(zone), zone->name);
pr_cont("\n");
}
}
diff --git a/mm/mmap.c b/mm/mmap.c
index 8d6449e..5f2b2b1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3063,9 +3063,7 @@ void exit_mmap(struct mm_struct *mm)
* which clears VM_LOCKED, otherwise the oom reaper cannot
* reliably test it.
*/
- mutex_lock(&oom_lock);
- __oom_reap_task_mm(mm);
- mutex_unlock(&oom_lock);
+ (void)__oom_reap_task_mm(mm);
set_bit(MMF_OOM_SKIP, &mm->flags);
down_write(&mm->mmap_sem);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index eff6b88..82bb1a9 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -174,18 +174,29 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
srcu_read_unlock(&srcu, id);
}
-void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
- unsigned long start, unsigned long end)
+int __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool blockable)
{
struct mmu_notifier *mn;
+ int ret = 0;
int id;
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
- if (mn->ops->invalidate_range_start)
- mn->ops->invalidate_range_start(mn, mm, start, end);
+ if (mn->ops->invalidate_range_start) {
+ int _ret = mn->ops->invalidate_range_start(mn, mm, start, end, blockable);
+ if (_ret) {
+ pr_info("%pS callback failed with %d in %sblockable context.\n",
+ mn->ops->invalidate_range_start, _ret,
+ !blockable ? "non-" : "");
+ ret = _ret;
+ }
+ }
}
srcu_read_unlock(&srcu, id);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7c74dcc..b5b25e4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -400,7 +400,8 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
struct task_struct *p;
struct task_struct *task;
- pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
+ pr_info("Tasks state (memory values in pages):\n");
+ pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
rcu_read_lock();
for_each_process(p) {
if (oom_unkillable_task(p, memcg, nodemask))
@@ -416,7 +417,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
continue;
}
- pr_info("[%5d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
+ pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
task->pid, from_kuid(&init_user_ns, task_uid(task)),
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
@@ -487,9 +488,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-void __oom_reap_task_mm(struct mm_struct *mm)
+bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ bool ret = true;
/*
* Tell all users of get_user/copy_from_user etc... that the content
@@ -519,50 +521,32 @@ void __oom_reap_task_mm(struct mm_struct *mm)
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm, start, end);
- mmu_notifier_invalidate_range_start(mm, start, end);
+ if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+ ret = false;
+ continue;
+ }
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
+
+ return ret;
}
+/*
+ * Reaps the address space of the give task.
+ *
+ * Returns true on success and false if none or part of the address space
+ * has been reclaimed and the caller should retry later.
+ */
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
- /*
- * We have to make sure to not race with the victim exit path
- * and cause premature new oom victim selection:
- * oom_reap_task_mm exit_mm
- * mmget_not_zero
- * mmput
- * atomic_dec_and_test
- * exit_oom_victim
- * [...]
- * out_of_memory
- * select_bad_process
- * # no TIF_MEMDIE task selects new victim
- * unmap_page_range # frees some memory
- */
- mutex_lock(&oom_lock);
-
if (!down_read_trylock(&mm->mmap_sem)) {
- ret = false;
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
- }
-
- /*
- * If the mm has invalidate_{start,end}() notifiers that could block,
- * sleep to give the oom victim some more time.
- * TODO: we really want to get rid of this ugly hack and make sure that
- * notifiers cannot block for unbounded amount of time
- */
- if (mm_has_blockable_invalidate_notifiers(mm)) {
- up_read(&mm->mmap_sem);
- schedule_timeout_idle(HZ);
- goto unlock_oom;
+ return false;
}
/*
@@ -572,25 +556,27 @@ static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
* down_write();up_write() cycle in exit_mmap().
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
- up_read(&mm->mmap_sem);
trace_skip_task_reaping(tsk->pid);
- goto unlock_oom;
+ goto out_unlock;
}
trace_start_task_reaping(tsk->pid);
- __oom_reap_task_mm(mm);
+ /* failed to reap part of the address space. Try again later */
+ ret = __oom_reap_task_mm(mm);
+ if (!ret)
+ goto out_finish;
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)));
+out_finish:
+ trace_finish_task_reaping(tsk->pid);
+out_unlock:
up_read(&mm->mmap_sem);
- trace_finish_task_reaping(tsk->pid);
-unlock_oom:
- mutex_unlock(&oom_lock);
return ret;
}
@@ -843,68 +829,12 @@ static bool task_will_free_mem(struct task_struct *task)
return ret;
}
-static void oom_kill_process(struct oom_control *oc, const char *message)
+static void __oom_kill_process(struct task_struct *victim)
{
- struct task_struct *p = oc->chosen;
- unsigned int points = oc->chosen_points;
- struct task_struct *victim = p;
- struct task_struct *child;
- struct task_struct *t;
+ struct task_struct *p;
struct mm_struct *mm;
- unsigned int victim_points = 0;
- static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
bool can_oom_reap = true;
- /*
- * If the task is already exiting, don't alarm the sysadmin or kill
- * its children or threads, just give it access to memory reserves
- * so it can die quickly
- */
- task_lock(p);
- if (task_will_free_mem(p)) {
- mark_oom_victim(p);
- wake_oom_reaper(p);
- task_unlock(p);
- put_task_struct(p);
- return;
- }
- task_unlock(p);
-
- if (__ratelimit(&oom_rs))
- dump_header(oc, p);
-
- pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
- message, task_pid_nr(p), p->comm, points);
-
- /*
- * If any of p's children has a different mm and is eligible for kill,
- * the one with the highest oom_badness() score is sacrificed for its
- * parent. This attempts to lose the minimal amount of work done while
- * still freeing memory.
- */
- read_lock(&tasklist_lock);
- for_each_thread(p, t) {
- list_for_each_entry(child, &t->children, sibling) {
- unsigned int child_points;
-
- if (process_shares_mm(child, p->mm))
- continue;
- /*
- * oom_badness() returns 0 if the thread is unkillable
- */
- child_points = oom_badness(child,
- oc->memcg, oc->nodemask, oc->totalpages);
- if (child_points > victim_points) {
- put_task_struct(victim);
- victim = child;
- victim_points = child_points;
- get_task_struct(victim);
- }
- }
- }
- read_unlock(&tasklist_lock);
-
p = find_lock_task_mm(victim);
if (!p) {
put_task_struct(victim);
@@ -979,6 +909,99 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
#undef K
/*
+ * Kill provided task unless it's secured by setting
+ * oom_score_adj to OOM_SCORE_ADJ_MIN.
+ */
+static int oom_kill_memcg_member(struct task_struct *task, void *unused)
+{
+ if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+ get_task_struct(task);
+ __oom_kill_process(task);
+ }
+ return 0;
+}
+
+static void oom_kill_process(struct oom_control *oc, const char *message)
+{
+ struct task_struct *p = oc->chosen;
+ unsigned int points = oc->chosen_points;
+ struct task_struct *victim = p;
+ struct task_struct *child;
+ struct task_struct *t;
+ struct mem_cgroup *oom_group;
+ unsigned int victim_points = 0;
+ static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ /*
+ * If the task is already exiting, don't alarm the sysadmin or kill
+ * its children or threads, just give it access to memory reserves
+ * so it can die quickly
+ */
+ task_lock(p);
+ if (task_will_free_mem(p)) {
+ mark_oom_victim(p);
+ wake_oom_reaper(p);
+ task_unlock(p);
+ put_task_struct(p);
+ return;
+ }
+ task_unlock(p);
+
+ if (__ratelimit(&oom_rs))
+ dump_header(oc, p);
+
+ pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
+ message, task_pid_nr(p), p->comm, points);
+
+ /*
+ * If any of p's children has a different mm and is eligible for kill,
+ * the one with the highest oom_badness() score is sacrificed for its
+ * parent. This attempts to lose the minimal amount of work done while
+ * still freeing memory.
+ */
+ read_lock(&tasklist_lock);
+ for_each_thread(p, t) {
+ list_for_each_entry(child, &t->children, sibling) {
+ unsigned int child_points;
+
+ if (process_shares_mm(child, p->mm))
+ continue;
+ /*
+ * oom_badness() returns 0 if the thread is unkillable
+ */
+ child_points = oom_badness(child,
+ oc->memcg, oc->nodemask, oc->totalpages);
+ if (child_points > victim_points) {
+ put_task_struct(victim);
+ victim = child;
+ victim_points = child_points;
+ get_task_struct(victim);
+ }
+ }
+ }
+ read_unlock(&tasklist_lock);
+
+ /*
+ * Do we need to kill the entire memory cgroup?
+ * Or even one of the ancestor memory cgroups?
+ * Check this out before killing the victim task.
+ */
+ oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
+
+ __oom_kill_process(victim);
+
+ /*
+ * If necessary, kill all tasks in the selected memory cgroup.
+ */
+ if (oom_group) {
+ mem_cgroup_print_oom_group(oom_group);
+ mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
+ mem_cgroup_put(oom_group);
+ }
+}
+
+/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
static void check_panic_on_oom(struct oom_control *oc,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 15ea511..c677c15 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2909,10 +2909,10 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
if (!static_branch_likely(&vm_numa_stat_key))
return;
- if (z->node != numa_node_id())
+ if (zone_to_nid(z) != numa_node_id())
local_stat = NUMA_OTHER;
- if (z->node == preferred_zone->node)
+ if (zone_to_nid(z) == zone_to_nid(preferred_zone))
__inc_numa_state(z, NUMA_HIT);
else {
__inc_numa_state(z, NUMA_MISS);
@@ -5278,7 +5278,7 @@ int local_memory_node(int node)
z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
gfp_zone(GFP_KERNEL),
NULL);
- return z->zone->node;
+ return zone_to_nid(z->zone);
}
#endif
@@ -6120,7 +6120,7 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
return usemapsize / 8;
}
-static void __init setup_usemap(struct pglist_data *pgdat,
+static void __ref setup_usemap(struct pglist_data *pgdat,
struct zone *zone,
unsigned long zone_start_pfn,
unsigned long zonesize)
@@ -6140,7 +6140,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
/* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
{
unsigned int order;
@@ -6168,14 +6168,14 @@ void __paginginit set_pageblock_order(void)
* include/linux/pageblock-flags.h for the values of pageblock_order based on
* the kernel config
*/
-void __paginginit set_pageblock_order(void)
+void __init set_pageblock_order(void)
{
}
#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */
-static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
- unsigned long present_pages)
+static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
+ unsigned long present_pages)
{
unsigned long pages = spanned_pages;
@@ -6194,6 +6194,84 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
}
+#ifdef CONFIG_NUMA_BALANCING
+static void pgdat_init_numabalancing(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->numabalancing_migrate_lock);
+ pgdat->numabalancing_migrate_nr_pages = 0;
+ pgdat->numabalancing_migrate_next_window = jiffies;
+}
+#else
+static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pgdat_init_split_queue(struct pglist_data *pgdat)
+{
+ spin_lock_init(&pgdat->split_queue_lock);
+ INIT_LIST_HEAD(&pgdat->split_queue);
+ pgdat->split_queue_len = 0;
+}
+#else
+static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
+#endif
+
+#ifdef CONFIG_COMPACTION
+static void pgdat_init_kcompactd(struct pglist_data *pgdat)
+{
+ init_waitqueue_head(&pgdat->kcompactd_wait);
+}
+#else
+static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
+#endif
+
+static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
+{
+ pgdat_resize_init(pgdat);
+
+ pgdat_init_numabalancing(pgdat);
+ pgdat_init_split_queue(pgdat);
+ pgdat_init_kcompactd(pgdat);
+
+ init_waitqueue_head(&pgdat->kswapd_wait);
+ init_waitqueue_head(&pgdat->pfmemalloc_wait);
+
+ pgdat_page_ext_init(pgdat);
+ spin_lock_init(&pgdat->lru_lock);
+ lruvec_init(node_lruvec(pgdat));
+}
+
+static void __meminit zone_init_internals(struct zone *zone, enum zone_type idx, int nid,
+ unsigned long remaining_pages)
+{
+ zone->managed_pages = remaining_pages;
+ zone_set_nid(zone, nid);
+ zone->name = zone_names[idx];
+ zone->zone_pgdat = NODE_DATA(nid);
+ spin_lock_init(&zone->lock);
+ zone_seqlock_init(zone);
+ zone_pcp_init(zone);
+}
+
+/*
+ * Set up the zone data structures
+ * - init pgdat internals
+ * - init all zones belonging to this node
+ *
+ * NOTE: this function is only called during memory hotplug
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __ref free_area_init_core_hotplug(int nid)
+{
+ enum zone_type z;
+ pg_data_t *pgdat = NODE_DATA(nid);
+
+ pgdat_init_internals(pgdat);
+ for (z = 0; z < MAX_NR_ZONES; z++)
+ zone_init_internals(&pgdat->node_zones[z], z, nid, 0);
+}
+#endif
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -6201,32 +6279,14 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
* - clear the memory bitmaps
*
* NOTE: pgdat should get zeroed by caller.
+ * NOTE: this function is only called during early init.
*/
-static void __paginginit free_area_init_core(struct pglist_data *pgdat)
+static void __init free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
int nid = pgdat->node_id;
- pgdat_resize_init(pgdat);
-#ifdef CONFIG_NUMA_BALANCING
- spin_lock_init(&pgdat->numabalancing_migrate_lock);
- pgdat->numabalancing_migrate_nr_pages = 0;
- pgdat->numabalancing_migrate_next_window = jiffies;
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- spin_lock_init(&pgdat->split_queue_lock);
- INIT_LIST_HEAD(&pgdat->split_queue);
- pgdat->split_queue_len = 0;
-#endif
- init_waitqueue_head(&pgdat->kswapd_wait);
- init_waitqueue_head(&pgdat->pfmemalloc_wait);
-#ifdef CONFIG_COMPACTION
- init_waitqueue_head(&pgdat->kcompactd_wait);
-#endif
- pgdat_page_ext_init(pgdat);
- spin_lock_init(&pgdat->lru_lock);
- lruvec_init(node_lruvec(pgdat));
-
+ pgdat_init_internals(pgdat);
pgdat->per_cpu_nodestats = &boot_nodestats;
for (j = 0; j < MAX_NR_ZONES; j++) {
@@ -6274,15 +6334,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
* when the bootmem allocator frees pages into the buddy system.
* And all highmem pages will be managed by the buddy system.
*/
- zone->managed_pages = freesize;
-#ifdef CONFIG_NUMA
- zone->node = nid;
-#endif
- zone->name = zone_names[j];
- zone->zone_pgdat = pgdat;
- spin_lock_init(&zone->lock);
- zone_seqlock_init(zone);
- zone_pcp_init(zone);
+ zone_init_internals(zone, j, nid, freesize);
if (!size)
continue;
@@ -6342,8 +6394,24 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
#endif /* CONFIG_FLAT_NODE_MEM_MAP */
-void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
- unsigned long node_start_pfn, unsigned long *zholes_size)
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat)
+{
+ /*
+ * We start only with one section of pages, more pages are added as
+ * needed until the rest of deferred pages are initialized.
+ */
+ pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
+ pgdat->node_spanned_pages);
+ pgdat->first_deferred_pfn = ULONG_MAX;
+}
+#else
+static inline void pgdat_set_deferred_range(pg_data_t *pgdat) {}
+#endif
+
+void __init free_area_init_node(int nid, unsigned long *zones_size,
+ unsigned long node_start_pfn,
+ unsigned long *zholes_size)
{
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long start_pfn = 0;
@@ -6367,16 +6435,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
zones_size, zholes_size);
alloc_node_mem_map(pgdat);
+ pgdat_set_deferred_range(pgdat);
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
- /*
- * We start only with one section of pages, more pages are added as
- * needed until the rest of deferred pages are initialized.
- */
- pgdat->static_init_pgcnt = min_t(unsigned long, PAGES_PER_SECTION,
- pgdat->node_spanned_pages);
- pgdat->first_deferred_pfn = ULONG_MAX;
-#endif
free_area_init_core(pgdat);
}
@@ -6388,7 +6448,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
* may be accessed (for example page_to_pfn() on some configuration accesses
* flags). We must explicitly zero those struct pages.
*/
-void __paginginit zero_resv_unavail(void)
+void __init zero_resv_unavail(void)
{
phys_addr_t start, end;
unsigned long pfn;
diff --git a/mm/percpu.c b/mm/percpu.c
index 0b64809..a749d4d 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -170,6 +170,14 @@ static LIST_HEAD(pcpu_map_extend_chunks);
int pcpu_nr_empty_pop_pages;
/*
+ * The number of populated pages in use by the allocator, protected by
+ * pcpu_lock. This number is kept per a unit per chunk (i.e. when a page gets
+ * allocated/deallocated, it is allocated/deallocated in all units of a chunk
+ * and increments/decrements this count by 1).
+ */
+static unsigned long pcpu_nr_populated;
+
+/*
* Balance work is used to populate or destroy chunks asynchronously. We
* try to keep the number of populated free pages between
* PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one
@@ -1232,6 +1240,7 @@ static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
bitmap_set(chunk->populated, page_start, nr);
chunk->nr_populated += nr;
+ pcpu_nr_populated += nr;
if (!for_alloc) {
chunk->nr_empty_pop_pages += nr;
@@ -1260,6 +1269,7 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
chunk->nr_populated -= nr;
chunk->nr_empty_pop_pages -= nr;
pcpu_nr_empty_pop_pages -= nr;
+ pcpu_nr_populated -= nr;
}
/*
@@ -2176,6 +2186,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
pcpu_chunk_relocate(pcpu_first_chunk, -1);
+ /* include all regions of the first chunk */
+ pcpu_nr_populated += PFN_DOWN(size_sum);
+
pcpu_stats_chunk_alloc();
trace_percpu_create_chunk(base_addr);
@@ -2746,6 +2759,22 @@ void __init setup_per_cpu_areas(void)
#endif /* CONFIG_SMP */
/*
+ * pcpu_nr_pages - calculate total number of populated backing pages
+ *
+ * This reflects the number of pages populated to back chunks. Metadata is
+ * excluded in the number exposed in meminfo as the number of backing pages
+ * scales with the number of cpus and can quickly outweigh the memory used for
+ * metadata. It also keeps this calculation nice and simple.
+ *
+ * RETURNS:
+ * Total number of populated backing pages in use by the allocator.
+ */
+unsigned long pcpu_nr_pages(void)
+{
+ return pcpu_nr_populated * pcpu_nr_units;
+}
+
+/*
* Percpu allocator is initialized early during boot when neither slab or
* workqueue is available. Plug async management until everything is up
* and running.
diff --git a/mm/shmem.c b/mm/shmem.c
index c48c790..fb04baa 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1421,7 +1421,6 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
struct shmem_inode_info *info, pgoff_t index)
{
/* Create a pseudo vma that just contains the policy */
- memset(vma, 0, sizeof(*vma));
vma_init(vma, NULL);
/* Bias interleave by inode number to distribute better across nodes */
vma->vm_pgoff = index + info->vfs_inode.i_ino;
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 008ccb2..63a7b45 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -269,8 +269,8 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0;
if (swap_slot_cache_active)
- cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE, false,
- cache->slots);
+ cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
+ cache->slots, 1);
return cache->nr;
}
@@ -316,7 +316,7 @@ swp_entry_t get_swap_page(struct page *page)
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
- get_swap_pages(1, true, &entry);
+ get_swap_pages(1, &entry, HPAGE_PMD_NR);
goto out;
}
@@ -350,7 +350,7 @@ swp_entry_t get_swap_page(struct page *page)
goto out;
}
- get_swap_pages(1, false, &entry);
+ get_swap_pages(1, &entry, 1);
out:
if (mem_cgroup_try_charge_swap(page, entry)) {
put_swap_page(page, entry);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8837b22..d954b71 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -204,8 +204,16 @@ static void discard_swap_cluster(struct swap_info_struct *si,
#ifdef CONFIG_THP_SWAP
#define SWAPFILE_CLUSTER HPAGE_PMD_NR
+
+#define swap_entry_size(size) (size)
#else
#define SWAPFILE_CLUSTER 256
+
+/*
+ * Define swap_entry_size() as constant to let compiler to optimize
+ * out some code if !CONFIG_THP_SWAP
+ */
+#define swap_entry_size(size) 1
#endif
#define LATENCY_LIMIT 256
@@ -269,7 +277,9 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
static inline bool cluster_is_huge(struct swap_cluster_info *info)
{
- return info->flags & CLUSTER_FLAG_HUGE;
+ if (IS_ENABLED(CONFIG_THP_SWAP))
+ return info->flags & CLUSTER_FLAG_HUGE;
+ return false;
}
static inline void cluster_clear_huge(struct swap_cluster_info *info)
@@ -296,13 +306,18 @@ static inline void unlock_cluster(struct swap_cluster_info *ci)
spin_unlock(&ci->lock);
}
+/*
+ * Determine the locking method in use for this device. Return
+ * swap_cluster_info if SSD-style cluster-based locking is in place.
+ */
static inline struct swap_cluster_info *lock_cluster_or_swap_info(
- struct swap_info_struct *si,
- unsigned long offset)
+ struct swap_info_struct *si, unsigned long offset)
{
struct swap_cluster_info *ci;
+ /* Try to use fine-grained SSD-style locking if available: */
ci = lock_cluster(si, offset);
+ /* Otherwise, fall back to traditional, coarse locking: */
if (!ci)
spin_lock(&si->lock);
@@ -863,7 +878,6 @@ static int scan_swap_map_slots(struct swap_info_struct *si,
return n_ret;
}
-#ifdef CONFIG_THP_SWAP
static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
{
unsigned long idx;
@@ -871,6 +885,15 @@ static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
unsigned long offset, i;
unsigned char *map;
+ /*
+ * Should not even be attempting cluster allocations when huge
+ * page swap is disabled. Warn and fail the allocation.
+ */
+ if (!IS_ENABLED(CONFIG_THP_SWAP)) {
+ VM_WARN_ON_ONCE(1);
+ return 0;
+ }
+
if (cluster_list_empty(&si->free_clusters))
return 0;
@@ -901,13 +924,6 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
unlock_cluster(ci);
swap_range_free(si, offset, SWAPFILE_CLUSTER);
}
-#else
-static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot)
-{
- VM_WARN_ON_ONCE(1);
- return 0;
-}
-#endif /* CONFIG_THP_SWAP */
static unsigned long scan_swap_map(struct swap_info_struct *si,
unsigned char usage)
@@ -924,18 +940,18 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
}
-int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
+int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
{
- unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1;
+ unsigned long size = swap_entry_size(entry_size);
struct swap_info_struct *si, *next;
long avail_pgs;
int n_ret = 0;
int node;
/* Only single cluster request supported */
- WARN_ON_ONCE(n_goal > 1 && cluster);
+ WARN_ON_ONCE(n_goal > 1 && size == SWAPFILE_CLUSTER);
- avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages;
+ avail_pgs = atomic_long_read(&nr_swap_pages) / size;
if (avail_pgs <= 0)
goto noswap;
@@ -945,7 +961,7 @@ int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
if (n_goal > avail_pgs)
n_goal = avail_pgs;
- atomic_long_sub(n_goal * nr_pages, &nr_swap_pages);
+ atomic_long_sub(n_goal * size, &nr_swap_pages);
spin_lock(&swap_avail_lock);
@@ -972,14 +988,14 @@ int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
spin_unlock(&si->lock);
goto nextsi;
}
- if (cluster) {
+ if (size == SWAPFILE_CLUSTER) {
if (!(si->flags & SWP_FILE))
n_ret = swap_alloc_cluster(si, swp_entries);
} else
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
n_goal, swp_entries);
spin_unlock(&si->lock);
- if (n_ret || cluster)
+ if (n_ret || size == SWAPFILE_CLUSTER)
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
@@ -1005,7 +1021,7 @@ int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[])
check_out:
if (n_ret < n_goal)
- atomic_long_add((long)(n_goal - n_ret) * nr_pages,
+ atomic_long_add((long)(n_goal - n_ret) * size,
&nr_swap_pages);
noswap:
return n_ret;
@@ -1107,16 +1123,13 @@ static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
return p;
}
-static unsigned char __swap_entry_free(struct swap_info_struct *p,
- swp_entry_t entry, unsigned char usage)
+static unsigned char __swap_entry_free_locked(struct swap_info_struct *p,
+ unsigned long offset,
+ unsigned char usage)
{
- struct swap_cluster_info *ci;
- unsigned long offset = swp_offset(entry);
unsigned char count;
unsigned char has_cache;
- ci = lock_cluster_or_swap_info(p, offset);
-
count = p->swap_map[offset];
has_cache = count & SWAP_HAS_CACHE;
@@ -1144,6 +1157,17 @@ static unsigned char __swap_entry_free(struct swap_info_struct *p,
usage = count | has_cache;
p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+ return usage;
+}
+
+static unsigned char __swap_entry_free(struct swap_info_struct *p,
+ swp_entry_t entry, unsigned char usage)
+{
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+
+ ci = lock_cluster_or_swap_info(p, offset);
+ usage = __swap_entry_free_locked(p, offset, usage);
unlock_cluster_or_swap_info(p, ci);
return usage;
@@ -1184,19 +1208,7 @@ void swap_free(swp_entry_t entry)
/*
* Called after dropping swapcache to decrease refcnt to swap entries.
*/
-static void swapcache_free(swp_entry_t entry)
-{
- struct swap_info_struct *p;
-
- p = _swap_info_get(entry);
- if (p) {
- if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
- free_swap_slot(entry);
- }
-}
-
-#ifdef CONFIG_THP_SWAP
-static void swapcache_free_cluster(swp_entry_t entry)
+void put_swap_page(struct page *page, swp_entry_t entry)
{
unsigned long offset = swp_offset(entry);
unsigned long idx = offset / SWAPFILE_CLUSTER;
@@ -1205,42 +1217,48 @@ static void swapcache_free_cluster(swp_entry_t entry)
unsigned char *map;
unsigned int i, free_entries = 0;
unsigned char val;
+ int size = swap_entry_size(hpage_nr_pages(page));
si = _swap_info_get(entry);
if (!si)
return;
- ci = lock_cluster(si, offset);
- VM_BUG_ON(!cluster_is_huge(ci));
- map = si->swap_map + offset;
- for (i = 0; i < SWAPFILE_CLUSTER; i++) {
- val = map[i];
- VM_BUG_ON(!(val & SWAP_HAS_CACHE));
- if (val == SWAP_HAS_CACHE)
- free_entries++;
- }
- if (!free_entries) {
- for (i = 0; i < SWAPFILE_CLUSTER; i++)
- map[i] &= ~SWAP_HAS_CACHE;
- }
- cluster_clear_huge(ci);
- unlock_cluster(ci);
- if (free_entries == SWAPFILE_CLUSTER) {
- spin_lock(&si->lock);
- ci = lock_cluster(si, offset);
- memset(map, 0, SWAPFILE_CLUSTER);
- unlock_cluster(ci);
- mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
- swap_free_cluster(si, idx);
- spin_unlock(&si->lock);
- } else if (free_entries) {
- for (i = 0; i < SWAPFILE_CLUSTER; i++, entry.val++) {
- if (!__swap_entry_free(si, entry, SWAP_HAS_CACHE))
- free_swap_slot(entry);
+ ci = lock_cluster_or_swap_info(si, offset);
+ if (size == SWAPFILE_CLUSTER) {
+ VM_BUG_ON(!cluster_is_huge(ci));
+ map = si->swap_map + offset;
+ for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+ val = map[i];
+ VM_BUG_ON(!(val & SWAP_HAS_CACHE));
+ if (val == SWAP_HAS_CACHE)
+ free_entries++;
+ }
+ cluster_clear_huge(ci);
+ if (free_entries == SWAPFILE_CLUSTER) {
+ unlock_cluster_or_swap_info(si, ci);
+ spin_lock(&si->lock);
+ ci = lock_cluster(si, offset);
+ memset(map, 0, SWAPFILE_CLUSTER);
+ unlock_cluster(ci);
+ mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
+ swap_free_cluster(si, idx);
+ spin_unlock(&si->lock);
+ return;
}
}
+ for (i = 0; i < size; i++, entry.val++) {
+ if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
+ unlock_cluster_or_swap_info(si, ci);
+ free_swap_slot(entry);
+ if (i == size - 1)
+ return;
+ lock_cluster_or_swap_info(si, offset);
+ }
+ }
+ unlock_cluster_or_swap_info(si, ci);
}
+#ifdef CONFIG_THP_SWAP
int split_swap_cluster(swp_entry_t entry)
{
struct swap_info_struct *si;
@@ -1255,19 +1273,7 @@ int split_swap_cluster(swp_entry_t entry)
unlock_cluster(ci);
return 0;
}
-#else
-static inline void swapcache_free_cluster(swp_entry_t entry)
-{
-}
-#endif /* CONFIG_THP_SWAP */
-
-void put_swap_page(struct page *page, swp_entry_t entry)
-{
- if (!PageTransHuge(page))
- swapcache_free(entry);
- else
- swapcache_free_cluster(entry);
-}
+#endif
static int swp_entry_cmp(const void *ent1, const void *ent2)
{
@@ -1409,7 +1415,6 @@ int swp_swapcount(swp_entry_t entry)
return count;
}
-#ifdef CONFIG_THP_SWAP
static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
swp_entry_t entry)
{
@@ -1422,12 +1427,12 @@ static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
ci = lock_cluster_or_swap_info(si, offset);
if (!ci || !cluster_is_huge(ci)) {
- if (map[roffset] != SWAP_HAS_CACHE)
+ if (swap_count(map[roffset]))
ret = true;
goto unlock_out;
}
for (i = 0; i < SWAPFILE_CLUSTER; i++) {
- if (map[offset + i] != SWAP_HAS_CACHE) {
+ if (swap_count(map[offset + i])) {
ret = true;
break;
}
@@ -1442,7 +1447,7 @@ static bool page_swapped(struct page *page)
swp_entry_t entry;
struct swap_info_struct *si;
- if (likely(!PageTransCompound(page)))
+ if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page)))
return page_swapcount(page) != 0;
page = compound_head(page);
@@ -1466,10 +1471,8 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
/* hugetlbfs shouldn't call it */
VM_BUG_ON_PAGE(PageHuge(page), page);
- if (likely(!PageTransCompound(page))) {
- mapcount = atomic_read(&page->_mapcount) + 1;
- if (total_mapcount)
- *total_mapcount = mapcount;
+ if (!IS_ENABLED(CONFIG_THP_SWAP) || likely(!PageTransCompound(page))) {
+ mapcount = page_trans_huge_mapcount(page, total_mapcount);
if (PageSwapCache(page))
swapcount = page_swapcount(page);
if (total_swapcount)
@@ -1516,26 +1519,6 @@ static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
return map_swapcount;
}
-#else
-#define swap_page_trans_huge_swapped(si, entry) swap_swapcount(si, entry)
-#define page_swapped(page) (page_swapcount(page) != 0)
-
-static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
- int *total_swapcount)
-{
- int mapcount, swapcount = 0;
-
- /* hugetlbfs shouldn't call it */
- VM_BUG_ON_PAGE(PageHuge(page), page);
-
- mapcount = page_trans_huge_mapcount(page, total_mapcount);
- if (PageSwapCache(page))
- swapcount = page_swapcount(page);
- if (total_swapcount)
- *total_swapcount = swapcount;
- return mapcount + swapcount;
-}
-#endif
/*
* We can write to an anon page without COW if there are no other references
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4375b1e..7e7d255 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -408,7 +408,8 @@ void register_shrinker_prepared(struct shrinker *shrinker)
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
#ifdef CONFIG_MEMCG_KMEM
- idr_replace(&shrinker_idr, shrinker, shrinker->id);
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+ idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
up_write(&shrinker_rwsem);
}
@@ -902,7 +903,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
refcount = 2;
if (!page_ref_freeze(page, refcount))
goto cannot_free;
- /* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
+ /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
if (unlikely(PageDirty(page))) {
page_ref_unfreeze(page, refcount);
goto cannot_free;
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 447857f..5219280 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -13,6 +13,7 @@
use File::Basename;
use Cwd 'abs_path';
use Term::ANSIColor qw(:constants);
+use Encode qw(decode encode);
my $P = $0;
my $D = dirname(abs_path($P));
@@ -240,11 +241,11 @@
my $exit = 0;
+my $perl_version_ok = 1;
if ($^V && $^V lt $minimum_perl_version) {
+ $perl_version_ok = 0;
printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
- if (!$ignore_perl_version) {
- exit(1);
- }
+ exit(1) if (!$ignore_perl_version);
}
#if no filenames are given, push '-' to read patch from stdin
@@ -346,9 +347,10 @@
__force|
__iomem|
__must_check|
- __init_refok|
__kprobes|
__ref|
+ __refconst|
+ __refdata|
__rcu|
__private
}x;
@@ -847,6 +849,17 @@
return $status =~ /obsolete/i;
}
+sub is_SPDX_License_valid {
+ my ($license) = @_;
+
+ return 1 if (!$tree || which("python") eq "" || !(-e "$root/scripts/spdxcheck.py") || !(-e "$root/.git"));
+
+ my $root_path = abs_path($root);
+ my $status = `cd "$root_path"; echo "$license" | python scripts/spdxcheck.py -`;
+ return 0 if ($status ne "");
+ return 1;
+}
+
my $camelcase_seeded = 0;
sub seed_camelcase_includes {
return if ($camelcase_seeded);
@@ -1026,11 +1039,11 @@
hash_show_words(\%use_type, "Used");
hash_show_words(\%ignore_type, "Ignored");
- if ($^V lt 5.10.0) {
+ if (!$perl_version_ok) {
print << "EOM"
NOTE: perl $^V is not modern enough to detect all possible issues.
- An upgrade to at least perl v5.10.0 is suggested.
+ An upgrade to at least perl $minimum_perl_version is suggested.
EOM
}
if ($exit) {
@@ -2235,10 +2248,14 @@
our $clean = 1;
my $signoff = 0;
+ my $author = '';
+ my $authorsignoff = 0;
my $is_patch = 0;
+ my $is_binding_patch = -1;
my $in_header_lines = $file ? 0 : 1;
my $in_commit_log = 0; #Scanning lines before patch
my $has_commit_log = 0; #Encountered lines before patch
+ my $commit_log_lines = 0; #Number of commit log lines
my $commit_log_possible_stack_dump = 0;
my $commit_log_long_line = 0;
my $commit_log_has_diff = 0;
@@ -2485,6 +2502,19 @@
$check = $check_orig;
}
$checklicenseline = 1;
+
+ if ($realfile !~ /^MAINTAINERS/) {
+ my $last_binding_patch = $is_binding_patch;
+
+ $is_binding_patch = () = $realfile =~ m@^(?:Documentation/devicetree/|include/dt-bindings/)@;
+
+ if (($last_binding_patch != -1) &&
+ ($last_binding_patch ^ $is_binding_patch)) {
+ WARN("DT_SPLIT_BINDING_PATCH",
+ "DT binding docs and includes should be a separate patch. See: Documentation/devicetree/bindings/submitting-patches.txt\n");
+ }
+ }
+
next;
}
@@ -2496,6 +2526,18 @@
$cnt_lines++ if ($realcnt != 0);
+# Verify the existence of a commit log if appropriate
+# 2 is used because a $signature is counted in $commit_log_lines
+ if ($in_commit_log) {
+ if ($line !~ /^\s*$/) {
+ $commit_log_lines++; #could be a $signature
+ }
+ } elsif ($has_commit_log && $commit_log_lines < 2) {
+ WARN("COMMIT_MESSAGE",
+ "Missing commit description - Add an appropriate one\n");
+ $commit_log_lines = 2; #warn only once
+ }
+
# Check if the commit log has what seems like a diff which can confuse patch
if ($in_commit_log && !$commit_log_has_diff &&
(($line =~ m@^\s+diff\b.*a/[\w/]+@ &&
@@ -2517,10 +2559,24 @@
}
}
+# Check the patch for a From:
+ if (decode("MIME-Header", $line) =~ /^From:\s*(.*)/) {
+ $author = $1;
+ $author = encode("utf8", $author) if ($line =~ /=\?utf-8\?/i);
+ $author =~ s/"//g;
+ }
+
# Check the patch for a signoff:
if ($line =~ /^\s*signed-off-by:/i) {
$signoff++;
$in_commit_log = 0;
+ if ($author ne '') {
+ my $l = $line;
+ $l =~ s/"//g;
+ if ($l =~ /^\s*signed-off-by:\s*\Q$author\E/i) {
+ $authorsignoff = 1;
+ }
+ }
}
# Check if MAINTAINERS is being updated. If so, there's probably no need to
@@ -2960,8 +3016,14 @@
if ($comment !~ /^$/ &&
$rawline !~ /^\+\Q$comment\E SPDX-License-Identifier: /) {
- WARN("SPDX_LICENSE_TAG",
- "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+ WARN("SPDX_LICENSE_TAG",
+ "Missing or malformed SPDX-License-Identifier tag in line $checklicenseline\n" . $herecurr);
+ } elsif ($rawline =~ /(SPDX-License-Identifier: .*)/) {
+ my $spdx_license = $1;
+ if (!is_SPDX_License_valid($spdx_license)) {
+ WARN("SPDX_LICENSE_TAG",
+ "'$spdx_license' is not supported in LICENSES/...\n" . $herecurr);
+ }
}
}
}
@@ -3079,7 +3141,7 @@
}
# check indentation starts on a tab stop
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$sline =~ /^\+\t+( +)(?:$c90_Keywords\b|\{\s*$|\}\s*(?:else\b|while\b|\s*$)|$Declare\s*$Ident\s*[;=])/) {
my $indent = length($1);
if ($indent % 8) {
@@ -3092,7 +3154,7 @@
}
# check multi-line statement indentation matches previous line
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|(?:\*\s*)*$Lval\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
$prevline =~ /^\+(\t*)(.*)$/;
my $oldindent = $1;
@@ -3781,6 +3843,26 @@
"type '$tmp' should be specified in [[un]signed] [short|int|long|long long] order\n" . $herecurr);
}
+# check for unnecessary <signed> int declarations of short/long/long long
+ while ($sline =~ m{\b($TypeMisordered(\s*\*)*|$C90_int_types)\b}g) {
+ my $type = trim($1);
+ next if ($type !~ /\bint\b/);
+ next if ($type !~ /\b(?:short|long\s+long|long)\b/);
+ my $new_type = $type;
+ $new_type =~ s/\b\s*int\s*\b/ /;
+ $new_type =~ s/\b\s*(?:un)?signed\b\s*/ /;
+ $new_type =~ s/^const\s+//;
+ $new_type = "unsigned $new_type" if ($type =~ /\bunsigned\b/);
+ $new_type = "const $new_type" if ($type =~ /^const\b/);
+ $new_type =~ s/\s+/ /g;
+ $new_type = trim($new_type);
+ if (WARN("UNNECESSARY_INT",
+ "Prefer '$new_type' over '$type' as the int is unnecessary\n" . $herecurr) &&
+ $fix) {
+ $fixed[$fixlinenr] =~ s/\b\Q$type\E\b/$new_type/;
+ }
+ }
+
# check for static const char * arrays.
if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
WARN("STATIC_CONST_CHAR_ARRAY",
@@ -3967,7 +4049,7 @@
# function brace can't be on same line, except for #defines of do while,
# or if closed on same line
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$sline =~ /$Type\s*$Ident\s*$balanced_parens\s*\{/ &&
$sline !~ /\#\s*define\b.*do\s*\{/ &&
$sline !~ /}/) {
@@ -4483,11 +4565,11 @@
#need space before brace following if, while, etc
if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) ||
- $line =~ /do\{/) {
+ $line =~ /\b(?:else|do)\{/) {
if (ERROR("SPACING",
"space required before the open brace '{'\n" . $herecurr) &&
$fix) {
- $fixed[$fixlinenr] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
+ $fixed[$fixlinenr] =~ s/^(\+.*(?:do|else|\)))\{/$1 {/;
}
}
@@ -4578,7 +4660,7 @@
# check for unnecessary parentheses around comparisons in if uses
# when !drivers/staging or command-line uses --strict
if (($realfile !~ m@^(?:drivers/staging/)@ || $check_orig) &&
- $^V && $^V ge 5.10.0 && defined($stat) &&
+ $perl_version_ok && defined($stat) &&
$stat =~ /(^.\s*if\s*($balanced_parens))/) {
my $if_stat = $1;
my $test = substr($2, 1, -1);
@@ -4615,7 +4697,7 @@
# return is not a function
if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) {
my $spacing = $1;
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$stat =~ /^.\s*return\s*($balanced_parens)\s*;\s*$/) {
my $value = $1;
$value = deparenthesize($value);
@@ -4642,7 +4724,7 @@
}
# if statements using unnecessary parentheses - ie: if ((foo == bar))
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\bif\s*((?:\(\s*){2,})/) {
my $openparens = $1;
my $count = $openparens =~ tr@\(@\(@;
@@ -4659,7 +4741,7 @@
# avoid cases like "foo + BAR < baz"
# only fix matches surrounded by parentheses to avoid incorrect
# conversions like "FOO < baz() + 5" being "misfixed" to "baz() > FOO + 5"
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /^\+(.*)\b($Constant|[A-Z_][A-Z0-9_]*)\s*($Compare)\s*($LvalOrFunc)/) {
my $lead = $1;
my $const = $2;
@@ -4949,6 +5031,7 @@
if (defined $define_args && $define_args ne "") {
$define_args = substr($define_args, 1, length($define_args) - 2);
$define_args =~ s/\s*//g;
+ $define_args =~ s/\\\+?//g;
@def_args = split(",", $define_args);
}
@@ -5084,7 +5167,7 @@
# do {} while (0) macro tests:
# single-statement macros do not need to be enclosed in do while (0) loop,
# macro should not end with a semicolon
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$realfile !~ m@/vmlinux.lds.h$@ &&
$line =~ /^.\s*\#\s*define\s+$Ident(\()?/) {
my $ln = $linenr;
@@ -5330,15 +5413,28 @@
}
# concatenated string without spaces between elements
- if ($line =~ /$String[A-Z_]/ || $line =~ /[A-Za-z0-9_]$String/) {
- CHK("CONCATENATED_STRING",
- "Concatenated strings should use spaces between elements\n" . $herecurr);
+ if ($line =~ /$String[A-Za-z0-9_]/ || $line =~ /[A-Za-z0-9_]$String/) {
+ if (CHK("CONCATENATED_STRING",
+ "Concatenated strings should use spaces between elements\n" . $herecurr) &&
+ $fix) {
+ while ($line =~ /($String)/g) {
+ my $extracted_string = substr($rawline, $-[0], $+[0] - $-[0]);
+ $fixed[$fixlinenr] =~ s/\Q$extracted_string\E([A-Za-z0-9_])/$extracted_string $1/;
+ $fixed[$fixlinenr] =~ s/([A-Za-z0-9_])\Q$extracted_string\E/$1 $extracted_string/;
+ }
+ }
}
# uncoalesced string fragments
if ($line =~ /$String\s*"/) {
- WARN("STRING_FRAGMENTS",
- "Consecutive strings are generally better as a single string\n" . $herecurr);
+ if (WARN("STRING_FRAGMENTS",
+ "Consecutive strings are generally better as a single string\n" . $herecurr) &&
+ $fix) {
+ while ($line =~ /($String)(?=\s*")/g) {
+ my $extracted_string = substr($rawline, $-[0], $+[0] - $-[0]);
+ $fixed[$fixlinenr] =~ s/\Q$extracted_string\E\s*"/substr($extracted_string, 0, -1)/e;
+ }
+ }
}
# check for non-standard and hex prefixed decimal printf formats
@@ -5374,9 +5470,14 @@
# warn about #if 0
if ($line =~ /^.\s*\#\s*if\s+0\b/) {
- CHK("REDUNDANT_CODE",
- "if this code is redundant consider removing it\n" .
- $herecurr);
+ WARN("IF_0",
+ "Consider removing the code enclosed by this #if 0 and its #endif\n" . $herecurr);
+ }
+
+# warn about #if 1
+ if ($line =~ /^.\s*\#\s*if\s+1\b/) {
+ WARN("IF_1",
+ "Consider removing the #if 1 and its #endif\n" . $herecurr);
}
# check for needless "if (<foo>) fn(<foo>)" uses
@@ -5447,7 +5548,7 @@
}
# check for mask then right shift without a parentheses
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /$LvalOrFunc\s*\&\s*($LvalOrFunc)\s*>>/ &&
$4 !~ /^\&/) { # $LvalOrFunc may be &foo, ignore if so
WARN("MASK_THEN_SHIFT",
@@ -5455,7 +5556,7 @@
}
# check for pointer comparisons to NULL
- if ($^V && $^V ge 5.10.0) {
+ if ($perl_version_ok) {
while ($line =~ /\b$LvalOrFunc\s*(==|\!=)\s*NULL\b/g) {
my $val = $1;
my $equal = "!";
@@ -5727,7 +5828,7 @@
}
# Check for __attribute__ weak, or __weak declarations (may have link issues)
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /(?:$Declare|$DeclareMisordered)\s*$Ident\s*$balanced_parens\s*(?:$Attribute)?\s*;/ &&
($line =~ /\b__attribute__\s*\(\s*\(.*\bweak\b/ ||
$line =~ /\b__weak\b/)) {
@@ -5809,7 +5910,7 @@
}
# check for vsprintf extension %p<foo> misuses
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?![^\{]*\{\s*).*\b(\w+)\s*\(.*$String\s*,/s &&
$1 !~ /^_*volatile_*$/) {
@@ -5856,7 +5957,7 @@
}
# Check for misused memsets
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/) {
@@ -5874,7 +5975,7 @@
}
# Check for memcpy(foo, bar, ETH_ALEN) that could be ether_addr_copy(foo, bar)
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemcpy\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
# if (WARN("PREFER_ETHER_ADDR_COPY",
@@ -5885,7 +5986,7 @@
# }
# Check for memcmp(foo, bar, ETH_ALEN) that could be ether_addr_equal*(foo, bar)
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemcmp\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
# WARN("PREFER_ETHER_ADDR_EQUAL",
@@ -5894,7 +5995,7 @@
# check for memset(foo, 0x0, ETH_ALEN) that could be eth_zero_addr
# check for memset(foo, 0xFF, ETH_ALEN) that could be eth_broadcast_addr
-# if ($^V && $^V ge 5.10.0 &&
+# if ($perl_version_ok &&
# defined $stat &&
# $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*ETH_ALEN\s*\)/) {
#
@@ -5916,7 +6017,7 @@
# }
# typecasts on min/max could be min_t/max_t
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\b(min|max)\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\)/) {
if (defined $2 || defined $7) {
@@ -5940,7 +6041,7 @@
}
# check usleep_range arguments
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+(?:.*?)\busleep_range\s*\(\s*($FuncArg)\s*,\s*($FuncArg)\s*\)/) {
my $min = $1;
@@ -5956,7 +6057,7 @@
}
# check for naked sscanf
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /\bsscanf\b/ &&
($stat !~ /$Ident\s*=\s*sscanf\s*$balanced_parens/ &&
@@ -5970,7 +6071,7 @@
}
# check for simple sscanf that should be kstrto<foo>
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /\bsscanf\b/) {
my $lc = $stat =~ tr@\n@@;
@@ -6042,7 +6143,7 @@
}
# check for function definitions
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^.\s*(?:$Storage\s+)?$Type\s*($Ident)\s*$balanced_parens\s*{/s) {
$context_function = $1;
@@ -6082,14 +6183,14 @@
# alloc style
# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...)
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) {
CHK("ALLOC_SIZEOF_STRUCT",
"Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr);
}
# check for k[mz]alloc with multiplies that could be kmalloc_array/kcalloc
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+\s*($Lval)\s*\=\s*(?:$balanced_parens)?\s*(k[mz]alloc)\s*\(\s*($FuncArg)\s*\*\s*($FuncArg)\s*,/) {
my $oldfunc = $3;
@@ -6118,8 +6219,9 @@
}
# check for krealloc arg reuse
- if ($^V && $^V ge 5.10.0 &&
- $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) {
+ if ($perl_version_ok &&
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*($Lval)\s*,/ &&
+ $1 eq $3) {
WARN("KREALLOC_ARG_REUSE",
"Reusing the krealloc arg is almost always a bug\n" . $herecurr);
}
@@ -6187,7 +6289,7 @@
}
# check for switch/default statements without a break;
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
my $cnt = statement_rawlines($stat);
@@ -6251,6 +6353,13 @@
"Avoid using bool as bitfield. Prefer bool bitfields as unsigned int or u<8|16|32>\n" . $herecurr);
}
+# check for bool use in .h files
+ if ($realfile =~ /\.h$/ &&
+ $sline =~ /^.\s+bool\s*$Ident\s*(?::\s*d+\s*)?;/) {
+ CHK("BOOL_MEMBER",
+ "Avoid using bool structure members because of possible alignment issues - see: https://lkml.org/lkml/2017/11/21/384\n" . $herecurr);
+ }
+
# check for semaphores initialized locked
if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
WARN("CONSIDER_COMPLETION",
@@ -6297,7 +6406,7 @@
}
# likely/unlikely comparisons similar to "(likely(foo) > 0)"
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
$line =~ /\b((?:un)?likely)\s*\(\s*$FuncArg\s*\)\s*$Compare/) {
WARN("LIKELY_MISUSE",
"Using $1 should generally have parentheses around the comparison\n" . $herecurr);
@@ -6340,7 +6449,7 @@
# check for DEVICE_ATTR uses that could be DEVICE_ATTR_<FOO>
# and whether or not function naming is typical and if
# DEVICE_ATTR permissions uses are unusual too
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$stat =~ /\bDEVICE_ATTR\s*\(\s*(\w+)\s*,\s*\(?\s*(\s*(?:${multi_mode_perms_string_search}|0[0-7]{3,3})\s*)\s*\)?\s*,\s*(\w+)\s*,\s*(\w+)\s*\)/) {
my $var = $1;
@@ -6400,7 +6509,7 @@
# specific definition of not visible in sysfs.
# o Ignore proc_create*(...) uses with a decimal 0 permission as that means
# use the default permissions
- if ($^V && $^V ge 5.10.0 &&
+ if ($perl_version_ok &&
defined $stat &&
$line =~ /$mode_perms_search/) {
foreach my $entry (@mode_permission_funcs) {
@@ -6486,9 +6595,14 @@
ERROR("NOT_UNIFIED_DIFF",
"Does not appear to be a unified-diff format patch\n");
}
- if ($is_patch && $has_commit_log && $chk_signoff && $signoff == 0) {
- ERROR("MISSING_SIGN_OFF",
- "Missing Signed-off-by: line(s)\n");
+ if ($is_patch && $has_commit_log && $chk_signoff) {
+ if ($signoff == 0) {
+ ERROR("MISSING_SIGN_OFF",
+ "Missing Signed-off-by: line(s)\n");
+ } elsif (!$authorsignoff) {
+ WARN("NO_AUTHOR_SIGN_OFF",
+ "Missing Signed-off-by: line by nominal patch author '$author'\n");
+ }
}
print report_dump();
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index c87fa73..c1c088e 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -48,6 +48,7 @@
my $output_rolestats = 1;
my $output_section_maxlen = 50;
my $scm = 0;
+my $tree = 1;
my $web = 0;
my $subsystem = 0;
my $status = 0;
@@ -61,7 +62,7 @@
my $version = 0;
my $help = 0;
my $find_maintainer_files = 0;
-
+my $maintainer_path;
my $vcs_used = 0;
my $exit = 0;
@@ -255,6 +256,7 @@
'subsystem!' => \$subsystem,
'status!' => \$status,
'scm!' => \$scm,
+ 'tree!' => \$tree,
'web!' => \$web,
'letters=s' => \$letters,
'pattern-depth=i' => \$pattern_depth,
@@ -263,6 +265,7 @@
'fe|file-emails!' => \$file_emails,
'f|file' => \$from_filename,
'find-maintainer-files' => \$find_maintainer_files,
+ 'mpath|maintainer-path=s' => \$maintainer_path,
'self-test:s' => \$self_test,
'v|version' => \$version,
'h|help|usage' => \$help,
@@ -319,7 +322,7 @@
die "$P: Please select at least 1 email option\n";
}
-if (!top_of_kernel_tree($lk_path)) {
+if ($tree && !top_of_kernel_tree($lk_path)) {
die "$P: The current directory does not appear to be "
. "a linux kernel source tree.\n";
}
@@ -384,26 +387,36 @@
read_all_maintainer_files();
sub read_all_maintainer_files {
- if (-d "${lk_path}MAINTAINERS") {
- opendir(DIR, "${lk_path}MAINTAINERS") or die $!;
- my @files = readdir(DIR);
- closedir(DIR);
- foreach my $file (@files) {
- push(@mfiles, "${lk_path}MAINTAINERS/$file") if ($file !~ /^\./);
- }
+ my $path = "${lk_path}MAINTAINERS";
+ if (defined $maintainer_path) {
+ $path = $maintainer_path;
+ # Perl Cookbook tilde expansion if necessary
+ $path =~ s@^~([^/]*)@ $1 ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])@ex;
}
- if ($find_maintainer_files) {
- find( { wanted => \&find_is_maintainer_file,
- preprocess => \&find_ignore_git,
- no_chdir => 1,
- }, "${lk_path}");
+ if (-d $path) {
+ $path .= '/' if ($path !~ m@/$@);
+ if ($find_maintainer_files) {
+ find( { wanted => \&find_is_maintainer_file,
+ preprocess => \&find_ignore_git,
+ no_chdir => 1,
+ }, "$path");
+ } else {
+ opendir(DIR, "$path") or die $!;
+ my @files = readdir(DIR);
+ closedir(DIR);
+ foreach my $file (@files) {
+ push(@mfiles, "$path$file") if ($file !~ /^\./);
+ }
+ }
+ } elsif (-f "$path") {
+ push(@mfiles, "$path");
} else {
- push(@mfiles, "${lk_path}MAINTAINERS") if -f "${lk_path}MAINTAINERS";
+ die "$P: MAINTAINER file not found '$path'\n";
}
-
+ die "$P: No MAINTAINER files found in '$path'\n" if (scalar(@mfiles) == 0);
foreach my $file (@mfiles) {
- read_maintainer_file("$file");
+ read_maintainer_file("$file");
}
}
@@ -1031,13 +1044,14 @@
--sections => print all of the subsystem sections with pattern matches
--letters => print all matching 'letter' types from all matching sections
--mailmap => use .mailmap file (default: $email_use_mailmap)
+ --no-tree => run without a kernel tree
--self-test => show potential issues with MAINTAINERS file content
--version => show version
--help => show this help information
Default options:
- [--email --nogit --git-fallback --m --r --n --l --multiline --pattern-depth=0
- --remove-duplicates --rolestats]
+ [--email --tree --nogit --git-fallback --m --r --n --l --multiline
+ --pattern-depth=0 --remove-duplicates --rolestats]
Notes:
Using "-f directory" may give unexpected results:
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index 9a058cf..d2688d5 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -10,6 +10,8 @@
abandonning||abandoning
abigious||ambiguous
abitrate||arbitrate
+abord||abort
+aboslute||absolute
abov||above
abreviated||abbreviated
absense||absence
@@ -25,6 +27,7 @@
accesss||access
accidentaly||accidentally
accidentually||accidentally
+acclerated||accelerated
accoding||according
accomodate||accommodate
accomodates||accommodates
@@ -58,12 +61,15 @@
adddress||address
addreses||addresses
addresss||address
+addrress||address
aditional||additional
aditionally||additionally
aditionaly||additionally
adminstrative||administrative
adress||address
adresses||addresses
+adrresses||addresses
+advertisment||advertisement
adviced||advised
afecting||affecting
againt||against
@@ -100,6 +106,7 @@
ambigious||ambiguous
amoung||among
amout||amount
+amplifer||amplifier
an union||a union
an user||a user
an userspace||a userspace
@@ -145,11 +152,15 @@
assocation||association
associcated||associated
assotiated||associated
+asssert||assert
assum||assume
assumtpion||assumption
asuming||assuming
asycronous||asynchronous
asynchnous||asynchronous
+asynchromous||asynchronous
+asymetric||asymmetric
+asymmeric||asymmetric
atomatically||automatically
atomicly||atomically
atempt||attempt
@@ -172,6 +183,7 @@
availabe||available
availabled||available
availablity||availability
+availaible||available
availale||available
availavility||availability
availble||available
@@ -206,8 +218,11 @@
boundry||boundary
brievely||briefly
broadcat||broadcast
+bufufer||buffer
cacluated||calculated
+caculate||calculate
caculation||calculation
+cadidate||candidate
calender||calendar
calescing||coalescing
calle||called
@@ -221,12 +236,14 @@
capabitilies||capabilities
capatibilities||capabilities
capapbilities||capabilities
+caputure||capture
carefuly||carefully
cariage||carriage
catagory||category
cehck||check
challange||challenge
challanges||challenges
+chache||cache
chanell||channel
changable||changeable
chanined||chained
@@ -240,6 +257,7 @@
charcter||character
chcek||check
chck||check
+checksumed||checksummed
checksuming||checksumming
childern||children
childs||children
@@ -292,8 +310,10 @@
conbination||combination
conditionaly||conditionally
conected||connected
+conector||connector
connecetd||connected
configuartion||configuration
+configuation||configuration
configuratoin||configuration
configuraton||configuration
configuretion||configuration
@@ -315,6 +335,7 @@
continously||continuously
continueing||continuing
contraints||constraints
+contruct||construct
contol||control
contoller||controller
controled||controlled
@@ -343,6 +364,7 @@
deafult||default
deamon||daemon
decompres||decompress
+decsribed||described
decription||description
dectected||detected
defailt||default
@@ -379,6 +401,7 @@
desriptor||descriptor
desriptors||descriptors
destionation||destination
+destoried||destroyed
destory||destroy
destoryed||destroyed
destorys||destroys
@@ -404,18 +427,25 @@
difinition||definition
dimesions||dimensions
diplay||display
+directon||direction
direectly||directly
+diregard||disregard
disassocation||disassociation
disapear||disappear
disapeared||disappeared
disappared||disappeared
+disbale||disable
+disbaled||disabled
disble||disable
disbled||disabled
disconnet||disconnect
discontinous||discontinuous
+disharge||discharge
dispertion||dispersion
dissapears||disappears
distiction||distinction
+divisable||divisible
+divsiors||divisors
docuentation||documentation
documantation||documentation
documentaion||documentation
@@ -427,6 +457,7 @@
downlads||downloads
druing||during
dynmaic||dynamic
+eanable||enable
easilly||easily
ecspecially||especially
edditable||editable
@@ -484,9 +515,12 @@
extened||extended
extensability||extensibility
extention||extension
+extenstion||extension
extracter||extractor
+faield||failed
falied||failed
faild||failed
+failer||failure
faill||fail
failied||failed
faillure||failure
@@ -520,6 +554,7 @@
forse||force
fortan||fortran
forwardig||forwarding
+frambuffer||framebuffer
framming||framing
framwork||framework
frequncy||frequency
@@ -527,6 +562,7 @@
fucntion||function
fuction||function
fuctions||functions
+funcation||function
funcion||function
functionallity||functionality
functionaly||functionally
@@ -540,6 +576,7 @@
gaurenteed||guaranteed
generiously||generously
genereate||generate
+genereted||generated
genric||generic
globel||global
grabing||grabbing
@@ -553,6 +590,7 @@
halfs||halves
hander||handler
handfull||handful
+hanlde||handle
hanled||handled
happend||happened
harware||hardware
@@ -561,6 +599,7 @@
hybernate||hibernate
hierachy||hierarchy
hierarchie||hierarchy
+homogenous||homogeneous
howver||however
hsould||should
hypervior||hypervisor
@@ -568,6 +607,8 @@
identidier||identifier
iligal||illegal
illigal||illegal
+illgal||illegal
+iomaped||iomapped
imblance||imbalance
immeadiately||immediately
immedaite||immediate
@@ -618,10 +659,12 @@
initators||initiators
initialiazation||initialization
initializiation||initialization
+initialze||initialize
initialzed||initialized
initilization||initialization
initilize||initialize
inofficial||unofficial
+inrerface||interface
insititute||institute
instal||install
instanciated||instantiated
@@ -657,6 +700,7 @@
intrrupt||interrupt
intterrupt||interrupt
intuative||intuitive
+inavlid||invalid
invaid||invalid
invald||invalid
invalde||invalid
@@ -683,6 +727,7 @@
langugage||language
lauch||launch
layed||laid
+legnth||length
leightweight||lightweight
lengh||length
lenght||length
@@ -696,6 +741,7 @@
loggging||logging
loggin||login
logile||logfile
+loobpack||loopback
loosing||losing
losted||lost
machinary||machinery
@@ -703,6 +749,7 @@
maintainence||maintenance
maintan||maintain
makeing||making
+mailformed||malformed
malplaced||misplaced
malplace||misplace
managable||manageable
@@ -710,6 +757,7 @@
mangement||management
manoeuvering||maneuvering
mappping||mapping
+matchs||matches
mathimatical||mathematical
mathimatic||mathematic
mathimatics||mathematics
@@ -725,6 +773,7 @@
messsages||messages
micropone||microphone
microprocesspr||microprocessor
+migrateable||migratable
milliseonds||milliseconds
minium||minimum
minimam||minimum
@@ -741,6 +790,7 @@
miximum||maximum
mmnemonic||mnemonic
mnay||many
+modfiy||modify
modulues||modules
momery||memory
memomry||memory
@@ -777,6 +827,7 @@
numebr||number
numner||number
obtaion||obtain
+obusing||abusing
occassionally||occasionally
occationally||occasionally
occurance||occurrence
@@ -787,6 +838,7 @@
occured||occurred
occuring||occurring
offet||offset
+offloded||offloaded
omited||omitted
omiting||omitting
omitt||omit
@@ -829,6 +881,7 @@
parametised||parametrised
paramter||parameter
paramters||parameters
+parmaters||parameters
particuarly||particularly
particularily||particularly
partiton||partition
@@ -837,6 +890,7 @@
pathes||paths
pecularities||peculiarities
peformance||performance
+peforming||performing
peice||piece
pendantic||pedantic
peprocessor||preprocessor
@@ -846,6 +900,7 @@
persistance||persistence
persistant||persistent
plalform||platform
+platfoem||platform
platfrom||platform
plattform||platform
pleaes||please
@@ -858,6 +913,7 @@
positon||position
possibilites||possibilities
powerfull||powerful
+preamle||preamble
preample||preamble
preapre||prepare
preceeded||preceded
@@ -870,6 +926,7 @@
prefferably||preferably
premption||preemption
prepaired||prepared
+preperation||preparation
pressre||pressure
primative||primitive
princliple||principle
@@ -935,6 +992,7 @@
recyle||recycle
redircet||redirect
redirectrion||redirection
+redundacy||redundancy
reename||rename
refcounf||refcount
refence||reference
@@ -945,6 +1003,7 @@
refernnce||reference
refrence||reference
registerd||registered
+registeration||registration
registeresd||registered
registerred||registered
registes||registers
@@ -973,7 +1032,9 @@
requred||required
requried||required
requst||request
+reregisteration||reregistration
reseting||resetting
+reseverd||reserved
resizeable||resizable
resouce||resource
resouces||resources
@@ -982,6 +1043,7 @@
ressizes||resizes
ressource||resource
ressources||resources
+restesting||retesting
retransmited||retransmitted
retreived||retrieved
retreive||retrieve
@@ -1006,6 +1068,7 @@
safly||safely
safty||safety
savable||saveable
+scaleing||scaling
scaned||scanned
scaning||scanning
scarch||search
@@ -1014,6 +1077,8 @@
secquence||sequence
secund||second
segement||segment
+semaphone||semaphore
+senario||scenario
senarios||scenarios
sentivite||sensitive
separatly||separately
@@ -1025,9 +1090,13 @@
seperatly||separately
seperator||separator
sepperate||separate
+seqeunce||sequence
+seqeuncer||sequencer
+seqeuencer||sequencer
sequece||sequence
sequencial||sequential
serveral||several
+servive||service
setts||sets
settting||setting
shotdown||shutdown
@@ -1073,6 +1142,7 @@
standart||standard
staticly||statically
stoped||stopped
+stoping||stopping
stoppped||stopped
straming||streaming
struc||struct
@@ -1085,6 +1155,7 @@
suble||subtle
substract||subtract
submition||submission
+suceed||succeed
succesfully||successfully
succesful||successful
successed||succeeded
@@ -1108,6 +1179,7 @@
surpresses||suppresses
susbsystem||subsystem
suspeneded||suspended
+suspsend||suspend
suspicously||suspiciously
swaping||swapping
switchs||switches
@@ -1122,6 +1194,7 @@
symetric||symmetric
synax||syntax
synchonized||synchronized
+synchronuously||synchronously
syncronize||synchronize
syncronized||synchronized
syncronizing||synchronizing
@@ -1130,11 +1203,14 @@
sytem||system
sythesis||synthesis
taht||that
+tansmit||transmit
targetted||targeted
targetting||targeting
+taskelt||tasklet
teh||the
temorary||temporary
temproarily||temporarily
+thead||thread
therfore||therefore
thier||their
threds||threads
@@ -1143,11 +1219,14 @@
throught||through
troughput||throughput
thses||these
+tiggers||triggers
tiggered||triggered
tipically||typically
+timeing||timing
timout||timeout
tmis||this
torerable||tolerable
+traking||tracking
tramsmitted||transmitted
tramsmit||transmit
tranasction||transaction
@@ -1162,6 +1241,7 @@
trasfer||transfer
trasmission||transmission
treshold||threshold
+trigerred||triggered
trigerring||triggering
trun||turn
tunning||tuning
@@ -1169,6 +1249,8 @@
tyep||type
udpate||update
uesd||used
+uknown||unknown
+usupported||unsupported
uncommited||uncommitted
unconditionaly||unconditionally
underun||underrun
@@ -1181,11 +1263,14 @@
unexpexted||unexpected
unfortunatelly||unfortunately
unifiy||unify
+uniterrupted||uninterrupted
unintialized||uninitialized
unkmown||unknown
unknonw||unknown
unknow||unknown
unkown||unknown
+unamed||unnamed
+uneeded||unneeded
unneded||unneeded
unneccecary||unnecessary
unneccesary||unnecessary
@@ -1210,6 +1295,7 @@
usege||usage
usera||users
usualy||usually
+usupported||unsupported
utilites||utilities
utillities||utilities
utilties||utilities
@@ -1233,7 +1319,9 @@
virtiual||virtual
visiters||visitors
vitual||virtual
+vunerable||vulnerable
wakeus||wakeups
+wathdog||watchdog
wating||waiting
wiat||wait
wether||whether
diff --git a/security/security.c b/security/security.c
index 47cfff0..736e78d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -48,14 +48,17 @@ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] =
static void __init do_security_initcalls(void)
{
int ret;
- initcall_t *call;
- call = __security_initcall_start;
+ initcall_t call;
+ initcall_entry_t *ce;
+
+ ce = __security_initcall_start;
trace_initcall_level("security");
- while (call < __security_initcall_end) {
- trace_initcall_start((*call));
- ret = (*call) ();
- trace_initcall_finish((*call), ret);
- call++;
+ while (ce < __security_initcall_end) {
+ call = initcall_from_entry(ce);
+ trace_initcall_start(call);
+ ret = call();
+ trace_initcall_finish(call, ret);
+ ce++;
}
}
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 74e5912..82121a8 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -9,3 +9,5 @@
/proc-uptime-001
/proc-uptime-002
/read
+/self
+/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index db310ee..1c12c34 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,4 +1,5 @@
CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
TEST_GEN_PROGS :=
TEST_GEN_PROGS += fd-001-lookup
@@ -12,5 +13,7 @@
TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += thread-self
include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
index 4e17816..b7d57ea 100644
--- a/tools/testing/selftests/proc/proc.h
+++ b/tools/testing/selftests/proc/proc.h
@@ -6,6 +6,18 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t sys_getpid(void)
+{
+ return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+ return syscall(SYS_gettid);
+}
static inline bool streq(const char *s1, const char *s2)
{
diff --git a/tools/testing/selftests/proc/self.c b/tools/testing/selftests/proc/self.c
new file mode 100644
index 0000000..21c15a1
--- /dev/null
+++ b/tools/testing/selftests/proc/self.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+ char buf1[64], buf2[64];
+ pid_t pid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ snprintf(buf1, sizeof(buf1), "%u", pid);
+
+ rv = readlink("/proc/self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/thread-self.c b/tools/testing/selftests/proc/thread-self.c
new file mode 100644
index 0000000..4b23b39
--- /dev/null
+++ b/tools/testing/selftests/proc/thread-self.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+ char buf1[64], buf2[64];
+ pid_t pid, tid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ tid = sys_gettid();
+ snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+ rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ if (arg)
+ exit(0);
+ return 0;
+}
+
+int main(void)
+{
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ pid_t pid;
+ void *stack;
+
+ /* main thread */
+ f((void *)0);
+
+ stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(stack != MAP_FAILED);
+ /* side thread */
+ pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+ assert(pid > 0);
+ pause();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 342c7bc..af5ff83 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,6 +1,7 @@
hugepage-mmap
hugepage-shm
map_hugetlb
+map_populate
thuge-gen
compaction_test
mlock2-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index fdefa22..9881876 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -12,6 +12,7 @@
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += on-fault-limit
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c
new file mode 100644
index 0000000..6b8aeaa
--- /dev/null
+++ b/tools/testing/selftests/vm/map_populate.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ 4096
+#endif
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+ __LINE__, (description), strerror(errno)); \
+ exit(1); \
+ } \
+ } while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+ int status, ret;
+
+ ret = read(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ *smap = 0x22222BAD;
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = write(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ waitpid(child, &status, 0);
+ BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+ return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+ int ret, buf = 0;
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_POPULATE, fd, 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+ ret = write(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ ret = read(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+ BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int sock[2], child, ret;
+ FILE *ftmp;
+ unsigned long *smap;
+
+ ftmp = tmpfile();
+ BUG_ON(ftmp == 0, "tmpfile()");
+
+ ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ BUG_ON(ret, "ftruncate()");
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(ftmp), 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ *smap = 0xdeadbabe;
+ /* Probably unnecessary, but let it be. */
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+ BUG_ON(ret, "socketpair()");
+
+ child = fork();
+ BUG_ON(child == -1, "fork()");
+
+ if (child) {
+ ret = close(sock[0]);
+ BUG_ON(ret, "close()");
+
+ return parent_f(sock[1], smap, child);
+ }
+
+ ret = close(sock[1]);
+ BUG_ON(ret, "close()");
+
+ return child_f(sock[0], smap, fileno(ftmp));
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 88cbe55..584a91a 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -168,6 +168,17 @@
fi
echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
echo "running mlock2-tests"
echo "--------------------"
./mlock2-tests
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0df592c..f986e31f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -140,9 +140,10 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
static unsigned long long kvm_createvm_count;
static unsigned long long kvm_active_vms;
-__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
- unsigned long start, unsigned long end)
+__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ unsigned long start, unsigned long end, bool blockable)
{
+ return 0;
}
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
@@ -360,13 +361,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
srcu_read_unlock(&kvm->srcu, idx);
}
-static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
- unsigned long end)
+ unsigned long end,
+ bool blockable)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0, idx;
+ int ret;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
@@ -384,9 +387,11 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
spin_unlock(&kvm->mmu_lock);
- kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
+ ret = kvm_arch_mmu_notifier_invalidate_range(kvm, start, end, blockable);
srcu_read_unlock(&kvm->srcu, idx);
+
+ return ret;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,