From c7c167032e0a7bacaf4b9e53d4e0d8e71dfe0d29 Mon Sep 17 00:00:00 2001 From: Jacek Konieczny Date: Sun, 22 Jan 2017 18:13:15 +0100 Subject: [PATCH] rt patch updated --- kernel-rt.patch | 4488 +++++++++++++++-------------------------------- kernel.spec | 2 +- 2 files changed, 1432 insertions(+), 3058 deletions(-) diff --git a/kernel-rt.patch b/kernel-rt.patch index b19eb6e3..ffc64be9 100644 --- a/kernel-rt.patch +++ b/kernel-rt.patch @@ -1,73 +1,3 @@ -diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt -new file mode 100644 -index 000000000000..cb61516483d3 ---- /dev/null -+++ b/Documentation/hwlat_detector.txt -@@ -0,0 +1,64 @@ -+Introduction: -+------------- -+ -+The module hwlat_detector is a special purpose kernel module that is used to -+detect large system latencies induced by the behavior of certain underlying -+hardware or firmware, independent of Linux itself. The code was developed -+originally to detect SMIs (System Management Interrupts) on x86 systems, -+however there is nothing x86 specific about this patchset. It was -+originally written for use by the "RT" patch since the Real Time -+kernel is highly latency sensitive. -+ -+SMIs are usually not serviced by the Linux kernel, which typically does not -+even know that they are occuring. SMIs are instead are set up by BIOS code -+and are serviced by BIOS code, usually for "critical" events such as -+management of thermal sensors and fans. Sometimes though, SMIs are used for -+other tasks and those tasks can spend an inordinate amount of time in the -+handler (sometimes measured in milliseconds). Obviously this is a problem if -+you are trying to keep event service latencies down in the microsecond range. -+ -+The hardware latency detector works by hogging all of the cpus for configurable -+amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter -+for some period, then looking for gaps in the TSC data. Any gap indicates a -+time when the polling was interrupted and since the machine is stopped and -+interrupts turned off the only thing that could do that would be an SMI. -+ -+Note that the SMI detector should *NEVER* be used in a production environment. -+It is intended to be run manually to determine if the hardware platform has a -+problem with long system firmware service routines. -+ -+Usage: -+------ -+ -+Loading the module hwlat_detector passing the parameter "enabled=1" (or by -+setting the "enable" entry in "hwlat_detector" debugfs toggled on) is the only -+step required to start the hwlat_detector. It is possible to redefine the -+threshold in microseconds (us) above which latency spikes will be taken -+into account (parameter "threshold="). -+ -+Example: -+ -+ # modprobe hwlat_detector enabled=1 threshold=100 -+ -+After the module is loaded, it creates a directory named "hwlat_detector" under -+the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary -+to have debugfs mounted, which might be on /sys/debug on your system. -+ -+The /debug/hwlat_detector interface contains the following files: -+ -+count - number of latency spikes observed since last reset -+enable - a global enable/disable toggle (0/1), resets count -+max - maximum hardware latency actually observed (usecs) -+sample - a pipe from which to read current raw sample data -+ in the format -+ (can be opened O_NONBLOCK for a single sample) -+threshold - minimum latency value to be considered (usecs) -+width - time period to sample with CPUs held (usecs) -+ must be less than the total window size (enforced) -+window - total period of sampling, width being inside (usecs) -+ -+By default we will set width to 500,000 and window to 1,000,000, meaning that -+we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we -+observe any latencies that exceed the threshold (initially 100 usecs), -+then we write to a global sample ring buffer of 8K samples, which is -+consumed by reading from the "sample" (pipe) debugfs file interface. diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 3a3b30ac2a75..9e0745cafbd8 100644 --- a/Documentation/sysrq.txt @@ -284,27 +214,8 @@ index 000000000000..6f2aeabf7faa +is provided. + +These data are also reset when the wakeup histogram is reset. -diff --git a/Makefile b/Makefile -index b249529204cd..5d699d055995 100644 ---- a/Makefile -+++ b/Makefile -@@ -398,12 +398,12 @@ KBUILD_CPPFLAGS := -D__KERNEL__ - KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -fno-common \ - -Werror-implicit-function-declaration \ -- -Wno-format-security \ -+ -Wno-format-security -fno-PIE \ - -std=gnu89 - - KBUILD_AFLAGS_KERNEL := - KBUILD_CFLAGS_KERNEL := --KBUILD_AFLAGS := -D__ASSEMBLY__ -+KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE - KBUILD_AFLAGS_MODULE := -DMODULE - KBUILD_CFLAGS_MODULE := -DMODULE - KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds diff --git a/arch/Kconfig b/arch/Kconfig -index fd6e9712af81..085134ee13e9 100644 +index 659bdd079277..099fc0f5155e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -9,6 +9,7 @@ config OPROFILE @@ -324,7 +235,7 @@ index fd6e9712af81..085134ee13e9 100644 This option enables a transparent branch optimization that makes certain almost-always-true or almost-always-false branch diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig -index a9c4e48bb7ec..6eefe4f32302 100644 +index b5d529fdffab..5715844e83e3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -36,7 +36,7 @@ config ARM @@ -344,6 +255,19 @@ index a9c4e48bb7ec..6eefe4f32302 100644 select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS +diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h +index e53638c8ed8a..6095a1649865 100644 +--- a/arch/arm/include/asm/irq.h ++++ b/arch/arm/include/asm/irq.h +@@ -22,6 +22,8 @@ + #endif + + #ifndef __ASSEMBLY__ ++#include ++ + struct irqaction; + struct pt_regs; + extern void migrate_irqs(void); diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index 12ebfcc1d539..c962084605bc 100644 --- a/arch/arm/include/asm/switch_to.h @@ -491,11 +415,42 @@ index 10c3283d6c19..8872937862cc 100644 UNWIND(.fnend ) ENDPROC(ret_fast_syscall) +diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c +index 69bda1a5707e..1f665acaa6a9 100644 +--- a/arch/arm/kernel/patch.c ++++ b/arch/arm/kernel/patch.c +@@ -15,7 +15,7 @@ struct patch { + unsigned int insn; + }; + +-static DEFINE_SPINLOCK(patch_lock); ++static DEFINE_RAW_SPINLOCK(patch_lock); + + static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) + __acquires(&patch_lock) +@@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) + return addr; + + if (flags) +- spin_lock_irqsave(&patch_lock, *flags); ++ raw_spin_lock_irqsave(&patch_lock, *flags); + else + __acquire(&patch_lock); + +@@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) + clear_fixmap(fixmap); + + if (flags) +- spin_unlock_irqrestore(&patch_lock, *flags); ++ raw_spin_unlock_irqrestore(&patch_lock, *flags); + else + __release(&patch_lock); + } diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c -index 612eb530f33f..cd3006dc1fd3 100644 +index 91d2d5b01414..750550098b59 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c -@@ -323,6 +323,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) +@@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU @@ -541,7 +496,7 @@ index 7b8f2141427b..96541e00b74a 100644 } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c -index 861521606c6d..e5ca865d321b 100644 +index 7dd14e8395e6..4cd7e3d98035 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -234,8 +234,6 @@ int __cpu_disable(void) @@ -619,10 +574,10 @@ index 0bee233fef9a..314cfb232a63 100644 kfree(tab); } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c -index c94b90d43772..244dde72018a 100644 +index 19b5f5c1c0ff..82aa639e6737 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c -@@ -584,7 +584,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +@@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * involves poking the GIC, which must be done in a * non-preemptible context. */ @@ -631,7 +586,7 @@ index c94b90d43772..244dde72018a 100644 kvm_pmu_flush_hwstate(vcpu); kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); -@@ -605,7 +605,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +@@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); @@ -640,7 +595,7 @@ index c94b90d43772..244dde72018a 100644 continue; } -@@ -661,7 +661,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +@@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_vgic_sync_hwstate(vcpu); @@ -1200,10 +1155,10 @@ index c2366510187a..6b60f582b738 100644 return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig -index bc3f00f586f1..0f3df6d5154a 100644 +index 969ef880d234..1182fe883771 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -90,6 +90,7 @@ config ARM64 +@@ -91,6 +91,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -1211,7 +1166,7 @@ index bc3f00f586f1..0f3df6d5154a 100644 select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS -@@ -689,7 +690,7 @@ config XEN_DOM0 +@@ -694,7 +695,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" @@ -1221,7 +1176,7 @@ index bc3f00f586f1..0f3df6d5154a 100644 select PARAVIRT help diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h -index abd64bd1f6d9..9170788ffa37 100644 +index e9ea5a6bd449..6c500ad63c6a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -49,6 +49,7 @@ struct thread_info { @@ -1232,7 +1187,7 @@ index abd64bd1f6d9..9170788ffa37 100644 int cpu; /* cpu */ }; -@@ -109,6 +110,7 @@ static inline struct thread_info *current_thread_info(void) +@@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ @@ -1240,7 +1195,7 @@ index abd64bd1f6d9..9170788ffa37 100644 #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 -@@ -124,6 +126,7 @@ static inline struct thread_info *current_thread_info(void) +@@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) @@ -1248,21 +1203,22 @@ index abd64bd1f6d9..9170788ffa37 100644 #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) -@@ -132,7 +135,8 @@ static inline struct thread_info *current_thread_info(void) +@@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_NEED_RESCHED_LAZY) ++#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c -index 05070b72fc28..acfeddb1283a 100644 +index 4a2f0f0fef32..6bf2bc17c400 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c -@@ -37,6 +37,7 @@ int main(void) +@@ -38,6 +38,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); @@ -1271,10 +1227,10 @@ index 05070b72fc28..acfeddb1283a 100644 DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S -index 441420ca7d08..404792bdca99 100644 +index 223d54a4d66b..266c0e2141ca 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S -@@ -434,11 +434,16 @@ ENDPROC(el1_sync) +@@ -428,11 +428,16 @@ ENDPROC(el1_sync) #ifdef CONFIG_PREEMPT ldr w24, [tsk, #TI_PREEMPT] // get preempt count @@ -1294,7 +1250,7 @@ index 441420ca7d08..404792bdca99 100644 #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on -@@ -452,6 +457,7 @@ ENDPROC(el1_irq) +@@ -446,6 +451,7 @@ ENDPROC(el1_irq) 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? @@ -1302,19 +1258,24 @@ index 441420ca7d08..404792bdca99 100644 ret x24 #endif -@@ -708,6 +714,7 @@ ENDPROC(cpu_switch_to) - */ - work_pending: - tbnz x1, #TIF_NEED_RESCHED, work_resched -+ tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched - /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ - mov x0, sp // 'regs' - enable_irq // enable interrupts for do_notify_resume() +diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c +index 404dd67080b9..639dc6d12e72 100644 +--- a/arch/arm64/kernel/signal.c ++++ b/arch/arm64/kernel/signal.c +@@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, + */ + trace_hardirqs_off(); + do { +- if (thread_flags & _TIF_NEED_RESCHED) { ++ if (thread_flags & _TIF_NEED_RESCHED_MASK) { + schedule(); + } else { + local_irq_enable(); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig -index 212ff92920d2..71ad38d3d76b 100644 +index b3c5bde43d34..8122bf058de0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig -@@ -2480,7 +2480,7 @@ config MIPS_ASID_BITS_VARIABLE +@@ -2514,7 +2514,7 @@ config MIPS_ASID_BITS_VARIABLE # config HIGHMEM bool "High Memory Support" @@ -1324,10 +1285,10 @@ index 212ff92920d2..71ad38d3d76b 100644 config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig -index 792cb1768c8f..ddf5a0fdb25a 100644 +index 65fba4c34cd7..4b5ba68910e0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -57,10 +57,11 @@ config LOCKDEP_SUPPORT +@@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool @@ -1340,7 +1301,7 @@ index 792cb1768c8f..ddf5a0fdb25a 100644 config GENERIC_LOCKBREAK bool -@@ -140,6 +141,7 @@ config PPC +@@ -134,6 +135,7 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER @@ -1348,7 +1309,7 @@ index 792cb1768c8f..ddf5a0fdb25a 100644 select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS -@@ -326,7 +328,7 @@ menu "Kernel options" +@@ -321,7 +323,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" @@ -1408,7 +1369,7 @@ index 87e4b2d8dcd4..981e501a4359 100644 /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c -index b89d14c0352c..81ae8f4c88f6 100644 +index c833d88c423d..96e9fbc3f684 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -156,6 +156,7 @@ int main(void) @@ -1420,7 +1381,7 @@ index b89d14c0352c..81ae8f4c88f6 100644 DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S -index 9899032230b4..f95b93f46c47 100644 +index 3841d749a430..6dbaeff192b9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ @@ -1472,10 +1433,10 @@ index 9899032230b4..f95b93f46c47 100644 andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S -index 5afd03e5e8b8..f5d4c2a033ef 100644 +index 6432d4bf08c8..5509a26f1070 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S -@@ -657,7 +657,7 @@ _GLOBAL(ret_from_except_lite) +@@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite) bl restore_math b restore #endif @@ -1484,7 +1445,7 @@ index 5afd03e5e8b8..f5d4c2a033ef 100644 beq 2f bl restore_interrupts SCHEDULE_USER -@@ -719,10 +719,18 @@ _GLOBAL(ret_from_except_lite) +@@ -718,10 +718,18 @@ _GLOBAL(ret_from_except_lite) #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ @@ -1506,7 +1467,7 @@ index 5afd03e5e8b8..f5d4c2a033ef 100644 cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 -@@ -739,7 +747,7 @@ _GLOBAL(ret_from_except_lite) +@@ -738,7 +746,7 @@ _GLOBAL(ret_from_except_lite) /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) @@ -1516,10 +1477,10 @@ index 5afd03e5e8b8..f5d4c2a033ef 100644 /* diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c -index 08887cf2b20e..f1770ea2d094 100644 +index 3c05c311e35e..f83f6ac1274d 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c -@@ -633,6 +633,7 @@ void irq_ctx_init(void) +@@ -638,6 +638,7 @@ void irq_ctx_init(void) } } @@ -1527,7 +1488,7 @@ index 08887cf2b20e..f1770ea2d094 100644 void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; -@@ -650,6 +651,7 @@ void do_softirq_own_stack(void) +@@ -655,6 +656,7 @@ void do_softirq_own_stack(void) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } @@ -1536,10 +1497,10 @@ index 08887cf2b20e..f1770ea2d094 100644 irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S -index d9c912b6e632..7b2e997a5083 100644 +index 030d72df5dd5..b471a709e100 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S -@@ -40,6 +40,7 @@ +@@ -41,6 +41,7 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ @@ -1547,7 +1508,7 @@ index d9c912b6e632..7b2e997a5083 100644 _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) -@@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq) +@@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq) stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr @@ -1556,10 +1517,10 @@ index d9c912b6e632..7b2e997a5083 100644 /* * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S -index cb195157b318..c919a2bfd0ca 100644 +index 4f178671f230..39e7d84a3492 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S -@@ -30,6 +30,7 @@ +@@ -31,6 +31,7 @@ .text @@ -1567,7 +1528,7 @@ index cb195157b318..c919a2bfd0ca 100644 _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) -@@ -40,6 +41,7 @@ _GLOBAL(call_do_softirq) +@@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq) ld r0,16(r1) mtlr r0 blr @@ -1576,10 +1537,10 @@ index cb195157b318..c919a2bfd0ca 100644 _GLOBAL(call_do_irq) mflr r0 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig -index c2024ac9d4e8..2303788da7e1 100644 +index 029be26b5a17..9528089ea142 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig -@@ -172,6 +172,7 @@ config KVM_E500MC +@@ -175,6 +175,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 @@ -1588,7 +1549,7 @@ index c2024ac9d4e8..2303788da7e1 100644 select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c -index 57caaf11a83f..030c9bfe52e3 100644 +index e48462447ff0..2670cee66064 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, @@ -1621,10 +1582,10 @@ index 6c0378c0b8b5..abd58b4dff97 100644 static inline void handle_one_irq(unsigned int irq) { diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig -index 59b09600dd32..1b073eb3dc2a 100644 +index 165ecdd24d22..b68a464a22be 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig -@@ -187,12 +187,10 @@ config NR_CPUS +@@ -194,12 +194,10 @@ config NR_CPUS source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK @@ -1660,7 +1621,7 @@ index 34a7930b76ef..773740521008 100644 #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig -index 2a1f0ce7c59a..bd4ab87efb31 100644 +index bada636d1065..f8a995c90c01 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -17,6 +17,7 @@ config X86_64 @@ -1671,7 +1632,7 @@ index 2a1f0ce7c59a..bd4ab87efb31 100644 select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ANON_INODES -@@ -231,8 +232,11 @@ config ARCH_MAY_HAVE_PC_FDC +@@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API @@ -1684,7 +1645,7 @@ index 2a1f0ce7c59a..bd4ab87efb31 100644 config GENERIC_CALIBRATE_DELAY def_bool y -@@ -885,7 +889,7 @@ config IOMMU_HELPER +@@ -897,7 +901,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL @@ -1694,7 +1655,7 @@ index 2a1f0ce7c59a..bd4ab87efb31 100644 Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c -index 0ab5ee1c26af..fff8f6f1f90c 100644 +index aa8b0672f87a..2429414bfc71 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, @@ -1992,10 +1953,10 @@ index 6a85598931b5..3a506ce7ed93 100644 } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c -index 1433f6b4607d..f963fde8e4fa 100644 +index bdd9cc59d20f..56d01a339ba4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c -@@ -136,7 +136,7 @@ static long syscall_trace_enter(struct pt_regs *regs) +@@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs) #define EXIT_TO_USERMODE_LOOP_FLAGS \ (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ @@ -2004,7 +1965,7 @@ index 1433f6b4607d..f963fde8e4fa 100644 static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) { -@@ -152,9 +152,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) +@@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) /* We have work to do. */ local_irq_enable(); @@ -2023,10 +1984,10 @@ index 1433f6b4607d..f963fde8e4fa 100644 uprobe_notify_resume(regs); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S -index 0b56666e6039..1d8ee026c9c5 100644 +index edba8606b99a..4a3389535fc6 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S -@@ -271,8 +271,25 @@ END(ret_from_exception) +@@ -308,8 +308,25 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) need_resched: @@ -2041,11 +2002,11 @@ index 0b56666e6039..1d8ee026c9c5 100644 + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jne restore_all + -+ GET_THREAD_INFO(%ebp) -+ cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? ++ movl PER_CPU_VAR(current_task), %ebp ++ cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? + jnz restore_all + -+ testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp) ++ testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) + jz restore_all +test_int_off: +#endif @@ -2053,10 +2014,10 @@ index 0b56666e6039..1d8ee026c9c5 100644 jz restore_all call preempt_schedule_irq diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S -index 02fff3ebfb87..81ec3d016df0 100644 +index ef766a358b37..28401f826ab1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S -@@ -512,7 +512,23 @@ GLOBAL(retint_user) +@@ -546,7 +546,23 @@ GLOBAL(retint_user) bt $9, EFLAGS(%rsp) /* were interrupts off? */ jnc 1f 0: cmpl $0, PER_CPU_VAR(__preempt_count) @@ -2069,18 +2030,18 @@ index 02fff3ebfb87..81ec3d016df0 100644 + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jnz 1f + -+ GET_THREAD_INFO(%rcx) -+ cmpl $0, TI_preempt_lazy_count(%rcx) ++ movq PER_CPU_VAR(current_task), %rcx ++ cmpl $0, TASK_TI_preempt_lazy_count(%rcx) + jnz 1f + -+ bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) ++ bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) + jnc 1f +do_preempt_schedule_irq: +#endif call preempt_schedule_irq jmp 0b 1: -@@ -817,6 +833,7 @@ END(native_load_gs_index) +@@ -894,6 +910,7 @@ EXPORT_SYMBOL(native_load_gs_index) jmp 2b .previous @@ -2088,7 +2049,7 @@ index 02fff3ebfb87..81ec3d016df0 100644 /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) pushq %rbp -@@ -829,6 +846,7 @@ ENTRY(do_softirq_own_stack) +@@ -906,6 +923,7 @@ ENTRY(do_softirq_own_stack) decl PER_CPU_VAR(irq_count) ret END(do_softirq_own_stack) @@ -2149,12 +2110,12 @@ index 17f218645701..11bd1b7ee6eb 100644 #ifdef CONFIG_PREEMPT diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h -index dd1e7d6387ab..d59bedb28bab 100644 +index 8af22be0fe61..d1328789b759 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h -@@ -23,6 +23,19 @@ typedef struct { - unsigned long sig[_NSIG_WORDS]; - } sigset_t; +@@ -27,6 +27,19 @@ typedef struct { + #define SA_IA32_ABI 0x02000000u + #define SA_X32_ABI 0x01000000u +/* + * Because some traps use the IST stack, we must keep preemption @@ -2202,19 +2163,25 @@ index 58505f01962f..02fa39652cd6 100644 canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h -index 8b7c8d8e0852..631059ef61da 100644 +index ad6f5eb07a95..5ceb3a1c2b1a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -57,6 +57,8 @@ struct thread_info { - __u32 flags; /* low level flags */ - __u32 status; /* thread synchronous flags */ - __u32 cpu; /* current CPU */ -+ int preempt_lazy_count; /* 0 => lazy preemptable +@@ -54,11 +54,14 @@ struct task_struct; + + struct thread_info { + unsigned long flags; /* low level flags */ ++ int preempt_lazy_count; /* 0 => lazy preemptable + <0 => BUG */ }; #define INIT_THREAD_INFO(tsk) \ -@@ -73,6 +75,10 @@ struct thread_info { + { \ + .flags = 0, \ ++ .preempt_lazy_count = 0, \ + } + + #define init_stack (init_thread_union.stack) +@@ -67,6 +70,10 @@ struct thread_info { #include @@ -2225,7 +2192,7 @@ index 8b7c8d8e0852..631059ef61da 100644 #endif /* -@@ -91,6 +97,7 @@ struct thread_info { +@@ -85,6 +92,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -2233,7 +2200,7 @@ index 8b7c8d8e0852..631059ef61da 100644 #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -@@ -115,6 +122,7 @@ struct thread_info { +@@ -108,6 +116,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -2241,7 +2208,7 @@ index 8b7c8d8e0852..631059ef61da 100644 #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) -@@ -151,6 +159,8 @@ struct thread_info { +@@ -143,6 +152,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) @@ -2251,10 +2218,10 @@ index 8b7c8d8e0852..631059ef61da 100644 /* diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h -index cc44d926c17e..df278aa0f638 100644 +index 57ab86d94d64..35d25e27180f 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h -@@ -615,9 +615,9 @@ struct bau_control { +@@ -624,9 +624,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; @@ -2267,7 +2234,7 @@ index cc44d926c17e..df278aa0f638 100644 /* tunables */ int max_concurr; int max_concurr_const; -@@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v) +@@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v) * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. */ @@ -2288,7 +2255,7 @@ index cc44d926c17e..df278aa0f638 100644 } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c -index fbd19444403f..e78f477a4ae3 100644 +index 931ced8ca345..167975ac8af7 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; @@ -2316,25 +2283,25 @@ index 48e6d84f173e..0b5a8b994f65 100644 return true; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c -index 2bd5c6ff7ee7..a2c317f5839b 100644 +index c62e015b126c..0cc71257fca6 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c -@@ -31,6 +31,7 @@ void common(void) { - BLANK(); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); -+ OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); +@@ -36,6 +36,7 @@ void common(void) { BLANK(); + OFFSET(TASK_TI_flags, task_struct, thread_info.flags); ++ OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); -@@ -88,4 +89,5 @@ void common(void) { + + BLANK(); +@@ -91,4 +92,5 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c -index 79d8ec849468..accbf0e806d0 100644 +index a7fdf453d895..e3a0e969a66e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -41,6 +41,8 @@ @@ -2343,10 +2310,10 @@ index 79d8ec849468..accbf0e806d0 100644 #include +#include +#include + #include #include - #include -@@ -1291,7 +1293,7 @@ void mce_log_therm_throt_event(__u64 status) +@@ -1317,7 +1319,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ @@ -2355,7 +2322,7 @@ index 79d8ec849468..accbf0e806d0 100644 static unsigned long mce_adjust_timer_default(unsigned long interval) { -@@ -1300,32 +1302,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) +@@ -1326,32 +1328,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; @@ -2394,7 +2361,7 @@ index 79d8ec849468..accbf0e806d0 100644 iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { -@@ -1348,7 +1336,7 @@ static void mce_timer_fn(unsigned long data) +@@ -1374,7 +1362,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); @@ -2403,7 +2370,7 @@ index 79d8ec849468..accbf0e806d0 100644 } /* -@@ -1356,7 +1344,7 @@ static void mce_timer_fn(unsigned long data) +@@ -1382,7 +1370,7 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { @@ -2412,7 +2379,7 @@ index 79d8ec849468..accbf0e806d0 100644 unsigned long iv = __this_cpu_read(mce_next_interval); __restart_timer(t, interval); -@@ -1371,7 +1359,7 @@ static void mce_timer_delete_all(void) +@@ -1397,7 +1385,7 @@ static void mce_timer_delete_all(void) int cpu; for_each_online_cpu(cpu) @@ -2421,7 +2388,7 @@ index 79d8ec849468..accbf0e806d0 100644 } static void mce_do_trigger(struct work_struct *work) -@@ -1381,6 +1369,56 @@ static void mce_do_trigger(struct work_struct *work) +@@ -1407,6 +1395,56 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); @@ -2478,7 +2445,7 @@ index 79d8ec849468..accbf0e806d0 100644 /* * Notify the user(s) about new machine check events. * Can be called from interrupt context, but not from machine check/NMI -@@ -1388,19 +1426,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); +@@ -1414,19 +1452,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); */ int mce_notify_irq(void) { @@ -2499,7 +2466,7 @@ index 79d8ec849468..accbf0e806d0 100644 return 1; } return 0; -@@ -1717,7 +1744,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) +@@ -1732,7 +1759,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) } } @@ -2508,7 +2475,7 @@ index 79d8ec849468..accbf0e806d0 100644 { unsigned long iv = check_interval * HZ; -@@ -1726,16 +1753,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) +@@ -1741,16 +1768,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) per_cpu(mce_next_interval, cpu) = iv; @@ -2530,7 +2497,7 @@ index 79d8ec849468..accbf0e806d0 100644 mce_start_timer(cpu, t); } -@@ -2459,6 +2487,8 @@ static void mce_disable_cpu(void *h) +@@ -2475,6 +2503,8 @@ static void mce_disable_cpu(void *h) if (!mce_available(raw_cpu_ptr(&cpu_info))) return; @@ -2539,7 +2506,7 @@ index 79d8ec849468..accbf0e806d0 100644 if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); -@@ -2481,6 +2511,7 @@ static void mce_reenable_cpu(void *h) +@@ -2497,6 +2527,7 @@ static void mce_reenable_cpu(void *h) if (b->init) wrmsrl(msr_ops.ctl(i), b->ctl); } @@ -2547,7 +2514,7 @@ index 79d8ec849468..accbf0e806d0 100644 } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ -@@ -2488,7 +2519,6 @@ static int +@@ -2504,7 +2535,6 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -2555,7 +2522,7 @@ index 79d8ec849468..accbf0e806d0 100644 switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: -@@ -2508,11 +2538,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +@@ -2524,11 +2554,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); @@ -2567,7 +2534,7 @@ index 79d8ec849468..accbf0e806d0 100644 break; } -@@ -2551,6 +2579,10 @@ static __init int mcheck_init_device(void) +@@ -2567,6 +2595,10 @@ static __init int mcheck_init_device(void) goto err_out; } @@ -2578,68 +2545,6 @@ index 79d8ec849468..accbf0e806d0 100644 if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { err = -ENOMEM; goto err_out; -diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c -index 09675712eba8..eea7557b355d 100644 ---- a/arch/x86/kernel/dumpstack_32.c -+++ b/arch/x86/kernel/dumpstack_32.c -@@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) - { -- const unsigned cpu = get_cpu(); -+ const unsigned cpu = get_cpu_light(); - int graph = 0; - u32 *prev_esp; - -@@ -84,7 +84,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, - break; - touch_nmi_watchdog(); - } -- put_cpu(); -+ put_cpu_light(); - } - EXPORT_SYMBOL(dump_trace); - -diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c -index 9ee4520ce83c..2cd610b68868 100644 ---- a/arch/x86/kernel/dumpstack_64.c -+++ b/arch/x86/kernel/dumpstack_64.c -@@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data) - { -- const unsigned cpu = get_cpu(); -+ const unsigned cpu = get_cpu_light(); - unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); - unsigned long dummy; - unsigned used = 0; -@@ -239,7 +239,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, - * This handles the process stack: - */ - bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); -- put_cpu(); -+ put_cpu_light(); - } - EXPORT_SYMBOL(dump_trace); - -@@ -253,7 +253,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - int cpu; - int i; - -- preempt_disable(); -+ migrate_disable(); - cpu = smp_processor_id(); - - irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); -@@ -299,7 +299,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - stack++; - touch_nmi_watchdog(); - } -- preempt_enable(); -+ migrate_enable(); - - pr_cont("\n"); - show_trace_log_lvl(task, regs, sp, bp, log_lvl); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..053bf3b2ef39 100644 --- a/arch/x86/kernel/irq_32.c @@ -2661,7 +2566,7 @@ index 1f38d9a4d9de..053bf3b2ef39 100644 bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c -index d86be29c38c7..b0e29d1a0571 100644 +index bd7be8efdc4c..b3b0a7f7b1ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ @@ -2672,7 +2577,7 @@ index d86be29c38c7..b0e29d1a0571 100644 #include #include -@@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) +@@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); @@ -2708,7 +2613,7 @@ index d86be29c38c7..b0e29d1a0571 100644 /* * switch_to(x,y) should switch tasks from x to y. -@@ -286,6 +316,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +@@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); @@ -2718,10 +2623,10 @@ index d86be29c38c7..b0e29d1a0571 100644 * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c -index b62c85229711..d907b281a9d6 100644 +index 6f69340f9fa3..d47f204a0fbe 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c -@@ -1938,6 +1938,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) +@@ -1939,6 +1939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); apic->lapic_timer.timer.function = apic_timer_fn; @@ -2730,10 +2635,10 @@ index b62c85229711..d907b281a9d6 100644 /* * APIC is created enabled. This will prevent kvm_lapic_set_base from diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 699f8726539a..24f30c86510c 100644 +index f3648c978d2f..d0d0901d1c56 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c -@@ -5865,6 +5865,13 @@ int kvm_arch_init(void *opaque) +@@ -5930,6 +5930,13 @@ int kvm_arch_init(void *opaque) goto out; } @@ -2832,10 +2737,10 @@ index ada98b39b8ad..585f6829653b 100644 kmap_atomic_idx_pop(); } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c -index fdb4d42b4ce5..8ab90fbecff0 100644 +index 9e42842e924a..5398f97172f9 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c -@@ -729,9 +729,9 @@ static void destination_plugged(struct bau_desc *bau_desc, +@@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); @@ -2847,7 +2752,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 end_uvhub_quiesce(hmaster); -@@ -751,9 +751,9 @@ static void destination_timeout(struct bau_desc *bau_desc, +@@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); @@ -2859,7 +2764,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 end_uvhub_quiesce(hmaster); -@@ -774,7 +774,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) +@@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) cycles_t tm1; hmaster = bcp->uvhub_master; @@ -2868,7 +2773,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 if (!bcp->baudisabled) { stat->s_bau_disabled++; tm1 = get_cycles(); -@@ -787,7 +787,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) +@@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) } } } @@ -2877,7 +2782,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 } static void count_max_concurr(int stat, struct bau_control *bcp, -@@ -850,7 +850,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, +@@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, */ static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) { @@ -2886,7 +2791,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 atomic_t *v; v = &hmaster->active_descriptor_count; -@@ -983,7 +983,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) +@@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) struct bau_control *hmaster; hmaster = bcp->uvhub_master; @@ -2895,7 +2800,7 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { stat->s_bau_reenabled++; for_each_present_cpu(tcpu) { -@@ -995,10 +995,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) +@@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) tbcp->period_giveups = 0; } } @@ -2908,10 +2813,10 @@ index fdb4d42b4ce5..8ab90fbecff0 100644 return -1; } -@@ -1916,9 +1916,9 @@ static void __init init_per_cpu_tunables(void) +@@ -1940,9 +1940,9 @@ static void __init init_per_cpu_tunables(void) bcp->cong_reps = congested_reps; - bcp->disabled_period = sec_2_cycles(disabled_period); - bcp->giveup_limit = giveup_limit; + bcp->disabled_period = sec_2_cycles(disabled_period); + bcp->giveup_limit = giveup_limit; - spin_lock_init(&bcp->queue_lock); - spin_lock_init(&bcp->uvhub_lock); - spin_lock_init(&bcp->disable_lock); @@ -3006,7 +2911,7 @@ index b333fc45f9ec..8b85916e6986 100644 /* diff --git a/block/blk-core.c b/block/blk-core.c -index 36c7ac328d8c..caa5fc1be2a2 100644 +index 14d7c0740dc0..dfd905bea77c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) @@ -3055,7 +2960,7 @@ index 36c7ac328d8c..caa5fc1be2a2 100644 /* * Init percpu_ref in atomic mode so that it's faster to shutdown. -@@ -3171,7 +3174,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, +@@ -3177,7 +3180,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, blk_run_queue_async(q); else __blk_run_queue(q); @@ -3064,7 +2969,7 @@ index 36c7ac328d8c..caa5fc1be2a2 100644 } static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) -@@ -3219,7 +3222,6 @@ EXPORT_SYMBOL(blk_check_plugged); +@@ -3225,7 +3228,6 @@ EXPORT_SYMBOL(blk_check_plugged); void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) { struct request_queue *q; @@ -3072,7 +2977,7 @@ index 36c7ac328d8c..caa5fc1be2a2 100644 struct request *rq; LIST_HEAD(list); unsigned int depth; -@@ -3239,11 +3241,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) +@@ -3245,11 +3247,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) q = NULL; depth = 0; @@ -3084,7 +2989,7 @@ index 36c7ac328d8c..caa5fc1be2a2 100644 while (!list_empty(&list)) { rq = list_entry_rq(list.next); list_del_init(&rq->queuelist); -@@ -3256,7 +3253,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) +@@ -3262,7 +3259,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) queue_unplugged(q, depth, from_schedule); q = rq->q; depth = 0; @@ -3093,7 +2998,7 @@ index 36c7ac328d8c..caa5fc1be2a2 100644 } /* -@@ -3283,8 +3280,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) +@@ -3289,8 +3286,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) */ if (q) queue_unplugged(q, depth, from_schedule); @@ -3132,66 +3037,11 @@ index 381cb50a673c..dc8785233d94 100644 goto retry; } } -diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c -index bb3ed488f7b5..628c6c13c482 100644 ---- a/block/blk-mq-cpu.c -+++ b/block/blk-mq-cpu.c -@@ -16,7 +16,7 @@ - #include "blk-mq.h" - - static LIST_HEAD(blk_mq_cpu_notify_list); --static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); -+static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); - - static int blk_mq_main_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -@@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, - struct blk_mq_cpu_notifier *notify; - int ret = NOTIFY_OK; - -- raw_spin_lock(&blk_mq_cpu_notify_lock); -+ if (action != CPU_POST_DEAD) -+ return NOTIFY_OK; -+ -+ spin_lock(&blk_mq_cpu_notify_lock); - - list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { - ret = notify->notify(notify->data, action, cpu); -@@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, - break; - } - -- raw_spin_unlock(&blk_mq_cpu_notify_lock); -+ spin_unlock(&blk_mq_cpu_notify_lock); - return ret; - } - -@@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) - { - BUG_ON(!notifier->notify); - -- raw_spin_lock(&blk_mq_cpu_notify_lock); -+ spin_lock(&blk_mq_cpu_notify_lock); - list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); -- raw_spin_unlock(&blk_mq_cpu_notify_lock); -+ spin_unlock(&blk_mq_cpu_notify_lock); - } - - void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) - { -- raw_spin_lock(&blk_mq_cpu_notify_lock); -+ spin_lock(&blk_mq_cpu_notify_lock); - list_del(¬ifier->list); -- raw_spin_unlock(&blk_mq_cpu_notify_lock); -+ spin_unlock(&blk_mq_cpu_notify_lock); - } - - void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, diff --git a/block/blk-mq.c b/block/blk-mq.c -index c207fa9870eb..ac71b0455e9f 100644 +index ad459e4e8071..1bfacb205bfa 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); +@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); static void blk_mq_freeze_queue_wait(struct request_queue *q) { @@ -3200,7 +3050,7 @@ index c207fa9870eb..ac71b0455e9f 100644 } /* -@@ -130,7 +130,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) +@@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) WARN_ON_ONCE(freeze_depth < 0); if (!freeze_depth) { percpu_ref_reinit(&q->q_usage_counter); @@ -3209,7 +3059,7 @@ index c207fa9870eb..ac71b0455e9f 100644 } } EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); -@@ -149,7 +149,7 @@ void blk_mq_wake_waiters(struct request_queue *q) +@@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q) * dying, we need to ensure that processes currently waiting on * the queue are notified as well. */ @@ -3218,7 +3068,7 @@ index c207fa9870eb..ac71b0455e9f 100644 } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) -@@ -197,6 +197,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, +@@ -177,6 +177,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, rq->resid_len = 0; rq->sense = NULL; @@ -3228,7 +3078,7 @@ index c207fa9870eb..ac71b0455e9f 100644 INIT_LIST_HEAD(&rq->timeout_list); rq->timeout = 0; -@@ -379,6 +382,17 @@ void blk_mq_end_request(struct request *rq, int error) +@@ -345,6 +348,17 @@ void blk_mq_end_request(struct request *rq, int error) } EXPORT_SYMBOL(blk_mq_end_request); @@ -3246,7 +3096,7 @@ index c207fa9870eb..ac71b0455e9f 100644 static void __blk_mq_complete_request_remote(void *data) { struct request *rq = data; -@@ -386,6 +400,8 @@ static void __blk_mq_complete_request_remote(void *data) +@@ -352,6 +366,8 @@ static void __blk_mq_complete_request_remote(void *data) rq->q->softirq_done_fn(rq); } @@ -3255,7 +3105,7 @@ index c207fa9870eb..ac71b0455e9f 100644 static void blk_mq_ipi_complete_request(struct request *rq) { struct blk_mq_ctx *ctx = rq->mq_ctx; -@@ -397,19 +413,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) +@@ -363,19 +379,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) return; } @@ -3281,10 +3131,10 @@ index c207fa9870eb..ac71b0455e9f 100644 } static void __blk_mq_complete_request(struct request *rq) -@@ -938,14 +958,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +@@ -917,14 +937,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) return; - if (!async) { + if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { - int cpu = get_cpu(); + int cpu = get_cpu_light(); if (cpumask_test_cpu(cpu, hctx->cpumask)) { @@ -3298,21 +3148,12 @@ index c207fa9870eb..ac71b0455e9f 100644 + put_cpu_light(); } - kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), -@@ -1667,7 +1687,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, - { - struct blk_mq_hw_ctx *hctx = data; - -- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) -+ if (action == CPU_POST_DEAD) - return blk_mq_hctx_cpu_offline(hctx, cpu); - - /* + kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); diff --git a/block/blk-mq.h b/block/blk-mq.h -index 9087b11037b7..0401d76e827c 100644 +index e5d25249028c..1e846b842eab 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h -@@ -86,12 +86,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, +@@ -72,12 +72,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, */ static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) { @@ -3328,7 +3169,7 @@ index 9087b11037b7..0401d76e827c 100644 struct blk_mq_alloc_data { diff --git a/block/blk-softirq.c b/block/blk-softirq.c -index 53b1737e978d..81c3c0a62edf 100644 +index 06cf9807f49a..c40342643ca0 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) @@ -3339,15 +3180,15 @@ index 53b1737e978d..81c3c0a62edf 100644 } /* -@@ -93,6 +94,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, - this_cpu_ptr(&blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); -+ preempt_check_resched_rt(); - } +@@ -89,6 +90,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) + this_cpu_ptr(&blk_cpu_done)); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + local_irq_enable(); ++ preempt_check_resched_rt(); - return NOTIFY_OK; -@@ -150,6 +152,7 @@ void __blk_complete_request(struct request *req) + return 0; + } +@@ -141,6 +143,7 @@ void __blk_complete_request(struct request *req) goto do_local; local_irq_restore(flags); @@ -3442,7 +3283,7 @@ index 7eefcdb00227..0ecc7f5a2f40 100644 #endif /* _CRYPTO_INTERNAL_H */ diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h -index fded776236e2..bda523219d50 100644 +index 750fa824d42c..441edf51484a 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); @@ -3618,7 +3459,7 @@ index 478cac2ed465..f7a6efdc3285 100644 /* dynamic per-device compression frontend */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c -index 04365b17ee67..b4a0577a4dbc 100644 +index 5497f7fc44d0..3826072a23c5 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -519,6 +519,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) @@ -3793,7 +3634,7 @@ index 74fcf10da374..fd4020c99b9e 100644 + #endif diff --git a/drivers/char/random.c b/drivers/char/random.c -index 3efb3bf0ab83..c894d2e266f3 100644 +index d6876d506220..0c60b1e54579 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1028,8 +1028,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) @@ -4009,7 +3850,7 @@ index 4da2af9694a2..5b6f57f500b8 100644 goto err_unregister_clksrc; diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c -index 7f0f5b26d8c5..1553f19e73e7 100644 +index 6555821bbdae..93288849b2bd 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -46,6 +46,7 @@ struct pit_data { @@ -4050,7 +3891,7 @@ index 7f0f5b26d8c5..1553f19e73e7 100644 /* update clocksource counter */ data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); -@@ -211,15 +226,6 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data) +@@ -230,15 +245,6 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) return ret; } @@ -4198,10 +4039,10 @@ index adbd1de1cea5..1fac5074f2cf 100644 This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. Support for K10 and newer processors is now in acpi-cpufreq. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c -index b35e5b6475b2..ce60807fb1d4 100644 +index a218c2e395e7..5273d8f1d5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c -@@ -1302,7 +1302,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, +@@ -1537,7 +1537,9 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; @@ -4212,7 +4053,7 @@ index b35e5b6475b2..ce60807fb1d4 100644 i915_gem_execbuffer_move_to_active(vmas, params->request); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c -index 6f10b421487b..dd3a9a6ace11 100644 +index 1c237d02f30b..9e9b4404c0d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) @@ -4225,7 +4066,7 @@ index 6f10b421487b..dd3a9a6ace11 100644 #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c -index 1c2aec392412..1d85c0c791f1 100644 +index 3fc286cd1157..252a1117b103 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, @@ -4245,10 +4086,10 @@ index 1c2aec392412..1d85c0c791f1 100644 spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c -index e9a64fba6333..2aac27b13d86 100644 +index c9e83f39ec0a..6b0caae38076 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c -@@ -11647,7 +11647,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) +@@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_flip_work *work; @@ -4258,18 +4099,18 @@ index e9a64fba6333..2aac27b13d86 100644 if (crtc == NULL) return; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c -index 4178849631ad..0eb939c92544 100644 +index dbed12c484c9..5c540b78e8b5 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c -@@ -38,6 +38,7 @@ +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include #include "intel_drv.h" + #include "intel_frontbuffer.h" #include - #include "i915_drv.h" -+#include - - static bool - format_is_yuv(uint32_t format) -@@ -64,6 +65,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, +@@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } @@ -4278,7 +4119,7 @@ index 4178849631ad..0eb939c92544 100644 /** * intel_pipe_update_start() - start update of a set of display registers * @crtc: the crtc of which the registers are going to be updated -@@ -94,7 +97,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) +@@ -95,7 +98,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); max = vblank_start - 1; @@ -4287,7 +4128,7 @@ index 4178849631ad..0eb939c92544 100644 if (min <= 0 || max <= 0) return; -@@ -124,11 +127,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) +@@ -125,11 +128,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) break; } @@ -4301,7 +4142,7 @@ index 4178849631ad..0eb939c92544 100644 } finish_wait(wq, &wait); -@@ -180,7 +183,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work +@@ -181,7 +184,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work crtc->base.state->event = NULL; } @@ -4311,7 +4152,7 @@ index 4178849631ad..0eb939c92544 100644 if (crtc->debug.start_vbl_count && crtc->debug.start_vbl_count != end_vbl_count) { diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c -index 283d2841ba58..d01f6ed1977e 100644 +index 192b2d3a79cb..d5372a207326 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) @@ -4324,10 +4165,10 @@ index 283d2841ba58..d01f6ed1977e 100644 #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c -index c3206fb8f4cf..6e2423186e2a 100644 +index cdb8cb568c15..b6d7fd964cbc 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c -@@ -1869,6 +1869,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +@@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ @@ -4335,7 +4176,7 @@ index c3206fb8f4cf..6e2423186e2a 100644 /* Get optional system timestamp before query. */ if (stime) -@@ -1961,6 +1962,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, +@@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ @@ -4344,7 +4185,7 @@ index c3206fb8f4cf..6e2423186e2a 100644 /* Decode into vertical and horizontal scanout position. */ *vpos = position & 0x1fff; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c -index e82f7e1c217c..b57d917b6ab7 100644 +index 0276d2ef06ee..8868045eabde 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -761,6 +761,8 @@ static void vmbus_isr(void) @@ -4530,10 +4371,10 @@ index a716693417a3..be0568c722d6 100644 ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c -index d3394b6add24..506bfba6ec9f 100644 +index fddff403d5d2..cca1bb4fbfe3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c -@@ -897,7 +897,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) +@@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_dbg_mcast(priv, "restarting multicast task\n"); @@ -4542,7 +4383,7 @@ index d3394b6add24..506bfba6ec9f 100644 netif_addr_lock(dev); spin_lock(&priv->lock); -@@ -979,7 +979,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) +@@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) spin_unlock(&priv->lock); netif_addr_unlock(dev); @@ -4601,10 +4442,10 @@ index 4a2a9e370be7..e970d9afd179 100644 if (t2 - t1 < tx) tx = t2 - t1; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c -index 96de97a46079..a6ec875d941b 100644 +index 11a13b5be73a..baaed0ac274b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c -@@ -1832,10 +1832,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, +@@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, int ret; /* @@ -4618,7 +4459,7 @@ index 96de97a46079..a6ec875d941b 100644 /* lock domain */ spin_lock(&domain->lock); -@@ -2003,10 +2003,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) +@@ -2094,10 +2094,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) struct protection_domain *domain; /* @@ -4633,7 +4474,7 @@ index 96de97a46079..a6ec875d941b 100644 if (WARN_ON(!dev_data->domain)) return; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c -index ebb5bf3ddbd9..598f5df45f6b 100644 +index d82637ab09fd..ebe41d30c093 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -479,7 +479,7 @@ struct deferred_flush_data { @@ -4645,7 +4486,7 @@ index ebb5bf3ddbd9..598f5df45f6b 100644 /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; -@@ -3626,10 +3626,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, +@@ -3715,10 +3715,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, struct intel_iommu *iommu; struct deferred_flush_entry *entry; struct deferred_flush_data *flush_data; @@ -4657,7 +4498,7 @@ index ebb5bf3ddbd9..598f5df45f6b 100644 /* Flush all CPUs' entries to avoid deferring too much. If * this becomes a bottleneck, can just flush us, and rely on -@@ -3662,8 +3660,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, +@@ -3751,8 +3749,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, } flush_data->size++; spin_unlock_irqrestore(&flush_data->lock, flags); @@ -4749,20 +4590,20 @@ index 4d200883c505..98b64ed5cb81 100644 Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c -index 5da86c8b6545..2aa092f2977e 100644 +index 31a89c8832c0..c3a7e8a9f761 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c -@@ -811,7 +811,7 @@ static void dm_old_request_fn(struct request_queue *q) +@@ -838,7 +838,7 @@ static void dm_old_request_fn(struct request_queue *q) /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; - queue_kthread_work(&md->kworker, &tio->work); + kthread_queue_work(&md->kworker, &tio->work); - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c -index ee7fc3701700..ae59c9e13911 100644 +index cce6057b9aca..fa2c4de32a64 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -4786,1358 +4627,72 @@ index ee7fc3701700..ae59c9e13911 100644 } static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, -@@ -6438,6 +6440,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) - __func__, cpu); - break; - } -+ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); +@@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) + __func__, cpu); + return -ENOMEM; } - put_online_cpus(); ++ spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); + return 0; + } +@@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5conf *conf) + conf->percpu = alloc_percpu(struct raid5_percpu); + if (!conf->percpu) + return -ENOMEM; +- + err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); + if (!err) { + conf->scribble_disks = max(conf->raid_disks, diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h -index 517d4b68a1be..efe91887ecd7 100644 +index 57ec49f0839e..0739604990b7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -504,6 +504,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { -+ spinlock_t lock; /* Protection for -RT */ - struct page *spare_page; /* Used when checking P/Q in raid6 */ - struct flex_array *scribble; /* space for constructing buffer - * lists and performing address -diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig -index d00252828966..9faab404faac 100644 ---- a/drivers/misc/Kconfig -+++ b/drivers/misc/Kconfig -@@ -54,6 +54,7 @@ config AD525X_DPOT_SPI - config ATMEL_TCLIB - bool "Atmel AT32/AT91 Timer/Counter Library" - depends on (AVR32 || ARCH_AT91) -+ default y if PREEMPT_RT_FULL - help - Select this if you want a library to allocate the Timer/Counter - blocks found on many Atmel processors. This facilitates using -@@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC - are combined to make a single 32-bit timer. - - When GENERIC_CLOCKEVENTS is defined, the third timer channel -- may be used as a clock event device supporting oneshot mode -- (delays of up to two seconds) based on the 32 KiHz clock. -+ may be used as a clock event device supporting oneshot mode. - - config ATMEL_TCB_CLKSRC_BLOCK - int -@@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK - TC can be used for other purposes, such as PWM generation and - interval timing. - -+config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK -+ bool "TC Block use 32 KiHz clock" -+ depends on ATMEL_TCB_CLKSRC -+ default y if !PREEMPT_RT_FULL -+ help -+ Select this to use 32 KiHz base clock rate as TC block clock -+ source for clock events. -+ -+ - config DUMMY_IRQ - tristate "Dummy IRQ handler" - default n -@@ -114,6 +123,35 @@ config IBM_ASM - for information on the specific driver level and support statement - for your IBM server. - -+config HWLAT_DETECTOR -+ tristate "Testing module to detect hardware-induced latencies" -+ depends on DEBUG_FS -+ depends on RING_BUFFER -+ default m -+ ---help--- -+ A simple hardware latency detector. Use this module to detect -+ large latencies introduced by the behavior of the underlying -+ system firmware external to Linux. We do this using periodic -+ use of stop_machine to grab all available CPUs and measure -+ for unexplainable gaps in the CPU timestamp counter(s). By -+ default, the module is not enabled until the "enable" file -+ within the "hwlat_detector" debugfs directory is toggled. -+ -+ This module is often used to detect SMI (System Management -+ Interrupts) on x86 systems, though is not x86 specific. To -+ this end, we default to using a sample window of 1 second, -+ during which we will sample for 0.5 seconds. If an SMI or -+ similar event occurs during that time, it is recorded -+ into an 8K samples global ring buffer until retreived. -+ -+ WARNING: This software should never be enabled (it can be built -+ but should not be turned on after it is loaded) in a production -+ environment where high latencies are a concern since the -+ sampling mechanism actually introduces latencies for -+ regular tasks while the CPU(s) are being held. -+ -+ If unsure, say N -+ - config PHANTOM - tristate "Sensable PHANToM (PCI)" - depends on PCI -diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile -index fb32516ddfe2..8643df9af3c4 100644 ---- a/drivers/misc/Makefile -+++ b/drivers/misc/Makefile -@@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT) += c2port/ - obj-$(CONFIG_HMC6352) += hmc6352.o - obj-y += eeprom/ - obj-y += cb710/ -+obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o - obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o - obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o - obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o -diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c -new file mode 100644 -index 000000000000..52f5ad5fd9c0 ---- /dev/null -+++ b/drivers/misc/hwlat_detector.c -@@ -0,0 +1,1240 @@ -+/* -+ * hwlat_detector.c - A simple Hardware Latency detector. -+ * -+ * Use this module to detect large system latencies induced by the behavior of -+ * certain underlying system hardware or firmware, independent of Linux itself. -+ * The code was developed originally to detect the presence of SMIs on Intel -+ * and AMD systems, although there is no dependency upon x86 herein. -+ * -+ * The classical example usage of this module is in detecting the presence of -+ * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a -+ * somewhat special form of hardware interrupt spawned from earlier CPU debug -+ * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge -+ * LPC (or other device) to generate a special interrupt under certain -+ * circumstances, for example, upon expiration of a special SMI timer device, -+ * due to certain external thermal readings, on certain I/O address accesses, -+ * and other situations. An SMI hits a special CPU pin, triggers a special -+ * SMI mode (complete with special memory map), and the OS is unaware. -+ * -+ * Although certain hardware-inducing latencies are necessary (for example, -+ * a modern system often requires an SMI handler for correct thermal control -+ * and remote management) they can wreak havoc upon any OS-level performance -+ * guarantees toward low-latency, especially when the OS is not even made -+ * aware of the presence of these interrupts. For this reason, we need a -+ * somewhat brute force mechanism to detect these interrupts. In this case, -+ * we do it by hogging all of the CPU(s) for configurable timer intervals, -+ * sampling the built-in CPU timer, looking for discontiguous readings. -+ * -+ * WARNING: This implementation necessarily introduces latencies. Therefore, -+ * you should NEVER use this module in a production environment -+ * requiring any kind of low-latency performance guarantee(s). -+ * -+ * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. -+ * -+ * Includes useful feedback from Clark Williams -+ * -+ * This file is licensed under the terms of the GNU General Public -+ * License version 2. This program is licensed "as is" without any -+ * warranty of any kind, whether express or implied. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ -+#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ -+#define U64STR_SIZE 22 /* 20 digits max */ -+ -+#define VERSION "1.0.0" -+#define BANNER "hwlat_detector: " -+#define DRVNAME "hwlat_detector" -+#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ -+#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ -+#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ -+ -+/* Module metadata */ -+ -+MODULE_LICENSE("GPL"); -+MODULE_AUTHOR("Jon Masters "); -+MODULE_DESCRIPTION("A simple hardware latency detector"); -+MODULE_VERSION(VERSION); -+ -+/* Module parameters */ -+ -+static int debug; -+static int enabled; -+static int threshold; -+ -+module_param(debug, int, 0); /* enable debug */ -+module_param(enabled, int, 0); /* enable detector */ -+module_param(threshold, int, 0); /* latency threshold */ -+ -+/* Buffering and sampling */ -+ -+static struct ring_buffer *ring_buffer; /* sample buffer */ -+static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ -+static unsigned long buf_size = BUF_SIZE_DEFAULT; -+static struct task_struct *kthread; /* sampling thread */ -+ -+/* DebugFS filesystem entries */ -+ -+static struct dentry *debug_dir; /* debugfs directory */ -+static struct dentry *debug_max; /* maximum TSC delta */ -+static struct dentry *debug_count; /* total detect count */ -+static struct dentry *debug_sample_width; /* sample width us */ -+static struct dentry *debug_sample_window; /* sample window us */ -+static struct dentry *debug_sample; /* raw samples us */ -+static struct dentry *debug_threshold; /* threshold us */ -+static struct dentry *debug_enable; /* enable/disable */ -+ -+/* Individual samples and global state */ -+ -+struct sample; /* latency sample */ -+struct data; /* Global state */ -+ -+/* Sampling functions */ -+static int __buffer_add_sample(struct sample *sample); -+static struct sample *buffer_get_sample(struct sample *sample); -+ -+/* Threading and state */ -+static int kthread_fn(void *unused); -+static int start_kthread(void); -+static int stop_kthread(void); -+static void __reset_stats(void); -+static int init_stats(void); -+ -+/* Debugfs interface */ -+static ssize_t simple_data_read(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos, const u64 *entry); -+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, -+ size_t cnt, loff_t *ppos, u64 *entry); -+static int debug_sample_fopen(struct inode *inode, struct file *filp); -+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos); -+static int debug_sample_release(struct inode *inode, struct file *filp); -+static int debug_enable_fopen(struct inode *inode, struct file *filp); -+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos); -+static ssize_t debug_enable_fwrite(struct file *file, -+ const char __user *user_buffer, -+ size_t user_size, loff_t *offset); -+ -+/* Initialization functions */ -+static int init_debugfs(void); -+static void free_debugfs(void); -+static int detector_init(void); -+static void detector_exit(void); -+ -+/* Individual latency samples are stored here when detected and packed into -+ * the ring_buffer circular buffer, where they are overwritten when -+ * more than buf_size/sizeof(sample) samples are received. */ -+struct sample { -+ u64 seqnum; /* unique sequence */ -+ u64 duration; /* ktime delta */ -+ u64 outer_duration; /* ktime delta (outer loop) */ -+ struct timespec timestamp; /* wall time */ -+ unsigned long lost; -+}; -+ -+/* keep the global state somewhere. */ -+static struct data { -+ -+ struct mutex lock; /* protect changes */ -+ -+ u64 count; /* total since reset */ -+ u64 max_sample; /* max hardware latency */ -+ u64 threshold; /* sample threshold level */ -+ -+ u64 sample_window; /* total sampling window (on+off) */ -+ u64 sample_width; /* active sampling portion of window */ -+ -+ atomic_t sample_open; /* whether the sample file is open */ -+ -+ wait_queue_head_t wq; /* waitqeue for new sample values */ -+ -+} data; -+ -+/** -+ * __buffer_add_sample - add a new latency sample recording to the ring buffer -+ * @sample: The new latency sample value -+ * -+ * This receives a new latency sample and records it in a global ring buffer. -+ * No additional locking is used in this case. -+ */ -+static int __buffer_add_sample(struct sample *sample) -+{ -+ return ring_buffer_write(ring_buffer, -+ sizeof(struct sample), sample); -+} -+ -+/** -+ * buffer_get_sample - remove a hardware latency sample from the ring buffer -+ * @sample: Pre-allocated storage for the sample -+ * -+ * This retrieves a hardware latency sample from the global circular buffer -+ */ -+static struct sample *buffer_get_sample(struct sample *sample) -+{ -+ struct ring_buffer_event *e = NULL; -+ struct sample *s = NULL; -+ unsigned int cpu = 0; -+ -+ if (!sample) -+ return NULL; -+ -+ mutex_lock(&ring_buffer_mutex); -+ for_each_online_cpu(cpu) { -+ e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); -+ if (e) -+ break; -+ } -+ -+ if (e) { -+ s = ring_buffer_event_data(e); -+ memcpy(sample, s, sizeof(struct sample)); -+ } else -+ sample = NULL; -+ mutex_unlock(&ring_buffer_mutex); -+ -+ return sample; -+} -+ -+#ifndef CONFIG_TRACING -+#define time_type ktime_t -+#define time_get() ktime_get() -+#define time_to_us(x) ktime_to_us(x) -+#define time_sub(a, b) ktime_sub(a, b) -+#define init_time(a, b) (a).tv64 = b -+#define time_u64(a) ((a).tv64) -+#else -+#define time_type u64 -+#define time_get() trace_clock_local() -+#define time_to_us(x) div_u64(x, 1000) -+#define time_sub(a, b) ((a) - (b)) -+#define init_time(a, b) (a = b) -+#define time_u64(a) a -+#endif -+/** -+ * get_sample - sample the CPU TSC and look for likely hardware latencies -+ * -+ * Used to repeatedly capture the CPU TSC (or similar), looking for potential -+ * hardware-induced latency. Called with interrupts disabled and with -+ * data.lock held. -+ */ -+static int get_sample(void) -+{ -+ time_type start, t1, t2, last_t2; -+ s64 diff, total = 0; -+ u64 sample = 0; -+ u64 outer_sample = 0; -+ int ret = -1; -+ -+ init_time(last_t2, 0); -+ start = time_get(); /* start timestamp */ -+ -+ do { -+ -+ t1 = time_get(); /* we'll look for a discontinuity */ -+ t2 = time_get(); -+ -+ if (time_u64(last_t2)) { -+ /* Check the delta from outer loop (t2 to next t1) */ -+ diff = time_to_us(time_sub(t1, last_t2)); -+ /* This shouldn't happen */ -+ if (diff < 0) { -+ pr_err(BANNER "time running backwards\n"); -+ goto out; -+ } -+ if (diff > outer_sample) -+ outer_sample = diff; -+ } -+ last_t2 = t2; -+ -+ total = time_to_us(time_sub(t2, start)); /* sample width */ -+ -+ /* This checks the inner loop (t1 to t2) */ -+ diff = time_to_us(time_sub(t2, t1)); /* current diff */ -+ -+ /* This shouldn't happen */ -+ if (diff < 0) { -+ pr_err(BANNER "time running backwards\n"); -+ goto out; -+ } -+ -+ if (diff > sample) -+ sample = diff; /* only want highest value */ -+ -+ } while (total <= data.sample_width); -+ -+ ret = 0; -+ -+ /* If we exceed the threshold value, we have found a hardware latency */ -+ if (sample > data.threshold || outer_sample > data.threshold) { -+ struct sample s; -+ -+ ret = 1; -+ -+ data.count++; -+ s.seqnum = data.count; -+ s.duration = sample; -+ s.outer_duration = outer_sample; -+ s.timestamp = CURRENT_TIME; -+ __buffer_add_sample(&s); -+ -+ /* Keep a running maximum ever recorded hardware latency */ -+ if (sample > data.max_sample) -+ data.max_sample = sample; -+ } -+ -+out: -+ return ret; -+} -+ -+/* -+ * kthread_fn - The CPU time sampling/hardware latency detection kernel thread -+ * @unused: A required part of the kthread API. -+ * -+ * Used to periodically sample the CPU TSC via a call to get_sample. We -+ * disable interrupts, which does (intentionally) introduce latency since we -+ * need to ensure nothing else might be running (and thus pre-empting). -+ * Obviously this should never be used in production environments. -+ * -+ * Currently this runs on which ever CPU it was scheduled on, but most -+ * real-worald hardware latency situations occur across several CPUs, -+ * but we might later generalize this if we find there are any actualy -+ * systems with alternate SMI delivery or other hardware latencies. -+ */ -+static int kthread_fn(void *unused) -+{ -+ int ret; -+ u64 interval; -+ -+ while (!kthread_should_stop()) { -+ -+ mutex_lock(&data.lock); -+ -+ local_irq_disable(); -+ ret = get_sample(); -+ local_irq_enable(); -+ -+ if (ret > 0) -+ wake_up(&data.wq); /* wake up reader(s) */ -+ -+ interval = data.sample_window - data.sample_width; -+ do_div(interval, USEC_PER_MSEC); /* modifies interval value */ -+ -+ mutex_unlock(&data.lock); -+ -+ if (msleep_interruptible(interval)) -+ break; -+ } -+ -+ return 0; -+} -+ -+/** -+ * start_kthread - Kick off the hardware latency sampling/detector kthread -+ * -+ * This starts a kernel thread that will sit and sample the CPU timestamp -+ * counter (TSC or similar) and look for potential hardware latencies. -+ */ -+static int start_kthread(void) -+{ -+ kthread = kthread_run(kthread_fn, NULL, -+ DRVNAME); -+ if (IS_ERR(kthread)) { -+ pr_err(BANNER "could not start sampling thread\n"); -+ enabled = 0; -+ return -ENOMEM; -+ } -+ -+ return 0; -+} -+ -+/** -+ * stop_kthread - Inform the hardware latency samping/detector kthread to stop -+ * -+ * This kicks the running hardware latency sampling/detector kernel thread and -+ * tells it to stop sampling now. Use this on unload and at system shutdown. -+ */ -+static int stop_kthread(void) -+{ -+ int ret; -+ -+ ret = kthread_stop(kthread); -+ -+ return ret; -+} -+ -+/** -+ * __reset_stats - Reset statistics for the hardware latency detector -+ * -+ * We use data to store various statistics and global state. We call this -+ * function in order to reset those when "enable" is toggled on or off, and -+ * also at initialization. Should be called with data.lock held. -+ */ -+static void __reset_stats(void) -+{ -+ data.count = 0; -+ data.max_sample = 0; -+ ring_buffer_reset(ring_buffer); /* flush out old sample entries */ -+} -+ -+/** -+ * init_stats - Setup global state statistics for the hardware latency detector -+ * -+ * We use data to store various statistics and global state. We also use -+ * a global ring buffer (ring_buffer) to keep raw samples of detected hardware -+ * induced system latencies. This function initializes these structures and -+ * allocates the global ring buffer also. -+ */ -+static int init_stats(void) -+{ -+ int ret = -ENOMEM; -+ -+ mutex_init(&data.lock); -+ init_waitqueue_head(&data.wq); -+ atomic_set(&data.sample_open, 0); -+ -+ ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); -+ -+ if (WARN(!ring_buffer, KERN_ERR BANNER -+ "failed to allocate ring buffer!\n")) -+ goto out; -+ -+ __reset_stats(); -+ data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ -+ data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ -+ data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ -+ -+ ret = 0; -+ -+out: -+ return ret; -+ -+} -+ -+/* -+ * simple_data_read - Wrapper read function for global state debugfs entries -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * @entry: The entry to read from -+ * -+ * This function provides a generic read implementation for the global state -+ * "data" structure debugfs filesystem entries. It would be nice to use -+ * simple_attr_read directly, but we need to make sure that the data.lock -+ * is held during the actual read. -+ */ -+static ssize_t simple_data_read(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos, const u64 *entry) -+{ -+ char buf[U64STR_SIZE]; -+ u64 val = 0; -+ int len = 0; -+ -+ memset(buf, 0, sizeof(buf)); -+ -+ if (!entry) -+ return -EFAULT; -+ -+ mutex_lock(&data.lock); -+ val = *entry; -+ mutex_unlock(&data.lock); -+ -+ len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); -+ -+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); -+ -+} -+ -+/* -+ * simple_data_write - Wrapper write function for global state debugfs entries -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to write value from -+ * @cnt: The maximum number of bytes to write -+ * @ppos: The current "file" position -+ * @entry: The entry to write to -+ * -+ * This function provides a generic write implementation for the global state -+ * "data" structure debugfs filesystem entries. It would be nice to use -+ * simple_attr_write directly, but we need to make sure that the data.lock -+ * is held during the actual write. -+ */ -+static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, -+ size_t cnt, loff_t *ppos, u64 *entry) -+{ -+ char buf[U64STR_SIZE]; -+ int csize = min(cnt, sizeof(buf)); -+ u64 val = 0; -+ int err = 0; -+ -+ memset(buf, '\0', sizeof(buf)); -+ if (copy_from_user(buf, ubuf, csize)) -+ return -EFAULT; -+ -+ buf[U64STR_SIZE-1] = '\0'; /* just in case */ -+ err = kstrtoull(buf, 10, &val); -+ if (err) -+ return -EINVAL; -+ -+ mutex_lock(&data.lock); -+ *entry = val; -+ mutex_unlock(&data.lock); -+ -+ return csize; -+} -+ -+/** -+ * debug_count_fopen - Open function for "count" debugfs entry -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "count" debugfs -+ * interface to the hardware latency detector. -+ */ -+static int debug_count_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_count_fread - Read function for "count" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "count" debugfs -+ * interface to the hardware latency detector. Can be used to read the -+ * number of latency readings exceeding the configured threshold since -+ * the detector was last reset (e.g. by writing a zero into "count"). -+ */ -+static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ return simple_data_read(filp, ubuf, cnt, ppos, &data.count); -+} -+ -+/** -+ * debug_count_fwrite - Write function for "count" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "count" debugfs -+ * interface to the hardware latency detector. Can be used to write a -+ * desired value, especially to zero the total count. -+ */ -+static ssize_t debug_count_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ return simple_data_write(filp, ubuf, cnt, ppos, &data.count); -+} -+ -+/** -+ * debug_enable_fopen - Dummy open function for "enable" debugfs interface -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "enable" debugfs -+ * interface to the hardware latency detector. -+ */ -+static int debug_enable_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_enable_fread - Read function for "enable" debugfs interface -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "enable" debugfs -+ * interface to the hardware latency detector. Can be used to determine -+ * whether the detector is currently enabled ("0\n" or "1\n" returned). -+ */ -+static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ char buf[4]; -+ -+ if ((cnt < sizeof(buf)) || (*ppos)) -+ return 0; -+ -+ buf[0] = enabled ? '1' : '0'; -+ buf[1] = '\n'; -+ buf[2] = '\0'; -+ if (copy_to_user(ubuf, buf, strlen(buf))) -+ return -EFAULT; -+ return *ppos = strlen(buf); -+} -+ -+/** -+ * debug_enable_fwrite - Write function for "enable" debugfs interface -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "enable" debugfs -+ * interface to the hardware latency detector. Can be used to enable or -+ * disable the detector, which will have the side-effect of possibly -+ * also resetting the global stats and kicking off the measuring -+ * kthread (on an enable) or the converse (upon a disable). -+ */ -+static ssize_t debug_enable_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ char buf[4]; -+ int csize = min(cnt, sizeof(buf)); -+ long val = 0; -+ int err = 0; -+ -+ memset(buf, '\0', sizeof(buf)); -+ if (copy_from_user(buf, ubuf, csize)) -+ return -EFAULT; -+ -+ buf[sizeof(buf)-1] = '\0'; /* just in case */ -+ err = kstrtoul(buf, 10, &val); -+ if (err) -+ return -EINVAL; -+ -+ if (val) { -+ if (enabled) -+ goto unlock; -+ enabled = 1; -+ __reset_stats(); -+ if (start_kthread()) -+ return -EFAULT; -+ } else { -+ if (!enabled) -+ goto unlock; -+ enabled = 0; -+ err = stop_kthread(); -+ if (err) { -+ pr_err(BANNER "cannot stop kthread\n"); -+ return -EFAULT; -+ } -+ wake_up(&data.wq); /* reader(s) should return */ -+ } -+unlock: -+ return csize; -+} -+ -+/** -+ * debug_max_fopen - Open function for "max" debugfs entry -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "max" debugfs -+ * interface to the hardware latency detector. -+ */ -+static int debug_max_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_max_fread - Read function for "max" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "max" debugfs -+ * interface to the hardware latency detector. Can be used to determine -+ * the maximum latency value observed since it was last reset. -+ */ -+static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); -+} -+ -+/** -+ * debug_max_fwrite - Write function for "max" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "max" debugfs -+ * interface to the hardware latency detector. Can be used to reset the -+ * maximum or set it to some other desired value - if, then, subsequent -+ * measurements exceed this value, the maximum will be updated. -+ */ -+static ssize_t debug_max_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); -+} -+ -+ -+/** -+ * debug_sample_fopen - An open function for "sample" debugfs interface -+ * @inode: The in-kernel inode representation of this debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function handles opening the "sample" file within the hardware -+ * latency detector debugfs directory interface. This file is used to read -+ * raw samples from the global ring_buffer and allows the user to see a -+ * running latency history. Can be opened blocking or non-blocking, -+ * affecting whether it behaves as a buffer read pipe, or does not. -+ * Implements simple locking to prevent multiple simultaneous use. -+ */ -+static int debug_sample_fopen(struct inode *inode, struct file *filp) -+{ -+ if (!atomic_add_unless(&data.sample_open, 1, 1)) -+ return -EBUSY; -+ else -+ return 0; -+} -+ -+/** -+ * debug_sample_fread - A read function for "sample" debugfs interface -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that will contain the samples read -+ * @cnt: The maximum bytes to read from the debugfs "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function handles reading from the "sample" file within the hardware -+ * latency detector debugfs directory interface. This file is used to read -+ * raw samples from the global ring_buffer and allows the user to see a -+ * running latency history. By default this will block pending a new -+ * value written into the sample buffer, unless there are already a -+ * number of value(s) waiting in the buffer, or the sample file was -+ * previously opened in a non-blocking mode of operation. -+ */ -+static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ int len = 0; -+ char buf[64]; -+ struct sample *sample = NULL; -+ -+ if (!enabled) -+ return 0; -+ -+ sample = kzalloc(sizeof(struct sample), GFP_KERNEL); -+ if (!sample) -+ return -ENOMEM; -+ -+ while (!buffer_get_sample(sample)) { -+ -+ DEFINE_WAIT(wait); -+ -+ if (filp->f_flags & O_NONBLOCK) { -+ len = -EAGAIN; -+ goto out; -+ } -+ -+ prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); -+ schedule(); -+ finish_wait(&data.wq, &wait); -+ -+ if (signal_pending(current)) { -+ len = -EINTR; -+ goto out; -+ } -+ -+ if (!enabled) { /* enable was toggled */ -+ len = 0; -+ goto out; -+ } -+ } -+ -+ len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", -+ sample->timestamp.tv_sec, -+ sample->timestamp.tv_nsec, -+ sample->duration, -+ sample->outer_duration); -+ -+ -+ /* handling partial reads is more trouble than it's worth */ -+ if (len > cnt) -+ goto out; -+ -+ if (copy_to_user(ubuf, buf, len)) -+ len = -EFAULT; -+ -+out: -+ kfree(sample); -+ return len; -+} -+ -+/** -+ * debug_sample_release - Release function for "sample" debugfs interface -+ * @inode: The in-kernel inode represenation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function completes the close of the debugfs interface "sample" file. -+ * Frees the sample_open "lock" so that other users may open the interface. -+ */ -+static int debug_sample_release(struct inode *inode, struct file *filp) -+{ -+ atomic_dec(&data.sample_open); -+ -+ return 0; -+} -+ -+/** -+ * debug_threshold_fopen - Open function for "threshold" debugfs entry -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "threshold" debugfs -+ * interface to the hardware latency detector. -+ */ -+static int debug_threshold_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_threshold_fread - Read function for "threshold" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "threshold" debugfs -+ * interface to the hardware latency detector. It can be used to determine -+ * the current threshold level at which a latency will be recorded in the -+ * global ring buffer, typically on the order of 10us. -+ */ -+static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); -+} -+ -+/** -+ * debug_threshold_fwrite - Write function for "threshold" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "threshold" debugfs -+ * interface to the hardware latency detector. It can be used to configure -+ * the threshold level at which any subsequently detected latencies will -+ * be recorded into the global ring buffer. -+ */ -+static ssize_t debug_threshold_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ int ret; -+ -+ ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); -+ -+ if (enabled) -+ wake_up_process(kthread); -+ -+ return ret; -+} -+ -+/** -+ * debug_width_fopen - Open function for "width" debugfs entry -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "width" debugfs -+ * interface to the hardware latency detector. -+ */ -+static int debug_width_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_width_fread - Read function for "width" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "width" debugfs -+ * interface to the hardware latency detector. It can be used to determine -+ * for how many us of the total window us we will actively sample for any -+ * hardware-induced latecy periods. Obviously, it is not possible to -+ * sample constantly and have the system respond to a sample reader, or, -+ * worse, without having the system appear to have gone out to lunch. -+ */ -+static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); -+} -+ -+/** -+ * debug_width_fwrite - Write function for "width" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "width" debugfs -+ * interface to the hardware latency detector. It can be used to configure -+ * for how many us of the total window us we will actively sample for any -+ * hardware-induced latency periods. Obviously, it is not possible to -+ * sample constantly and have the system respond to a sample reader, or, -+ * worse, without having the system appear to have gone out to lunch. It -+ * is enforced that width is less that the total window size. -+ */ -+static ssize_t debug_width_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ char buf[U64STR_SIZE]; -+ int csize = min(cnt, sizeof(buf)); -+ u64 val = 0; -+ int err = 0; -+ -+ memset(buf, '\0', sizeof(buf)); -+ if (copy_from_user(buf, ubuf, csize)) -+ return -EFAULT; -+ -+ buf[U64STR_SIZE-1] = '\0'; /* just in case */ -+ err = kstrtoull(buf, 10, &val); -+ if (err) -+ return -EINVAL; -+ -+ mutex_lock(&data.lock); -+ if (val < data.sample_window) -+ data.sample_width = val; -+ else { -+ mutex_unlock(&data.lock); -+ return -EINVAL; -+ } -+ mutex_unlock(&data.lock); -+ -+ if (enabled) -+ wake_up_process(kthread); -+ -+ return csize; -+} -+ -+/** -+ * debug_window_fopen - Open function for "window" debugfs entry -+ * @inode: The in-kernel inode representation of the debugfs "file" -+ * @filp: The active open file structure for the debugfs "file" -+ * -+ * This function provides an open implementation for the "window" debugfs -+ * interface to the hardware latency detector. The window is the total time -+ * in us that will be considered one sample period. Conceptually, windows -+ * occur back-to-back and contain a sample width period during which -+ * actual sampling occurs. -+ */ -+static int debug_window_fopen(struct inode *inode, struct file *filp) -+{ -+ return 0; -+} -+ -+/** -+ * debug_window_fread - Read function for "window" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The userspace provided buffer to read value into -+ * @cnt: The maximum number of bytes to read -+ * @ppos: The current "file" position -+ * -+ * This function provides a read implementation for the "window" debugfs -+ * interface to the hardware latency detector. The window is the total time -+ * in us that will be considered one sample period. Conceptually, windows -+ * occur back-to-back and contain a sample width period during which -+ * actual sampling occurs. Can be used to read the total window size. -+ */ -+static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, -+ size_t cnt, loff_t *ppos) -+{ -+ return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); -+} -+ -+/** -+ * debug_window_fwrite - Write function for "window" debugfs entry -+ * @filp: The active open file structure for the debugfs "file" -+ * @ubuf: The user buffer that contains the value to write -+ * @cnt: The maximum number of bytes to write to "file" -+ * @ppos: The current position in the debugfs "file" -+ * -+ * This function provides a write implementation for the "window" debufds -+ * interface to the hardware latency detetector. The window is the total time -+ * in us that will be considered one sample period. Conceptually, windows -+ * occur back-to-back and contain a sample width period during which -+ * actual sampling occurs. Can be used to write a new total window size. It -+ * is enfoced that any value written must be greater than the sample width -+ * size, or an error results. -+ */ -+static ssize_t debug_window_fwrite(struct file *filp, -+ const char __user *ubuf, -+ size_t cnt, -+ loff_t *ppos) -+{ -+ char buf[U64STR_SIZE]; -+ int csize = min(cnt, sizeof(buf)); -+ u64 val = 0; -+ int err = 0; -+ -+ memset(buf, '\0', sizeof(buf)); -+ if (copy_from_user(buf, ubuf, csize)) -+ return -EFAULT; -+ -+ buf[U64STR_SIZE-1] = '\0'; /* just in case */ -+ err = kstrtoull(buf, 10, &val); -+ if (err) -+ return -EINVAL; -+ -+ mutex_lock(&data.lock); -+ if (data.sample_width < val) -+ data.sample_window = val; -+ else { -+ mutex_unlock(&data.lock); -+ return -EINVAL; -+ } -+ mutex_unlock(&data.lock); -+ -+ return csize; -+} -+ -+/* -+ * Function pointers for the "count" debugfs file operations -+ */ -+static const struct file_operations count_fops = { -+ .open = debug_count_fopen, -+ .read = debug_count_fread, -+ .write = debug_count_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "enable" debugfs file operations -+ */ -+static const struct file_operations enable_fops = { -+ .open = debug_enable_fopen, -+ .read = debug_enable_fread, -+ .write = debug_enable_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "max" debugfs file operations -+ */ -+static const struct file_operations max_fops = { -+ .open = debug_max_fopen, -+ .read = debug_max_fread, -+ .write = debug_max_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "sample" debugfs file operations -+ */ -+static const struct file_operations sample_fops = { -+ .open = debug_sample_fopen, -+ .read = debug_sample_fread, -+ .release = debug_sample_release, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "threshold" debugfs file operations -+ */ -+static const struct file_operations threshold_fops = { -+ .open = debug_threshold_fopen, -+ .read = debug_threshold_fread, -+ .write = debug_threshold_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "width" debugfs file operations -+ */ -+static const struct file_operations width_fops = { -+ .open = debug_width_fopen, -+ .read = debug_width_fread, -+ .write = debug_width_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/* -+ * Function pointers for the "window" debugfs file operations -+ */ -+static const struct file_operations window_fops = { -+ .open = debug_window_fopen, -+ .read = debug_window_fread, -+ .write = debug_window_fwrite, -+ .owner = THIS_MODULE, -+}; -+ -+/** -+ * init_debugfs - A function to initialize the debugfs interface files -+ * -+ * This function creates entries in debugfs for "hwlat_detector", including -+ * files to read values from the detector, current samples, and the -+ * maximum sample that has been captured since the hardware latency -+ * dectector was started. -+ */ -+static int init_debugfs(void) -+{ -+ int ret = -ENOMEM; -+ -+ debug_dir = debugfs_create_dir(DRVNAME, NULL); -+ if (!debug_dir) -+ goto err_debug_dir; -+ -+ debug_sample = debugfs_create_file("sample", 0444, -+ debug_dir, NULL, -+ &sample_fops); -+ if (!debug_sample) -+ goto err_sample; -+ -+ debug_count = debugfs_create_file("count", 0444, -+ debug_dir, NULL, -+ &count_fops); -+ if (!debug_count) -+ goto err_count; -+ -+ debug_max = debugfs_create_file("max", 0444, -+ debug_dir, NULL, -+ &max_fops); -+ if (!debug_max) -+ goto err_max; -+ -+ debug_sample_window = debugfs_create_file("window", 0644, -+ debug_dir, NULL, -+ &window_fops); -+ if (!debug_sample_window) -+ goto err_window; -+ -+ debug_sample_width = debugfs_create_file("width", 0644, -+ debug_dir, NULL, -+ &width_fops); -+ if (!debug_sample_width) -+ goto err_width; -+ -+ debug_threshold = debugfs_create_file("threshold", 0644, -+ debug_dir, NULL, -+ &threshold_fops); -+ if (!debug_threshold) -+ goto err_threshold; -+ -+ debug_enable = debugfs_create_file("enable", 0644, -+ debug_dir, &enabled, -+ &enable_fops); -+ if (!debug_enable) -+ goto err_enable; -+ -+ else { -+ ret = 0; -+ goto out; -+ } -+ -+err_enable: -+ debugfs_remove(debug_threshold); -+err_threshold: -+ debugfs_remove(debug_sample_width); -+err_width: -+ debugfs_remove(debug_sample_window); -+err_window: -+ debugfs_remove(debug_max); -+err_max: -+ debugfs_remove(debug_count); -+err_count: -+ debugfs_remove(debug_sample); -+err_sample: -+ debugfs_remove(debug_dir); -+err_debug_dir: -+out: -+ return ret; -+} -+ -+/** -+ * free_debugfs - A function to cleanup the debugfs file interface -+ */ -+static void free_debugfs(void) -+{ -+ /* could also use a debugfs_remove_recursive */ -+ debugfs_remove(debug_enable); -+ debugfs_remove(debug_threshold); -+ debugfs_remove(debug_sample_width); -+ debugfs_remove(debug_sample_window); -+ debugfs_remove(debug_max); -+ debugfs_remove(debug_count); -+ debugfs_remove(debug_sample); -+ debugfs_remove(debug_dir); -+} -+ -+/** -+ * detector_init - Standard module initialization code -+ */ -+static int detector_init(void) -+{ -+ int ret = -ENOMEM; -+ -+ pr_info(BANNER "version %s\n", VERSION); -+ -+ ret = init_stats(); -+ if (ret) -+ goto out; -+ -+ ret = init_debugfs(); -+ if (ret) -+ goto err_stats; -+ -+ if (enabled) -+ ret = start_kthread(); -+ -+ goto out; -+ -+err_stats: -+ ring_buffer_free(ring_buffer); -+out: -+ return ret; -+ -+} -+ -+/** -+ * detector_exit - Standard module cleanup code -+ */ -+static void detector_exit(void) -+{ -+ int err; -+ -+ if (enabled) { -+ enabled = 0; -+ err = stop_kthread(); -+ if (err) -+ pr_err(BANNER "cannot stop kthread\n"); -+ } -+ -+ free_debugfs(); -+ ring_buffer_free(ring_buffer); /* free up the ring buffer */ ++ spinlock_t lock; /* Protection for -RT */ + struct page *spare_page; /* Used when checking P/Q in raid6 */ + struct flex_array *scribble; /* space for constructing buffer + * lists and performing address +diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig +index 64971baf11fa..215e91e36198 100644 +--- a/drivers/misc/Kconfig ++++ b/drivers/misc/Kconfig +@@ -54,6 +54,7 @@ config AD525X_DPOT_SPI + config ATMEL_TCLIB + bool "Atmel AT32/AT91 Timer/Counter Library" + depends on (AVR32 || ARCH_AT91) ++ default y if PREEMPT_RT_FULL + help + Select this if you want a library to allocate the Timer/Counter + blocks found on many Atmel processors. This facilitates using +@@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC + are combined to make a single 32-bit timer. + + When GENERIC_CLOCKEVENTS is defined, the third timer channel +- may be used as a clock event device supporting oneshot mode +- (delays of up to two seconds) based on the 32 KiHz clock. ++ may be used as a clock event device supporting oneshot mode. + + config ATMEL_TCB_CLKSRC_BLOCK + int +@@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK + TC can be used for other purposes, such as PWM generation and + interval timing. + ++config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK ++ bool "TC Block use 32 KiHz clock" ++ depends on ATMEL_TCB_CLKSRC ++ default y if !PREEMPT_RT_FULL ++ help ++ Select this to use 32 KiHz base clock rate as TC block clock ++ source for clock events. + -+} + -+module_init(detector_init); -+module_exit(detector_exit); + config DUMMY_IRQ + tristate "Dummy IRQ handler" + default n diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index df990bb8c873..1a162709a85e 100644 --- a/drivers/mmc/host/mmci.c @@ -6168,7 +4723,7 @@ index df990bb8c873..1a162709a85e 100644 * If we have less than the fifo 'half-full' threshold to transfer, * trigger a PIO interrupt as soon as any data is available. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c -index 25c55ab05c7d..5a1d117a8744 100644 +index 9133e7926da5..63afb921ed40 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) @@ -6212,7 +4767,7 @@ index da4c2d8a4173..1420dfb56bac 100644 enable_irq(irq); } diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c -index 56f109bc8394..02afc796bc71 100644 +index bca6935a94db..d7a35ee34d03 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, @@ -6318,7 +4873,7 @@ index dcf36537a767..1a1f2e46452c 100644 list_for_each_entry_safe(fcf, next, &del_list, list) { /* Removes fcf from current list */ diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c -index e72673b0a8fb..da598a6caa22 100644 +index 16ca31ad5ec0..c3987347e762 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, @@ -6376,7 +4931,7 @@ index edc48f3b8230..ee5c6f9dfb6f 100644 static inline uint8_t * diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c -index 987f1c729e9c..18391e07d70f 100644 +index 068c4e47fac9..a2090f640397 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) @@ -6392,7 +4947,7 @@ index 987f1c729e9c..18391e07d70f 100644 /* diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c -index 97f0a2bd93ed..a4f45aaa9ad4 100644 +index 95f4c1bcdb4c..0be934799bff 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ @@ -6403,7 +4958,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 #include #include -@@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) +@@ -353,7 +354,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } } @@ -6412,7 +4967,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 { unsigned long flags; int cpu = smp_processor_id(); -@@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +@@ -370,7 +371,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); spin_unlock_irqrestore(&pkg_work_lock, flags); @@ -6421,7 +4976,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 } pkg_work_scheduled[phy_id] = 1; spin_unlock_irqrestore(&pkg_work_lock, flags); -@@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +@@ -379,9 +380,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); @@ -6470,7 +5025,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 static int find_siblings_cpu(int cpu) { int i; -@@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) +@@ -585,6 +625,9 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; @@ -6480,7 +5035,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 spin_lock_init(&pkg_work_lock); platform_thermal_package_notify = pkg_temp_thermal_platform_thermal_notify; -@@ -608,7 +651,7 @@ static int __init pkg_temp_thermal_init(void) +@@ -609,7 +652,7 @@ static int __init pkg_temp_thermal_init(void) kfree(pkg_work_scheduled); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; @@ -6489,7 +5044,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 return -ENODEV; } -@@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) +@@ -634,6 +677,7 @@ static void __exit pkg_temp_thermal_exit(void) mutex_unlock(&phy_dev_list_mutex); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; @@ -6498,7 +5053,7 @@ index 97f0a2bd93ed..a4f45aaa9ad4 100644 cancel_delayed_work_sync( &per_cpu(pkg_temp_thermal_threshold_work, i)); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c -index dcf43f66404f..a9ae57122841 100644 +index 240a361b674f..55e249267144 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; @@ -6520,7 +5075,7 @@ index dcf43f66404f..a9ae57122841 100644 #include /* diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c -index 858a54633664..fc44fb59aef6 100644 +index 1731b98d2471..5cc62301e840 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -35,6 +35,7 @@ @@ -6531,7 +5086,7 @@ index 858a54633664..fc44fb59aef6 100644 #include #include #include -@@ -3109,9 +3110,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, +@@ -3144,9 +3145,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, serial8250_rpm_get(up); @@ -6544,10 +5099,10 @@ index 858a54633664..fc44fb59aef6 100644 else spin_lock_irqsave(&port->lock, flags); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c -index 8a9e213387a7..dd1f9a426b74 100644 +index e2c33b9528d8..53af53c43e8c 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2167,13 +2167,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) +@@ -2194,13 +2194,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) clk_enable(uap->clk); @@ -6570,7 +5125,7 @@ index 8a9e213387a7..dd1f9a426b74 100644 /* * First save the CR then disable the interrupts -@@ -2197,8 +2203,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) +@@ -2224,8 +2230,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) pl011_write(old_cr, uap, REG_CR); if (locked) @@ -6611,24 +5166,11 @@ index a2a529994ba5..0ee7c4c518df 100644 } static int __init -diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c -index f36e6df2fa90..e086ea4d2997 100644 ---- a/drivers/tty/serial/sc16is7xx.c -+++ b/drivers/tty/serial/sc16is7xx.c -@@ -1240,7 +1240,7 @@ static int sc16is7xx_probe(struct device *dev, - - /* Setup interrupt */ - ret = devm_request_irq(dev, irq, sc16is7xx_irq, -- IRQF_ONESHOT | flags, dev_name(dev), s); -+ flags, dev_name(dev), s); - if (!ret) - return 0; - diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c -index d2e3f655c26f..fdd027a9bbd7 100644 +index 479e223f9cff..3418a54b4131 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c -@@ -1760,9 +1760,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) +@@ -1761,9 +1761,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) * and no one may trigger the above deadlock situation when * running complete() in tasklet. */ @@ -6641,10 +5183,10 @@ index d2e3f655c26f..fdd027a9bbd7 100644 usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c -index 5c8429f23a89..fa835fb1a186 100644 +index 17989b72cdae..88c6574b5992 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c -@@ -1509,7 +1509,7 @@ static void ffs_data_put(struct ffs_data *ffs) +@@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs) pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || @@ -6654,7 +5196,7 @@ index 5c8429f23a89..fa835fb1a186 100644 kfree(ffs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c -index 16104b5ebdcb..5c506c2b88ad 100644 +index 1468d8f085a3..6aae3ae25c18 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) @@ -6676,7 +5218,7 @@ index 16104b5ebdcb..5c506c2b88ad 100644 epdata->status = -EINTR; } else { diff --git a/fs/aio.c b/fs/aio.c -index 4fe81d1c60f9..e68c06a4a017 100644 +index 428484f2f841..2b02e2eb2158 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -40,6 +40,7 @@ @@ -6704,7 +5246,7 @@ index 4fe81d1c60f9..e68c06a4a017 100644 aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); -@@ -578,9 +580,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) +@@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) return cancel(&kiocb->common); } @@ -6716,7 +5258,7 @@ index 4fe81d1c60f9..e68c06a4a017 100644 pr_debug("freeing %p\n", ctx); -@@ -599,8 +601,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) +@@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp); @@ -6727,7 +5269,7 @@ index 4fe81d1c60f9..e68c06a4a017 100644 } /* -@@ -608,9 +610,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) +@@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ @@ -6739,7 +5281,7 @@ index 4fe81d1c60f9..e68c06a4a017 100644 struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); -@@ -629,6 +631,14 @@ static void free_ioctx_users(struct percpu_ref *ref) +@@ -632,6 +634,14 @@ static void free_ioctx_users(struct percpu_ref *ref) percpu_ref_put(&ctx->reqs); } @@ -6755,10 +5297,10 @@ index 4fe81d1c60f9..e68c06a4a017 100644 { unsigned i, new_nr; diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h -index a439548de785..7c392647d03b 100644 +index a1fba4285277..3796769b4cd1 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h -@@ -30,6 +30,7 @@ +@@ -31,6 +31,7 @@ #include #include #include @@ -6779,8 +5321,31 @@ index d8e6d421c27f..2e689ab1306b 100644 goto relock; } spin_unlock(&p->d_lock); +diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c +index 63d197724519..b8e479c5ad83 100644 +--- a/fs/btrfs/async-thread.c ++++ b/fs/btrfs/async-thread.c +@@ -306,8 +306,8 @@ + * because the callback could free the structure. + */ + wtag = work; +- work->ordered_free(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); ++ work->ordered_free(work); + } + spin_unlock_irqrestore(lock, flags); + } +@@ -339,8 +339,6 @@ + set_bit(WORK_DONE_BIT, &work->flags); + run_ordered_work(wq); + } +- if (!need_order) +- trace_btrfs_all_work_done(wq->fs_info, wtag); + } + + void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, diff --git a/fs/buffer.c b/fs/buffer.c -index 9c8eb9b6db6a..d15d77f72cf7 100644 +index b205a629001d..5646afc022ba 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) @@ -6842,7 +5407,7 @@ index 9c8eb9b6db6a..d15d77f72cf7 100644 } EXPORT_SYMBOL(end_buffer_async_write); -@@ -3384,6 +3376,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) +@@ -3383,6 +3375,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); if (ret) { INIT_LIST_HEAD(&ret->b_assoc_buffers); @@ -6992,10 +5557,10 @@ index 10db91218933..42af0a06f657 100644 static void ep_remove_wait_queue(struct eppoll_entry *pwq) diff --git a/fs/exec.c b/fs/exec.c -index 6fcfb3f7b137..751370a71ec5 100644 +index 67e86571685a..fe14cdd84016 100644 --- a/fs/exec.c +++ b/fs/exec.c -@@ -1012,12 +1012,14 @@ static int exec_mmap(struct mm_struct *mm) +@@ -1017,12 +1017,14 @@ static int exec_mmap(struct mm_struct *mm) } } task_lock(tsk); @@ -7011,10 +5576,10 @@ index 6fcfb3f7b137..751370a71ec5 100644 if (old_mm) { up_read(&old_mm->mmap_sem); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c -index 4ff9251e9d3a..8fe489ec2ef1 100644 +index 096f79997f75..310e2aabbb0d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c -@@ -1174,7 +1174,7 @@ static int fuse_direntplus_link(struct file *file, +@@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file, struct inode *dir = d_inode(parent); struct fuse_conn *fc; struct inode *inode; @@ -7036,8 +5601,152 @@ index 684996c8a3a4..6e18a06aaabe 100644 mutex_lock(&journal->j_checkpoint_mutex); /* +diff --git a/fs/locks.c b/fs/locks.c +index 22c5b4aa4961..269c6a44449a 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) + return -ENOMEM; + } + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + if (request->fl_flags & FL_ACCESS) + goto find_conflict; +@@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) + + out: + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + if (new_fl) + locks_free_lock(new_fl); + locks_dispose_list(&dispose); +@@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, + new_fl2 = locks_alloc_lock(); + } + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + /* + * New lock request. Walk all POSIX locks and look for conflicts. If +@@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, + } + out: + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + /* + * Free any unused locks. + */ +@@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) + return error; + } + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + + time_out_leases(inode, &dispose); +@@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) + locks_insert_block(fl, new_fl); + trace_break_lease_block(inode, new_fl); + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + + locks_dispose_list(&dispose); + error = wait_event_interruptible_timeout(new_fl->fl_wait, + !new_fl->fl_next, break_time); + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + trace_break_lease_unblock(inode, new_fl); + locks_delete_block(new_fl); +@@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) + } + out: + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + locks_dispose_list(&dispose); + locks_free_lock(new_fl); + return error; +@@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp) + + ctx = smp_load_acquire(&inode->i_flctx); + if (ctx && !list_empty_careful(&ctx->flc_lease)) { +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + time_out_leases(inode, &dispose); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { +@@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) + break; + } + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + + locks_dispose_list(&dispose); + } +@@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr + return -EINVAL; + } + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + time_out_leases(inode, &dispose); + error = check_conflicting_open(dentry, arg, lease->fl_flags); +@@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr + lease->fl_lmops->lm_setup(lease, priv); + out: + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + locks_dispose_list(&dispose); + if (is_deleg) + inode_unlock(inode); +@@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct file *filp, void *owner) + return error; + } + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_file == filp && +@@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct file *filp, void *owner) + if (victim) + error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + locks_dispose_list(&dispose); + return error; + } +@@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx) + if (list_empty(&ctx->flc_lease)) + return; + +- percpu_down_read_preempt_disable(&file_rwsem); ++ percpu_down_read(&file_rwsem); + spin_lock(&ctx->flc_lock); + list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) + if (filp == fl->fl_file) + lease_modify(fl, F_UNLCK, &dispose); + spin_unlock(&ctx->flc_lock); +- percpu_up_read_preempt_enable(&file_rwsem); ++ percpu_up_read(&file_rwsem); + + locks_dispose_list(&dispose); + } diff --git a/fs/namei.c b/fs/namei.c -index adb04146df09..a89dfaf9f209 100644 +index 5b4eed221530..9c8dd3c83a80 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1629,7 +1629,7 @@ static struct dentry *lookup_slow(const struct qstr *name, @@ -7059,7 +5768,7 @@ index adb04146df09..a89dfaf9f209 100644 if (unlikely(IS_DEADDIR(dir_inode))) return -ENOENT; diff --git a/fs/namespace.c b/fs/namespace.c -index 7bb2cda3bfef..cf79b18e7b58 100644 +index e6c234b1a645..c9dbe5e56347 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -14,6 +14,7 @@ @@ -7070,7 +5779,7 @@ index 7bb2cda3bfef..cf79b18e7b58 100644 #include #include #include /* init_rootfs */ -@@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m) +@@ -356,8 +357,11 @@ int __mnt_want_write(struct vfsmount *m) * incremented count after it has set MNT_WRITE_HOLD. */ smp_mb(); @@ -7085,7 +5794,7 @@ index 7bb2cda3bfef..cf79b18e7b58 100644 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will * be set to match its requirements. So we must not load that until diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c -index b9c65421ed81..03ffe8af8785 100644 +index dff600ae0d74..d726d2e09353 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode, @@ -7103,7 +5812,7 @@ index b9c65421ed81..03ffe8af8785 100644 mutex_unlock(&sp->so_delegreturn_mutex); put_nfs_open_context(ctx); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c -index 6bc5a68e39f1..ce6488e07a13 100644 +index 5f1af4cd1a33..436c27eb9d4f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -485,7 +485,7 @@ static @@ -7115,7 +5824,7 @@ index 6bc5a68e39f1..ce6488e07a13 100644 struct dentry *dentry; struct dentry *alias; struct inode *dir = d_inode(parent); -@@ -1490,7 +1490,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, +@@ -1498,7 +1498,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned open_flags, umode_t mode, int *opened) { @@ -7124,7 +5833,7 @@ index 6bc5a68e39f1..ce6488e07a13 100644 struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; -@@ -1805,7 +1805,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) +@@ -1813,7 +1813,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { @@ -7136,7 +5845,7 @@ index 6bc5a68e39f1..ce6488e07a13 100644 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { -@@ -1815,7 +1819,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) +@@ -1823,7 +1827,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) case -ENOENT: nfs_dentry_handle_enoent(dentry); } @@ -7165,10 +5874,10 @@ index bf4ec5ecc97e..36cd5fc9192c 100644 } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h -index 9bf64eacba5b..041da5cb80f5 100644 +index 1452177c822d..f43b01d54c59 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h -@@ -107,7 +107,7 @@ struct nfs4_state_owner { +@@ -111,7 +111,7 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; @@ -7178,10 +5887,10 @@ index 9bf64eacba5b..041da5cb80f5 100644 }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c -index a9dec32ba9ba..49b64dfb307c 100644 +index 241da19b7da4..8f9636cc298f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c -@@ -2525,7 +2525,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, +@@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, unsigned int seq; int ret; @@ -7190,7 +5899,7 @@ index a9dec32ba9ba..49b64dfb307c 100644 ret = _nfs4_proc_open(opendata); if (ret != 0) -@@ -2561,7 +2561,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, +@@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, ctx->state = state; if (d_inode(dentry) == state->inode) { nfs_inode_attach_open_context(ctx); @@ -7200,7 +5909,7 @@ index a9dec32ba9ba..49b64dfb307c 100644 } out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c -index 8353f33f0466..657e13ed4b5d 100644 +index 0959c9661662..dabd834d7686 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, @@ -7212,7 +5921,7 @@ index 8353f33f0466..657e13ed4b5d 100644 mutex_init(&sp->so_delegreturn_mutex); return sp; } -@@ -1459,8 +1459,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs +@@ -1497,8 +1497,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * recovering after a network partition or a reboot from a * server that doesn't support a grace period. */ @@ -7226,7 +5935,7 @@ index 8353f33f0466..657e13ed4b5d 100644 restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) -@@ -1528,14 +1532,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs +@@ -1567,14 +1571,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs spin_lock(&sp->so_lock); goto restart; } @@ -7402,10 +6111,10 @@ index fe251f187ff8..e89da4fb14c2 100644 /** diff --git a/fs/proc/base.c b/fs/proc/base.c -index ac0df4dde823..ad1a4723ffdd 100644 +index ca651ac00660..41d9dc789285 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c -@@ -1819,7 +1819,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, +@@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, child = d_hash_and_lookup(dir, &qname); if (!child) { @@ -7415,10 +6124,10 @@ index ac0df4dde823..ad1a4723ffdd 100644 if (IS_ERR(child)) goto end_instantiate; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c -index 1b93650dda2f..c553bf3ea541 100644 +index 55313d994895..bdfc493721e9 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c -@@ -627,7 +627,7 @@ static bool proc_sys_fill_cache(struct file *file, +@@ -632,7 +632,7 @@ static bool proc_sys_fill_cache(struct file *file, child = d_lookup(dir, &qname); if (!child) { @@ -7444,10 +6153,10 @@ index 9ae4abb4110b..8644b67c48fd 100644 /* diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h -index 93b61b1f2beb..58270adb46ce 100644 +index e861a24f06f2..b5c97d3059c7 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h -@@ -131,6 +131,7 @@ +@@ -133,6 +133,7 @@ #define acpi_cache_t struct kmem_cache #define acpi_spinlock spinlock_t * @@ -7455,7 +6164,7 @@ index 93b61b1f2beb..58270adb46ce 100644 #define acpi_cpu_flags unsigned long /* Use native linux version of acpi_os_allocate_zeroed */ -@@ -149,6 +150,20 @@ +@@ -151,6 +152,20 @@ #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock @@ -7502,19 +6211,20 @@ index 6f96247226a4..fa53a21263c2 100644 #endif diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h -index e43bbffb5b7a..c23892264109 100644 +index 535ab2e13d2e..cfc246899473 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h -@@ -222,6 +222,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) +@@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) + return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; + } - struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); - struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); +- +void __blk_mq_complete_request_remote_work(struct work_struct *work); - int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); + void blk_mq_end_request(struct request *rq, int error); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h -index e79055c8b577..8583c1af14ad 100644 +index c47c358ba052..a99c23735725 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -89,6 +89,7 @@ struct request { @@ -7698,10 +6408,10 @@ index 5d5aaae3af43..3bca1590e29f 100644 /** diff --git a/include/linux/cpu.h b/include/linux/cpu.h -index 797d9c8e9a1b..6eabd9e8a98b 100644 +index e571128ad99a..5e52d28c20c1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h -@@ -201,6 +201,8 @@ extern void get_online_cpus(void); +@@ -182,6 +182,8 @@ extern void get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -7710,7 +6420,7 @@ index 797d9c8e9a1b..6eabd9e8a98b 100644 #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) -@@ -218,6 +220,8 @@ static inline void cpu_hotplug_done(void) {} +@@ -199,6 +201,8 @@ static inline void cpu_hotplug_done(void) {} #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) @@ -7720,7 +6430,7 @@ index 797d9c8e9a1b..6eabd9e8a98b 100644 #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h -index 5ff3e9a4fe5f..ed0431599fd7 100644 +index 5beed7b30561..61cab7ef458e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -11,6 +11,7 @@ @@ -7764,26 +6474,6 @@ index a6ecb34cf547..37caab306336 100644 +#endif + #endif /* defined(_LINUX_DELAY_H) */ -diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h -index 7d565afe35d2..8e31b4d245d2 100644 ---- a/include/linux/ftrace.h -+++ b/include/linux/ftrace.h -@@ -714,6 +714,7 @@ static inline void __ftrace_enabled_restore(int enabled) - #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) - #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) - -+#ifdef CONFIG_USING_GET_LOCK_PARENT_IP - static inline unsigned long get_lock_parent_ip(void) - { - unsigned long addr = CALLER_ADDR0; -@@ -725,6 +726,7 @@ static inline unsigned long get_lock_parent_ip(void) - return addr; - return CALLER_ADDR2; - } -+#endif - - #ifdef CONFIG_IRQSOFF_TRACER - extern void time_hardirqs_on(unsigned long a0, unsigned long a1); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index bb3f3297062a..a117a33ef72c 100644 --- a/include/linux/highmem.h @@ -7985,10 +6675,10 @@ index 083d61e92706..5899796f50cb 100644 /** * idr_find - return pointer for given id diff --git a/include/linux/init_task.h b/include/linux/init_task.h -index f8834f820ec2..a688d5e19578 100644 +index 325f649d77ff..8af70bcc799b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h -@@ -148,6 +148,12 @@ extern struct task_group root_task_group; +@@ -150,6 +150,12 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif @@ -8001,7 +6691,7 @@ index f8834f820ec2..a688d5e19578 100644 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN # define INIT_VTIME(tsk) \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ -@@ -239,6 +245,7 @@ extern struct task_group root_task_group; +@@ -250,6 +256,7 @@ extern struct task_group root_task_group; .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ @@ -8010,7 +6700,7 @@ index f8834f820ec2..a688d5e19578 100644 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h -index b6683f0ffc9f..c0a351daf736 100644 +index 72f0721f75e7..480972ae47d3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,6 +14,7 @@ @@ -8066,7 +6756,7 @@ index b6683f0ffc9f..c0a351daf736 100644 void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); void (*release)(struct kref *ref); }; -@@ -398,9 +406,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, +@@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, bool state); #ifdef CONFIG_IRQ_FORCED_THREADING @@ -8081,7 +6771,7 @@ index b6683f0ffc9f..c0a351daf736 100644 #endif #ifndef __ARCH_SET_SOFTIRQ_PENDING -@@ -457,9 +469,10 @@ struct softirq_action +@@ -465,9 +477,10 @@ struct softirq_action void (*action)(struct softirq_action *); }; @@ -8093,7 +6783,7 @@ index b6683f0ffc9f..c0a351daf736 100644 #ifdef __ARCH_HAS_DO_SOFTIRQ void do_softirq_own_stack(void); #else -@@ -468,13 +481,25 @@ static inline void do_softirq_own_stack(void) +@@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void) __do_softirq(); } #endif @@ -8119,7 +6809,7 @@ index b6683f0ffc9f..c0a351daf736 100644 DECLARE_PER_CPU(struct task_struct *, ksoftirqd); -@@ -496,8 +521,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) +@@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its execution is still not started, it will be executed only once. @@ -8131,7 +6821,7 @@ index b6683f0ffc9f..c0a351daf736 100644 * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. -@@ -522,27 +548,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } +@@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } enum { TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ @@ -8174,7 +6864,7 @@ index b6683f0ffc9f..c0a351daf736 100644 #define tasklet_unlock_wait(t) do { } while (0) #define tasklet_unlock(t) do { } while (0) #endif -@@ -591,12 +626,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) +@@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) smp_mb(); } @@ -8188,7 +6878,7 @@ index b6683f0ffc9f..c0a351daf736 100644 extern void tasklet_kill(struct tasklet_struct *t); extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, -@@ -627,6 +657,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) +@@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) tasklet_kill(&ttimer->tasklet); } @@ -8202,7 +6892,7 @@ index b6683f0ffc9f..c0a351daf736 100644 * Autoprobing for irqs: * diff --git a/include/linux/irq.h b/include/linux/irq.h -index 0ac26c892fe2..ede85f106aef 100644 +index e79875574b39..177cee0c3305 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -72,6 +72,7 @@ enum irqchip_irq_state; @@ -8253,10 +6943,10 @@ index 47b9ebd4a74f..2543aab05daa 100644 + #endif /* _LINUX_IRQ_WORK_H */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h -index b51beebf9804..219d9824f762 100644 +index c9be57931b58..eeeb540971ae 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h -@@ -64,6 +64,7 @@ struct irq_desc { +@@ -66,6 +66,7 @@ struct irq_desc { unsigned int irqs_unhandled; atomic_t threads_handled; int threads_handled_last; @@ -8400,7 +7090,7 @@ index 410decacff8f..0861bebfc188 100644 static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, char *help, short minlen) { return 0; } diff --git a/include/linux/kernel.h b/include/linux/kernel.h -index d96a6118d26a..37de2ce2d290 100644 +index bc6ed52a39b9..7894d55e4998 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -194,6 +194,9 @@ extern int _cond_resched(void); @@ -8421,7 +7111,7 @@ index d96a6118d26a..37de2ce2d290 100644 # define sched_annotate_sleep() do { } while (0) #endif -@@ -491,6 +495,7 @@ extern enum system_states { +@@ -488,6 +492,7 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, @@ -8429,62 +7119,6 @@ index d96a6118d26a..37de2ce2d290 100644 } system_state; #define TAINT_PROPRIETARY_MODULE 0 -diff --git a/include/linux/lglock.h b/include/linux/lglock.h -index c92ebd100d9b..6f035f635d0e 100644 ---- a/include/linux/lglock.h -+++ b/include/linux/lglock.h -@@ -34,13 +34,30 @@ - #endif - - struct lglock { -+#ifdef CONFIG_PREEMPT_RT_FULL -+ struct rt_mutex __percpu *lock; -+#else - arch_spinlock_t __percpu *lock; -+#endif - #ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lock_class_key lock_key; - struct lockdep_map lock_dep_map; - #endif - }; - -+#ifdef CONFIG_PREEMPT_RT_FULL -+# define DEFINE_LGLOCK(name) \ -+ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ -+ = __RT_MUTEX_INITIALIZER( name ## _lock); \ -+ struct lglock name = { .lock = &name ## _lock } -+ -+# define DEFINE_STATIC_LGLOCK(name) \ -+ static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ -+ = __RT_MUTEX_INITIALIZER( name ## _lock); \ -+ static struct lglock name = { .lock = &name ## _lock } -+ -+#else -+ - #define DEFINE_LGLOCK(name) \ - static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ - = __ARCH_SPIN_LOCK_UNLOCKED; \ -@@ -50,6 +67,7 @@ struct lglock { - static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ - = __ARCH_SPIN_LOCK_UNLOCKED; \ - static struct lglock name = { .lock = &name ## _lock } -+#endif - - void lg_lock_init(struct lglock *lg, char *name); - -@@ -64,6 +82,12 @@ void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); - void lg_global_lock(struct lglock *lg); - void lg_global_unlock(struct lglock *lg); - -+#ifndef CONFIG_PREEMPT_RT_FULL -+#define lg_global_trylock_relax(name) lg_global_lock(name) -+#else -+void lg_global_trylock_relax(struct lglock *lg); -+#endif -+ - #else - /* When !CONFIG_SMP, map lglock to spinlock */ - #define lglock spinlock diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index cb483305e1f5..4e5062316bb6 100644 --- a/include/linux/list_bl.h @@ -8836,7 +7470,7 @@ index 000000000000..845c77f1a5ca + +#endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h -index 903200f4ec41..df670d441fc9 100644 +index 08d947fc4c59..705fb564a605 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -11,6 +11,7 @@ @@ -8847,7 +7481,7 @@ index 903200f4ec41..df670d441fc9 100644 #include #include #include -@@ -508,6 +509,9 @@ struct mm_struct { +@@ -509,6 +510,9 @@ struct mm_struct { bool tlb_flush_pending; #endif struct uprobes_state uprobes_state; @@ -8993,10 +7627,30 @@ index 000000000000..c38a44b14da5 + +#endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index e8d79d4ebcfe..2ae8fa187016 100644 +index d83590ef74a1..0ae3b6cf430c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h -@@ -2409,14 +2409,53 @@ void netdev_freemem(struct net_device *dev); +@@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t; + typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); + + void __napi_schedule(struct napi_struct *n); ++ ++/* ++ * When PREEMPT_RT_FULL is defined, all device interrupt handlers ++ * run as threads, and they can also be preempted (without PREEMPT_RT ++ * interrupt threads can not be preempted). Which means that calling ++ * __napi_schedule_irqoff() from an interrupt handler can be preempted ++ * and can corrupt the napi->poll_list. ++ */ ++#ifdef CONFIG_PREEMPT_RT_FULL ++#define __napi_schedule_irqoff(n) __napi_schedule(n) ++#else + void __napi_schedule_irqoff(struct napi_struct *n); ++#endif + + static inline bool napi_disable_pending(struct napi_struct *n) + { +@@ -2461,14 +2473,53 @@ void netdev_freemem(struct net_device *dev); void synchronize_net(void); int init_dummy_netdev(struct net_device *dev); @@ -9051,7 +7705,7 @@ index e8d79d4ebcfe..2ae8fa187016 100644 struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); -@@ -2794,6 +2833,7 @@ struct softnet_data { +@@ -2851,6 +2902,7 @@ struct softnet_data { unsigned int dropped; struct sk_buff_head input_pkt_queue; struct napi_struct backlog; @@ -9115,10 +7769,10 @@ index 810124b33327..d54ca43d571f 100644 #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h -index 7cc0deee5bde..a20f49ee69ee 100644 +index beb1e10f446e..ebaf2e7bfe29 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h -@@ -1484,7 +1484,7 @@ struct nfs_unlinkdata { +@@ -1490,7 +1490,7 @@ struct nfs_unlinkdata { struct nfs_removeargs args; struct nfs_removeres res; struct dentry *dentry; @@ -9210,6 +7864,91 @@ index 4149868de4e6..babe5b9bcb91 100644 /* CPU notfiers are defined in include/linux/cpu.h. */ /* netdevice notifiers are defined in include/linux/netdevice.h */ +diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h +index 5b2e6159b744..ea940f451606 100644 +--- a/include/linux/percpu-rwsem.h ++++ b/include/linux/percpu-rwsem.h +@@ -4,7 +4,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +@@ -12,7 +12,7 @@ struct percpu_rw_semaphore { + struct rcu_sync rss; + unsigned int __percpu *read_count; + struct rw_semaphore rw_sem; +- wait_queue_head_t writer; ++ struct swait_queue_head writer; + int readers_block; + }; + +@@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \ + .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ + .read_count = &__percpu_rwsem_rc_##name, \ + .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ +- .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ ++ .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ + } + + extern int __percpu_down_read(struct percpu_rw_semaphore *, int); + extern void __percpu_up_read(struct percpu_rw_semaphore *); + +-static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) ++static inline void percpu_down_read(struct percpu_rw_semaphore *sem) + { + might_sleep(); + +@@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * + __this_cpu_inc(*sem->read_count); + if (unlikely(!rcu_sync_is_idle(&sem->rss))) + __percpu_down_read(sem, false); /* Unconditional memory barrier */ +- barrier(); + /* +- * The barrier() prevents the compiler from ++ * The preempt_enable() prevents the compiler from + * bleeding the critical section out. + */ +-} +- +-static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +-{ +- percpu_down_read_preempt_disable(sem); + preempt_enable(); + } + +@@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) + return ret; + } + +-static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) ++static inline void percpu_up_read(struct percpu_rw_semaphore *sem) + { +- /* +- * The barrier() prevents the compiler from +- * bleeding the critical section out. +- */ +- barrier(); ++ preempt_disable(); + /* + * Same as in percpu_down_read(). + */ +@@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem + rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); + } + +-static inline void percpu_up_read(struct percpu_rw_semaphore *sem) +-{ +- preempt_disable(); +- percpu_up_read_preempt_enable(sem); +-} +- + extern void percpu_down_write(struct percpu_rw_semaphore *); + extern void percpu_up_write(struct percpu_rw_semaphore *); + diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 56939d3f6e53..1c7e33fc83e4 100644 --- a/include/linux/percpu.h @@ -9420,10 +8159,10 @@ index 75e4e30677f1..1cfb1cb72354 100644 struct preempt_notifier; diff --git a/include/linux/printk.h b/include/linux/printk.h -index 696a56be7d3e..310aa321ef0c 100644 +index eac1af8502bb..37e647af0b0b 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h -@@ -125,9 +125,11 @@ struct va_format { +@@ -126,9 +126,11 @@ struct va_format { #ifdef CONFIG_EARLY_PRINTK extern asmlinkage __printf(1, 2) void early_printk(const char *fmt, ...); @@ -9436,7 +8175,7 @@ index 696a56be7d3e..310aa321ef0c 100644 #ifdef CONFIG_PRINTK_NMI diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h -index 52b97db93830..fd9ea1c68db6 100644 +index af3581b8a451..f87f87dec84c 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -289,9 +289,19 @@ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, @@ -9469,15 +8208,15 @@ index 52b97db93830..fd9ea1c68db6 100644 /** diff --git a/include/linux/random.h b/include/linux/random.h -index 3d6e9815cd85..f6e8860b6494 100644 +index 7bd2403e4fef..b2df7148a42b 100644 --- a/include/linux/random.h +++ b/include/linux/random.h -@@ -20,7 +20,7 @@ struct random_ready_callback { - extern void add_device_randomness(const void *, unsigned int); +@@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {} + extern void add_input_randomness(unsigned int type, unsigned int code, - unsigned int value); --extern void add_interrupt_randomness(int irq, int irq_flags); -+extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip); + unsigned int value) __latent_entropy; +-extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; ++extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); @@ -9567,7 +8306,7 @@ index 000000000000..7066962a4379 + +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h -index 1aa62e1a761b..2a614acb433e 100644 +index 321f9ed552a9..a52a110bf815 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,6 +46,7 @@ @@ -9617,7 +8356,7 @@ index 1aa62e1a761b..2a614acb433e 100644 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ -@@ -500,7 +512,14 @@ extern struct lockdep_map rcu_callback_map; +@@ -501,7 +513,14 @@ extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); int rcu_read_lock_held(void); @@ -9632,7 +8371,7 @@ index 1aa62e1a761b..2a614acb433e 100644 /** * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? -@@ -621,54 +640,6 @@ static inline void rcu_preempt_sleep_check(void) +@@ -622,54 +641,6 @@ static inline void rcu_preempt_sleep_check(void) }) /** @@ -9687,7 +8426,7 @@ index 1aa62e1a761b..2a614acb433e 100644 * rcu_access_pointer() - fetch RCU pointer with no dereferencing * @p: The pointer to read * -@@ -946,10 +917,14 @@ static inline void rcu_read_unlock(void) +@@ -947,10 +918,14 @@ static inline void rcu_read_unlock(void) static inline void rcu_read_lock_bh(void) { local_bh_disable(); @@ -9702,7 +8441,7 @@ index 1aa62e1a761b..2a614acb433e 100644 } /* -@@ -959,10 +934,14 @@ static inline void rcu_read_lock_bh(void) +@@ -960,10 +935,14 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { @@ -10213,7 +8952,7 @@ index 000000000000..e26bd95a57c3 +#endif +#endif diff --git a/include/linux/sched.h b/include/linux/sched.h -index 62c68e513e39..c873ce0183ab 100644 +index 75d9a57e212e..8cb7df0f56e3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,6 +26,7 @@ struct sched_param { @@ -10247,7 +8986,7 @@ index 62c68e513e39..c873ce0183ab 100644 /* Task command name length */ #define TASK_COMM_LEN 16 -@@ -1009,8 +1012,18 @@ struct wake_q_head { +@@ -1013,8 +1016,18 @@ struct wake_q_head { struct wake_q_head name = { WAKE_Q_TAIL, &name.first } extern void wake_q_add(struct wake_q_head *head, @@ -10268,15 +9007,15 @@ index 62c68e513e39..c873ce0183ab 100644 /* * sched-domains (multiprocessor balancing) declarations: -@@ -1459,6 +1472,7 @@ struct tlbflush_unmap_batch { - - struct task_struct { +@@ -1481,6 +1494,7 @@ struct task_struct { + struct thread_info thread_info; + #endif volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ -+ volatile long saved_state; /* saved state for "spinlock sleepers" */ ++ volatile long saved_state; /* saved state for "spinlock sleepers" */ void *stack; atomic_t usage; unsigned int flags; /* per process flags, defined below */ -@@ -1495,6 +1509,12 @@ struct task_struct { +@@ -1520,6 +1534,12 @@ struct task_struct { #endif unsigned int policy; @@ -10289,7 +9028,7 @@ index 62c68e513e39..c873ce0183ab 100644 int nr_cpus_allowed; cpumask_t cpus_allowed; -@@ -1629,6 +1649,9 @@ struct task_struct { +@@ -1654,6 +1674,9 @@ struct task_struct { struct task_cputime cputime_expires; struct list_head cpu_timers[3]; @@ -10298,8 +9037,8 @@ index 62c68e513e39..c873ce0183ab 100644 +#endif /* process credentials */ - const struct cred __rcu *real_cred; /* objective and real subjective task -@@ -1659,10 +1682,15 @@ struct task_struct { + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ +@@ -1685,10 +1708,15 @@ struct task_struct { /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; @@ -10315,7 +9054,7 @@ index 62c68e513e39..c873ce0183ab 100644 unsigned long sas_ss_sp; size_t sas_ss_size; -@@ -1891,6 +1919,12 @@ struct task_struct { +@@ -1917,6 +1945,12 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ @@ -10328,7 +9067,7 @@ index 62c68e513e39..c873ce0183ab 100644 #ifdef CONFIG_KCOV /* Coverage collection mode enabled for this task (0 if disabled). */ enum kcov_mode kcov_mode; -@@ -1916,9 +1950,23 @@ struct task_struct { +@@ -1942,9 +1976,23 @@ struct task_struct { unsigned int sequential_io; unsigned int sequential_io_avg; #endif @@ -10352,8 +9091,8 @@ index 62c68e513e39..c873ce0183ab 100644 int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; -@@ -1939,14 +1987,6 @@ extern int arch_task_struct_size __read_mostly; - # define arch_task_struct_size (sizeof(struct task_struct)) +@@ -1984,14 +2032,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) + } #endif -/* Future-safe accessor for struct task_struct's cpus_allowed. */ @@ -10367,7 +9106,7 @@ index 62c68e513e39..c873ce0183ab 100644 #define TNF_MIGRATED 0x01 #define TNF_NO_GROUP 0x02 #define TNF_SHARED 0x04 -@@ -2162,6 +2202,15 @@ extern struct pid *cad_pid; +@@ -2207,6 +2247,15 @@ extern struct pid *cad_pid; extern void free_task(struct task_struct *tsk); #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) @@ -10383,7 +9122,7 @@ index 62c68e513e39..c873ce0183ab 100644 extern void __put_task_struct(struct task_struct *t); static inline void put_task_struct(struct task_struct *t) -@@ -2169,6 +2218,7 @@ static inline void put_task_struct(struct task_struct *t) +@@ -2214,6 +2263,7 @@ static inline void put_task_struct(struct task_struct *t) if (atomic_dec_and_test(&t->usage)) __put_task_struct(t); } @@ -10391,7 +9130,7 @@ index 62c68e513e39..c873ce0183ab 100644 struct task_struct *task_rcu_dereference(struct task_struct **ptask); struct task_struct *try_get_task_struct(struct task_struct **ptask); -@@ -2210,6 +2260,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, +@@ -2255,6 +2305,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, /* * Per process flags */ @@ -10399,7 +9138,7 @@ index 62c68e513e39..c873ce0183ab 100644 #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ -@@ -2378,6 +2429,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, +@@ -2423,6 +2474,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); @@ -10410,7 +9149,7 @@ index 62c68e513e39..c873ce0183ab 100644 #else static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) -@@ -2390,6 +2445,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, +@@ -2435,6 +2490,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, return -EINVAL; return 0; } @@ -10420,7 +9159,7 @@ index 62c68e513e39..c873ce0183ab 100644 #endif #ifdef CONFIG_NO_HZ_COMMON -@@ -2624,6 +2682,7 @@ extern void xtime_update(unsigned long ticks); +@@ -2673,6 +2731,7 @@ extern void xtime_update(unsigned long ticks); extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); @@ -10428,7 +9167,7 @@ index 62c68e513e39..c873ce0183ab 100644 extern void wake_up_new_task(struct task_struct *tsk); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); -@@ -2832,6 +2891,17 @@ static inline void mmdrop(struct mm_struct *mm) +@@ -2881,6 +2940,17 @@ static inline void mmdrop(struct mm_struct *mm) __mmdrop(mm); } @@ -10443,10 +9182,10 @@ index 62c68e513e39..c873ce0183ab 100644 +# define mmdrop_delayed(mm) mmdrop(mm) +#endif + - static inline bool mmget_not_zero(struct mm_struct *mm) + static inline void mmdrop_async_fn(struct work_struct *work) { - return atomic_inc_not_zero(&mm->mm_users); -@@ -3168,6 +3238,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); +@@ -3273,6 +3343,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -10490,7 +9229,7 @@ index 62c68e513e39..c873ce0183ab 100644 static inline int restart_syscall(void) { set_tsk_thread_flag(current, TIF_SIGPENDING); -@@ -3199,6 +3306,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) +@@ -3304,6 +3411,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); } @@ -10542,7 +9281,7 @@ index 62c68e513e39..c873ce0183ab 100644 /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return -@@ -3220,12 +3372,16 @@ extern int __cond_resched_lock(spinlock_t *lock); +@@ -3329,12 +3481,16 @@ extern int __cond_resched_lock(spinlock_t *lock); __cond_resched_lock(lock); \ }) @@ -10559,7 +9298,7 @@ index 62c68e513e39..c873ce0183ab 100644 static inline void cond_resched_rcu(void) { -@@ -3387,6 +3543,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) +@@ -3509,6 +3665,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ @@ -10746,7 +9485,7 @@ index b63f63eaa39c..295540fdfc72 100644 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h -index 0f665cb26b50..59c38d1635c8 100644 +index 32810f279f8e..0db6e31161f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -284,6 +284,7 @@ struct sk_buff_head { @@ -10757,7 +9496,7 @@ index 0f665cb26b50..59c38d1635c8 100644 }; struct sk_buff; -@@ -1565,6 +1566,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) +@@ -1573,6 +1574,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) __skb_queue_head_init(list); } @@ -10771,7 +9510,7 @@ index 0f665cb26b50..59c38d1635c8 100644 struct lock_class_key *class) { diff --git a/include/linux/smp.h b/include/linux/smp.h -index eccae4690f41..64ec52d951c3 100644 +index 8e0cb7a0f836..b16ca967ad80 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -185,6 +185,9 @@ static inline void smp_init(void) { } @@ -10848,10 +9587,10 @@ index 5344268e6e62..043263f30e81 100644 #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h new file mode 100644 -index 000000000000..7eb87584e843 +index 000000000000..3534cff3dd08 --- /dev/null +++ b/include/linux/spinlock_rt.h -@@ -0,0 +1,165 @@ +@@ -0,0 +1,164 @@ +#ifndef __LINUX_SPINLOCK_RT_H +#define __LINUX_SPINLOCK_RT_H + @@ -10894,7 +9633,6 @@ index 000000000000..7eb87584e843 +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); -+extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock); + +#define spin_lock(lock) rt_spin_lock(lock) + @@ -11292,7 +10030,7 @@ index dc8eb63c6568..e793d3a257da 100644 #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) diff --git a/include/linux/suspend.h b/include/linux/suspend.h -index 7693e39b14fe..b36eedeb28d1 100644 +index d9718378a8be..e81e6dc7dcb1 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -193,6 +193,12 @@ struct platform_freeze_ops { @@ -11321,7 +10059,7 @@ index c1f9c62a8a50..83f004a72320 100644 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); diff --git a/include/linux/swap.h b/include/linux/swap.h -index e1d761463243..4ae9a4434ad3 100644 +index a56523cefb9b..c59a9f0d8ca1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ @@ -11332,7 +10070,7 @@ index e1d761463243..4ae9a4434ad3 100644 #include struct notifier_block; -@@ -243,7 +244,8 @@ struct swap_info_struct { +@@ -246,7 +247,8 @@ struct swap_info_struct { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); @@ -11342,7 +10080,7 @@ index e1d761463243..4ae9a4434ad3 100644 static inline unsigned int workingset_node_pages(struct radix_tree_node *node) { -@@ -288,6 +290,7 @@ extern unsigned long nr_free_pagecache_pages(void); +@@ -291,6 +293,7 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ @@ -11381,10 +10119,10 @@ index 000000000000..f175fa9a6016 + +#endif /* _LINUX_SWORK_H */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h -index 2b5b10eed74f..8bf15b1858f5 100644 +index 2873baf5372a..eb1a108f17ca 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h -@@ -103,7 +103,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) +@@ -107,7 +107,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) #define test_thread_flag(flag) \ test_ti_thread_flag(current_thread_info(), flag) @@ -11487,7 +10225,7 @@ index 613771909b6e..e28c5a43229d 100644 static inline void count_vm_events(enum vm_event_item item, long delta) diff --git a/include/linux/wait.h b/include/linux/wait.h -index c3ff74d764fa..60222150a409 100644 +index 2408e8d5c05c..db50d6609195 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -8,6 +8,7 @@ @@ -11598,10 +10336,10 @@ index 000000000000..a7034298a82a + +#endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h -index d061ffeb1e71..12ef433dc3b8 100644 +index 7adf4386ac8f..d3fd5c357268 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h -@@ -70,6 +70,7 @@ struct netns_ipv4 { +@@ -69,6 +69,7 @@ struct netns_ipv4 { int sysctl_icmp_echo_ignore_all; int sysctl_icmp_echo_ignore_broadcasts; @@ -11610,7 +10348,7 @@ index d061ffeb1e71..12ef433dc3b8 100644 int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h -index 909aff2db2b3..c47219d6e4bc 100644 +index e6aa0a249672..b57736f2a8a3 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -10,6 +10,7 @@ @@ -11621,16 +10359,16 @@ index 909aff2db2b3..c47219d6e4bc 100644 struct Qdisc_ops; struct qdisc_walker; -@@ -78,7 +79,7 @@ struct Qdisc { +@@ -86,7 +87,7 @@ struct Qdisc { struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; - seqcount_t running; + net_seqlock_t running; struct gnet_stats_queue qstats; unsigned long state; struct Qdisc *next_sched; -@@ -90,13 +91,22 @@ struct Qdisc { +@@ -98,13 +99,22 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; }; @@ -11654,7 +10392,7 @@ index 909aff2db2b3..c47219d6e4bc 100644 if (qdisc_is_running(qdisc)) return false; /* Variant of write_seqcount_begin() telling lockdep a trylock -@@ -105,11 +115,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) +@@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) raw_write_seqcount_begin(&qdisc->running); seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); return true; @@ -11671,7 +10409,7 @@ index 909aff2db2b3..c47219d6e4bc 100644 } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) -@@ -300,7 +315,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) +@@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) return qdisc_lock(root); } @@ -11795,10 +10533,10 @@ index 000000000000..d3f2fbd560b1 + +#endif /* _LATENCY_HIST_H */ diff --git a/init/Kconfig b/init/Kconfig -index cac3f096050d..b6c9166d878a 100644 +index 34407f15e6d3..2ce33a32e65d 100644 --- a/init/Kconfig +++ b/init/Kconfig -@@ -496,7 +496,7 @@ config TINY_RCU +@@ -506,7 +506,7 @@ config TINY_RCU config RCU_EXPERT bool "Make expert-level adjustments to RCU configuration" @@ -11807,7 +10545,7 @@ index cac3f096050d..b6c9166d878a 100644 help This option needs to be enabled if you wish to make expert-level adjustments to RCU configuration. By default, -@@ -613,7 +613,7 @@ config RCU_FANOUT_LEAF +@@ -623,7 +623,7 @@ config RCU_FANOUT_LEAF config RCU_FAST_NO_HZ bool "Accelerate last non-dyntick-idle CPU's grace periods" @@ -11816,7 +10554,7 @@ index cac3f096050d..b6c9166d878a 100644 default n help This option permits CPUs to enter dynticks-idle state even if -@@ -640,7 +640,7 @@ config TREE_RCU_TRACE +@@ -650,7 +650,7 @@ config TREE_RCU_TRACE config RCU_BOOST bool "Enable RCU priority boosting" depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT @@ -11825,7 +10563,27 @@ index cac3f096050d..b6c9166d878a 100644 help This option boosts the priority of preempted RCU readers that block the current preemptible RCU grace period for too long. -@@ -1054,6 +1054,7 @@ config CFS_BANDWIDTH +@@ -781,19 +781,6 @@ config RCU_NOCB_CPU_ALL + + endchoice + +-config RCU_EXPEDITE_BOOT +- bool +- default n +- help +- This option enables expedited grace periods at boot time, +- as if rcu_expedite_gp() had been invoked early in boot. +- The corresponding rcu_unexpedite_gp() is invoked from +- rcu_end_inkernel_boot(), which is intended to be invoked +- at the end of the kernel-only boot sequence, just before +- init is exec'ed. +- +- Accept the default if unsure. +- + endmenu # "RCU Subsystem" + + config BUILD_BIN2C +@@ -1064,6 +1051,7 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED @@ -11833,7 +10591,7 @@ index cac3f096050d..b6c9166d878a 100644 default n help This feature lets you explicitly allocate real CPU bandwidth -@@ -1761,6 +1762,7 @@ choice +@@ -1772,6 +1760,7 @@ choice config SLAB bool "SLAB" @@ -11841,250 +10599,47 @@ index cac3f096050d..b6c9166d878a 100644 select HAVE_HARDENED_USERCOPY_ALLOCATOR help The regular slab allocator that is established and known to work -@@ -1781,6 +1783,7 @@ config SLUB +@@ -1792,6 +1781,7 @@ config SLUB config SLOB depends on EXPERT bool "SLOB (Simple Allocator)" -+ depends on !PREEMPT_RT_FULL - help - SLOB replaces the stock allocator with a drastically simpler - allocator. SLOB is generally more space efficient but -@@ -1799,7 +1802,7 @@ config SLAB_FREELIST_RANDOM - - config SLUB_CPU_PARTIAL - default y -- depends on SLUB && SMP -+ depends on SLUB && SMP && !PREEMPT_RT_FULL - bool "SLUB per cpu partial cache" - help - Per cpu partial caches accellerate objects allocation and freeing -diff --git a/init/Makefile b/init/Makefile -index 7bc47ee31c36..88cf473554e0 100644 ---- a/init/Makefile -+++ b/init/Makefile -@@ -33,4 +33,4 @@ $(obj)/version.o: include/generated/compile.h - include/generated/compile.h: FORCE - @$($(quiet)chk_compile.h) - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ -- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" -+ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" -diff --git a/init/main.c b/init/main.c -index a8a58e2794a5..e4c979e37a91 100644 ---- a/init/main.c -+++ b/init/main.c -@@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) - setup_command_line(command_line); - setup_nr_cpu_ids(); - setup_per_cpu_areas(); -+ softirq_early_init(); - boot_cpu_state_init(); - smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ - -diff --git a/ipc/msg.c b/ipc/msg.c -index c6521c205cb4..996d89023552 100644 ---- a/ipc/msg.c -+++ b/ipc/msg.c -@@ -183,20 +183,14 @@ static void ss_wakeup(struct list_head *h, int kill) - } - } - --static void expunge_all(struct msg_queue *msq, int res) -+static void expunge_all(struct msg_queue *msq, int res, -+ struct wake_q_head *wake_q) - { - struct msg_receiver *msr, *t; - - list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { -- msr->r_msg = NULL; /* initialize expunge ordering */ -- wake_up_process(msr->r_tsk); -- /* -- * Ensure that the wakeup is visible before setting r_msg as -- * the receiving end depends on it: either spinning on a nil, -- * or dealing with -EAGAIN cases. See lockless receive part 1 -- * and 2 in do_msgrcv(). -- */ -- smp_wmb(); /* barrier (B) */ -+ -+ wake_q_add(wake_q, msr->r_tsk); - msr->r_msg = ERR_PTR(res); - } - } -@@ -213,11 +207,13 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) - { - struct msg_msg *msg, *t; - struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); -+ WAKE_Q(wake_q); - -- expunge_all(msq, -EIDRM); -+ expunge_all(msq, -EIDRM, &wake_q); - ss_wakeup(&msq->q_senders, 1); - msg_rmid(ns, msq); - ipc_unlock_object(&msq->q_perm); -+ wake_up_q(&wake_q); - rcu_read_unlock(); - - list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { -@@ -342,6 +338,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, - struct kern_ipc_perm *ipcp; - struct msqid64_ds uninitialized_var(msqid64); - struct msg_queue *msq; -+ WAKE_Q(wake_q); - int err; - - if (cmd == IPC_SET) { -@@ -389,7 +386,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, - /* sleeping receivers might be excluded by - * stricter permissions. - */ -- expunge_all(msq, -EAGAIN); -+ expunge_all(msq, -EAGAIN, &wake_q); - /* sleeping senders might be able to send - * due to a larger queue size. - */ -@@ -402,6 +399,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, - - out_unlock0: - ipc_unlock_object(&msq->q_perm); -+ wake_up_q(&wake_q); - out_unlock1: - rcu_read_unlock(); - out_up: -@@ -566,7 +564,8 @@ static int testmsg(struct msg_msg *msg, long type, int mode) - return 0; - } - --static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) -+static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, -+ struct wake_q_head *wake_q) - { - struct msg_receiver *msr, *t; - -@@ -577,27 +576,13 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) - - list_del(&msr->r_list); - if (msr->r_maxsize < msg->m_ts) { -- /* initialize pipelined send ordering */ -- msr->r_msg = NULL; -- wake_up_process(msr->r_tsk); -- /* barrier (B) see barrier comment below */ -- smp_wmb(); -+ wake_q_add(wake_q, msr->r_tsk); - msr->r_msg = ERR_PTR(-E2BIG); - } else { -- msr->r_msg = NULL; - msq->q_lrpid = task_pid_vnr(msr->r_tsk); - msq->q_rtime = get_seconds(); -- wake_up_process(msr->r_tsk); -- /* -- * Ensure that the wakeup is visible before -- * setting r_msg, as the receiving can otherwise -- * exit - once r_msg is set, the receiver can -- * continue. See lockless receive part 1 and 2 -- * in do_msgrcv(). Barrier (B). -- */ -- smp_wmb(); -+ wake_q_add(wake_q, msr->r_tsk); - msr->r_msg = msg; -- - return 1; - } - } -@@ -613,6 +598,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, - struct msg_msg *msg; - int err; - struct ipc_namespace *ns; -+ WAKE_Q(wake_q); - - ns = current->nsproxy->ipc_ns; - -@@ -698,7 +684,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, - msq->q_lspid = task_tgid_vnr(current); - msq->q_stime = get_seconds(); - -- if (!pipelined_send(msq, msg)) { -+ if (!pipelined_send(msq, msg, &wake_q)) { - /* no one is waiting for this message, enqueue it */ - list_add_tail(&msg->m_list, &msq->q_messages); - msq->q_cbytes += msgsz; -@@ -712,6 +698,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, - - out_unlock0: - ipc_unlock_object(&msq->q_perm); -+ wake_up_q(&wake_q); - out_unlock1: - rcu_read_unlock(); - if (msg != NULL) -@@ -932,57 +919,25 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl - rcu_read_lock(); - - /* Lockless receive, part 2: -- * Wait until pipelined_send or expunge_all are outside of -- * wake_up_process(). There is a race with exit(), see -- * ipc/mqueue.c for the details. The correct serialization -- * ensures that a receiver cannot continue without the wakeup -- * being visibible _before_ setting r_msg: -+ * The work in pipelined_send() and expunge_all(): -+ * - Set pointer to message -+ * - Queue the receiver task for later wakeup -+ * - Wake up the process after the lock is dropped. - * -- * CPU 0 CPU 1 -- * -- * smp_rmb(); (A) <-- pair -. -- * r_msg> | msr->r_msg = NULL; -- * | wake_up_process(); -- * `------> smp_wmb(); (B) -- * msr->r_msg = msg; -- * -- * Where (A) orders the message value read and where (B) orders -- * the write to the r_msg -- done in both pipelined_send and -- * expunge_all. -+ * Should the process wake up before this wakeup (due to a -+ * signal) it will either see the message and continue … - */ -- for (;;) { -- /* -- * Pairs with writer barrier in pipelined_send -- * or expunge_all. -- */ -- smp_rmb(); /* barrier (A) */ -- msg = (struct msg_msg *)msr_d.r_msg; -- if (msg) -- break; - -- /* -- * The cpu_relax() call is a compiler barrier -- * which forces everything in this loop to be -- * re-loaded. -- */ -- cpu_relax(); -- } -- -- /* Lockless receive, part 3: -- * If there is a message or an error then accept it without -- * locking. -- */ -+ msg = (struct msg_msg *)msr_d.r_msg; - if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock1; ++ depends on !PREEMPT_RT_FULL + help + SLOB replaces the stock allocator with a drastically simpler + allocator. SLOB is generally more space efficient but +@@ -1810,7 +1800,7 @@ config SLAB_FREELIST_RANDOM + + config SLUB_CPU_PARTIAL + default y +- depends on SLUB && SMP ++ depends on SLUB && SMP && !PREEMPT_RT_FULL + bool "SLUB per cpu partial cache" + help + Per cpu partial caches accellerate objects allocation and freeing +diff --git a/init/Makefile b/init/Makefile +index c4fb45525d08..821190dfaa75 100644 +--- a/init/Makefile ++++ b/init/Makefile +@@ -35,4 +35,4 @@ $(obj)/version.o: include/generated/compile.h + include/generated/compile.h: FORCE + @$($(quiet)chk_compile.h) + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ +- "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" ++ "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" +diff --git a/init/main.c b/init/main.c +index 2858be732f6d..3c97c3c91d88 100644 +--- a/init/main.c ++++ b/init/main.c +@@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) + setup_command_line(command_line); + setup_nr_cpu_ids(); + setup_per_cpu_areas(); ++ softirq_early_init(); + boot_cpu_state_init(); + smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ -- /* Lockless receive, part 3: -- * Acquire the queue spinlock. -- */ -+ /* -+ * … or see -EAGAIN, acquire the lock to check the message -+ * again. -+ */ - ipc_lock_object(&msq->q_perm); - -- /* Lockless receive, part 4: -- * Repeat test after acquiring the spinlock. -- */ - msg = (struct msg_msg *)msr_d.r_msg; - if (msg != ERR_PTR(-EAGAIN)) - goto out_unlock0; diff --git a/ipc/sem.c b/ipc/sem.c -index 5e318c5f749d..ec9203971539 100644 +index 10b94bc59d4a..b8360eaacc7a 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) @@ -12199,29 +10754,11 @@ index 3f9c97419f02..11dbe26a8279 100644 endchoice config PREEMPT_COUNT -diff --git a/kernel/Makefile b/kernel/Makefile -index e2ec54e2b952..bff8214bf5f6 100644 ---- a/kernel/Makefile -+++ b/kernel/Makefile -@@ -11,6 +11,13 @@ obj-y = fork.o exec_domain.o panic.o \ - notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o - -+# Tracing may do some dangerous __builtin_return_address() operations -+# We know they are dangerous, we don't need gcc telling us that. -+ifdef CONFIG_USING_GET_LOCK_PARENT_IP -+FRAME_CFLAGS := $(call cc-disable-warning,frame-address) -+KBUILD_CFLAGS += $(FRAME_CFLAGS) -+endif -+ - obj-$(CONFIG_MULTIUSER) += groups.o - - ifdef CONFIG_FUNCTION_TRACER diff --git a/kernel/cgroup.c b/kernel/cgroup.c -index d6b729beba49..11d61b2ca938 100644 +index 85bc9beb046d..3b8da75ba2e0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c -@@ -5027,10 +5027,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) +@@ -5040,10 +5040,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) queue_work(cgroup_destroy_wq, &css->destroy_work); } @@ -12234,7 +10771,7 @@ index d6b729beba49..11d61b2ca938 100644 struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; -@@ -5071,8 +5071,8 @@ static void css_release(struct percpu_ref *ref) +@@ -5086,8 +5086,8 @@ static void css_release(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); @@ -12245,7 +10782,7 @@ index d6b729beba49..11d61b2ca938 100644 } static void init_and_link_css(struct cgroup_subsys_state *css, -@@ -5716,6 +5716,7 @@ static int __init cgroup_wq_init(void) +@@ -5742,6 +5742,7 @@ static int __init cgroup_wq_init(void) */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); @@ -12254,20 +10791,10 @@ index d6b729beba49..11d61b2ca938 100644 /* * Used to destroy pidlists and separate to serve as flush domain. diff --git a/kernel/cpu.c b/kernel/cpu.c -index 341bf80f80bd..b575429a8a00 100644 +index 217fd2e7f435..69444f1bc924 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c -@@ -152,8 +152,8 @@ static struct { - #endif - } cpu_hotplug = { - .active_writer = NULL, -- .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), - .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), -+ .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), - #ifdef CONFIG_DEBUG_LOCK_ALLOC - .dep_map = {.name = "cpu_hotplug.lock" }, - #endif -@@ -166,6 +166,289 @@ static struct { +@@ -239,6 +239,289 @@ static struct { #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) @@ -12557,7 +11084,7 @@ index 341bf80f80bd..b575429a8a00 100644 void get_online_cpus(void) { -@@ -710,10 +993,14 @@ static int takedown_cpu(unsigned int cpu) +@@ -789,10 +1072,14 @@ static int takedown_cpu(unsigned int cpu) struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int err; @@ -12572,7 +11099,7 @@ index 341bf80f80bd..b575429a8a00 100644 /* * Prevent irq alloc/free while the dying cpu reorganizes the * interrupt affinities. -@@ -799,6 +1086,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, +@@ -877,6 +1164,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int prev_state, ret = 0; bool hasdied = false; @@ -12582,7 +11109,7 @@ index 341bf80f80bd..b575429a8a00 100644 if (num_online_cpus() == 1) return -EBUSY; -@@ -806,7 +1096,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, +@@ -884,7 +1174,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, if (!cpu_present(cpu)) return -EINVAL; @@ -12617,7 +11144,7 @@ index 341bf80f80bd..b575429a8a00 100644 cpuhp_tasks_frozen = tasks_frozen; -@@ -845,10 +1162,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, +@@ -923,10 +1240,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; out: @@ -12675,10 +11202,10 @@ index fc1ef736253c..83c666537a7a 100644 return r; } diff --git a/kernel/events/core.c b/kernel/events/core.c -index fc9bb2225291..bc2db7e1ae04 100644 +index 02c8421f8c01..3748cb7b2d6e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c -@@ -1042,6 +1042,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) +@@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) raw_spin_lock_init(&cpuctx->hrtimer_lock); hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); timer->function = perf_mux_hrtimer_handler; @@ -12686,7 +11213,7 @@ index fc9bb2225291..bc2db7e1ae04 100644 } static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) -@@ -8215,6 +8216,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) +@@ -8335,6 +8336,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hwc->hrtimer.function = perf_swevent_hrtimer; @@ -12695,7 +11222,7 @@ index fc9bb2225291..bc2db7e1ae04 100644 /* * Since hrtimers have a fixed rate, we can do a static freq->period diff --git a/kernel/exit.c b/kernel/exit.c -index 091a78be3b09..170b672bbb38 100644 +index 3076f3089919..fb2ebcf3ca7c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) @@ -12708,10 +11235,18 @@ index 091a78be3b09..170b672bbb38 100644 spin_unlock(&sighand->siglock); diff --git a/kernel/fork.c b/kernel/fork.c -index beb31725f7e2..e398cb9e62fa 100644 +index ba8a01564985..47784f8aed37 100644 --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -251,7 +251,9 @@ static inline void put_signal_struct(struct signal_struct *sig) +@@ -76,6 +76,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -376,13 +377,24 @@ static inline void put_signal_struct(struct signal_struct *sig) if (atomic_dec_and_test(&sig->sigcnt)) free_signal_struct(sig); } @@ -12722,7 +11257,22 @@ index beb31725f7e2..e398cb9e62fa 100644 void __put_task_struct(struct task_struct *tsk) { WARN_ON(!tsk->exit_state); -@@ -268,7 +270,18 @@ void __put_task_struct(struct task_struct *tsk) + WARN_ON(atomic_read(&tsk->usage)); + WARN_ON(tsk == current); + ++ /* ++ * Remove function-return probe instances associated with this ++ * task and put them back on the free list. ++ */ ++ kprobe_flush_task(tsk); ++ ++ /* Task is done with its stack. */ ++ put_task_stack(tsk); ++ + cgroup_free(tsk); + task_numa_free(tsk); + security_task_free(tsk); +@@ -393,7 +405,18 @@ void __put_task_struct(struct task_struct *tsk) if (!profile_handoff_task(tsk)) free_task(tsk); } @@ -12741,7 +11291,7 @@ index beb31725f7e2..e398cb9e62fa 100644 void __init __weak arch_task_cache_init(void) { } -@@ -702,6 +715,19 @@ void __mmdrop(struct mm_struct *mm) +@@ -852,6 +875,19 @@ void __mmdrop(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(__mmdrop); @@ -12761,7 +11311,7 @@ index beb31725f7e2..e398cb9e62fa 100644 static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); -@@ -1274,6 +1300,9 @@ static void rt_mutex_init_task(struct task_struct *p) +@@ -1426,6 +1462,9 @@ static void rt_mutex_init_task(struct task_struct *p) */ static void posix_cpu_timers_init(struct task_struct *tsk) { @@ -12771,7 +11321,7 @@ index beb31725f7e2..e398cb9e62fa 100644 tsk->cputime_expires.prof_exp = 0; tsk->cputime_expires.virt_exp = 0; tsk->cputime_expires.sched_exp = 0; -@@ -1399,6 +1428,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, +@@ -1552,6 +1591,7 @@ static __latent_entropy struct task_struct *copy_process( spin_lock_init(&p->alloc_lock); init_sigpending(&p->pending); @@ -12780,10 +11330,10 @@ index beb31725f7e2..e398cb9e62fa 100644 p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; diff --git a/kernel/futex.c b/kernel/futex.c -index 46cb3a301bc1..6de82b959729 100644 +index 2c4be467fecd..064917c2d9a5 100644 --- a/kernel/futex.c +++ b/kernel/futex.c -@@ -895,7 +895,9 @@ void exit_pi_state_list(struct task_struct *curr) +@@ -904,7 +904,9 @@ void exit_pi_state_list(struct task_struct *curr) * task still owns the PI-state: */ if (head->next != next) { @@ -12793,7 +11343,7 @@ index 46cb3a301bc1..6de82b959729 100644 continue; } -@@ -1290,6 +1292,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, +@@ -1299,6 +1301,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, struct futex_pi_state *pi_state = this->pi_state; u32 uninitialized_var(curval), newval; WAKE_Q(wake_q); @@ -12801,7 +11351,7 @@ index 46cb3a301bc1..6de82b959729 100644 bool deboost; int ret = 0; -@@ -1356,7 +1359,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, +@@ -1365,7 +1368,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); @@ -12811,7 +11361,7 @@ index 46cb3a301bc1..6de82b959729 100644 /* * First unlock HB so the waiter does not spin on it once he got woken -@@ -1364,8 +1368,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, +@@ -1373,8 +1377,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, * deboost first (and lose our higher priority), then the task might get * scheduled away before the wake up can take place. */ @@ -12822,7 +11372,7 @@ index 46cb3a301bc1..6de82b959729 100644 if (deboost) rt_mutex_adjust_prio(current); -@@ -1915,6 +1920,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, +@@ -1924,6 +1929,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, requeue_pi_wake_futex(this, &key2, hb2); drop_count++; continue; @@ -12839,7 +11389,7 @@ index 46cb3a301bc1..6de82b959729 100644 } else if (ret) { /* * rt_mutex_start_proxy_lock() detected a -@@ -2805,7 +2820,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -2814,7 +2829,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, struct hrtimer_sleeper timeout, *to = NULL; struct rt_mutex_waiter rt_waiter; struct rt_mutex *pi_mutex = NULL; @@ -12848,7 +11398,7 @@ index 46cb3a301bc1..6de82b959729 100644 union futex_key key2 = FUTEX_KEY_INIT; struct futex_q q = futex_q_init; int res, ret; -@@ -2830,10 +2845,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -2839,10 +2854,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * The waiter is allocated on our stack, manipulated by the requeue * code while we sleep on uaddr. */ @@ -12860,7 +11410,7 @@ index 46cb3a301bc1..6de82b959729 100644 ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); if (unlikely(ret != 0)) -@@ -2864,20 +2876,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -2873,20 +2885,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, /* Queue the futex_q, drop the hb lock, wait for wakeup. */ futex_wait_queue_me(hb, &q, to); @@ -12927,7 +11477,7 @@ index 46cb3a301bc1..6de82b959729 100644 /* Check if the requeue code acquired the second futex for us. */ if (!q.rt_waiter) { -@@ -2886,14 +2933,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -2895,14 +2942,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, * did a lock-steal - fix up the PI-state in that case. */ if (q.pi_state && (q.pi_state->owner != current)) { @@ -12945,7 +11495,7 @@ index 46cb3a301bc1..6de82b959729 100644 } } else { /* -@@ -2906,7 +2954,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, +@@ -2915,7 +2963,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); debug_rt_mutex_free_waiter(&rt_waiter); @@ -12978,7 +11528,7 @@ index d3f24905852c..f87aa8fdcc51 100644 if (!noirqdebug) note_interrupt(desc, retval); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c -index 9530fcd27704..fadf8f848299 100644 +index 6b669593e7eb..e357bf6c59d5 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -22,6 +22,7 @@ @@ -13339,7 +11889,7 @@ index ee1bc1bb8feb..ddef07958840 100644 }; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile -index 31322a4275cd..c6bba9299d8b 100644 +index 6f88e352cd4f..5e27fb1079e7 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -2,7 +2,7 @@ @@ -13363,7 +11913,7 @@ index 31322a4275cd..c6bba9299d8b 100644 obj-$(CONFIG_LOCKDEP) += lockdep.o ifeq ($(CONFIG_PROC_FS),y) obj-$(CONFIG_LOCKDEP) += lockdep_proc.o -@@ -25,7 +29,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o +@@ -24,7 +28,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o @@ -13374,183 +11924,11 @@ index 31322a4275cd..c6bba9299d8b 100644 +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o -diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c -index 951cfcd10b4a..57e0ea72c28a 100644 ---- a/kernel/locking/lglock.c -+++ b/kernel/locking/lglock.c -@@ -4,6 +4,15 @@ - #include - #include - -+#ifndef CONFIG_PREEMPT_RT_FULL -+# define lg_lock_ptr arch_spinlock_t -+# define lg_do_lock(l) arch_spin_lock(l) -+# define lg_do_unlock(l) arch_spin_unlock(l) -+#else -+# define lg_lock_ptr struct rt_mutex -+# define lg_do_lock(l) __rt_spin_lock__no_mg(l) -+# define lg_do_unlock(l) __rt_spin_unlock(l) -+#endif - /* - * Note there is no uninit, so lglocks cannot be defined in - * modules (but it's fine to use them from there) -@@ -12,51 +21,60 @@ - - void lg_lock_init(struct lglock *lg, char *name) - { -+#ifdef CONFIG_PREEMPT_RT_FULL -+ int i; -+ -+ for_each_possible_cpu(i) { -+ struct rt_mutex *lock = per_cpu_ptr(lg->lock, i); -+ -+ rt_mutex_init(lock); -+ } -+#endif - LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); - } - EXPORT_SYMBOL(lg_lock_init); - - void lg_local_lock(struct lglock *lg) - { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - -- preempt_disable(); -+ migrate_disable(); - lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - lock = this_cpu_ptr(lg->lock); -- arch_spin_lock(lock); -+ lg_do_lock(lock); - } - EXPORT_SYMBOL(lg_local_lock); - - void lg_local_unlock(struct lglock *lg) - { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = this_cpu_ptr(lg->lock); -- arch_spin_unlock(lock); -- preempt_enable(); -+ lg_do_unlock(lock); -+ migrate_enable(); - } - EXPORT_SYMBOL(lg_local_unlock); - - void lg_local_lock_cpu(struct lglock *lg, int cpu) - { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - -- preempt_disable(); -+ preempt_disable_nort(); - lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); -- arch_spin_lock(lock); -+ lg_do_lock(lock); - } - EXPORT_SYMBOL(lg_local_lock_cpu); - - void lg_local_unlock_cpu(struct lglock *lg, int cpu) - { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - lock = per_cpu_ptr(lg->lock, cpu); -- arch_spin_unlock(lock); -- preempt_enable(); -+ lg_do_unlock(lock); -+ preempt_enable_nort(); - } - EXPORT_SYMBOL(lg_local_unlock_cpu); - -@@ -68,30 +86,30 @@ void lg_double_lock(struct lglock *lg, int cpu1, int cpu2) - if (cpu2 < cpu1) - swap(cpu1, cpu2); - -- preempt_disable(); -+ preempt_disable_nort(); - lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); -- arch_spin_lock(per_cpu_ptr(lg->lock, cpu1)); -- arch_spin_lock(per_cpu_ptr(lg->lock, cpu2)); -+ lg_do_lock(per_cpu_ptr(lg->lock, cpu1)); -+ lg_do_lock(per_cpu_ptr(lg->lock, cpu2)); - } - - void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2) - { - lock_release(&lg->lock_dep_map, 1, _RET_IP_); -- arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1)); -- arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2)); -- preempt_enable(); -+ lg_do_unlock(per_cpu_ptr(lg->lock, cpu1)); -+ lg_do_unlock(per_cpu_ptr(lg->lock, cpu2)); -+ preempt_enable_nort(); - } - - void lg_global_lock(struct lglock *lg) - { - int i; - -- preempt_disable(); -+ preempt_disable_nort(); - lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); - for_each_possible_cpu(i) { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - lock = per_cpu_ptr(lg->lock, i); -- arch_spin_lock(lock); -+ lg_do_lock(lock); - } - } - EXPORT_SYMBOL(lg_global_lock); -@@ -102,10 +120,35 @@ void lg_global_unlock(struct lglock *lg) - - lock_release(&lg->lock_dep_map, 1, _RET_IP_); - for_each_possible_cpu(i) { -- arch_spinlock_t *lock; -+ lg_lock_ptr *lock; - lock = per_cpu_ptr(lg->lock, i); -- arch_spin_unlock(lock); -+ lg_do_unlock(lock); - } -- preempt_enable(); -+ preempt_enable_nort(); - } - EXPORT_SYMBOL(lg_global_unlock); -+ -+#ifdef CONFIG_PREEMPT_RT_FULL -+/* -+ * HACK: If you use this, you get to keep the pieces. -+ * Used in queue_stop_cpus_work() when stop machinery -+ * is called from inactive CPU, so we can't schedule. -+ */ -+# define lg_do_trylock_relax(l) \ -+ do { \ -+ while (!__rt_spin_trylock(l)) \ -+ cpu_relax(); \ -+ } while (0) -+ -+void lg_global_trylock_relax(struct lglock *lg) -+{ -+ int i; -+ -+ lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); -+ for_each_possible_cpu(i) { -+ lg_lock_ptr *lock; -+ lock = per_cpu_ptr(lg->lock, i); -+ lg_do_trylock_relax(lock); -+ } -+} -+#endif diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c -index 589d763a49b3..4b48c4bfb60c 100644 +index 4d7ffc0a0d00..9e52009c192e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c -@@ -3686,6 +3686,7 @@ static void check_flags(unsigned long flags) +@@ -3689,6 +3689,7 @@ static void check_flags(unsigned long flags) } } @@ -13558,7 +11936,7 @@ index 589d763a49b3..4b48c4bfb60c 100644 /* * We dont accurately track softirq state in e.g. * hardirq contexts (such as on 4KSTACKS), so only -@@ -3700,6 +3701,7 @@ static void check_flags(unsigned long flags) +@@ -3703,6 +3704,7 @@ static void check_flags(unsigned long flags) DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); } } @@ -13578,6 +11956,37 @@ index f8c5af52a131..788068773e61 100644 #include #include #include +diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c +index ce182599cf2e..2ad3a1e8344c 100644 +--- a/kernel/locking/percpu-rwsem.c ++++ b/kernel/locking/percpu-rwsem.c +@@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); + __init_rwsem(&sem->rw_sem, name, rwsem_key); +- init_waitqueue_head(&sem->writer); ++ init_swait_queue_head(&sem->writer); + sem->readers_block = 0; + return 0; + } +@@ -103,7 +103,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) + __this_cpu_dec(*sem->read_count); + + /* Prod writer to recheck readers_active */ +- wake_up(&sem->writer); ++ swake_up(&sem->writer); + } + EXPORT_SYMBOL_GPL(__percpu_up_read); + +@@ -160,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) + */ + + /* Wait for all now active readers to complete. */ +- wait_event(sem->writer, readers_active_check(sem)); ++ swait_event(sem->writer, readers_active_check(sem)); + } + EXPORT_SYMBOL_GPL(percpu_down_write); + diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c new file mode 100644 index 000000000000..665754c00e1e @@ -14083,7 +12492,7 @@ index 000000000000..665754c00e1e +} +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c -index 1ec0f48962b3..2576f7ccf8e2 100644 +index 2c49d76f96c3..4f1a7663c34d 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -7,6 +7,11 @@ @@ -14106,8 +12515,8 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 #include "rtmutex_common.h" -@@ -69,6 +75,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) - clear_rt_mutex_waiters(lock); +@@ -133,6 +139,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) + WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); } +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) @@ -14119,7 +12528,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * We can speed up the acquire/release, if there's no debugging state to be * set up. -@@ -350,6 +362,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, +@@ -414,6 +426,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, return debug_rt_mutex_detect_deadlock(waiter, chwalk); } @@ -14134,7 +12543,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * Max number of times we'll walk the boosting chain: */ -@@ -357,7 +377,8 @@ int max_lock_depth = 1024; +@@ -421,7 +441,8 @@ int max_lock_depth = 1024; static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) { @@ -14144,7 +12553,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } /* -@@ -493,7 +514,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, +@@ -557,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * reached or the state of the chain has changed while we * dropped the locks. */ @@ -14153,7 +12562,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 goto out_unlock_pi; /* -@@ -655,13 +676,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, +@@ -719,13 +740,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * follow here. This is the end of the chain we are walking. */ if (!rt_mutex_owner(lock)) { @@ -14172,7 +12581,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_unlock_irq(&lock->wait_lock); return 0; } -@@ -754,6 +778,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, +@@ -818,6 +842,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, return ret; } @@ -14198,7 +12607,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * Try to take an rt-mutex * -@@ -764,8 +807,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, +@@ -828,8 +871,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * @waiter: The waiter that is queued to the lock's wait tree if the * callsite called task_blocked_on_lock(), otherwise NULL */ @@ -14210,7 +12619,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 { /* * Before testing whether we can acquire @lock, we set the -@@ -802,8 +846,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +@@ -866,8 +910,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, * If waiter is not the highest priority waiter of * @lock, give up. */ @@ -14222,7 +12631,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * We can acquire the lock. Remove the waiter from the -@@ -821,14 +867,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +@@ -885,14 +931,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, * not need to be dequeued. */ if (rt_mutex_has_waiters(lock)) { @@ -14240,7 +12649,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * The current top waiter stays enqueued. We * don't have to change anything in the lock -@@ -877,6 +919,438 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, +@@ -941,6 +983,433 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, return 1; } @@ -14548,11 +12957,6 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 +} +EXPORT_SYMBOL(rt_spin_unlock_wait); + -+int __lockfunc __rt_spin_trylock(struct rt_mutex *lock) -+{ -+ return rt_mutex_trylock(lock); -+} -+ +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) +{ + int ret; @@ -14679,7 +13083,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * Task blocks on lock. * -@@ -907,6 +1381,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +@@ -971,6 +1440,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, return -EDEADLK; raw_spin_lock(&task->pi_lock); @@ -14703,7 +13107,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 __rt_mutex_adjust_prio(task); waiter->task = task; waiter->lock = lock; -@@ -930,7 +1421,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +@@ -994,7 +1480,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, rt_mutex_enqueue_pi(owner, waiter); __rt_mutex_adjust_prio(owner); @@ -14712,7 +13116,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 chain_walk = 1; } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; -@@ -972,6 +1463,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, +@@ -1036,6 +1522,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, * Called with lock->wait_lock held and interrupts disabled. */ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, @@ -14720,7 +13124,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 struct rt_mutex *lock) { struct rt_mutex_waiter *waiter; -@@ -1000,7 +1492,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, +@@ -1064,7 +1551,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, raw_spin_unlock(¤t->pi_lock); @@ -14732,7 +13136,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } /* -@@ -1014,7 +1509,7 @@ static void remove_waiter(struct rt_mutex *lock, +@@ -1078,7 +1568,7 @@ static void remove_waiter(struct rt_mutex *lock, { bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); @@ -14741,7 +13145,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_lock(¤t->pi_lock); rt_mutex_dequeue(lock, waiter); -@@ -1038,7 +1533,8 @@ static void remove_waiter(struct rt_mutex *lock, +@@ -1102,7 +1592,8 @@ static void remove_waiter(struct rt_mutex *lock, __rt_mutex_adjust_prio(owner); /* Store the lock on which owner is blocked or NULL */ @@ -14751,7 +13155,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_unlock(&owner->pi_lock); -@@ -1074,17 +1570,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) +@@ -1138,17 +1629,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) raw_spin_lock_irqsave(&task->pi_lock, flags); waiter = task->pi_blocked_on; @@ -14771,7 +13175,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, next_lock, NULL, task); } -@@ -1102,7 +1598,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) +@@ -1166,7 +1657,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) static int __sched __rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, @@ -14781,7 +13185,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 { int ret = 0; -@@ -1125,6 +1622,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, +@@ -1189,6 +1681,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, break; } @@ -14794,7 +13198,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_unlock_irq(&lock->wait_lock); debug_rt_mutex_print_deadlock(waiter); -@@ -1159,21 +1662,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, +@@ -1223,21 +1721,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, } } @@ -14895,7 +13299,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* * Technically we could use raw_spin_[un]lock_irq() here, but this can -@@ -1187,6 +1765,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, +@@ -1251,6 +1824,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, /* Try to acquire the lock again: */ if (try_to_take_rt_mutex(lock, current, NULL)) { @@ -14904,7 +13308,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); return 0; } -@@ -1201,13 +1781,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, +@@ -1265,13 +1840,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, if (likely(!ret)) /* sleep on the mutex */ @@ -14930,7 +13334,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } /* -@@ -1267,7 +1857,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) +@@ -1331,7 +1916,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) * Return whether the current task needs to undo a potential priority boosting. */ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, @@ -14940,7 +13344,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 { unsigned long flags; -@@ -1323,7 +1914,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, +@@ -1387,7 +1973,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, * * Queue the next waiter for wakeup once we release the wait_lock. */ @@ -14949,7 +13353,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 raw_spin_unlock_irqrestore(&lock->wait_lock, flags); -@@ -1339,31 +1930,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, +@@ -1403,31 +1989,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, @@ -14990,7 +13394,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } static inline int -@@ -1380,17 +1976,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, +@@ -1444,17 +2035,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, static inline void rt_mutex_fastunlock(struct rt_mutex *lock, bool (*slowfn)(struct rt_mutex *lock, @@ -15013,7 +13417,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* Undo pi boosting if necessary: */ if (deboost) -@@ -1407,7 +2006,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) +@@ -1471,7 +2065,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) { might_sleep(); @@ -15022,7 +13426,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } EXPORT_SYMBOL_GPL(rt_mutex_lock); -@@ -1424,7 +2023,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) +@@ -1488,7 +2082,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { might_sleep(); @@ -15031,7 +13435,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); -@@ -1437,11 +2036,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, +@@ -1501,11 +2095,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, might_sleep(); return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, @@ -15063,7 +13467,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 * rt_mutex_timed_lock - lock a rt_mutex interruptible * the timeout structure is provided * by the caller -@@ -1461,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) +@@ -1525,6 +2138,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, RT_MUTEX_MIN_CHAINWALK, @@ -15071,7 +13475,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); -@@ -1478,7 +2097,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); +@@ -1542,7 +2156,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); */ int __sched rt_mutex_trylock(struct rt_mutex *lock) { @@ -15083,7 +13487,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 return 0; return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); -@@ -1504,13 +2127,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); +@@ -1568,13 +2186,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); * required or not. */ bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, @@ -15100,7 +13504,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 } /** -@@ -1543,13 +2167,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); +@@ -1607,13 +2226,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); void __rt_mutex_init(struct rt_mutex *lock, const char *name) { lock->owner = NULL; @@ -15115,7 +13519,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /** * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a -@@ -1564,7 +2187,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); +@@ -1628,7 +2246,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner) { @@ -15124,7 +13528,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 debug_rt_mutex_proxy_lock(lock, proxy_owner); rt_mutex_set_owner(lock, proxy_owner); rt_mutex_deadlock_account_lock(lock, proxy_owner); -@@ -1612,6 +2235,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, +@@ -1676,6 +2294,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, return 1; } @@ -15160,7 +13564,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, RT_MUTEX_FULL_CHAINWALK); -@@ -1626,7 +2278,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, +@@ -1690,7 +2337,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, ret = 0; } @@ -15169,7 +13573,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 remove_waiter(lock, waiter); raw_spin_unlock_irq(&lock->wait_lock); -@@ -1682,7 +2334,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +@@ -1746,7 +2393,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, set_current_state(TASK_INTERRUPTIBLE); /* sleep on the mutex */ @@ -15178,7 +13582,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 if (unlikely(ret)) remove_waiter(lock, waiter); -@@ -1697,3 +2349,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +@@ -1761,3 +2408,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, return ret; } @@ -15269,7 +13673,7 @@ index 1ec0f48962b3..2576f7ccf8e2 100644 +EXPORT_SYMBOL(ww_mutex_unlock); +#endif diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h -index 4f5f83c7d2d3..289f062f26cd 100644 +index e317e1cbb3eb..f457c7574920 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -27,6 +27,7 @@ struct rt_mutex_waiter { @@ -15280,7 +13684,7 @@ index 4f5f83c7d2d3..289f062f26cd 100644 #ifdef CONFIG_DEBUG_RT_MUTEXES unsigned long ip; struct pid *deadlock_task_pid; -@@ -97,6 +98,9 @@ enum rtmutex_chainwalk { +@@ -98,6 +99,9 @@ enum rtmutex_chainwalk { /* * PI-futex support (proxy locking functions, etc.): */ @@ -15290,7 +13694,7 @@ index 4f5f83c7d2d3..289f062f26cd 100644 extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); -@@ -110,7 +114,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, +@@ -111,7 +115,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter); extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, @@ -15300,7 +13704,7 @@ index 4f5f83c7d2d3..289f062f26cd 100644 extern void rt_mutex_adjust_prio(struct task_struct *task); #ifdef CONFIG_DEBUG_RT_MUTEXES -@@ -119,4 +124,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); +@@ -120,4 +125,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); # include "rtmutex.h" #endif @@ -15384,10 +13788,10 @@ index 0374a596cffa..94970338d518 100644 + +#endif diff --git a/kernel/panic.c b/kernel/panic.c -index ca8cea1ef673..6b698115f003 100644 +index e6480e20379e..7e9c1918a94e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -449,9 +449,11 @@ static u64 oops_id; +@@ -482,9 +482,11 @@ static u64 oops_id; static int init_oops_id(void) { @@ -15400,7 +13804,7 @@ index ca8cea1ef673..6b698115f003 100644 return 0; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c -index 33c79b6105c5..f53375bc77df 100644 +index b26dbc48c75b..968255f27a33 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -286,6 +286,8 @@ static int create_image(int platform_mode) @@ -15412,7 +13816,7 @@ index 33c79b6105c5..f53375bc77df 100644 error = syscore_suspend(); if (error) { printk(KERN_ERR "PM: Some system devices failed to power down, " -@@ -315,6 +317,7 @@ static int create_image(int platform_mode) +@@ -317,6 +319,7 @@ static int create_image(int platform_mode) syscore_resume(); Enable_irqs: @@ -15420,7 +13824,7 @@ index 33c79b6105c5..f53375bc77df 100644 local_irq_enable(); Enable_cpus: -@@ -444,6 +447,7 @@ static int resume_target_kernel(bool platform_mode) +@@ -446,6 +449,7 @@ static int resume_target_kernel(bool platform_mode) goto Enable_cpus; local_irq_disable(); @@ -15428,7 +13832,7 @@ index 33c79b6105c5..f53375bc77df 100644 error = syscore_suspend(); if (error) -@@ -477,6 +481,7 @@ static int resume_target_kernel(bool platform_mode) +@@ -479,6 +483,7 @@ static int resume_target_kernel(bool platform_mode) syscore_resume(); Enable_irqs: @@ -15436,7 +13840,7 @@ index 33c79b6105c5..f53375bc77df 100644 local_irq_enable(); Enable_cpus: -@@ -562,6 +567,7 @@ int hibernation_platform_enter(void) +@@ -564,6 +569,7 @@ int hibernation_platform_enter(void) goto Enable_cpus; local_irq_disable(); @@ -15444,7 +13848,7 @@ index 33c79b6105c5..f53375bc77df 100644 syscore_suspend(); if (pm_wakeup_pending()) { error = -EAGAIN; -@@ -574,6 +580,7 @@ int hibernation_platform_enter(void) +@@ -576,6 +582,7 @@ int hibernation_platform_enter(void) Power_up: syscore_resume(); @@ -15452,7 +13856,7 @@ index 33c79b6105c5..f53375bc77df 100644 local_irq_enable(); Enable_cpus: -@@ -674,6 +681,10 @@ static int load_image_and_restore(void) +@@ -676,6 +683,10 @@ static int load_image_and_restore(void) return error; } @@ -15463,7 +13867,7 @@ index 33c79b6105c5..f53375bc77df 100644 /** * hibernate - Carry out system hibernation, including saving the image. */ -@@ -687,6 +698,8 @@ int hibernate(void) +@@ -689,6 +700,8 @@ int hibernate(void) return -EPERM; } @@ -15472,7 +13876,7 @@ index 33c79b6105c5..f53375bc77df 100644 lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { -@@ -764,6 +777,7 @@ int hibernate(void) +@@ -766,6 +779,7 @@ int hibernate(void) atomic_inc(&snapshot_device_available); Unlock: unlock_system_sleep(); @@ -15481,10 +13885,10 @@ index 33c79b6105c5..f53375bc77df 100644 } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c -index 0acab9d7f96f..aac06aad757c 100644 +index 6ccb08f57fcb..c8cbb5ed2fe3 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c -@@ -361,6 +361,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) +@@ -369,6 +369,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); @@ -15493,7 +13897,7 @@ index 0acab9d7f96f..aac06aad757c 100644 error = syscore_suspend(); if (!error) { *wakeup = pm_wakeup_pending(); -@@ -377,6 +379,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) +@@ -385,6 +387,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) syscore_resume(); } @@ -15502,7 +13906,7 @@ index 0acab9d7f96f..aac06aad757c 100644 arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); -@@ -519,6 +523,8 @@ static int enter_state(suspend_state_t state) +@@ -527,6 +531,8 @@ static int enter_state(suspend_state_t state) return error; } @@ -15511,7 +13915,7 @@ index 0acab9d7f96f..aac06aad757c 100644 /** * pm_suspend - Externally visible function for suspending the system. * @state: System sleep state to enter. -@@ -533,6 +539,8 @@ int pm_suspend(suspend_state_t state) +@@ -541,6 +547,8 @@ int pm_suspend(suspend_state_t state) if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) return -EINVAL; @@ -15520,7 +13924,7 @@ index 0acab9d7f96f..aac06aad757c 100644 error = enter_state(state); if (error) { suspend_stats.fail++; -@@ -540,6 +548,7 @@ int pm_suspend(suspend_state_t state) +@@ -548,6 +556,7 @@ int pm_suspend(suspend_state_t state) } else { suspend_stats.success++; } @@ -15529,7 +13933,7 @@ index 0acab9d7f96f..aac06aad757c 100644 } EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c -index eea6dbc2d8cf..6f01c7ecb45e 100644 +index f7a55e9ff2f7..9277ee033271 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -351,6 +351,65 @@ __packed __aligned(4) @@ -15598,7 +14002,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -1340,6 +1399,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1337,6 +1396,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) { char *text; int len = 0; @@ -15606,7 +14010,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) -@@ -1351,6 +1411,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1348,6 +1408,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) u64 seq; u32 idx; enum log_flags prev; @@ -15621,7 +14025,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 /* * Find first record that fits, including all following records, -@@ -1366,6 +1434,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1363,6 +1431,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) prev = msg->flags; idx = log_next(idx); seq++; @@ -15636,7 +14040,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 } /* move first record forward until length fits into the buffer */ -@@ -1379,6 +1455,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1376,6 +1452,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) prev = msg->flags; idx = log_next(idx); seq++; @@ -15651,7 +14055,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 } /* last message fitting into this dump */ -@@ -1419,6 +1503,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) +@@ -1416,6 +1500,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) clear_seq = log_next_seq; clear_idx = log_next_idx; } @@ -15659,7 +14063,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 raw_spin_unlock_irq(&logbuf_lock); kfree(text); -@@ -1572,6 +1657,12 @@ static void call_console_drivers(int level, +@@ -1569,6 +1654,12 @@ static void call_console_drivers(int level, if (!console_drivers) return; @@ -15672,7 +14076,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 for_each_console(con) { if (exclusive_console && con != exclusive_console) continue; -@@ -1587,6 +1678,7 @@ static void call_console_drivers(int level, +@@ -1584,6 +1675,7 @@ static void call_console_drivers(int level, else con->write(con, text, len); } @@ -15680,7 +14084,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 } /* -@@ -1750,6 +1842,13 @@ asmlinkage int vprintk_emit(int facility, int level, +@@ -1781,6 +1873,13 @@ asmlinkage int vprintk_emit(int facility, int level, /* cpu currently holding logbuf_lock in this function */ static unsigned int logbuf_cpu = UINT_MAX; @@ -15694,7 +14098,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; -@@ -1894,13 +1993,23 @@ asmlinkage int vprintk_emit(int facility, int level, +@@ -1885,13 +1984,23 @@ asmlinkage int vprintk_emit(int facility, int level, /* If called from the scheduler, we can not call up(). */ if (!in_sched) { @@ -15719,7 +14123,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 console_unlock(); lockdep_on(); } -@@ -2023,26 +2132,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); +@@ -2014,26 +2123,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); #endif /* CONFIG_PRINTK */ @@ -15746,7 +14150,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 static int __add_preferred_console(char *name, int idx, char *options, char *brl_options) { -@@ -2312,11 +2401,16 @@ static void console_cont_flush(char *text, size_t size) +@@ -2303,11 +2392,16 @@ static void console_cont_flush(char *text, size_t size) goto out; len = cont_print_text(text, size); @@ -15763,7 +14167,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 return; out: raw_spin_unlock_irqrestore(&logbuf_lock, flags); -@@ -2440,13 +2534,17 @@ void console_unlock(void) +@@ -2431,13 +2525,17 @@ void console_unlock(void) console_idx = log_next(console_idx); console_seq++; console_prev = msg->flags; @@ -15782,7 +14186,7 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 if (do_cond_resched) cond_resched(); } -@@ -2498,6 +2596,11 @@ void console_unblank(void) +@@ -2489,6 +2587,11 @@ void console_unblank(void) { struct console *c; @@ -15795,10 +14199,10 @@ index eea6dbc2d8cf..6f01c7ecb45e 100644 * console_unblank can no longer be called in interrupt context unless * oops_in_progress is set to 1.. diff --git a/kernel/ptrace.c b/kernel/ptrace.c -index 1d3b7665d0be..ce666639789d 100644 +index 49ba7c1ade9d..44f44b47ec07 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c -@@ -128,7 +128,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) +@@ -166,7 +166,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) spin_lock_irq(&task->sighand->siglock); if (task_is_traced(task) && !__fatal_signal_pending(task)) { @@ -15815,7 +14219,7 @@ index 1d3b7665d0be..ce666639789d 100644 } spin_unlock_irq(&task->sighand->siglock); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c -index 971e2b138063..a304670fb917 100644 +index bf08fee53dc7..eeb8ce4ad7b6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = { @@ -15840,10 +14244,10 @@ index 971e2b138063..a304670fb917 100644 * Don't even think about trying any of these in real life!!! * The names includes "busted", and they really means it! diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c -index 5d80925e7fc8..2b4bc2b2c25a 100644 +index 69a5611a7e7c..64d91f306eda 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c -@@ -56,6 +56,11 @@ +@@ -55,6 +55,11 @@ #include #include #include @@ -15855,7 +14259,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 #include "tree.h" #include "rcu.h" -@@ -259,6 +264,19 @@ void rcu_sched_qs(void) +@@ -257,6 +262,19 @@ void rcu_sched_qs(void) this_cpu_ptr(&rcu_sched_data), true); } @@ -15875,7 +14279,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 void rcu_bh_qs(void) { if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { -@@ -268,6 +286,7 @@ void rcu_bh_qs(void) +@@ -266,6 +284,7 @@ void rcu_bh_qs(void) __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); } } @@ -15883,7 +14287,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 static DEFINE_PER_CPU(int, rcu_sched_qs_mask); -@@ -448,11 +467,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); +@@ -446,11 +465,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); /* * Return the number of RCU BH batches started thus far for debug & stats. */ @@ -15897,7 +14301,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Return the number of RCU batches completed thus far for debug & stats. -@@ -472,6 +493,7 @@ unsigned long rcu_batches_completed_sched(void) +@@ -470,6 +491,7 @@ unsigned long rcu_batches_completed_sched(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); @@ -15905,7 +14309,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Return the number of RCU BH batches completed thus far for debug & stats. */ -@@ -480,6 +502,7 @@ unsigned long rcu_batches_completed_bh(void) +@@ -478,6 +500,7 @@ unsigned long rcu_batches_completed_bh(void) return rcu_bh_state.completed; } EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); @@ -15913,7 +14317,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Return the number of RCU expedited batches completed thus far for -@@ -503,6 +526,7 @@ unsigned long rcu_exp_batches_completed_sched(void) +@@ -501,6 +524,7 @@ unsigned long rcu_exp_batches_completed_sched(void) } EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); @@ -15921,7 +14325,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Force a quiescent state. */ -@@ -521,6 +545,13 @@ void rcu_bh_force_quiescent_state(void) +@@ -519,6 +543,13 @@ void rcu_bh_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); @@ -15935,7 +14339,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Force a quiescent state for RCU-sched. */ -@@ -571,9 +602,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, +@@ -569,9 +600,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, case RCU_FLAVOR: rsp = rcu_state_p; break; @@ -15951,8 +14355,8 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * Do RCU core processing for the current CPU. */ --static void rcu_process_callbacks(struct softirq_action *unused) -+static void rcu_process_callbacks(void) +-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) ++static __latent_entropy void rcu_process_callbacks(void) { struct rcu_state *rsp; @@ -16130,7 +14534,7 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /** * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. -@@ -4196,12 +4321,13 @@ void __init rcu_init(void) +@@ -4220,12 +4345,13 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); @@ -16146,10 +14550,10 @@ index 5d80925e7fc8..2b4bc2b2c25a 100644 /* * We don't need protection against CPU-hotplug here because diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h -index f714f873bf9d..71631196e66e 100644 +index e99a5234d9ed..958ac107062c 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h -@@ -587,18 +587,18 @@ extern struct list_head rcu_struct_flavors; +@@ -588,18 +588,18 @@ extern struct list_head rcu_struct_flavors; */ extern struct rcu_state rcu_sched_state; @@ -16170,7 +14574,7 @@ index f714f873bf9d..71631196e66e 100644 #ifndef RCU_TREE_NONCORE -@@ -618,10 +618,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); +@@ -619,10 +619,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); static void __init __rcu_init_preempt(void); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); @@ -16183,7 +14587,7 @@ index f714f873bf9d..71631196e66e 100644 struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_BOOST */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h -index 0082fce402a0..e08cddadd9c7 100644 +index 85c5a883c6e3..dbbda005c1f9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -24,25 +24,10 @@ @@ -16457,10 +14861,39 @@ index 0082fce402a0..e08cddadd9c7 100644 /* * Prepare a CPU for idle from an RCU perspective. The first major task diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c -index f0d8322bc3ec..b40d3468ba4e 100644 +index f19271dce0a9..6b5ab88b6103 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c -@@ -295,6 +295,7 @@ int rcu_read_lock_held(void) +@@ -62,7 +62,7 @@ + #ifndef CONFIG_TINY_RCU + module_param(rcu_expedited, int, 0); + module_param(rcu_normal, int, 0); +-static int rcu_normal_after_boot; ++static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); + module_param(rcu_normal_after_boot, int, 0); + #endif /* #ifndef CONFIG_TINY_RCU */ + +@@ -129,8 +129,7 @@ bool rcu_gp_is_normal(void) + } + EXPORT_SYMBOL_GPL(rcu_gp_is_normal); + +-static atomic_t rcu_expedited_nesting = +- ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); ++static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); + + /* + * Should normal grace-period primitives be expedited? Intended for +@@ -178,8 +177,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); + */ + void rcu_end_inkernel_boot(void) + { +- if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) +- rcu_unexpedite_gp(); ++ rcu_unexpedite_gp(); + if (rcu_normal_after_boot) + WRITE_ONCE(rcu_normal, 1); + } +@@ -294,6 +292,7 @@ int rcu_read_lock_held(void) } EXPORT_SYMBOL_GPL(rcu_read_lock_held); @@ -16468,7 +14901,7 @@ index f0d8322bc3ec..b40d3468ba4e 100644 /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -@@ -321,6 +322,7 @@ int rcu_read_lock_bh_held(void) +@@ -320,6 +319,7 @@ int rcu_read_lock_bh_held(void) return in_softirq() || irqs_disabled(); } EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); @@ -16476,45 +14909,6 @@ index f0d8322bc3ec..b40d3468ba4e 100644 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -diff --git a/kernel/relay.c b/kernel/relay.c -index d797502140b9..cf05c17ddbed 100644 ---- a/kernel/relay.c -+++ b/kernel/relay.c -@@ -336,6 +336,10 @@ static void wakeup_readers(unsigned long data) - { - struct rchan_buf *buf = (struct rchan_buf *)data; - wake_up_interruptible(&buf->read_wait); -+ /* -+ * Stupid polling for now: -+ */ -+ mod_timer(&buf->timer, jiffies + 1); - } - - /** -@@ -353,6 +357,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) - init_waitqueue_head(&buf->read_wait); - kref_init(&buf->kref); - setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); -+ mod_timer(&buf->timer, jiffies + 1); - } else - del_timer_sync(&buf->timer); - -@@ -767,15 +772,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) - else - buf->early_bytes += buf->chan->subbuf_size - - buf->padding[old_subbuf]; -- smp_mb(); -- if (waitqueue_active(&buf->read_wait)) -- /* -- * Calling wake_up_interruptible() from here -- * will deadlock if we happen to be logging -- * from the scheduler (trying to re-grab -- * rq->lock), so defer it. -- */ -- mod_timer(&buf->timer, jiffies + 1); - } - - old = buf->data; diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5e59b832ae2b..7337a7f60e3f 100644 --- a/kernel/sched/Makefile @@ -16623,7 +15017,7 @@ index 8d0f35debf35..b62cf6400fe0 100644 } EXPORT_SYMBOL(completion_done); diff --git a/kernel/sched/core.c b/kernel/sched/core.c -index 44817c640e99..55aafcff5810 100644 +index 154fd689fe02..a6aa5801b21e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features = @@ -16732,7 +15126,7 @@ index 44817c640e99..55aafcff5810 100644 return cpu; } /* -@@ -1089,6 +1134,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +@@ -1100,6 +1145,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) lockdep_assert_held(&p->pi_lock); @@ -16744,8 +15138,8 @@ index 44817c640e99..55aafcff5810 100644 queued = task_on_rq_queued(p); running = task_current(rq, p); -@@ -1111,6 +1161,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) - enqueue_task(rq, p, ENQUEUE_RESTORE); +@@ -1122,6 +1172,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) + set_curr_task(rq, p); } +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); @@ -16829,7 +15223,7 @@ index 44817c640e99..55aafcff5810 100644 /* * Change a given task's CPU affinity. Migrate the thread to a * proper CPU and schedule it away if the CPU it's executing on -@@ -1168,7 +1296,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, +@@ -1179,7 +1307,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, } /* Can the task run on the task's current CPU? If so, we're done */ @@ -16838,7 +15232,7 @@ index 44817c640e99..55aafcff5810 100644 goto out; dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); -@@ -1355,6 +1483,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) +@@ -1366,6 +1494,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) return ret; } @@ -16857,7 +15251,7 @@ index 44817c640e99..55aafcff5810 100644 /* * wait_task_inactive - wait for a thread to unschedule. * -@@ -1399,7 +1539,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) +@@ -1410,7 +1550,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) * is actually now running somewhere else! */ while (task_running(rq, p)) { @@ -16866,7 +15260,7 @@ index 44817c640e99..55aafcff5810 100644 return 0; cpu_relax(); } -@@ -1414,7 +1554,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) +@@ -1425,7 +1565,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) running = task_running(rq, p); queued = task_on_rq_queued(p); ncsw = 0; @@ -16876,7 +15270,7 @@ index 44817c640e99..55aafcff5810 100644 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ task_rq_unlock(rq, p, &rf); -@@ -1670,10 +1811,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl +@@ -1680,10 +1821,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl { activate_task(rq, p, en_flags); p->on_rq = TASK_ON_RQ_QUEUED; @@ -16887,7 +15281,7 @@ index 44817c640e99..55aafcff5810 100644 } /* -@@ -2008,8 +2145,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -2018,8 +2155,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_mb__before_spinlock(); raw_spin_lock_irqsave(&p->pi_lock, flags); @@ -16916,12 +15310,13 @@ index 44817c640e99..55aafcff5810 100644 trace_sched_waking(p); -@@ -2093,53 +2249,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) +@@ -2102,53 +2258,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) } /** - * try_to_wake_up_local - try to wake up a local task with rq lock held - * @p: the thread to be awakened +- * @cookie: context's cookie for pinning - * - * Put @p on the run-queue if it's not already there. The caller must - * ensure that this_rq() is locked, @p is bound to this_rq() and not @@ -16960,8 +15355,7 @@ index 44817c640e99..55aafcff5810 100644 - ttwu_activate(rq, p, ENQUEUE_WAKEUP); - - ttwu_do_wakeup(rq, p, 0, cookie); -- if (schedstat_enabled()) -- ttwu_stat(p, smp_processor_id(), 0); +- ttwu_stat(p, smp_processor_id(), 0); -out: - raw_spin_unlock(&p->pi_lock); -} @@ -16970,7 +15364,7 @@ index 44817c640e99..55aafcff5810 100644 * wake_up_process - Wake up a specific process * @p: The process to be woken up. * -@@ -2157,6 +2266,18 @@ int wake_up_process(struct task_struct *p) +@@ -2166,6 +2275,18 @@ int wake_up_process(struct task_struct *p) } EXPORT_SYMBOL(wake_up_process); @@ -16989,7 +15383,7 @@ index 44817c640e99..55aafcff5810 100644 int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0); -@@ -2433,6 +2554,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) +@@ -2442,6 +2563,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -16999,7 +15393,7 @@ index 44817c640e99..55aafcff5810 100644 #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -2761,8 +2885,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) +@@ -2770,21 +2894,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) finish_arch_post_lock_switch(); fire_sched_in_preempt_notifiers(current); @@ -17013,8 +15407,21 @@ index 44817c640e99..55aafcff5810 100644 if (unlikely(prev_state == TASK_DEAD)) { if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); -@@ -3237,6 +3365,77 @@ static inline void schedule_debug(struct task_struct *prev) - schedstat_inc(this_rq(), sched_count); + +- /* +- * Remove function-return probe instances associated with this +- * task and put them back on the free list. +- */ +- kprobe_flush_task(prev); +- +- /* Task is done with its stack. */ +- put_task_stack(prev); +- + put_task_struct(prev); + } + +@@ -3252,6 +3371,77 @@ static inline void schedule_debug(struct task_struct *prev) + schedstat_inc(this_rq()->sched_count); } +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) @@ -17091,7 +15498,7 @@ index 44817c640e99..55aafcff5810 100644 /* * Pick up the highest-prio task: */ -@@ -3364,19 +3563,6 @@ static void __sched notrace __schedule(bool preempt) +@@ -3368,19 +3558,6 @@ static void __sched notrace __schedule(bool preempt) } else { deactivate_task(rq, prev, DEQUEUE_SLEEP); prev->on_rq = 0; @@ -17111,7 +15518,7 @@ index 44817c640e99..55aafcff5810 100644 } switch_count = &prev->nvcsw; } -@@ -3386,6 +3572,7 @@ static void __sched notrace __schedule(bool preempt) +@@ -3390,6 +3567,7 @@ static void __sched notrace __schedule(bool preempt) next = pick_next_task(rq, prev, cookie); clear_tsk_need_resched(prev); @@ -17119,7 +15526,7 @@ index 44817c640e99..55aafcff5810 100644 clear_preempt_need_resched(); rq->clock_skip_update = 0; -@@ -3407,9 +3594,20 @@ STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ +@@ -3437,9 +3615,20 @@ void __noreturn do_task_dead(void) static inline void sched_submit_work(struct task_struct *tsk) { @@ -17141,7 +15548,7 @@ index 44817c640e99..55aafcff5810 100644 * If we are going to sleep and we have plugged IO queued, * make sure to submit it to avoid deadlocks. */ -@@ -3417,6 +3615,12 @@ static inline void sched_submit_work(struct task_struct *tsk) +@@ -3447,6 +3636,12 @@ static inline void sched_submit_work(struct task_struct *tsk) blk_schedule_flush_plug(tsk); } @@ -17154,7 +15561,7 @@ index 44817c640e99..55aafcff5810 100644 asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; -@@ -3427,6 +3631,7 @@ asmlinkage __visible void __sched schedule(void) +@@ -3457,6 +3652,7 @@ asmlinkage __visible void __sched schedule(void) __schedule(false); sched_preempt_enable_no_resched(); } while (need_resched()); @@ -17162,7 +15569,7 @@ index 44817c640e99..55aafcff5810 100644 } EXPORT_SYMBOL(schedule); -@@ -3490,6 +3695,30 @@ static void __sched notrace preempt_schedule_common(void) +@@ -3520,6 +3716,30 @@ static void __sched notrace preempt_schedule_common(void) } while (need_resched()); } @@ -17193,7 +15600,7 @@ index 44817c640e99..55aafcff5810 100644 #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption -@@ -3504,7 +3733,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) +@@ -3534,7 +3754,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; @@ -17203,7 +15610,7 @@ index 44817c640e99..55aafcff5810 100644 preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -3531,6 +3761,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) +@@ -3561,6 +3782,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) if (likely(!preemptible())) return; @@ -17213,7 +15620,7 @@ index 44817c640e99..55aafcff5810 100644 do { /* * Because the function tracer can trace preempt_count_sub() -@@ -3553,7 +3786,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) +@@ -3583,7 +3807,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) * an infinite recursion. */ prev_ctx = exception_enter(); @@ -17230,7 +15637,7 @@ index 44817c640e99..55aafcff5810 100644 exception_exit(prev_ctx); preempt_latency_stop(1); -@@ -4901,6 +5143,7 @@ int __cond_resched_lock(spinlock_t *lock) +@@ -4939,6 +5172,7 @@ int __cond_resched_lock(spinlock_t *lock) } EXPORT_SYMBOL(__cond_resched_lock); @@ -17238,7 +15645,7 @@ index 44817c640e99..55aafcff5810 100644 int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); -@@ -4914,6 +5157,7 @@ int __sched __cond_resched_softirq(void) +@@ -4952,6 +5186,7 @@ int __sched __cond_resched_softirq(void) return 0; } EXPORT_SYMBOL(__cond_resched_softirq); @@ -17246,7 +15653,7 @@ index 44817c640e99..55aafcff5810 100644 /** * yield - yield the current processor to other threads. -@@ -5283,7 +5527,9 @@ void init_idle(struct task_struct *idle, int cpu) +@@ -5315,7 +5550,9 @@ void init_idle(struct task_struct *idle, int cpu) /* Set the preempt count _outside_ the spinlocks! */ init_idle_preempt_count(idle, cpu); @@ -17257,7 +15664,7 @@ index 44817c640e99..55aafcff5810 100644 /* * The idle tasks have their own, simple scheduling class: */ -@@ -5426,6 +5672,8 @@ void sched_setnuma(struct task_struct *p, int nid) +@@ -5458,6 +5695,8 @@ void sched_setnuma(struct task_struct *p, int nid) #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_HOTPLUG_CPU @@ -17266,7 +15673,7 @@ index 44817c640e99..55aafcff5810 100644 /* * Ensures that the idle task is using init_mm right before its cpu goes * offline. -@@ -5440,7 +5688,12 @@ void idle_task_exit(void) +@@ -5472,7 +5711,12 @@ void idle_task_exit(void) switch_mm_irqs_off(mm, &init_mm, current); finish_arch_post_lock_switch(); } @@ -17280,7 +15687,7 @@ index 44817c640e99..55aafcff5810 100644 } /* -@@ -7315,6 +7568,10 @@ int sched_cpu_dying(unsigned int cpu) +@@ -7418,6 +7662,10 @@ int sched_cpu_dying(unsigned int cpu) update_max_interval(); nohz_balance_exit_idle(cpu); hrtick_clear(rq); @@ -17291,7 +15698,7 @@ index 44817c640e99..55aafcff5810 100644 return 0; } #endif -@@ -7566,7 +7823,7 @@ void __init sched_init(void) +@@ -7698,7 +7946,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { @@ -17301,10 +15708,10 @@ index 44817c640e99..55aafcff5810 100644 return (nested == preempt_offset); } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c -index 1ce8867283dc..766da04b06a0 100644 +index 37e2449186c4..26dcaabde8b3 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c -@@ -697,6 +697,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) +@@ -687,6 +687,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); timer->function = dl_task_timer; @@ -17313,10 +15720,10 @@ index 1ce8867283dc..766da04b06a0 100644 static diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c -index 2a0a9995256d..48a9b6f57249 100644 +index fa178b62ea79..935224123441 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c -@@ -552,6 +552,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) +@@ -558,6 +558,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) P(rt_throttled); PN(rt_time); PN(rt_runtime); @@ -17326,7 +15733,7 @@ index 2a0a9995256d..48a9b6f57249 100644 #undef PN #undef P -@@ -947,6 +950,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) +@@ -953,6 +956,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) #endif P(policy); P(prio); @@ -17334,14 +15741,14 @@ index 2a0a9995256d..48a9b6f57249 100644 + P(migrate_disable); +#endif + P(nr_cpus_allowed); + #undef PN_SCHEDSTAT #undef PN #undef __PN - #undef P diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c -index 8b3610c871f2..1145079af264 100644 +index c242944f5cbd..4aeb2e2e41bc 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -3508,7 +3508,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -3518,7 +3518,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -17350,7 +15757,7 @@ index 8b3610c871f2..1145079af264 100644 /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -3532,7 +3532,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) +@@ -3542,7 +3542,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) return; if (delta > ideal_runtime) @@ -17359,7 +15766,7 @@ index 8b3610c871f2..1145079af264 100644 } static void -@@ -3677,7 +3677,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) +@@ -3684,7 +3684,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) * validating it and just reschedule. */ if (queued) { @@ -17368,7 +15775,7 @@ index 8b3610c871f2..1145079af264 100644 return; } /* -@@ -3859,7 +3859,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) +@@ -3866,7 +3866,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -17377,7 +15784,7 @@ index 8b3610c871f2..1145079af264 100644 } static __always_inline -@@ -4487,7 +4487,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) +@@ -4494,7 +4494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) if (delta < 0) { if (rq->curr == p) @@ -17386,7 +15793,7 @@ index 8b3610c871f2..1145079af264 100644 return; } hrtick_start(rq, delta); -@@ -5676,7 +5676,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ +@@ -5905,7 +5905,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ return; preempt: @@ -17395,7 +15802,7 @@ index 8b3610c871f2..1145079af264 100644 /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -8402,7 +8402,7 @@ static void task_fork_fair(struct task_struct *p) +@@ -8631,7 +8631,7 @@ static void task_fork_fair(struct task_struct *p) * 'current' within the tree based on its new key value. */ swap(curr->vruntime, se->vruntime); @@ -17404,7 +15811,7 @@ index 8b3610c871f2..1145079af264 100644 } se->vruntime -= cfs_rq->min_vruntime; -@@ -8426,7 +8426,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) +@@ -8655,7 +8655,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) */ if (rq->curr == p) { if (p->prio > oldprio) @@ -17438,7 +15845,7 @@ index 69631fa46c2f..6d28fcd08872 100644 #ifdef HAVE_RT_PUSH_IPI /* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c -index d5690b722691..731cd0e98c15 100644 +index 2516b8df6dbb..2556baa0a97e 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) @@ -17458,10 +15865,10 @@ index d5690b722691..731cd0e98c15 100644 #endif /* CONFIG_SMP */ /* We start is dequeued state, because no RT tasks are queued */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h -index c64fc5114004..af58f9b3ece4 100644 +index 055f935d4421..19324ac27026 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -1138,6 +1138,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) +@@ -1163,6 +1163,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ #define WF_FORK 0x02 /* child wakeup after fork */ #define WF_MIGRATED 0x4 /* internal use, task got migrated */ @@ -17469,7 +15876,7 @@ index c64fc5114004..af58f9b3ece4 100644 /* * To aid in avoiding the subversion of "niceness" due to uneven distribution -@@ -1316,6 +1317,15 @@ extern void init_sched_fair_class(void); +@@ -1346,6 +1347,15 @@ extern void init_sched_fair_class(void); extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -17710,7 +16117,7 @@ index 000000000000..1950f40ca725 +} +EXPORT_SYMBOL_GPL(swork_put); diff --git a/kernel/signal.c b/kernel/signal.c -index af21afc00d08..7ead97a43298 100644 +index 75761acc77cf..ae0773c76bb0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -14,6 +14,7 @@ @@ -17943,7 +16350,7 @@ index af21afc00d08..7ead97a43298 100644 } else { /* diff --git a/kernel/softirq.c b/kernel/softirq.c -index 17caf4b63342..a602b7152de7 100644 +index 744fa611cae0..1431d08e6f21 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -21,10 +21,12 @@ @@ -17964,12 +16371,12 @@ index 17caf4b63342..a602b7152de7 100644 DEFINE_PER_CPU(struct task_struct *, ksoftirqd); +#ifdef CONFIG_PREEMPT_RT_FULL -+#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) ++#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); +#endif const char * const softirq_to_name[NR_SOFTIRQS] = { - "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", + "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; @@ -18068,7 +16475,7 @@ index 17caf4b63342..a602b7152de7 100644 /* * we cannot loop indefinitely here to avoid userspace starvation, * but we also don't want to introduce a worst case 1/HZ latency -@@ -77,6 +175,79 @@ static void wakeup_softirqd(void) +@@ -77,6 +175,37 @@ static void wakeup_softirqd(void) wake_up_process(tsk); } @@ -18103,6 +16510,13 @@ index 17caf4b63342..a602b7152de7 100644 + } +} + + /* + * If ksoftirqd is scheduled, we do not want to process pending softirqs + * right now. Let ksoftirqd handle this at its own rate, to get fairness. +@@ -88,6 +217,48 @@ static bool ksoftirqd_running(void) + return tsk && (tsk->state == TASK_RUNNING); + } + +#ifndef CONFIG_PREEMPT_RT_FULL +static inline int ksoftirqd_softirq_pending(void) +{ @@ -18148,7 +16562,7 @@ index 17caf4b63342..a602b7152de7 100644 /* * preempt_count and SOFTIRQ_OFFSET usage: * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving -@@ -232,10 +403,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) +@@ -243,10 +414,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) unsigned long end = jiffies + MAX_SOFTIRQ_TIME; unsigned long old_flags = current->flags; int max_restart = MAX_SOFTIRQ_RESTART; @@ -18159,7 +16573,7 @@ index 17caf4b63342..a602b7152de7 100644 /* * Mask out PF_MEMALLOC s current task context is borrowed for the -@@ -254,36 +423,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) +@@ -265,36 +434,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) /* Reset the pending bitmask before enabling irqs */ set_softirq_pending(0); @@ -18197,7 +16611,7 @@ index 17caf4b63342..a602b7152de7 100644 pending = local_softirq_pending(); if (pending) { -@@ -320,6 +460,310 @@ asmlinkage __visible void do_softirq(void) +@@ -331,6 +471,309 @@ asmlinkage __visible void do_softirq(void) } /* @@ -18407,7 +16821,6 @@ index 17caf4b63342..a602b7152de7 100644 + wakeup_softirqd(); +} + -+ +void __raise_softirq_irqoff(unsigned int nr) +{ + do_raise_softirq_irqoff(nr); @@ -18508,7 +16921,7 @@ index 17caf4b63342..a602b7152de7 100644 * Enter an interrupt context. */ void irq_enter(void) -@@ -330,9 +774,9 @@ void irq_enter(void) +@@ -341,9 +784,9 @@ void irq_enter(void) * Prevent raise_softirq from needlessly waking up ksoftirqd * here, as softirq will be serviced on return from interrupt. */ @@ -18520,20 +16933,26 @@ index 17caf4b63342..a602b7152de7 100644 } __irq_enter(); -@@ -340,6 +784,7 @@ void irq_enter(void) +@@ -351,9 +794,13 @@ void irq_enter(void) static inline void invoke_softirq(void) { ++#ifdef CONFIG_PREEMPT_RT_FULL ++ unsigned long flags; ++#endif ++ + if (ksoftirqd_running()) + return; +- +#ifndef CONFIG_PREEMPT_RT_FULL if (!force_irqthreads) { #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK /* -@@ -359,6 +804,18 @@ static inline void invoke_softirq(void) +@@ -373,6 +820,17 @@ static inline void invoke_softirq(void) } else { wakeup_softirqd(); } +#else /* PREEMPT_RT_FULL */ -+ unsigned long flags; + + local_irq_save(flags); + if (__this_cpu_read(ksoftirqd) && @@ -18547,7 +16966,7 @@ index 17caf4b63342..a602b7152de7 100644 } static inline void tick_irq_exit(void) -@@ -395,26 +852,6 @@ void irq_exit(void) +@@ -409,26 +867,6 @@ void irq_exit(void) trace_hardirq_exit(); /* must be last! */ } @@ -18574,7 +16993,7 @@ index 17caf4b63342..a602b7152de7 100644 void raise_softirq(unsigned int nr) { unsigned long flags; -@@ -424,12 +861,6 @@ void raise_softirq(unsigned int nr) +@@ -438,12 +876,6 @@ void raise_softirq(unsigned int nr) local_irq_restore(flags); } @@ -18587,7 +17006,7 @@ index 17caf4b63342..a602b7152de7 100644 void open_softirq(int nr, void (*action)(struct softirq_action *)) { softirq_vec[nr].action = action; -@@ -446,15 +877,45 @@ struct tasklet_head { +@@ -460,15 +892,45 @@ struct tasklet_head { static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); @@ -18637,7 +17056,7 @@ index 17caf4b63342..a602b7152de7 100644 local_irq_restore(flags); } EXPORT_SYMBOL(__tasklet_schedule); -@@ -464,10 +925,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) +@@ -478,10 +940,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) unsigned long flags; local_irq_save(flags); @@ -18649,7 +17068,7 @@ index 17caf4b63342..a602b7152de7 100644 local_irq_restore(flags); } EXPORT_SYMBOL(__tasklet_hi_schedule); -@@ -476,82 +934,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) +@@ -490,82 +949,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) { BUG_ON(!irqs_disabled()); @@ -18660,7 +17079,7 @@ index 17caf4b63342..a602b7152de7 100644 } EXPORT_SYMBOL(__tasklet_hi_schedule_first); --static void tasklet_action(struct softirq_action *a) +-static __latent_entropy void tasklet_action(struct softirq_action *a) +void tasklet_enable(struct tasklet_struct *t) { - struct tasklet_struct *list; @@ -18781,7 +17200,7 @@ index 17caf4b63342..a602b7152de7 100644 + __tasklet_action(a, list); +} + - static void tasklet_hi_action(struct softirq_action *a) + static __latent_entropy void tasklet_hi_action(struct softirq_action *a) { struct tasklet_struct *list; @@ -18821,7 +17240,7 @@ index 17caf4b63342..a602b7152de7 100644 } void tasklet_init(struct tasklet_struct *t, -@@ -572,7 +1070,7 @@ void tasklet_kill(struct tasklet_struct *t) +@@ -586,7 +1085,7 @@ void tasklet_kill(struct tasklet_struct *t) while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { do { @@ -18830,7 +17249,7 @@ index 17caf4b63342..a602b7152de7 100644 } while (test_bit(TASKLET_STATE_SCHED, &t->state)); } tasklet_unlock_wait(t); -@@ -646,25 +1144,26 @@ void __init softirq_init(void) +@@ -660,25 +1159,26 @@ void __init softirq_init(void) open_softirq(HI_SOFTIRQ, tasklet_hi_action); } @@ -18874,7 +17293,7 @@ index 17caf4b63342..a602b7152de7 100644 } #ifdef CONFIG_HOTPLUG_CPU -@@ -746,16 +1245,31 @@ static struct notifier_block cpu_nfb = { +@@ -745,17 +1245,31 @@ static int takeover_tasklets(unsigned int cpu) static struct smp_hotplug_thread softirq_threads = { .store = &ksoftirqd, @@ -18897,20 +17316,21 @@ index 17caf4b63342..a602b7152de7 100644 + static __init int spawn_ksoftirqd(void) { - register_cpu_notifier(&cpu_nfb); - + cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, + takeover_tasklets); BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); +- +#ifdef CONFIG_PREEMPT_RT_FULL + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); +#endif - return 0; } + early_initcall(spawn_ksoftirqd); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c -index 4a1ca5f6da7e..3226e22b9e42 100644 +index ec9ab2f01489..8b89dbedeaff 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c -@@ -37,7 +37,7 @@ struct cpu_stop_done { +@@ -36,7 +36,7 @@ struct cpu_stop_done { struct cpu_stopper { struct task_struct *thread; @@ -18919,7 +17339,7 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 bool enabled; /* is this stopper enabled? */ struct list_head works; /* list of pending works */ -@@ -83,14 +83,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) +@@ -78,14 +78,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) unsigned long flags; bool enabled; @@ -18936,10 +17356,10 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 return enabled; } -@@ -232,8 +232,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, +@@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, + struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); int err; - - lg_double_lock(&stop_cpus_lock, cpu1, cpu2); + retry: - spin_lock_irq(&stopper1->lock); - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock_irq(&stopper1->lock); @@ -18947,7 +17367,7 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 err = -ENOENT; if (!stopper1->enabled || !stopper2->enabled) -@@ -243,8 +243,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, +@@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, __cpu_stop_queue_work(stopper1, work1); __cpu_stop_queue_work(stopper2, work2); unlock: @@ -18955,46 +17375,10 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 - spin_unlock_irq(&stopper1->lock); + raw_spin_unlock(&stopper2->lock); + raw_spin_unlock_irq(&stopper1->lock); - lg_double_unlock(&stop_cpus_lock, cpu1, cpu2); - - return err; -@@ -321,18 +321,21 @@ static DEFINE_MUTEX(stop_cpus_mutex); - static bool queue_stop_cpus_work(const struct cpumask *cpumask, - cpu_stop_fn_t fn, void *arg, -- struct cpu_stop_done *done) -+ struct cpu_stop_done *done, bool inactive) - { - struct cpu_stop_work *work; - unsigned int cpu; - bool queued = false; - - /* -- * Disable preemption while queueing to avoid getting -- * preempted by a stopper which might wait for other stoppers -- * to enter @fn which can lead to deadlock. -+ * Make sure that all work is queued on all cpus before -+ * any of the cpus can execute it. - */ -- lg_global_lock(&stop_cpus_lock); -+ if (!inactive) -+ lg_global_lock(&stop_cpus_lock); -+ else -+ lg_global_trylock_relax(&stop_cpus_lock); -+ - for_each_cpu(cpu, cpumask) { - work = &per_cpu(cpu_stopper.stop_work, cpu); - work->fn = fn; -@@ -352,7 +355,7 @@ static int __stop_cpus(const struct cpumask *cpumask, - struct cpu_stop_done done; - - cpu_stop_init_done(&done, cpumask_weight(cpumask)); -- if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) -+ if (!queue_stop_cpus_work(cpumask, fn, arg, &done, false)) - return -ENOENT; - wait_for_completion(&done.completion); - return done.ret; -@@ -433,9 +436,9 @@ static int cpu_stop_should_run(unsigned int cpu) + if (unlikely(err == -EDEADLK)) { + while (stop_cpus_in_progress) +@@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu) unsigned long flags; int run; @@ -19006,7 +17390,7 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 return run; } -@@ -446,13 +449,13 @@ static void cpu_stopper_thread(unsigned int cpu) +@@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu) repeat: work = NULL; @@ -19022,24 +17406,16 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 if (work) { cpu_stop_fn_t fn = work->fn; -@@ -460,6 +463,16 @@ static void cpu_stopper_thread(unsigned int cpu) +@@ -475,6 +475,8 @@ static void cpu_stopper_thread(unsigned int cpu) struct cpu_stop_done *done = work->done; int ret; -+ /* -+ * Wait until the stopper finished scheduling on all -+ * cpus -+ */ -+ lg_global_lock(&stop_cpus_lock); -+ /* -+ * Let other cpu threads continue as well -+ */ -+ lg_global_unlock(&stop_cpus_lock); ++ /* XXX */ + /* cpu stop callbacks must not sleep, make in_atomic() == T */ preempt_count_inc(); ret = fn(arg); -@@ -526,10 +539,12 @@ static int __init cpu_stop_init(void) +@@ -541,7 +543,7 @@ static int __init cpu_stop_init(void) for_each_possible_cpu(cpu) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); @@ -19048,22 +17424,8 @@ index 4a1ca5f6da7e..3226e22b9e42 100644 INIT_LIST_HEAD(&stopper->works); } -+ lg_lock_init(&stop_cpus_lock, "stop_cpus_lock"); -+ - BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); - stop_machine_unpark(raw_smp_processor_id()); - stop_machine_initialized = true; -@@ -624,7 +639,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, - set_state(&msdata, MULTI_STOP_PREPARE); - cpu_stop_init_done(&done, num_active_cpus()); - queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, -- &done); -+ &done, true); - ret = multi_cpu_stop(&msdata); - - /* Busy wait for completion. */ diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c -index 9ba7c820fc23..d85f638fd99e 100644 +index bb5ec425dfe0..8338b14ed3a3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -53,6 +53,7 @@ @@ -19962,7 +18324,7 @@ index 4fcd99e12aa0..5a47f2e98faf 100644 clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c -index 2ec7c00228f3..c1b30b8c671a 100644 +index 3bcb61b52f6c..66d85482a96e 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) @@ -20007,7 +18369,7 @@ index 2ec7c00228f3..c1b30b8c671a 100644 return period; } -@@ -212,6 +217,7 @@ static void nohz_full_kick_func(struct irq_work *work) +@@ -215,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work) static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { .func = nohz_full_kick_func, @@ -20015,7 +18377,7 @@ index 2ec7c00228f3..c1b30b8c671a 100644 }; /* -@@ -670,10 +676,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, +@@ -673,10 +679,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, /* Read jiffies and the time when jiffies were updated last */ do { @@ -20028,7 +18390,7 @@ index 2ec7c00228f3..c1b30b8c671a 100644 ts->last_jiffies = basejiff; if (rcu_needs_cpu(basemono, &next_rcu) || -@@ -874,14 +880,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) +@@ -877,14 +883,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) return false; if (unlikely(local_softirq_pending() && cpu_online(cpu))) { @@ -20044,7 +18406,7 @@ index 2ec7c00228f3..c1b30b8c671a 100644 return false; } -@@ -1190,6 +1189,7 @@ void tick_setup_sched_timer(void) +@@ -1193,6 +1192,7 @@ void tick_setup_sched_timer(void) * Emulate tick processing via per-CPU hrtimers: */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); @@ -20053,7 +18415,7 @@ index 2ec7c00228f3..c1b30b8c671a 100644 /* Get the next period (per-CPU) */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c -index 37dec7e3db43..fa8d90d2acc3 100644 +index 46e312e9be38..fa75cf5d9253 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2328,8 +2328,10 @@ EXPORT_SYMBOL(hardpps); @@ -20084,7 +18446,7 @@ index 704f595ce83f..763a3e5121ff 100644 #define CS_NAME_LEN 32 diff --git a/kernel/time/timer.c b/kernel/time/timer.c -index 32bf6f75a8fe..ba53447a03f5 100644 +index c611c47de884..08a5ab762495 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64); @@ -20100,7 +18462,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 unsigned long clk; unsigned long next_expiry; unsigned int cpu; -@@ -947,10 +950,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, +@@ -948,10 +951,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); @@ -20113,7 +18475,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } cpu_relax(); } -@@ -1017,9 +1020,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) +@@ -1023,9 +1026,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) /* See the comment in lock_timer_base() */ timer->flags |= TIMER_MIGRATING; @@ -20125,7 +18487,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); } -@@ -1040,7 +1043,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) +@@ -1050,7 +1053,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } out_unlock: @@ -20134,7 +18496,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 return ret; } -@@ -1134,19 +1137,46 @@ void add_timer_on(struct timer_list *timer, int cpu) +@@ -1144,19 +1147,46 @@ void add_timer_on(struct timer_list *timer, int cpu) if (base != new_base) { timer->flags |= TIMER_MIGRATING; @@ -20184,7 +18546,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 /** * del_timer - deactive a timer. * @timer: the timer to be deactivated -@@ -1170,7 +1200,7 @@ int del_timer(struct timer_list *timer) +@@ -1180,7 +1210,7 @@ int del_timer(struct timer_list *timer) if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); @@ -20193,7 +18555,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } return ret; -@@ -1198,13 +1228,13 @@ int try_to_del_timer_sync(struct timer_list *timer) +@@ -1208,13 +1238,13 @@ int try_to_del_timer_sync(struct timer_list *timer) timer_stats_timer_clear_start_info(timer); ret = detach_if_pending(timer, base, true); } @@ -20209,7 +18571,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 /** * del_timer_sync - deactivate a timer and wait for the handler to finish. * @timer: the timer to be deactivated -@@ -1264,7 +1294,7 @@ int del_timer_sync(struct timer_list *timer) +@@ -1274,7 +1304,7 @@ int del_timer_sync(struct timer_list *timer) int ret = try_to_del_timer_sync(timer); if (ret >= 0) return ret; @@ -20218,7 +18580,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } } EXPORT_SYMBOL(del_timer_sync); -@@ -1329,14 +1359,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) +@@ -1339,14 +1369,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) fn = timer->function; data = timer->data; @@ -20241,7 +18603,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } } } -@@ -1505,7 +1538,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) +@@ -1515,7 +1548,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (cpu_is_offline(smp_processor_id())) return expires; @@ -20250,7 +18612,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 nextevt = __next_timer_interrupt(base); is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); base->next_expiry = nextevt; -@@ -1529,7 +1562,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) +@@ -1543,7 +1576,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if ((expires - basem) > TICK_NSEC) base->is_idle = true; } @@ -20259,7 +18621,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 return cmp_next_hrtimer_event(basem, expires); } -@@ -1594,13 +1627,13 @@ void update_process_times(int user_tick) +@@ -1608,13 +1641,13 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); @@ -20275,7 +18637,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 run_posix_cpu_timers(p); } -@@ -1616,7 +1649,7 @@ static inline void __run_timers(struct timer_base *base) +@@ -1630,7 +1663,7 @@ static inline void __run_timers(struct timer_base *base) if (!time_after_eq(jiffies, base->clk)) return; @@ -20284,7 +18646,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 while (time_after_eq(jiffies, base->clk)) { -@@ -1626,8 +1659,8 @@ static inline void __run_timers(struct timer_base *base) +@@ -1640,8 +1673,8 @@ static inline void __run_timers(struct timer_base *base) while (levels--) expire_timers(base, heads + levels); } @@ -20295,7 +18657,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } /* -@@ -1637,6 +1670,8 @@ static void run_timer_softirq(struct softirq_action *h) +@@ -1651,6 +1684,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); @@ -20304,7 +18666,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 __run_timers(base); if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); -@@ -1822,16 +1857,16 @@ int timers_dead_cpu(unsigned int cpu) +@@ -1836,16 +1871,16 @@ int timers_dead_cpu(unsigned int cpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. */ @@ -20325,7 +18687,7 @@ index 32bf6f75a8fe..ba53447a03f5 100644 put_cpu_ptr(&timer_bases); } return 0; -@@ -1847,8 +1882,11 @@ static void __init init_timer_cpu(int cpu) +@@ -1861,8 +1896,11 @@ static void __init init_timer_cpu(int cpu) for (i = 0; i < NR_BASES; i++) { base = per_cpu_ptr(&timer_bases[i], cpu); base->cpu = cpu; @@ -20339,10 +18701,10 @@ index 32bf6f75a8fe..ba53447a03f5 100644 } diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig -index f4b86e8ca1e7..340f14eef24a 100644 +index 2a96b063d659..812e37237eb8 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig -@@ -187,6 +187,24 @@ config IRQSOFF_TRACER +@@ -182,6 +182,24 @@ config IRQSOFF_TRACER enabled. This option and the preempt-off timing option can be used together or separately.) @@ -20367,15 +18729,7 @@ index f4b86e8ca1e7..340f14eef24a 100644 config PREEMPT_TRACER bool "Preemption-off Latency Tracer" default n -@@ -197,6 +215,7 @@ config PREEMPT_TRACER - select RING_BUFFER_ALLOW_SWAP - select TRACER_SNAPSHOT - select TRACER_SNAPSHOT_PER_CPU_SWAP -+ select USING_GET_LOCK_PARENT_IP - help - This option measures the time spent in preemption-off critical - sections, with microsecond accuracy. -@@ -211,6 +230,24 @@ config PREEMPT_TRACER +@@ -206,6 +224,24 @@ config PREEMPT_TRACER enabled. This option and the irqs-off timing option can be used together or separately.) @@ -20400,9 +18754,9 @@ index f4b86e8ca1e7..340f14eef24a 100644 config SCHED_TRACER bool "Scheduling Latency Tracer" select GENERIC_TRACER -@@ -221,6 +258,74 @@ config SCHED_TRACER - This tracer tracks the latency of the highest priority task - to be scheduled in, starting from the point it has woken up. +@@ -251,6 +287,74 @@ config HWLAT_TRACER + file. Every time a latency is greater than tracing_thresh, it will + be recorded into the ring buffer. +config WAKEUP_LATENCY_HIST + bool "Scheduling Latency Histogram" @@ -20476,13 +18830,13 @@ index f4b86e8ca1e7..340f14eef24a 100644 bool "Trace process context switches and events" depends on !GENERIC_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile -index d0a1617b52b4..6bf9e9ff1fa5 100644 +index e57980845549..83af000b783c 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile -@@ -41,6 +41,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o - obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o +@@ -38,6 +38,10 @@ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o + obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o @@ -21675,7 +20029,7 @@ index 000000000000..7f6ee70dea41 + +device_initcall(latency_hist_init); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c -index 7bc56762ca35..84ffcb813263 100644 +index 8696ce6bf2f6..277f048a4695 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, @@ -21746,10 +20100,10 @@ index 7bc56762ca35..84ffcb813263 100644 void diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h -index f783df416726..6f2d0fa4fbf1 100644 +index fd24b1f9ac43..852b2c81be25 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h -@@ -123,6 +123,7 @@ struct kretprobe_trace_entry_head { +@@ -124,6 +124,7 @@ struct kretprobe_trace_entry_head { * NEED_RESCHED - reschedule is requested * HARDIRQ - inside an interrupt handler * SOFTIRQ - inside a softirq handler @@ -21757,7 +20111,7 @@ index f783df416726..6f2d0fa4fbf1 100644 */ enum trace_flag_type { TRACE_FLAG_IRQS_OFF = 0x01, -@@ -132,6 +133,7 @@ enum trace_flag_type { +@@ -133,6 +134,7 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, TRACE_FLAG_PREEMPT_RESCHED = 0x20, TRACE_FLAG_NMI = 0x40, @@ -21866,7 +20220,7 @@ index 03cdff84d026..940bd10b4406 100644 start_critical_timing(a0, a1); } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c -index 0bb9cf2d53e6..455a7464772f 100644 +index 3fc20422c166..65a6dde71a7d 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) @@ -21934,7 +20288,7 @@ index b069ccbfb0b0..1a2e88e98b5e 100644 struct user_struct *alloc_uid(kuid_t uid) diff --git a/kernel/watchdog.c b/kernel/watchdog.c -index 9acb29f280ec..caba62080411 100644 +index 6d1020c03d41..70c6a2f79f7e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts) @@ -21946,7 +20300,7 @@ index 9acb29f280ec..caba62080411 100644 static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, -@@ -349,6 +351,13 @@ static void watchdog_overflow_callback(struct perf_event *event, +@@ -348,6 +350,13 @@ static void watchdog_overflow_callback(struct perf_event *event, /* only print hardlockups once */ if (__this_cpu_read(hard_watchdog_warn) == true) return; @@ -21960,7 +20314,7 @@ index 9acb29f280ec..caba62080411 100644 pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); print_modules(); -@@ -366,6 +375,7 @@ static void watchdog_overflow_callback(struct perf_event *event, +@@ -365,6 +374,7 @@ static void watchdog_overflow_callback(struct perf_event *event, !test_and_set_bit(0, &hardlockup_allcpu_dumped)) trigger_allbutself_cpu_backtrace(); @@ -21968,7 +20322,7 @@ index 9acb29f280ec..caba62080411 100644 if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); -@@ -513,6 +523,7 @@ static void watchdog_enable(unsigned int cpu) +@@ -512,6 +522,7 @@ static void watchdog_enable(unsigned int cpu) /* kick off the timer for the hardlockup detector */ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; @@ -21977,7 +20331,7 @@ index 9acb29f280ec..caba62080411 100644 /* Enable the perf event */ watchdog_nmi_enable(cpu); diff --git a/kernel/workqueue.c b/kernel/workqueue.c -index ef071ca73fc3..c7a62d6adb00 100644 +index 479d840db286..24eba6620a45 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,6 +48,8 @@ @@ -22249,7 +20603,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /** -@@ -1098,12 +1124,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) +@@ -1098,12 +1124,14 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) { if (pwq) { /* @@ -22258,14 +20612,16 @@ index ef071ca73fc3..c7a62d6adb00 100644 * following lock operations are safe. */ - spin_lock_irq(&pwq->pool->lock); ++ rcu_read_lock(); + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); put_pwq(pwq); - spin_unlock_irq(&pwq->pool->lock); + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); ++ rcu_read_unlock(); } } -@@ -1207,7 +1233,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, +@@ -1207,7 +1235,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, struct worker_pool *pool; struct pool_workqueue *pwq; @@ -22274,7 +20630,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 /* try to steal the timer if it exists */ if (is_dwork) { -@@ -1226,6 +1252,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, +@@ -1226,6 +1254,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; @@ -22282,7 +20638,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 /* * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. -@@ -1264,14 +1291,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, +@@ -1264,14 +1293,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); @@ -22301,7 +20657,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return -EAGAIN; } -@@ -1373,7 +1402,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, +@@ -1373,7 +1404,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queued or lose PENDING. Grabbing PENDING and queueing should * happen with IRQ disabled. */ @@ -22310,7 +20666,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 debug_work_activate(work); -@@ -1381,6 +1410,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, +@@ -1381,6 +1412,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, if (unlikely(wq->flags & __WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; @@ -22318,7 +20674,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 retry: if (req_cpu == WORK_CPU_UNBOUND) cpu = wq_select_unbound_cpu(raw_smp_processor_id()); -@@ -1437,10 +1467,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, +@@ -1437,10 +1469,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, /* pwq determined, queue */ trace_workqueue_queue_work(req_cpu, pwq, work); @@ -22331,7 +20687,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 pwq->nr_in_flight[pwq->work_color]++; work_flags = work_color_to_flags(pwq->work_color); -@@ -1458,7 +1486,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, +@@ -1458,7 +1488,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, insert_work(pwq, work, worklist, work_flags); @@ -22341,7 +20697,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /** -@@ -1478,14 +1508,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, +@@ -1478,14 +1510,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, bool ret = false; unsigned long flags; @@ -22358,7 +20714,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return ret; } EXPORT_SYMBOL(queue_work_on); -@@ -1552,14 +1582,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, +@@ -1552,14 +1584,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, unsigned long flags; /* read the comment in __queue_work() */ @@ -22375,7 +20731,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return ret; } EXPORT_SYMBOL(queue_delayed_work_on); -@@ -1594,7 +1624,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, +@@ -1594,7 +1626,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, if (likely(ret >= 0)) { __queue_delayed_work(cpu, wq, dwork, delay); @@ -22384,7 +20740,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /* -ENOENT from try_to_grab_pending() becomes %true */ -@@ -1627,7 +1657,9 @@ static void worker_enter_idle(struct worker *worker) +@@ -1627,7 +1659,9 @@ static void worker_enter_idle(struct worker *worker) worker->last_active = jiffies; /* idle_list is LIFO */ @@ -22394,7 +20750,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); -@@ -1660,7 +1692,9 @@ static void worker_leave_idle(struct worker *worker) +@@ -1660,7 +1694,9 @@ static void worker_leave_idle(struct worker *worker) return; worker_clr_flags(worker, WORKER_IDLE); pool->nr_idle--; @@ -22404,7 +20760,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } static struct worker *alloc_worker(int node) -@@ -1826,7 +1860,9 @@ static void destroy_worker(struct worker *worker) +@@ -1826,7 +1862,9 @@ static void destroy_worker(struct worker *worker) pool->nr_workers--; pool->nr_idle--; @@ -22414,7 +20770,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 worker->flags |= WORKER_DIE; wake_up_process(worker->task); } -@@ -2785,14 +2821,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) +@@ -2785,14 +2823,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) might_sleep(); @@ -22432,7 +20788,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 /* see the comment in try_to_grab_pending() with the same code */ pwq = get_work_pwq(work); if (pwq) { -@@ -2821,10 +2857,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) +@@ -2821,10 +2859,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) else lock_map_acquire_read(&pwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map); @@ -22445,7 +20801,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return false; } -@@ -2911,7 +2948,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) +@@ -2911,7 +2950,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) /* tell other tasks trying to grab @work to back off */ mark_work_canceling(work); @@ -22454,7 +20810,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 flush_work(work); clear_work_data(work); -@@ -2966,10 +3003,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); +@@ -2966,10 +3005,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); */ bool flush_delayed_work(struct delayed_work *dwork) { @@ -22467,16 +20823,16 @@ index ef071ca73fc3..c7a62d6adb00 100644 return flush_work(&dwork->work); } EXPORT_SYMBOL(flush_delayed_work); -@@ -3004,7 +3041,7 @@ bool cancel_delayed_work(struct delayed_work *dwork) +@@ -2987,7 +3026,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) + return false; - set_work_pool_and_clear_pending(&dwork->work, - get_work_pool_id(&dwork->work)); + set_work_pool_and_clear_pending(work, get_work_pool_id(work)); - local_irq_restore(flags); + local_unlock_irqrestore(pendingb_lock, flags); return ret; } - EXPORT_SYMBOL(cancel_delayed_work); -@@ -3233,7 +3270,7 @@ static void rcu_free_pool(struct rcu_head *rcu) + +@@ -3245,7 +3284,7 @@ static void rcu_free_pool(struct rcu_head *rcu) * put_unbound_pool - put a worker_pool * @pool: worker_pool to put * @@ -22485,7 +20841,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 * safe manner. get_unbound_pool() calls this function on its failure path * and this function should be able to release pools which went through, * successfully or not, init_worker_pool(). -@@ -3287,8 +3324,8 @@ static void put_unbound_pool(struct worker_pool *pool) +@@ -3299,8 +3338,8 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->idle_timer); del_timer_sync(&pool->mayday_timer); @@ -22496,7 +20852,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /** -@@ -3395,14 +3432,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) +@@ -3407,14 +3446,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); @@ -22513,7 +20869,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /** -@@ -4052,7 +4089,7 @@ void destroy_workqueue(struct workqueue_struct *wq) +@@ -4064,7 +4103,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ @@ -22522,7 +20878,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } else { /* * We're the sole accessor of @wq at this point. Directly -@@ -4145,7 +4182,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) +@@ -4157,7 +4196,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) struct pool_workqueue *pwq; bool ret; @@ -22532,7 +20888,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 if (cpu == WORK_CPU_UNBOUND) cpu = smp_processor_id(); -@@ -4156,7 +4194,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) +@@ -4168,7 +4208,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); ret = !list_empty(&pwq->delayed_works); @@ -22542,7 +20898,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return ret; } -@@ -4182,15 +4221,15 @@ unsigned int work_busy(struct work_struct *work) +@@ -4194,15 +4235,15 @@ unsigned int work_busy(struct work_struct *work) if (work_pending(work)) ret |= WORK_BUSY_PENDING; @@ -22562,7 +20918,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 return ret; } -@@ -4379,7 +4418,7 @@ void show_workqueue_state(void) +@@ -4391,7 +4432,7 @@ void show_workqueue_state(void) unsigned long flags; int pi; @@ -22571,7 +20927,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 pr_info("Showing busy workqueues and worker pools:\n"); -@@ -4432,7 +4471,7 @@ void show_workqueue_state(void) +@@ -4444,7 +4485,7 @@ void show_workqueue_state(void) spin_unlock_irqrestore(&pool->lock, flags); } @@ -22580,7 +20936,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } /* -@@ -4770,16 +4809,16 @@ bool freeze_workqueues_busy(void) +@@ -4782,16 +4823,16 @@ bool freeze_workqueues_busy(void) * nr_active is monotonically decreasing. It's safe * to peek without lock. */ @@ -22600,7 +20956,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 } out_unlock: mutex_unlock(&wq_pool_mutex); -@@ -4969,7 +5008,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, +@@ -4981,7 +5022,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, const char *delim = ""; int node, written = 0; @@ -22610,7 +20966,7 @@ index ef071ca73fc3..c7a62d6adb00 100644 for_each_node(node) { written += scnprintf(buf + written, PAGE_SIZE - written, "%s%d:%d", delim, node, -@@ -4977,7 +5017,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, +@@ -4989,7 +5031,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, delim = " "; } written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); @@ -22643,7 +20999,7 @@ index 8635417c587b..f000c4d6917e 100644 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ diff --git a/lib/Kconfig b/lib/Kconfig -index d79909dc01ec..fd2accb2f2bb 100644 +index 260a80e313b9..b06becb3f477 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -400,6 +400,7 @@ config CHECK_SIGNATURE @@ -22654,38 +21010,8 @@ index d79909dc01ec..fd2accb2f2bb 100644 help Use dynamic allocation for cpumask_var_t, instead of putting them on the stack. This is a bit more expensive, but avoids -diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug -index cab7405f48d2..dbc49c48ff53 100644 ---- a/lib/Kconfig.debug -+++ b/lib/Kconfig.debug -@@ -977,6 +977,7 @@ config TIMER_STATS - config DEBUG_PREEMPT - bool "Debug preemptible kernel" - depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT -+ select USING_GET_LOCK_PARENT_IP - default y - help - If you say Y here then the kernel will use a debug variant of the -@@ -1159,8 +1160,17 @@ config LOCK_TORTURE_TEST - - endmenu # lock debugging - -+config USING_GET_LOCK_PARENT_IP -+ bool -+ help -+ Enables the use of the function get_lock_parent_ip() that -+ will use __builtin_return_address(n) with n > 0 causing -+ some gcc warnings. When this is selected, those warnings -+ will be suppressed. -+ - config TRACE_IRQFLAGS - bool -+ select USING_GET_LOCK_PARENT_IP - help - Enables hooks to interrupt enabling and disabling for - either tracing or lock debugging. diff --git a/lib/debugobjects.c b/lib/debugobjects.c -index a8e12601eb37..c76d5f0beafe 100644 +index 056052dc8e91..d8494e126de8 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) @@ -22797,7 +21123,7 @@ index 6098336df267..9decbe914595 100644 break; diff --git a/lib/irq_poll.c b/lib/irq_poll.c -index 836f7db4e548..709d4eed1df9 100644 +index 1d6565e81030..b23a79761df7 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop) @@ -22816,7 +21142,7 @@ index 836f7db4e548..709d4eed1df9 100644 } EXPORT_SYMBOL(irq_poll_complete); -@@ -95,6 +97,7 @@ static void irq_poll_softirq(struct softirq_action *h) +@@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) } local_irq_enable(); @@ -22824,7 +21150,7 @@ index 836f7db4e548..709d4eed1df9 100644 /* Even though interrupts have been re-enabled, this * access is safe because interrupts can only add new -@@ -132,6 +135,7 @@ static void irq_poll_softirq(struct softirq_action *h) +@@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); @@ -22832,16 +21158,16 @@ index 836f7db4e548..709d4eed1df9 100644 } /** -@@ -199,6 +203,7 @@ static int irq_poll_cpu_notify(struct notifier_block *self, - this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); - local_irq_enable(); -+ preempt_check_resched_rt(); - } +@@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned int cpu) + this_cpu_ptr(&blk_cpu_iopoll)); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); + local_irq_enable(); ++ preempt_check_resched_rt(); - return NOTIFY_OK; + return 0; + } diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c -index 872a15a2a637..b93a6103fa4d 100644 +index f3a217ea0388..4611b156ef79 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) @@ -23169,13 +21495,13 @@ index 1afec32de6f2..11fa431046a8 100644 dump_stack(); diff --git a/localversion-rt b/localversion-rt new file mode 100644 -index 000000000000..0efe7ba1930e +index 000000000000..c3054d08a112 --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt5 ++-rt2 diff --git a/mm/Kconfig b/mm/Kconfig -index be0ee11fa0d9..fe2857d67973 100644 +index 86e3e0e74d20..77e5862a1ed2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS @@ -23204,10 +21530,10 @@ index 8fde443f36d7..d7a863b0ec20 100644 } diff --git a/mm/compaction.c b/mm/compaction.c -index 9affb2908304..d5eb0e52e96f 100644 +index 70e6bec46dc2..6678ed58b7c6 100644 --- a/mm/compaction.c +++ b/mm/compaction.c -@@ -1585,10 +1585,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro +@@ -1593,10 +1593,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro block_start_pfn(cc->migrate_pfn, cc->order); if (cc->last_migrated_pfn < current_block_start) { @@ -23223,7 +21549,7 @@ index 9affb2908304..d5eb0e52e96f 100644 cc->last_migrated_pfn = 0; } diff --git a/mm/filemap.c b/mm/filemap.c -index ced9ef6c06b0..19f6f0d77604 100644 +index 9a50acecc473..59f749a0b738 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping, @@ -23283,7 +21609,7 @@ index 50b4ca6787f0..77518a3b35a1 100644 unsigned int nr_free_highpages (void) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c -index 4be518d4e68a..724240ca2f35 100644 +index 0f870ba43942..f219b4066e6d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -67,6 +67,7 @@ @@ -23303,7 +21629,7 @@ index 4be518d4e68a..724240ca2f35 100644 /* Whether legacy memory+swap accounting is active */ static bool do_memsw_account(void) { -@@ -1724,6 +1727,7 @@ struct memcg_stock_pcp { +@@ -1694,6 +1697,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE 0 }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); @@ -23311,7 +21637,7 @@ index 4be518d4e68a..724240ca2f35 100644 static DEFINE_MUTEX(percpu_charge_mutex); /** -@@ -1746,7 +1750,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -1716,7 +1720,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) if (nr_pages > CHARGE_BATCH) return ret; @@ -23320,7 +21646,7 @@ index 4be518d4e68a..724240ca2f35 100644 stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { -@@ -1754,7 +1758,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -1724,7 +1728,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) ret = true; } @@ -23329,7 +21655,7 @@ index 4be518d4e68a..724240ca2f35 100644 return ret; } -@@ -1781,13 +1785,13 @@ static void drain_local_stock(struct work_struct *dummy) +@@ -1751,13 +1755,13 @@ static void drain_local_stock(struct work_struct *dummy) struct memcg_stock_pcp *stock; unsigned long flags; @@ -23345,7 +21671,7 @@ index 4be518d4e68a..724240ca2f35 100644 } /* -@@ -1799,7 +1803,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -1769,7 +1773,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) struct memcg_stock_pcp *stock; unsigned long flags; @@ -23354,7 +21680,7 @@ index 4be518d4e68a..724240ca2f35 100644 stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ -@@ -1808,7 +1812,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) +@@ -1778,7 +1782,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) } stock->nr_pages += nr_pages; @@ -23363,7 +21689,7 @@ index 4be518d4e68a..724240ca2f35 100644 } /* -@@ -1824,7 +1828,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) +@@ -1794,7 +1798,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) return; /* Notify other cpus that system-wide "drain" is running */ get_online_cpus(); @@ -23372,7 +21698,7 @@ index 4be518d4e68a..724240ca2f35 100644 for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); struct mem_cgroup *memcg; -@@ -1841,7 +1845,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) +@@ -1811,7 +1815,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) schedule_work_on(cpu, &stock->work); } } @@ -23381,7 +21707,7 @@ index 4be518d4e68a..724240ca2f35 100644 put_online_cpus(); mutex_unlock(&percpu_charge_mutex); } -@@ -4566,12 +4570,12 @@ static int mem_cgroup_move_account(struct page *page, +@@ -4550,12 +4554,12 @@ static int mem_cgroup_move_account(struct page *page, ret = 0; @@ -23396,7 +21722,7 @@ index 4be518d4e68a..724240ca2f35 100644 out_unlock: unlock_page(page); out: -@@ -5444,10 +5448,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, +@@ -5430,10 +5434,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, commit_charge(page, memcg, lrucare); @@ -23409,7 +21735,7 @@ index 4be518d4e68a..724240ca2f35 100644 if (do_memsw_account() && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; -@@ -5503,14 +5507,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, +@@ -5489,14 +5493,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, memcg_oom_recover(memcg); } @@ -23426,7 +21752,7 @@ index 4be518d4e68a..724240ca2f35 100644 if (!mem_cgroup_is_root(memcg)) css_put_many(&memcg->css, nr_pages); -@@ -5665,10 +5669,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) +@@ -5651,10 +5655,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) commit_charge(newpage, memcg, false); @@ -23439,7 +21765,7 @@ index 4be518d4e68a..724240ca2f35 100644 } DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); -@@ -5845,6 +5849,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +@@ -5834,6 +5838,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) { struct mem_cgroup *memcg, *swap_memcg; unsigned short oldid; @@ -23447,7 +21773,7 @@ index 4be518d4e68a..724240ca2f35 100644 VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); -@@ -5885,12 +5890,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +@@ -5874,12 +5879,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) * important here to have the interrupts disabled because it is the * only synchronisation we have for udpating the per-CPU variables. */ @@ -23485,7 +21811,7 @@ index 6f4d27c5bb32..5cd25c745a8f 100644 #ifdef finish_arch_post_lock_switch finish_arch_post_lock_switch(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c -index a2214c64ed3c..4be4d5d66f73 100644 +index 34ada718ef47..21f0dc3fe2aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -61,6 +61,7 @@ @@ -23496,7 +21822,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 #include #include #include -@@ -276,6 +277,18 @@ EXPORT_SYMBOL(nr_node_ids); +@@ -281,6 +282,18 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif @@ -23515,7 +21841,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 int page_group_by_mobility_disabled __read_mostly; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT -@@ -1056,7 +1069,7 @@ static bool bulkfree_pcp_prepare(struct page *page) +@@ -1072,7 +1085,7 @@ static bool bulkfree_pcp_prepare(struct page *page) #endif /* CONFIG_DEBUG_VM */ /* @@ -23524,7 +21850,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 * Assumes all pages on list are in same zone, and of same order. * count is the number of pages to free. * -@@ -1067,19 +1080,58 @@ static bool bulkfree_pcp_prepare(struct page *page) +@@ -1083,19 +1096,58 @@ static bool bulkfree_pcp_prepare(struct page *page) * pinned" detection logic. */ static void free_pcppages_bulk(struct zone *zone, int count, @@ -23587,7 +21913,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 while (count) { struct page *page; struct list_head *list; -@@ -1095,7 +1147,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, +@@ -1111,7 +1163,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, batch_free++; if (++migratetype == MIGRATE_PCPTYPES) migratetype = 0; @@ -23596,7 +21922,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } while (list_empty(list)); /* This is the only non-empty list. Free them all. */ -@@ -1103,27 +1155,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, +@@ -1119,27 +1171,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, batch_free = count; do { @@ -23625,7 +21951,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } static void free_one_page(struct zone *zone, -@@ -1132,7 +1169,9 @@ static void free_one_page(struct zone *zone, +@@ -1148,7 +1185,9 @@ static void free_one_page(struct zone *zone, int migratetype) { unsigned long nr_scanned; @@ -23636,7 +21962,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); if (nr_scanned) __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); -@@ -1142,7 +1181,7 @@ static void free_one_page(struct zone *zone, +@@ -1158,7 +1197,7 @@ static void free_one_page(struct zone *zone, migratetype = get_pfnblock_migratetype(page, pfn); } __free_one_page(page, pfn, zone, order, migratetype); @@ -23645,7 +21971,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } static void __meminit __init_single_page(struct page *page, unsigned long pfn, -@@ -1228,10 +1267,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) +@@ -1244,10 +1283,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) return; migratetype = get_pfnblock_migratetype(page, pfn); @@ -23658,7 +21984,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } static void __init __free_pages_boot_core(struct page *page, unsigned int order) -@@ -2219,16 +2258,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, +@@ -2246,16 +2285,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) { unsigned long flags; @@ -23680,7 +22006,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } #endif -@@ -2244,16 +2285,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) +@@ -2271,16 +2312,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) unsigned long flags; struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; @@ -23706,7 +22032,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } /* -@@ -2339,8 +2385,17 @@ void drain_all_pages(struct zone *zone) +@@ -2366,8 +2412,17 @@ void drain_all_pages(struct zone *zone) else cpumask_clear_cpu(cpu, &cpus_with_pcps); } @@ -23724,7 +22050,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } #ifdef CONFIG_HIBERNATION -@@ -2400,7 +2455,7 @@ void free_hot_cold_page(struct page *page, bool cold) +@@ -2427,7 +2482,7 @@ void free_hot_cold_page(struct page *page, bool cold) migratetype = get_pfnblock_migratetype(page, pfn); set_pcppage_migratetype(page, migratetype); @@ -23733,7 +22059,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 __count_vm_event(PGFREE); /* -@@ -2426,12 +2481,17 @@ void free_hot_cold_page(struct page *page, bool cold) +@@ -2453,12 +2508,17 @@ void free_hot_cold_page(struct page *page, bool cold) pcp->count++; if (pcp->count >= pcp->high) { unsigned long batch = READ_ONCE(pcp->batch); @@ -23753,7 +22079,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } /* -@@ -2568,7 +2628,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, +@@ -2600,7 +2660,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, struct per_cpu_pages *pcp; struct list_head *list; @@ -23762,7 +22088,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 do { pcp = &this_cpu_ptr(zone->pageset)->pcp; list = &pcp->lists[migratetype]; -@@ -2595,7 +2655,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, +@@ -2627,7 +2687,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, * allocate greater than order-1 page units with __GFP_NOFAIL. */ WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); @@ -23771,7 +22097,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 do { page = NULL; -@@ -2607,22 +2667,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, +@@ -2639,22 +2699,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, if (!page) page = __rmqueue(zone, order, migratetype); } while (page && check_new_pages(page, order)); @@ -23800,7 +22126,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 return NULL; } -@@ -6528,7 +6590,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, +@@ -6505,7 +6567,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, int cpu = (unsigned long)hcpu; if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { @@ -23810,7 +22136,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 drain_pages(cpu); /* -@@ -6554,6 +6618,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, +@@ -6531,6 +6595,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, void __init page_alloc_init(void) { hotcpu_notifier(page_alloc_cpu_notify, 0); @@ -23818,7 +22144,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 } /* -@@ -7370,7 +7435,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7359,7 +7424,7 @@ void zone_pcp_reset(struct zone *zone) struct per_cpu_pageset *pset; /* avoid races with drain_pages() */ @@ -23827,7 +22153,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 if (zone->pageset != &boot_pageset) { for_each_online_cpu(cpu) { pset = per_cpu_ptr(zone->pageset, cpu); -@@ -7379,7 +7444,7 @@ void zone_pcp_reset(struct zone *zone) +@@ -7368,7 +7433,7 @@ void zone_pcp_reset(struct zone *zone) free_percpu(zone->pageset); zone->pageset = &boot_pageset; } @@ -23837,7 +22163,7 @@ index a2214c64ed3c..4be4d5d66f73 100644 #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/mm/slab.h b/mm/slab.h -index 9653f2e2591a..b7371e026627 100644 +index bc05fdc3edce..610cf61634f0 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, @@ -23853,10 +22179,10 @@ index 9653f2e2591a..b7371e026627 100644 #ifdef CONFIG_SLAB struct list_head slabs_partial; /* partial list first, better asm code */ diff --git a/mm/slub.c b/mm/slub.c -index 9adae58462f8..4b386747f050 100644 +index 2b3e740609e9..1732f9c5d31f 100644 --- a/mm/slub.c +++ b/mm/slub.c -@@ -1145,7 +1145,7 @@ static noinline int free_debug_processing( +@@ -1141,7 +1141,7 @@ static noinline int free_debug_processing( unsigned long uninitialized_var(flags); int ret = 0; @@ -23865,7 +22191,7 @@ index 9adae58462f8..4b386747f050 100644 slab_lock(page); if (s->flags & SLAB_CONSISTENCY_CHECKS) { -@@ -1180,7 +1180,7 @@ static noinline int free_debug_processing( +@@ -1176,7 +1176,7 @@ static noinline int free_debug_processing( bulk_cnt, cnt); slab_unlock(page); @@ -23874,7 +22200,7 @@ index 9adae58462f8..4b386747f050 100644 if (!ret) slab_fix(s, "Object at 0x%p not freed", object); return ret; -@@ -1308,6 +1308,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, +@@ -1304,6 +1304,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, #endif /* CONFIG_SLUB_DEBUG */ @@ -23887,7 +22213,7 @@ index 9adae58462f8..4b386747f050 100644 /* * Hooks for other subsystems that check memory allocations. In a typical * production configuration these hooks all should produce no code at all. -@@ -1527,10 +1533,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +@@ -1523,10 +1529,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) void *start, *p; int idx, order; bool shuffle; @@ -23905,7 +22231,7 @@ index 9adae58462f8..4b386747f050 100644 local_irq_enable(); flags |= s->allocflags; -@@ -1605,7 +1618,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) +@@ -1601,7 +1614,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) page->frozen = 1; out: @@ -23914,7 +22240,7 @@ index 9adae58462f8..4b386747f050 100644 local_irq_disable(); if (!page) return NULL; -@@ -1664,6 +1677,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) +@@ -1660,6 +1673,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __free_pages(page, order); } @@ -23931,7 +22257,7 @@ index 9adae58462f8..4b386747f050 100644 #define need_reserve_slab_rcu \ (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) -@@ -1695,6 +1718,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) +@@ -1691,6 +1714,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) } call_rcu(head, rcu_free_slab); @@ -23944,7 +22270,7 @@ index 9adae58462f8..4b386747f050 100644 } else __free_slab(s, page); } -@@ -1802,7 +1831,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +@@ -1798,7 +1827,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, if (!n || !n->nr_partial) return NULL; @@ -23953,7 +22279,7 @@ index 9adae58462f8..4b386747f050 100644 list_for_each_entry_safe(page, page2, &n->partial, lru) { void *t; -@@ -1827,7 +1856,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, +@@ -1823,7 +1852,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, break; } @@ -23962,7 +22288,7 @@ index 9adae58462f8..4b386747f050 100644 return object; } -@@ -2073,7 +2102,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -2069,7 +2098,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * that acquire_slab() will see a slab page that * is frozen */ @@ -23971,7 +22297,7 @@ index 9adae58462f8..4b386747f050 100644 } } else { m = M_FULL; -@@ -2084,7 +2113,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -2080,7 +2109,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, * slabs from diagnostic functions will not see * any frozen slabs. */ @@ -23980,7 +22306,7 @@ index 9adae58462f8..4b386747f050 100644 } } -@@ -2119,7 +2148,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, +@@ -2115,7 +2144,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, goto redo; if (lock) @@ -23989,7 +22315,7 @@ index 9adae58462f8..4b386747f050 100644 if (m == M_FREE) { stat(s, DEACTIVATE_EMPTY); -@@ -2151,10 +2180,10 @@ static void unfreeze_partials(struct kmem_cache *s, +@@ -2147,10 +2176,10 @@ static void unfreeze_partials(struct kmem_cache *s, n2 = get_node(s, page_to_nid(page)); if (n != n2) { if (n) @@ -24002,7 +22328,7 @@ index 9adae58462f8..4b386747f050 100644 } do { -@@ -2183,7 +2212,7 @@ static void unfreeze_partials(struct kmem_cache *s, +@@ -2179,7 +2208,7 @@ static void unfreeze_partials(struct kmem_cache *s, } if (n) @@ -24011,7 +22337,7 @@ index 9adae58462f8..4b386747f050 100644 while (discard_page) { page = discard_page; -@@ -2222,14 +2251,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) +@@ -2218,14 +2247,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) pobjects = oldpage->pobjects; pages = oldpage->pages; if (drain && pobjects > s->cpu_partial) { @@ -24033,7 +22359,7 @@ index 9adae58462f8..4b386747f050 100644 oldpage = NULL; pobjects = 0; pages = 0; -@@ -2301,7 +2337,22 @@ static bool has_cpu_slab(int cpu, void *info) +@@ -2297,7 +2333,22 @@ static bool has_cpu_slab(int cpu, void *info) static void flush_all(struct kmem_cache *s) { @@ -24056,7 +22382,7 @@ index 9adae58462f8..4b386747f050 100644 } /* -@@ -2337,10 +2388,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, +@@ -2352,10 +2403,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, unsigned long x = 0; struct page *page; @@ -24069,7 +22395,7 @@ index 9adae58462f8..4b386747f050 100644 return x; } #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ -@@ -2478,8 +2529,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) +@@ -2493,8 +2544,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) * already disabled (which is the case for bulk allocation). */ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, @@ -24081,7 +22407,7 @@ index 9adae58462f8..4b386747f050 100644 void *freelist; struct page *page; -@@ -2539,6 +2592,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2554,6 +2607,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, VM_BUG_ON(!c->page->frozen); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); @@ -24095,7 +22421,7 @@ index 9adae58462f8..4b386747f050 100644 return freelist; new_slab: -@@ -2570,7 +2630,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2585,7 +2645,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, deactivate_slab(s, page, get_freepointer(s, freelist)); c->page = NULL; c->freelist = NULL; @@ -24104,7 +22430,7 @@ index 9adae58462f8..4b386747f050 100644 } /* -@@ -2582,6 +2642,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2597,6 +2657,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, { void *p; unsigned long flags; @@ -24112,7 +22438,7 @@ index 9adae58462f8..4b386747f050 100644 local_irq_save(flags); #ifdef CONFIG_PREEMPT -@@ -2593,8 +2654,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, +@@ -2608,8 +2669,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, c = this_cpu_ptr(s->cpu_slab); #endif @@ -24123,7 +22449,7 @@ index 9adae58462f8..4b386747f050 100644 return p; } -@@ -2780,7 +2842,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -2795,7 +2857,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, do { if (unlikely(n)) { @@ -24132,7 +22458,7 @@ index 9adae58462f8..4b386747f050 100644 n = NULL; } prior = page->freelist; -@@ -2812,7 +2874,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -2827,7 +2889,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, * Otherwise the list_lock will synchronize with * other processors updating the list of slabs. */ @@ -24141,7 +22467,7 @@ index 9adae58462f8..4b386747f050 100644 } } -@@ -2854,7 +2916,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -2869,7 +2931,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, add_partial(n, page, DEACTIVATE_TO_TAIL); stat(s, FREE_ADD_PARTIAL); } @@ -24150,7 +22476,7 @@ index 9adae58462f8..4b386747f050 100644 return; slab_empty: -@@ -2869,7 +2931,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, +@@ -2884,7 +2946,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, remove_full(s, n, page); } @@ -24159,7 +22485,7 @@ index 9adae58462f8..4b386747f050 100644 stat(s, FREE_SLAB); discard_slab(s, page); } -@@ -3074,6 +3136,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +@@ -3089,6 +3151,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { struct kmem_cache_cpu *c; @@ -24167,7 +22493,7 @@ index 9adae58462f8..4b386747f050 100644 int i; /* memcg and kmem_cache debug support */ -@@ -3097,7 +3160,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +@@ -3112,7 +3175,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, * of re-populating per CPU c->freelist */ p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, @@ -24176,7 +22502,7 @@ index 9adae58462f8..4b386747f050 100644 if (unlikely(!p[i])) goto error; -@@ -3109,6 +3172,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, +@@ -3124,6 +3187,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } c->tid = next_tid(c->tid); local_irq_enable(); @@ -24184,7 +22510,7 @@ index 9adae58462f8..4b386747f050 100644 /* Clear memory outside IRQ disabled fastpath loop */ if (unlikely(flags & __GFP_ZERO)) { -@@ -3256,7 +3320,7 @@ static void +@@ -3271,7 +3335,7 @@ static void init_kmem_cache_node(struct kmem_cache_node *n) { n->nr_partial = 0; @@ -24193,7 +22519,7 @@ index 9adae58462f8..4b386747f050 100644 INIT_LIST_HEAD(&n->partial); #ifdef CONFIG_SLUB_DEBUG atomic_long_set(&n->nr_slabs, 0); -@@ -3600,6 +3664,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, +@@ -3615,6 +3679,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, const char *text) { #ifdef CONFIG_SLUB_DEBUG @@ -24204,7 +22530,7 @@ index 9adae58462f8..4b386747f050 100644 void *addr = page_address(page); void *p; unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * -@@ -3620,6 +3688,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, +@@ -3635,6 +3703,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, slab_unlock(page); kfree(map); #endif @@ -24212,7 +22538,7 @@ index 9adae58462f8..4b386747f050 100644 } /* -@@ -3633,7 +3702,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) +@@ -3648,7 +3717,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) struct page *page, *h; BUG_ON(irqs_disabled()); @@ -24221,7 +22547,7 @@ index 9adae58462f8..4b386747f050 100644 list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { remove_partial(n, page); -@@ -3643,7 +3712,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) +@@ -3658,7 +3727,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) "Objects remaining in %s on __kmem_cache_shutdown()"); } } @@ -24230,7 +22556,7 @@ index 9adae58462f8..4b386747f050 100644 list_for_each_entry_safe(page, h, &discard, lru) discard_slab(s, page); -@@ -3901,7 +3970,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) +@@ -3916,7 +3985,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) for (i = 0; i < SHRINK_PROMOTE_MAX; i++) INIT_LIST_HEAD(promote + i); @@ -24239,7 +22565,7 @@ index 9adae58462f8..4b386747f050 100644 /* * Build lists of slabs to discard or promote. -@@ -3932,7 +4001,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) +@@ -3947,7 +4016,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) list_splice(promote + i, &n->partial); @@ -24248,7 +22574,7 @@ index 9adae58462f8..4b386747f050 100644 /* Release empty slabs */ list_for_each_entry_safe(page, t, &discard, lru) -@@ -4108,6 +4177,12 @@ void __init kmem_cache_init(void) +@@ -4123,6 +4192,12 @@ void __init kmem_cache_init(void) { static __initdata struct kmem_cache boot_kmem_cache, boot_kmem_cache_node; @@ -24261,7 +22587,7 @@ index 9adae58462f8..4b386747f050 100644 if (debug_guardpage_minorder()) slub_max_order = 0; -@@ -4354,7 +4429,7 @@ static int validate_slab_node(struct kmem_cache *s, +@@ -4331,7 +4406,7 @@ static int validate_slab_node(struct kmem_cache *s, struct page *page; unsigned long flags; @@ -24270,7 +22596,7 @@ index 9adae58462f8..4b386747f050 100644 list_for_each_entry(page, &n->partial, lru) { validate_slab_slab(s, page, map); -@@ -4376,7 +4451,7 @@ static int validate_slab_node(struct kmem_cache *s, +@@ -4353,7 +4428,7 @@ static int validate_slab_node(struct kmem_cache *s, s->name, count, atomic_long_read(&n->nr_slabs)); out: @@ -24279,7 +22605,7 @@ index 9adae58462f8..4b386747f050 100644 return count; } -@@ -4564,12 +4639,12 @@ static int list_locations(struct kmem_cache *s, char *buf, +@@ -4541,12 +4616,12 @@ static int list_locations(struct kmem_cache *s, char *buf, if (!atomic_long_read(&n->nr_slabs)) continue; @@ -24295,7 +22621,7 @@ index 9adae58462f8..4b386747f050 100644 for (i = 0; i < t.count; i++) { diff --git a/mm/swap.c b/mm/swap.c -index 75c63bb2a1da..93fe549eb11e 100644 +index 4dcf852e1e6d..69c3a5b24060 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -32,6 +32,7 @@ @@ -24500,7 +22826,7 @@ index 75c63bb2a1da..93fe549eb11e 100644 put_online_cpus(); mutex_unlock(&lock); diff --git a/mm/truncate.c b/mm/truncate.c -index a01cce450a26..4bda37604f99 100644 +index 8d8c62d89e6d..5bf1bd25d077 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping, @@ -24519,7 +22845,7 @@ index a01cce450a26..4bda37604f99 100644 unlock: spin_unlock_irq(&mapping->tree_lock); diff --git a/mm/vmalloc.c b/mm/vmalloc.c -index 91f44e78c516..06ec393bb97d 100644 +index f2481cb4e6b2..db4de08fa97c 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -845,7 +845,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) @@ -24574,7 +22900,7 @@ index 91f44e78c516..06ec393bb97d 100644 /* Allocate new block if nothing was found */ diff --git a/mm/vmstat.c b/mm/vmstat.c -index 89cec42d19ff..fb73631fb90b 100644 +index 604f26a4f696..312006d2db50 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, @@ -24674,7 +23000,7 @@ index 89cec42d19ff..fb73631fb90b 100644 void __dec_zone_page_state(struct page *page, enum zone_stat_item item) diff --git a/mm/workingset.c b/mm/workingset.c -index 617475f529f4..48674bf36fb1 100644 +index fb1f9183d89a..7e6ef1a48cd3 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -334,7 +334,8 @@ void workingset_activation(struct page *page) @@ -24698,7 +23024,7 @@ index 617475f529f4..48674bf36fb1 100644 + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); + local_unlock_irq(workingset_shadow_lock); - if (memcg_kmem_enabled()) { + if (sc->memcg) { pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, spin_unlock(&mapping->tree_lock); @@ -24926,7 +23252,7 @@ index b0bc023d25c5..5af6426fbcbe 100644 migrate_read_unlock(zspage); unpin_tag(handle); diff --git a/net/core/dev.c b/net/core/dev.c -index ea6312057a71..d114a4692cde 100644 +index e1d731fdc72c..6ab4b7863755 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS; @@ -25029,7 +23355,7 @@ index ea6312057a71..d114a4692cde 100644 } /** -@@ -2268,6 +2274,7 @@ static void __netif_reschedule(struct Qdisc *q) +@@ -2263,6 +2269,7 @@ static void __netif_reschedule(struct Qdisc *q) sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -25037,7 +23363,7 @@ index ea6312057a71..d114a4692cde 100644 } void __netif_schedule(struct Qdisc *q) -@@ -2349,6 +2356,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) +@@ -2344,6 +2351,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); @@ -25045,7 +23371,7 @@ index ea6312057a71..d114a4692cde 100644 } EXPORT_SYMBOL(__dev_kfree_skb_irq); -@@ -3082,7 +3090,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, +@@ -3078,7 +3086,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * This permits qdisc->running owner to get the lock more * often and dequeue packets faster. */ @@ -25057,7 +23383,7 @@ index ea6312057a71..d114a4692cde 100644 if (unlikely(contended)) spin_lock(&q->busylock); -@@ -3145,8 +3157,10 @@ static void skb_update_prio(struct sk_buff *skb) +@@ -3141,8 +3153,10 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif @@ -25068,7 +23394,7 @@ index ea6312057a71..d114a4692cde 100644 /** * dev_loopback_xmit - loop back @skb -@@ -3390,8 +3404,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) +@@ -3376,8 +3390,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) int cpu = smp_processor_id(); /* ok because BHs are off */ if (txq->xmit_lock_owner != cpu) { @@ -25078,7 +23404,7 @@ index ea6312057a71..d114a4692cde 100644 goto recursion_alert; skb = validate_xmit_skb(skb, dev); -@@ -3401,9 +3414,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) +@@ -3387,9 +3400,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -25090,7 +23416,7 @@ index ea6312057a71..d114a4692cde 100644 if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; -@@ -3777,6 +3790,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, +@@ -3763,6 +3776,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, rps_unlock(sd); local_irq_restore(flags); @@ -25098,7 +23424,7 @@ index ea6312057a71..d114a4692cde 100644 atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); -@@ -3795,7 +3809,7 @@ static int netif_rx_internal(struct sk_buff *skb) +@@ -3781,7 +3795,7 @@ static int netif_rx_internal(struct sk_buff *skb) struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -25107,7 +23433,7 @@ index ea6312057a71..d114a4692cde 100644 rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); -@@ -3805,13 +3819,13 @@ static int netif_rx_internal(struct sk_buff *skb) +@@ -3791,13 +3805,13 @@ static int netif_rx_internal(struct sk_buff *skb) ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); @@ -25124,7 +23450,7 @@ index ea6312057a71..d114a4692cde 100644 } return ret; } -@@ -3845,11 +3859,9 @@ int netif_rx_ni(struct sk_buff *skb) +@@ -3831,11 +3845,9 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); @@ -25138,31 +23464,32 @@ index ea6312057a71..d114a4692cde 100644 return err; } -@@ -4321,7 +4333,7 @@ static void flush_backlog(void *arg) +@@ -4314,7 +4326,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + __skb_queue_tail(&sd->tofree_queue, skb); input_queue_head_incr(sd); } } -@@ -4330,10 +4342,13 @@ static void flush_backlog(void *arg) +@@ -4324,11 +4336,14 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&sd->tofree_queue, skb); input_queue_head_incr(sd); } } -+ + if (!skb_queue_empty(&sd->tofree_queue)) + raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_bh_enable(); ++ } - static int napi_gro_complete(struct sk_buff *skb) -@@ -4795,6 +4810,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) + static void flush_all_backlogs(void) +@@ -4809,6 +4824,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) sd->rps_ipi_list = NULL; local_irq_enable(); @@ -25170,7 +23497,7 @@ index ea6312057a71..d114a4692cde 100644 /* Send pending IPI's to kick RPS processing on remote cpus. */ while (remsd) { -@@ -4808,6 +4824,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) +@@ -4822,6 +4838,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) } else #endif local_irq_enable(); @@ -25178,7 +23505,28 @@ index ea6312057a71..d114a4692cde 100644 } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) -@@ -4889,6 +4906,7 @@ void __napi_schedule(struct napi_struct *n) +@@ -4851,7 +4868,9 @@ static int process_backlog(struct napi_struct *napi, int quota) + while (again) { + struct sk_buff *skb; + ++ local_irq_disable(); + while ((skb = __skb_dequeue(&sd->process_queue))) { ++ local_irq_enable(); + rcu_read_lock(); + __netif_receive_skb(skb); + rcu_read_unlock(); +@@ -4859,9 +4878,9 @@ static int process_backlog(struct napi_struct *napi, int quota) + if (++work >= quota) + return work; + ++ local_irq_disable(); + } + +- local_irq_disable(); + rps_lock(sd); + if (skb_queue_empty(&sd->input_pkt_queue)) { + /* +@@ -4899,9 +4918,11 @@ void __napi_schedule(struct napi_struct *n) local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); @@ -25186,7 +23534,41 @@ index ea6312057a71..d114a4692cde 100644 } EXPORT_SYMBOL(__napi_schedule); -@@ -5229,7 +5247,7 @@ static void net_rx_action(struct softirq_action *h) ++#ifndef CONFIG_PREEMPT_RT_FULL + /** + * __napi_schedule_irqoff - schedule for receive + * @n: entry to schedule +@@ -4913,6 +4934,7 @@ void __napi_schedule_irqoff(struct napi_struct *n) + ____napi_schedule(this_cpu_ptr(&softnet_data), n); + } + EXPORT_SYMBOL(__napi_schedule_irqoff); ++#endif + + void __napi_complete(struct napi_struct *n) + { +@@ -5202,13 +5224,21 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) + struct softnet_data *sd = this_cpu_ptr(&softnet_data); + unsigned long time_limit = jiffies + 2; + int budget = netdev_budget; ++ struct sk_buff_head tofree_q; ++ struct sk_buff *skb; + LIST_HEAD(list); + LIST_HEAD(repoll); + ++ __skb_queue_head_init(&tofree_q); ++ + local_irq_disable(); ++ skb_queue_splice_init(&sd->tofree_queue, &tofree_q); + list_splice_init(&sd->poll_list, &list); + local_irq_enable(); + ++ while ((skb = __skb_dequeue(&tofree_q))) ++ kfree_skb(skb); ++ + for (;;) { + struct napi_struct *n; + +@@ -5239,7 +5269,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) @@ -25195,16 +23577,7 @@ index ea6312057a71..d114a4692cde 100644 net_rps_action_and_irq_enable(sd); } -@@ -7736,7 +7754,7 @@ EXPORT_SYMBOL(free_netdev); - void synchronize_net(void) - { - might_sleep(); -- if (rtnl_is_locked()) -+ if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) - synchronize_rcu_expedited(); - else - synchronize_rcu(); -@@ -7977,16 +7995,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, +@@ -8000,16 +8030,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); @@ -25226,9 +23599,9 @@ index ea6312057a71..d114a4692cde 100644 return NOTIFY_OK; } -@@ -8288,8 +8310,9 @@ static int __init net_dev_init(void) - for_each_possible_cpu(i) { - struct softnet_data *sd = &per_cpu(softnet_data, i); +@@ -8314,8 +8348,9 @@ static int __init net_dev_init(void) + + INIT_WORK(flush, flush_backlog); - skb_queue_head_init(&sd->input_pkt_queue); - skb_queue_head_init(&sd->process_queue); @@ -25239,10 +23612,10 @@ index ea6312057a71..d114a4692cde 100644 sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS diff --git a/net/core/filter.c b/net/core/filter.c -index cb06aceb512a..3585a8982287 100644 +index b391209838ef..b86e9681a88e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c -@@ -1592,7 +1592,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) +@@ -1645,7 +1645,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) { int ret; @@ -25251,7 +23624,7 @@ index cb06aceb512a..3585a8982287 100644 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); kfree_skb(skb); return -ENETDOWN; -@@ -1600,9 +1600,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) +@@ -1653,9 +1653,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) skb->dev = dev; @@ -25330,7 +23703,7 @@ index 508e051304fb..bc3b17b78c94 100644 struct gnet_stats_basic_cpu __percpu *cpu, struct gnet_stats_basic_packed *b) diff --git a/net/core/skbuff.c b/net/core/skbuff.c -index 3864b4b68fa1..55c73ade9faa 100644 +index 1e3e0087245b..1077b39db717 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -64,6 +64,7 @@ @@ -25465,10 +23838,10 @@ index 3864b4b68fa1..55c73ade9faa 100644 void __kfree_skb_defer(struct sk_buff *skb) { diff --git a/net/core/sock.c b/net/core/sock.c -index fd7b41edf1ce..e425d259a9f0 100644 +index bc6543f7de36..2c32ee79620f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c -@@ -2508,12 +2508,11 @@ void lock_sock_nested(struct sock *sk, int subclass) +@@ -2488,12 +2488,11 @@ void lock_sock_nested(struct sock *sk, int subclass) if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; @@ -25483,7 +23856,7 @@ index fd7b41edf1ce..e425d259a9f0 100644 EXPORT_SYMBOL(lock_sock_nested); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c -index 38abe70e595f..443259a04862 100644 +index 48734ee6293f..e6864ff11352 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -69,6 +69,7 @@ @@ -25594,7 +23967,7 @@ index 38abe70e595f..443259a04862 100644 /* should there be an ICMP stat for ignored echos? */ return true; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c -index 1cb67de106fe..332a485323f0 100644 +index 80bc36b25de2..215b90adfb05 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = { @@ -25612,7 +23985,7 @@ index 1cb67de106fe..332a485323f0 100644 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c -index 7158d4f8dae4..0dc007fc6704 100644 +index 2259114c7242..829e60985a81 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -62,6 +62,7 @@ @@ -25623,7 +23996,7 @@ index 7158d4f8dae4..0dc007fc6704 100644 #include #include -@@ -565,6 +566,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) +@@ -564,6 +565,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); @@ -25631,7 +24004,7 @@ index 7158d4f8dae4..0dc007fc6704 100644 /* * This routine will send an RST to the other tcp. * -@@ -692,6 +694,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +@@ -691,6 +693,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) offsetof(struct inet_timewait_sock, tw_bound_dev_if)); arg.tos = ip_hdr(skb)->tos; @@ -25640,7 +24013,7 @@ index 7158d4f8dae4..0dc007fc6704 100644 local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -701,6 +705,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) +@@ -700,6 +704,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -25648,7 +24021,7 @@ index 7158d4f8dae4..0dc007fc6704 100644 #ifdef CONFIG_TCP_MD5SIG out: -@@ -776,6 +781,7 @@ static void tcp_v4_send_ack(struct net *net, +@@ -775,6 +780,7 @@ static void tcp_v4_send_ack(struct net *net, if (oif) arg.bound_dev_if = oif; arg.tos = tos; @@ -25656,7 +24029,7 @@ index 7158d4f8dae4..0dc007fc6704 100644 local_bh_disable(); ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), skb, &TCP_SKB_CB(skb)->header.h4.opt, -@@ -784,6 +790,7 @@ static void tcp_v4_send_ack(struct net *net, +@@ -783,6 +789,7 @@ static void tcp_v4_send_ack(struct net *net, __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); @@ -25665,10 +24038,10 @@ index 7158d4f8dae4..0dc007fc6704 100644 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c -index 9dce3b157908..525efa5309ac 100644 +index a47bbc973f2d..c1c1c64589d9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c -@@ -4064,7 +4064,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, +@@ -4156,7 +4156,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, struct ieee80211_supported_band *sband; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); @@ -25678,14 +24051,15 @@ index 9dce3b157908..525efa5309ac 100644 if (WARN_ON(status->band >= NUM_NL80211_BANDS)) goto drop; diff --git a/net/netfilter/core.c b/net/netfilter/core.c -index f39276d1c2d7..10880c89d62f 100644 +index 004af030ef1a..b64f751bda45 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c -@@ -22,11 +22,17 @@ +@@ -22,12 +22,18 @@ #include #include #include +#include + #include #include #include @@ -25700,7 +24074,7 @@ index f39276d1c2d7..10880c89d62f 100644 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c -index 33a4697d5539..475cb74bf825 100644 +index dd2332390c45..f6a703b25b6c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -63,6 +63,7 @@ @@ -25711,7 +24085,7 @@ index 33a4697d5539..475cb74bf825 100644 #include #include #include -@@ -695,7 +696,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) +@@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) if (BLOCK_NUM_PKTS(pbd)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ @@ -25720,7 +24094,7 @@ index 33a4697d5539..475cb74bf825 100644 } } -@@ -957,7 +958,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, +@@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, if (!(status & TP_STATUS_BLK_TMO)) { while (atomic_read(&pkc->blk_fill_in_prog)) { /* Waiting for skb_copy_bits to finish... */ @@ -25751,7 +24125,7 @@ index 977f69886c00..f3e7a36b0396 100644 } diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c -index 814d285ff802..d4d088e9be85 100644 +index 7d921e56e715..13df56a738e5 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -19,9 +19,6 @@ @@ -25765,10 +24139,10 @@ index 814d285ff802..d4d088e9be85 100644 [RXRPC_SECURITY_NONE] = &rxrpc_no_security, #ifdef CONFIG_RXKAD diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c -index 12ebde845523..99f3ce50c6c4 100644 +index 206dc24add3a..00ea9bde5bb3 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c -@@ -975,7 +975,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, +@@ -981,7 +981,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, rcu_assign_pointer(sch->stab, stab); } if (tca[TCA_RATE]) { @@ -25778,11 +24152,11 @@ index 12ebde845523..99f3ce50c6c4 100644 err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c -index 657c13362b19..cbab8d4d5864 100644 +index 6cfb6e9038c2..20727e1347de 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c -@@ -426,7 +426,11 @@ struct Qdisc noop_qdisc = { - .list = LIST_HEAD_INIT(noop_qdisc.list), +@@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = { + .ops = &noop_qdisc_ops, .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, +#ifdef CONFIG_PREEMPT_RT_BASE @@ -25793,7 +24167,7 @@ index 657c13362b19..cbab8d4d5864 100644 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), }; EXPORT_SYMBOL(noop_qdisc); -@@ -620,9 +624,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, +@@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, lockdep_set_class(&sch->busylock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); @@ -25811,7 +24185,7 @@ index 657c13362b19..cbab8d4d5864 100644 sch->ops = ops; sch->enqueue = ops->enqueue; -@@ -917,7 +929,7 @@ void dev_deactivate_many(struct list_head *head) +@@ -925,7 +937,7 @@ void dev_deactivate_many(struct list_head *head) /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) while (some_qdisc_is_busy(dev)) @@ -25821,7 +24195,7 @@ index 657c13362b19..cbab8d4d5864 100644 void dev_deactivate(struct net_device *dev) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c -index c3f652395a80..2dd84493528e 100644 +index 3bc1d61694cb..480141d45f49 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) @@ -25874,7 +24248,7 @@ index 6fdc97ef6023..523e0420d7f0 100755 # Truncate to maximum length diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c -index c61fd50f771f..1583de410f62 100644 +index 9d33c1e85c79..3d307bda86f9 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); diff --git a/kernel.spec b/kernel.spec index 66e6fdd9..11da50de 100644 --- a/kernel.spec +++ b/kernel.spec @@ -213,7 +213,7 @@ Patch146: kernel-aufs4+vserver.patch Patch250: kernel-fix_256colors_menuconfig.patch # https://rt.wiki.kernel.org/ -# https://www.kernel.org/pub/linux/kernel/projects/rt/4.8/patch-4.8.6-rt5.patch.xz +# https://www.kernel.org/pub/linux/kernel/projects/rt/4.9/patch-4.9.4-rt2.patch.xz Patch500: kernel-rt.patch Patch2000: kernel-small_fixes.patch -- 2.44.0