]>
Commit | Line | Data |
---|---|---|
e4b2b4a8 JK |
1 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/alpha/include/asm/spinlock_types.h linux-4.14/arch/alpha/include/asm/spinlock_types.h |
2 | --- linux-4.14.orig/arch/alpha/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
3 | +++ linux-4.14/arch/alpha/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
4 | @@ -2,10 +2,6 @@ | |
5 | #ifndef _ALPHA_SPINLOCK_TYPES_H | |
6 | #define _ALPHA_SPINLOCK_TYPES_H | |
7 | ||
8 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
9 | -# error "please don't include this file directly" | |
10 | -#endif | |
11 | - | |
12 | typedef struct { | |
13 | volatile unsigned int lock; | |
14 | } arch_spinlock_t; | |
15 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/include/asm/irq.h linux-4.14/arch/arm/include/asm/irq.h | |
16 | --- linux-4.14.orig/arch/arm/include/asm/irq.h 2017-11-12 19:46:13.000000000 +0100 | |
17 | +++ linux-4.14/arch/arm/include/asm/irq.h 2018-09-05 11:05:07.000000000 +0200 | |
18 | @@ -23,6 +23,8 @@ | |
c7c16703 JK |
19 | #endif |
20 | ||
21 | #ifndef __ASSEMBLY__ | |
22 | +#include <linux/cpumask.h> | |
23 | + | |
24 | struct irqaction; | |
25 | struct pt_regs; | |
26 | extern void migrate_irqs(void); | |
e4b2b4a8 JK |
27 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/include/asm/spinlock_types.h linux-4.14/arch/arm/include/asm/spinlock_types.h |
28 | --- linux-4.14.orig/arch/arm/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
29 | +++ linux-4.14/arch/arm/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
30 | @@ -2,10 +2,6 @@ | |
31 | #ifndef __ASM_SPINLOCK_TYPES_H | |
32 | #define __ASM_SPINLOCK_TYPES_H | |
33 | ||
34 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
35 | -# error "please don't include this file directly" | |
36 | -#endif | |
37 | - | |
38 | #define TICKET_SHIFT 16 | |
39 | ||
40 | typedef struct { | |
41 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/include/asm/switch_to.h linux-4.14/arch/arm/include/asm/switch_to.h | |
42 | --- linux-4.14.orig/arch/arm/include/asm/switch_to.h 2017-11-12 19:46:13.000000000 +0100 | |
43 | +++ linux-4.14/arch/arm/include/asm/switch_to.h 2018-09-05 11:05:07.000000000 +0200 | |
44 | @@ -4,6 +4,13 @@ | |
1a6e0f06 JK |
45 | |
46 | #include <linux/thread_info.h> | |
47 | ||
48 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
49 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
50 | +#else | |
51 | +static inline void | |
52 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
53 | +#endif | |
54 | + | |
55 | /* | |
56 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
57 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
e4b2b4a8 | 58 | @@ -26,6 +33,7 @@ |
1a6e0f06 JK |
59 | #define switch_to(prev,next,last) \ |
60 | do { \ | |
61 | __complete_pending_tlbi(); \ | |
62 | + switch_kmaps(prev, next); \ | |
63 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
64 | } while (0) | |
65 | ||
e4b2b4a8 JK |
66 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/include/asm/thread_info.h linux-4.14/arch/arm/include/asm/thread_info.h |
67 | --- linux-4.14.orig/arch/arm/include/asm/thread_info.h 2017-11-12 19:46:13.000000000 +0100 | |
68 | +++ linux-4.14/arch/arm/include/asm/thread_info.h 2018-09-05 11:05:07.000000000 +0200 | |
69 | @@ -49,6 +49,7 @@ | |
1a6e0f06 JK |
70 | struct thread_info { |
71 | unsigned long flags; /* low level flags */ | |
72 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
73 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
74 | mm_segment_t addr_limit; /* address limit */ | |
75 | struct task_struct *task; /* main task structure */ | |
76 | __u32 cpu; /* cpu */ | |
e4b2b4a8 | 77 | @@ -142,7 +143,8 @@ |
1a6e0f06 JK |
78 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ |
79 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
80 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
81 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
82 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
83 | +#define TIF_NEED_RESCHED_LAZY 7 | |
84 | ||
85 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
86 | #define TIF_USING_IWMMXT 17 | |
e4b2b4a8 | 87 | @@ -152,6 +154,7 @@ |
1a6e0f06 JK |
88 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) |
89 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
90 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
91 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
92 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
93 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
94 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
e4b2b4a8 | 95 | @@ -167,7 +170,8 @@ |
1a6e0f06 JK |
96 | * Change these and you break ASM code in entry-common.S |
97 | */ | |
98 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
99 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
100 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
101 | + _TIF_NEED_RESCHED_LAZY) | |
102 | ||
103 | #endif /* __KERNEL__ */ | |
104 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
e4b2b4a8 JK |
105 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/Kconfig linux-4.14/arch/arm/Kconfig |
106 | --- linux-4.14.orig/arch/arm/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
107 | +++ linux-4.14/arch/arm/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
108 | @@ -45,7 +45,7 @@ | |
109 | select HARDIRQS_SW_RESEND | |
110 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
111 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
112 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU | |
113 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE | |
114 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU | |
115 | select HAVE_ARCH_MMAP_RND_BITS if MMU | |
116 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
117 | @@ -85,6 +85,7 @@ | |
118 | select HAVE_PERF_EVENTS | |
119 | select HAVE_PERF_REGS | |
120 | select HAVE_PERF_USER_STACK_DUMP | |
121 | + select HAVE_PREEMPT_LAZY | |
122 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
123 | select HAVE_REGS_AND_STACK_ACCESS_API | |
124 | select HAVE_SYSCALL_TRACEPOINTS | |
125 | @@ -2164,7 +2165,7 @@ | |
126 | ||
127 | config KERNEL_MODE_NEON | |
128 | bool "Support for NEON in kernel mode" | |
129 | - depends on NEON && AEABI | |
130 | + depends on NEON && AEABI && !PREEMPT_RT_BASE | |
131 | help | |
132 | Say Y to include support for NEON in kernel mode. | |
133 | ||
134 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/asm-offsets.c linux-4.14/arch/arm/kernel/asm-offsets.c | |
135 | --- linux-4.14.orig/arch/arm/kernel/asm-offsets.c 2017-11-12 19:46:13.000000000 +0100 | |
136 | +++ linux-4.14/arch/arm/kernel/asm-offsets.c 2018-09-05 11:05:07.000000000 +0200 | |
137 | @@ -65,6 +65,7 @@ | |
1a6e0f06 JK |
138 | BLANK(); |
139 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
140 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
141 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
142 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
143 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
144 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
e4b2b4a8 JK |
145 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/entry-armv.S linux-4.14/arch/arm/kernel/entry-armv.S |
146 | --- linux-4.14.orig/arch/arm/kernel/entry-armv.S 2017-11-12 19:46:13.000000000 +0100 | |
147 | +++ linux-4.14/arch/arm/kernel/entry-armv.S 2018-09-05 11:05:07.000000000 +0200 | |
148 | @@ -220,11 +220,18 @@ | |
1a6e0f06 JK |
149 | |
150 | #ifdef CONFIG_PREEMPT | |
151 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
152 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
153 | teq r8, #0 @ if preempt count != 0 | |
154 | + bne 1f @ return from exeption | |
155 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
156 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
157 | + blne svc_preempt @ preempt! | |
158 | + | |
159 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
160 | + teq r8, #0 @ if preempt lazy count != 0 | |
161 | movne r0, #0 @ force flags to 0 | |
162 | - tst r0, #_TIF_NEED_RESCHED | |
163 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
164 | blne svc_preempt | |
165 | +1: | |
166 | #endif | |
167 | ||
168 | svc_exit r5, irq = 1 @ return from exception | |
e4b2b4a8 | 169 | @@ -239,8 +246,14 @@ |
1a6e0f06 JK |
170 | 1: bl preempt_schedule_irq @ irq en/disable is done inside |
171 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
172 | tst r0, #_TIF_NEED_RESCHED | |
173 | + bne 1b | |
174 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
175 | reteq r8 @ go again | |
176 | - b 1b | |
177 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
178 | + teq r0, #0 @ if preempt lazy count != 0 | |
179 | + beq 1b | |
180 | + ret r8 @ go again | |
181 | + | |
182 | #endif | |
183 | ||
184 | __und_fault: | |
e4b2b4a8 JK |
185 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/entry-common.S linux-4.14/arch/arm/kernel/entry-common.S |
186 | --- linux-4.14.orig/arch/arm/kernel/entry-common.S 2017-11-12 19:46:13.000000000 +0100 | |
187 | +++ linux-4.14/arch/arm/kernel/entry-common.S 2018-09-05 11:05:07.000000000 +0200 | |
188 | @@ -53,7 +53,9 @@ | |
189 | cmp r2, #TASK_SIZE | |
190 | blne addr_limit_check_failed | |
1a6e0f06 JK |
191 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing |
192 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
193 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
194 | + bne fast_work_pending | |
195 | + tst r1, #_TIF_SECCOMP | |
196 | bne fast_work_pending | |
197 | ||
e4b2b4a8 JK |
198 | |
199 | @@ -83,8 +85,11 @@ | |
200 | cmp r2, #TASK_SIZE | |
201 | blne addr_limit_check_failed | |
1a6e0f06 JK |
202 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing |
203 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
204 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
e4b2b4a8 | 205 | + bne do_slower_path |
1a6e0f06 JK |
206 | + tst r1, #_TIF_SECCOMP |
207 | beq no_work_pending | |
208 | +do_slower_path: | |
209 | UNWIND(.fnend ) | |
210 | ENDPROC(ret_fast_syscall) | |
211 | ||
e4b2b4a8 JK |
212 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/patch.c linux-4.14/arch/arm/kernel/patch.c |
213 | --- linux-4.14.orig/arch/arm/kernel/patch.c 2017-11-12 19:46:13.000000000 +0100 | |
214 | +++ linux-4.14/arch/arm/kernel/patch.c 2018-09-05 11:05:07.000000000 +0200 | |
215 | @@ -16,7 +16,7 @@ | |
c7c16703 JK |
216 | unsigned int insn; |
217 | }; | |
218 | ||
219 | -static DEFINE_SPINLOCK(patch_lock); | |
220 | +static DEFINE_RAW_SPINLOCK(patch_lock); | |
221 | ||
222 | static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
223 | __acquires(&patch_lock) | |
e4b2b4a8 | 224 | @@ -33,7 +33,7 @@ |
c7c16703 JK |
225 | return addr; |
226 | ||
227 | if (flags) | |
228 | - spin_lock_irqsave(&patch_lock, *flags); | |
229 | + raw_spin_lock_irqsave(&patch_lock, *flags); | |
230 | else | |
231 | __acquire(&patch_lock); | |
232 | ||
e4b2b4a8 | 233 | @@ -48,7 +48,7 @@ |
c7c16703 JK |
234 | clear_fixmap(fixmap); |
235 | ||
236 | if (flags) | |
237 | - spin_unlock_irqrestore(&patch_lock, *flags); | |
238 | + raw_spin_unlock_irqrestore(&patch_lock, *flags); | |
239 | else | |
240 | __release(&patch_lock); | |
241 | } | |
e4b2b4a8 JK |
242 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/process.c linux-4.14/arch/arm/kernel/process.c |
243 | --- linux-4.14.orig/arch/arm/kernel/process.c 2017-11-12 19:46:13.000000000 +0100 | |
244 | +++ linux-4.14/arch/arm/kernel/process.c 2018-09-05 11:05:07.000000000 +0200 | |
245 | @@ -325,6 +325,30 @@ | |
1a6e0f06 JK |
246 | } |
247 | ||
248 | #ifdef CONFIG_MMU | |
249 | +/* | |
250 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
251 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
252 | + * fail. | |
253 | + */ | |
254 | +static int __init vectors_user_mapping_init_page(void) | |
255 | +{ | |
256 | + struct page *page; | |
257 | + unsigned long addr = 0xffff0000; | |
258 | + pgd_t *pgd; | |
259 | + pud_t *pud; | |
260 | + pmd_t *pmd; | |
261 | + | |
262 | + pgd = pgd_offset_k(addr); | |
263 | + pud = pud_offset(pgd, addr); | |
264 | + pmd = pmd_offset(pud, addr); | |
265 | + page = pmd_page(*(pmd)); | |
266 | + | |
267 | + pgtable_page_ctor(page); | |
268 | + | |
269 | + return 0; | |
270 | +} | |
271 | +late_initcall(vectors_user_mapping_init_page); | |
272 | + | |
273 | #ifdef CONFIG_KUSER_HELPERS | |
274 | /* | |
275 | * The vectors page is always readable from user space for the | |
e4b2b4a8 JK |
276 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/signal.c linux-4.14/arch/arm/kernel/signal.c |
277 | --- linux-4.14.orig/arch/arm/kernel/signal.c 2017-11-12 19:46:13.000000000 +0100 | |
278 | +++ linux-4.14/arch/arm/kernel/signal.c 2018-09-05 11:05:07.000000000 +0200 | |
279 | @@ -615,7 +615,8 @@ | |
1a6e0f06 JK |
280 | */ |
281 | trace_hardirqs_off(); | |
282 | do { | |
283 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
284 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
285 | + _TIF_NEED_RESCHED_LAZY))) { | |
286 | schedule(); | |
287 | } else { | |
288 | if (unlikely(!user_mode(regs))) | |
e4b2b4a8 JK |
289 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/smp.c linux-4.14/arch/arm/kernel/smp.c |
290 | --- linux-4.14.orig/arch/arm/kernel/smp.c 2017-11-12 19:46:13.000000000 +0100 | |
291 | +++ linux-4.14/arch/arm/kernel/smp.c 2018-09-05 11:05:07.000000000 +0200 | |
292 | @@ -236,8 +236,6 @@ | |
1a6e0f06 JK |
293 | flush_cache_louis(); |
294 | local_flush_tlb_all(); | |
295 | ||
296 | - clear_tasks_mm_cpumask(cpu); | |
297 | - | |
298 | return 0; | |
299 | } | |
300 | ||
e4b2b4a8 | 301 | @@ -255,6 +253,7 @@ |
1a6e0f06 | 302 | } |
e4b2b4a8 | 303 | pr_debug("CPU%u: shutdown\n", cpu); |
1a6e0f06 | 304 | |
e4b2b4a8 | 305 | + clear_tasks_mm_cpumask(cpu); |
1a6e0f06 | 306 | /* |
e4b2b4a8 JK |
307 | * platform_cpu_kill() is generally expected to do the powering off |
308 | * and/or cutting of clocks to the dying CPU. Optionally, this may | |
309 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/kernel/unwind.c linux-4.14/arch/arm/kernel/unwind.c | |
310 | --- linux-4.14.orig/arch/arm/kernel/unwind.c 2017-11-12 19:46:13.000000000 +0100 | |
311 | +++ linux-4.14/arch/arm/kernel/unwind.c 2018-09-05 11:05:07.000000000 +0200 | |
312 | @@ -93,7 +93,7 @@ | |
1a6e0f06 JK |
313 | static const struct unwind_idx *__origin_unwind_idx; |
314 | extern const struct unwind_idx __stop_unwind_idx[]; | |
315 | ||
316 | -static DEFINE_SPINLOCK(unwind_lock); | |
317 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
318 | static LIST_HEAD(unwind_tables); | |
319 | ||
320 | /* Convert a prel31 symbol to an absolute address */ | |
e4b2b4a8 | 321 | @@ -201,7 +201,7 @@ |
1a6e0f06 JK |
322 | /* module unwind tables */ |
323 | struct unwind_table *table; | |
324 | ||
325 | - spin_lock_irqsave(&unwind_lock, flags); | |
326 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
327 | list_for_each_entry(table, &unwind_tables, list) { | |
328 | if (addr >= table->begin_addr && | |
329 | addr < table->end_addr) { | |
e4b2b4a8 | 330 | @@ -213,7 +213,7 @@ |
1a6e0f06 JK |
331 | break; |
332 | } | |
333 | } | |
334 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
335 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
336 | } | |
337 | ||
338 | pr_debug("%s: idx = %p\n", __func__, idx); | |
e4b2b4a8 | 339 | @@ -529,9 +529,9 @@ |
1a6e0f06 JK |
340 | tab->begin_addr = text_addr; |
341 | tab->end_addr = text_addr + text_size; | |
342 | ||
343 | - spin_lock_irqsave(&unwind_lock, flags); | |
344 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
345 | list_add_tail(&tab->list, &unwind_tables); | |
346 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
347 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
348 | ||
349 | return tab; | |
350 | } | |
e4b2b4a8 | 351 | @@ -543,9 +543,9 @@ |
1a6e0f06 JK |
352 | if (!tab) |
353 | return; | |
354 | ||
355 | - spin_lock_irqsave(&unwind_lock, flags); | |
356 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
357 | list_del(&tab->list); | |
358 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
359 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
360 | ||
361 | kfree(tab); | |
362 | } | |
e4b2b4a8 JK |
363 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-exynos/platsmp.c linux-4.14/arch/arm/mach-exynos/platsmp.c |
364 | --- linux-4.14.orig/arch/arm/mach-exynos/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
365 | +++ linux-4.14/arch/arm/mach-exynos/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
366 | @@ -229,7 +229,7 @@ | |
1a6e0f06 JK |
367 | return (void __iomem *)(S5P_VA_SCU); |
368 | } | |
369 | ||
370 | -static DEFINE_SPINLOCK(boot_lock); | |
371 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
372 | ||
373 | static void exynos_secondary_init(unsigned int cpu) | |
374 | { | |
e4b2b4a8 | 375 | @@ -242,8 +242,8 @@ |
1a6e0f06 JK |
376 | /* |
377 | * Synchronise with the boot thread. | |
378 | */ | |
379 | - spin_lock(&boot_lock); | |
380 | - spin_unlock(&boot_lock); | |
381 | + raw_spin_lock(&boot_lock); | |
382 | + raw_spin_unlock(&boot_lock); | |
383 | } | |
384 | ||
385 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
e4b2b4a8 | 386 | @@ -307,7 +307,7 @@ |
1a6e0f06 JK |
387 | * Set synchronisation state between this boot processor |
388 | * and the secondary one | |
389 | */ | |
390 | - spin_lock(&boot_lock); | |
391 | + raw_spin_lock(&boot_lock); | |
392 | ||
393 | /* | |
394 | * The secondary processor is waiting to be released from | |
e4b2b4a8 | 395 | @@ -334,7 +334,7 @@ |
1a6e0f06 JK |
396 | |
397 | if (timeout == 0) { | |
398 | printk(KERN_ERR "cpu1 power enable failed"); | |
399 | - spin_unlock(&boot_lock); | |
400 | + raw_spin_unlock(&boot_lock); | |
401 | return -ETIMEDOUT; | |
402 | } | |
403 | } | |
e4b2b4a8 | 404 | @@ -380,7 +380,7 @@ |
1a6e0f06 JK |
405 | * calibrations, then wait for it to finish |
406 | */ | |
407 | fail: | |
408 | - spin_unlock(&boot_lock); | |
409 | + raw_spin_unlock(&boot_lock); | |
410 | ||
411 | return pen_release != -1 ? ret : 0; | |
412 | } | |
e4b2b4a8 JK |
413 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-hisi/platmcpm.c linux-4.14/arch/arm/mach-hisi/platmcpm.c |
414 | --- linux-4.14.orig/arch/arm/mach-hisi/platmcpm.c 2017-11-12 19:46:13.000000000 +0100 | |
415 | +++ linux-4.14/arch/arm/mach-hisi/platmcpm.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
416 | @@ -61,7 +61,7 @@ |
417 | ||
418 | static void __iomem *sysctrl, *fabric; | |
419 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
420 | -static DEFINE_SPINLOCK(boot_lock); | |
421 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
422 | static u32 fabric_phys_addr; | |
423 | /* | |
424 | * [0]: bootwrapper physical address | |
e4b2b4a8 | 425 | @@ -113,7 +113,7 @@ |
1a6e0f06 JK |
426 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) |
427 | return -EINVAL; | |
428 | ||
429 | - spin_lock_irq(&boot_lock); | |
430 | + raw_spin_lock_irq(&boot_lock); | |
431 | ||
432 | if (hip04_cpu_table[cluster][cpu]) | |
433 | goto out; | |
e4b2b4a8 | 434 | @@ -147,7 +147,7 @@ |
1a6e0f06 JK |
435 | |
436 | out: | |
437 | hip04_cpu_table[cluster][cpu]++; | |
438 | - spin_unlock_irq(&boot_lock); | |
439 | + raw_spin_unlock_irq(&boot_lock); | |
440 | ||
441 | return 0; | |
442 | } | |
e4b2b4a8 | 443 | @@ -162,11 +162,11 @@ |
1a6e0f06 JK |
444 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); |
445 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
446 | ||
447 | - spin_lock(&boot_lock); | |
448 | + raw_spin_lock(&boot_lock); | |
449 | hip04_cpu_table[cluster][cpu]--; | |
450 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
451 | /* A power_up request went ahead of us. */ | |
452 | - spin_unlock(&boot_lock); | |
453 | + raw_spin_unlock(&boot_lock); | |
454 | return; | |
455 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
456 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
e4b2b4a8 | 457 | @@ -174,7 +174,7 @@ |
1a6e0f06 JK |
458 | } |
459 | ||
460 | last_man = hip04_cluster_is_down(cluster); | |
461 | - spin_unlock(&boot_lock); | |
462 | + raw_spin_unlock(&boot_lock); | |
463 | if (last_man) { | |
464 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
465 | asm volatile( | |
e4b2b4a8 | 466 | @@ -203,7 +203,7 @@ |
1a6e0f06 JK |
467 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); |
468 | ||
469 | count = TIMEOUT_MSEC / POLL_MSEC; | |
470 | - spin_lock_irq(&boot_lock); | |
471 | + raw_spin_lock_irq(&boot_lock); | |
472 | for (tries = 0; tries < count; tries++) { | |
473 | if (hip04_cpu_table[cluster][cpu]) | |
474 | goto err; | |
e4b2b4a8 | 475 | @@ -211,10 +211,10 @@ |
1a6e0f06 JK |
476 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); |
477 | if (data & CORE_WFI_STATUS(cpu)) | |
478 | break; | |
479 | - spin_unlock_irq(&boot_lock); | |
480 | + raw_spin_unlock_irq(&boot_lock); | |
481 | /* Wait for clean L2 when the whole cluster is down. */ | |
482 | msleep(POLL_MSEC); | |
483 | - spin_lock_irq(&boot_lock); | |
484 | + raw_spin_lock_irq(&boot_lock); | |
485 | } | |
486 | if (tries >= count) | |
487 | goto err; | |
e4b2b4a8 | 488 | @@ -231,10 +231,10 @@ |
1a6e0f06 JK |
489 | goto err; |
490 | if (hip04_cluster_is_down(cluster)) | |
491 | hip04_set_snoop_filter(cluster, 0); | |
492 | - spin_unlock_irq(&boot_lock); | |
493 | + raw_spin_unlock_irq(&boot_lock); | |
494 | return 1; | |
495 | err: | |
496 | - spin_unlock_irq(&boot_lock); | |
497 | + raw_spin_unlock_irq(&boot_lock); | |
498 | return 0; | |
499 | } | |
500 | #endif | |
e4b2b4a8 JK |
501 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-omap2/omap-smp.c linux-4.14/arch/arm/mach-omap2/omap-smp.c |
502 | --- linux-4.14.orig/arch/arm/mach-omap2/omap-smp.c 2018-09-05 11:03:20.000000000 +0200 | |
503 | +++ linux-4.14/arch/arm/mach-omap2/omap-smp.c 2018-09-05 11:05:07.000000000 +0200 | |
504 | @@ -69,7 +69,7 @@ | |
1a6e0f06 JK |
505 | .startup_addr = omap5_secondary_startup, |
506 | }; | |
507 | ||
508 | -static DEFINE_SPINLOCK(boot_lock); | |
509 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
510 | ||
511 | void __iomem *omap4_get_scu_base(void) | |
512 | { | |
e4b2b4a8 | 513 | @@ -177,8 +177,8 @@ |
1a6e0f06 JK |
514 | /* |
515 | * Synchronise with the boot thread. | |
516 | */ | |
517 | - spin_lock(&boot_lock); | |
518 | - spin_unlock(&boot_lock); | |
519 | + raw_spin_lock(&boot_lock); | |
520 | + raw_spin_unlock(&boot_lock); | |
521 | } | |
522 | ||
523 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
e4b2b4a8 | 524 | @@ -191,7 +191,7 @@ |
1a6e0f06 JK |
525 | * Set synchronisation state between this boot processor |
526 | * and the secondary one | |
527 | */ | |
528 | - spin_lock(&boot_lock); | |
529 | + raw_spin_lock(&boot_lock); | |
530 | ||
531 | /* | |
532 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
e4b2b4a8 | 533 | @@ -270,7 +270,7 @@ |
1a6e0f06 JK |
534 | * Now the secondary core is starting up let it run its |
535 | * calibrations, then wait for it to finish | |
536 | */ | |
537 | - spin_unlock(&boot_lock); | |
538 | + raw_spin_unlock(&boot_lock); | |
539 | ||
540 | return 0; | |
541 | } | |
e4b2b4a8 JK |
542 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-prima2/platsmp.c linux-4.14/arch/arm/mach-prima2/platsmp.c |
543 | --- linux-4.14.orig/arch/arm/mach-prima2/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
544 | +++ linux-4.14/arch/arm/mach-prima2/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
545 | @@ -22,7 +22,7 @@ |
546 | ||
547 | static void __iomem *clk_base; | |
548 | ||
549 | -static DEFINE_SPINLOCK(boot_lock); | |
550 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
551 | ||
552 | static void sirfsoc_secondary_init(unsigned int cpu) | |
553 | { | |
e4b2b4a8 | 554 | @@ -36,8 +36,8 @@ |
1a6e0f06 JK |
555 | /* |
556 | * Synchronise with the boot thread. | |
557 | */ | |
558 | - spin_lock(&boot_lock); | |
559 | - spin_unlock(&boot_lock); | |
560 | + raw_spin_lock(&boot_lock); | |
561 | + raw_spin_unlock(&boot_lock); | |
562 | } | |
563 | ||
564 | static const struct of_device_id clk_ids[] = { | |
e4b2b4a8 | 565 | @@ -75,7 +75,7 @@ |
1a6e0f06 JK |
566 | /* make sure write buffer is drained */ |
567 | mb(); | |
568 | ||
569 | - spin_lock(&boot_lock); | |
570 | + raw_spin_lock(&boot_lock); | |
571 | ||
572 | /* | |
573 | * The secondary processor is waiting to be released from | |
e4b2b4a8 | 574 | @@ -107,7 +107,7 @@ |
1a6e0f06 JK |
575 | * now the secondary core is starting up let it run its |
576 | * calibrations, then wait for it to finish | |
577 | */ | |
578 | - spin_unlock(&boot_lock); | |
579 | + raw_spin_unlock(&boot_lock); | |
580 | ||
581 | return pen_release != -1 ? -ENOSYS : 0; | |
582 | } | |
e4b2b4a8 JK |
583 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-qcom/platsmp.c linux-4.14/arch/arm/mach-qcom/platsmp.c |
584 | --- linux-4.14.orig/arch/arm/mach-qcom/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
585 | +++ linux-4.14/arch/arm/mach-qcom/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
586 | @@ -46,7 +46,7 @@ |
587 | ||
588 | extern void secondary_startup_arm(void); | |
589 | ||
590 | -static DEFINE_SPINLOCK(boot_lock); | |
591 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
592 | ||
593 | #ifdef CONFIG_HOTPLUG_CPU | |
594 | static void qcom_cpu_die(unsigned int cpu) | |
e4b2b4a8 | 595 | @@ -60,8 +60,8 @@ |
1a6e0f06 JK |
596 | /* |
597 | * Synchronise with the boot thread. | |
598 | */ | |
599 | - spin_lock(&boot_lock); | |
600 | - spin_unlock(&boot_lock); | |
601 | + raw_spin_lock(&boot_lock); | |
602 | + raw_spin_unlock(&boot_lock); | |
603 | } | |
604 | ||
605 | static int scss_release_secondary(unsigned int cpu) | |
e4b2b4a8 | 606 | @@ -284,7 +284,7 @@ |
1a6e0f06 JK |
607 | * set synchronisation state between this boot processor |
608 | * and the secondary one | |
609 | */ | |
610 | - spin_lock(&boot_lock); | |
611 | + raw_spin_lock(&boot_lock); | |
612 | ||
613 | /* | |
614 | * Send the secondary CPU a soft interrupt, thereby causing | |
e4b2b4a8 | 615 | @@ -297,7 +297,7 @@ |
1a6e0f06 JK |
616 | * now the secondary core is starting up let it run its |
617 | * calibrations, then wait for it to finish | |
618 | */ | |
619 | - spin_unlock(&boot_lock); | |
620 | + raw_spin_unlock(&boot_lock); | |
621 | ||
622 | return ret; | |
623 | } | |
e4b2b4a8 JK |
624 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-spear/platsmp.c linux-4.14/arch/arm/mach-spear/platsmp.c |
625 | --- linux-4.14.orig/arch/arm/mach-spear/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
626 | +++ linux-4.14/arch/arm/mach-spear/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
627 | @@ -32,7 +32,7 @@ | |
1a6e0f06 JK |
628 | sync_cache_w(&pen_release); |
629 | } | |
630 | ||
631 | -static DEFINE_SPINLOCK(boot_lock); | |
632 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
633 | ||
634 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
635 | ||
e4b2b4a8 | 636 | @@ -47,8 +47,8 @@ |
1a6e0f06 JK |
637 | /* |
638 | * Synchronise with the boot thread. | |
639 | */ | |
640 | - spin_lock(&boot_lock); | |
641 | - spin_unlock(&boot_lock); | |
642 | + raw_spin_lock(&boot_lock); | |
643 | + raw_spin_unlock(&boot_lock); | |
644 | } | |
645 | ||
646 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
e4b2b4a8 | 647 | @@ -59,7 +59,7 @@ |
1a6e0f06 JK |
648 | * set synchronisation state between this boot processor |
649 | * and the secondary one | |
650 | */ | |
651 | - spin_lock(&boot_lock); | |
652 | + raw_spin_lock(&boot_lock); | |
653 | ||
654 | /* | |
655 | * The secondary processor is waiting to be released from | |
e4b2b4a8 | 656 | @@ -84,7 +84,7 @@ |
1a6e0f06 JK |
657 | * now the secondary core is starting up let it run its |
658 | * calibrations, then wait for it to finish | |
659 | */ | |
660 | - spin_unlock(&boot_lock); | |
661 | + raw_spin_unlock(&boot_lock); | |
662 | ||
663 | return pen_release != -1 ? -ENOSYS : 0; | |
664 | } | |
e4b2b4a8 JK |
665 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mach-sti/platsmp.c linux-4.14/arch/arm/mach-sti/platsmp.c |
666 | --- linux-4.14.orig/arch/arm/mach-sti/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
667 | +++ linux-4.14/arch/arm/mach-sti/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
668 | @@ -35,7 +35,7 @@ | |
1a6e0f06 JK |
669 | sync_cache_w(&pen_release); |
670 | } | |
671 | ||
672 | -static DEFINE_SPINLOCK(boot_lock); | |
673 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
674 | ||
675 | static void sti_secondary_init(unsigned int cpu) | |
676 | { | |
e4b2b4a8 | 677 | @@ -48,8 +48,8 @@ |
1a6e0f06 JK |
678 | /* |
679 | * Synchronise with the boot thread. | |
680 | */ | |
681 | - spin_lock(&boot_lock); | |
682 | - spin_unlock(&boot_lock); | |
683 | + raw_spin_lock(&boot_lock); | |
684 | + raw_spin_unlock(&boot_lock); | |
685 | } | |
686 | ||
687 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
e4b2b4a8 | 688 | @@ -60,7 +60,7 @@ |
1a6e0f06 JK |
689 | * set synchronisation state between this boot processor |
690 | * and the secondary one | |
691 | */ | |
692 | - spin_lock(&boot_lock); | |
693 | + raw_spin_lock(&boot_lock); | |
694 | ||
695 | /* | |
696 | * The secondary processor is waiting to be released from | |
e4b2b4a8 | 697 | @@ -91,7 +91,7 @@ |
1a6e0f06 JK |
698 | * now the secondary core is starting up let it run its |
699 | * calibrations, then wait for it to finish | |
700 | */ | |
701 | - spin_unlock(&boot_lock); | |
702 | + raw_spin_unlock(&boot_lock); | |
703 | ||
704 | return pen_release != -1 ? -ENOSYS : 0; | |
705 | } | |
e4b2b4a8 JK |
706 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mm/fault.c linux-4.14/arch/arm/mm/fault.c |
707 | --- linux-4.14.orig/arch/arm/mm/fault.c 2017-11-12 19:46:13.000000000 +0100 | |
708 | +++ linux-4.14/arch/arm/mm/fault.c 2018-09-05 11:05:07.000000000 +0200 | |
709 | @@ -434,6 +434,9 @@ | |
1a6e0f06 JK |
710 | if (addr < TASK_SIZE) |
711 | return do_page_fault(addr, fsr, regs); | |
712 | ||
713 | + if (interrupts_enabled(regs)) | |
714 | + local_irq_enable(); | |
715 | + | |
716 | if (user_mode(regs)) | |
717 | goto bad_area; | |
718 | ||
e4b2b4a8 | 719 | @@ -501,6 +504,9 @@ |
1a6e0f06 JK |
720 | static int |
721 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
722 | { | |
723 | + if (interrupts_enabled(regs)) | |
724 | + local_irq_enable(); | |
725 | + | |
726 | do_bad_area(addr, fsr, regs); | |
727 | return 0; | |
728 | } | |
e4b2b4a8 JK |
729 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/mm/highmem.c linux-4.14/arch/arm/mm/highmem.c |
730 | --- linux-4.14.orig/arch/arm/mm/highmem.c 2017-11-12 19:46:13.000000000 +0100 | |
731 | +++ linux-4.14/arch/arm/mm/highmem.c 2018-09-05 11:05:07.000000000 +0200 | |
732 | @@ -34,6 +34,11 @@ | |
1a6e0f06 JK |
733 | return *ptep; |
734 | } | |
735 | ||
736 | +static unsigned int fixmap_idx(int type) | |
737 | +{ | |
738 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
739 | +} | |
740 | + | |
741 | void *kmap(struct page *page) | |
742 | { | |
743 | might_sleep(); | |
e4b2b4a8 | 744 | @@ -54,12 +59,13 @@ |
1a6e0f06 JK |
745 | |
746 | void *kmap_atomic(struct page *page) | |
747 | { | |
748 | + pte_t pte = mk_pte(page, kmap_prot); | |
749 | unsigned int idx; | |
750 | unsigned long vaddr; | |
751 | void *kmap; | |
752 | int type; | |
753 | ||
754 | - preempt_disable(); | |
755 | + preempt_disable_nort(); | |
756 | pagefault_disable(); | |
757 | if (!PageHighMem(page)) | |
758 | return page_address(page); | |
e4b2b4a8 | 759 | @@ -79,7 +85,7 @@ |
1a6e0f06 JK |
760 | |
761 | type = kmap_atomic_idx_push(); | |
762 | ||
763 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
764 | + idx = fixmap_idx(type); | |
765 | vaddr = __fix_to_virt(idx); | |
766 | #ifdef CONFIG_DEBUG_HIGHMEM | |
767 | /* | |
e4b2b4a8 | 768 | @@ -93,7 +99,10 @@ |
1a6e0f06 JK |
769 | * in place, so the contained TLB flush ensures the TLB is updated |
770 | * with the new mapping. | |
771 | */ | |
772 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
773 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
774 | + current->kmap_pte[type] = pte; | |
775 | +#endif | |
776 | + set_fixmap_pte(idx, pte); | |
777 | ||
778 | return (void *)vaddr; | |
779 | } | |
e4b2b4a8 | 780 | @@ -106,44 +115,75 @@ |
1a6e0f06 JK |
781 | |
782 | if (kvaddr >= (void *)FIXADDR_START) { | |
783 | type = kmap_atomic_idx(); | |
784 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
785 | + idx = fixmap_idx(type); | |
786 | ||
787 | if (cache_is_vivt()) | |
788 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
789 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
790 | + current->kmap_pte[type] = __pte(0); | |
791 | +#endif | |
792 | #ifdef CONFIG_DEBUG_HIGHMEM | |
793 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
794 | - set_fixmap_pte(idx, __pte(0)); | |
795 | #else | |
796 | (void) idx; /* to kill a warning */ | |
797 | #endif | |
798 | + set_fixmap_pte(idx, __pte(0)); | |
799 | kmap_atomic_idx_pop(); | |
800 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
801 | /* this address was obtained through kmap_high_get() */ | |
802 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
803 | } | |
804 | pagefault_enable(); | |
805 | - preempt_enable(); | |
806 | + preempt_enable_nort(); | |
807 | } | |
808 | EXPORT_SYMBOL(__kunmap_atomic); | |
809 | ||
810 | void *kmap_atomic_pfn(unsigned long pfn) | |
811 | { | |
812 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
813 | unsigned long vaddr; | |
814 | int idx, type; | |
815 | struct page *page = pfn_to_page(pfn); | |
816 | ||
817 | - preempt_disable(); | |
818 | + preempt_disable_nort(); | |
819 | pagefault_disable(); | |
820 | if (!PageHighMem(page)) | |
821 | return page_address(page); | |
822 | ||
823 | type = kmap_atomic_idx_push(); | |
824 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
825 | + idx = fixmap_idx(type); | |
826 | vaddr = __fix_to_virt(idx); | |
827 | #ifdef CONFIG_DEBUG_HIGHMEM | |
828 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
829 | #endif | |
830 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
831 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
832 | + current->kmap_pte[type] = pte; | |
833 | +#endif | |
834 | + set_fixmap_pte(idx, pte); | |
835 | ||
836 | return (void *)vaddr; | |
837 | } | |
838 | +#if defined CONFIG_PREEMPT_RT_FULL | |
839 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
840 | +{ | |
841 | + int i; | |
842 | + | |
843 | + /* | |
844 | + * Clear @prev's kmap_atomic mappings | |
845 | + */ | |
846 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
847 | + int idx = fixmap_idx(i); | |
848 | + | |
849 | + set_fixmap_pte(idx, __pte(0)); | |
850 | + } | |
851 | + /* | |
852 | + * Restore @next_p's kmap_atomic mappings | |
853 | + */ | |
854 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
855 | + int idx = fixmap_idx(i); | |
856 | + | |
857 | + if (!pte_none(next_p->kmap_pte[i])) | |
858 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
859 | + } | |
860 | +} | |
861 | +#endif | |
e4b2b4a8 JK |
862 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm/plat-versatile/platsmp.c linux-4.14/arch/arm/plat-versatile/platsmp.c |
863 | --- linux-4.14.orig/arch/arm/plat-versatile/platsmp.c 2017-11-12 19:46:13.000000000 +0100 | |
864 | +++ linux-4.14/arch/arm/plat-versatile/platsmp.c 2018-09-05 11:05:07.000000000 +0200 | |
865 | @@ -32,7 +32,7 @@ | |
1a6e0f06 JK |
866 | sync_cache_w(&pen_release); |
867 | } | |
868 | ||
869 | -static DEFINE_SPINLOCK(boot_lock); | |
870 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
871 | ||
872 | void versatile_secondary_init(unsigned int cpu) | |
873 | { | |
e4b2b4a8 | 874 | @@ -45,8 +45,8 @@ |
1a6e0f06 JK |
875 | /* |
876 | * Synchronise with the boot thread. | |
877 | */ | |
878 | - spin_lock(&boot_lock); | |
879 | - spin_unlock(&boot_lock); | |
880 | + raw_spin_lock(&boot_lock); | |
881 | + raw_spin_unlock(&boot_lock); | |
882 | } | |
883 | ||
884 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
e4b2b4a8 | 885 | @@ -57,7 +57,7 @@ |
1a6e0f06 JK |
886 | * Set synchronisation state between this boot processor |
887 | * and the secondary one | |
888 | */ | |
889 | - spin_lock(&boot_lock); | |
890 | + raw_spin_lock(&boot_lock); | |
891 | ||
892 | /* | |
893 | * This is really belt and braces; we hold unintended secondary | |
e4b2b4a8 | 894 | @@ -87,7 +87,7 @@ |
1a6e0f06 JK |
895 | * now the secondary core is starting up let it run its |
896 | * calibrations, then wait for it to finish | |
897 | */ | |
898 | - spin_unlock(&boot_lock); | |
899 | + raw_spin_unlock(&boot_lock); | |
900 | ||
901 | return pen_release != -1 ? -ENOSYS : 0; | |
902 | } | |
e4b2b4a8 JK |
903 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/crypto/crc32-ce-glue.c linux-4.14/arch/arm64/crypto/crc32-ce-glue.c |
904 | --- linux-4.14.orig/arch/arm64/crypto/crc32-ce-glue.c 2018-09-05 11:03:20.000000000 +0200 | |
905 | +++ linux-4.14/arch/arm64/crypto/crc32-ce-glue.c 2018-09-05 11:05:07.000000000 +0200 | |
906 | @@ -208,7 +208,8 @@ | |
907 | ||
908 | static int __init crc32_pmull_mod_init(void) | |
909 | { | |
910 | - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) { | |
911 | + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && | |
912 | + !IS_ENABLED(CONFIG_PREEMPT_RT_BASE) && (elf_hwcap & HWCAP_PMULL)) { | |
913 | crc32_pmull_algs[0].update = crc32_pmull_update; | |
914 | crc32_pmull_algs[1].update = crc32c_pmull_update; | |
915 | ||
916 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/crypto/Kconfig linux-4.14/arch/arm64/crypto/Kconfig | |
917 | --- linux-4.14.orig/arch/arm64/crypto/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
918 | +++ linux-4.14/arch/arm64/crypto/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
919 | @@ -19,19 +19,19 @@ | |
920 | ||
921 | config CRYPTO_SHA1_ARM64_CE | |
922 | tristate "SHA-1 digest algorithm (ARMv8 Crypto Extensions)" | |
923 | - depends on KERNEL_MODE_NEON | |
924 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
925 | select CRYPTO_HASH | |
926 | select CRYPTO_SHA1 | |
927 | ||
928 | config CRYPTO_SHA2_ARM64_CE | |
929 | tristate "SHA-224/SHA-256 digest algorithm (ARMv8 Crypto Extensions)" | |
930 | - depends on KERNEL_MODE_NEON | |
931 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
932 | select CRYPTO_HASH | |
933 | select CRYPTO_SHA256_ARM64 | |
934 | ||
935 | config CRYPTO_GHASH_ARM64_CE | |
936 | tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" | |
937 | - depends on KERNEL_MODE_NEON | |
938 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
939 | select CRYPTO_HASH | |
940 | select CRYPTO_GF128MUL | |
941 | select CRYPTO_AES | |
942 | @@ -39,7 +39,7 @@ | |
943 | ||
944 | config CRYPTO_CRCT10DIF_ARM64_CE | |
945 | tristate "CRCT10DIF digest algorithm using PMULL instructions" | |
946 | - depends on KERNEL_MODE_NEON && CRC_T10DIF | |
947 | + depends on KERNEL_MODE_NEON && CRC_T10DIF && !PREEMPT_RT_BASE | |
948 | select CRYPTO_HASH | |
949 | ||
950 | config CRYPTO_CRC32_ARM64_CE | |
951 | @@ -53,13 +53,13 @@ | |
952 | ||
953 | config CRYPTO_AES_ARM64_CE | |
954 | tristate "AES core cipher using ARMv8 Crypto Extensions" | |
955 | - depends on ARM64 && KERNEL_MODE_NEON | |
956 | + depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
957 | select CRYPTO_ALGAPI | |
958 | select CRYPTO_AES_ARM64 | |
959 | ||
960 | config CRYPTO_AES_ARM64_CE_CCM | |
961 | tristate "AES in CCM mode using ARMv8 Crypto Extensions" | |
962 | - depends on ARM64 && KERNEL_MODE_NEON | |
963 | + depends on ARM64 && KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
964 | select CRYPTO_ALGAPI | |
965 | select CRYPTO_AES_ARM64_CE | |
966 | select CRYPTO_AES_ARM64 | |
967 | @@ -67,7 +67,7 @@ | |
968 | ||
969 | config CRYPTO_AES_ARM64_CE_BLK | |
970 | tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions" | |
971 | - depends on KERNEL_MODE_NEON | |
972 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
973 | select CRYPTO_BLKCIPHER | |
974 | select CRYPTO_AES_ARM64_CE | |
975 | select CRYPTO_AES_ARM64 | |
976 | @@ -75,7 +75,7 @@ | |
977 | ||
978 | config CRYPTO_AES_ARM64_NEON_BLK | |
979 | tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions" | |
980 | - depends on KERNEL_MODE_NEON | |
981 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
982 | select CRYPTO_BLKCIPHER | |
983 | select CRYPTO_AES_ARM64 | |
984 | select CRYPTO_AES | |
985 | @@ -83,13 +83,13 @@ | |
986 | ||
987 | config CRYPTO_CHACHA20_NEON | |
988 | tristate "NEON accelerated ChaCha20 symmetric cipher" | |
989 | - depends on KERNEL_MODE_NEON | |
990 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
991 | select CRYPTO_BLKCIPHER | |
992 | select CRYPTO_CHACHA20 | |
993 | ||
994 | config CRYPTO_AES_ARM64_BS | |
995 | tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm" | |
996 | - depends on KERNEL_MODE_NEON | |
997 | + depends on KERNEL_MODE_NEON && !PREEMPT_RT_BASE | |
998 | select CRYPTO_BLKCIPHER | |
999 | select CRYPTO_AES_ARM64_NEON_BLK | |
1000 | select CRYPTO_AES_ARM64 | |
1001 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/include/asm/spinlock_types.h linux-4.14/arch/arm64/include/asm/spinlock_types.h | |
1002 | --- linux-4.14.orig/arch/arm64/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1003 | +++ linux-4.14/arch/arm64/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1004 | @@ -16,10 +16,6 @@ | |
1005 | #ifndef __ASM_SPINLOCK_TYPES_H | |
1006 | #define __ASM_SPINLOCK_TYPES_H | |
1007 | ||
1008 | -#if !defined(__LINUX_SPINLOCK_TYPES_H) && !defined(__ASM_SPINLOCK_H) | |
1009 | -# error "please don't include this file directly" | |
1010 | -#endif | |
1011 | - | |
1012 | #include <linux/types.h> | |
1a6e0f06 | 1013 | |
e4b2b4a8 JK |
1014 | #define TICKET_SHIFT 16 |
1015 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/include/asm/thread_info.h linux-4.14/arch/arm64/include/asm/thread_info.h | |
1016 | --- linux-4.14.orig/arch/arm64/include/asm/thread_info.h 2018-09-05 11:03:20.000000000 +0200 | |
1017 | +++ linux-4.14/arch/arm64/include/asm/thread_info.h 2018-09-05 11:05:07.000000000 +0200 | |
1018 | @@ -43,6 +43,7 @@ | |
1019 | u64 ttbr0; /* saved TTBR0_EL1 */ | |
1020 | #endif | |
1a6e0f06 JK |
1021 | int preempt_count; /* 0 => preemptable, <0 => bug */ |
1022 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1a6e0f06 JK |
1023 | }; |
1024 | ||
e4b2b4a8 JK |
1025 | #define INIT_THREAD_INFO(tsk) \ |
1026 | @@ -82,6 +83,7 @@ | |
1a6e0f06 | 1027 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ |
e4b2b4a8 JK |
1028 | #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ |
1029 | #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ | |
1030 | +#define TIF_NEED_RESCHED_LAZY 6 | |
1a6e0f06 JK |
1031 | #define TIF_NOHZ 7 |
1032 | #define TIF_SYSCALL_TRACE 8 | |
1033 | #define TIF_SYSCALL_AUDIT 9 | |
e4b2b4a8 | 1034 | @@ -98,6 +100,7 @@ |
1a6e0f06 JK |
1035 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) |
1036 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1037 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1038 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1039 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1040 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1041 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
e4b2b4a8 | 1042 | @@ -109,8 +112,9 @@ |
1a6e0f06 JK |
1043 | |
1044 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
e4b2b4a8 JK |
1045 | _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ |
1046 | - _TIF_UPROBE | _TIF_FSCHECK) | |
1047 | + _TIF_UPROBE | _TIF_FSCHECK | _TIF_NEED_RESCHED_LAZY) | |
1a6e0f06 | 1048 | |
e4b2b4a8 | 1049 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
1a6e0f06 JK |
1050 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ |
1051 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
e4b2b4a8 JK |
1052 | _TIF_NOHZ) |
1053 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/Kconfig linux-4.14/arch/arm64/Kconfig | |
1054 | --- linux-4.14.orig/arch/arm64/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
1055 | +++ linux-4.14/arch/arm64/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1056 | @@ -103,6 +103,7 @@ | |
1057 | select HAVE_PERF_EVENTS | |
1058 | select HAVE_PERF_REGS | |
1059 | select HAVE_PERF_USER_STACK_DUMP | |
1060 | + select HAVE_PREEMPT_LAZY | |
1061 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1062 | select HAVE_RCU_TABLE_FREE | |
1063 | select HAVE_SYSCALL_TRACEPOINTS | |
1064 | @@ -791,7 +792,7 @@ | |
1065 | ||
1066 | config XEN | |
1067 | bool "Xen guest support on ARM64" | |
1068 | - depends on ARM64 && OF | |
1069 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1070 | select SWIOTLB_XEN | |
1071 | select PARAVIRT | |
1072 | help | |
1073 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/kernel/asm-offsets.c linux-4.14/arch/arm64/kernel/asm-offsets.c | |
1074 | --- linux-4.14.orig/arch/arm64/kernel/asm-offsets.c 2018-09-05 11:03:20.000000000 +0200 | |
1075 | +++ linux-4.14/arch/arm64/kernel/asm-offsets.c 2018-09-05 11:05:07.000000000 +0200 | |
1076 | @@ -39,6 +39,7 @@ | |
1a6e0f06 | 1077 | BLANK(); |
e4b2b4a8 JK |
1078 | DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); |
1079 | DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); | |
1080 | + DEFINE(TSK_TI_PREEMPT_LAZY, offsetof(struct task_struct, thread_info.preempt_lazy_count)); | |
1081 | DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); | |
1082 | #ifdef CONFIG_ARM64_SW_TTBR0_PAN | |
1083 | DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); | |
1084 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/kernel/entry.S linux-4.14/arch/arm64/kernel/entry.S | |
1085 | --- linux-4.14.orig/arch/arm64/kernel/entry.S 2018-09-05 11:03:20.000000000 +0200 | |
1086 | +++ linux-4.14/arch/arm64/kernel/entry.S 2018-09-05 11:05:07.000000000 +0200 | |
1087 | @@ -637,11 +637,16 @@ | |
1a6e0f06 JK |
1088 | |
1089 | #ifdef CONFIG_PREEMPT | |
e4b2b4a8 | 1090 | ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count |
1a6e0f06 JK |
1091 | - cbnz w24, 1f // preempt count != 0 |
1092 | + cbnz w24, 2f // preempt count != 0 | |
e4b2b4a8 | 1093 | ldr x0, [tsk, #TSK_TI_FLAGS] // get flags |
1a6e0f06 JK |
1094 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? |
1095 | - bl el1_preempt | |
1096 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1097 | + | |
e4b2b4a8 | 1098 | + ldr w24, [tsk, #TSK_TI_PREEMPT_LAZY] // get preempt lazy count |
1a6e0f06 JK |
1099 | + cbnz w24, 2f // preempt lazy count != 0 |
1100 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1101 | 1: | |
1102 | + bl el1_preempt | |
1103 | +2: | |
1104 | #endif | |
1105 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1106 | bl trace_hardirqs_on | |
e4b2b4a8 | 1107 | @@ -655,6 +660,7 @@ |
1a6e0f06 | 1108 | 1: bl preempt_schedule_irq // irq en/disable is done inside |
e4b2b4a8 | 1109 | ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS |
1a6e0f06 JK |
1110 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? |
1111 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1112 | ret x24 | |
1113 | #endif | |
1114 | ||
e4b2b4a8 JK |
1115 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/arm64/kernel/signal.c linux-4.14/arch/arm64/kernel/signal.c |
1116 | --- linux-4.14.orig/arch/arm64/kernel/signal.c 2018-09-05 11:03:20.000000000 +0200 | |
1117 | +++ linux-4.14/arch/arm64/kernel/signal.c 2018-09-05 11:05:07.000000000 +0200 | |
1118 | @@ -756,7 +756,7 @@ | |
1119 | /* Check valid user FS if needed */ | |
1120 | addr_limit_user_check(); | |
1121 | ||
c7c16703 JK |
1122 | - if (thread_flags & _TIF_NEED_RESCHED) { |
1123 | + if (thread_flags & _TIF_NEED_RESCHED_MASK) { | |
1124 | schedule(); | |
1125 | } else { | |
1126 | local_irq_enable(); | |
e4b2b4a8 JK |
1127 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/blackfin/include/asm/spinlock_types.h linux-4.14/arch/blackfin/include/asm/spinlock_types.h |
1128 | --- linux-4.14.orig/arch/blackfin/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1129 | +++ linux-4.14/arch/blackfin/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1130 | @@ -7,10 +7,6 @@ | |
1131 | #ifndef __ASM_SPINLOCK_TYPES_H | |
1132 | #define __ASM_SPINLOCK_TYPES_H | |
1133 | ||
1134 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1135 | -# error "please don't include this file directly" | |
1136 | -#endif | |
1137 | - | |
1138 | #include <asm/rwlock.h> | |
1139 | ||
1140 | typedef struct { | |
1141 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/hexagon/include/asm/spinlock_types.h linux-4.14/arch/hexagon/include/asm/spinlock_types.h | |
1142 | --- linux-4.14.orig/arch/hexagon/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1143 | +++ linux-4.14/arch/hexagon/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1144 | @@ -21,10 +21,6 @@ | |
1145 | #ifndef _ASM_SPINLOCK_TYPES_H | |
1146 | #define _ASM_SPINLOCK_TYPES_H | |
1147 | ||
1148 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1149 | -# error "please don't include this file directly" | |
1150 | -#endif | |
1151 | - | |
1152 | typedef struct { | |
1153 | volatile unsigned int lock; | |
1154 | } arch_spinlock_t; | |
1155 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/ia64/include/asm/spinlock_types.h linux-4.14/arch/ia64/include/asm/spinlock_types.h | |
1156 | --- linux-4.14.orig/arch/ia64/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1157 | +++ linux-4.14/arch/ia64/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1158 | @@ -2,10 +2,6 @@ | |
1159 | #ifndef _ASM_IA64_SPINLOCK_TYPES_H | |
1160 | #define _ASM_IA64_SPINLOCK_TYPES_H | |
1161 | ||
1162 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1163 | -# error "please don't include this file directly" | |
1164 | -#endif | |
1165 | - | |
1166 | typedef struct { | |
1167 | volatile unsigned int lock; | |
1168 | } arch_spinlock_t; | |
1169 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/ia64/kernel/mca.c linux-4.14/arch/ia64/kernel/mca.c | |
1170 | --- linux-4.14.orig/arch/ia64/kernel/mca.c 2017-11-12 19:46:13.000000000 +0100 | |
1171 | +++ linux-4.14/arch/ia64/kernel/mca.c 2018-09-05 11:05:07.000000000 +0200 | |
1172 | @@ -1824,7 +1824,7 @@ | |
1173 | ti->cpu = cpu; | |
1174 | p->stack = ti; | |
1175 | p->state = TASK_UNINTERRUPTIBLE; | |
1176 | - cpumask_set_cpu(cpu, &p->cpus_allowed); | |
1177 | + cpumask_set_cpu(cpu, &p->cpus_mask); | |
1178 | INIT_LIST_HEAD(&p->tasks); | |
1179 | p->parent = p->real_parent = p->group_leader = p; | |
1180 | INIT_LIST_HEAD(&p->children); | |
1181 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/Kconfig linux-4.14/arch/Kconfig | |
1182 | --- linux-4.14.orig/arch/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
1183 | +++ linux-4.14/arch/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1184 | @@ -20,6 +20,7 @@ | |
1185 | tristate "OProfile system profiling" | |
1186 | depends on PROFILING | |
1187 | depends on HAVE_OPROFILE | |
1188 | + depends on !PREEMPT_RT_FULL | |
1189 | select RING_BUFFER | |
1190 | select RING_BUFFER_ALLOW_SWAP | |
1191 | help | |
1192 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/m32r/include/asm/spinlock_types.h linux-4.14/arch/m32r/include/asm/spinlock_types.h | |
1193 | --- linux-4.14.orig/arch/m32r/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1194 | +++ linux-4.14/arch/m32r/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1195 | @@ -2,10 +2,6 @@ | |
1196 | #ifndef _ASM_M32R_SPINLOCK_TYPES_H | |
1197 | #define _ASM_M32R_SPINLOCK_TYPES_H | |
1198 | ||
1199 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1200 | -# error "please don't include this file directly" | |
1201 | -#endif | |
1202 | - | |
1203 | typedef struct { | |
1204 | volatile int slock; | |
1205 | } arch_spinlock_t; | |
1206 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/metag/include/asm/spinlock_types.h linux-4.14/arch/metag/include/asm/spinlock_types.h | |
1207 | --- linux-4.14.orig/arch/metag/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1208 | +++ linux-4.14/arch/metag/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1209 | @@ -2,10 +2,6 @@ | |
1210 | #ifndef _ASM_METAG_SPINLOCK_TYPES_H | |
1211 | #define _ASM_METAG_SPINLOCK_TYPES_H | |
1212 | ||
1213 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1214 | -# error "please don't include this file directly" | |
1215 | -#endif | |
1216 | - | |
1217 | typedef struct { | |
1218 | volatile unsigned int lock; | |
1219 | } arch_spinlock_t; | |
1220 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/mips/include/asm/switch_to.h linux-4.14/arch/mips/include/asm/switch_to.h | |
1221 | --- linux-4.14.orig/arch/mips/include/asm/switch_to.h 2017-11-12 19:46:13.000000000 +0100 | |
1222 | +++ linux-4.14/arch/mips/include/asm/switch_to.h 2018-09-05 11:05:07.000000000 +0200 | |
1223 | @@ -42,7 +42,7 @@ | |
1224 | * inline to try to keep the overhead down. If we have been forced to run on | |
1225 | * a "CPU" with an FPU because of a previous high level of FP computation, | |
1226 | * but did not actually use the FPU during the most recent time-slice (CU1 | |
1227 | - * isn't set), we undo the restriction on cpus_allowed. | |
1228 | + * isn't set), we undo the restriction on cpus_mask. | |
1229 | * | |
1230 | * We're not calling set_cpus_allowed() here, because we have no need to | |
1231 | * force prompt migration - we're already switching the current CPU to a | |
1232 | @@ -57,7 +57,7 @@ | |
1233 | test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) && \ | |
1234 | (!(KSTK_STATUS(prev) & ST0_CU1))) { \ | |
1235 | clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND); \ | |
1236 | - prev->cpus_allowed = prev->thread.user_cpus_allowed; \ | |
1237 | + prev->cpus_mask = prev->thread.user_cpus_allowed; \ | |
1238 | } \ | |
1239 | next->thread.emulated_fp = 0; \ | |
1240 | } while(0) | |
1241 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/mips/Kconfig linux-4.14/arch/mips/Kconfig | |
1242 | --- linux-4.14.orig/arch/mips/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
1243 | +++ linux-4.14/arch/mips/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1244 | @@ -2519,7 +2519,7 @@ | |
1a6e0f06 JK |
1245 | # |
1246 | config HIGHMEM | |
1247 | bool "High Memory Support" | |
1248 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1249 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1250 | ||
1251 | config CPU_SUPPORTS_HIGHMEM | |
1252 | bool | |
e4b2b4a8 JK |
1253 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/mips/kernel/mips-mt-fpaff.c linux-4.14/arch/mips/kernel/mips-mt-fpaff.c |
1254 | --- linux-4.14.orig/arch/mips/kernel/mips-mt-fpaff.c 2017-11-12 19:46:13.000000000 +0100 | |
1255 | +++ linux-4.14/arch/mips/kernel/mips-mt-fpaff.c 2018-09-05 11:05:07.000000000 +0200 | |
1256 | @@ -177,7 +177,7 @@ | |
1257 | if (retval) | |
1258 | goto out_unlock; | |
1a6e0f06 | 1259 | |
e4b2b4a8 JK |
1260 | - cpumask_or(&allowed, &p->thread.user_cpus_allowed, &p->cpus_allowed); |
1261 | + cpumask_or(&allowed, &p->thread.user_cpus_allowed, p->cpus_ptr); | |
1262 | cpumask_and(&mask, &allowed, cpu_active_mask); | |
1a6e0f06 | 1263 | |
e4b2b4a8 JK |
1264 | out_unlock: |
1265 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/mips/kernel/traps.c linux-4.14/arch/mips/kernel/traps.c | |
1266 | --- linux-4.14.orig/arch/mips/kernel/traps.c 2018-09-05 11:03:20.000000000 +0200 | |
1267 | +++ linux-4.14/arch/mips/kernel/traps.c 2018-09-05 11:05:07.000000000 +0200 | |
1268 | @@ -1193,12 +1193,12 @@ | |
1269 | * restricted the allowed set to exclude any CPUs with FPUs, | |
1270 | * we'll skip the procedure. | |
1271 | */ | |
1272 | - if (cpumask_intersects(¤t->cpus_allowed, &mt_fpu_cpumask)) { | |
1273 | + if (cpumask_intersects(¤t->cpus_mask, &mt_fpu_cpumask)) { | |
1274 | cpumask_t tmask; | |
1275 | ||
1276 | current->thread.user_cpus_allowed | |
1277 | - = current->cpus_allowed; | |
1278 | - cpumask_and(&tmask, ¤t->cpus_allowed, | |
1279 | + = current->cpus_mask; | |
1280 | + cpumask_and(&tmask, ¤t->cpus_mask, | |
1281 | &mt_fpu_cpumask); | |
1282 | set_cpus_allowed_ptr(current, &tmask); | |
1283 | set_thread_flag(TIF_FPUBOUND); | |
1284 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/mn10300/include/asm/spinlock_types.h linux-4.14/arch/mn10300/include/asm/spinlock_types.h | |
1285 | --- linux-4.14.orig/arch/mn10300/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1286 | +++ linux-4.14/arch/mn10300/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1287 | @@ -2,10 +2,6 @@ | |
1288 | #ifndef _ASM_SPINLOCK_TYPES_H | |
1289 | #define _ASM_SPINLOCK_TYPES_H | |
1290 | ||
1291 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1292 | -# error "please don't include this file directly" | |
1293 | -#endif | |
1294 | - | |
1295 | typedef struct arch_spinlock { | |
1296 | unsigned int slock; | |
1297 | } arch_spinlock_t; | |
1298 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/include/asm/spinlock_types.h linux-4.14/arch/powerpc/include/asm/spinlock_types.h | |
1299 | --- linux-4.14.orig/arch/powerpc/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1300 | +++ linux-4.14/arch/powerpc/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1301 | @@ -2,10 +2,6 @@ | |
1302 | #ifndef _ASM_POWERPC_SPINLOCK_TYPES_H | |
1303 | #define _ASM_POWERPC_SPINLOCK_TYPES_H | |
1304 | ||
1305 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1306 | -# error "please don't include this file directly" | |
1307 | -#endif | |
1308 | - | |
1309 | typedef struct { | |
1310 | volatile unsigned int slock; | |
1311 | } arch_spinlock_t; | |
1312 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/include/asm/thread_info.h linux-4.14/arch/powerpc/include/asm/thread_info.h | |
1313 | --- linux-4.14.orig/arch/powerpc/include/asm/thread_info.h 2017-11-12 19:46:13.000000000 +0100 | |
1314 | +++ linux-4.14/arch/powerpc/include/asm/thread_info.h 2018-09-05 11:05:07.000000000 +0200 | |
1315 | @@ -36,6 +36,8 @@ | |
1a6e0f06 JK |
1316 | int cpu; /* cpu we're on */ |
1317 | int preempt_count; /* 0 => preemptable, | |
1318 | <0 => BUG */ | |
1319 | + int preempt_lazy_count; /* 0 => preemptable, | |
1320 | + <0 => BUG */ | |
1321 | unsigned long local_flags; /* private flags for thread */ | |
1322 | #ifdef CONFIG_LIVEPATCH | |
1323 | unsigned long *livepatch_sp; | |
e4b2b4a8 | 1324 | @@ -81,8 +83,7 @@ |
1a6e0f06 JK |
1325 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ |
1326 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1327 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1328 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1329 | - TIF_NEED_RESCHED */ | |
1330 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1331 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1332 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
e4b2b4a8 JK |
1333 | #define TIF_PATCH_PENDING 6 /* pending live patching update */ |
1334 | @@ -101,6 +102,8 @@ | |
1a6e0f06 JK |
1335 | #if defined(CONFIG_PPC64) |
1336 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1337 | #endif | |
1338 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1339 | + TIF_NEED_RESCHED */ | |
1340 | ||
1341 | /* as above, but as bit values */ | |
1342 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
e4b2b4a8 | 1343 | @@ -120,14 +123,16 @@ |
1a6e0f06 JK |
1344 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) |
1345 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1346 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1347 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1348 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1349 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1350 | _TIF_NOHZ) | |
1351 | ||
1352 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1353 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
e4b2b4a8 JK |
1354 | - _TIF_RESTORE_TM | _TIF_PATCH_PENDING) |
1355 | + _TIF_RESTORE_TM | _TIF_PATCH_PENDING | _TIF_NEED_RESCHED_LAZY) | |
1a6e0f06 JK |
1356 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) |
1357 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1358 | ||
1359 | /* Bits in local_flags */ | |
1360 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
e4b2b4a8 JK |
1361 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/Kconfig linux-4.14/arch/powerpc/Kconfig |
1362 | --- linux-4.14.orig/arch/powerpc/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
1363 | +++ linux-4.14/arch/powerpc/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1364 | @@ -111,10 +111,11 @@ | |
1365 | ||
1366 | config RWSEM_GENERIC_SPINLOCK | |
1367 | bool | |
1368 | + default y if PREEMPT_RT_FULL | |
1369 | ||
1370 | config RWSEM_XCHGADD_ALGORITHM | |
1371 | bool | |
1372 | - default y | |
1373 | + default y if !PREEMPT_RT_FULL | |
1374 | ||
1375 | config GENERIC_LOCKBREAK | |
1376 | bool | |
1377 | @@ -215,6 +216,7 @@ | |
1378 | select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH | |
1379 | select HAVE_PERF_REGS | |
1380 | select HAVE_PERF_USER_STACK_DUMP | |
1381 | + select HAVE_PREEMPT_LAZY | |
1382 | select HAVE_RCU_TABLE_FREE if SMP | |
1383 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1384 | select HAVE_SYSCALL_TRACEPOINTS | |
1385 | @@ -390,7 +392,7 @@ | |
1386 | ||
1387 | config HIGHMEM | |
1388 | bool "High memory support" | |
1389 | - depends on PPC32 | |
1390 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1391 | ||
1392 | source kernel/Kconfig.hz | |
1393 | source kernel/Kconfig.preempt | |
1394 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/asm-offsets.c linux-4.14/arch/powerpc/kernel/asm-offsets.c | |
1395 | --- linux-4.14.orig/arch/powerpc/kernel/asm-offsets.c 2018-09-05 11:03:20.000000000 +0200 | |
1396 | +++ linux-4.14/arch/powerpc/kernel/asm-offsets.c 2018-09-05 11:05:07.000000000 +0200 | |
1397 | @@ -156,6 +156,7 @@ | |
1398 | OFFSET(TI_FLAGS, thread_info, flags); | |
1399 | OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags); | |
1400 | OFFSET(TI_PREEMPT, thread_info, preempt_count); | |
1401 | + OFFSET(TI_PREEMPT_LAZY, thread_info, preempt_lazy_count); | |
1402 | OFFSET(TI_TASK, thread_info, task); | |
1403 | OFFSET(TI_CPU, thread_info, cpu); | |
1404 | ||
1405 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/entry_32.S linux-4.14/arch/powerpc/kernel/entry_32.S | |
1406 | --- linux-4.14.orig/arch/powerpc/kernel/entry_32.S 2017-11-12 19:46:13.000000000 +0100 | |
1407 | +++ linux-4.14/arch/powerpc/kernel/entry_32.S 2018-09-05 11:05:07.000000000 +0200 | |
1408 | @@ -866,7 +866,14 @@ | |
1a6e0f06 JK |
1409 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ |
1410 | bne restore | |
1411 | andi. r8,r8,_TIF_NEED_RESCHED | |
1412 | + bne+ 1f | |
1413 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1414 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1415 | + bne restore | |
1416 | + lwz r0,TI_FLAGS(r9) | |
1417 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1418 | beq+ restore | |
1419 | +1: | |
1420 | lwz r3,_MSR(r1) | |
1421 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1422 | beq restore /* don't schedule if so */ | |
e4b2b4a8 | 1423 | @@ -877,11 +884,11 @@ |
1a6e0f06 JK |
1424 | */ |
1425 | bl trace_hardirqs_off | |
1426 | #endif | |
1427 | -1: bl preempt_schedule_irq | |
1428 | +2: bl preempt_schedule_irq | |
1429 | CURRENT_THREAD_INFO(r9, r1) | |
1430 | lwz r3,TI_FLAGS(r9) | |
1431 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1432 | - bne- 1b | |
1433 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1434 | + bne- 2b | |
1435 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1436 | /* And now, to properly rebalance the above, we tell lockdep they | |
1437 | * are being turned back on, which will happen when we return | |
e4b2b4a8 | 1438 | @@ -1204,7 +1211,7 @@ |
1a6e0f06 JK |
1439 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ |
1440 | ||
1441 | do_work: /* r10 contains MSR_KERNEL here */ | |
1442 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1443 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1444 | beq do_user_signal | |
1445 | ||
1446 | do_resched: /* r10 contains MSR_KERNEL here */ | |
e4b2b4a8 | 1447 | @@ -1225,7 +1232,7 @@ |
1a6e0f06 JK |
1448 | MTMSRD(r10) /* disable interrupts */ |
1449 | CURRENT_THREAD_INFO(r9, r1) | |
1450 | lwz r9,TI_FLAGS(r9) | |
1451 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1452 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1453 | bne- do_resched | |
1454 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1455 | beq restore_user | |
e4b2b4a8 JK |
1456 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/entry_64.S linux-4.14/arch/powerpc/kernel/entry_64.S |
1457 | --- linux-4.14.orig/arch/powerpc/kernel/entry_64.S 2018-09-05 11:03:20.000000000 +0200 | |
1458 | +++ linux-4.14/arch/powerpc/kernel/entry_64.S 2018-09-05 11:05:07.000000000 +0200 | |
1459 | @@ -690,7 +690,7 @@ | |
1a6e0f06 JK |
1460 | bl restore_math |
1461 | b restore | |
1462 | #endif | |
1463 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1464 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1465 | beq 2f | |
1466 | bl restore_interrupts | |
1467 | SCHEDULE_USER | |
e4b2b4a8 | 1468 | @@ -752,10 +752,18 @@ |
1a6e0f06 JK |
1469 | |
1470 | #ifdef CONFIG_PREEMPT | |
1471 | /* Check if we need to preempt */ | |
e4b2b4a8 | 1472 | + lwz r8,TI_PREEMPT(r9) |
1a6e0f06 JK |
1473 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ |
1474 | + bne restore | |
e4b2b4a8 | 1475 | andi. r0,r4,_TIF_NEED_RESCHED |
1a6e0f06 JK |
1476 | + bne+ check_count |
1477 | + | |
1478 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
e4b2b4a8 | 1479 | beq+ restore |
1a6e0f06 JK |
1480 | + lwz r8,TI_PREEMPT_LAZY(r9) |
1481 | + | |
e4b2b4a8 JK |
1482 | /* Check that preempt_count() == 0 and interrupts are enabled */ |
1483 | - lwz r8,TI_PREEMPT(r9) | |
1a6e0f06 JK |
1484 | +check_count: |
1485 | cmpwi cr1,r8,0 | |
1486 | ld r0,SOFTE(r1) | |
1487 | cmpdi r0,0 | |
e4b2b4a8 | 1488 | @@ -772,7 +780,7 @@ |
1a6e0f06 JK |
1489 | /* Re-test flags and eventually loop */ |
1490 | CURRENT_THREAD_INFO(r9, r1) | |
1491 | ld r4,TI_FLAGS(r9) | |
1492 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1493 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1494 | bne 1b | |
1495 | ||
1496 | /* | |
e4b2b4a8 JK |
1497 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/irq.c linux-4.14/arch/powerpc/kernel/irq.c |
1498 | --- linux-4.14.orig/arch/powerpc/kernel/irq.c 2018-09-05 11:03:20.000000000 +0200 | |
1499 | +++ linux-4.14/arch/powerpc/kernel/irq.c 2018-09-05 11:05:07.000000000 +0200 | |
1500 | @@ -693,6 +693,7 @@ | |
1a6e0f06 JK |
1501 | } |
1502 | } | |
1503 | ||
1504 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1505 | void do_softirq_own_stack(void) | |
1506 | { | |
1507 | struct thread_info *curtp, *irqtp; | |
e4b2b4a8 | 1508 | @@ -710,6 +711,7 @@ |
1a6e0f06 JK |
1509 | if (irqtp->flags) |
1510 | set_bits(irqtp->flags, &curtp->flags); | |
1511 | } | |
1512 | +#endif | |
1513 | ||
1514 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1515 | { | |
e4b2b4a8 JK |
1516 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/misc_32.S linux-4.14/arch/powerpc/kernel/misc_32.S |
1517 | --- linux-4.14.orig/arch/powerpc/kernel/misc_32.S 2017-11-12 19:46:13.000000000 +0100 | |
1518 | +++ linux-4.14/arch/powerpc/kernel/misc_32.S 2018-09-05 11:05:07.000000000 +0200 | |
c7c16703 | 1519 | @@ -41,6 +41,7 @@ |
1a6e0f06 JK |
1520 | * We store the saved ksp_limit in the unused part |
1521 | * of the STACK_FRAME_OVERHEAD | |
1522 | */ | |
1523 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1524 | _GLOBAL(call_do_softirq) | |
1525 | mflr r0 | |
1526 | stw r0,4(r1) | |
e4b2b4a8 | 1527 | @@ -57,6 +58,7 @@ |
1a6e0f06 JK |
1528 | stw r10,THREAD+KSP_LIMIT(r2) |
1529 | mtlr r0 | |
1530 | blr | |
1531 | +#endif | |
1532 | ||
1533 | /* | |
1534 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
e4b2b4a8 JK |
1535 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kernel/misc_64.S linux-4.14/arch/powerpc/kernel/misc_64.S |
1536 | --- linux-4.14.orig/arch/powerpc/kernel/misc_64.S 2018-09-05 11:03:20.000000000 +0200 | |
1537 | +++ linux-4.14/arch/powerpc/kernel/misc_64.S 2018-09-05 11:05:07.000000000 +0200 | |
c7c16703 | 1538 | @@ -31,6 +31,7 @@ |
1a6e0f06 JK |
1539 | |
1540 | .text | |
1541 | ||
1542 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1543 | _GLOBAL(call_do_softirq) | |
1544 | mflr r0 | |
1545 | std r0,16(r1) | |
e4b2b4a8 | 1546 | @@ -41,6 +42,7 @@ |
1a6e0f06 JK |
1547 | ld r0,16(r1) |
1548 | mtlr r0 | |
1549 | blr | |
1550 | +#endif | |
1551 | ||
1552 | _GLOBAL(call_do_irq) | |
1553 | mflr r0 | |
e4b2b4a8 JK |
1554 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/kvm/Kconfig linux-4.14/arch/powerpc/kvm/Kconfig |
1555 | --- linux-4.14.orig/arch/powerpc/kvm/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
1556 | +++ linux-4.14/arch/powerpc/kvm/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1557 | @@ -177,6 +177,7 @@ | |
1a6e0f06 JK |
1558 | config KVM_MPIC |
1559 | bool "KVM in-kernel MPIC emulation" | |
1560 | depends on KVM && E500 | |
1561 | + depends on !PREEMPT_RT_FULL | |
1562 | select HAVE_KVM_IRQCHIP | |
1563 | select HAVE_KVM_IRQFD | |
1564 | select HAVE_KVM_IRQ_ROUTING | |
e4b2b4a8 JK |
1565 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/platforms/cell/spufs/sched.c linux-4.14/arch/powerpc/platforms/cell/spufs/sched.c |
1566 | --- linux-4.14.orig/arch/powerpc/platforms/cell/spufs/sched.c 2017-11-12 19:46:13.000000000 +0100 | |
1567 | +++ linux-4.14/arch/powerpc/platforms/cell/spufs/sched.c 2018-09-05 11:05:07.000000000 +0200 | |
1568 | @@ -141,7 +141,7 @@ | |
1569 | * runqueue. The context will be rescheduled on the proper node | |
1570 | * if it is timesliced or preempted. | |
1571 | */ | |
1572 | - cpumask_copy(&ctx->cpus_allowed, ¤t->cpus_allowed); | |
1573 | + cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr); | |
1574 | ||
1575 | /* Save the current cpu id for spu interrupt routing. */ | |
1576 | ctx->last_ran = raw_smp_processor_id(); | |
1577 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/powerpc/platforms/ps3/device-init.c linux-4.14/arch/powerpc/platforms/ps3/device-init.c | |
1578 | --- linux-4.14.orig/arch/powerpc/platforms/ps3/device-init.c 2017-11-12 19:46:13.000000000 +0100 | |
1579 | +++ linux-4.14/arch/powerpc/platforms/ps3/device-init.c 2018-09-05 11:05:07.000000000 +0200 | |
1580 | @@ -752,7 +752,7 @@ | |
1a6e0f06 JK |
1581 | } |
1582 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
1583 | ||
1584 | - res = wait_event_interruptible(dev->done.wait, | |
1585 | + res = swait_event_interruptible(dev->done.wait, | |
1586 | dev->done.done || kthread_should_stop()); | |
1587 | if (kthread_should_stop()) | |
1588 | res = -EINTR; | |
e4b2b4a8 JK |
1589 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/s390/include/asm/spinlock_types.h linux-4.14/arch/s390/include/asm/spinlock_types.h |
1590 | --- linux-4.14.orig/arch/s390/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1591 | +++ linux-4.14/arch/s390/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1592 | @@ -2,10 +2,6 @@ | |
1593 | #ifndef __ASM_SPINLOCK_TYPES_H | |
1594 | #define __ASM_SPINLOCK_TYPES_H | |
1595 | ||
1596 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1597 | -# error "please don't include this file directly" | |
1598 | -#endif | |
1599 | - | |
1600 | typedef struct { | |
1601 | int lock; | |
1602 | } __attribute__ ((aligned (4))) arch_spinlock_t; | |
1603 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/sh/include/asm/spinlock_types.h linux-4.14/arch/sh/include/asm/spinlock_types.h | |
1604 | --- linux-4.14.orig/arch/sh/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1605 | +++ linux-4.14/arch/sh/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1606 | @@ -2,10 +2,6 @@ | |
1607 | #ifndef __ASM_SH_SPINLOCK_TYPES_H | |
1608 | #define __ASM_SH_SPINLOCK_TYPES_H | |
1609 | ||
1610 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1611 | -# error "please don't include this file directly" | |
1612 | -#endif | |
1613 | - | |
1614 | typedef struct { | |
1615 | volatile unsigned int lock; | |
1616 | } arch_spinlock_t; | |
1617 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/sh/kernel/irq.c linux-4.14/arch/sh/kernel/irq.c | |
1618 | --- linux-4.14.orig/arch/sh/kernel/irq.c 2017-11-12 19:46:13.000000000 +0100 | |
1619 | +++ linux-4.14/arch/sh/kernel/irq.c 2018-09-05 11:05:07.000000000 +0200 | |
1620 | @@ -148,6 +148,7 @@ | |
1a6e0f06 JK |
1621 | hardirq_ctx[cpu] = NULL; |
1622 | } | |
1623 | ||
1624 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1625 | void do_softirq_own_stack(void) | |
1626 | { | |
1627 | struct thread_info *curctx; | |
e4b2b4a8 | 1628 | @@ -175,6 +176,7 @@ |
1a6e0f06 JK |
1629 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" |
1630 | ); | |
1631 | } | |
1632 | +#endif | |
1633 | #else | |
1634 | static inline void handle_one_irq(unsigned int irq) | |
1635 | { | |
e4b2b4a8 JK |
1636 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/sparc/Kconfig linux-4.14/arch/sparc/Kconfig |
1637 | --- linux-4.14.orig/arch/sparc/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
1638 | +++ linux-4.14/arch/sparc/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
1639 | @@ -206,12 +206,10 @@ | |
1a6e0f06 JK |
1640 | source kernel/Kconfig.hz |
1641 | ||
1642 | config RWSEM_GENERIC_SPINLOCK | |
1643 | - bool | |
1644 | - default y if SPARC32 | |
1645 | + def_bool PREEMPT_RT_FULL | |
1646 | ||
1647 | config RWSEM_XCHGADD_ALGORITHM | |
1648 | - bool | |
1649 | - default y if SPARC64 | |
1650 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1651 | ||
1652 | config GENERIC_HWEIGHT | |
1653 | bool | |
e4b2b4a8 JK |
1654 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/sparc/kernel/irq_64.c linux-4.14/arch/sparc/kernel/irq_64.c |
1655 | --- linux-4.14.orig/arch/sparc/kernel/irq_64.c 2017-11-12 19:46:13.000000000 +0100 | |
1656 | +++ linux-4.14/arch/sparc/kernel/irq_64.c 2018-09-05 11:05:07.000000000 +0200 | |
1657 | @@ -855,6 +855,7 @@ | |
1a6e0f06 JK |
1658 | set_irq_regs(old_regs); |
1659 | } | |
1660 | ||
1661 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1662 | void do_softirq_own_stack(void) | |
1663 | { | |
1664 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
e4b2b4a8 | 1665 | @@ -869,6 +870,7 @@ |
1a6e0f06 JK |
1666 | __asm__ __volatile__("mov %0, %%sp" |
1667 | : : "r" (orig_sp)); | |
1668 | } | |
1669 | +#endif | |
1670 | ||
1671 | #ifdef CONFIG_HOTPLUG_CPU | |
1672 | void fixup_irqs(void) | |
e4b2b4a8 JK |
1673 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/tile/include/asm/setup.h linux-4.14/arch/tile/include/asm/setup.h |
1674 | --- linux-4.14.orig/arch/tile/include/asm/setup.h 2017-11-12 19:46:13.000000000 +0100 | |
1675 | +++ linux-4.14/arch/tile/include/asm/setup.h 2018-09-05 11:05:07.000000000 +0200 | |
1676 | @@ -49,7 +49,7 @@ | |
1677 | ||
1678 | /* Hook hardwall code into changes in affinity. */ | |
1679 | #define arch_set_cpus_allowed(p, new_mask) do { \ | |
1680 | - if (!cpumask_equal(&p->cpus_allowed, new_mask)) \ | |
1681 | + if (!cpumask_equal(p->cpus_ptr, new_mask)) \ | |
1682 | hardwall_deactivate_all(p); \ | |
1683 | } while (0) | |
1684 | #endif | |
1685 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/tile/include/asm/spinlock_types.h linux-4.14/arch/tile/include/asm/spinlock_types.h | |
1686 | --- linux-4.14.orig/arch/tile/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
1687 | +++ linux-4.14/arch/tile/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
1688 | @@ -15,10 +15,6 @@ | |
1689 | #ifndef _ASM_TILE_SPINLOCK_TYPES_H | |
1690 | #define _ASM_TILE_SPINLOCK_TYPES_H | |
1691 | ||
1692 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
1693 | -# error "please don't include this file directly" | |
1694 | -#endif | |
1695 | - | |
1696 | #ifdef __tilegx__ | |
1697 | ||
1698 | /* Low 15 bits are "next"; high 15 bits are "current". */ | |
1699 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/tile/kernel/hardwall.c linux-4.14/arch/tile/kernel/hardwall.c | |
1700 | --- linux-4.14.orig/arch/tile/kernel/hardwall.c 2017-11-12 19:46:13.000000000 +0100 | |
1701 | +++ linux-4.14/arch/tile/kernel/hardwall.c 2018-09-05 11:05:07.000000000 +0200 | |
1702 | @@ -590,12 +590,12 @@ | |
1703 | * Get our affinity; if we're not bound to this tile uniquely, | |
1704 | * we can't access the network registers. | |
1705 | */ | |
1706 | - if (cpumask_weight(&p->cpus_allowed) != 1) | |
1707 | + if (p->nr_cpus_allowed != 1) | |
1708 | return -EPERM; | |
1a6e0f06 | 1709 | |
e4b2b4a8 JK |
1710 | /* Make sure we are bound to a cpu assigned to this resource. */ |
1711 | cpu = smp_processor_id(); | |
1712 | - BUG_ON(cpumask_first(&p->cpus_allowed) != cpu); | |
1713 | + BUG_ON(cpumask_first(p->cpus_ptr) != cpu); | |
1714 | if (!cpumask_test_cpu(cpu, &info->cpumask)) | |
1715 | return -EINVAL; | |
1a6e0f06 | 1716 | |
e4b2b4a8 JK |
1717 | @@ -621,17 +621,17 @@ |
1718 | * Deactivate a task's hardwall. Must hold lock for hardwall_type. | |
1719 | * This method may be called from exit_thread(), so we don't want to | |
1720 | * rely on too many fields of struct task_struct still being valid. | |
1721 | - * We assume the cpus_allowed, pid, and comm fields are still valid. | |
1722 | + * We assume the nr_cpus_allowed, pid, and comm fields are still valid. | |
1723 | */ | |
1724 | static void _hardwall_deactivate(struct hardwall_type *hwt, | |
1725 | struct task_struct *task) | |
1726 | { | |
1727 | struct thread_struct *ts = &task->thread; | |
1728 | ||
1729 | - if (cpumask_weight(&task->cpus_allowed) != 1) { | |
1730 | + if (task->nr_cpus_allowed != 1) { | |
1731 | pr_err("pid %d (%s) releasing %s hardwall with an affinity mask containing %d cpus!\n", | |
1732 | task->pid, task->comm, hwt->name, | |
1733 | - cpumask_weight(&task->cpus_allowed)); | |
1734 | + task->nr_cpus_allowed); | |
1735 | BUG(); | |
1736 | } | |
1737 | ||
1738 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/aesni-intel_glue.c linux-4.14/arch/x86/crypto/aesni-intel_glue.c | |
1739 | --- linux-4.14.orig/arch/x86/crypto/aesni-intel_glue.c 2018-09-05 11:03:20.000000000 +0200 | |
1740 | +++ linux-4.14/arch/x86/crypto/aesni-intel_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
1741 | @@ -387,14 +387,14 @@ | |
1742 | ||
1743 | err = skcipher_walk_virt(&walk, req, true); | |
1a6e0f06 JK |
1744 | |
1745 | - kernel_fpu_begin(); | |
1746 | while ((nbytes = walk.nbytes)) { | |
1747 | + kernel_fpu_begin(); | |
1748 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
e4b2b4a8 | 1749 | nbytes & AES_BLOCK_MASK); |
1a6e0f06 JK |
1750 | + kernel_fpu_end(); |
1751 | nbytes &= AES_BLOCK_SIZE - 1; | |
e4b2b4a8 | 1752 | err = skcipher_walk_done(&walk, nbytes); |
1a6e0f06 JK |
1753 | } |
1754 | - kernel_fpu_end(); | |
1755 | ||
1756 | return err; | |
1757 | } | |
e4b2b4a8 JK |
1758 | @@ -409,14 +409,14 @@ |
1759 | ||
1760 | err = skcipher_walk_virt(&walk, req, true); | |
1a6e0f06 JK |
1761 | |
1762 | - kernel_fpu_begin(); | |
1763 | while ((nbytes = walk.nbytes)) { | |
1764 | + kernel_fpu_begin(); | |
1765 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1766 | nbytes & AES_BLOCK_MASK); | |
1767 | + kernel_fpu_end(); | |
1768 | nbytes &= AES_BLOCK_SIZE - 1; | |
e4b2b4a8 | 1769 | err = skcipher_walk_done(&walk, nbytes); |
1a6e0f06 JK |
1770 | } |
1771 | - kernel_fpu_end(); | |
1772 | ||
1773 | return err; | |
1774 | } | |
e4b2b4a8 JK |
1775 | @@ -431,14 +431,14 @@ |
1776 | ||
1777 | err = skcipher_walk_virt(&walk, req, true); | |
1a6e0f06 JK |
1778 | |
1779 | - kernel_fpu_begin(); | |
1780 | while ((nbytes = walk.nbytes)) { | |
1781 | + kernel_fpu_begin(); | |
1782 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1783 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1784 | + kernel_fpu_end(); | |
1785 | nbytes &= AES_BLOCK_SIZE - 1; | |
e4b2b4a8 | 1786 | err = skcipher_walk_done(&walk, nbytes); |
1a6e0f06 JK |
1787 | } |
1788 | - kernel_fpu_end(); | |
1789 | ||
1790 | return err; | |
1791 | } | |
e4b2b4a8 JK |
1792 | @@ -453,14 +453,14 @@ |
1793 | ||
1794 | err = skcipher_walk_virt(&walk, req, true); | |
1a6e0f06 JK |
1795 | |
1796 | - kernel_fpu_begin(); | |
1797 | while ((nbytes = walk.nbytes)) { | |
1798 | + kernel_fpu_begin(); | |
1799 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1800 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1801 | + kernel_fpu_end(); | |
1802 | nbytes &= AES_BLOCK_SIZE - 1; | |
e4b2b4a8 | 1803 | err = skcipher_walk_done(&walk, nbytes); |
1a6e0f06 JK |
1804 | } |
1805 | - kernel_fpu_end(); | |
1806 | ||
1807 | return err; | |
1808 | } | |
e4b2b4a8 JK |
1809 | @@ -510,18 +510,20 @@ |
1810 | ||
1811 | err = skcipher_walk_virt(&walk, req, true); | |
1a6e0f06 JK |
1812 | |
1813 | - kernel_fpu_begin(); | |
1814 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
1815 | + kernel_fpu_begin(); | |
1816 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1817 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1818 | + kernel_fpu_end(); | |
1819 | nbytes &= AES_BLOCK_SIZE - 1; | |
e4b2b4a8 | 1820 | err = skcipher_walk_done(&walk, nbytes); |
1a6e0f06 JK |
1821 | } |
1822 | if (walk.nbytes) { | |
1823 | + kernel_fpu_begin(); | |
1824 | ctr_crypt_final(ctx, &walk); | |
1825 | + kernel_fpu_end(); | |
e4b2b4a8 | 1826 | err = skcipher_walk_done(&walk, 0); |
1a6e0f06 JK |
1827 | } |
1828 | - kernel_fpu_end(); | |
1829 | ||
1830 | return err; | |
1831 | } | |
e4b2b4a8 JK |
1832 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/camellia_aesni_avx2_glue.c linux-4.14/arch/x86/crypto/camellia_aesni_avx2_glue.c |
1833 | --- linux-4.14.orig/arch/x86/crypto/camellia_aesni_avx2_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
1834 | +++ linux-4.14/arch/x86/crypto/camellia_aesni_avx2_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
1835 | @@ -206,6 +206,20 @@ | |
1836 | bool fpu_enabled; | |
1837 | }; | |
1838 | ||
1839 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1840 | +static void camellia_fpu_end_rt(struct crypt_priv *ctx) | |
1841 | +{ | |
1842 | + bool fpu_enabled = ctx->fpu_enabled; | |
1843 | + | |
1844 | + if (!fpu_enabled) | |
1845 | + return; | |
1846 | + camellia_fpu_end(fpu_enabled); | |
1847 | + ctx->fpu_enabled = false; | |
1848 | +} | |
1849 | +#else | |
1850 | +static void camellia_fpu_end_rt(struct crypt_priv *ctx) { } | |
1851 | +#endif | |
1852 | + | |
1853 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
1854 | { | |
1855 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | |
1856 | @@ -221,16 +235,19 @@ | |
1857 | } | |
1858 | ||
1859 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | |
1860 | + kernel_fpu_resched(); | |
1861 | camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst); | |
1862 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | |
1863 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | |
1864 | } | |
1865 | ||
1866 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | |
1867 | + kernel_fpu_resched(); | |
1868 | camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); | |
1869 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1870 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1871 | } | |
1872 | + camellia_fpu_end_rt(ctx); | |
1873 | ||
1874 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
1875 | camellia_enc_blk(ctx->ctx, srcdst, srcdst); | |
1876 | @@ -251,16 +268,19 @@ | |
1877 | } | |
1878 | ||
1879 | if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) { | |
1880 | + kernel_fpu_resched(); | |
1881 | camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst); | |
1882 | srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | |
1883 | nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS; | |
1884 | } | |
1885 | ||
1886 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | |
1887 | + kernel_fpu_resched(); | |
1888 | camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); | |
1889 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1890 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1891 | } | |
1892 | + camellia_fpu_end_rt(ctx); | |
1893 | ||
1894 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
1895 | camellia_dec_blk(ctx->ctx, srcdst, srcdst); | |
1896 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/camellia_aesni_avx_glue.c linux-4.14/arch/x86/crypto/camellia_aesni_avx_glue.c | |
1897 | --- linux-4.14.orig/arch/x86/crypto/camellia_aesni_avx_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
1898 | +++ linux-4.14/arch/x86/crypto/camellia_aesni_avx_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
1899 | @@ -210,6 +210,21 @@ | |
1900 | bool fpu_enabled; | |
1901 | }; | |
1902 | ||
1903 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1904 | +static void camellia_fpu_end_rt(struct crypt_priv *ctx) | |
1905 | +{ | |
1906 | + bool fpu_enabled = ctx->fpu_enabled; | |
1907 | + | |
1908 | + if (!fpu_enabled) | |
1909 | + return; | |
1910 | + camellia_fpu_end(fpu_enabled); | |
1911 | + ctx->fpu_enabled = false; | |
1912 | +} | |
1913 | + | |
1914 | +#else | |
1915 | +static void camellia_fpu_end_rt(struct crypt_priv *ctx) { } | |
1916 | +#endif | |
1917 | + | |
1918 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
1919 | { | |
1920 | const unsigned int bsize = CAMELLIA_BLOCK_SIZE; | |
1921 | @@ -225,10 +240,12 @@ | |
1922 | } | |
1923 | ||
1924 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | |
1925 | + kernel_fpu_resched(); | |
1926 | camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst); | |
1927 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1928 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1929 | } | |
1930 | + camellia_fpu_end_rt(ctx); | |
1931 | ||
1932 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
1933 | camellia_enc_blk(ctx->ctx, srcdst, srcdst); | |
1934 | @@ -249,10 +266,12 @@ | |
1935 | } | |
1936 | ||
1937 | while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) { | |
1938 | + kernel_fpu_resched(); | |
1939 | camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst); | |
1940 | srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1941 | nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS; | |
1942 | } | |
1943 | + camellia_fpu_end_rt(ctx); | |
1944 | ||
1945 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
1946 | camellia_dec_blk(ctx->ctx, srcdst, srcdst); | |
1947 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/cast5_avx_glue.c linux-4.14/arch/x86/crypto/cast5_avx_glue.c | |
1948 | --- linux-4.14.orig/arch/x86/crypto/cast5_avx_glue.c 2018-09-05 11:03:20.000000000 +0200 | |
1949 | +++ linux-4.14/arch/x86/crypto/cast5_avx_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
1950 | @@ -59,7 +59,7 @@ | |
1951 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1952 | bool enc) | |
1953 | { | |
1954 | - bool fpu_enabled = false; | |
1a6e0f06 JK |
1955 | + bool fpu_enabled; |
1956 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1957 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
1958 | unsigned int nbytes; | |
e4b2b4a8 | 1959 | @@ -73,7 +73,7 @@ |
1a6e0f06 JK |
1960 | u8 *wsrc = walk->src.virt.addr; |
1961 | u8 *wdst = walk->dst.virt.addr; | |
1962 | ||
1963 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1964 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1965 | ||
1966 | /* Process multi-block batch */ | |
1967 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
e4b2b4a8 | 1968 | @@ -102,10 +102,9 @@ |
1a6e0f06 JK |
1969 | } while (nbytes >= bsize); |
1970 | ||
1971 | done: | |
1972 | + cast5_fpu_end(fpu_enabled); | |
1973 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1974 | } | |
1975 | - | |
1976 | - cast5_fpu_end(fpu_enabled); | |
1977 | return err; | |
1978 | } | |
1979 | ||
e4b2b4a8 | 1980 | @@ -226,7 +225,7 @@ |
1a6e0f06 JK |
1981 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
1982 | struct scatterlist *src, unsigned int nbytes) | |
1983 | { | |
1984 | - bool fpu_enabled = false; | |
1985 | + bool fpu_enabled; | |
1986 | struct blkcipher_walk walk; | |
1987 | int err; | |
1988 | ||
e4b2b4a8 | 1989 | @@ -235,12 +234,11 @@ |
1a6e0f06 JK |
1990 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; |
1991 | ||
1992 | while ((nbytes = walk.nbytes)) { | |
1993 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1994 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1995 | nbytes = __cbc_decrypt(desc, &walk); | |
1996 | + cast5_fpu_end(fpu_enabled); | |
1997 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1998 | } | |
1999 | - | |
2000 | - cast5_fpu_end(fpu_enabled); | |
2001 | return err; | |
2002 | } | |
2003 | ||
e4b2b4a8 | 2004 | @@ -309,7 +307,7 @@ |
1a6e0f06 JK |
2005 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, |
2006 | struct scatterlist *src, unsigned int nbytes) | |
2007 | { | |
2008 | - bool fpu_enabled = false; | |
2009 | + bool fpu_enabled; | |
2010 | struct blkcipher_walk walk; | |
2011 | int err; | |
2012 | ||
e4b2b4a8 | 2013 | @@ -318,13 +316,12 @@ |
1a6e0f06 JK |
2014 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; |
2015 | ||
2016 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
2017 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
2018 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
2019 | nbytes = __ctr_crypt(desc, &walk); | |
2020 | + cast5_fpu_end(fpu_enabled); | |
2021 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2022 | } | |
2023 | ||
2024 | - cast5_fpu_end(fpu_enabled); | |
2025 | - | |
2026 | if (walk.nbytes) { | |
2027 | ctr_crypt_final(desc, &walk); | |
2028 | err = blkcipher_walk_done(desc, &walk, 0); | |
e4b2b4a8 JK |
2029 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/cast6_avx_glue.c linux-4.14/arch/x86/crypto/cast6_avx_glue.c |
2030 | --- linux-4.14.orig/arch/x86/crypto/cast6_avx_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2031 | +++ linux-4.14/arch/x86/crypto/cast6_avx_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2032 | @@ -205,19 +205,33 @@ | |
2033 | bool fpu_enabled; | |
2034 | }; | |
2035 | ||
2036 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2037 | +static void cast6_fpu_end_rt(struct crypt_priv *ctx) | |
2038 | +{ | |
2039 | + bool fpu_enabled = ctx->fpu_enabled; | |
2040 | + | |
2041 | + if (!fpu_enabled) | |
2042 | + return; | |
2043 | + cast6_fpu_end(fpu_enabled); | |
2044 | + ctx->fpu_enabled = false; | |
2045 | +} | |
2046 | + | |
2047 | +#else | |
2048 | +static void cast6_fpu_end_rt(struct crypt_priv *ctx) { } | |
2049 | +#endif | |
2050 | + | |
2051 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
2052 | { | |
2053 | const unsigned int bsize = CAST6_BLOCK_SIZE; | |
2054 | struct crypt_priv *ctx = priv; | |
2055 | int i; | |
2056 | ||
2057 | - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | |
2058 | - | |
2059 | if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { | |
2060 | + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | |
2061 | cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | |
2062 | + cast6_fpu_end_rt(ctx); | |
2063 | return; | |
2064 | } | |
2065 | - | |
2066 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
2067 | __cast6_encrypt(ctx->ctx, srcdst, srcdst); | |
2068 | } | |
2069 | @@ -228,10 +242,10 @@ | |
2070 | struct crypt_priv *ctx = priv; | |
2071 | int i; | |
2072 | ||
2073 | - ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | |
2074 | - | |
2075 | if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) { | |
2076 | + ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes); | |
2077 | cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | |
2078 | + cast6_fpu_end_rt(ctx); | |
2079 | return; | |
2080 | } | |
2081 | ||
2082 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/chacha20_glue.c linux-4.14/arch/x86/crypto/chacha20_glue.c | |
2083 | --- linux-4.14.orig/arch/x86/crypto/chacha20_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2084 | +++ linux-4.14/arch/x86/crypto/chacha20_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2085 | @@ -81,23 +81,24 @@ | |
2086 | ||
2087 | crypto_chacha20_init(state, ctx, walk.iv); | |
2088 | ||
2089 | - kernel_fpu_begin(); | |
2090 | - | |
2091 | while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { | |
2092 | + kernel_fpu_begin(); | |
2093 | + | |
2094 | chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, | |
2095 | rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); | |
2096 | + kernel_fpu_end(); | |
2097 | err = skcipher_walk_done(&walk, | |
2098 | walk.nbytes % CHACHA20_BLOCK_SIZE); | |
2099 | } | |
2100 | ||
2101 | if (walk.nbytes) { | |
2102 | + kernel_fpu_begin(); | |
2103 | chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, | |
2104 | walk.nbytes); | |
2105 | + kernel_fpu_end(); | |
2106 | err = skcipher_walk_done(&walk, 0); | |
2107 | } | |
2108 | ||
2109 | - kernel_fpu_end(); | |
2110 | - | |
2111 | return err; | |
2112 | } | |
2113 | ||
2114 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/glue_helper.c linux-4.14/arch/x86/crypto/glue_helper.c | |
2115 | --- linux-4.14.orig/arch/x86/crypto/glue_helper.c 2017-11-12 19:46:13.000000000 +0100 | |
2116 | +++ linux-4.14/arch/x86/crypto/glue_helper.c 2018-09-05 11:05:07.000000000 +0200 | |
2117 | @@ -40,7 +40,7 @@ | |
1a6e0f06 JK |
2118 | void *ctx = crypto_blkcipher_ctx(desc->tfm); |
2119 | const unsigned int bsize = 128 / 8; | |
2120 | unsigned int nbytes, i, func_bytes; | |
2121 | - bool fpu_enabled = false; | |
2122 | + bool fpu_enabled; | |
2123 | int err; | |
2124 | ||
2125 | err = blkcipher_walk_virt(desc, walk); | |
e4b2b4a8 | 2126 | @@ -50,7 +50,7 @@ |
1a6e0f06 JK |
2127 | u8 *wdst = walk->dst.virt.addr; |
2128 | ||
2129 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2130 | - desc, fpu_enabled, nbytes); | |
2131 | + desc, false, nbytes); | |
2132 | ||
2133 | for (i = 0; i < gctx->num_funcs; i++) { | |
2134 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
e4b2b4a8 | 2135 | @@ -72,10 +72,10 @@ |
1a6e0f06 JK |
2136 | } |
2137 | ||
2138 | done: | |
2139 | + glue_fpu_end(fpu_enabled); | |
2140 | err = blkcipher_walk_done(desc, walk, nbytes); | |
2141 | } | |
2142 | ||
2143 | - glue_fpu_end(fpu_enabled); | |
2144 | return err; | |
2145 | } | |
2146 | ||
e4b2b4a8 | 2147 | @@ -192,7 +192,7 @@ |
1a6e0f06 JK |
2148 | struct scatterlist *src, unsigned int nbytes) |
2149 | { | |
2150 | const unsigned int bsize = 128 / 8; | |
2151 | - bool fpu_enabled = false; | |
2152 | + bool fpu_enabled; | |
2153 | struct blkcipher_walk walk; | |
2154 | int err; | |
2155 | ||
e4b2b4a8 | 2156 | @@ -201,12 +201,12 @@ |
1a6e0f06 JK |
2157 | |
2158 | while ((nbytes = walk.nbytes)) { | |
2159 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2160 | - desc, fpu_enabled, nbytes); | |
2161 | + desc, false, nbytes); | |
2162 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
2163 | + glue_fpu_end(fpu_enabled); | |
2164 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2165 | } | |
2166 | ||
2167 | - glue_fpu_end(fpu_enabled); | |
2168 | return err; | |
2169 | } | |
2170 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
e4b2b4a8 | 2171 | @@ -275,7 +275,7 @@ |
1a6e0f06 JK |
2172 | struct scatterlist *src, unsigned int nbytes) |
2173 | { | |
2174 | const unsigned int bsize = 128 / 8; | |
2175 | - bool fpu_enabled = false; | |
2176 | + bool fpu_enabled; | |
2177 | struct blkcipher_walk walk; | |
2178 | int err; | |
2179 | ||
e4b2b4a8 | 2180 | @@ -284,13 +284,12 @@ |
1a6e0f06 JK |
2181 | |
2182 | while ((nbytes = walk.nbytes) >= bsize) { | |
2183 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2184 | - desc, fpu_enabled, nbytes); | |
2185 | + desc, false, nbytes); | |
2186 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
2187 | + glue_fpu_end(fpu_enabled); | |
2188 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2189 | } | |
2190 | ||
2191 | - glue_fpu_end(fpu_enabled); | |
2192 | - | |
2193 | if (walk.nbytes) { | |
2194 | glue_ctr_crypt_final_128bit( | |
2195 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
e4b2b4a8 | 2196 | @@ -380,7 +379,7 @@ |
1a6e0f06 JK |
2197 | void *tweak_ctx, void *crypt_ctx) |
2198 | { | |
2199 | const unsigned int bsize = 128 / 8; | |
2200 | - bool fpu_enabled = false; | |
2201 | + bool fpu_enabled; | |
2202 | struct blkcipher_walk walk; | |
2203 | int err; | |
2204 | ||
e4b2b4a8 | 2205 | @@ -393,21 +392,21 @@ |
1a6e0f06 JK |
2206 | |
2207 | /* set minimum length to bsize, for tweak_fn */ | |
2208 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2209 | - desc, fpu_enabled, | |
2210 | + desc, false, | |
2211 | nbytes < bsize ? bsize : nbytes); | |
2212 | - | |
2213 | /* calculate first value of T */ | |
2214 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
2215 | + glue_fpu_end(fpu_enabled); | |
2216 | ||
2217 | while (nbytes) { | |
2218 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2219 | + desc, false, nbytes); | |
2220 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
2221 | ||
2222 | + glue_fpu_end(fpu_enabled); | |
2223 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2224 | nbytes = walk.nbytes; | |
2225 | } | |
2226 | - | |
2227 | - glue_fpu_end(fpu_enabled); | |
2228 | - | |
2229 | return err; | |
2230 | } | |
2231 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
e4b2b4a8 JK |
2232 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/serpent_avx2_glue.c linux-4.14/arch/x86/crypto/serpent_avx2_glue.c |
2233 | --- linux-4.14.orig/arch/x86/crypto/serpent_avx2_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2234 | +++ linux-4.14/arch/x86/crypto/serpent_avx2_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2235 | @@ -184,6 +184,21 @@ | |
2236 | bool fpu_enabled; | |
2237 | }; | |
2238 | ||
2239 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2240 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) | |
2241 | +{ | |
2242 | + bool fpu_enabled = ctx->fpu_enabled; | |
2243 | + | |
2244 | + if (!fpu_enabled) | |
2245 | + return; | |
2246 | + serpent_fpu_end(fpu_enabled); | |
2247 | + ctx->fpu_enabled = false; | |
2248 | +} | |
2249 | + | |
2250 | +#else | |
2251 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) { } | |
2252 | +#endif | |
2253 | + | |
2254 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
2255 | { | |
2256 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | |
2257 | @@ -199,10 +214,12 @@ | |
2258 | } | |
2259 | ||
2260 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | |
2261 | + kernel_fpu_resched(); | |
2262 | serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); | |
2263 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | |
2264 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | |
2265 | } | |
2266 | + serpent_fpu_end_rt(ctx); | |
2267 | ||
2268 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
2269 | __serpent_encrypt(ctx->ctx, srcdst, srcdst); | |
2270 | @@ -223,10 +240,12 @@ | |
2271 | } | |
2272 | ||
2273 | while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) { | |
2274 | + kernel_fpu_resched(); | |
2275 | serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); | |
2276 | srcdst += bsize * SERPENT_PARALLEL_BLOCKS; | |
2277 | nbytes -= bsize * SERPENT_PARALLEL_BLOCKS; | |
2278 | } | |
2279 | + serpent_fpu_end_rt(ctx); | |
2280 | ||
2281 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
2282 | __serpent_decrypt(ctx->ctx, srcdst, srcdst); | |
2283 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/serpent_avx_glue.c linux-4.14/arch/x86/crypto/serpent_avx_glue.c | |
2284 | --- linux-4.14.orig/arch/x86/crypto/serpent_avx_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2285 | +++ linux-4.14/arch/x86/crypto/serpent_avx_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2286 | @@ -218,16 +218,31 @@ | |
2287 | bool fpu_enabled; | |
2288 | }; | |
2289 | ||
2290 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2291 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) | |
2292 | +{ | |
2293 | + bool fpu_enabled = ctx->fpu_enabled; | |
2294 | + | |
2295 | + if (!fpu_enabled) | |
2296 | + return; | |
2297 | + serpent_fpu_end(fpu_enabled); | |
2298 | + ctx->fpu_enabled = false; | |
2299 | +} | |
2300 | + | |
2301 | +#else | |
2302 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) { } | |
2303 | +#endif | |
2304 | + | |
2305 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
2306 | { | |
2307 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | |
2308 | struct crypt_priv *ctx = priv; | |
2309 | int i; | |
2310 | ||
2311 | - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2312 | - | |
2313 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | |
2314 | + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2315 | serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst); | |
2316 | + serpent_fpu_end_rt(ctx); | |
2317 | return; | |
2318 | } | |
2319 | ||
2320 | @@ -241,10 +256,10 @@ | |
2321 | struct crypt_priv *ctx = priv; | |
2322 | int i; | |
2323 | ||
2324 | - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2325 | - | |
2326 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | |
2327 | + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2328 | serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst); | |
2329 | + serpent_fpu_end_rt(ctx); | |
2330 | return; | |
2331 | } | |
2332 | ||
2333 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/serpent_sse2_glue.c linux-4.14/arch/x86/crypto/serpent_sse2_glue.c | |
2334 | --- linux-4.14.orig/arch/x86/crypto/serpent_sse2_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2335 | +++ linux-4.14/arch/x86/crypto/serpent_sse2_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2336 | @@ -187,16 +187,31 @@ | |
2337 | bool fpu_enabled; | |
2338 | }; | |
2339 | ||
2340 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2341 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) | |
2342 | +{ | |
2343 | + bool fpu_enabled = ctx->fpu_enabled; | |
2344 | + | |
2345 | + if (!fpu_enabled) | |
2346 | + return; | |
2347 | + serpent_fpu_end(fpu_enabled); | |
2348 | + ctx->fpu_enabled = false; | |
2349 | +} | |
2350 | + | |
2351 | +#else | |
2352 | +static void serpent_fpu_end_rt(struct crypt_priv *ctx) { } | |
2353 | +#endif | |
2354 | + | |
2355 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
2356 | { | |
2357 | const unsigned int bsize = SERPENT_BLOCK_SIZE; | |
2358 | struct crypt_priv *ctx = priv; | |
2359 | int i; | |
2360 | ||
2361 | - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2362 | - | |
2363 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | |
2364 | + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2365 | serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst); | |
2366 | + serpent_fpu_end_rt(ctx); | |
2367 | return; | |
2368 | } | |
2369 | ||
2370 | @@ -210,10 +225,10 @@ | |
2371 | struct crypt_priv *ctx = priv; | |
2372 | int i; | |
2373 | ||
2374 | - ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2375 | - | |
2376 | if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) { | |
2377 | + ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes); | |
2378 | serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst); | |
2379 | + serpent_fpu_end_rt(ctx); | |
2380 | return; | |
2381 | } | |
2382 | ||
2383 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/crypto/twofish_avx_glue.c linux-4.14/arch/x86/crypto/twofish_avx_glue.c | |
2384 | --- linux-4.14.orig/arch/x86/crypto/twofish_avx_glue.c 2017-11-12 19:46:13.000000000 +0100 | |
2385 | +++ linux-4.14/arch/x86/crypto/twofish_avx_glue.c 2018-09-05 11:05:07.000000000 +0200 | |
2386 | @@ -218,6 +218,21 @@ | |
2387 | bool fpu_enabled; | |
2388 | }; | |
2389 | ||
2390 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2391 | +static void twofish_fpu_end_rt(struct crypt_priv *ctx) | |
2392 | +{ | |
2393 | + bool fpu_enabled = ctx->fpu_enabled; | |
2394 | + | |
2395 | + if (!fpu_enabled) | |
2396 | + return; | |
2397 | + twofish_fpu_end(fpu_enabled); | |
2398 | + ctx->fpu_enabled = false; | |
2399 | +} | |
2400 | + | |
2401 | +#else | |
2402 | +static void twofish_fpu_end_rt(struct crypt_priv *ctx) { } | |
2403 | +#endif | |
2404 | + | |
2405 | static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) | |
2406 | { | |
2407 | const unsigned int bsize = TF_BLOCK_SIZE; | |
2408 | @@ -228,12 +243,16 @@ | |
2409 | ||
2410 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | |
2411 | twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst); | |
2412 | + twofish_fpu_end_rt(ctx); | |
2413 | return; | |
2414 | } | |
2415 | ||
2416 | - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | |
2417 | + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) { | |
2418 | + kernel_fpu_resched(); | |
2419 | twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst); | |
2420 | + } | |
2421 | ||
2422 | + twofish_fpu_end_rt(ctx); | |
2423 | nbytes %= bsize * 3; | |
2424 | ||
2425 | for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) | |
2426 | @@ -250,11 +269,15 @@ | |
2427 | ||
2428 | if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) { | |
2429 | twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst); | |
2430 | + twofish_fpu_end_rt(ctx); | |
2431 | return; | |
2432 | } | |
2433 | ||
2434 | - for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) | |
2435 | + for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) { | |
2436 | + kernel_fpu_resched(); | |
2437 | twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst); | |
2438 | + } | |
2439 | + twofish_fpu_end_rt(ctx); | |
2440 | ||
2441 | nbytes %= bsize * 3; | |
2442 | ||
2443 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/entry/common.c linux-4.14/arch/x86/entry/common.c | |
2444 | --- linux-4.14.orig/arch/x86/entry/common.c 2018-09-05 11:03:20.000000000 +0200 | |
2445 | +++ linux-4.14/arch/x86/entry/common.c 2018-09-05 11:05:07.000000000 +0200 | |
2446 | @@ -133,7 +133,7 @@ | |
1a6e0f06 JK |
2447 | |
2448 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
2449 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
e4b2b4a8 JK |
2450 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) |
2451 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) | |
1a6e0f06 JK |
2452 | |
2453 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
2454 | { | |
e4b2b4a8 | 2455 | @@ -148,9 +148,16 @@ |
1a6e0f06 JK |
2456 | /* We have work to do. */ |
2457 | local_irq_enable(); | |
2458 | ||
2459 | - if (cached_flags & _TIF_NEED_RESCHED) | |
2460 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
2461 | schedule(); | |
2462 | ||
2463 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
2464 | + if (unlikely(current->forced_info.si_signo)) { | |
2465 | + struct task_struct *t = current; | |
2466 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
2467 | + t->forced_info.si_signo = 0; | |
2468 | + } | |
2469 | +#endif | |
2470 | if (cached_flags & _TIF_UPROBE) | |
2471 | uprobe_notify_resume(regs); | |
2472 | ||
e4b2b4a8 JK |
2473 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/entry/entry_32.S linux-4.14/arch/x86/entry/entry_32.S |
2474 | --- linux-4.14.orig/arch/x86/entry/entry_32.S 2018-09-05 11:03:20.000000000 +0200 | |
2475 | +++ linux-4.14/arch/x86/entry/entry_32.S 2018-09-05 11:05:07.000000000 +0200 | |
2476 | @@ -350,8 +350,25 @@ | |
1a6e0f06 JK |
2477 | ENTRY(resume_kernel) |
2478 | DISABLE_INTERRUPTS(CLBR_ANY) | |
e4b2b4a8 | 2479 | .Lneed_resched: |
1a6e0f06 JK |
2480 | + # preempt count == 0 + NEED_RS set? |
2481 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
2482 | +#ifndef CONFIG_PREEMPT_LAZY | |
2483 | jnz restore_all | |
2484 | +#else | |
2485 | + jz test_int_off | |
2486 | + | |
2487 | + # atleast preempt count == 0 ? | |
2488 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2489 | + jne restore_all | |
2490 | + | |
e4b2b4a8 JK |
2491 | + movl PER_CPU_VAR(current_task), %ebp |
2492 | + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
2493 | + jnz restore_all | |
1a6e0f06 | 2494 | + |
e4b2b4a8 JK |
2495 | + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) |
2496 | + jz restore_all | |
1a6e0f06 JK |
2497 | +test_int_off: |
2498 | +#endif | |
2499 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2500 | jz restore_all | |
2501 | call preempt_schedule_irq | |
e4b2b4a8 JK |
2502 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/entry/entry_64.S linux-4.14/arch/x86/entry/entry_64.S |
2503 | --- linux-4.14.orig/arch/x86/entry/entry_64.S 2018-09-05 11:03:20.000000000 +0200 | |
2504 | +++ linux-4.14/arch/x86/entry/entry_64.S 2018-09-05 11:05:07.000000000 +0200 | |
2505 | @@ -633,7 +633,23 @@ | |
1a6e0f06 JK |
2506 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ |
2507 | jnc 1f | |
2508 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2509 | +#ifndef CONFIG_PREEMPT_LAZY | |
e4b2b4a8 | 2510 | + jnz 1f |
1a6e0f06 JK |
2511 | +#else |
2512 | + jz do_preempt_schedule_irq | |
2513 | + | |
2514 | + # atleast preempt count == 0 ? | |
2515 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2516 | + jnz 1f | |
2517 | + | |
c7c16703 JK |
2518 | + movq PER_CPU_VAR(current_task), %rcx |
2519 | + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) | |
e4b2b4a8 | 2520 | jnz 1f |
1a6e0f06 | 2521 | + |
c7c16703 | 2522 | + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) |
1a6e0f06 JK |
2523 | + jnc 1f |
2524 | +do_preempt_schedule_irq: | |
2525 | +#endif | |
2526 | call preempt_schedule_irq | |
2527 | jmp 0b | |
2528 | 1: | |
e4b2b4a8 | 2529 | @@ -988,6 +1004,7 @@ |
1a6e0f06 JK |
2530 | jmp 2b |
2531 | .previous | |
2532 | ||
2533 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2534 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2535 | ENTRY(do_softirq_own_stack) | |
2536 | pushq %rbp | |
e4b2b4a8 JK |
2537 | @@ -998,6 +1015,7 @@ |
2538 | leaveq | |
1a6e0f06 | 2539 | ret |
e4b2b4a8 | 2540 | ENDPROC(do_softirq_own_stack) |
1a6e0f06 JK |
2541 | +#endif |
2542 | ||
2543 | #ifdef CONFIG_XEN | |
e4b2b4a8 JK |
2544 | idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 |
2545 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/fpu/api.h linux-4.14/arch/x86/include/asm/fpu/api.h | |
2546 | --- linux-4.14.orig/arch/x86/include/asm/fpu/api.h 2017-11-12 19:46:13.000000000 +0100 | |
2547 | +++ linux-4.14/arch/x86/include/asm/fpu/api.h 2018-09-05 11:05:07.000000000 +0200 | |
2548 | @@ -25,6 +25,7 @@ | |
2549 | extern void __kernel_fpu_end(void); | |
2550 | extern void kernel_fpu_begin(void); | |
2551 | extern void kernel_fpu_end(void); | |
2552 | +extern void kernel_fpu_resched(void); | |
2553 | extern bool irq_fpu_usable(void); | |
2554 | ||
2555 | /* | |
2556 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/preempt.h linux-4.14/arch/x86/include/asm/preempt.h | |
2557 | --- linux-4.14.orig/arch/x86/include/asm/preempt.h 2017-11-12 19:46:13.000000000 +0100 | |
2558 | +++ linux-4.14/arch/x86/include/asm/preempt.h 2018-09-05 11:05:07.000000000 +0200 | |
2559 | @@ -86,17 +86,46 @@ | |
1a6e0f06 JK |
2560 | * a decrement which hits zero means we have no preempt_count and should |
2561 | * reschedule. | |
2562 | */ | |
2563 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2564 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2565 | { | |
2566 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); | |
2567 | } | |
2568 | ||
2569 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2570 | +{ | |
2571 | + if (____preempt_count_dec_and_test()) | |
2572 | + return true; | |
2573 | +#ifdef CONFIG_PREEMPT_LAZY | |
2574 | + if (current_thread_info()->preempt_lazy_count) | |
2575 | + return false; | |
2576 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2577 | +#else | |
2578 | + return false; | |
2579 | +#endif | |
2580 | +} | |
2581 | + | |
2582 | /* | |
2583 | * Returns true when we need to resched and can (barring IRQ state). | |
2584 | */ | |
2585 | static __always_inline bool should_resched(int preempt_offset) | |
2586 | { | |
2587 | +#ifdef CONFIG_PREEMPT_LAZY | |
2588 | + u32 tmp; | |
2589 | + | |
2590 | + tmp = raw_cpu_read_4(__preempt_count); | |
2591 | + if (tmp == preempt_offset) | |
2592 | + return true; | |
2593 | + | |
2594 | + /* preempt count == 0 ? */ | |
2595 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2596 | + if (tmp) | |
2597 | + return false; | |
2598 | + if (current_thread_info()->preempt_lazy_count) | |
2599 | + return false; | |
2600 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2601 | +#else | |
2602 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2603 | +#endif | |
2604 | } | |
2605 | ||
2606 | #ifdef CONFIG_PREEMPT | |
e4b2b4a8 JK |
2607 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/signal.h linux-4.14/arch/x86/include/asm/signal.h |
2608 | --- linux-4.14.orig/arch/x86/include/asm/signal.h 2017-11-12 19:46:13.000000000 +0100 | |
2609 | +++ linux-4.14/arch/x86/include/asm/signal.h 2018-09-05 11:05:07.000000000 +0200 | |
2610 | @@ -28,6 +28,19 @@ | |
c7c16703 JK |
2611 | #define SA_IA32_ABI 0x02000000u |
2612 | #define SA_X32_ABI 0x01000000u | |
1a6e0f06 JK |
2613 | |
2614 | +/* | |
2615 | + * Because some traps use the IST stack, we must keep preemption | |
2616 | + * disabled while calling do_trap(), but do_trap() may call | |
2617 | + * force_sig_info() which will grab the signal spin_locks for the | |
2618 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2619 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2620 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2621 | + * trap. | |
2622 | + */ | |
2623 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2624 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2625 | +#endif | |
2626 | + | |
2627 | #ifndef CONFIG_COMPAT | |
2628 | typedef sigset_t compat_sigset_t; | |
2629 | #endif | |
e4b2b4a8 JK |
2630 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/stackprotector.h linux-4.14/arch/x86/include/asm/stackprotector.h |
2631 | --- linux-4.14.orig/arch/x86/include/asm/stackprotector.h 2017-11-12 19:46:13.000000000 +0100 | |
2632 | +++ linux-4.14/arch/x86/include/asm/stackprotector.h 2018-09-05 11:05:07.000000000 +0200 | |
2633 | @@ -60,7 +60,7 @@ | |
1a6e0f06 JK |
2634 | */ |
2635 | static __always_inline void boot_init_stack_canary(void) | |
2636 | { | |
2637 | - u64 canary; | |
2638 | + u64 uninitialized_var(canary); | |
2639 | u64 tsc; | |
2640 | ||
2641 | #ifdef CONFIG_X86_64 | |
e4b2b4a8 | 2642 | @@ -71,8 +71,14 @@ |
1a6e0f06 JK |
2643 | * of randomness. The TSC only matters for very early init, |
2644 | * there it already has some randomness on most systems. Later | |
2645 | * on during the bootup the random pool has true entropy too. | |
1a6e0f06 JK |
2646 | + * For preempt-rt we need to weaken the randomness a bit, as |
2647 | + * we can't call into the random generator from atomic context | |
2648 | + * due to locking constraints. We just leave canary | |
2649 | + * uninitialized and use the TSC based randomness on top of it. | |
2650 | */ | |
2651 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2652 | get_random_bytes(&canary, sizeof(canary)); | |
2653 | +#endif | |
2654 | tsc = rdtsc(); | |
2655 | canary += tsc + (tsc << 32UL); | |
e4b2b4a8 JK |
2656 | canary &= CANARY_MASK; |
2657 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/thread_info.h linux-4.14/arch/x86/include/asm/thread_info.h | |
2658 | --- linux-4.14.orig/arch/x86/include/asm/thread_info.h 2018-09-05 11:03:20.000000000 +0200 | |
2659 | +++ linux-4.14/arch/x86/include/asm/thread_info.h 2018-09-05 11:05:07.000000000 +0200 | |
2660 | @@ -56,11 +56,14 @@ | |
c7c16703 JK |
2661 | struct thread_info { |
2662 | unsigned long flags; /* low level flags */ | |
e4b2b4a8 JK |
2663 | u32 status; /* thread synchronous flags */ |
2664 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
2665 | + <0 => BUG */ | |
1a6e0f06 JK |
2666 | }; |
2667 | ||
2668 | #define INIT_THREAD_INFO(tsk) \ | |
c7c16703 JK |
2669 | { \ |
2670 | .flags = 0, \ | |
2671 | + .preempt_lazy_count = 0, \ | |
2672 | } | |
2673 | ||
2674 | #define init_stack (init_thread_union.stack) | |
e4b2b4a8 | 2675 | @@ -69,6 +72,10 @@ |
1a6e0f06 JK |
2676 | |
2677 | #include <asm/asm-offsets.h> | |
2678 | ||
2679 | +#define GET_THREAD_INFO(reg) \ | |
2680 | + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ | |
2681 | + _ASM_SUB $(THREAD_SIZE),reg ; | |
2682 | + | |
2683 | #endif | |
2684 | ||
2685 | /* | |
e4b2b4a8 | 2686 | @@ -85,6 +92,7 @@ |
1a6e0f06 JK |
2687 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ |
2688 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2689 | #define TIF_SECCOMP 8 /* secure computing */ | |
2690 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2691 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2692 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
e4b2b4a8 JK |
2693 | #define TIF_PATCH_PENDING 13 /* pending live patching update */ |
2694 | @@ -112,6 +120,7 @@ | |
1a6e0f06 JK |
2695 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) |
2696 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2697 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2698 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2699 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2700 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
e4b2b4a8 JK |
2701 | #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) |
2702 | @@ -153,6 +162,8 @@ | |
1a6e0f06 JK |
2703 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
2704 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2705 | ||
2706 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2707 | + | |
2708 | #define STACK_WARN (THREAD_SIZE/8) | |
2709 | ||
2710 | /* | |
e4b2b4a8 JK |
2711 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/include/asm/uv/uv_bau.h linux-4.14/arch/x86/include/asm/uv/uv_bau.h |
2712 | --- linux-4.14.orig/arch/x86/include/asm/uv/uv_bau.h 2017-11-12 19:46:13.000000000 +0100 | |
2713 | +++ linux-4.14/arch/x86/include/asm/uv/uv_bau.h 2018-09-05 11:05:07.000000000 +0200 | |
2714 | @@ -643,9 +643,9 @@ | |
1a6e0f06 JK |
2715 | cycles_t send_message; |
2716 | cycles_t period_end; | |
2717 | cycles_t period_time; | |
2718 | - spinlock_t uvhub_lock; | |
2719 | - spinlock_t queue_lock; | |
2720 | - spinlock_t disable_lock; | |
2721 | + raw_spinlock_t uvhub_lock; | |
2722 | + raw_spinlock_t queue_lock; | |
2723 | + raw_spinlock_t disable_lock; | |
2724 | /* tunables */ | |
2725 | int max_concurr; | |
2726 | int max_concurr_const; | |
e4b2b4a8 | 2727 | @@ -847,15 +847,15 @@ |
1a6e0f06 JK |
2728 | * to be lowered below the current 'v'. atomic_add_unless can only stop |
2729 | * on equal. | |
2730 | */ | |
2731 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2732 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2733 | { | |
2734 | - spin_lock(lock); | |
2735 | + raw_spin_lock(lock); | |
2736 | if (atomic_read(v) >= u) { | |
2737 | - spin_unlock(lock); | |
2738 | + raw_spin_unlock(lock); | |
2739 | return 0; | |
2740 | } | |
2741 | atomic_inc(v); | |
2742 | - spin_unlock(lock); | |
2743 | + raw_spin_unlock(lock); | |
2744 | return 1; | |
2745 | } | |
2746 | ||
e4b2b4a8 JK |
2747 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/Kconfig linux-4.14/arch/x86/Kconfig |
2748 | --- linux-4.14.orig/arch/x86/Kconfig 2018-09-05 11:03:20.000000000 +0200 | |
2749 | +++ linux-4.14/arch/x86/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
2750 | @@ -169,6 +169,7 @@ | |
2751 | select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI | |
2752 | select HAVE_PERF_REGS | |
2753 | select HAVE_PERF_USER_STACK_DUMP | |
2754 | + select HAVE_PREEMPT_LAZY | |
2755 | select HAVE_RCU_TABLE_FREE | |
2756 | select HAVE_REGS_AND_STACK_ACCESS_API | |
2757 | select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION | |
2758 | @@ -256,8 +257,11 @@ | |
2759 | def_bool y | |
2760 | depends on ISA_DMA_API | |
2761 | ||
2762 | +config RWSEM_GENERIC_SPINLOCK | |
2763 | + def_bool PREEMPT_RT_FULL | |
2764 | + | |
2765 | config RWSEM_XCHGADD_ALGORITHM | |
2766 | - def_bool y | |
2767 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1a6e0f06 | 2768 | |
e4b2b4a8 JK |
2769 | config GENERIC_CALIBRATE_DELAY |
2770 | def_bool y | |
2771 | @@ -932,7 +936,7 @@ | |
2772 | config MAXSMP | |
2773 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
2774 | depends on X86_64 && SMP && DEBUG_KERNEL | |
2775 | - select CPUMASK_OFFSTACK | |
2776 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
2777 | ---help--- | |
2778 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
2779 | If unsure, say N. | |
2780 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/apic/io_apic.c linux-4.14/arch/x86/kernel/apic/io_apic.c | |
2781 | --- linux-4.14.orig/arch/x86/kernel/apic/io_apic.c 2018-09-05 11:03:20.000000000 +0200 | |
2782 | +++ linux-4.14/arch/x86/kernel/apic/io_apic.c 2018-09-05 11:05:07.000000000 +0200 | |
2783 | @@ -1691,7 +1691,8 @@ | |
1a6e0f06 JK |
2784 | static inline bool ioapic_irqd_mask(struct irq_data *data) |
2785 | { | |
2786 | /* If we are moving the irq we need to mask it */ | |
2787 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2788 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2789 | + !irqd_irq_inprogress(data))) { | |
2790 | mask_ioapic_irq(data); | |
2791 | return true; | |
2792 | } | |
e4b2b4a8 JK |
2793 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/asm-offsets.c linux-4.14/arch/x86/kernel/asm-offsets.c |
2794 | --- linux-4.14.orig/arch/x86/kernel/asm-offsets.c 2018-09-05 11:03:20.000000000 +0200 | |
2795 | +++ linux-4.14/arch/x86/kernel/asm-offsets.c 2018-09-05 11:05:07.000000000 +0200 | |
2796 | @@ -38,6 +38,7 @@ | |
1a6e0f06 JK |
2797 | |
2798 | BLANK(); | |
c7c16703 JK |
2799 | OFFSET(TASK_TI_flags, task_struct, thread_info.flags); |
2800 | + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); | |
1a6e0f06 | 2801 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); |
c7c16703 JK |
2802 | |
2803 | BLANK(); | |
e4b2b4a8 | 2804 | @@ -94,6 +95,7 @@ |
1a6e0f06 JK |
2805 | |
2806 | BLANK(); | |
2807 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2808 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
1a6e0f06 | 2809 | |
e4b2b4a8 JK |
2810 | /* TLB state for the entry code */ |
2811 | OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask); | |
2812 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/cpu/mcheck/dev-mcelog.c linux-4.14/arch/x86/kernel/cpu/mcheck/dev-mcelog.c | |
2813 | --- linux-4.14.orig/arch/x86/kernel/cpu/mcheck/dev-mcelog.c 2017-11-12 19:46:13.000000000 +0100 | |
2814 | +++ linux-4.14/arch/x86/kernel/cpu/mcheck/dev-mcelog.c 2018-09-05 11:05:07.000000000 +0200 | |
2815 | @@ -14,6 +14,7 @@ | |
2816 | #include <linux/slab.h> | |
2817 | #include <linux/kmod.h> | |
2818 | #include <linux/poll.h> | |
2819 | +#include <linux/swork.h> | |
1a6e0f06 | 2820 | |
e4b2b4a8 | 2821 | #include "mce-internal.h" |
1a6e0f06 | 2822 | |
e4b2b4a8 | 2823 | @@ -86,13 +87,43 @@ |
1a6e0f06 | 2824 | |
e4b2b4a8 | 2825 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
1a6e0f06 | 2826 | |
1a6e0f06 | 2827 | - |
e4b2b4a8 JK |
2828 | -void mce_work_trigger(void) |
2829 | +static void __mce_work_trigger(struct swork_event *event) | |
2830 | { | |
2831 | if (mce_helper[0]) | |
2832 | schedule_work(&mce_trigger_work); | |
1a6e0f06 JK |
2833 | } |
2834 | ||
1a6e0f06 JK |
2835 | +#ifdef CONFIG_PREEMPT_RT_FULL |
2836 | +static bool notify_work_ready __read_mostly; | |
2837 | +static struct swork_event notify_work; | |
2838 | + | |
2839 | +static int mce_notify_work_init(void) | |
2840 | +{ | |
2841 | + int err; | |
2842 | + | |
2843 | + err = swork_get(); | |
2844 | + if (err) | |
2845 | + return err; | |
2846 | + | |
e4b2b4a8 | 2847 | + INIT_SWORK(¬ify_work, __mce_work_trigger); |
1a6e0f06 JK |
2848 | + notify_work_ready = true; |
2849 | + return 0; | |
2850 | +} | |
2851 | + | |
e4b2b4a8 | 2852 | +void mce_work_trigger(void) |
1a6e0f06 JK |
2853 | +{ |
2854 | + if (notify_work_ready) | |
2855 | + swork_queue(¬ify_work); | |
2856 | +} | |
e4b2b4a8 | 2857 | + |
1a6e0f06 | 2858 | +#else |
e4b2b4a8 | 2859 | +void mce_work_trigger(void) |
1a6e0f06 | 2860 | +{ |
e4b2b4a8 | 2861 | + __mce_work_trigger(NULL); |
1a6e0f06 JK |
2862 | +} |
2863 | +static inline int mce_notify_work_init(void) { return 0; } | |
2864 | +#endif | |
2865 | + | |
e4b2b4a8 JK |
2866 | static ssize_t |
2867 | show_trigger(struct device *s, struct device_attribute *attr, char *buf) | |
1a6e0f06 | 2868 | { |
e4b2b4a8 JK |
2869 | @@ -356,7 +387,7 @@ |
2870 | ||
2871 | return err; | |
2872 | } | |
1a6e0f06 | 2873 | - |
e4b2b4a8 JK |
2874 | + mce_notify_work_init(); |
2875 | mce_register_decode_chain(&dev_mcelog_nb); | |
2876 | return 0; | |
2877 | } | |
2878 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/cpu/mcheck/mce.c linux-4.14/arch/x86/kernel/cpu/mcheck/mce.c | |
2879 | --- linux-4.14.orig/arch/x86/kernel/cpu/mcheck/mce.c 2018-09-05 11:03:20.000000000 +0200 | |
2880 | +++ linux-4.14/arch/x86/kernel/cpu/mcheck/mce.c 2018-09-05 11:05:07.000000000 +0200 | |
2881 | @@ -42,6 +42,7 @@ | |
2882 | #include <linux/debugfs.h> | |
2883 | #include <linux/irq_work.h> | |
2884 | #include <linux/export.h> | |
2885 | +#include <linux/jiffies.h> | |
2886 | #include <linux/jump_label.h> | |
2887 | ||
2888 | #include <asm/intel-family.h> | |
2889 | @@ -1365,7 +1366,7 @@ | |
2890 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; | |
2891 | ||
2892 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2893 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2894 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2895 | ||
2896 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2897 | { | |
2898 | @@ -1374,27 +1375,19 @@ | |
2899 | ||
2900 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2901 | ||
2902 | -static void __start_timer(struct timer_list *t, unsigned long interval) | |
2903 | +static void __start_timer(struct hrtimer *t, unsigned long iv) | |
2904 | { | |
2905 | - unsigned long when = jiffies + interval; | |
2906 | - unsigned long flags; | |
1a6e0f06 | 2907 | - |
e4b2b4a8 | 2908 | - local_irq_save(flags); |
1a6e0f06 | 2909 | - |
e4b2b4a8 JK |
2910 | - if (!timer_pending(t) || time_before(when, t->expires)) |
2911 | - mod_timer(t, round_jiffies(when)); | |
2912 | + if (!iv) | |
2913 | + return; | |
2914 | ||
2915 | - local_irq_restore(flags); | |
2916 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
2917 | + 0, HRTIMER_MODE_REL_PINNED); | |
2918 | } | |
2919 | ||
2920 | -static void mce_timer_fn(unsigned long data) | |
2921 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2922 | { | |
2923 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2924 | - int cpu = smp_processor_id(); | |
2925 | unsigned long iv; | |
2926 | ||
2927 | - WARN_ON(cpu != data); | |
1a6e0f06 | 2928 | - |
e4b2b4a8 JK |
2929 | iv = __this_cpu_read(mce_next_interval); |
2930 | ||
2931 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
2932 | @@ -1417,7 +1410,11 @@ | |
2933 | ||
2934 | done: | |
2935 | __this_cpu_write(mce_next_interval, iv); | |
2936 | - __start_timer(t, iv); | |
2937 | + if (!iv) | |
2938 | + return HRTIMER_NORESTART; | |
2939 | + | |
2940 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(iv))); | |
2941 | + return HRTIMER_RESTART; | |
1a6e0f06 JK |
2942 | } |
2943 | ||
e4b2b4a8 JK |
2944 | /* |
2945 | @@ -1425,7 +1422,7 @@ | |
2946 | */ | |
2947 | void mce_timer_kick(unsigned long interval) | |
1a6e0f06 | 2948 | { |
e4b2b4a8 JK |
2949 | - struct timer_list *t = this_cpu_ptr(&mce_timer); |
2950 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2951 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
1a6e0f06 | 2952 | |
e4b2b4a8 JK |
2953 | __start_timer(t, interval); |
2954 | @@ -1440,7 +1437,7 @@ | |
2955 | int cpu; | |
1a6e0f06 | 2956 | |
e4b2b4a8 JK |
2957 | for_each_online_cpu(cpu) |
2958 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2959 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2960 | } | |
1a6e0f06 | 2961 | |
e4b2b4a8 JK |
2962 | /* |
2963 | @@ -1769,7 +1766,7 @@ | |
2964 | } | |
1a6e0f06 JK |
2965 | } |
2966 | ||
e4b2b4a8 JK |
2967 | -static void mce_start_timer(struct timer_list *t) |
2968 | +static void mce_start_timer(struct hrtimer *t) | |
2969 | { | |
2970 | unsigned long iv = check_interval * HZ; | |
2971 | ||
2972 | @@ -1782,18 +1779,19 @@ | |
2973 | ||
2974 | static void __mcheck_cpu_setup_timer(void) | |
1a6e0f06 JK |
2975 | { |
2976 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
e4b2b4a8 | 2977 | - unsigned int cpu = smp_processor_id(); |
1a6e0f06 | 2978 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); |
1a6e0f06 JK |
2979 | |
2980 | - setup_pinned_timer(t, mce_timer_fn, cpu); | |
2981 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2982 | + t->function = mce_timer_fn; | |
1a6e0f06 JK |
2983 | } |
2984 | ||
e4b2b4a8 JK |
2985 | static void __mcheck_cpu_init_timer(void) |
2986 | { | |
2987 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2988 | - unsigned int cpu = smp_processor_id(); | |
2989 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
1a6e0f06 | 2990 | + |
e4b2b4a8 JK |
2991 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
2992 | + t->function = mce_timer_fn; | |
1a6e0f06 | 2993 | |
e4b2b4a8 JK |
2994 | - setup_pinned_timer(t, mce_timer_fn, cpu); |
2995 | mce_start_timer(t); | |
1a6e0f06 JK |
2996 | } |
2997 | ||
e4b2b4a8 JK |
2998 | @@ -2309,7 +2307,7 @@ |
2999 | ||
3000 | static int mce_cpu_online(unsigned int cpu) | |
1a6e0f06 | 3001 | { |
e4b2b4a8 JK |
3002 | - struct timer_list *t = this_cpu_ptr(&mce_timer); |
3003 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
3004 | int ret; | |
1a6e0f06 | 3005 | |
e4b2b4a8 JK |
3006 | mce_device_create(cpu); |
3007 | @@ -2326,10 +2324,10 @@ | |
1a6e0f06 | 3008 | |
e4b2b4a8 JK |
3009 | static int mce_cpu_pre_down(unsigned int cpu) |
3010 | { | |
3011 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
3012 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
1a6e0f06 | 3013 | |
e4b2b4a8 JK |
3014 | mce_disable_cpu(); |
3015 | - del_timer_sync(t); | |
3016 | + hrtimer_cancel(t); | |
3017 | mce_threshold_remove_device(cpu); | |
3018 | mce_device_remove(cpu); | |
3019 | return 0; | |
3020 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/fpu/core.c linux-4.14/arch/x86/kernel/fpu/core.c | |
3021 | --- linux-4.14.orig/arch/x86/kernel/fpu/core.c 2018-09-05 11:03:20.000000000 +0200 | |
3022 | +++ linux-4.14/arch/x86/kernel/fpu/core.c 2018-09-05 11:05:07.000000000 +0200 | |
3023 | @@ -138,6 +138,18 @@ | |
3024 | } | |
3025 | EXPORT_SYMBOL_GPL(kernel_fpu_end); | |
3026 | ||
3027 | +void kernel_fpu_resched(void) | |
3028 | +{ | |
3029 | + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); | |
3030 | + | |
3031 | + if (should_resched(PREEMPT_OFFSET)) { | |
3032 | + kernel_fpu_end(); | |
3033 | + cond_resched(); | |
3034 | + kernel_fpu_begin(); | |
3035 | + } | |
3036 | +} | |
3037 | +EXPORT_SYMBOL_GPL(kernel_fpu_resched); | |
3038 | + | |
3039 | /* | |
3040 | * Save the FPU state (mark it for reload if necessary): | |
3041 | * | |
3042 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/irq_32.c linux-4.14/arch/x86/kernel/irq_32.c | |
3043 | --- linux-4.14.orig/arch/x86/kernel/irq_32.c 2018-09-05 11:03:20.000000000 +0200 | |
3044 | +++ linux-4.14/arch/x86/kernel/irq_32.c 2018-09-05 11:05:07.000000000 +0200 | |
3045 | @@ -130,6 +130,7 @@ | |
1a6e0f06 JK |
3046 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); |
3047 | } | |
3048 | ||
3049 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
3050 | void do_softirq_own_stack(void) | |
3051 | { | |
3052 | struct irq_stack *irqstk; | |
e4b2b4a8 | 3053 | @@ -146,6 +147,7 @@ |
1a6e0f06 JK |
3054 | |
3055 | call_on_stack(__do_softirq, isp); | |
3056 | } | |
3057 | +#endif | |
3058 | ||
3059 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
3060 | { | |
e4b2b4a8 JK |
3061 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kernel/process_32.c linux-4.14/arch/x86/kernel/process_32.c |
3062 | --- linux-4.14.orig/arch/x86/kernel/process_32.c 2018-09-05 11:03:20.000000000 +0200 | |
3063 | +++ linux-4.14/arch/x86/kernel/process_32.c 2018-09-05 11:05:07.000000000 +0200 | |
3064 | @@ -38,6 +38,7 @@ | |
1a6e0f06 JK |
3065 | #include <linux/io.h> |
3066 | #include <linux/kdebug.h> | |
e4b2b4a8 | 3067 | #include <linux/syscalls.h> |
1a6e0f06 JK |
3068 | +#include <linux/highmem.h> |
3069 | ||
3070 | #include <asm/pgtable.h> | |
3071 | #include <asm/ldt.h> | |
e4b2b4a8 | 3072 | @@ -198,6 +199,35 @@ |
1a6e0f06 JK |
3073 | } |
3074 | EXPORT_SYMBOL_GPL(start_thread); | |
3075 | ||
3076 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3077 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
3078 | +{ | |
3079 | + int i; | |
3080 | + | |
3081 | + /* | |
3082 | + * Clear @prev's kmap_atomic mappings | |
3083 | + */ | |
3084 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
3085 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
3086 | + pte_t *ptep = kmap_pte - idx; | |
3087 | + | |
3088 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
3089 | + } | |
3090 | + /* | |
3091 | + * Restore @next_p's kmap_atomic mappings | |
3092 | + */ | |
3093 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
3094 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
3095 | + | |
3096 | + if (!pte_none(next_p->kmap_pte[i])) | |
3097 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
3098 | + } | |
3099 | +} | |
3100 | +#else | |
3101 | +static inline void | |
3102 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
3103 | +#endif | |
3104 | + | |
3105 | ||
3106 | /* | |
3107 | * switch_to(x,y) should switch tasks from x to y. | |
e4b2b4a8 | 3108 | @@ -273,6 +303,8 @@ |
1a6e0f06 JK |
3109 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
3110 | __switch_to_xtra(prev_p, next_p, tss); | |
3111 | ||
3112 | + switch_kmaps(prev_p, next_p); | |
3113 | + | |
3114 | /* | |
3115 | * Leave lazy mode, flushing any hypercalls made here. | |
3116 | * This must be done before restoring TLS segments so | |
e4b2b4a8 JK |
3117 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kvm/lapic.c linux-4.14/arch/x86/kvm/lapic.c |
3118 | --- linux-4.14.orig/arch/x86/kvm/lapic.c 2018-09-05 11:03:20.000000000 +0200 | |
3119 | +++ linux-4.14/arch/x86/kvm/lapic.c 2018-09-05 11:05:07.000000000 +0200 | |
3120 | @@ -2120,7 +2120,7 @@ | |
3121 | apic->vcpu = vcpu; | |
3122 | ||
1a6e0f06 | 3123 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
e4b2b4a8 JK |
3124 | - HRTIMER_MODE_ABS_PINNED); |
3125 | + HRTIMER_MODE_ABS_PINNED_HARD); | |
1a6e0f06 | 3126 | apic->lapic_timer.timer.function = apic_timer_fn; |
1a6e0f06 JK |
3127 | |
3128 | /* | |
e4b2b4a8 JK |
3129 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/kvm/x86.c linux-4.14/arch/x86/kvm/x86.c |
3130 | --- linux-4.14.orig/arch/x86/kvm/x86.c 2018-09-05 11:03:20.000000000 +0200 | |
3131 | +++ linux-4.14/arch/x86/kvm/x86.c 2018-09-05 11:05:07.000000000 +0200 | |
3132 | @@ -6285,6 +6285,13 @@ | |
1a6e0f06 JK |
3133 | goto out; |
3134 | } | |
3135 | ||
3136 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3137 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
3138 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
3139 | + return -EOPNOTSUPP; | |
3140 | + } | |
3141 | +#endif | |
3142 | + | |
3143 | r = kvm_mmu_module_init(); | |
3144 | if (r) | |
3145 | goto out_free_percpu; | |
e4b2b4a8 JK |
3146 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/mm/highmem_32.c linux-4.14/arch/x86/mm/highmem_32.c |
3147 | --- linux-4.14.orig/arch/x86/mm/highmem_32.c 2017-11-12 19:46:13.000000000 +0100 | |
3148 | +++ linux-4.14/arch/x86/mm/highmem_32.c 2018-09-05 11:05:07.000000000 +0200 | |
3149 | @@ -32,10 +32,11 @@ | |
1a6e0f06 JK |
3150 | */ |
3151 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
3152 | { | |
3153 | + pte_t pte = mk_pte(page, prot); | |
3154 | unsigned long vaddr; | |
3155 | int idx, type; | |
3156 | ||
3157 | - preempt_disable(); | |
3158 | + preempt_disable_nort(); | |
3159 | pagefault_disable(); | |
3160 | ||
3161 | if (!PageHighMem(page)) | |
e4b2b4a8 | 3162 | @@ -45,7 +46,10 @@ |
1a6e0f06 JK |
3163 | idx = type + KM_TYPE_NR*smp_processor_id(); |
3164 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
3165 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
3166 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
3167 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3168 | + current->kmap_pte[type] = pte; | |
3169 | +#endif | |
3170 | + set_pte(kmap_pte-idx, pte); | |
3171 | arch_flush_lazy_mmu_mode(); | |
3172 | ||
3173 | return (void *)vaddr; | |
e4b2b4a8 | 3174 | @@ -88,6 +92,9 @@ |
1a6e0f06 JK |
3175 | * is a bad idea also, in case the page changes cacheability |
3176 | * attributes or becomes a protected page in a hypervisor. | |
3177 | */ | |
3178 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3179 | + current->kmap_pte[type] = __pte(0); | |
3180 | +#endif | |
3181 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
3182 | kmap_atomic_idx_pop(); | |
3183 | arch_flush_lazy_mmu_mode(); | |
e4b2b4a8 | 3184 | @@ -100,7 +107,7 @@ |
1a6e0f06 JK |
3185 | #endif |
3186 | ||
3187 | pagefault_enable(); | |
3188 | - preempt_enable(); | |
3189 | + preempt_enable_nort(); | |
3190 | } | |
3191 | EXPORT_SYMBOL(__kunmap_atomic); | |
3192 | ||
e4b2b4a8 JK |
3193 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/mm/iomap_32.c linux-4.14/arch/x86/mm/iomap_32.c |
3194 | --- linux-4.14.orig/arch/x86/mm/iomap_32.c 2017-11-12 19:46:13.000000000 +0100 | |
3195 | +++ linux-4.14/arch/x86/mm/iomap_32.c 2018-09-05 11:05:07.000000000 +0200 | |
3196 | @@ -56,6 +56,7 @@ | |
1a6e0f06 JK |
3197 | |
3198 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
3199 | { | |
3200 | + pte_t pte = pfn_pte(pfn, prot); | |
3201 | unsigned long vaddr; | |
3202 | int idx, type; | |
3203 | ||
e4b2b4a8 | 3204 | @@ -65,7 +66,12 @@ |
1a6e0f06 JK |
3205 | type = kmap_atomic_idx_push(); |
3206 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
3207 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
3208 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
3209 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
3210 | + | |
3211 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3212 | + current->kmap_pte[type] = pte; | |
3213 | +#endif | |
3214 | + set_pte(kmap_pte - idx, pte); | |
3215 | arch_flush_lazy_mmu_mode(); | |
3216 | ||
3217 | return (void *)vaddr; | |
e4b2b4a8 | 3218 | @@ -113,6 +119,9 @@ |
1a6e0f06 JK |
3219 | * is a bad idea also, in case the page changes cacheability |
3220 | * attributes or becomes a protected page in a hypervisor. | |
3221 | */ | |
3222 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3223 | + current->kmap_pte[type] = __pte(0); | |
3224 | +#endif | |
3225 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
3226 | kmap_atomic_idx_pop(); | |
3227 | } | |
e4b2b4a8 JK |
3228 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/platform/uv/tlb_uv.c linux-4.14/arch/x86/platform/uv/tlb_uv.c |
3229 | --- linux-4.14.orig/arch/x86/platform/uv/tlb_uv.c 2018-09-05 11:03:20.000000000 +0200 | |
3230 | +++ linux-4.14/arch/x86/platform/uv/tlb_uv.c 2018-09-05 11:05:07.000000000 +0200 | |
3231 | @@ -740,9 +740,9 @@ | |
1a6e0f06 JK |
3232 | |
3233 | quiesce_local_uvhub(hmaster); | |
3234 | ||
3235 | - spin_lock(&hmaster->queue_lock); | |
3236 | + raw_spin_lock(&hmaster->queue_lock); | |
3237 | reset_with_ipi(&bau_desc->distribution, bcp); | |
3238 | - spin_unlock(&hmaster->queue_lock); | |
3239 | + raw_spin_unlock(&hmaster->queue_lock); | |
3240 | ||
3241 | end_uvhub_quiesce(hmaster); | |
3242 | ||
e4b2b4a8 | 3243 | @@ -762,9 +762,9 @@ |
1a6e0f06 JK |
3244 | |
3245 | quiesce_local_uvhub(hmaster); | |
3246 | ||
3247 | - spin_lock(&hmaster->queue_lock); | |
3248 | + raw_spin_lock(&hmaster->queue_lock); | |
3249 | reset_with_ipi(&bau_desc->distribution, bcp); | |
3250 | - spin_unlock(&hmaster->queue_lock); | |
3251 | + raw_spin_unlock(&hmaster->queue_lock); | |
3252 | ||
3253 | end_uvhub_quiesce(hmaster); | |
3254 | ||
e4b2b4a8 | 3255 | @@ -785,7 +785,7 @@ |
1a6e0f06 JK |
3256 | cycles_t tm1; |
3257 | ||
3258 | hmaster = bcp->uvhub_master; | |
3259 | - spin_lock(&hmaster->disable_lock); | |
3260 | + raw_spin_lock(&hmaster->disable_lock); | |
3261 | if (!bcp->baudisabled) { | |
3262 | stat->s_bau_disabled++; | |
3263 | tm1 = get_cycles(); | |
e4b2b4a8 | 3264 | @@ -798,7 +798,7 @@ |
1a6e0f06 JK |
3265 | } |
3266 | } | |
3267 | } | |
3268 | - spin_unlock(&hmaster->disable_lock); | |
3269 | + raw_spin_unlock(&hmaster->disable_lock); | |
3270 | } | |
3271 | ||
3272 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
e4b2b4a8 | 3273 | @@ -861,7 +861,7 @@ |
1a6e0f06 JK |
3274 | */ |
3275 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
3276 | { | |
3277 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
3278 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
3279 | atomic_t *v; | |
3280 | ||
3281 | v = &hmaster->active_descriptor_count; | |
e4b2b4a8 | 3282 | @@ -995,7 +995,7 @@ |
1a6e0f06 JK |
3283 | struct bau_control *hmaster; |
3284 | ||
3285 | hmaster = bcp->uvhub_master; | |
3286 | - spin_lock(&hmaster->disable_lock); | |
3287 | + raw_spin_lock(&hmaster->disable_lock); | |
3288 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
3289 | stat->s_bau_reenabled++; | |
3290 | for_each_present_cpu(tcpu) { | |
e4b2b4a8 | 3291 | @@ -1007,10 +1007,10 @@ |
1a6e0f06 JK |
3292 | tbcp->period_giveups = 0; |
3293 | } | |
3294 | } | |
3295 | - spin_unlock(&hmaster->disable_lock); | |
3296 | + raw_spin_unlock(&hmaster->disable_lock); | |
3297 | return 0; | |
3298 | } | |
3299 | - spin_unlock(&hmaster->disable_lock); | |
3300 | + raw_spin_unlock(&hmaster->disable_lock); | |
3301 | return -1; | |
3302 | } | |
3303 | ||
e4b2b4a8 | 3304 | @@ -1942,9 +1942,9 @@ |
1a6e0f06 | 3305 | bcp->cong_reps = congested_reps; |
c7c16703 JK |
3306 | bcp->disabled_period = sec_2_cycles(disabled_period); |
3307 | bcp->giveup_limit = giveup_limit; | |
1a6e0f06 JK |
3308 | - spin_lock_init(&bcp->queue_lock); |
3309 | - spin_lock_init(&bcp->uvhub_lock); | |
3310 | - spin_lock_init(&bcp->disable_lock); | |
3311 | + raw_spin_lock_init(&bcp->queue_lock); | |
3312 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
3313 | + raw_spin_lock_init(&bcp->disable_lock); | |
3314 | } | |
3315 | } | |
3316 | ||
e4b2b4a8 JK |
3317 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/x86/platform/uv/uv_time.c linux-4.14/arch/x86/platform/uv/uv_time.c |
3318 | --- linux-4.14.orig/arch/x86/platform/uv/uv_time.c 2017-11-12 19:46:13.000000000 +0100 | |
3319 | +++ linux-4.14/arch/x86/platform/uv/uv_time.c 2018-09-05 11:05:07.000000000 +0200 | |
3320 | @@ -57,7 +57,7 @@ | |
1a6e0f06 JK |
3321 | |
3322 | /* There is one of these allocated per node */ | |
3323 | struct uv_rtc_timer_head { | |
3324 | - spinlock_t lock; | |
3325 | + raw_spinlock_t lock; | |
3326 | /* next cpu waiting for timer, local node relative: */ | |
3327 | int next_cpu; | |
3328 | /* number of cpus on this node: */ | |
e4b2b4a8 | 3329 | @@ -177,7 +177,7 @@ |
1a6e0f06 JK |
3330 | uv_rtc_deallocate_timers(); |
3331 | return -ENOMEM; | |
3332 | } | |
3333 | - spin_lock_init(&head->lock); | |
3334 | + raw_spin_lock_init(&head->lock); | |
3335 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
3336 | head->next_cpu = -1; | |
3337 | blade_info[bid] = head; | |
e4b2b4a8 | 3338 | @@ -231,7 +231,7 @@ |
1a6e0f06 JK |
3339 | unsigned long flags; |
3340 | int next_cpu; | |
3341 | ||
3342 | - spin_lock_irqsave(&head->lock, flags); | |
3343 | + raw_spin_lock_irqsave(&head->lock, flags); | |
3344 | ||
3345 | next_cpu = head->next_cpu; | |
3346 | *t = expires; | |
e4b2b4a8 | 3347 | @@ -243,12 +243,12 @@ |
1a6e0f06 JK |
3348 | if (uv_setup_intr(cpu, expires)) { |
3349 | *t = ULLONG_MAX; | |
3350 | uv_rtc_find_next_timer(head, pnode); | |
3351 | - spin_unlock_irqrestore(&head->lock, flags); | |
3352 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3353 | return -ETIME; | |
3354 | } | |
3355 | } | |
3356 | ||
3357 | - spin_unlock_irqrestore(&head->lock, flags); | |
3358 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3359 | return 0; | |
3360 | } | |
3361 | ||
e4b2b4a8 | 3362 | @@ -267,7 +267,7 @@ |
1a6e0f06 JK |
3363 | unsigned long flags; |
3364 | int rc = 0; | |
3365 | ||
3366 | - spin_lock_irqsave(&head->lock, flags); | |
3367 | + raw_spin_lock_irqsave(&head->lock, flags); | |
3368 | ||
3369 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
3370 | rc = 1; | |
e4b2b4a8 | 3371 | @@ -279,7 +279,7 @@ |
1a6e0f06 JK |
3372 | uv_rtc_find_next_timer(head, pnode); |
3373 | } | |
3374 | ||
3375 | - spin_unlock_irqrestore(&head->lock, flags); | |
3376 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3377 | ||
3378 | return rc; | |
3379 | } | |
e4b2b4a8 JK |
3380 | @@ -299,13 +299,17 @@ |
3381 | static u64 uv_read_rtc(struct clocksource *cs) | |
1a6e0f06 JK |
3382 | { |
3383 | unsigned long offset; | |
e4b2b4a8 | 3384 | + u64 cycles; |
1a6e0f06 JK |
3385 | |
3386 | + preempt_disable(); | |
3387 | if (uv_get_min_hub_revision_id() == 1) | |
3388 | offset = 0; | |
3389 | else | |
3390 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
3391 | ||
e4b2b4a8 JK |
3392 | - return (u64)uv_read_local_mmr(UVH_RTC | offset); |
3393 | + cycles = (u64)uv_read_local_mmr(UVH_RTC | offset); | |
1a6e0f06 | 3394 | + preempt_enable(); |
1a6e0f06 JK |
3395 | + return cycles; |
3396 | } | |
3397 | ||
3398 | /* | |
e4b2b4a8 JK |
3399 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/arch/xtensa/include/asm/spinlock_types.h linux-4.14/arch/xtensa/include/asm/spinlock_types.h |
3400 | --- linux-4.14.orig/arch/xtensa/include/asm/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
3401 | +++ linux-4.14/arch/xtensa/include/asm/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
3402 | @@ -2,10 +2,6 @@ | |
3403 | #ifndef __ASM_SPINLOCK_TYPES_H | |
3404 | #define __ASM_SPINLOCK_TYPES_H | |
3405 | ||
3406 | -#ifndef __LINUX_SPINLOCK_TYPES_H | |
3407 | -# error "please don't include this file directly" | |
3408 | -#endif | |
3409 | - | |
3410 | typedef struct { | |
3411 | volatile unsigned int slock; | |
3412 | } arch_spinlock_t; | |
3413 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/blk-core.c linux-4.14/block/blk-core.c | |
3414 | --- linux-4.14.orig/block/blk-core.c 2018-09-05 11:03:20.000000000 +0200 | |
3415 | +++ linux-4.14/block/blk-core.c 2018-09-05 11:05:07.000000000 +0200 | |
3416 | @@ -116,6 +116,9 @@ | |
1a6e0f06 JK |
3417 | |
3418 | INIT_LIST_HEAD(&rq->queuelist); | |
3419 | INIT_LIST_HEAD(&rq->timeout_list); | |
3420 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3421 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3422 | +#endif | |
3423 | rq->cpu = -1; | |
3424 | rq->q = q; | |
3425 | rq->__sector = (sector_t) -1; | |
e4b2b4a8 | 3426 | @@ -280,7 +283,7 @@ |
1a6e0f06 JK |
3427 | void blk_start_queue(struct request_queue *q) |
3428 | { | |
e4b2b4a8 JK |
3429 | lockdep_assert_held(q->queue_lock); |
3430 | - WARN_ON(!in_interrupt() && !irqs_disabled()); | |
3431 | + WARN_ON_NONRT(!in_interrupt() && !irqs_disabled()); | |
3432 | WARN_ON_ONCE(q->mq_ops); | |
1a6e0f06 JK |
3433 | |
3434 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
e4b2b4a8 JK |
3435 | @@ -808,12 +811,21 @@ |
3436 | percpu_ref_put(&q->q_usage_counter); | |
3437 | } | |
3438 | ||
3439 | +static void blk_queue_usage_counter_release_swork(struct swork_event *sev) | |
3440 | +{ | |
3441 | + struct request_queue *q = | |
3442 | + container_of(sev, struct request_queue, mq_pcpu_wake); | |
3443 | + | |
3444 | + wake_up_all(&q->mq_freeze_wq); | |
3445 | +} | |
3446 | + | |
3447 | static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
3448 | { | |
1a6e0f06 JK |
3449 | struct request_queue *q = |
3450 | container_of(ref, struct request_queue, q_usage_counter); | |
3451 | ||
3452 | - wake_up_all(&q->mq_freeze_wq); | |
e4b2b4a8 JK |
3453 | + if (wq_has_sleeper(&q->mq_freeze_wq)) |
3454 | + swork_queue(&q->mq_pcpu_wake); | |
1a6e0f06 JK |
3455 | } |
3456 | ||
3457 | static void blk_rq_timed_out_timer(unsigned long data) | |
e4b2b4a8 | 3458 | @@ -890,6 +902,7 @@ |
1a6e0f06 JK |
3459 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
3460 | ||
e4b2b4a8 JK |
3461 | init_waitqueue_head(&q->mq_freeze_wq); |
3462 | + INIT_SWORK(&q->mq_pcpu_wake, blk_queue_usage_counter_release_swork); | |
1a6e0f06 JK |
3463 | |
3464 | /* | |
3465 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
e4b2b4a8 | 3466 | @@ -3308,7 +3321,7 @@ |
1a6e0f06 JK |
3467 | blk_run_queue_async(q); |
3468 | else | |
3469 | __blk_run_queue(q); | |
3470 | - spin_unlock(q->queue_lock); | |
3471 | + spin_unlock_irq(q->queue_lock); | |
3472 | } | |
3473 | ||
3474 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
e4b2b4a8 | 3475 | @@ -3356,7 +3369,6 @@ |
1a6e0f06 JK |
3476 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
3477 | { | |
3478 | struct request_queue *q; | |
3479 | - unsigned long flags; | |
3480 | struct request *rq; | |
3481 | LIST_HEAD(list); | |
3482 | unsigned int depth; | |
e4b2b4a8 | 3483 | @@ -3376,11 +3388,6 @@ |
1a6e0f06 JK |
3484 | q = NULL; |
3485 | depth = 0; | |
3486 | ||
3487 | - /* | |
3488 | - * Save and disable interrupts here, to avoid doing it for every | |
3489 | - * queue lock we have to take. | |
3490 | - */ | |
3491 | - local_irq_save(flags); | |
3492 | while (!list_empty(&list)) { | |
3493 | rq = list_entry_rq(list.next); | |
3494 | list_del_init(&rq->queuelist); | |
e4b2b4a8 | 3495 | @@ -3393,7 +3400,7 @@ |
1a6e0f06 JK |
3496 | queue_unplugged(q, depth, from_schedule); |
3497 | q = rq->q; | |
3498 | depth = 0; | |
3499 | - spin_lock(q->queue_lock); | |
3500 | + spin_lock_irq(q->queue_lock); | |
3501 | } | |
3502 | ||
3503 | /* | |
e4b2b4a8 | 3504 | @@ -3420,8 +3427,6 @@ |
1a6e0f06 JK |
3505 | */ |
3506 | if (q) | |
3507 | queue_unplugged(q, depth, from_schedule); | |
3508 | - | |
3509 | - local_irq_restore(flags); | |
3510 | } | |
3511 | ||
3512 | void blk_finish_plug(struct blk_plug *plug) | |
e4b2b4a8 JK |
3513 | @@ -3631,6 +3636,8 @@ |
3514 | if (!kblockd_workqueue) | |
3515 | panic("Failed to create kblockd\n"); | |
3516 | ||
3517 | + BUG_ON(swork_get()); | |
3518 | + | |
3519 | request_cachep = kmem_cache_create("blkdev_requests", | |
3520 | sizeof(struct request), 0, SLAB_PANIC, NULL); | |
3521 | ||
3522 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/blk-ioc.c linux-4.14/block/blk-ioc.c | |
3523 | --- linux-4.14.orig/block/blk-ioc.c 2017-11-12 19:46:13.000000000 +0100 | |
3524 | +++ linux-4.14/block/blk-ioc.c 2018-09-05 11:05:07.000000000 +0200 | |
3525 | @@ -9,6 +9,7 @@ | |
1a6e0f06 JK |
3526 | #include <linux/blkdev.h> |
3527 | #include <linux/slab.h> | |
e4b2b4a8 | 3528 | #include <linux/sched/task.h> |
1a6e0f06 JK |
3529 | +#include <linux/delay.h> |
3530 | ||
3531 | #include "blk.h" | |
3532 | ||
e4b2b4a8 | 3533 | @@ -118,7 +119,7 @@ |
1a6e0f06 JK |
3534 | spin_unlock(q->queue_lock); |
3535 | } else { | |
3536 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3537 | - cpu_relax(); | |
3538 | + cpu_chill(); | |
3539 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3540 | } | |
3541 | } | |
e4b2b4a8 JK |
3542 | @@ -202,7 +203,7 @@ |
3543 | spin_unlock(icq->q->queue_lock); | |
3544 | } else { | |
3545 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3546 | - cpu_relax(); | |
3547 | + cpu_chill(); | |
3548 | goto retry; | |
3549 | } | |
1a6e0f06 | 3550 | } |
e4b2b4a8 JK |
3551 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/blk-mq.c linux-4.14/block/blk-mq.c |
3552 | --- linux-4.14.orig/block/blk-mq.c 2018-09-05 11:03:20.000000000 +0200 | |
3553 | +++ linux-4.14/block/blk-mq.c 2018-09-05 11:05:07.000000000 +0200 | |
3554 | @@ -339,6 +339,9 @@ | |
3555 | /* tag was already set */ | |
3556 | rq->extra_len = 0; | |
1a6e0f06 JK |
3557 | |
3558 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3559 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3560 | +#endif | |
3561 | INIT_LIST_HEAD(&rq->timeout_list); | |
3562 | rq->timeout = 0; | |
3563 | ||
e4b2b4a8 | 3564 | @@ -533,12 +536,24 @@ |
1a6e0f06 JK |
3565 | } |
3566 | EXPORT_SYMBOL(blk_mq_end_request); | |
3567 | ||
3568 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3569 | + | |
3570 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
3571 | +{ | |
3572 | + struct request *rq = container_of(work, struct request, work); | |
3573 | + | |
3574 | + rq->q->softirq_done_fn(rq); | |
3575 | +} | |
3576 | + | |
3577 | +#else | |
3578 | + | |
3579 | static void __blk_mq_complete_request_remote(void *data) | |
3580 | { | |
3581 | struct request *rq = data; | |
e4b2b4a8 | 3582 | |
1a6e0f06 JK |
3583 | rq->q->softirq_done_fn(rq); |
3584 | } | |
1a6e0f06 | 3585 | +#endif |
e4b2b4a8 JK |
3586 | |
3587 | static void __blk_mq_complete_request(struct request *rq) | |
1a6e0f06 | 3588 | { |
e4b2b4a8 | 3589 | @@ -558,19 +573,27 @@ |
1a6e0f06 JK |
3590 | return; |
3591 | } | |
3592 | ||
3593 | - cpu = get_cpu(); | |
3594 | + cpu = get_cpu_light(); | |
3595 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
3596 | shared = cpus_share_cache(cpu, ctx->cpu); | |
3597 | ||
3598 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
3599 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
e4b2b4a8 JK |
3600 | + /* |
3601 | + * We could force QUEUE_FLAG_SAME_FORCE then we would not get in | |
3602 | + * here. But we could try to invoke it one the CPU like this. | |
3603 | + */ | |
1a6e0f06 JK |
3604 | + schedule_work_on(ctx->cpu, &rq->work); |
3605 | +#else | |
3606 | rq->csd.func = __blk_mq_complete_request_remote; | |
3607 | rq->csd.info = rq; | |
3608 | rq->csd.flags = 0; | |
3609 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
3610 | +#endif | |
3611 | } else { | |
3612 | rq->q->softirq_done_fn(rq); | |
3613 | } | |
3614 | - put_cpu(); | |
3615 | + put_cpu_light(); | |
3616 | } | |
3617 | ||
e4b2b4a8 JK |
3618 | /** |
3619 | @@ -1238,14 +1261,14 @@ | |
1a6e0f06 JK |
3620 | return; |
3621 | ||
c7c16703 | 3622 | if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { |
1a6e0f06 JK |
3623 | - int cpu = get_cpu(); |
3624 | + int cpu = get_cpu_light(); | |
3625 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
3626 | __blk_mq_run_hw_queue(hctx); | |
3627 | - put_cpu(); | |
3628 | + put_cpu_light(); | |
3629 | return; | |
3630 | } | |
3631 | ||
3632 | - put_cpu(); | |
3633 | + put_cpu_light(); | |
3634 | } | |
3635 | ||
e4b2b4a8 JK |
3636 | kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), |
3637 | @@ -2863,10 +2886,9 @@ | |
3638 | kt = nsecs; | |
3639 | ||
3640 | mode = HRTIMER_MODE_REL; | |
3641 | - hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode); | |
3642 | + hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode, current); | |
3643 | hrtimer_set_expires(&hs.timer, kt); | |
3644 | ||
3645 | - hrtimer_init_sleeper(&hs, current); | |
3646 | do { | |
3647 | if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) | |
3648 | break; | |
3649 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/blk-mq.h linux-4.14/block/blk-mq.h | |
3650 | --- linux-4.14.orig/block/blk-mq.h 2018-09-05 11:03:20.000000000 +0200 | |
3651 | +++ linux-4.14/block/blk-mq.h 2018-09-05 11:05:07.000000000 +0200 | |
3652 | @@ -98,12 +98,12 @@ | |
1a6e0f06 JK |
3653 | */ |
3654 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
3655 | { | |
3656 | - return __blk_mq_get_ctx(q, get_cpu()); | |
3657 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
3658 | } | |
3659 | ||
3660 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
3661 | { | |
3662 | - put_cpu(); | |
3663 | + put_cpu_light(); | |
3664 | } | |
3665 | ||
3666 | struct blk_mq_alloc_data { | |
e4b2b4a8 JK |
3667 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/blk-softirq.c linux-4.14/block/blk-softirq.c |
3668 | --- linux-4.14.orig/block/blk-softirq.c 2017-11-12 19:46:13.000000000 +0100 | |
3669 | +++ linux-4.14/block/blk-softirq.c 2018-09-05 11:05:07.000000000 +0200 | |
3670 | @@ -53,6 +53,7 @@ | |
1a6e0f06 JK |
3671 | raise_softirq_irqoff(BLOCK_SOFTIRQ); |
3672 | ||
3673 | local_irq_restore(flags); | |
3674 | + preempt_check_resched_rt(); | |
3675 | } | |
3676 | ||
3677 | /* | |
e4b2b4a8 | 3678 | @@ -91,6 +92,7 @@ |
c7c16703 JK |
3679 | this_cpu_ptr(&blk_cpu_done)); |
3680 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3681 | local_irq_enable(); | |
3682 | + preempt_check_resched_rt(); | |
1a6e0f06 | 3683 | |
c7c16703 JK |
3684 | return 0; |
3685 | } | |
e4b2b4a8 | 3686 | @@ -143,6 +145,7 @@ |
1a6e0f06 JK |
3687 | goto do_local; |
3688 | ||
3689 | local_irq_restore(flags); | |
3690 | + preempt_check_resched_rt(); | |
3691 | } | |
3692 | ||
3693 | /** | |
e4b2b4a8 JK |
3694 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/block/bounce.c linux-4.14/block/bounce.c |
3695 | --- linux-4.14.orig/block/bounce.c 2018-09-05 11:03:20.000000000 +0200 | |
3696 | +++ linux-4.14/block/bounce.c 2018-09-05 11:05:07.000000000 +0200 | |
3697 | @@ -66,11 +66,11 @@ | |
1a6e0f06 JK |
3698 | unsigned long flags; |
3699 | unsigned char *vto; | |
3700 | ||
3701 | - local_irq_save(flags); | |
3702 | + local_irq_save_nort(flags); | |
3703 | vto = kmap_atomic(to->bv_page); | |
3704 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
3705 | kunmap_atomic(vto); | |
3706 | - local_irq_restore(flags); | |
3707 | + local_irq_restore_nort(flags); | |
3708 | } | |
3709 | ||
3710 | #else /* CONFIG_HIGHMEM */ | |
e4b2b4a8 JK |
3711 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/crypto/algapi.c linux-4.14/crypto/algapi.c |
3712 | --- linux-4.14.orig/crypto/algapi.c 2018-09-05 11:03:20.000000000 +0200 | |
3713 | +++ linux-4.14/crypto/algapi.c 2018-09-05 11:05:07.000000000 +0200 | |
3714 | @@ -731,13 +731,13 @@ | |
1a6e0f06 JK |
3715 | |
3716 | int crypto_register_notifier(struct notifier_block *nb) | |
3717 | { | |
3718 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
3719 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
3720 | } | |
3721 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
3722 | ||
3723 | int crypto_unregister_notifier(struct notifier_block *nb) | |
3724 | { | |
3725 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
3726 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
3727 | } | |
3728 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
3729 | ||
e4b2b4a8 JK |
3730 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/crypto/api.c linux-4.14/crypto/api.c |
3731 | --- linux-4.14.orig/crypto/api.c 2017-11-12 19:46:13.000000000 +0100 | |
3732 | +++ linux-4.14/crypto/api.c 2018-09-05 11:05:07.000000000 +0200 | |
3733 | @@ -31,7 +31,7 @@ | |
1a6e0f06 JK |
3734 | DECLARE_RWSEM(crypto_alg_sem); |
3735 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
3736 | ||
3737 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
3738 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
3739 | EXPORT_SYMBOL_GPL(crypto_chain); | |
3740 | ||
3741 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
e4b2b4a8 | 3742 | @@ -236,10 +236,10 @@ |
1a6e0f06 JK |
3743 | { |
3744 | int ok; | |
3745 | ||
3746 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3747 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3748 | if (ok == NOTIFY_DONE) { | |
3749 | request_module("cryptomgr"); | |
3750 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3751 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3752 | } | |
3753 | ||
3754 | return ok; | |
e4b2b4a8 JK |
3755 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/crypto/internal.h linux-4.14/crypto/internal.h |
3756 | --- linux-4.14.orig/crypto/internal.h 2017-11-12 19:46:13.000000000 +0100 | |
3757 | +++ linux-4.14/crypto/internal.h 2018-09-05 11:05:07.000000000 +0200 | |
3758 | @@ -47,7 +47,7 @@ | |
1a6e0f06 JK |
3759 | |
3760 | extern struct list_head crypto_alg_list; | |
3761 | extern struct rw_semaphore crypto_alg_sem; | |
3762 | -extern struct blocking_notifier_head crypto_chain; | |
3763 | +extern struct srcu_notifier_head crypto_chain; | |
3764 | ||
3765 | #ifdef CONFIG_PROC_FS | |
3766 | void __init crypto_init_proc(void); | |
e4b2b4a8 | 3767 | @@ -143,7 +143,7 @@ |
1a6e0f06 JK |
3768 | |
3769 | static inline void crypto_notify(unsigned long val, void *v) | |
3770 | { | |
3771 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
3772 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
3773 | } | |
3774 | ||
3775 | #endif /* _CRYPTO_INTERNAL_H */ | |
e4b2b4a8 JK |
3776 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/Documentation/trace/events.txt linux-4.14/Documentation/trace/events.txt |
3777 | --- linux-4.14.orig/Documentation/trace/events.txt 2017-11-12 19:46:13.000000000 +0100 | |
3778 | +++ linux-4.14/Documentation/trace/events.txt 2018-09-05 11:05:07.000000000 +0200 | |
3779 | @@ -517,1550 +517,4 @@ | |
3780 | totals derived from one or more trace event format fields and/or | |
3781 | event counts (hitcount). | |
3782 | ||
3783 | - The format of a hist trigger is as follows: | |
3784 | - | |
3785 | - hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>] | |
3786 | - [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue] | |
3787 | - [:clear][:name=histname1] [if <filter>] | |
3788 | - | |
3789 | - When a matching event is hit, an entry is added to a hash table | |
3790 | - using the key(s) and value(s) named. Keys and values correspond to | |
3791 | - fields in the event's format description. Values must correspond to | |
3792 | - numeric fields - on an event hit, the value(s) will be added to a | |
3793 | - sum kept for that field. The special string 'hitcount' can be used | |
3794 | - in place of an explicit value field - this is simply a count of | |
3795 | - event hits. If 'values' isn't specified, an implicit 'hitcount' | |
3796 | - value will be automatically created and used as the only value. | |
3797 | - Keys can be any field, or the special string 'stacktrace', which | |
3798 | - will use the event's kernel stacktrace as the key. The keywords | |
3799 | - 'keys' or 'key' can be used to specify keys, and the keywords | |
3800 | - 'values', 'vals', or 'val' can be used to specify values. Compound | |
3801 | - keys consisting of up to two fields can be specified by the 'keys' | |
3802 | - keyword. Hashing a compound key produces a unique entry in the | |
3803 | - table for each unique combination of component keys, and can be | |
3804 | - useful for providing more fine-grained summaries of event data. | |
3805 | - Additionally, sort keys consisting of up to two fields can be | |
3806 | - specified by the 'sort' keyword. If more than one field is | |
3807 | - specified, the result will be a 'sort within a sort': the first key | |
3808 | - is taken to be the primary sort key and the second the secondary | |
3809 | - key. If a hist trigger is given a name using the 'name' parameter, | |
3810 | - its histogram data will be shared with other triggers of the same | |
3811 | - name, and trigger hits will update this common data. Only triggers | |
3812 | - with 'compatible' fields can be combined in this way; triggers are | |
3813 | - 'compatible' if the fields named in the trigger share the same | |
3814 | - number and type of fields and those fields also have the same names. | |
3815 | - Note that any two events always share the compatible 'hitcount' and | |
3816 | - 'stacktrace' fields and can therefore be combined using those | |
3817 | - fields, however pointless that may be. | |
3818 | - | |
3819 | - 'hist' triggers add a 'hist' file to each event's subdirectory. | |
3820 | - Reading the 'hist' file for the event will dump the hash table in | |
3821 | - its entirety to stdout. If there are multiple hist triggers | |
3822 | - attached to an event, there will be a table for each trigger in the | |
3823 | - output. The table displayed for a named trigger will be the same as | |
3824 | - any other instance having the same name. Each printed hash table | |
3825 | - entry is a simple list of the keys and values comprising the entry; | |
3826 | - keys are printed first and are delineated by curly braces, and are | |
3827 | - followed by the set of value fields for the entry. By default, | |
3828 | - numeric fields are displayed as base-10 integers. This can be | |
3829 | - modified by appending any of the following modifiers to the field | |
3830 | - name: | |
3831 | - | |
3832 | - .hex display a number as a hex value | |
3833 | - .sym display an address as a symbol | |
3834 | - .sym-offset display an address as a symbol and offset | |
3835 | - .syscall display a syscall id as a system call name | |
3836 | - .execname display a common_pid as a program name | |
3837 | - | |
3838 | - Note that in general the semantics of a given field aren't | |
3839 | - interpreted when applying a modifier to it, but there are some | |
3840 | - restrictions to be aware of in this regard: | |
3841 | - | |
3842 | - - only the 'hex' modifier can be used for values (because values | |
3843 | - are essentially sums, and the other modifiers don't make sense | |
3844 | - in that context). | |
3845 | - - the 'execname' modifier can only be used on a 'common_pid'. The | |
3846 | - reason for this is that the execname is simply the 'comm' value | |
3847 | - saved for the 'current' process when an event was triggered, | |
3848 | - which is the same as the common_pid value saved by the event | |
3849 | - tracing code. Trying to apply that comm value to other pid | |
3850 | - values wouldn't be correct, and typically events that care save | |
3851 | - pid-specific comm fields in the event itself. | |
3852 | - | |
3853 | - A typical usage scenario would be the following to enable a hist | |
3854 | - trigger, read its current contents, and then turn it off: | |
3855 | - | |
3856 | - # echo 'hist:keys=skbaddr.hex:vals=len' > \ | |
3857 | - /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
3858 | - | |
3859 | - # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
3860 | - | |
3861 | - # echo '!hist:keys=skbaddr.hex:vals=len' > \ | |
3862 | - /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
3863 | - | |
3864 | - The trigger file itself can be read to show the details of the | |
3865 | - currently attached hist trigger. This information is also displayed | |
3866 | - at the top of the 'hist' file when read. | |
3867 | - | |
3868 | - By default, the size of the hash table is 2048 entries. The 'size' | |
3869 | - parameter can be used to specify more or fewer than that. The units | |
3870 | - are in terms of hashtable entries - if a run uses more entries than | |
3871 | - specified, the results will show the number of 'drops', the number | |
3872 | - of hits that were ignored. The size should be a power of 2 between | |
3873 | - 128 and 131072 (any non- power-of-2 number specified will be rounded | |
3874 | - up). | |
3875 | - | |
3876 | - The 'sort' parameter can be used to specify a value field to sort | |
3877 | - on. The default if unspecified is 'hitcount' and the default sort | |
3878 | - order is 'ascending'. To sort in the opposite direction, append | |
3879 | - .descending' to the sort key. | |
3880 | - | |
3881 | - The 'pause' parameter can be used to pause an existing hist trigger | |
3882 | - or to start a hist trigger but not log any events until told to do | |
3883 | - so. 'continue' or 'cont' can be used to start or restart a paused | |
3884 | - hist trigger. | |
3885 | - | |
3886 | - The 'clear' parameter will clear the contents of a running hist | |
3887 | - trigger and leave its current paused/active state. | |
3888 | - | |
3889 | - Note that the 'pause', 'cont', and 'clear' parameters should be | |
3890 | - applied using 'append' shell operator ('>>') if applied to an | |
3891 | - existing trigger, rather than via the '>' operator, which will cause | |
3892 | - the trigger to be removed through truncation. | |
3893 | - | |
3894 | -- enable_hist/disable_hist | |
3895 | - | |
3896 | - The enable_hist and disable_hist triggers can be used to have one | |
3897 | - event conditionally start and stop another event's already-attached | |
3898 | - hist trigger. Any number of enable_hist and disable_hist triggers | |
3899 | - can be attached to a given event, allowing that event to kick off | |
3900 | - and stop aggregations on a host of other events. | |
3901 | - | |
3902 | - The format is very similar to the enable/disable_event triggers: | |
3903 | - | |
3904 | - enable_hist:<system>:<event>[:count] | |
3905 | - disable_hist:<system>:<event>[:count] | |
3906 | - | |
3907 | - Instead of enabling or disabling the tracing of the target event | |
3908 | - into the trace buffer as the enable/disable_event triggers do, the | |
3909 | - enable/disable_hist triggers enable or disable the aggregation of | |
3910 | - the target event into a hash table. | |
3911 | - | |
3912 | - A typical usage scenario for the enable_hist/disable_hist triggers | |
3913 | - would be to first set up a paused hist trigger on some event, | |
3914 | - followed by an enable_hist/disable_hist pair that turns the hist | |
3915 | - aggregation on and off when conditions of interest are hit: | |
3916 | - | |
3917 | - # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \ | |
3918 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
3919 | - | |
3920 | - # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
3921 | - /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
3922 | - | |
3923 | - # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | |
3924 | - /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
3925 | - | |
3926 | - The above sets up an initially paused hist trigger which is unpaused | |
3927 | - and starts aggregating events when a given program is executed, and | |
3928 | - which stops aggregating when the process exits and the hist trigger | |
3929 | - is paused again. | |
3930 | - | |
3931 | - The examples below provide a more concrete illustration of the | |
3932 | - concepts and typical usage patterns discussed above. | |
3933 | - | |
3934 | - | |
3935 | -6.2 'hist' trigger examples | |
3936 | ---------------------------- | |
3937 | - | |
3938 | - The first set of examples creates aggregations using the kmalloc | |
3939 | - event. The fields that can be used for the hist trigger are listed | |
3940 | - in the kmalloc event's format file: | |
3941 | - | |
3942 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format | |
3943 | - name: kmalloc | |
3944 | - ID: 374 | |
3945 | - format: | |
3946 | - field:unsigned short common_type; offset:0; size:2; signed:0; | |
3947 | - field:unsigned char common_flags; offset:2; size:1; signed:0; | |
3948 | - field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | |
3949 | - field:int common_pid; offset:4; size:4; signed:1; | |
3950 | - | |
3951 | - field:unsigned long call_site; offset:8; size:8; signed:0; | |
3952 | - field:const void * ptr; offset:16; size:8; signed:0; | |
3953 | - field:size_t bytes_req; offset:24; size:8; signed:0; | |
3954 | - field:size_t bytes_alloc; offset:32; size:8; signed:0; | |
3955 | - field:gfp_t gfp_flags; offset:40; size:4; signed:0; | |
3956 | - | |
3957 | - We'll start by creating a hist trigger that generates a simple table | |
3958 | - that lists the total number of bytes requested for each function in | |
3959 | - the kernel that made one or more calls to kmalloc: | |
3960 | - | |
3961 | - # echo 'hist:key=call_site:val=bytes_req' > \ | |
3962 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
3963 | - | |
3964 | - This tells the tracing system to create a 'hist' trigger using the | |
3965 | - call_site field of the kmalloc event as the key for the table, which | |
3966 | - just means that each unique call_site address will have an entry | |
3967 | - created for it in the table. The 'val=bytes_req' parameter tells | |
3968 | - the hist trigger that for each unique entry (call_site) in the | |
3969 | - table, it should keep a running total of the number of bytes | |
3970 | - requested by that call_site. | |
3971 | - | |
3972 | - We'll let it run for awhile and then dump the contents of the 'hist' | |
3973 | - file in the kmalloc event's subdirectory (for readability, a number | |
3974 | - of entries have been omitted): | |
3975 | - | |
3976 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
3977 | - # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | |
3978 | - | |
3979 | - { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176 | |
3980 | - { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024 | |
3981 | - { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384 | |
3982 | - { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24 | |
3983 | - { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8 | |
3984 | - { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152 | |
3985 | - { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144 | |
3986 | - { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144 | |
3987 | - { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560 | |
3988 | - { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736 | |
3989 | - . | |
3990 | - . | |
3991 | - . | |
3992 | - { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576 | |
3993 | - { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336 | |
3994 | - { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504 | |
3995 | - { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584 | |
3996 | - { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448 | |
3997 | - { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720 | |
3998 | - { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088 | |
3999 | - { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920 | |
4000 | - { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716 | |
4001 | - { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712 | |
4002 | - { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160 | |
4003 | - { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520 | |
4004 | - | |
4005 | - Totals: | |
4006 | - Hits: 4610 | |
4007 | - Entries: 45 | |
4008 | - Dropped: 0 | |
4009 | - | |
4010 | - The output displays a line for each entry, beginning with the key | |
4011 | - specified in the trigger, followed by the value(s) also specified in | |
4012 | - the trigger. At the beginning of the output is a line that displays | |
4013 | - the trigger info, which can also be displayed by reading the | |
4014 | - 'trigger' file: | |
4015 | - | |
4016 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4017 | - hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | |
4018 | - | |
4019 | - At the end of the output are a few lines that display the overall | |
4020 | - totals for the run. The 'Hits' field shows the total number of | |
4021 | - times the event trigger was hit, the 'Entries' field shows the total | |
4022 | - number of used entries in the hash table, and the 'Dropped' field | |
4023 | - shows the number of hits that were dropped because the number of | |
4024 | - used entries for the run exceeded the maximum number of entries | |
4025 | - allowed for the table (normally 0, but if not a hint that you may | |
4026 | - want to increase the size of the table using the 'size' parameter). | |
4027 | - | |
4028 | - Notice in the above output that there's an extra field, 'hitcount', | |
4029 | - which wasn't specified in the trigger. Also notice that in the | |
4030 | - trigger info output, there's a parameter, 'sort=hitcount', which | |
4031 | - wasn't specified in the trigger either. The reason for that is that | |
4032 | - every trigger implicitly keeps a count of the total number of hits | |
4033 | - attributed to a given entry, called the 'hitcount'. That hitcount | |
4034 | - information is explicitly displayed in the output, and in the | |
4035 | - absence of a user-specified sort parameter, is used as the default | |
4036 | - sort field. | |
4037 | - | |
4038 | - The value 'hitcount' can be used in place of an explicit value in | |
4039 | - the 'values' parameter if you don't really need to have any | |
4040 | - particular field summed and are mainly interested in hit | |
4041 | - frequencies. | |
4042 | - | |
4043 | - To turn the hist trigger off, simply call up the trigger in the | |
4044 | - command history and re-execute it with a '!' prepended: | |
4045 | - | |
4046 | - # echo '!hist:key=call_site:val=bytes_req' > \ | |
4047 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4048 | - | |
4049 | - Finally, notice that the call_site as displayed in the output above | |
4050 | - isn't really very useful. It's an address, but normally addresses | |
4051 | - are displayed in hex. To have a numeric field displayed as a hex | |
4052 | - value, simply append '.hex' to the field name in the trigger: | |
4053 | - | |
4054 | - # echo 'hist:key=call_site.hex:val=bytes_req' > \ | |
4055 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4056 | - | |
4057 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4058 | - # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] | |
4059 | - | |
4060 | - { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 | |
4061 | - { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 | |
4062 | - { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 | |
4063 | - { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 | |
4064 | - { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 | |
4065 | - { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 | |
4066 | - { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 | |
4067 | - { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 | |
4068 | - { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 | |
4069 | - { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 | |
4070 | - { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 | |
4071 | - { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 | |
4072 | - . | |
4073 | - . | |
4074 | - . | |
4075 | - { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 | |
4076 | - { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 | |
4077 | - { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 | |
4078 | - { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 | |
4079 | - { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 | |
4080 | - { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 | |
4081 | - { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 | |
4082 | - { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 | |
4083 | - { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 | |
4084 | - { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 | |
4085 | - { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 | |
4086 | - | |
4087 | - Totals: | |
4088 | - Hits: 4775 | |
4089 | - Entries: 46 | |
4090 | - Dropped: 0 | |
4091 | - | |
4092 | - Even that's only marginally more useful - while hex values do look | |
4093 | - more like addresses, what users are typically more interested in | |
4094 | - when looking at text addresses are the corresponding symbols | |
4095 | - instead. To have an address displayed as symbolic value instead, | |
4096 | - simply append '.sym' or '.sym-offset' to the field name in the | |
4097 | - trigger: | |
4098 | - | |
4099 | - # echo 'hist:key=call_site.sym:val=bytes_req' > \ | |
4100 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4101 | - | |
4102 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4103 | - # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active] | |
4104 | - | |
4105 | - { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024 | |
4106 | - { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | |
4107 | - { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | |
4108 | - { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192 | |
4109 | - { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | |
4110 | - { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | |
4111 | - { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | |
4112 | - { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528 | |
4113 | - { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624 | |
4114 | - { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96 | |
4115 | - { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464 | |
4116 | - { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304 | |
4117 | - { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | |
4118 | - { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424 | |
4119 | - . | |
4120 | - . | |
4121 | - . | |
4122 | - { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240 | |
4123 | - { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280 | |
4124 | - { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672 | |
4125 | - { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208 | |
4126 | - { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840 | |
4127 | - { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312 | |
4128 | - { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152 | |
4129 | - { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576 | |
4130 | - { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248 | |
4131 | - { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384 | |
4132 | - { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584 | |
4133 | - { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176 | |
4134 | - { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265 | |
4135 | - | |
4136 | - Totals: | |
4137 | - Hits: 109928 | |
4138 | - Entries: 71 | |
4139 | - Dropped: 0 | |
4140 | - | |
4141 | - Because the default sort key above is 'hitcount', the above shows a | |
4142 | - the list of call_sites by increasing hitcount, so that at the bottom | |
4143 | - we see the functions that made the most kmalloc calls during the | |
4144 | - run. If instead we we wanted to see the top kmalloc callers in | |
4145 | - terms of the number of bytes requested rather than the number of | |
4146 | - calls, and we wanted the top caller to appear at the top, we can use | |
4147 | - the 'sort' parameter, along with the 'descending' modifier: | |
4148 | - | |
4149 | - # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \ | |
4150 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4151 | - | |
4152 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4153 | - # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | |
4154 | - | |
4155 | - { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464 | |
4156 | - { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176 | |
4157 | - { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135 | |
4158 | - { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128 | |
4159 | - { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784 | |
4160 | - { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992 | |
4161 | - { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072 | |
4162 | - { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824 | |
4163 | - { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704 | |
4164 | - { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088 | |
4165 | - { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536 | |
4166 | - { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664 | |
4167 | - { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632 | |
4168 | - . | |
4169 | - . | |
4170 | - . | |
4171 | - { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | |
4172 | - { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | |
4173 | - { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 | |
4174 | - { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 | |
4175 | - { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 | |
4176 | - { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | |
4177 | - { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 | |
4178 | - { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | |
4179 | - { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | |
4180 | - { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | |
4181 | - | |
4182 | - Totals: | |
4183 | - Hits: 32133 | |
4184 | - Entries: 81 | |
4185 | - Dropped: 0 | |
4186 | - | |
4187 | - To display the offset and size information in addition to the symbol | |
4188 | - name, just use 'sym-offset' instead: | |
4189 | - | |
4190 | - # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ | |
4191 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4192 | - | |
4193 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4194 | - # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | |
4195 | - | |
4196 | - { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 | |
4197 | - { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 | |
4198 | - { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 | |
4199 | - { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 | |
4200 | - { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 | |
4201 | - { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 | |
4202 | - { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 | |
4203 | - { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 | |
4204 | - { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 | |
4205 | - { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 | |
4206 | - { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 | |
4207 | - { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 | |
4208 | - . | |
4209 | - . | |
4210 | - . | |
4211 | - { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 | |
4212 | - { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 | |
4213 | - { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 | |
4214 | - { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 | |
4215 | - { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 | |
4216 | - { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 | |
4217 | - { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 | |
4218 | - | |
4219 | - Totals: | |
4220 | - Hits: 26098 | |
4221 | - Entries: 64 | |
4222 | - Dropped: 0 | |
4223 | - | |
4224 | - We can also add multiple fields to the 'values' parameter. For | |
4225 | - example, we might want to see the total number of bytes allocated | |
4226 | - alongside bytes requested, and display the result sorted by bytes | |
4227 | - allocated in a descending order: | |
4228 | - | |
4229 | - # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \ | |
4230 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4231 | - | |
4232 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4233 | - # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active] | |
4234 | - | |
4235 | - { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016 | |
4236 | - { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224 | |
4237 | - { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568 | |
4238 | - { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760 | |
4239 | - { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744 | |
4240 | - { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400 | |
4241 | - { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496 | |
4242 | - { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304 | |
4243 | - { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640 | |
4244 | - { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760 | |
4245 | - { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312 | |
4246 | - { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432 | |
4247 | - . | |
4248 | - . | |
4249 | - . | |
4250 | - { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192 | |
4251 | - { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
4252 | - { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
4253 | - { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
4254 | - { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
4255 | - { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96 | |
4256 | - { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64 | |
4257 | - { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | |
4258 | - { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8 | |
4259 | - { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | |
4260 | - | |
4261 | - Totals: | |
4262 | - Hits: 66598 | |
4263 | - Entries: 65 | |
4264 | - Dropped: 0 | |
4265 | - | |
4266 | - Finally, to finish off our kmalloc example, instead of simply having | |
4267 | - the hist trigger display symbolic call_sites, we can have the hist | |
4268 | - trigger additionally display the complete set of kernel stack traces | |
4269 | - that led to each call_site. To do that, we simply use the special | |
4270 | - value 'stacktrace' for the key parameter: | |
4271 | - | |
4272 | - # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ | |
4273 | - /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
4274 | - | |
4275 | - The above trigger will use the kernel stack trace in effect when an | |
4276 | - event is triggered as the key for the hash table. This allows the | |
4277 | - enumeration of every kernel callpath that led up to a particular | |
4278 | - event, along with a running total of any of the event fields for | |
4279 | - that event. Here we tally bytes requested and bytes allocated for | |
4280 | - every callpath in the system that led up to a kmalloc (in this case | |
4281 | - every callpath to a kmalloc for a kernel compile): | |
4282 | - | |
4283 | - # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
4284 | - # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] | |
4285 | - | |
4286 | - { stacktrace: | |
4287 | - __kmalloc_track_caller+0x10b/0x1a0 | |
4288 | - kmemdup+0x20/0x50 | |
4289 | - hidraw_report_event+0x8a/0x120 [hid] | |
4290 | - hid_report_raw_event+0x3ea/0x440 [hid] | |
4291 | - hid_input_report+0x112/0x190 [hid] | |
4292 | - hid_irq_in+0xc2/0x260 [usbhid] | |
4293 | - __usb_hcd_giveback_urb+0x72/0x120 | |
4294 | - usb_giveback_urb_bh+0x9e/0xe0 | |
4295 | - tasklet_hi_action+0xf8/0x100 | |
4296 | - __do_softirq+0x114/0x2c0 | |
4297 | - irq_exit+0xa5/0xb0 | |
4298 | - do_IRQ+0x5a/0xf0 | |
4299 | - ret_from_intr+0x0/0x30 | |
4300 | - cpuidle_enter+0x17/0x20 | |
4301 | - cpu_startup_entry+0x315/0x3e0 | |
4302 | - rest_init+0x7c/0x80 | |
4303 | - } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | |
4304 | - { stacktrace: | |
4305 | - __kmalloc_track_caller+0x10b/0x1a0 | |
4306 | - kmemdup+0x20/0x50 | |
4307 | - hidraw_report_event+0x8a/0x120 [hid] | |
4308 | - hid_report_raw_event+0x3ea/0x440 [hid] | |
4309 | - hid_input_report+0x112/0x190 [hid] | |
4310 | - hid_irq_in+0xc2/0x260 [usbhid] | |
4311 | - __usb_hcd_giveback_urb+0x72/0x120 | |
4312 | - usb_giveback_urb_bh+0x9e/0xe0 | |
4313 | - tasklet_hi_action+0xf8/0x100 | |
4314 | - __do_softirq+0x114/0x2c0 | |
4315 | - irq_exit+0xa5/0xb0 | |
4316 | - do_IRQ+0x5a/0xf0 | |
4317 | - ret_from_intr+0x0/0x30 | |
4318 | - } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | |
4319 | - { stacktrace: | |
4320 | - kmem_cache_alloc_trace+0xeb/0x150 | |
4321 | - aa_alloc_task_context+0x27/0x40 | |
4322 | - apparmor_cred_prepare+0x1f/0x50 | |
4323 | - security_prepare_creds+0x16/0x20 | |
4324 | - prepare_creds+0xdf/0x1a0 | |
4325 | - SyS_capset+0xb5/0x200 | |
4326 | - system_call_fastpath+0x12/0x6a | |
4327 | - } hitcount: 1 bytes_req: 32 bytes_alloc: 32 | |
4328 | - . | |
4329 | - . | |
4330 | - . | |
4331 | - { stacktrace: | |
4332 | - __kmalloc+0x11b/0x1b0 | |
4333 | - i915_gem_execbuffer2+0x6c/0x2c0 [i915] | |
4334 | - drm_ioctl+0x349/0x670 [drm] | |
4335 | - do_vfs_ioctl+0x2f0/0x4f0 | |
4336 | - SyS_ioctl+0x81/0xa0 | |
4337 | - system_call_fastpath+0x12/0x6a | |
4338 | - } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808 | |
4339 | - { stacktrace: | |
4340 | - __kmalloc+0x11b/0x1b0 | |
4341 | - load_elf_phdrs+0x76/0xa0 | |
4342 | - load_elf_binary+0x102/0x1650 | |
4343 | - search_binary_handler+0x97/0x1d0 | |
4344 | - do_execveat_common.isra.34+0x551/0x6e0 | |
4345 | - SyS_execve+0x3a/0x50 | |
4346 | - return_from_execve+0x0/0x23 | |
4347 | - } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048 | |
4348 | - { stacktrace: | |
4349 | - kmem_cache_alloc_trace+0xeb/0x150 | |
4350 | - apparmor_file_alloc_security+0x27/0x40 | |
4351 | - security_file_alloc+0x16/0x20 | |
4352 | - get_empty_filp+0x93/0x1c0 | |
4353 | - path_openat+0x31/0x5f0 | |
4354 | - do_filp_open+0x3a/0x90 | |
4355 | - do_sys_open+0x128/0x220 | |
4356 | - SyS_open+0x1e/0x20 | |
4357 | - system_call_fastpath+0x12/0x6a | |
4358 | - } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376 | |
4359 | - { stacktrace: | |
4360 | - __kmalloc+0x11b/0x1b0 | |
4361 | - seq_buf_alloc+0x1b/0x50 | |
4362 | - seq_read+0x2cc/0x370 | |
4363 | - proc_reg_read+0x3d/0x80 | |
4364 | - __vfs_read+0x28/0xe0 | |
4365 | - vfs_read+0x86/0x140 | |
4366 | - SyS_read+0x46/0xb0 | |
4367 | - system_call_fastpath+0x12/0x6a | |
4368 | - } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768 | |
4369 | - | |
4370 | - Totals: | |
4371 | - Hits: 6085872 | |
4372 | - Entries: 253 | |
4373 | - Dropped: 0 | |
4374 | - | |
4375 | - If you key a hist trigger on common_pid, in order for example to | |
4376 | - gather and display sorted totals for each process, you can use the | |
4377 | - special .execname modifier to display the executable names for the | |
4378 | - processes in the table rather than raw pids. The example below | |
4379 | - keeps a per-process sum of total bytes read: | |
4380 | - | |
4381 | - # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \ | |
4382 | - /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger | |
4383 | - | |
4384 | - # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist | |
4385 | - # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active] | |
4386 | - | |
4387 | - { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512 | |
4388 | - { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640 | |
4389 | - { common_pid: compiz [ 2889] } hitcount: 59 count: 254400 | |
4390 | - { common_pid: bash [ 8710] } hitcount: 3 count: 66369 | |
4391 | - { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739 | |
4392 | - { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648 | |
4393 | - { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216 | |
4394 | - { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396 | |
4395 | - { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264 | |
4396 | - { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424 | |
4397 | - { common_pid: gmain [ 1315] } hitcount: 18 count: 6336 | |
4398 | - . | |
4399 | - . | |
4400 | - . | |
4401 | - { common_pid: postgres [ 1892] } hitcount: 2 count: 32 | |
4402 | - { common_pid: postgres [ 1891] } hitcount: 2 count: 32 | |
4403 | - { common_pid: gmain [ 8704] } hitcount: 2 count: 32 | |
4404 | - { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21 | |
4405 | - { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16 | |
4406 | - { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16 | |
4407 | - { common_pid: gdbus [ 2998] } hitcount: 1 count: 16 | |
4408 | - { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8 | |
4409 | - { common_pid: init [ 1] } hitcount: 2 count: 2 | |
4410 | - | |
4411 | - Totals: | |
4412 | - Hits: 2116 | |
4413 | - Entries: 51 | |
4414 | - Dropped: 0 | |
4415 | - | |
4416 | - Similarly, if you key a hist trigger on syscall id, for example to | |
4417 | - gather and display a list of systemwide syscall hits, you can use | |
4418 | - the special .syscall modifier to display the syscall names rather | |
4419 | - than raw ids. The example below keeps a running total of syscall | |
4420 | - counts for the system during the run: | |
4421 | - | |
4422 | - # echo 'hist:key=id.syscall:val=hitcount' > \ | |
4423 | - /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
4424 | - | |
4425 | - # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
4426 | - # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] | |
4427 | - | |
4428 | - { id: sys_fsync [ 74] } hitcount: 1 | |
4429 | - { id: sys_newuname [ 63] } hitcount: 1 | |
4430 | - { id: sys_prctl [157] } hitcount: 1 | |
4431 | - { id: sys_statfs [137] } hitcount: 1 | |
4432 | - { id: sys_symlink [ 88] } hitcount: 1 | |
4433 | - { id: sys_sendmmsg [307] } hitcount: 1 | |
4434 | - { id: sys_semctl [ 66] } hitcount: 1 | |
4435 | - { id: sys_readlink [ 89] } hitcount: 3 | |
4436 | - { id: sys_bind [ 49] } hitcount: 3 | |
4437 | - { id: sys_getsockname [ 51] } hitcount: 3 | |
4438 | - { id: sys_unlink [ 87] } hitcount: 3 | |
4439 | - { id: sys_rename [ 82] } hitcount: 4 | |
4440 | - { id: unknown_syscall [ 58] } hitcount: 4 | |
4441 | - { id: sys_connect [ 42] } hitcount: 4 | |
4442 | - { id: sys_getpid [ 39] } hitcount: 4 | |
4443 | - . | |
4444 | - . | |
4445 | - . | |
4446 | - { id: sys_rt_sigprocmask [ 14] } hitcount: 952 | |
4447 | - { id: sys_futex [202] } hitcount: 1534 | |
4448 | - { id: sys_write [ 1] } hitcount: 2689 | |
4449 | - { id: sys_setitimer [ 38] } hitcount: 2797 | |
4450 | - { id: sys_read [ 0] } hitcount: 3202 | |
4451 | - { id: sys_select [ 23] } hitcount: 3773 | |
4452 | - { id: sys_writev [ 20] } hitcount: 4531 | |
4453 | - { id: sys_poll [ 7] } hitcount: 8314 | |
4454 | - { id: sys_recvmsg [ 47] } hitcount: 13738 | |
4455 | - { id: sys_ioctl [ 16] } hitcount: 21843 | |
4456 | - | |
4457 | - Totals: | |
4458 | - Hits: 67612 | |
4459 | - Entries: 72 | |
4460 | - Dropped: 0 | |
4461 | - | |
4462 | - The syscall counts above provide a rough overall picture of system | |
4463 | - call activity on the system; we can see for example that the most | |
4464 | - popular system call on this system was the 'sys_ioctl' system call. | |
4465 | - | |
4466 | - We can use 'compound' keys to refine that number and provide some | |
4467 | - further insight as to which processes exactly contribute to the | |
4468 | - overall ioctl count. | |
4469 | - | |
4470 | - The command below keeps a hitcount for every unique combination of | |
4471 | - system call id and pid - the end result is essentially a table | |
4472 | - that keeps a per-pid sum of system call hits. The results are | |
4473 | - sorted using the system call id as the primary key, and the | |
4474 | - hitcount sum as the secondary key: | |
4475 | - | |
4476 | - # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ | |
4477 | - /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
4478 | - | |
4479 | - # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
4480 | - # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] | |
4481 | - | |
4482 | - { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 | |
4483 | - { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 | |
4484 | - { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 | |
4485 | - { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 | |
4486 | - { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 | |
4487 | - { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 | |
4488 | - { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 | |
4489 | - { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 | |
4490 | - { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 | |
4491 | - { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 | |
4492 | - . | |
4493 | - . | |
4494 | - . | |
4495 | - { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1 | |
4496 | - { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12 | |
4497 | - { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16 | |
4498 | - { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808 | |
4499 | - { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580 | |
4500 | - . | |
4501 | - . | |
4502 | - . | |
4503 | - { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3 | |
4504 | - { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16 | |
4505 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2 | |
4506 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4 | |
4507 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4 | |
4508 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4 | |
4509 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4 | |
4510 | - { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6 | |
4511 | - { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2 | |
4512 | - { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4 | |
4513 | - { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6 | |
4514 | - | |
4515 | - Totals: | |
4516 | - Hits: 31536 | |
4517 | - Entries: 323 | |
4518 | - Dropped: 0 | |
4519 | - | |
4520 | - The above list does give us a breakdown of the ioctl syscall by | |
4521 | - pid, but it also gives us quite a bit more than that, which we | |
4522 | - don't really care about at the moment. Since we know the syscall | |
4523 | - id for sys_ioctl (16, displayed next to the sys_ioctl name), we | |
4524 | - can use that to filter out all the other syscalls: | |
4525 | - | |
4526 | - # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \ | |
4527 | - /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
4528 | - | |
4529 | - # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
4530 | - # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active] | |
4531 | - | |
4532 | - { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1 | |
4533 | - { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1 | |
4534 | - { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1 | |
4535 | - { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1 | |
4536 | - { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1 | |
4537 | - { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1 | |
4538 | - { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1 | |
4539 | - { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1 | |
4540 | - { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1 | |
4541 | - . | |
4542 | - . | |
4543 | - . | |
4544 | - { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45 | |
4545 | - { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48 | |
4546 | - { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48 | |
4547 | - { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66 | |
4548 | - { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674 | |
4549 | - { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443 | |
4550 | - | |
4551 | - Totals: | |
4552 | - Hits: 101162 | |
4553 | - Entries: 103 | |
4554 | - Dropped: 0 | |
4555 | - | |
4556 | - The above output shows that 'compiz' and 'Xorg' are far and away | |
4557 | - the heaviest ioctl callers (which might lead to questions about | |
4558 | - whether they really need to be making all those calls and to | |
4559 | - possible avenues for further investigation.) | |
4560 | - | |
4561 | - The compound key examples used a key and a sum value (hitcount) to | |
4562 | - sort the output, but we can just as easily use two keys instead. | |
4563 | - Here's an example where we use a compound key composed of the the | |
4564 | - common_pid and size event fields. Sorting with pid as the primary | |
4565 | - key and 'size' as the secondary key allows us to display an | |
4566 | - ordered summary of the recvfrom sizes, with counts, received by | |
4567 | - each process: | |
4568 | - | |
4569 | - # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ | |
4570 | - /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger | |
4571 | - | |
4572 | - # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist | |
4573 | - # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] | |
4574 | - | |
4575 | - { common_pid: smbd [ 784], size: 4 } hitcount: 1 | |
4576 | - { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 | |
4577 | - { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 | |
4578 | - { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 | |
4579 | - { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 | |
4580 | - { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 | |
4581 | - { common_pid: compiz [ 2994], size: 8 } hitcount: 1 | |
4582 | - { common_pid: compiz [ 2994], size: 20 } hitcount: 11 | |
4583 | - { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 | |
4584 | - { common_pid: firefox [ 8817], size: 4 } hitcount: 1 | |
4585 | - { common_pid: firefox [ 8817], size: 8 } hitcount: 5 | |
4586 | - { common_pid: firefox [ 8817], size: 588 } hitcount: 2 | |
4587 | - { common_pid: firefox [ 8817], size: 628 } hitcount: 1 | |
4588 | - { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 | |
4589 | - { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 | |
4590 | - { common_pid: firefox [ 8822], size: 8 } hitcount: 2 | |
4591 | - { common_pid: firefox [ 8822], size: 160 } hitcount: 2 | |
4592 | - { common_pid: firefox [ 8822], size: 320 } hitcount: 2 | |
4593 | - { common_pid: firefox [ 8822], size: 352 } hitcount: 1 | |
4594 | - . | |
4595 | - . | |
4596 | - . | |
4597 | - { common_pid: pool [ 8923], size: 1960 } hitcount: 10 | |
4598 | - { common_pid: pool [ 8923], size: 2048 } hitcount: 10 | |
4599 | - { common_pid: pool [ 8924], size: 1960 } hitcount: 10 | |
4600 | - { common_pid: pool [ 8924], size: 2048 } hitcount: 10 | |
4601 | - { common_pid: pool [ 8928], size: 1964 } hitcount: 4 | |
4602 | - { common_pid: pool [ 8928], size: 1965 } hitcount: 2 | |
4603 | - { common_pid: pool [ 8928], size: 2048 } hitcount: 6 | |
4604 | - { common_pid: pool [ 8929], size: 1982 } hitcount: 1 | |
4605 | - { common_pid: pool [ 8929], size: 2048 } hitcount: 1 | |
4606 | - | |
4607 | - Totals: | |
4608 | - Hits: 2016 | |
4609 | - Entries: 224 | |
4610 | - Dropped: 0 | |
4611 | - | |
4612 | - The above example also illustrates the fact that although a compound | |
4613 | - key is treated as a single entity for hashing purposes, the sub-keys | |
4614 | - it's composed of can be accessed independently. | |
4615 | - | |
4616 | - The next example uses a string field as the hash key and | |
4617 | - demonstrates how you can manually pause and continue a hist trigger. | |
4618 | - In this example, we'll aggregate fork counts and don't expect a | |
4619 | - large number of entries in the hash table, so we'll drop it to a | |
4620 | - much smaller number, say 256: | |
4621 | - | |
4622 | - # echo 'hist:key=child_comm:val=hitcount:size=256' > \ | |
4623 | - /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
4624 | - | |
4625 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
4626 | - # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | |
4627 | - | |
4628 | - { child_comm: dconf worker } hitcount: 1 | |
4629 | - { child_comm: ibus-daemon } hitcount: 1 | |
4630 | - { child_comm: whoopsie } hitcount: 1 | |
4631 | - { child_comm: smbd } hitcount: 1 | |
4632 | - { child_comm: gdbus } hitcount: 1 | |
4633 | - { child_comm: kthreadd } hitcount: 1 | |
4634 | - { child_comm: dconf worker } hitcount: 1 | |
4635 | - { child_comm: evolution-alarm } hitcount: 2 | |
4636 | - { child_comm: Socket Thread } hitcount: 2 | |
4637 | - { child_comm: postgres } hitcount: 2 | |
4638 | - { child_comm: bash } hitcount: 3 | |
4639 | - { child_comm: compiz } hitcount: 3 | |
4640 | - { child_comm: evolution-sourc } hitcount: 4 | |
4641 | - { child_comm: dhclient } hitcount: 4 | |
4642 | - { child_comm: pool } hitcount: 5 | |
4643 | - { child_comm: nm-dispatcher.a } hitcount: 8 | |
4644 | - { child_comm: firefox } hitcount: 8 | |
4645 | - { child_comm: dbus-daemon } hitcount: 8 | |
4646 | - { child_comm: glib-pacrunner } hitcount: 10 | |
4647 | - { child_comm: evolution } hitcount: 23 | |
4648 | - | |
4649 | - Totals: | |
4650 | - Hits: 89 | |
4651 | - Entries: 20 | |
4652 | - Dropped: 0 | |
4653 | - | |
4654 | - If we want to pause the hist trigger, we can simply append :pause to | |
4655 | - the command that started the trigger. Notice that the trigger info | |
4656 | - displays as [paused]: | |
4657 | - | |
4658 | - # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ | |
4659 | - /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
4660 | - | |
4661 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
4662 | - # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] | |
4663 | - | |
4664 | - { child_comm: dconf worker } hitcount: 1 | |
4665 | - { child_comm: kthreadd } hitcount: 1 | |
4666 | - { child_comm: dconf worker } hitcount: 1 | |
4667 | - { child_comm: gdbus } hitcount: 1 | |
4668 | - { child_comm: ibus-daemon } hitcount: 1 | |
4669 | - { child_comm: Socket Thread } hitcount: 2 | |
4670 | - { child_comm: evolution-alarm } hitcount: 2 | |
4671 | - { child_comm: smbd } hitcount: 2 | |
4672 | - { child_comm: bash } hitcount: 3 | |
4673 | - { child_comm: whoopsie } hitcount: 3 | |
4674 | - { child_comm: compiz } hitcount: 3 | |
4675 | - { child_comm: evolution-sourc } hitcount: 4 | |
4676 | - { child_comm: pool } hitcount: 5 | |
4677 | - { child_comm: postgres } hitcount: 6 | |
4678 | - { child_comm: firefox } hitcount: 8 | |
4679 | - { child_comm: dhclient } hitcount: 10 | |
4680 | - { child_comm: emacs } hitcount: 12 | |
4681 | - { child_comm: dbus-daemon } hitcount: 20 | |
4682 | - { child_comm: nm-dispatcher.a } hitcount: 20 | |
4683 | - { child_comm: evolution } hitcount: 35 | |
4684 | - { child_comm: glib-pacrunner } hitcount: 59 | |
4685 | - | |
4686 | - Totals: | |
4687 | - Hits: 199 | |
4688 | - Entries: 21 | |
4689 | - Dropped: 0 | |
4690 | - | |
4691 | - To manually continue having the trigger aggregate events, append | |
4692 | - :cont instead. Notice that the trigger info displays as [active] | |
4693 | - again, and the data has changed: | |
4694 | - | |
4695 | - # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \ | |
4696 | - /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
4697 | - | |
4698 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
4699 | - # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | |
4700 | - | |
4701 | - { child_comm: dconf worker } hitcount: 1 | |
4702 | - { child_comm: dconf worker } hitcount: 1 | |
4703 | - { child_comm: kthreadd } hitcount: 1 | |
4704 | - { child_comm: gdbus } hitcount: 1 | |
4705 | - { child_comm: ibus-daemon } hitcount: 1 | |
4706 | - { child_comm: Socket Thread } hitcount: 2 | |
4707 | - { child_comm: evolution-alarm } hitcount: 2 | |
4708 | - { child_comm: smbd } hitcount: 2 | |
4709 | - { child_comm: whoopsie } hitcount: 3 | |
4710 | - { child_comm: compiz } hitcount: 3 | |
4711 | - { child_comm: evolution-sourc } hitcount: 4 | |
4712 | - { child_comm: bash } hitcount: 5 | |
4713 | - { child_comm: pool } hitcount: 5 | |
4714 | - { child_comm: postgres } hitcount: 6 | |
4715 | - { child_comm: firefox } hitcount: 8 | |
4716 | - { child_comm: dhclient } hitcount: 11 | |
4717 | - { child_comm: emacs } hitcount: 12 | |
4718 | - { child_comm: dbus-daemon } hitcount: 22 | |
4719 | - { child_comm: nm-dispatcher.a } hitcount: 22 | |
4720 | - { child_comm: evolution } hitcount: 35 | |
4721 | - { child_comm: glib-pacrunner } hitcount: 59 | |
4722 | - | |
4723 | - Totals: | |
4724 | - Hits: 206 | |
4725 | - Entries: 21 | |
4726 | - Dropped: 0 | |
4727 | - | |
4728 | - The previous example showed how to start and stop a hist trigger by | |
4729 | - appending 'pause' and 'continue' to the hist trigger command. A | |
4730 | - hist trigger can also be started in a paused state by initially | |
4731 | - starting the trigger with ':pause' appended. This allows you to | |
4732 | - start the trigger only when you're ready to start collecting data | |
4733 | - and not before. For example, you could start the trigger in a | |
4734 | - paused state, then unpause it and do something you want to measure, | |
4735 | - then pause the trigger again when done. | |
4736 | - | |
4737 | - Of course, doing this manually can be difficult and error-prone, but | |
4738 | - it is possible to automatically start and stop a hist trigger based | |
4739 | - on some condition, via the enable_hist and disable_hist triggers. | |
4740 | - | |
4741 | - For example, suppose we wanted to take a look at the relative | |
4742 | - weights in terms of skb length for each callpath that leads to a | |
4743 | - netif_receieve_skb event when downloading a decent-sized file using | |
4744 | - wget. | |
4745 | - | |
4746 | - First we set up an initially paused stacktrace trigger on the | |
4747 | - netif_receive_skb event: | |
4748 | - | |
4749 | - # echo 'hist:key=stacktrace:vals=len:pause' > \ | |
4750 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4751 | - | |
4752 | - Next, we set up an 'enable_hist' trigger on the sched_process_exec | |
4753 | - event, with an 'if filename==/usr/bin/wget' filter. The effect of | |
4754 | - this new trigger is that it will 'unpause' the hist trigger we just | |
4755 | - set up on netif_receive_skb if and only if it sees a | |
4756 | - sched_process_exec event with a filename of '/usr/bin/wget'. When | |
4757 | - that happens, all netif_receive_skb events are aggregated into a | |
4758 | - hash table keyed on stacktrace: | |
4759 | - | |
4760 | - # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
4761 | - /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
4762 | - | |
4763 | - The aggregation continues until the netif_receive_skb is paused | |
4764 | - again, which is what the following disable_hist event does by | |
4765 | - creating a similar setup on the sched_process_exit event, using the | |
4766 | - filter 'comm==wget': | |
4767 | - | |
4768 | - # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | |
4769 | - /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
4770 | - | |
4771 | - Whenever a process exits and the comm field of the disable_hist | |
4772 | - trigger filter matches 'comm==wget', the netif_receive_skb hist | |
4773 | - trigger is disabled. | |
4774 | - | |
4775 | - The overall effect is that netif_receive_skb events are aggregated | |
4776 | - into the hash table for only the duration of the wget. Executing a | |
4777 | - wget command and then listing the 'hist' file will display the | |
4778 | - output generated by the wget command: | |
4779 | - | |
4780 | - $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | |
4781 | - | |
4782 | - # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
4783 | - # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | |
4784 | - | |
4785 | - { stacktrace: | |
4786 | - __netif_receive_skb_core+0x46d/0x990 | |
4787 | - __netif_receive_skb+0x18/0x60 | |
4788 | - netif_receive_skb_internal+0x23/0x90 | |
4789 | - napi_gro_receive+0xc8/0x100 | |
4790 | - ieee80211_deliver_skb+0xd6/0x270 [mac80211] | |
4791 | - ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | |
4792 | - ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | |
4793 | - ieee80211_rx+0x31d/0x900 [mac80211] | |
4794 | - iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | |
4795 | - iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | |
4796 | - iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | |
4797 | - irq_thread_fn+0x20/0x50 | |
4798 | - irq_thread+0x11f/0x150 | |
4799 | - kthread+0xd2/0xf0 | |
4800 | - ret_from_fork+0x42/0x70 | |
4801 | - } hitcount: 85 len: 28884 | |
4802 | - { stacktrace: | |
4803 | - __netif_receive_skb_core+0x46d/0x990 | |
4804 | - __netif_receive_skb+0x18/0x60 | |
4805 | - netif_receive_skb_internal+0x23/0x90 | |
4806 | - napi_gro_complete+0xa4/0xe0 | |
4807 | - dev_gro_receive+0x23a/0x360 | |
4808 | - napi_gro_receive+0x30/0x100 | |
4809 | - ieee80211_deliver_skb+0xd6/0x270 [mac80211] | |
4810 | - ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | |
4811 | - ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | |
4812 | - ieee80211_rx+0x31d/0x900 [mac80211] | |
4813 | - iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | |
4814 | - iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | |
4815 | - iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | |
4816 | - irq_thread_fn+0x20/0x50 | |
4817 | - irq_thread+0x11f/0x150 | |
4818 | - kthread+0xd2/0xf0 | |
4819 | - } hitcount: 98 len: 664329 | |
4820 | - { stacktrace: | |
4821 | - __netif_receive_skb_core+0x46d/0x990 | |
4822 | - __netif_receive_skb+0x18/0x60 | |
4823 | - process_backlog+0xa8/0x150 | |
4824 | - net_rx_action+0x15d/0x340 | |
4825 | - __do_softirq+0x114/0x2c0 | |
4826 | - do_softirq_own_stack+0x1c/0x30 | |
4827 | - do_softirq+0x65/0x70 | |
4828 | - __local_bh_enable_ip+0xb5/0xc0 | |
4829 | - ip_finish_output+0x1f4/0x840 | |
4830 | - ip_output+0x6b/0xc0 | |
4831 | - ip_local_out_sk+0x31/0x40 | |
4832 | - ip_send_skb+0x1a/0x50 | |
4833 | - udp_send_skb+0x173/0x2a0 | |
4834 | - udp_sendmsg+0x2bf/0x9f0 | |
4835 | - inet_sendmsg+0x64/0xa0 | |
4836 | - sock_sendmsg+0x3d/0x50 | |
4837 | - } hitcount: 115 len: 13030 | |
4838 | - { stacktrace: | |
4839 | - __netif_receive_skb_core+0x46d/0x990 | |
4840 | - __netif_receive_skb+0x18/0x60 | |
4841 | - netif_receive_skb_internal+0x23/0x90 | |
4842 | - napi_gro_complete+0xa4/0xe0 | |
4843 | - napi_gro_flush+0x6d/0x90 | |
4844 | - iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] | |
4845 | - irq_thread_fn+0x20/0x50 | |
4846 | - irq_thread+0x11f/0x150 | |
4847 | - kthread+0xd2/0xf0 | |
4848 | - ret_from_fork+0x42/0x70 | |
4849 | - } hitcount: 934 len: 5512212 | |
4850 | - | |
4851 | - Totals: | |
4852 | - Hits: 1232 | |
4853 | - Entries: 4 | |
4854 | - Dropped: 0 | |
4855 | - | |
4856 | - The above shows all the netif_receive_skb callpaths and their total | |
4857 | - lengths for the duration of the wget command. | |
4858 | - | |
4859 | - The 'clear' hist trigger param can be used to clear the hash table. | |
4860 | - Suppose we wanted to try another run of the previous example but | |
4861 | - this time also wanted to see the complete list of events that went | |
4862 | - into the histogram. In order to avoid having to set everything up | |
4863 | - again, we can just clear the histogram first: | |
4864 | - | |
4865 | - # echo 'hist:key=stacktrace:vals=len:clear' >> \ | |
4866 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4867 | - | |
4868 | - Just to verify that it is in fact cleared, here's what we now see in | |
4869 | - the hist file: | |
4870 | - | |
4871 | - # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
4872 | - # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | |
4873 | - | |
4874 | - Totals: | |
4875 | - Hits: 0 | |
4876 | - Entries: 0 | |
4877 | - Dropped: 0 | |
4878 | - | |
4879 | - Since we want to see the detailed list of every netif_receive_skb | |
4880 | - event occurring during the new run, which are in fact the same | |
4881 | - events being aggregated into the hash table, we add some additional | |
4882 | - 'enable_event' events to the triggering sched_process_exec and | |
4883 | - sched_process_exit events as such: | |
4884 | - | |
4885 | - # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
4886 | - /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
4887 | - | |
4888 | - # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ | |
4889 | - /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
4890 | - | |
4891 | - If you read the trigger files for the sched_process_exec and | |
4892 | - sched_process_exit triggers, you should see two triggers for each: | |
4893 | - one enabling/disabling the hist aggregation and the other | |
4894 | - enabling/disabling the logging of events: | |
4895 | - | |
4896 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
4897 | - enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | |
4898 | - enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | |
4899 | - | |
4900 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
4901 | - enable_event:net:netif_receive_skb:unlimited if comm==wget | |
4902 | - disable_hist:net:netif_receive_skb:unlimited if comm==wget | |
4903 | - | |
4904 | - In other words, whenever either of the sched_process_exec or | |
4905 | - sched_process_exit events is hit and matches 'wget', it enables or | |
4906 | - disables both the histogram and the event log, and what you end up | |
4907 | - with is a hash table and set of events just covering the specified | |
4908 | - duration. Run the wget command again: | |
4909 | - | |
4910 | - $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | |
4911 | - | |
4912 | - Displaying the 'hist' file should show something similar to what you | |
4913 | - saw in the last run, but this time you should also see the | |
4914 | - individual events in the trace file: | |
4915 | - | |
4916 | - # cat /sys/kernel/debug/tracing/trace | |
4917 | - | |
4918 | - # tracer: nop | |
4919 | - # | |
4920 | - # entries-in-buffer/entries-written: 183/1426 #P:4 | |
4921 | - # | |
4922 | - # _-----=> irqs-off | |
4923 | - # / _----=> need-resched | |
4924 | - # | / _---=> hardirq/softirq | |
4925 | - # || / _--=> preempt-depth | |
4926 | - # ||| / delay | |
4927 | - # TASK-PID CPU# |||| TIMESTAMP FUNCTION | |
4928 | - # | | | |||| | | | |
4929 | - wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 | |
4930 | - wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 | |
4931 | - dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 | |
4932 | - dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 | |
4933 | - ##### CPU 2 buffer started #### | |
4934 | - irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 | |
4935 | - irq/29-iwlwifi-559 [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 | |
4936 | - irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 | |
4937 | - irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 | |
4938 | - irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 | |
4939 | - . | |
4940 | - . | |
4941 | - . | |
4942 | - | |
4943 | - The following example demonstrates how multiple hist triggers can be | |
4944 | - attached to a given event. This capability can be useful for | |
4945 | - creating a set of different summaries derived from the same set of | |
4946 | - events, or for comparing the effects of different filters, among | |
4947 | - other things. | |
4948 | - | |
4949 | - # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ | |
4950 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4951 | - # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ | |
4952 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4953 | - # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ | |
4954 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4955 | - # echo 'hist:keys=skbaddr.hex:vals=len' >> \ | |
4956 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4957 | - # echo 'hist:keys=len:vals=common_preempt_count' >> \ | |
4958 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
4959 | - | |
4960 | - The above set of commands creates four triggers differing only in | |
4961 | - their filters, along with a completely different though fairly | |
4962 | - nonsensical trigger. Note that in order to append multiple hist | |
4963 | - triggers to the same file, you should use the '>>' operator to | |
4964 | - append them ('>' will also add the new hist trigger, but will remove | |
4965 | - any existing hist triggers beforehand). | |
4966 | - | |
4967 | - Displaying the contents of the 'hist' file for the event shows the | |
4968 | - contents of all five histograms: | |
4969 | - | |
4970 | - # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
4971 | - | |
4972 | - # event histogram | |
4973 | - # | |
4974 | - # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] | |
4975 | - # | |
4976 | - | |
4977 | - { len: 176 } hitcount: 1 common_preempt_count: 0 | |
4978 | - { len: 223 } hitcount: 1 common_preempt_count: 0 | |
4979 | - { len: 4854 } hitcount: 1 common_preempt_count: 0 | |
4980 | - { len: 395 } hitcount: 1 common_preempt_count: 0 | |
4981 | - { len: 177 } hitcount: 1 common_preempt_count: 0 | |
4982 | - { len: 446 } hitcount: 1 common_preempt_count: 0 | |
4983 | - { len: 1601 } hitcount: 1 common_preempt_count: 0 | |
4984 | - . | |
4985 | - . | |
4986 | - . | |
4987 | - { len: 1280 } hitcount: 66 common_preempt_count: 0 | |
4988 | - { len: 116 } hitcount: 81 common_preempt_count: 40 | |
4989 | - { len: 708 } hitcount: 112 common_preempt_count: 0 | |
4990 | - { len: 46 } hitcount: 221 common_preempt_count: 0 | |
4991 | - { len: 1264 } hitcount: 458 common_preempt_count: 0 | |
4992 | - | |
4993 | - Totals: | |
4994 | - Hits: 1428 | |
4995 | - Entries: 147 | |
4996 | - Dropped: 0 | |
4997 | - | |
4998 | - | |
4999 | - # event histogram | |
5000 | - # | |
5001 | - # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
5002 | - # | |
5003 | - | |
5004 | - { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 | |
5005 | - { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 | |
5006 | - { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 | |
5007 | - { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 | |
5008 | - { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 | |
5009 | - { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 | |
5010 | - { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 | |
5011 | - { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 | |
5012 | - { skbaddr: ffff880100065900 } hitcount: 1 len: 46 | |
5013 | - { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 | |
5014 | - { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 | |
5015 | - { skbaddr: ffff880100064700 } hitcount: 1 len: 365 | |
5016 | - { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 | |
5017 | - . | |
5018 | - . | |
5019 | - . | |
5020 | - { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 | |
5021 | - { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 | |
5022 | - { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 | |
5023 | - { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 | |
5024 | - { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 | |
5025 | - { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 | |
5026 | - { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 | |
5027 | - { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 | |
5028 | - { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 | |
5029 | - | |
5030 | - Totals: | |
5031 | - Hits: 1451 | |
5032 | - Entries: 318 | |
5033 | - Dropped: 0 | |
5034 | - | |
5035 | - | |
5036 | - # event histogram | |
5037 | - # | |
5038 | - # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] | |
5039 | - # | |
5040 | - | |
5041 | - | |
5042 | - Totals: | |
5043 | - Hits: 0 | |
5044 | - Entries: 0 | |
5045 | - Dropped: 0 | |
5046 | - | |
5047 | - | |
5048 | - # event histogram | |
5049 | - # | |
5050 | - # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] | |
5051 | - # | |
5052 | - | |
5053 | - { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 | |
5054 | - { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 | |
5055 | - { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 | |
5056 | - { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 | |
5057 | - { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 | |
5058 | - { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 | |
5059 | - { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 | |
5060 | - { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 | |
5061 | - { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 | |
5062 | - { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 | |
5063 | - { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 | |
5064 | - { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 | |
5065 | - | |
5066 | - Totals: | |
5067 | - Hits: 14 | |
5068 | - Entries: 12 | |
5069 | - Dropped: 0 | |
5070 | - | |
5071 | - | |
5072 | - # event histogram | |
5073 | - # | |
5074 | - # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] | |
5075 | - # | |
5076 | - | |
5077 | - | |
5078 | - Totals: | |
5079 | - Hits: 0 | |
5080 | - Entries: 0 | |
5081 | - Dropped: 0 | |
5082 | - | |
5083 | - Named triggers can be used to have triggers share a common set of | |
5084 | - histogram data. This capability is mostly useful for combining the | |
5085 | - output of events generated by tracepoints contained inside inline | |
5086 | - functions, but names can be used in a hist trigger on any event. | |
5087 | - For example, these two triggers when hit will update the same 'len' | |
5088 | - field in the shared 'foo' histogram data: | |
5089 | - | |
5090 | - # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | |
5091 | - /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
5092 | - # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | |
5093 | - /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
5094 | - | |
5095 | - You can see that they're updating common histogram data by reading | |
5096 | - each event's hist files at the same time: | |
5097 | - | |
5098 | - # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; | |
5099 | - cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
5100 | - | |
5101 | - # event histogram | |
5102 | - # | |
5103 | - # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
5104 | - # | |
5105 | - | |
5106 | - { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | |
5107 | - { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | |
5108 | - { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | |
5109 | - { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | |
5110 | - { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | |
5111 | - { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | |
5112 | - { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | |
5113 | - { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | |
5114 | - { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | |
5115 | - { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | |
5116 | - { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | |
5117 | - { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | |
5118 | - { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | |
5119 | - { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | |
5120 | - { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | |
5121 | - { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | |
5122 | - { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | |
5123 | - { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | |
5124 | - { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | |
5125 | - { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | |
5126 | - { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | |
5127 | - { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | |
5128 | - { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | |
5129 | - { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | |
5130 | - { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | |
5131 | - { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | |
5132 | - { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | |
5133 | - { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | |
5134 | - { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | |
5135 | - { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | |
5136 | - { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | |
5137 | - { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | |
5138 | - { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | |
5139 | - { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | |
5140 | - { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | |
5141 | - { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | |
5142 | - { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | |
5143 | - { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | |
5144 | - { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | |
5145 | - { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | |
5146 | - { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | |
5147 | - { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | |
5148 | - | |
5149 | - Totals: | |
5150 | - Hits: 81 | |
5151 | - Entries: 42 | |
5152 | - Dropped: 0 | |
5153 | - # event histogram | |
5154 | - # | |
5155 | - # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
5156 | - # | |
5157 | - | |
5158 | - { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | |
5159 | - { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | |
5160 | - { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | |
5161 | - { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | |
5162 | - { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | |
5163 | - { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | |
5164 | - { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | |
5165 | - { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | |
5166 | - { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | |
5167 | - { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | |
5168 | - { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | |
5169 | - { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | |
5170 | - { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | |
5171 | - { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | |
5172 | - { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | |
5173 | - { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | |
5174 | - { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | |
5175 | - { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | |
5176 | - { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | |
5177 | - { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | |
5178 | - { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | |
5179 | - { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | |
5180 | - { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | |
5181 | - { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | |
5182 | - { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | |
5183 | - { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | |
5184 | - { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | |
5185 | - { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | |
5186 | - { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | |
5187 | - { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | |
5188 | - { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | |
5189 | - { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | |
5190 | - { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | |
5191 | - { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | |
5192 | - { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | |
5193 | - { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | |
5194 | - { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | |
5195 | - { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | |
5196 | - { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | |
5197 | - { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | |
5198 | - { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | |
5199 | - { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | |
5200 | - | |
5201 | - Totals: | |
5202 | - Hits: 81 | |
5203 | - Entries: 42 | |
5204 | - Dropped: 0 | |
5205 | - | |
5206 | - And here's an example that shows how to combine histogram data from | |
5207 | - any two events even if they don't share any 'compatible' fields | |
5208 | - other than 'hitcount' and 'stacktrace'. These commands create a | |
5209 | - couple of triggers named 'bar' using those fields: | |
5210 | - | |
5211 | - # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | |
5212 | - /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
5213 | - # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | |
5214 | - /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
5215 | - | |
5216 | - And displaying the output of either shows some interesting if | |
5217 | - somewhat confusing output: | |
5218 | - | |
5219 | - # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
5220 | - # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
5221 | - | |
5222 | - # event histogram | |
5223 | - # | |
5224 | - # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] | |
5225 | - # | |
5226 | - | |
5227 | - { stacktrace: | |
5228 | - _do_fork+0x18e/0x330 | |
5229 | - kernel_thread+0x29/0x30 | |
5230 | - kthreadd+0x154/0x1b0 | |
5231 | - ret_from_fork+0x3f/0x70 | |
5232 | - } hitcount: 1 | |
5233 | - { stacktrace: | |
5234 | - netif_rx_internal+0xb2/0xd0 | |
5235 | - netif_rx_ni+0x20/0x70 | |
5236 | - dev_loopback_xmit+0xaa/0xd0 | |
5237 | - ip_mc_output+0x126/0x240 | |
5238 | - ip_local_out_sk+0x31/0x40 | |
5239 | - igmp_send_report+0x1e9/0x230 | |
5240 | - igmp_timer_expire+0xe9/0x120 | |
5241 | - call_timer_fn+0x39/0xf0 | |
5242 | - run_timer_softirq+0x1e1/0x290 | |
5243 | - __do_softirq+0xfd/0x290 | |
5244 | - irq_exit+0x98/0xb0 | |
5245 | - smp_apic_timer_interrupt+0x4a/0x60 | |
5246 | - apic_timer_interrupt+0x6d/0x80 | |
5247 | - cpuidle_enter+0x17/0x20 | |
5248 | - call_cpuidle+0x3b/0x60 | |
5249 | - cpu_startup_entry+0x22d/0x310 | |
5250 | - } hitcount: 1 | |
5251 | - { stacktrace: | |
5252 | - netif_rx_internal+0xb2/0xd0 | |
5253 | - netif_rx_ni+0x20/0x70 | |
5254 | - dev_loopback_xmit+0xaa/0xd0 | |
5255 | - ip_mc_output+0x17f/0x240 | |
5256 | - ip_local_out_sk+0x31/0x40 | |
5257 | - ip_send_skb+0x1a/0x50 | |
5258 | - udp_send_skb+0x13e/0x270 | |
5259 | - udp_sendmsg+0x2bf/0x980 | |
5260 | - inet_sendmsg+0x67/0xa0 | |
5261 | - sock_sendmsg+0x38/0x50 | |
5262 | - SYSC_sendto+0xef/0x170 | |
5263 | - SyS_sendto+0xe/0x10 | |
5264 | - entry_SYSCALL_64_fastpath+0x12/0x6a | |
5265 | - } hitcount: 2 | |
5266 | - { stacktrace: | |
5267 | - netif_rx_internal+0xb2/0xd0 | |
5268 | - netif_rx+0x1c/0x60 | |
5269 | - loopback_xmit+0x6c/0xb0 | |
5270 | - dev_hard_start_xmit+0x219/0x3a0 | |
5271 | - __dev_queue_xmit+0x415/0x4f0 | |
5272 | - dev_queue_xmit_sk+0x13/0x20 | |
5273 | - ip_finish_output2+0x237/0x340 | |
5274 | - ip_finish_output+0x113/0x1d0 | |
5275 | - ip_output+0x66/0xc0 | |
5276 | - ip_local_out_sk+0x31/0x40 | |
5277 | - ip_send_skb+0x1a/0x50 | |
5278 | - udp_send_skb+0x16d/0x270 | |
5279 | - udp_sendmsg+0x2bf/0x980 | |
5280 | - inet_sendmsg+0x67/0xa0 | |
5281 | - sock_sendmsg+0x38/0x50 | |
5282 | - ___sys_sendmsg+0x14e/0x270 | |
5283 | - } hitcount: 76 | |
5284 | - { stacktrace: | |
5285 | - netif_rx_internal+0xb2/0xd0 | |
5286 | - netif_rx+0x1c/0x60 | |
5287 | - loopback_xmit+0x6c/0xb0 | |
5288 | - dev_hard_start_xmit+0x219/0x3a0 | |
5289 | - __dev_queue_xmit+0x415/0x4f0 | |
5290 | - dev_queue_xmit_sk+0x13/0x20 | |
5291 | - ip_finish_output2+0x237/0x340 | |
5292 | - ip_finish_output+0x113/0x1d0 | |
5293 | - ip_output+0x66/0xc0 | |
5294 | - ip_local_out_sk+0x31/0x40 | |
5295 | - ip_send_skb+0x1a/0x50 | |
5296 | - udp_send_skb+0x16d/0x270 | |
5297 | - udp_sendmsg+0x2bf/0x980 | |
5298 | - inet_sendmsg+0x67/0xa0 | |
5299 | - sock_sendmsg+0x38/0x50 | |
5300 | - ___sys_sendmsg+0x269/0x270 | |
5301 | - } hitcount: 77 | |
5302 | - { stacktrace: | |
5303 | - netif_rx_internal+0xb2/0xd0 | |
5304 | - netif_rx+0x1c/0x60 | |
5305 | - loopback_xmit+0x6c/0xb0 | |
5306 | - dev_hard_start_xmit+0x219/0x3a0 | |
5307 | - __dev_queue_xmit+0x415/0x4f0 | |
5308 | - dev_queue_xmit_sk+0x13/0x20 | |
5309 | - ip_finish_output2+0x237/0x340 | |
5310 | - ip_finish_output+0x113/0x1d0 | |
5311 | - ip_output+0x66/0xc0 | |
5312 | - ip_local_out_sk+0x31/0x40 | |
5313 | - ip_send_skb+0x1a/0x50 | |
5314 | - udp_send_skb+0x16d/0x270 | |
5315 | - udp_sendmsg+0x2bf/0x980 | |
5316 | - inet_sendmsg+0x67/0xa0 | |
5317 | - sock_sendmsg+0x38/0x50 | |
5318 | - SYSC_sendto+0xef/0x170 | |
5319 | - } hitcount: 88 | |
5320 | - { stacktrace: | |
5321 | - _do_fork+0x18e/0x330 | |
5322 | - SyS_clone+0x19/0x20 | |
5323 | - entry_SYSCALL_64_fastpath+0x12/0x6a | |
5324 | - } hitcount: 244 | |
5325 | - | |
5326 | - Totals: | |
5327 | - Hits: 489 | |
5328 | - Entries: 7 | |
5329 | - Dropped: 0 | |
5330 | + See Documentation/trace/histogram.txt for details and examples. | |
5331 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/Documentation/trace/ftrace.txt linux-4.14/Documentation/trace/ftrace.txt | |
5332 | --- linux-4.14.orig/Documentation/trace/ftrace.txt 2017-11-12 19:46:13.000000000 +0100 | |
5333 | +++ linux-4.14/Documentation/trace/ftrace.txt 2018-09-05 11:05:07.000000000 +0200 | |
5334 | @@ -539,6 +539,30 @@ | |
5335 | ||
5336 | See events.txt for more information. | |
5337 | ||
5338 | + timestamp_mode: | |
5339 | + | |
5340 | + Certain tracers may change the timestamp mode used when | |
5341 | + logging trace events into the event buffer. Events with | |
5342 | + different modes can coexist within a buffer but the mode in | |
5343 | + effect when an event is logged determines which timestamp mode | |
5344 | + is used for that event. The default timestamp mode is | |
5345 | + 'delta'. | |
5346 | + | |
5347 | + Usual timestamp modes for tracing: | |
5348 | + | |
5349 | + # cat timestamp_mode | |
5350 | + [delta] absolute | |
5351 | + | |
5352 | + The timestamp mode with the square brackets around it is the | |
5353 | + one in effect. | |
5354 | + | |
5355 | + delta: Default timestamp mode - timestamp is a delta against | |
5356 | + a per-buffer timestamp. | |
5357 | + | |
5358 | + absolute: The timestamp is a full timestamp, not a delta | |
5359 | + against some other value. As such it takes up more | |
5360 | + space and is less efficient. | |
5361 | + | |
5362 | hwlat_detector: | |
5363 | ||
5364 | Directory for the Hardware Latency Detector. | |
5365 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/Documentation/trace/histogram.txt linux-4.14/Documentation/trace/histogram.txt | |
5366 | --- linux-4.14.orig/Documentation/trace/histogram.txt 1970-01-01 01:00:00.000000000 +0100 | |
5367 | +++ linux-4.14/Documentation/trace/histogram.txt 2018-09-05 11:05:07.000000000 +0200 | |
5368 | @@ -0,0 +1,1995 @@ | |
5369 | + Event Histograms | |
5370 | + | |
5371 | + Documentation written by Tom Zanussi | |
5372 | + | |
5373 | +1. Introduction | |
5374 | +=============== | |
5375 | + | |
5376 | + Histogram triggers are special event triggers that can be used to | |
5377 | + aggregate trace event data into histograms. For information on | |
5378 | + trace events and event triggers, see Documentation/trace/events.txt. | |
5379 | + | |
5380 | + | |
5381 | +2. Histogram Trigger Command | |
5382 | +============================ | |
5383 | + | |
5384 | + A histogram trigger command is an event trigger command that | |
5385 | + aggregates event hits into a hash table keyed on one or more trace | |
5386 | + event format fields (or stacktrace) and a set of running totals | |
5387 | + derived from one or more trace event format fields and/or event | |
5388 | + counts (hitcount). | |
5389 | + | |
5390 | + The format of a hist trigger is as follows: | |
5391 | + | |
5392 | + hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>] | |
5393 | + [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue] | |
5394 | + [:clear][:name=histname1] [if <filter>] | |
5395 | + | |
5396 | + When a matching event is hit, an entry is added to a hash table | |
5397 | + using the key(s) and value(s) named. Keys and values correspond to | |
5398 | + fields in the event's format description. Values must correspond to | |
5399 | + numeric fields - on an event hit, the value(s) will be added to a | |
5400 | + sum kept for that field. The special string 'hitcount' can be used | |
5401 | + in place of an explicit value field - this is simply a count of | |
5402 | + event hits. If 'values' isn't specified, an implicit 'hitcount' | |
5403 | + value will be automatically created and used as the only value. | |
5404 | + Keys can be any field, or the special string 'stacktrace', which | |
5405 | + will use the event's kernel stacktrace as the key. The keywords | |
5406 | + 'keys' or 'key' can be used to specify keys, and the keywords | |
5407 | + 'values', 'vals', or 'val' can be used to specify values. Compound | |
5408 | + keys consisting of up to two fields can be specified by the 'keys' | |
5409 | + keyword. Hashing a compound key produces a unique entry in the | |
5410 | + table for each unique combination of component keys, and can be | |
5411 | + useful for providing more fine-grained summaries of event data. | |
5412 | + Additionally, sort keys consisting of up to two fields can be | |
5413 | + specified by the 'sort' keyword. If more than one field is | |
5414 | + specified, the result will be a 'sort within a sort': the first key | |
5415 | + is taken to be the primary sort key and the second the secondary | |
5416 | + key. If a hist trigger is given a name using the 'name' parameter, | |
5417 | + its histogram data will be shared with other triggers of the same | |
5418 | + name, and trigger hits will update this common data. Only triggers | |
5419 | + with 'compatible' fields can be combined in this way; triggers are | |
5420 | + 'compatible' if the fields named in the trigger share the same | |
5421 | + number and type of fields and those fields also have the same names. | |
5422 | + Note that any two events always share the compatible 'hitcount' and | |
5423 | + 'stacktrace' fields and can therefore be combined using those | |
5424 | + fields, however pointless that may be. | |
5425 | + | |
5426 | + 'hist' triggers add a 'hist' file to each event's subdirectory. | |
5427 | + Reading the 'hist' file for the event will dump the hash table in | |
5428 | + its entirety to stdout. If there are multiple hist triggers | |
5429 | + attached to an event, there will be a table for each trigger in the | |
5430 | + output. The table displayed for a named trigger will be the same as | |
5431 | + any other instance having the same name. Each printed hash table | |
5432 | + entry is a simple list of the keys and values comprising the entry; | |
5433 | + keys are printed first and are delineated by curly braces, and are | |
5434 | + followed by the set of value fields for the entry. By default, | |
5435 | + numeric fields are displayed as base-10 integers. This can be | |
5436 | + modified by appending any of the following modifiers to the field | |
5437 | + name: | |
5438 | + | |
5439 | + .hex display a number as a hex value | |
5440 | + .sym display an address as a symbol | |
5441 | + .sym-offset display an address as a symbol and offset | |
5442 | + .syscall display a syscall id as a system call name | |
5443 | + .execname display a common_pid as a program name | |
5444 | + .log2 display log2 value rather than raw number | |
5445 | + .usecs display a common_timestamp in microseconds | |
5446 | + | |
5447 | + Note that in general the semantics of a given field aren't | |
5448 | + interpreted when applying a modifier to it, but there are some | |
5449 | + restrictions to be aware of in this regard: | |
5450 | + | |
5451 | + - only the 'hex' modifier can be used for values (because values | |
5452 | + are essentially sums, and the other modifiers don't make sense | |
5453 | + in that context). | |
5454 | + - the 'execname' modifier can only be used on a 'common_pid'. The | |
5455 | + reason for this is that the execname is simply the 'comm' value | |
5456 | + saved for the 'current' process when an event was triggered, | |
5457 | + which is the same as the common_pid value saved by the event | |
5458 | + tracing code. Trying to apply that comm value to other pid | |
5459 | + values wouldn't be correct, and typically events that care save | |
5460 | + pid-specific comm fields in the event itself. | |
5461 | + | |
5462 | + A typical usage scenario would be the following to enable a hist | |
5463 | + trigger, read its current contents, and then turn it off: | |
5464 | + | |
5465 | + # echo 'hist:keys=skbaddr.hex:vals=len' > \ | |
5466 | + /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
5467 | + | |
5468 | + # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
5469 | + | |
5470 | + # echo '!hist:keys=skbaddr.hex:vals=len' > \ | |
5471 | + /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
5472 | + | |
5473 | + The trigger file itself can be read to show the details of the | |
5474 | + currently attached hist trigger. This information is also displayed | |
5475 | + at the top of the 'hist' file when read. | |
5476 | + | |
5477 | + By default, the size of the hash table is 2048 entries. The 'size' | |
5478 | + parameter can be used to specify more or fewer than that. The units | |
5479 | + are in terms of hashtable entries - if a run uses more entries than | |
5480 | + specified, the results will show the number of 'drops', the number | |
5481 | + of hits that were ignored. The size should be a power of 2 between | |
5482 | + 128 and 131072 (any non-power-of-2 number specified will be rounded | |
5483 | + up). | |
5484 | + | |
5485 | + The 'sort' parameter can be used to specify a value field to sort | |
5486 | + on. The default if unspecified is 'hitcount' and the default sort | |
5487 | + order is 'ascending'. To sort in the opposite direction, append | |
5488 | + '.descending' to the sort key. | |
5489 | + | |
5490 | + The 'pause' parameter can be used to pause an existing hist trigger | |
5491 | + or to start a hist trigger but not log any events until told to do | |
5492 | + so. 'continue' or 'cont' can be used to start or restart a paused | |
5493 | + hist trigger. | |
5494 | + | |
5495 | + The 'clear' parameter will clear the contents of a running hist | |
5496 | + trigger and leave its current paused/active state. | |
5497 | + | |
5498 | + Note that the 'pause', 'cont', and 'clear' parameters should be | |
5499 | + applied using 'append' shell operator ('>>') if applied to an | |
5500 | + existing trigger, rather than via the '>' operator, which will cause | |
5501 | + the trigger to be removed through truncation. | |
5502 | + | |
5503 | +- enable_hist/disable_hist | |
5504 | + | |
5505 | + The enable_hist and disable_hist triggers can be used to have one | |
5506 | + event conditionally start and stop another event's already-attached | |
5507 | + hist trigger. Any number of enable_hist and disable_hist triggers | |
5508 | + can be attached to a given event, allowing that event to kick off | |
5509 | + and stop aggregations on a host of other events. | |
5510 | + | |
5511 | + The format is very similar to the enable/disable_event triggers: | |
5512 | + | |
5513 | + enable_hist:<system>:<event>[:count] | |
5514 | + disable_hist:<system>:<event>[:count] | |
5515 | + | |
5516 | + Instead of enabling or disabling the tracing of the target event | |
5517 | + into the trace buffer as the enable/disable_event triggers do, the | |
5518 | + enable/disable_hist triggers enable or disable the aggregation of | |
5519 | + the target event into a hash table. | |
5520 | + | |
5521 | + A typical usage scenario for the enable_hist/disable_hist triggers | |
5522 | + would be to first set up a paused hist trigger on some event, | |
5523 | + followed by an enable_hist/disable_hist pair that turns the hist | |
5524 | + aggregation on and off when conditions of interest are hit: | |
5525 | + | |
5526 | + # echo 'hist:keys=skbaddr.hex:vals=len:pause' > \ | |
5527 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
5528 | + | |
5529 | + # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
5530 | + /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
5531 | + | |
5532 | + # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | |
5533 | + /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
5534 | + | |
5535 | + The above sets up an initially paused hist trigger which is unpaused | |
5536 | + and starts aggregating events when a given program is executed, and | |
5537 | + which stops aggregating when the process exits and the hist trigger | |
5538 | + is paused again. | |
5539 | + | |
5540 | + The examples below provide a more concrete illustration of the | |
5541 | + concepts and typical usage patterns discussed above. | |
5542 | + | |
5543 | + 'special' event fields | |
5544 | + ------------------------ | |
5545 | + | |
5546 | + There are a number of 'special event fields' available for use as | |
5547 | + keys or values in a hist trigger. These look like and behave as if | |
5548 | + they were actual event fields, but aren't really part of the event's | |
5549 | + field definition or format file. They are however available for any | |
5550 | + event, and can be used anywhere an actual event field could be. | |
5551 | + They are: | |
5552 | + | |
5553 | + common_timestamp u64 - timestamp (from ring buffer) associated | |
5554 | + with the event, in nanoseconds. May be | |
5555 | + modified by .usecs to have timestamps | |
5556 | + interpreted as microseconds. | |
5557 | + cpu int - the cpu on which the event occurred. | |
5558 | + | |
5559 | + Extended error information | |
5560 | + -------------------------- | |
5561 | + | |
5562 | + For some error conditions encountered when invoking a hist trigger | |
5563 | + command, extended error information is available via the | |
5564 | + corresponding event's 'hist' file. Reading the hist file after an | |
5565 | + error will display more detailed information about what went wrong, | |
5566 | + if information is available. This extended error information will | |
5567 | + be available until the next hist trigger command for that event. | |
5568 | + | |
5569 | + If available for a given error condition, the extended error | |
5570 | + information and usage takes the following form: | |
5571 | + | |
5572 | + # echo xxx > /sys/kernel/debug/tracing/events/sched/sched_wakeup/trigger | |
5573 | + echo: write error: Invalid argument | |
5574 | + | |
5575 | + # cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/hist | |
5576 | + ERROR: Couldn't yyy: zzz | |
5577 | + Last command: xxx | |
5578 | + | |
5579 | +6.2 'hist' trigger examples | |
5580 | +--------------------------- | |
5581 | + | |
5582 | + The first set of examples creates aggregations using the kmalloc | |
5583 | + event. The fields that can be used for the hist trigger are listed | |
5584 | + in the kmalloc event's format file: | |
5585 | + | |
5586 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/format | |
5587 | + name: kmalloc | |
5588 | + ID: 374 | |
5589 | + format: | |
5590 | + field:unsigned short common_type; offset:0; size:2; signed:0; | |
5591 | + field:unsigned char common_flags; offset:2; size:1; signed:0; | |
5592 | + field:unsigned char common_preempt_count; offset:3; size:1; signed:0; | |
5593 | + field:int common_pid; offset:4; size:4; signed:1; | |
5594 | + | |
5595 | + field:unsigned long call_site; offset:8; size:8; signed:0; | |
5596 | + field:const void * ptr; offset:16; size:8; signed:0; | |
5597 | + field:size_t bytes_req; offset:24; size:8; signed:0; | |
5598 | + field:size_t bytes_alloc; offset:32; size:8; signed:0; | |
5599 | + field:gfp_t gfp_flags; offset:40; size:4; signed:0; | |
5600 | + | |
5601 | + We'll start by creating a hist trigger that generates a simple table | |
5602 | + that lists the total number of bytes requested for each function in | |
5603 | + the kernel that made one or more calls to kmalloc: | |
5604 | + | |
5605 | + # echo 'hist:key=call_site:val=bytes_req' > \ | |
5606 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5607 | + | |
5608 | + This tells the tracing system to create a 'hist' trigger using the | |
5609 | + call_site field of the kmalloc event as the key for the table, which | |
5610 | + just means that each unique call_site address will have an entry | |
5611 | + created for it in the table. The 'val=bytes_req' parameter tells | |
5612 | + the hist trigger that for each unique entry (call_site) in the | |
5613 | + table, it should keep a running total of the number of bytes | |
5614 | + requested by that call_site. | |
5615 | + | |
5616 | + We'll let it run for a while and then dump the contents of the 'hist' | |
5617 | + file in the kmalloc event's subdirectory (for readability, a number | |
5618 | + of entries have been omitted): | |
5619 | + | |
5620 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5621 | + # trigger info: hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | |
5622 | + | |
5623 | + { call_site: 18446744072106379007 } hitcount: 1 bytes_req: 176 | |
5624 | + { call_site: 18446744071579557049 } hitcount: 1 bytes_req: 1024 | |
5625 | + { call_site: 18446744071580608289 } hitcount: 1 bytes_req: 16384 | |
5626 | + { call_site: 18446744071581827654 } hitcount: 1 bytes_req: 24 | |
5627 | + { call_site: 18446744071580700980 } hitcount: 1 bytes_req: 8 | |
5628 | + { call_site: 18446744071579359876 } hitcount: 1 bytes_req: 152 | |
5629 | + { call_site: 18446744071580795365 } hitcount: 3 bytes_req: 144 | |
5630 | + { call_site: 18446744071581303129 } hitcount: 3 bytes_req: 144 | |
5631 | + { call_site: 18446744071580713234 } hitcount: 4 bytes_req: 2560 | |
5632 | + { call_site: 18446744071580933750 } hitcount: 4 bytes_req: 736 | |
5633 | + . | |
5634 | + . | |
5635 | + . | |
5636 | + { call_site: 18446744072106047046 } hitcount: 69 bytes_req: 5576 | |
5637 | + { call_site: 18446744071582116407 } hitcount: 73 bytes_req: 2336 | |
5638 | + { call_site: 18446744072106054684 } hitcount: 136 bytes_req: 140504 | |
5639 | + { call_site: 18446744072106224230 } hitcount: 136 bytes_req: 19584 | |
5640 | + { call_site: 18446744072106078074 } hitcount: 153 bytes_req: 2448 | |
5641 | + { call_site: 18446744072106062406 } hitcount: 153 bytes_req: 36720 | |
5642 | + { call_site: 18446744071582507929 } hitcount: 153 bytes_req: 37088 | |
5643 | + { call_site: 18446744072102520590 } hitcount: 273 bytes_req: 10920 | |
5644 | + { call_site: 18446744071582143559 } hitcount: 358 bytes_req: 716 | |
5645 | + { call_site: 18446744072106465852 } hitcount: 417 bytes_req: 56712 | |
5646 | + { call_site: 18446744072102523378 } hitcount: 485 bytes_req: 27160 | |
5647 | + { call_site: 18446744072099568646 } hitcount: 1676 bytes_req: 33520 | |
5648 | + | |
5649 | + Totals: | |
5650 | + Hits: 4610 | |
5651 | + Entries: 45 | |
5652 | + Dropped: 0 | |
5653 | + | |
5654 | + The output displays a line for each entry, beginning with the key | |
5655 | + specified in the trigger, followed by the value(s) also specified in | |
5656 | + the trigger. At the beginning of the output is a line that displays | |
5657 | + the trigger info, which can also be displayed by reading the | |
5658 | + 'trigger' file: | |
5659 | + | |
5660 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5661 | + hist:keys=call_site:vals=bytes_req:sort=hitcount:size=2048 [active] | |
5662 | + | |
5663 | + At the end of the output are a few lines that display the overall | |
5664 | + totals for the run. The 'Hits' field shows the total number of | |
5665 | + times the event trigger was hit, the 'Entries' field shows the total | |
5666 | + number of used entries in the hash table, and the 'Dropped' field | |
5667 | + shows the number of hits that were dropped because the number of | |
5668 | + used entries for the run exceeded the maximum number of entries | |
5669 | + allowed for the table (normally 0, but if not a hint that you may | |
5670 | + want to increase the size of the table using the 'size' parameter). | |
5671 | + | |
5672 | + Notice in the above output that there's an extra field, 'hitcount', | |
5673 | + which wasn't specified in the trigger. Also notice that in the | |
5674 | + trigger info output, there's a parameter, 'sort=hitcount', which | |
5675 | + wasn't specified in the trigger either. The reason for that is that | |
5676 | + every trigger implicitly keeps a count of the total number of hits | |
5677 | + attributed to a given entry, called the 'hitcount'. That hitcount | |
5678 | + information is explicitly displayed in the output, and in the | |
5679 | + absence of a user-specified sort parameter, is used as the default | |
5680 | + sort field. | |
5681 | + | |
5682 | + The value 'hitcount' can be used in place of an explicit value in | |
5683 | + the 'values' parameter if you don't really need to have any | |
5684 | + particular field summed and are mainly interested in hit | |
5685 | + frequencies. | |
5686 | + | |
5687 | + To turn the hist trigger off, simply call up the trigger in the | |
5688 | + command history and re-execute it with a '!' prepended: | |
5689 | + | |
5690 | + # echo '!hist:key=call_site:val=bytes_req' > \ | |
5691 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5692 | + | |
5693 | + Finally, notice that the call_site as displayed in the output above | |
5694 | + isn't really very useful. It's an address, but normally addresses | |
5695 | + are displayed in hex. To have a numeric field displayed as a hex | |
5696 | + value, simply append '.hex' to the field name in the trigger: | |
5697 | + | |
5698 | + # echo 'hist:key=call_site.hex:val=bytes_req' > \ | |
5699 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5700 | + | |
5701 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5702 | + # trigger info: hist:keys=call_site.hex:vals=bytes_req:sort=hitcount:size=2048 [active] | |
5703 | + | |
5704 | + { call_site: ffffffffa026b291 } hitcount: 1 bytes_req: 433 | |
5705 | + { call_site: ffffffffa07186ff } hitcount: 1 bytes_req: 176 | |
5706 | + { call_site: ffffffff811ae721 } hitcount: 1 bytes_req: 16384 | |
5707 | + { call_site: ffffffff811c5134 } hitcount: 1 bytes_req: 8 | |
5708 | + { call_site: ffffffffa04a9ebb } hitcount: 1 bytes_req: 511 | |
5709 | + { call_site: ffffffff8122e0a6 } hitcount: 1 bytes_req: 12 | |
5710 | + { call_site: ffffffff8107da84 } hitcount: 1 bytes_req: 152 | |
5711 | + { call_site: ffffffff812d8246 } hitcount: 1 bytes_req: 24 | |
5712 | + { call_site: ffffffff811dc1e5 } hitcount: 3 bytes_req: 144 | |
5713 | + { call_site: ffffffffa02515e8 } hitcount: 3 bytes_req: 648 | |
5714 | + { call_site: ffffffff81258159 } hitcount: 3 bytes_req: 144 | |
5715 | + { call_site: ffffffff811c80f4 } hitcount: 4 bytes_req: 544 | |
5716 | + . | |
5717 | + . | |
5718 | + . | |
5719 | + { call_site: ffffffffa06c7646 } hitcount: 106 bytes_req: 8024 | |
5720 | + { call_site: ffffffffa06cb246 } hitcount: 132 bytes_req: 31680 | |
5721 | + { call_site: ffffffffa06cef7a } hitcount: 132 bytes_req: 2112 | |
5722 | + { call_site: ffffffff8137e399 } hitcount: 132 bytes_req: 23232 | |
5723 | + { call_site: ffffffffa06c941c } hitcount: 185 bytes_req: 171360 | |
5724 | + { call_site: ffffffffa06f2a66 } hitcount: 185 bytes_req: 26640 | |
5725 | + { call_site: ffffffffa036a70e } hitcount: 265 bytes_req: 10600 | |
5726 | + { call_site: ffffffff81325447 } hitcount: 292 bytes_req: 584 | |
5727 | + { call_site: ffffffffa072da3c } hitcount: 446 bytes_req: 60656 | |
5728 | + { call_site: ffffffffa036b1f2 } hitcount: 526 bytes_req: 29456 | |
5729 | + { call_site: ffffffffa0099c06 } hitcount: 1780 bytes_req: 35600 | |
5730 | + | |
5731 | + Totals: | |
5732 | + Hits: 4775 | |
5733 | + Entries: 46 | |
5734 | + Dropped: 0 | |
5735 | + | |
5736 | + Even that's only marginally more useful - while hex values do look | |
5737 | + more like addresses, what users are typically more interested in | |
5738 | + when looking at text addresses are the corresponding symbols | |
5739 | + instead. To have an address displayed as symbolic value instead, | |
5740 | + simply append '.sym' or '.sym-offset' to the field name in the | |
5741 | + trigger: | |
5742 | + | |
5743 | + # echo 'hist:key=call_site.sym:val=bytes_req' > \ | |
5744 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5745 | + | |
5746 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5747 | + # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=hitcount:size=2048 [active] | |
5748 | + | |
5749 | + { call_site: [ffffffff810adcb9] syslog_print_all } hitcount: 1 bytes_req: 1024 | |
5750 | + { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | |
5751 | + { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | |
5752 | + { call_site: [ffffffff8154acbe] usb_alloc_urb } hitcount: 1 bytes_req: 192 | |
5753 | + { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | |
5754 | + { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | |
5755 | + { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | |
5756 | + { call_site: [ffffffff811febd5] fsnotify_alloc_group } hitcount: 2 bytes_req: 528 | |
5757 | + { call_site: [ffffffff81440f58] __tty_buffer_request_room } hitcount: 2 bytes_req: 2624 | |
5758 | + { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 2 bytes_req: 96 | |
5759 | + { call_site: [ffffffffa05e19af] ieee80211_start_tx_ba_session [mac80211] } hitcount: 2 bytes_req: 464 | |
5760 | + { call_site: [ffffffff81672406] tcp_get_metrics } hitcount: 2 bytes_req: 304 | |
5761 | + { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | |
5762 | + { call_site: [ffffffff81089b05] sched_create_group } hitcount: 2 bytes_req: 1424 | |
5763 | + . | |
5764 | + . | |
5765 | + . | |
5766 | + { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1185 bytes_req: 123240 | |
5767 | + { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 1185 bytes_req: 104280 | |
5768 | + { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 1402 bytes_req: 190672 | |
5769 | + { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 1518 bytes_req: 146208 | |
5770 | + { call_site: [ffffffffa029070e] drm_vma_node_allow [drm] } hitcount: 1746 bytes_req: 69840 | |
5771 | + { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 2021 bytes_req: 792312 | |
5772 | + { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 2592 bytes_req: 145152 | |
5773 | + { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2629 bytes_req: 378576 | |
5774 | + { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2629 bytes_req: 3783248 | |
5775 | + { call_site: [ffffffff81325607] apparmor_file_alloc_security } hitcount: 5192 bytes_req: 10384 | |
5776 | + { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 5529 bytes_req: 110584 | |
5777 | + { call_site: [ffffffff8131ebf7] aa_alloc_task_context } hitcount: 21943 bytes_req: 702176 | |
5778 | + { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 55759 bytes_req: 5074265 | |
5779 | + | |
5780 | + Totals: | |
5781 | + Hits: 109928 | |
5782 | + Entries: 71 | |
5783 | + Dropped: 0 | |
5784 | + | |
5785 | + Because the default sort key above is 'hitcount', the above shows | |
5786 | + the list of call_sites by increasing hitcount, so that at the bottom | |
5787 | + we see the functions that made the most kmalloc calls during the | |
5788 | + run. If instead we wanted to see the top kmalloc callers in | |
5789 | + terms of the number of bytes requested rather than the number of | |
5790 | + calls, and we wanted the top caller to appear at the top, we can use | |
5791 | + the 'sort' parameter, along with the 'descending' modifier: | |
5792 | + | |
5793 | + # echo 'hist:key=call_site.sym:val=bytes_req:sort=bytes_req.descending' > \ | |
5794 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5795 | + | |
5796 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5797 | + # trigger info: hist:keys=call_site.sym:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | |
5798 | + | |
5799 | + { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 2186 bytes_req: 3397464 | |
5800 | + { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1790 bytes_req: 712176 | |
5801 | + { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 8132 bytes_req: 513135 | |
5802 | + { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 106 bytes_req: 440128 | |
5803 | + { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 2186 bytes_req: 314784 | |
5804 | + { call_site: [ffffffff812891ca] ext4_find_extent } hitcount: 2174 bytes_req: 208992 | |
5805 | + { call_site: [ffffffff811ae8e1] __kmalloc } hitcount: 8 bytes_req: 131072 | |
5806 | + { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 859 bytes_req: 116824 | |
5807 | + { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 1834 bytes_req: 102704 | |
5808 | + { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 972 bytes_req: 101088 | |
5809 | + { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl [drm] } hitcount: 972 bytes_req: 85536 | |
5810 | + { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 3333 bytes_req: 66664 | |
5811 | + { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 209 bytes_req: 61632 | |
5812 | + . | |
5813 | + . | |
5814 | + . | |
5815 | + { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 | |
5816 | + { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 | |
5817 | + { call_site: [ffffffff812d8406] copy_semundo } hitcount: 2 bytes_req: 48 | |
5818 | + { call_site: [ffffffff81200ba6] inotify_new_group } hitcount: 1 bytes_req: 48 | |
5819 | + { call_site: [ffffffffa027121a] drm_getmagic [drm] } hitcount: 1 bytes_req: 48 | |
5820 | + { call_site: [ffffffff811e3a25] __seq_open_private } hitcount: 1 bytes_req: 40 | |
5821 | + { call_site: [ffffffff811c52f4] bprm_change_interp } hitcount: 2 bytes_req: 16 | |
5822 | + { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 | |
5823 | + { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 | |
5824 | + { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 | |
5825 | + | |
5826 | + Totals: | |
5827 | + Hits: 32133 | |
5828 | + Entries: 81 | |
5829 | + Dropped: 0 | |
5830 | + | |
5831 | + To display the offset and size information in addition to the symbol | |
5832 | + name, just use 'sym-offset' instead: | |
5833 | + | |
5834 | + # echo 'hist:key=call_site.sym-offset:val=bytes_req:sort=bytes_req.descending' > \ | |
5835 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5836 | + | |
5837 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5838 | + # trigger info: hist:keys=call_site.sym-offset:vals=bytes_req:sort=bytes_req.descending:size=2048 [active] | |
5839 | + | |
5840 | + { call_site: [ffffffffa046041c] i915_gem_execbuffer2+0x6c/0x2c0 [i915] } hitcount: 4569 bytes_req: 3163720 | |
5841 | + { call_site: [ffffffffa0489a66] intel_ring_begin+0xc6/0x1f0 [i915] } hitcount: 4569 bytes_req: 657936 | |
5842 | + { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23+0x694/0x1020 [i915] } hitcount: 1519 bytes_req: 472936 | |
5843 | + { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23+0x516/0x1020 [i915] } hitcount: 3050 bytes_req: 211832 | |
5844 | + { call_site: [ffffffff811e2a1b] seq_buf_alloc+0x1b/0x50 } hitcount: 34 bytes_req: 148384 | |
5845 | + { call_site: [ffffffffa04a580c] intel_crtc_page_flip+0xbc/0x870 [i915] } hitcount: 1385 bytes_req: 144040 | |
5846 | + { call_site: [ffffffff811ae8e1] __kmalloc+0x191/0x1b0 } hitcount: 8 bytes_req: 131072 | |
5847 | + { call_site: [ffffffffa0287592] drm_mode_page_flip_ioctl+0x282/0x360 [drm] } hitcount: 1385 bytes_req: 121880 | |
5848 | + { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc+0x32/0x100 [drm] } hitcount: 1848 bytes_req: 103488 | |
5849 | + { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state+0x2c/0xa0 [i915] } hitcount: 461 bytes_req: 62696 | |
5850 | + { call_site: [ffffffffa029070e] drm_vma_node_allow+0x2e/0xd0 [drm] } hitcount: 1541 bytes_req: 61640 | |
5851 | + { call_site: [ffffffff815f8d7b] sk_prot_alloc+0xcb/0x1b0 } hitcount: 57 bytes_req: 57456 | |
5852 | + . | |
5853 | + . | |
5854 | + . | |
5855 | + { call_site: [ffffffff8109524a] alloc_fair_sched_group+0x5a/0x1a0 } hitcount: 2 bytes_req: 128 | |
5856 | + { call_site: [ffffffffa027b921] drm_vm_open_locked+0x31/0xa0 [drm] } hitcount: 3 bytes_req: 96 | |
5857 | + { call_site: [ffffffff8122e266] proc_self_follow_link+0x76/0xb0 } hitcount: 8 bytes_req: 96 | |
5858 | + { call_site: [ffffffff81213e80] load_elf_binary+0x240/0x1650 } hitcount: 3 bytes_req: 84 | |
5859 | + { call_site: [ffffffff8154bc62] usb_control_msg+0x42/0x110 } hitcount: 1 bytes_req: 8 | |
5860 | + { call_site: [ffffffffa00bf6fe] hidraw_send_report+0x7e/0x1a0 [hid] } hitcount: 1 bytes_req: 7 | |
5861 | + { call_site: [ffffffffa00bf1ca] hidraw_report_event+0x8a/0x120 [hid] } hitcount: 1 bytes_req: 7 | |
5862 | + | |
5863 | + Totals: | |
5864 | + Hits: 26098 | |
5865 | + Entries: 64 | |
5866 | + Dropped: 0 | |
5867 | + | |
5868 | + We can also add multiple fields to the 'values' parameter. For | |
5869 | + example, we might want to see the total number of bytes allocated | |
5870 | + alongside bytes requested, and display the result sorted by bytes | |
5871 | + allocated in a descending order: | |
5872 | + | |
5873 | + # echo 'hist:keys=call_site.sym:values=bytes_req,bytes_alloc:sort=bytes_alloc.descending' > \ | |
5874 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5875 | + | |
5876 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5877 | + # trigger info: hist:keys=call_site.sym:vals=bytes_req,bytes_alloc:sort=bytes_alloc.descending:size=2048 [active] | |
5878 | + | |
5879 | + { call_site: [ffffffffa046041c] i915_gem_execbuffer2 [i915] } hitcount: 7403 bytes_req: 4084360 bytes_alloc: 5958016 | |
5880 | + { call_site: [ffffffff811e2a1b] seq_buf_alloc } hitcount: 541 bytes_req: 2213968 bytes_alloc: 2228224 | |
5881 | + { call_site: [ffffffffa0489a66] intel_ring_begin [i915] } hitcount: 7404 bytes_req: 1066176 bytes_alloc: 1421568 | |
5882 | + { call_site: [ffffffffa045e7c4] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 1565 bytes_req: 557368 bytes_alloc: 1037760 | |
5883 | + { call_site: [ffffffff8125847d] ext4_htree_store_dirent } hitcount: 9557 bytes_req: 595778 bytes_alloc: 695744 | |
5884 | + { call_site: [ffffffffa045e646] i915_gem_do_execbuffer.isra.23 [i915] } hitcount: 5839 bytes_req: 430680 bytes_alloc: 470400 | |
5885 | + { call_site: [ffffffffa04c4a3c] intel_plane_duplicate_state [i915] } hitcount: 2388 bytes_req: 324768 bytes_alloc: 458496 | |
5886 | + { call_site: [ffffffffa02911f2] drm_modeset_lock_crtc [drm] } hitcount: 3911 bytes_req: 219016 bytes_alloc: 250304 | |
5887 | + { call_site: [ffffffff815f8d7b] sk_prot_alloc } hitcount: 235 bytes_req: 236880 bytes_alloc: 240640 | |
5888 | + { call_site: [ffffffff8137e559] sg_kmalloc } hitcount: 557 bytes_req: 169024 bytes_alloc: 221760 | |
5889 | + { call_site: [ffffffffa00b7c06] hid_report_raw_event [hid] } hitcount: 9378 bytes_req: 187548 bytes_alloc: 206312 | |
5890 | + { call_site: [ffffffffa04a580c] intel_crtc_page_flip [i915] } hitcount: 1519 bytes_req: 157976 bytes_alloc: 194432 | |
5891 | + . | |
5892 | + . | |
5893 | + . | |
5894 | + { call_site: [ffffffff8109bd3b] sched_autogroup_create_attach } hitcount: 2 bytes_req: 144 bytes_alloc: 192 | |
5895 | + { call_site: [ffffffff81097ee8] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
5896 | + { call_site: [ffffffff8109524a] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
5897 | + { call_site: [ffffffff81095225] alloc_fair_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
5898 | + { call_site: [ffffffff81097ec2] alloc_rt_sched_group } hitcount: 2 bytes_req: 128 bytes_alloc: 128 | |
5899 | + { call_site: [ffffffff81213e80] load_elf_binary } hitcount: 3 bytes_req: 84 bytes_alloc: 96 | |
5900 | + { call_site: [ffffffff81079a2e] kthread_create_on_node } hitcount: 1 bytes_req: 56 bytes_alloc: 64 | |
5901 | + { call_site: [ffffffffa00bf6fe] hidraw_send_report [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | |
5902 | + { call_site: [ffffffff8154bc62] usb_control_msg } hitcount: 1 bytes_req: 8 bytes_alloc: 8 | |
5903 | + { call_site: [ffffffffa00bf1ca] hidraw_report_event [hid] } hitcount: 1 bytes_req: 7 bytes_alloc: 8 | |
5904 | + | |
5905 | + Totals: | |
5906 | + Hits: 66598 | |
5907 | + Entries: 65 | |
5908 | + Dropped: 0 | |
5909 | + | |
5910 | + Finally, to finish off our kmalloc example, instead of simply having | |
5911 | + the hist trigger display symbolic call_sites, we can have the hist | |
5912 | + trigger additionally display the complete set of kernel stack traces | |
5913 | + that led to each call_site. To do that, we simply use the special | |
5914 | + value 'stacktrace' for the key parameter: | |
5915 | + | |
5916 | + # echo 'hist:keys=stacktrace:values=bytes_req,bytes_alloc:sort=bytes_alloc' > \ | |
5917 | + /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger | |
5918 | + | |
5919 | + The above trigger will use the kernel stack trace in effect when an | |
5920 | + event is triggered as the key for the hash table. This allows the | |
5921 | + enumeration of every kernel callpath that led up to a particular | |
5922 | + event, along with a running total of any of the event fields for | |
5923 | + that event. Here we tally bytes requested and bytes allocated for | |
5924 | + every callpath in the system that led up to a kmalloc (in this case | |
5925 | + every callpath to a kmalloc for a kernel compile): | |
5926 | + | |
5927 | + # cat /sys/kernel/debug/tracing/events/kmem/kmalloc/hist | |
5928 | + # trigger info: hist:keys=stacktrace:vals=bytes_req,bytes_alloc:sort=bytes_alloc:size=2048 [active] | |
5929 | + | |
5930 | + { stacktrace: | |
5931 | + __kmalloc_track_caller+0x10b/0x1a0 | |
5932 | + kmemdup+0x20/0x50 | |
5933 | + hidraw_report_event+0x8a/0x120 [hid] | |
5934 | + hid_report_raw_event+0x3ea/0x440 [hid] | |
5935 | + hid_input_report+0x112/0x190 [hid] | |
5936 | + hid_irq_in+0xc2/0x260 [usbhid] | |
5937 | + __usb_hcd_giveback_urb+0x72/0x120 | |
5938 | + usb_giveback_urb_bh+0x9e/0xe0 | |
5939 | + tasklet_hi_action+0xf8/0x100 | |
5940 | + __do_softirq+0x114/0x2c0 | |
5941 | + irq_exit+0xa5/0xb0 | |
5942 | + do_IRQ+0x5a/0xf0 | |
5943 | + ret_from_intr+0x0/0x30 | |
5944 | + cpuidle_enter+0x17/0x20 | |
5945 | + cpu_startup_entry+0x315/0x3e0 | |
5946 | + rest_init+0x7c/0x80 | |
5947 | + } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | |
5948 | + { stacktrace: | |
5949 | + __kmalloc_track_caller+0x10b/0x1a0 | |
5950 | + kmemdup+0x20/0x50 | |
5951 | + hidraw_report_event+0x8a/0x120 [hid] | |
5952 | + hid_report_raw_event+0x3ea/0x440 [hid] | |
5953 | + hid_input_report+0x112/0x190 [hid] | |
5954 | + hid_irq_in+0xc2/0x260 [usbhid] | |
5955 | + __usb_hcd_giveback_urb+0x72/0x120 | |
5956 | + usb_giveback_urb_bh+0x9e/0xe0 | |
5957 | + tasklet_hi_action+0xf8/0x100 | |
5958 | + __do_softirq+0x114/0x2c0 | |
5959 | + irq_exit+0xa5/0xb0 | |
5960 | + do_IRQ+0x5a/0xf0 | |
5961 | + ret_from_intr+0x0/0x30 | |
5962 | + } hitcount: 3 bytes_req: 21 bytes_alloc: 24 | |
5963 | + { stacktrace: | |
5964 | + kmem_cache_alloc_trace+0xeb/0x150 | |
5965 | + aa_alloc_task_context+0x27/0x40 | |
5966 | + apparmor_cred_prepare+0x1f/0x50 | |
5967 | + security_prepare_creds+0x16/0x20 | |
5968 | + prepare_creds+0xdf/0x1a0 | |
5969 | + SyS_capset+0xb5/0x200 | |
5970 | + system_call_fastpath+0x12/0x6a | |
5971 | + } hitcount: 1 bytes_req: 32 bytes_alloc: 32 | |
5972 | + . | |
5973 | + . | |
5974 | + . | |
5975 | + { stacktrace: | |
5976 | + __kmalloc+0x11b/0x1b0 | |
5977 | + i915_gem_execbuffer2+0x6c/0x2c0 [i915] | |
5978 | + drm_ioctl+0x349/0x670 [drm] | |
5979 | + do_vfs_ioctl+0x2f0/0x4f0 | |
5980 | + SyS_ioctl+0x81/0xa0 | |
5981 | + system_call_fastpath+0x12/0x6a | |
5982 | + } hitcount: 17726 bytes_req: 13944120 bytes_alloc: 19593808 | |
5983 | + { stacktrace: | |
5984 | + __kmalloc+0x11b/0x1b0 | |
5985 | + load_elf_phdrs+0x76/0xa0 | |
5986 | + load_elf_binary+0x102/0x1650 | |
5987 | + search_binary_handler+0x97/0x1d0 | |
5988 | + do_execveat_common.isra.34+0x551/0x6e0 | |
5989 | + SyS_execve+0x3a/0x50 | |
5990 | + return_from_execve+0x0/0x23 | |
5991 | + } hitcount: 33348 bytes_req: 17152128 bytes_alloc: 20226048 | |
5992 | + { stacktrace: | |
5993 | + kmem_cache_alloc_trace+0xeb/0x150 | |
5994 | + apparmor_file_alloc_security+0x27/0x40 | |
5995 | + security_file_alloc+0x16/0x20 | |
5996 | + get_empty_filp+0x93/0x1c0 | |
5997 | + path_openat+0x31/0x5f0 | |
5998 | + do_filp_open+0x3a/0x90 | |
5999 | + do_sys_open+0x128/0x220 | |
6000 | + SyS_open+0x1e/0x20 | |
6001 | + system_call_fastpath+0x12/0x6a | |
6002 | + } hitcount: 4766422 bytes_req: 9532844 bytes_alloc: 38131376 | |
6003 | + { stacktrace: | |
6004 | + __kmalloc+0x11b/0x1b0 | |
6005 | + seq_buf_alloc+0x1b/0x50 | |
6006 | + seq_read+0x2cc/0x370 | |
6007 | + proc_reg_read+0x3d/0x80 | |
6008 | + __vfs_read+0x28/0xe0 | |
6009 | + vfs_read+0x86/0x140 | |
6010 | + SyS_read+0x46/0xb0 | |
6011 | + system_call_fastpath+0x12/0x6a | |
6012 | + } hitcount: 19133 bytes_req: 78368768 bytes_alloc: 78368768 | |
6013 | + | |
6014 | + Totals: | |
6015 | + Hits: 6085872 | |
6016 | + Entries: 253 | |
6017 | + Dropped: 0 | |
6018 | + | |
6019 | + If you key a hist trigger on common_pid, in order for example to | |
6020 | + gather and display sorted totals for each process, you can use the | |
6021 | + special .execname modifier to display the executable names for the | |
6022 | + processes in the table rather than raw pids. The example below | |
6023 | + keeps a per-process sum of total bytes read: | |
6024 | + | |
6025 | + # echo 'hist:key=common_pid.execname:val=count:sort=count.descending' > \ | |
6026 | + /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger | |
6027 | + | |
6028 | + # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/hist | |
6029 | + # trigger info: hist:keys=common_pid.execname:vals=count:sort=count.descending:size=2048 [active] | |
6030 | + | |
6031 | + { common_pid: gnome-terminal [ 3196] } hitcount: 280 count: 1093512 | |
6032 | + { common_pid: Xorg [ 1309] } hitcount: 525 count: 256640 | |
6033 | + { common_pid: compiz [ 2889] } hitcount: 59 count: 254400 | |
6034 | + { common_pid: bash [ 8710] } hitcount: 3 count: 66369 | |
6035 | + { common_pid: dbus-daemon-lau [ 8703] } hitcount: 49 count: 47739 | |
6036 | + { common_pid: irqbalance [ 1252] } hitcount: 27 count: 27648 | |
6037 | + { common_pid: 01ifupdown [ 8705] } hitcount: 3 count: 17216 | |
6038 | + { common_pid: dbus-daemon [ 772] } hitcount: 10 count: 12396 | |
6039 | + { common_pid: Socket Thread [ 8342] } hitcount: 11 count: 11264 | |
6040 | + { common_pid: nm-dhcp-client. [ 8701] } hitcount: 6 count: 7424 | |
6041 | + { common_pid: gmain [ 1315] } hitcount: 18 count: 6336 | |
6042 | + . | |
6043 | + . | |
6044 | + . | |
6045 | + { common_pid: postgres [ 1892] } hitcount: 2 count: 32 | |
6046 | + { common_pid: postgres [ 1891] } hitcount: 2 count: 32 | |
6047 | + { common_pid: gmain [ 8704] } hitcount: 2 count: 32 | |
6048 | + { common_pid: upstart-dbus-br [ 2740] } hitcount: 21 count: 21 | |
6049 | + { common_pid: nm-dispatcher.a [ 8696] } hitcount: 1 count: 16 | |
6050 | + { common_pid: indicator-datet [ 2904] } hitcount: 1 count: 16 | |
6051 | + { common_pid: gdbus [ 2998] } hitcount: 1 count: 16 | |
6052 | + { common_pid: rtkit-daemon [ 2052] } hitcount: 1 count: 8 | |
6053 | + { common_pid: init [ 1] } hitcount: 2 count: 2 | |
6054 | + | |
6055 | + Totals: | |
6056 | + Hits: 2116 | |
6057 | + Entries: 51 | |
6058 | + Dropped: 0 | |
6059 | + | |
6060 | + Similarly, if you key a hist trigger on syscall id, for example to | |
6061 | + gather and display a list of systemwide syscall hits, you can use | |
6062 | + the special .syscall modifier to display the syscall names rather | |
6063 | + than raw ids. The example below keeps a running total of syscall | |
6064 | + counts for the system during the run: | |
6065 | + | |
6066 | + # echo 'hist:key=id.syscall:val=hitcount' > \ | |
6067 | + /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
6068 | + | |
6069 | + # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
6070 | + # trigger info: hist:keys=id.syscall:vals=hitcount:sort=hitcount:size=2048 [active] | |
6071 | + | |
6072 | + { id: sys_fsync [ 74] } hitcount: 1 | |
6073 | + { id: sys_newuname [ 63] } hitcount: 1 | |
6074 | + { id: sys_prctl [157] } hitcount: 1 | |
6075 | + { id: sys_statfs [137] } hitcount: 1 | |
6076 | + { id: sys_symlink [ 88] } hitcount: 1 | |
6077 | + { id: sys_sendmmsg [307] } hitcount: 1 | |
6078 | + { id: sys_semctl [ 66] } hitcount: 1 | |
6079 | + { id: sys_readlink [ 89] } hitcount: 3 | |
6080 | + { id: sys_bind [ 49] } hitcount: 3 | |
6081 | + { id: sys_getsockname [ 51] } hitcount: 3 | |
6082 | + { id: sys_unlink [ 87] } hitcount: 3 | |
6083 | + { id: sys_rename [ 82] } hitcount: 4 | |
6084 | + { id: unknown_syscall [ 58] } hitcount: 4 | |
6085 | + { id: sys_connect [ 42] } hitcount: 4 | |
6086 | + { id: sys_getpid [ 39] } hitcount: 4 | |
6087 | + . | |
6088 | + . | |
6089 | + . | |
6090 | + { id: sys_rt_sigprocmask [ 14] } hitcount: 952 | |
6091 | + { id: sys_futex [202] } hitcount: 1534 | |
6092 | + { id: sys_write [ 1] } hitcount: 2689 | |
6093 | + { id: sys_setitimer [ 38] } hitcount: 2797 | |
6094 | + { id: sys_read [ 0] } hitcount: 3202 | |
6095 | + { id: sys_select [ 23] } hitcount: 3773 | |
6096 | + { id: sys_writev [ 20] } hitcount: 4531 | |
6097 | + { id: sys_poll [ 7] } hitcount: 8314 | |
6098 | + { id: sys_recvmsg [ 47] } hitcount: 13738 | |
6099 | + { id: sys_ioctl [ 16] } hitcount: 21843 | |
6100 | + | |
6101 | + Totals: | |
6102 | + Hits: 67612 | |
6103 | + Entries: 72 | |
6104 | + Dropped: 0 | |
6105 | + | |
6106 | + The syscall counts above provide a rough overall picture of system | |
6107 | + call activity on the system; we can see for example that the most | |
6108 | + popular system call on this system was the 'sys_ioctl' system call. | |
6109 | + | |
6110 | + We can use 'compound' keys to refine that number and provide some | |
6111 | + further insight as to which processes exactly contribute to the | |
6112 | + overall ioctl count. | |
6113 | + | |
6114 | + The command below keeps a hitcount for every unique combination of | |
6115 | + system call id and pid - the end result is essentially a table | |
6116 | + that keeps a per-pid sum of system call hits. The results are | |
6117 | + sorted using the system call id as the primary key, and the | |
6118 | + hitcount sum as the secondary key: | |
6119 | + | |
6120 | + # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount' > \ | |
6121 | + /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
6122 | + | |
6123 | + # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
6124 | + # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 [active] | |
6125 | + | |
6126 | + { id: sys_read [ 0], common_pid: rtkit-daemon [ 1877] } hitcount: 1 | |
6127 | + { id: sys_read [ 0], common_pid: gdbus [ 2976] } hitcount: 1 | |
6128 | + { id: sys_read [ 0], common_pid: console-kit-dae [ 3400] } hitcount: 1 | |
6129 | + { id: sys_read [ 0], common_pid: postgres [ 1865] } hitcount: 1 | |
6130 | + { id: sys_read [ 0], common_pid: deja-dup-monito [ 3543] } hitcount: 2 | |
6131 | + { id: sys_read [ 0], common_pid: NetworkManager [ 890] } hitcount: 2 | |
6132 | + { id: sys_read [ 0], common_pid: evolution-calen [ 3048] } hitcount: 2 | |
6133 | + { id: sys_read [ 0], common_pid: postgres [ 1864] } hitcount: 2 | |
6134 | + { id: sys_read [ 0], common_pid: nm-applet [ 3022] } hitcount: 2 | |
6135 | + { id: sys_read [ 0], common_pid: whoopsie [ 1212] } hitcount: 2 | |
6136 | + . | |
6137 | + . | |
6138 | + . | |
6139 | + { id: sys_ioctl [ 16], common_pid: bash [ 8479] } hitcount: 1 | |
6140 | + { id: sys_ioctl [ 16], common_pid: bash [ 3472] } hitcount: 12 | |
6141 | + { id: sys_ioctl [ 16], common_pid: gnome-terminal [ 3199] } hitcount: 16 | |
6142 | + { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 1808 | |
6143 | + { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 5580 | |
6144 | + . | |
6145 | + . | |
6146 | + . | |
6147 | + { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2690] } hitcount: 3 | |
6148 | + { id: sys_waitid [247], common_pid: upstart-dbus-br [ 2688] } hitcount: 16 | |
6149 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 975] } hitcount: 2 | |
6150 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 3204] } hitcount: 4 | |
6151 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 2888] } hitcount: 4 | |
6152 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 3003] } hitcount: 4 | |
6153 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 2873] } hitcount: 4 | |
6154 | + { id: sys_inotify_add_watch [254], common_pid: gmain [ 3196] } hitcount: 6 | |
6155 | + { id: sys_openat [257], common_pid: java [ 2623] } hitcount: 2 | |
6156 | + { id: sys_eventfd2 [290], common_pid: ibus-ui-gtk3 [ 2760] } hitcount: 4 | |
6157 | + { id: sys_eventfd2 [290], common_pid: compiz [ 2994] } hitcount: 6 | |
6158 | + | |
6159 | + Totals: | |
6160 | + Hits: 31536 | |
6161 | + Entries: 323 | |
6162 | + Dropped: 0 | |
6163 | + | |
6164 | + The above list does give us a breakdown of the ioctl syscall by | |
6165 | + pid, but it also gives us quite a bit more than that, which we | |
6166 | + don't really care about at the moment. Since we know the syscall | |
6167 | + id for sys_ioctl (16, displayed next to the sys_ioctl name), we | |
6168 | + can use that to filter out all the other syscalls: | |
6169 | + | |
6170 | + # echo 'hist:key=id.syscall,common_pid.execname:val=hitcount:sort=id,hitcount if id == 16' > \ | |
6171 | + /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/trigger | |
6172 | + | |
6173 | + # cat /sys/kernel/debug/tracing/events/raw_syscalls/sys_enter/hist | |
6174 | + # trigger info: hist:keys=id.syscall,common_pid.execname:vals=hitcount:sort=id.syscall,hitcount:size=2048 if id == 16 [active] | |
6175 | + | |
6176 | + { id: sys_ioctl [ 16], common_pid: gmain [ 2769] } hitcount: 1 | |
6177 | + { id: sys_ioctl [ 16], common_pid: evolution-addre [ 8571] } hitcount: 1 | |
6178 | + { id: sys_ioctl [ 16], common_pid: gmain [ 3003] } hitcount: 1 | |
6179 | + { id: sys_ioctl [ 16], common_pid: gmain [ 2781] } hitcount: 1 | |
6180 | + { id: sys_ioctl [ 16], common_pid: gmain [ 2829] } hitcount: 1 | |
6181 | + { id: sys_ioctl [ 16], common_pid: bash [ 8726] } hitcount: 1 | |
6182 | + { id: sys_ioctl [ 16], common_pid: bash [ 8508] } hitcount: 1 | |
6183 | + { id: sys_ioctl [ 16], common_pid: gmain [ 2970] } hitcount: 1 | |
6184 | + { id: sys_ioctl [ 16], common_pid: gmain [ 2768] } hitcount: 1 | |
6185 | + . | |
6186 | + . | |
6187 | + . | |
6188 | + { id: sys_ioctl [ 16], common_pid: pool [ 8559] } hitcount: 45 | |
6189 | + { id: sys_ioctl [ 16], common_pid: pool [ 8555] } hitcount: 48 | |
6190 | + { id: sys_ioctl [ 16], common_pid: pool [ 8551] } hitcount: 48 | |
6191 | + { id: sys_ioctl [ 16], common_pid: avahi-daemon [ 896] } hitcount: 66 | |
6192 | + { id: sys_ioctl [ 16], common_pid: Xorg [ 1267] } hitcount: 26674 | |
6193 | + { id: sys_ioctl [ 16], common_pid: compiz [ 2994] } hitcount: 73443 | |
6194 | + | |
6195 | + Totals: | |
6196 | + Hits: 101162 | |
6197 | + Entries: 103 | |
6198 | + Dropped: 0 | |
6199 | + | |
6200 | + The above output shows that 'compiz' and 'Xorg' are far and away | |
6201 | + the heaviest ioctl callers (which might lead to questions about | |
6202 | + whether they really need to be making all those calls and to | |
6203 | + possible avenues for further investigation.) | |
6204 | + | |
6205 | + The compound key examples used a key and a sum value (hitcount) to | |
6206 | + sort the output, but we can just as easily use two keys instead. | |
6207 | + Here's an example where we use a compound key composed of the | |
6208 | + common_pid and size event fields. Sorting with pid as the primary | |
6209 | + key and 'size' as the secondary key allows us to display an | |
6210 | + ordered summary of the recvfrom sizes, with counts, received by | |
6211 | + each process: | |
6212 | + | |
6213 | + # echo 'hist:key=common_pid.execname,size:val=hitcount:sort=common_pid,size' > \ | |
6214 | + /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/trigger | |
6215 | + | |
6216 | + # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_recvfrom/hist | |
6217 | + # trigger info: hist:keys=common_pid.execname,size:vals=hitcount:sort=common_pid.execname,size:size=2048 [active] | |
6218 | + | |
6219 | + { common_pid: smbd [ 784], size: 4 } hitcount: 1 | |
6220 | + { common_pid: dnsmasq [ 1412], size: 4096 } hitcount: 672 | |
6221 | + { common_pid: postgres [ 1796], size: 1000 } hitcount: 6 | |
6222 | + { common_pid: postgres [ 1867], size: 1000 } hitcount: 10 | |
6223 | + { common_pid: bamfdaemon [ 2787], size: 28 } hitcount: 2 | |
6224 | + { common_pid: bamfdaemon [ 2787], size: 14360 } hitcount: 1 | |
6225 | + { common_pid: compiz [ 2994], size: 8 } hitcount: 1 | |
6226 | + { common_pid: compiz [ 2994], size: 20 } hitcount: 11 | |
6227 | + { common_pid: gnome-terminal [ 3199], size: 4 } hitcount: 2 | |
6228 | + { common_pid: firefox [ 8817], size: 4 } hitcount: 1 | |
6229 | + { common_pid: firefox [ 8817], size: 8 } hitcount: 5 | |
6230 | + { common_pid: firefox [ 8817], size: 588 } hitcount: 2 | |
6231 | + { common_pid: firefox [ 8817], size: 628 } hitcount: 1 | |
6232 | + { common_pid: firefox [ 8817], size: 6944 } hitcount: 1 | |
6233 | + { common_pid: firefox [ 8817], size: 408880 } hitcount: 2 | |
6234 | + { common_pid: firefox [ 8822], size: 8 } hitcount: 2 | |
6235 | + { common_pid: firefox [ 8822], size: 160 } hitcount: 2 | |
6236 | + { common_pid: firefox [ 8822], size: 320 } hitcount: 2 | |
6237 | + { common_pid: firefox [ 8822], size: 352 } hitcount: 1 | |
6238 | + . | |
6239 | + . | |
6240 | + . | |
6241 | + { common_pid: pool [ 8923], size: 1960 } hitcount: 10 | |
6242 | + { common_pid: pool [ 8923], size: 2048 } hitcount: 10 | |
6243 | + { common_pid: pool [ 8924], size: 1960 } hitcount: 10 | |
6244 | + { common_pid: pool [ 8924], size: 2048 } hitcount: 10 | |
6245 | + { common_pid: pool [ 8928], size: 1964 } hitcount: 4 | |
6246 | + { common_pid: pool [ 8928], size: 1965 } hitcount: 2 | |
6247 | + { common_pid: pool [ 8928], size: 2048 } hitcount: 6 | |
6248 | + { common_pid: pool [ 8929], size: 1982 } hitcount: 1 | |
6249 | + { common_pid: pool [ 8929], size: 2048 } hitcount: 1 | |
6250 | + | |
6251 | + Totals: | |
6252 | + Hits: 2016 | |
6253 | + Entries: 224 | |
6254 | + Dropped: 0 | |
6255 | + | |
6256 | + The above example also illustrates the fact that although a compound | |
6257 | + key is treated as a single entity for hashing purposes, the sub-keys | |
6258 | + it's composed of can be accessed independently. | |
6259 | + | |
6260 | + The next example uses a string field as the hash key and | |
6261 | + demonstrates how you can manually pause and continue a hist trigger. | |
6262 | + In this example, we'll aggregate fork counts and don't expect a | |
6263 | + large number of entries in the hash table, so we'll drop it to a | |
6264 | + much smaller number, say 256: | |
6265 | + | |
6266 | + # echo 'hist:key=child_comm:val=hitcount:size=256' > \ | |
6267 | + /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
6268 | + | |
6269 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
6270 | + # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | |
6271 | + | |
6272 | + { child_comm: dconf worker } hitcount: 1 | |
6273 | + { child_comm: ibus-daemon } hitcount: 1 | |
6274 | + { child_comm: whoopsie } hitcount: 1 | |
6275 | + { child_comm: smbd } hitcount: 1 | |
6276 | + { child_comm: gdbus } hitcount: 1 | |
6277 | + { child_comm: kthreadd } hitcount: 1 | |
6278 | + { child_comm: dconf worker } hitcount: 1 | |
6279 | + { child_comm: evolution-alarm } hitcount: 2 | |
6280 | + { child_comm: Socket Thread } hitcount: 2 | |
6281 | + { child_comm: postgres } hitcount: 2 | |
6282 | + { child_comm: bash } hitcount: 3 | |
6283 | + { child_comm: compiz } hitcount: 3 | |
6284 | + { child_comm: evolution-sourc } hitcount: 4 | |
6285 | + { child_comm: dhclient } hitcount: 4 | |
6286 | + { child_comm: pool } hitcount: 5 | |
6287 | + { child_comm: nm-dispatcher.a } hitcount: 8 | |
6288 | + { child_comm: firefox } hitcount: 8 | |
6289 | + { child_comm: dbus-daemon } hitcount: 8 | |
6290 | + { child_comm: glib-pacrunner } hitcount: 10 | |
6291 | + { child_comm: evolution } hitcount: 23 | |
6292 | + | |
6293 | + Totals: | |
6294 | + Hits: 89 | |
6295 | + Entries: 20 | |
6296 | + Dropped: 0 | |
6297 | + | |
6298 | + If we want to pause the hist trigger, we can simply append :pause to | |
6299 | + the command that started the trigger. Notice that the trigger info | |
6300 | + displays as [paused]: | |
6301 | + | |
6302 | + # echo 'hist:key=child_comm:val=hitcount:size=256:pause' >> \ | |
6303 | + /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
6304 | + | |
6305 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
6306 | + # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [paused] | |
6307 | + | |
6308 | + { child_comm: dconf worker } hitcount: 1 | |
6309 | + { child_comm: kthreadd } hitcount: 1 | |
6310 | + { child_comm: dconf worker } hitcount: 1 | |
6311 | + { child_comm: gdbus } hitcount: 1 | |
6312 | + { child_comm: ibus-daemon } hitcount: 1 | |
6313 | + { child_comm: Socket Thread } hitcount: 2 | |
6314 | + { child_comm: evolution-alarm } hitcount: 2 | |
6315 | + { child_comm: smbd } hitcount: 2 | |
6316 | + { child_comm: bash } hitcount: 3 | |
6317 | + { child_comm: whoopsie } hitcount: 3 | |
6318 | + { child_comm: compiz } hitcount: 3 | |
6319 | + { child_comm: evolution-sourc } hitcount: 4 | |
6320 | + { child_comm: pool } hitcount: 5 | |
6321 | + { child_comm: postgres } hitcount: 6 | |
6322 | + { child_comm: firefox } hitcount: 8 | |
6323 | + { child_comm: dhclient } hitcount: 10 | |
6324 | + { child_comm: emacs } hitcount: 12 | |
6325 | + { child_comm: dbus-daemon } hitcount: 20 | |
6326 | + { child_comm: nm-dispatcher.a } hitcount: 20 | |
6327 | + { child_comm: evolution } hitcount: 35 | |
6328 | + { child_comm: glib-pacrunner } hitcount: 59 | |
6329 | + | |
6330 | + Totals: | |
6331 | + Hits: 199 | |
6332 | + Entries: 21 | |
6333 | + Dropped: 0 | |
6334 | + | |
6335 | + To manually continue having the trigger aggregate events, append | |
6336 | + :cont instead. Notice that the trigger info displays as [active] | |
6337 | + again, and the data has changed: | |
6338 | + | |
6339 | + # echo 'hist:key=child_comm:val=hitcount:size=256:cont' >> \ | |
6340 | + /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
6341 | + | |
6342 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
6343 | + # trigger info: hist:keys=child_comm:vals=hitcount:sort=hitcount:size=256 [active] | |
6344 | + | |
6345 | + { child_comm: dconf worker } hitcount: 1 | |
6346 | + { child_comm: dconf worker } hitcount: 1 | |
6347 | + { child_comm: kthreadd } hitcount: 1 | |
6348 | + { child_comm: gdbus } hitcount: 1 | |
6349 | + { child_comm: ibus-daemon } hitcount: 1 | |
6350 | + { child_comm: Socket Thread } hitcount: 2 | |
6351 | + { child_comm: evolution-alarm } hitcount: 2 | |
6352 | + { child_comm: smbd } hitcount: 2 | |
6353 | + { child_comm: whoopsie } hitcount: 3 | |
6354 | + { child_comm: compiz } hitcount: 3 | |
6355 | + { child_comm: evolution-sourc } hitcount: 4 | |
6356 | + { child_comm: bash } hitcount: 5 | |
6357 | + { child_comm: pool } hitcount: 5 | |
6358 | + { child_comm: postgres } hitcount: 6 | |
6359 | + { child_comm: firefox } hitcount: 8 | |
6360 | + { child_comm: dhclient } hitcount: 11 | |
6361 | + { child_comm: emacs } hitcount: 12 | |
6362 | + { child_comm: dbus-daemon } hitcount: 22 | |
6363 | + { child_comm: nm-dispatcher.a } hitcount: 22 | |
6364 | + { child_comm: evolution } hitcount: 35 | |
6365 | + { child_comm: glib-pacrunner } hitcount: 59 | |
6366 | + | |
6367 | + Totals: | |
6368 | + Hits: 206 | |
6369 | + Entries: 21 | |
6370 | + Dropped: 0 | |
6371 | + | |
6372 | + The previous example showed how to start and stop a hist trigger by | |
6373 | + appending 'pause' and 'continue' to the hist trigger command. A | |
6374 | + hist trigger can also be started in a paused state by initially | |
6375 | + starting the trigger with ':pause' appended. This allows you to | |
6376 | + start the trigger only when you're ready to start collecting data | |
6377 | + and not before. For example, you could start the trigger in a | |
6378 | + paused state, then unpause it and do something you want to measure, | |
6379 | + then pause the trigger again when done. | |
6380 | + | |
6381 | + Of course, doing this manually can be difficult and error-prone, but | |
6382 | + it is possible to automatically start and stop a hist trigger based | |
6383 | + on some condition, via the enable_hist and disable_hist triggers. | |
6384 | + | |
6385 | + For example, suppose we wanted to take a look at the relative | |
6386 | + weights in terms of skb length for each callpath that leads to a | |
6387 | + netif_receive_skb event when downloading a decent-sized file using | |
6388 | + wget. | |
6389 | + | |
6390 | + First we set up an initially paused stacktrace trigger on the | |
6391 | + netif_receive_skb event: | |
6392 | + | |
6393 | + # echo 'hist:key=stacktrace:vals=len:pause' > \ | |
6394 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6395 | + | |
6396 | + Next, we set up an 'enable_hist' trigger on the sched_process_exec | |
6397 | + event, with an 'if filename==/usr/bin/wget' filter. The effect of | |
6398 | + this new trigger is that it will 'unpause' the hist trigger we just | |
6399 | + set up on netif_receive_skb if and only if it sees a | |
6400 | + sched_process_exec event with a filename of '/usr/bin/wget'. When | |
6401 | + that happens, all netif_receive_skb events are aggregated into a | |
6402 | + hash table keyed on stacktrace: | |
6403 | + | |
6404 | + # echo 'enable_hist:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
6405 | + /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
6406 | + | |
6407 | + The aggregation continues until the netif_receive_skb hist trigger | |
6408 | + again, which is what the following disable_hist event does by | |
6409 | + creating a similar setup on the sched_process_exit event, using the | |
6410 | + filter 'comm==wget': | |
6411 | + | |
6412 | + # echo 'disable_hist:net:netif_receive_skb if comm==wget' > \ | |
6413 | + /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
6414 | + | |
6415 | + Whenever a process exits and the comm field of the disable_hist | |
6416 | + trigger filter matches 'comm==wget', the netif_receive_skb hist | |
6417 | + trigger is disabled. | |
6418 | + | |
6419 | + The overall effect is that netif_receive_skb events are aggregated | |
6420 | + into the hash table for only the duration of the wget. Executing a | |
6421 | + wget command and then listing the 'hist' file will display the | |
6422 | + output generated by the wget command: | |
6423 | + | |
6424 | + $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | |
6425 | + | |
6426 | + # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
6427 | + # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | |
6428 | + | |
6429 | + { stacktrace: | |
6430 | + __netif_receive_skb_core+0x46d/0x990 | |
6431 | + __netif_receive_skb+0x18/0x60 | |
6432 | + netif_receive_skb_internal+0x23/0x90 | |
6433 | + napi_gro_receive+0xc8/0x100 | |
6434 | + ieee80211_deliver_skb+0xd6/0x270 [mac80211] | |
6435 | + ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | |
6436 | + ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | |
6437 | + ieee80211_rx+0x31d/0x900 [mac80211] | |
6438 | + iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | |
6439 | + iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | |
6440 | + iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | |
6441 | + irq_thread_fn+0x20/0x50 | |
6442 | + irq_thread+0x11f/0x150 | |
6443 | + kthread+0xd2/0xf0 | |
6444 | + ret_from_fork+0x42/0x70 | |
6445 | + } hitcount: 85 len: 28884 | |
6446 | + { stacktrace: | |
6447 | + __netif_receive_skb_core+0x46d/0x990 | |
6448 | + __netif_receive_skb+0x18/0x60 | |
6449 | + netif_receive_skb_internal+0x23/0x90 | |
6450 | + napi_gro_complete+0xa4/0xe0 | |
6451 | + dev_gro_receive+0x23a/0x360 | |
6452 | + napi_gro_receive+0x30/0x100 | |
6453 | + ieee80211_deliver_skb+0xd6/0x270 [mac80211] | |
6454 | + ieee80211_rx_handlers+0xccf/0x22f0 [mac80211] | |
6455 | + ieee80211_prepare_and_rx_handle+0x4e7/0xc40 [mac80211] | |
6456 | + ieee80211_rx+0x31d/0x900 [mac80211] | |
6457 | + iwlagn_rx_reply_rx+0x3db/0x6f0 [iwldvm] | |
6458 | + iwl_rx_dispatch+0x8e/0xf0 [iwldvm] | |
6459 | + iwl_pcie_irq_handler+0xe3c/0x12f0 [iwlwifi] | |
6460 | + irq_thread_fn+0x20/0x50 | |
6461 | + irq_thread+0x11f/0x150 | |
6462 | + kthread+0xd2/0xf0 | |
6463 | + } hitcount: 98 len: 664329 | |
6464 | + { stacktrace: | |
6465 | + __netif_receive_skb_core+0x46d/0x990 | |
6466 | + __netif_receive_skb+0x18/0x60 | |
6467 | + process_backlog+0xa8/0x150 | |
6468 | + net_rx_action+0x15d/0x340 | |
6469 | + __do_softirq+0x114/0x2c0 | |
6470 | + do_softirq_own_stack+0x1c/0x30 | |
6471 | + do_softirq+0x65/0x70 | |
6472 | + __local_bh_enable_ip+0xb5/0xc0 | |
6473 | + ip_finish_output+0x1f4/0x840 | |
6474 | + ip_output+0x6b/0xc0 | |
6475 | + ip_local_out_sk+0x31/0x40 | |
6476 | + ip_send_skb+0x1a/0x50 | |
6477 | + udp_send_skb+0x173/0x2a0 | |
6478 | + udp_sendmsg+0x2bf/0x9f0 | |
6479 | + inet_sendmsg+0x64/0xa0 | |
6480 | + sock_sendmsg+0x3d/0x50 | |
6481 | + } hitcount: 115 len: 13030 | |
6482 | + { stacktrace: | |
6483 | + __netif_receive_skb_core+0x46d/0x990 | |
6484 | + __netif_receive_skb+0x18/0x60 | |
6485 | + netif_receive_skb_internal+0x23/0x90 | |
6486 | + napi_gro_complete+0xa4/0xe0 | |
6487 | + napi_gro_flush+0x6d/0x90 | |
6488 | + iwl_pcie_irq_handler+0x92a/0x12f0 [iwlwifi] | |
6489 | + irq_thread_fn+0x20/0x50 | |
6490 | + irq_thread+0x11f/0x150 | |
6491 | + kthread+0xd2/0xf0 | |
6492 | + ret_from_fork+0x42/0x70 | |
6493 | + } hitcount: 934 len: 5512212 | |
6494 | + | |
6495 | + Totals: | |
6496 | + Hits: 1232 | |
6497 | + Entries: 4 | |
6498 | + Dropped: 0 | |
6499 | + | |
6500 | + The above shows all the netif_receive_skb callpaths and their total | |
6501 | + lengths for the duration of the wget command. | |
6502 | + | |
6503 | + The 'clear' hist trigger param can be used to clear the hash table. | |
6504 | + Suppose we wanted to try another run of the previous example but | |
6505 | + this time also wanted to see the complete list of events that went | |
6506 | + into the histogram. In order to avoid having to set everything up | |
6507 | + again, we can just clear the histogram first: | |
6508 | + | |
6509 | + # echo 'hist:key=stacktrace:vals=len:clear' >> \ | |
6510 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6511 | + | |
6512 | + Just to verify that it is in fact cleared, here's what we now see in | |
6513 | + the hist file: | |
6514 | + | |
6515 | + # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
6516 | + # trigger info: hist:keys=stacktrace:vals=len:sort=hitcount:size=2048 [paused] | |
6517 | + | |
6518 | + Totals: | |
6519 | + Hits: 0 | |
6520 | + Entries: 0 | |
6521 | + Dropped: 0 | |
6522 | + | |
6523 | + Since we want to see the detailed list of every netif_receive_skb | |
6524 | + event occurring during the new run, which are in fact the same | |
6525 | + events being aggregated into the hash table, we add some additional | |
6526 | + 'enable_event' events to the triggering sched_process_exec and | |
6527 | + sched_process_exit events as such: | |
6528 | + | |
6529 | + # echo 'enable_event:net:netif_receive_skb if filename==/usr/bin/wget' > \ | |
6530 | + /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
6531 | + | |
6532 | + # echo 'disable_event:net:netif_receive_skb if comm==wget' > \ | |
6533 | + /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
6534 | + | |
6535 | + If you read the trigger files for the sched_process_exec and | |
6536 | + sched_process_exit triggers, you should see two triggers for each: | |
6537 | + one enabling/disabling the hist aggregation and the other | |
6538 | + enabling/disabling the logging of events: | |
6539 | + | |
6540 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_exec/trigger | |
6541 | + enable_event:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | |
6542 | + enable_hist:net:netif_receive_skb:unlimited if filename==/usr/bin/wget | |
6543 | + | |
6544 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_exit/trigger | |
6545 | + enable_event:net:netif_receive_skb:unlimited if comm==wget | |
6546 | + disable_hist:net:netif_receive_skb:unlimited if comm==wget | |
6547 | + | |
6548 | + In other words, whenever either of the sched_process_exec or | |
6549 | + sched_process_exit events is hit and matches 'wget', it enables or | |
6550 | + disables both the histogram and the event log, and what you end up | |
6551 | + with is a hash table and set of events just covering the specified | |
6552 | + duration. Run the wget command again: | |
6553 | + | |
6554 | + $ wget https://www.kernel.org/pub/linux/kernel/v3.x/patch-3.19.xz | |
6555 | + | |
6556 | + Displaying the 'hist' file should show something similar to what you | |
6557 | + saw in the last run, but this time you should also see the | |
6558 | + individual events in the trace file: | |
6559 | + | |
6560 | + # cat /sys/kernel/debug/tracing/trace | |
6561 | + | |
6562 | + # tracer: nop | |
6563 | + # | |
6564 | + # entries-in-buffer/entries-written: 183/1426 #P:4 | |
6565 | + # | |
6566 | + # _-----=> irqs-off | |
6567 | + # / _----=> need-resched | |
6568 | + # | / _---=> hardirq/softirq | |
6569 | + # || / _--=> preempt-depth | |
6570 | + # ||| / delay | |
6571 | + # TASK-PID CPU# |||| TIMESTAMP FUNCTION | |
6572 | + # | | | |||| | | | |
6573 | + wget-15108 [000] ..s1 31769.606929: netif_receive_skb: dev=lo skbaddr=ffff88009c353100 len=60 | |
6574 | + wget-15108 [000] ..s1 31769.606999: netif_receive_skb: dev=lo skbaddr=ffff88009c353200 len=60 | |
6575 | + dnsmasq-1382 [000] ..s1 31769.677652: netif_receive_skb: dev=lo skbaddr=ffff88009c352b00 len=130 | |
6576 | + dnsmasq-1382 [000] ..s1 31769.685917: netif_receive_skb: dev=lo skbaddr=ffff88009c352200 len=138 | |
6577 | + ##### CPU 2 buffer started #### | |
6578 | + irq/29-iwlwifi-559 [002] ..s. 31772.031529: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433d00 len=2948 | |
6579 | + irq/29-iwlwifi-559 [002] ..s. 31772.031572: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432200 len=1500 | |
6580 | + irq/29-iwlwifi-559 [002] ..s. 31772.032196: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433100 len=2948 | |
6581 | + irq/29-iwlwifi-559 [002] ..s. 31772.032761: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d433000 len=2948 | |
6582 | + irq/29-iwlwifi-559 [002] ..s. 31772.033220: netif_receive_skb: dev=wlan0 skbaddr=ffff88009d432e00 len=1500 | |
6583 | + . | |
6584 | + . | |
6585 | + . | |
6586 | + | |
6587 | + The following example demonstrates how multiple hist triggers can be | |
6588 | + attached to a given event. This capability can be useful for | |
6589 | + creating a set of different summaries derived from the same set of | |
6590 | + events, or for comparing the effects of different filters, among | |
6591 | + other things. | |
6592 | + | |
6593 | + # echo 'hist:keys=skbaddr.hex:vals=len if len < 0' >> \ | |
6594 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6595 | + # echo 'hist:keys=skbaddr.hex:vals=len if len > 4096' >> \ | |
6596 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6597 | + # echo 'hist:keys=skbaddr.hex:vals=len if len == 256' >> \ | |
6598 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6599 | + # echo 'hist:keys=skbaddr.hex:vals=len' >> \ | |
6600 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6601 | + # echo 'hist:keys=len:vals=common_preempt_count' >> \ | |
6602 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6603 | + | |
6604 | + The above set of commands create four triggers differing only in | |
6605 | + their filters, along with a completely different though fairly | |
6606 | + nonsensical trigger. Note that in order to append multiple hist | |
6607 | + triggers to the same file, you should use the '>>' operator to | |
6608 | + append them ('>' will also add the new hist trigger, but will remove | |
6609 | + any existing hist triggers beforehand). | |
6610 | + | |
6611 | + Displaying the contents of the 'hist' file for the event shows the | |
6612 | + contents of all five histograms: | |
6613 | + | |
6614 | + # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist | |
6615 | + | |
6616 | + # event histogram | |
6617 | + # | |
6618 | + # trigger info: hist:keys=len:vals=hitcount,common_preempt_count:sort=hitcount:size=2048 [active] | |
6619 | + # | |
6620 | + | |
6621 | + { len: 176 } hitcount: 1 common_preempt_count: 0 | |
6622 | + { len: 223 } hitcount: 1 common_preempt_count: 0 | |
6623 | + { len: 4854 } hitcount: 1 common_preempt_count: 0 | |
6624 | + { len: 395 } hitcount: 1 common_preempt_count: 0 | |
6625 | + { len: 177 } hitcount: 1 common_preempt_count: 0 | |
6626 | + { len: 446 } hitcount: 1 common_preempt_count: 0 | |
6627 | + { len: 1601 } hitcount: 1 common_preempt_count: 0 | |
6628 | + . | |
6629 | + . | |
6630 | + . | |
6631 | + { len: 1280 } hitcount: 66 common_preempt_count: 0 | |
6632 | + { len: 116 } hitcount: 81 common_preempt_count: 40 | |
6633 | + { len: 708 } hitcount: 112 common_preempt_count: 0 | |
6634 | + { len: 46 } hitcount: 221 common_preempt_count: 0 | |
6635 | + { len: 1264 } hitcount: 458 common_preempt_count: 0 | |
6636 | + | |
6637 | + Totals: | |
6638 | + Hits: 1428 | |
6639 | + Entries: 147 | |
6640 | + Dropped: 0 | |
6641 | + | |
6642 | + | |
6643 | + # event histogram | |
6644 | + # | |
6645 | + # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
6646 | + # | |
6647 | + | |
6648 | + { skbaddr: ffff8800baee5e00 } hitcount: 1 len: 130 | |
6649 | + { skbaddr: ffff88005f3d5600 } hitcount: 1 len: 1280 | |
6650 | + { skbaddr: ffff88005f3d4900 } hitcount: 1 len: 1280 | |
6651 | + { skbaddr: ffff88009fed6300 } hitcount: 1 len: 115 | |
6652 | + { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 115 | |
6653 | + { skbaddr: ffff88008cdb1900 } hitcount: 1 len: 46 | |
6654 | + { skbaddr: ffff880064b5ef00 } hitcount: 1 len: 118 | |
6655 | + { skbaddr: ffff880044e3c700 } hitcount: 1 len: 60 | |
6656 | + { skbaddr: ffff880100065900 } hitcount: 1 len: 46 | |
6657 | + { skbaddr: ffff8800d46bd500 } hitcount: 1 len: 116 | |
6658 | + { skbaddr: ffff88005f3d5f00 } hitcount: 1 len: 1280 | |
6659 | + { skbaddr: ffff880100064700 } hitcount: 1 len: 365 | |
6660 | + { skbaddr: ffff8800badb6f00 } hitcount: 1 len: 60 | |
6661 | + . | |
6662 | + . | |
6663 | + . | |
6664 | + { skbaddr: ffff88009fe0be00 } hitcount: 27 len: 24677 | |
6665 | + { skbaddr: ffff88009fe0a400 } hitcount: 27 len: 23052 | |
6666 | + { skbaddr: ffff88009fe0b700 } hitcount: 31 len: 25589 | |
6667 | + { skbaddr: ffff88009fe0b600 } hitcount: 32 len: 27326 | |
6668 | + { skbaddr: ffff88006a462800 } hitcount: 68 len: 71678 | |
6669 | + { skbaddr: ffff88006a463700 } hitcount: 70 len: 72678 | |
6670 | + { skbaddr: ffff88006a462b00 } hitcount: 71 len: 77589 | |
6671 | + { skbaddr: ffff88006a463600 } hitcount: 73 len: 71307 | |
6672 | + { skbaddr: ffff88006a462200 } hitcount: 81 len: 81032 | |
6673 | + | |
6674 | + Totals: | |
6675 | + Hits: 1451 | |
6676 | + Entries: 318 | |
6677 | + Dropped: 0 | |
6678 | + | |
6679 | + | |
6680 | + # event histogram | |
6681 | + # | |
6682 | + # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len == 256 [active] | |
6683 | + # | |
6684 | + | |
6685 | + | |
6686 | + Totals: | |
6687 | + Hits: 0 | |
6688 | + Entries: 0 | |
6689 | + Dropped: 0 | |
6690 | + | |
6691 | + | |
6692 | + # event histogram | |
6693 | + # | |
6694 | + # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len > 4096 [active] | |
6695 | + # | |
6696 | + | |
6697 | + { skbaddr: ffff88009fd2c300 } hitcount: 1 len: 7212 | |
6698 | + { skbaddr: ffff8800d2bcce00 } hitcount: 1 len: 7212 | |
6699 | + { skbaddr: ffff8800d2bcd700 } hitcount: 1 len: 7212 | |
6700 | + { skbaddr: ffff8800d2bcda00 } hitcount: 1 len: 21492 | |
6701 | + { skbaddr: ffff8800ae2e2d00 } hitcount: 1 len: 7212 | |
6702 | + { skbaddr: ffff8800d2bcdb00 } hitcount: 1 len: 7212 | |
6703 | + { skbaddr: ffff88006a4df500 } hitcount: 1 len: 4854 | |
6704 | + { skbaddr: ffff88008ce47b00 } hitcount: 1 len: 18636 | |
6705 | + { skbaddr: ffff8800ae2e2200 } hitcount: 1 len: 12924 | |
6706 | + { skbaddr: ffff88005f3e1000 } hitcount: 1 len: 4356 | |
6707 | + { skbaddr: ffff8800d2bcdc00 } hitcount: 2 len: 24420 | |
6708 | + { skbaddr: ffff8800d2bcc200 } hitcount: 2 len: 12996 | |
6709 | + | |
6710 | + Totals: | |
6711 | + Hits: 14 | |
6712 | + Entries: 12 | |
6713 | + Dropped: 0 | |
6714 | + | |
6715 | + | |
6716 | + # event histogram | |
6717 | + # | |
6718 | + # trigger info: hist:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 if len < 0 [active] | |
6719 | + # | |
6720 | + | |
6721 | + | |
6722 | + Totals: | |
6723 | + Hits: 0 | |
6724 | + Entries: 0 | |
6725 | + Dropped: 0 | |
6726 | + | |
6727 | + Named triggers can be used to have triggers share a common set of | |
6728 | + histogram data. This capability is mostly useful for combining the | |
6729 | + output of events generated by tracepoints contained inside inline | |
6730 | + functions, but names can be used in a hist trigger on any event. | |
6731 | + For example, these two triggers when hit will update the same 'len' | |
6732 | + field in the shared 'foo' histogram data: | |
6733 | + | |
6734 | + # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | |
6735 | + /sys/kernel/debug/tracing/events/net/netif_receive_skb/trigger | |
6736 | + # echo 'hist:name=foo:keys=skbaddr.hex:vals=len' > \ | |
6737 | + /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
6738 | + | |
6739 | + You can see that they're updating common histogram data by reading | |
6740 | + each event's hist files at the same time: | |
6741 | + | |
6742 | + # cat /sys/kernel/debug/tracing/events/net/netif_receive_skb/hist; | |
6743 | + cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
6744 | + | |
6745 | + # event histogram | |
6746 | + # | |
6747 | + # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
6748 | + # | |
6749 | + | |
6750 | + { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | |
6751 | + { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | |
6752 | + { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | |
6753 | + { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | |
6754 | + { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | |
6755 | + { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | |
6756 | + { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | |
6757 | + { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | |
6758 | + { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | |
6759 | + { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | |
6760 | + { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | |
6761 | + { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | |
6762 | + { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | |
6763 | + { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | |
6764 | + { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | |
6765 | + { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | |
6766 | + { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | |
6767 | + { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | |
6768 | + { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | |
6769 | + { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | |
6770 | + { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | |
6771 | + { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | |
6772 | + { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | |
6773 | + { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | |
6774 | + { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | |
6775 | + { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | |
6776 | + { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | |
6777 | + { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | |
6778 | + { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | |
6779 | + { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | |
6780 | + { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | |
6781 | + { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | |
6782 | + { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | |
6783 | + { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | |
6784 | + { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | |
6785 | + { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | |
6786 | + { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | |
6787 | + { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | |
6788 | + { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | |
6789 | + { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | |
6790 | + { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | |
6791 | + { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | |
6792 | + | |
6793 | + Totals: | |
6794 | + Hits: 81 | |
6795 | + Entries: 42 | |
6796 | + Dropped: 0 | |
6797 | + # event histogram | |
6798 | + # | |
6799 | + # trigger info: hist:name=foo:keys=skbaddr.hex:vals=hitcount,len:sort=hitcount:size=2048 [active] | |
6800 | + # | |
6801 | + | |
6802 | + { skbaddr: ffff88000ad53500 } hitcount: 1 len: 46 | |
6803 | + { skbaddr: ffff8800af5a1500 } hitcount: 1 len: 76 | |
6804 | + { skbaddr: ffff8800d62a1900 } hitcount: 1 len: 46 | |
6805 | + { skbaddr: ffff8800d2bccb00 } hitcount: 1 len: 468 | |
6806 | + { skbaddr: ffff8800d3c69900 } hitcount: 1 len: 46 | |
6807 | + { skbaddr: ffff88009ff09100 } hitcount: 1 len: 52 | |
6808 | + { skbaddr: ffff88010f13ab00 } hitcount: 1 len: 168 | |
6809 | + { skbaddr: ffff88006a54f400 } hitcount: 1 len: 46 | |
6810 | + { skbaddr: ffff8800d2bcc500 } hitcount: 1 len: 260 | |
6811 | + { skbaddr: ffff880064505000 } hitcount: 1 len: 46 | |
6812 | + { skbaddr: ffff8800baf24e00 } hitcount: 1 len: 32 | |
6813 | + { skbaddr: ffff88009fe0ad00 } hitcount: 1 len: 46 | |
6814 | + { skbaddr: ffff8800d3edff00 } hitcount: 1 len: 44 | |
6815 | + { skbaddr: ffff88009fe0b400 } hitcount: 1 len: 168 | |
6816 | + { skbaddr: ffff8800a1c55a00 } hitcount: 1 len: 40 | |
6817 | + { skbaddr: ffff8800d2bcd100 } hitcount: 1 len: 40 | |
6818 | + { skbaddr: ffff880064505f00 } hitcount: 1 len: 174 | |
6819 | + { skbaddr: ffff8800a8bff200 } hitcount: 1 len: 160 | |
6820 | + { skbaddr: ffff880044e3cc00 } hitcount: 1 len: 76 | |
6821 | + { skbaddr: ffff8800a8bfe700 } hitcount: 1 len: 46 | |
6822 | + { skbaddr: ffff8800d2bcdc00 } hitcount: 1 len: 32 | |
6823 | + { skbaddr: ffff8800a1f64800 } hitcount: 1 len: 46 | |
6824 | + { skbaddr: ffff8800d2bcde00 } hitcount: 1 len: 988 | |
6825 | + { skbaddr: ffff88006a5dea00 } hitcount: 1 len: 46 | |
6826 | + { skbaddr: ffff88002e37a200 } hitcount: 1 len: 44 | |
6827 | + { skbaddr: ffff8800a1f32c00 } hitcount: 2 len: 676 | |
6828 | + { skbaddr: ffff88000ad52600 } hitcount: 2 len: 107 | |
6829 | + { skbaddr: ffff8800a1f91e00 } hitcount: 2 len: 92 | |
6830 | + { skbaddr: ffff8800af5a0200 } hitcount: 2 len: 142 | |
6831 | + { skbaddr: ffff8800d2bcc600 } hitcount: 2 len: 220 | |
6832 | + { skbaddr: ffff8800ba36f500 } hitcount: 2 len: 92 | |
6833 | + { skbaddr: ffff8800d021f800 } hitcount: 2 len: 92 | |
6834 | + { skbaddr: ffff8800a1f33600 } hitcount: 2 len: 675 | |
6835 | + { skbaddr: ffff8800a8bfff00 } hitcount: 3 len: 138 | |
6836 | + { skbaddr: ffff8800d62a1300 } hitcount: 3 len: 138 | |
6837 | + { skbaddr: ffff88002e37a100 } hitcount: 4 len: 184 | |
6838 | + { skbaddr: ffff880064504400 } hitcount: 4 len: 184 | |
6839 | + { skbaddr: ffff8800a8bfec00 } hitcount: 4 len: 184 | |
6840 | + { skbaddr: ffff88000ad53700 } hitcount: 5 len: 230 | |
6841 | + { skbaddr: ffff8800d2bcdb00 } hitcount: 5 len: 196 | |
6842 | + { skbaddr: ffff8800a1f90000 } hitcount: 6 len: 276 | |
6843 | + { skbaddr: ffff88006a54f900 } hitcount: 6 len: 276 | |
6844 | + | |
6845 | + Totals: | |
6846 | + Hits: 81 | |
6847 | + Entries: 42 | |
6848 | + Dropped: 0 | |
6849 | + | |
6850 | + And here's an example that shows how to combine histogram data from | |
6851 | + any two events even if they don't share any 'compatible' fields | |
6852 | + other than 'hitcount' and 'stacktrace'. These commands create a | |
6853 | + couple of triggers named 'bar' using those fields: | |
6854 | + | |
6855 | + # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | |
6856 | + /sys/kernel/debug/tracing/events/sched/sched_process_fork/trigger | |
6857 | + # echo 'hist:name=bar:key=stacktrace:val=hitcount' > \ | |
6858 | + /sys/kernel/debug/tracing/events/net/netif_rx/trigger | |
6859 | + | |
6860 | + And displaying the output of either shows some interesting if | |
6861 | + somewhat confusing output: | |
6862 | + | |
6863 | + # cat /sys/kernel/debug/tracing/events/sched/sched_process_fork/hist | |
6864 | + # cat /sys/kernel/debug/tracing/events/net/netif_rx/hist | |
6865 | + | |
6866 | + # event histogram | |
6867 | + # | |
6868 | + # trigger info: hist:name=bar:keys=stacktrace:vals=hitcount:sort=hitcount:size=2048 [active] | |
6869 | + # | |
6870 | + | |
6871 | + { stacktrace: | |
6872 | + _do_fork+0x18e/0x330 | |
6873 | + kernel_thread+0x29/0x30 | |
6874 | + kthreadd+0x154/0x1b0 | |
6875 | + ret_from_fork+0x3f/0x70 | |
6876 | + } hitcount: 1 | |
6877 | + { stacktrace: | |
6878 | + netif_rx_internal+0xb2/0xd0 | |
6879 | + netif_rx_ni+0x20/0x70 | |
6880 | + dev_loopback_xmit+0xaa/0xd0 | |
6881 | + ip_mc_output+0x126/0x240 | |
6882 | + ip_local_out_sk+0x31/0x40 | |
6883 | + igmp_send_report+0x1e9/0x230 | |
6884 | + igmp_timer_expire+0xe9/0x120 | |
6885 | + call_timer_fn+0x39/0xf0 | |
6886 | + run_timer_softirq+0x1e1/0x290 | |
6887 | + __do_softirq+0xfd/0x290 | |
6888 | + irq_exit+0x98/0xb0 | |
6889 | + smp_apic_timer_interrupt+0x4a/0x60 | |
6890 | + apic_timer_interrupt+0x6d/0x80 | |
6891 | + cpuidle_enter+0x17/0x20 | |
6892 | + call_cpuidle+0x3b/0x60 | |
6893 | + cpu_startup_entry+0x22d/0x310 | |
6894 | + } hitcount: 1 | |
6895 | + { stacktrace: | |
6896 | + netif_rx_internal+0xb2/0xd0 | |
6897 | + netif_rx_ni+0x20/0x70 | |
6898 | + dev_loopback_xmit+0xaa/0xd0 | |
6899 | + ip_mc_output+0x17f/0x240 | |
6900 | + ip_local_out_sk+0x31/0x40 | |
6901 | + ip_send_skb+0x1a/0x50 | |
6902 | + udp_send_skb+0x13e/0x270 | |
6903 | + udp_sendmsg+0x2bf/0x980 | |
6904 | + inet_sendmsg+0x67/0xa0 | |
6905 | + sock_sendmsg+0x38/0x50 | |
6906 | + SYSC_sendto+0xef/0x170 | |
6907 | + SyS_sendto+0xe/0x10 | |
6908 | + entry_SYSCALL_64_fastpath+0x12/0x6a | |
6909 | + } hitcount: 2 | |
6910 | + { stacktrace: | |
6911 | + netif_rx_internal+0xb2/0xd0 | |
6912 | + netif_rx+0x1c/0x60 | |
6913 | + loopback_xmit+0x6c/0xb0 | |
6914 | + dev_hard_start_xmit+0x219/0x3a0 | |
6915 | + __dev_queue_xmit+0x415/0x4f0 | |
6916 | + dev_queue_xmit_sk+0x13/0x20 | |
6917 | + ip_finish_output2+0x237/0x340 | |
6918 | + ip_finish_output+0x113/0x1d0 | |
6919 | + ip_output+0x66/0xc0 | |
6920 | + ip_local_out_sk+0x31/0x40 | |
6921 | + ip_send_skb+0x1a/0x50 | |
6922 | + udp_send_skb+0x16d/0x270 | |
6923 | + udp_sendmsg+0x2bf/0x980 | |
6924 | + inet_sendmsg+0x67/0xa0 | |
6925 | + sock_sendmsg+0x38/0x50 | |
6926 | + ___sys_sendmsg+0x14e/0x270 | |
6927 | + } hitcount: 76 | |
6928 | + { stacktrace: | |
6929 | + netif_rx_internal+0xb2/0xd0 | |
6930 | + netif_rx+0x1c/0x60 | |
6931 | + loopback_xmit+0x6c/0xb0 | |
6932 | + dev_hard_start_xmit+0x219/0x3a0 | |
6933 | + __dev_queue_xmit+0x415/0x4f0 | |
6934 | + dev_queue_xmit_sk+0x13/0x20 | |
6935 | + ip_finish_output2+0x237/0x340 | |
6936 | + ip_finish_output+0x113/0x1d0 | |
6937 | + ip_output+0x66/0xc0 | |
6938 | + ip_local_out_sk+0x31/0x40 | |
6939 | + ip_send_skb+0x1a/0x50 | |
6940 | + udp_send_skb+0x16d/0x270 | |
6941 | + udp_sendmsg+0x2bf/0x980 | |
6942 | + inet_sendmsg+0x67/0xa0 | |
6943 | + sock_sendmsg+0x38/0x50 | |
6944 | + ___sys_sendmsg+0x269/0x270 | |
6945 | + } hitcount: 77 | |
6946 | + { stacktrace: | |
6947 | + netif_rx_internal+0xb2/0xd0 | |
6948 | + netif_rx+0x1c/0x60 | |
6949 | + loopback_xmit+0x6c/0xb0 | |
6950 | + dev_hard_start_xmit+0x219/0x3a0 | |
6951 | + __dev_queue_xmit+0x415/0x4f0 | |
6952 | + dev_queue_xmit_sk+0x13/0x20 | |
6953 | + ip_finish_output2+0x237/0x340 | |
6954 | + ip_finish_output+0x113/0x1d0 | |
6955 | + ip_output+0x66/0xc0 | |
6956 | + ip_local_out_sk+0x31/0x40 | |
6957 | + ip_send_skb+0x1a/0x50 | |
6958 | + udp_send_skb+0x16d/0x270 | |
6959 | + udp_sendmsg+0x2bf/0x980 | |
6960 | + inet_sendmsg+0x67/0xa0 | |
6961 | + sock_sendmsg+0x38/0x50 | |
6962 | + SYSC_sendto+0xef/0x170 | |
6963 | + } hitcount: 88 | |
6964 | + { stacktrace: | |
6965 | + _do_fork+0x18e/0x330 | |
6966 | + SyS_clone+0x19/0x20 | |
6967 | + entry_SYSCALL_64_fastpath+0x12/0x6a | |
6968 | + } hitcount: 244 | |
6969 | + | |
6970 | + Totals: | |
6971 | + Hits: 489 | |
6972 | + Entries: 7 | |
6973 | + Dropped: 0 | |
6974 | + | |
6975 | + | |
6976 | +2.2 Inter-event hist triggers | |
6977 | +----------------------------- | |
6978 | + | |
6979 | +Inter-event hist triggers are hist triggers that combine values from | |
6980 | +one or more other events and create a histogram using that data. Data | |
6981 | +from an inter-event histogram can in turn become the source for | |
6982 | +further combined histograms, thus providing a chain of related | |
6983 | +histograms, which is important for some applications. | |
6984 | + | |
6985 | +The most important example of an inter-event quantity that can be used | |
6986 | +in this manner is latency, which is simply a difference in timestamps | |
6987 | +between two events. Although latency is the most important | |
6988 | +inter-event quantity, note that because the support is completely | |
6989 | +general across the trace event subsystem, any event field can be used | |
6990 | +in an inter-event quantity. | |
6991 | + | |
6992 | +An example of a histogram that combines data from other histograms | |
6993 | +into a useful chain would be a 'wakeupswitch latency' histogram that | |
6994 | +combines a 'wakeup latency' histogram and a 'switch latency' | |
6995 | +histogram. | |
6996 | + | |
6997 | +Normally, a hist trigger specification consists of a (possibly | |
6998 | +compound) key along with one or more numeric values, which are | |
6999 | +continually updated sums associated with that key. A histogram | |
7000 | +specification in this case consists of individual key and value | |
7001 | +specifications that refer to trace event fields associated with a | |
7002 | +single event type. | |
7003 | + | |
7004 | +The inter-event hist trigger extension allows fields from multiple | |
7005 | +events to be referenced and combined into a multi-event histogram | |
7006 | +specification. In support of this overall goal, a few enabling | |
7007 | +features have been added to the hist trigger support: | |
7008 | + | |
7009 | + - In order to compute an inter-event quantity, a value from one | |
7010 | + event needs to be saved and then referenced from another event. This | |
7011 | + requires the introduction of support for histogram 'variables'. | |
7012 | + | |
7013 | + - The computation of inter-event quantities and their combination | |
7014 | + require some minimal amount of support for applying simple | |
7015 | + expressions to variables (+ and -). | |
7016 | + | |
7017 | + - A histogram consisting of inter-event quantities isn't logically a | |
7018 | + histogram on either event (so having the 'hist' file for either | |
7019 | + event host the histogram output doesn't really make sense). To | |
7020 | + address the idea that the histogram is associated with a | |
7021 | + combination of events, support is added allowing the creation of | |
7022 | + 'synthetic' events that are events derived from other events. | |
7023 | + These synthetic events are full-fledged events just like any other | |
7024 | + and can be used as such, as for instance to create the | |
7025 | + 'combination' histograms mentioned previously. | |
7026 | + | |
7027 | + - A set of 'actions' can be associated with histogram entries - | |
7028 | + these can be used to generate the previously mentioned synthetic | |
7029 | + events, but can also be used for other purposes, such as for | |
7030 | + example saving context when a 'max' latency has been hit. | |
7031 | + | |
7032 | + - Trace events don't have a 'timestamp' associated with them, but | |
7033 | + there is an implicit timestamp saved along with an event in the | |
7034 | + underlying ftrace ring buffer. This timestamp is now exposed as | |
7035 | + a synthetic field named 'common_timestamp' which can be used in | |
7036 | + histograms as if it were any other event field; it isn't an actual | |
7037 | + field in the trace format but rather is a synthesized value that | |
7038 | + nonetheless can be used as if it were an actual field. By default | |
7039 | + it is in units of nanoseconds; appending '.usecs' to a | |
7040 | + common_timestamp field changes the units to microseconds. | |
7041 | + | |
7042 | +A note on inter-event timestamps: If common_timestamp is used in a | |
7043 | +histogram, the trace buffer is automatically switched over to using | |
7044 | +absolute timestamps and the "global" trace clock, in order to avoid | |
7045 | +bogus timestamp differences with other clocks that aren't coherent | |
7046 | +across CPUs. This can be overridden by specifying one of the other | |
7047 | +trace clocks instead, using the "clock=XXX" hist trigger attribute, | |
7048 | +where XXX is any of the clocks listed in the tracing/trace_clock | |
7049 | +pseudo-file. | |
7050 | + | |
7051 | +These features are described in more detail in the following sections. | |
7052 | + | |
7053 | +2.2.1 Histogram Variables | |
7054 | +------------------------- | |
7055 | + | |
7056 | +Variables are simply named locations used for saving and retrieving | |
7057 | +values between matching events. A 'matching' event is defined as an | |
7058 | +event that has a matching key - if a variable is saved for a histogram | |
7059 | +entry corresponding to that key, any subsequent event with a matching | |
7060 | +key can access that variable. | |
7061 | + | |
7062 | +A variable's value is normally available to any subsequent event until | |
7063 | +it is set to something else by a subsequent event. The one exception | |
7064 | +to that rule is that any variable used in an expression is essentially | |
7065 | +'read-once' - once it's used by an expression in a subsequent event, | |
7066 | +it's reset to its 'unset' state, which means it can't be used again | |
7067 | +unless it's set again. This ensures not only that an event doesn't | |
7068 | +use an uninitialized variable in a calculation, but that that variable | |
7069 | +is used only once and not for any unrelated subsequent match. | |
7070 | + | |
7071 | +The basic syntax for saving a variable is to simply prefix a unique | |
7072 | +variable name not corresponding to any keyword along with an '=' sign | |
7073 | +to any event field. | |
7074 | + | |
7075 | +Either keys or values can be saved and retrieved in this way. This | |
7076 | +creates a variable named 'ts0' for a histogram entry with the key | |
7077 | +'next_pid': | |
7078 | + | |
7079 | + # echo 'hist:keys=next_pid:vals=$ts0:ts0=common_timestamp ... >> \ | |
7080 | + event/trigger | |
7081 | + | |
7082 | +The ts0 variable can be accessed by any subsequent event having the | |
7083 | +same pid as 'next_pid'. | |
7084 | + | |
7085 | +Variable references are formed by prepending the variable name with | |
7086 | +the '$' sign. Thus for example, the ts0 variable above would be | |
7087 | +referenced as '$ts0' in expressions. | |
7088 | + | |
7089 | +Because 'vals=' is used, the common_timestamp variable value above | |
7090 | +will also be summed as a normal histogram value would (though for a | |
7091 | +timestamp it makes little sense). | |
7092 | + | |
7093 | +The below shows that a key value can also be saved in the same way: | |
7094 | + | |
7095 | + # echo 'hist:timer_pid=common_pid:key=timer_pid ...' >> event/trigger | |
7096 | + | |
7097 | +If a variable isn't a key variable or prefixed with 'vals=', the | |
7098 | +associated event field will be saved in a variable but won't be summed | |
7099 | +as a value: | |
7100 | + | |
7101 | + # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger | |
7102 | + | |
7103 | +Multiple variables can be assigned at the same time. The below would | |
7104 | +result in both ts0 and b being created as variables, with both | |
7105 | +common_timestamp and field1 additionally being summed as values: | |
7106 | + | |
7107 | + # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \ | |
7108 | + event/trigger | |
7109 | + | |
7110 | +Note that variable assignments can appear either preceding or | |
7111 | +following their use. The command below behaves identically to the | |
7112 | +command above: | |
7113 | + | |
7114 | + # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \ | |
7115 | + event/trigger | |
7116 | + | |
7117 | +Any number of variables not bound to a 'vals=' prefix can also be | |
7118 | +assigned by simply separating them with colons. Below is the same | |
7119 | +thing but without the values being summed in the histogram: | |
7120 | + | |
7121 | + # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger | |
7122 | + | |
7123 | +Variables set as above can be referenced and used in expressions on | |
7124 | +another event. | |
7125 | + | |
7126 | +For example, here's how a latency can be calculated: | |
7127 | + | |
7128 | + # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger | |
7129 | + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger | |
7130 | + | |
7131 | +In the first line above, the event's timestamp is saved into the | |
7132 | +variable ts0. In the next line, ts0 is subtracted from the second | |
7133 | +event's timestamp to produce the latency, which is then assigned into | |
7134 | +yet another variable, 'wakeup_lat'. The hist trigger below in turn | |
7135 | +makes use of the wakeup_lat variable to compute a combined latency | |
7136 | +using the same key and variable from yet another event: | |
7137 | + | |
7138 | + # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger | |
7139 | + | |
7140 | +2.2.2 Synthetic Events | |
7141 | +---------------------- | |
7142 | + | |
7143 | +Synthetic events are user-defined events generated from hist trigger | |
7144 | +variables or fields associated with one or more other events. Their | |
7145 | +purpose is to provide a mechanism for displaying data spanning | |
7146 | +multiple events consistent with the existing and already familiar | |
7147 | +usage for normal events. | |
7148 | + | |
7149 | +To define a synthetic event, the user writes a simple specification | |
7150 | +consisting of the name of the new event along with one or more | |
7151 | +variables and their types, which can be any valid field type, | |
7152 | +separated by semicolons, to the tracing/synthetic_events file. | |
7153 | + | |
7154 | +For instance, the following creates a new event named 'wakeup_latency' | |
7155 | +with 3 fields: lat, pid, and prio. Each of those fields is simply a | |
7156 | +variable reference to a variable on another event: | |
7157 | + | |
7158 | + # echo 'wakeup_latency \ | |
7159 | + u64 lat; \ | |
7160 | + pid_t pid; \ | |
7161 | + int prio' >> \ | |
7162 | + /sys/kernel/debug/tracing/synthetic_events | |
7163 | + | |
7164 | +Reading the tracing/synthetic_events file lists all the currently | |
7165 | +defined synthetic events, in this case the event defined above: | |
7166 | + | |
7167 | + # cat /sys/kernel/debug/tracing/synthetic_events | |
7168 | + wakeup_latency u64 lat; pid_t pid; int prio | |
7169 | + | |
7170 | +An existing synthetic event definition can be removed by prepending | |
7171 | +the command that defined it with a '!': | |
7172 | + | |
7173 | + # echo '!wakeup_latency u64 lat pid_t pid int prio' >> \ | |
7174 | + /sys/kernel/debug/tracing/synthetic_events | |
7175 | + | |
7176 | +At this point, there isn't yet an actual 'wakeup_latency' event | |
7177 | +instantiated in the event subsystem - for this to happen, a 'hist | |
7178 | +trigger action' needs to be instantiated and bound to actual fields | |
7179 | +and variables defined on other events (see Section 6.3.3 below). | |
7180 | + | |
7181 | +Once that is done, an event instance is created, and a histogram can | |
7182 | +be defined using it: | |
7183 | + | |
7184 | + # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ | |
7185 | + /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger | |
7186 | + | |
7187 | +The new event is created under the tracing/events/synthetic/ directory | |
7188 | +and looks and behaves just like any other event: | |
7189 | + | |
7190 | + # ls /sys/kernel/debug/tracing/events/synthetic/wakeup_latency | |
7191 | + enable filter format hist id trigger | |
7192 | + | |
7193 | +Like any other event, once a histogram is enabled for the event, the | |
7194 | +output can be displayed by reading the event's 'hist' file. | |
7195 | + | |
7196 | +2.2.3 Hist trigger 'actions' | |
7197 | +---------------------------- | |
7198 | + | |
7199 | +A hist trigger 'action' is a function that's executed whenever a | |
7200 | +histogram entry is added or updated. | |
7201 | + | |
7202 | +The default 'action' if no special function is explicitly specified is | |
7203 | +as it always has been, to simply update the set of values associated | |
7204 | +with an entry. Some applications, however, may want to perform | |
7205 | +additional actions at that point, such as generate another event, or | |
7206 | +compare and save a maximum. | |
7207 | + | |
7208 | +The following additional actions are available. To specify an action | |
7209 | +for a given event, simply specify the action between colons in the | |
7210 | +hist trigger specification. | |
7211 | + | |
7212 | + - onmatch(matching.event).<synthetic_event_name>(param list) | |
7213 | + | |
7214 | + The 'onmatch(matching.event).<synthetic_event_name>(params)' hist | |
7215 | + trigger action is invoked whenever an event matches and the | |
7216 | + histogram entry would be added or updated. It causes the named | |
7217 | + synthetic event to be generated with the values given in the | |
7218 | + 'param list'. The result is the generation of a synthetic event | |
7219 | + that consists of the values contained in those variables at the | |
7220 | + time the invoking event was hit. | |
7221 | + | |
7222 | + The 'param list' consists of one or more parameters which may be | |
7223 | + either variables or fields defined on either the 'matching.event' | |
7224 | + or the target event. The variables or fields specified in the | |
7225 | + param list may be either fully-qualified or unqualified. If a | |
7226 | + variable is specified as unqualified, it must be unique between | |
7227 | + the two events. A field name used as a param can be unqualified | |
7228 | + if it refers to the target event, but must be fully qualified if | |
7229 | + it refers to the matching event. A fully-qualified name is of the | |
7230 | + form 'system.event_name.$var_name' or 'system.event_name.field'. | |
7231 | + | |
7232 | + The 'matching.event' specification is simply the fully qualified | |
7233 | + event name of the event that matches the target event for the | |
7234 | + onmatch() functionality, in the form 'system.event_name'. | |
7235 | + | |
7236 | + Finally, the number and type of variables/fields in the 'param | |
7237 | + list' must match the number and types of the fields in the | |
7238 | + synthetic event being generated. | |
7239 | + | |
7240 | + As an example the below defines a simple synthetic event and uses | |
7241 | + a variable defined on the sched_wakeup_new event as a parameter | |
7242 | + when invoking the synthetic event. Here we define the synthetic | |
7243 | + event: | |
7244 | + | |
7245 | + # echo 'wakeup_new_test pid_t pid' >> \ | |
7246 | + /sys/kernel/debug/tracing/synthetic_events | |
7247 | + | |
7248 | + # cat /sys/kernel/debug/tracing/synthetic_events | |
7249 | + wakeup_new_test pid_t pid | |
7250 | + | |
7251 | + The following hist trigger both defines the missing testpid | |
7252 | + variable and specifies an onmatch() action that generates a | |
7253 | + wakeup_new_test synthetic event whenever a sched_wakeup_new event | |
7254 | + occurs, which because of the 'if comm == "cyclictest"' filter only | |
7255 | + happens when the executable is cyclictest: | |
7256 | + | |
7257 | + # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ | |
7258 | + wakeup_new_test($testpid) if comm=="cyclictest"' >> \ | |
7259 | + /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger | |
7260 | + | |
7261 | + Creating and displaying a histogram based on those events is now | |
7262 | + just a matter of using the fields and new synthetic event in the | |
7263 | + tracing/events/synthetic directory, as usual: | |
7264 | + | |
7265 | + # echo 'hist:keys=pid:sort=pid' >> \ | |
7266 | + /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/trigger | |
7267 | + | |
7268 | + Running 'cyclictest' should cause wakeup_new events to generate | |
7269 | + wakeup_new_test synthetic events which should result in histogram | |
7270 | + output in the wakeup_new_test event's hist file: | |
7271 | + | |
7272 | + # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_new_test/hist | |
7273 | + | |
7274 | + A more typical usage would be to use two events to calculate a | |
7275 | + latency. The following example uses a set of hist triggers to | |
7276 | + produce a 'wakeup_latency' histogram: | |
7277 | + | |
7278 | + First, we define a 'wakeup_latency' synthetic event: | |
7279 | + | |
7280 | + # echo 'wakeup_latency u64 lat; pid_t pid; int prio' >> \ | |
7281 | + /sys/kernel/debug/tracing/synthetic_events | |
7282 | + | |
7283 | + Next, we specify that whenever we see a sched_waking event for a | |
7284 | + cyclictest thread, save the timestamp in a 'ts0' variable: | |
7285 | + | |
7286 | + # echo 'hist:keys=$saved_pid:saved_pid=pid:ts0=common_timestamp.usecs \ | |
7287 | + if comm=="cyclictest"' >> \ | |
7288 | + /sys/kernel/debug/tracing/events/sched/sched_waking/trigger | |
7289 | + | |
7290 | + Then, when the corresponding thread is actually scheduled onto the | |
7291 | + CPU by a sched_switch event, calculate the latency and use that | |
7292 | + along with another variable and an event field to generate a | |
7293 | + wakeup_latency synthetic event: | |
7294 | + | |
7295 | + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:\ | |
7296 | + onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,\ | |
7297 | + $saved_pid,next_prio) if next_comm=="cyclictest"' >> \ | |
7298 | + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger | |
7299 | + | |
7300 | + We also need to create a histogram on the wakeup_latency synthetic | |
7301 | + event in order to aggregate the generated synthetic event data: | |
7302 | + | |
7303 | + # echo 'hist:keys=pid,prio,lat:sort=pid,lat' >> \ | |
7304 | + /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger | |
7305 | + | |
7306 | + Finally, once we've run cyclictest to actually generate some | |
7307 | + events, we can see the output by looking at the wakeup_latency | |
7308 | + synthetic event's hist file: | |
7309 | + | |
7310 | + # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist | |
7311 | + | |
7312 | + - onmax(var).save(field,.. .) | |
7313 | + | |
7314 | + The 'onmax(var).save(field,...)' hist trigger action is invoked | |
7315 | + whenever the value of 'var' associated with a histogram entry | |
7316 | + exceeds the current maximum contained in that variable. | |
7317 | + | |
7318 | + The end result is that the trace event fields specified as the | |
7319 | + onmax.save() params will be saved if 'var' exceeds the current | |
7320 | + maximum for that hist trigger entry. This allows context from the | |
7321 | + event that exhibited the new maximum to be saved for later | |
7322 | + reference. When the histogram is displayed, additional fields | |
7323 | + displaying the saved values will be printed. | |
7324 | + | |
7325 | + As an example the below defines a couple of hist triggers, one for | |
7326 | + sched_waking and another for sched_switch, keyed on pid. Whenever | |
7327 | + a sched_waking occurs, the timestamp is saved in the entry | |
7328 | + corresponding to the current pid, and when the scheduler switches | |
7329 | + back to that pid, the timestamp difference is calculated. If the | |
7330 | + resulting latency, stored in wakeup_lat, exceeds the current | |
7331 | + maximum latency, the values specified in the save() fields are | |
7332 | + recoreded: | |
7333 | + | |
7334 | + # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ | |
7335 | + if comm=="cyclictest"' >> \ | |
7336 | + /sys/kernel/debug/tracing/events/sched/sched_waking/trigger | |
7337 | + | |
7338 | + # echo 'hist:keys=next_pid:\ | |
7339 | + wakeup_lat=common_timestamp.usecs-$ts0:\ | |
7340 | + onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) \ | |
7341 | + if next_comm=="cyclictest"' >> \ | |
7342 | + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger | |
7343 | + | |
7344 | + When the histogram is displayed, the max value and the saved | |
7345 | + values corresponding to the max are displayed following the rest | |
7346 | + of the fields: | |
7347 | + | |
7348 | + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist | |
7349 | + { next_pid: 2255 } hitcount: 239 | |
7350 | + common_timestamp-ts0: 0 | |
7351 | + max: 27 | |
7352 | + next_comm: cyclictest | |
7353 | + prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 | |
7354 | + | |
7355 | + { next_pid: 2256 } hitcount: 2355 | |
7356 | + common_timestamp-ts0: 0 | |
7357 | + max: 49 next_comm: cyclictest | |
7358 | + prev_pid: 0 prev_prio: 120 prev_comm: swapper/0 | |
7359 | + | |
7360 | + Totals: | |
7361 | + Hits: 12970 | |
7362 | + Entries: 2 | |
7363 | + Dropped: 0 | |
7364 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/acpi/acpica/acglobal.h linux-4.14/drivers/acpi/acpica/acglobal.h | |
7365 | --- linux-4.14.orig/drivers/acpi/acpica/acglobal.h 2017-11-12 19:46:13.000000000 +0100 | |
7366 | +++ linux-4.14/drivers/acpi/acpica/acglobal.h 2018-09-05 11:05:07.000000000 +0200 | |
7367 | @@ -116,7 +116,7 @@ | |
7368 | * interrupt level | |
7369 | */ | |
7370 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
7371 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
7372 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
7373 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
1a6e0f06 | 7374 | |
e4b2b4a8 JK |
7375 | /* Mutex for _OSI support */ |
7376 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/acpi/acpica/hwregs.c linux-4.14/drivers/acpi/acpica/hwregs.c | |
7377 | --- linux-4.14.orig/drivers/acpi/acpica/hwregs.c 2017-11-12 19:46:13.000000000 +0100 | |
7378 | +++ linux-4.14/drivers/acpi/acpica/hwregs.c 2018-09-05 11:05:07.000000000 +0200 | |
7379 | @@ -428,14 +428,14 @@ | |
7380 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
7381 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
1a6e0f06 | 7382 | |
e4b2b4a8 JK |
7383 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); |
7384 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
1a6e0f06 | 7385 | |
e4b2b4a8 | 7386 | /* Clear the fixed events in PM1 A/B */ |
1a6e0f06 | 7387 | |
e4b2b4a8 JK |
7388 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, |
7389 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
1a6e0f06 | 7390 | |
e4b2b4a8 JK |
7391 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); |
7392 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
1a6e0f06 | 7393 | |
e4b2b4a8 JK |
7394 | if (ACPI_FAILURE(status)) { |
7395 | goto exit; | |
7396 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/acpi/acpica/hwxface.c linux-4.14/drivers/acpi/acpica/hwxface.c | |
7397 | --- linux-4.14.orig/drivers/acpi/acpica/hwxface.c 2017-11-12 19:46:13.000000000 +0100 | |
7398 | +++ linux-4.14/drivers/acpi/acpica/hwxface.c 2018-09-05 11:05:07.000000000 +0200 | |
7399 | @@ -373,7 +373,7 @@ | |
7400 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
7401 | } | |
1a6e0f06 | 7402 | |
e4b2b4a8 JK |
7403 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); |
7404 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
1a6e0f06 | 7405 | |
e4b2b4a8 JK |
7406 | /* |
7407 | * At this point, we know that the parent register is one of the | |
7408 | @@ -434,7 +434,7 @@ | |
1a6e0f06 | 7409 | |
e4b2b4a8 JK |
7410 | unlock_and_exit: |
7411 | ||
7412 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
7413 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
7414 | return_ACPI_STATUS(status); | |
7415 | } | |
7416 | ||
7417 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/acpi/acpica/utmutex.c linux-4.14/drivers/acpi/acpica/utmutex.c | |
7418 | --- linux-4.14.orig/drivers/acpi/acpica/utmutex.c 2017-11-12 19:46:13.000000000 +0100 | |
7419 | +++ linux-4.14/drivers/acpi/acpica/utmutex.c 2018-09-05 11:05:07.000000000 +0200 | |
7420 | @@ -88,7 +88,7 @@ | |
7421 | return_ACPI_STATUS (status); | |
7422 | } | |
7423 | ||
7424 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
7425 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
7426 | if (ACPI_FAILURE (status)) { | |
7427 | return_ACPI_STATUS (status); | |
7428 | } | |
7429 | @@ -145,7 +145,7 @@ | |
7430 | /* Delete the spinlocks */ | |
7431 | ||
7432 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
7433 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
7434 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
7435 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
7436 | ||
7437 | /* Delete the reader/writer lock */ | |
7438 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ata/libata-sff.c linux-4.14/drivers/ata/libata-sff.c | |
7439 | --- linux-4.14.orig/drivers/ata/libata-sff.c 2017-11-12 19:46:13.000000000 +0100 | |
7440 | +++ linux-4.14/drivers/ata/libata-sff.c 2018-09-05 11:05:07.000000000 +0200 | |
7441 | @@ -679,9 +679,9 @@ | |
7442 | unsigned long flags; | |
7443 | unsigned int consumed; | |
7444 | ||
7445 | - local_irq_save(flags); | |
7446 | + local_irq_save_nort(flags); | |
7447 | consumed = ata_sff_data_xfer32(qc, buf, buflen, rw); | |
7448 | - local_irq_restore(flags); | |
7449 | + local_irq_restore_nort(flags); | |
7450 | ||
7451 | return consumed; | |
7452 | } | |
7453 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/block/brd.c linux-4.14/drivers/block/brd.c | |
7454 | --- linux-4.14.orig/drivers/block/brd.c 2017-11-12 19:46:13.000000000 +0100 | |
7455 | +++ linux-4.14/drivers/block/brd.c 2018-09-05 11:05:07.000000000 +0200 | |
7456 | @@ -60,7 +60,6 @@ | |
7457 | /* | |
7458 | * Look up and return a brd's page for a given sector. | |
7459 | */ | |
7460 | -static DEFINE_MUTEX(brd_mutex); | |
7461 | static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector) | |
7462 | { | |
7463 | pgoff_t idx; | |
7464 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/block/zram/zcomp.c linux-4.14/drivers/block/zram/zcomp.c | |
7465 | --- linux-4.14.orig/drivers/block/zram/zcomp.c 2017-11-12 19:46:13.000000000 +0100 | |
7466 | +++ linux-4.14/drivers/block/zram/zcomp.c 2018-09-05 11:05:07.000000000 +0200 | |
7467 | @@ -116,12 +116,20 @@ | |
1a6e0f06 JK |
7468 | |
7469 | struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) | |
7470 | { | |
7471 | - return *get_cpu_ptr(comp->stream); | |
7472 | + struct zcomp_strm *zstrm; | |
7473 | + | |
e4b2b4a8 | 7474 | + zstrm = *get_local_ptr(comp->stream); |
1a6e0f06 JK |
7475 | + spin_lock(&zstrm->zcomp_lock); |
7476 | + return zstrm; | |
7477 | } | |
7478 | ||
7479 | void zcomp_stream_put(struct zcomp *comp) | |
7480 | { | |
7481 | - put_cpu_ptr(comp->stream); | |
7482 | + struct zcomp_strm *zstrm; | |
7483 | + | |
7484 | + zstrm = *this_cpu_ptr(comp->stream); | |
7485 | + spin_unlock(&zstrm->zcomp_lock); | |
e4b2b4a8 | 7486 | + put_local_ptr(zstrm); |
1a6e0f06 JK |
7487 | } |
7488 | ||
7489 | int zcomp_compress(struct zcomp_strm *zstrm, | |
e4b2b4a8 JK |
7490 | @@ -171,6 +179,7 @@ |
7491 | pr_err("Can't allocate a compression stream\n"); | |
7492 | return -ENOMEM; | |
7493 | } | |
7494 | + spin_lock_init(&zstrm->zcomp_lock); | |
7495 | *per_cpu_ptr(comp->stream, cpu) = zstrm; | |
7496 | return 0; | |
7497 | } | |
7498 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/block/zram/zcomp.h linux-4.14/drivers/block/zram/zcomp.h | |
7499 | --- linux-4.14.orig/drivers/block/zram/zcomp.h 2017-11-12 19:46:13.000000000 +0100 | |
7500 | +++ linux-4.14/drivers/block/zram/zcomp.h 2018-09-05 11:05:07.000000000 +0200 | |
7501 | @@ -14,6 +14,7 @@ | |
1a6e0f06 JK |
7502 | /* compression/decompression buffer */ |
7503 | void *buffer; | |
7504 | struct crypto_comp *tfm; | |
7505 | + spinlock_t zcomp_lock; | |
7506 | }; | |
7507 | ||
7508 | /* dynamic per-device compression frontend */ | |
e4b2b4a8 JK |
7509 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/block/zram/zram_drv.c linux-4.14/drivers/block/zram/zram_drv.c |
7510 | --- linux-4.14.orig/drivers/block/zram/zram_drv.c 2017-11-12 19:46:13.000000000 +0100 | |
7511 | +++ linux-4.14/drivers/block/zram/zram_drv.c 2018-09-05 11:05:07.000000000 +0200 | |
7512 | @@ -756,6 +756,30 @@ | |
7513 | static DEVICE_ATTR_RO(mm_stat); | |
7514 | static DEVICE_ATTR_RO(debug_stat); | |
1a6e0f06 | 7515 | |
e4b2b4a8 JK |
7516 | +#ifdef CONFIG_PREEMPT_RT_BASE |
7517 | +static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) | |
7518 | +{ | |
7519 | + size_t index; | |
7520 | + | |
7521 | + for (index = 0; index < num_pages; index++) | |
7522 | + spin_lock_init(&zram->table[index].lock); | |
7523 | +} | |
7524 | + | |
7525 | +static void zram_slot_lock(struct zram *zram, u32 index) | |
7526 | +{ | |
7527 | + spin_lock(&zram->table[index].lock); | |
7528 | + __set_bit(ZRAM_ACCESS, &zram->table[index].value); | |
7529 | +} | |
7530 | + | |
7531 | +static void zram_slot_unlock(struct zram *zram, u32 index) | |
7532 | +{ | |
7533 | + __clear_bit(ZRAM_ACCESS, &zram->table[index].value); | |
7534 | + spin_unlock(&zram->table[index].lock); | |
7535 | +} | |
7536 | + | |
7537 | +#else | |
7538 | +static void zram_meta_init_table_locks(struct zram *zram, size_t num_pages) { } | |
1a6e0f06 | 7539 | + |
e4b2b4a8 JK |
7540 | static void zram_slot_lock(struct zram *zram, u32 index) |
7541 | { | |
7542 | bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value); | |
7543 | @@ -765,6 +789,7 @@ | |
7544 | { | |
7545 | bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value); | |
7546 | } | |
7547 | +#endif | |
1a6e0f06 | 7548 | |
e4b2b4a8 JK |
7549 | static void zram_meta_free(struct zram *zram, u64 disksize) |
7550 | { | |
7551 | @@ -794,6 +819,7 @@ | |
7552 | return false; | |
7553 | } | |
7554 | ||
7555 | + zram_meta_init_table_locks(zram, num_pages); | |
7556 | return true; | |
7557 | } | |
7558 | ||
7559 | @@ -845,6 +871,7 @@ | |
1a6e0f06 JK |
7560 | unsigned long handle; |
7561 | unsigned int size; | |
e4b2b4a8 | 7562 | void *src, *dst; |
1a6e0f06 JK |
7563 | + struct zcomp_strm *zstrm; |
7564 | ||
e4b2b4a8 JK |
7565 | if (zram_wb_enabled(zram)) { |
7566 | zram_slot_lock(zram, index); | |
7567 | @@ -879,6 +906,7 @@ | |
1a6e0f06 | 7568 | |
e4b2b4a8 | 7569 | size = zram_get_obj_size(zram, index); |
1a6e0f06 JK |
7570 | |
7571 | + zstrm = zcomp_stream_get(zram->comp); | |
e4b2b4a8 | 7572 | src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO); |
1a6e0f06 | 7573 | if (size == PAGE_SIZE) { |
e4b2b4a8 JK |
7574 | dst = kmap_atomic(page); |
7575 | @@ -886,14 +914,13 @@ | |
7576 | kunmap_atomic(dst); | |
7577 | ret = 0; | |
1a6e0f06 JK |
7578 | } else { |
7579 | - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
e4b2b4a8 JK |
7580 | |
7581 | dst = kmap_atomic(page); | |
7582 | ret = zcomp_decompress(zstrm, src, size, dst); | |
7583 | kunmap_atomic(dst); | |
1a6e0f06 JK |
7584 | - zcomp_stream_put(zram->comp); |
7585 | } | |
e4b2b4a8 | 7586 | zs_unmap_object(zram->mem_pool, handle); |
1a6e0f06 | 7587 | + zcomp_stream_put(zram->comp); |
e4b2b4a8 | 7588 | zram_slot_unlock(zram, index); |
1a6e0f06 JK |
7589 | |
7590 | /* Should NEVER happen. Return bio error if it does. */ | |
e4b2b4a8 JK |
7591 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/block/zram/zram_drv.h linux-4.14/drivers/block/zram/zram_drv.h |
7592 | --- linux-4.14.orig/drivers/block/zram/zram_drv.h 2017-11-12 19:46:13.000000000 +0100 | |
7593 | +++ linux-4.14/drivers/block/zram/zram_drv.h 2018-09-05 11:05:07.000000000 +0200 | |
7594 | @@ -77,6 +77,9 @@ | |
7595 | unsigned long element; | |
7596 | }; | |
1a6e0f06 JK |
7597 | unsigned long value; |
7598 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7599 | + spinlock_t lock; | |
7600 | +#endif | |
7601 | }; | |
7602 | ||
7603 | struct zram_stats { | |
e4b2b4a8 JK |
7604 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/char/random.c linux-4.14/drivers/char/random.c |
7605 | --- linux-4.14.orig/drivers/char/random.c 2018-09-05 11:03:20.000000000 +0200 | |
7606 | +++ linux-4.14/drivers/char/random.c 2018-09-05 11:05:07.000000000 +0200 | |
7607 | @@ -265,6 +265,7 @@ | |
7608 | #include <linux/syscalls.h> | |
7609 | #include <linux/completion.h> | |
7610 | #include <linux/uuid.h> | |
7611 | +#include <linux/locallock.h> | |
7612 | #include <crypto/chacha20.h> | |
7613 | ||
7614 | #include <asm/processor.h> | |
7615 | @@ -856,7 +857,7 @@ | |
7616 | invalidate_batched_entropy(); | |
7617 | crng_init = 1; | |
7618 | wake_up_interruptible(&crng_init_wait); | |
7619 | - pr_notice("random: fast init done\n"); | |
7620 | + /* pr_notice("random: fast init done\n"); */ | |
7621 | } | |
7622 | return 1; | |
7623 | } | |
7624 | @@ -941,17 +942,21 @@ | |
7625 | crng_init = 2; | |
7626 | process_random_ready_list(); | |
7627 | wake_up_interruptible(&crng_init_wait); | |
7628 | - pr_notice("random: crng init done\n"); | |
7629 | + /* pr_notice("random: crng init done\n"); */ | |
7630 | if (unseeded_warning.missed) { | |
7631 | +#if 0 | |
7632 | pr_notice("random: %d get_random_xx warning(s) missed " | |
7633 | "due to ratelimiting\n", | |
7634 | unseeded_warning.missed); | |
7635 | +#endif | |
7636 | unseeded_warning.missed = 0; | |
7637 | } | |
7638 | if (urandom_warning.missed) { | |
7639 | +#if 0 | |
7640 | pr_notice("random: %d urandom warning(s) missed " | |
7641 | "due to ratelimiting\n", | |
7642 | urandom_warning.missed); | |
7643 | +#endif | |
7644 | urandom_warning.missed = 0; | |
7645 | } | |
7646 | } | |
7647 | @@ -1122,8 +1127,6 @@ | |
1a6e0f06 JK |
7648 | } sample; |
7649 | long delta, delta2, delta3; | |
7650 | ||
7651 | - preempt_disable(); | |
7652 | - | |
7653 | sample.jiffies = jiffies; | |
7654 | sample.cycles = random_get_entropy(); | |
7655 | sample.num = num; | |
e4b2b4a8 | 7656 | @@ -1164,7 +1167,6 @@ |
1a6e0f06 JK |
7657 | */ |
7658 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
7659 | } | |
7660 | - preempt_enable(); | |
7661 | } | |
7662 | ||
7663 | void add_input_randomness(unsigned int type, unsigned int code, | |
e4b2b4a8 JK |
7664 | @@ -1221,28 +1223,27 @@ |
7665 | return *ptr; | |
1a6e0f06 JK |
7666 | } |
7667 | ||
7668 | -void add_interrupt_randomness(int irq, int irq_flags) | |
7669 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
7670 | { | |
7671 | struct entropy_store *r; | |
7672 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
7673 | - struct pt_regs *regs = get_irq_regs(); | |
7674 | unsigned long now = jiffies; | |
7675 | cycles_t cycles = random_get_entropy(); | |
7676 | __u32 c_high, j_high; | |
7677 | - __u64 ip; | |
7678 | unsigned long seed; | |
7679 | int credit = 0; | |
7680 | ||
7681 | if (cycles == 0) | |
7682 | - cycles = get_reg(fast_pool, regs); | |
7683 | + cycles = get_reg(fast_pool, NULL); | |
7684 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
7685 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
7686 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
7687 | fast_pool->pool[1] ^= now ^ c_high; | |
7688 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
7689 | + if (!ip) | |
7690 | + ip = _RET_IP_; | |
7691 | fast_pool->pool[2] ^= ip; | |
7692 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
7693 | - get_reg(fast_pool, regs); | |
7694 | + get_reg(fast_pool, NULL); | |
7695 | ||
7696 | fast_mix(fast_pool); | |
7697 | add_interrupt_bench(cycles); | |
e4b2b4a8 JK |
7698 | @@ -2200,6 +2201,7 @@ |
7699 | * at any point prior. | |
7700 | */ | |
7701 | static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); | |
7702 | +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_u64_lock); | |
7703 | u64 get_random_u64(void) | |
7704 | { | |
7705 | u64 ret; | |
7706 | @@ -2220,7 +2222,7 @@ | |
7707 | warn_unseeded_randomness(&previous); | |
7708 | ||
7709 | use_lock = READ_ONCE(crng_init) < 2; | |
7710 | - batch = &get_cpu_var(batched_entropy_u64); | |
7711 | + batch = &get_locked_var(batched_entropy_u64_lock, batched_entropy_u64); | |
7712 | if (use_lock) | |
7713 | read_lock_irqsave(&batched_entropy_reset_lock, flags); | |
7714 | if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { | |
7715 | @@ -2230,12 +2232,13 @@ | |
7716 | ret = batch->entropy_u64[batch->position++]; | |
7717 | if (use_lock) | |
7718 | read_unlock_irqrestore(&batched_entropy_reset_lock, flags); | |
7719 | - put_cpu_var(batched_entropy_u64); | |
7720 | + put_locked_var(batched_entropy_u64_lock, batched_entropy_u64); | |
7721 | return ret; | |
7722 | } | |
7723 | EXPORT_SYMBOL(get_random_u64); | |
7724 | ||
7725 | static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); | |
7726 | +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_u32_lock); | |
7727 | u32 get_random_u32(void) | |
7728 | { | |
7729 | u32 ret; | |
7730 | @@ -2250,7 +2253,7 @@ | |
7731 | warn_unseeded_randomness(&previous); | |
7732 | ||
7733 | use_lock = READ_ONCE(crng_init) < 2; | |
7734 | - batch = &get_cpu_var(batched_entropy_u32); | |
7735 | + batch = &get_locked_var(batched_entropy_u32_lock, batched_entropy_u32); | |
7736 | if (use_lock) | |
7737 | read_lock_irqsave(&batched_entropy_reset_lock, flags); | |
7738 | if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { | |
7739 | @@ -2260,7 +2263,7 @@ | |
7740 | ret = batch->entropy_u32[batch->position++]; | |
7741 | if (use_lock) | |
7742 | read_unlock_irqrestore(&batched_entropy_reset_lock, flags); | |
7743 | - put_cpu_var(batched_entropy_u32); | |
7744 | + put_locked_var(batched_entropy_u32_lock, batched_entropy_u32); | |
7745 | return ret; | |
7746 | } | |
7747 | EXPORT_SYMBOL(get_random_u32); | |
7748 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/char/tpm/tpm_tis.c linux-4.14/drivers/char/tpm/tpm_tis.c | |
7749 | --- linux-4.14.orig/drivers/char/tpm/tpm_tis.c 2018-09-05 11:03:20.000000000 +0200 | |
7750 | +++ linux-4.14/drivers/char/tpm/tpm_tis.c 2018-09-05 11:05:07.000000000 +0200 | |
7751 | @@ -52,6 +52,31 @@ | |
7752 | return container_of(data, struct tpm_tis_tcg_phy, priv); | |
7753 | } | |
7754 | ||
7755 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7756 | +/* | |
7757 | + * Flushes previous write operations to chip so that a subsequent | |
7758 | + * ioread*()s won't stall a cpu. | |
7759 | + */ | |
7760 | +static inline void tpm_tis_flush(void __iomem *iobase) | |
7761 | +{ | |
7762 | + ioread8(iobase + TPM_ACCESS(0)); | |
7763 | +} | |
7764 | +#else | |
7765 | +#define tpm_tis_flush(iobase) do { } while (0) | |
7766 | +#endif | |
7767 | + | |
7768 | +static inline void tpm_tis_iowrite8(u8 b, void __iomem *iobase, u32 addr) | |
7769 | +{ | |
7770 | + iowrite8(b, iobase + addr); | |
7771 | + tpm_tis_flush(iobase); | |
7772 | +} | |
7773 | + | |
7774 | +static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr) | |
7775 | +{ | |
7776 | + iowrite32(b, iobase + addr); | |
7777 | + tpm_tis_flush(iobase); | |
7778 | +} | |
7779 | + | |
7780 | static bool interrupts = true; | |
7781 | module_param(interrupts, bool, 0444); | |
7782 | MODULE_PARM_DESC(interrupts, "Enable interrupts"); | |
7783 | @@ -149,7 +174,7 @@ | |
7784 | struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); | |
7785 | ||
7786 | while (len--) | |
7787 | - iowrite8(*value++, phy->iobase + addr); | |
7788 | + tpm_tis_iowrite8(*value++, phy->iobase, addr); | |
7789 | ||
7790 | return 0; | |
7791 | } | |
7792 | @@ -176,7 +201,7 @@ | |
7793 | { | |
7794 | struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); | |
7795 | ||
7796 | - iowrite32(value, phy->iobase + addr); | |
7797 | + tpm_tis_iowrite32(value, phy->iobase, addr); | |
7798 | ||
7799 | return 0; | |
7800 | } | |
7801 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/clocksource/tcb_clksrc.c linux-4.14/drivers/clocksource/tcb_clksrc.c | |
7802 | --- linux-4.14.orig/drivers/clocksource/tcb_clksrc.c 2017-11-12 19:46:13.000000000 +0100 | |
7803 | +++ linux-4.14/drivers/clocksource/tcb_clksrc.c 2018-09-05 11:05:07.000000000 +0200 | |
7804 | @@ -25,8 +25,7 @@ | |
1a6e0f06 JK |
7805 | * this 32 bit free-running counter. the second channel is not used. |
7806 | * | |
7807 | * - The third channel may be used to provide a 16-bit clockevent | |
7808 | - * source, used in either periodic or oneshot mode. This runs | |
7809 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
7810 | + * source, used in either periodic or oneshot mode. | |
7811 | * | |
7812 | * A boot clocksource and clockevent source are also currently needed, | |
7813 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
e4b2b4a8 | 7814 | @@ -126,6 +125,8 @@ |
1a6e0f06 JK |
7815 | struct tc_clkevt_device { |
7816 | struct clock_event_device clkevt; | |
7817 | struct clk *clk; | |
7818 | + bool clk_enabled; | |
7819 | + u32 freq; | |
7820 | void __iomem *regs; | |
7821 | }; | |
7822 | ||
e4b2b4a8 | 7823 | @@ -134,15 +135,26 @@ |
1a6e0f06 JK |
7824 | return container_of(clkevt, struct tc_clkevt_device, clkevt); |
7825 | } | |
7826 | ||
7827 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
7828 | - * because using one of the divided clocks would usually mean the | |
7829 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
7830 | - * | |
7831 | - * A divided clock could be good for high resolution timers, since | |
7832 | - * 30.5 usec resolution can seem "low". | |
7833 | - */ | |
7834 | static u32 timer_clock; | |
7835 | ||
7836 | +static void tc_clk_disable(struct clock_event_device *d) | |
7837 | +{ | |
7838 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
7839 | + | |
7840 | + clk_disable(tcd->clk); | |
7841 | + tcd->clk_enabled = false; | |
7842 | +} | |
7843 | + | |
7844 | +static void tc_clk_enable(struct clock_event_device *d) | |
7845 | +{ | |
7846 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
7847 | + | |
7848 | + if (tcd->clk_enabled) | |
7849 | + return; | |
7850 | + clk_enable(tcd->clk); | |
7851 | + tcd->clk_enabled = true; | |
7852 | +} | |
7853 | + | |
7854 | static int tc_shutdown(struct clock_event_device *d) | |
7855 | { | |
7856 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
e4b2b4a8 | 7857 | @@ -150,8 +162,14 @@ |
1a6e0f06 | 7858 | |
e4b2b4a8 JK |
7859 | writel(0xff, regs + ATMEL_TC_REG(2, IDR)); |
7860 | writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
1a6e0f06 JK |
7861 | + return 0; |
7862 | +} | |
7863 | + | |
7864 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
7865 | +{ | |
7866 | + tc_shutdown(d); | |
7867 | if (!clockevent_state_detached(d)) | |
7868 | - clk_disable(tcd->clk); | |
7869 | + tc_clk_disable(d); | |
7870 | ||
7871 | return 0; | |
7872 | } | |
e4b2b4a8 | 7873 | @@ -164,9 +182,9 @@ |
1a6e0f06 JK |
7874 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) |
7875 | tc_shutdown(d); | |
7876 | ||
7877 | - clk_enable(tcd->clk); | |
7878 | + tc_clk_enable(d); | |
7879 | ||
7880 | - /* slow clock, count up to RC, then irq and stop */ | |
7881 | + /* count up to RC, then irq and stop */ | |
e4b2b4a8 | 7882 | writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | |
1a6e0f06 | 7883 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); |
e4b2b4a8 JK |
7884 | writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); |
7885 | @@ -186,12 +204,12 @@ | |
1a6e0f06 JK |
7886 | /* By not making the gentime core emulate periodic mode on top |
7887 | * of oneshot, we get lower overhead and improved accuracy. | |
7888 | */ | |
7889 | - clk_enable(tcd->clk); | |
7890 | + tc_clk_enable(d); | |
7891 | ||
7892 | - /* slow clock, count up to RC, then irq and restart */ | |
7893 | + /* count up to RC, then irq and restart */ | |
e4b2b4a8 | 7894 | writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, |
1a6e0f06 | 7895 | regs + ATMEL_TC_REG(2, CMR)); |
e4b2b4a8 JK |
7896 | - writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); |
7897 | + writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
1a6e0f06 JK |
7898 | |
7899 | /* Enable clock and interrupts on RC compare */ | |
e4b2b4a8 JK |
7900 | writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); |
7901 | @@ -218,9 +236,13 @@ | |
1a6e0f06 JK |
7902 | .features = CLOCK_EVT_FEAT_PERIODIC | |
7903 | CLOCK_EVT_FEAT_ONESHOT, | |
7904 | /* Should be lower than at91rm9200's system timer */ | |
7905 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
7906 | .rating = 125, | |
7907 | +#else | |
7908 | + .rating = 200, | |
7909 | +#endif | |
7910 | .set_next_event = tc_next_event, | |
7911 | - .set_state_shutdown = tc_shutdown, | |
7912 | + .set_state_shutdown = tc_shutdown_clk_off, | |
7913 | .set_state_periodic = tc_set_periodic, | |
7914 | .set_state_oneshot = tc_set_oneshot, | |
7915 | }, | |
e4b2b4a8 | 7916 | @@ -240,8 +262,9 @@ |
1a6e0f06 JK |
7917 | return IRQ_NONE; |
7918 | } | |
7919 | ||
7920 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
7921 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
7922 | { | |
7923 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
7924 | int ret; | |
7925 | struct clk *t2_clk = tc->clk[2]; | |
7926 | int irq = tc->irq[2]; | |
e4b2b4a8 | 7927 | @@ -262,7 +285,11 @@ |
1a6e0f06 JK |
7928 | clkevt.regs = tc->regs; |
7929 | clkevt.clk = t2_clk; | |
7930 | ||
7931 | - timer_clock = clk32k_divisor_idx; | |
7932 | + timer_clock = divisor_idx; | |
7933 | + if (!divisor) | |
7934 | + clkevt.freq = 32768; | |
7935 | + else | |
7936 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
7937 | ||
7938 | clkevt.clkevt.cpumask = cpumask_of(0); | |
7939 | ||
e4b2b4a8 | 7940 | @@ -273,7 +300,7 @@ |
1a6e0f06 JK |
7941 | return ret; |
7942 | } | |
7943 | ||
7944 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
7945 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
7946 | ||
7947 | return ret; | |
7948 | } | |
e4b2b4a8 | 7949 | @@ -410,7 +437,11 @@ |
1a6e0f06 JK |
7950 | goto err_disable_t1; |
7951 | ||
7952 | /* channel 2: periodic and oneshot timer support */ | |
7953 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
7954 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
7955 | +#else | |
7956 | + ret = setup_clkevents(tc, best_divisor_idx); | |
7957 | +#endif | |
7958 | if (ret) | |
7959 | goto err_unregister_clksrc; | |
7960 | ||
e4b2b4a8 JK |
7961 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/clocksource/timer-atmel-pit.c linux-4.14/drivers/clocksource/timer-atmel-pit.c |
7962 | --- linux-4.14.orig/drivers/clocksource/timer-atmel-pit.c 2017-11-12 19:46:13.000000000 +0100 | |
7963 | +++ linux-4.14/drivers/clocksource/timer-atmel-pit.c 2018-09-05 11:05:07.000000000 +0200 | |
7964 | @@ -46,6 +46,7 @@ | |
1a6e0f06 JK |
7965 | u32 cycle; |
7966 | u32 cnt; | |
7967 | unsigned int irq; | |
7968 | + bool irq_requested; | |
7969 | struct clk *mck; | |
7970 | }; | |
7971 | ||
e4b2b4a8 | 7972 | @@ -96,15 +97,29 @@ |
1a6e0f06 JK |
7973 | |
7974 | /* disable irq, leaving the clocksource active */ | |
7975 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
7976 | + if (data->irq_requested) { | |
7977 | + free_irq(data->irq, data); | |
7978 | + data->irq_requested = false; | |
7979 | + } | |
7980 | return 0; | |
7981 | } | |
7982 | ||
7983 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
7984 | /* | |
7985 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
7986 | */ | |
7987 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
7988 | { | |
7989 | struct pit_data *data = clkevt_to_pit_data(dev); | |
7990 | + int ret; | |
7991 | + | |
7992 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
7993 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
7994 | + "at91_tick", data); | |
7995 | + if (ret) | |
7996 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
7997 | + | |
7998 | + data->irq_requested = true; | |
7999 | ||
8000 | /* update clocksource counter */ | |
8001 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
e4b2b4a8 | 8002 | @@ -230,15 +245,6 @@ |
1a6e0f06 JK |
8003 | return ret; |
8004 | } | |
8005 | ||
8006 | - /* Set up irq handler */ | |
8007 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
8008 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8009 | - "at91_tick", data); | |
8010 | - if (ret) { | |
8011 | - pr_err("Unable to setup IRQ\n"); | |
8012 | - return ret; | |
8013 | - } | |
8014 | - | |
8015 | /* Set up and register clockevents */ | |
8016 | data->clkevt.name = "pit"; | |
8017 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
e4b2b4a8 JK |
8018 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/clocksource/timer-atmel-st.c linux-4.14/drivers/clocksource/timer-atmel-st.c |
8019 | --- linux-4.14.orig/drivers/clocksource/timer-atmel-st.c 2017-11-12 19:46:13.000000000 +0100 | |
8020 | +++ linux-4.14/drivers/clocksource/timer-atmel-st.c 2018-09-05 11:05:07.000000000 +0200 | |
8021 | @@ -115,18 +115,29 @@ | |
1a6e0f06 JK |
8022 | last_crtr = read_CRTR(); |
8023 | } | |
8024 | ||
8025 | +static int atmel_st_irq; | |
8026 | + | |
8027 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
8028 | { | |
8029 | clkdev32k_disable_and_flush_irq(); | |
8030 | irqmask = 0; | |
8031 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
8032 | + free_irq(atmel_st_irq, regmap_st); | |
8033 | return 0; | |
8034 | } | |
8035 | ||
8036 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
8037 | { | |
8038 | + int ret; | |
8039 | + | |
8040 | clkdev32k_disable_and_flush_irq(); | |
8041 | ||
8042 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
8043 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8044 | + "at91_tick", regmap_st); | |
8045 | + if (ret) | |
8046 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
8047 | + | |
8048 | /* | |
8049 | * ALM for oneshot irqs, set by next_event() | |
8050 | * before 32 seconds have passed. | |
e4b2b4a8 | 8051 | @@ -139,8 +150,16 @@ |
1a6e0f06 JK |
8052 | |
8053 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
8054 | { | |
8055 | + int ret; | |
8056 | + | |
8057 | clkdev32k_disable_and_flush_irq(); | |
8058 | ||
8059 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
8060 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8061 | + "at91_tick", regmap_st); | |
8062 | + if (ret) | |
8063 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
8064 | + | |
8065 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
8066 | irqmask = AT91_ST_PITS; | |
8067 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
e4b2b4a8 | 8068 | @@ -198,7 +217,7 @@ |
1a6e0f06 JK |
8069 | { |
8070 | struct clk *sclk; | |
8071 | unsigned int sclk_rate, val; | |
8072 | - int irq, ret; | |
8073 | + int ret; | |
8074 | ||
8075 | regmap_st = syscon_node_to_regmap(node); | |
8076 | if (IS_ERR(regmap_st)) { | |
e4b2b4a8 | 8077 | @@ -212,21 +231,12 @@ |
1a6e0f06 JK |
8078 | regmap_read(regmap_st, AT91_ST_SR, &val); |
8079 | ||
8080 | /* Get the interrupts property */ | |
8081 | - irq = irq_of_parse_and_map(node, 0); | |
8082 | - if (!irq) { | |
8083 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
8084 | + if (!atmel_st_irq) { | |
8085 | pr_err("Unable to get IRQ from DT\n"); | |
8086 | return -EINVAL; | |
8087 | } | |
8088 | ||
8089 | - /* Make IRQs happen for the system timer */ | |
8090 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
8091 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8092 | - "at91_tick", regmap_st); | |
8093 | - if (ret) { | |
8094 | - pr_err("Unable to setup IRQ\n"); | |
8095 | - return ret; | |
8096 | - } | |
8097 | - | |
8098 | sclk = of_clk_get(node, 0); | |
8099 | if (IS_ERR(sclk)) { | |
8100 | pr_err("Unable to get slow clock\n"); | |
e4b2b4a8 JK |
8101 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/connector/cn_proc.c linux-4.14/drivers/connector/cn_proc.c |
8102 | --- linux-4.14.orig/drivers/connector/cn_proc.c 2017-11-12 19:46:13.000000000 +0100 | |
8103 | +++ linux-4.14/drivers/connector/cn_proc.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
8104 | @@ -32,6 +32,7 @@ |
8105 | #include <linux/pid_namespace.h> | |
8106 | ||
8107 | #include <linux/cn_proc.h> | |
8108 | +#include <linux/locallock.h> | |
8109 | ||
8110 | /* | |
8111 | * Size of a cn_msg followed by a proc_event structure. Since the | |
e4b2b4a8 | 8112 | @@ -54,10 +55,11 @@ |
1a6e0f06 JK |
8113 | |
8114 | /* proc_event_counts is used as the sequence number of the netlink message */ | |
8115 | static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; | |
8116 | +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); | |
8117 | ||
8118 | static inline void send_msg(struct cn_msg *msg) | |
8119 | { | |
8120 | - preempt_disable(); | |
8121 | + local_lock(send_msg_lock); | |
8122 | ||
8123 | msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; | |
8124 | ((struct proc_event *)msg->data)->cpu = smp_processor_id(); | |
e4b2b4a8 | 8125 | @@ -70,7 +72,7 @@ |
1a6e0f06 JK |
8126 | */ |
8127 | cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); | |
8128 | ||
8129 | - preempt_enable(); | |
8130 | + local_unlock(send_msg_lock); | |
8131 | } | |
8132 | ||
8133 | void proc_fork_connector(struct task_struct *task) | |
e4b2b4a8 JK |
8134 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/cpufreq/Kconfig.x86 linux-4.14/drivers/cpufreq/Kconfig.x86 |
8135 | --- linux-4.14.orig/drivers/cpufreq/Kconfig.x86 2017-11-12 19:46:13.000000000 +0100 | |
8136 | +++ linux-4.14/drivers/cpufreq/Kconfig.x86 2018-09-05 11:05:07.000000000 +0200 | |
8137 | @@ -125,7 +125,7 @@ | |
1a6e0f06 JK |
8138 | |
8139 | config X86_POWERNOW_K8 | |
8140 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
8141 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
8142 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
8143 | help | |
8144 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
8145 | Support for K10 and newer processors is now in acpi-cpufreq. | |
e4b2b4a8 JK |
8146 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/gpu/drm/i915/i915_gem_timeline.c linux-4.14/drivers/gpu/drm/i915/i915_gem_timeline.c |
8147 | --- linux-4.14.orig/drivers/gpu/drm/i915/i915_gem_timeline.c 2017-11-12 19:46:13.000000000 +0100 | |
8148 | +++ linux-4.14/drivers/gpu/drm/i915/i915_gem_timeline.c 2018-09-05 11:05:07.000000000 +0200 | |
8149 | @@ -33,11 +33,8 @@ | |
8150 | { | |
8151 | tl->fence_context = context; | |
8152 | tl->common = parent; | |
8153 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
8154 | - __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); | |
8155 | -#else | |
8156 | spin_lock_init(&tl->lock); | |
8157 | -#endif | |
8158 | + lockdep_set_class_and_name(&tl->lock, lockclass, lockname); | |
8159 | init_request_active(&tl->last_request, NULL); | |
8160 | INIT_LIST_HEAD(&tl->requests); | |
8161 | i915_syncmap_init(&tl->sync); | |
8162 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/gpu/drm/i915/i915_irq.c linux-4.14/drivers/gpu/drm/i915/i915_irq.c | |
8163 | --- linux-4.14.orig/drivers/gpu/drm/i915/i915_irq.c 2018-09-05 11:03:21.000000000 +0200 | |
8164 | +++ linux-4.14/drivers/gpu/drm/i915/i915_irq.c 2018-09-05 11:05:07.000000000 +0200 | |
8165 | @@ -867,6 +867,7 @@ | |
1a6e0f06 JK |
8166 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); |
8167 | ||
8168 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
8169 | + preempt_disable_rt(); | |
8170 | ||
8171 | /* Get optional system timestamp before query. */ | |
8172 | if (stime) | |
e4b2b4a8 | 8173 | @@ -918,6 +919,7 @@ |
1a6e0f06 JK |
8174 | *etime = ktime_get(); |
8175 | ||
8176 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
8177 | + preempt_enable_rt(); | |
8178 | ||
8179 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
8180 | ||
e4b2b4a8 JK |
8181 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/gpu/drm/i915/intel_sprite.c linux-4.14/drivers/gpu/drm/i915/intel_sprite.c |
8182 | --- linux-4.14.orig/drivers/gpu/drm/i915/intel_sprite.c 2018-09-05 11:03:21.000000000 +0200 | |
8183 | +++ linux-4.14/drivers/gpu/drm/i915/intel_sprite.c 2018-09-05 11:05:07.000000000 +0200 | |
8184 | @@ -36,6 +36,7 @@ | |
c7c16703 JK |
8185 | #include <drm/drm_rect.h> |
8186 | #include <drm/drm_atomic.h> | |
8187 | #include <drm/drm_plane_helper.h> | |
8188 | +#include <linux/locallock.h> | |
1a6e0f06 | 8189 | #include "intel_drv.h" |
c7c16703 | 8190 | #include "intel_frontbuffer.h" |
1a6e0f06 | 8191 | #include <drm/i915_drm.h> |
e4b2b4a8 | 8192 | @@ -67,7 +68,7 @@ |
1a6e0f06 JK |
8193 | } |
8194 | ||
e4b2b4a8 JK |
8195 | #define VBLANK_EVASION_TIME_US 100 |
8196 | - | |
1a6e0f06 | 8197 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); |
1a6e0f06 JK |
8198 | /** |
8199 | * intel_pipe_update_start() - start update of a set of display registers | |
8200 | * @crtc: the crtc of which the registers are going to be updated | |
e4b2b4a8 JK |
8201 | @@ -102,7 +103,7 @@ |
8202 | VBLANK_EVASION_TIME_US); | |
1a6e0f06 JK |
8203 | max = vblank_start - 1; |
8204 | ||
8205 | - local_irq_disable(); | |
8206 | + local_lock_irq(pipe_update_lock); | |
8207 | ||
8208 | if (min <= 0 || max <= 0) | |
8209 | return; | |
e4b2b4a8 | 8210 | @@ -132,11 +133,11 @@ |
1a6e0f06 JK |
8211 | break; |
8212 | } | |
8213 | ||
8214 | - local_irq_enable(); | |
8215 | + local_unlock_irq(pipe_update_lock); | |
8216 | ||
8217 | timeout = schedule_timeout(timeout); | |
8218 | ||
8219 | - local_irq_disable(); | |
8220 | + local_lock_irq(pipe_update_lock); | |
8221 | } | |
8222 | ||
8223 | finish_wait(wq, &wait); | |
e4b2b4a8 | 8224 | @@ -201,7 +202,7 @@ |
1a6e0f06 JK |
8225 | crtc->base.state->event = NULL; |
8226 | } | |
8227 | ||
8228 | - local_irq_enable(); | |
8229 | + local_unlock_irq(pipe_update_lock); | |
8230 | ||
e4b2b4a8 JK |
8231 | if (intel_vgpu_active(dev_priv)) |
8232 | return; | |
8233 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/gpu/drm/radeon/radeon_display.c linux-4.14/drivers/gpu/drm/radeon/radeon_display.c | |
8234 | --- linux-4.14.orig/drivers/gpu/drm/radeon/radeon_display.c 2017-11-12 19:46:13.000000000 +0100 | |
8235 | +++ linux-4.14/drivers/gpu/drm/radeon/radeon_display.c 2018-09-05 11:05:07.000000000 +0200 | |
8236 | @@ -1839,6 +1839,7 @@ | |
1a6e0f06 JK |
8237 | struct radeon_device *rdev = dev->dev_private; |
8238 | ||
8239 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
8240 | + preempt_disable_rt(); | |
8241 | ||
8242 | /* Get optional system timestamp before query. */ | |
8243 | if (stime) | |
e4b2b4a8 | 8244 | @@ -1931,6 +1932,7 @@ |
1a6e0f06 JK |
8245 | *etime = ktime_get(); |
8246 | ||
8247 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
8248 | + preempt_enable_rt(); | |
8249 | ||
8250 | /* Decode into vertical and horizontal scanout position. */ | |
8251 | *vpos = position & 0x1fff; | |
e4b2b4a8 JK |
8252 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/hv/vmbus_drv.c linux-4.14/drivers/hv/vmbus_drv.c |
8253 | --- linux-4.14.orig/drivers/hv/vmbus_drv.c 2018-09-05 11:03:21.000000000 +0200 | |
8254 | +++ linux-4.14/drivers/hv/vmbus_drv.c 2018-09-05 11:05:37.000000000 +0200 | |
8255 | @@ -39,6 +39,7 @@ | |
8256 | #include <asm/hyperv.h> | |
8257 | #include <asm/hypervisor.h> | |
8258 | #include <asm/mshyperv.h> | |
8259 | +#include <asm/irq_regs.h> | |
8260 | #include <linux/notifier.h> | |
8261 | #include <linux/ptrace.h> | |
8262 | #include <linux/screen_info.h> | |
8263 | @@ -966,6 +967,8 @@ | |
8264 | void *page_addr = hv_cpu->synic_event_page; | |
1a6e0f06 JK |
8265 | struct hv_message *msg; |
8266 | union hv_synic_event_flags *event; | |
8267 | + struct pt_regs *regs = get_irq_regs(); | |
8268 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
8269 | bool handled = false; | |
8270 | ||
e4b2b4a8 JK |
8271 | if (unlikely(page_addr == NULL)) |
8272 | @@ -1009,7 +1012,7 @@ | |
8273 | tasklet_schedule(&hv_cpu->msg_dpc); | |
1a6e0f06 JK |
8274 | } |
8275 | ||
8276 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
8277 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); | |
8278 | } | |
8279 | ||
8280 | ||
e4b2b4a8 JK |
8281 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/alim15x3.c linux-4.14/drivers/ide/alim15x3.c |
8282 | --- linux-4.14.orig/drivers/ide/alim15x3.c 2017-11-12 19:46:13.000000000 +0100 | |
8283 | +++ linux-4.14/drivers/ide/alim15x3.c 2018-09-05 11:05:07.000000000 +0200 | |
8284 | @@ -234,7 +234,7 @@ | |
1a6e0f06 JK |
8285 | |
8286 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
8287 | ||
8288 | - local_irq_save(flags); | |
8289 | + local_irq_save_nort(flags); | |
8290 | ||
8291 | if (m5229_revision < 0xC2) { | |
8292 | /* | |
e4b2b4a8 | 8293 | @@ -325,7 +325,7 @@ |
1a6e0f06 JK |
8294 | } |
8295 | pci_dev_put(north); | |
8296 | pci_dev_put(isa_dev); | |
8297 | - local_irq_restore(flags); | |
8298 | + local_irq_restore_nort(flags); | |
8299 | return 0; | |
8300 | } | |
8301 | ||
e4b2b4a8 JK |
8302 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/hpt366.c linux-4.14/drivers/ide/hpt366.c |
8303 | --- linux-4.14.orig/drivers/ide/hpt366.c 2017-11-12 19:46:13.000000000 +0100 | |
8304 | +++ linux-4.14/drivers/ide/hpt366.c 2018-09-05 11:05:07.000000000 +0200 | |
8305 | @@ -1236,7 +1236,7 @@ | |
1a6e0f06 JK |
8306 | |
8307 | dma_old = inb(base + 2); | |
8308 | ||
8309 | - local_irq_save(flags); | |
8310 | + local_irq_save_nort(flags); | |
8311 | ||
8312 | dma_new = dma_old; | |
8313 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
e4b2b4a8 | 8314 | @@ -1247,7 +1247,7 @@ |
1a6e0f06 JK |
8315 | if (dma_new != dma_old) |
8316 | outb(dma_new, base + 2); | |
8317 | ||
8318 | - local_irq_restore(flags); | |
8319 | + local_irq_restore_nort(flags); | |
8320 | ||
8321 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
8322 | hwif->name, base, base + 7); | |
e4b2b4a8 JK |
8323 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/ide-io.c linux-4.14/drivers/ide/ide-io.c |
8324 | --- linux-4.14.orig/drivers/ide/ide-io.c 2017-11-12 19:46:13.000000000 +0100 | |
8325 | +++ linux-4.14/drivers/ide/ide-io.c 2018-09-05 11:05:07.000000000 +0200 | |
8326 | @@ -660,7 +660,7 @@ | |
8327 | /* disable_irq_nosync ?? */ | |
8328 | disable_irq(hwif->irq); | |
8329 | /* local CPU only, as if we were handling an interrupt */ | |
8330 | - local_irq_disable(); | |
8331 | + local_irq_disable_nort(); | |
8332 | if (hwif->polling) { | |
8333 | startstop = handler(drive); | |
8334 | } else if (drive_is_ready(drive)) { | |
8335 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/ide-iops.c linux-4.14/drivers/ide/ide-iops.c | |
8336 | --- linux-4.14.orig/drivers/ide/ide-iops.c 2017-11-12 19:46:13.000000000 +0100 | |
8337 | +++ linux-4.14/drivers/ide/ide-iops.c 2018-09-05 11:05:07.000000000 +0200 | |
8338 | @@ -129,12 +129,12 @@ | |
8339 | if ((stat & ATA_BUSY) == 0) | |
8340 | break; | |
8341 | ||
8342 | - local_irq_restore(flags); | |
8343 | + local_irq_restore_nort(flags); | |
8344 | *rstat = stat; | |
8345 | return -EBUSY; | |
8346 | } | |
8347 | } | |
8348 | - local_irq_restore(flags); | |
8349 | + local_irq_restore_nort(flags); | |
8350 | } | |
8351 | /* | |
8352 | * Allow status to settle, then read it again. | |
8353 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/ide-io-std.c linux-4.14/drivers/ide/ide-io-std.c | |
8354 | --- linux-4.14.orig/drivers/ide/ide-io-std.c 2017-11-12 19:46:13.000000000 +0100 | |
8355 | +++ linux-4.14/drivers/ide/ide-io-std.c 2018-09-05 11:05:07.000000000 +0200 | |
8356 | @@ -175,7 +175,7 @@ | |
1a6e0f06 JK |
8357 | unsigned long uninitialized_var(flags); |
8358 | ||
8359 | if ((io_32bit & 2) && !mmio) { | |
8360 | - local_irq_save(flags); | |
8361 | + local_irq_save_nort(flags); | |
8362 | ata_vlb_sync(io_ports->nsect_addr); | |
8363 | } | |
8364 | ||
e4b2b4a8 | 8365 | @@ -186,7 +186,7 @@ |
1a6e0f06 JK |
8366 | insl(data_addr, buf, words); |
8367 | ||
8368 | if ((io_32bit & 2) && !mmio) | |
8369 | - local_irq_restore(flags); | |
8370 | + local_irq_restore_nort(flags); | |
8371 | ||
8372 | if (((len + 1) & 3) < 2) | |
8373 | return; | |
e4b2b4a8 | 8374 | @@ -219,7 +219,7 @@ |
1a6e0f06 JK |
8375 | unsigned long uninitialized_var(flags); |
8376 | ||
8377 | if ((io_32bit & 2) && !mmio) { | |
8378 | - local_irq_save(flags); | |
8379 | + local_irq_save_nort(flags); | |
8380 | ata_vlb_sync(io_ports->nsect_addr); | |
8381 | } | |
8382 | ||
e4b2b4a8 | 8383 | @@ -230,7 +230,7 @@ |
1a6e0f06 JK |
8384 | outsl(data_addr, buf, words); |
8385 | ||
8386 | if ((io_32bit & 2) && !mmio) | |
8387 | - local_irq_restore(flags); | |
8388 | + local_irq_restore_nort(flags); | |
8389 | ||
8390 | if (((len + 1) & 3) < 2) | |
8391 | return; | |
e4b2b4a8 JK |
8392 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/ide-probe.c linux-4.14/drivers/ide/ide-probe.c |
8393 | --- linux-4.14.orig/drivers/ide/ide-probe.c 2017-11-12 19:46:13.000000000 +0100 | |
8394 | +++ linux-4.14/drivers/ide/ide-probe.c 2018-09-05 11:05:07.000000000 +0200 | |
8395 | @@ -196,10 +196,10 @@ | |
1a6e0f06 JK |
8396 | int bswap = 1; |
8397 | ||
8398 | /* local CPU only; some systems need this */ | |
8399 | - local_irq_save(flags); | |
8400 | + local_irq_save_nort(flags); | |
8401 | /* read 512 bytes of id info */ | |
8402 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
8403 | - local_irq_restore(flags); | |
8404 | + local_irq_restore_nort(flags); | |
8405 | ||
8406 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
8407 | #ifdef DEBUG | |
e4b2b4a8 JK |
8408 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/ide/ide-taskfile.c linux-4.14/drivers/ide/ide-taskfile.c |
8409 | --- linux-4.14.orig/drivers/ide/ide-taskfile.c 2017-11-12 19:46:13.000000000 +0100 | |
8410 | +++ linux-4.14/drivers/ide/ide-taskfile.c 2018-09-05 11:05:07.000000000 +0200 | |
8411 | @@ -251,7 +251,7 @@ | |
1a6e0f06 JK |
8412 | |
8413 | page_is_high = PageHighMem(page); | |
8414 | if (page_is_high) | |
8415 | - local_irq_save(flags); | |
8416 | + local_irq_save_nort(flags); | |
8417 | ||
8418 | buf = kmap_atomic(page) + offset; | |
8419 | ||
e4b2b4a8 | 8420 | @@ -272,7 +272,7 @@ |
1a6e0f06 JK |
8421 | kunmap_atomic(buf); |
8422 | ||
8423 | if (page_is_high) | |
8424 | - local_irq_restore(flags); | |
8425 | + local_irq_restore_nort(flags); | |
8426 | ||
8427 | len -= nr_bytes; | |
8428 | } | |
e4b2b4a8 | 8429 | @@ -415,7 +415,7 @@ |
1a6e0f06 JK |
8430 | } |
8431 | ||
8432 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
8433 | - local_irq_disable(); | |
8434 | + local_irq_disable_nort(); | |
8435 | ||
8436 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
8437 | ||
e4b2b4a8 JK |
8438 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/infiniband/hw/hfi1/affinity.c linux-4.14/drivers/infiniband/hw/hfi1/affinity.c |
8439 | --- linux-4.14.orig/drivers/infiniband/hw/hfi1/affinity.c 2018-09-05 11:03:22.000000000 +0200 | |
8440 | +++ linux-4.14/drivers/infiniband/hw/hfi1/affinity.c 2018-09-05 11:05:07.000000000 +0200 | |
8441 | @@ -575,7 +575,7 @@ | |
8442 | struct hfi1_affinity_node *entry; | |
8443 | cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask; | |
8444 | const struct cpumask *node_mask, | |
8445 | - *proc_mask = ¤t->cpus_allowed; | |
8446 | + *proc_mask = current->cpus_ptr; | |
8447 | struct hfi1_affinity_node_list *affinity = &node_affinity; | |
8448 | struct cpu_mask_set *set = &affinity->proc; | |
8449 | ||
8450 | @@ -583,7 +583,7 @@ | |
8451 | * check whether process/context affinity has already | |
8452 | * been set | |
8453 | */ | |
8454 | - if (cpumask_weight(proc_mask) == 1) { | |
8455 | + if (current->nr_cpus_allowed == 1) { | |
8456 | hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", | |
8457 | current->pid, current->comm, | |
8458 | cpumask_pr_args(proc_mask)); | |
8459 | @@ -594,7 +594,7 @@ | |
8460 | cpu = cpumask_first(proc_mask); | |
8461 | cpumask_set_cpu(cpu, &set->used); | |
8462 | goto done; | |
8463 | - } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { | |
8464 | + } else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) { | |
8465 | hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", | |
8466 | current->pid, current->comm, | |
8467 | cpumask_pr_args(proc_mask)); | |
8468 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/infiniband/hw/hfi1/sdma.c linux-4.14/drivers/infiniband/hw/hfi1/sdma.c | |
8469 | --- linux-4.14.orig/drivers/infiniband/hw/hfi1/sdma.c 2017-11-12 19:46:13.000000000 +0100 | |
8470 | +++ linux-4.14/drivers/infiniband/hw/hfi1/sdma.c 2018-09-05 11:05:07.000000000 +0200 | |
8471 | @@ -856,14 +856,13 @@ | |
8472 | { | |
8473 | struct sdma_rht_node *rht_node; | |
8474 | struct sdma_engine *sde = NULL; | |
8475 | - const struct cpumask *current_mask = ¤t->cpus_allowed; | |
8476 | unsigned long cpu_id; | |
8477 | ||
8478 | /* | |
8479 | * To ensure that always the same sdma engine(s) will be | |
8480 | * selected make sure the process is pinned to this CPU only. | |
8481 | */ | |
8482 | - if (cpumask_weight(current_mask) != 1) | |
8483 | + if (current->nr_cpus_allowed != 1) | |
8484 | goto out; | |
8485 | ||
8486 | cpu_id = smp_processor_id(); | |
8487 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/infiniband/hw/qib/qib_file_ops.c linux-4.14/drivers/infiniband/hw/qib/qib_file_ops.c | |
8488 | --- linux-4.14.orig/drivers/infiniband/hw/qib/qib_file_ops.c 2018-09-05 11:03:22.000000000 +0200 | |
8489 | +++ linux-4.14/drivers/infiniband/hw/qib/qib_file_ops.c 2018-09-05 11:05:07.000000000 +0200 | |
8490 | @@ -1167,7 +1167,7 @@ | |
8491 | static void assign_ctxt_affinity(struct file *fp, struct qib_devdata *dd) | |
8492 | { | |
8493 | struct qib_filedata *fd = fp->private_data; | |
8494 | - const unsigned int weight = cpumask_weight(¤t->cpus_allowed); | |
8495 | + const unsigned int weight = current->nr_cpus_allowed; | |
8496 | const struct cpumask *local_mask = cpumask_of_pcibus(dd->pcidev->bus); | |
8497 | int local_cpu; | |
8498 | ||
8499 | @@ -1648,9 +1648,8 @@ | |
8500 | ret = find_free_ctxt(i_minor - 1, fp, uinfo); | |
8501 | else { | |
8502 | int unit; | |
8503 | - const unsigned int cpu = cpumask_first(¤t->cpus_allowed); | |
8504 | - const unsigned int weight = | |
8505 | - cpumask_weight(¤t->cpus_allowed); | |
8506 | + const unsigned int cpu = cpumask_first(current->cpus_ptr); | |
8507 | + const unsigned int weight = current->nr_cpus_allowed; | |
8508 | ||
8509 | if (weight == 1 && !test_bit(cpu, qib_cpulist)) | |
8510 | if (!find_hca(cpu, &unit) && unit >= 0) | |
8511 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c linux-4.14/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
8512 | --- linux-4.14.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2018-09-05 11:03:22.000000000 +0200 | |
8513 | +++ linux-4.14/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2018-09-05 11:05:07.000000000 +0200 | |
8514 | @@ -898,7 +898,7 @@ | |
1a6e0f06 JK |
8515 | |
8516 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
8517 | ||
8518 | - local_irq_save(flags); | |
8519 | + local_irq_save_nort(flags); | |
8520 | netif_addr_lock(dev); | |
8521 | spin_lock(&priv->lock); | |
8522 | ||
e4b2b4a8 | 8523 | @@ -980,7 +980,7 @@ |
1a6e0f06 JK |
8524 | |
8525 | spin_unlock(&priv->lock); | |
8526 | netif_addr_unlock(dev); | |
8527 | - local_irq_restore(flags); | |
8528 | + local_irq_restore_nort(flags); | |
8529 | ||
e4b2b4a8 JK |
8530 | ipoib_mcast_remove_list(&remove_list); |
8531 | ||
8532 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/input/gameport/gameport.c linux-4.14/drivers/input/gameport/gameport.c | |
8533 | --- linux-4.14.orig/drivers/input/gameport/gameport.c 2017-11-12 19:46:13.000000000 +0100 | |
8534 | +++ linux-4.14/drivers/input/gameport/gameport.c 2018-09-05 11:05:07.000000000 +0200 | |
8535 | @@ -91,13 +91,13 @@ | |
8536 | tx = ~0; | |
8537 | ||
8538 | for (i = 0; i < 50; i++) { | |
8539 | - local_irq_save(flags); | |
8540 | + local_irq_save_nort(flags); | |
8541 | t1 = ktime_get_ns(); | |
8542 | for (t = 0; t < 50; t++) | |
8543 | gameport_read(gameport); | |
8544 | t2 = ktime_get_ns(); | |
8545 | t3 = ktime_get_ns(); | |
8546 | - local_irq_restore(flags); | |
8547 | + local_irq_restore_nort(flags); | |
8548 | udelay(i * 10); | |
8549 | t = (t2 - t1) - (t3 - t2); | |
8550 | if (t < tx) | |
8551 | @@ -124,12 +124,12 @@ | |
8552 | tx = 1 << 30; | |
8553 | ||
8554 | for(i = 0; i < 50; i++) { | |
8555 | - local_irq_save(flags); | |
8556 | + local_irq_save_nort(flags); | |
8557 | GET_TIME(t1); | |
8558 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
8559 | GET_TIME(t2); | |
8560 | GET_TIME(t3); | |
8561 | - local_irq_restore(flags); | |
8562 | + local_irq_restore_nort(flags); | |
8563 | udelay(i * 10); | |
8564 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
8565 | } | |
8566 | @@ -148,11 +148,11 @@ | |
8567 | tx = 1 << 30; | |
8568 | ||
8569 | for(i = 0; i < 50; i++) { | |
8570 | - local_irq_save(flags); | |
8571 | + local_irq_save_nort(flags); | |
8572 | t1 = rdtsc(); | |
8573 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
8574 | t2 = rdtsc(); | |
8575 | - local_irq_restore(flags); | |
8576 | + local_irq_restore_nort(flags); | |
8577 | udelay(i * 10); | |
8578 | if (t2 - t1 < tx) tx = t2 - t1; | |
8579 | } | |
8580 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/iommu/amd_iommu.c linux-4.14/drivers/iommu/amd_iommu.c | |
8581 | --- linux-4.14.orig/drivers/iommu/amd_iommu.c 2018-09-05 11:03:22.000000000 +0200 | |
8582 | +++ linux-4.14/drivers/iommu/amd_iommu.c 2018-09-05 11:05:07.000000000 +0200 | |
8583 | @@ -81,11 +81,12 @@ | |
8584 | */ | |
8585 | #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) | |
8586 | ||
8587 | -static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |
8588 | +static DEFINE_SPINLOCK(amd_iommu_devtable_lock); | |
8589 | +static DEFINE_SPINLOCK(pd_bitmap_lock); | |
8590 | +static DEFINE_SPINLOCK(iommu_table_lock); | |
8591 | ||
8592 | /* List of all available dev_data structures */ | |
8593 | -static LIST_HEAD(dev_data_list); | |
8594 | -static DEFINE_SPINLOCK(dev_data_list_lock); | |
8595 | +static LLIST_HEAD(dev_data_list); | |
8596 | ||
8597 | LIST_HEAD(ioapic_map); | |
8598 | LIST_HEAD(hpet_map); | |
8599 | @@ -204,40 +205,33 @@ | |
8600 | static struct iommu_dev_data *alloc_dev_data(u16 devid) | |
8601 | { | |
8602 | struct iommu_dev_data *dev_data; | |
8603 | - unsigned long flags; | |
8604 | ||
8605 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); | |
8606 | if (!dev_data) | |
8607 | return NULL; | |
8608 | ||
8609 | dev_data->devid = devid; | |
8610 | - | |
8611 | - spin_lock_irqsave(&dev_data_list_lock, flags); | |
8612 | - list_add_tail(&dev_data->dev_data_list, &dev_data_list); | |
8613 | - spin_unlock_irqrestore(&dev_data_list_lock, flags); | |
8614 | - | |
8615 | ratelimit_default_init(&dev_data->rs); | |
8616 | ||
8617 | + llist_add(&dev_data->dev_data_list, &dev_data_list); | |
8618 | return dev_data; | |
8619 | } | |
8620 | ||
8621 | static struct iommu_dev_data *search_dev_data(u16 devid) | |
8622 | { | |
8623 | struct iommu_dev_data *dev_data; | |
8624 | - unsigned long flags; | |
8625 | + struct llist_node *node; | |
8626 | ||
8627 | - spin_lock_irqsave(&dev_data_list_lock, flags); | |
8628 | - list_for_each_entry(dev_data, &dev_data_list, dev_data_list) { | |
8629 | + if (llist_empty(&dev_data_list)) | |
8630 | + return NULL; | |
8631 | + | |
8632 | + node = dev_data_list.first; | |
8633 | + llist_for_each_entry(dev_data, node, dev_data_list) { | |
8634 | if (dev_data->devid == devid) | |
8635 | - goto out_unlock; | |
8636 | + return dev_data; | |
8637 | } | |
8638 | ||
8639 | - dev_data = NULL; | |
8640 | - | |
8641 | -out_unlock: | |
8642 | - spin_unlock_irqrestore(&dev_data_list_lock, flags); | |
8643 | - | |
8644 | - return dev_data; | |
8645 | + return NULL; | |
8646 | } | |
8647 | ||
8648 | static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) | |
8649 | @@ -1056,9 +1050,9 @@ | |
8650 | unsigned long flags; | |
8651 | int ret; | |
8652 | ||
8653 | - spin_lock_irqsave(&iommu->lock, flags); | |
8654 | + raw_spin_lock_irqsave(&iommu->lock, flags); | |
8655 | ret = __iommu_queue_command_sync(iommu, cmd, sync); | |
8656 | - spin_unlock_irqrestore(&iommu->lock, flags); | |
8657 | + raw_spin_unlock_irqrestore(&iommu->lock, flags); | |
8658 | ||
8659 | return ret; | |
8660 | } | |
8661 | @@ -1084,7 +1078,7 @@ | |
8662 | ||
8663 | build_completion_wait(&cmd, (u64)&iommu->cmd_sem); | |
8664 | ||
8665 | - spin_lock_irqsave(&iommu->lock, flags); | |
8666 | + raw_spin_lock_irqsave(&iommu->lock, flags); | |
8667 | ||
8668 | iommu->cmd_sem = 0; | |
8669 | ||
8670 | @@ -1095,7 +1089,7 @@ | |
8671 | ret = wait_on_sem(&iommu->cmd_sem); | |
8672 | ||
8673 | out_unlock: | |
8674 | - spin_unlock_irqrestore(&iommu->lock, flags); | |
8675 | + raw_spin_unlock_irqrestore(&iommu->lock, flags); | |
8676 | ||
8677 | return ret; | |
8678 | } | |
8679 | @@ -1604,29 +1598,26 @@ | |
8680 | ||
8681 | static u16 domain_id_alloc(void) | |
8682 | { | |
8683 | - unsigned long flags; | |
8684 | int id; | |
8685 | ||
8686 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8687 | + spin_lock(&pd_bitmap_lock); | |
8688 | id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID); | |
8689 | BUG_ON(id == 0); | |
8690 | if (id > 0 && id < MAX_DOMAIN_ID) | |
8691 | __set_bit(id, amd_iommu_pd_alloc_bitmap); | |
8692 | else | |
8693 | id = 0; | |
8694 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8695 | + spin_unlock(&pd_bitmap_lock); | |
8696 | ||
8697 | return id; | |
8698 | } | |
8699 | ||
8700 | static void domain_id_free(int id) | |
8701 | { | |
8702 | - unsigned long flags; | |
8703 | - | |
8704 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8705 | + spin_lock(&pd_bitmap_lock); | |
8706 | if (id > 0 && id < MAX_DOMAIN_ID) | |
8707 | __clear_bit(id, amd_iommu_pd_alloc_bitmap); | |
8708 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8709 | + spin_unlock(&pd_bitmap_lock); | |
8710 | } | |
8711 | ||
8712 | #define DEFINE_FREE_PT_FN(LVL, FN) \ | |
8713 | @@ -1946,10 +1937,10 @@ | |
8714 | int ret; | |
8715 | ||
8716 | /* | |
8717 | - * Must be called with IRQs disabled. Warn here to detect early | |
8718 | - * when its not. | |
8719 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
8720 | + * detect early when its not. | |
8721 | */ | |
8722 | - WARN_ON(!irqs_disabled()); | |
8723 | + WARN_ON_NONRT(!irqs_disabled()); | |
8724 | ||
8725 | /* lock domain */ | |
8726 | spin_lock(&domain->lock); | |
8727 | @@ -2095,9 +2086,9 @@ | |
8728 | } | |
8729 | ||
8730 | skip_ats_check: | |
8731 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8732 | + spin_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8733 | ret = __attach_device(dev_data, domain); | |
8734 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8735 | + spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8736 | ||
8737 | /* | |
8738 | * We might boot into a crash-kernel here. The crashed kernel | |
8739 | @@ -2117,10 +2108,10 @@ | |
8740 | struct protection_domain *domain; | |
8741 | ||
8742 | /* | |
8743 | - * Must be called with IRQs disabled. Warn here to detect early | |
8744 | - * when its not. | |
8745 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
8746 | + * detect early when its not. | |
8747 | */ | |
8748 | - WARN_ON(!irqs_disabled()); | |
8749 | + WARN_ON_NONRT(!irqs_disabled()); | |
8750 | ||
8751 | if (WARN_ON(!dev_data->domain)) | |
8752 | return; | |
8753 | @@ -2147,9 +2138,9 @@ | |
8754 | domain = dev_data->domain; | |
8755 | ||
8756 | /* lock device table */ | |
8757 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8758 | + spin_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8759 | __detach_device(dev_data); | |
8760 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8761 | + spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8762 | ||
8763 | if (!dev_is_pci(dev)) | |
8764 | return; | |
8765 | @@ -2813,7 +2804,7 @@ | |
8766 | struct iommu_dev_data *entry; | |
8767 | unsigned long flags; | |
8768 | ||
8769 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8770 | + spin_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8771 | ||
8772 | while (!list_empty(&domain->dev_list)) { | |
8773 | entry = list_first_entry(&domain->dev_list, | |
8774 | @@ -2821,7 +2812,7 @@ | |
8775 | __detach_device(entry); | |
8776 | } | |
8777 | ||
8778 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8779 | + spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8780 | } | |
8781 | ||
8782 | static void protection_domain_free(struct protection_domain *domain) | |
8783 | @@ -3588,14 +3579,62 @@ | |
8784 | amd_iommu_dev_table[devid].data[2] = dte; | |
8785 | } | |
8786 | ||
8787 | -static struct irq_remap_table *get_irq_table(u16 devid, bool ioapic) | |
8788 | +static struct irq_remap_table *get_irq_table(u16 devid) | |
8789 | +{ | |
8790 | + struct irq_remap_table *table; | |
8791 | + | |
8792 | + if (WARN_ONCE(!amd_iommu_rlookup_table[devid], | |
8793 | + "%s: no iommu for devid %x\n", __func__, devid)) | |
8794 | + return NULL; | |
8795 | + | |
8796 | + table = irq_lookup_table[devid]; | |
8797 | + if (WARN_ONCE(!table, "%s: no table for devid %x\n", __func__, devid)) | |
8798 | + return NULL; | |
8799 | + | |
8800 | + return table; | |
8801 | +} | |
8802 | + | |
8803 | +static struct irq_remap_table *__alloc_irq_table(void) | |
8804 | +{ | |
8805 | + struct irq_remap_table *table; | |
8806 | + | |
8807 | + table = kzalloc(sizeof(*table), GFP_KERNEL); | |
8808 | + if (!table) | |
8809 | + return NULL; | |
8810 | + | |
8811 | + table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_KERNEL); | |
8812 | + if (!table->table) { | |
8813 | + kfree(table); | |
8814 | + return NULL; | |
8815 | + } | |
8816 | + raw_spin_lock_init(&table->lock); | |
8817 | + | |
8818 | + if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) | |
8819 | + memset(table->table, 0, | |
8820 | + MAX_IRQS_PER_TABLE * sizeof(u32)); | |
8821 | + else | |
8822 | + memset(table->table, 0, | |
8823 | + (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); | |
8824 | + return table; | |
8825 | +} | |
8826 | + | |
8827 | +static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, | |
8828 | + struct irq_remap_table *table) | |
8829 | +{ | |
8830 | + irq_lookup_table[devid] = table; | |
8831 | + set_dte_irq_entry(devid, table); | |
8832 | + iommu_flush_dte(iommu, devid); | |
8833 | +} | |
8834 | + | |
8835 | +static struct irq_remap_table *alloc_irq_table(u16 devid) | |
8836 | { | |
8837 | struct irq_remap_table *table = NULL; | |
8838 | + struct irq_remap_table *new_table = NULL; | |
8839 | struct amd_iommu *iommu; | |
8840 | unsigned long flags; | |
8841 | u16 alias; | |
8842 | ||
8843 | - write_lock_irqsave(&amd_iommu_devtable_lock, flags); | |
8844 | + spin_lock_irqsave(&iommu_table_lock, flags); | |
8845 | ||
8846 | iommu = amd_iommu_rlookup_table[devid]; | |
8847 | if (!iommu) | |
8848 | @@ -3608,60 +3647,45 @@ | |
8849 | alias = amd_iommu_alias_table[devid]; | |
8850 | table = irq_lookup_table[alias]; | |
8851 | if (table) { | |
8852 | - irq_lookup_table[devid] = table; | |
8853 | - set_dte_irq_entry(devid, table); | |
8854 | - iommu_flush_dte(iommu, devid); | |
8855 | - goto out; | |
8856 | + set_remap_table_entry(iommu, devid, table); | |
8857 | + goto out_wait; | |
8858 | } | |
8859 | + spin_unlock_irqrestore(&iommu_table_lock, flags); | |
8860 | ||
8861 | /* Nothing there yet, allocate new irq remapping table */ | |
8862 | - table = kzalloc(sizeof(*table), GFP_ATOMIC); | |
8863 | - if (!table) | |
8864 | - goto out_unlock; | |
8865 | - | |
8866 | - /* Initialize table spin-lock */ | |
8867 | - spin_lock_init(&table->lock); | |
8868 | + new_table = __alloc_irq_table(); | |
8869 | + if (!new_table) | |
8870 | + return NULL; | |
8871 | ||
8872 | - if (ioapic) | |
8873 | - /* Keep the first 32 indexes free for IOAPIC interrupts */ | |
8874 | - table->min_index = 32; | |
8875 | + spin_lock_irqsave(&iommu_table_lock, flags); | |
8876 | ||
8877 | - table->table = kmem_cache_alloc(amd_iommu_irq_cache, GFP_ATOMIC); | |
8878 | - if (!table->table) { | |
8879 | - kfree(table); | |
8880 | - table = NULL; | |
8881 | + table = irq_lookup_table[devid]; | |
8882 | + if (table) | |
8883 | goto out_unlock; | |
8884 | - } | |
8885 | - | |
8886 | - if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir)) | |
8887 | - memset(table->table, 0, | |
8888 | - MAX_IRQS_PER_TABLE * sizeof(u32)); | |
8889 | - else | |
8890 | - memset(table->table, 0, | |
8891 | - (MAX_IRQS_PER_TABLE * (sizeof(u64) * 2))); | |
8892 | - | |
8893 | - if (ioapic) { | |
8894 | - int i; | |
8895 | ||
8896 | - for (i = 0; i < 32; ++i) | |
8897 | - iommu->irte_ops->set_allocated(table, i); | |
8898 | + table = irq_lookup_table[alias]; | |
8899 | + if (table) { | |
8900 | + set_remap_table_entry(iommu, devid, table); | |
8901 | + goto out_wait; | |
8902 | } | |
8903 | ||
8904 | - irq_lookup_table[devid] = table; | |
8905 | - set_dte_irq_entry(devid, table); | |
8906 | - iommu_flush_dte(iommu, devid); | |
8907 | - if (devid != alias) { | |
8908 | - irq_lookup_table[alias] = table; | |
8909 | - set_dte_irq_entry(alias, table); | |
8910 | - iommu_flush_dte(iommu, alias); | |
8911 | - } | |
8912 | + table = new_table; | |
8913 | + new_table = NULL; | |
8914 | ||
8915 | -out: | |
8916 | + set_remap_table_entry(iommu, devid, table); | |
8917 | + if (devid != alias) | |
8918 | + set_remap_table_entry(iommu, alias, table); | |
8919 | + | |
8920 | +out_wait: | |
8921 | iommu_completion_wait(iommu); | |
8922 | ||
8923 | out_unlock: | |
8924 | - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | |
8925 | + spin_unlock_irqrestore(&iommu_table_lock, flags); | |
8926 | ||
8927 | + if (new_table) { | |
8928 | + kmem_cache_free(amd_iommu_irq_cache, new_table->table); | |
8929 | + kfree(new_table); | |
8930 | + } | |
8931 | return table; | |
8932 | } | |
8933 | ||
8934 | @@ -3675,11 +3699,11 @@ | |
8935 | if (!iommu) | |
8936 | return -ENODEV; | |
8937 | ||
8938 | - table = get_irq_table(devid, false); | |
8939 | + table = alloc_irq_table(devid); | |
8940 | if (!table) | |
8941 | return -ENODEV; | |
8942 | ||
8943 | - spin_lock_irqsave(&table->lock, flags); | |
8944 | + raw_spin_lock_irqsave(&table->lock, flags); | |
8945 | ||
8946 | /* Scan table for free entries */ | |
8947 | for (c = 0, index = table->min_index; | |
8948 | @@ -3702,7 +3726,7 @@ | |
8949 | index = -ENOSPC; | |
8950 | ||
8951 | out: | |
8952 | - spin_unlock_irqrestore(&table->lock, flags); | |
8953 | + raw_spin_unlock_irqrestore(&table->lock, flags); | |
8954 | ||
8955 | return index; | |
8956 | } | |
8957 | @@ -3719,11 +3743,11 @@ | |
8958 | if (iommu == NULL) | |
8959 | return -EINVAL; | |
8960 | ||
8961 | - table = get_irq_table(devid, false); | |
8962 | + table = get_irq_table(devid); | |
8963 | if (!table) | |
8964 | return -ENOMEM; | |
8965 | ||
8966 | - spin_lock_irqsave(&table->lock, flags); | |
8967 | + raw_spin_lock_irqsave(&table->lock, flags); | |
8968 | ||
8969 | entry = (struct irte_ga *)table->table; | |
8970 | entry = &entry[index]; | |
8971 | @@ -3734,7 +3758,7 @@ | |
8972 | if (data) | |
8973 | data->ref = entry; | |
8974 | ||
8975 | - spin_unlock_irqrestore(&table->lock, flags); | |
8976 | + raw_spin_unlock_irqrestore(&table->lock, flags); | |
8977 | ||
8978 | iommu_flush_irt(iommu, devid); | |
8979 | iommu_completion_wait(iommu); | |
8980 | @@ -3752,13 +3776,13 @@ | |
8981 | if (iommu == NULL) | |
8982 | return -EINVAL; | |
8983 | ||
8984 | - table = get_irq_table(devid, false); | |
8985 | + table = get_irq_table(devid); | |
8986 | if (!table) | |
8987 | return -ENOMEM; | |
8988 | ||
8989 | - spin_lock_irqsave(&table->lock, flags); | |
8990 | + raw_spin_lock_irqsave(&table->lock, flags); | |
8991 | table->table[index] = irte->val; | |
8992 | - spin_unlock_irqrestore(&table->lock, flags); | |
8993 | + raw_spin_unlock_irqrestore(&table->lock, flags); | |
8994 | ||
8995 | iommu_flush_irt(iommu, devid); | |
8996 | iommu_completion_wait(iommu); | |
8997 | @@ -3776,13 +3800,13 @@ | |
8998 | if (iommu == NULL) | |
8999 | return; | |
9000 | ||
9001 | - table = get_irq_table(devid, false); | |
9002 | + table = get_irq_table(devid); | |
9003 | if (!table) | |
9004 | return; | |
9005 | ||
9006 | - spin_lock_irqsave(&table->lock, flags); | |
9007 | + raw_spin_lock_irqsave(&table->lock, flags); | |
9008 | iommu->irte_ops->clear_allocated(table, index); | |
9009 | - spin_unlock_irqrestore(&table->lock, flags); | |
9010 | + raw_spin_unlock_irqrestore(&table->lock, flags); | |
9011 | ||
9012 | iommu_flush_irt(iommu, devid); | |
9013 | iommu_completion_wait(iommu); | |
9014 | @@ -3863,10 +3887,8 @@ | |
9015 | u8 vector, u32 dest_apicid) | |
9016 | { | |
9017 | struct irte_ga *irte = (struct irte_ga *) entry; | |
9018 | - struct iommu_dev_data *dev_data = search_dev_data(devid); | |
9019 | ||
9020 | - if (!dev_data || !dev_data->use_vapic || | |
9021 | - !irte->lo.fields_remap.guest_mode) { | |
9022 | + if (!irte->lo.fields_remap.guest_mode) { | |
9023 | irte->hi.fields.vector = vector; | |
9024 | irte->lo.fields_remap.destination = dest_apicid; | |
9025 | modify_irte_ga(devid, index, irte, NULL); | |
9026 | @@ -4072,7 +4094,7 @@ | |
9027 | struct amd_ir_data *data = NULL; | |
9028 | struct irq_cfg *cfg; | |
9029 | int i, ret, devid; | |
9030 | - int index = -1; | |
9031 | + int index; | |
9032 | ||
9033 | if (!info) | |
9034 | return -EINVAL; | |
9035 | @@ -4096,10 +4118,26 @@ | |
9036 | return ret; | |
9037 | ||
9038 | if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) { | |
9039 | - if (get_irq_table(devid, true)) | |
9040 | + struct irq_remap_table *table; | |
9041 | + struct amd_iommu *iommu; | |
9042 | + | |
9043 | + table = alloc_irq_table(devid); | |
9044 | + if (table) { | |
9045 | + if (!table->min_index) { | |
9046 | + /* | |
9047 | + * Keep the first 32 indexes free for IOAPIC | |
9048 | + * interrupts. | |
9049 | + */ | |
9050 | + table->min_index = 32; | |
9051 | + iommu = amd_iommu_rlookup_table[devid]; | |
9052 | + for (i = 0; i < 32; ++i) | |
9053 | + iommu->irte_ops->set_allocated(table, i); | |
9054 | + } | |
9055 | + WARN_ON(table->min_index != 32); | |
9056 | index = info->ioapic_pin; | |
9057 | - else | |
9058 | - ret = -ENOMEM; | |
9059 | + } else { | |
9060 | + index = -ENOMEM; | |
9061 | + } | |
9062 | } else { | |
9063 | index = alloc_irq_index(devid, nr_irqs); | |
9064 | } | |
9065 | @@ -4343,7 +4381,7 @@ | |
9066 | { | |
9067 | unsigned long flags; | |
9068 | struct amd_iommu *iommu; | |
9069 | - struct irq_remap_table *irt; | |
9070 | + struct irq_remap_table *table; | |
9071 | struct amd_ir_data *ir_data = (struct amd_ir_data *)data; | |
9072 | int devid = ir_data->irq_2_irte.devid; | |
9073 | struct irte_ga *entry = (struct irte_ga *) ir_data->entry; | |
9074 | @@ -4357,11 +4395,11 @@ | |
9075 | if (!iommu) | |
9076 | return -ENODEV; | |
9077 | ||
9078 | - irt = get_irq_table(devid, false); | |
9079 | - if (!irt) | |
9080 | + table = get_irq_table(devid); | |
9081 | + if (!table) | |
9082 | return -ENODEV; | |
9083 | ||
9084 | - spin_lock_irqsave(&irt->lock, flags); | |
9085 | + raw_spin_lock_irqsave(&table->lock, flags); | |
9086 | ||
9087 | if (ref->lo.fields_vapic.guest_mode) { | |
9088 | if (cpu >= 0) | |
9089 | @@ -4370,7 +4408,7 @@ | |
9090 | barrier(); | |
9091 | } | |
9092 | ||
9093 | - spin_unlock_irqrestore(&irt->lock, flags); | |
9094 | + raw_spin_unlock_irqrestore(&table->lock, flags); | |
9095 | ||
9096 | iommu_flush_irt(iommu, devid); | |
9097 | iommu_completion_wait(iommu); | |
9098 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/iommu/amd_iommu_init.c linux-4.14/drivers/iommu/amd_iommu_init.c | |
9099 | --- linux-4.14.orig/drivers/iommu/amd_iommu_init.c 2017-11-12 19:46:13.000000000 +0100 | |
9100 | +++ linux-4.14/drivers/iommu/amd_iommu_init.c 2018-09-05 11:05:07.000000000 +0200 | |
9101 | @@ -1474,7 +1474,7 @@ | |
9102 | { | |
9103 | int ret; | |
9104 | ||
9105 | - spin_lock_init(&iommu->lock); | |
9106 | + raw_spin_lock_init(&iommu->lock); | |
9107 | ||
9108 | /* Add IOMMU to internal data structures */ | |
9109 | list_add_tail(&iommu->list, &amd_iommu_list); | |
9110 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/iommu/amd_iommu_types.h linux-4.14/drivers/iommu/amd_iommu_types.h | |
9111 | --- linux-4.14.orig/drivers/iommu/amd_iommu_types.h 2017-11-12 19:46:13.000000000 +0100 | |
9112 | +++ linux-4.14/drivers/iommu/amd_iommu_types.h 2018-09-05 11:05:07.000000000 +0200 | |
9113 | @@ -406,7 +406,7 @@ | |
9114 | #define IRQ_TABLE_ALIGNMENT 128 | |
9115 | ||
9116 | struct irq_remap_table { | |
9117 | - spinlock_t lock; | |
9118 | + raw_spinlock_t lock; | |
9119 | unsigned min_index; | |
9120 | u32 *table; | |
9121 | }; | |
9122 | @@ -488,7 +488,7 @@ | |
9123 | int index; | |
9124 | ||
9125 | /* locks the accesses to the hardware */ | |
9126 | - spinlock_t lock; | |
9127 | + raw_spinlock_t lock; | |
9128 | ||
9129 | /* Pointer to PCI device of this IOMMU */ | |
9130 | struct pci_dev *dev; | |
9131 | @@ -625,7 +625,7 @@ | |
9132 | */ | |
9133 | struct iommu_dev_data { | |
9134 | struct list_head list; /* For domain->dev_list */ | |
9135 | - struct list_head dev_data_list; /* For global dev_data_list */ | |
9136 | + struct llist_node dev_data_list; /* For global dev_data_list */ | |
9137 | struct protection_domain *domain; /* Domain the device is bound to */ | |
9138 | u16 devid; /* PCI Device ID */ | |
9139 | u16 alias; /* Alias Device ID */ | |
9140 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/iommu/iova.c linux-4.14/drivers/iommu/iova.c | |
9141 | --- linux-4.14.orig/drivers/iommu/iova.c 2017-11-12 19:46:13.000000000 +0100 | |
9142 | +++ linux-4.14/drivers/iommu/iova.c 2018-09-05 11:05:07.000000000 +0200 | |
9143 | @@ -570,7 +570,7 @@ | |
9144 | unsigned long pfn, unsigned long pages, | |
9145 | unsigned long data) | |
9146 | { | |
9147 | - struct iova_fq *fq = get_cpu_ptr(iovad->fq); | |
9148 | + struct iova_fq *fq = raw_cpu_ptr(iovad->fq); | |
9149 | unsigned long flags; | |
9150 | unsigned idx; | |
9151 | ||
9152 | @@ -600,8 +600,6 @@ | |
9153 | if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) | |
9154 | mod_timer(&iovad->fq_timer, | |
9155 | jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); | |
9156 | - | |
9157 | - put_cpu_ptr(iovad->fq); | |
9158 | } | |
9159 | EXPORT_SYMBOL_GPL(queue_iova); | |
9160 | ||
9161 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/leds/trigger/Kconfig linux-4.14/drivers/leds/trigger/Kconfig | |
9162 | --- linux-4.14.orig/drivers/leds/trigger/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
9163 | +++ linux-4.14/drivers/leds/trigger/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
9164 | @@ -69,7 +69,7 @@ | |
9165 | ||
9166 | config LEDS_TRIGGER_CPU | |
9167 | bool "LED CPU Trigger" | |
9168 | - depends on LEDS_TRIGGERS | |
9169 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
9170 | help | |
9171 | This allows LEDs to be controlled by active CPUs. This shows | |
9172 | the active CPUs across an array of LEDs so you can see which | |
9173 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/md/bcache/Kconfig linux-4.14/drivers/md/bcache/Kconfig | |
9174 | --- linux-4.14.orig/drivers/md/bcache/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
9175 | +++ linux-4.14/drivers/md/bcache/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
9176 | @@ -1,6 +1,7 @@ | |
9177 | ||
9178 | config BCACHE | |
9179 | tristate "Block device as cache" | |
9180 | + depends on !PREEMPT_RT_FULL | |
9181 | ---help--- | |
9182 | Allows a block device to be used as cache for other devices; uses | |
9183 | a btree for indexing and the layout is optimized for SSDs. | |
9184 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/md/dm-rq.c linux-4.14/drivers/md/dm-rq.c | |
9185 | --- linux-4.14.orig/drivers/md/dm-rq.c 2017-11-12 19:46:13.000000000 +0100 | |
9186 | +++ linux-4.14/drivers/md/dm-rq.c 2018-09-05 11:05:07.000000000 +0200 | |
9187 | @@ -671,7 +671,7 @@ | |
9188 | /* Establish tio->ti before queuing work (map_tio_request) */ | |
9189 | tio->ti = ti; | |
9190 | kthread_queue_work(&md->kworker, &tio->work); | |
9191 | - BUG_ON(!irqs_disabled()); | |
9192 | + BUG_ON_NONRT(!irqs_disabled()); | |
9193 | } | |
9194 | } | |
9195 | ||
9196 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/md/raid5.c linux-4.14/drivers/md/raid5.c | |
9197 | --- linux-4.14.orig/drivers/md/raid5.c 2018-09-05 11:03:22.000000000 +0200 | |
9198 | +++ linux-4.14/drivers/md/raid5.c 2018-09-05 11:05:07.000000000 +0200 | |
9199 | @@ -410,7 +410,7 @@ | |
9200 | md_wakeup_thread(conf->mddev->thread); | |
9201 | return; | |
9202 | slow_path: | |
9203 | - local_irq_save(flags); | |
9204 | + local_irq_save_nort(flags); | |
9205 | /* we are ok here if STRIPE_ON_RELEASE_LIST is set or not */ | |
9206 | if (atomic_dec_and_lock(&sh->count, &conf->device_lock)) { | |
9207 | INIT_LIST_HEAD(&list); | |
9208 | @@ -419,7 +419,7 @@ | |
9209 | spin_unlock(&conf->device_lock); | |
9210 | release_inactive_stripe_list(conf, &list, hash); | |
9211 | } | |
9212 | - local_irq_restore(flags); | |
9213 | + local_irq_restore_nort(flags); | |
9214 | } | |
9215 | ||
9216 | static inline void remove_hash(struct stripe_head *sh) | |
9217 | @@ -2067,8 +2067,9 @@ | |
9218 | struct raid5_percpu *percpu; | |
9219 | unsigned long cpu; | |
9220 | ||
9221 | - cpu = get_cpu(); | |
9222 | + cpu = get_cpu_light(); | |
9223 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
9224 | + spin_lock(&percpu->lock); | |
9225 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
9226 | ops_run_biofill(sh); | |
9227 | overlap_clear++; | |
9228 | @@ -2127,7 +2128,8 @@ | |
9229 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
9230 | wake_up(&sh->raid_conf->wait_for_overlap); | |
9231 | } | |
9232 | - put_cpu(); | |
9233 | + spin_unlock(&percpu->lock); | |
9234 | + put_cpu_light(); | |
9235 | } | |
9236 | ||
9237 | static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh) | |
9238 | @@ -6775,6 +6777,7 @@ | |
9239 | __func__, cpu); | |
9240 | return -ENOMEM; | |
9241 | } | |
9242 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); | |
9243 | return 0; | |
9244 | } | |
9245 | ||
9246 | @@ -6785,7 +6788,6 @@ | |
9247 | conf->percpu = alloc_percpu(struct raid5_percpu); | |
9248 | if (!conf->percpu) | |
9249 | return -ENOMEM; | |
9250 | - | |
9251 | err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); | |
9252 | if (!err) { | |
9253 | conf->scribble_disks = max(conf->raid_disks, | |
9254 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/md/raid5.h linux-4.14/drivers/md/raid5.h | |
9255 | --- linux-4.14.orig/drivers/md/raid5.h 2017-11-12 19:46:13.000000000 +0100 | |
9256 | +++ linux-4.14/drivers/md/raid5.h 2018-09-05 11:05:07.000000000 +0200 | |
9257 | @@ -624,6 +624,7 @@ | |
9258 | int recovery_disabled; | |
9259 | /* per cpu variables */ | |
9260 | struct raid5_percpu { | |
9261 | + spinlock_t lock; /* Protection for -RT */ | |
9262 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
9263 | struct flex_array *scribble; /* space for constructing buffer | |
9264 | * lists and performing address | |
9265 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/mfd/atmel-smc.c linux-4.14/drivers/mfd/atmel-smc.c | |
9266 | --- linux-4.14.orig/drivers/mfd/atmel-smc.c 2017-11-12 19:46:13.000000000 +0100 | |
9267 | +++ linux-4.14/drivers/mfd/atmel-smc.c 2018-09-05 11:05:07.000000000 +0200 | |
9268 | @@ -12,6 +12,7 @@ | |
9269 | */ | |
9270 | ||
9271 | #include <linux/mfd/syscon/atmel-smc.h> | |
9272 | +#include <linux/string.h> | |
9273 | ||
9274 | /** | |
9275 | * atmel_smc_cs_conf_init - initialize a SMC CS conf | |
9276 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/misc/Kconfig linux-4.14/drivers/misc/Kconfig | |
9277 | --- linux-4.14.orig/drivers/misc/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
9278 | +++ linux-4.14/drivers/misc/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
9279 | @@ -54,6 +54,7 @@ | |
9280 | config ATMEL_TCLIB | |
9281 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
9282 | depends on (AVR32 || ARCH_AT91) | |
9283 | + default y if PREEMPT_RT_FULL | |
9284 | help | |
9285 | Select this if you want a library to allocate the Timer/Counter | |
9286 | blocks found on many Atmel processors. This facilitates using | |
9287 | @@ -69,8 +70,7 @@ | |
9288 | are combined to make a single 32-bit timer. | |
9289 | ||
9290 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
9291 | - may be used as a clock event device supporting oneshot mode | |
9292 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
9293 | + may be used as a clock event device supporting oneshot mode. | |
9294 | ||
9295 | config ATMEL_TCB_CLKSRC_BLOCK | |
9296 | int | |
9297 | @@ -84,6 +84,15 @@ | |
9298 | TC can be used for other purposes, such as PWM generation and | |
9299 | interval timing. | |
9300 | ||
9301 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
9302 | + bool "TC Block use 32 KiHz clock" | |
9303 | + depends on ATMEL_TCB_CLKSRC | |
9304 | + default y if !PREEMPT_RT_FULL | |
9305 | + help | |
9306 | + Select this to use 32 KiHz base clock rate as TC block clock | |
9307 | + source for clock events. | |
9308 | + | |
9309 | + | |
9310 | config DUMMY_IRQ | |
9311 | tristate "Dummy IRQ handler" | |
9312 | default n | |
9313 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/mmc/host/mmci.c linux-4.14/drivers/mmc/host/mmci.c | |
9314 | --- linux-4.14.orig/drivers/mmc/host/mmci.c 2017-11-12 19:46:13.000000000 +0100 | |
9315 | +++ linux-4.14/drivers/mmc/host/mmci.c 2018-09-05 11:05:07.000000000 +0200 | |
9316 | @@ -1200,15 +1200,12 @@ | |
9317 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
9318 | struct variant_data *variant = host->variant; | |
9319 | void __iomem *base = host->base; | |
9320 | - unsigned long flags; | |
9321 | u32 status; | |
9322 | ||
9323 | status = readl(base + MMCISTATUS); | |
9324 | ||
9325 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
9326 | ||
9327 | - local_irq_save(flags); | |
9328 | - | |
9329 | do { | |
9330 | unsigned int remain, len; | |
9331 | char *buffer; | |
9332 | @@ -1248,8 +1245,6 @@ | |
9333 | ||
9334 | sg_miter_stop(sg_miter); | |
9335 | ||
9336 | - local_irq_restore(flags); | |
9337 | - | |
9338 | /* | |
9339 | * If we have less than the fifo 'half-full' threshold to transfer, | |
9340 | * trigger a PIO interrupt as soon as any data is available. | |
9341 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/net/ethernet/3com/3c59x.c linux-4.14/drivers/net/ethernet/3com/3c59x.c | |
9342 | --- linux-4.14.orig/drivers/net/ethernet/3com/3c59x.c 2017-11-12 19:46:13.000000000 +0100 | |
9343 | +++ linux-4.14/drivers/net/ethernet/3com/3c59x.c 2018-09-05 11:05:07.000000000 +0200 | |
9344 | @@ -842,9 +842,9 @@ | |
9345 | { | |
9346 | struct vortex_private *vp = netdev_priv(dev); | |
9347 | unsigned long flags; | |
9348 | - local_irq_save(flags); | |
9349 | + local_irq_save_nort(flags); | |
9350 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
9351 | - local_irq_restore(flags); | |
9352 | + local_irq_restore_nort(flags); | |
9353 | } | |
9354 | #endif | |
9355 | ||
9356 | @@ -1908,12 +1908,12 @@ | |
9357 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
9358 | */ | |
9359 | unsigned long flags; | |
9360 | - local_irq_save(flags); | |
9361 | + local_irq_save_nort(flags); | |
9362 | if (vp->full_bus_master_tx) | |
9363 | boomerang_interrupt(dev->irq, dev); | |
9364 | else | |
9365 | vortex_interrupt(dev->irq, dev); | |
9366 | - local_irq_restore(flags); | |
9367 | + local_irq_restore_nort(flags); | |
9368 | } | |
9369 | } | |
9370 | ||
9371 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/net/ethernet/marvell/mvpp2.c linux-4.14/drivers/net/ethernet/marvell/mvpp2.c | |
9372 | --- linux-4.14.orig/drivers/net/ethernet/marvell/mvpp2.c 2018-09-05 11:03:22.000000000 +0200 | |
9373 | +++ linux-4.14/drivers/net/ethernet/marvell/mvpp2.c 2018-09-05 11:05:07.000000000 +0200 | |
9374 | @@ -830,9 +830,8 @@ | |
9375 | /* Per-CPU port control */ | |
9376 | struct mvpp2_port_pcpu { | |
9377 | struct hrtimer tx_done_timer; | |
9378 | + struct net_device *dev; | |
9379 | bool timer_scheduled; | |
9380 | - /* Tasklet for egress finalization */ | |
9381 | - struct tasklet_struct tx_done_tasklet; | |
9382 | }; | |
9383 | ||
9384 | struct mvpp2_queue_vector { | |
9385 | @@ -5954,46 +5953,34 @@ | |
9386 | } | |
9387 | } | |
9388 | ||
9389 | -static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu) | |
9390 | -{ | |
9391 | - ktime_t interval; | |
9392 | - | |
9393 | - if (!port_pcpu->timer_scheduled) { | |
9394 | - port_pcpu->timer_scheduled = true; | |
9395 | - interval = MVPP2_TXDONE_HRTIMER_PERIOD_NS; | |
9396 | - hrtimer_start(&port_pcpu->tx_done_timer, interval, | |
9397 | - HRTIMER_MODE_REL_PINNED); | |
9398 | - } | |
9399 | -} | |
9400 | - | |
9401 | -static void mvpp2_tx_proc_cb(unsigned long data) | |
9402 | +static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) | |
9403 | { | |
9404 | - struct net_device *dev = (struct net_device *)data; | |
9405 | - struct mvpp2_port *port = netdev_priv(dev); | |
9406 | - struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); | |
9407 | + struct net_device *dev; | |
9408 | + struct mvpp2_port *port; | |
9409 | + struct mvpp2_port_pcpu *port_pcpu; | |
9410 | unsigned int tx_todo, cause; | |
9411 | ||
9412 | + port_pcpu = container_of(timer, struct mvpp2_port_pcpu, tx_done_timer); | |
9413 | + dev = port_pcpu->dev; | |
9414 | + | |
9415 | if (!netif_running(dev)) | |
9416 | - return; | |
9417 | + return HRTIMER_NORESTART; | |
9418 | + | |
9419 | port_pcpu->timer_scheduled = false; | |
9420 | + port = netdev_priv(dev); | |
9421 | ||
9422 | /* Process all the Tx queues */ | |
9423 | cause = (1 << port->ntxqs) - 1; | |
9424 | tx_todo = mvpp2_tx_done(port, cause, smp_processor_id()); | |
9425 | ||
9426 | /* Set the timer in case not all the packets were processed */ | |
9427 | - if (tx_todo) | |
9428 | - mvpp2_timer_set(port_pcpu); | |
9429 | -} | |
9430 | - | |
9431 | -static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) | |
9432 | -{ | |
9433 | - struct mvpp2_port_pcpu *port_pcpu = container_of(timer, | |
9434 | - struct mvpp2_port_pcpu, | |
9435 | - tx_done_timer); | |
9436 | - | |
9437 | - tasklet_schedule(&port_pcpu->tx_done_tasklet); | |
9438 | + if (tx_todo && !port_pcpu->timer_scheduled) { | |
9439 | + port_pcpu->timer_scheduled = true; | |
9440 | + hrtimer_forward_now(&port_pcpu->tx_done_timer, | |
9441 | + MVPP2_TXDONE_HRTIMER_PERIOD_NS); | |
9442 | ||
9443 | + return HRTIMER_RESTART; | |
9444 | + } | |
9445 | return HRTIMER_NORESTART; | |
9446 | } | |
9447 | ||
9448 | @@ -6482,7 +6469,12 @@ | |
9449 | txq_pcpu->count > 0) { | |
9450 | struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); | |
9451 | ||
9452 | - mvpp2_timer_set(port_pcpu); | |
9453 | + if (!port_pcpu->timer_scheduled) { | |
9454 | + port_pcpu->timer_scheduled = true; | |
9455 | + hrtimer_start(&port_pcpu->tx_done_timer, | |
9456 | + MVPP2_TXDONE_HRTIMER_PERIOD_NS, | |
9457 | + HRTIMER_MODE_REL_PINNED_SOFT); | |
9458 | + } | |
9459 | } | |
9460 | ||
9461 | return NETDEV_TX_OK; | |
9462 | @@ -6871,7 +6863,6 @@ | |
9463 | ||
9464 | hrtimer_cancel(&port_pcpu->tx_done_timer); | |
9465 | port_pcpu->timer_scheduled = false; | |
9466 | - tasklet_kill(&port_pcpu->tx_done_tasklet); | |
9467 | } | |
9468 | } | |
9469 | mvpp2_cleanup_rxqs(port); | |
9470 | @@ -7644,13 +7635,10 @@ | |
9471 | port_pcpu = per_cpu_ptr(port->pcpu, cpu); | |
9472 | ||
9473 | hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC, | |
9474 | - HRTIMER_MODE_REL_PINNED); | |
9475 | + HRTIMER_MODE_REL_PINNED_SOFT); | |
9476 | port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb; | |
9477 | port_pcpu->timer_scheduled = false; | |
9478 | - | |
9479 | - tasklet_init(&port_pcpu->tx_done_tasklet, | |
9480 | - mvpp2_tx_proc_cb, | |
9481 | - (unsigned long)dev); | |
9482 | + port_pcpu->dev = dev; | |
9483 | } | |
9484 | } | |
9485 | ||
9486 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/net/wireless/intersil/orinoco/orinoco_usb.c linux-4.14/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
9487 | --- linux-4.14.orig/drivers/net/wireless/intersil/orinoco/orinoco_usb.c 2017-11-12 19:46:13.000000000 +0100 | |
9488 | +++ linux-4.14/drivers/net/wireless/intersil/orinoco/orinoco_usb.c 2018-09-05 11:05:07.000000000 +0200 | |
9489 | @@ -697,7 +697,7 @@ | |
9490 | while (!ctx->done.done && msecs--) | |
9491 | udelay(1000); | |
9492 | } else { | |
9493 | - wait_event_interruptible(ctx->done.wait, | |
9494 | + swait_event_interruptible(ctx->done.wait, | |
9495 | ctx->done.done); | |
9496 | } | |
9497 | break; | |
9498 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/net/wireless/mac80211_hwsim.c linux-4.14/drivers/net/wireless/mac80211_hwsim.c | |
9499 | --- linux-4.14.orig/drivers/net/wireless/mac80211_hwsim.c 2018-09-05 11:03:22.000000000 +0200 | |
9500 | +++ linux-4.14/drivers/net/wireless/mac80211_hwsim.c 2018-09-05 11:05:07.000000000 +0200 | |
9501 | @@ -537,7 +537,7 @@ | |
9502 | unsigned int rx_filter; | |
9503 | bool started, idle, scanning; | |
9504 | struct mutex mutex; | |
9505 | - struct tasklet_hrtimer beacon_timer; | |
9506 | + struct hrtimer beacon_timer; | |
9507 | enum ps_mode { | |
9508 | PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL | |
9509 | } ps; | |
9510 | @@ -1423,7 +1423,7 @@ | |
9511 | { | |
9512 | struct mac80211_hwsim_data *data = hw->priv; | |
9513 | data->started = false; | |
9514 | - tasklet_hrtimer_cancel(&data->beacon_timer); | |
9515 | + hrtimer_cancel(&data->beacon_timer); | |
9516 | wiphy_debug(hw->wiphy, "%s\n", __func__); | |
9517 | } | |
9518 | ||
9519 | @@ -1546,14 +1546,12 @@ | |
9520 | mac80211_hwsim_beacon(struct hrtimer *timer) | |
9521 | { | |
9522 | struct mac80211_hwsim_data *data = | |
9523 | - container_of(timer, struct mac80211_hwsim_data, | |
9524 | - beacon_timer.timer); | |
9525 | + container_of(timer, struct mac80211_hwsim_data, beacon_timer); | |
9526 | struct ieee80211_hw *hw = data->hw; | |
9527 | u64 bcn_int = data->beacon_int; | |
9528 | - ktime_t next_bcn; | |
9529 | ||
9530 | if (!data->started) | |
9531 | - goto out; | |
9532 | + return HRTIMER_NORESTART; | |
9533 | ||
9534 | ieee80211_iterate_active_interfaces_atomic( | |
9535 | hw, IEEE80211_IFACE_ITER_NORMAL, | |
9536 | @@ -1565,11 +1563,9 @@ | |
9537 | data->bcn_delta = 0; | |
9538 | } | |
9539 | ||
9540 | - next_bcn = ktime_add(hrtimer_get_expires(timer), | |
9541 | - ns_to_ktime(bcn_int * 1000)); | |
9542 | - tasklet_hrtimer_start(&data->beacon_timer, next_bcn, HRTIMER_MODE_ABS); | |
9543 | -out: | |
9544 | - return HRTIMER_NORESTART; | |
9545 | + hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), | |
9546 | + ns_to_ktime(bcn_int * NSEC_PER_USEC)); | |
9547 | + return HRTIMER_RESTART; | |
9548 | } | |
9549 | ||
9550 | static const char * const hwsim_chanwidths[] = { | |
9551 | @@ -1643,15 +1639,15 @@ | |
9552 | mutex_unlock(&data->mutex); | |
9553 | ||
9554 | if (!data->started || !data->beacon_int) | |
9555 | - tasklet_hrtimer_cancel(&data->beacon_timer); | |
9556 | - else if (!hrtimer_is_queued(&data->beacon_timer.timer)) { | |
9557 | + hrtimer_cancel(&data->beacon_timer); | |
9558 | + else if (!hrtimer_is_queued(&data->beacon_timer)) { | |
9559 | u64 tsf = mac80211_hwsim_get_tsf(hw, NULL); | |
9560 | u32 bcn_int = data->beacon_int; | |
9561 | u64 until_tbtt = bcn_int - do_div(tsf, bcn_int); | |
9562 | ||
9563 | - tasklet_hrtimer_start(&data->beacon_timer, | |
9564 | - ns_to_ktime(until_tbtt * 1000), | |
9565 | - HRTIMER_MODE_REL); | |
9566 | + hrtimer_start(&data->beacon_timer, | |
9567 | + ns_to_ktime(until_tbtt * 1000), | |
9568 | + HRTIMER_MODE_REL_SOFT); | |
9569 | } | |
9570 | ||
9571 | return 0; | |
9572 | @@ -1714,7 +1710,7 @@ | |
9573 | info->enable_beacon, info->beacon_int); | |
9574 | vp->bcn_en = info->enable_beacon; | |
9575 | if (data->started && | |
9576 | - !hrtimer_is_queued(&data->beacon_timer.timer) && | |
9577 | + !hrtimer_is_queued(&data->beacon_timer) && | |
9578 | info->enable_beacon) { | |
9579 | u64 tsf, until_tbtt; | |
9580 | u32 bcn_int; | |
9581 | @@ -1722,9 +1718,9 @@ | |
9582 | tsf = mac80211_hwsim_get_tsf(hw, vif); | |
9583 | bcn_int = data->beacon_int; | |
9584 | until_tbtt = bcn_int - do_div(tsf, bcn_int); | |
9585 | - tasklet_hrtimer_start(&data->beacon_timer, | |
9586 | - ns_to_ktime(until_tbtt * 1000), | |
9587 | - HRTIMER_MODE_REL); | |
9588 | + hrtimer_start(&data->beacon_timer, | |
9589 | + ns_to_ktime(until_tbtt * 1000), | |
9590 | + HRTIMER_MODE_REL_SOFT); | |
9591 | } else if (!info->enable_beacon) { | |
9592 | unsigned int count = 0; | |
9593 | ieee80211_iterate_active_interfaces_atomic( | |
9594 | @@ -1733,7 +1729,7 @@ | |
9595 | wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u", | |
9596 | count); | |
9597 | if (count == 0) { | |
9598 | - tasklet_hrtimer_cancel(&data->beacon_timer); | |
9599 | + hrtimer_cancel(&data->beacon_timer); | |
9600 | data->beacon_int = 0; | |
9601 | } | |
9602 | } | |
9603 | @@ -2725,9 +2721,9 @@ | |
9604 | data->debugfs, | |
9605 | data, &hwsim_simulate_radar); | |
9606 | ||
9607 | - tasklet_hrtimer_init(&data->beacon_timer, | |
9608 | - mac80211_hwsim_beacon, | |
9609 | - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
9610 | + hrtimer_init(&data->beacon_timer, CLOCK_MONOTONIC, | |
9611 | + HRTIMER_MODE_ABS_SOFT); | |
9612 | + data->beacon_timer.function = mac80211_hwsim_beacon; | |
9613 | ||
9614 | spin_lock_bh(&hwsim_radio_lock); | |
9615 | list_add_tail(&data->list, &hwsim_radios); | |
9616 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/pci/switch/switchtec.c linux-4.14/drivers/pci/switch/switchtec.c | |
9617 | --- linux-4.14.orig/drivers/pci/switch/switchtec.c 2017-11-12 19:46:13.000000000 +0100 | |
9618 | +++ linux-4.14/drivers/pci/switch/switchtec.c 2018-09-05 11:05:07.000000000 +0200 | |
9619 | @@ -306,10 +306,11 @@ | |
9620 | ||
9621 | enum mrpc_state state; | |
9622 | ||
9623 | - struct completion comp; | |
9624 | + wait_queue_head_t cmd_comp; | |
9625 | struct kref kref; | |
9626 | struct list_head list; | |
9627 | ||
9628 | + bool cmd_done; | |
9629 | u32 cmd; | |
9630 | u32 status; | |
9631 | u32 return_code; | |
9632 | @@ -331,7 +332,7 @@ | |
9633 | stuser->stdev = stdev; | |
9634 | kref_init(&stuser->kref); | |
9635 | INIT_LIST_HEAD(&stuser->list); | |
9636 | - init_completion(&stuser->comp); | |
9637 | + init_waitqueue_head(&stuser->cmd_comp); | |
9638 | stuser->event_cnt = atomic_read(&stdev->event_cnt); | |
9639 | ||
9640 | dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); | |
9641 | @@ -414,7 +415,7 @@ | |
9642 | kref_get(&stuser->kref); | |
9643 | stuser->read_len = sizeof(stuser->data); | |
9644 | stuser_set_state(stuser, MRPC_QUEUED); | |
9645 | - init_completion(&stuser->comp); | |
9646 | + stuser->cmd_done = false; | |
9647 | list_add_tail(&stuser->list, &stdev->mrpc_queue); | |
9648 | ||
9649 | mrpc_cmd_submit(stdev); | |
9650 | @@ -451,7 +452,8 @@ | |
9651 | stuser->read_len); | |
9652 | ||
9653 | out: | |
9654 | - complete_all(&stuser->comp); | |
9655 | + stuser->cmd_done = true; | |
9656 | + wake_up_interruptible(&stuser->cmd_comp); | |
9657 | list_del_init(&stuser->list); | |
9658 | stuser_put(stuser); | |
9659 | stdev->mrpc_busy = 0; | |
9660 | @@ -721,10 +723,11 @@ | |
9661 | mutex_unlock(&stdev->mrpc_mutex); | |
9662 | ||
9663 | if (filp->f_flags & O_NONBLOCK) { | |
9664 | - if (!try_wait_for_completion(&stuser->comp)) | |
9665 | + if (!READ_ONCE(stuser->cmd_done)) | |
9666 | return -EAGAIN; | |
9667 | } else { | |
9668 | - rc = wait_for_completion_interruptible(&stuser->comp); | |
9669 | + rc = wait_event_interruptible(stuser->cmd_comp, | |
9670 | + stuser->cmd_done); | |
9671 | if (rc < 0) | |
9672 | return rc; | |
9673 | } | |
9674 | @@ -772,7 +775,7 @@ | |
9675 | struct switchtec_dev *stdev = stuser->stdev; | |
9676 | int ret = 0; | |
9677 | ||
9678 | - poll_wait(filp, &stuser->comp.wait, wait); | |
9679 | + poll_wait(filp, &stuser->cmd_comp, wait); | |
9680 | poll_wait(filp, &stdev->event_wq, wait); | |
9681 | ||
9682 | if (lock_mutex_and_test_alive(stdev)) | |
9683 | @@ -780,7 +783,7 @@ | |
9684 | ||
9685 | mutex_unlock(&stdev->mrpc_mutex); | |
9686 | ||
9687 | - if (try_wait_for_completion(&stuser->comp)) | |
9688 | + if (READ_ONCE(stuser->cmd_done)) | |
9689 | ret |= POLLIN | POLLRDNORM; | |
9690 | ||
9691 | if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) | |
9692 | @@ -1255,7 +1258,8 @@ | |
9693 | ||
9694 | /* Wake up and kill any users waiting on an MRPC request */ | |
9695 | list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) { | |
9696 | - complete_all(&stuser->comp); | |
9697 | + stuser->cmd_done = true; | |
9698 | + wake_up_interruptible(&stuser->cmd_comp); | |
9699 | list_del_init(&stuser->list); | |
9700 | stuser_put(stuser); | |
9701 | } | |
9702 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/scsi/fcoe/fcoe.c linux-4.14/drivers/scsi/fcoe/fcoe.c | |
9703 | --- linux-4.14.orig/drivers/scsi/fcoe/fcoe.c 2017-11-12 19:46:13.000000000 +0100 | |
9704 | +++ linux-4.14/drivers/scsi/fcoe/fcoe.c 2018-09-05 11:05:07.000000000 +0200 | |
9705 | @@ -1464,11 +1464,11 @@ | |
9706 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
9707 | { | |
9708 | struct fcoe_percpu_s *fps; | |
9709 | - int rc; | |
9710 | + int rc, cpu = get_cpu_light(); | |
9711 | ||
9712 | - fps = &get_cpu_var(fcoe_percpu); | |
9713 | + fps = &per_cpu(fcoe_percpu, cpu); | |
9714 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
9715 | - put_cpu_var(fcoe_percpu); | |
9716 | + put_cpu_light(); | |
9717 | ||
9718 | return rc; | |
9719 | } | |
9720 | @@ -1655,11 +1655,11 @@ | |
9721 | return 0; | |
9722 | } | |
9723 | ||
9724 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
9725 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
9726 | stats->InvalidCRCCount++; | |
9727 | if (stats->InvalidCRCCount < 5) | |
9728 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
9729 | - put_cpu(); | |
9730 | + put_cpu_light(); | |
9731 | return -EINVAL; | |
9732 | } | |
9733 | ||
9734 | @@ -1702,7 +1702,7 @@ | |
9735 | */ | |
9736 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
9737 | ||
9738 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
9739 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
9740 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
9741 | if (stats->ErrorFrames < 5) | |
9742 | printk(KERN_WARNING "fcoe: FCoE version " | |
9743 | @@ -1734,13 +1734,13 @@ | |
9744 | goto drop; | |
9745 | ||
9746 | if (!fcoe_filter_frames(lport, fp)) { | |
9747 | - put_cpu(); | |
9748 | + put_cpu_light(); | |
9749 | fc_exch_recv(lport, fp); | |
9750 | return; | |
9751 | } | |
9752 | drop: | |
9753 | stats->ErrorFrames++; | |
9754 | - put_cpu(); | |
9755 | + put_cpu_light(); | |
9756 | kfree_skb(skb); | |
9757 | } | |
9758 | ||
9759 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/scsi/fcoe/fcoe_ctlr.c linux-4.14/drivers/scsi/fcoe/fcoe_ctlr.c | |
9760 | --- linux-4.14.orig/drivers/scsi/fcoe/fcoe_ctlr.c 2017-11-12 19:46:13.000000000 +0100 | |
9761 | +++ linux-4.14/drivers/scsi/fcoe/fcoe_ctlr.c 2018-09-05 11:05:07.000000000 +0200 | |
9762 | @@ -835,7 +835,7 @@ | |
9763 | ||
9764 | INIT_LIST_HEAD(&del_list); | |
9765 | ||
9766 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
9767 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
9768 | ||
9769 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
9770 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
9771 | @@ -871,7 +871,7 @@ | |
9772 | sel_time = fcf->time; | |
9773 | } | |
9774 | } | |
9775 | - put_cpu(); | |
9776 | + put_cpu_light(); | |
9777 | ||
9778 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
9779 | /* Removes fcf from current list */ | |
9780 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/scsi/libfc/fc_exch.c linux-4.14/drivers/scsi/libfc/fc_exch.c | |
9781 | --- linux-4.14.orig/drivers/scsi/libfc/fc_exch.c 2017-11-12 19:46:13.000000000 +0100 | |
9782 | +++ linux-4.14/drivers/scsi/libfc/fc_exch.c 2018-09-05 11:05:07.000000000 +0200 | |
9783 | @@ -833,10 +833,10 @@ | |
9784 | } | |
9785 | memset(ep, 0, sizeof(*ep)); | |
9786 | ||
9787 | - cpu = get_cpu(); | |
9788 | + cpu = get_cpu_light(); | |
9789 | pool = per_cpu_ptr(mp->pool, cpu); | |
9790 | spin_lock_bh(&pool->lock); | |
9791 | - put_cpu(); | |
9792 | + put_cpu_light(); | |
9793 | ||
9794 | /* peek cache of free slot */ | |
9795 | if (pool->left != FC_XID_UNKNOWN) { | |
9796 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/scsi/libsas/sas_ata.c linux-4.14/drivers/scsi/libsas/sas_ata.c | |
9797 | --- linux-4.14.orig/drivers/scsi/libsas/sas_ata.c 2017-11-12 19:46:13.000000000 +0100 | |
9798 | +++ linux-4.14/drivers/scsi/libsas/sas_ata.c 2018-09-05 11:05:07.000000000 +0200 | |
9799 | @@ -190,7 +190,7 @@ | |
9800 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
9801 | * unconditionally re-enable interrupts | |
9802 | */ | |
9803 | - local_irq_save(flags); | |
9804 | + local_irq_save_nort(flags); | |
9805 | spin_unlock(ap->lock); | |
9806 | ||
9807 | /* If the device fell off, no sense in issuing commands */ | |
9808 | @@ -252,7 +252,7 @@ | |
9809 | ||
9810 | out: | |
9811 | spin_lock(ap->lock); | |
9812 | - local_irq_restore(flags); | |
9813 | + local_irq_restore_nort(flags); | |
9814 | return ret; | |
9815 | } | |
9816 | ||
9817 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/scsi/qla2xxx/qla_inline.h linux-4.14/drivers/scsi/qla2xxx/qla_inline.h | |
9818 | --- linux-4.14.orig/drivers/scsi/qla2xxx/qla_inline.h 2018-09-05 11:03:22.000000000 +0200 | |
9819 | +++ linux-4.14/drivers/scsi/qla2xxx/qla_inline.h 2018-09-05 11:05:07.000000000 +0200 | |
9820 | @@ -59,12 +59,12 @@ | |
9821 | { | |
9822 | unsigned long flags; | |
9823 | struct qla_hw_data *ha = rsp->hw; | |
9824 | - local_irq_save(flags); | |
9825 | + local_irq_save_nort(flags); | |
9826 | if (IS_P3P_TYPE(ha)) | |
9827 | qla82xx_poll(0, rsp); | |
9828 | else | |
9829 | ha->isp_ops->intr_handler(0, rsp); | |
9830 | - local_irq_restore(flags); | |
9831 | + local_irq_restore_nort(flags); | |
9832 | } | |
9833 | ||
9834 | static inline uint8_t * | |
9835 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/staging/greybus/audio_manager.c linux-4.14/drivers/staging/greybus/audio_manager.c | |
9836 | --- linux-4.14.orig/drivers/staging/greybus/audio_manager.c 2017-11-12 19:46:13.000000000 +0100 | |
9837 | +++ linux-4.14/drivers/staging/greybus/audio_manager.c 2018-09-05 11:05:07.000000000 +0200 | |
9838 | @@ -10,7 +10,7 @@ | |
9839 | #include <linux/sysfs.h> | |
9840 | #include <linux/module.h> | |
9841 | #include <linux/init.h> | |
9842 | -#include <linux/rwlock.h> | |
9843 | +#include <linux/spinlock.h> | |
9844 | #include <linux/idr.h> | |
9845 | ||
9846 | #include "audio_manager.h" | |
9847 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/target/target_core_tmr.c linux-4.14/drivers/target/target_core_tmr.c | |
9848 | --- linux-4.14.orig/drivers/target/target_core_tmr.c 2018-09-05 11:03:22.000000000 +0200 | |
9849 | +++ linux-4.14/drivers/target/target_core_tmr.c 2018-09-05 11:05:07.000000000 +0200 | |
9850 | @@ -114,8 +114,6 @@ | |
9851 | { | |
9852 | struct se_session *sess = se_cmd->se_sess; | |
9853 | ||
9854 | - assert_spin_locked(&sess->sess_cmd_lock); | |
9855 | - WARN_ON_ONCE(!irqs_disabled()); | |
9856 | /* | |
9857 | * If command already reached CMD_T_COMPLETE state within | |
9858 | * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, | |
9859 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/target/target_core_transport.c linux-4.14/drivers/target/target_core_transport.c | |
9860 | --- linux-4.14.orig/drivers/target/target_core_transport.c 2018-09-05 11:03:22.000000000 +0200 | |
9861 | +++ linux-4.14/drivers/target/target_core_transport.c 2018-09-05 11:05:07.000000000 +0200 | |
9862 | @@ -2966,9 +2966,6 @@ | |
9863 | __acquires(&cmd->t_state_lock) | |
9864 | { | |
9865 | ||
9866 | - assert_spin_locked(&cmd->t_state_lock); | |
9867 | - WARN_ON_ONCE(!irqs_disabled()); | |
9868 | - | |
9869 | if (fabric_stop) | |
9870 | cmd->transport_state |= CMD_T_FABRIC_STOP; | |
9871 | ||
9872 | @@ -3238,9 +3235,6 @@ | |
9873 | { | |
9874 | int ret; | |
9875 | ||
9876 | - assert_spin_locked(&cmd->t_state_lock); | |
9877 | - WARN_ON_ONCE(!irqs_disabled()); | |
9878 | - | |
9879 | if (!(cmd->transport_state & CMD_T_ABORTED)) | |
9880 | return 0; | |
9881 | /* | |
9882 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/thermal/x86_pkg_temp_thermal.c linux-4.14/drivers/thermal/x86_pkg_temp_thermal.c | |
9883 | --- linux-4.14.orig/drivers/thermal/x86_pkg_temp_thermal.c 2017-11-12 19:46:13.000000000 +0100 | |
9884 | +++ linux-4.14/drivers/thermal/x86_pkg_temp_thermal.c 2018-09-05 11:05:07.000000000 +0200 | |
9885 | @@ -29,6 +29,7 @@ | |
9886 | #include <linux/pm.h> | |
9887 | #include <linux/thermal.h> | |
9888 | #include <linux/debugfs.h> | |
9889 | +#include <linux/swork.h> | |
9890 | #include <asm/cpu_device_id.h> | |
9891 | #include <asm/mce.h> | |
9892 | ||
9893 | @@ -329,7 +330,7 @@ | |
9894 | schedule_delayed_work_on(cpu, work, ms); | |
9895 | } | |
9896 | ||
9897 | -static int pkg_thermal_notify(u64 msr_val) | |
9898 | +static void pkg_thermal_notify_work(struct swork_event *event) | |
9899 | { | |
9900 | int cpu = smp_processor_id(); | |
9901 | struct pkg_device *pkgdev; | |
9902 | @@ -348,8 +349,46 @@ | |
9903 | } | |
9904 | ||
9905 | spin_unlock_irqrestore(&pkg_temp_lock, flags); | |
9906 | +} | |
9907 | + | |
9908 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9909 | +static struct swork_event notify_work; | |
9910 | + | |
9911 | +static int pkg_thermal_notify_work_init(void) | |
9912 | +{ | |
9913 | + int err; | |
9914 | + | |
9915 | + err = swork_get(); | |
9916 | + if (err) | |
9917 | + return err; | |
9918 | + | |
9919 | + INIT_SWORK(¬ify_work, pkg_thermal_notify_work); | |
9920 | + return 0; | |
9921 | +} | |
9922 | + | |
9923 | +static void pkg_thermal_notify_work_cleanup(void) | |
9924 | +{ | |
9925 | + swork_put(); | |
9926 | +} | |
9927 | + | |
9928 | +static int pkg_thermal_notify(u64 msr_val) | |
9929 | +{ | |
9930 | + swork_queue(¬ify_work); | |
9931 | + return 0; | |
9932 | +} | |
9933 | + | |
9934 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
9935 | + | |
9936 | +static int pkg_thermal_notify_work_init(void) { return 0; } | |
9937 | + | |
9938 | +static void pkg_thermal_notify_work_cleanup(void) { } | |
9939 | + | |
9940 | +static int pkg_thermal_notify(u64 msr_val) | |
9941 | +{ | |
9942 | + pkg_thermal_notify_work(NULL); | |
9943 | return 0; | |
9944 | } | |
9945 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
9946 | ||
9947 | static int pkg_temp_thermal_device_add(unsigned int cpu) | |
9948 | { | |
9949 | @@ -515,10 +554,15 @@ | |
9950 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | |
9951 | return -ENODEV; | |
9952 | ||
9953 | + if (!pkg_thermal_notify_work_init()) | |
9954 | + return -ENODEV; | |
9955 | + | |
9956 | max_packages = topology_max_packages(); | |
9957 | packages = kzalloc(max_packages * sizeof(struct pkg_device *), GFP_KERNEL); | |
9958 | - if (!packages) | |
9959 | - return -ENOMEM; | |
9960 | + if (!packages) { | |
9961 | + ret = -ENOMEM; | |
9962 | + goto err; | |
9963 | + } | |
9964 | ||
9965 | ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online", | |
9966 | pkg_thermal_cpu_online, pkg_thermal_cpu_offline); | |
9967 | @@ -536,6 +580,7 @@ | |
9968 | return 0; | |
9969 | ||
9970 | err: | |
9971 | + pkg_thermal_notify_work_cleanup(); | |
9972 | kfree(packages); | |
9973 | return ret; | |
9974 | } | |
9975 | @@ -549,6 +594,7 @@ | |
9976 | cpuhp_remove_state(pkg_thermal_hp_state); | |
9977 | debugfs_remove_recursive(debugfs); | |
9978 | kfree(packages); | |
9979 | + pkg_thermal_notify_work_cleanup(); | |
9980 | } | |
9981 | module_exit(pkg_temp_thermal_exit) | |
9982 | ||
9983 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/tty/serial/8250/8250_core.c linux-4.14/drivers/tty/serial/8250/8250_core.c | |
9984 | --- linux-4.14.orig/drivers/tty/serial/8250/8250_core.c 2017-11-12 19:46:13.000000000 +0100 | |
9985 | +++ linux-4.14/drivers/tty/serial/8250/8250_core.c 2018-09-05 11:05:07.000000000 +0200 | |
9986 | @@ -58,7 +58,16 @@ | |
9987 | ||
9988 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
9989 | ||
9990 | -#define PASS_LIMIT 512 | |
9991 | +/* | |
9992 | + * On -rt we can have a more delays, and legitimately | |
9993 | + * so - so don't drop work spuriously and spam the | |
9994 | + * syslog: | |
9995 | + */ | |
9996 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9997 | +# define PASS_LIMIT 1000000 | |
9998 | +#else | |
9999 | +# define PASS_LIMIT 512 | |
10000 | +#endif | |
10001 | ||
10002 | #include <asm/serial.h> | |
10003 | /* | |
10004 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/tty/serial/8250/8250_port.c linux-4.14/drivers/tty/serial/8250/8250_port.c | |
10005 | --- linux-4.14.orig/drivers/tty/serial/8250/8250_port.c 2018-09-05 11:03:22.000000000 +0200 | |
10006 | +++ linux-4.14/drivers/tty/serial/8250/8250_port.c 2018-09-05 11:05:07.000000000 +0200 | |
10007 | @@ -35,6 +35,7 @@ | |
10008 | #include <linux/nmi.h> | |
10009 | #include <linux/mutex.h> | |
10010 | #include <linux/slab.h> | |
10011 | +#include <linux/kdb.h> | |
10012 | #include <linux/uaccess.h> | |
10013 | #include <linux/pm_runtime.h> | |
10014 | #include <linux/ktime.h> | |
10015 | @@ -3224,9 +3225,9 @@ | |
10016 | ||
10017 | serial8250_rpm_get(up); | |
10018 | ||
10019 | - if (port->sysrq) | |
10020 | + if (port->sysrq || oops_in_progress) | |
10021 | locked = 0; | |
10022 | - else if (oops_in_progress) | |
10023 | + else if (in_kdb_printk()) | |
10024 | locked = spin_trylock_irqsave(&port->lock, flags); | |
10025 | else | |
10026 | spin_lock_irqsave(&port->lock, flags); | |
10027 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/tty/serial/amba-pl011.c linux-4.14/drivers/tty/serial/amba-pl011.c | |
10028 | --- linux-4.14.orig/drivers/tty/serial/amba-pl011.c 2018-09-05 11:03:22.000000000 +0200 | |
10029 | +++ linux-4.14/drivers/tty/serial/amba-pl011.c 2018-09-05 11:05:07.000000000 +0200 | |
10030 | @@ -2236,13 +2236,19 @@ | |
10031 | ||
10032 | clk_enable(uap->clk); | |
10033 | ||
10034 | - local_irq_save(flags); | |
10035 | + /* | |
10036 | + * local_irq_save(flags); | |
10037 | + * | |
10038 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
10039 | + * handling then interrupts are already disabled. Aside of | |
10040 | + * that the port.sysrq check is racy on SMP regardless. | |
10041 | + */ | |
10042 | if (uap->port.sysrq) | |
10043 | locked = 0; | |
10044 | else if (oops_in_progress) | |
10045 | - locked = spin_trylock(&uap->port.lock); | |
10046 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
10047 | else | |
10048 | - spin_lock(&uap->port.lock); | |
10049 | + spin_lock_irqsave(&uap->port.lock, flags); | |
10050 | ||
10051 | /* | |
10052 | * First save the CR then disable the interrupts | |
10053 | @@ -2268,8 +2274,7 @@ | |
10054 | pl011_write(old_cr, uap, REG_CR); | |
10055 | ||
10056 | if (locked) | |
10057 | - spin_unlock(&uap->port.lock); | |
10058 | - local_irq_restore(flags); | |
10059 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
10060 | ||
10061 | clk_disable(uap->clk); | |
10062 | } | |
10063 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/tty/serial/omap-serial.c linux-4.14/drivers/tty/serial/omap-serial.c | |
10064 | --- linux-4.14.orig/drivers/tty/serial/omap-serial.c 2018-09-05 11:03:22.000000000 +0200 | |
10065 | +++ linux-4.14/drivers/tty/serial/omap-serial.c 2018-09-05 11:05:07.000000000 +0200 | |
10066 | @@ -1311,13 +1311,10 @@ | |
10067 | ||
10068 | pm_runtime_get_sync(up->dev); | |
10069 | ||
10070 | - local_irq_save(flags); | |
10071 | - if (up->port.sysrq) | |
10072 | - locked = 0; | |
10073 | - else if (oops_in_progress) | |
10074 | - locked = spin_trylock(&up->port.lock); | |
10075 | + if (up->port.sysrq || oops_in_progress) | |
10076 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
10077 | else | |
10078 | - spin_lock(&up->port.lock); | |
10079 | + spin_lock_irqsave(&up->port.lock, flags); | |
10080 | ||
10081 | /* | |
10082 | * First save the IER then disable the interrupts | |
10083 | @@ -1346,8 +1343,7 @@ | |
10084 | pm_runtime_mark_last_busy(up->dev); | |
10085 | pm_runtime_put_autosuspend(up->dev); | |
10086 | if (locked) | |
10087 | - spin_unlock(&up->port.lock); | |
10088 | - local_irq_restore(flags); | |
10089 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
10090 | } | |
10091 | ||
10092 | static int __init | |
10093 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/usb/core/hcd.c linux-4.14/drivers/usb/core/hcd.c | |
10094 | --- linux-4.14.orig/drivers/usb/core/hcd.c 2018-09-05 11:03:22.000000000 +0200 | |
10095 | +++ linux-4.14/drivers/usb/core/hcd.c 2018-09-05 11:05:07.000000000 +0200 | |
10096 | @@ -1775,9 +1775,9 @@ | |
10097 | * and no one may trigger the above deadlock situation when | |
10098 | * running complete() in tasklet. | |
10099 | */ | |
10100 | - local_irq_save(flags); | |
10101 | + local_irq_save_nort(flags); | |
10102 | urb->complete(urb); | |
10103 | - local_irq_restore(flags); | |
10104 | + local_irq_restore_nort(flags); | |
10105 | ||
10106 | usb_anchor_resume_wakeups(anchor); | |
10107 | atomic_dec(&urb->use_count); | |
10108 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/usb/gadget/function/f_fs.c linux-4.14/drivers/usb/gadget/function/f_fs.c | |
10109 | --- linux-4.14.orig/drivers/usb/gadget/function/f_fs.c 2018-09-05 11:03:22.000000000 +0200 | |
10110 | +++ linux-4.14/drivers/usb/gadget/function/f_fs.c 2018-09-05 11:05:07.000000000 +0200 | |
10111 | @@ -1623,7 +1623,7 @@ | |
10112 | pr_info("%s(): freeing\n", __func__); | |
10113 | ffs_data_clear(ffs); | |
10114 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
10115 | - waitqueue_active(&ffs->ep0req_completion.wait) || | |
10116 | + swait_active(&ffs->ep0req_completion.wait) || | |
10117 | waitqueue_active(&ffs->wait)); | |
10118 | destroy_workqueue(ffs->io_completion_wq); | |
10119 | kfree(ffs->dev_name); | |
10120 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/usb/gadget/function/f_ncm.c linux-4.14/drivers/usb/gadget/function/f_ncm.c | |
10121 | --- linux-4.14.orig/drivers/usb/gadget/function/f_ncm.c 2017-11-12 19:46:13.000000000 +0100 | |
10122 | +++ linux-4.14/drivers/usb/gadget/function/f_ncm.c 2018-09-05 11:05:07.000000000 +0200 | |
10123 | @@ -77,9 +77,7 @@ | |
10124 | struct sk_buff *skb_tx_ndp; | |
10125 | u16 ndp_dgram_count; | |
10126 | bool timer_force_tx; | |
10127 | - struct tasklet_struct tx_tasklet; | |
10128 | struct hrtimer task_timer; | |
10129 | - | |
10130 | bool timer_stopping; | |
10131 | }; | |
10132 | ||
10133 | @@ -1108,7 +1106,7 @@ | |
10134 | ||
10135 | /* Delay the timer. */ | |
10136 | hrtimer_start(&ncm->task_timer, TX_TIMEOUT_NSECS, | |
10137 | - HRTIMER_MODE_REL); | |
10138 | + HRTIMER_MODE_REL_SOFT); | |
10139 | ||
10140 | /* Add the datagram position entries */ | |
10141 | ntb_ndp = skb_put_zero(ncm->skb_tx_ndp, dgram_idx_len); | |
10142 | @@ -1152,17 +1150,15 @@ | |
10143 | } | |
10144 | ||
10145 | /* | |
10146 | - * This transmits the NTB if there are frames waiting. | |
10147 | + * The transmit should only be run if no skb data has been sent | |
10148 | + * for a certain duration. | |
10149 | */ | |
10150 | -static void ncm_tx_tasklet(unsigned long data) | |
10151 | +static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) | |
10152 | { | |
10153 | - struct f_ncm *ncm = (void *)data; | |
10154 | - | |
10155 | - if (ncm->timer_stopping) | |
10156 | - return; | |
10157 | + struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); | |
10158 | ||
10159 | /* Only send if data is available. */ | |
10160 | - if (ncm->skb_tx_data) { | |
10161 | + if (!ncm->timer_stopping && ncm->skb_tx_data) { | |
10162 | ncm->timer_force_tx = true; | |
10163 | ||
10164 | /* XXX This allowance of a NULL skb argument to ndo_start_xmit | |
10165 | @@ -1175,16 +1171,6 @@ | |
10166 | ||
10167 | ncm->timer_force_tx = false; | |
10168 | } | |
10169 | -} | |
10170 | - | |
10171 | -/* | |
10172 | - * The transmit should only be run if no skb data has been sent | |
10173 | - * for a certain duration. | |
10174 | - */ | |
10175 | -static enum hrtimer_restart ncm_tx_timeout(struct hrtimer *data) | |
10176 | -{ | |
10177 | - struct f_ncm *ncm = container_of(data, struct f_ncm, task_timer); | |
10178 | - tasklet_schedule(&ncm->tx_tasklet); | |
10179 | return HRTIMER_NORESTART; | |
10180 | } | |
10181 | ||
10182 | @@ -1517,8 +1503,7 @@ | |
10183 | ncm->port.open = ncm_open; | |
10184 | ncm->port.close = ncm_close; | |
10185 | ||
10186 | - tasklet_init(&ncm->tx_tasklet, ncm_tx_tasklet, (unsigned long) ncm); | |
10187 | - hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
10188 | + hrtimer_init(&ncm->task_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); | |
10189 | ncm->task_timer.function = ncm_tx_timeout; | |
10190 | ||
10191 | DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n", | |
10192 | @@ -1627,7 +1612,6 @@ | |
10193 | DBG(c->cdev, "ncm unbind\n"); | |
10194 | ||
10195 | hrtimer_cancel(&ncm->task_timer); | |
10196 | - tasklet_kill(&ncm->tx_tasklet); | |
10197 | ||
10198 | ncm_string_defs[0].id = 0; | |
10199 | usb_free_all_descriptors(f); | |
10200 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/drivers/usb/gadget/legacy/inode.c linux-4.14/drivers/usb/gadget/legacy/inode.c | |
10201 | --- linux-4.14.orig/drivers/usb/gadget/legacy/inode.c 2017-11-12 19:46:13.000000000 +0100 | |
10202 | +++ linux-4.14/drivers/usb/gadget/legacy/inode.c 2018-09-05 11:05:07.000000000 +0200 | |
10203 | @@ -347,7 +347,7 @@ | |
10204 | spin_unlock_irq (&epdata->dev->lock); | |
10205 | ||
10206 | if (likely (value == 0)) { | |
10207 | - value = wait_event_interruptible (done.wait, done.done); | |
10208 | + value = swait_event_interruptible (done.wait, done.done); | |
10209 | if (value != 0) { | |
10210 | spin_lock_irq (&epdata->dev->lock); | |
10211 | if (likely (epdata->ep != NULL)) { | |
10212 | @@ -356,7 +356,7 @@ | |
10213 | usb_ep_dequeue (epdata->ep, epdata->req); | |
10214 | spin_unlock_irq (&epdata->dev->lock); | |
10215 | ||
10216 | - wait_event (done.wait, done.done); | |
10217 | + swait_event (done.wait, done.done); | |
10218 | if (epdata->status == -ECONNRESET) | |
10219 | epdata->status = -EINTR; | |
10220 | } else { | |
10221 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/aio.c linux-4.14/fs/aio.c | |
10222 | --- linux-4.14.orig/fs/aio.c 2018-09-05 11:03:22.000000000 +0200 | |
10223 | +++ linux-4.14/fs/aio.c 2018-09-05 11:05:07.000000000 +0200 | |
10224 | @@ -40,6 +40,7 @@ | |
10225 | #include <linux/ramfs.h> | |
10226 | #include <linux/percpu-refcount.h> | |
10227 | #include <linux/mount.h> | |
10228 | +#include <linux/swork.h> | |
10229 | ||
10230 | #include <asm/kmap_types.h> | |
10231 | #include <linux/uaccess.h> | |
10232 | @@ -117,6 +118,7 @@ | |
10233 | ||
10234 | struct rcu_head free_rcu; | |
10235 | struct work_struct free_work; /* see free_ioctx() */ | |
10236 | + struct swork_event free_swork; /* see free_ioctx() */ | |
10237 | ||
10238 | /* | |
10239 | * signals when all in-flight requests are done | |
10240 | @@ -259,6 +261,7 @@ | |
10241 | .mount = aio_mount, | |
10242 | .kill_sb = kill_anon_super, | |
10243 | }; | |
10244 | + BUG_ON(swork_get()); | |
10245 | aio_mnt = kern_mount(&aio_fs); | |
10246 | if (IS_ERR(aio_mnt)) | |
10247 | panic("Failed to create aio fs mount."); | |
10248 | @@ -633,9 +636,9 @@ | |
10249 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - | |
10250 | * now it's safe to cancel any that need to be. | |
10251 | */ | |
10252 | -static void free_ioctx_users(struct percpu_ref *ref) | |
10253 | +static void free_ioctx_users_work(struct swork_event *sev) | |
10254 | { | |
10255 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
10256 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_swork); | |
10257 | struct aio_kiocb *req; | |
10258 | ||
10259 | spin_lock_irq(&ctx->ctx_lock); | |
10260 | @@ -653,6 +656,14 @@ | |
10261 | percpu_ref_put(&ctx->reqs); | |
10262 | } | |
10263 | ||
10264 | +static void free_ioctx_users(struct percpu_ref *ref) | |
10265 | +{ | |
10266 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
10267 | + | |
10268 | + INIT_SWORK(&ctx->free_swork, free_ioctx_users_work); | |
10269 | + swork_queue(&ctx->free_swork); | |
10270 | +} | |
10271 | + | |
10272 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
10273 | { | |
10274 | unsigned i, new_nr; | |
10275 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/autofs4/autofs_i.h linux-4.14/fs/autofs4/autofs_i.h | |
10276 | --- linux-4.14.orig/fs/autofs4/autofs_i.h 2017-11-12 19:46:13.000000000 +0100 | |
10277 | +++ linux-4.14/fs/autofs4/autofs_i.h 2018-09-05 11:05:07.000000000 +0200 | |
10278 | @@ -20,6 +20,7 @@ | |
10279 | #include <linux/sched.h> | |
10280 | #include <linux/mount.h> | |
10281 | #include <linux/namei.h> | |
10282 | +#include <linux/delay.h> | |
10283 | #include <linux/uaccess.h> | |
10284 | #include <linux/mutex.h> | |
10285 | #include <linux/spinlock.h> | |
10286 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/autofs4/expire.c linux-4.14/fs/autofs4/expire.c | |
10287 | --- linux-4.14.orig/fs/autofs4/expire.c 2017-11-12 19:46:13.000000000 +0100 | |
10288 | +++ linux-4.14/fs/autofs4/expire.c 2018-09-05 11:05:07.000000000 +0200 | |
10289 | @@ -148,7 +148,7 @@ | |
10290 | parent = p->d_parent; | |
10291 | if (!spin_trylock(&parent->d_lock)) { | |
10292 | spin_unlock(&p->d_lock); | |
10293 | - cpu_relax(); | |
10294 | + cpu_chill(); | |
10295 | goto relock; | |
10296 | } | |
10297 | spin_unlock(&p->d_lock); | |
10298 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/buffer.c linux-4.14/fs/buffer.c | |
10299 | --- linux-4.14.orig/fs/buffer.c 2018-09-05 11:03:22.000000000 +0200 | |
10300 | +++ linux-4.14/fs/buffer.c 2018-09-05 11:05:07.000000000 +0200 | |
10301 | @@ -302,8 +302,7 @@ | |
10302 | * decide that the page is now completely done. | |
10303 | */ | |
10304 | first = page_buffers(page); | |
10305 | - local_irq_save(flags); | |
10306 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
10307 | + flags = bh_uptodate_lock_irqsave(first); | |
10308 | clear_buffer_async_read(bh); | |
10309 | unlock_buffer(bh); | |
10310 | tmp = bh; | |
10311 | @@ -316,8 +315,7 @@ | |
10312 | } | |
10313 | tmp = tmp->b_this_page; | |
10314 | } while (tmp != bh); | |
10315 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
10316 | - local_irq_restore(flags); | |
10317 | + bh_uptodate_unlock_irqrestore(first, flags); | |
10318 | ||
10319 | /* | |
10320 | * If none of the buffers had errors and they are all | |
10321 | @@ -329,9 +327,7 @@ | |
10322 | return; | |
10323 | ||
10324 | still_busy: | |
10325 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
10326 | - local_irq_restore(flags); | |
10327 | - return; | |
10328 | + bh_uptodate_unlock_irqrestore(first, flags); | |
10329 | } | |
10330 | ||
10331 | /* | |
10332 | @@ -358,8 +354,7 @@ | |
10333 | } | |
10334 | ||
10335 | first = page_buffers(page); | |
10336 | - local_irq_save(flags); | |
10337 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
10338 | + flags = bh_uptodate_lock_irqsave(first); | |
10339 | ||
10340 | clear_buffer_async_write(bh); | |
10341 | unlock_buffer(bh); | |
10342 | @@ -371,15 +366,12 @@ | |
10343 | } | |
10344 | tmp = tmp->b_this_page; | |
10345 | } | |
10346 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
10347 | - local_irq_restore(flags); | |
10348 | + bh_uptodate_unlock_irqrestore(first, flags); | |
10349 | end_page_writeback(page); | |
10350 | return; | |
10351 | ||
10352 | still_busy: | |
10353 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
10354 | - local_irq_restore(flags); | |
10355 | - return; | |
10356 | + bh_uptodate_unlock_irqrestore(first, flags); | |
10357 | } | |
10358 | EXPORT_SYMBOL(end_buffer_async_write); | |
10359 | ||
10360 | @@ -3417,6 +3409,7 @@ | |
10361 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); | |
10362 | if (ret) { | |
10363 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
10364 | + buffer_head_init_locks(ret); | |
10365 | preempt_disable(); | |
10366 | __this_cpu_inc(bh_accounting.nr); | |
10367 | recalc_bh_state(); | |
10368 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/cifs/readdir.c linux-4.14/fs/cifs/readdir.c | |
10369 | --- linux-4.14.orig/fs/cifs/readdir.c 2017-11-12 19:46:13.000000000 +0100 | |
10370 | +++ linux-4.14/fs/cifs/readdir.c 2018-09-05 11:05:07.000000000 +0200 | |
10371 | @@ -80,7 +80,7 @@ | |
10372 | struct inode *inode; | |
10373 | struct super_block *sb = parent->d_sb; | |
10374 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | |
10375 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10376 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10377 | ||
10378 | cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); | |
10379 | ||
10380 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/dcache.c linux-4.14/fs/dcache.c | |
10381 | --- linux-4.14.orig/fs/dcache.c 2018-09-05 11:03:29.000000000 +0200 | |
10382 | +++ linux-4.14/fs/dcache.c 2018-09-05 11:05:07.000000000 +0200 | |
10383 | @@ -19,6 +19,7 @@ | |
10384 | #include <linux/mm.h> | |
10385 | #include <linux/fs.h> | |
10386 | #include <linux/fsnotify.h> | |
10387 | +#include <linux/delay.h> | |
10388 | #include <linux/slab.h> | |
10389 | #include <linux/init.h> | |
10390 | #include <linux/hash.h> | |
10391 | @@ -793,6 +794,8 @@ | |
10392 | */ | |
10393 | void dput(struct dentry *dentry) | |
10394 | { | |
10395 | + struct dentry *parent; | |
10396 | + | |
10397 | if (unlikely(!dentry)) | |
10398 | return; | |
10399 | ||
10400 | @@ -829,9 +832,18 @@ | |
10401 | return; | |
10402 | ||
10403 | kill_it: | |
10404 | - dentry = dentry_kill(dentry); | |
10405 | - if (dentry) { | |
10406 | - cond_resched(); | |
10407 | + parent = dentry_kill(dentry); | |
10408 | + if (parent) { | |
10409 | + int r; | |
10410 | + | |
10411 | + if (parent == dentry) { | |
10412 | + /* the task with the highest priority won't schedule */ | |
10413 | + r = cond_resched(); | |
10414 | + if (!r) | |
10415 | + cpu_chill(); | |
10416 | + } else { | |
10417 | + dentry = parent; | |
10418 | + } | |
10419 | goto repeat; | |
10420 | } | |
10421 | } | |
10422 | @@ -2394,7 +2406,7 @@ | |
10423 | if (dentry->d_lockref.count == 1) { | |
10424 | if (!spin_trylock(&inode->i_lock)) { | |
10425 | spin_unlock(&dentry->d_lock); | |
10426 | - cpu_relax(); | |
10427 | + cpu_chill(); | |
10428 | goto again; | |
10429 | } | |
10430 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
10431 | @@ -2439,9 +2451,10 @@ | |
10432 | static inline unsigned start_dir_add(struct inode *dir) | |
10433 | { | |
10434 | ||
10435 | + preempt_disable_rt(); | |
10436 | for (;;) { | |
10437 | - unsigned n = dir->i_dir_seq; | |
10438 | - if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) | |
10439 | + unsigned n = dir->__i_dir_seq; | |
10440 | + if (!(n & 1) && cmpxchg(&dir->__i_dir_seq, n, n + 1) == n) | |
10441 | return n; | |
10442 | cpu_relax(); | |
10443 | } | |
10444 | @@ -2449,26 +2462,30 @@ | |
10445 | ||
10446 | static inline void end_dir_add(struct inode *dir, unsigned n) | |
10447 | { | |
10448 | - smp_store_release(&dir->i_dir_seq, n + 2); | |
10449 | + smp_store_release(&dir->__i_dir_seq, n + 2); | |
10450 | + preempt_enable_rt(); | |
10451 | } | |
10452 | ||
10453 | static void d_wait_lookup(struct dentry *dentry) | |
10454 | { | |
10455 | - if (d_in_lookup(dentry)) { | |
10456 | - DECLARE_WAITQUEUE(wait, current); | |
10457 | - add_wait_queue(dentry->d_wait, &wait); | |
10458 | - do { | |
10459 | - set_current_state(TASK_UNINTERRUPTIBLE); | |
10460 | - spin_unlock(&dentry->d_lock); | |
10461 | - schedule(); | |
10462 | - spin_lock(&dentry->d_lock); | |
10463 | - } while (d_in_lookup(dentry)); | |
10464 | - } | |
10465 | + struct swait_queue __wait; | |
10466 | + | |
10467 | + if (!d_in_lookup(dentry)) | |
10468 | + return; | |
10469 | + | |
10470 | + INIT_LIST_HEAD(&__wait.task_list); | |
10471 | + do { | |
10472 | + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); | |
10473 | + spin_unlock(&dentry->d_lock); | |
10474 | + schedule(); | |
10475 | + spin_lock(&dentry->d_lock); | |
10476 | + } while (d_in_lookup(dentry)); | |
10477 | + finish_swait(dentry->d_wait, &__wait); | |
10478 | } | |
10479 | ||
10480 | struct dentry *d_alloc_parallel(struct dentry *parent, | |
10481 | const struct qstr *name, | |
10482 | - wait_queue_head_t *wq) | |
10483 | + struct swait_queue_head *wq) | |
10484 | { | |
10485 | unsigned int hash = name->hash; | |
10486 | struct hlist_bl_head *b = in_lookup_hash(parent, hash); | |
10487 | @@ -2482,7 +2499,7 @@ | |
10488 | ||
10489 | retry: | |
10490 | rcu_read_lock(); | |
10491 | - seq = smp_load_acquire(&parent->d_inode->i_dir_seq); | |
10492 | + seq = smp_load_acquire(&parent->d_inode->__i_dir_seq); | |
10493 | r_seq = read_seqbegin(&rename_lock); | |
10494 | dentry = __d_lookup_rcu(parent, name, &d_seq); | |
10495 | if (unlikely(dentry)) { | |
10496 | @@ -2510,7 +2527,7 @@ | |
10497 | } | |
10498 | ||
10499 | hlist_bl_lock(b); | |
10500 | - if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) { | |
10501 | + if (unlikely(READ_ONCE(parent->d_inode->__i_dir_seq) != seq)) { | |
10502 | hlist_bl_unlock(b); | |
10503 | rcu_read_unlock(); | |
10504 | goto retry; | |
10505 | @@ -2583,7 +2600,7 @@ | |
10506 | hlist_bl_lock(b); | |
10507 | dentry->d_flags &= ~DCACHE_PAR_LOOKUP; | |
10508 | __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); | |
10509 | - wake_up_all(dentry->d_wait); | |
10510 | + swake_up_all(dentry->d_wait); | |
10511 | dentry->d_wait = NULL; | |
10512 | hlist_bl_unlock(b); | |
10513 | INIT_HLIST_NODE(&dentry->d_u.d_alias); | |
10514 | @@ -3619,6 +3636,8 @@ | |
10515 | ||
10516 | static void __init dcache_init_early(void) | |
10517 | { | |
10518 | + unsigned int loop; | |
10519 | + | |
10520 | /* If hashes are distributed across NUMA nodes, defer | |
10521 | * hash allocation until vmalloc space is available. | |
10522 | */ | |
10523 | @@ -3635,10 +3654,14 @@ | |
10524 | &d_hash_mask, | |
10525 | 0, | |
10526 | 0); | |
10527 | + | |
10528 | + for (loop = 0; loop < (1U << d_hash_shift); loop++) | |
10529 | + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); | |
10530 | } | |
10531 | ||
10532 | static void __init dcache_init(void) | |
10533 | { | |
10534 | + unsigned int loop; | |
10535 | /* | |
10536 | * A constructor could be added for stable state like the lists, | |
10537 | * but it is probably not worth it because of the cache nature | |
10538 | @@ -3661,6 +3684,10 @@ | |
10539 | &d_hash_mask, | |
10540 | 0, | |
10541 | 0); | |
10542 | + | |
10543 | + for (loop = 0; loop < (1U << d_hash_shift); loop++) | |
10544 | + INIT_HLIST_BL_HEAD(dentry_hashtable + loop); | |
10545 | + | |
10546 | } | |
10547 | ||
10548 | /* SLAB cache for __getname() consumers */ | |
10549 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/eventpoll.c linux-4.14/fs/eventpoll.c | |
10550 | --- linux-4.14.orig/fs/eventpoll.c 2017-11-12 19:46:13.000000000 +0100 | |
10551 | +++ linux-4.14/fs/eventpoll.c 2018-09-05 11:05:07.000000000 +0200 | |
10552 | @@ -587,12 +587,12 @@ | |
10553 | */ | |
10554 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
10555 | { | |
10556 | - int this_cpu = get_cpu(); | |
10557 | + int this_cpu = get_cpu_light(); | |
10558 | ||
10559 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
10560 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
10561 | ||
10562 | - put_cpu(); | |
10563 | + put_cpu_light(); | |
10564 | } | |
10565 | ||
10566 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
10567 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/exec.c linux-4.14/fs/exec.c | |
10568 | --- linux-4.14.orig/fs/exec.c 2018-09-05 11:03:29.000000000 +0200 | |
10569 | +++ linux-4.14/fs/exec.c 2018-09-05 11:05:07.000000000 +0200 | |
10570 | @@ -1025,12 +1025,14 @@ | |
10571 | } | |
10572 | } | |
10573 | task_lock(tsk); | |
10574 | + preempt_disable_rt(); | |
10575 | active_mm = tsk->active_mm; | |
10576 | tsk->mm = mm; | |
10577 | tsk->active_mm = mm; | |
10578 | activate_mm(active_mm, mm); | |
10579 | tsk->mm->vmacache_seqnum = 0; | |
10580 | vmacache_flush(tsk); | |
10581 | + preempt_enable_rt(); | |
10582 | task_unlock(tsk); | |
10583 | if (old_mm) { | |
10584 | up_read(&old_mm->mmap_sem); | |
10585 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/ext4/page-io.c linux-4.14/fs/ext4/page-io.c | |
10586 | --- linux-4.14.orig/fs/ext4/page-io.c 2017-11-12 19:46:13.000000000 +0100 | |
10587 | +++ linux-4.14/fs/ext4/page-io.c 2018-09-05 11:05:07.000000000 +0200 | |
10588 | @@ -95,8 +95,7 @@ | |
10589 | * We check all buffers in the page under BH_Uptodate_Lock | |
10590 | * to avoid races with other end io clearing async_write flags | |
10591 | */ | |
10592 | - local_irq_save(flags); | |
10593 | - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | |
10594 | + flags = bh_uptodate_lock_irqsave(head); | |
10595 | do { | |
10596 | if (bh_offset(bh) < bio_start || | |
10597 | bh_offset(bh) + bh->b_size > bio_end) { | |
10598 | @@ -108,8 +107,7 @@ | |
10599 | if (bio->bi_status) | |
10600 | buffer_io_error(bh); | |
10601 | } while ((bh = bh->b_this_page) != head); | |
10602 | - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | |
10603 | - local_irq_restore(flags); | |
10604 | + bh_uptodate_unlock_irqrestore(head, flags); | |
10605 | if (!under_io) { | |
10606 | #ifdef CONFIG_EXT4_FS_ENCRYPTION | |
10607 | if (data_page) | |
10608 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/fuse/dir.c linux-4.14/fs/fuse/dir.c | |
10609 | --- linux-4.14.orig/fs/fuse/dir.c 2018-09-05 11:03:22.000000000 +0200 | |
10610 | +++ linux-4.14/fs/fuse/dir.c 2018-09-05 11:05:07.000000000 +0200 | |
10611 | @@ -1187,7 +1187,7 @@ | |
10612 | struct inode *dir = d_inode(parent); | |
10613 | struct fuse_conn *fc; | |
10614 | struct inode *inode; | |
10615 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10616 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10617 | ||
10618 | if (!o->nodeid) { | |
10619 | /* | |
10620 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/inode.c linux-4.14/fs/inode.c | |
10621 | --- linux-4.14.orig/fs/inode.c 2018-09-05 11:03:29.000000000 +0200 | |
10622 | +++ linux-4.14/fs/inode.c 2018-09-05 11:05:07.000000000 +0200 | |
10623 | @@ -154,7 +154,7 @@ | |
10624 | inode->i_bdev = NULL; | |
10625 | inode->i_cdev = NULL; | |
10626 | inode->i_link = NULL; | |
10627 | - inode->i_dir_seq = 0; | |
10628 | + inode->__i_dir_seq = 0; | |
10629 | inode->i_rdev = 0; | |
10630 | inode->dirtied_when = 0; | |
10631 | ||
10632 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/libfs.c linux-4.14/fs/libfs.c | |
10633 | --- linux-4.14.orig/fs/libfs.c 2017-11-12 19:46:13.000000000 +0100 | |
10634 | +++ linux-4.14/fs/libfs.c 2018-09-05 11:05:07.000000000 +0200 | |
10635 | @@ -90,7 +90,7 @@ | |
10636 | struct list_head *from, | |
10637 | int count) | |
10638 | { | |
10639 | - unsigned *seq = &parent->d_inode->i_dir_seq, n; | |
10640 | + unsigned *seq = &parent->d_inode->__i_dir_seq, n; | |
10641 | struct dentry *res; | |
10642 | struct list_head *p; | |
10643 | bool skipped; | |
10644 | @@ -123,8 +123,9 @@ | |
10645 | static void move_cursor(struct dentry *cursor, struct list_head *after) | |
10646 | { | |
10647 | struct dentry *parent = cursor->d_parent; | |
10648 | - unsigned n, *seq = &parent->d_inode->i_dir_seq; | |
10649 | + unsigned n, *seq = &parent->d_inode->__i_dir_seq; | |
10650 | spin_lock(&parent->d_lock); | |
10651 | + preempt_disable_rt(); | |
10652 | for (;;) { | |
10653 | n = *seq; | |
10654 | if (!(n & 1) && cmpxchg(seq, n, n + 1) == n) | |
10655 | @@ -137,6 +138,7 @@ | |
10656 | else | |
10657 | list_add_tail(&cursor->d_child, &parent->d_subdirs); | |
10658 | smp_store_release(seq, n + 2); | |
10659 | + preempt_enable_rt(); | |
10660 | spin_unlock(&parent->d_lock); | |
10661 | } | |
10662 | ||
10663 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/locks.c linux-4.14/fs/locks.c | |
10664 | --- linux-4.14.orig/fs/locks.c 2017-11-12 19:46:13.000000000 +0100 | |
10665 | +++ linux-4.14/fs/locks.c 2018-09-05 11:05:07.000000000 +0200 | |
10666 | @@ -945,7 +945,7 @@ | |
10667 | return -ENOMEM; | |
10668 | } | |
10669 | ||
10670 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10671 | + percpu_down_read(&file_rwsem); | |
10672 | spin_lock(&ctx->flc_lock); | |
10673 | if (request->fl_flags & FL_ACCESS) | |
10674 | goto find_conflict; | |
10675 | @@ -986,7 +986,7 @@ | |
10676 | ||
10677 | out: | |
10678 | spin_unlock(&ctx->flc_lock); | |
10679 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10680 | + percpu_up_read(&file_rwsem); | |
10681 | if (new_fl) | |
10682 | locks_free_lock(new_fl); | |
10683 | locks_dispose_list(&dispose); | |
10684 | @@ -1023,7 +1023,7 @@ | |
10685 | new_fl2 = locks_alloc_lock(); | |
10686 | } | |
10687 | ||
10688 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10689 | + percpu_down_read(&file_rwsem); | |
10690 | spin_lock(&ctx->flc_lock); | |
10691 | /* | |
10692 | * New lock request. Walk all POSIX locks and look for conflicts. If | |
10693 | @@ -1195,7 +1195,7 @@ | |
10694 | } | |
10695 | out: | |
10696 | spin_unlock(&ctx->flc_lock); | |
10697 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10698 | + percpu_up_read(&file_rwsem); | |
10699 | /* | |
10700 | * Free any unused locks. | |
10701 | */ | |
10702 | @@ -1470,7 +1470,7 @@ | |
10703 | return error; | |
10704 | } | |
10705 | ||
10706 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10707 | + percpu_down_read(&file_rwsem); | |
10708 | spin_lock(&ctx->flc_lock); | |
10709 | ||
10710 | time_out_leases(inode, &dispose); | |
10711 | @@ -1522,13 +1522,13 @@ | |
10712 | locks_insert_block(fl, new_fl); | |
10713 | trace_break_lease_block(inode, new_fl); | |
10714 | spin_unlock(&ctx->flc_lock); | |
10715 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10716 | + percpu_up_read(&file_rwsem); | |
10717 | ||
10718 | locks_dispose_list(&dispose); | |
10719 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | |
10720 | !new_fl->fl_next, break_time); | |
10721 | ||
10722 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10723 | + percpu_down_read(&file_rwsem); | |
10724 | spin_lock(&ctx->flc_lock); | |
10725 | trace_break_lease_unblock(inode, new_fl); | |
10726 | locks_delete_block(new_fl); | |
10727 | @@ -1545,7 +1545,7 @@ | |
10728 | } | |
10729 | out: | |
10730 | spin_unlock(&ctx->flc_lock); | |
10731 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10732 | + percpu_up_read(&file_rwsem); | |
10733 | locks_dispose_list(&dispose); | |
10734 | locks_free_lock(new_fl); | |
10735 | return error; | |
10736 | @@ -1619,7 +1619,7 @@ | |
10737 | ||
10738 | ctx = smp_load_acquire(&inode->i_flctx); | |
10739 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { | |
10740 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10741 | + percpu_down_read(&file_rwsem); | |
10742 | spin_lock(&ctx->flc_lock); | |
10743 | time_out_leases(inode, &dispose); | |
10744 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
10745 | @@ -1629,7 +1629,7 @@ | |
10746 | break; | |
10747 | } | |
10748 | spin_unlock(&ctx->flc_lock); | |
10749 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10750 | + percpu_up_read(&file_rwsem); | |
10751 | ||
10752 | locks_dispose_list(&dispose); | |
10753 | } | |
10754 | @@ -1704,7 +1704,7 @@ | |
10755 | return -EINVAL; | |
10756 | } | |
10757 | ||
10758 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10759 | + percpu_down_read(&file_rwsem); | |
10760 | spin_lock(&ctx->flc_lock); | |
10761 | time_out_leases(inode, &dispose); | |
10762 | error = check_conflicting_open(dentry, arg, lease->fl_flags); | |
10763 | @@ -1775,7 +1775,7 @@ | |
10764 | lease->fl_lmops->lm_setup(lease, priv); | |
10765 | out: | |
10766 | spin_unlock(&ctx->flc_lock); | |
10767 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10768 | + percpu_up_read(&file_rwsem); | |
10769 | locks_dispose_list(&dispose); | |
10770 | if (is_deleg) | |
10771 | inode_unlock(inode); | |
10772 | @@ -1798,7 +1798,7 @@ | |
10773 | return error; | |
10774 | } | |
10775 | ||
10776 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10777 | + percpu_down_read(&file_rwsem); | |
10778 | spin_lock(&ctx->flc_lock); | |
10779 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
10780 | if (fl->fl_file == filp && | |
10781 | @@ -1811,7 +1811,7 @@ | |
10782 | if (victim) | |
10783 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); | |
10784 | spin_unlock(&ctx->flc_lock); | |
10785 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10786 | + percpu_up_read(&file_rwsem); | |
10787 | locks_dispose_list(&dispose); | |
10788 | return error; | |
10789 | } | |
10790 | @@ -2535,13 +2535,13 @@ | |
10791 | if (list_empty(&ctx->flc_lease)) | |
10792 | return; | |
10793 | ||
10794 | - percpu_down_read_preempt_disable(&file_rwsem); | |
10795 | + percpu_down_read(&file_rwsem); | |
10796 | spin_lock(&ctx->flc_lock); | |
10797 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) | |
10798 | if (filp == fl->fl_file) | |
10799 | lease_modify(fl, F_UNLCK, &dispose); | |
10800 | spin_unlock(&ctx->flc_lock); | |
10801 | - percpu_up_read_preempt_enable(&file_rwsem); | |
10802 | + percpu_up_read(&file_rwsem); | |
10803 | ||
10804 | locks_dispose_list(&dispose); | |
10805 | } | |
10806 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/namei.c linux-4.14/fs/namei.c | |
10807 | --- linux-4.14.orig/fs/namei.c 2018-09-05 11:03:22.000000000 +0200 | |
10808 | +++ linux-4.14/fs/namei.c 2018-09-05 11:05:07.000000000 +0200 | |
10809 | @@ -1627,7 +1627,7 @@ | |
10810 | { | |
10811 | struct dentry *dentry = ERR_PTR(-ENOENT), *old; | |
10812 | struct inode *inode = dir->d_inode; | |
10813 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10814 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10815 | ||
10816 | inode_lock_shared(inode); | |
10817 | /* Don't go there if it's already dead */ | |
10818 | @@ -3100,7 +3100,7 @@ | |
10819 | struct dentry *dentry; | |
10820 | int error, create_error = 0; | |
10821 | umode_t mode = op->mode; | |
10822 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10823 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10824 | ||
10825 | if (unlikely(IS_DEADDIR(dir_inode))) | |
10826 | return -ENOENT; | |
10827 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/namespace.c linux-4.14/fs/namespace.c | |
10828 | --- linux-4.14.orig/fs/namespace.c 2018-09-05 11:03:29.000000000 +0200 | |
10829 | +++ linux-4.14/fs/namespace.c 2018-09-05 11:05:07.000000000 +0200 | |
10830 | @@ -14,6 +14,7 @@ | |
10831 | #include <linux/mnt_namespace.h> | |
10832 | #include <linux/user_namespace.h> | |
10833 | #include <linux/namei.h> | |
10834 | +#include <linux/delay.h> | |
10835 | #include <linux/security.h> | |
10836 | #include <linux/cred.h> | |
10837 | #include <linux/idr.h> | |
10838 | @@ -353,8 +354,11 @@ | |
10839 | * incremented count after it has set MNT_WRITE_HOLD. | |
10840 | */ | |
10841 | smp_mb(); | |
10842 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
10843 | - cpu_relax(); | |
10844 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
10845 | + preempt_enable(); | |
10846 | + cpu_chill(); | |
10847 | + preempt_disable(); | |
10848 | + } | |
10849 | /* | |
10850 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
10851 | * be set to match its requirements. So we must not load that until | |
10852 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/delegation.c linux-4.14/fs/nfs/delegation.c | |
10853 | --- linux-4.14.orig/fs/nfs/delegation.c 2017-11-12 19:46:13.000000000 +0100 | |
10854 | +++ linux-4.14/fs/nfs/delegation.c 2018-09-05 11:05:07.000000000 +0200 | |
10855 | @@ -150,11 +150,11 @@ | |
10856 | sp = state->owner; | |
10857 | /* Block nfs4_proc_unlck */ | |
10858 | mutex_lock(&sp->so_delegreturn_mutex); | |
10859 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
10860 | + seq = read_seqbegin(&sp->so_reclaim_seqlock); | |
10861 | err = nfs4_open_delegation_recall(ctx, state, stateid, type); | |
10862 | if (!err) | |
10863 | err = nfs_delegation_claim_locks(ctx, state, stateid); | |
10864 | - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
10865 | + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
10866 | err = -EAGAIN; | |
10867 | mutex_unlock(&sp->so_delegreturn_mutex); | |
10868 | put_nfs_open_context(ctx); | |
10869 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/dir.c linux-4.14/fs/nfs/dir.c | |
10870 | --- linux-4.14.orig/fs/nfs/dir.c 2018-09-05 11:03:22.000000000 +0200 | |
10871 | +++ linux-4.14/fs/nfs/dir.c 2018-09-05 11:05:07.000000000 +0200 | |
10872 | @@ -452,7 +452,7 @@ | |
10873 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |
10874 | { | |
10875 | struct qstr filename = QSTR_INIT(entry->name, entry->len); | |
10876 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10877 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10878 | struct dentry *dentry; | |
10879 | struct dentry *alias; | |
10880 | struct inode *dir = d_inode(parent); | |
10881 | @@ -1443,7 +1443,7 @@ | |
10882 | struct file *file, unsigned open_flags, | |
10883 | umode_t mode, int *opened) | |
10884 | { | |
10885 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
10886 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
10887 | struct nfs_open_context *ctx; | |
10888 | struct dentry *res; | |
10889 | struct iattr attr = { .ia_valid = ATTR_OPEN }; | |
10890 | @@ -1763,7 +1763,11 @@ | |
10891 | ||
10892 | trace_nfs_rmdir_enter(dir, dentry); | |
10893 | if (d_really_is_positive(dentry)) { | |
10894 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10895 | + down(&NFS_I(d_inode(dentry))->rmdir_sem); | |
10896 | +#else | |
10897 | down_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
10898 | +#endif | |
10899 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
10900 | /* Ensure the VFS deletes this inode */ | |
10901 | switch (error) { | |
10902 | @@ -1773,7 +1777,11 @@ | |
10903 | case -ENOENT: | |
10904 | nfs_dentry_handle_enoent(dentry); | |
10905 | } | |
10906 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10907 | + up(&NFS_I(d_inode(dentry))->rmdir_sem); | |
10908 | +#else | |
10909 | up_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
10910 | +#endif | |
10911 | } else | |
10912 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
10913 | trace_nfs_rmdir_exit(dir, dentry, error); | |
10914 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/inode.c linux-4.14/fs/nfs/inode.c | |
10915 | --- linux-4.14.orig/fs/nfs/inode.c 2017-11-12 19:46:13.000000000 +0100 | |
10916 | +++ linux-4.14/fs/nfs/inode.c 2018-09-05 11:05:07.000000000 +0200 | |
10917 | @@ -2014,7 +2014,11 @@ | |
10918 | atomic_long_set(&nfsi->nrequests, 0); | |
10919 | atomic_long_set(&nfsi->commit_info.ncommit, 0); | |
10920 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | |
10921 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10922 | + sema_init(&nfsi->rmdir_sem, 1); | |
10923 | +#else | |
10924 | init_rwsem(&nfsi->rmdir_sem); | |
10925 | +#endif | |
10926 | mutex_init(&nfsi->commit_mutex); | |
10927 | nfs4_init_once(nfsi); | |
10928 | } | |
10929 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/nfs4_fs.h linux-4.14/fs/nfs/nfs4_fs.h | |
10930 | --- linux-4.14.orig/fs/nfs/nfs4_fs.h 2018-09-05 11:03:22.000000000 +0200 | |
10931 | +++ linux-4.14/fs/nfs/nfs4_fs.h 2018-09-05 11:05:07.000000000 +0200 | |
10932 | @@ -112,7 +112,7 @@ | |
10933 | unsigned long so_flags; | |
10934 | struct list_head so_states; | |
10935 | struct nfs_seqid_counter so_seqid; | |
10936 | - seqcount_t so_reclaim_seqcount; | |
10937 | + seqlock_t so_reclaim_seqlock; | |
10938 | struct mutex so_delegreturn_mutex; | |
10939 | }; | |
10940 | ||
10941 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/nfs4proc.c linux-4.14/fs/nfs/nfs4proc.c | |
10942 | --- linux-4.14.orig/fs/nfs/nfs4proc.c 2018-09-05 11:03:22.000000000 +0200 | |
10943 | +++ linux-4.14/fs/nfs/nfs4proc.c 2018-09-05 11:05:07.000000000 +0200 | |
10944 | @@ -2689,7 +2689,7 @@ | |
10945 | unsigned int seq; | |
10946 | int ret; | |
10947 | ||
10948 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
10949 | + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
10950 | ||
10951 | ret = _nfs4_proc_open(opendata); | |
10952 | if (ret != 0) | |
10953 | @@ -2727,7 +2727,7 @@ | |
10954 | ||
10955 | if (d_inode(dentry) == state->inode) { | |
10956 | nfs_inode_attach_open_context(ctx); | |
10957 | - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
10958 | + if (read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
10959 | nfs4_schedule_stateid_recovery(server, state); | |
10960 | } | |
10961 | out: | |
10962 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/nfs4state.c linux-4.14/fs/nfs/nfs4state.c | |
10963 | --- linux-4.14.orig/fs/nfs/nfs4state.c 2018-09-05 11:03:22.000000000 +0200 | |
10964 | +++ linux-4.14/fs/nfs/nfs4state.c 2018-09-05 11:05:07.000000000 +0200 | |
10965 | @@ -494,7 +494,7 @@ | |
10966 | nfs4_init_seqid_counter(&sp->so_seqid); | |
10967 | atomic_set(&sp->so_count, 1); | |
10968 | INIT_LIST_HEAD(&sp->so_lru); | |
10969 | - seqcount_init(&sp->so_reclaim_seqcount); | |
10970 | + seqlock_init(&sp->so_reclaim_seqlock); | |
10971 | mutex_init(&sp->so_delegreturn_mutex); | |
10972 | return sp; | |
10973 | } | |
10974 | @@ -1519,8 +1519,12 @@ | |
10975 | * recovering after a network partition or a reboot from a | |
10976 | * server that doesn't support a grace period. | |
10977 | */ | |
10978 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10979 | + write_seqlock(&sp->so_reclaim_seqlock); | |
10980 | +#else | |
10981 | + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
10982 | +#endif | |
10983 | spin_lock(&sp->so_lock); | |
10984 | - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); | |
10985 | restart: | |
10986 | list_for_each_entry(state, &sp->so_states, open_states) { | |
10987 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | |
10988 | @@ -1589,14 +1593,20 @@ | |
10989 | spin_lock(&sp->so_lock); | |
10990 | goto restart; | |
10991 | } | |
10992 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
10993 | spin_unlock(&sp->so_lock); | |
10994 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10995 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
10996 | +#else | |
10997 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
10998 | +#endif | |
10999 | return 0; | |
11000 | out_err: | |
11001 | nfs4_put_open_state(state); | |
11002 | - spin_lock(&sp->so_lock); | |
11003 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
11004 | - spin_unlock(&sp->so_lock); | |
11005 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11006 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
11007 | +#else | |
11008 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
11009 | +#endif | |
11010 | return status; | |
11011 | } | |
11012 | ||
11013 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/nfs/unlink.c linux-4.14/fs/nfs/unlink.c | |
11014 | --- linux-4.14.orig/fs/nfs/unlink.c 2017-11-12 19:46:13.000000000 +0100 | |
11015 | +++ linux-4.14/fs/nfs/unlink.c 2018-09-05 11:05:07.000000000 +0200 | |
11016 | @@ -13,7 +13,7 @@ | |
11017 | #include <linux/sunrpc/clnt.h> | |
11018 | #include <linux/nfs_fs.h> | |
11019 | #include <linux/sched.h> | |
11020 | -#include <linux/wait.h> | |
11021 | +#include <linux/swait.h> | |
11022 | #include <linux/namei.h> | |
11023 | #include <linux/fsnotify.h> | |
11024 | ||
11025 | @@ -52,6 +52,29 @@ | |
11026 | rpc_restart_call_prepare(task); | |
11027 | } | |
11028 | ||
11029 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11030 | +static void nfs_down_anon(struct semaphore *sema) | |
11031 | +{ | |
11032 | + down(sema); | |
11033 | +} | |
11034 | + | |
11035 | +static void nfs_up_anon(struct semaphore *sema) | |
11036 | +{ | |
11037 | + up(sema); | |
11038 | +} | |
11039 | + | |
11040 | +#else | |
11041 | +static void nfs_down_anon(struct rw_semaphore *rwsem) | |
11042 | +{ | |
11043 | + down_read_non_owner(rwsem); | |
11044 | +} | |
11045 | + | |
11046 | +static void nfs_up_anon(struct rw_semaphore *rwsem) | |
11047 | +{ | |
11048 | + up_read_non_owner(rwsem); | |
11049 | +} | |
11050 | +#endif | |
11051 | + | |
11052 | /** | |
11053 | * nfs_async_unlink_release - Release the sillydelete data. | |
11054 | * @task: rpc_task of the sillydelete | |
11055 | @@ -65,7 +88,7 @@ | |
11056 | struct dentry *dentry = data->dentry; | |
11057 | struct super_block *sb = dentry->d_sb; | |
11058 | ||
11059 | - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
11060 | + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
11061 | d_lookup_done(dentry); | |
11062 | nfs_free_unlinkdata(data); | |
11063 | dput(dentry); | |
11064 | @@ -118,10 +141,10 @@ | |
11065 | struct inode *dir = d_inode(dentry->d_parent); | |
11066 | struct dentry *alias; | |
11067 | ||
11068 | - down_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
11069 | + nfs_down_anon(&NFS_I(dir)->rmdir_sem); | |
11070 | alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); | |
11071 | if (IS_ERR(alias)) { | |
11072 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
11073 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
11074 | return 0; | |
11075 | } | |
11076 | if (!d_in_lookup(alias)) { | |
11077 | @@ -143,7 +166,7 @@ | |
11078 | ret = 0; | |
11079 | spin_unlock(&alias->d_lock); | |
11080 | dput(alias); | |
11081 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
11082 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
11083 | /* | |
11084 | * If we'd displaced old cached devname, free it. At that | |
11085 | * point dentry is definitely not a root, so we won't need | |
11086 | @@ -183,7 +206,7 @@ | |
11087 | goto out_free_name; | |
11088 | } | |
11089 | data->res.dir_attr = &data->dir_attr; | |
11090 | - init_waitqueue_head(&data->wq); | |
11091 | + init_swait_queue_head(&data->wq); | |
11092 | ||
11093 | status = -EBUSY; | |
11094 | spin_lock(&dentry->d_lock); | |
11095 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/ntfs/aops.c linux-4.14/fs/ntfs/aops.c | |
11096 | --- linux-4.14.orig/fs/ntfs/aops.c 2017-11-12 19:46:13.000000000 +0100 | |
11097 | +++ linux-4.14/fs/ntfs/aops.c 2018-09-05 11:05:07.000000000 +0200 | |
11098 | @@ -93,13 +93,13 @@ | |
11099 | ofs = 0; | |
11100 | if (file_ofs < init_size) | |
11101 | ofs = init_size - file_ofs; | |
11102 | - local_irq_save(flags); | |
11103 | + local_irq_save_nort(flags); | |
11104 | kaddr = kmap_atomic(page); | |
11105 | memset(kaddr + bh_offset(bh) + ofs, 0, | |
11106 | bh->b_size - ofs); | |
11107 | flush_dcache_page(page); | |
11108 | kunmap_atomic(kaddr); | |
11109 | - local_irq_restore(flags); | |
11110 | + local_irq_restore_nort(flags); | |
11111 | } | |
11112 | } else { | |
11113 | clear_buffer_uptodate(bh); | |
11114 | @@ -108,8 +108,7 @@ | |
11115 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
11116 | } | |
11117 | first = page_buffers(page); | |
11118 | - local_irq_save(flags); | |
11119 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
11120 | + flags = bh_uptodate_lock_irqsave(first); | |
11121 | clear_buffer_async_read(bh); | |
11122 | unlock_buffer(bh); | |
11123 | tmp = bh; | |
11124 | @@ -124,8 +123,7 @@ | |
11125 | } | |
11126 | tmp = tmp->b_this_page; | |
11127 | } while (tmp != bh); | |
11128 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11129 | - local_irq_restore(flags); | |
11130 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11131 | /* | |
11132 | * If none of the buffers had errors then we can set the page uptodate, | |
11133 | * but we first have to perform the post read mst fixups, if the | |
11134 | @@ -146,13 +144,13 @@ | |
11135 | recs = PAGE_SIZE / rec_size; | |
11136 | /* Should have been verified before we got here... */ | |
11137 | BUG_ON(!recs); | |
11138 | - local_irq_save(flags); | |
11139 | + local_irq_save_nort(flags); | |
11140 | kaddr = kmap_atomic(page); | |
11141 | for (i = 0; i < recs; i++) | |
11142 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
11143 | i * rec_size), rec_size); | |
11144 | kunmap_atomic(kaddr); | |
11145 | - local_irq_restore(flags); | |
11146 | + local_irq_restore_nort(flags); | |
11147 | flush_dcache_page(page); | |
11148 | if (likely(page_uptodate && !PageError(page))) | |
11149 | SetPageUptodate(page); | |
11150 | @@ -160,9 +158,7 @@ | |
11151 | unlock_page(page); | |
11152 | return; | |
11153 | still_busy: | |
11154 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11155 | - local_irq_restore(flags); | |
11156 | - return; | |
11157 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11158 | } | |
11159 | ||
11160 | /** | |
11161 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/proc/array.c linux-4.14/fs/proc/array.c | |
11162 | --- linux-4.14.orig/fs/proc/array.c 2018-09-05 11:03:22.000000000 +0200 | |
11163 | +++ linux-4.14/fs/proc/array.c 2018-09-05 11:05:07.000000000 +0200 | |
11164 | @@ -386,9 +386,9 @@ | |
11165 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | |
11166 | { | |
11167 | seq_printf(m, "Cpus_allowed:\t%*pb\n", | |
11168 | - cpumask_pr_args(&task->cpus_allowed)); | |
11169 | + cpumask_pr_args(task->cpus_ptr)); | |
11170 | seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", | |
11171 | - cpumask_pr_args(&task->cpus_allowed)); | |
11172 | + cpumask_pr_args(task->cpus_ptr)); | |
11173 | } | |
11174 | ||
11175 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |
11176 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/proc/base.c linux-4.14/fs/proc/base.c | |
11177 | --- linux-4.14.orig/fs/proc/base.c 2018-09-05 11:03:28.000000000 +0200 | |
11178 | +++ linux-4.14/fs/proc/base.c 2018-09-05 11:05:07.000000000 +0200 | |
11179 | @@ -1886,7 +1886,7 @@ | |
11180 | ||
11181 | child = d_hash_and_lookup(dir, &qname); | |
11182 | if (!child) { | |
11183 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
11184 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
11185 | child = d_alloc_parallel(dir, &qname, &wq); | |
11186 | if (IS_ERR(child)) | |
11187 | goto end_instantiate; | |
11188 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/proc/proc_sysctl.c linux-4.14/fs/proc/proc_sysctl.c | |
11189 | --- linux-4.14.orig/fs/proc/proc_sysctl.c 2018-09-05 11:03:22.000000000 +0200 | |
11190 | +++ linux-4.14/fs/proc/proc_sysctl.c 2018-09-05 11:05:07.000000000 +0200 | |
11191 | @@ -679,7 +679,7 @@ | |
11192 | ||
11193 | child = d_lookup(dir, &qname); | |
11194 | if (!child) { | |
11195 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
11196 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
11197 | child = d_alloc_parallel(dir, &qname, &wq); | |
11198 | if (IS_ERR(child)) | |
11199 | return false; | |
11200 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/timerfd.c linux-4.14/fs/timerfd.c | |
11201 | --- linux-4.14.orig/fs/timerfd.c 2017-11-12 19:46:13.000000000 +0100 | |
11202 | +++ linux-4.14/fs/timerfd.c 2018-09-05 11:05:07.000000000 +0200 | |
11203 | @@ -471,7 +471,10 @@ | |
11204 | break; | |
11205 | } | |
11206 | spin_unlock_irq(&ctx->wqh.lock); | |
11207 | - cpu_relax(); | |
11208 | + if (isalarm(ctx)) | |
11209 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
11210 | + else | |
11211 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
11212 | } | |
11213 | ||
11214 | /* | |
11215 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/fs/xfs/xfs_aops.c linux-4.14/fs/xfs/xfs_aops.c | |
11216 | --- linux-4.14.orig/fs/xfs/xfs_aops.c 2018-09-05 11:03:22.000000000 +0200 | |
11217 | +++ linux-4.14/fs/xfs/xfs_aops.c 2018-09-05 11:05:07.000000000 +0200 | |
11218 | @@ -120,8 +120,7 @@ | |
11219 | ASSERT(bvec->bv_offset + bvec->bv_len <= PAGE_SIZE); | |
11220 | ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); | |
11221 | ||
11222 | - local_irq_save(flags); | |
11223 | - bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | |
11224 | + flags = bh_uptodate_lock_irqsave(head); | |
11225 | do { | |
11226 | if (off >= bvec->bv_offset && | |
11227 | off < bvec->bv_offset + bvec->bv_len) { | |
11228 | @@ -143,8 +142,7 @@ | |
11229 | } | |
11230 | off += bh->b_size; | |
11231 | } while ((bh = bh->b_this_page) != head); | |
11232 | - bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | |
11233 | - local_irq_restore(flags); | |
11234 | + bh_uptodate_unlock_irqrestore(head, flags); | |
11235 | ||
11236 | if (!busy) | |
11237 | end_page_writeback(bvec->bv_page); | |
11238 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/acpi/platform/aclinux.h linux-4.14/include/acpi/platform/aclinux.h | |
11239 | --- linux-4.14.orig/include/acpi/platform/aclinux.h 2017-11-12 19:46:13.000000000 +0100 | |
11240 | +++ linux-4.14/include/acpi/platform/aclinux.h 2018-09-05 11:05:07.000000000 +0200 | |
11241 | @@ -134,6 +134,7 @@ | |
11242 | ||
11243 | #define acpi_cache_t struct kmem_cache | |
11244 | #define acpi_spinlock spinlock_t * | |
11245 | +#define acpi_raw_spinlock raw_spinlock_t * | |
11246 | #define acpi_cpu_flags unsigned long | |
11247 | ||
11248 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
11249 | @@ -152,6 +153,20 @@ | |
11250 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id | |
11251 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
11252 | ||
11253 | +#define acpi_os_create_raw_lock(__handle) \ | |
11254 | +({ \ | |
11255 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
11256 | + \ | |
11257 | + if (lock) { \ | |
11258 | + *(__handle) = lock; \ | |
11259 | + raw_spin_lock_init(*(__handle)); \ | |
11260 | + } \ | |
11261 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
11262 | + }) | |
11263 | + | |
11264 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
11265 | + | |
11266 | + | |
11267 | /* | |
11268 | * OSL interfaces used by debugger/disassembler | |
11269 | */ | |
11270 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/asm-generic/bug.h linux-4.14/include/asm-generic/bug.h | |
11271 | --- linux-4.14.orig/include/asm-generic/bug.h 2018-09-05 11:03:22.000000000 +0200 | |
11272 | +++ linux-4.14/include/asm-generic/bug.h 2018-09-05 11:05:07.000000000 +0200 | |
11273 | @@ -234,6 +234,20 @@ | |
11274 | # define WARN_ON_SMP(x) ({0;}) | |
11275 | #endif | |
11276 | ||
11277 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11278 | +# define BUG_ON_RT(c) BUG_ON(c) | |
11279 | +# define BUG_ON_NONRT(c) do { } while (0) | |
11280 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
11281 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
11282 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
11283 | +#else | |
11284 | +# define BUG_ON_RT(c) do { } while (0) | |
11285 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
11286 | +# define WARN_ON_RT(condition) do { } while (0) | |
11287 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
11288 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
11289 | +#endif | |
11290 | + | |
11291 | #endif /* __ASSEMBLY__ */ | |
11292 | ||
11293 | #endif | |
11294 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/blkdev.h linux-4.14/include/linux/blkdev.h | |
11295 | --- linux-4.14.orig/include/linux/blkdev.h 2018-09-05 11:03:22.000000000 +0200 | |
11296 | +++ linux-4.14/include/linux/blkdev.h 2018-09-05 11:05:07.000000000 +0200 | |
11297 | @@ -27,6 +27,7 @@ | |
11298 | #include <linux/percpu-refcount.h> | |
11299 | #include <linux/scatterlist.h> | |
11300 | #include <linux/blkzoned.h> | |
11301 | +#include <linux/swork.h> | |
11302 | ||
11303 | struct module; | |
11304 | struct scsi_ioctl_command; | |
11305 | @@ -134,6 +135,9 @@ | |
11306 | */ | |
11307 | struct request { | |
11308 | struct list_head queuelist; | |
11309 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11310 | + struct work_struct work; | |
11311 | +#endif | |
11312 | union { | |
11313 | struct __call_single_data csd; | |
11314 | u64 fifo_time; | |
11315 | @@ -596,6 +600,7 @@ | |
11316 | #endif | |
11317 | struct rcu_head rcu_head; | |
11318 | wait_queue_head_t mq_freeze_wq; | |
11319 | + struct swork_event mq_pcpu_wake; | |
11320 | struct percpu_ref q_usage_counter; | |
11321 | struct list_head all_q_node; | |
11322 | ||
11323 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/blk-mq.h linux-4.14/include/linux/blk-mq.h | |
11324 | --- linux-4.14.orig/include/linux/blk-mq.h 2017-11-12 19:46:13.000000000 +0100 | |
11325 | +++ linux-4.14/include/linux/blk-mq.h 2018-09-05 11:05:07.000000000 +0200 | |
11326 | @@ -226,7 +226,7 @@ | |
11327 | return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; | |
11328 | } | |
11329 | ||
11330 | - | |
11331 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); | |
11332 | int blk_mq_request_started(struct request *rq); | |
11333 | void blk_mq_start_request(struct request *rq); | |
11334 | void blk_mq_end_request(struct request *rq, blk_status_t error); | |
11335 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/bottom_half.h linux-4.14/include/linux/bottom_half.h | |
11336 | --- linux-4.14.orig/include/linux/bottom_half.h 2017-11-12 19:46:13.000000000 +0100 | |
11337 | +++ linux-4.14/include/linux/bottom_half.h 2018-09-05 11:05:07.000000000 +0200 | |
11338 | @@ -4,6 +4,39 @@ | |
11339 | ||
11340 | #include <linux/preempt.h> | |
11341 | ||
11342 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11343 | + | |
11344 | +extern void __local_bh_disable(void); | |
11345 | +extern void _local_bh_enable(void); | |
11346 | +extern void __local_bh_enable(void); | |
11347 | + | |
11348 | +static inline void local_bh_disable(void) | |
11349 | +{ | |
11350 | + __local_bh_disable(); | |
11351 | +} | |
11352 | + | |
11353 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
11354 | +{ | |
11355 | + __local_bh_disable(); | |
11356 | +} | |
11357 | + | |
11358 | +static inline void local_bh_enable(void) | |
11359 | +{ | |
11360 | + __local_bh_enable(); | |
11361 | +} | |
11362 | + | |
11363 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
11364 | +{ | |
11365 | + __local_bh_enable(); | |
11366 | +} | |
11367 | + | |
11368 | +static inline void local_bh_enable_ip(unsigned long ip) | |
11369 | +{ | |
11370 | + __local_bh_enable(); | |
11371 | +} | |
11372 | + | |
11373 | +#else | |
11374 | + | |
11375 | #ifdef CONFIG_TRACE_IRQFLAGS | |
11376 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
11377 | #else | |
11378 | @@ -31,5 +64,6 @@ | |
11379 | { | |
11380 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
11381 | } | |
11382 | +#endif | |
11383 | ||
11384 | #endif /* _LINUX_BH_H */ | |
11385 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/buffer_head.h linux-4.14/include/linux/buffer_head.h | |
11386 | --- linux-4.14.orig/include/linux/buffer_head.h 2017-11-12 19:46:13.000000000 +0100 | |
11387 | +++ linux-4.14/include/linux/buffer_head.h 2018-09-05 11:05:07.000000000 +0200 | |
11388 | @@ -76,8 +76,50 @@ | |
11389 | struct address_space *b_assoc_map; /* mapping this buffer is | |
11390 | associated with */ | |
11391 | atomic_t b_count; /* users using this buffer_head */ | |
11392 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11393 | + spinlock_t b_uptodate_lock; | |
11394 | +#if IS_ENABLED(CONFIG_JBD2) | |
11395 | + spinlock_t b_state_lock; | |
11396 | + spinlock_t b_journal_head_lock; | |
11397 | +#endif | |
11398 | +#endif | |
11399 | }; | |
11400 | ||
11401 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
11402 | +{ | |
11403 | + unsigned long flags; | |
11404 | + | |
11405 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11406 | + local_irq_save(flags); | |
11407 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
11408 | +#else | |
11409 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
11410 | +#endif | |
11411 | + return flags; | |
11412 | +} | |
11413 | + | |
11414 | +static inline void | |
11415 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
11416 | +{ | |
11417 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11418 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
11419 | + local_irq_restore(flags); | |
11420 | +#else | |
11421 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
11422 | +#endif | |
11423 | +} | |
11424 | + | |
11425 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
11426 | +{ | |
11427 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11428 | + spin_lock_init(&bh->b_uptodate_lock); | |
11429 | +#if IS_ENABLED(CONFIG_JBD2) | |
11430 | + spin_lock_init(&bh->b_state_lock); | |
11431 | + spin_lock_init(&bh->b_journal_head_lock); | |
11432 | +#endif | |
11433 | +#endif | |
11434 | +} | |
11435 | + | |
11436 | /* | |
11437 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
11438 | * and buffer_foo() functions. | |
11439 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/cgroup-defs.h linux-4.14/include/linux/cgroup-defs.h | |
11440 | --- linux-4.14.orig/include/linux/cgroup-defs.h 2018-09-05 11:03:22.000000000 +0200 | |
11441 | +++ linux-4.14/include/linux/cgroup-defs.h 2018-09-05 11:05:07.000000000 +0200 | |
11442 | @@ -19,6 +19,7 @@ | |
11443 | #include <linux/percpu-rwsem.h> | |
11444 | #include <linux/workqueue.h> | |
11445 | #include <linux/bpf-cgroup.h> | |
11446 | +#include <linux/swork.h> | |
11447 | ||
11448 | #ifdef CONFIG_CGROUPS | |
11449 | ||
11450 | @@ -152,6 +153,7 @@ | |
11451 | /* percpu_ref killing and RCU release */ | |
11452 | struct rcu_head rcu_head; | |
11453 | struct work_struct destroy_work; | |
11454 | + struct swork_event destroy_swork; | |
11455 | ||
11456 | /* | |
11457 | * PI: the parent css. Placed here for cache proximity to following | |
11458 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/completion.h linux-4.14/include/linux/completion.h | |
11459 | --- linux-4.14.orig/include/linux/completion.h 2017-11-12 19:46:13.000000000 +0100 | |
11460 | +++ linux-4.14/include/linux/completion.h 2018-09-05 11:05:07.000000000 +0200 | |
11461 | @@ -9,7 +9,7 @@ | |
11462 | * See kernel/sched/completion.c for details. | |
11463 | */ | |
11464 | ||
11465 | -#include <linux/wait.h> | |
11466 | +#include <linux/swait.h> | |
11467 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | |
11468 | #include <linux/lockdep.h> | |
11469 | #endif | |
11470 | @@ -28,7 +28,7 @@ | |
11471 | */ | |
11472 | struct completion { | |
11473 | unsigned int done; | |
11474 | - wait_queue_head_t wait; | |
11475 | + struct swait_queue_head wait; | |
11476 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | |
11477 | struct lockdep_map_cross map; | |
11478 | #endif | |
11479 | @@ -67,11 +67,11 @@ | |
11480 | ||
11481 | #ifdef CONFIG_LOCKDEP_COMPLETIONS | |
11482 | #define COMPLETION_INITIALIZER(work) \ | |
11483 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ | |
11484 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ | |
11485 | STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } | |
11486 | #else | |
11487 | #define COMPLETION_INITIALIZER(work) \ | |
11488 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
11489 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
11490 | #endif | |
11491 | ||
11492 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
11493 | @@ -117,7 +117,7 @@ | |
11494 | static inline void __init_completion(struct completion *x) | |
11495 | { | |
11496 | x->done = 0; | |
11497 | - init_waitqueue_head(&x->wait); | |
11498 | + init_swait_queue_head(&x->wait); | |
11499 | } | |
11500 | ||
11501 | /** | |
11502 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/cpu.h linux-4.14/include/linux/cpu.h | |
11503 | --- linux-4.14.orig/include/linux/cpu.h 2018-09-05 11:03:22.000000000 +0200 | |
11504 | +++ linux-4.14/include/linux/cpu.h 2018-09-05 11:05:07.000000000 +0200 | |
11505 | @@ -120,6 +120,8 @@ | |
11506 | extern void cpu_hotplug_enable(void); | |
11507 | void clear_tasks_mm_cpumask(int cpu); | |
11508 | int cpu_down(unsigned int cpu); | |
11509 | +extern void pin_current_cpu(void); | |
11510 | +extern void unpin_current_cpu(void); | |
11511 | ||
11512 | #else /* CONFIG_HOTPLUG_CPU */ | |
11513 | ||
11514 | @@ -130,6 +132,9 @@ | |
11515 | static inline void lockdep_assert_cpus_held(void) { } | |
11516 | static inline void cpu_hotplug_disable(void) { } | |
11517 | static inline void cpu_hotplug_enable(void) { } | |
11518 | +static inline void pin_current_cpu(void) { } | |
11519 | +static inline void unpin_current_cpu(void) { } | |
11520 | + | |
11521 | #endif /* !CONFIG_HOTPLUG_CPU */ | |
11522 | ||
11523 | /* Wrappers which go away once all code is converted */ | |
11524 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/dcache.h linux-4.14/include/linux/dcache.h | |
11525 | --- linux-4.14.orig/include/linux/dcache.h 2018-09-05 11:03:22.000000000 +0200 | |
11526 | +++ linux-4.14/include/linux/dcache.h 2018-09-05 11:05:07.000000000 +0200 | |
11527 | @@ -107,7 +107,7 @@ | |
11528 | ||
11529 | union { | |
11530 | struct list_head d_lru; /* LRU list */ | |
11531 | - wait_queue_head_t *d_wait; /* in-lookup ones only */ | |
11532 | + struct swait_queue_head *d_wait; /* in-lookup ones only */ | |
11533 | }; | |
11534 | struct list_head d_child; /* child of parent list */ | |
11535 | struct list_head d_subdirs; /* our children */ | |
11536 | @@ -238,7 +238,7 @@ | |
11537 | extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
11538 | extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
11539 | extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, | |
11540 | - wait_queue_head_t *); | |
11541 | + struct swait_queue_head *); | |
11542 | extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
11543 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
11544 | extern struct dentry * d_exact_alias(struct dentry *, struct inode *); | |
11545 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/delay.h linux-4.14/include/linux/delay.h | |
11546 | --- linux-4.14.orig/include/linux/delay.h 2017-11-12 19:46:13.000000000 +0100 | |
11547 | +++ linux-4.14/include/linux/delay.h 2018-09-05 11:05:07.000000000 +0200 | |
11548 | @@ -64,4 +64,10 @@ | |
11549 | msleep(seconds * 1000); | |
11550 | } | |
11551 | ||
11552 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11553 | +extern void cpu_chill(void); | |
11554 | +#else | |
11555 | +# define cpu_chill() cpu_relax() | |
11556 | +#endif | |
11557 | + | |
11558 | #endif /* defined(_LINUX_DELAY_H) */ | |
11559 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/fs.h linux-4.14/include/linux/fs.h | |
11560 | --- linux-4.14.orig/include/linux/fs.h 2018-09-05 11:03:29.000000000 +0200 | |
11561 | +++ linux-4.14/include/linux/fs.h 2018-09-05 11:05:07.000000000 +0200 | |
11562 | @@ -655,7 +655,7 @@ | |
11563 | struct block_device *i_bdev; | |
11564 | struct cdev *i_cdev; | |
11565 | char *i_link; | |
11566 | - unsigned i_dir_seq; | |
11567 | + unsigned __i_dir_seq; | |
11568 | }; | |
11569 | ||
11570 | __u32 i_generation; | |
11571 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/highmem.h linux-4.14/include/linux/highmem.h | |
11572 | --- linux-4.14.orig/include/linux/highmem.h 2017-11-12 19:46:13.000000000 +0100 | |
11573 | +++ linux-4.14/include/linux/highmem.h 2018-09-05 11:05:07.000000000 +0200 | |
11574 | @@ -8,6 +8,7 @@ | |
11575 | #include <linux/mm.h> | |
11576 | #include <linux/uaccess.h> | |
11577 | #include <linux/hardirq.h> | |
11578 | +#include <linux/sched.h> | |
11579 | ||
11580 | #include <asm/cacheflush.h> | |
11581 | ||
11582 | @@ -66,7 +67,7 @@ | |
11583 | ||
11584 | static inline void *kmap_atomic(struct page *page) | |
11585 | { | |
11586 | - preempt_disable(); | |
11587 | + preempt_disable_nort(); | |
11588 | pagefault_disable(); | |
11589 | return page_address(page); | |
11590 | } | |
11591 | @@ -75,7 +76,7 @@ | |
11592 | static inline void __kunmap_atomic(void *addr) | |
11593 | { | |
11594 | pagefault_enable(); | |
11595 | - preempt_enable(); | |
11596 | + preempt_enable_nort(); | |
11597 | } | |
11598 | ||
11599 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
11600 | @@ -87,32 +88,51 @@ | |
11601 | ||
11602 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
11603 | ||
11604 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11605 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
11606 | +#endif | |
11607 | ||
11608 | static inline int kmap_atomic_idx_push(void) | |
11609 | { | |
11610 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11611 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
11612 | ||
11613 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
11614 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
11615 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
11616 | BUG_ON(idx >= KM_TYPE_NR); | |
11617 | -#endif | |
11618 | +# endif | |
11619 | return idx; | |
11620 | +#else | |
11621 | + current->kmap_idx++; | |
11622 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
11623 | + return current->kmap_idx - 1; | |
11624 | +#endif | |
11625 | } | |
11626 | ||
11627 | static inline int kmap_atomic_idx(void) | |
11628 | { | |
11629 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11630 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
11631 | +#else | |
11632 | + return current->kmap_idx - 1; | |
11633 | +#endif | |
11634 | } | |
11635 | ||
11636 | static inline void kmap_atomic_idx_pop(void) | |
11637 | { | |
11638 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
11639 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11640 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
11641 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
11642 | ||
11643 | BUG_ON(idx < 0); | |
11644 | -#else | |
11645 | +# else | |
11646 | __this_cpu_dec(__kmap_atomic_idx); | |
11647 | +# endif | |
11648 | +#else | |
11649 | + current->kmap_idx--; | |
11650 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
11651 | + BUG_ON(current->kmap_idx < 0); | |
11652 | +# endif | |
11653 | #endif | |
11654 | } | |
11655 | ||
11656 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/hrtimer.h linux-4.14/include/linux/hrtimer.h | |
11657 | --- linux-4.14.orig/include/linux/hrtimer.h 2017-11-12 19:46:13.000000000 +0100 | |
11658 | +++ linux-4.14/include/linux/hrtimer.h 2018-09-05 11:05:07.000000000 +0200 | |
11659 | @@ -22,19 +22,42 @@ | |
11660 | #include <linux/percpu.h> | |
11661 | #include <linux/timer.h> | |
11662 | #include <linux/timerqueue.h> | |
11663 | +#include <linux/wait.h> | |
11664 | ||
11665 | struct hrtimer_clock_base; | |
11666 | struct hrtimer_cpu_base; | |
11667 | ||
11668 | /* | |
11669 | * Mode arguments of xxx_hrtimer functions: | |
11670 | + * | |
11671 | + * HRTIMER_MODE_ABS - Time value is absolute | |
11672 | + * HRTIMER_MODE_REL - Time value is relative to now | |
11673 | + * HRTIMER_MODE_PINNED - Timer is bound to CPU (is only considered | |
11674 | + * when starting the timer) | |
11675 | + * HRTIMER_MODE_SOFT - Timer callback function will be executed in | |
11676 | + * soft irq context | |
11677 | */ | |
11678 | enum hrtimer_mode { | |
11679 | - HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */ | |
11680 | - HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */ | |
11681 | - HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */ | |
11682 | - HRTIMER_MODE_ABS_PINNED = 0x02, | |
11683 | - HRTIMER_MODE_REL_PINNED = 0x03, | |
11684 | + HRTIMER_MODE_ABS = 0x00, | |
11685 | + HRTIMER_MODE_REL = 0x01, | |
11686 | + HRTIMER_MODE_PINNED = 0x02, | |
11687 | + HRTIMER_MODE_SOFT = 0x04, | |
11688 | + HRTIMER_MODE_HARD = 0x08, | |
11689 | + | |
11690 | + HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED, | |
11691 | + HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED, | |
11692 | + | |
11693 | + HRTIMER_MODE_ABS_SOFT = HRTIMER_MODE_ABS | HRTIMER_MODE_SOFT, | |
11694 | + HRTIMER_MODE_REL_SOFT = HRTIMER_MODE_REL | HRTIMER_MODE_SOFT, | |
11695 | + | |
11696 | + HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT, | |
11697 | + HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT, | |
11698 | + | |
11699 | + HRTIMER_MODE_ABS_HARD = HRTIMER_MODE_ABS | HRTIMER_MODE_HARD, | |
11700 | + HRTIMER_MODE_REL_HARD = HRTIMER_MODE_REL | HRTIMER_MODE_HARD, | |
11701 | + | |
11702 | + HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD, | |
11703 | + HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD, | |
11704 | }; | |
11705 | ||
11706 | /* | |
11707 | @@ -87,6 +110,7 @@ | |
11708 | * @base: pointer to the timer base (per cpu and per clock) | |
11709 | * @state: state information (See bit values above) | |
11710 | * @is_rel: Set if the timer was armed relative | |
11711 | + * @is_soft: Set if hrtimer will be expired in soft interrupt context. | |
11712 | * | |
11713 | * The hrtimer structure must be initialized by hrtimer_init() | |
11714 | */ | |
11715 | @@ -97,6 +121,7 @@ | |
11716 | struct hrtimer_clock_base *base; | |
11717 | u8 state; | |
11718 | u8 is_rel; | |
11719 | + u8 is_soft; | |
11720 | }; | |
11721 | ||
11722 | /** | |
11723 | @@ -112,9 +137,9 @@ | |
11724 | }; | |
11725 | ||
11726 | #ifdef CONFIG_64BIT | |
11727 | -# define HRTIMER_CLOCK_BASE_ALIGN 64 | |
11728 | +# define __hrtimer_clock_base_align ____cacheline_aligned | |
11729 | #else | |
11730 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
11731 | +# define __hrtimer_clock_base_align | |
11732 | #endif | |
11733 | ||
11734 | /** | |
11735 | @@ -123,48 +148,57 @@ | |
11736 | * @index: clock type index for per_cpu support when moving a | |
11737 | * timer to a base on another cpu. | |
11738 | * @clockid: clock id for per_cpu support | |
11739 | + * @seq: seqcount around __run_hrtimer | |
11740 | + * @running: pointer to the currently running hrtimer | |
11741 | * @active: red black tree root node for the active timers | |
11742 | * @get_time: function to retrieve the current time of the clock | |
11743 | * @offset: offset of this clock to the monotonic base | |
11744 | */ | |
11745 | struct hrtimer_clock_base { | |
11746 | struct hrtimer_cpu_base *cpu_base; | |
11747 | - int index; | |
11748 | + unsigned int index; | |
11749 | clockid_t clockid; | |
11750 | + seqcount_t seq; | |
11751 | + struct hrtimer *running; | |
11752 | struct timerqueue_head active; | |
11753 | ktime_t (*get_time)(void); | |
11754 | ktime_t offset; | |
11755 | -} __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
11756 | +} __hrtimer_clock_base_align; | |
11757 | ||
11758 | enum hrtimer_base_type { | |
11759 | HRTIMER_BASE_MONOTONIC, | |
11760 | HRTIMER_BASE_REALTIME, | |
11761 | HRTIMER_BASE_BOOTTIME, | |
11762 | HRTIMER_BASE_TAI, | |
11763 | + HRTIMER_BASE_MONOTONIC_SOFT, | |
11764 | + HRTIMER_BASE_REALTIME_SOFT, | |
11765 | + HRTIMER_BASE_BOOTTIME_SOFT, | |
11766 | + HRTIMER_BASE_TAI_SOFT, | |
11767 | HRTIMER_MAX_CLOCK_BASES, | |
11768 | }; | |
11769 | ||
11770 | -/* | |
11771 | +/** | |
11772 | * struct hrtimer_cpu_base - the per cpu clock bases | |
11773 | * @lock: lock protecting the base and associated clock bases | |
11774 | * and timers | |
11775 | - * @seq: seqcount around __run_hrtimer | |
11776 | - * @running: pointer to the currently running hrtimer | |
11777 | * @cpu: cpu number | |
11778 | * @active_bases: Bitfield to mark bases with active timers | |
11779 | * @clock_was_set_seq: Sequence counter of clock was set events | |
11780 | - * @migration_enabled: The migration of hrtimers to other cpus is enabled | |
11781 | - * @nohz_active: The nohz functionality is enabled | |
11782 | - * @expires_next: absolute time of the next event which was scheduled | |
11783 | - * via clock_set_next_event() | |
11784 | - * @next_timer: Pointer to the first expiring timer | |
11785 | - * @in_hrtirq: hrtimer_interrupt() is currently executing | |
11786 | * @hres_active: State of high resolution mode | |
11787 | + * @in_hrtirq: hrtimer_interrupt() is currently executing | |
11788 | * @hang_detected: The last hrtimer interrupt detected a hang | |
11789 | + * @softirq_activated: displays, if the softirq is raised - update of softirq | |
11790 | + * related settings is not required then. | |
11791 | * @nr_events: Total number of hrtimer interrupt events | |
11792 | * @nr_retries: Total number of hrtimer interrupt retries | |
11793 | * @nr_hangs: Total number of hrtimer interrupt hangs | |
11794 | * @max_hang_time: Maximum time spent in hrtimer_interrupt | |
11795 | + * @expires_next: absolute time of the next event, is required for remote | |
11796 | + * hrtimer enqueue; it is the total first expiry time (hard | |
11797 | + * and soft hrtimer are taken into account) | |
11798 | + * @next_timer: Pointer to the first expiring timer | |
11799 | + * @softirq_expires_next: Time to check, if soft queues needs also to be expired | |
11800 | + * @softirq_next_timer: Pointer to the first expiring softirq based timer | |
11801 | * @clock_base: array of clock bases for this cpu | |
11802 | * | |
11803 | * Note: next_timer is just an optimization for __remove_hrtimer(). | |
11804 | @@ -173,31 +207,31 @@ | |
11805 | */ | |
11806 | struct hrtimer_cpu_base { | |
11807 | raw_spinlock_t lock; | |
11808 | - seqcount_t seq; | |
11809 | - struct hrtimer *running; | |
11810 | unsigned int cpu; | |
11811 | unsigned int active_bases; | |
11812 | unsigned int clock_was_set_seq; | |
11813 | - bool migration_enabled; | |
11814 | - bool nohz_active; | |
11815 | + unsigned int hres_active : 1, | |
11816 | + in_hrtirq : 1, | |
11817 | + hang_detected : 1, | |
11818 | + softirq_activated : 1; | |
11819 | #ifdef CONFIG_HIGH_RES_TIMERS | |
11820 | - unsigned int in_hrtirq : 1, | |
11821 | - hres_active : 1, | |
11822 | - hang_detected : 1; | |
11823 | - ktime_t expires_next; | |
11824 | - struct hrtimer *next_timer; | |
11825 | unsigned int nr_events; | |
11826 | - unsigned int nr_retries; | |
11827 | - unsigned int nr_hangs; | |
11828 | + unsigned short nr_retries; | |
11829 | + unsigned short nr_hangs; | |
11830 | unsigned int max_hang_time; | |
11831 | #endif | |
11832 | + ktime_t expires_next; | |
11833 | + struct hrtimer *next_timer; | |
11834 | + ktime_t softirq_expires_next; | |
11835 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11836 | + wait_queue_head_t wait; | |
11837 | +#endif | |
11838 | + struct hrtimer *softirq_next_timer; | |
11839 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
11840 | } ____cacheline_aligned; | |
11841 | ||
11842 | static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) | |
11843 | { | |
11844 | - BUILD_BUG_ON(sizeof(struct hrtimer_clock_base) > HRTIMER_CLOCK_BASE_ALIGN); | |
11845 | - | |
11846 | timer->node.expires = time; | |
11847 | timer->_softexpires = time; | |
11848 | } | |
11849 | @@ -266,16 +300,17 @@ | |
11850 | return timer->base->get_time(); | |
11851 | } | |
11852 | ||
11853 | +static inline int hrtimer_is_hres_active(struct hrtimer *timer) | |
11854 | +{ | |
11855 | + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? | |
11856 | + timer->base->cpu_base->hres_active : 0; | |
11857 | +} | |
11858 | + | |
11859 | #ifdef CONFIG_HIGH_RES_TIMERS | |
11860 | struct clock_event_device; | |
11861 | ||
11862 | extern void hrtimer_interrupt(struct clock_event_device *dev); | |
11863 | ||
11864 | -static inline int hrtimer_is_hres_active(struct hrtimer *timer) | |
11865 | -{ | |
11866 | - return timer->base->cpu_base->hres_active; | |
11867 | -} | |
11868 | - | |
11869 | /* | |
11870 | * The resolution of the clocks. The resolution value is returned in | |
11871 | * the clock_getres() system call to give application programmers an | |
11872 | @@ -298,11 +333,6 @@ | |
11873 | ||
11874 | #define hrtimer_resolution (unsigned int)LOW_RES_NSEC | |
11875 | ||
11876 | -static inline int hrtimer_is_hres_active(struct hrtimer *timer) | |
11877 | -{ | |
11878 | - return 0; | |
11879 | -} | |
11880 | - | |
11881 | static inline void clock_was_set_delayed(void) { } | |
11882 | ||
11883 | #endif | |
11884 | @@ -344,10 +374,17 @@ | |
11885 | /* Initialize timers: */ | |
11886 | extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock, | |
11887 | enum hrtimer_mode mode); | |
11888 | +extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, | |
11889 | + enum hrtimer_mode mode, | |
11890 | + struct task_struct *task); | |
11891 | ||
11892 | #ifdef CONFIG_DEBUG_OBJECTS_TIMERS | |
11893 | extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock, | |
11894 | enum hrtimer_mode mode); | |
11895 | +extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | |
11896 | + clockid_t clock_id, | |
11897 | + enum hrtimer_mode mode, | |
11898 | + struct task_struct *task); | |
11899 | ||
11900 | extern void destroy_hrtimer_on_stack(struct hrtimer *timer); | |
11901 | #else | |
11902 | @@ -357,6 +394,15 @@ | |
11903 | { | |
11904 | hrtimer_init(timer, which_clock, mode); | |
11905 | } | |
11906 | + | |
11907 | +static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | |
11908 | + clockid_t clock_id, | |
11909 | + enum hrtimer_mode mode, | |
11910 | + struct task_struct *task) | |
11911 | +{ | |
11912 | + hrtimer_init_sleeper(sl, clock_id, mode, task); | |
11913 | +} | |
11914 | + | |
11915 | static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { } | |
11916 | #endif | |
11917 | ||
11918 | @@ -365,11 +411,12 @@ | |
11919 | u64 range_ns, const enum hrtimer_mode mode); | |
11920 | ||
11921 | /** | |
11922 | - * hrtimer_start - (re)start an hrtimer on the current CPU | |
11923 | + * hrtimer_start - (re)start an hrtimer | |
11924 | * @timer: the timer to be added | |
11925 | * @tim: expiry time | |
11926 | - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or | |
11927 | - * relative (HRTIMER_MODE_REL) | |
11928 | + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or | |
11929 | + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); | |
11930 | + * softirq based mode is considered for debug purpose only! | |
11931 | */ | |
11932 | static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim, | |
11933 | const enum hrtimer_mode mode) | |
11934 | @@ -396,6 +443,13 @@ | |
11935 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
11936 | } | |
11937 | ||
11938 | +/* Softirq preemption could deadlock timer removal */ | |
11939 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11940 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
11941 | +#else | |
11942 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
11943 | +#endif | |
11944 | + | |
11945 | /* Query timers: */ | |
11946 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
11947 | ||
11948 | @@ -420,9 +474,9 @@ | |
11949 | * Helper function to check, whether the timer is running the callback | |
11950 | * function | |
11951 | */ | |
11952 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
11953 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
11954 | { | |
11955 | - return timer->base->cpu_base->running == timer; | |
11956 | + return timer->base->running == timer; | |
11957 | } | |
11958 | ||
11959 | /* Forward a hrtimer so it expires after now: */ | |
11960 | @@ -458,15 +512,12 @@ | |
11961 | const enum hrtimer_mode mode, | |
11962 | const clockid_t clockid); | |
11963 | ||
11964 | -extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | |
11965 | - struct task_struct *tsk); | |
11966 | - | |
11967 | extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta, | |
11968 | const enum hrtimer_mode mode); | |
11969 | extern int schedule_hrtimeout_range_clock(ktime_t *expires, | |
11970 | u64 delta, | |
11971 | const enum hrtimer_mode mode, | |
11972 | - int clock); | |
11973 | + clockid_t clock_id); | |
11974 | extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); | |
11975 | ||
11976 | /* Soft interrupt function to run the hrtimer queues: */ | |
11977 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/idr.h linux-4.14/include/linux/idr.h | |
11978 | --- linux-4.14.orig/include/linux/idr.h 2017-11-12 19:46:13.000000000 +0100 | |
11979 | +++ linux-4.14/include/linux/idr.h 2018-09-05 11:05:07.000000000 +0200 | |
11980 | @@ -167,10 +167,7 @@ | |
11981 | * Each idr_preload() should be matched with an invocation of this | |
11982 | * function. See idr_preload() for details. | |
11983 | */ | |
11984 | -static inline void idr_preload_end(void) | |
11985 | -{ | |
11986 | - preempt_enable(); | |
11987 | -} | |
11988 | +void idr_preload_end(void); | |
11989 | ||
11990 | /** | |
11991 | * idr_find - return pointer for given id | |
11992 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/init_task.h linux-4.14/include/linux/init_task.h | |
11993 | --- linux-4.14.orig/include/linux/init_task.h 2017-11-12 19:46:13.000000000 +0100 | |
11994 | +++ linux-4.14/include/linux/init_task.h 2018-09-05 11:05:07.000000000 +0200 | |
11995 | @@ -163,6 +163,12 @@ | |
11996 | # define INIT_PERF_EVENTS(tsk) | |
11997 | #endif | |
11998 | ||
11999 | +#if defined(CONFIG_POSIX_TIMERS) && defined(CONFIG_PREEMPT_RT_BASE) | |
12000 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
12001 | +#else | |
12002 | +# define INIT_TIMER_LIST | |
12003 | +#endif | |
12004 | + | |
12005 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
12006 | # define INIT_VTIME(tsk) \ | |
12007 | .vtime.seqcount = SEQCNT_ZERO(tsk.vtime.seqcount), \ | |
12008 | @@ -234,7 +240,8 @@ | |
12009 | .static_prio = MAX_PRIO-20, \ | |
12010 | .normal_prio = MAX_PRIO-20, \ | |
12011 | .policy = SCHED_NORMAL, \ | |
12012 | - .cpus_allowed = CPU_MASK_ALL, \ | |
12013 | + .cpus_ptr = &tsk.cpus_mask, \ | |
12014 | + .cpus_mask = CPU_MASK_ALL, \ | |
12015 | .nr_cpus_allowed= NR_CPUS, \ | |
12016 | .mm = NULL, \ | |
12017 | .active_mm = &init_mm, \ | |
12018 | @@ -276,6 +283,7 @@ | |
12019 | INIT_CPU_TIMERS(tsk) \ | |
12020 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
12021 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
12022 | + INIT_TIMER_LIST \ | |
12023 | .pids = { \ | |
12024 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
12025 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
12026 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/interrupt.h linux-4.14/include/linux/interrupt.h | |
12027 | --- linux-4.14.orig/include/linux/interrupt.h 2018-09-05 11:03:22.000000000 +0200 | |
12028 | +++ linux-4.14/include/linux/interrupt.h 2018-09-05 11:05:07.000000000 +0200 | |
12029 | @@ -15,6 +15,7 @@ | |
12030 | #include <linux/hrtimer.h> | |
12031 | #include <linux/kref.h> | |
12032 | #include <linux/workqueue.h> | |
12033 | +#include <linux/swork.h> | |
12034 | ||
12035 | #include <linux/atomic.h> | |
12036 | #include <asm/ptrace.h> | |
12037 | @@ -63,6 +64,7 @@ | |
12038 | * interrupt handler after suspending interrupts. For system | |
12039 | * wakeup devices users need to implement wakeup detection in | |
12040 | * their interrupt handlers. | |
12041 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
12042 | */ | |
12043 | #define IRQF_SHARED 0x00000080 | |
12044 | #define IRQF_PROBE_SHARED 0x00000100 | |
12045 | @@ -76,6 +78,7 @@ | |
12046 | #define IRQF_NO_THREAD 0x00010000 | |
12047 | #define IRQF_EARLY_RESUME 0x00020000 | |
12048 | #define IRQF_COND_SUSPEND 0x00040000 | |
12049 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
12050 | ||
12051 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
12052 | ||
12053 | @@ -207,7 +210,7 @@ | |
12054 | #ifdef CONFIG_LOCKDEP | |
12055 | # define local_irq_enable_in_hardirq() do { } while (0) | |
12056 | #else | |
12057 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
12058 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
12059 | #endif | |
12060 | ||
12061 | extern void disable_irq_nosync(unsigned int irq); | |
12062 | @@ -227,6 +230,7 @@ | |
12063 | * struct irq_affinity_notify - context for notification of IRQ affinity changes | |
12064 | * @irq: Interrupt to which notification applies | |
12065 | * @kref: Reference count, for internal use | |
12066 | + * @swork: Swork item, for internal use | |
12067 | * @work: Work item, for internal use | |
12068 | * @notify: Function to be called on change. This will be | |
12069 | * called in process context. | |
12070 | @@ -238,7 +242,11 @@ | |
12071 | struct irq_affinity_notify { | |
12072 | unsigned int irq; | |
12073 | struct kref kref; | |
12074 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12075 | + struct swork_event swork; | |
12076 | +#else | |
12077 | struct work_struct work; | |
12078 | +#endif | |
12079 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
12080 | void (*release)(struct kref *ref); | |
12081 | }; | |
12082 | @@ -429,9 +437,13 @@ | |
12083 | bool state); | |
12084 | ||
12085 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
12086 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
12087 | extern bool force_irqthreads; | |
12088 | +# else | |
12089 | +# define force_irqthreads (true) | |
12090 | +# endif | |
12091 | #else | |
12092 | -#define force_irqthreads (0) | |
12093 | +#define force_irqthreads (false) | |
12094 | #endif | |
12095 | ||
12096 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
12097 | @@ -488,9 +500,10 @@ | |
12098 | void (*action)(struct softirq_action *); | |
12099 | }; | |
12100 | ||
12101 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12102 | asmlinkage void do_softirq(void); | |
12103 | asmlinkage void __do_softirq(void); | |
12104 | - | |
12105 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
12106 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
12107 | void do_softirq_own_stack(void); | |
12108 | #else | |
12109 | @@ -499,13 +512,25 @@ | |
12110 | __do_softirq(); | |
12111 | } | |
12112 | #endif | |
12113 | +#else | |
12114 | +extern void thread_do_softirq(void); | |
12115 | +#endif | |
12116 | ||
12117 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
12118 | extern void softirq_init(void); | |
12119 | extern void __raise_softirq_irqoff(unsigned int nr); | |
12120 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12121 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
12122 | +#else | |
12123 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
12124 | +{ | |
12125 | + __raise_softirq_irqoff(nr); | |
12126 | +} | |
12127 | +#endif | |
12128 | ||
12129 | extern void raise_softirq_irqoff(unsigned int nr); | |
12130 | extern void raise_softirq(unsigned int nr); | |
12131 | +extern void softirq_check_pending_idle(void); | |
12132 | ||
12133 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
12134 | ||
12135 | @@ -527,8 +552,9 @@ | |
12136 | to be executed on some cpu at least once after this. | |
12137 | * If the tasklet is already scheduled, but its execution is still not | |
12138 | started, it will be executed only once. | |
12139 | - * If this tasklet is already running on another CPU (or schedule is called | |
12140 | - from tasklet itself), it is rescheduled for later. | |
12141 | + * If this tasklet is already running on another CPU, it is rescheduled | |
12142 | + for later. | |
12143 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
12144 | * Tasklet is strictly serialized wrt itself, but not | |
12145 | wrt another tasklets. If client needs some intertask synchronization, | |
12146 | he makes it with spinlocks. | |
12147 | @@ -553,27 +579,36 @@ | |
12148 | enum | |
12149 | { | |
12150 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
12151 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
12152 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
12153 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
12154 | }; | |
12155 | ||
12156 | -#ifdef CONFIG_SMP | |
12157 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
12158 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
12159 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
12160 | + | |
12161 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
12162 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
12163 | { | |
12164 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
12165 | } | |
12166 | ||
12167 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
12168 | +{ | |
12169 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
12170 | +} | |
12171 | + | |
12172 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
12173 | { | |
12174 | smp_mb__before_atomic(); | |
12175 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
12176 | } | |
12177 | ||
12178 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
12179 | -{ | |
12180 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
12181 | -} | |
12182 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
12183 | + | |
12184 | #else | |
12185 | #define tasklet_trylock(t) 1 | |
12186 | +#define tasklet_tryunlock(t) 1 | |
12187 | #define tasklet_unlock_wait(t) do { } while (0) | |
12188 | #define tasklet_unlock(t) do { } while (0) | |
12189 | #endif | |
12190 | @@ -607,41 +642,17 @@ | |
12191 | smp_mb(); | |
12192 | } | |
12193 | ||
12194 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
12195 | -{ | |
12196 | - smp_mb__before_atomic(); | |
12197 | - atomic_dec(&t->count); | |
12198 | -} | |
12199 | - | |
12200 | +extern void tasklet_enable(struct tasklet_struct *t); | |
12201 | extern void tasklet_kill(struct tasklet_struct *t); | |
12202 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
12203 | extern void tasklet_init(struct tasklet_struct *t, | |
12204 | void (*func)(unsigned long), unsigned long data); | |
12205 | ||
12206 | -struct tasklet_hrtimer { | |
12207 | - struct hrtimer timer; | |
12208 | - struct tasklet_struct tasklet; | |
12209 | - enum hrtimer_restart (*function)(struct hrtimer *); | |
12210 | -}; | |
12211 | - | |
12212 | -extern void | |
12213 | -tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, | |
12214 | - enum hrtimer_restart (*function)(struct hrtimer *), | |
12215 | - clockid_t which_clock, enum hrtimer_mode mode); | |
12216 | - | |
12217 | -static inline | |
12218 | -void tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, | |
12219 | - const enum hrtimer_mode mode) | |
12220 | -{ | |
12221 | - hrtimer_start(&ttimer->timer, time, mode); | |
12222 | -} | |
12223 | - | |
12224 | -static inline | |
12225 | -void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |
12226 | -{ | |
12227 | - hrtimer_cancel(&ttimer->timer); | |
12228 | - tasklet_kill(&ttimer->tasklet); | |
12229 | -} | |
12230 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12231 | +extern void softirq_early_init(void); | |
12232 | +#else | |
12233 | +static inline void softirq_early_init(void) { } | |
12234 | +#endif | |
12235 | ||
12236 | /* | |
12237 | * Autoprobing for irqs: | |
12238 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/irqdesc.h linux-4.14/include/linux/irqdesc.h | |
12239 | --- linux-4.14.orig/include/linux/irqdesc.h 2017-11-12 19:46:13.000000000 +0100 | |
12240 | +++ linux-4.14/include/linux/irqdesc.h 2018-09-05 11:05:07.000000000 +0200 | |
12241 | @@ -70,6 +70,7 @@ | |
12242 | unsigned int irqs_unhandled; | |
12243 | atomic_t threads_handled; | |
12244 | int threads_handled_last; | |
12245 | + u64 random_ip; | |
12246 | raw_spinlock_t lock; | |
12247 | struct cpumask *percpu_enabled; | |
12248 | const struct cpumask *percpu_affinity; | |
12249 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/irqflags.h linux-4.14/include/linux/irqflags.h | |
12250 | --- linux-4.14.orig/include/linux/irqflags.h 2017-11-12 19:46:13.000000000 +0100 | |
12251 | +++ linux-4.14/include/linux/irqflags.h 2018-09-05 11:05:07.000000000 +0200 | |
12252 | @@ -34,16 +34,6 @@ | |
12253 | current->hardirq_context--; \ | |
12254 | crossrelease_hist_end(XHLOCK_HARD); \ | |
12255 | } while (0) | |
12256 | -# define lockdep_softirq_enter() \ | |
12257 | -do { \ | |
12258 | - current->softirq_context++; \ | |
12259 | - crossrelease_hist_start(XHLOCK_SOFT); \ | |
12260 | -} while (0) | |
12261 | -# define lockdep_softirq_exit() \ | |
12262 | -do { \ | |
12263 | - current->softirq_context--; \ | |
12264 | - crossrelease_hist_end(XHLOCK_SOFT); \ | |
12265 | -} while (0) | |
12266 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
12267 | #else | |
12268 | # define trace_hardirqs_on() do { } while (0) | |
12269 | @@ -56,9 +46,23 @@ | |
12270 | # define trace_softirqs_enabled(p) 0 | |
12271 | # define trace_hardirq_enter() do { } while (0) | |
12272 | # define trace_hardirq_exit() do { } while (0) | |
12273 | +# define INIT_TRACE_IRQFLAGS | |
12274 | +#endif | |
12275 | + | |
12276 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
12277 | +# define lockdep_softirq_enter() \ | |
12278 | +do { \ | |
12279 | + current->softirq_context++; \ | |
12280 | + crossrelease_hist_start(XHLOCK_SOFT); \ | |
12281 | +} while (0) | |
12282 | +# define lockdep_softirq_exit() \ | |
12283 | +do { \ | |
12284 | + current->softirq_context--; \ | |
12285 | + crossrelease_hist_end(XHLOCK_SOFT); \ | |
12286 | +} while (0) | |
12287 | +#else | |
12288 | # define lockdep_softirq_enter() do { } while (0) | |
12289 | # define lockdep_softirq_exit() do { } while (0) | |
12290 | -# define INIT_TRACE_IRQFLAGS | |
12291 | #endif | |
12292 | ||
12293 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
12294 | @@ -165,4 +169,23 @@ | |
12295 | ||
12296 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
12297 | ||
12298 | +/* | |
12299 | + * local_irq* variants depending on RT/!RT | |
12300 | + */ | |
12301 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12302 | +# define local_irq_disable_nort() do { } while (0) | |
12303 | +# define local_irq_enable_nort() do { } while (0) | |
12304 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
12305 | +# define local_irq_restore_nort(flags) (void)(flags) | |
12306 | +# define local_irq_disable_rt() local_irq_disable() | |
12307 | +# define local_irq_enable_rt() local_irq_enable() | |
12308 | +#else | |
12309 | +# define local_irq_disable_nort() local_irq_disable() | |
12310 | +# define local_irq_enable_nort() local_irq_enable() | |
12311 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
12312 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
12313 | +# define local_irq_disable_rt() do { } while (0) | |
12314 | +# define local_irq_enable_rt() do { } while (0) | |
12315 | +#endif | |
12316 | + | |
12317 | #endif | |
12318 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/irq.h linux-4.14/include/linux/irq.h | |
12319 | --- linux-4.14.orig/include/linux/irq.h 2018-09-05 11:03:22.000000000 +0200 | |
12320 | +++ linux-4.14/include/linux/irq.h 2018-09-05 11:05:07.000000000 +0200 | |
12321 | @@ -74,6 +74,7 @@ | |
12322 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
12323 | * it from the spurious interrupt detection | |
12324 | * mechanism and from core side polling. | |
12325 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
12326 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
12327 | */ | |
12328 | enum { | |
12329 | @@ -101,13 +102,14 @@ | |
12330 | IRQ_PER_CPU_DEVID = (1 << 17), | |
12331 | IRQ_IS_POLLED = (1 << 18), | |
12332 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
12333 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
12334 | }; | |
12335 | ||
12336 | #define IRQF_MODIFY_MASK \ | |
12337 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
12338 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
12339 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
12340 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
12341 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
12342 | ||
12343 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
12344 | ||
12345 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/irq_work.h linux-4.14/include/linux/irq_work.h | |
12346 | --- linux-4.14.orig/include/linux/irq_work.h 2017-11-12 19:46:13.000000000 +0100 | |
12347 | +++ linux-4.14/include/linux/irq_work.h 2018-09-05 11:05:07.000000000 +0200 | |
12348 | @@ -17,6 +17,7 @@ | |
12349 | #define IRQ_WORK_BUSY 2UL | |
12350 | #define IRQ_WORK_FLAGS 3UL | |
12351 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
12352 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
12353 | ||
12354 | struct irq_work { | |
12355 | unsigned long flags; | |
12356 | @@ -52,4 +53,10 @@ | |
12357 | static inline void irq_work_run(void) { } | |
12358 | #endif | |
12359 | ||
12360 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
12361 | +void irq_work_tick_soft(void); | |
12362 | +#else | |
12363 | +static inline void irq_work_tick_soft(void) { } | |
12364 | +#endif | |
12365 | + | |
12366 | #endif /* _LINUX_IRQ_WORK_H */ | |
12367 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/jbd2.h linux-4.14/include/linux/jbd2.h | |
12368 | --- linux-4.14.orig/include/linux/jbd2.h 2018-09-05 11:03:22.000000000 +0200 | |
12369 | +++ linux-4.14/include/linux/jbd2.h 2018-09-05 11:05:07.000000000 +0200 | |
12370 | @@ -347,32 +347,56 @@ | |
12371 | ||
12372 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
12373 | { | |
12374 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12375 | bit_spin_lock(BH_State, &bh->b_state); | |
12376 | +#else | |
12377 | + spin_lock(&bh->b_state_lock); | |
12378 | +#endif | |
12379 | } | |
12380 | ||
12381 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
12382 | { | |
12383 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12384 | return bit_spin_trylock(BH_State, &bh->b_state); | |
12385 | +#else | |
12386 | + return spin_trylock(&bh->b_state_lock); | |
12387 | +#endif | |
12388 | } | |
12389 | ||
12390 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
12391 | { | |
12392 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12393 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
12394 | +#else | |
12395 | + return spin_is_locked(&bh->b_state_lock); | |
12396 | +#endif | |
12397 | } | |
12398 | ||
12399 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
12400 | { | |
12401 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12402 | bit_spin_unlock(BH_State, &bh->b_state); | |
12403 | +#else | |
12404 | + spin_unlock(&bh->b_state_lock); | |
12405 | +#endif | |
12406 | } | |
12407 | ||
12408 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
12409 | { | |
12410 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12411 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
12412 | +#else | |
12413 | + spin_lock(&bh->b_journal_head_lock); | |
12414 | +#endif | |
12415 | } | |
12416 | ||
12417 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
12418 | { | |
12419 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12420 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
12421 | +#else | |
12422 | + spin_unlock(&bh->b_journal_head_lock); | |
12423 | +#endif | |
12424 | } | |
12425 | ||
12426 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
12427 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/kdb.h linux-4.14/include/linux/kdb.h | |
12428 | --- linux-4.14.orig/include/linux/kdb.h 2017-11-12 19:46:13.000000000 +0100 | |
12429 | +++ linux-4.14/include/linux/kdb.h 2018-09-05 11:05:07.000000000 +0200 | |
12430 | @@ -167,6 +167,7 @@ | |
12431 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
12432 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
12433 | ||
12434 | +#define in_kdb_printk() (kdb_trap_printk) | |
12435 | extern void kdb_init(int level); | |
12436 | ||
12437 | /* Access to kdb specific polling devices */ | |
12438 | @@ -201,6 +202,7 @@ | |
12439 | extern int kdb_unregister(char *); | |
12440 | #else /* ! CONFIG_KGDB_KDB */ | |
12441 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
12442 | +#define in_kdb_printk() (0) | |
12443 | static inline void kdb_init(int level) {} | |
12444 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
12445 | char *help, short minlen) { return 0; } | |
12446 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/kernel.h linux-4.14/include/linux/kernel.h | |
12447 | --- linux-4.14.orig/include/linux/kernel.h 2017-11-12 19:46:13.000000000 +0100 | |
12448 | +++ linux-4.14/include/linux/kernel.h 2018-09-05 11:05:07.000000000 +0200 | |
12449 | @@ -225,6 +225,9 @@ | |
12450 | */ | |
12451 | # define might_sleep() \ | |
12452 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
12453 | + | |
12454 | +# define might_sleep_no_state_check() \ | |
12455 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
12456 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
12457 | #else | |
12458 | static inline void ___might_sleep(const char *file, int line, | |
12459 | @@ -232,6 +235,7 @@ | |
12460 | static inline void __might_sleep(const char *file, int line, | |
12461 | int preempt_offset) { } | |
12462 | # define might_sleep() do { might_resched(); } while (0) | |
12463 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
12464 | # define sched_annotate_sleep() do { } while (0) | |
12465 | #endif | |
12466 | ||
12467 | @@ -531,6 +535,7 @@ | |
12468 | SYSTEM_HALT, | |
12469 | SYSTEM_POWER_OFF, | |
12470 | SYSTEM_RESTART, | |
12471 | + SYSTEM_SUSPEND, | |
12472 | } system_state; | |
12473 | ||
12474 | #define TAINT_PROPRIETARY_MODULE 0 | |
12475 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/list_bl.h linux-4.14/include/linux/list_bl.h | |
12476 | --- linux-4.14.orig/include/linux/list_bl.h 2017-11-12 19:46:13.000000000 +0100 | |
12477 | +++ linux-4.14/include/linux/list_bl.h 2018-09-05 11:05:07.000000000 +0200 | |
12478 | @@ -3,6 +3,7 @@ | |
12479 | #define _LINUX_LIST_BL_H | |
12480 | ||
12481 | #include <linux/list.h> | |
12482 | +#include <linux/spinlock.h> | |
12483 | #include <linux/bit_spinlock.h> | |
12484 | ||
12485 | /* | |
12486 | @@ -33,13 +34,24 @@ | |
12487 | ||
12488 | struct hlist_bl_head { | |
12489 | struct hlist_bl_node *first; | |
12490 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12491 | + raw_spinlock_t lock; | |
12492 | +#endif | |
12493 | }; | |
12494 | ||
12495 | struct hlist_bl_node { | |
12496 | struct hlist_bl_node *next, **pprev; | |
12497 | }; | |
12498 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
12499 | - ((ptr)->first = NULL) | |
12500 | + | |
12501 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12502 | +#define INIT_HLIST_BL_HEAD(h) \ | |
12503 | +do { \ | |
12504 | + (h)->first = NULL; \ | |
12505 | + raw_spin_lock_init(&(h)->lock); \ | |
12506 | +} while (0) | |
12507 | +#else | |
12508 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
12509 | +#endif | |
12510 | ||
12511 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
12512 | { | |
12513 | @@ -119,12 +131,26 @@ | |
12514 | ||
12515 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
12516 | { | |
12517 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12518 | bit_spin_lock(0, (unsigned long *)b); | |
12519 | +#else | |
12520 | + raw_spin_lock(&b->lock); | |
12521 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
12522 | + __set_bit(0, (unsigned long *)b); | |
12523 | +#endif | |
12524 | +#endif | |
12525 | } | |
12526 | ||
12527 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
12528 | { | |
12529 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12530 | __bit_spin_unlock(0, (unsigned long *)b); | |
12531 | +#else | |
12532 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
12533 | + __clear_bit(0, (unsigned long *)b); | |
12534 | +#endif | |
12535 | + raw_spin_unlock(&b->lock); | |
12536 | +#endif | |
12537 | } | |
12538 | ||
12539 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
12540 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/locallock.h linux-4.14/include/linux/locallock.h | |
12541 | --- linux-4.14.orig/include/linux/locallock.h 1970-01-01 01:00:00.000000000 +0100 | |
12542 | +++ linux-4.14/include/linux/locallock.h 2018-09-05 11:05:07.000000000 +0200 | |
12543 | @@ -0,0 +1,271 @@ | |
12544 | +#ifndef _LINUX_LOCALLOCK_H | |
12545 | +#define _LINUX_LOCALLOCK_H | |
12546 | + | |
12547 | +#include <linux/percpu.h> | |
12548 | +#include <linux/spinlock.h> | |
12549 | + | |
12550 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12551 | + | |
12552 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
12553 | +# define LL_WARN(cond) WARN_ON(cond) | |
12554 | +#else | |
12555 | +# define LL_WARN(cond) do { } while (0) | |
12556 | +#endif | |
12557 | + | |
12558 | +/* | |
12559 | + * per cpu lock based substitute for local_irq_*() | |
12560 | + */ | |
12561 | +struct local_irq_lock { | |
12562 | + spinlock_t lock; | |
12563 | + struct task_struct *owner; | |
12564 | + int nestcnt; | |
12565 | + unsigned long flags; | |
12566 | +}; | |
12567 | + | |
12568 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
12569 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
12570 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
12571 | + | |
12572 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
12573 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
12574 | + | |
12575 | +#define local_irq_lock_init(lvar) \ | |
12576 | + do { \ | |
12577 | + int __cpu; \ | |
12578 | + for_each_possible_cpu(__cpu) \ | |
12579 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
12580 | + } while (0) | |
12581 | + | |
12582 | +static inline void __local_lock(struct local_irq_lock *lv) | |
12583 | +{ | |
12584 | + if (lv->owner != current) { | |
12585 | + spin_lock(&lv->lock); | |
12586 | + LL_WARN(lv->owner); | |
12587 | + LL_WARN(lv->nestcnt); | |
12588 | + lv->owner = current; | |
12589 | + } | |
12590 | + lv->nestcnt++; | |
12591 | +} | |
12592 | + | |
12593 | +#define local_lock(lvar) \ | |
12594 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
12595 | + | |
12596 | +#define local_lock_on(lvar, cpu) \ | |
12597 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
12598 | + | |
12599 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
12600 | +{ | |
12601 | + if (lv->owner != current && spin_trylock(&lv->lock)) { | |
12602 | + LL_WARN(lv->owner); | |
12603 | + LL_WARN(lv->nestcnt); | |
12604 | + lv->owner = current; | |
12605 | + lv->nestcnt = 1; | |
12606 | + return 1; | |
12607 | + } else if (lv->owner == current) { | |
12608 | + lv->nestcnt++; | |
12609 | + return 1; | |
12610 | + } | |
12611 | + return 0; | |
12612 | +} | |
12613 | + | |
12614 | +#define local_trylock(lvar) \ | |
12615 | + ({ \ | |
12616 | + int __locked; \ | |
12617 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
12618 | + if (!__locked) \ | |
12619 | + put_local_var(lvar); \ | |
12620 | + __locked; \ | |
12621 | + }) | |
12622 | + | |
12623 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
12624 | +{ | |
12625 | + LL_WARN(lv->nestcnt == 0); | |
12626 | + LL_WARN(lv->owner != current); | |
12627 | + if (--lv->nestcnt) | |
12628 | + return; | |
12629 | + | |
12630 | + lv->owner = NULL; | |
12631 | + spin_unlock(&lv->lock); | |
12632 | +} | |
12633 | + | |
12634 | +#define local_unlock(lvar) \ | |
12635 | + do { \ | |
12636 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
12637 | + put_local_var(lvar); \ | |
12638 | + } while (0) | |
12639 | + | |
12640 | +#define local_unlock_on(lvar, cpu) \ | |
12641 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
12642 | + | |
12643 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
12644 | +{ | |
12645 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
12646 | + LL_WARN(lv->owner); | |
12647 | + LL_WARN(lv->nestcnt); | |
12648 | + lv->owner = current; | |
12649 | + lv->nestcnt = 1; | |
12650 | +} | |
12651 | + | |
12652 | +#define local_lock_irq(lvar) \ | |
12653 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
12654 | + | |
12655 | +#define local_lock_irq_on(lvar, cpu) \ | |
12656 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
12657 | + | |
12658 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
12659 | +{ | |
12660 | + LL_WARN(!lv->nestcnt); | |
12661 | + LL_WARN(lv->owner != current); | |
12662 | + lv->owner = NULL; | |
12663 | + lv->nestcnt = 0; | |
12664 | + spin_unlock_irq(&lv->lock); | |
12665 | +} | |
12666 | + | |
12667 | +#define local_unlock_irq(lvar) \ | |
12668 | + do { \ | |
12669 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
12670 | + put_local_var(lvar); \ | |
12671 | + } while (0) | |
12672 | + | |
12673 | +#define local_unlock_irq_on(lvar, cpu) \ | |
12674 | + do { \ | |
12675 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
12676 | + } while (0) | |
12677 | + | |
12678 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
12679 | +{ | |
12680 | + if (lv->owner != current) { | |
12681 | + __local_lock_irq(lv); | |
12682 | + return 0; | |
12683 | + } else { | |
12684 | + lv->nestcnt++; | |
12685 | + return 1; | |
12686 | + } | |
12687 | +} | |
12688 | + | |
12689 | +#define local_lock_irqsave(lvar, _flags) \ | |
12690 | + do { \ | |
12691 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
12692 | + put_local_var(lvar); \ | |
12693 | + _flags = __this_cpu_read(lvar.flags); \ | |
12694 | + } while (0) | |
12695 | + | |
12696 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
12697 | + do { \ | |
12698 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
12699 | + _flags = per_cpu(lvar, cpu).flags; \ | |
12700 | + } while (0) | |
12701 | + | |
12702 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
12703 | + unsigned long flags) | |
12704 | +{ | |
12705 | + LL_WARN(!lv->nestcnt); | |
12706 | + LL_WARN(lv->owner != current); | |
12707 | + if (--lv->nestcnt) | |
12708 | + return 0; | |
12709 | + | |
12710 | + lv->owner = NULL; | |
12711 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
12712 | + return 1; | |
12713 | +} | |
12714 | + | |
12715 | +#define local_unlock_irqrestore(lvar, flags) \ | |
12716 | + do { \ | |
12717 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
12718 | + put_local_var(lvar); \ | |
12719 | + } while (0) | |
12720 | + | |
12721 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
12722 | + do { \ | |
12723 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
12724 | + } while (0) | |
12725 | + | |
12726 | +#define local_spin_trylock_irq(lvar, lock) \ | |
12727 | + ({ \ | |
12728 | + int __locked; \ | |
12729 | + local_lock_irq(lvar); \ | |
12730 | + __locked = spin_trylock(lock); \ | |
12731 | + if (!__locked) \ | |
12732 | + local_unlock_irq(lvar); \ | |
12733 | + __locked; \ | |
12734 | + }) | |
12735 | + | |
12736 | +#define local_spin_lock_irq(lvar, lock) \ | |
12737 | + do { \ | |
12738 | + local_lock_irq(lvar); \ | |
12739 | + spin_lock(lock); \ | |
12740 | + } while (0) | |
12741 | + | |
12742 | +#define local_spin_unlock_irq(lvar, lock) \ | |
12743 | + do { \ | |
12744 | + spin_unlock(lock); \ | |
12745 | + local_unlock_irq(lvar); \ | |
12746 | + } while (0) | |
12747 | + | |
12748 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
12749 | + do { \ | |
12750 | + local_lock_irqsave(lvar, flags); \ | |
12751 | + spin_lock(lock); \ | |
12752 | + } while (0) | |
12753 | + | |
12754 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
12755 | + do { \ | |
12756 | + spin_unlock(lock); \ | |
12757 | + local_unlock_irqrestore(lvar, flags); \ | |
12758 | + } while (0) | |
12759 | + | |
12760 | +#define get_locked_var(lvar, var) \ | |
12761 | + (*({ \ | |
12762 | + local_lock(lvar); \ | |
12763 | + this_cpu_ptr(&var); \ | |
12764 | + })) | |
12765 | + | |
12766 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
12767 | + | |
12768 | +#define local_lock_cpu(lvar) \ | |
12769 | + ({ \ | |
12770 | + local_lock(lvar); \ | |
12771 | + smp_processor_id(); \ | |
12772 | + }) | |
12773 | + | |
12774 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
12775 | + | |
12776 | +#else /* PREEMPT_RT_BASE */ | |
12777 | + | |
12778 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
12779 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
12780 | + | |
12781 | +static inline void local_irq_lock_init(int lvar) { } | |
12782 | + | |
12783 | +#define local_trylock(lvar) \ | |
12784 | + ({ \ | |
12785 | + preempt_disable(); \ | |
12786 | + 1; \ | |
12787 | + }) | |
12788 | + | |
12789 | +#define local_lock(lvar) preempt_disable() | |
12790 | +#define local_unlock(lvar) preempt_enable() | |
12791 | +#define local_lock_irq(lvar) local_irq_disable() | |
12792 | +#define local_lock_irq_on(lvar, cpu) local_irq_disable() | |
12793 | +#define local_unlock_irq(lvar) local_irq_enable() | |
12794 | +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() | |
12795 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
12796 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
12797 | + | |
12798 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
12799 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
12800 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
12801 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
12802 | + spin_lock_irqsave(lock, flags) | |
12803 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
12804 | + spin_unlock_irqrestore(lock, flags) | |
12805 | + | |
12806 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
12807 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
12808 | + | |
12809 | +#define local_lock_cpu(lvar) get_cpu() | |
12810 | +#define local_unlock_cpu(lvar) put_cpu() | |
12811 | + | |
12812 | +#endif | |
12813 | + | |
12814 | +#endif | |
12815 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/mm_types.h linux-4.14/include/linux/mm_types.h | |
12816 | --- linux-4.14.orig/include/linux/mm_types.h 2018-09-05 11:03:28.000000000 +0200 | |
12817 | +++ linux-4.14/include/linux/mm_types.h 2018-09-05 11:05:07.000000000 +0200 | |
12818 | @@ -12,6 +12,7 @@ | |
12819 | #include <linux/completion.h> | |
12820 | #include <linux/cpumask.h> | |
12821 | #include <linux/uprobes.h> | |
12822 | +#include <linux/rcupdate.h> | |
12823 | #include <linux/page-flags-layout.h> | |
12824 | #include <linux/workqueue.h> | |
12825 | ||
12826 | @@ -498,6 +499,9 @@ | |
12827 | bool tlb_flush_batched; | |
12828 | #endif | |
12829 | struct uprobes_state uprobes_state; | |
12830 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12831 | + struct rcu_head delayed_drop; | |
12832 | +#endif | |
12833 | #ifdef CONFIG_HUGETLB_PAGE | |
12834 | atomic_long_t hugetlb_usage; | |
12835 | #endif | |
12836 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/mutex.h linux-4.14/include/linux/mutex.h | |
12837 | --- linux-4.14.orig/include/linux/mutex.h 2017-11-12 19:46:13.000000000 +0100 | |
12838 | +++ linux-4.14/include/linux/mutex.h 2018-09-05 11:05:07.000000000 +0200 | |
12839 | @@ -23,6 +23,17 @@ | |
1a6e0f06 | 12840 | |
e4b2b4a8 | 12841 | struct ww_acquire_ctx; |
1a6e0f06 | 12842 | |
e4b2b4a8 JK |
12843 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
12844 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
12845 | + , .dep_map = { .name = #lockname } | |
12846 | +#else | |
12847 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
12848 | +#endif | |
12849 | + | |
12850 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12851 | +# include <linux/mutex_rt.h> | |
12852 | +#else | |
12853 | + | |
12854 | /* | |
12855 | * Simple, straightforward mutexes with strict semantics: | |
12856 | * | |
12857 | @@ -114,13 +125,6 @@ | |
12858 | __mutex_init((mutex), #mutex, &__key); \ | |
12859 | } while (0) | |
1a6e0f06 | 12860 | |
e4b2b4a8 JK |
12861 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC |
12862 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
12863 | - , .dep_map = { .name = #lockname } | |
12864 | -#else | |
12865 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
12866 | -#endif | |
12867 | - | |
12868 | #define __MUTEX_INITIALIZER(lockname) \ | |
12869 | { .owner = ATOMIC_LONG_INIT(0) \ | |
12870 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
12871 | @@ -228,4 +232,6 @@ | |
12872 | return mutex_trylock(lock); | |
12873 | } | |
1a6e0f06 | 12874 | |
e4b2b4a8 JK |
12875 | +#endif /* !PREEMPT_RT_FULL */ |
12876 | + | |
12877 | #endif /* __LINUX_MUTEX_H */ | |
12878 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/mutex_rt.h linux-4.14/include/linux/mutex_rt.h | |
12879 | --- linux-4.14.orig/include/linux/mutex_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
12880 | +++ linux-4.14/include/linux/mutex_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
12881 | @@ -0,0 +1,130 @@ | |
12882 | +#ifndef __LINUX_MUTEX_RT_H | |
12883 | +#define __LINUX_MUTEX_RT_H | |
12884 | + | |
12885 | +#ifndef __LINUX_MUTEX_H | |
12886 | +#error "Please include mutex.h" | |
12887 | +#endif | |
12888 | + | |
12889 | +#include <linux/rtmutex.h> | |
12890 | + | |
12891 | +/* FIXME: Just for __lockfunc */ | |
12892 | +#include <linux/spinlock.h> | |
12893 | + | |
12894 | +struct mutex { | |
12895 | + struct rt_mutex lock; | |
12896 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12897 | + struct lockdep_map dep_map; | |
12898 | +#endif | |
12899 | +}; | |
12900 | + | |
12901 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
12902 | + { \ | |
12903 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
12904 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
12905 | + } | |
12906 | + | |
12907 | +#define DEFINE_MUTEX(mutexname) \ | |
12908 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
12909 | + | |
12910 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
12911 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
12912 | +extern void __lockfunc _mutex_lock_io(struct mutex *lock); | |
12913 | +extern void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass); | |
12914 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
12915 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
12916 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
12917 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
12918 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
12919 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
12920 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
12921 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
12922 | + | |
12923 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
12924 | +#define mutex_lock(l) _mutex_lock(l) | |
12925 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
12926 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
12927 | +#define mutex_trylock(l) _mutex_trylock(l) | |
12928 | +#define mutex_unlock(l) _mutex_unlock(l) | |
12929 | +#define mutex_lock_io(l) _mutex_lock_io(l); | |
12930 | + | |
12931 | +#define __mutex_owner(l) ((l)->lock.owner) | |
12932 | + | |
12933 | +#ifdef CONFIG_DEBUG_MUTEXES | |
12934 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) | |
12935 | +#else | |
12936 | +static inline void mutex_destroy(struct mutex *lock) {} | |
12937 | +#endif | |
12938 | + | |
12939 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12940 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
12941 | +# define mutex_lock_interruptible_nested(l, s) \ | |
12942 | + _mutex_lock_interruptible_nested(l, s) | |
12943 | +# define mutex_lock_killable_nested(l, s) \ | |
12944 | + _mutex_lock_killable_nested(l, s) | |
12945 | +# define mutex_lock_io_nested(l, s) _mutex_lock_io_nested(l, s) | |
12946 | + | |
12947 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
12948 | +do { \ | |
12949 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
12950 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
12951 | +} while (0) | |
12952 | + | |
12953 | +#else | |
12954 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
12955 | +# define mutex_lock_interruptible_nested(l, s) \ | |
12956 | + _mutex_lock_interruptible(l) | |
12957 | +# define mutex_lock_killable_nested(l, s) \ | |
12958 | + _mutex_lock_killable(l) | |
12959 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
12960 | +# define mutex_lock_io_nested(l, s) _mutex_lock_io(l) | |
12961 | +#endif | |
12962 | + | |
12963 | +# define mutex_init(mutex) \ | |
12964 | +do { \ | |
12965 | + static struct lock_class_key __key; \ | |
12966 | + \ | |
12967 | + rt_mutex_init(&(mutex)->lock); \ | |
12968 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
12969 | +} while (0) | |
12970 | + | |
12971 | +# define __mutex_init(mutex, name, key) \ | |
12972 | +do { \ | |
12973 | + rt_mutex_init(&(mutex)->lock); \ | |
12974 | + __mutex_do_init((mutex), name, key); \ | |
12975 | +} while (0) | |
12976 | + | |
12977 | +/** | |
12978 | + * These values are chosen such that FAIL and SUCCESS match the | |
12979 | + * values of the regular mutex_trylock(). | |
12980 | + */ | |
12981 | +enum mutex_trylock_recursive_enum { | |
12982 | + MUTEX_TRYLOCK_FAILED = 0, | |
12983 | + MUTEX_TRYLOCK_SUCCESS = 1, | |
12984 | + MUTEX_TRYLOCK_RECURSIVE, | |
12985 | +}; | |
12986 | +/** | |
12987 | + * mutex_trylock_recursive - trylock variant that allows recursive locking | |
12988 | + * @lock: mutex to be locked | |
12989 | + * | |
12990 | + * This function should not be used, _ever_. It is purely for hysterical GEM | |
12991 | + * raisins, and once those are gone this will be removed. | |
12992 | + * | |
12993 | + * Returns: | |
12994 | + * MUTEX_TRYLOCK_FAILED - trylock failed, | |
12995 | + * MUTEX_TRYLOCK_SUCCESS - lock acquired, | |
12996 | + * MUTEX_TRYLOCK_RECURSIVE - we already owned the lock. | |
12997 | + */ | |
12998 | +int __rt_mutex_owner_current(struct rt_mutex *lock); | |
12999 | + | |
13000 | +static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum | |
13001 | +mutex_trylock_recursive(struct mutex *lock) | |
13002 | +{ | |
13003 | + if (unlikely(__rt_mutex_owner_current(&lock->lock))) | |
13004 | + return MUTEX_TRYLOCK_RECURSIVE; | |
13005 | + | |
13006 | + return mutex_trylock(lock); | |
13007 | +} | |
13008 | + | |
13009 | +extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
13010 | + | |
13011 | +#endif | |
13012 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/netdevice.h linux-4.14/include/linux/netdevice.h | |
13013 | --- linux-4.14.orig/include/linux/netdevice.h 2018-09-05 11:03:22.000000000 +0200 | |
13014 | +++ linux-4.14/include/linux/netdevice.h 2018-09-05 11:05:07.000000000 +0200 | |
13015 | @@ -409,7 +409,19 @@ | |
13016 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); | |
1a6e0f06 | 13017 | |
e4b2b4a8 JK |
13018 | void __napi_schedule(struct napi_struct *n); |
13019 | + | |
13020 | +/* | |
13021 | + * When PREEMPT_RT_FULL is defined, all device interrupt handlers | |
13022 | + * run as threads, and they can also be preempted (without PREEMPT_RT | |
13023 | + * interrupt threads can not be preempted). Which means that calling | |
13024 | + * __napi_schedule_irqoff() from an interrupt handler can be preempted | |
13025 | + * and can corrupt the napi->poll_list. | |
13026 | + */ | |
13027 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13028 | +#define __napi_schedule_irqoff(n) __napi_schedule(n) | |
13029 | +#else | |
13030 | void __napi_schedule_irqoff(struct napi_struct *n); | |
13031 | +#endif | |
1a6e0f06 | 13032 | |
e4b2b4a8 JK |
13033 | static inline bool napi_disable_pending(struct napi_struct *n) |
13034 | { | |
13035 | @@ -571,7 +583,11 @@ | |
13036 | * write-mostly part | |
13037 | */ | |
13038 | spinlock_t _xmit_lock ____cacheline_aligned_in_smp; | |
13039 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13040 | + struct task_struct *xmit_lock_owner; | |
13041 | +#else | |
13042 | int xmit_lock_owner; | |
13043 | +#endif | |
1a6e0f06 | 13044 | /* |
e4b2b4a8 | 13045 | * Time (in jiffies) of last Tx |
1a6e0f06 | 13046 | */ |
e4b2b4a8 JK |
13047 | @@ -2433,14 +2449,53 @@ |
13048 | void synchronize_net(void); | |
13049 | int init_dummy_netdev(struct net_device *dev); | |
1a6e0f06 | 13050 | |
e4b2b4a8 JK |
13051 | -DECLARE_PER_CPU(int, xmit_recursion); |
13052 | #define XMIT_RECURSION_LIMIT 10 | |
13053 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13054 | +static inline int dev_recursion_level(void) | |
13055 | +{ | |
13056 | + return current->xmit_recursion; | |
13057 | +} | |
13058 | + | |
13059 | +static inline int xmit_rec_read(void) | |
13060 | +{ | |
13061 | + return current->xmit_recursion; | |
13062 | +} | |
13063 | + | |
13064 | +static inline void xmit_rec_inc(void) | |
13065 | +{ | |
13066 | + current->xmit_recursion++; | |
13067 | +} | |
13068 | + | |
13069 | +static inline void xmit_rec_dec(void) | |
13070 | +{ | |
13071 | + current->xmit_recursion--; | |
13072 | +} | |
13073 | + | |
13074 | +#else | |
13075 | + | |
13076 | +DECLARE_PER_CPU(int, xmit_recursion); | |
1a6e0f06 | 13077 | |
e4b2b4a8 JK |
13078 | static inline int dev_recursion_level(void) |
13079 | { | |
13080 | return this_cpu_read(xmit_recursion); | |
13081 | } | |
1a6e0f06 | 13082 | |
e4b2b4a8 JK |
13083 | +static inline int xmit_rec_read(void) |
13084 | +{ | |
13085 | + return __this_cpu_read(xmit_recursion); | |
13086 | +} | |
13087 | + | |
13088 | +static inline void xmit_rec_inc(void) | |
13089 | +{ | |
13090 | + __this_cpu_inc(xmit_recursion); | |
13091 | +} | |
13092 | + | |
13093 | +static inline void xmit_rec_dec(void) | |
13094 | +{ | |
13095 | + __this_cpu_dec(xmit_recursion); | |
13096 | +} | |
13097 | +#endif | |
13098 | + | |
13099 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
13100 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
13101 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | |
13102 | @@ -2792,6 +2847,7 @@ | |
13103 | unsigned int dropped; | |
13104 | struct sk_buff_head input_pkt_queue; | |
13105 | struct napi_struct backlog; | |
13106 | + struct sk_buff_head tofree_queue; | |
1a6e0f06 | 13107 | |
e4b2b4a8 | 13108 | }; |
1a6e0f06 | 13109 | |
e4b2b4a8 JK |
13110 | @@ -3515,10 +3571,48 @@ |
13111 | return (1 << debug_value) - 1; | |
1a6e0f06 JK |
13112 | } |
13113 | ||
e4b2b4a8 JK |
13114 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13115 | +static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) | |
13116 | +{ | |
13117 | + txq->xmit_lock_owner = current; | |
13118 | +} | |
13119 | + | |
13120 | +static inline void netdev_queue_clear_owner(struct netdev_queue *txq) | |
13121 | +{ | |
13122 | + txq->xmit_lock_owner = NULL; | |
13123 | +} | |
13124 | + | |
13125 | +static inline bool netdev_queue_has_owner(struct netdev_queue *txq) | |
13126 | +{ | |
13127 | + if (txq->xmit_lock_owner != NULL) | |
13128 | + return true; | |
13129 | + return false; | |
13130 | +} | |
13131 | + | |
13132 | +#else | |
13133 | + | |
13134 | +static inline void netdev_queue_set_owner(struct netdev_queue *txq, int cpu) | |
13135 | +{ | |
13136 | + txq->xmit_lock_owner = cpu; | |
13137 | +} | |
13138 | + | |
13139 | +static inline void netdev_queue_clear_owner(struct netdev_queue *txq) | |
13140 | +{ | |
13141 | + txq->xmit_lock_owner = -1; | |
13142 | +} | |
13143 | + | |
13144 | +static inline bool netdev_queue_has_owner(struct netdev_queue *txq) | |
13145 | +{ | |
13146 | + if (txq->xmit_lock_owner != -1) | |
13147 | + return true; | |
13148 | + return false; | |
13149 | +} | |
13150 | +#endif | |
13151 | + | |
13152 | static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) | |
13153 | { | |
13154 | spin_lock(&txq->_xmit_lock); | |
13155 | - txq->xmit_lock_owner = cpu; | |
13156 | + netdev_queue_set_owner(txq, cpu); | |
13157 | } | |
1a6e0f06 | 13158 | |
e4b2b4a8 JK |
13159 | static inline bool __netif_tx_acquire(struct netdev_queue *txq) |
13160 | @@ -3535,32 +3629,32 @@ | |
13161 | static inline void __netif_tx_lock_bh(struct netdev_queue *txq) | |
13162 | { | |
13163 | spin_lock_bh(&txq->_xmit_lock); | |
13164 | - txq->xmit_lock_owner = smp_processor_id(); | |
13165 | + netdev_queue_set_owner(txq, smp_processor_id()); | |
13166 | } | |
1a6e0f06 | 13167 | |
e4b2b4a8 JK |
13168 | static inline bool __netif_tx_trylock(struct netdev_queue *txq) |
13169 | { | |
13170 | bool ok = spin_trylock(&txq->_xmit_lock); | |
13171 | if (likely(ok)) | |
13172 | - txq->xmit_lock_owner = smp_processor_id(); | |
13173 | + netdev_queue_set_owner(txq, smp_processor_id()); | |
13174 | return ok; | |
13175 | } | |
1a6e0f06 | 13176 | |
e4b2b4a8 JK |
13177 | static inline void __netif_tx_unlock(struct netdev_queue *txq) |
13178 | { | |
13179 | - txq->xmit_lock_owner = -1; | |
13180 | + netdev_queue_clear_owner(txq); | |
13181 | spin_unlock(&txq->_xmit_lock); | |
13182 | } | |
1a6e0f06 | 13183 | |
e4b2b4a8 JK |
13184 | static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) |
13185 | { | |
13186 | - txq->xmit_lock_owner = -1; | |
13187 | + netdev_queue_clear_owner(txq); | |
13188 | spin_unlock_bh(&txq->_xmit_lock); | |
13189 | } | |
1a6e0f06 | 13190 | |
e4b2b4a8 JK |
13191 | static inline void txq_trans_update(struct netdev_queue *txq) |
13192 | { | |
13193 | - if (txq->xmit_lock_owner != -1) | |
13194 | + if (netdev_queue_has_owner(txq)) | |
13195 | txq->trans_start = jiffies; | |
13196 | } | |
1a6e0f06 | 13197 | |
e4b2b4a8 JK |
13198 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/netfilter/x_tables.h linux-4.14/include/linux/netfilter/x_tables.h |
13199 | --- linux-4.14.orig/include/linux/netfilter/x_tables.h 2018-09-05 11:03:22.000000000 +0200 | |
13200 | +++ linux-4.14/include/linux/netfilter/x_tables.h 2018-09-05 11:05:07.000000000 +0200 | |
13201 | @@ -6,6 +6,7 @@ | |
13202 | #include <linux/netdevice.h> | |
13203 | #include <linux/static_key.h> | |
13204 | #include <linux/netfilter.h> | |
13205 | +#include <linux/locallock.h> | |
13206 | #include <uapi/linux/netfilter/x_tables.h> | |
1a6e0f06 | 13207 | |
e4b2b4a8 JK |
13208 | /* Test a struct->invflags and a boolean for inequality */ |
13209 | @@ -341,6 +342,8 @@ | |
13210 | */ | |
13211 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
1a6e0f06 | 13212 | |
e4b2b4a8 JK |
13213 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); |
13214 | + | |
13215 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
13216 | * | |
13217 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
13218 | @@ -361,6 +364,9 @@ | |
13219 | { | |
13220 | unsigned int addend; | |
1a6e0f06 | 13221 | |
e4b2b4a8 JK |
13222 | + /* RT protection */ |
13223 | + local_lock(xt_write_lock); | |
13224 | + | |
13225 | /* | |
13226 | * Low order bit of sequence is set if we already | |
13227 | * called xt_write_recseq_begin(). | |
13228 | @@ -391,6 +397,7 @@ | |
13229 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
13230 | smp_wmb(); | |
13231 | __this_cpu_add(xt_recseq.sequence, addend); | |
13232 | + local_unlock(xt_write_lock); | |
13233 | } | |
1a6e0f06 | 13234 | |
e4b2b4a8 JK |
13235 | /* |
13236 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/nfs_fs.h linux-4.14/include/linux/nfs_fs.h | |
13237 | --- linux-4.14.orig/include/linux/nfs_fs.h 2017-11-12 19:46:13.000000000 +0100 | |
13238 | +++ linux-4.14/include/linux/nfs_fs.h 2018-09-05 11:05:07.000000000 +0200 | |
13239 | @@ -162,7 +162,11 @@ | |
1a6e0f06 | 13240 | |
e4b2b4a8 JK |
13241 | /* Readers: in-flight sillydelete RPC calls */ |
13242 | /* Writers: rmdir */ | |
13243 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13244 | + struct semaphore rmdir_sem; | |
13245 | +#else | |
13246 | struct rw_semaphore rmdir_sem; | |
13247 | +#endif | |
13248 | struct mutex commit_mutex; | |
1a6e0f06 | 13249 | |
e4b2b4a8 JK |
13250 | #if IS_ENABLED(CONFIG_NFS_V4) |
13251 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/nfs_xdr.h linux-4.14/include/linux/nfs_xdr.h | |
13252 | --- linux-4.14.orig/include/linux/nfs_xdr.h 2017-11-12 19:46:13.000000000 +0100 | |
13253 | +++ linux-4.14/include/linux/nfs_xdr.h 2018-09-05 11:05:07.000000000 +0200 | |
13254 | @@ -1530,7 +1530,7 @@ | |
13255 | struct nfs_removeargs args; | |
13256 | struct nfs_removeres res; | |
13257 | struct dentry *dentry; | |
13258 | - wait_queue_head_t wq; | |
13259 | + struct swait_queue_head wq; | |
13260 | struct rpc_cred *cred; | |
13261 | struct nfs_fattr dir_attr; | |
13262 | long timeout; | |
13263 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/notifier.h linux-4.14/include/linux/notifier.h | |
13264 | --- linux-4.14.orig/include/linux/notifier.h 2017-11-12 19:46:13.000000000 +0100 | |
13265 | +++ linux-4.14/include/linux/notifier.h 2018-09-05 11:05:07.000000000 +0200 | |
13266 | @@ -7,7 +7,7 @@ | |
13267 | * | |
13268 | * Alan Cox <Alan.Cox@linux.org> | |
13269 | */ | |
13270 | - | |
13271 | + | |
13272 | #ifndef _LINUX_NOTIFIER_H | |
13273 | #define _LINUX_NOTIFIER_H | |
13274 | #include <linux/errno.h> | |
13275 | @@ -43,9 +43,7 @@ | |
13276 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
13277 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
13278 | * SRCU notifier chains should be used when the chain will be called very | |
13279 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
13280 | - * chains are slightly more difficult to use because they require special | |
13281 | - * runtime initialization. | |
13282 | + * often but notifier_blocks will seldom be removed. | |
13283 | */ | |
1a6e0f06 | 13284 | |
e4b2b4a8 JK |
13285 | struct notifier_block; |
13286 | @@ -91,7 +89,7 @@ | |
13287 | (name)->head = NULL; \ | |
13288 | } while (0) | |
1a6e0f06 | 13289 | |
e4b2b4a8 JK |
13290 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ |
13291 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
13292 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
13293 | #define srcu_cleanup_notifier_head(name) \ | |
13294 | cleanup_srcu_struct(&(name)->srcu); | |
13295 | @@ -104,7 +102,13 @@ | |
13296 | .head = NULL } | |
13297 | #define RAW_NOTIFIER_INIT(name) { \ | |
13298 | .head = NULL } | |
13299 | -/* srcu_notifier_heads cannot be initialized statically */ | |
13300 | + | |
13301 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
13302 | + { \ | |
13303 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
13304 | + .head = NULL, \ | |
13305 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
13306 | + } | |
1a6e0f06 | 13307 | |
e4b2b4a8 JK |
13308 | #define ATOMIC_NOTIFIER_HEAD(name) \ |
13309 | struct atomic_notifier_head name = \ | |
13310 | @@ -116,6 +120,26 @@ | |
13311 | struct raw_notifier_head name = \ | |
13312 | RAW_NOTIFIER_INIT(name) | |
1a6e0f06 | 13313 | |
e4b2b4a8 JK |
13314 | +#ifdef CONFIG_TREE_SRCU |
13315 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
13316 | + static DEFINE_PER_CPU(struct srcu_data, \ | |
13317 | + name##_head_srcu_data); \ | |
13318 | + mod struct srcu_notifier_head name = \ | |
13319 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_data) | |
13320 | + | |
13321 | +#else | |
13322 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
13323 | + mod struct srcu_notifier_head name = \ | |
13324 | + SRCU_NOTIFIER_INIT(name, name) | |
13325 | + | |
13326 | +#endif | |
13327 | + | |
13328 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
13329 | + _SRCU_NOTIFIER_HEAD(name, ) | |
13330 | + | |
13331 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
13332 | + _SRCU_NOTIFIER_HEAD(name, static) | |
13333 | + | |
13334 | #ifdef __KERNEL__ | |
1a6e0f06 | 13335 | |
e4b2b4a8 JK |
13336 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, |
13337 | @@ -185,12 +209,12 @@ | |
c7c16703 | 13338 | |
e4b2b4a8 JK |
13339 | /* |
13340 | * Declared notifiers so far. I can imagine quite a few more chains | |
13341 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
13342 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
13343 | * device units up), device [un]mount chain, module load/unload chain, | |
13344 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
13345 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
13346 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
13347 | */ | |
13348 | - | |
13349 | + | |
13350 | /* CPU notfiers are defined in include/linux/cpu.h. */ | |
c7c16703 | 13351 | |
e4b2b4a8 JK |
13352 | /* netdevice notifiers are defined in include/linux/netdevice.h */ |
13353 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/percpu.h linux-4.14/include/linux/percpu.h | |
13354 | --- linux-4.14.orig/include/linux/percpu.h 2017-11-12 19:46:13.000000000 +0100 | |
13355 | +++ linux-4.14/include/linux/percpu.h 2018-09-05 11:05:07.000000000 +0200 | |
13356 | @@ -19,6 +19,35 @@ | |
13357 | #define PERCPU_MODULE_RESERVE 0 | |
13358 | #endif | |
c7c16703 | 13359 | |
e4b2b4a8 JK |
13360 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13361 | + | |
13362 | +#define get_local_var(var) (*({ \ | |
13363 | + migrate_disable(); \ | |
13364 | + this_cpu_ptr(&var); })) | |
1a6e0f06 | 13365 | + |
e4b2b4a8 JK |
13366 | +#define put_local_var(var) do { \ |
13367 | + (void)&(var); \ | |
13368 | + migrate_enable(); \ | |
13369 | +} while (0) | |
1a6e0f06 | 13370 | + |
e4b2b4a8 JK |
13371 | +# define get_local_ptr(var) ({ \ |
13372 | + migrate_disable(); \ | |
13373 | + this_cpu_ptr(var); }) | |
13374 | + | |
13375 | +# define put_local_ptr(var) do { \ | |
13376 | + (void)(var); \ | |
13377 | + migrate_enable(); \ | |
13378 | +} while (0) | |
13379 | + | |
13380 | +#else | |
13381 | + | |
13382 | +#define get_local_var(var) get_cpu_var(var) | |
13383 | +#define put_local_var(var) put_cpu_var(var) | |
13384 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
13385 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
13386 | + | |
13387 | +#endif | |
13388 | + | |
13389 | /* minimum unit size, also is the maximum supported allocation size */ | |
13390 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
1a6e0f06 | 13391 | |
e4b2b4a8 JK |
13392 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/percpu-rwsem.h linux-4.14/include/linux/percpu-rwsem.h |
13393 | --- linux-4.14.orig/include/linux/percpu-rwsem.h 2018-09-05 11:03:22.000000000 +0200 | |
13394 | +++ linux-4.14/include/linux/percpu-rwsem.h 2018-09-05 11:05:07.000000000 +0200 | |
13395 | @@ -29,7 +29,7 @@ | |
13396 | extern int __percpu_down_read(struct percpu_rw_semaphore *, int); | |
13397 | extern void __percpu_up_read(struct percpu_rw_semaphore *); | |
1a6e0f06 | 13398 | |
e4b2b4a8 JK |
13399 | -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) |
13400 | +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
13401 | { | |
13402 | might_sleep(); | |
1a6e0f06 | 13403 | |
e4b2b4a8 JK |
13404 | @@ -47,16 +47,10 @@ |
13405 | __this_cpu_inc(*sem->read_count); | |
13406 | if (unlikely(!rcu_sync_is_idle(&sem->rss))) | |
13407 | __percpu_down_read(sem, false); /* Unconditional memory barrier */ | |
13408 | - barrier(); | |
13409 | /* | |
13410 | - * The barrier() prevents the compiler from | |
13411 | + * The preempt_enable() prevents the compiler from | |
13412 | * bleeding the critical section out. | |
13413 | */ | |
13414 | -} | |
1a6e0f06 | 13415 | - |
e4b2b4a8 JK |
13416 | -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) |
13417 | -{ | |
13418 | - percpu_down_read_preempt_disable(sem); | |
13419 | preempt_enable(); | |
13420 | } | |
1a6e0f06 | 13421 | |
e4b2b4a8 JK |
13422 | @@ -83,13 +77,9 @@ |
13423 | return ret; | |
13424 | } | |
1a6e0f06 | 13425 | |
e4b2b4a8 JK |
13426 | -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) |
13427 | +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
1a6e0f06 | 13428 | { |
e4b2b4a8 JK |
13429 | - /* |
13430 | - * The barrier() prevents the compiler from | |
13431 | - * bleeding the critical section out. | |
13432 | - */ | |
13433 | - barrier(); | |
13434 | + preempt_disable(); | |
13435 | /* | |
13436 | * Same as in percpu_down_read(). | |
13437 | */ | |
13438 | @@ -102,12 +92,6 @@ | |
13439 | rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); | |
1a6e0f06 | 13440 | } |
1f39f580 | 13441 | |
e4b2b4a8 JK |
13442 | -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) |
13443 | -{ | |
13444 | - preempt_disable(); | |
13445 | - percpu_up_read_preempt_enable(sem); | |
13446 | -} | |
13447 | - | |
13448 | extern void percpu_down_write(struct percpu_rw_semaphore *); | |
13449 | extern void percpu_up_write(struct percpu_rw_semaphore *); | |
1f39f580 | 13450 | |
e4b2b4a8 JK |
13451 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/pid.h linux-4.14/include/linux/pid.h |
13452 | --- linux-4.14.orig/include/linux/pid.h 2017-11-12 19:46:13.000000000 +0100 | |
13453 | +++ linux-4.14/include/linux/pid.h 2018-09-05 11:05:07.000000000 +0200 | |
13454 | @@ -3,6 +3,7 @@ | |
13455 | #define _LINUX_PID_H | |
1f39f580 | 13456 | |
e4b2b4a8 JK |
13457 | #include <linux/rculist.h> |
13458 | +#include <linux/atomic.h> | |
1f39f580 | 13459 | |
e4b2b4a8 JK |
13460 | enum pid_type |
13461 | { | |
13462 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/posix-timers.h linux-4.14/include/linux/posix-timers.h | |
13463 | --- linux-4.14.orig/include/linux/posix-timers.h 2017-11-12 19:46:13.000000000 +0100 | |
13464 | +++ linux-4.14/include/linux/posix-timers.h 2018-09-05 11:05:07.000000000 +0200 | |
13465 | @@ -101,8 +101,8 @@ | |
13466 | struct { | |
13467 | struct alarm alarmtimer; | |
13468 | } alarm; | |
13469 | - struct rcu_head rcu; | |
13470 | } it; | |
13471 | + struct rcu_head rcu; | |
13472 | }; | |
1f39f580 | 13473 | |
e4b2b4a8 JK |
13474 | void run_posix_cpu_timers(struct task_struct *task); |
13475 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/preempt.h linux-4.14/include/linux/preempt.h | |
13476 | --- linux-4.14.orig/include/linux/preempt.h 2017-11-12 19:46:13.000000000 +0100 | |
13477 | +++ linux-4.14/include/linux/preempt.h 2018-09-05 11:05:07.000000000 +0200 | |
13478 | @@ -51,7 +51,11 @@ | |
13479 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
13480 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
1f39f580 | 13481 | |
e4b2b4a8 JK |
13482 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) |
13483 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13484 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
13485 | +#else | |
13486 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
13487 | +#endif | |
1f39f580 | 13488 | |
e4b2b4a8 JK |
13489 | /* We use the MSB mostly because its available */ |
13490 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
13491 | @@ -81,9 +85,15 @@ | |
13492 | #include <asm/preempt.h> | |
1f39f580 | 13493 | |
e4b2b4a8 JK |
13494 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) |
13495 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
13496 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
13497 | | NMI_MASK)) | |
13498 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13499 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
13500 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
13501 | +#else | |
13502 | +# define softirq_count() (0UL) | |
13503 | +extern int in_serving_softirq(void); | |
13504 | +#endif | |
1f39f580 | 13505 | |
e4b2b4a8 JK |
13506 | /* |
13507 | * Are we doing bottom half or hardware interrupt processing? | |
13508 | @@ -101,7 +111,6 @@ | |
13509 | #define in_irq() (hardirq_count()) | |
13510 | #define in_softirq() (softirq_count()) | |
13511 | #define in_interrupt() (irq_count()) | |
13512 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
13513 | #define in_nmi() (preempt_count() & NMI_MASK) | |
13514 | #define in_task() (!(preempt_count() & \ | |
13515 | (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) | |
13516 | @@ -118,7 +127,11 @@ | |
13517 | /* | |
13518 | * The preempt_count offset after spin_lock() | |
13519 | */ | |
13520 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
13521 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
13522 | +#else | |
13523 | +#define PREEMPT_LOCK_OFFSET 0 | |
13524 | +#endif | |
1f39f580 | 13525 | |
e4b2b4a8 JK |
13526 | /* |
13527 | * The preempt_count offset needed for things like: | |
13528 | @@ -167,6 +180,20 @@ | |
13529 | #define preempt_count_inc() preempt_count_add(1) | |
13530 | #define preempt_count_dec() preempt_count_sub(1) | |
1f39f580 | 13531 | |
e4b2b4a8 JK |
13532 | +#ifdef CONFIG_PREEMPT_LAZY |
13533 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
13534 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
13535 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
13536 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
13537 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
13538 | +#else | |
13539 | +#define add_preempt_lazy_count(val) do { } while (0) | |
13540 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
13541 | +#define inc_preempt_lazy_count() do { } while (0) | |
13542 | +#define dec_preempt_lazy_count() do { } while (0) | |
13543 | +#define preempt_lazy_count() (0) | |
13544 | +#endif | |
13545 | + | |
13546 | #ifdef CONFIG_PREEMPT_COUNT | |
1f39f580 | 13547 | |
e4b2b4a8 JK |
13548 | #define preempt_disable() \ |
13549 | @@ -175,16 +202,53 @@ | |
13550 | barrier(); \ | |
13551 | } while (0) | |
1f39f580 | 13552 | |
e4b2b4a8 JK |
13553 | +#define preempt_lazy_disable() \ |
13554 | +do { \ | |
13555 | + inc_preempt_lazy_count(); \ | |
13556 | + barrier(); \ | |
13557 | +} while (0) | |
13558 | + | |
13559 | #define sched_preempt_enable_no_resched() \ | |
13560 | do { \ | |
13561 | barrier(); \ | |
13562 | preempt_count_dec(); \ | |
13563 | } while (0) | |
1f39f580 | 13564 | |
e4b2b4a8 JK |
13565 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() |
13566 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13567 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
13568 | +# define preempt_check_resched_rt() preempt_check_resched() | |
13569 | +#else | |
13570 | +# define preempt_enable_no_resched() preempt_enable() | |
13571 | +# define preempt_check_resched_rt() barrier(); | |
13572 | +#endif | |
1f39f580 | 13573 | |
e4b2b4a8 | 13574 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) |
1f39f580 | 13575 | |
e4b2b4a8 JK |
13576 | +#ifdef CONFIG_SMP |
13577 | + | |
13578 | +extern void migrate_disable(void); | |
13579 | +extern void migrate_enable(void); | |
13580 | + | |
13581 | +int __migrate_disabled(struct task_struct *p); | |
13582 | + | |
13583 | +#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) | |
13584 | + | |
13585 | +extern void migrate_disable(void); | |
13586 | +extern void migrate_enable(void); | |
13587 | +static inline int __migrate_disabled(struct task_struct *p) | |
13588 | +{ | |
13589 | + return 0; | |
13590 | +} | |
13591 | + | |
13592 | +#else | |
13593 | +#define migrate_disable() barrier() | |
13594 | +#define migrate_enable() barrier() | |
13595 | +static inline int __migrate_disabled(struct task_struct *p) | |
13596 | +{ | |
13597 | + return 0; | |
13598 | +} | |
13599 | +#endif | |
13600 | + | |
13601 | #ifdef CONFIG_PREEMPT | |
13602 | #define preempt_enable() \ | |
13603 | do { \ | |
13604 | @@ -206,6 +270,13 @@ | |
13605 | __preempt_schedule(); \ | |
13606 | } while (0) | |
1f39f580 | 13607 | |
e4b2b4a8 JK |
13608 | +#define preempt_lazy_enable() \ |
13609 | +do { \ | |
13610 | + dec_preempt_lazy_count(); \ | |
13611 | + barrier(); \ | |
13612 | + preempt_check_resched(); \ | |
13613 | +} while (0) | |
13614 | + | |
13615 | #else /* !CONFIG_PREEMPT */ | |
13616 | #define preempt_enable() \ | |
13617 | do { \ | |
13618 | @@ -213,6 +284,12 @@ | |
13619 | preempt_count_dec(); \ | |
13620 | } while (0) | |
1f39f580 | 13621 | |
e4b2b4a8 JK |
13622 | +#define preempt_lazy_enable() \ |
13623 | +do { \ | |
13624 | + dec_preempt_lazy_count(); \ | |
13625 | + barrier(); \ | |
13626 | +} while (0) | |
13627 | + | |
13628 | #define preempt_enable_notrace() \ | |
13629 | do { \ | |
13630 | barrier(); \ | |
13631 | @@ -251,8 +328,16 @@ | |
13632 | #define preempt_disable_notrace() barrier() | |
13633 | #define preempt_enable_no_resched_notrace() barrier() | |
13634 | #define preempt_enable_notrace() barrier() | |
13635 | +#define preempt_check_resched_rt() barrier() | |
13636 | #define preemptible() 0 | |
1f39f580 | 13637 | |
e4b2b4a8 JK |
13638 | +#define migrate_disable() barrier() |
13639 | +#define migrate_enable() barrier() | |
13640 | + | |
13641 | +static inline int __migrate_disabled(struct task_struct *p) | |
13642 | +{ | |
13643 | + return 0; | |
13644 | +} | |
13645 | #endif /* CONFIG_PREEMPT_COUNT */ | |
1f39f580 | 13646 | |
e4b2b4a8 JK |
13647 | #ifdef MODULE |
13648 | @@ -271,10 +356,22 @@ | |
13649 | } while (0) | |
13650 | #define preempt_fold_need_resched() \ | |
13651 | do { \ | |
13652 | - if (tif_need_resched()) \ | |
13653 | + if (tif_need_resched_now()) \ | |
13654 | set_preempt_need_resched(); \ | |
13655 | } while (0) | |
1f39f580 | 13656 | |
e4b2b4a8 JK |
13657 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13658 | +# define preempt_disable_rt() preempt_disable() | |
13659 | +# define preempt_enable_rt() preempt_enable() | |
13660 | +# define preempt_disable_nort() barrier() | |
13661 | +# define preempt_enable_nort() barrier() | |
13662 | +#else | |
13663 | +# define preempt_disable_rt() barrier() | |
13664 | +# define preempt_enable_rt() barrier() | |
13665 | +# define preempt_disable_nort() preempt_disable() | |
13666 | +# define preempt_enable_nort() preempt_enable() | |
13667 | +#endif | |
13668 | + | |
13669 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
1f39f580 | 13670 | |
e4b2b4a8 JK |
13671 | struct preempt_notifier; |
13672 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/printk.h linux-4.14/include/linux/printk.h | |
13673 | --- linux-4.14.orig/include/linux/printk.h 2017-11-12 19:46:13.000000000 +0100 | |
13674 | +++ linux-4.14/include/linux/printk.h 2018-09-05 11:05:07.000000000 +0200 | |
13675 | @@ -142,9 +142,11 @@ | |
13676 | #ifdef CONFIG_EARLY_PRINTK | |
13677 | extern asmlinkage __printf(1, 2) | |
13678 | void early_printk(const char *fmt, ...); | |
13679 | +extern void printk_kill(void); | |
13680 | #else | |
13681 | static inline __printf(1, 2) __cold | |
13682 | void early_printk(const char *s, ...) { } | |
13683 | +static inline void printk_kill(void) { } | |
13684 | #endif | |
1f39f580 | 13685 | |
e4b2b4a8 JK |
13686 | #ifdef CONFIG_PRINTK_NMI |
13687 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/radix-tree.h linux-4.14/include/linux/radix-tree.h | |
13688 | --- linux-4.14.orig/include/linux/radix-tree.h 2017-11-12 19:46:13.000000000 +0100 | |
13689 | +++ linux-4.14/include/linux/radix-tree.h 2018-09-05 11:05:07.000000000 +0200 | |
13690 | @@ -328,6 +328,8 @@ | |
13691 | int radix_tree_preload(gfp_t gfp_mask); | |
13692 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
13693 | int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); | |
13694 | +void radix_tree_preload_end(void); | |
13695 | + | |
13696 | void radix_tree_init(void); | |
13697 | void *radix_tree_tag_set(struct radix_tree_root *, | |
13698 | unsigned long index, unsigned int tag); | |
13699 | @@ -347,11 +349,6 @@ | |
13700 | unsigned int max_items, unsigned int tag); | |
13701 | int radix_tree_tagged(const struct radix_tree_root *, unsigned int tag); | |
1f39f580 | 13702 | |
e4b2b4a8 JK |
13703 | -static inline void radix_tree_preload_end(void) |
13704 | -{ | |
13705 | - preempt_enable(); | |
13706 | -} | |
13707 | - | |
13708 | int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t); | |
13709 | int radix_tree_split(struct radix_tree_root *, unsigned long index, | |
13710 | unsigned new_order); | |
13711 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/random.h linux-4.14/include/linux/random.h | |
13712 | --- linux-4.14.orig/include/linux/random.h 2017-11-12 19:46:13.000000000 +0100 | |
13713 | +++ linux-4.14/include/linux/random.h 2018-09-05 11:05:07.000000000 +0200 | |
13714 | @@ -32,7 +32,7 @@ | |
1f39f580 | 13715 | |
e4b2b4a8 JK |
13716 | extern void add_input_randomness(unsigned int type, unsigned int code, |
13717 | unsigned int value) __latent_entropy; | |
13718 | -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; | |
13719 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; | |
1f39f580 | 13720 | |
e4b2b4a8 JK |
13721 | extern void get_random_bytes(void *buf, int nbytes); |
13722 | extern int wait_for_random_bytes(void); | |
13723 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rbtree_augmented.h linux-4.14/include/linux/rbtree_augmented.h | |
13724 | --- linux-4.14.orig/include/linux/rbtree_augmented.h 2017-11-12 19:46:13.000000000 +0100 | |
13725 | +++ linux-4.14/include/linux/rbtree_augmented.h 2018-09-05 11:05:07.000000000 +0200 | |
13726 | @@ -26,6 +26,7 @@ | |
1f39f580 | 13727 | |
e4b2b4a8 JK |
13728 | #include <linux/compiler.h> |
13729 | #include <linux/rbtree.h> | |
13730 | +#include <linux/rcupdate.h> | |
1f39f580 | 13731 | |
e4b2b4a8 JK |
13732 | /* |
13733 | * Please note - only struct rb_augment_callbacks and the prototypes for | |
13734 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rbtree.h linux-4.14/include/linux/rbtree.h | |
13735 | --- linux-4.14.orig/include/linux/rbtree.h 2017-11-12 19:46:13.000000000 +0100 | |
13736 | +++ linux-4.14/include/linux/rbtree.h 2018-09-05 11:05:07.000000000 +0200 | |
13737 | @@ -31,7 +31,7 @@ | |
1f39f580 | 13738 | |
e4b2b4a8 JK |
13739 | #include <linux/kernel.h> |
13740 | #include <linux/stddef.h> | |
13741 | -#include <linux/rcupdate.h> | |
13742 | +#include <linux/rcu_assign_pointer.h> | |
1f39f580 | 13743 | |
e4b2b4a8 JK |
13744 | struct rb_node { |
13745 | unsigned long __rb_parent_color; | |
13746 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rbtree_latch.h linux-4.14/include/linux/rbtree_latch.h | |
13747 | --- linux-4.14.orig/include/linux/rbtree_latch.h 2017-11-12 19:46:13.000000000 +0100 | |
13748 | +++ linux-4.14/include/linux/rbtree_latch.h 2018-09-05 11:05:07.000000000 +0200 | |
13749 | @@ -35,6 +35,7 @@ | |
1f39f580 | 13750 | |
e4b2b4a8 JK |
13751 | #include <linux/rbtree.h> |
13752 | #include <linux/seqlock.h> | |
13753 | +#include <linux/rcupdate.h> | |
1f39f580 | 13754 | |
e4b2b4a8 JK |
13755 | struct latch_tree_node { |
13756 | struct rb_node node[2]; | |
13757 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rcu_assign_pointer.h linux-4.14/include/linux/rcu_assign_pointer.h | |
13758 | --- linux-4.14.orig/include/linux/rcu_assign_pointer.h 1970-01-01 01:00:00.000000000 +0100 | |
13759 | +++ linux-4.14/include/linux/rcu_assign_pointer.h 2018-09-05 11:05:07.000000000 +0200 | |
13760 | @@ -0,0 +1,54 @@ | |
13761 | +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ | |
13762 | +#define __LINUX_RCU_ASSIGN_POINTER_H__ | |
13763 | +#include <linux/compiler.h> | |
13764 | +#include <asm/barrier.h> | |
13765 | + | |
13766 | +/** | |
13767 | + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
13768 | + * @v: The value to statically initialize with. | |
13769 | + */ | |
13770 | +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
13771 | + | |
13772 | +/** | |
13773 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
13774 | + * @p: pointer to assign to | |
13775 | + * @v: value to assign (publish) | |
13776 | + * | |
13777 | + * Assigns the specified value to the specified RCU-protected | |
13778 | + * pointer, ensuring that any concurrent RCU readers will see | |
13779 | + * any prior initialization. | |
13780 | + * | |
13781 | + * Inserts memory barriers on architectures that require them | |
13782 | + * (which is most of them), and also prevents the compiler from | |
13783 | + * reordering the code that initializes the structure after the pointer | |
13784 | + * assignment. More importantly, this call documents which pointers | |
13785 | + * will be dereferenced by RCU read-side code. | |
13786 | + * | |
13787 | + * In some special cases, you may use RCU_INIT_POINTER() instead | |
13788 | + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
13789 | + * to the fact that it does not constrain either the CPU or the compiler. | |
13790 | + * That said, using RCU_INIT_POINTER() when you should have used | |
13791 | + * rcu_assign_pointer() is a very bad thing that results in | |
13792 | + * impossible-to-diagnose memory corruption. So please be careful. | |
13793 | + * See the RCU_INIT_POINTER() comment header for details. | |
13794 | + * | |
13795 | + * Note that rcu_assign_pointer() evaluates each of its arguments only | |
13796 | + * once, appearances notwithstanding. One of the "extra" evaluations | |
13797 | + * is in typeof() and the other visible only to sparse (__CHECKER__), | |
13798 | + * neither of which actually execute the argument. As with most cpp | |
13799 | + * macros, this execute-arguments-only-once property is important, so | |
13800 | + * please be careful when making changes to rcu_assign_pointer() and the | |
13801 | + * other macros that it invokes. | |
13802 | + */ | |
13803 | +#define rcu_assign_pointer(p, v) \ | |
13804 | +({ \ | |
13805 | + uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
13806 | + \ | |
13807 | + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
13808 | + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
13809 | + else \ | |
13810 | + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
13811 | + _r_a_p__v; \ | |
13812 | +}) | |
13813 | + | |
13814 | +#endif | |
13815 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rcupdate.h linux-4.14/include/linux/rcupdate.h | |
13816 | --- linux-4.14.orig/include/linux/rcupdate.h 2018-09-05 11:03:22.000000000 +0200 | |
13817 | +++ linux-4.14/include/linux/rcupdate.h 2018-09-05 11:05:07.000000000 +0200 | |
13818 | @@ -42,6 +42,7 @@ | |
13819 | #include <linux/lockdep.h> | |
13820 | #include <asm/processor.h> | |
13821 | #include <linux/cpumask.h> | |
13822 | +#include <linux/rcu_assign_pointer.h> | |
1f39f580 | 13823 | |
e4b2b4a8 JK |
13824 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
13825 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | |
13826 | @@ -55,7 +56,11 @@ | |
13827 | #define call_rcu call_rcu_sched | |
13828 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
1f39f580 | 13829 | |
e4b2b4a8 JK |
13830 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13831 | +#define call_rcu_bh call_rcu | |
13832 | +#else | |
13833 | void call_rcu_bh(struct rcu_head *head, rcu_callback_t func); | |
13834 | +#endif | |
13835 | void call_rcu_sched(struct rcu_head *head, rcu_callback_t func); | |
13836 | void synchronize_sched(void); | |
13837 | void rcu_barrier_tasks(void); | |
13838 | @@ -74,6 +79,11 @@ | |
13839 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
13840 | */ | |
13841 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
13842 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13843 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
13844 | +#else | |
13845 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
13846 | +#endif | |
1f39f580 | 13847 | |
e4b2b4a8 | 13848 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
1f39f580 | 13849 | |
e4b2b4a8 JK |
13850 | @@ -99,6 +109,8 @@ |
13851 | return 0; | |
1f39f580 JK |
13852 | } |
13853 | ||
e4b2b4a8 JK |
13854 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() |
13855 | + | |
13856 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
1f39f580 | 13857 | |
e4b2b4a8 JK |
13858 | /* Internal to kernel */ |
13859 | @@ -255,7 +267,14 @@ | |
13860 | extern struct lockdep_map rcu_callback_map; | |
13861 | int debug_lockdep_rcu_enabled(void); | |
13862 | int rcu_read_lock_held(void); | |
13863 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13864 | +static inline int rcu_read_lock_bh_held(void) | |
13865 | +{ | |
13866 | + return rcu_read_lock_held(); | |
13867 | +} | |
13868 | +#else | |
13869 | int rcu_read_lock_bh_held(void); | |
13870 | +#endif | |
13871 | int rcu_read_lock_sched_held(void); | |
1f39f580 | 13872 | |
e4b2b4a8 JK |
13873 | #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
13874 | @@ -365,54 +384,6 @@ | |
13875 | }) | |
1f39f580 | 13876 | |
e4b2b4a8 JK |
13877 | /** |
13878 | - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
13879 | - * @v: The value to statically initialize with. | |
13880 | - */ | |
13881 | -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
13882 | - | |
13883 | -/** | |
13884 | - * rcu_assign_pointer() - assign to RCU-protected pointer | |
13885 | - * @p: pointer to assign to | |
13886 | - * @v: value to assign (publish) | |
13887 | - * | |
13888 | - * Assigns the specified value to the specified RCU-protected | |
13889 | - * pointer, ensuring that any concurrent RCU readers will see | |
13890 | - * any prior initialization. | |
13891 | - * | |
13892 | - * Inserts memory barriers on architectures that require them | |
13893 | - * (which is most of them), and also prevents the compiler from | |
13894 | - * reordering the code that initializes the structure after the pointer | |
13895 | - * assignment. More importantly, this call documents which pointers | |
13896 | - * will be dereferenced by RCU read-side code. | |
13897 | - * | |
13898 | - * In some special cases, you may use RCU_INIT_POINTER() instead | |
13899 | - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
13900 | - * to the fact that it does not constrain either the CPU or the compiler. | |
13901 | - * That said, using RCU_INIT_POINTER() when you should have used | |
13902 | - * rcu_assign_pointer() is a very bad thing that results in | |
13903 | - * impossible-to-diagnose memory corruption. So please be careful. | |
13904 | - * See the RCU_INIT_POINTER() comment header for details. | |
13905 | - * | |
13906 | - * Note that rcu_assign_pointer() evaluates each of its arguments only | |
13907 | - * once, appearances notwithstanding. One of the "extra" evaluations | |
13908 | - * is in typeof() and the other visible only to sparse (__CHECKER__), | |
13909 | - * neither of which actually execute the argument. As with most cpp | |
13910 | - * macros, this execute-arguments-only-once property is important, so | |
13911 | - * please be careful when making changes to rcu_assign_pointer() and the | |
13912 | - * other macros that it invokes. | |
13913 | - */ | |
13914 | -#define rcu_assign_pointer(p, v) \ | |
13915 | -({ \ | |
13916 | - uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
13917 | - \ | |
13918 | - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
13919 | - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
13920 | - else \ | |
13921 | - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
13922 | - _r_a_p__v; \ | |
13923 | -}) | |
13924 | - | |
13925 | -/** | |
13926 | * rcu_swap_protected() - swap an RCU and a regular pointer | |
13927 | * @rcu_ptr: RCU pointer | |
13928 | * @ptr: regular pointer | |
13929 | @@ -707,10 +678,14 @@ | |
13930 | static inline void rcu_read_lock_bh(void) | |
13931 | { | |
13932 | local_bh_disable(); | |
13933 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13934 | + rcu_read_lock(); | |
13935 | +#else | |
13936 | __acquire(RCU_BH); | |
13937 | rcu_lock_acquire(&rcu_bh_lock_map); | |
13938 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
13939 | "rcu_read_lock_bh() used illegally while idle"); | |
13940 | +#endif | |
1f39f580 | 13941 | } |
1f39f580 | 13942 | |
e4b2b4a8 JK |
13943 | /* |
13944 | @@ -720,10 +695,14 @@ | |
13945 | */ | |
13946 | static inline void rcu_read_unlock_bh(void) | |
1a6e0f06 | 13947 | { |
e4b2b4a8 JK |
13948 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13949 | + rcu_read_unlock(); | |
13950 | +#else | |
13951 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
13952 | "rcu_read_unlock_bh() used illegally while idle"); | |
13953 | rcu_lock_release(&rcu_bh_lock_map); | |
13954 | __release(RCU_BH); | |
13955 | +#endif | |
13956 | local_bh_enable(); | |
1a6e0f06 | 13957 | } |
1a6e0f06 | 13958 | |
e4b2b4a8 JK |
13959 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rcutree.h linux-4.14/include/linux/rcutree.h |
13960 | --- linux-4.14.orig/include/linux/rcutree.h 2017-11-12 19:46:13.000000000 +0100 | |
13961 | +++ linux-4.14/include/linux/rcutree.h 2018-09-05 11:05:07.000000000 +0200 | |
13962 | @@ -44,7 +44,11 @@ | |
13963 | rcu_note_context_switch(false); | |
1a6e0f06 JK |
13964 | } |
13965 | ||
e4b2b4a8 JK |
13966 | +#ifdef CONFIG_PREEMPT_RT_FULL |
13967 | +# define synchronize_rcu_bh synchronize_rcu | |
13968 | +#else | |
13969 | void synchronize_rcu_bh(void); | |
13970 | +#endif | |
13971 | void synchronize_sched_expedited(void); | |
13972 | void synchronize_rcu_expedited(void); | |
1a6e0f06 | 13973 | |
e4b2b4a8 | 13974 | @@ -72,7 +76,11 @@ |
1a6e0f06 JK |
13975 | } |
13976 | ||
e4b2b4a8 JK |
13977 | void rcu_barrier(void); |
13978 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13979 | +# define rcu_barrier_bh rcu_barrier | |
13980 | +#else | |
13981 | void rcu_barrier_bh(void); | |
13982 | +#endif | |
13983 | void rcu_barrier_sched(void); | |
13984 | unsigned long get_state_synchronize_rcu(void); | |
13985 | void cond_synchronize_rcu(unsigned long oldstate); | |
13986 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/ring_buffer.h linux-4.14/include/linux/ring_buffer.h | |
13987 | --- linux-4.14.orig/include/linux/ring_buffer.h 2018-09-05 11:03:22.000000000 +0200 | |
13988 | +++ linux-4.14/include/linux/ring_buffer.h 2018-09-05 11:05:07.000000000 +0200 | |
13989 | @@ -34,10 +34,12 @@ | |
13990 | * array[0] = time delta (28 .. 59) | |
13991 | * size = 8 bytes | |
13992 | * | |
13993 | - * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock | |
13994 | - * array[0] = tv_nsec | |
13995 | - * array[1..2] = tv_sec | |
13996 | - * size = 16 bytes | |
13997 | + * @RINGBUF_TYPE_TIME_STAMP: Absolute timestamp | |
13998 | + * Same format as TIME_EXTEND except that the | |
13999 | + * value is an absolute timestamp, not a delta | |
14000 | + * event.time_delta contains bottom 27 bits | |
14001 | + * array[0] = top (28 .. 59) bits | |
14002 | + * size = 8 bytes | |
14003 | * | |
14004 | * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX: | |
14005 | * Data record | |
14006 | @@ -54,12 +56,12 @@ | |
14007 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, | |
14008 | RINGBUF_TYPE_PADDING, | |
14009 | RINGBUF_TYPE_TIME_EXTEND, | |
14010 | - /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ | |
14011 | RINGBUF_TYPE_TIME_STAMP, | |
14012 | }; | |
1a6e0f06 | 14013 | |
e4b2b4a8 JK |
14014 | unsigned ring_buffer_event_length(struct ring_buffer_event *event); |
14015 | void *ring_buffer_event_data(struct ring_buffer_event *event); | |
14016 | +u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); | |
1a6e0f06 | 14017 | |
e4b2b4a8 JK |
14018 | /* |
14019 | * ring_buffer_discard_commit will remove an event that has not | |
14020 | @@ -115,6 +117,9 @@ | |
14021 | int ring_buffer_write(struct ring_buffer *buffer, | |
14022 | unsigned long length, void *data); | |
14023 | ||
14024 | +void ring_buffer_nest_start(struct ring_buffer *buffer); | |
14025 | +void ring_buffer_nest_end(struct ring_buffer *buffer); | |
14026 | + | |
14027 | struct ring_buffer_event * | |
14028 | ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts, | |
14029 | unsigned long *lost_events); | |
14030 | @@ -179,6 +184,8 @@ | |
14031 | int cpu, u64 *ts); | |
14032 | void ring_buffer_set_clock(struct ring_buffer *buffer, | |
14033 | u64 (*clock)(void)); | |
14034 | +void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); | |
14035 | +bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); | |
14036 | ||
14037 | size_t ring_buffer_page_len(void *page); | |
14038 | ||
14039 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rtmutex.h linux-4.14/include/linux/rtmutex.h | |
14040 | --- linux-4.14.orig/include/linux/rtmutex.h 2017-11-12 19:46:13.000000000 +0100 | |
14041 | +++ linux-4.14/include/linux/rtmutex.h 2018-09-05 11:05:07.000000000 +0200 | |
14042 | @@ -14,11 +14,15 @@ | |
14043 | #define __LINUX_RT_MUTEX_H | |
1a6e0f06 | 14044 | |
e4b2b4a8 JK |
14045 | #include <linux/linkage.h> |
14046 | +#include <linux/spinlock_types_raw.h> | |
14047 | #include <linux/rbtree.h> | |
14048 | -#include <linux/spinlock_types.h> | |
1a6e0f06 | 14049 | |
e4b2b4a8 | 14050 | extern int max_lock_depth; /* for sysctl */ |
1a6e0f06 | 14051 | |
e4b2b4a8 JK |
14052 | +#ifdef CONFIG_DEBUG_MUTEXES |
14053 | +#include <linux/debug_locks.h> | |
14054 | +#endif | |
14055 | + | |
14056 | /** | |
14057 | * The rt_mutex structure | |
14058 | * | |
14059 | @@ -31,8 +35,8 @@ | |
14060 | raw_spinlock_t wait_lock; | |
14061 | struct rb_root_cached waiters; | |
14062 | struct task_struct *owner; | |
14063 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
14064 | int save_state; | |
14065 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
14066 | const char *name, *file; | |
14067 | int line; | |
14068 | void *magic; | |
14069 | @@ -82,16 +86,23 @@ | |
14070 | #define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) | |
14071 | #endif | |
1a6e0f06 | 14072 | |
e4b2b4a8 JK |
14073 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ |
14074 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
14075 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
14076 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
14077 | , .waiters = RB_ROOT_CACHED \ | |
14078 | , .owner = NULL \ | |
14079 | __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
14080 | - __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)} | |
14081 | + __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname) | |
14082 | + | |
14083 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
14084 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
1a6e0f06 | 14085 | |
e4b2b4a8 JK |
14086 | #define DEFINE_RT_MUTEX(mutexname) \ |
14087 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
1a6e0f06 | 14088 | |
e4b2b4a8 JK |
14089 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ |
14090 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
14091 | + , .save_state = 1 } | |
14092 | + | |
14093 | /** | |
14094 | * rt_mutex_is_locked - is the mutex locked | |
14095 | * @lock: the mutex to be queried | |
14096 | @@ -108,6 +119,7 @@ | |
1a6e0f06 | 14097 | |
e4b2b4a8 JK |
14098 | extern void rt_mutex_lock(struct rt_mutex *lock); |
14099 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); | |
14100 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
14101 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
14102 | struct hrtimer_sleeper *timeout); | |
1a6e0f06 | 14103 | |
e4b2b4a8 JK |
14104 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rwlock_rt.h linux-4.14/include/linux/rwlock_rt.h |
14105 | --- linux-4.14.orig/include/linux/rwlock_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
14106 | +++ linux-4.14/include/linux/rwlock_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
14107 | @@ -0,0 +1,119 @@ | |
14108 | +#ifndef __LINUX_RWLOCK_RT_H | |
14109 | +#define __LINUX_RWLOCK_RT_H | |
14110 | + | |
14111 | +#ifndef __LINUX_SPINLOCK_H | |
14112 | +#error Do not include directly. Use spinlock.h | |
1a6e0f06 | 14113 | +#endif |
e4b2b4a8 JK |
14114 | + |
14115 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
14116 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
14117 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
14118 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
14119 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
14120 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
14121 | +extern int __lockfunc rt_read_can_lock(rwlock_t *rwlock); | |
14122 | +extern int __lockfunc rt_write_can_lock(rwlock_t *rwlock); | |
14123 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
14124 | + | |
14125 | +#define read_can_lock(rwlock) rt_read_can_lock(rwlock) | |
14126 | +#define write_can_lock(rwlock) rt_write_can_lock(rwlock) | |
14127 | + | |
14128 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
14129 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
14130 | + | |
14131 | +static inline int __write_trylock_rt_irqsave(rwlock_t *lock, unsigned long *flags) | |
14132 | +{ | |
14133 | + /* XXX ARCH_IRQ_ENABLED */ | |
14134 | + *flags = 0; | |
14135 | + return rt_write_trylock(lock); | |
1a6e0f06 JK |
14136 | +} |
14137 | + | |
e4b2b4a8 JK |
14138 | +#define write_trylock_irqsave(lock, flags) \ |
14139 | + __cond_lock(lock, __write_trylock_rt_irqsave(lock, &(flags))) | |
14140 | + | |
14141 | +#define read_lock_irqsave(lock, flags) \ | |
14142 | + do { \ | |
14143 | + typecheck(unsigned long, flags); \ | |
14144 | + rt_read_lock(lock); \ | |
14145 | + flags = 0; \ | |
14146 | + } while (0) | |
14147 | + | |
14148 | +#define write_lock_irqsave(lock, flags) \ | |
14149 | + do { \ | |
14150 | + typecheck(unsigned long, flags); \ | |
14151 | + rt_write_lock(lock); \ | |
14152 | + flags = 0; \ | |
14153 | + } while (0) | |
1a6e0f06 | 14154 | + |
e4b2b4a8 | 14155 | +#define read_lock(lock) rt_read_lock(lock) |
1a6e0f06 | 14156 | + |
e4b2b4a8 JK |
14157 | +#define read_lock_bh(lock) \ |
14158 | + do { \ | |
14159 | + local_bh_disable(); \ | |
14160 | + rt_read_lock(lock); \ | |
14161 | + } while (0) | |
1a6e0f06 | 14162 | + |
e4b2b4a8 | 14163 | +#define read_lock_irq(lock) read_lock(lock) |
1a6e0f06 | 14164 | + |
e4b2b4a8 | 14165 | +#define write_lock(lock) rt_write_lock(lock) |
1a6e0f06 | 14166 | + |
e4b2b4a8 JK |
14167 | +#define write_lock_bh(lock) \ |
14168 | + do { \ | |
14169 | + local_bh_disable(); \ | |
14170 | + rt_write_lock(lock); \ | |
14171 | + } while (0) | |
1a6e0f06 | 14172 | + |
e4b2b4a8 | 14173 | +#define write_lock_irq(lock) write_lock(lock) |
1a6e0f06 | 14174 | + |
e4b2b4a8 | 14175 | +#define read_unlock(lock) rt_read_unlock(lock) |
1a6e0f06 | 14176 | + |
e4b2b4a8 JK |
14177 | +#define read_unlock_bh(lock) \ |
14178 | + do { \ | |
14179 | + rt_read_unlock(lock); \ | |
14180 | + local_bh_enable(); \ | |
14181 | + } while (0) | |
1a6e0f06 | 14182 | + |
e4b2b4a8 | 14183 | +#define read_unlock_irq(lock) read_unlock(lock) |
1a6e0f06 | 14184 | + |
e4b2b4a8 JK |
14185 | +#define write_unlock(lock) rt_write_unlock(lock) |
14186 | + | |
14187 | +#define write_unlock_bh(lock) \ | |
14188 | + do { \ | |
14189 | + rt_write_unlock(lock); \ | |
14190 | + local_bh_enable(); \ | |
14191 | + } while (0) | |
14192 | + | |
14193 | +#define write_unlock_irq(lock) write_unlock(lock) | |
14194 | + | |
14195 | +#define read_unlock_irqrestore(lock, flags) \ | |
14196 | + do { \ | |
14197 | + typecheck(unsigned long, flags); \ | |
14198 | + (void) flags; \ | |
14199 | + rt_read_unlock(lock); \ | |
14200 | + } while (0) | |
14201 | + | |
14202 | +#define write_unlock_irqrestore(lock, flags) \ | |
14203 | + do { \ | |
14204 | + typecheck(unsigned long, flags); \ | |
14205 | + (void) flags; \ | |
14206 | + rt_write_unlock(lock); \ | |
14207 | + } while (0) | |
14208 | + | |
14209 | +#define rwlock_init(rwl) \ | |
14210 | +do { \ | |
14211 | + static struct lock_class_key __key; \ | |
14212 | + \ | |
14213 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
14214 | +} while (0) | |
1a6e0f06 | 14215 | + |
1a6e0f06 | 14216 | +/* |
e4b2b4a8 | 14217 | + * Internal functions made global for CPU pinning |
1a6e0f06 | 14218 | + */ |
e4b2b4a8 JK |
14219 | +void __read_rt_lock(struct rt_rw_lock *lock); |
14220 | +int __read_rt_trylock(struct rt_rw_lock *lock); | |
14221 | +void __write_rt_lock(struct rt_rw_lock *lock); | |
14222 | +int __write_rt_trylock(struct rt_rw_lock *lock); | |
14223 | +void __read_rt_unlock(struct rt_rw_lock *lock); | |
14224 | +void __write_rt_unlock(struct rt_rw_lock *lock); | |
14225 | + | |
1a6e0f06 | 14226 | +#endif |
e4b2b4a8 JK |
14227 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rwlock_types.h linux-4.14/include/linux/rwlock_types.h |
14228 | --- linux-4.14.orig/include/linux/rwlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
14229 | +++ linux-4.14/include/linux/rwlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
14230 | @@ -1,6 +1,10 @@ | |
14231 | #ifndef __LINUX_RWLOCK_TYPES_H | |
14232 | #define __LINUX_RWLOCK_TYPES_H | |
1a6e0f06 | 14233 | |
e4b2b4a8 JK |
14234 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) |
14235 | +# error "Do not include directly, include spinlock_types.h" | |
14236 | +#endif | |
14237 | + | |
1a6e0f06 | 14238 | /* |
e4b2b4a8 JK |
14239 | * include/linux/rwlock_types.h - generic rwlock type definitions |
14240 | * and initializers | |
14241 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rwlock_types_rt.h linux-4.14/include/linux/rwlock_types_rt.h | |
14242 | --- linux-4.14.orig/include/linux/rwlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
14243 | +++ linux-4.14/include/linux/rwlock_types_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
14244 | @@ -0,0 +1,55 @@ | |
14245 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
14246 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
14247 | + | |
14248 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
14249 | +#error "Do not include directly. Include spinlock_types.h instead" | |
14250 | +#endif | |
14251 | + | |
14252 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14253 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
14254 | +#else | |
14255 | +# define RW_DEP_MAP_INIT(lockname) | |
14256 | +#endif | |
14257 | + | |
14258 | +typedef struct rt_rw_lock rwlock_t; | |
14259 | + | |
14260 | +#define __RW_LOCK_UNLOCKED(name) __RWLOCK_RT_INITIALIZER(name) | |
14261 | + | |
14262 | +#define DEFINE_RWLOCK(name) \ | |
14263 | + rwlock_t name = __RW_LOCK_UNLOCKED(name) | |
14264 | + | |
14265 | +/* | |
14266 | + * A reader biased implementation primarily for CPU pinning. | |
14267 | + * | |
14268 | + * Can be selected as general replacement for the single reader RT rwlock | |
14269 | + * variant | |
14270 | + */ | |
14271 | +struct rt_rw_lock { | |
14272 | + struct rt_mutex rtmutex; | |
14273 | + atomic_t readers; | |
14274 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14275 | + struct lockdep_map dep_map; | |
14276 | +#endif | |
14277 | +}; | |
14278 | + | |
14279 | +#define READER_BIAS (1U << 31) | |
14280 | +#define WRITER_BIAS (1U << 30) | |
14281 | + | |
14282 | +#define __RWLOCK_RT_INITIALIZER(name) \ | |
14283 | +{ \ | |
14284 | + .readers = ATOMIC_INIT(READER_BIAS), \ | |
14285 | + .rtmutex = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.rtmutex), \ | |
14286 | + RW_DEP_MAP_INIT(name) \ | |
14287 | +} | |
14288 | + | |
14289 | +void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, | |
14290 | + struct lock_class_key *key); | |
14291 | + | |
14292 | +#define rwlock_biased_rt_init(rwlock) \ | |
14293 | + do { \ | |
14294 | + static struct lock_class_key __key; \ | |
14295 | + \ | |
14296 | + __rwlock_biased_rt_init((rwlock), #rwlock, &__key); \ | |
14297 | + } while (0) | |
14298 | + | |
14299 | +#endif | |
14300 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rwsem.h linux-4.14/include/linux/rwsem.h | |
14301 | --- linux-4.14.orig/include/linux/rwsem.h 2018-09-05 11:03:22.000000000 +0200 | |
14302 | +++ linux-4.14/include/linux/rwsem.h 2018-09-05 11:05:07.000000000 +0200 | |
14303 | @@ -20,6 +20,10 @@ | |
14304 | #include <linux/osq_lock.h> | |
14305 | #endif | |
1a6e0f06 | 14306 | |
e4b2b4a8 JK |
14307 | +#ifdef CONFIG_PREEMPT_RT_FULL |
14308 | +#include <linux/rwsem_rt.h> | |
14309 | +#else /* PREEMPT_RT_FULL */ | |
14310 | + | |
14311 | struct rw_semaphore; | |
1a6e0f06 | 14312 | |
e4b2b4a8 JK |
14313 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK |
14314 | @@ -114,6 +118,13 @@ | |
14315 | return !list_empty(&sem->wait_list); | |
1a6e0f06 JK |
14316 | } |
14317 | ||
e4b2b4a8 JK |
14318 | +#endif /* !PREEMPT_RT_FULL */ |
14319 | + | |
14320 | +/* | |
14321 | + * The functions below are the same for all rwsem implementations including | |
14322 | + * the RT specific variant. | |
14323 | + */ | |
14324 | + | |
1a6e0f06 | 14325 | /* |
e4b2b4a8 | 14326 | * lock for reading |
1a6e0f06 | 14327 | */ |
e4b2b4a8 JK |
14328 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/rwsem_rt.h linux-4.14/include/linux/rwsem_rt.h |
14329 | --- linux-4.14.orig/include/linux/rwsem_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
14330 | +++ linux-4.14/include/linux/rwsem_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
14331 | @@ -0,0 +1,67 @@ | |
14332 | +#ifndef _LINUX_RWSEM_RT_H | |
14333 | +#define _LINUX_RWSEM_RT_H | |
14334 | + | |
14335 | +#ifndef _LINUX_RWSEM_H | |
14336 | +#error "Include rwsem.h" | |
14337 | +#endif | |
14338 | + | |
14339 | +#include <linux/rtmutex.h> | |
14340 | +#include <linux/swait.h> | |
14341 | + | |
14342 | +#define READER_BIAS (1U << 31) | |
14343 | +#define WRITER_BIAS (1U << 30) | |
14344 | + | |
14345 | +struct rw_semaphore { | |
14346 | + atomic_t readers; | |
14347 | + struct rt_mutex rtmutex; | |
14348 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14349 | + struct lockdep_map dep_map; | |
14350 | +#endif | |
14351 | +}; | |
14352 | + | |
14353 | +#define __RWSEM_INITIALIZER(name) \ | |
14354 | +{ \ | |
14355 | + .readers = ATOMIC_INIT(READER_BIAS), \ | |
14356 | + .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ | |
14357 | + RW_DEP_MAP_INIT(name) \ | |
14358 | +} | |
14359 | + | |
14360 | +#define DECLARE_RWSEM(lockname) \ | |
14361 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
14362 | + | |
14363 | +extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
14364 | + struct lock_class_key *key); | |
14365 | + | |
14366 | +#define __init_rwsem(sem, name, key) \ | |
14367 | +do { \ | |
14368 | + rt_mutex_init(&(sem)->rtmutex); \ | |
14369 | + __rwsem_init((sem), (name), (key)); \ | |
14370 | +} while (0) | |
14371 | + | |
14372 | +#define init_rwsem(sem) \ | |
14373 | +do { \ | |
14374 | + static struct lock_class_key __key; \ | |
14375 | + \ | |
14376 | + __init_rwsem((sem), #sem, &__key); \ | |
14377 | +} while (0) | |
14378 | + | |
14379 | +static inline int rwsem_is_locked(struct rw_semaphore *sem) | |
1a6e0f06 | 14380 | +{ |
e4b2b4a8 JK |
14381 | + return atomic_read(&sem->readers) != READER_BIAS; |
14382 | +} | |
1a6e0f06 | 14383 | + |
e4b2b4a8 JK |
14384 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) |
14385 | +{ | |
14386 | + return atomic_read(&sem->readers) > 0; | |
1a6e0f06 JK |
14387 | +} |
14388 | + | |
e4b2b4a8 JK |
14389 | +extern void __down_read(struct rw_semaphore *sem); |
14390 | +extern int __down_read_trylock(struct rw_semaphore *sem); | |
14391 | +extern void __down_write(struct rw_semaphore *sem); | |
14392 | +extern int __must_check __down_write_killable(struct rw_semaphore *sem); | |
14393 | +extern int __down_write_trylock(struct rw_semaphore *sem); | |
14394 | +extern void __up_read(struct rw_semaphore *sem); | |
14395 | +extern void __up_write(struct rw_semaphore *sem); | |
14396 | +extern void __downgrade_write(struct rw_semaphore *sem); | |
14397 | + | |
14398 | +#endif | |
14399 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/sched/mm.h linux-4.14/include/linux/sched/mm.h | |
14400 | --- linux-4.14.orig/include/linux/sched/mm.h 2017-11-12 19:46:13.000000000 +0100 | |
14401 | +++ linux-4.14/include/linux/sched/mm.h 2018-09-05 11:05:07.000000000 +0200 | |
14402 | @@ -43,6 +43,17 @@ | |
14403 | __mmdrop(mm); | |
1a6e0f06 | 14404 | } |
1a6e0f06 | 14405 | |
e4b2b4a8 JK |
14406 | +#ifdef CONFIG_PREEMPT_RT_BASE |
14407 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
14408 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
14409 | +{ | |
14410 | + if (atomic_dec_and_test(&mm->mm_count)) | |
14411 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
14412 | +} | |
14413 | +#else | |
14414 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
14415 | +#endif | |
1a6e0f06 | 14416 | + |
e4b2b4a8 JK |
14417 | static inline void mmdrop_async_fn(struct work_struct *work) |
14418 | { | |
14419 | struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); | |
14420 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/sched/task.h linux-4.14/include/linux/sched/task.h | |
14421 | --- linux-4.14.orig/include/linux/sched/task.h 2018-09-05 11:03:22.000000000 +0200 | |
14422 | +++ linux-4.14/include/linux/sched/task.h 2018-09-05 11:05:07.000000000 +0200 | |
14423 | @@ -88,6 +88,15 @@ | |
1a6e0f06 | 14424 | |
e4b2b4a8 | 14425 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) |
1a6e0f06 | 14426 | |
e4b2b4a8 JK |
14427 | +#ifdef CONFIG_PREEMPT_RT_BASE |
14428 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
1a6e0f06 | 14429 | + |
e4b2b4a8 JK |
14430 | +static inline void put_task_struct(struct task_struct *t) |
14431 | +{ | |
14432 | + if (atomic_dec_and_test(&t->usage)) | |
14433 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
14434 | +} | |
14435 | +#else | |
14436 | extern void __put_task_struct(struct task_struct *t); | |
14437 | ||
14438 | static inline void put_task_struct(struct task_struct *t) | |
14439 | @@ -95,7 +104,7 @@ | |
14440 | if (atomic_dec_and_test(&t->usage)) | |
14441 | __put_task_struct(t); | |
1a6e0f06 | 14442 | } |
e4b2b4a8 JK |
14443 | - |
14444 | +#endif | |
14445 | struct task_struct *task_rcu_dereference(struct task_struct **ptask); | |
1a6e0f06 | 14446 | |
e4b2b4a8 JK |
14447 | #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT |
14448 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/sched/wake_q.h linux-4.14/include/linux/sched/wake_q.h | |
14449 | --- linux-4.14.orig/include/linux/sched/wake_q.h 2017-11-12 19:46:13.000000000 +0100 | |
14450 | +++ linux-4.14/include/linux/sched/wake_q.h 2018-09-05 11:05:07.000000000 +0200 | |
14451 | @@ -47,8 +47,29 @@ | |
14452 | head->lastp = &head->first; | |
14453 | } | |
1a6e0f06 | 14454 | |
e4b2b4a8 JK |
14455 | -extern void wake_q_add(struct wake_q_head *head, |
14456 | - struct task_struct *task); | |
14457 | -extern void wake_up_q(struct wake_q_head *head); | |
14458 | +extern void __wake_q_add(struct wake_q_head *head, | |
14459 | + struct task_struct *task, bool sleeper); | |
14460 | +static inline void wake_q_add(struct wake_q_head *head, | |
14461 | + struct task_struct *task) | |
14462 | +{ | |
14463 | + __wake_q_add(head, task, false); | |
14464 | +} | |
14465 | + | |
14466 | +static inline void wake_q_add_sleeper(struct wake_q_head *head, | |
14467 | + struct task_struct *task) | |
14468 | +{ | |
14469 | + __wake_q_add(head, task, true); | |
14470 | +} | |
1a6e0f06 | 14471 | + |
e4b2b4a8 JK |
14472 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); |
14473 | +static inline void wake_up_q(struct wake_q_head *head) | |
14474 | +{ | |
14475 | + __wake_up_q(head, false); | |
14476 | +} | |
1a6e0f06 | 14477 | + |
e4b2b4a8 JK |
14478 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) |
14479 | +{ | |
14480 | + __wake_up_q(head, true); | |
14481 | +} | |
1a6e0f06 | 14482 | |
e4b2b4a8 JK |
14483 | #endif /* _LINUX_SCHED_WAKE_Q_H */ |
14484 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/sched.h linux-4.14/include/linux/sched.h | |
14485 | --- linux-4.14.orig/include/linux/sched.h 2018-09-05 11:03:22.000000000 +0200 | |
14486 | +++ linux-4.14/include/linux/sched.h 2018-09-05 11:05:07.000000000 +0200 | |
14487 | @@ -27,6 +27,7 @@ | |
14488 | #include <linux/signal_types.h> | |
14489 | #include <linux/mm_types_task.h> | |
14490 | #include <linux/task_io_accounting.h> | |
14491 | +#include <asm/kmap_types.h> | |
1a6e0f06 | 14492 | |
e4b2b4a8 JK |
14493 | /* task_struct member predeclarations (sorted alphabetically): */ |
14494 | struct audit_context; | |
14495 | @@ -93,7 +94,6 @@ | |
1a6e0f06 | 14496 | |
e4b2b4a8 JK |
14497 | /* Convenience macros for the sake of wake_up(): */ |
14498 | #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) | |
14499 | -#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) | |
1a6e0f06 | 14500 | |
e4b2b4a8 JK |
14501 | /* get_task_state(): */ |
14502 | #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ | |
14503 | @@ -101,12 +101,8 @@ | |
14504 | __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ | |
14505 | TASK_PARKED) | |
1a6e0f06 | 14506 | |
e4b2b4a8 JK |
14507 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) |
14508 | - | |
14509 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
c7c16703 | 14510 | |
e4b2b4a8 JK |
14511 | -#define task_is_stopped_or_traced(task) ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) |
14512 | - | |
14513 | #define task_contributes_to_load(task) ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
14514 | (task->flags & PF_FROZEN) == 0 && \ | |
14515 | (task->state & TASK_NOLOAD) == 0) | |
14516 | @@ -134,6 +130,11 @@ | |
14517 | smp_store_mb(current->state, (state_value)); \ | |
14518 | } while (0) | |
c7c16703 | 14519 | |
e4b2b4a8 JK |
14520 | +#define __set_current_state_no_track(state_value) \ |
14521 | + current->state = (state_value); | |
14522 | +#define set_current_state_no_track(state_value) \ | |
14523 | + smp_store_mb(current->state, (state_value)); | |
14524 | + | |
14525 | #define set_special_state(state_value) \ | |
14526 | do { \ | |
14527 | unsigned long flags; /* may shadow */ \ | |
14528 | @@ -187,6 +188,9 @@ | |
14529 | #define set_current_state(state_value) \ | |
14530 | smp_store_mb(current->state, (state_value)) | |
14531 | ||
14532 | +#define __set_current_state_no_track(state_value) __set_current_state(state_value) | |
14533 | +#define set_current_state_no_track(state_value) set_current_state(state_value) | |
14534 | + | |
14535 | /* | |
14536 | * set_special_state() should be used for those states when the blocking task | |
14537 | * can not use the regular condition based wait-loop. In that case we must | |
14538 | @@ -566,6 +570,8 @@ | |
14539 | #endif | |
14540 | /* -1 unrunnable, 0 runnable, >0 stopped: */ | |
14541 | volatile long state; | |
14542 | + /* saved state for "spinlock sleepers" */ | |
14543 | + volatile long saved_state; | |
c7c16703 | 14544 | |
c7c16703 | 14545 | /* |
e4b2b4a8 JK |
14546 | * This begins the randomizable portion of task_struct. Only |
14547 | @@ -618,7 +624,25 @@ | |
14548 | ||
14549 | unsigned int policy; | |
14550 | int nr_cpus_allowed; | |
14551 | - cpumask_t cpus_allowed; | |
14552 | + const cpumask_t *cpus_ptr; | |
14553 | + cpumask_t cpus_mask; | |
14554 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) | |
14555 | + int migrate_disable; | |
14556 | + int migrate_disable_update; | |
14557 | + int pinned_on_cpu; | |
14558 | +# ifdef CONFIG_SCHED_DEBUG | |
14559 | + int migrate_disable_atomic; | |
14560 | +# endif | |
14561 | + | |
14562 | +#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) | |
14563 | + int migrate_disable; | |
14564 | +# ifdef CONFIG_SCHED_DEBUG | |
14565 | + int migrate_disable_atomic; | |
14566 | +# endif | |
14567 | +#endif | |
14568 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14569 | + int sleeping_lock; | |
14570 | +#endif | |
c7c16703 | 14571 | |
e4b2b4a8 JK |
14572 | #ifdef CONFIG_PREEMPT_RCU |
14573 | int rcu_read_lock_nesting; | |
14574 | @@ -777,6 +801,9 @@ | |
14575 | #ifdef CONFIG_POSIX_TIMERS | |
14576 | struct task_cputime cputime_expires; | |
14577 | struct list_head cpu_timers[3]; | |
14578 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14579 | + struct task_struct *posix_timer_list; | |
14580 | +#endif | |
14581 | #endif | |
c7c16703 | 14582 | |
e4b2b4a8 JK |
14583 | /* Process credentials: */ |
14584 | @@ -820,11 +847,17 @@ | |
14585 | /* Signal handlers: */ | |
14586 | struct signal_struct *signal; | |
14587 | struct sighand_struct *sighand; | |
14588 | + struct sigqueue *sigqueue_cache; | |
14589 | + | |
14590 | sigset_t blocked; | |
14591 | sigset_t real_blocked; | |
14592 | /* Restored if set_restore_sigmask() was used: */ | |
14593 | sigset_t saved_sigmask; | |
14594 | struct sigpending pending; | |
14595 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14596 | + /* TODO: move me into ->restart_block ? */ | |
14597 | + struct siginfo forced_info; | |
14598 | +#endif | |
14599 | unsigned long sas_ss_sp; | |
14600 | size_t sas_ss_size; | |
14601 | unsigned int sas_ss_flags; | |
14602 | @@ -849,6 +882,7 @@ | |
14603 | raw_spinlock_t pi_lock; | |
14604 | ||
14605 | struct wake_q_node wake_q; | |
14606 | + struct wake_q_node wake_q_sleeper; | |
14607 | ||
14608 | #ifdef CONFIG_RT_MUTEXES | |
14609 | /* PI waiters blocked on a rt_mutex held by this task: */ | |
14610 | @@ -1116,9 +1150,23 @@ | |
14611 | unsigned int sequential_io; | |
14612 | unsigned int sequential_io_avg; | |
14613 | #endif | |
14614 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14615 | + struct rcu_head put_rcu; | |
14616 | + int softirq_nestcnt; | |
14617 | + unsigned int softirqs_raised; | |
14618 | +#endif | |
14619 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14620 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
14621 | + int kmap_idx; | |
14622 | + pte_t kmap_pte[KM_TYPE_NR]; | |
14623 | +# endif | |
14624 | +#endif | |
14625 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
14626 | unsigned long task_state_change; | |
14627 | #endif | |
14628 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14629 | + int xmit_recursion; | |
14630 | +#endif | |
14631 | int pagefault_disabled; | |
14632 | #ifdef CONFIG_MMU | |
14633 | struct task_struct *oom_reaper_list; | |
14634 | @@ -1332,6 +1380,7 @@ | |
14635 | /* | |
14636 | * Per process flags | |
14637 | */ | |
14638 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
14639 | #define PF_IDLE 0x00000002 /* I am an IDLE thread */ | |
14640 | #define PF_EXITING 0x00000004 /* Getting shut down */ | |
14641 | #define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ | |
14642 | @@ -1355,7 +1404,7 @@ | |
14643 | #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ | |
14644 | #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ | |
14645 | #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ | |
14646 | -#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ | |
14647 | +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ | |
14648 | #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ | |
14649 | #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ | |
14650 | #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ | |
14651 | @@ -1535,6 +1584,7 @@ | |
14652 | ||
14653 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
14654 | extern int wake_up_process(struct task_struct *tsk); | |
14655 | +extern int wake_up_lock_sleeper(struct task_struct *tsk); | |
14656 | extern void wake_up_new_task(struct task_struct *tsk); | |
14657 | ||
14658 | #ifdef CONFIG_SMP | |
14659 | @@ -1611,6 +1661,89 @@ | |
14660 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); | |
14661 | } | |
14662 | ||
14663 | +#ifdef CONFIG_PREEMPT_LAZY | |
14664 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
14665 | +{ | |
14666 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
14667 | +} | |
14668 | + | |
14669 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
14670 | +{ | |
14671 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
14672 | +} | |
14673 | + | |
14674 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
14675 | +{ | |
14676 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
14677 | +} | |
14678 | + | |
14679 | +static inline int need_resched_lazy(void) | |
14680 | +{ | |
14681 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
14682 | +} | |
14683 | + | |
14684 | +static inline int need_resched_now(void) | |
14685 | +{ | |
14686 | + return test_thread_flag(TIF_NEED_RESCHED); | |
14687 | +} | |
14688 | + | |
14689 | +#else | |
14690 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
14691 | +static inline int need_resched_lazy(void) { return 0; } | |
14692 | + | |
14693 | +static inline int need_resched_now(void) | |
14694 | +{ | |
14695 | + return test_thread_flag(TIF_NEED_RESCHED); | |
14696 | +} | |
14697 | + | |
14698 | +#endif | |
14699 | + | |
14700 | + | |
14701 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
14702 | +{ | |
14703 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
14704 | + return true; | |
14705 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14706 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
14707 | + return true; | |
14708 | +#endif | |
14709 | + return false; | |
14710 | +} | |
14711 | + | |
14712 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
14713 | +{ | |
14714 | + bool traced_stopped; | |
14715 | + | |
14716 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14717 | + unsigned long flags; | |
14718 | + | |
14719 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14720 | + traced_stopped = __task_is_stopped_or_traced(task); | |
14721 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14722 | +#else | |
14723 | + traced_stopped = __task_is_stopped_or_traced(task); | |
14724 | +#endif | |
14725 | + return traced_stopped; | |
14726 | +} | |
14727 | + | |
14728 | +static inline bool task_is_traced(struct task_struct *task) | |
14729 | +{ | |
14730 | + bool traced = false; | |
14731 | + | |
14732 | + if (task->state & __TASK_TRACED) | |
14733 | + return true; | |
14734 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14735 | + /* in case the task is sleeping on tasklist_lock */ | |
14736 | + raw_spin_lock_irq(&task->pi_lock); | |
14737 | + if (task->state & __TASK_TRACED) | |
14738 | + traced = true; | |
14739 | + else if (task->saved_state & __TASK_TRACED) | |
14740 | + traced = true; | |
14741 | + raw_spin_unlock_irq(&task->pi_lock); | |
14742 | +#endif | |
14743 | + return traced; | |
14744 | +} | |
14745 | + | |
14746 | /* | |
14747 | * cond_resched() and cond_resched_lock(): latency reduction via | |
14748 | * explicit rescheduling in places that are safe. The return | |
14749 | @@ -1636,12 +1769,16 @@ | |
14750 | __cond_resched_lock(lock); \ | |
14751 | }) | |
14752 | ||
14753 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14754 | extern int __cond_resched_softirq(void); | |
14755 | ||
14756 | #define cond_resched_softirq() ({ \ | |
14757 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
14758 | __cond_resched_softirq(); \ | |
14759 | }) | |
14760 | +#else | |
14761 | +# define cond_resched_softirq() cond_resched() | |
14762 | +#endif | |
c7c16703 | 14763 | |
e4b2b4a8 JK |
14764 | static inline void cond_resched_rcu(void) |
14765 | { | |
14766 | @@ -1671,6 +1808,23 @@ | |
14767 | return unlikely(tif_need_resched()); | |
14768 | } | |
c7c16703 | 14769 | |
e4b2b4a8 JK |
14770 | +#ifdef CONFIG_PREEMPT_RT_FULL |
14771 | +static inline void sleeping_lock_inc(void) | |
14772 | +{ | |
14773 | + current->sleeping_lock++; | |
14774 | +} | |
14775 | + | |
14776 | +static inline void sleeping_lock_dec(void) | |
14777 | +{ | |
14778 | + current->sleeping_lock--; | |
14779 | +} | |
14780 | + | |
14781 | +#else | |
14782 | + | |
14783 | +static inline void sleeping_lock_inc(void) { } | |
14784 | +static inline void sleeping_lock_dec(void) { } | |
14785 | +#endif | |
14786 | + | |
14787 | /* | |
14788 | * Wrappers for p->thread_info->cpu access. No-op on UP. | |
14789 | */ | |
14790 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/seqlock.h linux-4.14/include/linux/seqlock.h | |
14791 | --- linux-4.14.orig/include/linux/seqlock.h 2017-11-12 19:46:13.000000000 +0100 | |
14792 | +++ linux-4.14/include/linux/seqlock.h 2018-09-05 11:05:07.000000000 +0200 | |
14793 | @@ -221,20 +221,30 @@ | |
14794 | return __read_seqcount_retry(s, start); | |
14795 | } | |
c7c16703 | 14796 | |
e4b2b4a8 JK |
14797 | - |
14798 | - | |
14799 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
14800 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
14801 | { | |
14802 | s->sequence++; | |
14803 | smp_wmb(); | |
14804 | } | |
c7c16703 | 14805 | |
e4b2b4a8 JK |
14806 | -static inline void raw_write_seqcount_end(seqcount_t *s) |
14807 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
14808 | +{ | |
14809 | + preempt_disable_rt(); | |
14810 | + __raw_write_seqcount_begin(s); | |
14811 | +} | |
14812 | + | |
14813 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
14814 | { | |
14815 | smp_wmb(); | |
14816 | s->sequence++; | |
14817 | } | |
c7c16703 | 14818 | |
e4b2b4a8 JK |
14819 | +static inline void raw_write_seqcount_end(seqcount_t *s) |
14820 | +{ | |
14821 | + __raw_write_seqcount_end(s); | |
14822 | + preempt_enable_rt(); | |
14823 | +} | |
14824 | + | |
14825 | /** | |
14826 | * raw_write_seqcount_barrier - do a seq write barrier | |
14827 | * @s: pointer to seqcount_t | |
14828 | @@ -429,10 +439,32 @@ | |
14829 | /* | |
14830 | * Read side functions for starting and finalizing a read side section. | |
14831 | */ | |
14832 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14833 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
14834 | { | |
14835 | return read_seqcount_begin(&sl->seqcount); | |
14836 | } | |
14837 | +#else | |
14838 | +/* | |
14839 | + * Starvation safe read side for RT | |
14840 | + */ | |
14841 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
14842 | +{ | |
14843 | + unsigned ret; | |
14844 | + | |
14845 | +repeat: | |
14846 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
14847 | + if (unlikely(ret & 1)) { | |
14848 | + /* | |
14849 | + * Take the lock and let the writer proceed (i.e. evtl | |
14850 | + * boost it), otherwise we could loop here forever. | |
14851 | + */ | |
14852 | + spin_unlock_wait(&sl->lock); | |
14853 | + goto repeat; | |
14854 | + } | |
14855 | + return ret; | |
14856 | +} | |
14857 | +#endif | |
c7c16703 | 14858 | |
e4b2b4a8 JK |
14859 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) |
14860 | { | |
14861 | @@ -447,36 +479,45 @@ | |
14862 | static inline void write_seqlock(seqlock_t *sl) | |
14863 | { | |
14864 | spin_lock(&sl->lock); | |
14865 | - write_seqcount_begin(&sl->seqcount); | |
14866 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14867 | +} | |
14868 | + | |
14869 | +static inline int try_write_seqlock(seqlock_t *sl) | |
14870 | +{ | |
14871 | + if (spin_trylock(&sl->lock)) { | |
14872 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14873 | + return 1; | |
14874 | + } | |
14875 | + return 0; | |
c7c16703 | 14876 | } |
c7c16703 | 14877 | |
e4b2b4a8 JK |
14878 | static inline void write_sequnlock(seqlock_t *sl) |
14879 | { | |
14880 | - write_seqcount_end(&sl->seqcount); | |
14881 | + __raw_write_seqcount_end(&sl->seqcount); | |
14882 | spin_unlock(&sl->lock); | |
14883 | } | |
c7c16703 | 14884 | |
e4b2b4a8 JK |
14885 | static inline void write_seqlock_bh(seqlock_t *sl) |
14886 | { | |
14887 | spin_lock_bh(&sl->lock); | |
14888 | - write_seqcount_begin(&sl->seqcount); | |
14889 | + __raw_write_seqcount_begin(&sl->seqcount); | |
c7c16703 | 14890 | } |
e4b2b4a8 JK |
14891 | |
14892 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
1a6e0f06 | 14893 | { |
e4b2b4a8 JK |
14894 | - write_seqcount_end(&sl->seqcount); |
14895 | + __raw_write_seqcount_end(&sl->seqcount); | |
14896 | spin_unlock_bh(&sl->lock); | |
14897 | } | |
1a6e0f06 | 14898 | |
e4b2b4a8 JK |
14899 | static inline void write_seqlock_irq(seqlock_t *sl) |
14900 | { | |
14901 | spin_lock_irq(&sl->lock); | |
14902 | - write_seqcount_begin(&sl->seqcount); | |
14903 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14904 | } | |
1a6e0f06 | 14905 | |
e4b2b4a8 | 14906 | static inline void write_sequnlock_irq(seqlock_t *sl) |
1a6e0f06 | 14907 | { |
e4b2b4a8 JK |
14908 | - write_seqcount_end(&sl->seqcount); |
14909 | + __raw_write_seqcount_end(&sl->seqcount); | |
14910 | spin_unlock_irq(&sl->lock); | |
14911 | } | |
14912 | ||
14913 | @@ -485,7 +526,7 @@ | |
14914 | unsigned long flags; | |
14915 | ||
14916 | spin_lock_irqsave(&sl->lock, flags); | |
14917 | - write_seqcount_begin(&sl->seqcount); | |
14918 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14919 | return flags; | |
14920 | } | |
14921 | ||
14922 | @@ -495,7 +536,7 @@ | |
14923 | static inline void | |
14924 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
1a6e0f06 | 14925 | { |
e4b2b4a8 JK |
14926 | - write_seqcount_end(&sl->seqcount); |
14927 | + __raw_write_seqcount_end(&sl->seqcount); | |
14928 | spin_unlock_irqrestore(&sl->lock, flags); | |
14929 | } | |
1a6e0f06 | 14930 | |
e4b2b4a8 JK |
14931 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/signal.h linux-4.14/include/linux/signal.h |
14932 | --- linux-4.14.orig/include/linux/signal.h 2017-11-12 19:46:13.000000000 +0100 | |
14933 | +++ linux-4.14/include/linux/signal.h 2018-09-05 11:05:07.000000000 +0200 | |
14934 | @@ -243,6 +243,7 @@ | |
1a6e0f06 JK |
14935 | } |
14936 | ||
e4b2b4a8 JK |
14937 | extern void flush_sigqueue(struct sigpending *queue); |
14938 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
14939 | ||
14940 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
14941 | static inline int valid_signal(unsigned long sig) | |
14942 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/skbuff.h linux-4.14/include/linux/skbuff.h | |
14943 | --- linux-4.14.orig/include/linux/skbuff.h 2018-09-05 11:03:22.000000000 +0200 | |
14944 | +++ linux-4.14/include/linux/skbuff.h 2018-09-05 11:05:07.000000000 +0200 | |
14945 | @@ -287,6 +287,7 @@ | |
14946 | ||
14947 | __u32 qlen; | |
14948 | spinlock_t lock; | |
14949 | + raw_spinlock_t raw_lock; | |
1a6e0f06 JK |
14950 | }; |
14951 | ||
e4b2b4a8 JK |
14952 | struct sk_buff; |
14953 | @@ -1667,6 +1668,12 @@ | |
14954 | __skb_queue_head_init(list); | |
14955 | } | |
1a6e0f06 | 14956 | |
e4b2b4a8 JK |
14957 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) |
14958 | +{ | |
14959 | + raw_spin_lock_init(&list->raw_lock); | |
14960 | + __skb_queue_head_init(list); | |
14961 | +} | |
14962 | + | |
14963 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
14964 | struct lock_class_key *class) | |
14965 | { | |
14966 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/smp.h linux-4.14/include/linux/smp.h | |
14967 | --- linux-4.14.orig/include/linux/smp.h 2017-11-12 19:46:13.000000000 +0100 | |
14968 | +++ linux-4.14/include/linux/smp.h 2018-09-05 11:05:07.000000000 +0200 | |
14969 | @@ -202,6 +202,9 @@ | |
14970 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | |
14971 | #define put_cpu() preempt_enable() | |
1a6e0f06 | 14972 | |
e4b2b4a8 JK |
14973 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) |
14974 | +#define put_cpu_light() migrate_enable() | |
14975 | + | |
14976 | /* | |
14977 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
14978 | * boot command line: | |
14979 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_api_smp.h linux-4.14/include/linux/spinlock_api_smp.h | |
14980 | --- linux-4.14.orig/include/linux/spinlock_api_smp.h 2017-11-12 19:46:13.000000000 +0100 | |
14981 | +++ linux-4.14/include/linux/spinlock_api_smp.h 2018-09-05 11:05:07.000000000 +0200 | |
14982 | @@ -187,6 +187,8 @@ | |
14983 | return 0; | |
1a6e0f06 | 14984 | } |
e4b2b4a8 JK |
14985 | |
14986 | -#include <linux/rwlock_api_smp.h> | |
14987 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14988 | +# include <linux/rwlock_api_smp.h> | |
1a6e0f06 | 14989 | +#endif |
e4b2b4a8 JK |
14990 | |
14991 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
14992 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock.h linux-4.14/include/linux/spinlock.h | |
14993 | --- linux-4.14.orig/include/linux/spinlock.h 2017-11-12 19:46:13.000000000 +0100 | |
14994 | +++ linux-4.14/include/linux/spinlock.h 2018-09-05 11:05:07.000000000 +0200 | |
14995 | @@ -286,7 +286,11 @@ | |
14996 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
14997 | ||
14998 | /* Include rwlock functions */ | |
14999 | -#include <linux/rwlock.h> | |
1a6e0f06 | 15000 | +#ifdef CONFIG_PREEMPT_RT_FULL |
e4b2b4a8 | 15001 | +# include <linux/rwlock_rt.h> |
1a6e0f06 | 15002 | +#else |
e4b2b4a8 | 15003 | +# include <linux/rwlock.h> |
1a6e0f06 | 15004 | +#endif |
1a6e0f06 | 15005 | |
e4b2b4a8 JK |
15006 | /* |
15007 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
15008 | @@ -297,6 +301,10 @@ | |
15009 | # include <linux/spinlock_api_up.h> | |
15010 | #endif | |
1a6e0f06 | 15011 | |
e4b2b4a8 JK |
15012 | +#ifdef CONFIG_PREEMPT_RT_FULL |
15013 | +# include <linux/spinlock_rt.h> | |
15014 | +#else /* PREEMPT_RT_FULL */ | |
15015 | + | |
15016 | /* | |
15017 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
15018 | */ | |
15019 | @@ -421,4 +429,6 @@ | |
15020 | #define atomic_dec_and_lock(atomic, lock) \ | |
15021 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
1a6e0f06 | 15022 | |
e4b2b4a8 JK |
15023 | +#endif /* !PREEMPT_RT_FULL */ |
15024 | + | |
15025 | #endif /* __LINUX_SPINLOCK_H */ | |
15026 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_rt.h linux-4.14/include/linux/spinlock_rt.h | |
15027 | --- linux-4.14.orig/include/linux/spinlock_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
15028 | +++ linux-4.14/include/linux/spinlock_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
15029 | @@ -0,0 +1,159 @@ | |
15030 | +#ifndef __LINUX_SPINLOCK_RT_H | |
15031 | +#define __LINUX_SPINLOCK_RT_H | |
15032 | + | |
15033 | +#ifndef __LINUX_SPINLOCK_H | |
15034 | +#error Do not include directly. Use spinlock.h | |
15035 | +#endif | |
15036 | + | |
15037 | +#include <linux/bug.h> | |
15038 | + | |
15039 | +extern void | |
15040 | +__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key); | |
15041 | + | |
15042 | +#define spin_lock_init(slock) \ | |
15043 | +do { \ | |
15044 | + static struct lock_class_key __key; \ | |
15045 | + \ | |
15046 | + rt_mutex_init(&(slock)->lock); \ | |
15047 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
15048 | +} while (0) | |
15049 | + | |
15050 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
15051 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
15052 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
15053 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
15054 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); | |
15055 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
15056 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
15057 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
15058 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
15059 | + | |
15060 | +/* | |
15061 | + * lockdep-less calls, for derived types like rwlock: | |
15062 | + * (for trylock they can use rt_mutex_trylock() directly. | |
15063 | + * Migrate disable handling must be done at the call site. | |
15064 | + */ | |
15065 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
15066 | +extern void __lockfunc __rt_spin_trylock(struct rt_mutex *lock); | |
15067 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
15068 | + | |
15069 | +#define spin_lock(lock) rt_spin_lock(lock) | |
15070 | + | |
15071 | +#define spin_lock_bh(lock) \ | |
15072 | + do { \ | |
15073 | + local_bh_disable(); \ | |
15074 | + rt_spin_lock(lock); \ | |
15075 | + } while (0) | |
15076 | + | |
15077 | +#define spin_lock_irq(lock) spin_lock(lock) | |
15078 | + | |
15079 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
15080 | + | |
15081 | +#define spin_trylock(lock) \ | |
15082 | +({ \ | |
15083 | + int __locked; \ | |
15084 | + __locked = spin_do_trylock(lock); \ | |
15085 | + __locked; \ | |
15086 | +}) | |
15087 | + | |
15088 | +#ifdef CONFIG_LOCKDEP | |
15089 | +# define spin_lock_nested(lock, subclass) \ | |
15090 | + do { \ | |
15091 | + rt_spin_lock_nested(lock, subclass); \ | |
15092 | + } while (0) | |
15093 | + | |
15094 | +#define spin_lock_bh_nested(lock, subclass) \ | |
15095 | + do { \ | |
15096 | + local_bh_disable(); \ | |
15097 | + rt_spin_lock_nested(lock, subclass); \ | |
15098 | + } while (0) | |
15099 | + | |
15100 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
15101 | + do { \ | |
15102 | + typecheck(unsigned long, flags); \ | |
15103 | + flags = 0; \ | |
15104 | + rt_spin_lock_nested(lock, subclass); \ | |
15105 | + } while (0) | |
15106 | +#else | |
15107 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
15108 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
15109 | + | |
15110 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
15111 | + do { \ | |
15112 | + typecheck(unsigned long, flags); \ | |
15113 | + flags = 0; \ | |
15114 | + spin_lock(lock); \ | |
15115 | + } while (0) | |
15116 | +#endif | |
15117 | + | |
15118 | +#define spin_lock_irqsave(lock, flags) \ | |
15119 | + do { \ | |
15120 | + typecheck(unsigned long, flags); \ | |
15121 | + flags = 0; \ | |
15122 | + spin_lock(lock); \ | |
15123 | + } while (0) | |
15124 | + | |
15125 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
1a6e0f06 | 15126 | +{ |
e4b2b4a8 JK |
15127 | + unsigned long flags = 0; |
15128 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
15129 | + flags = rt_spin_lock_trace_flags(lock); | |
15130 | +#else | |
15131 | + spin_lock(lock); /* lock_local */ | |
15132 | +#endif | |
15133 | + return flags; | |
1a6e0f06 JK |
15134 | +} |
15135 | + | |
e4b2b4a8 JK |
15136 | +/* FIXME: we need rt_spin_lock_nest_lock */ |
15137 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
15138 | + | |
15139 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
15140 | + | |
15141 | +#define spin_unlock_bh(lock) \ | |
15142 | + do { \ | |
15143 | + rt_spin_unlock(lock); \ | |
15144 | + local_bh_enable(); \ | |
15145 | + } while (0) | |
15146 | + | |
15147 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
15148 | + | |
15149 | +#define spin_unlock_irqrestore(lock, flags) \ | |
15150 | + do { \ | |
15151 | + typecheck(unsigned long, flags); \ | |
15152 | + (void) flags; \ | |
15153 | + spin_unlock(lock); \ | |
15154 | + } while (0) | |
15155 | + | |
15156 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
15157 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
15158 | + | |
15159 | +#define spin_trylock_irqsave(lock, flags) \ | |
15160 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
15161 | + | |
15162 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
15163 | + | |
15164 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
15165 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
15166 | +#else | |
15167 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
15168 | +#endif | |
15169 | + | |
15170 | +static inline int spin_can_lock(spinlock_t *lock) | |
1a6e0f06 | 15171 | +{ |
e4b2b4a8 | 15172 | + return !rt_mutex_is_locked(&lock->lock); |
1a6e0f06 JK |
15173 | +} |
15174 | + | |
e4b2b4a8 | 15175 | +static inline int spin_is_locked(spinlock_t *lock) |
1a6e0f06 | 15176 | +{ |
e4b2b4a8 | 15177 | + return rt_mutex_is_locked(&lock->lock); |
1a6e0f06 JK |
15178 | +} |
15179 | + | |
e4b2b4a8 | 15180 | +static inline void assert_spin_locked(spinlock_t *lock) |
1a6e0f06 | 15181 | +{ |
e4b2b4a8 | 15182 | + BUG_ON(!spin_is_locked(lock)); |
1a6e0f06 | 15183 | +} |
1a6e0f06 | 15184 | + |
e4b2b4a8 JK |
15185 | +#define atomic_dec_and_lock(atomic, lock) \ |
15186 | + atomic_dec_and_spin_lock(atomic, lock) | |
15187 | + | |
15188 | +#endif | |
15189 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_types.h linux-4.14/include/linux/spinlock_types.h | |
15190 | --- linux-4.14.orig/include/linux/spinlock_types.h 2017-11-12 19:46:13.000000000 +0100 | |
15191 | +++ linux-4.14/include/linux/spinlock_types.h 2018-09-05 11:05:07.000000000 +0200 | |
15192 | @@ -9,80 +9,15 @@ | |
15193 | * Released under the General Public License (GPL). | |
15194 | */ | |
1a6e0f06 | 15195 | |
e4b2b4a8 JK |
15196 | -#if defined(CONFIG_SMP) |
15197 | -# include <asm/spinlock_types.h> | |
15198 | -#else | |
15199 | -# include <linux/spinlock_types_up.h> | |
15200 | -#endif | |
15201 | - | |
15202 | -#include <linux/lockdep.h> | |
15203 | - | |
15204 | -typedef struct raw_spinlock { | |
15205 | - arch_spinlock_t raw_lock; | |
15206 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
15207 | - unsigned int break_lock; | |
15208 | -#endif | |
15209 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
15210 | - unsigned int magic, owner_cpu; | |
15211 | - void *owner; | |
15212 | -#endif | |
15213 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15214 | - struct lockdep_map dep_map; | |
15215 | -#endif | |
15216 | -} raw_spinlock_t; | |
15217 | - | |
15218 | -#define SPINLOCK_MAGIC 0xdead4ead | |
15219 | - | |
15220 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
15221 | - | |
15222 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15223 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
15224 | -#else | |
15225 | -# define SPIN_DEP_MAP_INIT(lockname) | |
15226 | -#endif | |
15227 | +#include <linux/spinlock_types_raw.h> | |
1a6e0f06 | 15228 | |
e4b2b4a8 JK |
15229 | -#ifdef CONFIG_DEBUG_SPINLOCK |
15230 | -# define SPIN_DEBUG_INIT(lockname) \ | |
15231 | - .magic = SPINLOCK_MAGIC, \ | |
15232 | - .owner_cpu = -1, \ | |
15233 | - .owner = SPINLOCK_OWNER_INIT, | |
15234 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15235 | +# include <linux/spinlock_types_nort.h> | |
15236 | +# include <linux/rwlock_types.h> | |
15237 | #else | |
15238 | -# define SPIN_DEBUG_INIT(lockname) | |
15239 | -#endif | |
15240 | - | |
15241 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
15242 | - { \ | |
15243 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
15244 | - SPIN_DEBUG_INIT(lockname) \ | |
15245 | - SPIN_DEP_MAP_INIT(lockname) } | |
15246 | - | |
15247 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
15248 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
15249 | - | |
15250 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
15251 | - | |
15252 | -typedef struct spinlock { | |
15253 | - union { | |
15254 | - struct raw_spinlock rlock; | |
15255 | - | |
15256 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15257 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
15258 | - struct { | |
15259 | - u8 __padding[LOCK_PADSIZE]; | |
15260 | - struct lockdep_map dep_map; | |
15261 | - }; | |
15262 | +# include <linux/rtmutex.h> | |
15263 | +# include <linux/spinlock_types_rt.h> | |
15264 | +# include <linux/rwlock_types_rt.h> | |
15265 | #endif | |
15266 | - }; | |
15267 | -} spinlock_t; | |
15268 | - | |
15269 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
15270 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
15271 | - | |
15272 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
15273 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
15274 | - | |
15275 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
15276 | - | |
15277 | -#include <linux/rwlock_types.h> | |
1a6e0f06 | 15278 | |
e4b2b4a8 JK |
15279 | #endif /* __LINUX_SPINLOCK_TYPES_H */ |
15280 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_types_nort.h linux-4.14/include/linux/spinlock_types_nort.h | |
15281 | --- linux-4.14.orig/include/linux/spinlock_types_nort.h 1970-01-01 01:00:00.000000000 +0100 | |
15282 | +++ linux-4.14/include/linux/spinlock_types_nort.h 2018-09-05 11:05:07.000000000 +0200 | |
15283 | @@ -0,0 +1,33 @@ | |
15284 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
15285 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
1a6e0f06 | 15286 | + |
e4b2b4a8 JK |
15287 | +#ifndef __LINUX_SPINLOCK_TYPES_H |
15288 | +#error "Do not include directly. Include spinlock_types.h instead" | |
15289 | +#endif | |
1a6e0f06 | 15290 | + |
e4b2b4a8 JK |
15291 | +/* |
15292 | + * The non RT version maps spinlocks to raw_spinlocks | |
15293 | + */ | |
15294 | +typedef struct spinlock { | |
15295 | + union { | |
15296 | + struct raw_spinlock rlock; | |
1a6e0f06 | 15297 | + |
e4b2b4a8 JK |
15298 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
15299 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
15300 | + struct { | |
15301 | + u8 __padding[LOCK_PADSIZE]; | |
15302 | + struct lockdep_map dep_map; | |
15303 | + }; | |
1a6e0f06 | 15304 | +#endif |
e4b2b4a8 JK |
15305 | + }; |
15306 | +} spinlock_t; | |
1a6e0f06 | 15307 | + |
e4b2b4a8 JK |
15308 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ |
15309 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
1a6e0f06 | 15310 | + |
e4b2b4a8 JK |
15311 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ |
15312 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
1a6e0f06 | 15313 | + |
e4b2b4a8 | 15314 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) |
1a6e0f06 | 15315 | + |
e4b2b4a8 JK |
15316 | +#endif |
15317 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_types_raw.h linux-4.14/include/linux/spinlock_types_raw.h | |
15318 | --- linux-4.14.orig/include/linux/spinlock_types_raw.h 1970-01-01 01:00:00.000000000 +0100 | |
15319 | +++ linux-4.14/include/linux/spinlock_types_raw.h 2018-09-05 11:05:07.000000000 +0200 | |
15320 | @@ -0,0 +1,58 @@ | |
15321 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
15322 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
1a6e0f06 | 15323 | + |
e4b2b4a8 | 15324 | +#include <linux/types.h> |
1a6e0f06 | 15325 | + |
e4b2b4a8 JK |
15326 | +#if defined(CONFIG_SMP) |
15327 | +# include <asm/spinlock_types.h> | |
15328 | +#else | |
15329 | +# include <linux/spinlock_types_up.h> | |
15330 | +#endif | |
1a6e0f06 | 15331 | + |
e4b2b4a8 JK |
15332 | +#include <linux/lockdep.h> |
15333 | + | |
15334 | +typedef struct raw_spinlock { | |
15335 | + arch_spinlock_t raw_lock; | |
15336 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
15337 | + unsigned int break_lock; | |
15338 | +#endif | |
15339 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
15340 | + unsigned int magic, owner_cpu; | |
15341 | + void *owner; | |
15342 | +#endif | |
15343 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15344 | + struct lockdep_map dep_map; | |
15345 | +#endif | |
15346 | +} raw_spinlock_t; | |
15347 | + | |
15348 | +#define SPINLOCK_MAGIC 0xdead4ead | |
15349 | + | |
15350 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
1a6e0f06 | 15351 | + |
e4b2b4a8 JK |
15352 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
15353 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
1a6e0f06 | 15354 | +#else |
e4b2b4a8 JK |
15355 | +# define SPIN_DEP_MAP_INIT(lockname) |
15356 | +#endif | |
1a6e0f06 | 15357 | + |
e4b2b4a8 JK |
15358 | +#ifdef CONFIG_DEBUG_SPINLOCK |
15359 | +# define SPIN_DEBUG_INIT(lockname) \ | |
15360 | + .magic = SPINLOCK_MAGIC, \ | |
15361 | + .owner_cpu = -1, \ | |
15362 | + .owner = SPINLOCK_OWNER_INIT, | |
15363 | +#else | |
15364 | +# define SPIN_DEBUG_INIT(lockname) | |
1a6e0f06 | 15365 | +#endif |
e4b2b4a8 JK |
15366 | + |
15367 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
15368 | + { \ | |
15369 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
15370 | + SPIN_DEBUG_INIT(lockname) \ | |
15371 | + SPIN_DEP_MAP_INIT(lockname) } | |
15372 | + | |
15373 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
15374 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
15375 | + | |
15376 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
15377 | + | |
1a6e0f06 | 15378 | +#endif |
e4b2b4a8 JK |
15379 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_types_rt.h linux-4.14/include/linux/spinlock_types_rt.h |
15380 | --- linux-4.14.orig/include/linux/spinlock_types_rt.h 1970-01-01 01:00:00.000000000 +0100 | |
15381 | +++ linux-4.14/include/linux/spinlock_types_rt.h 2018-09-05 11:05:07.000000000 +0200 | |
15382 | @@ -0,0 +1,48 @@ | |
15383 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
15384 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
15385 | + | |
15386 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
15387 | +#error "Do not include directly. Include spinlock_types.h instead" | |
1a6e0f06 | 15388 | +#endif |
1a6e0f06 | 15389 | + |
e4b2b4a8 JK |
15390 | +#include <linux/cache.h> |
15391 | + | |
15392 | +/* | |
15393 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
15394 | + */ | |
15395 | +typedef struct spinlock { | |
15396 | + struct rt_mutex lock; | |
15397 | + unsigned int break_lock; | |
15398 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15399 | + struct lockdep_map dep_map; | |
1a6e0f06 | 15400 | +#endif |
e4b2b4a8 | 15401 | +} spinlock_t; |
1a6e0f06 | 15402 | + |
e4b2b4a8 JK |
15403 | +#ifdef CONFIG_DEBUG_RT_MUTEXES |
15404 | +# define __RT_SPIN_INITIALIZER(name) \ | |
15405 | + { \ | |
15406 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
15407 | + .save_state = 1, \ | |
15408 | + .file = __FILE__, \ | |
15409 | + .line = __LINE__ , \ | |
15410 | + } | |
1a6e0f06 | 15411 | +#else |
e4b2b4a8 JK |
15412 | +# define __RT_SPIN_INITIALIZER(name) \ |
15413 | + { \ | |
15414 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
15415 | + .save_state = 1, \ | |
15416 | + } | |
1a6e0f06 | 15417 | +#endif |
1a6e0f06 | 15418 | + |
e4b2b4a8 JK |
15419 | +/* |
15420 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
15421 | +*/ | |
15422 | + | |
15423 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
15424 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
15425 | + SPIN_DEP_MAP_INIT(name) } | |
15426 | + | |
15427 | +#define DEFINE_SPINLOCK(name) \ | |
15428 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
15429 | + | |
1a6e0f06 | 15430 | +#endif |
e4b2b4a8 JK |
15431 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/spinlock_types_up.h linux-4.14/include/linux/spinlock_types_up.h |
15432 | --- linux-4.14.orig/include/linux/spinlock_types_up.h 2017-11-12 19:46:13.000000000 +0100 | |
15433 | +++ linux-4.14/include/linux/spinlock_types_up.h 2018-09-05 11:05:07.000000000 +0200 | |
15434 | @@ -1,10 +1,6 @@ | |
15435 | #ifndef __LINUX_SPINLOCK_TYPES_UP_H | |
15436 | #define __LINUX_SPINLOCK_TYPES_UP_H | |
1a6e0f06 | 15437 | |
e4b2b4a8 JK |
15438 | -#ifndef __LINUX_SPINLOCK_TYPES_H |
15439 | -# error "please don't include this file directly" | |
15440 | -#endif | |
15441 | - | |
15442 | /* | |
15443 | * include/linux/spinlock_types_up.h - spinlock type definitions for UP | |
15444 | * | |
15445 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/srcutiny.h linux-4.14/include/linux/srcutiny.h | |
15446 | --- linux-4.14.orig/include/linux/srcutiny.h 2017-11-12 19:46:13.000000000 +0100 | |
15447 | +++ linux-4.14/include/linux/srcutiny.h 2018-09-05 11:05:07.000000000 +0200 | |
15448 | @@ -43,7 +43,7 @@ | |
1a6e0f06 | 15449 | |
e4b2b4a8 | 15450 | void srcu_drive_gp(struct work_struct *wp); |
1a6e0f06 | 15451 | |
e4b2b4a8 JK |
15452 | -#define __SRCU_STRUCT_INIT(name) \ |
15453 | +#define __SRCU_STRUCT_INIT(name, __ignored) \ | |
15454 | { \ | |
15455 | .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ | |
15456 | .srcu_cb_tail = &name.srcu_cb_head, \ | |
15457 | @@ -56,9 +56,9 @@ | |
15458 | * Tree SRCU, which needs some per-CPU data. | |
1a6e0f06 | 15459 | */ |
e4b2b4a8 JK |
15460 | #define DEFINE_SRCU(name) \ |
15461 | - struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
15462 | + struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) | |
15463 | #define DEFINE_STATIC_SRCU(name) \ | |
15464 | - static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
15465 | + static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) | |
15466 | ||
15467 | void synchronize_srcu(struct srcu_struct *sp); | |
15468 | ||
15469 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/srcutree.h linux-4.14/include/linux/srcutree.h | |
15470 | --- linux-4.14.orig/include/linux/srcutree.h 2017-11-12 19:46:13.000000000 +0100 | |
15471 | +++ linux-4.14/include/linux/srcutree.h 2018-09-05 11:05:07.000000000 +0200 | |
15472 | @@ -40,7 +40,7 @@ | |
15473 | unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ | |
15474 | ||
15475 | /* Update-side state. */ | |
15476 | - raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp; | |
15477 | + spinlock_t __private lock ____cacheline_internodealigned_in_smp; | |
15478 | struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ | |
15479 | unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ | |
15480 | unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ | |
15481 | @@ -58,7 +58,7 @@ | |
15482 | * Node in SRCU combining tree, similar in function to rcu_data. | |
15483 | */ | |
15484 | struct srcu_node { | |
15485 | - raw_spinlock_t __private lock; | |
15486 | + spinlock_t __private lock; | |
15487 | unsigned long srcu_have_cbs[4]; /* GP seq for children */ | |
15488 | /* having CBs, but only */ | |
15489 | /* is > ->srcu_gq_seq. */ | |
15490 | @@ -78,7 +78,7 @@ | |
15491 | struct srcu_node *level[RCU_NUM_LVLS + 1]; | |
15492 | /* First node at each level. */ | |
15493 | struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ | |
15494 | - raw_spinlock_t __private lock; /* Protect counters */ | |
15495 | + spinlock_t __private lock; /* Protect counters */ | |
15496 | struct mutex srcu_gp_mutex; /* Serialize GP work. */ | |
15497 | unsigned int srcu_idx; /* Current rdr array element. */ | |
15498 | unsigned long srcu_gp_seq; /* Grace-period seq #. */ | |
15499 | @@ -104,10 +104,10 @@ | |
15500 | #define SRCU_STATE_SCAN1 1 | |
15501 | #define SRCU_STATE_SCAN2 2 | |
1a6e0f06 | 15502 | |
e4b2b4a8 JK |
15503 | -#define __SRCU_STRUCT_INIT(name) \ |
15504 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
15505 | { \ | |
15506 | - .sda = &name##_srcu_data, \ | |
15507 | - .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ | |
15508 | + .sda = &pcpu_name, \ | |
15509 | + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ | |
15510 | .srcu_gp_seq_needed = 0 - 1, \ | |
15511 | __SRCU_DEP_MAP_INIT(name) \ | |
15512 | } | |
15513 | @@ -133,7 +133,7 @@ | |
1a6e0f06 | 15514 | */ |
e4b2b4a8 JK |
15515 | #define __DEFINE_SRCU(name, is_static) \ |
15516 | static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ | |
15517 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
15518 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_data) | |
15519 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
15520 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
15521 | ||
15522 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/suspend.h linux-4.14/include/linux/suspend.h | |
15523 | --- linux-4.14.orig/include/linux/suspend.h 2018-09-05 11:03:22.000000000 +0200 | |
15524 | +++ linux-4.14/include/linux/suspend.h 2018-09-05 11:05:07.000000000 +0200 | |
15525 | @@ -196,6 +196,12 @@ | |
15526 | void (*end)(void); | |
1a6e0f06 JK |
15527 | }; |
15528 | ||
e4b2b4a8 JK |
15529 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) |
15530 | +extern bool pm_in_action; | |
15531 | +#else | |
15532 | +# define pm_in_action false | |
15533 | +#endif | |
15534 | + | |
15535 | #ifdef CONFIG_SUSPEND | |
15536 | extern suspend_state_t mem_sleep_current; | |
15537 | extern suspend_state_t mem_sleep_default; | |
15538 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/swait.h linux-4.14/include/linux/swait.h | |
15539 | --- linux-4.14.orig/include/linux/swait.h 2017-11-12 19:46:13.000000000 +0100 | |
15540 | +++ linux-4.14/include/linux/swait.h 2018-09-05 11:05:07.000000000 +0200 | |
15541 | @@ -5,6 +5,7 @@ | |
15542 | #include <linux/list.h> | |
15543 | #include <linux/stddef.h> | |
15544 | #include <linux/spinlock.h> | |
15545 | +#include <linux/wait.h> | |
15546 | #include <asm/current.h> | |
1a6e0f06 | 15547 | |
e4b2b4a8 JK |
15548 | /* |
15549 | @@ -147,6 +148,7 @@ | |
15550 | extern void swake_up(struct swait_queue_head *q); | |
15551 | extern void swake_up_all(struct swait_queue_head *q); | |
15552 | extern void swake_up_locked(struct swait_queue_head *q); | |
15553 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
1a6e0f06 | 15554 | |
e4b2b4a8 JK |
15555 | extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); |
15556 | extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
15557 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/swap.h linux-4.14/include/linux/swap.h | |
15558 | --- linux-4.14.orig/include/linux/swap.h 2017-11-12 19:46:13.000000000 +0100 | |
15559 | +++ linux-4.14/include/linux/swap.h 2018-09-05 11:05:07.000000000 +0200 | |
15560 | @@ -12,6 +12,7 @@ | |
15561 | #include <linux/fs.h> | |
15562 | #include <linux/atomic.h> | |
15563 | #include <linux/page-flags.h> | |
15564 | +#include <linux/locallock.h> | |
15565 | #include <asm/page.h> | |
15566 | ||
15567 | struct notifier_block; | |
15568 | @@ -297,7 +298,8 @@ | |
15569 | void *workingset_eviction(struct address_space *mapping, struct page *page); | |
15570 | bool workingset_refault(void *shadow); | |
15571 | void workingset_activation(struct page *page); | |
15572 | -void workingset_update_node(struct radix_tree_node *node, void *private); | |
15573 | +void __workingset_update_node(struct radix_tree_node *node, void *private); | |
15574 | +DECLARE_LOCAL_IRQ_LOCK(shadow_nodes_lock); | |
1a6e0f06 | 15575 | |
e4b2b4a8 JK |
15576 | /* linux/mm/page_alloc.c */ |
15577 | extern unsigned long totalram_pages; | |
15578 | @@ -310,6 +312,7 @@ | |
1a6e0f06 | 15579 | |
1a6e0f06 | 15580 | |
e4b2b4a8 JK |
15581 | /* linux/mm/swap.c */ |
15582 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
15583 | extern void lru_cache_add(struct page *); | |
15584 | extern void lru_cache_add_anon(struct page *page); | |
15585 | extern void lru_cache_add_file(struct page *page); | |
15586 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/swork.h linux-4.14/include/linux/swork.h | |
15587 | --- linux-4.14.orig/include/linux/swork.h 1970-01-01 01:00:00.000000000 +0100 | |
15588 | +++ linux-4.14/include/linux/swork.h 2018-09-05 11:05:07.000000000 +0200 | |
15589 | @@ -0,0 +1,24 @@ | |
15590 | +#ifndef _LINUX_SWORK_H | |
15591 | +#define _LINUX_SWORK_H | |
15592 | + | |
15593 | +#include <linux/list.h> | |
15594 | + | |
15595 | +struct swork_event { | |
15596 | + struct list_head item; | |
15597 | + unsigned long flags; | |
15598 | + void (*func)(struct swork_event *); | |
15599 | +}; | |
15600 | + | |
15601 | +static inline void INIT_SWORK(struct swork_event *event, | |
15602 | + void (*func)(struct swork_event *)) | |
15603 | +{ | |
15604 | + event->flags = 0; | |
15605 | + event->func = func; | |
15606 | +} | |
15607 | + | |
15608 | +bool swork_queue(struct swork_event *sev); | |
15609 | + | |
15610 | +int swork_get(void); | |
15611 | +void swork_put(void); | |
15612 | + | |
15613 | +#endif /* _LINUX_SWORK_H */ | |
15614 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/thread_info.h linux-4.14/include/linux/thread_info.h | |
15615 | --- linux-4.14.orig/include/linux/thread_info.h 2018-09-05 11:03:22.000000000 +0200 | |
15616 | +++ linux-4.14/include/linux/thread_info.h 2018-09-05 11:05:07.000000000 +0200 | |
15617 | @@ -86,7 +86,17 @@ | |
15618 | #define test_thread_flag(flag) \ | |
15619 | test_ti_thread_flag(current_thread_info(), flag) | |
15620 | ||
15621 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
15622 | +#ifdef CONFIG_PREEMPT_LAZY | |
15623 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
15624 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
15625 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
15626 | +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
15627 | + | |
1a6e0f06 | 15628 | +#else |
e4b2b4a8 JK |
15629 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) |
15630 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
15631 | +#define tif_need_resched_lazy() 0 | |
1a6e0f06 | 15632 | +#endif |
1a6e0f06 | 15633 | |
e4b2b4a8 JK |
15634 | #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES |
15635 | static inline int arch_within_stack_frames(const void * const stack, | |
15636 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/timer.h linux-4.14/include/linux/timer.h | |
15637 | --- linux-4.14.orig/include/linux/timer.h 2018-09-05 11:03:22.000000000 +0200 | |
15638 | +++ linux-4.14/include/linux/timer.h 2018-09-05 11:05:07.000000000 +0200 | |
15639 | @@ -213,7 +213,7 @@ | |
1a6e0f06 | 15640 | |
e4b2b4a8 | 15641 | extern int try_to_del_timer_sync(struct timer_list *timer); |
1a6e0f06 | 15642 | |
e4b2b4a8 JK |
15643 | -#ifdef CONFIG_SMP |
15644 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
15645 | extern int del_timer_sync(struct timer_list *timer); | |
15646 | #else | |
15647 | # define del_timer_sync(t) del_timer(t) | |
15648 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/trace_events.h linux-4.14/include/linux/trace_events.h | |
15649 | --- linux-4.14.orig/include/linux/trace_events.h 2017-11-12 19:46:13.000000000 +0100 | |
15650 | +++ linux-4.14/include/linux/trace_events.h 2018-09-05 11:05:07.000000000 +0200 | |
15651 | @@ -62,6 +62,9 @@ | |
15652 | unsigned char flags; | |
15653 | unsigned char preempt_count; | |
15654 | int pid; | |
15655 | + unsigned short migrate_disable; | |
15656 | + unsigned short padding; | |
15657 | + unsigned char preempt_lazy_count; | |
15658 | }; | |
1a6e0f06 | 15659 | |
e4b2b4a8 JK |
15660 | #define TRACE_EVENT_TYPE_MAX \ |
15661 | @@ -402,11 +405,13 @@ | |
15662 | ||
15663 | extern int filter_match_preds(struct event_filter *filter, void *rec); | |
15664 | ||
15665 | -extern enum event_trigger_type event_triggers_call(struct trace_event_file *file, | |
15666 | - void *rec); | |
15667 | -extern void event_triggers_post_call(struct trace_event_file *file, | |
15668 | - enum event_trigger_type tt, | |
15669 | - void *rec); | |
15670 | +extern enum event_trigger_type | |
15671 | +event_triggers_call(struct trace_event_file *file, void *rec, | |
15672 | + struct ring_buffer_event *event); | |
15673 | +extern void | |
15674 | +event_triggers_post_call(struct trace_event_file *file, | |
15675 | + enum event_trigger_type tt, | |
15676 | + void *rec, struct ring_buffer_event *event); | |
1a6e0f06 | 15677 | |
e4b2b4a8 | 15678 | bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); |
1a6e0f06 | 15679 | |
e4b2b4a8 | 15680 | @@ -426,7 +431,7 @@ |
1a6e0f06 | 15681 | |
e4b2b4a8 JK |
15682 | if (!(eflags & EVENT_FILE_FL_TRIGGER_COND)) { |
15683 | if (eflags & EVENT_FILE_FL_TRIGGER_MODE) | |
15684 | - event_triggers_call(file, NULL); | |
15685 | + event_triggers_call(file, NULL, NULL); | |
15686 | if (eflags & EVENT_FILE_FL_SOFT_DISABLED) | |
15687 | return true; | |
15688 | if (eflags & EVENT_FILE_FL_PID_FILTER) | |
15689 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/uaccess.h linux-4.14/include/linux/uaccess.h | |
15690 | --- linux-4.14.orig/include/linux/uaccess.h 2017-11-12 19:46:13.000000000 +0100 | |
15691 | +++ linux-4.14/include/linux/uaccess.h 2018-09-05 11:05:07.000000000 +0200 | |
15692 | @@ -185,6 +185,7 @@ | |
15693 | */ | |
15694 | static inline void pagefault_disable(void) | |
1a6e0f06 | 15695 | { |
e4b2b4a8 JK |
15696 | + migrate_disable(); |
15697 | pagefault_disabled_inc(); | |
15698 | /* | |
15699 | * make sure to have issued the store before a pagefault | |
15700 | @@ -201,6 +202,7 @@ | |
15701 | */ | |
15702 | barrier(); | |
15703 | pagefault_disabled_dec(); | |
15704 | + migrate_enable(); | |
15705 | } | |
1a6e0f06 | 15706 | |
e4b2b4a8 JK |
15707 | /* |
15708 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/vmstat.h linux-4.14/include/linux/vmstat.h | |
15709 | --- linux-4.14.orig/include/linux/vmstat.h 2017-11-12 19:46:13.000000000 +0100 | |
15710 | +++ linux-4.14/include/linux/vmstat.h 2018-09-05 11:05:07.000000000 +0200 | |
15711 | @@ -33,7 +33,9 @@ | |
15712 | */ | |
15713 | static inline void __count_vm_event(enum vm_event_item item) | |
15714 | { | |
15715 | + preempt_disable_rt(); | |
15716 | raw_cpu_inc(vm_event_states.event[item]); | |
15717 | + preempt_enable_rt(); | |
1a6e0f06 JK |
15718 | } |
15719 | ||
e4b2b4a8 JK |
15720 | static inline void count_vm_event(enum vm_event_item item) |
15721 | @@ -43,7 +45,9 @@ | |
15722 | ||
15723 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
1a6e0f06 | 15724 | { |
e4b2b4a8 JK |
15725 | + preempt_disable_rt(); |
15726 | raw_cpu_add(vm_event_states.event[item], delta); | |
15727 | + preempt_enable_rt(); | |
1a6e0f06 JK |
15728 | } |
15729 | ||
e4b2b4a8 JK |
15730 | static inline void count_vm_events(enum vm_event_item item, long delta) |
15731 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/linux/wait.h linux-4.14/include/linux/wait.h | |
15732 | --- linux-4.14.orig/include/linux/wait.h 2017-11-12 19:46:13.000000000 +0100 | |
15733 | +++ linux-4.14/include/linux/wait.h 2018-09-05 11:05:07.000000000 +0200 | |
15734 | @@ -10,6 +10,7 @@ | |
15735 | ||
15736 | #include <asm/current.h> | |
15737 | #include <uapi/linux/wait.h> | |
15738 | +#include <linux/atomic.h> | |
15739 | ||
15740 | typedef struct wait_queue_entry wait_queue_entry_t; | |
15741 | ||
15742 | @@ -486,8 +487,8 @@ | |
15743 | int __ret = 0; \ | |
15744 | struct hrtimer_sleeper __t; \ | |
15745 | \ | |
15746 | - hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); \ | |
15747 | - hrtimer_init_sleeper(&__t, current); \ | |
15748 | + hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, HRTIMER_MODE_REL, \ | |
15749 | + current); \ | |
15750 | if ((timeout) != KTIME_MAX) \ | |
15751 | hrtimer_start_range_ns(&__t.timer, timeout, \ | |
15752 | current->timer_slack_ns, \ | |
15753 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/net/gen_stats.h linux-4.14/include/net/gen_stats.h | |
15754 | --- linux-4.14.orig/include/net/gen_stats.h 2017-11-12 19:46:13.000000000 +0100 | |
15755 | +++ linux-4.14/include/net/gen_stats.h 2018-09-05 11:05:07.000000000 +0200 | |
15756 | @@ -6,6 +6,7 @@ | |
15757 | #include <linux/socket.h> | |
15758 | #include <linux/rtnetlink.h> | |
15759 | #include <linux/pkt_sched.h> | |
15760 | +#include <net/net_seq_lock.h> | |
15761 | ||
15762 | struct gnet_stats_basic_cpu { | |
15763 | struct gnet_stats_basic_packed bstats; | |
15764 | @@ -36,11 +37,11 @@ | |
15765 | spinlock_t *lock, struct gnet_dump *d, | |
15766 | int padattr); | |
1a6e0f06 | 15767 | |
e4b2b4a8 JK |
15768 | -int gnet_stats_copy_basic(const seqcount_t *running, |
15769 | +int gnet_stats_copy_basic(net_seqlock_t *running, | |
15770 | struct gnet_dump *d, | |
15771 | struct gnet_stats_basic_cpu __percpu *cpu, | |
15772 | struct gnet_stats_basic_packed *b); | |
15773 | -void __gnet_stats_copy_basic(const seqcount_t *running, | |
15774 | +void __gnet_stats_copy_basic(net_seqlock_t *running, | |
15775 | struct gnet_stats_basic_packed *bstats, | |
15776 | struct gnet_stats_basic_cpu __percpu *cpu, | |
15777 | struct gnet_stats_basic_packed *b); | |
15778 | @@ -57,13 +58,13 @@ | |
15779 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
15780 | struct net_rate_estimator __rcu **rate_est, | |
15781 | spinlock_t *stats_lock, | |
15782 | - seqcount_t *running, struct nlattr *opt); | |
15783 | + net_seqlock_t *running, struct nlattr *opt); | |
15784 | void gen_kill_estimator(struct net_rate_estimator __rcu **ptr); | |
15785 | int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
15786 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
15787 | struct net_rate_estimator __rcu **ptr, | |
15788 | spinlock_t *stats_lock, | |
15789 | - seqcount_t *running, struct nlattr *opt); | |
15790 | + net_seqlock_t *running, struct nlattr *opt); | |
15791 | bool gen_estimator_active(struct net_rate_estimator __rcu **ptr); | |
15792 | bool gen_estimator_read(struct net_rate_estimator __rcu **ptr, | |
15793 | struct gnet_stats_rate_est64 *sample); | |
15794 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/net/neighbour.h linux-4.14/include/net/neighbour.h | |
15795 | --- linux-4.14.orig/include/net/neighbour.h 2017-11-12 19:46:13.000000000 +0100 | |
15796 | +++ linux-4.14/include/net/neighbour.h 2018-09-05 11:05:07.000000000 +0200 | |
15797 | @@ -450,7 +450,7 @@ | |
1a6e0f06 | 15798 | } |
e4b2b4a8 | 15799 | #endif |
1a6e0f06 | 15800 | |
e4b2b4a8 JK |
15801 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) |
15802 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
15803 | { | |
15804 | unsigned int seq; | |
15805 | unsigned int hh_len; | |
15806 | @@ -474,7 +474,7 @@ | |
1a6e0f06 | 15807 | |
e4b2b4a8 JK |
15808 | static inline int neigh_output(struct neighbour *n, struct sk_buff *skb) |
15809 | { | |
15810 | - const struct hh_cache *hh = &n->hh; | |
15811 | + struct hh_cache *hh = &n->hh; | |
1a6e0f06 | 15812 | |
e4b2b4a8 JK |
15813 | if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) |
15814 | return neigh_hh_output(hh, skb); | |
15815 | @@ -515,7 +515,7 @@ | |
1a6e0f06 | 15816 | |
e4b2b4a8 | 15817 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) |
1a6e0f06 | 15818 | |
e4b2b4a8 JK |
15819 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, |
15820 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
15821 | const struct net_device *dev) | |
15822 | { | |
15823 | unsigned int seq; | |
15824 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/net/net_seq_lock.h linux-4.14/include/net/net_seq_lock.h | |
15825 | --- linux-4.14.orig/include/net/net_seq_lock.h 1970-01-01 01:00:00.000000000 +0100 | |
15826 | +++ linux-4.14/include/net/net_seq_lock.h 2018-09-05 11:05:07.000000000 +0200 | |
15827 | @@ -0,0 +1,15 @@ | |
15828 | +#ifndef __NET_NET_SEQ_LOCK_H__ | |
15829 | +#define __NET_NET_SEQ_LOCK_H__ | |
15830 | + | |
1a6e0f06 | 15831 | +#ifdef CONFIG_PREEMPT_RT_BASE |
e4b2b4a8 JK |
15832 | +# define net_seqlock_t seqlock_t |
15833 | +# define net_seq_begin(__r) read_seqbegin(__r) | |
15834 | +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) | |
15835 | + | |
1a6e0f06 | 15836 | +#else |
e4b2b4a8 JK |
15837 | +# define net_seqlock_t seqcount_t |
15838 | +# define net_seq_begin(__r) read_seqcount_begin(__r) | |
15839 | +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) | |
1a6e0f06 JK |
15840 | +#endif |
15841 | + | |
e4b2b4a8 JK |
15842 | +#endif |
15843 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/net/sch_generic.h linux-4.14/include/net/sch_generic.h | |
15844 | --- linux-4.14.orig/include/net/sch_generic.h 2018-09-05 11:03:22.000000000 +0200 | |
15845 | +++ linux-4.14/include/net/sch_generic.h 2018-09-05 11:05:07.000000000 +0200 | |
15846 | @@ -10,6 +10,7 @@ | |
15847 | #include <linux/percpu.h> | |
15848 | #include <linux/dynamic_queue_limits.h> | |
15849 | #include <linux/list.h> | |
15850 | +#include <net/net_seq_lock.h> | |
15851 | #include <linux/refcount.h> | |
15852 | #include <linux/workqueue.h> | |
15853 | #include <net/gen_stats.h> | |
15854 | @@ -90,7 +91,7 @@ | |
15855 | struct sk_buff *gso_skb ____cacheline_aligned_in_smp; | |
15856 | struct qdisc_skb_head q; | |
15857 | struct gnet_stats_basic_packed bstats; | |
15858 | - seqcount_t running; | |
15859 | + net_seqlock_t running; | |
15860 | struct gnet_stats_queue qstats; | |
15861 | unsigned long state; | |
15862 | struct Qdisc *next_sched; | |
15863 | @@ -109,13 +110,22 @@ | |
15864 | refcount_inc(&qdisc->refcnt); | |
15865 | } | |
1a6e0f06 | 15866 | |
e4b2b4a8 JK |
15867 | -static inline bool qdisc_is_running(const struct Qdisc *qdisc) |
15868 | +static inline bool qdisc_is_running(struct Qdisc *qdisc) | |
1a6e0f06 | 15869 | { |
e4b2b4a8 JK |
15870 | +#ifdef CONFIG_PREEMPT_RT_BASE |
15871 | + return spin_is_locked(&qdisc->running.lock) ? true : false; | |
1a6e0f06 | 15872 | +#else |
e4b2b4a8 | 15873 | return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; |
1a6e0f06 | 15874 | +#endif |
e4b2b4a8 | 15875 | } |
1a6e0f06 | 15876 | |
e4b2b4a8 JK |
15877 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
15878 | { | |
1a6e0f06 | 15879 | +#ifdef CONFIG_PREEMPT_RT_BASE |
e4b2b4a8 JK |
15880 | + if (try_write_seqlock(&qdisc->running)) |
15881 | + return true; | |
15882 | + return false; | |
1a6e0f06 | 15883 | +#else |
e4b2b4a8 JK |
15884 | if (qdisc_is_running(qdisc)) |
15885 | return false; | |
15886 | /* Variant of write_seqcount_begin() telling lockdep a trylock | |
15887 | @@ -124,11 +134,16 @@ | |
15888 | raw_write_seqcount_begin(&qdisc->running); | |
15889 | seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); | |
15890 | return true; | |
1a6e0f06 | 15891 | +#endif |
e4b2b4a8 | 15892 | } |
1a6e0f06 | 15893 | |
e4b2b4a8 JK |
15894 | static inline void qdisc_run_end(struct Qdisc *qdisc) |
15895 | { | |
1a6e0f06 | 15896 | +#ifdef CONFIG_PREEMPT_RT_BASE |
e4b2b4a8 | 15897 | + write_sequnlock(&qdisc->running); |
1a6e0f06 | 15898 | +#else |
e4b2b4a8 | 15899 | write_seqcount_end(&qdisc->running); |
1a6e0f06 | 15900 | +#endif |
e4b2b4a8 | 15901 | } |
1a6e0f06 | 15902 | |
e4b2b4a8 JK |
15903 | static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) |
15904 | @@ -337,7 +352,7 @@ | |
15905 | return qdisc_lock(root); | |
15906 | } | |
1a6e0f06 | 15907 | |
e4b2b4a8 JK |
15908 | -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) |
15909 | +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
15910 | { | |
15911 | struct Qdisc *root = qdisc_root_sleeping(qdisc); | |
1a6e0f06 | 15912 | |
e4b2b4a8 JK |
15913 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/net/xfrm.h linux-4.14/include/net/xfrm.h |
15914 | --- linux-4.14.orig/include/net/xfrm.h 2018-09-05 11:03:22.000000000 +0200 | |
15915 | +++ linux-4.14/include/net/xfrm.h 2018-09-05 11:05:07.000000000 +0200 | |
15916 | @@ -217,7 +217,7 @@ | |
15917 | struct xfrm_stats stats; | |
15918 | ||
15919 | struct xfrm_lifetime_cur curlft; | |
15920 | - struct tasklet_hrtimer mtimer; | |
15921 | + struct hrtimer mtimer; | |
15922 | ||
15923 | struct xfrm_state_offload xso; | |
15924 | ||
15925 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/include/trace/events/timer.h linux-4.14/include/trace/events/timer.h | |
15926 | --- linux-4.14.orig/include/trace/events/timer.h 2018-09-05 11:03:22.000000000 +0200 | |
15927 | +++ linux-4.14/include/trace/events/timer.h 2018-09-05 11:05:07.000000000 +0200 | |
15928 | @@ -148,7 +148,11 @@ | |
15929 | { HRTIMER_MODE_ABS, "ABS" }, \ | |
15930 | { HRTIMER_MODE_REL, "REL" }, \ | |
15931 | { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ | |
15932 | - { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }) | |
15933 | + { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ | |
15934 | + { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ | |
15935 | + { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ | |
15936 | + { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ | |
15937 | + { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }) | |
1a6e0f06 | 15938 | |
e4b2b4a8 JK |
15939 | /** |
15940 | * hrtimer_init - called when the hrtimer is initialized | |
15941 | @@ -186,15 +190,16 @@ | |
15942 | */ | |
15943 | TRACE_EVENT(hrtimer_start, | |
15944 | ||
15945 | - TP_PROTO(struct hrtimer *hrtimer), | |
15946 | + TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), | |
15947 | ||
15948 | - TP_ARGS(hrtimer), | |
15949 | + TP_ARGS(hrtimer, mode), | |
15950 | ||
15951 | TP_STRUCT__entry( | |
15952 | __field( void *, hrtimer ) | |
15953 | __field( void *, function ) | |
15954 | __field( s64, expires ) | |
15955 | __field( s64, softexpires ) | |
15956 | + __field( enum hrtimer_mode, mode ) | |
15957 | ), | |
15958 | ||
15959 | TP_fast_assign( | |
15960 | @@ -202,12 +207,14 @@ | |
15961 | __entry->function = hrtimer->function; | |
15962 | __entry->expires = hrtimer_get_expires(hrtimer); | |
15963 | __entry->softexpires = hrtimer_get_softexpires(hrtimer); | |
15964 | + __entry->mode = mode; | |
15965 | ), | |
15966 | ||
15967 | - TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", | |
15968 | - __entry->hrtimer, __entry->function, | |
15969 | + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu " | |
15970 | + "mode=%s", __entry->hrtimer, __entry->function, | |
15971 | (unsigned long long) __entry->expires, | |
15972 | - (unsigned long long) __entry->softexpires) | |
15973 | + (unsigned long long) __entry->softexpires, | |
15974 | + decode_hrtimer_mode(__entry->mode)) | |
15975 | ); | |
1a6e0f06 | 15976 | |
e4b2b4a8 JK |
15977 | /** |
15978 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/init/Kconfig linux-4.14/init/Kconfig | |
15979 | --- linux-4.14.orig/init/Kconfig 2018-09-05 11:03:22.000000000 +0200 | |
15980 | +++ linux-4.14/init/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
15981 | @@ -744,6 +744,7 @@ | |
15982 | config RT_GROUP_SCHED | |
15983 | bool "Group scheduling for SCHED_RR/FIFO" | |
15984 | depends on CGROUP_SCHED | |
15985 | + depends on !PREEMPT_RT_FULL | |
15986 | default n | |
15987 | help | |
15988 | This feature lets you explicitly allocate real CPU bandwidth | |
15989 | @@ -1533,6 +1534,7 @@ | |
1a6e0f06 | 15990 | |
e4b2b4a8 JK |
15991 | config SLAB |
15992 | bool "SLAB" | |
15993 | + depends on !PREEMPT_RT_FULL | |
15994 | select HAVE_HARDENED_USERCOPY_ALLOCATOR | |
15995 | help | |
15996 | The regular slab allocator that is established and known to work | |
15997 | @@ -1553,6 +1555,7 @@ | |
15998 | config SLOB | |
15999 | depends on EXPERT | |
16000 | bool "SLOB (Simple Allocator)" | |
16001 | + depends on !PREEMPT_RT_FULL | |
16002 | help | |
16003 | SLOB replaces the stock allocator with a drastically simpler | |
16004 | allocator. SLOB is generally more space efficient but | |
16005 | @@ -1594,7 +1597,7 @@ | |
1a6e0f06 | 16006 | |
e4b2b4a8 JK |
16007 | config SLUB_CPU_PARTIAL |
16008 | default y | |
16009 | - depends on SLUB && SMP | |
16010 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
16011 | bool "SLUB per cpu partial cache" | |
16012 | help | |
16013 | Per cpu partial caches accellerate objects allocation and freeing | |
16014 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/init/main.c linux-4.14/init/main.c | |
16015 | --- linux-4.14.orig/init/main.c 2018-09-05 11:03:22.000000000 +0200 | |
16016 | +++ linux-4.14/init/main.c 2018-09-05 11:05:07.000000000 +0200 | |
16017 | @@ -543,6 +543,7 @@ | |
16018 | setup_command_line(command_line); | |
16019 | setup_nr_cpu_ids(); | |
16020 | setup_per_cpu_areas(); | |
16021 | + softirq_early_init(); | |
16022 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
16023 | boot_cpu_hotplug_init(); | |
1a6e0f06 | 16024 | |
e4b2b4a8 JK |
16025 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/init/Makefile linux-4.14/init/Makefile |
16026 | --- linux-4.14.orig/init/Makefile 2017-11-12 19:46:13.000000000 +0100 | |
16027 | +++ linux-4.14/init/Makefile 2018-09-05 11:05:07.000000000 +0200 | |
16028 | @@ -36,4 +36,4 @@ | |
16029 | include/generated/compile.h: FORCE | |
16030 | @$($(quiet)chk_compile.h) | |
16031 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
16032 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
16033 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
16034 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/cgroup/cgroup.c linux-4.14/kernel/cgroup/cgroup.c | |
16035 | --- linux-4.14.orig/kernel/cgroup/cgroup.c 2018-09-05 11:03:22.000000000 +0200 | |
16036 | +++ linux-4.14/kernel/cgroup/cgroup.c 2018-09-05 11:05:07.000000000 +0200 | |
16037 | @@ -4508,10 +4508,10 @@ | |
16038 | queue_work(cgroup_destroy_wq, &css->destroy_work); | |
1a6e0f06 JK |
16039 | } |
16040 | ||
e4b2b4a8 JK |
16041 | -static void css_release_work_fn(struct work_struct *work) |
16042 | +static void css_release_work_fn(struct swork_event *sev) | |
1a6e0f06 | 16043 | { |
e4b2b4a8 JK |
16044 | struct cgroup_subsys_state *css = |
16045 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
16046 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
16047 | struct cgroup_subsys *ss = css->ss; | |
16048 | struct cgroup *cgrp = css->cgroup; | |
1a6e0f06 | 16049 | |
e4b2b4a8 JK |
16050 | @@ -4562,8 +4562,8 @@ |
16051 | struct cgroup_subsys_state *css = | |
16052 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
1a6e0f06 | 16053 | |
e4b2b4a8 JK |
16054 | - INIT_WORK(&css->destroy_work, css_release_work_fn); |
16055 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
16056 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
16057 | + swork_queue(&css->destroy_swork); | |
1a6e0f06 JK |
16058 | } |
16059 | ||
e4b2b4a8 JK |
16060 | static void init_and_link_css(struct cgroup_subsys_state *css, |
16061 | @@ -5269,6 +5269,7 @@ | |
16062 | */ | |
16063 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
16064 | BUG_ON(!cgroup_destroy_wq); | |
16065 | + BUG_ON(swork_get()); | |
16066 | return 0; | |
16067 | } | |
16068 | core_initcall(cgroup_wq_init); | |
16069 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/cgroup/cpuset.c linux-4.14/kernel/cgroup/cpuset.c | |
16070 | --- linux-4.14.orig/kernel/cgroup/cpuset.c 2017-11-12 19:46:13.000000000 +0100 | |
16071 | +++ linux-4.14/kernel/cgroup/cpuset.c 2018-09-05 11:05:07.000000000 +0200 | |
16072 | @@ -288,7 +288,7 @@ | |
1a6e0f06 | 16073 | */ |
1a6e0f06 | 16074 | |
e4b2b4a8 JK |
16075 | static DEFINE_MUTEX(cpuset_mutex); |
16076 | -static DEFINE_SPINLOCK(callback_lock); | |
16077 | +static DEFINE_RAW_SPINLOCK(callback_lock); | |
1a6e0f06 | 16078 | |
e4b2b4a8 | 16079 | static struct workqueue_struct *cpuset_migrate_mm_wq; |
1a6e0f06 | 16080 | |
e4b2b4a8 JK |
16081 | @@ -926,9 +926,9 @@ |
16082 | continue; | |
16083 | rcu_read_unlock(); | |
1a6e0f06 | 16084 | |
e4b2b4a8 JK |
16085 | - spin_lock_irq(&callback_lock); |
16086 | + raw_spin_lock_irq(&callback_lock); | |
16087 | cpumask_copy(cp->effective_cpus, new_cpus); | |
16088 | - spin_unlock_irq(&callback_lock); | |
16089 | + raw_spin_unlock_irq(&callback_lock); | |
1a6e0f06 | 16090 | |
e4b2b4a8 JK |
16091 | WARN_ON(!is_in_v2_mode() && |
16092 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); | |
16093 | @@ -993,9 +993,9 @@ | |
16094 | if (retval < 0) | |
16095 | return retval; | |
1a6e0f06 | 16096 | |
e4b2b4a8 JK |
16097 | - spin_lock_irq(&callback_lock); |
16098 | + raw_spin_lock_irq(&callback_lock); | |
16099 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); | |
16100 | - spin_unlock_irq(&callback_lock); | |
16101 | + raw_spin_unlock_irq(&callback_lock); | |
16102 | ||
16103 | /* use trialcs->cpus_allowed as a temp variable */ | |
16104 | update_cpumasks_hier(cs, trialcs->cpus_allowed); | |
16105 | @@ -1179,9 +1179,9 @@ | |
16106 | continue; | |
16107 | rcu_read_unlock(); | |
16108 | ||
16109 | - spin_lock_irq(&callback_lock); | |
16110 | + raw_spin_lock_irq(&callback_lock); | |
16111 | cp->effective_mems = *new_mems; | |
16112 | - spin_unlock_irq(&callback_lock); | |
16113 | + raw_spin_unlock_irq(&callback_lock); | |
16114 | ||
16115 | WARN_ON(!is_in_v2_mode() && | |
16116 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); | |
16117 | @@ -1249,9 +1249,9 @@ | |
16118 | if (retval < 0) | |
16119 | goto done; | |
16120 | ||
16121 | - spin_lock_irq(&callback_lock); | |
16122 | + raw_spin_lock_irq(&callback_lock); | |
16123 | cs->mems_allowed = trialcs->mems_allowed; | |
16124 | - spin_unlock_irq(&callback_lock); | |
16125 | + raw_spin_unlock_irq(&callback_lock); | |
16126 | ||
16127 | /* use trialcs->mems_allowed as a temp variable */ | |
16128 | update_nodemasks_hier(cs, &trialcs->mems_allowed); | |
16129 | @@ -1342,9 +1342,9 @@ | |
16130 | spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) | |
16131 | || (is_spread_page(cs) != is_spread_page(trialcs))); | |
16132 | ||
16133 | - spin_lock_irq(&callback_lock); | |
16134 | + raw_spin_lock_irq(&callback_lock); | |
16135 | cs->flags = trialcs->flags; | |
16136 | - spin_unlock_irq(&callback_lock); | |
16137 | + raw_spin_unlock_irq(&callback_lock); | |
16138 | ||
16139 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) | |
16140 | rebuild_sched_domains_locked(); | |
16141 | @@ -1759,7 +1759,7 @@ | |
16142 | cpuset_filetype_t type = seq_cft(sf)->private; | |
16143 | int ret = 0; | |
1a6e0f06 | 16144 | |
e4b2b4a8 JK |
16145 | - spin_lock_irq(&callback_lock); |
16146 | + raw_spin_lock_irq(&callback_lock); | |
1a6e0f06 | 16147 | |
e4b2b4a8 JK |
16148 | switch (type) { |
16149 | case FILE_CPULIST: | |
16150 | @@ -1778,7 +1778,7 @@ | |
16151 | ret = -EINVAL; | |
16152 | } | |
1a6e0f06 | 16153 | |
e4b2b4a8 JK |
16154 | - spin_unlock_irq(&callback_lock); |
16155 | + raw_spin_unlock_irq(&callback_lock); | |
16156 | return ret; | |
1a6e0f06 JK |
16157 | } |
16158 | ||
e4b2b4a8 | 16159 | @@ -1993,12 +1993,12 @@ |
1a6e0f06 | 16160 | |
e4b2b4a8 | 16161 | cpuset_inc(); |
1a6e0f06 | 16162 | |
e4b2b4a8 JK |
16163 | - spin_lock_irq(&callback_lock); |
16164 | + raw_spin_lock_irq(&callback_lock); | |
16165 | if (is_in_v2_mode()) { | |
16166 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); | |
16167 | cs->effective_mems = parent->effective_mems; | |
16168 | } | |
16169 | - spin_unlock_irq(&callback_lock); | |
16170 | + raw_spin_unlock_irq(&callback_lock); | |
16171 | ||
16172 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) | |
16173 | goto out_unlock; | |
16174 | @@ -2025,12 +2025,12 @@ | |
16175 | } | |
16176 | rcu_read_unlock(); | |
16177 | ||
16178 | - spin_lock_irq(&callback_lock); | |
16179 | + raw_spin_lock_irq(&callback_lock); | |
16180 | cs->mems_allowed = parent->mems_allowed; | |
16181 | cs->effective_mems = parent->mems_allowed; | |
16182 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); | |
16183 | cpumask_copy(cs->effective_cpus, parent->cpus_allowed); | |
16184 | - spin_unlock_irq(&callback_lock); | |
16185 | + raw_spin_unlock_irq(&callback_lock); | |
16186 | out_unlock: | |
16187 | mutex_unlock(&cpuset_mutex); | |
16188 | return 0; | |
16189 | @@ -2069,7 +2069,7 @@ | |
16190 | static void cpuset_bind(struct cgroup_subsys_state *root_css) | |
1a6e0f06 | 16191 | { |
e4b2b4a8 JK |
16192 | mutex_lock(&cpuset_mutex); |
16193 | - spin_lock_irq(&callback_lock); | |
16194 | + raw_spin_lock_irq(&callback_lock); | |
16195 | ||
16196 | if (is_in_v2_mode()) { | |
16197 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); | |
16198 | @@ -2080,7 +2080,7 @@ | |
16199 | top_cpuset.mems_allowed = top_cpuset.effective_mems; | |
16200 | } | |
16201 | ||
16202 | - spin_unlock_irq(&callback_lock); | |
16203 | + raw_spin_unlock_irq(&callback_lock); | |
16204 | mutex_unlock(&cpuset_mutex); | |
1a6e0f06 JK |
16205 | } |
16206 | ||
e4b2b4a8 JK |
16207 | @@ -2094,7 +2094,7 @@ |
16208 | if (task_css_is_root(task, cpuset_cgrp_id)) | |
16209 | return; | |
16210 | ||
16211 | - set_cpus_allowed_ptr(task, ¤t->cpus_allowed); | |
16212 | + set_cpus_allowed_ptr(task, current->cpus_ptr); | |
16213 | task->mems_allowed = current->mems_allowed; | |
1a6e0f06 JK |
16214 | } |
16215 | ||
e4b2b4a8 | 16216 | @@ -2178,12 +2178,12 @@ |
1a6e0f06 | 16217 | { |
e4b2b4a8 | 16218 | bool is_empty; |
1a6e0f06 | 16219 | |
e4b2b4a8 JK |
16220 | - spin_lock_irq(&callback_lock); |
16221 | + raw_spin_lock_irq(&callback_lock); | |
16222 | cpumask_copy(cs->cpus_allowed, new_cpus); | |
16223 | cpumask_copy(cs->effective_cpus, new_cpus); | |
16224 | cs->mems_allowed = *new_mems; | |
16225 | cs->effective_mems = *new_mems; | |
16226 | - spin_unlock_irq(&callback_lock); | |
16227 | + raw_spin_unlock_irq(&callback_lock); | |
1a6e0f06 | 16228 | |
e4b2b4a8 JK |
16229 | /* |
16230 | * Don't call update_tasks_cpumask() if the cpuset becomes empty, | |
16231 | @@ -2220,10 +2220,10 @@ | |
16232 | if (nodes_empty(*new_mems)) | |
16233 | *new_mems = parent_cs(cs)->effective_mems; | |
1a6e0f06 | 16234 | |
e4b2b4a8 JK |
16235 | - spin_lock_irq(&callback_lock); |
16236 | + raw_spin_lock_irq(&callback_lock); | |
16237 | cpumask_copy(cs->effective_cpus, new_cpus); | |
16238 | cs->effective_mems = *new_mems; | |
16239 | - spin_unlock_irq(&callback_lock); | |
16240 | + raw_spin_unlock_irq(&callback_lock); | |
1a6e0f06 | 16241 | |
e4b2b4a8 JK |
16242 | if (cpus_updated) |
16243 | update_tasks_cpumask(cs); | |
16244 | @@ -2316,21 +2316,21 @@ | |
1a6e0f06 | 16245 | |
e4b2b4a8 JK |
16246 | /* synchronize cpus_allowed to cpu_active_mask */ |
16247 | if (cpus_updated) { | |
16248 | - spin_lock_irq(&callback_lock); | |
16249 | + raw_spin_lock_irq(&callback_lock); | |
16250 | if (!on_dfl) | |
16251 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); | |
16252 | cpumask_copy(top_cpuset.effective_cpus, &new_cpus); | |
16253 | - spin_unlock_irq(&callback_lock); | |
16254 | + raw_spin_unlock_irq(&callback_lock); | |
16255 | /* we don't mess with cpumasks of tasks in top_cpuset */ | |
16256 | } | |
1a6e0f06 | 16257 | |
e4b2b4a8 JK |
16258 | /* synchronize mems_allowed to N_MEMORY */ |
16259 | if (mems_updated) { | |
16260 | - spin_lock_irq(&callback_lock); | |
16261 | + raw_spin_lock_irq(&callback_lock); | |
16262 | if (!on_dfl) | |
16263 | top_cpuset.mems_allowed = new_mems; | |
16264 | top_cpuset.effective_mems = new_mems; | |
16265 | - spin_unlock_irq(&callback_lock); | |
16266 | + raw_spin_unlock_irq(&callback_lock); | |
16267 | update_tasks_nodemask(&top_cpuset); | |
16268 | } | |
1a6e0f06 | 16269 | |
e4b2b4a8 JK |
16270 | @@ -2429,11 +2429,11 @@ |
16271 | { | |
16272 | unsigned long flags; | |
1a6e0f06 | 16273 | |
e4b2b4a8 JK |
16274 | - spin_lock_irqsave(&callback_lock, flags); |
16275 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
16276 | rcu_read_lock(); | |
16277 | guarantee_online_cpus(task_cs(tsk), pmask); | |
16278 | rcu_read_unlock(); | |
16279 | - spin_unlock_irqrestore(&callback_lock, flags); | |
16280 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
16281 | } | |
1a6e0f06 | 16282 | |
e4b2b4a8 JK |
16283 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) |
16284 | @@ -2481,11 +2481,11 @@ | |
16285 | nodemask_t mask; | |
16286 | unsigned long flags; | |
1a6e0f06 | 16287 | |
e4b2b4a8 JK |
16288 | - spin_lock_irqsave(&callback_lock, flags); |
16289 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
16290 | rcu_read_lock(); | |
16291 | guarantee_online_mems(task_cs(tsk), &mask); | |
16292 | rcu_read_unlock(); | |
16293 | - spin_unlock_irqrestore(&callback_lock, flags); | |
16294 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
1a6e0f06 | 16295 | |
e4b2b4a8 | 16296 | return mask; |
1a6e0f06 | 16297 | } |
e4b2b4a8 JK |
16298 | @@ -2577,14 +2577,14 @@ |
16299 | return true; | |
16300 | ||
16301 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | |
16302 | - spin_lock_irqsave(&callback_lock, flags); | |
16303 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
16304 | ||
16305 | rcu_read_lock(); | |
16306 | cs = nearest_hardwall_ancestor(task_cs(current)); | |
16307 | allowed = node_isset(node, cs->mems_allowed); | |
16308 | rcu_read_unlock(); | |
1a6e0f06 | 16309 | |
e4b2b4a8 JK |
16310 | - spin_unlock_irqrestore(&callback_lock, flags); |
16311 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
16312 | return allowed; | |
1a6e0f06 JK |
16313 | } |
16314 | ||
e4b2b4a8 JK |
16315 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/cpu.c linux-4.14/kernel/cpu.c |
16316 | --- linux-4.14.orig/kernel/cpu.c 2018-09-05 11:03:22.000000000 +0200 | |
16317 | +++ linux-4.14/kernel/cpu.c 2018-09-05 11:05:07.000000000 +0200 | |
16318 | @@ -74,6 +74,11 @@ | |
16319 | .fail = CPUHP_INVALID, | |
16320 | }; | |
16321 | ||
16322 | +#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PREEMPT_RT_FULL) | |
16323 | +static DEFINE_PER_CPU(struct rt_rw_lock, cpuhp_pin_lock) = \ | |
16324 | + __RWLOCK_RT_INITIALIZER(cpuhp_pin_lock); | |
1a6e0f06 JK |
16325 | +#endif |
16326 | + | |
e4b2b4a8 JK |
16327 | #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) |
16328 | static struct lockdep_map cpuhp_state_up_map = | |
16329 | STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); | |
16330 | @@ -287,6 +292,55 @@ | |
16331 | ||
16332 | #ifdef CONFIG_HOTPLUG_CPU | |
16333 | ||
16334 | +/** | |
16335 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
1a6e0f06 | 16336 | + */ |
e4b2b4a8 | 16337 | +void pin_current_cpu(void) |
1a6e0f06 | 16338 | +{ |
e4b2b4a8 JK |
16339 | +#ifdef CONFIG_PREEMPT_RT_FULL |
16340 | + struct rt_rw_lock *cpuhp_pin; | |
16341 | + unsigned int cpu; | |
16342 | + int ret; | |
1a6e0f06 | 16343 | + |
e4b2b4a8 JK |
16344 | +again: |
16345 | + cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); | |
16346 | + ret = __read_rt_trylock(cpuhp_pin); | |
16347 | + if (ret) { | |
16348 | + current->pinned_on_cpu = smp_processor_id(); | |
16349 | + return; | |
16350 | + } | |
16351 | + cpu = smp_processor_id(); | |
16352 | + preempt_lazy_enable(); | |
16353 | + preempt_enable(); | |
1a6e0f06 | 16354 | + |
e4b2b4a8 | 16355 | + __read_rt_lock(cpuhp_pin); |
1a6e0f06 | 16356 | + |
e4b2b4a8 JK |
16357 | + preempt_disable(); |
16358 | + preempt_lazy_disable(); | |
16359 | + if (cpu != smp_processor_id()) { | |
16360 | + __read_rt_unlock(cpuhp_pin); | |
16361 | + goto again; | |
16362 | + } | |
16363 | + current->pinned_on_cpu = cpu; | |
16364 | +#endif | |
16365 | +} | |
1a6e0f06 | 16366 | + |
e4b2b4a8 JK |
16367 | +/** |
16368 | + * unpin_current_cpu - Allow unplug of current cpu | |
16369 | + */ | |
16370 | +void unpin_current_cpu(void) | |
16371 | +{ | |
16372 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16373 | + struct rt_rw_lock *cpuhp_pin = this_cpu_ptr(&cpuhp_pin_lock); | |
1a6e0f06 | 16374 | + |
e4b2b4a8 JK |
16375 | + if (WARN_ON(current->pinned_on_cpu != smp_processor_id())) |
16376 | + cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, current->pinned_on_cpu); | |
1a6e0f06 | 16377 | + |
e4b2b4a8 JK |
16378 | + current->pinned_on_cpu = -1; |
16379 | + __read_rt_unlock(cpuhp_pin); | |
16380 | +#endif | |
16381 | +} | |
1a6e0f06 | 16382 | + |
e4b2b4a8 JK |
16383 | DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); |
16384 | ||
16385 | void cpus_read_lock(void) | |
16386 | @@ -843,6 +897,9 @@ | |
16387 | ||
16388 | static int takedown_cpu(unsigned int cpu) | |
16389 | { | |
16390 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16391 | + struct rt_rw_lock *cpuhp_pin = per_cpu_ptr(&cpuhp_pin_lock, cpu); | |
16392 | +#endif | |
16393 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | |
16394 | int err; | |
16395 | ||
16396 | @@ -855,11 +912,18 @@ | |
16397 | */ | |
16398 | irq_lock_sparse(); | |
16399 | ||
16400 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16401 | + __write_rt_lock(cpuhp_pin); | |
1a6e0f06 JK |
16402 | +#endif |
16403 | + | |
e4b2b4a8 JK |
16404 | /* |
16405 | * So now all preempt/rcu users must observe !cpu_active(). | |
16406 | */ | |
16407 | err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); | |
16408 | if (err) { | |
16409 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16410 | + __write_rt_unlock(cpuhp_pin); | |
1a6e0f06 | 16411 | +#endif |
e4b2b4a8 JK |
16412 | /* CPU refused to die */ |
16413 | irq_unlock_sparse(); | |
16414 | /* Unpark the hotplug thread so we can rollback there */ | |
16415 | @@ -878,6 +942,9 @@ | |
16416 | wait_for_ap_thread(st, false); | |
16417 | BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); | |
16418 | ||
16419 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16420 | + __write_rt_unlock(cpuhp_pin); | |
1a6e0f06 | 16421 | +#endif |
e4b2b4a8 JK |
16422 | /* Interrupts are moved away from the dying cpu, reenable alloc/free */ |
16423 | irq_unlock_sparse(); | |
1a6e0f06 | 16424 | |
e4b2b4a8 JK |
16425 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/debug/kdb/kdb_io.c linux-4.14/kernel/debug/kdb/kdb_io.c |
16426 | --- linux-4.14.orig/kernel/debug/kdb/kdb_io.c 2018-09-05 11:03:22.000000000 +0200 | |
16427 | +++ linux-4.14/kernel/debug/kdb/kdb_io.c 2018-09-05 11:05:07.000000000 +0200 | |
16428 | @@ -854,9 +854,11 @@ | |
16429 | va_list ap; | |
16430 | int r; | |
16431 | ||
16432 | + kdb_trap_printk++; | |
16433 | va_start(ap, fmt); | |
16434 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
16435 | va_end(ap); | |
16436 | + kdb_trap_printk--; | |
16437 | ||
16438 | return r; | |
16439 | } | |
16440 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/events/core.c linux-4.14/kernel/events/core.c | |
16441 | --- linux-4.14.orig/kernel/events/core.c 2018-09-05 11:03:22.000000000 +0200 | |
16442 | +++ linux-4.14/kernel/events/core.c 2018-09-05 11:05:07.000000000 +0200 | |
16443 | @@ -1065,7 +1065,7 @@ | |
16444 | cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval); | |
16445 | ||
16446 | raw_spin_lock_init(&cpuctx->hrtimer_lock); | |
16447 | - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
16448 | + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD); | |
16449 | timer->function = perf_mux_hrtimer_handler; | |
16450 | } | |
16451 | ||
16452 | @@ -8750,7 +8750,7 @@ | |
16453 | if (!is_sampling_event(event)) | |
16454 | return; | |
16455 | ||
16456 | - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
16457 | + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); | |
16458 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
16459 | ||
16460 | /* | |
16461 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/exit.c linux-4.14/kernel/exit.c | |
16462 | --- linux-4.14.orig/kernel/exit.c 2018-09-05 11:03:22.000000000 +0200 | |
16463 | +++ linux-4.14/kernel/exit.c 2018-09-05 11:05:07.000000000 +0200 | |
16464 | @@ -159,7 +159,7 @@ | |
16465 | * Do this under ->siglock, we can race with another thread | |
16466 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
16467 | */ | |
16468 | - flush_sigqueue(&tsk->pending); | |
16469 | + flush_task_sigqueue(tsk); | |
16470 | tsk->sighand = NULL; | |
16471 | spin_unlock(&sighand->siglock); | |
16472 | ||
16473 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/fork.c linux-4.14/kernel/fork.c | |
16474 | --- linux-4.14.orig/kernel/fork.c 2018-09-05 11:03:28.000000000 +0200 | |
16475 | +++ linux-4.14/kernel/fork.c 2018-09-05 11:05:07.000000000 +0200 | |
16476 | @@ -40,6 +40,7 @@ | |
16477 | #include <linux/hmm.h> | |
16478 | #include <linux/fs.h> | |
16479 | #include <linux/mm.h> | |
16480 | +#include <linux/kprobes.h> | |
16481 | #include <linux/vmacache.h> | |
16482 | #include <linux/nsproxy.h> | |
16483 | #include <linux/capability.h> | |
16484 | @@ -407,13 +408,24 @@ | |
16485 | if (atomic_dec_and_test(&sig->sigcnt)) | |
16486 | free_signal_struct(sig); | |
16487 | } | |
16488 | - | |
16489 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
16490 | +static | |
1a6e0f06 | 16491 | +#endif |
e4b2b4a8 JK |
16492 | void __put_task_struct(struct task_struct *tsk) |
16493 | { | |
16494 | WARN_ON(!tsk->exit_state); | |
16495 | WARN_ON(atomic_read(&tsk->usage)); | |
16496 | WARN_ON(tsk == current); | |
16497 | ||
16498 | + /* | |
16499 | + * Remove function-return probe instances associated with this | |
16500 | + * task and put them back on the free list. | |
16501 | + */ | |
16502 | + kprobe_flush_task(tsk); | |
1a6e0f06 | 16503 | + |
e4b2b4a8 JK |
16504 | + /* Task is done with its stack. */ |
16505 | + put_task_stack(tsk); | |
16506 | + | |
16507 | cgroup_free(tsk); | |
16508 | task_numa_free(tsk); | |
16509 | security_task_free(tsk); | |
16510 | @@ -424,7 +436,18 @@ | |
16511 | if (!profile_handoff_task(tsk)) | |
16512 | free_task(tsk); | |
16513 | } | |
16514 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
16515 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
1a6e0f06 | 16516 | +#else |
e4b2b4a8 JK |
16517 | +void __put_task_struct_cb(struct rcu_head *rhp) |
16518 | +{ | |
16519 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
1a6e0f06 | 16520 | + |
e4b2b4a8 JK |
16521 | + __put_task_struct(tsk); |
16522 | + | |
16523 | +} | |
16524 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
16525 | +#endif | |
16526 | ||
16527 | void __init __weak arch_task_cache_init(void) { } | |
16528 | ||
16529 | @@ -563,7 +586,8 @@ | |
16530 | #ifdef CONFIG_CC_STACKPROTECTOR | |
16531 | tsk->stack_canary = get_random_canary(); | |
1a6e0f06 | 16532 | #endif |
e4b2b4a8 JK |
16533 | - |
16534 | + if (orig->cpus_ptr == &orig->cpus_mask) | |
16535 | + tsk->cpus_ptr = &tsk->cpus_mask; | |
16536 | /* | |
16537 | * One for us, one for whoever does the "release_task()" (usually | |
16538 | * parent) | |
16539 | @@ -575,6 +599,7 @@ | |
16540 | tsk->splice_pipe = NULL; | |
16541 | tsk->task_frag.page = NULL; | |
16542 | tsk->wake_q.next = NULL; | |
16543 | + tsk->wake_q_sleeper.next = NULL; | |
1a6e0f06 | 16544 | |
e4b2b4a8 JK |
16545 | account_kernel_stack(tsk, 1); |
16546 | ||
16547 | @@ -915,6 +940,19 @@ | |
16548 | } | |
16549 | EXPORT_SYMBOL_GPL(__mmdrop); | |
16550 | ||
16551 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
16552 | +/* | |
16553 | + * RCU callback for delayed mm drop. Not strictly rcu, but we don't | |
16554 | + * want another facility to make this work. | |
16555 | + */ | |
16556 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
16557 | +{ | |
16558 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
16559 | + | |
16560 | + __mmdrop(mm); | |
16561 | +} | |
16562 | +#endif | |
16563 | + | |
16564 | static inline void __mmput(struct mm_struct *mm) | |
16565 | { | |
16566 | VM_BUG_ON(atomic_read(&mm->mm_users)); | |
16567 | @@ -1494,6 +1532,9 @@ | |
16568 | */ | |
16569 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
16570 | { | |
16571 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
16572 | + tsk->posix_timer_list = NULL; | |
16573 | +#endif | |
16574 | tsk->cputime_expires.prof_exp = 0; | |
16575 | tsk->cputime_expires.virt_exp = 0; | |
16576 | tsk->cputime_expires.sched_exp = 0; | |
16577 | @@ -1646,6 +1687,7 @@ | |
16578 | spin_lock_init(&p->alloc_lock); | |
16579 | ||
16580 | init_sigpending(&p->pending); | |
16581 | + p->sigqueue_cache = NULL; | |
16582 | ||
16583 | p->utime = p->stime = p->gtime = 0; | |
16584 | #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME | |
16585 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/futex.c linux-4.14/kernel/futex.c | |
16586 | --- linux-4.14.orig/kernel/futex.c 2018-09-05 11:03:22.000000000 +0200 | |
16587 | +++ linux-4.14/kernel/futex.c 2018-09-05 11:05:07.000000000 +0200 | |
16588 | @@ -936,7 +936,9 @@ | |
16589 | if (head->next != next) { | |
16590 | /* retain curr->pi_lock for the loop invariant */ | |
16591 | raw_spin_unlock(&pi_state->pi_mutex.wait_lock); | |
16592 | + raw_spin_unlock_irq(&curr->pi_lock); | |
16593 | spin_unlock(&hb->lock); | |
16594 | + raw_spin_lock_irq(&curr->pi_lock); | |
16595 | put_pi_state(pi_state); | |
16596 | continue; | |
16597 | } | |
16598 | @@ -1430,6 +1432,7 @@ | |
16599 | struct task_struct *new_owner; | |
16600 | bool postunlock = false; | |
16601 | DEFINE_WAKE_Q(wake_q); | |
16602 | + DEFINE_WAKE_Q(wake_sleeper_q); | |
16603 | int ret = 0; | |
16604 | ||
16605 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | |
16606 | @@ -1491,13 +1494,13 @@ | |
16607 | pi_state->owner = new_owner; | |
16608 | raw_spin_unlock(&new_owner->pi_lock); | |
16609 | ||
16610 | - postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
1a6e0f06 | 16611 | - |
e4b2b4a8 JK |
16612 | + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, |
16613 | + &wake_sleeper_q); | |
16614 | out_unlock: | |
16615 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
16616 | ||
16617 | if (postunlock) | |
16618 | - rt_mutex_postunlock(&wake_q); | |
16619 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
16620 | ||
16621 | return ret; | |
16622 | } | |
16623 | @@ -2104,6 +2107,16 @@ | |
16624 | requeue_pi_wake_futex(this, &key2, hb2); | |
16625 | drop_count++; | |
16626 | continue; | |
16627 | + } else if (ret == -EAGAIN) { | |
16628 | + /* | |
16629 | + * Waiter was woken by timeout or | |
16630 | + * signal and has set pi_blocked_on to | |
16631 | + * PI_WAKEUP_INPROGRESS before we | |
16632 | + * tried to enqueue it on the rtmutex. | |
16633 | + */ | |
16634 | + this->pi_state = NULL; | |
16635 | + put_pi_state(pi_state); | |
16636 | + continue; | |
16637 | } else if (ret) { | |
16638 | /* | |
16639 | * rt_mutex_start_proxy_lock() detected a | |
16640 | @@ -2642,10 +2655,9 @@ | |
16641 | if (abs_time) { | |
16642 | to = &timeout; | |
16643 | ||
16644 | - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? | |
16645 | - CLOCK_REALTIME : CLOCK_MONOTONIC, | |
16646 | - HRTIMER_MODE_ABS); | |
16647 | - hrtimer_init_sleeper(to, current); | |
16648 | + hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? | |
16649 | + CLOCK_REALTIME : CLOCK_MONOTONIC, | |
16650 | + HRTIMER_MODE_ABS, current); | |
16651 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | |
16652 | current->timer_slack_ns); | |
16653 | } | |
16654 | @@ -2744,9 +2756,8 @@ | |
16655 | ||
16656 | if (time) { | |
16657 | to = &timeout; | |
16658 | - hrtimer_init_on_stack(&to->timer, CLOCK_REALTIME, | |
16659 | - HRTIMER_MODE_ABS); | |
16660 | - hrtimer_init_sleeper(to, current); | |
16661 | + hrtimer_init_sleeper_on_stack(to, CLOCK_REALTIME, | |
16662 | + HRTIMER_MODE_ABS, current); | |
16663 | hrtimer_set_expires(&to->timer, *time); | |
16664 | } | |
16665 | ||
16666 | @@ -2801,7 +2812,7 @@ | |
16667 | goto no_block; | |
16668 | } | |
16669 | ||
16670 | - rt_mutex_init_waiter(&rt_waiter); | |
16671 | + rt_mutex_init_waiter(&rt_waiter, false); | |
16672 | ||
16673 | /* | |
16674 | * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not | |
16675 | @@ -2816,9 +2827,18 @@ | |
16676 | * lock handoff sequence. | |
16677 | */ | |
16678 | raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); | |
16679 | + /* | |
16680 | + * the migrate_disable() here disables migration in the in_atomic() fast | |
16681 | + * path which is enabled again in the following spin_unlock(). We have | |
16682 | + * one migrate_disable() pending in the slow-path which is reversed | |
16683 | + * after the raw_spin_unlock_irq() where we leave the atomic context. | |
16684 | + */ | |
16685 | + migrate_disable(); | |
16686 | + | |
16687 | spin_unlock(q.lock_ptr); | |
16688 | ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); | |
16689 | raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); | |
16690 | + migrate_enable(); | |
16691 | ||
16692 | if (ret) { | |
16693 | if (ret == 1) | |
16694 | @@ -2965,11 +2985,21 @@ | |
16695 | * observed. | |
16696 | */ | |
16697 | raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
16698 | + /* | |
16699 | + * Magic trickery for now to make the RT migrate disable | |
16700 | + * logic happy. The following spin_unlock() happens with | |
16701 | + * interrupts disabled so the internal migrate_enable() | |
16702 | + * won't undo the migrate_disable() which was issued when | |
16703 | + * locking hb->lock. | |
16704 | + */ | |
16705 | + migrate_disable(); | |
16706 | spin_unlock(&hb->lock); | |
16707 | ||
16708 | /* drops pi_state->pi_mutex.wait_lock */ | |
16709 | ret = wake_futex_pi(uaddr, uval, pi_state); | |
16710 | ||
16711 | + migrate_enable(); | |
16712 | + | |
16713 | put_pi_state(pi_state); | |
16714 | ||
16715 | /* | |
16716 | @@ -3127,7 +3157,7 @@ | |
16717 | struct hrtimer_sleeper timeout, *to = NULL; | |
16718 | struct futex_pi_state *pi_state = NULL; | |
16719 | struct rt_mutex_waiter rt_waiter; | |
16720 | - struct futex_hash_bucket *hb; | |
16721 | + struct futex_hash_bucket *hb, *hb2; | |
16722 | union futex_key key2 = FUTEX_KEY_INIT; | |
16723 | struct futex_q q = futex_q_init; | |
16724 | int res, ret; | |
16725 | @@ -3143,10 +3173,9 @@ | |
16726 | ||
16727 | if (abs_time) { | |
16728 | to = &timeout; | |
16729 | - hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ? | |
16730 | - CLOCK_REALTIME : CLOCK_MONOTONIC, | |
16731 | - HRTIMER_MODE_ABS); | |
16732 | - hrtimer_init_sleeper(to, current); | |
16733 | + hrtimer_init_sleeper_on_stack(to, (flags & FLAGS_CLOCKRT) ? | |
16734 | + CLOCK_REALTIME : CLOCK_MONOTONIC, | |
16735 | + HRTIMER_MODE_ABS, current); | |
16736 | hrtimer_set_expires_range_ns(&to->timer, *abs_time, | |
16737 | current->timer_slack_ns); | |
16738 | } | |
16739 | @@ -3155,7 +3184,7 @@ | |
16740 | * The waiter is allocated on our stack, manipulated by the requeue | |
16741 | * code while we sleep on uaddr. | |
16742 | */ | |
16743 | - rt_mutex_init_waiter(&rt_waiter); | |
16744 | + rt_mutex_init_waiter(&rt_waiter, false); | |
1a6e0f06 | 16745 | |
e4b2b4a8 JK |
16746 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); |
16747 | if (unlikely(ret != 0)) | |
16748 | @@ -3186,20 +3215,55 @@ | |
16749 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | |
16750 | futex_wait_queue_me(hb, &q, to); | |
1a6e0f06 | 16751 | |
e4b2b4a8 JK |
16752 | - spin_lock(&hb->lock); |
16753 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
16754 | - spin_unlock(&hb->lock); | |
16755 | - if (ret) | |
16756 | - goto out_put_keys; | |
16757 | + /* | |
16758 | + * On RT we must avoid races with requeue and trying to block | |
16759 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
16760 | + * serializing access to pi_blocked_on with pi_lock. | |
16761 | + */ | |
16762 | + raw_spin_lock_irq(¤t->pi_lock); | |
16763 | + if (current->pi_blocked_on) { | |
16764 | + /* | |
16765 | + * We have been requeued or are in the process of | |
16766 | + * being requeued. | |
16767 | + */ | |
16768 | + raw_spin_unlock_irq(¤t->pi_lock); | |
16769 | + } else { | |
16770 | + /* | |
16771 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
16772 | + * prevents a concurrent requeue from moving us to the | |
16773 | + * uaddr2 rtmutex. After that we can safely acquire | |
16774 | + * (and possibly block on) hb->lock. | |
16775 | + */ | |
16776 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
16777 | + raw_spin_unlock_irq(¤t->pi_lock); | |
1a6e0f06 | 16778 | + |
e4b2b4a8 | 16779 | + spin_lock(&hb->lock); |
1a6e0f06 | 16780 | + |
e4b2b4a8 JK |
16781 | + /* |
16782 | + * Clean up pi_blocked_on. We might leak it otherwise | |
16783 | + * when we succeeded with the hb->lock in the fast | |
16784 | + * path. | |
16785 | + */ | |
16786 | + raw_spin_lock_irq(¤t->pi_lock); | |
16787 | + current->pi_blocked_on = NULL; | |
16788 | + raw_spin_unlock_irq(¤t->pi_lock); | |
1a6e0f06 | 16789 | + |
e4b2b4a8 JK |
16790 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); |
16791 | + spin_unlock(&hb->lock); | |
16792 | + if (ret) | |
16793 | + goto out_put_keys; | |
1a6e0f06 | 16794 | + } |
c7c16703 | 16795 | |
e4b2b4a8 JK |
16796 | /* |
16797 | - * In order for us to be here, we know our q.key == key2, and since | |
16798 | - * we took the hb->lock above, we also know that futex_requeue() has | |
16799 | - * completed and we no longer have to concern ourselves with a wakeup | |
16800 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
16801 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
16802 | - * reference count. | |
16803 | + * In order to be here, we have either been requeued, are in | |
16804 | + * the process of being requeued, or requeue successfully | |
16805 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
16806 | + * non-null above, we may be racing with a requeue. Do not | |
16807 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
16808 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
16809 | + * reference and incremented our key2 reference count. | |
16810 | */ | |
16811 | + hb2 = hash_futex(&key2); | |
16812 | ||
16813 | /* Check if the requeue code acquired the second futex for us. */ | |
16814 | if (!q.rt_waiter) { | |
16815 | @@ -3208,7 +3272,8 @@ | |
16816 | * did a lock-steal - fix up the PI-state in that case. | |
16817 | */ | |
16818 | if (q.pi_state && (q.pi_state->owner != current)) { | |
16819 | - spin_lock(q.lock_ptr); | |
16820 | + spin_lock(&hb2->lock); | |
16821 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
16822 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
16823 | if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { | |
16824 | pi_state = q.pi_state; | |
16825 | @@ -3219,7 +3284,7 @@ | |
16826 | * the requeue_pi() code acquired for us. | |
16827 | */ | |
16828 | put_pi_state(q.pi_state); | |
16829 | - spin_unlock(q.lock_ptr); | |
16830 | + spin_unlock(&hb2->lock); | |
16831 | } | |
16832 | } else { | |
16833 | struct rt_mutex *pi_mutex; | |
16834 | @@ -3233,7 +3298,8 @@ | |
16835 | pi_mutex = &q.pi_state->pi_mutex; | |
16836 | ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); | |
16837 | ||
16838 | - spin_lock(q.lock_ptr); | |
16839 | + spin_lock(&hb2->lock); | |
16840 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
16841 | if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) | |
16842 | ret = 0; | |
16843 | ||
16844 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/irq/handle.c linux-4.14/kernel/irq/handle.c | |
16845 | --- linux-4.14.orig/kernel/irq/handle.c 2017-11-12 19:46:13.000000000 +0100 | |
16846 | +++ linux-4.14/kernel/irq/handle.c 2018-09-05 11:05:07.000000000 +0200 | |
16847 | @@ -183,10 +183,16 @@ | |
16848 | { | |
16849 | irqreturn_t retval; | |
16850 | unsigned int flags = 0; | |
16851 | + struct pt_regs *regs = get_irq_regs(); | |
16852 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
16853 | ||
16854 | retval = __handle_irq_event_percpu(desc, &flags); | |
16855 | ||
16856 | - add_interrupt_randomness(desc->irq_data.irq, flags); | |
c7c16703 | 16857 | +#ifdef CONFIG_PREEMPT_RT_FULL |
e4b2b4a8 | 16858 | + desc->random_ip = ip; |
c7c16703 | 16859 | +#else |
e4b2b4a8 | 16860 | + add_interrupt_randomness(desc->irq_data.irq, flags, ip); |
c7c16703 JK |
16861 | +#endif |
16862 | ||
e4b2b4a8 JK |
16863 | if (!noirqdebug) |
16864 | note_interrupt(desc, retval); | |
16865 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/irq/manage.c linux-4.14/kernel/irq/manage.c | |
16866 | --- linux-4.14.orig/kernel/irq/manage.c 2018-09-05 11:03:22.000000000 +0200 | |
16867 | +++ linux-4.14/kernel/irq/manage.c 2018-09-05 11:05:07.000000000 +0200 | |
16868 | @@ -24,6 +24,7 @@ | |
16869 | #include "internals.h" | |
1a6e0f06 | 16870 | |
e4b2b4a8 JK |
16871 | #ifdef CONFIG_IRQ_FORCED_THREADING |
16872 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
16873 | __read_mostly bool force_irqthreads; | |
16874 | ||
16875 | static int __init setup_forced_irqthreads(char *arg) | |
16876 | @@ -32,6 +33,7 @@ | |
16877 | return 0; | |
16878 | } | |
16879 | early_param("threadirqs", setup_forced_irqthreads); | |
16880 | +# endif | |
16881 | #endif | |
16882 | ||
16883 | static void __synchronize_hardirq(struct irq_desc *desc) | |
16884 | @@ -224,7 +226,12 @@ | |
16885 | ||
16886 | if (desc->affinity_notify) { | |
16887 | kref_get(&desc->affinity_notify->kref); | |
1a6e0f06 | 16888 | + |
e4b2b4a8 JK |
16889 | +#ifdef CONFIG_PREEMPT_RT_BASE |
16890 | + swork_queue(&desc->affinity_notify->swork); | |
1a6e0f06 | 16891 | +#else |
e4b2b4a8 JK |
16892 | schedule_work(&desc->affinity_notify->work); |
16893 | +#endif | |
16894 | } | |
16895 | irqd_set(data, IRQD_AFFINITY_SET); | |
1a6e0f06 | 16896 | |
e4b2b4a8 JK |
16897 | @@ -262,10 +269,8 @@ |
16898 | } | |
16899 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
16900 | ||
16901 | -static void irq_affinity_notify(struct work_struct *work) | |
16902 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
1a6e0f06 | 16903 | { |
e4b2b4a8 JK |
16904 | - struct irq_affinity_notify *notify = |
16905 | - container_of(work, struct irq_affinity_notify, work); | |
16906 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
16907 | cpumask_var_t cpumask; | |
16908 | unsigned long flags; | |
16909 | @@ -287,6 +292,35 @@ | |
16910 | kref_put(¬ify->kref, notify->release); | |
1a6e0f06 JK |
16911 | } |
16912 | ||
e4b2b4a8 JK |
16913 | +#ifdef CONFIG_PREEMPT_RT_BASE |
16914 | +static void init_helper_thread(void) | |
1a6e0f06 | 16915 | +{ |
e4b2b4a8 JK |
16916 | + static int init_sworker_once; |
16917 | + | |
16918 | + if (init_sworker_once) | |
16919 | + return; | |
16920 | + if (WARN_ON(swork_get())) | |
16921 | + return; | |
16922 | + init_sworker_once = 1; | |
1a6e0f06 JK |
16923 | +} |
16924 | + | |
e4b2b4a8 | 16925 | +static void irq_affinity_notify(struct swork_event *swork) |
1a6e0f06 | 16926 | +{ |
e4b2b4a8 JK |
16927 | + struct irq_affinity_notify *notify = |
16928 | + container_of(swork, struct irq_affinity_notify, swork); | |
16929 | + _irq_affinity_notify(notify); | |
1a6e0f06 JK |
16930 | +} |
16931 | + | |
e4b2b4a8 JK |
16932 | +#else |
16933 | + | |
16934 | +static void irq_affinity_notify(struct work_struct *work) | |
1a6e0f06 | 16935 | +{ |
e4b2b4a8 JK |
16936 | + struct irq_affinity_notify *notify = |
16937 | + container_of(work, struct irq_affinity_notify, work); | |
16938 | + _irq_affinity_notify(notify); | |
1a6e0f06 JK |
16939 | +} |
16940 | +#endif | |
16941 | + | |
e4b2b4a8 JK |
16942 | /** |
16943 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
16944 | * @irq: Interrupt for which to enable/disable notification | |
16945 | @@ -315,7 +349,12 @@ | |
16946 | if (notify) { | |
16947 | notify->irq = irq; | |
16948 | kref_init(¬ify->kref); | |
16949 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
16950 | + INIT_SWORK(¬ify->swork, irq_affinity_notify); | |
16951 | + init_helper_thread(); | |
16952 | +#else | |
16953 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
16954 | +#endif | |
16955 | } | |
1a6e0f06 | 16956 | |
e4b2b4a8 JK |
16957 | raw_spin_lock_irqsave(&desc->lock, flags); |
16958 | @@ -883,7 +922,15 @@ | |
16959 | local_bh_disable(); | |
16960 | ret = action->thread_fn(action->irq, action->dev_id); | |
16961 | irq_finalize_oneshot(desc, action); | |
16962 | - local_bh_enable(); | |
16963 | + /* | |
16964 | + * Interrupts which have real time requirements can be set up | |
16965 | + * to avoid softirq processing in the thread handler. This is | |
16966 | + * safe as these interrupts do not raise soft interrupts. | |
16967 | + */ | |
16968 | + if (irq_settings_no_softirq_call(desc)) | |
16969 | + _local_bh_enable(); | |
16970 | + else | |
16971 | + local_bh_enable(); | |
16972 | return ret; | |
16973 | } | |
1a6e0f06 | 16974 | |
e4b2b4a8 JK |
16975 | @@ -980,6 +1027,12 @@ |
16976 | if (action_ret == IRQ_WAKE_THREAD) | |
16977 | irq_wake_secondary(desc, action); | |
1a6e0f06 | 16978 | |
e4b2b4a8 JK |
16979 | +#ifdef CONFIG_PREEMPT_RT_FULL |
16980 | + migrate_disable(); | |
16981 | + add_interrupt_randomness(action->irq, 0, | |
16982 | + desc->random_ip ^ (unsigned long) action); | |
16983 | + migrate_enable(); | |
16984 | +#endif | |
16985 | wake_threads_waitq(desc); | |
16986 | } | |
1a6e0f06 | 16987 | |
e4b2b4a8 JK |
16988 | @@ -1378,6 +1431,9 @@ |
16989 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
16990 | } | |
1a6e0f06 | 16991 | |
e4b2b4a8 JK |
16992 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) |
16993 | + irq_settings_set_no_softirq_call(desc); | |
1a6e0f06 | 16994 | + |
e4b2b4a8 JK |
16995 | if (irq_settings_can_autoenable(desc)) { |
16996 | irq_startup(desc, IRQ_RESEND, IRQ_START_COND); | |
16997 | } else { | |
16998 | @@ -2159,7 +2215,7 @@ | |
16999 | * This call sets the internal irqchip state of an interrupt, | |
17000 | * depending on the value of @which. | |
1a6e0f06 | 17001 | * |
e4b2b4a8 JK |
17002 | - * This function should be called with preemption disabled if the |
17003 | + * This function should be called with migration disabled if the | |
17004 | * interrupt controller has per-cpu registers. | |
17005 | */ | |
17006 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
17007 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/irq/settings.h linux-4.14/kernel/irq/settings.h | |
17008 | --- linux-4.14.orig/kernel/irq/settings.h 2017-11-12 19:46:13.000000000 +0100 | |
17009 | +++ linux-4.14/kernel/irq/settings.h 2018-09-05 11:05:07.000000000 +0200 | |
17010 | @@ -17,6 +17,7 @@ | |
17011 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
17012 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
17013 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
17014 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
17015 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
17016 | }; | |
1a6e0f06 | 17017 | |
e4b2b4a8 JK |
17018 | @@ -31,6 +32,7 @@ |
17019 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
17020 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
17021 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
17022 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
17023 | #undef IRQF_MODIFY_MASK | |
17024 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
1a6e0f06 | 17025 | |
e4b2b4a8 JK |
17026 | @@ -41,6 +43,16 @@ |
17027 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
17028 | } | |
1a6e0f06 | 17029 | |
e4b2b4a8 JK |
17030 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) |
17031 | +{ | |
17032 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
17033 | +} | |
1a6e0f06 | 17034 | + |
e4b2b4a8 JK |
17035 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) |
17036 | +{ | |
17037 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
17038 | +} | |
1a6e0f06 | 17039 | + |
e4b2b4a8 JK |
17040 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) |
17041 | { | |
17042 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
17043 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/irq/spurious.c linux-4.14/kernel/irq/spurious.c | |
17044 | --- linux-4.14.orig/kernel/irq/spurious.c 2017-11-12 19:46:13.000000000 +0100 | |
17045 | +++ linux-4.14/kernel/irq/spurious.c 2018-09-05 11:05:07.000000000 +0200 | |
17046 | @@ -445,6 +445,10 @@ | |
1a6e0f06 | 17047 | |
e4b2b4a8 JK |
17048 | static int __init irqfixup_setup(char *str) |
17049 | { | |
17050 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17051 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
17052 | + return 1; | |
17053 | +#endif | |
17054 | irqfixup = 1; | |
17055 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
17056 | printk(KERN_WARNING "This may impact system performance.\n"); | |
17057 | @@ -457,6 +461,10 @@ | |
1a6e0f06 | 17058 | |
e4b2b4a8 JK |
17059 | static int __init irqpoll_setup(char *str) |
17060 | { | |
17061 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17062 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
17063 | + return 1; | |
17064 | +#endif | |
17065 | irqfixup = 2; | |
17066 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
17067 | "enabled\n"); | |
17068 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/irq_work.c linux-4.14/kernel/irq_work.c | |
17069 | --- linux-4.14.orig/kernel/irq_work.c 2017-11-12 19:46:13.000000000 +0100 | |
17070 | +++ linux-4.14/kernel/irq_work.c 2018-09-05 11:05:07.000000000 +0200 | |
17071 | @@ -17,6 +17,7 @@ | |
17072 | #include <linux/cpu.h> | |
17073 | #include <linux/notifier.h> | |
17074 | #include <linux/smp.h> | |
17075 | +#include <linux/interrupt.h> | |
17076 | #include <asm/processor.h> | |
1a6e0f06 | 17077 | |
1a6e0f06 | 17078 | |
e4b2b4a8 | 17079 | @@ -65,6 +66,8 @@ |
1a6e0f06 | 17080 | */ |
e4b2b4a8 JK |
17081 | bool irq_work_queue_on(struct irq_work *work, int cpu) |
17082 | { | |
17083 | + struct llist_head *list; | |
1a6e0f06 | 17084 | + |
e4b2b4a8 JK |
17085 | /* All work should have been flushed before going offline */ |
17086 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
1a6e0f06 | 17087 | |
e4b2b4a8 JK |
17088 | @@ -75,7 +78,12 @@ |
17089 | if (!irq_work_claim(work)) | |
17090 | return false; | |
17091 | ||
17092 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
17093 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
17094 | + list = &per_cpu(lazy_list, cpu); | |
17095 | + else | |
17096 | + list = &per_cpu(raised_list, cpu); | |
17097 | + | |
17098 | + if (llist_add(&work->llnode, list)) | |
17099 | arch_send_call_function_single_ipi(cpu); | |
c7c16703 | 17100 | |
e4b2b4a8 JK |
17101 | return true; |
17102 | @@ -86,6 +94,9 @@ | |
17103 | /* Enqueue the irq work @work on the current CPU */ | |
17104 | bool irq_work_queue(struct irq_work *work) | |
17105 | { | |
17106 | + struct llist_head *list; | |
17107 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
17108 | + | |
17109 | /* Only queue if not already pending */ | |
17110 | if (!irq_work_claim(work)) | |
17111 | return false; | |
17112 | @@ -93,13 +104,15 @@ | |
17113 | /* Queue the entry and raise the IPI if needed. */ | |
17114 | preempt_disable(); | |
c7c16703 | 17115 | |
e4b2b4a8 JK |
17116 | - /* If the work is "lazy", handle it from next tick if any */ |
17117 | - if (work->flags & IRQ_WORK_LAZY) { | |
17118 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
17119 | - tick_nohz_tick_stopped()) | |
17120 | - arch_irq_work_raise(); | |
17121 | - } else { | |
17122 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
17123 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
17124 | + | |
17125 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
17126 | + list = this_cpu_ptr(&lazy_list); | |
17127 | + else | |
17128 | + list = this_cpu_ptr(&raised_list); | |
17129 | + | |
17130 | + if (llist_add(&work->llnode, list)) { | |
17131 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
17132 | arch_irq_work_raise(); | |
17133 | } | |
c7c16703 | 17134 | |
e4b2b4a8 JK |
17135 | @@ -116,9 +129,8 @@ |
17136 | raised = this_cpu_ptr(&raised_list); | |
17137 | lazy = this_cpu_ptr(&lazy_list); | |
c7c16703 | 17138 | |
e4b2b4a8 JK |
17139 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) |
17140 | - if (llist_empty(lazy)) | |
17141 | - return false; | |
17142 | + if (llist_empty(raised) && llist_empty(lazy)) | |
17143 | + return false; | |
c7c16703 | 17144 | |
e4b2b4a8 JK |
17145 | /* All work should have been flushed before going offline */ |
17146 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
17147 | @@ -132,7 +144,7 @@ | |
17148 | struct irq_work *work; | |
17149 | struct llist_node *llnode; | |
c7c16703 | 17150 | |
e4b2b4a8 JK |
17151 | - BUG_ON(!irqs_disabled()); |
17152 | + BUG_ON_NONRT(!irqs_disabled()); | |
c7c16703 | 17153 | |
e4b2b4a8 JK |
17154 | if (llist_empty(list)) |
17155 | return; | |
17156 | @@ -169,7 +181,16 @@ | |
17157 | void irq_work_run(void) | |
c7c16703 | 17158 | { |
e4b2b4a8 JK |
17159 | irq_work_run_list(this_cpu_ptr(&raised_list)); |
17160 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17161 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
17162 | + /* | |
17163 | + * NOTE: we raise softirq via IPI for safety, | |
17164 | + * and execute in irq_work_tick() to move the | |
17165 | + * overhead from hard to soft irq context. | |
17166 | + */ | |
17167 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
17168 | + raise_softirq(TIMER_SOFTIRQ); | |
17169 | + } else | |
17170 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
c7c16703 | 17171 | } |
e4b2b4a8 | 17172 | EXPORT_SYMBOL_GPL(irq_work_run); |
c7c16703 | 17173 | |
e4b2b4a8 | 17174 | @@ -179,8 +200,17 @@ |
1a6e0f06 | 17175 | |
e4b2b4a8 JK |
17176 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) |
17177 | irq_work_run_list(raised); | |
1a6e0f06 | 17178 | + |
e4b2b4a8 JK |
17179 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) |
17180 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17181 | +} | |
1a6e0f06 | 17182 | + |
e4b2b4a8 JK |
17183 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) |
17184 | +void irq_work_tick_soft(void) | |
17185 | +{ | |
17186 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17187 | } | |
1a6e0f06 | 17188 | +#endif |
1a6e0f06 | 17189 | |
e4b2b4a8 JK |
17190 | /* |
17191 | * Synchronize against the irq_work @entry, ensures the entry is not | |
17192 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/Kconfig.locks linux-4.14/kernel/Kconfig.locks | |
17193 | --- linux-4.14.orig/kernel/Kconfig.locks 2017-11-12 19:46:13.000000000 +0100 | |
17194 | +++ linux-4.14/kernel/Kconfig.locks 2018-09-05 11:05:07.000000000 +0200 | |
17195 | @@ -225,11 +225,11 @@ | |
1a6e0f06 | 17196 | |
e4b2b4a8 JK |
17197 | config MUTEX_SPIN_ON_OWNER |
17198 | def_bool y | |
17199 | - depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW | |
17200 | + depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
1a6e0f06 | 17201 | |
e4b2b4a8 JK |
17202 | config RWSEM_SPIN_ON_OWNER |
17203 | def_bool y | |
17204 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
17205 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
1a6e0f06 | 17206 | |
e4b2b4a8 JK |
17207 | config LOCK_SPIN_ON_OWNER |
17208 | def_bool y | |
17209 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/Kconfig.preempt linux-4.14/kernel/Kconfig.preempt | |
17210 | --- linux-4.14.orig/kernel/Kconfig.preempt 2017-11-12 19:46:13.000000000 +0100 | |
17211 | +++ linux-4.14/kernel/Kconfig.preempt 2018-09-05 11:05:07.000000000 +0200 | |
17212 | @@ -1,3 +1,16 @@ | |
17213 | +config PREEMPT | |
17214 | + bool | |
17215 | + select PREEMPT_COUNT | |
17216 | + | |
17217 | +config PREEMPT_RT_BASE | |
17218 | + bool | |
17219 | + select PREEMPT | |
17220 | + | |
17221 | +config HAVE_PREEMPT_LAZY | |
17222 | + bool | |
17223 | + | |
17224 | +config PREEMPT_LAZY | |
17225 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
1a6e0f06 | 17226 | |
e4b2b4a8 JK |
17227 | choice |
17228 | prompt "Preemption Model" | |
17229 | @@ -33,9 +46,9 @@ | |
1a6e0f06 | 17230 | |
e4b2b4a8 | 17231 | Select this if you are building a kernel for a desktop system. |
1a6e0f06 | 17232 | |
e4b2b4a8 JK |
17233 | -config PREEMPT |
17234 | +config PREEMPT__LL | |
17235 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
17236 | - select PREEMPT_COUNT | |
17237 | + select PREEMPT | |
17238 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
17239 | help | |
17240 | This option reduces the latency of the kernel by making | |
17241 | @@ -52,6 +65,22 @@ | |
17242 | embedded system with latency requirements in the milliseconds | |
17243 | range. | |
1a6e0f06 | 17244 | |
e4b2b4a8 JK |
17245 | +config PREEMPT_RTB |
17246 | + bool "Preemptible Kernel (Basic RT)" | |
17247 | + select PREEMPT_RT_BASE | |
17248 | + help | |
17249 | + This option is basically the same as (Low-Latency Desktop) but | |
17250 | + enables changes which are preliminary for the full preemptible | |
17251 | + RT kernel. | |
1a6e0f06 | 17252 | + |
e4b2b4a8 JK |
17253 | +config PREEMPT_RT_FULL |
17254 | + bool "Fully Preemptible Kernel (RT)" | |
17255 | + depends on IRQ_FORCED_THREADING | |
17256 | + select PREEMPT_RT_BASE | |
17257 | + select PREEMPT_RCU | |
17258 | + help | |
17259 | + All and everything | |
17260 | + | |
17261 | endchoice | |
1a6e0f06 | 17262 | |
e4b2b4a8 JK |
17263 | config PREEMPT_COUNT |
17264 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/ksysfs.c linux-4.14/kernel/ksysfs.c | |
17265 | --- linux-4.14.orig/kernel/ksysfs.c 2017-11-12 19:46:13.000000000 +0100 | |
17266 | +++ linux-4.14/kernel/ksysfs.c 2018-09-05 11:05:07.000000000 +0200 | |
17267 | @@ -140,6 +140,15 @@ | |
1a6e0f06 | 17268 | |
e4b2b4a8 | 17269 | #endif /* CONFIG_CRASH_CORE */ |
1a6e0f06 | 17270 | |
e4b2b4a8 JK |
17271 | +#if defined(CONFIG_PREEMPT_RT_FULL) |
17272 | +static ssize_t realtime_show(struct kobject *kobj, | |
17273 | + struct kobj_attribute *attr, char *buf) | |
17274 | +{ | |
17275 | + return sprintf(buf, "%d\n", 1); | |
17276 | +} | |
17277 | +KERNEL_ATTR_RO(realtime); | |
17278 | +#endif | |
17279 | + | |
17280 | /* whether file capabilities are enabled */ | |
17281 | static ssize_t fscaps_show(struct kobject *kobj, | |
17282 | struct kobj_attribute *attr, char *buf) | |
17283 | @@ -231,6 +240,9 @@ | |
17284 | &rcu_expedited_attr.attr, | |
17285 | &rcu_normal_attr.attr, | |
17286 | #endif | |
17287 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17288 | + &realtime_attr.attr, | |
1a6e0f06 | 17289 | +#endif |
e4b2b4a8 JK |
17290 | NULL |
17291 | }; | |
1a6e0f06 | 17292 | |
e4b2b4a8 JK |
17293 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/lockdep.c linux-4.14/kernel/locking/lockdep.c |
17294 | --- linux-4.14.orig/kernel/locking/lockdep.c 2018-09-05 11:03:29.000000000 +0200 | |
17295 | +++ linux-4.14/kernel/locking/lockdep.c 2018-09-05 11:05:07.000000000 +0200 | |
17296 | @@ -3916,6 +3916,7 @@ | |
17297 | } | |
17298 | } | |
1a6e0f06 | 17299 | |
e4b2b4a8 JK |
17300 | +#ifndef CONFIG_PREEMPT_RT_FULL |
17301 | /* | |
17302 | * We dont accurately track softirq state in e.g. | |
17303 | * hardirq contexts (such as on 4KSTACKS), so only | |
17304 | @@ -3930,6 +3931,7 @@ | |
17305 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | |
17306 | } | |
17307 | } | |
17308 | +#endif | |
1a6e0f06 | 17309 | |
e4b2b4a8 JK |
17310 | if (!debug_locks) |
17311 | print_irqtrace_events(current); | |
17312 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/locktorture.c linux-4.14/kernel/locking/locktorture.c | |
17313 | --- linux-4.14.orig/kernel/locking/locktorture.c 2018-09-05 11:03:22.000000000 +0200 | |
17314 | +++ linux-4.14/kernel/locking/locktorture.c 2018-09-05 11:05:07.000000000 +0200 | |
17315 | @@ -26,7 +26,6 @@ | |
17316 | #include <linux/kthread.h> | |
17317 | #include <linux/sched/rt.h> | |
17318 | #include <linux/spinlock.h> | |
17319 | -#include <linux/rwlock.h> | |
17320 | #include <linux/mutex.h> | |
17321 | #include <linux/rwsem.h> | |
17322 | #include <linux/smp.h> | |
17323 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/Makefile linux-4.14/kernel/locking/Makefile | |
17324 | --- linux-4.14.orig/kernel/locking/Makefile 2017-11-12 19:46:13.000000000 +0100 | |
17325 | +++ linux-4.14/kernel/locking/Makefile 2018-09-05 11:05:07.000000000 +0200 | |
17326 | @@ -3,7 +3,7 @@ | |
17327 | # and is generally not a function of system call inputs. | |
17328 | KCOV_INSTRUMENT := n | |
1a6e0f06 | 17329 | |
e4b2b4a8 JK |
17330 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o |
17331 | +obj-y += semaphore.o percpu-rwsem.o | |
1a6e0f06 | 17332 | |
e4b2b4a8 JK |
17333 | ifdef CONFIG_FUNCTION_TRACER |
17334 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
17335 | @@ -12,7 +12,11 @@ | |
17336 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
17337 | endif | |
17338 | ||
17339 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
17340 | +obj-y += mutex.o | |
17341 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
17342 | +endif | |
17343 | +obj-y += rwsem.o | |
17344 | obj-$(CONFIG_LOCKDEP) += lockdep.o | |
17345 | ifeq ($(CONFIG_PROC_FS),y) | |
17346 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
17347 | @@ -25,8 +29,11 @@ | |
17348 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | |
17349 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
17350 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
17351 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
17352 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
17353 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
17354 | +endif | |
17355 | +obj-$(CONFIG_PREEMPT_RT_FULL) += mutex-rt.o rwsem-rt.o rwlock-rt.o | |
17356 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | |
17357 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
17358 | obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o | |
17359 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/mutex-rt.c linux-4.14/kernel/locking/mutex-rt.c | |
17360 | --- linux-4.14.orig/kernel/locking/mutex-rt.c 1970-01-01 01:00:00.000000000 +0100 | |
17361 | +++ linux-4.14/kernel/locking/mutex-rt.c 2018-09-05 11:05:07.000000000 +0200 | |
17362 | @@ -0,0 +1,223 @@ | |
17363 | +/* | |
17364 | + * kernel/rt.c | |
17365 | + * | |
17366 | + * Real-Time Preemption Support | |
17367 | + * | |
17368 | + * started by Ingo Molnar: | |
17369 | + * | |
17370 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
17371 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
17372 | + * | |
17373 | + * historic credit for proving that Linux spinlocks can be implemented via | |
17374 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
17375 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
17376 | + * research and analysis; TimeSys, for proving that you can implement a | |
17377 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
17378 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
17379 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
17380 | + * | |
17381 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
17382 | + * but the idea of converting spinlocks to mutexes is used here too. | |
17383 | + * | |
17384 | + * lock debugging, locking tree, deadlock detection: | |
17385 | + * | |
17386 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
17387 | + * Released under the General Public License (GPL). | |
17388 | + * | |
17389 | + * Includes portions of the generic R/W semaphore implementation from: | |
17390 | + * | |
17391 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
17392 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
17393 | + * - Derived also from comments by Linus | |
17394 | + * | |
17395 | + * Pending ownership of locks and ownership stealing: | |
17396 | + * | |
17397 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
17398 | + * | |
17399 | + * (also by Steven Rostedt) | |
17400 | + * - Converted single pi_lock to individual task locks. | |
17401 | + * | |
17402 | + * By Esben Nielsen: | |
17403 | + * Doing priority inheritance with help of the scheduler. | |
17404 | + * | |
17405 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
17406 | + * - major rework based on Esben Nielsens initial patch | |
17407 | + * - replaced thread_info references by task_struct refs | |
17408 | + * - removed task->pending_owner dependency | |
17409 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
17410 | + * in the scheduler return path as discussed with Steven Rostedt | |
17411 | + * | |
17412 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
17413 | + * Steven Rostedt <rostedt@goodmis.org> | |
17414 | + * - debugged and patched Thomas Gleixner's rework. | |
17415 | + * - added back the cmpxchg to the rework. | |
17416 | + * - turned atomic require back on for SMP. | |
17417 | + */ | |
17418 | + | |
17419 | +#include <linux/spinlock.h> | |
17420 | +#include <linux/rtmutex.h> | |
17421 | +#include <linux/sched.h> | |
17422 | +#include <linux/delay.h> | |
17423 | +#include <linux/module.h> | |
17424 | +#include <linux/kallsyms.h> | |
17425 | +#include <linux/syscalls.h> | |
17426 | +#include <linux/interrupt.h> | |
17427 | +#include <linux/plist.h> | |
17428 | +#include <linux/fs.h> | |
17429 | +#include <linux/futex.h> | |
17430 | +#include <linux/hrtimer.h> | |
17431 | + | |
17432 | +#include "rtmutex_common.h" | |
17433 | + | |
17434 | +/* | |
17435 | + * struct mutex functions | |
17436 | + */ | |
17437 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
17438 | + struct lock_class_key *key) | |
17439 | +{ | |
17440 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
17441 | + /* | |
17442 | + * Make sure we are not reinitializing a held lock: | |
17443 | + */ | |
17444 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
17445 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
1a6e0f06 | 17446 | +#endif |
e4b2b4a8 JK |
17447 | + mutex->lock.save_state = 0; |
17448 | +} | |
17449 | +EXPORT_SYMBOL(__mutex_do_init); | |
17450 | + | |
17451 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
17452 | +{ | |
17453 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
17454 | + __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); | |
17455 | +} | |
17456 | +EXPORT_SYMBOL(_mutex_lock); | |
17457 | + | |
17458 | +void __lockfunc _mutex_lock_io(struct mutex *lock) | |
17459 | +{ | |
17460 | + int token; | |
17461 | + | |
17462 | + token = io_schedule_prepare(); | |
17463 | + _mutex_lock(lock); | |
17464 | + io_schedule_finish(token); | |
17465 | +} | |
17466 | +EXPORT_SYMBOL_GPL(_mutex_lock_io); | |
17467 | + | |
17468 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
17469 | +{ | |
17470 | + int ret; | |
17471 | + | |
17472 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
17473 | + ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); | |
17474 | + if (ret) | |
17475 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
17476 | + return ret; | |
17477 | +} | |
17478 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
17479 | + | |
17480 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
17481 | +{ | |
17482 | + int ret; | |
17483 | + | |
17484 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
17485 | + ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); | |
17486 | + if (ret) | |
17487 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
17488 | + return ret; | |
17489 | +} | |
17490 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
17491 | + | |
17492 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
17493 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
17494 | +{ | |
17495 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
17496 | + __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); | |
17497 | +} | |
17498 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
1a6e0f06 | 17499 | + |
e4b2b4a8 JK |
17500 | +void __lockfunc _mutex_lock_io_nested(struct mutex *lock, int subclass) |
17501 | +{ | |
17502 | + int token; | |
1f39f580 | 17503 | + |
e4b2b4a8 | 17504 | + token = io_schedule_prepare(); |
1a6e0f06 | 17505 | + |
e4b2b4a8 JK |
17506 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); |
17507 | + __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); | |
1a6e0f06 | 17508 | + |
e4b2b4a8 JK |
17509 | + io_schedule_finish(token); |
17510 | +} | |
17511 | +EXPORT_SYMBOL_GPL(_mutex_lock_io_nested); | |
1a6e0f06 | 17512 | + |
e4b2b4a8 JK |
17513 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) |
17514 | +{ | |
17515 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
17516 | + __rt_mutex_lock_state(&lock->lock, TASK_UNINTERRUPTIBLE); | |
17517 | +} | |
17518 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
1a6e0f06 | 17519 | + |
e4b2b4a8 | 17520 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) |
1a6e0f06 | 17521 | +{ |
e4b2b4a8 JK |
17522 | + int ret; |
17523 | + | |
17524 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
17525 | + ret = __rt_mutex_lock_state(&lock->lock, TASK_INTERRUPTIBLE); | |
17526 | + if (ret) | |
17527 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
17528 | + return ret; | |
1a6e0f06 | 17529 | +} |
e4b2b4a8 JK |
17530 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); |
17531 | + | |
17532 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
17533 | +{ | |
17534 | + int ret; | |
17535 | + | |
17536 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
17537 | + ret = __rt_mutex_lock_state(&lock->lock, TASK_KILLABLE); | |
17538 | + if (ret) | |
17539 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
17540 | + return ret; | |
17541 | +} | |
17542 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
1a6e0f06 | 17543 | +#endif |
e4b2b4a8 JK |
17544 | + |
17545 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
17546 | +{ | |
17547 | + int ret = __rt_mutex_trylock(&lock->lock); | |
17548 | + | |
17549 | + if (ret) | |
17550 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
17551 | + | |
17552 | + return ret; | |
17553 | +} | |
17554 | +EXPORT_SYMBOL(_mutex_trylock); | |
17555 | + | |
17556 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
17557 | +{ | |
17558 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
17559 | + __rt_mutex_unlock(&lock->lock); | |
17560 | +} | |
17561 | +EXPORT_SYMBOL(_mutex_unlock); | |
17562 | + | |
17563 | +/** | |
17564 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
17565 | + * @cnt: the atomic which we are to dec | |
17566 | + * @lock: the mutex to return holding if we dec to 0 | |
17567 | + * | |
17568 | + * return true and hold lock if we dec to 0, return false otherwise | |
17569 | + */ | |
17570 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
17571 | +{ | |
17572 | + /* dec if we can't possibly hit 0 */ | |
17573 | + if (atomic_add_unless(cnt, -1, 1)) | |
17574 | + return 0; | |
17575 | + /* we might hit 0, so take the lock */ | |
17576 | + mutex_lock(lock); | |
17577 | + if (!atomic_dec_and_test(cnt)) { | |
17578 | + /* when we actually did the dec, we didn't hit 0 */ | |
17579 | + mutex_unlock(lock); | |
17580 | + return 0; | |
17581 | + } | |
17582 | + /* we hit 0, and we hold the lock */ | |
17583 | + return 1; | |
17584 | +} | |
17585 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
17586 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/rtmutex.c linux-4.14/kernel/locking/rtmutex.c | |
17587 | --- linux-4.14.orig/kernel/locking/rtmutex.c 2018-09-05 11:03:22.000000000 +0200 | |
17588 | +++ linux-4.14/kernel/locking/rtmutex.c 2018-09-05 11:05:07.000000000 +0200 | |
17589 | @@ -7,6 +7,11 @@ | |
17590 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
17591 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
17592 | * Copyright (C) 2006 Esben Nielsen | |
17593 | + * Adaptive Spinlocks: | |
17594 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
17595 | + * and Peter Morreale, | |
17596 | + * Adaptive Spinlocks simplification: | |
17597 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
1a6e0f06 | 17598 | * |
e4b2b4a8 JK |
17599 | * See Documentation/locking/rt-mutex-design.txt for details. |
17600 | */ | |
17601 | @@ -18,6 +23,8 @@ | |
17602 | #include <linux/sched/wake_q.h> | |
17603 | #include <linux/sched/debug.h> | |
17604 | #include <linux/timer.h> | |
17605 | +#include <linux/ww_mutex.h> | |
17606 | +#include <linux/blkdev.h> | |
17607 | ||
17608 | #include "rtmutex_common.h" | |
17609 | ||
17610 | @@ -135,6 +142,12 @@ | |
17611 | WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); | |
1a6e0f06 JK |
17612 | } |
17613 | ||
e4b2b4a8 JK |
17614 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) |
17615 | +{ | |
17616 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
17617 | + waiter != PI_REQUEUE_INPROGRESS; | |
17618 | +} | |
17619 | + | |
1a6e0f06 | 17620 | /* |
e4b2b4a8 JK |
17621 | * We can speed up the acquire/release, if there's no debugging state to be |
17622 | * set up. | |
17623 | @@ -228,7 +241,7 @@ | |
17624 | * Only use with rt_mutex_waiter_{less,equal}() | |
1a6e0f06 | 17625 | */ |
e4b2b4a8 JK |
17626 | #define task_to_waiter(p) \ |
17627 | - &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } | |
17628 | + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline, .task = (p) } | |
1a6e0f06 | 17629 | |
e4b2b4a8 JK |
17630 | static inline int |
17631 | rt_mutex_waiter_less(struct rt_mutex_waiter *left, | |
17632 | @@ -268,6 +281,27 @@ | |
17633 | return 1; | |
1a6e0f06 JK |
17634 | } |
17635 | ||
e4b2b4a8 JK |
17636 | +#define STEAL_NORMAL 0 |
17637 | +#define STEAL_LATERAL 1 | |
17638 | + | |
17639 | +static inline int | |
17640 | +rt_mutex_steal(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, int mode) | |
17641 | +{ | |
17642 | + struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock); | |
17643 | + | |
17644 | + if (waiter == top_waiter || rt_mutex_waiter_less(waiter, top_waiter)) | |
17645 | + return 1; | |
17646 | + | |
17647 | + /* | |
17648 | + * Note that RT tasks are excluded from lateral-steals | |
17649 | + * to prevent the introduction of an unbounded latency. | |
17650 | + */ | |
17651 | + if (mode == STEAL_NORMAL || rt_task(waiter->task)) | |
17652 | + return 0; | |
17653 | + | |
17654 | + return rt_mutex_waiter_equal(waiter, top_waiter); | |
17655 | +} | |
17656 | + | |
17657 | static void | |
17658 | rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) | |
17659 | { | |
17660 | @@ -372,6 +406,14 @@ | |
17661 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); | |
1a6e0f06 JK |
17662 | } |
17663 | ||
e4b2b4a8 JK |
17664 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) |
17665 | +{ | |
17666 | + if (waiter->savestate) | |
17667 | + wake_up_lock_sleeper(waiter->task); | |
17668 | + else | |
17669 | + wake_up_process(waiter->task); | |
17670 | +} | |
17671 | + | |
17672 | /* | |
17673 | * Max number of times we'll walk the boosting chain: | |
17674 | */ | |
17675 | @@ -379,7 +421,8 @@ | |
1a6e0f06 | 17676 | |
e4b2b4a8 JK |
17677 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) |
17678 | { | |
17679 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
17680 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
17681 | + p->pi_blocked_on->lock : NULL; | |
17682 | } | |
1a6e0f06 | 17683 | |
e4b2b4a8 JK |
17684 | /* |
17685 | @@ -515,7 +558,7 @@ | |
17686 | * reached or the state of the chain has changed while we | |
17687 | * dropped the locks. | |
17688 | */ | |
17689 | - if (!waiter) | |
17690 | + if (!rt_mutex_real_waiter(waiter)) | |
17691 | goto out_unlock_pi; | |
1a6e0f06 | 17692 | |
e4b2b4a8 JK |
17693 | /* |
17694 | @@ -696,13 +739,16 @@ | |
17695 | * follow here. This is the end of the chain we are walking. | |
17696 | */ | |
17697 | if (!rt_mutex_owner(lock)) { | |
17698 | + struct rt_mutex_waiter *lock_top_waiter; | |
1a6e0f06 | 17699 | + |
e4b2b4a8 JK |
17700 | /* |
17701 | * If the requeue [7] above changed the top waiter, | |
17702 | * then we need to wake the new top waiter up to try | |
17703 | * to get the lock. | |
17704 | */ | |
17705 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
17706 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
17707 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
17708 | + if (prerequeue_top_waiter != lock_top_waiter) | |
17709 | + rt_mutex_wake_waiter(lock_top_waiter); | |
17710 | raw_spin_unlock_irq(&lock->wait_lock); | |
17711 | return 0; | |
17712 | } | |
17713 | @@ -804,9 +850,11 @@ | |
17714 | * @task: The task which wants to acquire the lock | |
17715 | * @waiter: The waiter that is queued to the lock's wait tree if the | |
17716 | * callsite called task_blocked_on_lock(), otherwise NULL | |
17717 | + * @mode: Lock steal mode (STEAL_NORMAL, STEAL_LATERAL) | |
17718 | */ | |
17719 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
17720 | - struct rt_mutex_waiter *waiter) | |
17721 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
17722 | + struct task_struct *task, | |
17723 | + struct rt_mutex_waiter *waiter, int mode) | |
17724 | { | |
17725 | lockdep_assert_held(&lock->wait_lock); | |
1a6e0f06 | 17726 | |
e4b2b4a8 JK |
17727 | @@ -842,12 +890,11 @@ |
17728 | */ | |
17729 | if (waiter) { | |
17730 | /* | |
17731 | - * If waiter is not the highest priority waiter of | |
17732 | - * @lock, give up. | |
17733 | + * If waiter is not the highest priority waiter of @lock, | |
17734 | + * or its peer when lateral steal is allowed, give up. | |
17735 | */ | |
17736 | - if (waiter != rt_mutex_top_waiter(lock)) | |
17737 | + if (!rt_mutex_steal(lock, waiter, mode)) | |
17738 | return 0; | |
17739 | - | |
17740 | /* | |
17741 | * We can acquire the lock. Remove the waiter from the | |
17742 | * lock waiters tree. | |
17743 | @@ -865,14 +912,12 @@ | |
17744 | */ | |
17745 | if (rt_mutex_has_waiters(lock)) { | |
17746 | /* | |
17747 | - * If @task->prio is greater than or equal to | |
17748 | - * the top waiter priority (kernel view), | |
17749 | - * @task lost. | |
17750 | + * If @task->prio is greater than the top waiter | |
17751 | + * priority (kernel view), or equal to it when a | |
17752 | + * lateral steal is forbidden, @task lost. | |
17753 | */ | |
17754 | - if (!rt_mutex_waiter_less(task_to_waiter(task), | |
17755 | - rt_mutex_top_waiter(lock))) | |
17756 | + if (!rt_mutex_steal(lock, task_to_waiter(task), mode)) | |
17757 | return 0; | |
17758 | - | |
17759 | /* | |
17760 | * The current top waiter stays enqueued. We | |
17761 | * don't have to change anything in the lock | |
17762 | @@ -919,6 +964,351 @@ | |
17763 | return 1; | |
17764 | } | |
1a6e0f06 | 17765 | |
e4b2b4a8 JK |
17766 | +#ifdef CONFIG_PREEMPT_RT_FULL |
17767 | +/* | |
17768 | + * preemptible spin_lock functions: | |
17769 | + */ | |
17770 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
17771 | + void (*slowfn)(struct rt_mutex *lock)) | |
17772 | +{ | |
17773 | + might_sleep_no_state_check(); | |
17774 | + | |
17775 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
17776 | + return; | |
17777 | + else | |
17778 | + slowfn(lock); | |
17779 | +} | |
17780 | + | |
17781 | +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, | |
17782 | + void (*slowfn)(struct rt_mutex *lock)) | |
17783 | +{ | |
17784 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) | |
17785 | + return; | |
17786 | + else | |
17787 | + slowfn(lock); | |
17788 | +} | |
17789 | +#ifdef CONFIG_SMP | |
17790 | +/* | |
17791 | + * Note that owner is a speculative pointer and dereferencing relies | |
17792 | + * on rcu_read_lock() and the check against the lock owner. | |
17793 | + */ | |
17794 | +static int adaptive_wait(struct rt_mutex *lock, | |
17795 | + struct task_struct *owner) | |
17796 | +{ | |
17797 | + int res = 0; | |
17798 | + | |
17799 | + rcu_read_lock(); | |
17800 | + for (;;) { | |
17801 | + if (owner != rt_mutex_owner(lock)) | |
17802 | + break; | |
17803 | + /* | |
17804 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
17805 | + * checking the above to be valid. | |
17806 | + */ | |
17807 | + barrier(); | |
17808 | + if (!owner->on_cpu) { | |
17809 | + res = 1; | |
17810 | + break; | |
17811 | + } | |
17812 | + cpu_relax(); | |
17813 | + } | |
17814 | + rcu_read_unlock(); | |
17815 | + return res; | |
17816 | +} | |
17817 | +#else | |
17818 | +static int adaptive_wait(struct rt_mutex *lock, | |
17819 | + struct task_struct *orig_owner) | |
17820 | +{ | |
17821 | + return 1; | |
17822 | +} | |
1a6e0f06 JK |
17823 | +#endif |
17824 | + | |
e4b2b4a8 JK |
17825 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
17826 | + struct rt_mutex_waiter *waiter, | |
17827 | + struct task_struct *task, | |
17828 | + enum rtmutex_chainwalk chwalk); | |
17829 | +/* | |
17830 | + * Slow path lock function spin_lock style: this variant is very | |
17831 | + * careful not to miss any non-lock wakeups. | |
17832 | + * | |
17833 | + * We store the current state under p->pi_lock in p->saved_state and | |
17834 | + * the try_to_wake_up() code handles this accordingly. | |
17835 | + */ | |
17836 | +void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, | |
17837 | + struct rt_mutex_waiter *waiter, | |
17838 | + unsigned long flags) | |
17839 | +{ | |
17840 | + struct task_struct *lock_owner, *self = current; | |
17841 | + struct rt_mutex_waiter *top_waiter; | |
17842 | + int ret; | |
1a6e0f06 | 17843 | + |
e4b2b4a8 JK |
17844 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) |
17845 | + return; | |
1a6e0f06 | 17846 | + |
e4b2b4a8 | 17847 | + BUG_ON(rt_mutex_owner(lock) == self); |
1a6e0f06 | 17848 | + |
e4b2b4a8 JK |
17849 | + /* |
17850 | + * We save whatever state the task is in and we'll restore it | |
17851 | + * after acquiring the lock taking real wakeups into account | |
17852 | + * as well. We are serialized via pi_lock against wakeups. See | |
17853 | + * try_to_wake_up(). | |
17854 | + */ | |
17855 | + raw_spin_lock(&self->pi_lock); | |
17856 | + self->saved_state = self->state; | |
17857 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
17858 | + raw_spin_unlock(&self->pi_lock); | |
1a6e0f06 | 17859 | + |
e4b2b4a8 JK |
17860 | + ret = task_blocks_on_rt_mutex(lock, waiter, self, RT_MUTEX_MIN_CHAINWALK); |
17861 | + BUG_ON(ret); | |
1a6e0f06 | 17862 | + |
e4b2b4a8 JK |
17863 | + for (;;) { |
17864 | + /* Try to acquire the lock again. */ | |
17865 | + if (__try_to_take_rt_mutex(lock, self, waiter, STEAL_LATERAL)) | |
17866 | + break; | |
1a6e0f06 | 17867 | + |
e4b2b4a8 JK |
17868 | + top_waiter = rt_mutex_top_waiter(lock); |
17869 | + lock_owner = rt_mutex_owner(lock); | |
1a6e0f06 | 17870 | + |
e4b2b4a8 | 17871 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
1a6e0f06 | 17872 | + |
e4b2b4a8 | 17873 | + debug_rt_mutex_print_deadlock(waiter); |
1a6e0f06 | 17874 | + |
e4b2b4a8 JK |
17875 | + if (top_waiter != waiter || adaptive_wait(lock, lock_owner)) |
17876 | + schedule(); | |
1a6e0f06 | 17877 | + |
e4b2b4a8 | 17878 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); |
1a6e0f06 | 17879 | + |
e4b2b4a8 JK |
17880 | + raw_spin_lock(&self->pi_lock); |
17881 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
17882 | + raw_spin_unlock(&self->pi_lock); | |
17883 | + } | |
1a6e0f06 | 17884 | + |
e4b2b4a8 JK |
17885 | + /* |
17886 | + * Restore the task state to current->saved_state. We set it | |
17887 | + * to the original state above and the try_to_wake_up() code | |
17888 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
17889 | + * happened while we were blocked. Clear saved_state so | |
17890 | + * try_to_wakeup() does not get confused. | |
17891 | + */ | |
17892 | + raw_spin_lock(&self->pi_lock); | |
17893 | + __set_current_state_no_track(self->saved_state); | |
17894 | + self->saved_state = TASK_RUNNING; | |
17895 | + raw_spin_unlock(&self->pi_lock); | |
1a6e0f06 | 17896 | + |
e4b2b4a8 JK |
17897 | + /* |
17898 | + * try_to_take_rt_mutex() sets the waiter bit | |
17899 | + * unconditionally. We might have to fix that up: | |
17900 | + */ | |
17901 | + fixup_rt_mutex_waiters(lock); | |
1a6e0f06 | 17902 | + |
e4b2b4a8 JK |
17903 | + BUG_ON(rt_mutex_has_waiters(lock) && waiter == rt_mutex_top_waiter(lock)); |
17904 | + BUG_ON(!RB_EMPTY_NODE(&waiter->tree_entry)); | |
17905 | +} | |
1a6e0f06 | 17906 | + |
e4b2b4a8 JK |
17907 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock) |
17908 | +{ | |
17909 | + struct rt_mutex_waiter waiter; | |
17910 | + unsigned long flags; | |
1a6e0f06 | 17911 | + |
e4b2b4a8 | 17912 | + rt_mutex_init_waiter(&waiter, true); |
1a6e0f06 | 17913 | + |
e4b2b4a8 JK |
17914 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); |
17915 | + rt_spin_lock_slowlock_locked(lock, &waiter, flags); | |
17916 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
17917 | + debug_rt_mutex_free_waiter(&waiter); | |
17918 | +} | |
1a6e0f06 | 17919 | + |
e4b2b4a8 JK |
17920 | +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, |
17921 | + struct wake_q_head *wake_q, | |
17922 | + struct wake_q_head *wq_sleeper); | |
17923 | +/* | |
17924 | + * Slow path to release a rt_mutex spin_lock style | |
17925 | + */ | |
17926 | +void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) | |
17927 | +{ | |
17928 | + unsigned long flags; | |
17929 | + DEFINE_WAKE_Q(wake_q); | |
17930 | + DEFINE_WAKE_Q(wake_sleeper_q); | |
17931 | + bool postunlock; | |
1a6e0f06 | 17932 | + |
e4b2b4a8 JK |
17933 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); |
17934 | + postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); | |
17935 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
1a6e0f06 | 17936 | + |
e4b2b4a8 JK |
17937 | + if (postunlock) |
17938 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
17939 | +} | |
1a6e0f06 | 17940 | + |
e4b2b4a8 JK |
17941 | +void __lockfunc rt_spin_lock(spinlock_t *lock) |
17942 | +{ | |
17943 | + sleeping_lock_inc(); | |
17944 | + migrate_disable(); | |
17945 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
17946 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); | |
17947 | +} | |
17948 | +EXPORT_SYMBOL(rt_spin_lock); | |
1a6e0f06 | 17949 | + |
e4b2b4a8 JK |
17950 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) |
17951 | +{ | |
17952 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); | |
17953 | +} | |
1a6e0f06 | 17954 | + |
e4b2b4a8 JK |
17955 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
17956 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
17957 | +{ | |
17958 | + sleeping_lock_inc(); | |
17959 | + migrate_disable(); | |
17960 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
17961 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); | |
17962 | +} | |
17963 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
17964 | +#endif | |
1a6e0f06 | 17965 | + |
e4b2b4a8 JK |
17966 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) |
17967 | +{ | |
17968 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
17969 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
17970 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
17971 | + migrate_enable(); | |
17972 | + sleeping_lock_dec(); | |
17973 | +} | |
17974 | +EXPORT_SYMBOL(rt_spin_unlock); | |
1a6e0f06 | 17975 | + |
e4b2b4a8 JK |
17976 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) |
17977 | +{ | |
17978 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
17979 | +} | |
17980 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
17981 | + | |
17982 | +/* | |
17983 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
17984 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
17985 | + * schedule if there's contention: | |
17986 | + */ | |
17987 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
17988 | +{ | |
17989 | + spin_lock(lock); | |
17990 | + spin_unlock(lock); | |
17991 | +} | |
17992 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
17993 | + | |
17994 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
17995 | +{ | |
17996 | + int ret; | |
17997 | + | |
17998 | + sleeping_lock_inc(); | |
17999 | + migrate_disable(); | |
18000 | + ret = __rt_mutex_trylock(&lock->lock); | |
18001 | + if (ret) { | |
18002 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
18003 | + } else { | |
18004 | + migrate_enable(); | |
18005 | + sleeping_lock_dec(); | |
18006 | + } | |
18007 | + return ret; | |
18008 | +} | |
18009 | +EXPORT_SYMBOL(rt_spin_trylock); | |
18010 | + | |
18011 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
18012 | +{ | |
18013 | + int ret; | |
18014 | + | |
18015 | + local_bh_disable(); | |
18016 | + ret = __rt_mutex_trylock(&lock->lock); | |
18017 | + if (ret) { | |
18018 | + sleeping_lock_inc(); | |
18019 | + migrate_disable(); | |
18020 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
18021 | + } else | |
18022 | + local_bh_enable(); | |
18023 | + return ret; | |
18024 | +} | |
18025 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
18026 | + | |
18027 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
18028 | +{ | |
18029 | + int ret; | |
1a6e0f06 | 18030 | + |
e4b2b4a8 JK |
18031 | + *flags = 0; |
18032 | + ret = __rt_mutex_trylock(&lock->lock); | |
18033 | + if (ret) { | |
18034 | + sleeping_lock_inc(); | |
18035 | + migrate_disable(); | |
18036 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
18037 | + } | |
18038 | + return ret; | |
18039 | +} | |
18040 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
1a6e0f06 | 18041 | + |
e4b2b4a8 JK |
18042 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) |
18043 | +{ | |
18044 | + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ | |
18045 | + if (atomic_add_unless(atomic, -1, 1)) | |
18046 | + return 0; | |
18047 | + rt_spin_lock(lock); | |
18048 | + if (atomic_dec_and_test(atomic)) | |
18049 | + return 1; | |
18050 | + rt_spin_unlock(lock); | |
18051 | + return 0; | |
18052 | +} | |
18053 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
1a6e0f06 | 18054 | + |
e4b2b4a8 JK |
18055 | +void |
18056 | +__rt_spin_lock_init(spinlock_t *lock, const char *name, struct lock_class_key *key) | |
18057 | +{ | |
1a6e0f06 | 18058 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
e4b2b4a8 JK |
18059 | + /* |
18060 | + * Make sure we are not reinitializing a held lock: | |
18061 | + */ | |
18062 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
18063 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
1a6e0f06 | 18064 | +#endif |
e4b2b4a8 JK |
18065 | +} |
18066 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
1a6e0f06 | 18067 | + |
e4b2b4a8 | 18068 | +#endif /* PREEMPT_RT_FULL */ |
1a6e0f06 | 18069 | + |
e4b2b4a8 JK |
18070 | +#ifdef CONFIG_PREEMPT_RT_FULL |
18071 | + static inline int __sched | |
18072 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
18073 | +{ | |
18074 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
18075 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
1a6e0f06 | 18076 | + |
e4b2b4a8 JK |
18077 | + if (!hold_ctx) |
18078 | + return 0; | |
1a6e0f06 | 18079 | + |
e4b2b4a8 JK |
18080 | + if (unlikely(ctx == hold_ctx)) |
18081 | + return -EALREADY; | |
1a6e0f06 | 18082 | + |
e4b2b4a8 JK |
18083 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && |
18084 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
18085 | +#ifdef CONFIG_DEBUG_MUTEXES | |
18086 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
18087 | + ctx->contending_lock = ww; | |
18088 | +#endif | |
18089 | + return -EDEADLK; | |
18090 | + } | |
1a6e0f06 | 18091 | + |
e4b2b4a8 JK |
18092 | + return 0; |
18093 | +} | |
18094 | +#else | |
18095 | + static inline int __sched | |
18096 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
18097 | +{ | |
18098 | + BUG(); | |
18099 | + return 0; | |
18100 | +} | |
1a6e0f06 | 18101 | + |
1a6e0f06 JK |
18102 | +#endif |
18103 | + | |
e4b2b4a8 JK |
18104 | +static inline int |
18105 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18106 | + struct rt_mutex_waiter *waiter) | |
18107 | +{ | |
18108 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
18109 | +} | |
1a6e0f06 | 18110 | + |
e4b2b4a8 JK |
18111 | /* |
18112 | * Task blocks on lock. | |
18113 | * | |
18114 | @@ -951,6 +1341,22 @@ | |
18115 | return -EDEADLK; | |
18116 | ||
18117 | raw_spin_lock(&task->pi_lock); | |
18118 | + /* | |
18119 | + * In the case of futex requeue PI, this will be a proxy | |
18120 | + * lock. The task will wake unaware that it is enqueueed on | |
18121 | + * this lock. Avoid blocking on two locks and corrupting | |
18122 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
18123 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
18124 | + * before requeue (due to a signal or timeout). Do not enqueue | |
18125 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
18126 | + */ | |
18127 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
18128 | + raw_spin_unlock(&task->pi_lock); | |
18129 | + return -EAGAIN; | |
18130 | + } | |
1a6e0f06 | 18131 | + |
e4b2b4a8 | 18132 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); |
1a6e0f06 | 18133 | + |
e4b2b4a8 JK |
18134 | waiter->task = task; |
18135 | waiter->lock = lock; | |
18136 | waiter->prio = task->prio; | |
18137 | @@ -974,7 +1380,7 @@ | |
18138 | rt_mutex_enqueue_pi(owner, waiter); | |
18139 | ||
18140 | rt_mutex_adjust_prio(owner); | |
18141 | - if (owner->pi_blocked_on) | |
18142 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
18143 | chain_walk = 1; | |
18144 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
18145 | chain_walk = 1; | |
18146 | @@ -1016,6 +1422,7 @@ | |
18147 | * Called with lock->wait_lock held and interrupts disabled. | |
18148 | */ | |
18149 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
18150 | + struct wake_q_head *wake_sleeper_q, | |
18151 | struct rt_mutex *lock) | |
18152 | { | |
18153 | struct rt_mutex_waiter *waiter; | |
18154 | @@ -1055,7 +1462,10 @@ | |
18155 | * Pairs with preempt_enable() in rt_mutex_postunlock(); | |
18156 | */ | |
18157 | preempt_disable(); | |
18158 | - wake_q_add(wake_q, waiter->task); | |
18159 | + if (waiter->savestate) | |
18160 | + wake_q_add_sleeper(wake_sleeper_q, waiter->task); | |
18161 | + else | |
18162 | + wake_q_add(wake_q, waiter->task); | |
18163 | raw_spin_unlock(¤t->pi_lock); | |
18164 | } | |
18165 | ||
18166 | @@ -1070,7 +1480,7 @@ | |
18167 | { | |
18168 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
18169 | struct task_struct *owner = rt_mutex_owner(lock); | |
18170 | - struct rt_mutex *next_lock; | |
18171 | + struct rt_mutex *next_lock = NULL; | |
18172 | ||
18173 | lockdep_assert_held(&lock->wait_lock); | |
18174 | ||
18175 | @@ -1096,7 +1506,8 @@ | |
18176 | rt_mutex_adjust_prio(owner); | |
18177 | ||
18178 | /* Store the lock on which owner is blocked or NULL */ | |
18179 | - next_lock = task_blocked_on_lock(owner); | |
18180 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
18181 | + next_lock = task_blocked_on_lock(owner); | |
18182 | ||
18183 | raw_spin_unlock(&owner->pi_lock); | |
18184 | ||
18185 | @@ -1132,26 +1543,28 @@ | |
18186 | raw_spin_lock_irqsave(&task->pi_lock, flags); | |
18187 | ||
18188 | waiter = task->pi_blocked_on; | |
18189 | - if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { | |
18190 | + if (!rt_mutex_real_waiter(waiter) || | |
18191 | + rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { | |
18192 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18193 | return; | |
18194 | } | |
18195 | next_lock = waiter->lock; | |
18196 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18197 | ||
18198 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
18199 | get_task_struct(task); | |
18200 | ||
18201 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18202 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
18203 | next_lock, NULL, task); | |
18204 | } | |
18205 | ||
18206 | -void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) | |
18207 | +void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
18208 | { | |
18209 | debug_rt_mutex_init_waiter(waiter); | |
18210 | RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
18211 | RB_CLEAR_NODE(&waiter->tree_entry); | |
18212 | waiter->task = NULL; | |
18213 | + waiter->savestate = savestate; | |
18214 | } | |
18215 | ||
18216 | /** | |
18217 | @@ -1167,7 +1580,8 @@ | |
18218 | static int __sched | |
18219 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
18220 | struct hrtimer_sleeper *timeout, | |
18221 | - struct rt_mutex_waiter *waiter) | |
18222 | + struct rt_mutex_waiter *waiter, | |
18223 | + struct ww_acquire_ctx *ww_ctx) | |
18224 | { | |
18225 | int ret = 0; | |
18226 | ||
18227 | @@ -1176,16 +1590,17 @@ | |
18228 | if (try_to_take_rt_mutex(lock, current, waiter)) | |
18229 | break; | |
18230 | ||
18231 | - /* | |
18232 | - * TASK_INTERRUPTIBLE checks for signals and | |
18233 | - * timeout. Ignored otherwise. | |
18234 | - */ | |
18235 | - if (likely(state == TASK_INTERRUPTIBLE)) { | |
18236 | - /* Signal pending? */ | |
18237 | - if (signal_pending(current)) | |
18238 | - ret = -EINTR; | |
18239 | - if (timeout && !timeout->task) | |
18240 | - ret = -ETIMEDOUT; | |
18241 | + if (timeout && !timeout->task) { | |
18242 | + ret = -ETIMEDOUT; | |
18243 | + break; | |
18244 | + } | |
18245 | + if (signal_pending_state(state, current)) { | |
18246 | + ret = -EINTR; | |
18247 | + break; | |
18248 | + } | |
1a6e0f06 | 18249 | + |
e4b2b4a8 JK |
18250 | + if (ww_ctx && ww_ctx->acquired > 0) { |
18251 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
18252 | if (ret) | |
18253 | break; | |
18254 | } | |
18255 | @@ -1224,33 +1639,104 @@ | |
18256 | } | |
18257 | } | |
18258 | ||
18259 | -/* | |
18260 | - * Slow path lock function: | |
18261 | - */ | |
18262 | -static int __sched | |
18263 | -rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
18264 | - struct hrtimer_sleeper *timeout, | |
18265 | - enum rtmutex_chainwalk chwalk) | |
18266 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
18267 | + struct ww_acquire_ctx *ww_ctx) | |
18268 | { | |
18269 | - struct rt_mutex_waiter waiter; | |
18270 | - unsigned long flags; | |
18271 | - int ret = 0; | |
18272 | +#ifdef CONFIG_DEBUG_MUTEXES | |
18273 | + /* | |
18274 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
18275 | + * but released with a normal mutex_unlock in this call. | |
18276 | + * | |
18277 | + * This should never happen, always use ww_mutex_unlock. | |
18278 | + */ | |
18279 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
18280 | ||
18281 | - rt_mutex_init_waiter(&waiter); | |
18282 | + /* | |
18283 | + * Not quite done after calling ww_acquire_done() ? | |
18284 | + */ | |
18285 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
1a6e0f06 | 18286 | + |
e4b2b4a8 JK |
18287 | + if (ww_ctx->contending_lock) { |
18288 | + /* | |
18289 | + * After -EDEADLK you tried to | |
18290 | + * acquire a different ww_mutex? Bad! | |
18291 | + */ | |
18292 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
1a6e0f06 | 18293 | + |
e4b2b4a8 JK |
18294 | + /* |
18295 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
18296 | + * but 'forgot' to unlock everything else first? | |
18297 | + */ | |
18298 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
18299 | + ww_ctx->contending_lock = NULL; | |
18300 | + } | |
18301 | ||
18302 | /* | |
18303 | - * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
18304 | - * be called in early boot if the cmpxchg() fast path is disabled | |
18305 | - * (debug, no architecture support). In this case we will acquire the | |
18306 | - * rtmutex with lock->wait_lock held. But we cannot unconditionally | |
18307 | - * enable interrupts in that early boot case. So we need to use the | |
18308 | - * irqsave/restore variants. | |
18309 | + * Naughty, using a different class will lead to undefined behavior! | |
18310 | */ | |
18311 | - raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
18312 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
18313 | +#endif | |
18314 | + ww_ctx->acquired++; | |
18315 | +} | |
1a6e0f06 | 18316 | + |
e4b2b4a8 JK |
18317 | +#ifdef CONFIG_PREEMPT_RT_FULL |
18318 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
18319 | + struct ww_acquire_ctx *ww_ctx) | |
18320 | +{ | |
18321 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
18322 | + struct rt_mutex_waiter *waiter, *n; | |
1a6e0f06 | 18323 | + |
e4b2b4a8 JK |
18324 | + /* |
18325 | + * This branch gets optimized out for the common case, | |
18326 | + * and is only important for ww_mutex_lock. | |
18327 | + */ | |
18328 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
18329 | + ww->ctx = ww_ctx; | |
1a6e0f06 | 18330 | + |
e4b2b4a8 JK |
18331 | + /* |
18332 | + * Give any possible sleeping processes the chance to wake up, | |
18333 | + * so they can recheck if they have to back off. | |
18334 | + */ | |
18335 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters.rb_root, | |
18336 | + tree_entry) { | |
18337 | + /* XXX debug rt mutex waiter wakeup */ | |
1a6e0f06 | 18338 | + |
e4b2b4a8 JK |
18339 | + BUG_ON(waiter->lock != lock); |
18340 | + rt_mutex_wake_waiter(waiter); | |
18341 | + } | |
1a6e0f06 JK |
18342 | +} |
18343 | + | |
e4b2b4a8 | 18344 | +#else |
1a6e0f06 | 18345 | + |
e4b2b4a8 JK |
18346 | +static void ww_mutex_account_lock(struct rt_mutex *lock, |
18347 | + struct ww_acquire_ctx *ww_ctx) | |
1a6e0f06 | 18348 | +{ |
e4b2b4a8 | 18349 | + BUG(); |
1a6e0f06 | 18350 | +} |
e4b2b4a8 | 18351 | +#endif |
1a6e0f06 | 18352 | + |
e4b2b4a8 JK |
18353 | +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, |
18354 | + struct hrtimer_sleeper *timeout, | |
18355 | + enum rtmutex_chainwalk chwalk, | |
18356 | + struct ww_acquire_ctx *ww_ctx, | |
18357 | + struct rt_mutex_waiter *waiter) | |
1a6e0f06 | 18358 | +{ |
e4b2b4a8 | 18359 | + int ret; |
1a6e0f06 | 18360 | + |
e4b2b4a8 JK |
18361 | +#ifdef CONFIG_PREEMPT_RT_FULL |
18362 | + if (ww_ctx) { | |
18363 | + struct ww_mutex *ww; | |
1a6e0f06 | 18364 | + |
e4b2b4a8 JK |
18365 | + ww = container_of(lock, struct ww_mutex, base.lock); |
18366 | + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) | |
18367 | + return -EALREADY; | |
18368 | + } | |
18369 | +#endif | |
18370 | ||
18371 | /* Try to acquire the lock again: */ | |
18372 | if (try_to_take_rt_mutex(lock, current, NULL)) { | |
18373 | - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
18374 | + if (ww_ctx) | |
18375 | + ww_mutex_account_lock(lock, ww_ctx); | |
18376 | return 0; | |
18377 | } | |
18378 | ||
18379 | @@ -1260,17 +1746,27 @@ | |
18380 | if (unlikely(timeout)) | |
18381 | hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | |
18382 | ||
18383 | - ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); | |
18384 | + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); | |
18385 | ||
18386 | - if (likely(!ret)) | |
18387 | + if (likely(!ret)) { | |
18388 | /* sleep on the mutex */ | |
18389 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | |
18390 | + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, | |
18391 | + ww_ctx); | |
18392 | + } else if (ww_ctx) { | |
18393 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
18394 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
18395 | + BUG_ON(!ret); | |
18396 | + } | |
18397 | ||
18398 | if (unlikely(ret)) { | |
18399 | __set_current_state(TASK_RUNNING); | |
18400 | if (rt_mutex_has_waiters(lock)) | |
18401 | - remove_waiter(lock, &waiter); | |
18402 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
18403 | + remove_waiter(lock, waiter); | |
18404 | + /* ww_mutex want to report EDEADLK/EALREADY, let them */ | |
18405 | + if (!ww_ctx) | |
18406 | + rt_mutex_handle_deadlock(ret, chwalk, waiter); | |
18407 | + } else if (ww_ctx) { | |
18408 | + ww_mutex_account_lock(lock, ww_ctx); | |
18409 | } | |
18410 | ||
18411 | /* | |
18412 | @@ -1278,6 +1774,36 @@ | |
18413 | * unconditionally. We might have to fix that up. | |
18414 | */ | |
18415 | fixup_rt_mutex_waiters(lock); | |
18416 | + return ret; | |
1a6e0f06 JK |
18417 | +} |
18418 | + | |
e4b2b4a8 JK |
18419 | +/* |
18420 | + * Slow path lock function: | |
18421 | + */ | |
18422 | +static int __sched | |
18423 | +rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
18424 | + struct hrtimer_sleeper *timeout, | |
18425 | + enum rtmutex_chainwalk chwalk, | |
18426 | + struct ww_acquire_ctx *ww_ctx) | |
1a6e0f06 | 18427 | +{ |
e4b2b4a8 JK |
18428 | + struct rt_mutex_waiter waiter; |
18429 | + unsigned long flags; | |
18430 | + int ret = 0; | |
1a6e0f06 | 18431 | + |
e4b2b4a8 | 18432 | + rt_mutex_init_waiter(&waiter, false); |
1a6e0f06 | 18433 | + |
e4b2b4a8 JK |
18434 | + /* |
18435 | + * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
18436 | + * be called in early boot if the cmpxchg() fast path is disabled | |
18437 | + * (debug, no architecture support). In this case we will acquire the | |
18438 | + * rtmutex with lock->wait_lock held. But we cannot unconditionally | |
18439 | + * enable interrupts in that early boot case. So we need to use the | |
18440 | + * irqsave/restore variants. | |
18441 | + */ | |
18442 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
1a6e0f06 | 18443 | + |
e4b2b4a8 JK |
18444 | + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, |
18445 | + &waiter); | |
18446 | ||
18447 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
18448 | ||
18449 | @@ -1338,7 +1864,8 @@ | |
18450 | * Return whether the current task needs to call rt_mutex_postunlock(). | |
18451 | */ | |
18452 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
18453 | - struct wake_q_head *wake_q) | |
18454 | + struct wake_q_head *wake_q, | |
18455 | + struct wake_q_head *wake_sleeper_q) | |
18456 | { | |
18457 | unsigned long flags; | |
18458 | ||
18459 | @@ -1392,7 +1919,7 @@ | |
18460 | * | |
18461 | * Queue the next waiter for wakeup once we release the wait_lock. | |
18462 | */ | |
18463 | - mark_wakeup_next_waiter(wake_q, lock); | |
18464 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); | |
18465 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
18466 | ||
18467 | return true; /* call rt_mutex_postunlock() */ | |
18468 | @@ -1406,29 +1933,45 @@ | |
18469 | */ | |
18470 | static inline int | |
18471 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
18472 | + struct ww_acquire_ctx *ww_ctx, | |
18473 | int (*slowfn)(struct rt_mutex *lock, int state, | |
18474 | struct hrtimer_sleeper *timeout, | |
18475 | - enum rtmutex_chainwalk chwalk)) | |
18476 | + enum rtmutex_chainwalk chwalk, | |
18477 | + struct ww_acquire_ctx *ww_ctx)) | |
18478 | { | |
18479 | if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
18480 | return 0; | |
18481 | ||
18482 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); | |
18483 | + /* | |
18484 | + * If rt_mutex blocks, the function sched_submit_work will not call | |
18485 | + * blk_schedule_flush_plug (because tsk_is_pi_blocked would be true). | |
18486 | + * We must call blk_schedule_flush_plug here, if we don't call it, | |
18487 | + * a deadlock in device mapper may happen. | |
18488 | + */ | |
18489 | + if (unlikely(blk_needs_flush_plug(current))) | |
18490 | + blk_schedule_flush_plug(current); | |
1a6e0f06 | 18491 | + |
e4b2b4a8 JK |
18492 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); |
18493 | } | |
18494 | ||
18495 | static inline int | |
18496 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
18497 | struct hrtimer_sleeper *timeout, | |
18498 | enum rtmutex_chainwalk chwalk, | |
18499 | + struct ww_acquire_ctx *ww_ctx, | |
18500 | int (*slowfn)(struct rt_mutex *lock, int state, | |
18501 | struct hrtimer_sleeper *timeout, | |
18502 | - enum rtmutex_chainwalk chwalk)) | |
18503 | + enum rtmutex_chainwalk chwalk, | |
18504 | + struct ww_acquire_ctx *ww_ctx)) | |
18505 | { | |
18506 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
18507 | likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
18508 | return 0; | |
18509 | ||
18510 | - return slowfn(lock, state, timeout, chwalk); | |
18511 | + if (unlikely(blk_needs_flush_plug(current))) | |
18512 | + blk_schedule_flush_plug(current); | |
1a6e0f06 | 18513 | + |
e4b2b4a8 JK |
18514 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); |
18515 | } | |
18516 | ||
18517 | static inline int | |
18518 | @@ -1444,9 +1987,11 @@ | |
18519 | /* | |
18520 | * Performs the wakeup of the the top-waiter and re-enables preemption. | |
18521 | */ | |
18522 | -void rt_mutex_postunlock(struct wake_q_head *wake_q) | |
18523 | +void rt_mutex_postunlock(struct wake_q_head *wake_q, | |
18524 | + struct wake_q_head *wake_sleeper_q) | |
18525 | { | |
18526 | wake_up_q(wake_q); | |
18527 | + wake_up_q_sleeper(wake_sleeper_q); | |
18528 | ||
18529 | /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ | |
18530 | preempt_enable(); | |
18531 | @@ -1455,15 +2000,40 @@ | |
18532 | static inline void | |
18533 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
18534 | bool (*slowfn)(struct rt_mutex *lock, | |
18535 | - struct wake_q_head *wqh)) | |
18536 | + struct wake_q_head *wqh, | |
18537 | + struct wake_q_head *wq_sleeper)) | |
18538 | { | |
18539 | DEFINE_WAKE_Q(wake_q); | |
18540 | + DEFINE_WAKE_Q(wake_sleeper_q); | |
18541 | ||
18542 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) | |
18543 | return; | |
18544 | ||
18545 | - if (slowfn(lock, &wake_q)) | |
18546 | - rt_mutex_postunlock(&wake_q); | |
18547 | + if (slowfn(lock, &wake_q, &wake_sleeper_q)) | |
18548 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
1a6e0f06 JK |
18549 | +} |
18550 | + | |
e4b2b4a8 | 18551 | +int __sched __rt_mutex_lock_state(struct rt_mutex *lock, int state) |
1a6e0f06 | 18552 | +{ |
e4b2b4a8 JK |
18553 | + might_sleep(); |
18554 | + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); | |
1a6e0f06 JK |
18555 | +} |
18556 | + | |
e4b2b4a8 JK |
18557 | +/** |
18558 | + * rt_mutex_lock_state - lock a rt_mutex with a given state | |
18559 | + * | |
18560 | + * @lock: The rt_mutex to be locked | |
18561 | + * @state: The state to set when blocking on the rt_mutex | |
18562 | + */ | |
18563 | +static int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state) | |
1a6e0f06 | 18564 | +{ |
e4b2b4a8 | 18565 | + int ret; |
1a6e0f06 | 18566 | + |
e4b2b4a8 JK |
18567 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); |
18568 | + ret = __rt_mutex_lock_state(lock, state); | |
18569 | + if (ret) | |
18570 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18571 | + return ret; | |
18572 | } | |
18573 | ||
18574 | /** | |
18575 | @@ -1473,10 +2043,7 @@ | |
18576 | */ | |
18577 | void __sched rt_mutex_lock(struct rt_mutex *lock) | |
18578 | { | |
18579 | - might_sleep(); | |
18580 | - | |
18581 | - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18582 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); | |
18583 | + rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE); | |
18584 | } | |
18585 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
18586 | ||
18587 | @@ -1491,16 +2058,7 @@ | |
18588 | */ | |
18589 | int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) | |
18590 | { | |
18591 | - int ret; | |
18592 | - | |
18593 | - might_sleep(); | |
18594 | - | |
18595 | - mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18596 | - ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); | |
18597 | - if (ret) | |
18598 | - mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18599 | - | |
18600 | - return ret; | |
18601 | + return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE); | |
18602 | } | |
18603 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
18604 | ||
18605 | @@ -1518,6 +2076,22 @@ | |
18606 | } | |
18607 | ||
18608 | /** | |
18609 | + * rt_mutex_lock_killable - lock a rt_mutex killable | |
18610 | + * | |
18611 | + * @lock: the rt_mutex to be locked | |
18612 | + * @detect_deadlock: deadlock detection on/off | |
18613 | + * | |
18614 | + * Returns: | |
18615 | + * 0 on success | |
18616 | + * -EINTR when interrupted by a signal | |
18617 | + */ | |
18618 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) | |
1a6e0f06 | 18619 | +{ |
e4b2b4a8 | 18620 | + return rt_mutex_lock_state(lock, TASK_KILLABLE); |
1a6e0f06 | 18621 | +} |
e4b2b4a8 | 18622 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); |
1a6e0f06 | 18623 | + |
e4b2b4a8 JK |
18624 | +/** |
18625 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | |
18626 | * the timeout structure is provided | |
18627 | * by the caller | |
18628 | @@ -1540,6 +2114,7 @@ | |
18629 | mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18630 | ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
18631 | RT_MUTEX_MIN_CHAINWALK, | |
18632 | + NULL, | |
18633 | rt_mutex_slowlock); | |
18634 | if (ret) | |
18635 | mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18636 | @@ -1548,6 +2123,18 @@ | |
18637 | } | |
18638 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
18639 | ||
18640 | +int __sched __rt_mutex_trylock(struct rt_mutex *lock) | |
1a6e0f06 | 18641 | +{ |
e4b2b4a8 JK |
18642 | +#ifdef CONFIG_PREEMPT_RT_FULL |
18643 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
1a6e0f06 | 18644 | +#else |
e4b2b4a8 JK |
18645 | + if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) |
18646 | +#endif | |
18647 | + return 0; | |
1a6e0f06 | 18648 | + |
e4b2b4a8 | 18649 | + return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); |
1a6e0f06 | 18650 | +} |
e4b2b4a8 JK |
18651 | + |
18652 | /** | |
18653 | * rt_mutex_trylock - try to lock a rt_mutex | |
18654 | * | |
18655 | @@ -1563,10 +2150,7 @@ | |
18656 | { | |
18657 | int ret; | |
1a6e0f06 | 18658 | |
e4b2b4a8 JK |
18659 | - if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) |
18660 | - return 0; | |
18661 | - | |
18662 | - ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
18663 | + ret = __rt_mutex_trylock(lock); | |
18664 | if (ret) | |
18665 | mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
1a6e0f06 | 18666 | |
e4b2b4a8 JK |
18667 | @@ -1574,6 +2158,11 @@ |
18668 | } | |
18669 | EXPORT_SYMBOL_GPL(rt_mutex_trylock); | |
1a6e0f06 | 18670 | |
e4b2b4a8 JK |
18671 | +void __sched __rt_mutex_unlock(struct rt_mutex *lock) |
18672 | +{ | |
18673 | + rt_mutex_fastunlock(lock, rt_mutex_slowunlock); | |
18674 | +} | |
1a6e0f06 | 18675 | + |
e4b2b4a8 JK |
18676 | /** |
18677 | * rt_mutex_unlock - unlock a rt_mutex | |
18678 | * | |
18679 | @@ -1582,16 +2171,13 @@ | |
18680 | void __sched rt_mutex_unlock(struct rt_mutex *lock) | |
18681 | { | |
18682 | mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18683 | - rt_mutex_fastunlock(lock, rt_mutex_slowunlock); | |
18684 | + __rt_mutex_unlock(lock); | |
18685 | } | |
18686 | EXPORT_SYMBOL_GPL(rt_mutex_unlock); | |
18687 | ||
18688 | -/** | |
18689 | - * Futex variant, that since futex variants do not use the fast-path, can be | |
18690 | - * simple and will not need to retry. | |
18691 | - */ | |
18692 | -bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, | |
18693 | - struct wake_q_head *wake_q) | |
18694 | +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, | |
18695 | + struct wake_q_head *wake_q, | |
18696 | + struct wake_q_head *wq_sleeper) | |
18697 | { | |
18698 | lockdep_assert_held(&lock->wait_lock); | |
18699 | ||
18700 | @@ -1608,22 +2194,35 @@ | |
18701 | * avoid inversion prior to the wakeup. preempt_disable() | |
18702 | * therein pairs with rt_mutex_postunlock(). | |
18703 | */ | |
18704 | - mark_wakeup_next_waiter(wake_q, lock); | |
18705 | + mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); | |
1a6e0f06 | 18706 | |
e4b2b4a8 JK |
18707 | return true; /* call postunlock() */ |
18708 | } | |
1a6e0f06 | 18709 | |
e4b2b4a8 JK |
18710 | +/** |
18711 | + * Futex variant, that since futex variants do not use the fast-path, can be | |
18712 | + * simple and will not need to retry. | |
18713 | + */ | |
18714 | +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, | |
18715 | + struct wake_q_head *wake_q, | |
18716 | + struct wake_q_head *wq_sleeper) | |
1a6e0f06 | 18717 | +{ |
e4b2b4a8 | 18718 | + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); |
1a6e0f06 JK |
18719 | +} |
18720 | + | |
e4b2b4a8 JK |
18721 | void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) |
18722 | { | |
18723 | DEFINE_WAKE_Q(wake_q); | |
18724 | + DEFINE_WAKE_Q(wake_sleeper_q); | |
18725 | + unsigned long flags; | |
18726 | bool postunlock; | |
1a6e0f06 | 18727 | |
e4b2b4a8 JK |
18728 | - raw_spin_lock_irq(&lock->wait_lock); |
18729 | - postunlock = __rt_mutex_futex_unlock(lock, &wake_q); | |
18730 | - raw_spin_unlock_irq(&lock->wait_lock); | |
18731 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
18732 | + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); | |
18733 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
1a6e0f06 | 18734 | |
e4b2b4a8 JK |
18735 | if (postunlock) |
18736 | - rt_mutex_postunlock(&wake_q); | |
18737 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
18738 | } | |
1a6e0f06 | 18739 | |
e4b2b4a8 JK |
18740 | /** |
18741 | @@ -1662,7 +2261,7 @@ | |
18742 | if (name && key) | |
18743 | debug_rt_mutex_init(lock, name, key); | |
18744 | } | |
18745 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
18746 | +EXPORT_SYMBOL(__rt_mutex_init); | |
1a6e0f06 | 18747 | |
e4b2b4a8 JK |
18748 | /** |
18749 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
18750 | @@ -1682,6 +2281,14 @@ | |
18751 | struct task_struct *proxy_owner) | |
18752 | { | |
18753 | __rt_mutex_init(lock, NULL, NULL); | |
18754 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
18755 | + /* | |
18756 | + * get another key class for the wait_lock. LOCK_PI and UNLOCK_PI is | |
18757 | + * holding the ->wait_lock of the proxy_lock while unlocking a sleeping | |
18758 | + * lock. | |
18759 | + */ | |
18760 | + raw_spin_lock_init(&lock->wait_lock); | |
1a6e0f06 | 18761 | +#endif |
e4b2b4a8 JK |
18762 | debug_rt_mutex_proxy_lock(lock, proxy_owner); |
18763 | rt_mutex_set_owner(lock, proxy_owner); | |
18764 | } | |
18765 | @@ -1714,6 +2321,34 @@ | |
18766 | if (try_to_take_rt_mutex(lock, task, NULL)) | |
18767 | return 1; | |
1a6e0f06 | 18768 | |
1a6e0f06 | 18769 | +#ifdef CONFIG_PREEMPT_RT_FULL |
e4b2b4a8 JK |
18770 | + /* |
18771 | + * In PREEMPT_RT there's an added race. | |
18772 | + * If the task, that we are about to requeue, times out, | |
18773 | + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue | |
18774 | + * to skip this task. But right after the task sets | |
18775 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
18776 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
18777 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
18778 | + * lock that it blocks on. We *must not* place this task | |
18779 | + * on this proxy lock in that case. | |
18780 | + * | |
18781 | + * To prevent this race, we first take the task's pi_lock | |
18782 | + * and check if it has updated its pi_blocked_on. If it has, | |
18783 | + * we assume that it woke up and we return -EAGAIN. | |
18784 | + * Otherwise, we set the task's pi_blocked_on to | |
18785 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
18786 | + * it will know that we are in the process of requeuing it. | |
18787 | + */ | |
18788 | + raw_spin_lock(&task->pi_lock); | |
18789 | + if (task->pi_blocked_on) { | |
18790 | + raw_spin_unlock(&task->pi_lock); | |
18791 | + return -EAGAIN; | |
18792 | + } | |
18793 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
18794 | + raw_spin_unlock(&task->pi_lock); | |
1a6e0f06 | 18795 | +#endif |
1a6e0f06 | 18796 | + |
e4b2b4a8 JK |
18797 | /* We enforce deadlock detection for futexes */ |
18798 | ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
18799 | RT_MUTEX_FULL_CHAINWALK); | |
18800 | @@ -1728,7 +2363,7 @@ | |
18801 | ret = 0; | |
18802 | } | |
1a6e0f06 | 18803 | |
e4b2b4a8 JK |
18804 | - if (unlikely(ret)) |
18805 | + if (ret && rt_mutex_has_waiters(lock)) | |
18806 | remove_waiter(lock, waiter); | |
1a6e0f06 | 18807 | |
e4b2b4a8 JK |
18808 | debug_rt_mutex_print_deadlock(waiter); |
18809 | @@ -1803,17 +2438,36 @@ | |
18810 | struct hrtimer_sleeper *to, | |
18811 | struct rt_mutex_waiter *waiter) | |
18812 | { | |
18813 | + struct task_struct *tsk = current; | |
18814 | int ret; | |
18815 | ||
18816 | raw_spin_lock_irq(&lock->wait_lock); | |
18817 | /* sleep on the mutex */ | |
18818 | set_current_state(TASK_INTERRUPTIBLE); | |
18819 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
18820 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
18821 | /* | |
18822 | * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might | |
18823 | * have to fix that up. | |
18824 | */ | |
18825 | fixup_rt_mutex_waiters(lock); | |
18826 | + /* | |
18827 | + * RT has a problem here when the wait got interrupted by a timeout | |
18828 | + * or a signal. task->pi_blocked_on is still set. The task must | |
18829 | + * acquire the hash bucket lock when returning from this function. | |
18830 | + * | |
18831 | + * If the hash bucket lock is contended then the | |
18832 | + * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in | |
18833 | + * task_blocks_on_rt_mutex() will trigger. This can be avoided by | |
18834 | + * clearing task->pi_blocked_on which removes the task from the | |
18835 | + * boosting chain of the rtmutex. That's correct because the task | |
18836 | + * is not longer blocked on it. | |
18837 | + */ | |
18838 | + if (ret) { | |
18839 | + raw_spin_lock(&tsk->pi_lock); | |
18840 | + tsk->pi_blocked_on = NULL; | |
18841 | + raw_spin_unlock(&tsk->pi_lock); | |
18842 | + } | |
1a6e0f06 | 18843 | + |
e4b2b4a8 | 18844 | raw_spin_unlock_irq(&lock->wait_lock); |
1a6e0f06 | 18845 | |
e4b2b4a8 JK |
18846 | return ret; |
18847 | @@ -1874,3 +2528,99 @@ | |
1a6e0f06 | 18848 | |
e4b2b4a8 | 18849 | return cleanup; |
1a6e0f06 | 18850 | } |
e4b2b4a8 JK |
18851 | + |
18852 | +static inline int | |
18853 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
1a6e0f06 | 18854 | +{ |
e4b2b4a8 JK |
18855 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH |
18856 | + unsigned tmp; | |
18857 | + | |
18858 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
18859 | + tmp = ctx->deadlock_inject_interval; | |
18860 | + if (tmp > UINT_MAX/4) | |
18861 | + tmp = UINT_MAX; | |
18862 | + else | |
18863 | + tmp = tmp*2 + tmp + tmp/2; | |
18864 | + | |
18865 | + ctx->deadlock_inject_interval = tmp; | |
18866 | + ctx->deadlock_inject_countdown = tmp; | |
18867 | + ctx->contending_lock = lock; | |
18868 | + | |
18869 | + ww_mutex_unlock(lock); | |
18870 | + | |
18871 | + return -EDEADLK; | |
18872 | + } | |
1a6e0f06 JK |
18873 | +#endif |
18874 | + | |
e4b2b4a8 | 18875 | + return 0; |
1a6e0f06 JK |
18876 | +} |
18877 | + | |
e4b2b4a8 JK |
18878 | +#ifdef CONFIG_PREEMPT_RT_FULL |
18879 | +int __sched | |
18880 | +ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
1a6e0f06 | 18881 | +{ |
e4b2b4a8 | 18882 | + int ret; |
1a6e0f06 | 18883 | + |
e4b2b4a8 | 18884 | + might_sleep(); |
1a6e0f06 | 18885 | + |
e4b2b4a8 JK |
18886 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, |
18887 | + ctx ? &ctx->dep_map : NULL, _RET_IP_); | |
18888 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, | |
18889 | + ctx); | |
18890 | + if (ret) | |
18891 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
18892 | + else if (!ret && ctx && ctx->acquired > 1) | |
18893 | + return ww_mutex_deadlock_injection(lock, ctx); | |
1a6e0f06 | 18894 | + |
e4b2b4a8 | 18895 | + return ret; |
1a6e0f06 | 18896 | +} |
e4b2b4a8 | 18897 | +EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible); |
1a6e0f06 | 18898 | + |
e4b2b4a8 JK |
18899 | +int __sched |
18900 | +ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
1a6e0f06 | 18901 | +{ |
e4b2b4a8 | 18902 | + int ret; |
1a6e0f06 | 18903 | + |
e4b2b4a8 | 18904 | + might_sleep(); |
1a6e0f06 | 18905 | + |
e4b2b4a8 JK |
18906 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, |
18907 | + ctx ? &ctx->dep_map : NULL, _RET_IP_); | |
18908 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, | |
18909 | + ctx); | |
18910 | + if (ret) | |
18911 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
18912 | + else if (!ret && ctx && ctx->acquired > 1) | |
18913 | + return ww_mutex_deadlock_injection(lock, ctx); | |
18914 | + | |
18915 | + return ret; | |
1a6e0f06 | 18916 | +} |
e4b2b4a8 | 18917 | +EXPORT_SYMBOL_GPL(ww_mutex_lock); |
1a6e0f06 | 18918 | + |
e4b2b4a8 | 18919 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) |
1a6e0f06 | 18920 | +{ |
e4b2b4a8 | 18921 | + int nest = !!lock->ctx; |
1a6e0f06 | 18922 | + |
e4b2b4a8 JK |
18923 | + /* |
18924 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
18925 | + * into 'unlocked' state: | |
18926 | + */ | |
18927 | + if (nest) { | |
18928 | +#ifdef CONFIG_DEBUG_MUTEXES | |
18929 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
1a6e0f06 | 18930 | +#endif |
e4b2b4a8 JK |
18931 | + if (lock->ctx->acquired > 0) |
18932 | + lock->ctx->acquired--; | |
18933 | + lock->ctx = NULL; | |
18934 | + } | |
18935 | + | |
18936 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
18937 | + __rt_mutex_unlock(&lock->base.lock); | |
1a6e0f06 | 18938 | +} |
e4b2b4a8 | 18939 | +EXPORT_SYMBOL(ww_mutex_unlock); |
1a6e0f06 | 18940 | + |
e4b2b4a8 | 18941 | +int __rt_mutex_owner_current(struct rt_mutex *lock) |
1a6e0f06 | 18942 | +{ |
e4b2b4a8 | 18943 | + return rt_mutex_owner(lock) == current; |
1a6e0f06 | 18944 | +} |
e4b2b4a8 | 18945 | +EXPORT_SYMBOL(__rt_mutex_owner_current); |
1a6e0f06 | 18946 | +#endif |
e4b2b4a8 JK |
18947 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/rtmutex_common.h linux-4.14/kernel/locking/rtmutex_common.h |
18948 | --- linux-4.14.orig/kernel/locking/rtmutex_common.h 2018-09-05 11:03:22.000000000 +0200 | |
18949 | +++ linux-4.14/kernel/locking/rtmutex_common.h 2018-09-05 11:05:07.000000000 +0200 | |
18950 | @@ -15,6 +15,7 @@ | |
1a6e0f06 | 18951 | |
e4b2b4a8 JK |
18952 | #include <linux/rtmutex.h> |
18953 | #include <linux/sched/wake_q.h> | |
18954 | +#include <linux/sched/debug.h> | |
1a6e0f06 | 18955 | |
e4b2b4a8 JK |
18956 | /* |
18957 | * This is the control structure for tasks blocked on a rt_mutex, | |
18958 | @@ -29,6 +30,7 @@ | |
18959 | struct rb_node pi_tree_entry; | |
18960 | struct task_struct *task; | |
18961 | struct rt_mutex *lock; | |
18962 | + bool savestate; | |
18963 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
18964 | unsigned long ip; | |
18965 | struct pid *deadlock_task_pid; | |
18966 | @@ -129,12 +131,15 @@ | |
18967 | /* | |
18968 | * PI-futex support (proxy locking functions, etc.): | |
18969 | */ | |
18970 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
18971 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
18972 | + | |
18973 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
18974 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
18975 | struct task_struct *proxy_owner); | |
18976 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, | |
18977 | struct task_struct *proxy_owner); | |
18978 | -extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); | |
18979 | +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); | |
18980 | extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
18981 | struct rt_mutex_waiter *waiter, | |
18982 | struct task_struct *task); | |
18983 | @@ -152,9 +157,27 @@ | |
18984 | ||
18985 | extern void rt_mutex_futex_unlock(struct rt_mutex *lock); | |
18986 | extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, | |
18987 | - struct wake_q_head *wqh); | |
18988 | + struct wake_q_head *wqh, | |
18989 | + struct wake_q_head *wq_sleeper); | |
18990 | ||
18991 | -extern void rt_mutex_postunlock(struct wake_q_head *wake_q); | |
18992 | +extern void rt_mutex_postunlock(struct wake_q_head *wake_q, | |
18993 | + struct wake_q_head *wake_sleeper_q); | |
18994 | + | |
18995 | +/* RW semaphore special interface */ | |
18996 | +struct ww_acquire_ctx; | |
18997 | + | |
18998 | +extern int __rt_mutex_lock_state(struct rt_mutex *lock, int state); | |
18999 | +extern int __rt_mutex_trylock(struct rt_mutex *lock); | |
19000 | +extern void __rt_mutex_unlock(struct rt_mutex *lock); | |
19001 | +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, | |
19002 | + struct hrtimer_sleeper *timeout, | |
19003 | + enum rtmutex_chainwalk chwalk, | |
19004 | + struct ww_acquire_ctx *ww_ctx, | |
19005 | + struct rt_mutex_waiter *waiter); | |
19006 | +void __sched rt_spin_lock_slowlock_locked(struct rt_mutex *lock, | |
19007 | + struct rt_mutex_waiter *waiter, | |
19008 | + unsigned long flags); | |
19009 | +void __sched rt_spin_lock_slowunlock(struct rt_mutex *lock); | |
1a6e0f06 | 19010 | |
e4b2b4a8 JK |
19011 | #ifdef CONFIG_DEBUG_RT_MUTEXES |
19012 | # include "rtmutex-debug.h" | |
19013 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/rwlock-rt.c linux-4.14/kernel/locking/rwlock-rt.c | |
19014 | --- linux-4.14.orig/kernel/locking/rwlock-rt.c 1970-01-01 01:00:00.000000000 +0100 | |
19015 | +++ linux-4.14/kernel/locking/rwlock-rt.c 2018-09-05 11:05:07.000000000 +0200 | |
19016 | @@ -0,0 +1,378 @@ | |
19017 | +/* | |
19018 | + */ | |
19019 | +#include <linux/sched/debug.h> | |
19020 | +#include <linux/export.h> | |
19021 | + | |
19022 | +#include "rtmutex_common.h" | |
19023 | +#include <linux/rwlock_types_rt.h> | |
19024 | + | |
19025 | +/* | |
19026 | + * RT-specific reader/writer locks | |
19027 | + * | |
19028 | + * write_lock() | |
19029 | + * 1) Lock lock->rtmutex | |
19030 | + * 2) Remove the reader BIAS to force readers into the slow path | |
19031 | + * 3) Wait until all readers have left the critical region | |
19032 | + * 4) Mark it write locked | |
19033 | + * | |
19034 | + * write_unlock() | |
19035 | + * 1) Remove the write locked marker | |
19036 | + * 2) Set the reader BIAS so readers can use the fast path again | |
19037 | + * 3) Unlock lock->rtmutex to release blocked readers | |
19038 | + * | |
19039 | + * read_lock() | |
19040 | + * 1) Try fast path acquisition (reader BIAS is set) | |
19041 | + * 2) Take lock->rtmutex.wait_lock which protects the writelocked flag | |
19042 | + * 3) If !writelocked, acquire it for read | |
19043 | + * 4) If writelocked, block on lock->rtmutex | |
19044 | + * 5) unlock lock->rtmutex, goto 1) | |
19045 | + * | |
19046 | + * read_unlock() | |
19047 | + * 1) Try fast path release (reader count != 1) | |
19048 | + * 2) Wake the writer waiting in write_lock()#3 | |
19049 | + * | |
19050 | + * read_lock()#3 has the consequence, that rw locks on RT are not writer | |
19051 | + * fair, but writers, which should be avoided in RT tasks (think tasklist | |
19052 | + * lock), are subject to the rtmutex priority/DL inheritance mechanism. | |
19053 | + * | |
19054 | + * It's possible to make the rw locks writer fair by keeping a list of | |
19055 | + * active readers. A blocked writer would force all newly incoming readers | |
19056 | + * to block on the rtmutex, but the rtmutex would have to be proxy locked | |
19057 | + * for one reader after the other. We can't use multi-reader inheritance | |
19058 | + * because there is no way to support that with | |
19059 | + * SCHED_DEADLINE. Implementing the one by one reader boosting/handover | |
19060 | + * mechanism is a major surgery for a very dubious value. | |
19061 | + * | |
19062 | + * The risk of writer starvation is there, but the pathological use cases | |
19063 | + * which trigger it are not necessarily the typical RT workloads. | |
19064 | + */ | |
19065 | + | |
19066 | +void __rwlock_biased_rt_init(struct rt_rw_lock *lock, const char *name, | |
19067 | + struct lock_class_key *key) | |
1a6e0f06 | 19068 | +{ |
e4b2b4a8 JK |
19069 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
19070 | + /* | |
19071 | + * Make sure we are not reinitializing a held semaphore: | |
19072 | + */ | |
19073 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
19074 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
1a6e0f06 | 19075 | +#endif |
e4b2b4a8 JK |
19076 | + atomic_set(&lock->readers, READER_BIAS); |
19077 | + rt_mutex_init(&lock->rtmutex); | |
19078 | + lock->rtmutex.save_state = 1; | |
1a6e0f06 JK |
19079 | +} |
19080 | + | |
e4b2b4a8 | 19081 | +int __read_rt_trylock(struct rt_rw_lock *lock) |
1a6e0f06 | 19082 | +{ |
e4b2b4a8 | 19083 | + int r, old; |
1a6e0f06 | 19084 | + |
e4b2b4a8 JK |
19085 | + /* |
19086 | + * Increment reader count, if lock->readers < 0, i.e. READER_BIAS is | |
19087 | + * set. | |
19088 | + */ | |
19089 | + for (r = atomic_read(&lock->readers); r < 0;) { | |
19090 | + old = atomic_cmpxchg(&lock->readers, r, r + 1); | |
19091 | + if (likely(old == r)) | |
19092 | + return 1; | |
19093 | + r = old; | |
19094 | + } | |
19095 | + return 0; | |
1a6e0f06 JK |
19096 | +} |
19097 | + | |
e4b2b4a8 | 19098 | +void __sched __read_rt_lock(struct rt_rw_lock *lock) |
1a6e0f06 | 19099 | +{ |
e4b2b4a8 JK |
19100 | + struct rt_mutex *m = &lock->rtmutex; |
19101 | + struct rt_mutex_waiter waiter; | |
19102 | + unsigned long flags; | |
1a6e0f06 | 19103 | + |
e4b2b4a8 JK |
19104 | + if (__read_rt_trylock(lock)) |
19105 | + return; | |
19106 | + | |
19107 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19108 | + /* | |
19109 | + * Allow readers as long as the writer has not completely | |
19110 | + * acquired the semaphore for write. | |
19111 | + */ | |
19112 | + if (atomic_read(&lock->readers) != WRITER_BIAS) { | |
19113 | + atomic_inc(&lock->readers); | |
19114 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19115 | + return; | |
19116 | + } | |
19117 | + | |
19118 | + /* | |
19119 | + * Call into the slow lock path with the rtmutex->wait_lock | |
19120 | + * held, so this can't result in the following race: | |
19121 | + * | |
19122 | + * Reader1 Reader2 Writer | |
19123 | + * read_lock() | |
19124 | + * write_lock() | |
19125 | + * rtmutex_lock(m) | |
19126 | + * swait() | |
19127 | + * read_lock() | |
19128 | + * unlock(m->wait_lock) | |
19129 | + * read_unlock() | |
19130 | + * swake() | |
19131 | + * lock(m->wait_lock) | |
19132 | + * lock->writelocked=true | |
19133 | + * unlock(m->wait_lock) | |
19134 | + * | |
19135 | + * write_unlock() | |
19136 | + * lock->writelocked=false | |
19137 | + * rtmutex_unlock(m) | |
19138 | + * read_lock() | |
19139 | + * write_lock() | |
19140 | + * rtmutex_lock(m) | |
19141 | + * swait() | |
19142 | + * rtmutex_lock(m) | |
19143 | + * | |
19144 | + * That would put Reader1 behind the writer waiting on | |
19145 | + * Reader2 to call read_unlock() which might be unbound. | |
19146 | + */ | |
19147 | + rt_mutex_init_waiter(&waiter, false); | |
19148 | + rt_spin_lock_slowlock_locked(m, &waiter, flags); | |
19149 | + /* | |
19150 | + * The slowlock() above is guaranteed to return with the rtmutex is | |
19151 | + * now held, so there can't be a writer active. Increment the reader | |
19152 | + * count and immediately drop the rtmutex again. | |
19153 | + */ | |
19154 | + atomic_inc(&lock->readers); | |
19155 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19156 | + rt_spin_lock_slowunlock(m); | |
19157 | + | |
19158 | + debug_rt_mutex_free_waiter(&waiter); | |
1a6e0f06 JK |
19159 | +} |
19160 | + | |
e4b2b4a8 | 19161 | +void __read_rt_unlock(struct rt_rw_lock *lock) |
1a6e0f06 | 19162 | +{ |
e4b2b4a8 JK |
19163 | + struct rt_mutex *m = &lock->rtmutex; |
19164 | + struct task_struct *tsk; | |
19165 | + | |
19166 | + /* | |
19167 | + * sem->readers can only hit 0 when a writer is waiting for the | |
19168 | + * active readers to leave the critical region. | |
19169 | + */ | |
19170 | + if (!atomic_dec_and_test(&lock->readers)) | |
19171 | + return; | |
19172 | + | |
19173 | + raw_spin_lock_irq(&m->wait_lock); | |
19174 | + /* | |
19175 | + * Wake the writer, i.e. the rtmutex owner. It might release the | |
19176 | + * rtmutex concurrently in the fast path, but to clean up the rw | |
19177 | + * lock it needs to acquire m->wait_lock. The worst case which can | |
19178 | + * happen is a spurious wakeup. | |
19179 | + */ | |
19180 | + tsk = rt_mutex_owner(m); | |
19181 | + if (tsk) | |
19182 | + wake_up_process(tsk); | |
19183 | + | |
19184 | + raw_spin_unlock_irq(&m->wait_lock); | |
1a6e0f06 JK |
19185 | +} |
19186 | + | |
e4b2b4a8 JK |
19187 | +static void __write_unlock_common(struct rt_rw_lock *lock, int bias, |
19188 | + unsigned long flags) | |
1a6e0f06 | 19189 | +{ |
e4b2b4a8 | 19190 | + struct rt_mutex *m = &lock->rtmutex; |
1a6e0f06 | 19191 | + |
e4b2b4a8 JK |
19192 | + atomic_add(READER_BIAS - bias, &lock->readers); |
19193 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19194 | + rt_spin_lock_slowunlock(m); | |
1a6e0f06 JK |
19195 | +} |
19196 | + | |
e4b2b4a8 | 19197 | +void __sched __write_rt_lock(struct rt_rw_lock *lock) |
1a6e0f06 | 19198 | +{ |
e4b2b4a8 JK |
19199 | + struct rt_mutex *m = &lock->rtmutex; |
19200 | + struct task_struct *self = current; | |
19201 | + unsigned long flags; | |
19202 | + | |
19203 | + /* Take the rtmutex as a first step */ | |
19204 | + __rt_spin_lock(m); | |
19205 | + | |
19206 | + /* Force readers into slow path */ | |
19207 | + atomic_sub(READER_BIAS, &lock->readers); | |
19208 | + | |
19209 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19210 | + | |
19211 | + raw_spin_lock(&self->pi_lock); | |
19212 | + self->saved_state = self->state; | |
19213 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
19214 | + raw_spin_unlock(&self->pi_lock); | |
19215 | + | |
19216 | + for (;;) { | |
19217 | + /* Have all readers left the critical region? */ | |
19218 | + if (!atomic_read(&lock->readers)) { | |
19219 | + atomic_set(&lock->readers, WRITER_BIAS); | |
19220 | + raw_spin_lock(&self->pi_lock); | |
19221 | + __set_current_state_no_track(self->saved_state); | |
19222 | + self->saved_state = TASK_RUNNING; | |
19223 | + raw_spin_unlock(&self->pi_lock); | |
19224 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19225 | + return; | |
19226 | + } | |
19227 | + | |
19228 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19229 | + | |
19230 | + if (atomic_read(&lock->readers) != 0) | |
19231 | + schedule(); | |
19232 | + | |
19233 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19234 | + | |
19235 | + raw_spin_lock(&self->pi_lock); | |
19236 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
19237 | + raw_spin_unlock(&self->pi_lock); | |
19238 | + } | |
19239 | +} | |
19240 | + | |
19241 | +int __write_rt_trylock(struct rt_rw_lock *lock) | |
1a6e0f06 | 19242 | +{ |
e4b2b4a8 JK |
19243 | + struct rt_mutex *m = &lock->rtmutex; |
19244 | + unsigned long flags; | |
19245 | + | |
19246 | + if (!__rt_mutex_trylock(m)) | |
19247 | + return 0; | |
19248 | + | |
19249 | + atomic_sub(READER_BIAS, &lock->readers); | |
19250 | + | |
19251 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19252 | + if (!atomic_read(&lock->readers)) { | |
19253 | + atomic_set(&lock->readers, WRITER_BIAS); | |
19254 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19255 | + return 1; | |
19256 | + } | |
19257 | + __write_unlock_common(lock, 0, flags); | |
19258 | + return 0; | |
1a6e0f06 JK |
19259 | +} |
19260 | + | |
e4b2b4a8 JK |
19261 | +void __write_rt_unlock(struct rt_rw_lock *lock) |
19262 | +{ | |
19263 | + struct rt_mutex *m = &lock->rtmutex; | |
19264 | + unsigned long flags; | |
1a6e0f06 | 19265 | + |
e4b2b4a8 JK |
19266 | + raw_spin_lock_irqsave(&m->wait_lock, flags); |
19267 | + __write_unlock_common(lock, WRITER_BIAS, flags); | |
19268 | +} | |
1a6e0f06 | 19269 | + |
e4b2b4a8 JK |
19270 | +/* Map the reader biased implementation */ |
19271 | +static inline int do_read_rt_trylock(rwlock_t *rwlock) | |
1a6e0f06 | 19272 | +{ |
e4b2b4a8 | 19273 | + return __read_rt_trylock(rwlock); |
1a6e0f06 JK |
19274 | +} |
19275 | + | |
e4b2b4a8 JK |
19276 | +static inline int do_write_rt_trylock(rwlock_t *rwlock) |
19277 | +{ | |
19278 | + return __write_rt_trylock(rwlock); | |
19279 | +} | |
1a6e0f06 | 19280 | + |
e4b2b4a8 JK |
19281 | +static inline void do_read_rt_lock(rwlock_t *rwlock) |
19282 | +{ | |
19283 | + __read_rt_lock(rwlock); | |
19284 | +} | |
1a6e0f06 | 19285 | + |
e4b2b4a8 JK |
19286 | +static inline void do_write_rt_lock(rwlock_t *rwlock) |
19287 | +{ | |
19288 | + __write_rt_lock(rwlock); | |
19289 | +} | |
1a6e0f06 | 19290 | + |
e4b2b4a8 JK |
19291 | +static inline void do_read_rt_unlock(rwlock_t *rwlock) |
19292 | +{ | |
19293 | + __read_rt_unlock(rwlock); | |
19294 | +} | |
1a6e0f06 | 19295 | + |
e4b2b4a8 JK |
19296 | +static inline void do_write_rt_unlock(rwlock_t *rwlock) |
19297 | +{ | |
19298 | + __write_rt_unlock(rwlock); | |
19299 | +} | |
1a6e0f06 | 19300 | + |
e4b2b4a8 JK |
19301 | +static inline void do_rwlock_rt_init(rwlock_t *rwlock, const char *name, |
19302 | + struct lock_class_key *key) | |
19303 | +{ | |
19304 | + __rwlock_biased_rt_init(rwlock, name, key); | |
19305 | +} | |
1a6e0f06 | 19306 | + |
e4b2b4a8 JK |
19307 | +int __lockfunc rt_read_can_lock(rwlock_t *rwlock) |
19308 | +{ | |
19309 | + return atomic_read(&rwlock->readers) < 0; | |
19310 | +} | |
1a6e0f06 | 19311 | + |
e4b2b4a8 JK |
19312 | +int __lockfunc rt_write_can_lock(rwlock_t *rwlock) |
19313 | +{ | |
19314 | + return atomic_read(&rwlock->readers) == READER_BIAS; | |
19315 | +} | |
1a6e0f06 JK |
19316 | + |
19317 | +/* | |
e4b2b4a8 | 19318 | + * The common functions which get wrapped into the rwlock API. |
1a6e0f06 | 19319 | + */ |
e4b2b4a8 JK |
19320 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) |
19321 | +{ | |
19322 | + int ret; | |
1a6e0f06 | 19323 | + |
e4b2b4a8 JK |
19324 | + sleeping_lock_inc(); |
19325 | + migrate_disable(); | |
19326 | + ret = do_read_rt_trylock(rwlock); | |
19327 | + if (ret) { | |
19328 | + rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); | |
19329 | + } else { | |
19330 | + migrate_enable(); | |
19331 | + sleeping_lock_dec(); | |
19332 | + } | |
19333 | + return ret; | |
19334 | +} | |
19335 | +EXPORT_SYMBOL(rt_read_trylock); | |
1a6e0f06 | 19336 | + |
e4b2b4a8 JK |
19337 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) |
19338 | +{ | |
19339 | + int ret; | |
1a6e0f06 | 19340 | + |
e4b2b4a8 JK |
19341 | + sleeping_lock_inc(); |
19342 | + migrate_disable(); | |
19343 | + ret = do_write_rt_trylock(rwlock); | |
19344 | + if (ret) { | |
19345 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
19346 | + } else { | |
19347 | + migrate_enable(); | |
19348 | + sleeping_lock_dec(); | |
19349 | + } | |
19350 | + return ret; | |
19351 | +} | |
19352 | +EXPORT_SYMBOL(rt_write_trylock); | |
1a6e0f06 | 19353 | + |
e4b2b4a8 JK |
19354 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) |
19355 | +{ | |
19356 | + sleeping_lock_inc(); | |
19357 | + migrate_disable(); | |
19358 | + rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); | |
19359 | + do_read_rt_lock(rwlock); | |
19360 | +} | |
19361 | +EXPORT_SYMBOL(rt_read_lock); | |
1a6e0f06 | 19362 | + |
e4b2b4a8 JK |
19363 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) |
19364 | +{ | |
19365 | + sleeping_lock_inc(); | |
19366 | + migrate_disable(); | |
19367 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
19368 | + do_write_rt_lock(rwlock); | |
19369 | +} | |
19370 | +EXPORT_SYMBOL(rt_write_lock); | |
1a6e0f06 | 19371 | + |
e4b2b4a8 JK |
19372 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) |
19373 | +{ | |
19374 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
19375 | + do_read_rt_unlock(rwlock); | |
19376 | + migrate_enable(); | |
19377 | + sleeping_lock_dec(); | |
19378 | +} | |
19379 | +EXPORT_SYMBOL(rt_read_unlock); | |
1a6e0f06 | 19380 | + |
e4b2b4a8 JK |
19381 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) |
19382 | +{ | |
19383 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
19384 | + do_write_rt_unlock(rwlock); | |
19385 | + migrate_enable(); | |
19386 | + sleeping_lock_dec(); | |
19387 | +} | |
19388 | +EXPORT_SYMBOL(rt_write_unlock); | |
1a6e0f06 | 19389 | + |
e4b2b4a8 JK |
19390 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) |
19391 | +{ | |
19392 | + do_rwlock_rt_init(rwlock, name, key); | |
19393 | +} | |
19394 | +EXPORT_SYMBOL(__rt_rwlock_init); | |
19395 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/rwsem-rt.c linux-4.14/kernel/locking/rwsem-rt.c | |
19396 | --- linux-4.14.orig/kernel/locking/rwsem-rt.c 1970-01-01 01:00:00.000000000 +0100 | |
19397 | +++ linux-4.14/kernel/locking/rwsem-rt.c 2018-09-05 11:05:07.000000000 +0200 | |
19398 | @@ -0,0 +1,269 @@ | |
19399 | +/* | |
19400 | + */ | |
19401 | +#include <linux/rwsem.h> | |
19402 | +#include <linux/sched/debug.h> | |
19403 | +#include <linux/sched/signal.h> | |
19404 | +#include <linux/export.h> | |
1a6e0f06 | 19405 | + |
e4b2b4a8 JK |
19406 | +#include "rtmutex_common.h" |
19407 | + | |
19408 | +/* | |
19409 | + * RT-specific reader/writer semaphores | |
19410 | + * | |
19411 | + * down_write() | |
19412 | + * 1) Lock sem->rtmutex | |
19413 | + * 2) Remove the reader BIAS to force readers into the slow path | |
19414 | + * 3) Wait until all readers have left the critical region | |
19415 | + * 4) Mark it write locked | |
19416 | + * | |
19417 | + * up_write() | |
19418 | + * 1) Remove the write locked marker | |
19419 | + * 2) Set the reader BIAS so readers can use the fast path again | |
19420 | + * 3) Unlock sem->rtmutex to release blocked readers | |
19421 | + * | |
19422 | + * down_read() | |
19423 | + * 1) Try fast path acquisition (reader BIAS is set) | |
19424 | + * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag | |
19425 | + * 3) If !writelocked, acquire it for read | |
19426 | + * 4) If writelocked, block on sem->rtmutex | |
19427 | + * 5) unlock sem->rtmutex, goto 1) | |
19428 | + * | |
19429 | + * up_read() | |
19430 | + * 1) Try fast path release (reader count != 1) | |
19431 | + * 2) Wake the writer waiting in down_write()#3 | |
19432 | + * | |
19433 | + * down_read()#3 has the consequence, that rw semaphores on RT are not writer | |
19434 | + * fair, but writers, which should be avoided in RT tasks (think mmap_sem), | |
19435 | + * are subject to the rtmutex priority/DL inheritance mechanism. | |
19436 | + * | |
19437 | + * It's possible to make the rw semaphores writer fair by keeping a list of | |
19438 | + * active readers. A blocked writer would force all newly incoming readers to | |
19439 | + * block on the rtmutex, but the rtmutex would have to be proxy locked for one | |
19440 | + * reader after the other. We can't use multi-reader inheritance because there | |
19441 | + * is no way to support that with SCHED_DEADLINE. Implementing the one by one | |
19442 | + * reader boosting/handover mechanism is a major surgery for a very dubious | |
19443 | + * value. | |
19444 | + * | |
19445 | + * The risk of writer starvation is there, but the pathological use cases | |
19446 | + * which trigger it are not necessarily the typical RT workloads. | |
19447 | + */ | |
19448 | + | |
19449 | +void __rwsem_init(struct rw_semaphore *sem, const char *name, | |
19450 | + struct lock_class_key *key) | |
19451 | +{ | |
19452 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
19453 | + /* | |
19454 | + * Make sure we are not reinitializing a held semaphore: | |
19455 | + */ | |
19456 | + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
19457 | + lockdep_init_map(&sem->dep_map, name, key, 0); | |
1a6e0f06 | 19458 | +#endif |
e4b2b4a8 JK |
19459 | + atomic_set(&sem->readers, READER_BIAS); |
19460 | +} | |
19461 | +EXPORT_SYMBOL(__rwsem_init); | |
19462 | + | |
19463 | +int __down_read_trylock(struct rw_semaphore *sem) | |
19464 | +{ | |
19465 | + int r, old; | |
19466 | + | |
19467 | + /* | |
19468 | + * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is | |
19469 | + * set. | |
19470 | + */ | |
19471 | + for (r = atomic_read(&sem->readers); r < 0;) { | |
19472 | + old = atomic_cmpxchg(&sem->readers, r, r + 1); | |
19473 | + if (likely(old == r)) | |
19474 | + return 1; | |
19475 | + r = old; | |
19476 | + } | |
19477 | + return 0; | |
19478 | +} | |
19479 | + | |
19480 | +void __sched __down_read(struct rw_semaphore *sem) | |
19481 | +{ | |
19482 | + struct rt_mutex *m = &sem->rtmutex; | |
19483 | + struct rt_mutex_waiter waiter; | |
19484 | + | |
19485 | + if (__down_read_trylock(sem)) | |
19486 | + return; | |
19487 | + | |
19488 | + might_sleep(); | |
19489 | + raw_spin_lock_irq(&m->wait_lock); | |
19490 | + /* | |
19491 | + * Allow readers as long as the writer has not completely | |
19492 | + * acquired the semaphore for write. | |
19493 | + */ | |
19494 | + if (atomic_read(&sem->readers) != WRITER_BIAS) { | |
19495 | + atomic_inc(&sem->readers); | |
19496 | + raw_spin_unlock_irq(&m->wait_lock); | |
19497 | + return; | |
19498 | + } | |
1a6e0f06 | 19499 | + |
e4b2b4a8 JK |
19500 | + /* |
19501 | + * Call into the slow lock path with the rtmutex->wait_lock | |
19502 | + * held, so this can't result in the following race: | |
19503 | + * | |
19504 | + * Reader1 Reader2 Writer | |
19505 | + * down_read() | |
19506 | + * down_write() | |
19507 | + * rtmutex_lock(m) | |
19508 | + * swait() | |
19509 | + * down_read() | |
19510 | + * unlock(m->wait_lock) | |
19511 | + * up_read() | |
19512 | + * swake() | |
19513 | + * lock(m->wait_lock) | |
19514 | + * sem->writelocked=true | |
19515 | + * unlock(m->wait_lock) | |
19516 | + * | |
19517 | + * up_write() | |
19518 | + * sem->writelocked=false | |
19519 | + * rtmutex_unlock(m) | |
19520 | + * down_read() | |
19521 | + * down_write() | |
19522 | + * rtmutex_lock(m) | |
19523 | + * swait() | |
19524 | + * rtmutex_lock(m) | |
19525 | + * | |
19526 | + * That would put Reader1 behind the writer waiting on | |
19527 | + * Reader2 to call up_read() which might be unbound. | |
19528 | + */ | |
19529 | + rt_mutex_init_waiter(&waiter, false); | |
19530 | + rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, | |
19531 | + RT_MUTEX_MIN_CHAINWALK, NULL, | |
19532 | + &waiter); | |
19533 | + /* | |
19534 | + * The slowlock() above is guaranteed to return with the rtmutex is | |
19535 | + * now held, so there can't be a writer active. Increment the reader | |
19536 | + * count and immediately drop the rtmutex again. | |
19537 | + */ | |
19538 | + atomic_inc(&sem->readers); | |
19539 | + raw_spin_unlock_irq(&m->wait_lock); | |
19540 | + __rt_mutex_unlock(m); | |
1a6e0f06 | 19541 | + |
e4b2b4a8 JK |
19542 | + debug_rt_mutex_free_waiter(&waiter); |
19543 | +} | |
19544 | + | |
19545 | +void __up_read(struct rw_semaphore *sem) | |
1a6e0f06 | 19546 | +{ |
e4b2b4a8 JK |
19547 | + struct rt_mutex *m = &sem->rtmutex; |
19548 | + struct task_struct *tsk; | |
19549 | + | |
19550 | + /* | |
19551 | + * sem->readers can only hit 0 when a writer is waiting for the | |
19552 | + * active readers to leave the critical region. | |
19553 | + */ | |
19554 | + if (!atomic_dec_and_test(&sem->readers)) | |
19555 | + return; | |
19556 | + | |
19557 | + might_sleep(); | |
19558 | + raw_spin_lock_irq(&m->wait_lock); | |
19559 | + /* | |
19560 | + * Wake the writer, i.e. the rtmutex owner. It might release the | |
19561 | + * rtmutex concurrently in the fast path (due to a signal), but to | |
19562 | + * clean up the rwsem it needs to acquire m->wait_lock. The worst | |
19563 | + * case which can happen is a spurious wakeup. | |
19564 | + */ | |
19565 | + tsk = rt_mutex_owner(m); | |
19566 | + if (tsk) | |
19567 | + wake_up_process(tsk); | |
19568 | + | |
19569 | + raw_spin_unlock_irq(&m->wait_lock); | |
1a6e0f06 JK |
19570 | +} |
19571 | + | |
e4b2b4a8 JK |
19572 | +static void __up_write_unlock(struct rw_semaphore *sem, int bias, |
19573 | + unsigned long flags) | |
19574 | +{ | |
19575 | + struct rt_mutex *m = &sem->rtmutex; | |
1a6e0f06 | 19576 | + |
e4b2b4a8 JK |
19577 | + atomic_add(READER_BIAS - bias, &sem->readers); |
19578 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19579 | + __rt_mutex_unlock(m); | |
19580 | +} | |
1a6e0f06 | 19581 | + |
e4b2b4a8 JK |
19582 | +static int __sched __down_write_common(struct rw_semaphore *sem, int state) |
19583 | +{ | |
19584 | + struct rt_mutex *m = &sem->rtmutex; | |
19585 | + unsigned long flags; | |
1a6e0f06 | 19586 | + |
e4b2b4a8 JK |
19587 | + /* Take the rtmutex as a first step */ |
19588 | + if (__rt_mutex_lock_state(m, state)) | |
19589 | + return -EINTR; | |
1a6e0f06 | 19590 | + |
e4b2b4a8 JK |
19591 | + /* Force readers into slow path */ |
19592 | + atomic_sub(READER_BIAS, &sem->readers); | |
19593 | + might_sleep(); | |
1a6e0f06 | 19594 | + |
e4b2b4a8 JK |
19595 | + set_current_state(state); |
19596 | + for (;;) { | |
19597 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19598 | + /* Have all readers left the critical region? */ | |
19599 | + if (!atomic_read(&sem->readers)) { | |
19600 | + atomic_set(&sem->readers, WRITER_BIAS); | |
19601 | + __set_current_state(TASK_RUNNING); | |
19602 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19603 | + return 0; | |
19604 | + } | |
1a6e0f06 | 19605 | + |
e4b2b4a8 JK |
19606 | + if (signal_pending_state(state, current)) { |
19607 | + __set_current_state(TASK_RUNNING); | |
19608 | + __up_write_unlock(sem, 0, flags); | |
19609 | + return -EINTR; | |
19610 | + } | |
19611 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
1a6e0f06 | 19612 | + |
e4b2b4a8 JK |
19613 | + if (atomic_read(&sem->readers) != 0) { |
19614 | + schedule(); | |
19615 | + set_current_state(state); | |
19616 | + } | |
19617 | + } | |
19618 | +} | |
1a6e0f06 | 19619 | + |
e4b2b4a8 JK |
19620 | +void __sched __down_write(struct rw_semaphore *sem) |
19621 | +{ | |
19622 | + __down_write_common(sem, TASK_UNINTERRUPTIBLE); | |
19623 | +} | |
1a6e0f06 | 19624 | + |
e4b2b4a8 | 19625 | +int __sched __down_write_killable(struct rw_semaphore *sem) |
1a6e0f06 | 19626 | +{ |
e4b2b4a8 | 19627 | + return __down_write_common(sem, TASK_KILLABLE); |
1a6e0f06 JK |
19628 | +} |
19629 | + | |
e4b2b4a8 | 19630 | +int __down_write_trylock(struct rw_semaphore *sem) |
1a6e0f06 | 19631 | +{ |
e4b2b4a8 JK |
19632 | + struct rt_mutex *m = &sem->rtmutex; |
19633 | + unsigned long flags; | |
19634 | + | |
19635 | + if (!__rt_mutex_trylock(m)) | |
19636 | + return 0; | |
19637 | + | |
19638 | + atomic_sub(READER_BIAS, &sem->readers); | |
19639 | + | |
19640 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19641 | + if (!atomic_read(&sem->readers)) { | |
19642 | + atomic_set(&sem->readers, WRITER_BIAS); | |
19643 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
19644 | + return 1; | |
19645 | + } | |
19646 | + __up_write_unlock(sem, 0, flags); | |
19647 | + return 0; | |
1a6e0f06 JK |
19648 | +} |
19649 | + | |
e4b2b4a8 | 19650 | +void __up_write(struct rw_semaphore *sem) |
1a6e0f06 | 19651 | +{ |
e4b2b4a8 JK |
19652 | + struct rt_mutex *m = &sem->rtmutex; |
19653 | + unsigned long flags; | |
19654 | + | |
19655 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
19656 | + __up_write_unlock(sem, WRITER_BIAS, flags); | |
1a6e0f06 JK |
19657 | +} |
19658 | + | |
e4b2b4a8 JK |
19659 | +void __downgrade_write(struct rw_semaphore *sem) |
19660 | +{ | |
19661 | + struct rt_mutex *m = &sem->rtmutex; | |
19662 | + unsigned long flags; | |
1a6e0f06 | 19663 | + |
e4b2b4a8 JK |
19664 | + raw_spin_lock_irqsave(&m->wait_lock, flags); |
19665 | + /* Release it and account current as reader */ | |
19666 | + __up_write_unlock(sem, WRITER_BIAS - 1, flags); | |
19667 | +} | |
19668 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/spinlock.c linux-4.14/kernel/locking/spinlock.c | |
19669 | --- linux-4.14.orig/kernel/locking/spinlock.c 2017-11-12 19:46:13.000000000 +0100 | |
19670 | +++ linux-4.14/kernel/locking/spinlock.c 2018-09-05 11:05:07.000000000 +0200 | |
19671 | @@ -125,8 +125,11 @@ | |
19672 | * __[spin|read|write]_lock_bh() | |
1a6e0f06 | 19673 | */ |
e4b2b4a8 | 19674 | BUILD_LOCK_OPS(spin, raw_spinlock); |
1a6e0f06 JK |
19675 | + |
19676 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
e4b2b4a8 JK |
19677 | BUILD_LOCK_OPS(read, rwlock); |
19678 | BUILD_LOCK_OPS(write, rwlock); | |
19679 | +#endif | |
19680 | ||
1a6e0f06 JK |
19681 | #endif |
19682 | ||
e4b2b4a8 JK |
19683 | @@ -210,6 +213,8 @@ |
19684 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
19685 | #endif | |
19686 | ||
19687 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1a6e0f06 | 19688 | + |
e4b2b4a8 JK |
19689 | #ifndef CONFIG_INLINE_READ_TRYLOCK |
19690 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
19691 | { | |
19692 | @@ -354,6 +359,8 @@ | |
19693 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
19694 | #endif | |
19695 | ||
19696 | +#endif /* !PREEMPT_RT_FULL */ | |
1a6e0f06 | 19697 | + |
e4b2b4a8 JK |
19698 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
19699 | ||
19700 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
19701 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/locking/spinlock_debug.c linux-4.14/kernel/locking/spinlock_debug.c | |
19702 | --- linux-4.14.orig/kernel/locking/spinlock_debug.c 2017-11-12 19:46:13.000000000 +0100 | |
19703 | +++ linux-4.14/kernel/locking/spinlock_debug.c 2018-09-05 11:05:07.000000000 +0200 | |
19704 | @@ -31,6 +31,7 @@ | |
19705 | ||
19706 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
19707 | ||
19708 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
19709 | void __rwlock_init(rwlock_t *lock, const char *name, | |
19710 | struct lock_class_key *key) | |
19711 | { | |
19712 | @@ -48,6 +49,7 @@ | |
19713 | } | |
19714 | ||
19715 | EXPORT_SYMBOL(__rwlock_init); | |
1a6e0f06 | 19716 | +#endif |
e4b2b4a8 JK |
19717 | |
19718 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
19719 | { | |
19720 | @@ -135,6 +137,7 @@ | |
19721 | arch_spin_unlock(&lock->raw_lock); | |
19722 | } | |
19723 | ||
19724 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
19725 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
19726 | { | |
19727 | if (!debug_locks_off()) | |
19728 | @@ -224,3 +227,5 @@ | |
19729 | debug_write_unlock(lock); | |
19730 | arch_write_unlock(&lock->raw_lock); | |
19731 | } | |
1a6e0f06 JK |
19732 | + |
19733 | +#endif | |
e4b2b4a8 JK |
19734 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/panic.c linux-4.14/kernel/panic.c |
19735 | --- linux-4.14.orig/kernel/panic.c 2017-11-12 19:46:13.000000000 +0100 | |
19736 | +++ linux-4.14/kernel/panic.c 2018-09-05 11:05:07.000000000 +0200 | |
19737 | @@ -482,9 +482,11 @@ | |
19738 | ||
19739 | static int init_oops_id(void) | |
19740 | { | |
19741 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
19742 | if (!oops_id) | |
19743 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
19744 | else | |
1a6e0f06 | 19745 | +#endif |
e4b2b4a8 JK |
19746 | oops_id++; |
19747 | ||
19748 | return 0; | |
19749 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/power/hibernate.c linux-4.14/kernel/power/hibernate.c | |
19750 | --- linux-4.14.orig/kernel/power/hibernate.c 2017-11-12 19:46:13.000000000 +0100 | |
19751 | +++ linux-4.14/kernel/power/hibernate.c 2018-09-05 11:05:07.000000000 +0200 | |
19752 | @@ -287,6 +287,8 @@ | |
19753 | ||
19754 | local_irq_disable(); | |
19755 | ||
19756 | + system_state = SYSTEM_SUSPEND; | |
1a6e0f06 | 19757 | + |
e4b2b4a8 JK |
19758 | error = syscore_suspend(); |
19759 | if (error) { | |
19760 | pr_err("Some system devices failed to power down, aborting hibernation\n"); | |
19761 | @@ -317,6 +319,7 @@ | |
19762 | syscore_resume(); | |
19763 | ||
19764 | Enable_irqs: | |
19765 | + system_state = SYSTEM_RUNNING; | |
19766 | local_irq_enable(); | |
19767 | ||
19768 | Enable_cpus: | |
19769 | @@ -445,6 +448,7 @@ | |
19770 | goto Enable_cpus; | |
19771 | ||
19772 | local_irq_disable(); | |
19773 | + system_state = SYSTEM_SUSPEND; | |
19774 | ||
19775 | error = syscore_suspend(); | |
19776 | if (error) | |
19777 | @@ -478,6 +482,7 @@ | |
19778 | syscore_resume(); | |
19779 | ||
19780 | Enable_irqs: | |
19781 | + system_state = SYSTEM_RUNNING; | |
19782 | local_irq_enable(); | |
19783 | ||
19784 | Enable_cpus: | |
19785 | @@ -563,6 +568,7 @@ | |
19786 | goto Enable_cpus; | |
19787 | ||
19788 | local_irq_disable(); | |
19789 | + system_state = SYSTEM_SUSPEND; | |
19790 | syscore_suspend(); | |
19791 | if (pm_wakeup_pending()) { | |
19792 | error = -EAGAIN; | |
19793 | @@ -575,6 +581,7 @@ | |
19794 | ||
19795 | Power_up: | |
19796 | syscore_resume(); | |
19797 | + system_state = SYSTEM_RUNNING; | |
19798 | local_irq_enable(); | |
19799 | ||
19800 | Enable_cpus: | |
19801 | @@ -672,6 +679,10 @@ | |
19802 | return error; | |
19803 | } | |
19804 | ||
19805 | +#ifndef CONFIG_SUSPEND | |
19806 | +bool pm_in_action; | |
1a6e0f06 | 19807 | +#endif |
1a6e0f06 | 19808 | + |
e4b2b4a8 JK |
19809 | /** |
19810 | * hibernate - Carry out system hibernation, including saving the image. | |
19811 | */ | |
19812 | @@ -685,6 +696,8 @@ | |
19813 | return -EPERM; | |
19814 | } | |
19815 | ||
19816 | + pm_in_action = true; | |
1a6e0f06 | 19817 | + |
e4b2b4a8 JK |
19818 | lock_system_sleep(); |
19819 | /* The snapshot device should not be opened while we're running */ | |
19820 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
19821 | @@ -763,6 +776,7 @@ | |
19822 | atomic_inc(&snapshot_device_available); | |
19823 | Unlock: | |
19824 | unlock_system_sleep(); | |
19825 | + pm_in_action = false; | |
19826 | pr_info("hibernation exit\n"); | |
19827 | ||
19828 | return error; | |
19829 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/power/suspend.c linux-4.14/kernel/power/suspend.c | |
19830 | --- linux-4.14.orig/kernel/power/suspend.c 2018-09-05 11:03:22.000000000 +0200 | |
19831 | +++ linux-4.14/kernel/power/suspend.c 2018-09-05 11:05:07.000000000 +0200 | |
19832 | @@ -428,6 +428,8 @@ | |
19833 | arch_suspend_disable_irqs(); | |
19834 | BUG_ON(!irqs_disabled()); | |
19835 | ||
19836 | + system_state = SYSTEM_SUSPEND; | |
1a6e0f06 | 19837 | + |
e4b2b4a8 JK |
19838 | error = syscore_suspend(); |
19839 | if (!error) { | |
19840 | *wakeup = pm_wakeup_pending(); | |
19841 | @@ -443,6 +445,8 @@ | |
19842 | syscore_resume(); | |
19843 | } | |
19844 | ||
19845 | + system_state = SYSTEM_RUNNING; | |
1a6e0f06 | 19846 | + |
e4b2b4a8 JK |
19847 | arch_suspend_enable_irqs(); |
19848 | BUG_ON(irqs_disabled()); | |
19849 | ||
19850 | @@ -589,6 +593,8 @@ | |
19851 | return error; | |
19852 | } | |
19853 | ||
19854 | +bool pm_in_action; | |
1a6e0f06 | 19855 | + |
e4b2b4a8 JK |
19856 | /** |
19857 | * pm_suspend - Externally visible function for suspending the system. | |
19858 | * @state: System sleep state to enter. | |
19859 | @@ -603,6 +609,7 @@ | |
19860 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) | |
19861 | return -EINVAL; | |
19862 | ||
19863 | + pm_in_action = true; | |
19864 | pr_info("suspend entry (%s)\n", mem_sleep_labels[state]); | |
19865 | error = enter_state(state); | |
19866 | if (error) { | |
19867 | @@ -612,6 +619,7 @@ | |
19868 | suspend_stats.success++; | |
19869 | } | |
19870 | pr_info("suspend exit\n"); | |
19871 | + pm_in_action = false; | |
19872 | return error; | |
19873 | } | |
19874 | EXPORT_SYMBOL(pm_suspend); | |
19875 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/printk/printk.c linux-4.14/kernel/printk/printk.c | |
19876 | --- linux-4.14.orig/kernel/printk/printk.c 2017-11-12 19:46:13.000000000 +0100 | |
19877 | +++ linux-4.14/kernel/printk/printk.c 2018-09-05 11:05:07.000000000 +0200 | |
19878 | @@ -400,6 +400,65 @@ | |
19879 | printk_safe_exit_irqrestore(flags); \ | |
19880 | } while (0) | |
19881 | ||
19882 | +#ifdef CONFIG_EARLY_PRINTK | |
19883 | +struct console *early_console; | |
1a6e0f06 | 19884 | + |
e4b2b4a8 JK |
19885 | +static void early_vprintk(const char *fmt, va_list ap) |
19886 | +{ | |
19887 | + if (early_console) { | |
19888 | + char buf[512]; | |
19889 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
1a6e0f06 | 19890 | + |
e4b2b4a8 JK |
19891 | + early_console->write(early_console, buf, n); |
19892 | + } | |
19893 | +} | |
1a6e0f06 | 19894 | + |
e4b2b4a8 JK |
19895 | +asmlinkage void early_printk(const char *fmt, ...) |
19896 | +{ | |
19897 | + va_list ap; | |
1a6e0f06 | 19898 | + |
e4b2b4a8 JK |
19899 | + va_start(ap, fmt); |
19900 | + early_vprintk(fmt, ap); | |
19901 | + va_end(ap); | |
19902 | +} | |
1a6e0f06 JK |
19903 | + |
19904 | +/* | |
e4b2b4a8 JK |
19905 | + * This is independent of any log levels - a global |
19906 | + * kill switch that turns off all of printk. | |
19907 | + * | |
19908 | + * Used by the NMI watchdog if early-printk is enabled. | |
1a6e0f06 | 19909 | + */ |
e4b2b4a8 JK |
19910 | +static bool __read_mostly printk_killswitch; |
19911 | + | |
19912 | +static int __init force_early_printk_setup(char *str) | |
19913 | +{ | |
19914 | + printk_killswitch = true; | |
19915 | + return 0; | |
19916 | +} | |
19917 | +early_param("force_early_printk", force_early_printk_setup); | |
19918 | + | |
19919 | +void printk_kill(void) | |
19920 | +{ | |
19921 | + printk_killswitch = true; | |
19922 | +} | |
19923 | + | |
19924 | +#ifdef CONFIG_PRINTK | |
19925 | +static int forced_early_printk(const char *fmt, va_list ap) | |
19926 | +{ | |
19927 | + if (!printk_killswitch) | |
19928 | + return 0; | |
19929 | + early_vprintk(fmt, ap); | |
19930 | + return 1; | |
19931 | +} | |
1a6e0f06 | 19932 | +#endif |
1a6e0f06 | 19933 | + |
1a6e0f06 | 19934 | +#else |
e4b2b4a8 JK |
19935 | +static inline int forced_early_printk(const char *fmt, va_list ap) |
19936 | +{ | |
19937 | + return 0; | |
19938 | +} | |
19939 | +#endif | |
19940 | + | |
19941 | #ifdef CONFIG_PRINTK | |
19942 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
19943 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
19944 | @@ -1348,6 +1407,8 @@ | |
19945 | { | |
19946 | char *text; | |
19947 | int len = 0; | |
19948 | + int attempts = 0; | |
19949 | + int num_msg; | |
19950 | ||
19951 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
19952 | if (!text) | |
19953 | @@ -1359,6 +1420,14 @@ | |
19954 | u64 seq; | |
19955 | u32 idx; | |
19956 | ||
19957 | +try_again: | |
19958 | + attempts++; | |
19959 | + if (attempts > 10) { | |
19960 | + len = -EBUSY; | |
19961 | + goto out; | |
19962 | + } | |
19963 | + num_msg = 0; | |
19964 | + | |
19965 | /* | |
19966 | * Find first record that fits, including all following records, | |
19967 | * into the user-provided buffer for this dump. | |
19968 | @@ -1371,6 +1440,14 @@ | |
19969 | len += msg_print_text(msg, true, NULL, 0); | |
19970 | idx = log_next(idx); | |
19971 | seq++; | |
19972 | + num_msg++; | |
19973 | + if (num_msg > 5) { | |
19974 | + num_msg = 0; | |
19975 | + logbuf_unlock_irq(); | |
19976 | + logbuf_lock_irq(); | |
19977 | + if (clear_seq < log_first_seq) | |
19978 | + goto try_again; | |
19979 | + } | |
19980 | } | |
19981 | ||
19982 | /* move first record forward until length fits into the buffer */ | |
19983 | @@ -1382,6 +1459,14 @@ | |
19984 | len -= msg_print_text(msg, true, NULL, 0); | |
19985 | idx = log_next(idx); | |
19986 | seq++; | |
19987 | + num_msg++; | |
19988 | + if (num_msg > 5) { | |
19989 | + num_msg = 0; | |
19990 | + logbuf_unlock_irq(); | |
19991 | + logbuf_lock_irq(); | |
19992 | + if (clear_seq < log_first_seq) | |
19993 | + goto try_again; | |
19994 | + } | |
19995 | } | |
19996 | ||
19997 | /* last message fitting into this dump */ | |
19998 | @@ -1420,6 +1505,7 @@ | |
19999 | clear_seq = log_next_seq; | |
20000 | clear_idx = log_next_idx; | |
20001 | } | |
20002 | +out: | |
20003 | logbuf_unlock_irq(); | |
20004 | ||
20005 | kfree(text); | |
20006 | @@ -1558,6 +1644,12 @@ | |
20007 | if (!console_drivers) | |
20008 | return; | |
20009 | ||
20010 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
20011 | + if (in_irq() || in_nmi()) | |
20012 | + return; | |
1a6e0f06 | 20013 | + } |
1a6e0f06 | 20014 | + |
e4b2b4a8 JK |
20015 | + migrate_disable(); |
20016 | for_each_console(con) { | |
20017 | if (exclusive_console && con != exclusive_console) | |
20018 | continue; | |
20019 | @@ -1573,6 +1665,7 @@ | |
20020 | else | |
20021 | con->write(con, text, len); | |
20022 | } | |
20023 | + migrate_enable(); | |
20024 | } | |
20025 | ||
20026 | int printk_delay_msec __read_mostly; | |
20027 | @@ -1692,6 +1785,13 @@ | |
20028 | int printed_len; | |
20029 | bool in_sched = false; | |
20030 | ||
20031 | + /* | |
20032 | + * Fall back to early_printk if a debugging subsystem has | |
20033 | + * killed printk output | |
20034 | + */ | |
20035 | + if (unlikely(forced_early_printk(fmt, args))) | |
20036 | + return 1; | |
1a6e0f06 | 20037 | + |
e4b2b4a8 JK |
20038 | if (level == LOGLEVEL_SCHED) { |
20039 | level = LOGLEVEL_DEFAULT; | |
20040 | in_sched = true; | |
20041 | @@ -1748,12 +1848,22 @@ | |
20042 | ||
20043 | /* If called from the scheduler, we can not call up(). */ | |
20044 | if (!in_sched) { | |
20045 | + int may_trylock = 1; | |
1a6e0f06 | 20046 | + |
e4b2b4a8 JK |
20047 | +#ifdef CONFIG_PREEMPT_RT_FULL |
20048 | + /* | |
20049 | + * we can't take a sleeping lock with IRQs or preeption disabled | |
20050 | + * so we can't print in these contexts | |
20051 | + */ | |
20052 | + if (!(preempt_count() == 0 && !irqs_disabled())) | |
20053 | + may_trylock = 0; | |
1a6e0f06 | 20054 | +#endif |
e4b2b4a8 JK |
20055 | /* |
20056 | * Try to acquire and then immediately release the console | |
20057 | * semaphore. The release will print out buffers and wake up | |
20058 | * /dev/kmsg and syslog() users. | |
20059 | */ | |
20060 | - if (console_trylock()) | |
20061 | + if (may_trylock && console_trylock()) | |
20062 | console_unlock(); | |
20063 | } | |
1a6e0f06 | 20064 | |
e4b2b4a8 | 20065 | @@ -1863,26 +1973,6 @@ |
1a6e0f06 | 20066 | |
e4b2b4a8 | 20067 | #endif /* CONFIG_PRINTK */ |
1a6e0f06 | 20068 | |
e4b2b4a8 JK |
20069 | -#ifdef CONFIG_EARLY_PRINTK |
20070 | -struct console *early_console; | |
20071 | - | |
20072 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
20073 | -{ | |
20074 | - va_list ap; | |
20075 | - char buf[512]; | |
20076 | - int n; | |
20077 | - | |
20078 | - if (!early_console) | |
20079 | - return; | |
20080 | - | |
20081 | - va_start(ap, fmt); | |
20082 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
20083 | - va_end(ap); | |
20084 | - | |
20085 | - early_console->write(early_console, buf, n); | |
20086 | -} | |
20087 | -#endif | |
20088 | - | |
20089 | static int __add_preferred_console(char *name, int idx, char *options, | |
20090 | char *brl_options) | |
20091 | { | |
20092 | @@ -2229,10 +2319,15 @@ | |
20093 | console_seq++; | |
20094 | raw_spin_unlock(&logbuf_lock); | |
1a6e0f06 | 20095 | |
e4b2b4a8 JK |
20096 | +#ifdef CONFIG_PREEMPT_RT_FULL |
20097 | + printk_safe_exit_irqrestore(flags); | |
20098 | + call_console_drivers(ext_text, ext_len, text, len); | |
1a6e0f06 | 20099 | +#else |
e4b2b4a8 JK |
20100 | stop_critical_timings(); /* don't trace print latency */ |
20101 | call_console_drivers(ext_text, ext_len, text, len); | |
20102 | start_critical_timings(); | |
20103 | printk_safe_exit_irqrestore(flags); | |
1a6e0f06 | 20104 | +#endif |
1a6e0f06 | 20105 | |
e4b2b4a8 JK |
20106 | if (do_cond_resched) |
20107 | cond_resched(); | |
20108 | @@ -2286,6 +2381,11 @@ | |
1a6e0f06 | 20109 | { |
e4b2b4a8 | 20110 | struct console *c; |
1a6e0f06 | 20111 | |
e4b2b4a8 JK |
20112 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { |
20113 | + if (in_irq() || in_nmi()) | |
20114 | + return; | |
20115 | + } | |
1a6e0f06 | 20116 | + |
e4b2b4a8 JK |
20117 | /* |
20118 | * console_unblank can no longer be called in interrupt context unless | |
20119 | * oops_in_progress is set to 1.. | |
20120 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/ptrace.c linux-4.14/kernel/ptrace.c | |
20121 | --- linux-4.14.orig/kernel/ptrace.c 2017-11-12 19:46:13.000000000 +0100 | |
20122 | +++ linux-4.14/kernel/ptrace.c 2018-09-05 11:05:07.000000000 +0200 | |
20123 | @@ -175,7 +175,14 @@ | |
20124 | ||
20125 | spin_lock_irq(&task->sighand->siglock); | |
20126 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
20127 | - task->state = __TASK_TRACED; | |
20128 | + unsigned long flags; | |
1a6e0f06 | 20129 | + |
e4b2b4a8 JK |
20130 | + raw_spin_lock_irqsave(&task->pi_lock, flags); |
20131 | + if (task->state & __TASK_TRACED) | |
20132 | + task->state = __TASK_TRACED; | |
20133 | + else | |
20134 | + task->saved_state = __TASK_TRACED; | |
20135 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
20136 | ret = true; | |
20137 | } | |
20138 | spin_unlock_irq(&task->sighand->siglock); | |
20139 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/Kconfig linux-4.14/kernel/rcu/Kconfig | |
20140 | --- linux-4.14.orig/kernel/rcu/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
20141 | +++ linux-4.14/kernel/rcu/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
20142 | @@ -36,7 +36,7 @@ | |
1a6e0f06 | 20143 | |
e4b2b4a8 JK |
20144 | config RCU_EXPERT |
20145 | bool "Make expert-level adjustments to RCU configuration" | |
20146 | - default n | |
20147 | + default y if PREEMPT_RT_FULL | |
20148 | help | |
20149 | This option needs to be enabled if you wish to make | |
20150 | expert-level adjustments to RCU configuration. By default, | |
20151 | @@ -172,7 +172,7 @@ | |
20152 | ||
20153 | config RCU_FAST_NO_HZ | |
20154 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
20155 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
20156 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
20157 | default n | |
20158 | help | |
20159 | This option permits CPUs to enter dynticks-idle state even if | |
20160 | @@ -191,7 +191,7 @@ | |
20161 | config RCU_BOOST | |
20162 | bool "Enable RCU priority boosting" | |
20163 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
20164 | - default n | |
20165 | + default y if PREEMPT_RT_FULL | |
20166 | help | |
20167 | This option boosts the priority of preempted RCU readers that | |
20168 | block the current preemptible RCU grace period for too long. | |
20169 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/rcu.h linux-4.14/kernel/rcu/rcu.h | |
20170 | --- linux-4.14.orig/kernel/rcu/rcu.h 2017-11-12 19:46:13.000000000 +0100 | |
20171 | +++ linux-4.14/kernel/rcu/rcu.h 2018-09-05 11:05:07.000000000 +0200 | |
20172 | @@ -462,18 +462,26 @@ | |
20173 | extern unsigned long rcutorture_testseq; | |
20174 | extern unsigned long rcutorture_vernum; | |
20175 | unsigned long rcu_batches_started(void); | |
20176 | -unsigned long rcu_batches_started_bh(void); | |
20177 | unsigned long rcu_batches_started_sched(void); | |
20178 | unsigned long rcu_batches_completed(void); | |
20179 | -unsigned long rcu_batches_completed_bh(void); | |
20180 | unsigned long rcu_batches_completed_sched(void); | |
20181 | unsigned long rcu_exp_batches_completed(void); | |
20182 | unsigned long rcu_exp_batches_completed_sched(void); | |
20183 | unsigned long srcu_batches_completed(struct srcu_struct *sp); | |
20184 | void show_rcu_gp_kthreads(void); | |
20185 | void rcu_force_quiescent_state(void); | |
20186 | -void rcu_bh_force_quiescent_state(void); | |
20187 | void rcu_sched_force_quiescent_state(void); | |
1a6e0f06 | 20188 | + |
e4b2b4a8 JK |
20189 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20190 | +void rcu_bh_force_quiescent_state(void); | |
20191 | +unsigned long rcu_batches_started_bh(void); | |
20192 | +unsigned long rcu_batches_completed_bh(void); | |
1a6e0f06 | 20193 | +#else |
e4b2b4a8 JK |
20194 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state |
20195 | +# define rcu_batches_completed_bh rcu_batches_completed | |
20196 | +# define rcu_batches_started_bh rcu_batches_completed | |
1a6e0f06 | 20197 | +#endif |
e4b2b4a8 JK |
20198 | + |
20199 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | |
1a6e0f06 | 20200 | |
e4b2b4a8 JK |
20201 | #ifdef CONFIG_RCU_NOCB_CPU |
20202 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/rcu_segcblist.c linux-4.14/kernel/rcu/rcu_segcblist.c | |
20203 | --- linux-4.14.orig/kernel/rcu/rcu_segcblist.c 2017-11-12 19:46:13.000000000 +0100 | |
20204 | +++ linux-4.14/kernel/rcu/rcu_segcblist.c 2018-09-05 11:05:07.000000000 +0200 | |
20205 | @@ -23,6 +23,7 @@ | |
20206 | #include <linux/types.h> | |
20207 | #include <linux/kernel.h> | |
20208 | #include <linux/interrupt.h> | |
20209 | +#include <linux/rcupdate.h> | |
1a6e0f06 | 20210 | |
e4b2b4a8 | 20211 | #include "rcu_segcblist.h" |
1a6e0f06 | 20212 | |
e4b2b4a8 JK |
20213 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/rcutorture.c linux-4.14/kernel/rcu/rcutorture.c |
20214 | --- linux-4.14.orig/kernel/rcu/rcutorture.c 2017-11-12 19:46:13.000000000 +0100 | |
20215 | +++ linux-4.14/kernel/rcu/rcutorture.c 2018-09-05 11:05:07.000000000 +0200 | |
20216 | @@ -417,6 +417,7 @@ | |
20217 | .name = "rcu" | |
1a6e0f06 JK |
20218 | }; |
20219 | ||
e4b2b4a8 JK |
20220 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20221 | /* | |
20222 | * Definitions for rcu_bh torture testing. | |
1a6e0f06 | 20223 | */ |
e4b2b4a8 JK |
20224 | @@ -456,6 +457,12 @@ |
20225 | .name = "rcu_bh" | |
20226 | }; | |
1a6e0f06 | 20227 | |
e4b2b4a8 JK |
20228 | +#else |
20229 | +static struct rcu_torture_ops rcu_bh_ops = { | |
20230 | + .ttype = INVALID_RCU_FLAVOR, | |
20231 | +}; | |
20232 | +#endif | |
20233 | + | |
1a6e0f06 | 20234 | /* |
e4b2b4a8 JK |
20235 | * Don't even think about trying any of these in real life!!! |
20236 | * The names includes "busted", and they really means it! | |
20237 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/srcutree.c linux-4.14/kernel/rcu/srcutree.c | |
20238 | --- linux-4.14.orig/kernel/rcu/srcutree.c 2017-11-12 19:46:13.000000000 +0100 | |
20239 | +++ linux-4.14/kernel/rcu/srcutree.c 2018-09-05 11:05:07.000000000 +0200 | |
20240 | @@ -36,6 +36,8 @@ | |
20241 | #include <linux/delay.h> | |
20242 | #include <linux/module.h> | |
20243 | #include <linux/srcu.h> | |
20244 | +#include <linux/cpu.h> | |
20245 | +#include <linux/locallock.h> | |
1a6e0f06 | 20246 | |
e4b2b4a8 JK |
20247 | #include "rcu.h" |
20248 | #include "rcu_segcblist.h" | |
20249 | @@ -53,6 +55,33 @@ | |
20250 | static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); | |
20251 | static void process_srcu(struct work_struct *work); | |
20252 | ||
20253 | +/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ | |
20254 | +#define spin_lock_rcu_node(p) \ | |
20255 | +do { \ | |
20256 | + spin_lock(&ACCESS_PRIVATE(p, lock)); \ | |
20257 | + smp_mb__after_unlock_lock(); \ | |
20258 | +} while (0) | |
20259 | + | |
20260 | +#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock)) | |
20261 | + | |
20262 | +#define spin_lock_irq_rcu_node(p) \ | |
20263 | +do { \ | |
20264 | + spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \ | |
20265 | + smp_mb__after_unlock_lock(); \ | |
20266 | +} while (0) | |
20267 | + | |
20268 | +#define spin_unlock_irq_rcu_node(p) \ | |
20269 | + spin_unlock_irq(&ACCESS_PRIVATE(p, lock)) | |
20270 | + | |
20271 | +#define spin_lock_irqsave_rcu_node(p, flags) \ | |
20272 | +do { \ | |
20273 | + spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \ | |
20274 | + smp_mb__after_unlock_lock(); \ | |
20275 | +} while (0) | |
20276 | + | |
20277 | +#define spin_unlock_irqrestore_rcu_node(p, flags) \ | |
20278 | + spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \ | |
20279 | + | |
20280 | /* | |
20281 | * Initialize SRCU combining tree. Note that statically allocated | |
20282 | * srcu_struct structures might already have srcu_read_lock() and | |
20283 | @@ -77,7 +106,7 @@ | |
20284 | ||
20285 | /* Each pass through this loop initializes one srcu_node structure. */ | |
20286 | rcu_for_each_node_breadth_first(sp, snp) { | |
20287 | - raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock)); | |
20288 | + spin_lock_init(&ACCESS_PRIVATE(snp, lock)); | |
20289 | WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != | |
20290 | ARRAY_SIZE(snp->srcu_data_have_cbs)); | |
20291 | for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { | |
20292 | @@ -111,7 +140,7 @@ | |
20293 | snp_first = sp->level[level]; | |
20294 | for_each_possible_cpu(cpu) { | |
20295 | sdp = per_cpu_ptr(sp->sda, cpu); | |
20296 | - raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); | |
20297 | + spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); | |
20298 | rcu_segcblist_init(&sdp->srcu_cblist); | |
20299 | sdp->srcu_cblist_invoking = false; | |
20300 | sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; | |
20301 | @@ -170,7 +199,7 @@ | |
20302 | /* Don't re-initialize a lock while it is held. */ | |
20303 | debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | |
20304 | lockdep_init_map(&sp->dep_map, name, key, 0); | |
20305 | - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); | |
20306 | + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); | |
20307 | return init_srcu_struct_fields(sp, false); | |
20308 | } | |
20309 | EXPORT_SYMBOL_GPL(__init_srcu_struct); | |
20310 | @@ -187,7 +216,7 @@ | |
1a6e0f06 | 20311 | */ |
e4b2b4a8 JK |
20312 | int init_srcu_struct(struct srcu_struct *sp) |
20313 | { | |
20314 | - raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock)); | |
20315 | + spin_lock_init(&ACCESS_PRIVATE(sp, lock)); | |
20316 | return init_srcu_struct_fields(sp, false); | |
20317 | } | |
20318 | EXPORT_SYMBOL_GPL(init_srcu_struct); | |
20319 | @@ -210,13 +239,13 @@ | |
20320 | /* The smp_load_acquire() pairs with the smp_store_release(). */ | |
20321 | if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ | |
20322 | return; /* Already initialized. */ | |
20323 | - raw_spin_lock_irqsave_rcu_node(sp, flags); | |
20324 | + spin_lock_irqsave_rcu_node(sp, flags); | |
20325 | if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { | |
20326 | - raw_spin_unlock_irqrestore_rcu_node(sp, flags); | |
20327 | + spin_unlock_irqrestore_rcu_node(sp, flags); | |
20328 | return; | |
20329 | } | |
20330 | init_srcu_struct_fields(sp, true); | |
20331 | - raw_spin_unlock_irqrestore_rcu_node(sp, flags); | |
20332 | + spin_unlock_irqrestore_rcu_node(sp, flags); | |
1a6e0f06 JK |
20333 | } |
20334 | ||
e4b2b4a8 JK |
20335 | /* |
20336 | @@ -425,21 +454,6 @@ | |
20337 | } | |
1a6e0f06 | 20338 | |
e4b2b4a8 JK |
20339 | /* |
20340 | - * Track online CPUs to guide callback workqueue placement. | |
20341 | - */ | |
20342 | -DEFINE_PER_CPU(bool, srcu_online); | |
20343 | - | |
20344 | -void srcu_online_cpu(unsigned int cpu) | |
20345 | -{ | |
20346 | - WRITE_ONCE(per_cpu(srcu_online, cpu), true); | |
20347 | -} | |
20348 | - | |
20349 | -void srcu_offline_cpu(unsigned int cpu) | |
20350 | -{ | |
20351 | - WRITE_ONCE(per_cpu(srcu_online, cpu), false); | |
20352 | -} | |
20353 | - | |
20354 | -/* | |
20355 | * Place the workqueue handler on the specified CPU if online, otherwise | |
20356 | * just run it whereever. This is useful for placing workqueue handlers | |
20357 | * that are to invoke the specified CPU's callbacks. | |
20358 | @@ -450,12 +464,12 @@ | |
1a6e0f06 | 20359 | { |
e4b2b4a8 JK |
20360 | bool ret; |
20361 | ||
20362 | - preempt_disable(); | |
20363 | - if (READ_ONCE(per_cpu(srcu_online, cpu))) | |
20364 | + cpus_read_lock(); | |
20365 | + if (cpu_online(cpu)) | |
20366 | ret = queue_delayed_work_on(cpu, wq, dwork, delay); | |
20367 | else | |
20368 | ret = queue_delayed_work(wq, dwork, delay); | |
20369 | - preempt_enable(); | |
20370 | + cpus_read_unlock(); | |
20371 | return ret; | |
1a6e0f06 JK |
20372 | } |
20373 | ||
e4b2b4a8 JK |
20374 | @@ -513,7 +527,7 @@ |
20375 | mutex_lock(&sp->srcu_cb_mutex); | |
20376 | ||
20377 | /* End the current grace period. */ | |
20378 | - raw_spin_lock_irq_rcu_node(sp); | |
20379 | + spin_lock_irq_rcu_node(sp); | |
20380 | idx = rcu_seq_state(sp->srcu_gp_seq); | |
20381 | WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); | |
20382 | cbdelay = srcu_get_delay(sp); | |
20383 | @@ -522,7 +536,7 @@ | |
20384 | gpseq = rcu_seq_current(&sp->srcu_gp_seq); | |
20385 | if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) | |
20386 | sp->srcu_gp_seq_needed_exp = gpseq; | |
20387 | - raw_spin_unlock_irq_rcu_node(sp); | |
20388 | + spin_unlock_irq_rcu_node(sp); | |
20389 | mutex_unlock(&sp->srcu_gp_mutex); | |
20390 | /* A new grace period can start at this point. But only one. */ | |
20391 | ||
20392 | @@ -530,7 +544,7 @@ | |
20393 | idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); | |
20394 | idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); | |
20395 | rcu_for_each_node_breadth_first(sp, snp) { | |
20396 | - raw_spin_lock_irq_rcu_node(snp); | |
20397 | + spin_lock_irq_rcu_node(snp); | |
20398 | cbs = false; | |
20399 | if (snp >= sp->level[rcu_num_lvls - 1]) | |
20400 | cbs = snp->srcu_have_cbs[idx] == gpseq; | |
20401 | @@ -540,7 +554,7 @@ | |
20402 | snp->srcu_gp_seq_needed_exp = gpseq; | |
20403 | mask = snp->srcu_data_have_cbs[idx]; | |
20404 | snp->srcu_data_have_cbs[idx] = 0; | |
20405 | - raw_spin_unlock_irq_rcu_node(snp); | |
20406 | + spin_unlock_irq_rcu_node(snp); | |
20407 | if (cbs) | |
20408 | srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); | |
20409 | ||
20410 | @@ -548,11 +562,11 @@ | |
20411 | if (!(gpseq & counter_wrap_check)) | |
20412 | for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { | |
20413 | sdp = per_cpu_ptr(sp->sda, cpu); | |
20414 | - raw_spin_lock_irqsave_rcu_node(sdp, flags); | |
20415 | + spin_lock_irqsave_rcu_node(sdp, flags); | |
20416 | if (ULONG_CMP_GE(gpseq, | |
20417 | sdp->srcu_gp_seq_needed + 100)) | |
20418 | sdp->srcu_gp_seq_needed = gpseq; | |
20419 | - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); | |
20420 | + spin_unlock_irqrestore_rcu_node(sdp, flags); | |
20421 | } | |
20422 | } | |
1a6e0f06 | 20423 | |
e4b2b4a8 JK |
20424 | @@ -560,17 +574,17 @@ |
20425 | mutex_unlock(&sp->srcu_cb_mutex); | |
20426 | ||
20427 | /* Start a new grace period if needed. */ | |
20428 | - raw_spin_lock_irq_rcu_node(sp); | |
20429 | + spin_lock_irq_rcu_node(sp); | |
20430 | gpseq = rcu_seq_current(&sp->srcu_gp_seq); | |
20431 | if (!rcu_seq_state(gpseq) && | |
20432 | ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { | |
20433 | srcu_gp_start(sp); | |
20434 | - raw_spin_unlock_irq_rcu_node(sp); | |
20435 | + spin_unlock_irq_rcu_node(sp); | |
20436 | /* Throttle expedited grace periods: Should be rare! */ | |
20437 | srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff | |
20438 | ? 0 : SRCU_INTERVAL); | |
20439 | } else { | |
20440 | - raw_spin_unlock_irq_rcu_node(sp); | |
20441 | + spin_unlock_irq_rcu_node(sp); | |
20442 | } | |
20443 | } | |
1a6e0f06 | 20444 | |
e4b2b4a8 JK |
20445 | @@ -590,18 +604,18 @@ |
20446 | if (rcu_seq_done(&sp->srcu_gp_seq, s) || | |
20447 | ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) | |
20448 | return; | |
20449 | - raw_spin_lock_irqsave_rcu_node(snp, flags); | |
20450 | + spin_lock_irqsave_rcu_node(snp, flags); | |
20451 | if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { | |
20452 | - raw_spin_unlock_irqrestore_rcu_node(snp, flags); | |
20453 | + spin_unlock_irqrestore_rcu_node(snp, flags); | |
20454 | return; | |
20455 | } | |
20456 | WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); | |
20457 | - raw_spin_unlock_irqrestore_rcu_node(snp, flags); | |
20458 | + spin_unlock_irqrestore_rcu_node(snp, flags); | |
20459 | } | |
20460 | - raw_spin_lock_irqsave_rcu_node(sp, flags); | |
20461 | + spin_lock_irqsave_rcu_node(sp, flags); | |
20462 | if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) | |
20463 | sp->srcu_gp_seq_needed_exp = s; | |
20464 | - raw_spin_unlock_irqrestore_rcu_node(sp, flags); | |
20465 | + spin_unlock_irqrestore_rcu_node(sp, flags); | |
20466 | } | |
1a6e0f06 | 20467 | |
e4b2b4a8 JK |
20468 | /* |
20469 | @@ -623,12 +637,12 @@ | |
20470 | for (; snp != NULL; snp = snp->srcu_parent) { | |
20471 | if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) | |
20472 | return; /* GP already done and CBs recorded. */ | |
20473 | - raw_spin_lock_irqsave_rcu_node(snp, flags); | |
20474 | + spin_lock_irqsave_rcu_node(snp, flags); | |
20475 | if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { | |
20476 | snp_seq = snp->srcu_have_cbs[idx]; | |
20477 | if (snp == sdp->mynode && snp_seq == s) | |
20478 | snp->srcu_data_have_cbs[idx] |= sdp->grpmask; | |
20479 | - raw_spin_unlock_irqrestore_rcu_node(snp, flags); | |
20480 | + spin_unlock_irqrestore_rcu_node(snp, flags); | |
20481 | if (snp == sdp->mynode && snp_seq != s) { | |
20482 | srcu_schedule_cbs_sdp(sdp, do_norm | |
20483 | ? SRCU_INTERVAL | |
20484 | @@ -644,11 +658,11 @@ | |
20485 | snp->srcu_data_have_cbs[idx] |= sdp->grpmask; | |
20486 | if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) | |
20487 | snp->srcu_gp_seq_needed_exp = s; | |
20488 | - raw_spin_unlock_irqrestore_rcu_node(snp, flags); | |
20489 | + spin_unlock_irqrestore_rcu_node(snp, flags); | |
20490 | } | |
20491 | ||
20492 | /* Top of tree, must ensure the grace period will be started. */ | |
20493 | - raw_spin_lock_irqsave_rcu_node(sp, flags); | |
20494 | + spin_lock_irqsave_rcu_node(sp, flags); | |
20495 | if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { | |
20496 | /* | |
20497 | * Record need for grace period s. Pair with load | |
20498 | @@ -667,7 +681,7 @@ | |
20499 | queue_delayed_work(system_power_efficient_wq, &sp->work, | |
20500 | srcu_get_delay(sp)); | |
20501 | } | |
20502 | - raw_spin_unlock_irqrestore_rcu_node(sp, flags); | |
20503 | + spin_unlock_irqrestore_rcu_node(sp, flags); | |
1a6e0f06 | 20504 | } |
1a6e0f06 | 20505 | |
e4b2b4a8 JK |
20506 | /* |
20507 | @@ -736,6 +750,8 @@ | |
20508 | * negligible when amoritized over that time period, and the extra latency | |
20509 | * of a needlessly non-expedited grace period is similarly negligible. | |
20510 | */ | |
20511 | +static DEFINE_LOCAL_IRQ_LOCK(sp_llock); | |
20512 | + | |
20513 | static bool srcu_might_be_idle(struct srcu_struct *sp) | |
1a6e0f06 | 20514 | { |
e4b2b4a8 JK |
20515 | unsigned long curseq; |
20516 | @@ -744,13 +760,13 @@ | |
20517 | unsigned long t; | |
1a6e0f06 | 20518 | |
e4b2b4a8 JK |
20519 | /* If the local srcu_data structure has callbacks, not idle. */ |
20520 | - local_irq_save(flags); | |
20521 | + local_lock_irqsave(sp_llock, flags); | |
20522 | sdp = this_cpu_ptr(sp->sda); | |
20523 | if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { | |
20524 | - local_irq_restore(flags); | |
20525 | + local_unlock_irqrestore(sp_llock, flags); | |
20526 | return false; /* Callbacks already present, so not idle. */ | |
20527 | } | |
20528 | - local_irq_restore(flags); | |
20529 | + local_unlock_irqrestore(sp_llock, flags); | |
1a6e0f06 | 20530 | |
e4b2b4a8 JK |
20531 | /* |
20532 | * No local callbacks, so probabalistically probe global state. | |
20533 | @@ -828,9 +844,9 @@ | |
20534 | return; | |
20535 | } | |
20536 | rhp->func = func; | |
20537 | - local_irq_save(flags); | |
20538 | + local_lock_irqsave(sp_llock, flags); | |
20539 | sdp = this_cpu_ptr(sp->sda); | |
20540 | - raw_spin_lock_rcu_node(sdp); | |
20541 | + spin_lock_rcu_node(sdp); | |
20542 | rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); | |
20543 | rcu_segcblist_advance(&sdp->srcu_cblist, | |
20544 | rcu_seq_current(&sp->srcu_gp_seq)); | |
20545 | @@ -844,7 +860,8 @@ | |
20546 | sdp->srcu_gp_seq_needed_exp = s; | |
20547 | needexp = true; | |
20548 | } | |
20549 | - raw_spin_unlock_irqrestore_rcu_node(sdp, flags); | |
20550 | + spin_unlock_rcu_node(sdp); | |
20551 | + local_unlock_irqrestore(sp_llock, flags); | |
20552 | if (needgp) | |
20553 | srcu_funnel_gp_start(sp, sdp, s, do_norm); | |
20554 | else if (needexp) | |
20555 | @@ -900,7 +917,7 @@ | |
20556 | ||
20557 | /* | |
20558 | * Make sure that later code is ordered after the SRCU grace | |
20559 | - * period. This pairs with the raw_spin_lock_irq_rcu_node() | |
20560 | + * period. This pairs with the spin_lock_irq_rcu_node() | |
20561 | * in srcu_invoke_callbacks(). Unlike Tree RCU, this is needed | |
20562 | * because the current CPU might have been totally uninvolved with | |
20563 | * (and thus unordered against) that grace period. | |
20564 | @@ -1024,7 +1041,7 @@ | |
20565 | */ | |
20566 | for_each_possible_cpu(cpu) { | |
20567 | sdp = per_cpu_ptr(sp->sda, cpu); | |
20568 | - raw_spin_lock_irq_rcu_node(sdp); | |
20569 | + spin_lock_irq_rcu_node(sdp); | |
20570 | atomic_inc(&sp->srcu_barrier_cpu_cnt); | |
20571 | sdp->srcu_barrier_head.func = srcu_barrier_cb; | |
20572 | debug_rcu_head_queue(&sdp->srcu_barrier_head); | |
20573 | @@ -1033,7 +1050,7 @@ | |
20574 | debug_rcu_head_unqueue(&sdp->srcu_barrier_head); | |
20575 | atomic_dec(&sp->srcu_barrier_cpu_cnt); | |
20576 | } | |
20577 | - raw_spin_unlock_irq_rcu_node(sdp); | |
20578 | + spin_unlock_irq_rcu_node(sdp); | |
20579 | } | |
20580 | ||
20581 | /* Remove the initial count, at which point reaching zero can happen. */ | |
20582 | @@ -1082,17 +1099,17 @@ | |
20583 | */ | |
20584 | idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ | |
20585 | if (idx == SRCU_STATE_IDLE) { | |
20586 | - raw_spin_lock_irq_rcu_node(sp); | |
20587 | + spin_lock_irq_rcu_node(sp); | |
20588 | if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { | |
20589 | WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); | |
20590 | - raw_spin_unlock_irq_rcu_node(sp); | |
20591 | + spin_unlock_irq_rcu_node(sp); | |
20592 | mutex_unlock(&sp->srcu_gp_mutex); | |
20593 | return; | |
20594 | } | |
20595 | idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); | |
20596 | if (idx == SRCU_STATE_IDLE) | |
20597 | srcu_gp_start(sp); | |
20598 | - raw_spin_unlock_irq_rcu_node(sp); | |
20599 | + spin_unlock_irq_rcu_node(sp); | |
20600 | if (idx != SRCU_STATE_IDLE) { | |
20601 | mutex_unlock(&sp->srcu_gp_mutex); | |
20602 | return; /* Someone else started the grace period. */ | |
20603 | @@ -1141,19 +1158,19 @@ | |
20604 | sdp = container_of(work, struct srcu_data, work.work); | |
20605 | sp = sdp->sp; | |
20606 | rcu_cblist_init(&ready_cbs); | |
20607 | - raw_spin_lock_irq_rcu_node(sdp); | |
20608 | + spin_lock_irq_rcu_node(sdp); | |
20609 | rcu_segcblist_advance(&sdp->srcu_cblist, | |
20610 | rcu_seq_current(&sp->srcu_gp_seq)); | |
20611 | if (sdp->srcu_cblist_invoking || | |
20612 | !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { | |
20613 | - raw_spin_unlock_irq_rcu_node(sdp); | |
20614 | + spin_unlock_irq_rcu_node(sdp); | |
20615 | return; /* Someone else on the job or nothing to do. */ | |
20616 | } | |
20617 | ||
20618 | /* We are on the job! Extract and invoke ready callbacks. */ | |
20619 | sdp->srcu_cblist_invoking = true; | |
20620 | rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); | |
20621 | - raw_spin_unlock_irq_rcu_node(sdp); | |
20622 | + spin_unlock_irq_rcu_node(sdp); | |
20623 | rhp = rcu_cblist_dequeue(&ready_cbs); | |
20624 | for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { | |
20625 | debug_rcu_head_unqueue(rhp); | |
20626 | @@ -1166,13 +1183,13 @@ | |
20627 | * Update counts, accelerate new callbacks, and if needed, | |
20628 | * schedule another round of callback invocation. | |
20629 | */ | |
20630 | - raw_spin_lock_irq_rcu_node(sdp); | |
20631 | + spin_lock_irq_rcu_node(sdp); | |
20632 | rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); | |
20633 | (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, | |
20634 | rcu_seq_snap(&sp->srcu_gp_seq)); | |
20635 | sdp->srcu_cblist_invoking = false; | |
20636 | more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); | |
20637 | - raw_spin_unlock_irq_rcu_node(sdp); | |
20638 | + spin_unlock_irq_rcu_node(sdp); | |
20639 | if (more) | |
20640 | srcu_schedule_cbs_sdp(sdp, 0); | |
20641 | } | |
20642 | @@ -1185,7 +1202,7 @@ | |
20643 | { | |
20644 | bool pushgp = true; | |
20645 | ||
20646 | - raw_spin_lock_irq_rcu_node(sp); | |
20647 | + spin_lock_irq_rcu_node(sp); | |
20648 | if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { | |
20649 | if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { | |
20650 | /* All requests fulfilled, time to go idle. */ | |
20651 | @@ -1195,7 +1212,7 @@ | |
20652 | /* Outstanding request and no GP. Start one. */ | |
20653 | srcu_gp_start(sp); | |
20654 | } | |
20655 | - raw_spin_unlock_irq_rcu_node(sp); | |
20656 | + spin_unlock_irq_rcu_node(sp); | |
20657 | ||
20658 | if (pushgp) | |
20659 | queue_delayed_work(system_power_efficient_wq, &sp->work, delay); | |
20660 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/tree.c linux-4.14/kernel/rcu/tree.c | |
20661 | --- linux-4.14.orig/kernel/rcu/tree.c 2017-11-12 19:46:13.000000000 +0100 | |
20662 | +++ linux-4.14/kernel/rcu/tree.c 2018-09-05 11:05:07.000000000 +0200 | |
20663 | @@ -58,6 +58,11 @@ | |
20664 | #include <linux/trace_events.h> | |
20665 | #include <linux/suspend.h> | |
20666 | #include <linux/ftrace.h> | |
20667 | +#include <linux/delay.h> | |
20668 | +#include <linux/gfp.h> | |
20669 | +#include <linux/oom.h> | |
20670 | +#include <linux/smpboot.h> | |
20671 | +#include "../time/tick-internal.h" | |
20672 | ||
20673 | #include "tree.h" | |
20674 | #include "rcu.h" | |
20675 | @@ -243,6 +248,19 @@ | |
20676 | this_cpu_ptr(&rcu_sched_data), true); | |
20677 | } | |
20678 | ||
20679 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20680 | +static void rcu_preempt_qs(void); | |
1a6e0f06 | 20681 | + |
e4b2b4a8 JK |
20682 | +void rcu_bh_qs(void) |
20683 | +{ | |
20684 | + unsigned long flags; | |
1a6e0f06 | 20685 | + |
e4b2b4a8 JK |
20686 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ |
20687 | + local_irq_save(flags); | |
20688 | + rcu_preempt_qs(); | |
20689 | + local_irq_restore(flags); | |
20690 | +} | |
1a6e0f06 | 20691 | +#else |
e4b2b4a8 JK |
20692 | void rcu_bh_qs(void) |
20693 | { | |
20694 | RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); | |
20695 | @@ -253,6 +271,7 @@ | |
20696 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); | |
20697 | } | |
20698 | } | |
1a6e0f06 | 20699 | +#endif |
1a6e0f06 | 20700 | |
e4b2b4a8 JK |
20701 | /* |
20702 | * Steal a bit from the bottom of ->dynticks for idle entry/exit | |
20703 | @@ -564,11 +583,13 @@ | |
20704 | /* | |
20705 | * Return the number of RCU BH batches started thus far for debug & stats. | |
20706 | */ | |
20707 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20708 | unsigned long rcu_batches_started_bh(void) | |
20709 | { | |
20710 | return rcu_bh_state.gpnum; | |
20711 | } | |
20712 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
20713 | +#endif | |
1a6e0f06 | 20714 | |
e4b2b4a8 JK |
20715 | /* |
20716 | * Return the number of RCU batches completed thus far for debug & stats. | |
20717 | @@ -588,6 +609,7 @@ | |
20718 | } | |
20719 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
1a6e0f06 | 20720 | |
e4b2b4a8 JK |
20721 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20722 | /* | |
20723 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
20724 | */ | |
20725 | @@ -596,6 +618,7 @@ | |
20726 | return rcu_bh_state.completed; | |
20727 | } | |
20728 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
1a6e0f06 | 20729 | +#endif |
e4b2b4a8 JK |
20730 | |
20731 | /* | |
20732 | * Return the number of RCU expedited batches completed thus far for | |
20733 | @@ -619,6 +642,7 @@ | |
1a6e0f06 | 20734 | } |
e4b2b4a8 | 20735 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); |
1a6e0f06 | 20736 | |
e4b2b4a8 JK |
20737 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20738 | /* | |
20739 | * Force a quiescent state. | |
20740 | */ | |
20741 | @@ -637,6 +661,13 @@ | |
1a6e0f06 | 20742 | } |
e4b2b4a8 | 20743 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
1a6e0f06 | 20744 | |
1a6e0f06 | 20745 | +#else |
e4b2b4a8 JK |
20746 | +void rcu_force_quiescent_state(void) |
20747 | +{ | |
20748 | +} | |
20749 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
1a6e0f06 | 20750 | +#endif |
e4b2b4a8 JK |
20751 | + |
20752 | /* | |
20753 | * Force a quiescent state for RCU-sched. | |
20754 | */ | |
20755 | @@ -687,9 +718,11 @@ | |
20756 | case RCU_FLAVOR: | |
20757 | rsp = rcu_state_p; | |
20758 | break; | |
20759 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20760 | case RCU_BH_FLAVOR: | |
20761 | rsp = &rcu_bh_state; | |
20762 | break; | |
20763 | +#endif | |
20764 | case RCU_SCHED_FLAVOR: | |
20765 | rsp = &rcu_sched_state; | |
20766 | break; | |
20767 | @@ -2918,18 +2951,17 @@ | |
20768 | /* | |
20769 | * Do RCU core processing for the current CPU. | |
20770 | */ | |
20771 | -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) | |
20772 | +static __latent_entropy void rcu_process_callbacks(void) | |
20773 | { | |
20774 | struct rcu_state *rsp; | |
1a6e0f06 | 20775 | |
e4b2b4a8 JK |
20776 | if (cpu_is_offline(smp_processor_id())) |
20777 | return; | |
20778 | - trace_rcu_utilization(TPS("Start RCU core")); | |
20779 | for_each_rcu_flavor(rsp) | |
20780 | __rcu_process_callbacks(rsp); | |
20781 | - trace_rcu_utilization(TPS("End RCU core")); | |
1a6e0f06 JK |
20782 | } |
20783 | ||
e4b2b4a8 JK |
20784 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); |
20785 | /* | |
20786 | * Schedule RCU callback invocation. If the specified type of RCU | |
20787 | * does not support RCU priority boosting, just do a direct call, | |
20788 | @@ -2941,18 +2973,105 @@ | |
1a6e0f06 | 20789 | { |
e4b2b4a8 JK |
20790 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) |
20791 | return; | |
20792 | - if (likely(!rsp->boost)) { | |
20793 | - rcu_do_batch(rsp, rdp); | |
20794 | + rcu_do_batch(rsp, rdp); | |
20795 | +} | |
1a6e0f06 | 20796 | + |
e4b2b4a8 JK |
20797 | +static void rcu_wake_cond(struct task_struct *t, int status) |
20798 | +{ | |
20799 | + /* | |
20800 | + * If the thread is yielding, only wake it when this | |
20801 | + * is invoked from idle | |
20802 | + */ | |
20803 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
20804 | + wake_up_process(t); | |
20805 | +} | |
1a6e0f06 | 20806 | + |
e4b2b4a8 JK |
20807 | +/* |
20808 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
20809 | + */ | |
20810 | +static void invoke_rcu_core(void) | |
20811 | +{ | |
20812 | + unsigned long flags; | |
20813 | + struct task_struct *t; | |
1a6e0f06 | 20814 | + |
e4b2b4a8 JK |
20815 | + if (!cpu_online(smp_processor_id())) |
20816 | return; | |
20817 | + local_irq_save(flags); | |
20818 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
20819 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
20820 | + if (t != NULL && current != t) | |
20821 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
20822 | + local_irq_restore(flags); | |
20823 | +} | |
1a6e0f06 | 20824 | + |
e4b2b4a8 JK |
20825 | +static void rcu_cpu_kthread_park(unsigned int cpu) |
20826 | +{ | |
20827 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
20828 | +} | |
1a6e0f06 | 20829 | + |
e4b2b4a8 | 20830 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) |
1a6e0f06 | 20831 | +{ |
e4b2b4a8 | 20832 | + return __this_cpu_read(rcu_cpu_has_work); |
1a6e0f06 JK |
20833 | +} |
20834 | + | |
e4b2b4a8 JK |
20835 | +/* |
20836 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
20837 | + * RCU softirq used in flavors and configurations of RCU that do not | |
20838 | + * support RCU priority boosting. | |
20839 | + */ | |
20840 | +static void rcu_cpu_kthread(unsigned int cpu) | |
20841 | +{ | |
20842 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
20843 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
20844 | + int spincnt; | |
20845 | + | |
20846 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
20847 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
20848 | + local_bh_disable(); | |
20849 | + *statusp = RCU_KTHREAD_RUNNING; | |
20850 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
20851 | + local_irq_disable(); | |
20852 | + work = *workp; | |
20853 | + *workp = 0; | |
20854 | + local_irq_enable(); | |
20855 | + if (work) | |
20856 | + rcu_process_callbacks(); | |
20857 | + local_bh_enable(); | |
20858 | + if (*workp == 0) { | |
20859 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
20860 | + *statusp = RCU_KTHREAD_WAITING; | |
20861 | + return; | |
20862 | + } | |
20863 | } | |
20864 | - invoke_rcu_callbacks_kthread(); | |
20865 | + *statusp = RCU_KTHREAD_YIELDING; | |
20866 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
20867 | + schedule_timeout_interruptible(2); | |
20868 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
20869 | + *statusp = RCU_KTHREAD_WAITING; | |
20870 | } | |
1a6e0f06 | 20871 | |
e4b2b4a8 JK |
20872 | -static void invoke_rcu_core(void) |
20873 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
20874 | + .store = &rcu_cpu_kthread_task, | |
20875 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
20876 | + .thread_fn = rcu_cpu_kthread, | |
20877 | + .thread_comm = "rcuc/%u", | |
20878 | + .setup = rcu_cpu_kthread_setup, | |
20879 | + .park = rcu_cpu_kthread_park, | |
20880 | +}; | |
20881 | + | |
20882 | +/* | |
20883 | + * Spawn per-CPU RCU core processing kthreads. | |
20884 | + */ | |
20885 | +static int __init rcu_spawn_core_kthreads(void) | |
1a6e0f06 | 20886 | { |
e4b2b4a8 JK |
20887 | - if (cpu_online(smp_processor_id())) |
20888 | - raise_softirq(RCU_SOFTIRQ); | |
20889 | + int cpu; | |
20890 | + | |
20891 | + for_each_possible_cpu(cpu) | |
20892 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
20893 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
20894 | + return 0; | |
20895 | } | |
20896 | +early_initcall(rcu_spawn_core_kthreads); | |
1a6e0f06 | 20897 | |
e4b2b4a8 JK |
20898 | /* |
20899 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
20900 | @@ -3113,6 +3232,7 @@ | |
1a6e0f06 | 20901 | } |
e4b2b4a8 | 20902 | EXPORT_SYMBOL_GPL(call_rcu_sched); |
1a6e0f06 | 20903 | |
e4b2b4a8 | 20904 | +#ifndef CONFIG_PREEMPT_RT_FULL |
1a6e0f06 | 20905 | /** |
e4b2b4a8 JK |
20906 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. |
20907 | * @head: structure to be used for queueing the RCU updates. | |
20908 | @@ -3140,6 +3260,7 @@ | |
20909 | __call_rcu(head, func, &rcu_bh_state, -1, 0); | |
1a6e0f06 | 20910 | } |
e4b2b4a8 JK |
20911 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
20912 | +#endif | |
1a6e0f06 | 20913 | |
e4b2b4a8 JK |
20914 | /* |
20915 | * Queue an RCU callback for lazy invocation after a grace period. | |
20916 | @@ -3225,6 +3346,7 @@ | |
20917 | } | |
20918 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
1a6e0f06 | 20919 | |
e4b2b4a8 JK |
20920 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20921 | /** | |
20922 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
20923 | * | |
20924 | @@ -3251,6 +3373,7 @@ | |
20925 | wait_rcu_gp(call_rcu_bh); | |
20926 | } | |
20927 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
20928 | +#endif | |
1a6e0f06 | 20929 | |
e4b2b4a8 JK |
20930 | /** |
20931 | * get_state_synchronize_rcu - Snapshot current RCU state | |
20932 | @@ -3601,6 +3724,7 @@ | |
20933 | mutex_unlock(&rsp->barrier_mutex); | |
20934 | } | |
1a6e0f06 | 20935 | |
e4b2b4a8 JK |
20936 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20937 | /** | |
20938 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
20939 | */ | |
20940 | @@ -3609,6 +3733,7 @@ | |
20941 | _rcu_barrier(&rcu_bh_state); | |
20942 | } | |
20943 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
20944 | +#endif | |
1a6e0f06 | 20945 | |
e4b2b4a8 JK |
20946 | /** |
20947 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
20948 | @@ -3741,8 +3866,6 @@ | |
20949 | { | |
20950 | sync_sched_exp_online_cleanup(cpu); | |
20951 | rcutree_affinity_setting(cpu, -1); | |
20952 | - if (IS_ENABLED(CONFIG_TREE_SRCU)) | |
20953 | - srcu_online_cpu(cpu); | |
20954 | return 0; | |
20955 | } | |
1a6e0f06 | 20956 | |
e4b2b4a8 JK |
20957 | @@ -3753,8 +3876,6 @@ |
20958 | int rcutree_offline_cpu(unsigned int cpu) | |
20959 | { | |
20960 | rcutree_affinity_setting(cpu, cpu); | |
20961 | - if (IS_ENABLED(CONFIG_TREE_SRCU)) | |
20962 | - srcu_offline_cpu(cpu); | |
20963 | return 0; | |
20964 | } | |
1a6e0f06 | 20965 | |
e4b2b4a8 | 20966 | @@ -4184,12 +4305,13 @@ |
1a6e0f06 | 20967 | |
e4b2b4a8 JK |
20968 | rcu_bootup_announce(); |
20969 | rcu_init_geometry(); | |
20970 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20971 | rcu_init_one(&rcu_bh_state); | |
20972 | +#endif | |
20973 | rcu_init_one(&rcu_sched_state); | |
20974 | if (dump_tree) | |
20975 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
20976 | __rcu_init_preempt(); | |
20977 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
1a6e0f06 | 20978 | |
e4b2b4a8 JK |
20979 | /* |
20980 | * We don't need protection against CPU-hotplug here because | |
20981 | @@ -4200,8 +4322,6 @@ | |
20982 | for_each_online_cpu(cpu) { | |
20983 | rcutree_prepare_cpu(cpu); | |
20984 | rcu_cpu_starting(cpu); | |
20985 | - if (IS_ENABLED(CONFIG_TREE_SRCU)) | |
20986 | - srcu_online_cpu(cpu); | |
20987 | } | |
1a6e0f06 JK |
20988 | } |
20989 | ||
e4b2b4a8 JK |
20990 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/tree.h linux-4.14/kernel/rcu/tree.h |
20991 | --- linux-4.14.orig/kernel/rcu/tree.h 2017-11-12 19:46:13.000000000 +0100 | |
20992 | +++ linux-4.14/kernel/rcu/tree.h 2018-09-05 11:05:07.000000000 +0200 | |
20993 | @@ -427,7 +427,9 @@ | |
20994 | */ | |
20995 | extern struct rcu_state rcu_sched_state; | |
1a6e0f06 | 20996 | |
e4b2b4a8 JK |
20997 | +#ifndef CONFIG_PREEMPT_RT_FULL |
20998 | extern struct rcu_state rcu_bh_state; | |
20999 | +#endif | |
1a6e0f06 | 21000 | |
e4b2b4a8 JK |
21001 | #ifdef CONFIG_PREEMPT_RCU |
21002 | extern struct rcu_state rcu_preempt_state; | |
21003 | @@ -436,12 +438,10 @@ | |
21004 | int rcu_dynticks_snap(struct rcu_dynticks *rdtp); | |
21005 | bool rcu_eqs_special_set(int cpu); | |
1a6e0f06 | 21006 | |
e4b2b4a8 JK |
21007 | -#ifdef CONFIG_RCU_BOOST |
21008 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
21009 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
21010 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
21011 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
21012 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
1a6e0f06 | 21013 | |
e4b2b4a8 JK |
21014 | #ifndef RCU_TREE_NONCORE |
21015 | ||
21016 | @@ -461,10 +461,9 @@ | |
21017 | static void __init __rcu_init_preempt(void); | |
21018 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
21019 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
21020 | -static void invoke_rcu_callbacks_kthread(void); | |
21021 | static bool rcu_is_callbacks_kthread(void); | |
21022 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
21023 | #ifdef CONFIG_RCU_BOOST | |
21024 | -static void rcu_preempt_do_callbacks(void); | |
21025 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
21026 | struct rcu_node *rnp); | |
21027 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
21028 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/tree_plugin.h linux-4.14/kernel/rcu/tree_plugin.h | |
21029 | --- linux-4.14.orig/kernel/rcu/tree_plugin.h 2018-09-05 11:03:22.000000000 +0200 | |
21030 | +++ linux-4.14/kernel/rcu/tree_plugin.h 2018-09-05 11:05:07.000000000 +0200 | |
21031 | @@ -24,39 +24,16 @@ | |
21032 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
21033 | */ | |
21034 | ||
21035 | -#include <linux/delay.h> | |
21036 | -#include <linux/gfp.h> | |
21037 | -#include <linux/oom.h> | |
21038 | -#include <linux/sched/debug.h> | |
21039 | -#include <linux/smpboot.h> | |
21040 | -#include <uapi/linux/sched/types.h> | |
21041 | -#include "../time/tick-internal.h" | |
21042 | - | |
21043 | -#ifdef CONFIG_RCU_BOOST | |
21044 | - | |
21045 | #include "../locking/rtmutex_common.h" | |
21046 | ||
21047 | /* | |
21048 | * Control variables for per-CPU and per-rcu_node kthreads. These | |
21049 | * handle all flavors of RCU. | |
21050 | */ | |
21051 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
21052 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
21053 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
21054 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
21055 | ||
21056 | -#else /* #ifdef CONFIG_RCU_BOOST */ | |
21057 | - | |
21058 | -/* | |
21059 | - * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, | |
21060 | - * all uses are in dead code. Provide a definition to keep the compiler | |
21061 | - * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. | |
21062 | - * This probably needs to be excluded from -rt builds. | |
21063 | - */ | |
21064 | -#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) | |
21065 | - | |
21066 | -#endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
21067 | - | |
21068 | #ifdef CONFIG_RCU_NOCB_CPU | |
21069 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
21070 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
21071 | @@ -324,9 +301,13 @@ | |
21072 | struct task_struct *t = current; | |
21073 | struct rcu_data *rdp; | |
21074 | struct rcu_node *rnp; | |
21075 | + int sleeping_l = 0; | |
21076 | ||
21077 | RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n"); | |
21078 | - WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); | |
21079 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
21080 | + sleeping_l = t->sleeping_lock; | |
21081 | +#endif | |
21082 | + WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0 && !sleeping_l); | |
21083 | if (t->rcu_read_lock_nesting > 0 && | |
21084 | !t->rcu_read_unlock_special.b.blocked) { | |
21085 | ||
21086 | @@ -463,7 +444,7 @@ | |
21087 | } | |
21088 | ||
21089 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
21090 | - if (in_irq() || in_serving_softirq()) { | |
21091 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
21092 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
21093 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
21094 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
21095 | @@ -530,7 +511,7 @@ | |
21096 | ||
21097 | /* Unboost if we were boosted. */ | |
21098 | if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) | |
21099 | - rt_mutex_unlock(&rnp->boost_mtx); | |
21100 | + rt_mutex_futex_unlock(&rnp->boost_mtx); | |
21101 | ||
21102 | /* | |
21103 | * If this was the last task on the expedited lists, | |
21104 | @@ -684,15 +665,6 @@ | |
21105 | t->rcu_read_unlock_special.b.need_qs = true; | |
21106 | } | |
21107 | ||
21108 | -#ifdef CONFIG_RCU_BOOST | |
21109 | - | |
21110 | -static void rcu_preempt_do_callbacks(void) | |
21111 | -{ | |
21112 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
21113 | -} | |
21114 | - | |
21115 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
21116 | - | |
21117 | /** | |
21118 | * call_rcu() - Queue an RCU callback for invocation after a grace period. | |
21119 | * @head: structure to be used for queueing the RCU updates. | |
21120 | @@ -915,20 +887,23 @@ | |
21121 | ||
21122 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
1a6e0f06 | 21123 | |
1a6e0f06 | 21124 | +/* |
e4b2b4a8 | 21125 | + * If boosting, set rcuc kthreads to realtime priority. |
1a6e0f06 | 21126 | + */ |
e4b2b4a8 | 21127 | +static void rcu_cpu_kthread_setup(unsigned int cpu) |
1a6e0f06 | 21128 | +{ |
e4b2b4a8 JK |
21129 | #ifdef CONFIG_RCU_BOOST |
21130 | + struct sched_param sp; | |
21131 | ||
21132 | -#include "../locking/rtmutex_common.h" | |
21133 | - | |
21134 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
21135 | -{ | |
21136 | - /* | |
21137 | - * If the thread is yielding, only wake it when this | |
21138 | - * is invoked from idle | |
21139 | - */ | |
21140 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
21141 | - wake_up_process(t); | |
21142 | + sp.sched_priority = kthread_prio; | |
21143 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
21144 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
21145 | } | |
21146 | ||
21147 | +#ifdef CONFIG_RCU_BOOST | |
1a6e0f06 | 21148 | + |
e4b2b4a8 | 21149 | +#include "../locking/rtmutex_common.h" |
1a6e0f06 | 21150 | + |
e4b2b4a8 JK |
21151 | /* |
21152 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
21153 | * or ->boost_tasks, advancing the pointer to the next task in the | |
21154 | @@ -1071,23 +1046,6 @@ | |
21155 | } | |
1a6e0f06 | 21156 | |
e4b2b4a8 JK |
21157 | /* |
21158 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
21159 | - */ | |
21160 | -static void invoke_rcu_callbacks_kthread(void) | |
21161 | -{ | |
21162 | - unsigned long flags; | |
21163 | - | |
21164 | - local_irq_save(flags); | |
21165 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
21166 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
21167 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
21168 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
21169 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
21170 | - } | |
21171 | - local_irq_restore(flags); | |
21172 | -} | |
21173 | - | |
21174 | -/* | |
21175 | * Is the current CPU running the RCU-callbacks kthread? | |
21176 | * Caller must have preemption disabled. | |
21177 | */ | |
21178 | @@ -1141,67 +1099,6 @@ | |
21179 | return 0; | |
21180 | } | |
1a6e0f06 | 21181 | |
e4b2b4a8 JK |
21182 | -static void rcu_kthread_do_work(void) |
21183 | -{ | |
21184 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
21185 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
21186 | - rcu_preempt_do_callbacks(); | |
21187 | -} | |
21188 | - | |
21189 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
21190 | -{ | |
21191 | - struct sched_param sp; | |
21192 | - | |
21193 | - sp.sched_priority = kthread_prio; | |
21194 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
21195 | -} | |
21196 | - | |
21197 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
21198 | -{ | |
21199 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
21200 | -} | |
21201 | - | |
21202 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
21203 | -{ | |
21204 | - return __this_cpu_read(rcu_cpu_has_work); | |
21205 | -} | |
21206 | - | |
21207 | -/* | |
21208 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
21209 | - * RCU softirq used in flavors and configurations of RCU that do not | |
21210 | - * support RCU priority boosting. | |
21211 | - */ | |
21212 | -static void rcu_cpu_kthread(unsigned int cpu) | |
21213 | -{ | |
21214 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
21215 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
21216 | - int spincnt; | |
21217 | - | |
21218 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
21219 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
21220 | - local_bh_disable(); | |
21221 | - *statusp = RCU_KTHREAD_RUNNING; | |
21222 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
21223 | - local_irq_disable(); | |
21224 | - work = *workp; | |
21225 | - *workp = 0; | |
21226 | - local_irq_enable(); | |
21227 | - if (work) | |
21228 | - rcu_kthread_do_work(); | |
21229 | - local_bh_enable(); | |
21230 | - if (*workp == 0) { | |
21231 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
21232 | - *statusp = RCU_KTHREAD_WAITING; | |
21233 | - return; | |
21234 | - } | |
21235 | - } | |
21236 | - *statusp = RCU_KTHREAD_YIELDING; | |
21237 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
21238 | - schedule_timeout_interruptible(2); | |
21239 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
21240 | - *statusp = RCU_KTHREAD_WAITING; | |
21241 | -} | |
21242 | - | |
21243 | /* | |
21244 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
21245 | * served by the rcu_node in question. The CPU hotplug lock is still | |
21246 | @@ -1232,26 +1129,12 @@ | |
21247 | free_cpumask_var(cm); | |
21248 | } | |
1a6e0f06 | 21249 | |
e4b2b4a8 JK |
21250 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { |
21251 | - .store = &rcu_cpu_kthread_task, | |
21252 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
21253 | - .thread_fn = rcu_cpu_kthread, | |
21254 | - .thread_comm = "rcuc/%u", | |
21255 | - .setup = rcu_cpu_kthread_setup, | |
21256 | - .park = rcu_cpu_kthread_park, | |
21257 | -}; | |
21258 | - | |
21259 | /* | |
21260 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
21261 | */ | |
21262 | static void __init rcu_spawn_boost_kthreads(void) | |
21263 | { | |
21264 | struct rcu_node *rnp; | |
21265 | - int cpu; | |
21266 | - | |
21267 | - for_each_possible_cpu(cpu) | |
21268 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
21269 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
21270 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
21271 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
21272 | } | |
21273 | @@ -1274,11 +1157,6 @@ | |
21274 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | |
21275 | } | |
1a6e0f06 | 21276 | |
e4b2b4a8 JK |
21277 | -static void invoke_rcu_callbacks_kthread(void) |
21278 | -{ | |
21279 | - WARN_ON_ONCE(1); | |
21280 | -} | |
21281 | - | |
21282 | static bool rcu_is_callbacks_kthread(void) | |
21283 | { | |
21284 | return false; | |
21285 | @@ -1302,7 +1180,7 @@ | |
1a6e0f06 | 21286 | |
e4b2b4a8 | 21287 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ |
1a6e0f06 | 21288 | |
e4b2b4a8 JK |
21289 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) |
21290 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
1a6e0f06 | 21291 | |
e4b2b4a8 JK |
21292 | /* |
21293 | * Check to see if any future RCU-related work will need to be done | |
21294 | @@ -1318,7 +1196,9 @@ | |
21295 | *nextevt = KTIME_MAX; | |
21296 | return rcu_cpu_has_callbacks(NULL); | |
1a6e0f06 | 21297 | } |
e4b2b4a8 | 21298 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ |
1a6e0f06 | 21299 | |
e4b2b4a8 JK |
21300 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) |
21301 | /* | |
21302 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
21303 | * after it. | |
21304 | @@ -1414,6 +1294,8 @@ | |
21305 | return cbs_ready; | |
21306 | } | |
1f39f580 | 21307 | |
e4b2b4a8 JK |
21308 | +#ifndef CONFIG_PREEMPT_RT_FULL |
21309 | + | |
21310 | /* | |
21311 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
21312 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
21313 | @@ -1456,6 +1338,7 @@ | |
21314 | *nextevt = basemono + dj * TICK_NSEC; | |
21315 | return 0; | |
21316 | } | |
21317 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
1f39f580 | 21318 | |
e4b2b4a8 JK |
21319 | /* |
21320 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
21321 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/rcu/update.c linux-4.14/kernel/rcu/update.c | |
21322 | --- linux-4.14.orig/kernel/rcu/update.c 2018-09-05 11:03:22.000000000 +0200 | |
21323 | +++ linux-4.14/kernel/rcu/update.c 2018-09-05 11:05:07.000000000 +0200 | |
21324 | @@ -66,7 +66,7 @@ | |
21325 | module_param(rcu_expedited, int, 0); | |
21326 | extern int rcu_normal; /* from sysctl */ | |
21327 | module_param(rcu_normal, int, 0); | |
21328 | -static int rcu_normal_after_boot; | |
21329 | +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
21330 | module_param(rcu_normal_after_boot, int, 0); | |
21331 | #endif /* #ifndef CONFIG_TINY_RCU */ | |
1f39f580 | 21332 | |
e4b2b4a8 JK |
21333 | @@ -333,6 +333,7 @@ |
21334 | } | |
21335 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
1f39f580 | 21336 | |
e4b2b4a8 JK |
21337 | +#ifndef CONFIG_PREEMPT_RT_FULL |
21338 | /** | |
21339 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
21340 | * | |
21341 | @@ -359,6 +360,7 @@ | |
21342 | return in_softirq() || irqs_disabled(); | |
21343 | } | |
21344 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
21345 | +#endif | |
1f39f580 | 21346 | |
e4b2b4a8 | 21347 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
1f39f580 | 21348 | |
e4b2b4a8 JK |
21349 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/completion.c linux-4.14/kernel/sched/completion.c |
21350 | --- linux-4.14.orig/kernel/sched/completion.c 2017-11-12 19:46:13.000000000 +0100 | |
21351 | +++ linux-4.14/kernel/sched/completion.c 2018-09-05 11:05:07.000000000 +0200 | |
21352 | @@ -32,7 +32,7 @@ | |
21353 | { | |
21354 | unsigned long flags; | |
1f39f580 | 21355 | |
e4b2b4a8 JK |
21356 | - spin_lock_irqsave(&x->wait.lock, flags); |
21357 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
1f39f580 | 21358 | |
e4b2b4a8 JK |
21359 | /* |
21360 | * Perform commit of crossrelease here. | |
21361 | @@ -41,8 +41,8 @@ | |
1f39f580 | 21362 | |
e4b2b4a8 JK |
21363 | if (x->done != UINT_MAX) |
21364 | x->done++; | |
21365 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
21366 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21367 | + swake_up_locked(&x->wait); | |
21368 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21369 | } | |
21370 | EXPORT_SYMBOL(complete); | |
1f39f580 | 21371 | |
e4b2b4a8 JK |
21372 | @@ -66,10 +66,10 @@ |
21373 | { | |
21374 | unsigned long flags; | |
1f39f580 | 21375 | |
e4b2b4a8 JK |
21376 | - spin_lock_irqsave(&x->wait.lock, flags); |
21377 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21378 | x->done = UINT_MAX; | |
21379 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
21380 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21381 | + swake_up_all_locked(&x->wait); | |
21382 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21383 | } | |
21384 | EXPORT_SYMBOL(complete_all); | |
1f39f580 | 21385 | |
e4b2b4a8 JK |
21386 | @@ -78,20 +78,20 @@ |
21387 | long (*action)(long), long timeout, int state) | |
21388 | { | |
21389 | if (!x->done) { | |
21390 | - DECLARE_WAITQUEUE(wait, current); | |
21391 | + DECLARE_SWAITQUEUE(wait); | |
1f39f580 | 21392 | |
e4b2b4a8 JK |
21393 | - __add_wait_queue_entry_tail_exclusive(&x->wait, &wait); |
21394 | + __prepare_to_swait(&x->wait, &wait); | |
21395 | do { | |
21396 | if (signal_pending_state(state, current)) { | |
21397 | timeout = -ERESTARTSYS; | |
21398 | break; | |
21399 | } | |
21400 | __set_current_state(state); | |
21401 | - spin_unlock_irq(&x->wait.lock); | |
21402 | + raw_spin_unlock_irq(&x->wait.lock); | |
21403 | timeout = action(timeout); | |
21404 | - spin_lock_irq(&x->wait.lock); | |
21405 | + raw_spin_lock_irq(&x->wait.lock); | |
21406 | } while (!x->done && timeout); | |
21407 | - __remove_wait_queue(&x->wait, &wait); | |
21408 | + __finish_swait(&x->wait, &wait); | |
21409 | if (!x->done) | |
21410 | return timeout; | |
21411 | } | |
21412 | @@ -108,9 +108,9 @@ | |
1f39f580 | 21413 | |
e4b2b4a8 | 21414 | complete_acquire(x); |
1f39f580 | 21415 | |
e4b2b4a8 JK |
21416 | - spin_lock_irq(&x->wait.lock); |
21417 | + raw_spin_lock_irq(&x->wait.lock); | |
21418 | timeout = do_wait_for_common(x, action, timeout, state); | |
21419 | - spin_unlock_irq(&x->wait.lock); | |
21420 | + raw_spin_unlock_irq(&x->wait.lock); | |
1f39f580 | 21421 | |
e4b2b4a8 JK |
21422 | complete_release(x); |
21423 | ||
21424 | @@ -299,12 +299,12 @@ | |
21425 | if (!READ_ONCE(x->done)) | |
21426 | return 0; | |
21427 | ||
21428 | - spin_lock_irqsave(&x->wait.lock, flags); | |
21429 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21430 | if (!x->done) | |
21431 | ret = 0; | |
21432 | else if (x->done != UINT_MAX) | |
21433 | x->done--; | |
21434 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21435 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
1f39f580 JK |
21436 | return ret; |
21437 | } | |
e4b2b4a8 JK |
21438 | EXPORT_SYMBOL(try_wait_for_completion); |
21439 | @@ -330,8 +330,8 @@ | |
21440 | * otherwise we can end up freeing the completion before complete() | |
21441 | * is done referencing it. | |
21442 | */ | |
21443 | - spin_lock_irqsave(&x->wait.lock, flags); | |
21444 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21445 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21446 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21447 | return true; | |
21448 | } | |
21449 | EXPORT_SYMBOL(completion_done); | |
21450 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/core.c linux-4.14/kernel/sched/core.c | |
21451 | --- linux-4.14.orig/kernel/sched/core.c 2018-09-05 11:03:22.000000000 +0200 | |
21452 | +++ linux-4.14/kernel/sched/core.c 2018-09-05 11:05:07.000000000 +0200 | |
21453 | @@ -59,7 +59,11 @@ | |
21454 | * Number of tasks to iterate in a single balance run. | |
21455 | * Limited because this is done with IRQs disabled. | |
21456 | */ | |
21457 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21458 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
21459 | +#else | |
21460 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
21461 | +#endif | |
1f39f580 | 21462 | |
e4b2b4a8 JK |
21463 | /* |
21464 | * period over which we average the RT time consumption, measured | |
21465 | @@ -341,7 +345,7 @@ | |
21466 | rq->hrtick_csd.info = rq; | |
21467 | #endif | |
1f39f580 | 21468 | |
e4b2b4a8 JK |
21469 | - hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
21470 | + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); | |
21471 | rq->hrtick_timer.function = hrtick; | |
21472 | } | |
21473 | #else /* CONFIG_SCHED_HRTICK */ | |
21474 | @@ -423,9 +427,15 @@ | |
21475 | #endif | |
21476 | #endif | |
1f39f580 | 21477 | |
e4b2b4a8 JK |
21478 | -void wake_q_add(struct wake_q_head *head, struct task_struct *task) |
21479 | +void __wake_q_add(struct wake_q_head *head, struct task_struct *task, | |
21480 | + bool sleeper) | |
21481 | { | |
21482 | - struct wake_q_node *node = &task->wake_q; | |
21483 | + struct wake_q_node *node; | |
21484 | + | |
21485 | + if (sleeper) | |
21486 | + node = &task->wake_q_sleeper; | |
21487 | + else | |
21488 | + node = &task->wake_q; | |
1f39f580 | 21489 | |
e4b2b4a8 JK |
21490 | /* |
21491 | * Atomically grab the task, if ->wake_q is !nil already it means | |
21492 | @@ -447,24 +457,32 @@ | |
21493 | head->lastp = &node->next; | |
21494 | } | |
21495 | ||
21496 | -void wake_up_q(struct wake_q_head *head) | |
21497 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
21498 | { | |
21499 | struct wake_q_node *node = head->first; | |
21500 | ||
21501 | while (node != WAKE_Q_TAIL) { | |
21502 | struct task_struct *task; | |
21503 | ||
21504 | - task = container_of(node, struct task_struct, wake_q); | |
21505 | + if (sleeper) | |
21506 | + task = container_of(node, struct task_struct, wake_q_sleeper); | |
21507 | + else | |
21508 | + task = container_of(node, struct task_struct, wake_q); | |
21509 | BUG_ON(!task); | |
21510 | /* Task can safely be re-inserted now: */ | |
21511 | node = node->next; | |
21512 | - task->wake_q.next = NULL; | |
21513 | - | |
21514 | + if (sleeper) | |
21515 | + task->wake_q_sleeper.next = NULL; | |
21516 | + else | |
21517 | + task->wake_q.next = NULL; | |
21518 | /* | |
21519 | * wake_up_process() implies a wmb() to pair with the queueing | |
21520 | * in wake_q_add() so as not to miss wakeups. | |
21521 | */ | |
21522 | - wake_up_process(task); | |
21523 | + if (sleeper) | |
21524 | + wake_up_lock_sleeper(task); | |
21525 | + else | |
21526 | + wake_up_process(task); | |
21527 | put_task_struct(task); | |
1f39f580 | 21528 | } |
e4b2b4a8 JK |
21529 | } |
21530 | @@ -500,6 +518,48 @@ | |
21531 | trace_sched_wake_idle_without_ipi(cpu); | |
21532 | } | |
1f39f580 | 21533 | |
e4b2b4a8 JK |
21534 | +#ifdef CONFIG_PREEMPT_LAZY |
21535 | + | |
21536 | +static int tsk_is_polling(struct task_struct *p) | |
21537 | +{ | |
21538 | +#ifdef TIF_POLLING_NRFLAG | |
21539 | + return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG); | |
21540 | +#else | |
21541 | + return 0; | |
21542 | +#endif | |
21543 | +} | |
21544 | + | |
21545 | +void resched_curr_lazy(struct rq *rq) | |
21546 | +{ | |
21547 | + struct task_struct *curr = rq->curr; | |
21548 | + int cpu; | |
21549 | + | |
21550 | + if (!sched_feat(PREEMPT_LAZY)) { | |
21551 | + resched_curr(rq); | |
21552 | + return; | |
21553 | + } | |
21554 | + | |
21555 | + lockdep_assert_held(&rq->lock); | |
21556 | + | |
21557 | + if (test_tsk_need_resched(curr)) | |
21558 | + return; | |
21559 | + | |
21560 | + if (test_tsk_need_resched_lazy(curr)) | |
21561 | + return; | |
21562 | + | |
21563 | + set_tsk_need_resched_lazy(curr); | |
21564 | + | |
21565 | + cpu = cpu_of(rq); | |
21566 | + if (cpu == smp_processor_id()) | |
21567 | + return; | |
21568 | + | |
21569 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
21570 | + smp_mb(); | |
21571 | + if (!tsk_is_polling(curr)) | |
21572 | + smp_send_reschedule(cpu); | |
21573 | +} | |
21574 | +#endif | |
21575 | + | |
21576 | void resched_cpu(int cpu) | |
1f39f580 | 21577 | { |
e4b2b4a8 JK |
21578 | struct rq *rq = cpu_rq(cpu); |
21579 | @@ -523,11 +583,14 @@ | |
21580 | */ | |
21581 | int get_nohz_timer_target(void) | |
21582 | { | |
21583 | - int i, cpu = smp_processor_id(); | |
21584 | + int i, cpu; | |
21585 | struct sched_domain *sd; | |
1f39f580 | 21586 | |
e4b2b4a8 JK |
21587 | + preempt_disable_rt(); |
21588 | + cpu = smp_processor_id(); | |
21589 | + | |
21590 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
21591 | - return cpu; | |
21592 | + goto preempt_en_rt; | |
1f39f580 | 21593 | |
e4b2b4a8 JK |
21594 | rcu_read_lock(); |
21595 | for_each_domain(cpu, sd) { | |
21596 | @@ -546,6 +609,8 @@ | |
21597 | cpu = housekeeping_any_cpu(); | |
21598 | unlock: | |
21599 | rcu_read_unlock(); | |
21600 | +preempt_en_rt: | |
21601 | + preempt_enable_rt(); | |
21602 | return cpu; | |
1f39f580 JK |
21603 | } |
21604 | ||
e4b2b4a8 JK |
21605 | @@ -912,7 +977,7 @@ |
21606 | */ | |
21607 | static inline bool is_cpu_allowed(struct task_struct *p, int cpu) | |
1f39f580 | 21608 | { |
e4b2b4a8 JK |
21609 | - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) |
21610 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) | |
21611 | return false; | |
1f39f580 | 21612 | |
e4b2b4a8 JK |
21613 | if (is_per_cpu_kthread(p)) |
21614 | @@ -1007,7 +1072,7 @@ | |
21615 | local_irq_disable(); | |
1f39f580 | 21616 | /* |
e4b2b4a8 JK |
21617 | * We need to explicitly wake pending tasks before running |
21618 | - * __migrate_task() such that we will not miss enforcing cpus_allowed | |
21619 | + * __migrate_task() such that we will not miss enforcing cpus_ptr | |
21620 | * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. | |
21621 | */ | |
21622 | sched_ttwu_pending(); | |
21623 | @@ -1038,11 +1103,19 @@ | |
21624 | */ | |
21625 | void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask) | |
1f39f580 | 21626 | { |
e4b2b4a8 JK |
21627 | - cpumask_copy(&p->cpus_allowed, new_mask); |
21628 | + cpumask_copy(&p->cpus_mask, new_mask); | |
21629 | p->nr_cpus_allowed = cpumask_weight(new_mask); | |
21630 | } | |
1f39f580 | 21631 | |
e4b2b4a8 JK |
21632 | -void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
21633 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) | |
21634 | +int __migrate_disabled(struct task_struct *p) | |
21635 | +{ | |
21636 | + return p->migrate_disable; | |
21637 | +} | |
21638 | +#endif | |
21639 | + | |
21640 | +static void __do_set_cpus_allowed_tail(struct task_struct *p, | |
21641 | + const struct cpumask *new_mask) | |
21642 | { | |
21643 | struct rq *rq = task_rq(p); | |
21644 | bool queued, running; | |
21645 | @@ -1071,6 +1144,20 @@ | |
21646 | set_curr_task(rq, p); | |
1f39f580 JK |
21647 | } |
21648 | ||
e4b2b4a8 JK |
21649 | +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
21650 | +{ | |
21651 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) | |
21652 | + if (__migrate_disabled(p)) { | |
21653 | + lockdep_assert_held(&p->pi_lock); | |
21654 | + | |
21655 | + cpumask_copy(&p->cpus_mask, new_mask); | |
21656 | + p->migrate_disable_update = 1; | |
21657 | + return; | |
21658 | + } | |
21659 | +#endif | |
21660 | + __do_set_cpus_allowed_tail(p, new_mask); | |
21661 | +} | |
21662 | + | |
21663 | /* | |
21664 | * Change a given task's CPU affinity. Migrate the thread to a | |
21665 | * proper CPU and schedule it away if the CPU it's executing on | |
21666 | @@ -1108,7 +1195,7 @@ | |
21667 | goto out; | |
21668 | } | |
1f39f580 | 21669 | |
e4b2b4a8 JK |
21670 | - if (cpumask_equal(&p->cpus_allowed, new_mask)) |
21671 | + if (cpumask_equal(p->cpus_ptr, new_mask)) | |
21672 | goto out; | |
1f39f580 | 21673 | |
e4b2b4a8 JK |
21674 | if (!cpumask_intersects(new_mask, cpu_valid_mask)) { |
21675 | @@ -1129,9 +1216,16 @@ | |
21676 | } | |
1f39f580 | 21677 | |
e4b2b4a8 JK |
21678 | /* Can the task run on the task's current CPU? If so, we're done */ |
21679 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
21680 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
21681 | goto out; | |
1f39f580 | 21682 | |
e4b2b4a8 JK |
21683 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) |
21684 | + if (__migrate_disabled(p)) { | |
21685 | + p->migrate_disable_update = 1; | |
21686 | + goto out; | |
21687 | + } | |
21688 | +#endif | |
21689 | + | |
21690 | dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); | |
21691 | if (task_running(rq, p) || p->state == TASK_WAKING) { | |
21692 | struct migration_arg arg = { p, dest_cpu }; | |
21693 | @@ -1269,10 +1363,10 @@ | |
21694 | if (task_cpu(arg->src_task) != arg->src_cpu) | |
21695 | goto unlock; | |
1f39f580 | 21696 | |
e4b2b4a8 JK |
21697 | - if (!cpumask_test_cpu(arg->dst_cpu, &arg->src_task->cpus_allowed)) |
21698 | + if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr)) | |
21699 | goto unlock; | |
1f39f580 | 21700 | |
e4b2b4a8 JK |
21701 | - if (!cpumask_test_cpu(arg->src_cpu, &arg->dst_task->cpus_allowed)) |
21702 | + if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr)) | |
21703 | goto unlock; | |
1a6e0f06 | 21704 | |
e4b2b4a8 JK |
21705 | __migrate_swap_task(arg->src_task, arg->dst_cpu); |
21706 | @@ -1313,10 +1407,10 @@ | |
21707 | if (!cpu_active(arg.src_cpu) || !cpu_active(arg.dst_cpu)) | |
21708 | goto out; | |
1a6e0f06 | 21709 | |
e4b2b4a8 JK |
21710 | - if (!cpumask_test_cpu(arg.dst_cpu, &arg.src_task->cpus_allowed)) |
21711 | + if (!cpumask_test_cpu(arg.dst_cpu, arg.src_task->cpus_ptr)) | |
21712 | goto out; | |
1a6e0f06 | 21713 | |
e4b2b4a8 JK |
21714 | - if (!cpumask_test_cpu(arg.src_cpu, &arg.dst_task->cpus_allowed)) |
21715 | + if (!cpumask_test_cpu(arg.src_cpu, arg.dst_task->cpus_ptr)) | |
21716 | goto out; | |
1a6e0f06 | 21717 | |
e4b2b4a8 JK |
21718 | trace_sched_swap_numa(cur, arg.src_cpu, p, arg.dst_cpu); |
21719 | @@ -1326,6 +1420,18 @@ | |
21720 | return ret; | |
1a6e0f06 JK |
21721 | } |
21722 | ||
e4b2b4a8 JK |
21723 | +static bool check_task_state(struct task_struct *p, long match_state) |
21724 | +{ | |
21725 | + bool match = false; | |
21726 | + | |
21727 | + raw_spin_lock_irq(&p->pi_lock); | |
21728 | + if (p->state == match_state || p->saved_state == match_state) | |
21729 | + match = true; | |
21730 | + raw_spin_unlock_irq(&p->pi_lock); | |
21731 | + | |
21732 | + return match; | |
21733 | +} | |
21734 | + | |
21735 | /* | |
21736 | * wait_task_inactive - wait for a thread to unschedule. | |
21737 | * | |
21738 | @@ -1370,7 +1476,7 @@ | |
21739 | * is actually now running somewhere else! | |
21740 | */ | |
21741 | while (task_running(rq, p)) { | |
21742 | - if (match_state && unlikely(p->state != match_state)) | |
21743 | + if (match_state && !check_task_state(p, match_state)) | |
21744 | return 0; | |
21745 | cpu_relax(); | |
21746 | } | |
21747 | @@ -1385,7 +1491,8 @@ | |
21748 | running = task_running(rq, p); | |
21749 | queued = task_on_rq_queued(p); | |
21750 | ncsw = 0; | |
21751 | - if (!match_state || p->state == match_state) | |
21752 | + if (!match_state || p->state == match_state || | |
21753 | + p->saved_state == match_state) | |
21754 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
21755 | task_rq_unlock(rq, p, &rf); | |
1a6e0f06 | 21756 | |
e4b2b4a8 JK |
21757 | @@ -1460,7 +1567,7 @@ |
21758 | EXPORT_SYMBOL_GPL(kick_process); | |
1a6e0f06 | 21759 | |
e4b2b4a8 JK |
21760 | /* |
21761 | - * ->cpus_allowed is protected by both rq->lock and p->pi_lock | |
21762 | + * ->cpus_ptr is protected by both rq->lock and p->pi_lock | |
21763 | * | |
21764 | * A few notes on cpu_active vs cpu_online: | |
21765 | * | |
21766 | @@ -1500,14 +1607,14 @@ | |
21767 | for_each_cpu(dest_cpu, nodemask) { | |
21768 | if (!cpu_active(dest_cpu)) | |
21769 | continue; | |
21770 | - if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | |
21771 | + if (cpumask_test_cpu(dest_cpu, p->cpus_ptr)) | |
21772 | return dest_cpu; | |
21773 | } | |
21774 | } | |
c7c16703 | 21775 | |
e4b2b4a8 JK |
21776 | for (;;) { |
21777 | /* Any allowed, online CPU? */ | |
21778 | - for_each_cpu(dest_cpu, &p->cpus_allowed) { | |
21779 | + for_each_cpu(dest_cpu, p->cpus_ptr) { | |
21780 | if (!is_cpu_allowed(p, dest_cpu)) | |
21781 | continue; | |
21782 | ||
21783 | @@ -1551,7 +1658,7 @@ | |
21784 | } | |
21785 | ||
21786 | /* | |
21787 | - * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. | |
21788 | + * The caller (fork, wakeup) owns p->pi_lock, ->cpus_ptr is stable. | |
21789 | */ | |
21790 | static inline | |
21791 | int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) | |
21792 | @@ -1561,11 +1668,11 @@ | |
21793 | if (p->nr_cpus_allowed > 1) | |
21794 | cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); | |
21795 | else | |
21796 | - cpu = cpumask_any(&p->cpus_allowed); | |
21797 | + cpu = cpumask_any(p->cpus_ptr); | |
21798 | ||
21799 | /* | |
21800 | * In order not to call set_task_cpu() on a blocking task we need | |
21801 | - * to rely on ttwu() to place the task on a valid ->cpus_allowed | |
21802 | + * to rely on ttwu() to place the task on a valid ->cpus_ptr | |
21803 | * CPU. | |
21804 | * | |
21805 | * Since this is common to all placement strategies, this lives here. | |
21806 | @@ -1668,10 +1775,6 @@ | |
1a6e0f06 | 21807 | { |
e4b2b4a8 JK |
21808 | activate_task(rq, p, en_flags); |
21809 | p->on_rq = TASK_ON_RQ_QUEUED; | |
21810 | - | |
21811 | - /* If a worker is waking up, notify the workqueue: */ | |
21812 | - if (p->flags & PF_WQ_WORKER) | |
21813 | - wq_worker_waking_up(p, cpu_of(rq)); | |
21814 | } | |
c7c16703 | 21815 | |
e4b2b4a8 JK |
21816 | /* |
21817 | @@ -1995,8 +2098,27 @@ | |
21818 | */ | |
21819 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
21820 | smp_mb__after_spinlock(); | |
21821 | - if (!(p->state & state)) | |
21822 | + if (!(p->state & state)) { | |
21823 | + /* | |
21824 | + * The task might be running due to a spinlock sleeper | |
21825 | + * wakeup. Check the saved state and set it to running | |
21826 | + * if the wakeup condition is true. | |
21827 | + */ | |
21828 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
21829 | + if (p->saved_state & state) { | |
21830 | + p->saved_state = TASK_RUNNING; | |
21831 | + success = 1; | |
21832 | + } | |
21833 | + } | |
21834 | goto out; | |
21835 | + } | |
21836 | + | |
c7c16703 | 21837 | + /* |
e4b2b4a8 JK |
21838 | + * If this is a regular wakeup, then we can unconditionally |
21839 | + * clear the saved state of a "lock sleeper". | |
c7c16703 | 21840 | + */ |
e4b2b4a8 JK |
21841 | + if (!(wake_flags & WF_LOCK_SLEEPER)) |
21842 | + p->saved_state = TASK_RUNNING; | |
1a6e0f06 | 21843 | |
e4b2b4a8 | 21844 | trace_sched_waking(p); |
1a6e0f06 | 21845 | |
e4b2b4a8 | 21846 | @@ -2093,56 +2215,6 @@ |
1a6e0f06 | 21847 | } |
1a6e0f06 | 21848 | |
e4b2b4a8 JK |
21849 | /** |
21850 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
21851 | - * @p: the thread to be awakened | |
21852 | - * @rf: request-queue flags for pinning | |
21853 | - * | |
21854 | - * Put @p on the run-queue if it's not already there. The caller must | |
21855 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
21856 | - * the current task. | |
21857 | - */ | |
21858 | -static void try_to_wake_up_local(struct task_struct *p, struct rq_flags *rf) | |
21859 | -{ | |
21860 | - struct rq *rq = task_rq(p); | |
21861 | - | |
21862 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
21863 | - WARN_ON_ONCE(p == current)) | |
21864 | - return; | |
21865 | - | |
21866 | - lockdep_assert_held(&rq->lock); | |
21867 | - | |
21868 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
21869 | - /* | |
21870 | - * This is OK, because current is on_cpu, which avoids it being | |
21871 | - * picked for load-balance and preemption/IRQs are still | |
21872 | - * disabled avoiding further scheduler activity on it and we've | |
21873 | - * not yet picked a replacement task. | |
21874 | - */ | |
21875 | - rq_unlock(rq, rf); | |
21876 | - raw_spin_lock(&p->pi_lock); | |
21877 | - rq_relock(rq, rf); | |
21878 | - } | |
21879 | - | |
21880 | - if (!(p->state & TASK_NORMAL)) | |
21881 | - goto out; | |
21882 | - | |
21883 | - trace_sched_waking(p); | |
21884 | - | |
21885 | - if (!task_on_rq_queued(p)) { | |
21886 | - if (p->in_iowait) { | |
21887 | - delayacct_blkio_end(p); | |
21888 | - atomic_dec(&rq->nr_iowait); | |
21889 | - } | |
21890 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK); | |
21891 | - } | |
21892 | - | |
21893 | - ttwu_do_wakeup(rq, p, 0, rf); | |
21894 | - ttwu_stat(p, smp_processor_id(), 0); | |
21895 | -out: | |
21896 | - raw_spin_unlock(&p->pi_lock); | |
21897 | -} | |
21898 | - | |
21899 | -/** | |
21900 | * wake_up_process - Wake up a specific process | |
21901 | * @p: The process to be woken up. | |
21902 | * | |
21903 | @@ -2160,6 +2232,18 @@ | |
21904 | } | |
21905 | EXPORT_SYMBOL(wake_up_process); | |
21906 | ||
21907 | +/** | |
21908 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
21909 | + * @p: The process to be woken up. | |
21910 | + * | |
21911 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate | |
21912 | + * the nature of the wakeup. | |
1a6e0f06 | 21913 | + */ |
e4b2b4a8 | 21914 | +int wake_up_lock_sleeper(struct task_struct *p) |
1a6e0f06 | 21915 | +{ |
e4b2b4a8 | 21916 | + return try_to_wake_up(p, TASK_UNINTERRUPTIBLE, WF_LOCK_SLEEPER); |
1a6e0f06 | 21917 | +} |
1a6e0f06 | 21918 | + |
e4b2b4a8 | 21919 | int wake_up_state(struct task_struct *p, unsigned int state) |
1a6e0f06 | 21920 | { |
e4b2b4a8 JK |
21921 | return try_to_wake_up(p, state, 0); |
21922 | @@ -2420,6 +2504,9 @@ | |
21923 | p->on_cpu = 0; | |
21924 | #endif | |
21925 | init_task_preempt_count(p); | |
21926 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
21927 | + task_thread_info(p)->preempt_lazy_count = 0; | |
1a6e0f06 | 21928 | +#endif |
e4b2b4a8 JK |
21929 | #ifdef CONFIG_SMP |
21930 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
21931 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
21932 | @@ -2462,7 +2549,7 @@ | |
21933 | #ifdef CONFIG_SMP | |
21934 | /* | |
21935 | * Fork balancing, do it here and not earlier because: | |
21936 | - * - cpus_allowed can change in the fork path | |
21937 | + * - cpus_ptr can change in the fork path | |
21938 | * - any previously selected CPU might disappear through hotplug | |
21939 | * | |
21940 | * Use __set_task_cpu() to avoid calling sched_class::migrate_task_rq, | |
21941 | @@ -2675,21 +2762,16 @@ | |
21942 | finish_arch_post_lock_switch(); | |
1a6e0f06 | 21943 | |
e4b2b4a8 JK |
21944 | fire_sched_in_preempt_notifiers(current); |
21945 | + /* | |
21946 | + * We use mmdrop_delayed() here so we don't have to do the | |
21947 | + * full __mmdrop() when we are the last user. | |
21948 | + */ | |
21949 | if (mm) | |
21950 | - mmdrop(mm); | |
21951 | + mmdrop_delayed(mm); | |
21952 | if (unlikely(prev_state == TASK_DEAD)) { | |
21953 | if (prev->sched_class->task_dead) | |
21954 | prev->sched_class->task_dead(prev); | |
1a6e0f06 | 21955 | |
e4b2b4a8 JK |
21956 | - /* |
21957 | - * Remove function-return probe instances associated with this | |
21958 | - * task and put them back on the free list. | |
21959 | - */ | |
21960 | - kprobe_flush_task(prev); | |
21961 | - | |
21962 | - /* Task is done with its stack. */ | |
21963 | - put_task_stack(prev); | |
21964 | - | |
21965 | put_task_struct(prev); | |
21966 | } | |
1a6e0f06 | 21967 | |
e4b2b4a8 JK |
21968 | @@ -3336,25 +3418,13 @@ |
21969 | atomic_inc(&rq->nr_iowait); | |
21970 | delayacct_blkio_start(); | |
21971 | } | |
21972 | - | |
21973 | - /* | |
21974 | - * If a worker went to sleep, notify and ask workqueue | |
21975 | - * whether it wants to wake up a task to maintain | |
21976 | - * concurrency. | |
21977 | - */ | |
21978 | - if (prev->flags & PF_WQ_WORKER) { | |
21979 | - struct task_struct *to_wakeup; | |
21980 | - | |
21981 | - to_wakeup = wq_worker_sleeping(prev); | |
21982 | - if (to_wakeup) | |
21983 | - try_to_wake_up_local(to_wakeup, &rf); | |
21984 | - } | |
21985 | } | |
21986 | switch_count = &prev->nvcsw; | |
21987 | } | |
1a6e0f06 | 21988 | |
e4b2b4a8 JK |
21989 | next = pick_next_task(rq, prev, &rf); |
21990 | clear_tsk_need_resched(prev); | |
21991 | + clear_tsk_need_resched_lazy(prev); | |
21992 | clear_preempt_need_resched(); | |
1a6e0f06 | 21993 | |
e4b2b4a8 JK |
21994 | if (likely(prev != next)) { |
21995 | @@ -3407,8 +3477,19 @@ | |
1a6e0f06 | 21996 | |
e4b2b4a8 JK |
21997 | static inline void sched_submit_work(struct task_struct *tsk) |
21998 | { | |
21999 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
22000 | + if (!tsk->state) | |
22001 | + return; | |
1a6e0f06 | 22002 | + /* |
e4b2b4a8 JK |
22003 | + * If a worker went to sleep, notify and ask workqueue whether |
22004 | + * it wants to wake up a task to maintain concurrency. | |
1a6e0f06 | 22005 | + */ |
e4b2b4a8 JK |
22006 | + if (tsk->flags & PF_WQ_WORKER) |
22007 | + wq_worker_sleeping(tsk); | |
1a6e0f06 | 22008 | + |
1a6e0f06 | 22009 | + |
e4b2b4a8 JK |
22010 | + if (tsk_is_pi_blocked(tsk)) |
22011 | return; | |
1a6e0f06 | 22012 | + |
1a6e0f06 | 22013 | /* |
e4b2b4a8 JK |
22014 | * If we are going to sleep and we have plugged IO queued, |
22015 | * make sure to submit it to avoid deadlocks. | |
22016 | @@ -3417,6 +3498,12 @@ | |
22017 | blk_schedule_flush_plug(tsk); | |
22018 | } | |
1a6e0f06 | 22019 | |
e4b2b4a8 JK |
22020 | +static void sched_update_worker(struct task_struct *tsk) |
22021 | +{ | |
22022 | + if (tsk->flags & PF_WQ_WORKER) | |
22023 | + wq_worker_running(tsk); | |
22024 | +} | |
22025 | + | |
22026 | asmlinkage __visible void __sched schedule(void) | |
1a6e0f06 | 22027 | { |
e4b2b4a8 JK |
22028 | struct task_struct *tsk = current; |
22029 | @@ -3427,6 +3514,7 @@ | |
22030 | __schedule(false); | |
22031 | sched_preempt_enable_no_resched(); | |
22032 | } while (need_resched()); | |
22033 | + sched_update_worker(tsk); | |
22034 | } | |
22035 | EXPORT_SYMBOL(schedule); | |
1a6e0f06 | 22036 | |
e4b2b4a8 JK |
22037 | @@ -3515,6 +3603,30 @@ |
22038 | } while (need_resched()); | |
22039 | } | |
1a6e0f06 | 22040 | |
e4b2b4a8 JK |
22041 | +#ifdef CONFIG_PREEMPT_LAZY |
22042 | +/* | |
22043 | + * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is | |
22044 | + * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as | |
22045 | + * preempt_lazy_count counter >0. | |
22046 | + */ | |
22047 | +static __always_inline int preemptible_lazy(void) | |
22048 | +{ | |
22049 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
22050 | + return 1; | |
22051 | + if (current_thread_info()->preempt_lazy_count) | |
22052 | + return 0; | |
22053 | + return 1; | |
22054 | +} | |
22055 | + | |
1a6e0f06 | 22056 | +#else |
e4b2b4a8 JK |
22057 | + |
22058 | +static inline int preemptible_lazy(void) | |
22059 | +{ | |
22060 | + return 1; | |
22061 | +} | |
22062 | + | |
1a6e0f06 | 22063 | +#endif |
e4b2b4a8 JK |
22064 | + |
22065 | #ifdef CONFIG_PREEMPT | |
22066 | /* | |
22067 | * this is the entry point to schedule() from in-kernel preemption | |
22068 | @@ -3529,7 +3641,8 @@ | |
22069 | */ | |
22070 | if (likely(!preemptible())) | |
22071 | return; | |
22072 | - | |
22073 | + if (!preemptible_lazy()) | |
22074 | + return; | |
22075 | preempt_schedule_common(); | |
22076 | } | |
22077 | NOKPROBE_SYMBOL(preempt_schedule); | |
22078 | @@ -3556,6 +3669,9 @@ | |
22079 | if (likely(!preemptible())) | |
22080 | return; | |
1a6e0f06 | 22081 | |
e4b2b4a8 JK |
22082 | + if (!preemptible_lazy()) |
22083 | + return; | |
22084 | + | |
22085 | do { | |
22086 | /* | |
22087 | * Because the function tracer can trace preempt_count_sub() | |
22088 | @@ -3578,7 +3694,16 @@ | |
22089 | * an infinite recursion. | |
22090 | */ | |
22091 | prev_ctx = exception_enter(); | |
22092 | + /* | |
22093 | + * The add/subtract must not be traced by the function | |
22094 | + * tracer. But we still want to account for the | |
22095 | + * preempt off latency tracer. Since the _notrace versions | |
22096 | + * of add/subtract skip the accounting for latency tracer | |
22097 | + * we must force it manually. | |
22098 | + */ | |
22099 | + start_critical_timings(); | |
22100 | __schedule(true); | |
22101 | + stop_critical_timings(); | |
22102 | exception_exit(prev_ctx); | |
1a6e0f06 | 22103 | |
e4b2b4a8 JK |
22104 | preempt_latency_stop(1); |
22105 | @@ -4164,7 +4289,7 @@ | |
22106 | * the entire root_domain to become SCHED_DEADLINE. We | |
22107 | * will also fail if there's no bandwidth available. | |
22108 | */ | |
22109 | - if (!cpumask_subset(span, &p->cpus_allowed) || | |
22110 | + if (!cpumask_subset(span, p->cpus_ptr) || | |
22111 | rq->rd->dl_bw.bw == 0) { | |
22112 | task_rq_unlock(rq, p, &rf); | |
22113 | return -EPERM; | |
22114 | @@ -4758,7 +4883,7 @@ | |
22115 | goto out_unlock; | |
1a6e0f06 | 22116 | |
e4b2b4a8 JK |
22117 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
22118 | - cpumask_and(mask, &p->cpus_allowed, cpu_active_mask); | |
22119 | + cpumask_and(mask, &p->cpus_mask, cpu_active_mask); | |
22120 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | |
22121 | ||
22122 | out_unlock: | |
22123 | @@ -4877,6 +5002,7 @@ | |
22124 | } | |
22125 | EXPORT_SYMBOL(__cond_resched_lock); | |
22126 | ||
22127 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22128 | int __sched __cond_resched_softirq(void) | |
22129 | { | |
22130 | BUG_ON(!in_softirq()); | |
22131 | @@ -4890,6 +5016,7 @@ | |
1a6e0f06 JK |
22132 | return 0; |
22133 | } | |
e4b2b4a8 JK |
22134 | EXPORT_SYMBOL(__cond_resched_softirq); |
22135 | +#endif | |
1a6e0f06 | 22136 | |
e4b2b4a8 JK |
22137 | /** |
22138 | * yield - yield the current processor to other threads. | |
22139 | @@ -5284,7 +5411,9 @@ | |
1a6e0f06 | 22140 | |
e4b2b4a8 JK |
22141 | /* Set the preempt count _outside_ the spinlocks! */ |
22142 | init_idle_preempt_count(idle, cpu); | |
22143 | - | |
22144 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
22145 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
1a6e0f06 | 22146 | +#endif |
e4b2b4a8 JK |
22147 | /* |
22148 | * The idle tasks have their own, simple scheduling class: | |
22149 | */ | |
22150 | @@ -5323,7 +5452,7 @@ | |
22151 | * allowed nodes is unnecessary. Thus, cpusets are not | |
22152 | * applicable for such threads. This prevents checking for | |
22153 | * success of set_cpus_allowed_ptr() on all attached tasks | |
22154 | - * before cpus_allowed may be changed. | |
22155 | + * before cpus_mask may be changed. | |
22156 | */ | |
22157 | if (p->flags & PF_NO_SETAFFINITY) { | |
22158 | ret = -EINVAL; | |
22159 | @@ -5350,7 +5479,7 @@ | |
22160 | if (curr_cpu == target_cpu) | |
22161 | return 0; | |
22162 | ||
22163 | - if (!cpumask_test_cpu(target_cpu, &p->cpus_allowed)) | |
22164 | + if (!cpumask_test_cpu(target_cpu, p->cpus_ptr)) | |
22165 | return -EINVAL; | |
22166 | ||
22167 | /* TODO: This is not properly updating schedstats */ | |
22168 | @@ -5389,6 +5518,8 @@ | |
22169 | #endif /* CONFIG_NUMA_BALANCING */ | |
1a6e0f06 | 22170 | |
e4b2b4a8 JK |
22171 | #ifdef CONFIG_HOTPLUG_CPU |
22172 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
22173 | + | |
22174 | /* | |
22175 | * Ensure that the idle task is using init_mm right before its CPU goes | |
22176 | * offline. | |
22177 | @@ -5403,7 +5534,12 @@ | |
22178 | switch_mm(mm, &init_mm, current); | |
22179 | finish_arch_post_lock_switch(); | |
22180 | } | |
22181 | - mmdrop(mm); | |
22182 | + /* | |
22183 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
22184 | + * call mmdrop() nor mmdrop_delayed() from here. | |
22185 | + */ | |
22186 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
22187 | + | |
1a6e0f06 | 22188 | } |
1a6e0f06 | 22189 | |
e4b2b4a8 JK |
22190 | /* |
22191 | @@ -5487,7 +5623,7 @@ | |
22192 | put_prev_task(rq, next); | |
22193 | ||
22194 | /* | |
22195 | - * Rules for changing task_struct::cpus_allowed are holding | |
22196 | + * Rules for changing task_struct::cpus_mask are holding | |
22197 | * both pi_lock and rq->lock, such that holding either | |
22198 | * stabilizes the mask. | |
22199 | * | |
22200 | @@ -5718,6 +5854,10 @@ | |
22201 | update_max_interval(); | |
22202 | nohz_balance_exit_idle(cpu); | |
22203 | hrtick_clear(rq); | |
22204 | + if (per_cpu(idle_last_mm, cpu)) { | |
22205 | + mmdrop_delayed(per_cpu(idle_last_mm, cpu)); | |
22206 | + per_cpu(idle_last_mm, cpu) = NULL; | |
22207 | + } | |
22208 | return 0; | |
1a6e0f06 | 22209 | } |
e4b2b4a8 JK |
22210 | #endif |
22211 | @@ -5964,7 +6104,7 @@ | |
22212 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
22213 | static inline int preempt_count_equals(int preempt_offset) | |
22214 | { | |
22215 | - int nested = preempt_count() + rcu_preempt_depth(); | |
22216 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
1a6e0f06 | 22217 | |
e4b2b4a8 JK |
22218 | return (nested == preempt_offset); |
22219 | } | |
22220 | @@ -6756,3 +6896,197 @@ | |
22221 | /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, | |
22222 | /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, | |
22223 | }; | |
1a6e0f06 | 22224 | + |
e4b2b4a8 | 22225 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) |
1a6e0f06 | 22226 | + |
e4b2b4a8 JK |
22227 | +static inline void |
22228 | +update_nr_migratory(struct task_struct *p, long delta) | |
1a6e0f06 | 22229 | +{ |
e4b2b4a8 JK |
22230 | + if (unlikely((p->sched_class == &rt_sched_class || |
22231 | + p->sched_class == &dl_sched_class) && | |
22232 | + p->nr_cpus_allowed > 1)) { | |
22233 | + if (p->sched_class == &rt_sched_class) | |
22234 | + task_rq(p)->rt.rt_nr_migratory += delta; | |
22235 | + else | |
22236 | + task_rq(p)->dl.dl_nr_migratory += delta; | |
22237 | + } | |
1a6e0f06 JK |
22238 | +} |
22239 | + | |
e4b2b4a8 JK |
22240 | +static inline void |
22241 | +migrate_disable_update_cpus_allowed(struct task_struct *p) | |
22242 | +{ | |
22243 | + struct rq *rq; | |
22244 | + struct rq_flags rf; | |
1a6e0f06 | 22245 | + |
e4b2b4a8 JK |
22246 | + p->cpus_ptr = cpumask_of(smp_processor_id()); |
22247 | + | |
22248 | + rq = task_rq_lock(p, &rf); | |
22249 | + update_nr_migratory(p, -1); | |
22250 | + p->nr_cpus_allowed = 1; | |
22251 | + task_rq_unlock(rq, p, &rf); | |
22252 | +} | |
22253 | + | |
22254 | +static inline void | |
22255 | +migrate_enable_update_cpus_allowed(struct task_struct *p) | |
1a6e0f06 | 22256 | +{ |
e4b2b4a8 JK |
22257 | + struct rq *rq; |
22258 | + struct rq_flags rf; | |
22259 | + | |
22260 | + p->cpus_ptr = &p->cpus_mask; | |
22261 | + | |
22262 | + rq = task_rq_lock(p, &rf); | |
22263 | + p->nr_cpus_allowed = cpumask_weight(&p->cpus_mask); | |
22264 | + update_nr_migratory(p, 1); | |
22265 | + task_rq_unlock(rq, p, &rf); | |
1a6e0f06 | 22266 | +} |
1a6e0f06 | 22267 | + |
e4b2b4a8 JK |
22268 | +void migrate_disable(void) |
22269 | +{ | |
22270 | + struct task_struct *p = current; | |
22271 | + | |
22272 | + if (in_atomic() || irqs_disabled()) { | |
22273 | +#ifdef CONFIG_SCHED_DEBUG | |
22274 | + p->migrate_disable_atomic++; | |
1a6e0f06 | 22275 | +#endif |
e4b2b4a8 JK |
22276 | + return; |
22277 | + } | |
22278 | +#ifdef CONFIG_SCHED_DEBUG | |
22279 | + if (unlikely(p->migrate_disable_atomic)) { | |
22280 | + tracing_off(); | |
22281 | + WARN_ON_ONCE(1); | |
22282 | + } | |
1a6e0f06 | 22283 | +#endif |
1a6e0f06 | 22284 | + |
e4b2b4a8 JK |
22285 | + if (p->migrate_disable) { |
22286 | + p->migrate_disable++; | |
22287 | + return; | |
22288 | + } | |
22289 | + | |
22290 | + preempt_disable(); | |
22291 | + preempt_lazy_disable(); | |
22292 | + pin_current_cpu(); | |
22293 | + | |
22294 | + migrate_disable_update_cpus_allowed(p); | |
22295 | + p->migrate_disable = 1; | |
22296 | + | |
22297 | + preempt_enable(); | |
1a6e0f06 | 22298 | +} |
e4b2b4a8 | 22299 | +EXPORT_SYMBOL(migrate_disable); |
1a6e0f06 | 22300 | + |
e4b2b4a8 | 22301 | +void migrate_enable(void) |
1a6e0f06 | 22302 | +{ |
e4b2b4a8 JK |
22303 | + struct task_struct *p = current; |
22304 | + | |
22305 | + if (in_atomic() || irqs_disabled()) { | |
22306 | +#ifdef CONFIG_SCHED_DEBUG | |
22307 | + p->migrate_disable_atomic--; | |
22308 | +#endif | |
22309 | + return; | |
22310 | + } | |
22311 | + | |
22312 | +#ifdef CONFIG_SCHED_DEBUG | |
22313 | + if (unlikely(p->migrate_disable_atomic)) { | |
22314 | + tracing_off(); | |
22315 | + WARN_ON_ONCE(1); | |
22316 | + } | |
22317 | +#endif | |
22318 | + | |
22319 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
22320 | + if (p->migrate_disable > 1) { | |
22321 | + p->migrate_disable--; | |
22322 | + return; | |
22323 | + } | |
22324 | + | |
22325 | + preempt_disable(); | |
22326 | + | |
22327 | + p->migrate_disable = 0; | |
22328 | + migrate_enable_update_cpus_allowed(p); | |
22329 | + | |
22330 | + if (p->migrate_disable_update) { | |
22331 | + struct rq *rq; | |
22332 | + struct rq_flags rf; | |
22333 | + | |
22334 | + rq = task_rq_lock(p, &rf); | |
22335 | + update_rq_clock(rq); | |
22336 | + | |
22337 | + __do_set_cpus_allowed_tail(p, &p->cpus_mask); | |
22338 | + task_rq_unlock(rq, p, &rf); | |
22339 | + | |
22340 | + p->migrate_disable_update = 0; | |
22341 | + | |
22342 | + WARN_ON(smp_processor_id() != task_cpu(p)); | |
22343 | + if (!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { | |
22344 | + const struct cpumask *cpu_valid_mask = cpu_active_mask; | |
22345 | + struct migration_arg arg; | |
22346 | + unsigned int dest_cpu; | |
22347 | + | |
22348 | + if (p->flags & PF_KTHREAD) { | |
22349 | + /* | |
22350 | + * Kernel threads are allowed on online && !active CPUs | |
22351 | + */ | |
22352 | + cpu_valid_mask = cpu_online_mask; | |
22353 | + } | |
22354 | + dest_cpu = cpumask_any_and(cpu_valid_mask, &p->cpus_mask); | |
22355 | + arg.task = p; | |
22356 | + arg.dest_cpu = dest_cpu; | |
22357 | + | |
22358 | + unpin_current_cpu(); | |
22359 | + preempt_lazy_enable(); | |
22360 | + preempt_enable(); | |
22361 | + stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); | |
22362 | + tlb_migrate_finish(p->mm); | |
22363 | + | |
22364 | + return; | |
22365 | + } | |
22366 | + } | |
22367 | + unpin_current_cpu(); | |
22368 | + preempt_lazy_enable(); | |
22369 | + preempt_enable(); | |
1a6e0f06 | 22370 | +} |
e4b2b4a8 | 22371 | +EXPORT_SYMBOL(migrate_enable); |
1a6e0f06 | 22372 | + |
e4b2b4a8 JK |
22373 | +#elif !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT_BASE) |
22374 | +void migrate_disable(void) | |
22375 | +{ | |
22376 | + struct task_struct *p = current; | |
22377 | + | |
22378 | + if (in_atomic() || irqs_disabled()) { | |
22379 | +#ifdef CONFIG_SCHED_DEBUG | |
22380 | + p->migrate_disable_atomic++; | |
1a6e0f06 | 22381 | +#endif |
e4b2b4a8 JK |
22382 | + return; |
22383 | + } | |
22384 | +#ifdef CONFIG_SCHED_DEBUG | |
22385 | + if (unlikely(p->migrate_disable_atomic)) { | |
22386 | + tracing_off(); | |
22387 | + WARN_ON_ONCE(1); | |
22388 | + } | |
22389 | +#endif | |
22390 | + | |
22391 | + p->migrate_disable++; | |
22392 | +} | |
22393 | +EXPORT_SYMBOL(migrate_disable); | |
22394 | + | |
22395 | +void migrate_enable(void) | |
22396 | +{ | |
22397 | + struct task_struct *p = current; | |
22398 | + | |
22399 | + if (in_atomic() || irqs_disabled()) { | |
22400 | +#ifdef CONFIG_SCHED_DEBUG | |
22401 | + p->migrate_disable_atomic--; | |
22402 | +#endif | |
22403 | + return; | |
22404 | + } | |
22405 | + | |
22406 | +#ifdef CONFIG_SCHED_DEBUG | |
22407 | + if (unlikely(p->migrate_disable_atomic)) { | |
22408 | + tracing_off(); | |
22409 | + WARN_ON_ONCE(1); | |
22410 | + } | |
1a6e0f06 | 22411 | +#endif |
1a6e0f06 | 22412 | + |
e4b2b4a8 JK |
22413 | + WARN_ON_ONCE(p->migrate_disable <= 0); |
22414 | + p->migrate_disable--; | |
22415 | +} | |
22416 | +EXPORT_SYMBOL(migrate_enable); | |
22417 | +#endif | |
22418 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/cpudeadline.c linux-4.14/kernel/sched/cpudeadline.c | |
22419 | --- linux-4.14.orig/kernel/sched/cpudeadline.c 2017-11-12 19:46:13.000000000 +0100 | |
22420 | +++ linux-4.14/kernel/sched/cpudeadline.c 2018-09-05 11:05:07.000000000 +0200 | |
22421 | @@ -127,13 +127,13 @@ | |
22422 | const struct sched_dl_entity *dl_se = &p->dl; | |
1a6e0f06 | 22423 | |
e4b2b4a8 JK |
22424 | if (later_mask && |
22425 | - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { | |
22426 | + cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) { | |
22427 | return 1; | |
22428 | } else { | |
22429 | int best_cpu = cpudl_maximum(cp); | |
22430 | WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); | |
22431 | ||
22432 | - if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) && | |
22433 | + if (cpumask_test_cpu(best_cpu, p->cpus_ptr) && | |
22434 | dl_time_before(dl_se->deadline, cp->elements[0].dl)) { | |
22435 | if (later_mask) | |
22436 | cpumask_set_cpu(best_cpu, later_mask); | |
22437 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/cpupri.c linux-4.14/kernel/sched/cpupri.c | |
22438 | --- linux-4.14.orig/kernel/sched/cpupri.c 2017-11-12 19:46:13.000000000 +0100 | |
22439 | +++ linux-4.14/kernel/sched/cpupri.c 2018-09-05 11:05:07.000000000 +0200 | |
22440 | @@ -103,11 +103,11 @@ | |
22441 | if (skip) | |
22442 | continue; | |
1a6e0f06 | 22443 | |
e4b2b4a8 JK |
22444 | - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
22445 | + if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids) | |
22446 | continue; | |
1a6e0f06 | 22447 | |
e4b2b4a8 JK |
22448 | if (lowest_mask) { |
22449 | - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | |
22450 | + cpumask_and(lowest_mask, p->cpus_ptr, vec->mask); | |
22451 | ||
22452 | /* | |
22453 | * We have to ensure that we have at least one bit | |
22454 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/deadline.c linux-4.14/kernel/sched/deadline.c | |
22455 | --- linux-4.14.orig/kernel/sched/deadline.c 2018-09-05 11:03:22.000000000 +0200 | |
22456 | +++ linux-4.14/kernel/sched/deadline.c 2018-09-05 11:05:07.000000000 +0200 | |
22457 | @@ -504,7 +504,7 @@ | |
22458 | * If we cannot preempt any rq, fall back to pick any | |
22459 | * online cpu. | |
22460 | */ | |
22461 | - cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | |
22462 | + cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr); | |
22463 | if (cpu >= nr_cpu_ids) { | |
22464 | /* | |
22465 | * Fail to find any suitable cpu. | |
22466 | @@ -1020,7 +1020,7 @@ | |
1a6e0f06 | 22467 | { |
e4b2b4a8 | 22468 | struct hrtimer *timer = &dl_se->dl_timer; |
1a6e0f06 | 22469 | |
e4b2b4a8 JK |
22470 | - hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
22471 | + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); | |
22472 | timer->function = dl_task_timer; | |
22473 | } | |
22474 | ||
22475 | @@ -1749,7 +1749,7 @@ | |
22476 | static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) | |
22477 | { | |
22478 | if (!task_running(rq, p) && | |
22479 | - cpumask_test_cpu(cpu, &p->cpus_allowed)) | |
22480 | + cpumask_test_cpu(cpu, p->cpus_ptr)) | |
22481 | return 1; | |
22482 | return 0; | |
22483 | } | |
22484 | @@ -1899,7 +1899,7 @@ | |
22485 | /* Retry if something changed. */ | |
22486 | if (double_lock_balance(rq, later_rq)) { | |
22487 | if (unlikely(task_rq(task) != rq || | |
22488 | - !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) || | |
22489 | + !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) || | |
22490 | task_running(rq, task) || | |
22491 | !dl_task(task) || | |
22492 | !task_on_rq_queued(task))) { | |
22493 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/debug.c linux-4.14/kernel/sched/debug.c | |
22494 | --- linux-4.14.orig/kernel/sched/debug.c 2017-11-12 19:46:13.000000000 +0100 | |
22495 | +++ linux-4.14/kernel/sched/debug.c 2018-09-05 11:05:07.000000000 +0200 | |
22496 | @@ -1017,6 +1017,10 @@ | |
22497 | P(dl.runtime); | |
22498 | P(dl.deadline); | |
1a6e0f06 | 22499 | } |
e4b2b4a8 JK |
22500 | +#if defined(CONFIG_PREEMPT_COUNT) && defined(CONFIG_SMP) |
22501 | + P(migrate_disable); | |
22502 | +#endif | |
22503 | + P(nr_cpus_allowed); | |
22504 | #undef PN_SCHEDSTAT | |
22505 | #undef PN | |
22506 | #undef __PN | |
22507 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/fair.c linux-4.14/kernel/sched/fair.c | |
22508 | --- linux-4.14.orig/kernel/sched/fair.c 2018-09-05 11:03:22.000000000 +0200 | |
22509 | +++ linux-4.14/kernel/sched/fair.c 2018-09-05 11:05:07.000000000 +0200 | |
22510 | @@ -1596,7 +1596,7 @@ | |
22511 | */ | |
22512 | if (cur) { | |
22513 | /* Skip this swap candidate if cannot move to the source cpu */ | |
22514 | - if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) | |
22515 | + if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr)) | |
22516 | goto unlock; | |
1a6e0f06 | 22517 | |
e4b2b4a8 JK |
22518 | /* |
22519 | @@ -1706,7 +1706,7 @@ | |
1a6e0f06 | 22520 | |
e4b2b4a8 JK |
22521 | for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) { |
22522 | /* Skip this CPU if the source task cannot migrate */ | |
22523 | - if (!cpumask_test_cpu(cpu, &env->p->cpus_allowed)) | |
22524 | + if (!cpumask_test_cpu(cpu, env->p->cpus_ptr)) | |
22525 | continue; | |
1a6e0f06 | 22526 | |
e4b2b4a8 JK |
22527 | env->dst_cpu = cpu; |
22528 | @@ -3840,7 +3840,7 @@ | |
22529 | ideal_runtime = sched_slice(cfs_rq, curr); | |
22530 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
22531 | if (delta_exec > ideal_runtime) { | |
22532 | - resched_curr(rq_of(cfs_rq)); | |
22533 | + resched_curr_lazy(rq_of(cfs_rq)); | |
22534 | /* | |
22535 | * The current task ran long enough, ensure it doesn't get | |
22536 | * re-elected due to buddy favours. | |
22537 | @@ -3864,7 +3864,7 @@ | |
22538 | return; | |
1a6e0f06 | 22539 | |
e4b2b4a8 JK |
22540 | if (delta > ideal_runtime) |
22541 | - resched_curr(rq_of(cfs_rq)); | |
22542 | + resched_curr_lazy(rq_of(cfs_rq)); | |
22543 | } | |
1a6e0f06 | 22544 | |
e4b2b4a8 JK |
22545 | static void |
22546 | @@ -4006,7 +4006,7 @@ | |
22547 | * validating it and just reschedule. | |
22548 | */ | |
22549 | if (queued) { | |
22550 | - resched_curr(rq_of(cfs_rq)); | |
22551 | + resched_curr_lazy(rq_of(cfs_rq)); | |
1a6e0f06 | 22552 | return; |
e4b2b4a8 JK |
22553 | } |
22554 | /* | |
22555 | @@ -4188,7 +4188,7 @@ | |
22556 | * hierarchy can be throttled | |
22557 | */ | |
22558 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
22559 | - resched_curr(rq_of(cfs_rq)); | |
22560 | + resched_curr_lazy(rq_of(cfs_rq)); | |
1a6e0f06 | 22561 | } |
1a6e0f06 | 22562 | |
e4b2b4a8 JK |
22563 | static __always_inline |
22564 | @@ -4837,7 +4837,7 @@ | |
1a6e0f06 | 22565 | |
e4b2b4a8 JK |
22566 | if (delta < 0) { |
22567 | if (rq->curr == p) | |
22568 | - resched_curr(rq); | |
22569 | + resched_curr_lazy(rq); | |
22570 | return; | |
22571 | } | |
22572 | hrtick_start(rq, delta); | |
22573 | @@ -5475,7 +5475,7 @@ | |
1a6e0f06 | 22574 | |
e4b2b4a8 JK |
22575 | /* Skip over this group if it has no CPUs allowed */ |
22576 | if (!cpumask_intersects(sched_group_span(group), | |
22577 | - &p->cpus_allowed)) | |
22578 | + p->cpus_ptr)) | |
22579 | continue; | |
1a6e0f06 | 22580 | |
e4b2b4a8 JK |
22581 | local_group = cpumask_test_cpu(this_cpu, |
22582 | @@ -5595,7 +5595,7 @@ | |
22583 | return cpumask_first(sched_group_span(group)); | |
22584 | ||
22585 | /* Traverse only the allowed CPUs */ | |
22586 | - for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { | |
22587 | + for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { | |
22588 | if (idle_cpu(i)) { | |
22589 | struct rq *rq = cpu_rq(i); | |
22590 | struct cpuidle_state *idle = idle_get_state(rq); | |
22591 | @@ -5698,7 +5698,7 @@ | |
22592 | if (!test_idle_cores(target, false)) | |
22593 | return -1; | |
22594 | ||
22595 | - cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed); | |
22596 | + cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); | |
22597 | ||
22598 | for_each_cpu_wrap(core, cpus, target) { | |
22599 | bool idle = true; | |
22600 | @@ -5732,7 +5732,7 @@ | |
22601 | return -1; | |
22602 | ||
22603 | for_each_cpu(cpu, cpu_smt_mask(target)) { | |
22604 | - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | |
22605 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) | |
22606 | continue; | |
22607 | if (idle_cpu(cpu)) | |
22608 | return cpu; | |
22609 | @@ -5795,7 +5795,7 @@ | |
22610 | for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { | |
22611 | if (!--nr) | |
22612 | return -1; | |
22613 | - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) | |
22614 | + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) | |
22615 | continue; | |
22616 | if (idle_cpu(cpu)) | |
22617 | break; | |
22618 | @@ -5950,7 +5950,7 @@ | |
22619 | if (sd_flag & SD_BALANCE_WAKE) { | |
22620 | record_wakee(p); | |
22621 | want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu) | |
22622 | - && cpumask_test_cpu(cpu, &p->cpus_allowed); | |
22623 | + && cpumask_test_cpu(cpu, p->cpus_ptr); | |
22624 | } | |
1a6e0f06 | 22625 | |
e4b2b4a8 JK |
22626 | rcu_read_lock(); |
22627 | @@ -6231,7 +6231,7 @@ | |
22628 | return; | |
1a6e0f06 | 22629 | |
e4b2b4a8 JK |
22630 | preempt: |
22631 | - resched_curr(rq); | |
22632 | + resched_curr_lazy(rq); | |
22633 | /* | |
22634 | * Only set the backward buddy when the current task is still | |
22635 | * on the rq. This can happen when a wakeup gets interleaved | |
22636 | @@ -6699,14 +6699,14 @@ | |
22637 | /* | |
22638 | * We do not migrate tasks that are: | |
22639 | * 1) throttled_lb_pair, or | |
22640 | - * 2) cannot be migrated to this CPU due to cpus_allowed, or | |
22641 | + * 2) cannot be migrated to this CPU due to cpus_ptr, or | |
22642 | * 3) running (obviously), or | |
22643 | * 4) are cache-hot on their current CPU. | |
22644 | */ | |
22645 | if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu)) | |
22646 | return 0; | |
1a6e0f06 | 22647 | |
e4b2b4a8 JK |
22648 | - if (!cpumask_test_cpu(env->dst_cpu, &p->cpus_allowed)) { |
22649 | + if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { | |
22650 | int cpu; | |
1a6e0f06 | 22651 | |
e4b2b4a8 JK |
22652 | schedstat_inc(p->se.statistics.nr_failed_migrations_affine); |
22653 | @@ -6726,7 +6726,7 @@ | |
1a6e0f06 | 22654 | |
e4b2b4a8 JK |
22655 | /* Prevent to re-select dst_cpu via env's cpus */ |
22656 | for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { | |
22657 | - if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { | |
22658 | + if (cpumask_test_cpu(cpu, p->cpus_ptr)) { | |
22659 | env->flags |= LBF_DST_PINNED; | |
22660 | env->new_dst_cpu = cpu; | |
22661 | break; | |
22662 | @@ -7295,7 +7295,7 @@ | |
1a6e0f06 | 22663 | |
e4b2b4a8 JK |
22664 | /* |
22665 | * Group imbalance indicates (and tries to solve) the problem where balancing | |
22666 | - * groups is inadequate due to ->cpus_allowed constraints. | |
22667 | + * groups is inadequate due to ->cpus_ptr constraints. | |
22668 | * | |
22669 | * Imagine a situation of two groups of 4 cpus each and 4 tasks each with a | |
22670 | * cpumask covering 1 cpu of the first group and 3 cpus of the second group. | |
22671 | @@ -7871,7 +7871,7 @@ | |
1a6e0f06 | 22672 | /* |
e4b2b4a8 JK |
22673 | * If the busiest group is imbalanced the below checks don't |
22674 | * work because they assume all things are equal, which typically | |
22675 | - * isn't true due to cpus_allowed constraints and the like. | |
22676 | + * isn't true due to cpus_ptr constraints and the like. | |
22677 | */ | |
22678 | if (busiest->group_type == group_imbalanced) | |
22679 | goto force_balance; | |
22680 | @@ -8263,7 +8263,7 @@ | |
22681 | * if the curr task on busiest cpu can't be | |
22682 | * moved to this_cpu | |
22683 | */ | |
22684 | - if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { | |
22685 | + if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) { | |
22686 | raw_spin_unlock_irqrestore(&busiest->lock, | |
22687 | flags); | |
22688 | env.flags |= LBF_ALL_PINNED; | |
22689 | @@ -9085,7 +9085,7 @@ | |
22690 | * 'current' within the tree based on its new key value. | |
22691 | */ | |
22692 | swap(curr->vruntime, se->vruntime); | |
22693 | - resched_curr(rq); | |
22694 | + resched_curr_lazy(rq); | |
1a6e0f06 | 22695 | } |
e4b2b4a8 JK |
22696 | |
22697 | se->vruntime -= cfs_rq->min_vruntime; | |
22698 | @@ -9109,7 +9109,7 @@ | |
22699 | */ | |
22700 | if (rq->curr == p) { | |
22701 | if (p->prio > oldprio) | |
22702 | - resched_curr(rq); | |
22703 | + resched_curr_lazy(rq); | |
22704 | } else | |
22705 | check_preempt_curr(rq, p, 0); | |
22706 | } | |
22707 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/features.h linux-4.14/kernel/sched/features.h | |
22708 | --- linux-4.14.orig/kernel/sched/features.h 2017-11-12 19:46:13.000000000 +0100 | |
22709 | +++ linux-4.14/kernel/sched/features.h 2018-09-05 11:05:07.000000000 +0200 | |
22710 | @@ -46,11 +46,19 @@ | |
22711 | */ | |
22712 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
22713 | ||
22714 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22715 | +SCHED_FEAT(TTWU_QUEUE, false) | |
22716 | +# ifdef CONFIG_PREEMPT_LAZY | |
22717 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
22718 | +# endif | |
22719 | +#else | |
22720 | + | |
22721 | /* | |
22722 | * Queue remote wakeups on the target CPU and process them | |
22723 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
22724 | */ | |
22725 | SCHED_FEAT(TTWU_QUEUE, true) | |
1a6e0f06 JK |
22726 | +#endif |
22727 | ||
e4b2b4a8 JK |
22728 | /* |
22729 | * When doing wakeups, attempt to limit superfluous scans of the LLC domain. | |
22730 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/Makefile linux-4.14/kernel/sched/Makefile | |
22731 | --- linux-4.14.orig/kernel/sched/Makefile 2017-11-12 19:46:13.000000000 +0100 | |
22732 | +++ linux-4.14/kernel/sched/Makefile 2018-09-05 11:05:07.000000000 +0200 | |
22733 | @@ -18,7 +18,7 @@ | |
22734 | ||
22735 | obj-y += core.o loadavg.o clock.o cputime.o | |
22736 | obj-y += idle_task.o fair.o rt.o deadline.o | |
22737 | -obj-y += wait.o wait_bit.o swait.o completion.o idle.o | |
22738 | +obj-y += wait.o wait_bit.o swait.o swork.o completion.o idle.o | |
22739 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o | |
22740 | obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o | |
22741 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
22742 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/rt.c linux-4.14/kernel/sched/rt.c | |
22743 | --- linux-4.14.orig/kernel/sched/rt.c 2018-09-05 11:03:22.000000000 +0200 | |
22744 | +++ linux-4.14/kernel/sched/rt.c 2018-09-05 11:05:07.000000000 +0200 | |
22745 | @@ -47,8 +47,8 @@ | |
c7c16703 | 22746 | |
e4b2b4a8 | 22747 | raw_spin_lock_init(&rt_b->rt_runtime_lock); |
c7c16703 | 22748 | |
e4b2b4a8 JK |
22749 | - hrtimer_init(&rt_b->rt_period_timer, |
22750 | - CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
22751 | + hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, | |
22752 | + HRTIMER_MODE_REL_HARD); | |
22753 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
22754 | } | |
c7c16703 | 22755 | |
e4b2b4a8 JK |
22756 | @@ -1594,7 +1594,7 @@ |
22757 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | |
22758 | { | |
22759 | if (!task_running(rq, p) && | |
22760 | - cpumask_test_cpu(cpu, &p->cpus_allowed)) | |
22761 | + cpumask_test_cpu(cpu, p->cpus_ptr)) | |
22762 | return 1; | |
22763 | return 0; | |
c7c16703 | 22764 | } |
e4b2b4a8 JK |
22765 | @@ -1729,7 +1729,7 @@ |
22766 | * Also make sure that it wasn't scheduled on its rq. | |
22767 | */ | |
22768 | if (unlikely(task_rq(task) != rq || | |
22769 | - !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_allowed) || | |
22770 | + !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) || | |
22771 | task_running(rq, task) || | |
22772 | !rt_task(task) || | |
22773 | !task_on_rq_queued(task))) { | |
22774 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/sched.h linux-4.14/kernel/sched/sched.h | |
22775 | --- linux-4.14.orig/kernel/sched/sched.h 2018-09-05 11:03:22.000000000 +0200 | |
22776 | +++ linux-4.14/kernel/sched/sched.h 2018-09-05 11:05:07.000000000 +0200 | |
22777 | @@ -1354,6 +1354,7 @@ | |
22778 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | |
22779 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
22780 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
22781 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
c7c16703 | 22782 | |
e4b2b4a8 JK |
22783 | /* |
22784 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
22785 | @@ -1545,6 +1546,15 @@ | |
22786 | extern void resched_curr(struct rq *rq); | |
22787 | extern void resched_cpu(int cpu); | |
22788 | ||
22789 | +#ifdef CONFIG_PREEMPT_LAZY | |
22790 | +extern void resched_curr_lazy(struct rq *rq); | |
22791 | +#else | |
22792 | +static inline void resched_curr_lazy(struct rq *rq) | |
1a6e0f06 | 22793 | +{ |
e4b2b4a8 | 22794 | + resched_curr(rq); |
1a6e0f06 | 22795 | +} |
1a6e0f06 JK |
22796 | +#endif |
22797 | + | |
e4b2b4a8 JK |
22798 | extern struct rt_bandwidth def_rt_bandwidth; |
22799 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
22800 | ||
22801 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/swait.c linux-4.14/kernel/sched/swait.c | |
22802 | --- linux-4.14.orig/kernel/sched/swait.c 2017-11-12 19:46:13.000000000 +0100 | |
22803 | +++ linux-4.14/kernel/sched/swait.c 2018-09-05 11:05:07.000000000 +0200 | |
22804 | @@ -1,6 +1,7 @@ | |
22805 | // SPDX-License-Identifier: GPL-2.0 | |
22806 | #include <linux/sched/signal.h> | |
22807 | #include <linux/swait.h> | |
22808 | +#include <linux/suspend.h> | |
22809 | ||
22810 | void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
22811 | struct lock_class_key *key) | |
22812 | @@ -30,6 +31,25 @@ | |
22813 | } | |
22814 | EXPORT_SYMBOL(swake_up_locked); | |
22815 | ||
22816 | +void swake_up_all_locked(struct swait_queue_head *q) | |
1a6e0f06 | 22817 | +{ |
e4b2b4a8 JK |
22818 | + struct swait_queue *curr; |
22819 | + int wakes = 0; | |
1a6e0f06 | 22820 | + |
e4b2b4a8 | 22821 | + while (!list_empty(&q->task_list)) { |
1a6e0f06 | 22822 | + |
e4b2b4a8 JK |
22823 | + curr = list_first_entry(&q->task_list, typeof(*curr), |
22824 | + task_list); | |
22825 | + wake_up_process(curr->task); | |
22826 | + list_del_init(&curr->task_list); | |
22827 | + wakes++; | |
22828 | + } | |
22829 | + if (pm_in_action) | |
22830 | + return; | |
22831 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
1a6e0f06 | 22832 | +} |
e4b2b4a8 | 22833 | +EXPORT_SYMBOL(swake_up_all_locked); |
1a6e0f06 | 22834 | + |
e4b2b4a8 JK |
22835 | void swake_up(struct swait_queue_head *q) |
22836 | { | |
22837 | unsigned long flags; | |
22838 | @@ -49,6 +69,7 @@ | |
22839 | struct swait_queue *curr; | |
22840 | LIST_HEAD(tmp); | |
22841 | ||
22842 | + WARN_ON(irqs_disabled()); | |
22843 | raw_spin_lock_irq(&q->lock); | |
22844 | list_splice_init(&q->task_list, &tmp); | |
22845 | while (!list_empty(&tmp)) { | |
22846 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/swork.c linux-4.14/kernel/sched/swork.c | |
22847 | --- linux-4.14.orig/kernel/sched/swork.c 1970-01-01 01:00:00.000000000 +0100 | |
22848 | +++ linux-4.14/kernel/sched/swork.c 2018-09-05 11:05:07.000000000 +0200 | |
22849 | @@ -0,0 +1,173 @@ | |
1a6e0f06 | 22850 | +/* |
e4b2b4a8 JK |
22851 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de |
22852 | + * | |
22853 | + * Provides a framework for enqueuing callbacks from irq context | |
22854 | + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. | |
1a6e0f06 | 22855 | + */ |
1a6e0f06 | 22856 | + |
e4b2b4a8 JK |
22857 | +#include <linux/swait.h> |
22858 | +#include <linux/swork.h> | |
22859 | +#include <linux/kthread.h> | |
22860 | +#include <linux/slab.h> | |
22861 | +#include <linux/spinlock.h> | |
22862 | +#include <linux/export.h> | |
22863 | + | |
22864 | +#define SWORK_EVENT_PENDING (1 << 0) | |
22865 | + | |
22866 | +static DEFINE_MUTEX(worker_mutex); | |
22867 | +static struct sworker *glob_worker; | |
22868 | + | |
22869 | +struct sworker { | |
22870 | + struct list_head events; | |
22871 | + struct swait_queue_head wq; | |
1a6e0f06 | 22872 | + |
e4b2b4a8 JK |
22873 | + raw_spinlock_t lock; |
22874 | + | |
22875 | + struct task_struct *task; | |
22876 | + int refs; | |
22877 | +}; | |
1a6e0f06 | 22878 | + |
e4b2b4a8 | 22879 | +static bool swork_readable(struct sworker *worker) |
1a6e0f06 | 22880 | +{ |
e4b2b4a8 | 22881 | + bool r; |
1a6e0f06 | 22882 | + |
e4b2b4a8 JK |
22883 | + if (kthread_should_stop()) |
22884 | + return true; | |
22885 | + | |
22886 | + raw_spin_lock_irq(&worker->lock); | |
22887 | + r = !list_empty(&worker->events); | |
22888 | + raw_spin_unlock_irq(&worker->lock); | |
22889 | + | |
22890 | + return r; | |
1a6e0f06 | 22891 | +} |
1a6e0f06 | 22892 | + |
e4b2b4a8 | 22893 | +static int swork_kthread(void *arg) |
1a6e0f06 | 22894 | +{ |
e4b2b4a8 | 22895 | + struct sworker *worker = arg; |
1a6e0f06 | 22896 | + |
e4b2b4a8 JK |
22897 | + for (;;) { |
22898 | + swait_event_interruptible(worker->wq, | |
22899 | + swork_readable(worker)); | |
22900 | + if (kthread_should_stop()) | |
22901 | + break; | |
1a6e0f06 | 22902 | + |
e4b2b4a8 JK |
22903 | + raw_spin_lock_irq(&worker->lock); |
22904 | + while (!list_empty(&worker->events)) { | |
22905 | + struct swork_event *sev; | |
1a6e0f06 | 22906 | + |
e4b2b4a8 JK |
22907 | + sev = list_first_entry(&worker->events, |
22908 | + struct swork_event, item); | |
22909 | + list_del(&sev->item); | |
22910 | + raw_spin_unlock_irq(&worker->lock); | |
1a6e0f06 | 22911 | + |
e4b2b4a8 JK |
22912 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, |
22913 | + &sev->flags)); | |
22914 | + sev->func(sev); | |
22915 | + raw_spin_lock_irq(&worker->lock); | |
22916 | + } | |
22917 | + raw_spin_unlock_irq(&worker->lock); | |
22918 | + } | |
22919 | + return 0; | |
1a6e0f06 | 22920 | +} |
1a6e0f06 | 22921 | + |
e4b2b4a8 | 22922 | +static struct sworker *swork_create(void) |
1a6e0f06 | 22923 | +{ |
e4b2b4a8 | 22924 | + struct sworker *worker; |
1a6e0f06 | 22925 | + |
e4b2b4a8 JK |
22926 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); |
22927 | + if (!worker) | |
22928 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 22929 | + |
e4b2b4a8 JK |
22930 | + INIT_LIST_HEAD(&worker->events); |
22931 | + raw_spin_lock_init(&worker->lock); | |
22932 | + init_swait_queue_head(&worker->wq); | |
1a6e0f06 | 22933 | + |
e4b2b4a8 JK |
22934 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); |
22935 | + if (IS_ERR(worker->task)) { | |
22936 | + kfree(worker); | |
22937 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 22938 | + } |
1a6e0f06 | 22939 | + |
e4b2b4a8 | 22940 | + return worker; |
1a6e0f06 | 22941 | +} |
1a6e0f06 | 22942 | + |
e4b2b4a8 | 22943 | +static void swork_destroy(struct sworker *worker) |
1a6e0f06 | 22944 | +{ |
e4b2b4a8 JK |
22945 | + kthread_stop(worker->task); |
22946 | + | |
22947 | + WARN_ON(!list_empty(&worker->events)); | |
22948 | + kfree(worker); | |
1a6e0f06 | 22949 | +} |
1a6e0f06 | 22950 | + |
e4b2b4a8 JK |
22951 | +/** |
22952 | + * swork_queue - queue swork | |
22953 | + * | |
22954 | + * Returns %false if @work was already on a queue, %true otherwise. | |
22955 | + * | |
22956 | + * The work is queued and processed on a random CPU | |
22957 | + */ | |
22958 | +bool swork_queue(struct swork_event *sev) | |
1a6e0f06 | 22959 | +{ |
e4b2b4a8 | 22960 | + unsigned long flags; |
1a6e0f06 | 22961 | + |
e4b2b4a8 JK |
22962 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) |
22963 | + return false; | |
1a6e0f06 | 22964 | + |
e4b2b4a8 JK |
22965 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); |
22966 | + list_add_tail(&sev->item, &glob_worker->events); | |
22967 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
1a6e0f06 | 22968 | + |
e4b2b4a8 JK |
22969 | + swake_up(&glob_worker->wq); |
22970 | + return true; | |
1a6e0f06 | 22971 | +} |
e4b2b4a8 | 22972 | +EXPORT_SYMBOL_GPL(swork_queue); |
1a6e0f06 | 22973 | + |
e4b2b4a8 JK |
22974 | +/** |
22975 | + * swork_get - get an instance of the sworker | |
22976 | + * | |
22977 | + * Returns an negative error code if the initialization if the worker did not | |
22978 | + * work, %0 otherwise. | |
22979 | + * | |
22980 | + */ | |
22981 | +int swork_get(void) | |
1a6e0f06 | 22982 | +{ |
e4b2b4a8 | 22983 | + struct sworker *worker; |
1a6e0f06 | 22984 | + |
e4b2b4a8 JK |
22985 | + mutex_lock(&worker_mutex); |
22986 | + if (!glob_worker) { | |
22987 | + worker = swork_create(); | |
22988 | + if (IS_ERR(worker)) { | |
22989 | + mutex_unlock(&worker_mutex); | |
22990 | + return -ENOMEM; | |
22991 | + } | |
1a6e0f06 | 22992 | + |
e4b2b4a8 JK |
22993 | + glob_worker = worker; |
22994 | + } | |
1a6e0f06 | 22995 | + |
e4b2b4a8 JK |
22996 | + glob_worker->refs++; |
22997 | + mutex_unlock(&worker_mutex); | |
1a6e0f06 | 22998 | + |
e4b2b4a8 | 22999 | + return 0; |
1a6e0f06 | 23000 | +} |
e4b2b4a8 | 23001 | +EXPORT_SYMBOL_GPL(swork_get); |
1a6e0f06 | 23002 | + |
e4b2b4a8 JK |
23003 | +/** |
23004 | + * swork_put - puts an instance of the sworker | |
23005 | + * | |
23006 | + * Will destroy the sworker thread. This function must not be called until all | |
23007 | + * queued events have been completed. | |
1a6e0f06 | 23008 | + */ |
e4b2b4a8 | 23009 | +void swork_put(void) |
1a6e0f06 | 23010 | +{ |
e4b2b4a8 | 23011 | + mutex_lock(&worker_mutex); |
1a6e0f06 | 23012 | + |
e4b2b4a8 JK |
23013 | + glob_worker->refs--; |
23014 | + if (glob_worker->refs > 0) | |
23015 | + goto out; | |
1a6e0f06 | 23016 | + |
e4b2b4a8 JK |
23017 | + swork_destroy(glob_worker); |
23018 | + glob_worker = NULL; | |
23019 | +out: | |
23020 | + mutex_unlock(&worker_mutex); | |
1a6e0f06 | 23021 | +} |
e4b2b4a8 JK |
23022 | +EXPORT_SYMBOL_GPL(swork_put); |
23023 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/sched/topology.c linux-4.14/kernel/sched/topology.c | |
23024 | --- linux-4.14.orig/kernel/sched/topology.c 2018-09-05 11:03:22.000000000 +0200 | |
23025 | +++ linux-4.14/kernel/sched/topology.c 2018-09-05 11:05:07.000000000 +0200 | |
23026 | @@ -286,6 +286,7 @@ | |
23027 | rd->rto_cpu = -1; | |
23028 | raw_spin_lock_init(&rd->rto_lock); | |
23029 | init_irq_work(&rd->rto_push_work, rto_push_irq_work_func); | |
23030 | + rd->rto_push_work.flags |= IRQ_WORK_HARD_IRQ; | |
23031 | #endif | |
23032 | ||
23033 | init_dl_bw(&rd->dl_bw); | |
23034 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/signal.c linux-4.14/kernel/signal.c | |
23035 | --- linux-4.14.orig/kernel/signal.c 2018-09-05 11:03:22.000000000 +0200 | |
23036 | +++ linux-4.14/kernel/signal.c 2018-09-05 11:05:07.000000000 +0200 | |
23037 | @@ -19,6 +19,7 @@ | |
23038 | #include <linux/sched/task.h> | |
23039 | #include <linux/sched/task_stack.h> | |
23040 | #include <linux/sched/cputime.h> | |
23041 | +#include <linux/sched/rt.h> | |
23042 | #include <linux/fs.h> | |
23043 | #include <linux/tty.h> | |
23044 | #include <linux/binfmts.h> | |
23045 | @@ -360,13 +361,30 @@ | |
23046 | return false; | |
23047 | } | |
23048 | ||
23049 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
1a6e0f06 | 23050 | +{ |
e4b2b4a8 | 23051 | + struct sigqueue *q = t->sigqueue_cache; |
1a6e0f06 | 23052 | + |
e4b2b4a8 JK |
23053 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) |
23054 | + return NULL; | |
23055 | + return q; | |
1a6e0f06 | 23056 | +} |
1a6e0f06 | 23057 | + |
e4b2b4a8 | 23058 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) |
1a6e0f06 | 23059 | +{ |
e4b2b4a8 JK |
23060 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) |
23061 | + return 0; | |
23062 | + return 1; | |
1a6e0f06 | 23063 | +} |
1a6e0f06 | 23064 | + |
e4b2b4a8 JK |
23065 | /* |
23066 | * allocate a new signal queue record | |
23067 | * - this may be called without locks if and only if t == current, otherwise an | |
23068 | * appropriate lock must be held to stop the target task from exiting | |
23069 | */ | |
23070 | static struct sigqueue * | |
23071 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
23072 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
23073 | + int override_rlimit, int fromslab) | |
23074 | { | |
23075 | struct sigqueue *q = NULL; | |
23076 | struct user_struct *user; | |
23077 | @@ -383,7 +401,10 @@ | |
23078 | if (override_rlimit || | |
23079 | atomic_read(&user->sigpending) <= | |
23080 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
23081 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
23082 | + if (!fromslab) | |
23083 | + q = get_task_cache(t); | |
23084 | + if (!q) | |
23085 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
23086 | } else { | |
23087 | print_dropped_signal(sig); | |
23088 | } | |
23089 | @@ -400,6 +421,13 @@ | |
23090 | return q; | |
23091 | } | |
23092 | ||
23093 | +static struct sigqueue * | |
23094 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
23095 | + int override_rlimit) | |
1a6e0f06 | 23096 | +{ |
e4b2b4a8 | 23097 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); |
1a6e0f06 | 23098 | +} |
1a6e0f06 | 23099 | + |
e4b2b4a8 JK |
23100 | static void __sigqueue_free(struct sigqueue *q) |
23101 | { | |
23102 | if (q->flags & SIGQUEUE_PREALLOC) | |
23103 | @@ -409,6 +437,21 @@ | |
23104 | kmem_cache_free(sigqueue_cachep, q); | |
23105 | } | |
23106 | ||
23107 | +static void sigqueue_free_current(struct sigqueue *q) | |
1a6e0f06 | 23108 | +{ |
e4b2b4a8 JK |
23109 | + struct user_struct *up; |
23110 | + | |
23111 | + if (q->flags & SIGQUEUE_PREALLOC) | |
23112 | + return; | |
23113 | + | |
23114 | + up = q->user; | |
23115 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
23116 | + atomic_dec(&up->sigpending); | |
23117 | + free_uid(up); | |
23118 | + } else | |
23119 | + __sigqueue_free(q); | |
1a6e0f06 | 23120 | +} |
1a6e0f06 | 23121 | + |
e4b2b4a8 JK |
23122 | void flush_sigqueue(struct sigpending *queue) |
23123 | { | |
23124 | struct sigqueue *q; | |
23125 | @@ -422,6 +465,21 @@ | |
23126 | } | |
23127 | ||
23128 | /* | |
23129 | + * Called from __exit_signal. Flush tsk->pending and | |
23130 | + * tsk->sigqueue_cache | |
23131 | + */ | |
23132 | +void flush_task_sigqueue(struct task_struct *tsk) | |
1a6e0f06 | 23133 | +{ |
e4b2b4a8 | 23134 | + struct sigqueue *q; |
1a6e0f06 | 23135 | + |
e4b2b4a8 | 23136 | + flush_sigqueue(&tsk->pending); |
1a6e0f06 | 23137 | + |
e4b2b4a8 JK |
23138 | + q = get_task_cache(tsk); |
23139 | + if (q) | |
23140 | + kmem_cache_free(sigqueue_cachep, q); | |
1a6e0f06 JK |
23141 | +} |
23142 | + | |
e4b2b4a8 JK |
23143 | +/* |
23144 | * Flush all pending signals for this kthread. | |
23145 | */ | |
23146 | void flush_signals(struct task_struct *t) | |
23147 | @@ -542,7 +600,7 @@ | |
23148 | (info->si_code == SI_TIMER) && | |
23149 | (info->si_sys_private); | |
23150 | ||
23151 | - __sigqueue_free(first); | |
23152 | + sigqueue_free_current(first); | |
23153 | } else { | |
23154 | /* | |
23155 | * Ok, it wasn't in the queue. This must be | |
23156 | @@ -578,6 +636,8 @@ | |
23157 | bool resched_timer = false; | |
23158 | int signr; | |
23159 | ||
23160 | + WARN_ON_ONCE(tsk != current); | |
23161 | + | |
23162 | /* We only dequeue private signals from ourselves, we don't let | |
23163 | * signalfd steal them | |
23164 | */ | |
23165 | @@ -1177,8 +1237,8 @@ | |
23166 | * We don't want to have recursive SIGSEGV's etc, for example, | |
23167 | * that is why we also clear SIGNAL_UNKILLABLE. | |
23168 | */ | |
23169 | -int | |
23170 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23171 | +static int | |
23172 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23173 | { | |
23174 | unsigned long int flags; | |
23175 | int ret, blocked, ignored; | |
23176 | @@ -1207,6 +1267,39 @@ | |
23177 | return ret; | |
23178 | } | |
23179 | ||
23180 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
1a6e0f06 | 23181 | +{ |
e4b2b4a8 JK |
23182 | +/* |
23183 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
23184 | + * since it can not enable preemption, and the signal code's spin_locks | |
23185 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
23186 | + * send the signal on exit of the trap. | |
23187 | + */ | |
23188 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
23189 | + if (in_atomic()) { | |
23190 | + if (WARN_ON_ONCE(t != current)) | |
23191 | + return 0; | |
23192 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
23193 | + return 0; | |
1a6e0f06 | 23194 | + |
e4b2b4a8 JK |
23195 | + if (is_si_special(info)) { |
23196 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
23197 | + t->forced_info.si_signo = sig; | |
23198 | + t->forced_info.si_errno = 0; | |
23199 | + t->forced_info.si_code = SI_KERNEL; | |
23200 | + t->forced_info.si_pid = 0; | |
23201 | + t->forced_info.si_uid = 0; | |
23202 | + } else { | |
23203 | + t->forced_info = *info; | |
23204 | + } | |
1a6e0f06 | 23205 | + |
e4b2b4a8 JK |
23206 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); |
23207 | + return 0; | |
23208 | + } | |
23209 | +#endif | |
23210 | + return do_force_sig_info(sig, info, t); | |
1a6e0f06 | 23211 | +} |
1a6e0f06 | 23212 | + |
e4b2b4a8 JK |
23213 | /* |
23214 | * Nuke all other threads in the group. | |
23215 | */ | |
23216 | @@ -1241,12 +1334,12 @@ | |
23217 | * Disable interrupts early to avoid deadlocks. | |
23218 | * See rcu_read_unlock() comment header for details. | |
23219 | */ | |
23220 | - local_irq_save(*flags); | |
23221 | + local_irq_save_nort(*flags); | |
23222 | rcu_read_lock(); | |
23223 | sighand = rcu_dereference(tsk->sighand); | |
23224 | if (unlikely(sighand == NULL)) { | |
23225 | rcu_read_unlock(); | |
23226 | - local_irq_restore(*flags); | |
23227 | + local_irq_restore_nort(*flags); | |
23228 | break; | |
23229 | } | |
23230 | /* | |
23231 | @@ -1267,7 +1360,7 @@ | |
23232 | } | |
23233 | spin_unlock(&sighand->siglock); | |
23234 | rcu_read_unlock(); | |
23235 | - local_irq_restore(*flags); | |
23236 | + local_irq_restore_nort(*flags); | |
23237 | } | |
23238 | ||
23239 | return sighand; | |
23240 | @@ -1514,7 +1607,8 @@ | |
23241 | */ | |
23242 | struct sigqueue *sigqueue_alloc(void) | |
23243 | { | |
23244 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
23245 | + /* Preallocated sigqueue objects always from the slabcache ! */ | |
23246 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
23247 | ||
23248 | if (q) | |
23249 | q->flags |= SIGQUEUE_PREALLOC; | |
23250 | @@ -1888,15 +1982,7 @@ | |
23251 | if (gstop_done && ptrace_reparented(current)) | |
23252 | do_notify_parent_cldstop(current, false, why); | |
23253 | ||
23254 | - /* | |
23255 | - * Don't want to allow preemption here, because | |
23256 | - * sys_ptrace() needs this task to be inactive. | |
23257 | - * | |
23258 | - * XXX: implement read_unlock_no_resched(). | |
23259 | - */ | |
23260 | - preempt_disable(); | |
23261 | read_unlock(&tasklist_lock); | |
23262 | - preempt_enable_no_resched(); | |
23263 | freezable_schedule(); | |
23264 | } else { | |
23265 | /* | |
23266 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/softirq.c linux-4.14/kernel/softirq.c | |
23267 | --- linux-4.14.orig/kernel/softirq.c 2018-09-05 11:03:22.000000000 +0200 | |
23268 | +++ linux-4.14/kernel/softirq.c 2018-09-05 11:05:07.000000000 +0200 | |
23269 | @@ -21,11 +21,14 @@ | |
23270 | #include <linux/freezer.h> | |
23271 | #include <linux/kthread.h> | |
23272 | #include <linux/rcupdate.h> | |
23273 | +#include <linux/delay.h> | |
23274 | #include <linux/ftrace.h> | |
23275 | #include <linux/smp.h> | |
23276 | #include <linux/smpboot.h> | |
23277 | #include <linux/tick.h> | |
23278 | +#include <linux/locallock.h> | |
23279 | #include <linux/irq.h> | |
23280 | +#include <linux/sched/types.h> | |
23281 | ||
23282 | #define CREATE_TRACE_POINTS | |
23283 | #include <trace/events/irq.h> | |
23284 | @@ -56,12 +59,108 @@ | |
23285 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
23286 | ||
23287 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
23288 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23289 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) | |
23290 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); | |
23291 | +#endif | |
23292 | ||
23293 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
23294 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", | |
23295 | "TASKLET", "SCHED", "HRTIMER", "RCU" | |
23296 | }; | |
23297 | ||
23298 | +#ifdef CONFIG_NO_HZ_COMMON | |
23299 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
1a6e0f06 | 23300 | + |
e4b2b4a8 JK |
23301 | +struct softirq_runner { |
23302 | + struct task_struct *runner[NR_SOFTIRQS]; | |
23303 | +}; | |
1a6e0f06 | 23304 | + |
e4b2b4a8 | 23305 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); |
1a6e0f06 | 23306 | + |
e4b2b4a8 | 23307 | +static inline void softirq_set_runner(unsigned int sirq) |
1a6e0f06 | 23308 | +{ |
e4b2b4a8 | 23309 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); |
1a6e0f06 | 23310 | + |
e4b2b4a8 | 23311 | + sr->runner[sirq] = current; |
1a6e0f06 | 23312 | +} |
1a6e0f06 | 23313 | + |
e4b2b4a8 | 23314 | +static inline void softirq_clr_runner(unsigned int sirq) |
1a6e0f06 | 23315 | +{ |
e4b2b4a8 JK |
23316 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); |
23317 | + | |
23318 | + sr->runner[sirq] = NULL; | |
1a6e0f06 | 23319 | +} |
1a6e0f06 | 23320 | + |
e4b2b4a8 JK |
23321 | +/* |
23322 | + * On preempt-rt a softirq running context might be blocked on a | |
23323 | + * lock. There might be no other runnable task on this CPU because the | |
23324 | + * lock owner runs on some other CPU. So we have to go into idle with | |
23325 | + * the pending bit set. Therefor we need to check this otherwise we | |
23326 | + * warn about false positives which confuses users and defeats the | |
23327 | + * whole purpose of this test. | |
1a6e0f06 | 23328 | + * |
e4b2b4a8 | 23329 | + * This code is called with interrupts disabled. |
1a6e0f06 | 23330 | + */ |
e4b2b4a8 | 23331 | +void softirq_check_pending_idle(void) |
1a6e0f06 | 23332 | +{ |
e4b2b4a8 JK |
23333 | + static int rate_limit; |
23334 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
23335 | + u32 warnpending; | |
23336 | + int i; | |
23337 | + | |
23338 | + if (rate_limit >= 10) | |
23339 | + return; | |
23340 | + | |
23341 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
23342 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
23343 | + struct task_struct *tsk = sr->runner[i]; | |
23344 | + | |
23345 | + /* | |
23346 | + * The wakeup code in rtmutex.c wakes up the task | |
23347 | + * _before_ it sets pi_blocked_on to NULL under | |
23348 | + * tsk->pi_lock. So we need to check for both: state | |
23349 | + * and pi_blocked_on. | |
23350 | + */ | |
23351 | + if (tsk) { | |
23352 | + raw_spin_lock(&tsk->pi_lock); | |
23353 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
23354 | + /* Clear all bits pending in that task */ | |
23355 | + warnpending &= ~(tsk->softirqs_raised); | |
23356 | + warnpending &= ~(1 << i); | |
23357 | + } | |
23358 | + raw_spin_unlock(&tsk->pi_lock); | |
23359 | + } | |
1a6e0f06 | 23360 | + } |
e4b2b4a8 JK |
23361 | + |
23362 | + if (warnpending) { | |
23363 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
23364 | + warnpending); | |
23365 | + rate_limit++; | |
23366 | + } | |
23367 | +} | |
23368 | +# else | |
23369 | +/* | |
23370 | + * On !PREEMPT_RT we just printk rate limited: | |
23371 | + */ | |
23372 | +void softirq_check_pending_idle(void) | |
1a6e0f06 | 23373 | +{ |
e4b2b4a8 JK |
23374 | + static int rate_limit; |
23375 | + | |
23376 | + if (rate_limit < 10 && | |
23377 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
23378 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
23379 | + local_softirq_pending()); | |
23380 | + rate_limit++; | |
23381 | + } | |
1a6e0f06 | 23382 | +} |
e4b2b4a8 JK |
23383 | +# endif |
23384 | + | |
23385 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
23386 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
23387 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
23388 | +#endif | |
1a6e0f06 JK |
23389 | + |
23390 | /* | |
e4b2b4a8 JK |
23391 | * we cannot loop indefinitely here to avoid userspace starvation, |
23392 | * but we also don't want to introduce a worst case 1/HZ latency | |
23393 | @@ -77,6 +176,38 @@ | |
23394 | wake_up_process(tsk); | |
1a6e0f06 JK |
23395 | } |
23396 | ||
e4b2b4a8 JK |
23397 | +#ifdef CONFIG_PREEMPT_RT_FULL |
23398 | +static void wakeup_timer_softirqd(void) | |
1a6e0f06 | 23399 | +{ |
e4b2b4a8 JK |
23400 | + /* Interrupts are disabled: no need to stop preemption */ |
23401 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
23402 | + | |
23403 | + if (tsk && tsk->state != TASK_RUNNING) | |
23404 | + wake_up_process(tsk); | |
1a6e0f06 | 23405 | +} |
e4b2b4a8 | 23406 | +#endif |
1a6e0f06 | 23407 | + |
e4b2b4a8 JK |
23408 | +static void handle_softirq(unsigned int vec_nr) |
23409 | +{ | |
23410 | + struct softirq_action *h = softirq_vec + vec_nr; | |
23411 | + int prev_count; | |
1a6e0f06 | 23412 | + |
e4b2b4a8 | 23413 | + prev_count = preempt_count(); |
1a6e0f06 | 23414 | + |
e4b2b4a8 | 23415 | + kstat_incr_softirqs_this_cpu(vec_nr); |
1a6e0f06 | 23416 | + |
e4b2b4a8 JK |
23417 | + trace_softirq_entry(vec_nr); |
23418 | + h->action(h); | |
23419 | + trace_softirq_exit(vec_nr); | |
23420 | + if (unlikely(prev_count != preempt_count())) { | |
23421 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
23422 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
23423 | + prev_count, preempt_count()); | |
23424 | + preempt_count_set(prev_count); | |
23425 | + } | |
1a6e0f06 JK |
23426 | +} |
23427 | + | |
e4b2b4a8 | 23428 | +#ifndef CONFIG_PREEMPT_RT_FULL |
1a6e0f06 | 23429 | /* |
e4b2b4a8 JK |
23430 | * If ksoftirqd is scheduled, we do not want to process pending softirqs |
23431 | * right now. Let ksoftirqd handle this at its own rate, to get fairness, | |
23432 | @@ -92,6 +223,47 @@ | |
23433 | return tsk && (tsk->state == TASK_RUNNING); | |
1a6e0f06 JK |
23434 | } |
23435 | ||
e4b2b4a8 | 23436 | +static inline int ksoftirqd_softirq_pending(void) |
1a6e0f06 | 23437 | +{ |
e4b2b4a8 | 23438 | + return local_softirq_pending(); |
1a6e0f06 JK |
23439 | +} |
23440 | + | |
e4b2b4a8 | 23441 | +static void handle_pending_softirqs(u32 pending) |
1a6e0f06 | 23442 | +{ |
e4b2b4a8 JK |
23443 | + struct softirq_action *h = softirq_vec; |
23444 | + int softirq_bit; | |
1a6e0f06 | 23445 | + |
e4b2b4a8 JK |
23446 | + local_irq_enable(); |
23447 | + | |
23448 | + h = softirq_vec; | |
23449 | + | |
23450 | + while ((softirq_bit = ffs(pending))) { | |
23451 | + unsigned int vec_nr; | |
23452 | + | |
23453 | + h += softirq_bit - 1; | |
23454 | + vec_nr = h - softirq_vec; | |
23455 | + handle_softirq(vec_nr); | |
23456 | + | |
23457 | + h++; | |
23458 | + pending >>= softirq_bit; | |
1a6e0f06 | 23459 | + } |
e4b2b4a8 JK |
23460 | + |
23461 | + rcu_bh_qs(); | |
23462 | + local_irq_disable(); | |
1a6e0f06 | 23463 | +} |
e4b2b4a8 JK |
23464 | + |
23465 | +static void run_ksoftirqd(unsigned int cpu) | |
1a6e0f06 | 23466 | +{ |
e4b2b4a8 JK |
23467 | + local_irq_disable(); |
23468 | + if (ksoftirqd_softirq_pending()) { | |
23469 | + __do_softirq(); | |
23470 | + local_irq_enable(); | |
23471 | + cond_resched_rcu_qs(); | |
23472 | + return; | |
23473 | + } | |
23474 | + local_irq_enable(); | |
1a6e0f06 | 23475 | +} |
1a6e0f06 | 23476 | + |
e4b2b4a8 JK |
23477 | /* |
23478 | * preempt_count and SOFTIRQ_OFFSET usage: | |
23479 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
23480 | @@ -247,10 +419,8 @@ | |
23481 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | |
23482 | unsigned long old_flags = current->flags; | |
23483 | int max_restart = MAX_SOFTIRQ_RESTART; | |
23484 | - struct softirq_action *h; | |
23485 | bool in_hardirq; | |
23486 | __u32 pending; | |
23487 | - int softirq_bit; | |
23488 | ||
23489 | /* | |
23490 | * Mask out PF_MEMALLOC s current task context is borrowed for the | |
23491 | @@ -269,36 +439,7 @@ | |
23492 | /* Reset the pending bitmask before enabling irqs */ | |
23493 | set_softirq_pending(0); | |
23494 | ||
23495 | - local_irq_enable(); | |
23496 | - | |
23497 | - h = softirq_vec; | |
23498 | - | |
23499 | - while ((softirq_bit = ffs(pending))) { | |
23500 | - unsigned int vec_nr; | |
23501 | - int prev_count; | |
23502 | - | |
23503 | - h += softirq_bit - 1; | |
23504 | - | |
23505 | - vec_nr = h - softirq_vec; | |
23506 | - prev_count = preempt_count(); | |
23507 | - | |
23508 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
23509 | - | |
23510 | - trace_softirq_entry(vec_nr); | |
23511 | - h->action(h); | |
23512 | - trace_softirq_exit(vec_nr); | |
23513 | - if (unlikely(prev_count != preempt_count())) { | |
23514 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
23515 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
23516 | - prev_count, preempt_count()); | |
23517 | - preempt_count_set(prev_count); | |
23518 | - } | |
23519 | - h++; | |
23520 | - pending >>= softirq_bit; | |
23521 | - } | |
23522 | - | |
23523 | - rcu_bh_qs(); | |
23524 | - local_irq_disable(); | |
23525 | + handle_pending_softirqs(pending); | |
23526 | ||
23527 | pending = local_softirq_pending(); | |
23528 | if (pending) { | |
23529 | @@ -335,6 +476,309 @@ | |
23530 | } | |
23531 | ||
23532 | /* | |
23533 | + * This function must run with irqs disabled! | |
1a6e0f06 | 23534 | + */ |
e4b2b4a8 | 23535 | +void raise_softirq_irqoff(unsigned int nr) |
1a6e0f06 | 23536 | +{ |
e4b2b4a8 | 23537 | + __raise_softirq_irqoff(nr); |
1a6e0f06 JK |
23538 | + |
23539 | + /* | |
e4b2b4a8 JK |
23540 | + * If we're in an interrupt or softirq, we're done |
23541 | + * (this also catches softirq-disabled code). We will | |
23542 | + * actually run the softirq once we return from | |
23543 | + * the irq or softirq. | |
23544 | + * | |
23545 | + * Otherwise we wake up ksoftirqd to make sure we | |
23546 | + * schedule the softirq soon. | |
1a6e0f06 | 23547 | + */ |
e4b2b4a8 JK |
23548 | + if (!in_interrupt()) |
23549 | + wakeup_softirqd(); | |
23550 | +} | |
1a6e0f06 | 23551 | + |
e4b2b4a8 JK |
23552 | +void __raise_softirq_irqoff(unsigned int nr) |
23553 | +{ | |
23554 | + trace_softirq_raise(nr); | |
23555 | + or_softirq_pending(1UL << nr); | |
23556 | +} | |
1a6e0f06 | 23557 | + |
e4b2b4a8 JK |
23558 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } |
23559 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
23560 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
1a6e0f06 | 23561 | + |
e4b2b4a8 | 23562 | +#else /* !PREEMPT_RT_FULL */ |
1a6e0f06 | 23563 | + |
e4b2b4a8 JK |
23564 | +/* |
23565 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
23566 | + */ | |
23567 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
1a6e0f06 | 23568 | + |
e4b2b4a8 JK |
23569 | +void __init softirq_early_init(void) |
23570 | +{ | |
23571 | + int i; | |
1a6e0f06 | 23572 | + |
e4b2b4a8 JK |
23573 | + for (i = 0; i < NR_SOFTIRQS; i++) |
23574 | + local_irq_lock_init(local_softirq_locks[i]); | |
23575 | +} | |
1a6e0f06 | 23576 | + |
e4b2b4a8 JK |
23577 | +static void lock_softirq(int which) |
23578 | +{ | |
23579 | + local_lock(local_softirq_locks[which]); | |
23580 | +} | |
1a6e0f06 | 23581 | + |
e4b2b4a8 JK |
23582 | +static void unlock_softirq(int which) |
23583 | +{ | |
23584 | + local_unlock(local_softirq_locks[which]); | |
23585 | +} | |
1a6e0f06 | 23586 | + |
e4b2b4a8 JK |
23587 | +static void do_single_softirq(int which) |
23588 | +{ | |
23589 | + unsigned long old_flags = current->flags; | |
1a6e0f06 | 23590 | + |
e4b2b4a8 JK |
23591 | + current->flags &= ~PF_MEMALLOC; |
23592 | + vtime_account_irq_enter(current); | |
23593 | + current->flags |= PF_IN_SOFTIRQ; | |
23594 | + lockdep_softirq_enter(); | |
23595 | + local_irq_enable(); | |
23596 | + handle_softirq(which); | |
23597 | + local_irq_disable(); | |
23598 | + lockdep_softirq_exit(); | |
23599 | + current->flags &= ~PF_IN_SOFTIRQ; | |
23600 | + vtime_account_irq_enter(current); | |
23601 | + current_restore_flags(old_flags, PF_MEMALLOC); | |
1a6e0f06 JK |
23602 | +} |
23603 | + | |
1a6e0f06 | 23604 | +/* |
e4b2b4a8 JK |
23605 | + * Called with interrupts disabled. Process softirqs which were raised |
23606 | + * in current context (or on behalf of ksoftirqd). | |
1a6e0f06 | 23607 | + */ |
e4b2b4a8 | 23608 | +static void do_current_softirqs(void) |
1a6e0f06 | 23609 | +{ |
e4b2b4a8 JK |
23610 | + while (current->softirqs_raised) { |
23611 | + int i = __ffs(current->softirqs_raised); | |
23612 | + unsigned int pending, mask = (1U << i); | |
1a6e0f06 | 23613 | + |
e4b2b4a8 JK |
23614 | + current->softirqs_raised &= ~mask; |
23615 | + local_irq_enable(); | |
1a6e0f06 | 23616 | + |
e4b2b4a8 JK |
23617 | + /* |
23618 | + * If the lock is contended, we boost the owner to | |
23619 | + * process the softirq or leave the critical section | |
23620 | + * now. | |
23621 | + */ | |
23622 | + lock_softirq(i); | |
23623 | + local_irq_disable(); | |
23624 | + softirq_set_runner(i); | |
23625 | + /* | |
23626 | + * Check with the local_softirq_pending() bits, | |
23627 | + * whether we need to process this still or if someone | |
23628 | + * else took care of it. | |
23629 | + */ | |
23630 | + pending = local_softirq_pending(); | |
23631 | + if (pending & mask) { | |
23632 | + set_softirq_pending(pending & ~mask); | |
23633 | + do_single_softirq(i); | |
23634 | + } | |
23635 | + softirq_clr_runner(i); | |
23636 | + WARN_ON(current->softirq_nestcnt != 1); | |
23637 | + local_irq_enable(); | |
23638 | + unlock_softirq(i); | |
23639 | + local_irq_disable(); | |
1a6e0f06 | 23640 | + } |
1a6e0f06 JK |
23641 | +} |
23642 | + | |
e4b2b4a8 | 23643 | +void __local_bh_disable(void) |
1a6e0f06 | 23644 | +{ |
e4b2b4a8 JK |
23645 | + if (++current->softirq_nestcnt == 1) |
23646 | + migrate_disable(); | |
23647 | +} | |
23648 | +EXPORT_SYMBOL(__local_bh_disable); | |
1a6e0f06 | 23649 | + |
e4b2b4a8 JK |
23650 | +void __local_bh_enable(void) |
23651 | +{ | |
23652 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
23653 | + return; | |
1a6e0f06 | 23654 | + |
e4b2b4a8 JK |
23655 | + local_irq_disable(); |
23656 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
23657 | + do_current_softirqs(); | |
23658 | + local_irq_enable(); | |
1a6e0f06 | 23659 | + |
e4b2b4a8 JK |
23660 | + if (--current->softirq_nestcnt == 0) |
23661 | + migrate_enable(); | |
1a6e0f06 | 23662 | +} |
e4b2b4a8 | 23663 | +EXPORT_SYMBOL(__local_bh_enable); |
1a6e0f06 | 23664 | + |
e4b2b4a8 | 23665 | +void _local_bh_enable(void) |
1a6e0f06 | 23666 | +{ |
e4b2b4a8 JK |
23667 | + if (WARN_ON(current->softirq_nestcnt == 0)) |
23668 | + return; | |
23669 | + if (--current->softirq_nestcnt == 0) | |
23670 | + migrate_enable(); | |
1a6e0f06 | 23671 | +} |
e4b2b4a8 | 23672 | +EXPORT_SYMBOL(_local_bh_enable); |
1a6e0f06 | 23673 | + |
e4b2b4a8 | 23674 | +int in_serving_softirq(void) |
1a6e0f06 | 23675 | +{ |
e4b2b4a8 | 23676 | + return current->flags & PF_IN_SOFTIRQ; |
1a6e0f06 | 23677 | +} |
e4b2b4a8 | 23678 | +EXPORT_SYMBOL(in_serving_softirq); |
1a6e0f06 | 23679 | + |
e4b2b4a8 JK |
23680 | +/* Called with preemption disabled */ |
23681 | +static void run_ksoftirqd(unsigned int cpu) | |
1a6e0f06 | 23682 | +{ |
e4b2b4a8 JK |
23683 | + local_irq_disable(); |
23684 | + current->softirq_nestcnt++; | |
23685 | + | |
23686 | + do_current_softirqs(); | |
23687 | + current->softirq_nestcnt--; | |
23688 | + local_irq_enable(); | |
23689 | + cond_resched_rcu_qs(); | |
1a6e0f06 | 23690 | +} |
1a6e0f06 | 23691 | + |
e4b2b4a8 JK |
23692 | +/* |
23693 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
23694 | + * disabled. So the cpu can't go away under us. | |
23695 | + */ | |
23696 | +void thread_do_softirq(void) | |
1a6e0f06 | 23697 | +{ |
e4b2b4a8 JK |
23698 | + if (!in_serving_softirq() && current->softirqs_raised) { |
23699 | + current->softirq_nestcnt++; | |
23700 | + do_current_softirqs(); | |
23701 | + current->softirq_nestcnt--; | |
23702 | + } | |
1a6e0f06 | 23703 | +} |
1a6e0f06 | 23704 | + |
e4b2b4a8 | 23705 | +static void do_raise_softirq_irqoff(unsigned int nr) |
1a6e0f06 | 23706 | +{ |
e4b2b4a8 JK |
23707 | + unsigned int mask; |
23708 | + | |
23709 | + mask = 1UL << nr; | |
23710 | + | |
23711 | + trace_softirq_raise(nr); | |
23712 | + or_softirq_pending(mask); | |
23713 | + | |
23714 | + /* | |
23715 | + * If we are not in a hard interrupt and inside a bh disabled | |
23716 | + * region, we simply raise the flag on current. local_bh_enable() | |
23717 | + * will make sure that the softirq is executed. Otherwise we | |
23718 | + * delegate it to ksoftirqd. | |
23719 | + */ | |
23720 | + if (!in_irq() && current->softirq_nestcnt) | |
23721 | + current->softirqs_raised |= mask; | |
23722 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
23723 | + return; | |
23724 | + | |
23725 | + if (mask & TIMER_SOFTIRQS) | |
23726 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
23727 | + else | |
23728 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
1a6e0f06 | 23729 | +} |
1a6e0f06 | 23730 | + |
e4b2b4a8 | 23731 | +static void wakeup_proper_softirq(unsigned int nr) |
1a6e0f06 | 23732 | +{ |
e4b2b4a8 JK |
23733 | + if ((1UL << nr) & TIMER_SOFTIRQS) |
23734 | + wakeup_timer_softirqd(); | |
23735 | + else | |
23736 | + wakeup_softirqd(); | |
1a6e0f06 | 23737 | +} |
1a6e0f06 | 23738 | + |
e4b2b4a8 | 23739 | +void __raise_softirq_irqoff(unsigned int nr) |
1a6e0f06 | 23740 | +{ |
e4b2b4a8 JK |
23741 | + do_raise_softirq_irqoff(nr); |
23742 | + if (!in_irq() && !current->softirq_nestcnt) | |
23743 | + wakeup_proper_softirq(nr); | |
1a6e0f06 | 23744 | +} |
1a6e0f06 | 23745 | + |
e4b2b4a8 JK |
23746 | +/* |
23747 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
23748 | + */ | |
23749 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
1a6e0f06 | 23750 | +{ |
e4b2b4a8 | 23751 | + unsigned int mask; |
1a6e0f06 | 23752 | + |
e4b2b4a8 JK |
23753 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || |
23754 | + !__this_cpu_read(ktimer_softirqd))) | |
23755 | + return; | |
23756 | + mask = 1UL << nr; | |
1a6e0f06 | 23757 | + |
e4b2b4a8 JK |
23758 | + trace_softirq_raise(nr); |
23759 | + or_softirq_pending(mask); | |
23760 | + if (mask & TIMER_SOFTIRQS) | |
23761 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
23762 | + else | |
23763 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
23764 | + wakeup_proper_softirq(nr); | |
1a6e0f06 | 23765 | +} |
1a6e0f06 JK |
23766 | + |
23767 | +/* | |
e4b2b4a8 | 23768 | + * This function must run with irqs disabled! |
1a6e0f06 | 23769 | + */ |
e4b2b4a8 | 23770 | +void raise_softirq_irqoff(unsigned int nr) |
1a6e0f06 | 23771 | +{ |
e4b2b4a8 JK |
23772 | + do_raise_softirq_irqoff(nr); |
23773 | + | |
23774 | + /* | |
23775 | + * If we're in an hard interrupt we let irq return code deal | |
23776 | + * with the wakeup of ksoftirqd. | |
23777 | + */ | |
23778 | + if (in_irq()) | |
23779 | + return; | |
23780 | + /* | |
23781 | + * If we are in thread context but outside of a bh disabled | |
23782 | + * region, we need to wake ksoftirqd as well. | |
23783 | + * | |
23784 | + * CHECKME: Some of the places which do that could be wrapped | |
23785 | + * into local_bh_disable/enable pairs. Though it's unclear | |
23786 | + * whether this is worth the effort. To find those places just | |
23787 | + * raise a WARN() if the condition is met. | |
23788 | + */ | |
23789 | + if (!current->softirq_nestcnt) | |
23790 | + wakeup_proper_softirq(nr); | |
1a6e0f06 | 23791 | +} |
1a6e0f06 | 23792 | + |
e4b2b4a8 | 23793 | +static inline int ksoftirqd_softirq_pending(void) |
1a6e0f06 | 23794 | +{ |
e4b2b4a8 JK |
23795 | + return current->softirqs_raised; |
23796 | +} | |
1a6e0f06 | 23797 | + |
e4b2b4a8 JK |
23798 | +static inline void local_bh_disable_nort(void) { } |
23799 | +static inline void _local_bh_enable_nort(void) { } | |
23800 | + | |
23801 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
23802 | +{ | |
23803 | + /* Take over all but timer pending softirqs when starting */ | |
23804 | + local_irq_disable(); | |
23805 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
23806 | + local_irq_enable(); | |
1a6e0f06 | 23807 | +} |
1a6e0f06 | 23808 | + |
e4b2b4a8 | 23809 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) |
1a6e0f06 | 23810 | +{ |
e4b2b4a8 | 23811 | + struct sched_param param = { .sched_priority = 1 }; |
1a6e0f06 | 23812 | + |
e4b2b4a8 JK |
23813 | + sched_setscheduler(current, SCHED_FIFO, ¶m); |
23814 | + | |
23815 | + /* Take over timer pending softirqs when starting */ | |
23816 | + local_irq_disable(); | |
23817 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
23818 | + local_irq_enable(); | |
1a6e0f06 | 23819 | +} |
1a6e0f06 | 23820 | + |
e4b2b4a8 JK |
23821 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, |
23822 | + bool online) | |
1a6e0f06 | 23823 | +{ |
e4b2b4a8 | 23824 | + struct sched_param param = { .sched_priority = 0 }; |
1a6e0f06 | 23825 | + |
e4b2b4a8 | 23826 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); |
1a6e0f06 | 23827 | +} |
1a6e0f06 | 23828 | + |
e4b2b4a8 | 23829 | +static int ktimer_softirqd_should_run(unsigned int cpu) |
1a6e0f06 | 23830 | +{ |
e4b2b4a8 | 23831 | + return current->softirqs_raised; |
1a6e0f06 | 23832 | +} |
1a6e0f06 | 23833 | + |
e4b2b4a8 JK |
23834 | +#endif /* PREEMPT_RT_FULL */ |
23835 | +/* | |
23836 | * Enter an interrupt context. | |
23837 | */ | |
23838 | void irq_enter(void) | |
23839 | @@ -345,9 +789,9 @@ | |
23840 | * Prevent raise_softirq from needlessly waking up ksoftirqd | |
23841 | * here, as softirq will be serviced on return from interrupt. | |
23842 | */ | |
23843 | - local_bh_disable(); | |
23844 | + local_bh_disable_nort(); | |
23845 | tick_irq_enter(); | |
23846 | - _local_bh_enable(); | |
23847 | + _local_bh_enable_nort(); | |
23848 | } | |
23849 | ||
23850 | __irq_enter(); | |
23851 | @@ -355,6 +799,7 @@ | |
23852 | ||
23853 | static inline void invoke_softirq(void) | |
23854 | { | |
23855 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23856 | if (ksoftirqd_running(local_softirq_pending())) | |
23857 | return; | |
23858 | ||
23859 | @@ -377,6 +822,18 @@ | |
23860 | } else { | |
23861 | wakeup_softirqd(); | |
23862 | } | |
23863 | +#else /* PREEMPT_RT_FULL */ | |
23864 | + unsigned long flags; | |
23865 | + | |
23866 | + local_irq_save(flags); | |
23867 | + if (__this_cpu_read(ksoftirqd) && | |
23868 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
23869 | + wakeup_softirqd(); | |
23870 | + if (__this_cpu_read(ktimer_softirqd) && | |
23871 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
23872 | + wakeup_timer_softirqd(); | |
23873 | + local_irq_restore(flags); | |
23874 | +#endif | |
23875 | } | |
23876 | ||
23877 | static inline void tick_irq_exit(void) | |
23878 | @@ -385,7 +842,13 @@ | |
23879 | int cpu = smp_processor_id(); | |
23880 | ||
23881 | /* Make sure that timer wheel updates are propagated */ | |
23882 | - if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) { | |
23883 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23884 | + if ((idle_cpu(cpu) || tick_nohz_full_cpu(cpu)) && | |
23885 | + !need_resched() && !local_softirq_pending()) | |
23886 | +#else | |
23887 | + if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) | |
23888 | +#endif | |
23889 | + { | |
23890 | if (!in_irq()) | |
23891 | tick_nohz_irq_exit(); | |
23892 | } | |
23893 | @@ -413,26 +876,6 @@ | |
23894 | trace_hardirq_exit(); /* must be last! */ | |
23895 | } | |
23896 | ||
23897 | -/* | |
23898 | - * This function must run with irqs disabled! | |
23899 | - */ | |
23900 | -inline void raise_softirq_irqoff(unsigned int nr) | |
23901 | -{ | |
23902 | - __raise_softirq_irqoff(nr); | |
23903 | - | |
23904 | - /* | |
23905 | - * If we're in an interrupt or softirq, we're done | |
23906 | - * (this also catches softirq-disabled code). We will | |
23907 | - * actually run the softirq once we return from | |
23908 | - * the irq or softirq. | |
23909 | - * | |
23910 | - * Otherwise we wake up ksoftirqd to make sure we | |
23911 | - * schedule the softirq soon. | |
23912 | - */ | |
23913 | - if (!in_interrupt()) | |
23914 | - wakeup_softirqd(); | |
23915 | -} | |
23916 | - | |
23917 | void raise_softirq(unsigned int nr) | |
23918 | { | |
23919 | unsigned long flags; | |
23920 | @@ -442,12 +885,6 @@ | |
23921 | local_irq_restore(flags); | |
23922 | } | |
23923 | ||
23924 | -void __raise_softirq_irqoff(unsigned int nr) | |
23925 | -{ | |
23926 | - trace_softirq_raise(nr); | |
23927 | - or_softirq_pending(1UL << nr); | |
23928 | -} | |
23929 | - | |
23930 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
23931 | { | |
23932 | softirq_vec[nr].action = action; | |
23933 | @@ -464,15 +901,45 @@ | |
23934 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | |
23935 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
23936 | ||
23937 | +static void inline | |
23938 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
1a6e0f06 | 23939 | +{ |
e4b2b4a8 JK |
23940 | + if (tasklet_trylock(t)) { |
23941 | +again: | |
23942 | + /* We may have been preempted before tasklet_trylock | |
23943 | + * and __tasklet_action may have already run. | |
23944 | + * So double check the sched bit while the takslet | |
23945 | + * is locked before adding it to the list. | |
23946 | + */ | |
23947 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
23948 | + t->next = NULL; | |
23949 | + *head->tail = t; | |
23950 | + head->tail = &(t->next); | |
23951 | + raise_softirq_irqoff(nr); | |
23952 | + tasklet_unlock(t); | |
23953 | + } else { | |
23954 | + /* This is subtle. If we hit the corner case above | |
23955 | + * It is possible that we get preempted right here, | |
23956 | + * and another task has successfully called | |
23957 | + * tasklet_schedule(), then this function, and | |
23958 | + * failed on the trylock. Thus we must be sure | |
23959 | + * before releasing the tasklet lock, that the | |
23960 | + * SCHED_BIT is clear. Otherwise the tasklet | |
23961 | + * may get its SCHED_BIT set, but not added to the | |
23962 | + * list | |
23963 | + */ | |
23964 | + if (!tasklet_tryunlock(t)) | |
23965 | + goto again; | |
23966 | + } | |
23967 | + } | |
1a6e0f06 | 23968 | +} |
1a6e0f06 | 23969 | + |
e4b2b4a8 JK |
23970 | void __tasklet_schedule(struct tasklet_struct *t) |
23971 | { | |
23972 | unsigned long flags; | |
23973 | ||
23974 | local_irq_save(flags); | |
23975 | - t->next = NULL; | |
23976 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
23977 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
23978 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
23979 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
23980 | local_irq_restore(flags); | |
23981 | } | |
23982 | EXPORT_SYMBOL(__tasklet_schedule); | |
23983 | @@ -482,50 +949,108 @@ | |
23984 | unsigned long flags; | |
23985 | ||
23986 | local_irq_save(flags); | |
23987 | - t->next = NULL; | |
23988 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
23989 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
23990 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
23991 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
23992 | local_irq_restore(flags); | |
23993 | } | |
23994 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
23995 | ||
23996 | -static __latent_entropy void tasklet_action(struct softirq_action *a) | |
23997 | +void tasklet_enable(struct tasklet_struct *t) | |
23998 | { | |
23999 | - struct tasklet_struct *list; | |
24000 | + if (!atomic_dec_and_test(&t->count)) | |
24001 | + return; | |
24002 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
24003 | + tasklet_schedule(t); | |
1a6e0f06 | 24004 | +} |
e4b2b4a8 JK |
24005 | +EXPORT_SYMBOL(tasklet_enable); |
24006 | ||
24007 | - local_irq_disable(); | |
24008 | - list = __this_cpu_read(tasklet_vec.head); | |
24009 | - __this_cpu_write(tasklet_vec.head, NULL); | |
24010 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
24011 | - local_irq_enable(); | |
24012 | +static void __tasklet_action(struct softirq_action *a, | |
24013 | + struct tasklet_struct *list) | |
24014 | +{ | |
24015 | + int loops = 1000000; | |
24016 | ||
24017 | while (list) { | |
24018 | struct tasklet_struct *t = list; | |
24019 | ||
24020 | list = list->next; | |
24021 | ||
24022 | - if (tasklet_trylock(t)) { | |
24023 | - if (!atomic_read(&t->count)) { | |
24024 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
24025 | - &t->state)) | |
24026 | - BUG(); | |
24027 | - t->func(t->data); | |
24028 | - tasklet_unlock(t); | |
24029 | - continue; | |
24030 | - } | |
24031 | - tasklet_unlock(t); | |
24032 | + /* | |
24033 | + * Should always succeed - after a tasklist got on the | |
24034 | + * list (after getting the SCHED bit set from 0 to 1), | |
24035 | + * nothing but the tasklet softirq it got queued to can | |
24036 | + * lock it: | |
24037 | + */ | |
24038 | + if (!tasklet_trylock(t)) { | |
24039 | + WARN_ON(1); | |
24040 | + continue; | |
24041 | } | |
24042 | ||
24043 | - local_irq_disable(); | |
24044 | t->next = NULL; | |
24045 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
24046 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
24047 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
24048 | - local_irq_enable(); | |
1a6e0f06 | 24049 | + |
e4b2b4a8 JK |
24050 | + /* |
24051 | + * If we cannot handle the tasklet because it's disabled, | |
24052 | + * mark it as pending. tasklet_enable() will later | |
24053 | + * re-schedule the tasklet. | |
24054 | + */ | |
24055 | + if (unlikely(atomic_read(&t->count))) { | |
24056 | +out_disabled: | |
24057 | + /* implicit unlock: */ | |
24058 | + wmb(); | |
24059 | + t->state = TASKLET_STATEF_PENDING; | |
24060 | + continue; | |
24061 | + } | |
1a6e0f06 | 24062 | + |
e4b2b4a8 JK |
24063 | + /* |
24064 | + * After this point on the tasklet might be rescheduled | |
24065 | + * on another CPU, but it can only be added to another | |
24066 | + * CPU's tasklet list if we unlock the tasklet (which we | |
24067 | + * dont do yet). | |
24068 | + */ | |
24069 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
24070 | + WARN_ON(1); | |
1a6e0f06 | 24071 | + |
e4b2b4a8 JK |
24072 | +again: |
24073 | + t->func(t->data); | |
1a6e0f06 | 24074 | + |
e4b2b4a8 JK |
24075 | + /* |
24076 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
24077 | + * another CPU might have scheduled or disabled the tasklet. | |
24078 | + * We only allow the STATE_RUN -> 0 transition here. | |
24079 | + */ | |
24080 | + while (!tasklet_tryunlock(t)) { | |
24081 | + /* | |
24082 | + * If it got disabled meanwhile, bail out: | |
24083 | + */ | |
24084 | + if (atomic_read(&t->count)) | |
24085 | + goto out_disabled; | |
24086 | + /* | |
24087 | + * If it got scheduled meanwhile, re-execute | |
24088 | + * the tasklet function: | |
24089 | + */ | |
24090 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
24091 | + goto again; | |
24092 | + if (!--loops) { | |
24093 | + printk("hm, tasklet state: %08lx\n", t->state); | |
24094 | + WARN_ON(1); | |
24095 | + tasklet_unlock(t); | |
24096 | + break; | |
24097 | + } | |
24098 | + } | |
24099 | } | |
24100 | } | |
24101 | ||
24102 | +static __latent_entropy void tasklet_action(struct softirq_action *a) | |
24103 | +{ | |
24104 | + struct tasklet_struct *list; | |
1a6e0f06 | 24105 | + |
e4b2b4a8 JK |
24106 | + local_irq_disable(); |
24107 | + list = __this_cpu_read(tasklet_vec.head); | |
24108 | + __this_cpu_write(tasklet_vec.head, NULL); | |
24109 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
24110 | + local_irq_enable(); | |
1a6e0f06 | 24111 | + |
e4b2b4a8 | 24112 | + __tasklet_action(a, list); |
1a6e0f06 | 24113 | +} |
e4b2b4a8 JK |
24114 | + |
24115 | static __latent_entropy void tasklet_hi_action(struct softirq_action *a) | |
24116 | { | |
24117 | struct tasklet_struct *list; | |
24118 | @@ -536,30 +1061,7 @@ | |
24119 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
24120 | local_irq_enable(); | |
24121 | ||
24122 | - while (list) { | |
24123 | - struct tasklet_struct *t = list; | |
24124 | - | |
24125 | - list = list->next; | |
24126 | - | |
24127 | - if (tasklet_trylock(t)) { | |
24128 | - if (!atomic_read(&t->count)) { | |
24129 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
24130 | - &t->state)) | |
24131 | - BUG(); | |
24132 | - t->func(t->data); | |
24133 | - tasklet_unlock(t); | |
24134 | - continue; | |
24135 | - } | |
24136 | - tasklet_unlock(t); | |
24137 | - } | |
24138 | - | |
24139 | - local_irq_disable(); | |
24140 | - t->next = NULL; | |
24141 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
24142 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
24143 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
24144 | - local_irq_enable(); | |
24145 | - } | |
24146 | + __tasklet_action(a, list); | |
24147 | } | |
24148 | ||
24149 | void tasklet_init(struct tasklet_struct *t, | |
24150 | @@ -580,7 +1082,7 @@ | |
24151 | ||
24152 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
24153 | do { | |
24154 | - yield(); | |
24155 | + msleep(1); | |
24156 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
24157 | } | |
24158 | tasklet_unlock_wait(t); | |
24159 | @@ -588,57 +1090,6 @@ | |
24160 | } | |
24161 | EXPORT_SYMBOL(tasklet_kill); | |
24162 | ||
24163 | -/* | |
24164 | - * tasklet_hrtimer | |
24165 | - */ | |
24166 | - | |
24167 | -/* | |
24168 | - * The trampoline is called when the hrtimer expires. It schedules a tasklet | |
24169 | - * to run __tasklet_hrtimer_trampoline() which in turn will call the intended | |
24170 | - * hrtimer callback, but from softirq context. | |
24171 | - */ | |
24172 | -static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) | |
24173 | -{ | |
24174 | - struct tasklet_hrtimer *ttimer = | |
24175 | - container_of(timer, struct tasklet_hrtimer, timer); | |
24176 | - | |
24177 | - tasklet_hi_schedule(&ttimer->tasklet); | |
24178 | - return HRTIMER_NORESTART; | |
24179 | -} | |
24180 | - | |
24181 | -/* | |
24182 | - * Helper function which calls the hrtimer callback from | |
24183 | - * tasklet/softirq context | |
24184 | - */ | |
24185 | -static void __tasklet_hrtimer_trampoline(unsigned long data) | |
24186 | -{ | |
24187 | - struct tasklet_hrtimer *ttimer = (void *)data; | |
24188 | - enum hrtimer_restart restart; | |
24189 | - | |
24190 | - restart = ttimer->function(&ttimer->timer); | |
24191 | - if (restart != HRTIMER_NORESTART) | |
24192 | - hrtimer_restart(&ttimer->timer); | |
24193 | -} | |
24194 | - | |
24195 | -/** | |
24196 | - * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks | |
24197 | - * @ttimer: tasklet_hrtimer which is initialized | |
24198 | - * @function: hrtimer callback function which gets called from softirq context | |
24199 | - * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) | |
24200 | - * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) | |
24201 | - */ | |
24202 | -void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, | |
24203 | - enum hrtimer_restart (*function)(struct hrtimer *), | |
24204 | - clockid_t which_clock, enum hrtimer_mode mode) | |
24205 | -{ | |
24206 | - hrtimer_init(&ttimer->timer, which_clock, mode); | |
24207 | - ttimer->timer.function = __hrtimer_tasklet_trampoline; | |
24208 | - tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline, | |
24209 | - (unsigned long)ttimer); | |
24210 | - ttimer->function = function; | |
24211 | -} | |
24212 | -EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); | |
24213 | - | |
24214 | void __init softirq_init(void) | |
24215 | { | |
24216 | int cpu; | |
24217 | @@ -654,25 +1105,26 @@ | |
24218 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); | |
24219 | } | |
24220 | ||
24221 | -static int ksoftirqd_should_run(unsigned int cpu) | |
24222 | -{ | |
24223 | - return local_softirq_pending(); | |
24224 | -} | |
24225 | - | |
24226 | -static void run_ksoftirqd(unsigned int cpu) | |
24227 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
24228 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
24229 | { | |
24230 | - local_irq_disable(); | |
24231 | - if (local_softirq_pending()) { | |
24232 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
24233 | /* | |
24234 | - * We can safely run softirq on inline stack, as we are not deep | |
24235 | - * in the task stack here. | |
24236 | + * Hack for now to avoid this busy-loop: | |
24237 | */ | |
24238 | - __do_softirq(); | |
24239 | - local_irq_enable(); | |
24240 | - cond_resched_rcu_qs(); | |
24241 | - return; | |
24242 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24243 | + msleep(1); | |
1a6e0f06 | 24244 | +#else |
e4b2b4a8 JK |
24245 | + barrier(); |
24246 | +#endif | |
24247 | } | |
24248 | - local_irq_enable(); | |
1a6e0f06 | 24249 | +} |
e4b2b4a8 | 24250 | +EXPORT_SYMBOL(tasklet_unlock_wait); |
1a6e0f06 JK |
24251 | +#endif |
24252 | + | |
e4b2b4a8 | 24253 | +static int ksoftirqd_should_run(unsigned int cpu) |
1a6e0f06 | 24254 | +{ |
e4b2b4a8 JK |
24255 | + return ksoftirqd_softirq_pending(); |
24256 | } | |
1a6e0f06 | 24257 | |
e4b2b4a8 JK |
24258 | #ifdef CONFIG_HOTPLUG_CPU |
24259 | @@ -739,17 +1191,31 @@ | |
24260 | ||
24261 | static struct smp_hotplug_thread softirq_threads = { | |
24262 | .store = &ksoftirqd, | |
24263 | + .setup = ksoftirqd_set_sched_params, | |
24264 | .thread_should_run = ksoftirqd_should_run, | |
24265 | .thread_fn = run_ksoftirqd, | |
24266 | .thread_comm = "ksoftirqd/%u", | |
24267 | }; | |
24268 | ||
24269 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24270 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
24271 | + .store = &ktimer_softirqd, | |
24272 | + .setup = ktimer_softirqd_set_sched_params, | |
24273 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
24274 | + .thread_should_run = ktimer_softirqd_should_run, | |
24275 | + .thread_fn = run_ksoftirqd, | |
24276 | + .thread_comm = "ktimersoftd/%u", | |
24277 | +}; | |
24278 | +#endif | |
1a6e0f06 | 24279 | + |
e4b2b4a8 JK |
24280 | static __init int spawn_ksoftirqd(void) |
24281 | { | |
24282 | cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, | |
24283 | takeover_tasklets); | |
24284 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); | |
24285 | - | |
24286 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24287 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
24288 | +#endif | |
24289 | return 0; | |
24290 | } | |
24291 | early_initcall(spawn_ksoftirqd); | |
24292 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/stop_machine.c linux-4.14/kernel/stop_machine.c | |
24293 | --- linux-4.14.orig/kernel/stop_machine.c 2018-09-05 11:03:22.000000000 +0200 | |
24294 | +++ linux-4.14/kernel/stop_machine.c 2018-09-05 11:05:07.000000000 +0200 | |
24295 | @@ -496,6 +496,8 @@ | |
24296 | struct cpu_stop_done *done = work->done; | |
24297 | int ret; | |
24298 | ||
24299 | + /* XXX */ | |
1a6e0f06 | 24300 | + |
e4b2b4a8 JK |
24301 | /* cpu stop callbacks must not sleep, make in_atomic() == T */ |
24302 | preempt_count_inc(); | |
24303 | ret = fn(arg); | |
24304 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/alarmtimer.c linux-4.14/kernel/time/alarmtimer.c | |
24305 | --- linux-4.14.orig/kernel/time/alarmtimer.c 2018-09-05 11:03:22.000000000 +0200 | |
24306 | +++ linux-4.14/kernel/time/alarmtimer.c 2018-09-05 11:05:07.000000000 +0200 | |
24307 | @@ -436,7 +436,7 @@ | |
24308 | int ret = alarm_try_to_cancel(alarm); | |
24309 | if (ret >= 0) | |
24310 | return ret; | |
24311 | - cpu_relax(); | |
24312 | + hrtimer_wait_for_timer(&alarm->timer); | |
24313 | } | |
24314 | } | |
24315 | EXPORT_SYMBOL_GPL(alarm_cancel); | |
24316 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/hrtimer.c linux-4.14/kernel/time/hrtimer.c | |
24317 | --- linux-4.14.orig/kernel/time/hrtimer.c 2018-09-05 11:03:22.000000000 +0200 | |
24318 | +++ linux-4.14/kernel/time/hrtimer.c 2018-09-05 11:05:07.000000000 +0200 | |
24319 | @@ -60,6 +60,15 @@ | |
24320 | #include "tick-internal.h" | |
1a6e0f06 | 24321 | |
e4b2b4a8 JK |
24322 | /* |
24323 | + * Masks for selecting the soft and hard context timers from | |
24324 | + * cpu_base->active | |
24325 | + */ | |
24326 | +#define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) | |
24327 | +#define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) | |
24328 | +#define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) | |
24329 | +#define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) | |
24330 | + | |
24331 | +/* | |
24332 | * The timer bases: | |
24333 | * | |
24334 | * There are more clockids than hrtimer bases. Thus, we index | |
24335 | @@ -70,7 +79,6 @@ | |
24336 | DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = | |
1a6e0f06 | 24337 | { |
e4b2b4a8 JK |
24338 | .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), |
24339 | - .seq = SEQCNT_ZERO(hrtimer_bases.seq), | |
24340 | .clock_base = | |
24341 | { | |
24342 | { | |
24343 | @@ -93,6 +101,26 @@ | |
24344 | .clockid = CLOCK_TAI, | |
24345 | .get_time = &ktime_get_clocktai, | |
24346 | }, | |
24347 | + { | |
24348 | + .index = HRTIMER_BASE_MONOTONIC_SOFT, | |
24349 | + .clockid = CLOCK_MONOTONIC, | |
24350 | + .get_time = &ktime_get, | |
24351 | + }, | |
24352 | + { | |
24353 | + .index = HRTIMER_BASE_REALTIME_SOFT, | |
24354 | + .clockid = CLOCK_REALTIME, | |
24355 | + .get_time = &ktime_get_real, | |
24356 | + }, | |
24357 | + { | |
24358 | + .index = HRTIMER_BASE_BOOTTIME_SOFT, | |
24359 | + .clockid = CLOCK_BOOTTIME, | |
24360 | + .get_time = &ktime_get_boottime, | |
24361 | + }, | |
24362 | + { | |
24363 | + .index = HRTIMER_BASE_TAI_SOFT, | |
24364 | + .clockid = CLOCK_TAI, | |
24365 | + .get_time = &ktime_get_clocktai, | |
24366 | + }, | |
24367 | } | |
24368 | }; | |
1a6e0f06 | 24369 | |
e4b2b4a8 JK |
24370 | @@ -118,7 +146,6 @@ |
24371 | * timer->base->cpu_base | |
24372 | */ | |
24373 | static struct hrtimer_cpu_base migration_cpu_base = { | |
24374 | - .seq = SEQCNT_ZERO(migration_cpu_base), | |
24375 | .clock_base = { { .cpu_base = &migration_cpu_base, }, }, | |
24376 | }; | |
1a6e0f06 | 24377 | |
e4b2b4a8 | 24378 | @@ -156,45 +183,33 @@ |
1a6e0f06 JK |
24379 | } |
24380 | ||
24381 | /* | |
e4b2b4a8 JK |
24382 | - * With HIGHRES=y we do not migrate the timer when it is expiring |
24383 | - * before the next event on the target cpu because we cannot reprogram | |
24384 | - * the target cpu hardware and we would cause it to fire late. | |
24385 | + * We do not migrate the timer when it is expiring before the next | |
24386 | + * event on the target cpu. When high resolution is enabled, we cannot | |
24387 | + * reprogram the target cpu hardware and we would cause it to fire | |
24388 | + * late. To keep it simple, we handle the high resolution enabled and | |
24389 | + * disabled case similar. | |
24390 | * | |
24391 | * Called with cpu_base->lock of target cpu held. | |
24392 | */ | |
24393 | static int | |
24394 | hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) | |
1a6e0f06 | 24395 | { |
e4b2b4a8 JK |
24396 | -#ifdef CONFIG_HIGH_RES_TIMERS |
24397 | ktime_t expires; | |
1a6e0f06 | 24398 | |
e4b2b4a8 JK |
24399 | - if (!new_base->cpu_base->hres_active) |
24400 | - return 0; | |
24401 | - | |
24402 | expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); | |
24403 | - return expires <= new_base->cpu_base->expires_next; | |
24404 | -#else | |
24405 | - return 0; | |
24406 | -#endif | |
24407 | + return expires < new_base->cpu_base->expires_next; | |
24408 | } | |
1a6e0f06 | 24409 | |
e4b2b4a8 JK |
24410 | -#ifdef CONFIG_NO_HZ_COMMON |
24411 | -static inline | |
24412 | -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, | |
24413 | - int pinned) | |
24414 | -{ | |
24415 | - if (pinned || !base->migration_enabled) | |
24416 | - return base; | |
24417 | - return &per_cpu(hrtimer_bases, get_nohz_timer_target()); | |
24418 | -} | |
24419 | -#else | |
24420 | static inline | |
24421 | struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, | |
24422 | int pinned) | |
24423 | { | |
24424 | +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | |
24425 | + if (static_branch_unlikely(&timers_migration_enabled) && !pinned) | |
24426 | + return &per_cpu(hrtimer_bases, get_nohz_timer_target()); | |
24427 | +#endif | |
24428 | return base; | |
24429 | } | |
24430 | -#endif | |
1a6e0f06 | 24431 | |
e4b2b4a8 JK |
24432 | /* |
24433 | * We switch the timer base to a power-optimized selected CPU target, | |
24434 | @@ -396,7 +411,8 @@ | |
24435 | debug_object_init(timer, &hrtimer_debug_descr); | |
24436 | } | |
1a6e0f06 | 24437 | |
e4b2b4a8 JK |
24438 | -static inline void debug_hrtimer_activate(struct hrtimer *timer) |
24439 | +static inline void debug_hrtimer_activate(struct hrtimer *timer, | |
24440 | + enum hrtimer_mode mode) | |
24441 | { | |
24442 | debug_object_activate(timer, &hrtimer_debug_descr); | |
24443 | } | |
24444 | @@ -429,8 +445,10 @@ | |
24445 | EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); | |
1a6e0f06 | 24446 | |
e4b2b4a8 JK |
24447 | #else |
24448 | + | |
24449 | static inline void debug_hrtimer_init(struct hrtimer *timer) { } | |
24450 | -static inline void debug_hrtimer_activate(struct hrtimer *timer) { } | |
24451 | +static inline void debug_hrtimer_activate(struct hrtimer *timer, | |
24452 | + enum hrtimer_mode mode) { } | |
24453 | static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } | |
24454 | #endif | |
1a6e0f06 | 24455 | |
e4b2b4a8 JK |
24456 | @@ -442,10 +460,11 @@ |
24457 | trace_hrtimer_init(timer, clockid, mode); | |
1a6e0f06 | 24458 | } |
1a6e0f06 | 24459 | |
e4b2b4a8 JK |
24460 | -static inline void debug_activate(struct hrtimer *timer) |
24461 | +static inline void debug_activate(struct hrtimer *timer, | |
24462 | + enum hrtimer_mode mode) | |
24463 | { | |
24464 | - debug_hrtimer_activate(timer); | |
24465 | - trace_hrtimer_start(timer); | |
24466 | + debug_hrtimer_activate(timer, mode); | |
24467 | + trace_hrtimer_start(timer, mode); | |
24468 | } | |
1a6e0f06 | 24469 | |
e4b2b4a8 JK |
24470 | static inline void debug_deactivate(struct hrtimer *timer) |
24471 | @@ -454,35 +473,43 @@ | |
24472 | trace_hrtimer_cancel(timer); | |
1a6e0f06 JK |
24473 | } |
24474 | ||
e4b2b4a8 JK |
24475 | -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) |
24476 | -static inline void hrtimer_update_next_timer(struct hrtimer_cpu_base *cpu_base, | |
24477 | - struct hrtimer *timer) | |
24478 | +static struct hrtimer_clock_base * | |
24479 | +__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) | |
24480 | { | |
24481 | -#ifdef CONFIG_HIGH_RES_TIMERS | |
24482 | - cpu_base->next_timer = timer; | |
24483 | -#endif | |
24484 | + unsigned int idx; | |
1a6e0f06 | 24485 | + |
e4b2b4a8 JK |
24486 | + if (!*active) |
24487 | + return NULL; | |
1a6e0f06 | 24488 | + |
e4b2b4a8 JK |
24489 | + idx = __ffs(*active); |
24490 | + *active &= ~(1U << idx); | |
1a6e0f06 | 24491 | + |
e4b2b4a8 JK |
24492 | + return &cpu_base->clock_base[idx]; |
24493 | } | |
24494 | ||
24495 | -static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base) | |
24496 | +#define for_each_active_base(base, cpu_base, active) \ | |
24497 | + while ((base = __next_base((cpu_base), &(active)))) | |
1a6e0f06 | 24498 | + |
e4b2b4a8 JK |
24499 | +static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, |
24500 | + unsigned int active, | |
24501 | + ktime_t expires_next) | |
24502 | { | |
24503 | - struct hrtimer_clock_base *base = cpu_base->clock_base; | |
24504 | - unsigned int active = cpu_base->active_bases; | |
24505 | - ktime_t expires, expires_next = KTIME_MAX; | |
24506 | + struct hrtimer_clock_base *base; | |
24507 | + ktime_t expires; | |
24508 | ||
24509 | - hrtimer_update_next_timer(cpu_base, NULL); | |
24510 | - for (; active; base++, active >>= 1) { | |
24511 | + for_each_active_base(base, cpu_base, active) { | |
24512 | struct timerqueue_node *next; | |
24513 | struct hrtimer *timer; | |
24514 | ||
24515 | - if (!(active & 0x01)) | |
24516 | - continue; | |
24517 | - | |
24518 | next = timerqueue_getnext(&base->active); | |
24519 | timer = container_of(next, struct hrtimer, node); | |
24520 | expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | |
24521 | if (expires < expires_next) { | |
24522 | expires_next = expires; | |
24523 | - hrtimer_update_next_timer(cpu_base, timer); | |
24524 | + if (timer->is_soft) | |
24525 | + cpu_base->softirq_next_timer = timer; | |
24526 | + else | |
24527 | + cpu_base->next_timer = timer; | |
24528 | } | |
24529 | } | |
24530 | /* | |
24531 | @@ -494,7 +521,47 @@ | |
24532 | expires_next = 0; | |
24533 | return expires_next; | |
24534 | } | |
24535 | -#endif | |
1a6e0f06 | 24536 | + |
e4b2b4a8 JK |
24537 | +/* |
24538 | + * Recomputes cpu_base::*next_timer and returns the earliest expires_next but | |
24539 | + * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. | |
24540 | + * | |
24541 | + * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, | |
24542 | + * those timers will get run whenever the softirq gets handled, at the end of | |
24543 | + * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. | |
24544 | + * | |
24545 | + * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. | |
24546 | + * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual | |
24547 | + * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. | |
24548 | + * | |
24549 | + * @active_mask must be one of: | |
24550 | + * - HRTIMER_ACTIVE_ALL, | |
24551 | + * - HRTIMER_ACTIVE_SOFT, or | |
24552 | + * - HRTIMER_ACTIVE_HARD. | |
24553 | + */ | |
24554 | +static ktime_t | |
24555 | +__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) | |
1a6e0f06 | 24556 | +{ |
e4b2b4a8 JK |
24557 | + unsigned int active; |
24558 | + struct hrtimer *next_timer = NULL; | |
24559 | + ktime_t expires_next = KTIME_MAX; | |
1a6e0f06 | 24560 | + |
e4b2b4a8 JK |
24561 | + if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { |
24562 | + active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; | |
24563 | + cpu_base->softirq_next_timer = NULL; | |
24564 | + expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX); | |
24565 | + | |
24566 | + next_timer = cpu_base->softirq_next_timer; | |
1a6e0f06 | 24567 | + } |
1a6e0f06 | 24568 | + |
e4b2b4a8 JK |
24569 | + if (active_mask & HRTIMER_ACTIVE_HARD) { |
24570 | + active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; | |
24571 | + cpu_base->next_timer = next_timer; | |
24572 | + expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next); | |
24573 | + } | |
1a6e0f06 | 24574 | + |
e4b2b4a8 | 24575 | + return expires_next; |
1a6e0f06 | 24576 | +} |
e4b2b4a8 JK |
24577 | |
24578 | static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) | |
24579 | { | |
24580 | @@ -502,36 +569,14 @@ | |
24581 | ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; | |
24582 | ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; | |
24583 | ||
24584 | - return ktime_get_update_offsets_now(&base->clock_was_set_seq, | |
24585 | + ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, | |
24586 | offs_real, offs_boot, offs_tai); | |
24587 | -} | |
24588 | - | |
24589 | -/* High resolution timer related functions */ | |
24590 | -#ifdef CONFIG_HIGH_RES_TIMERS | |
24591 | - | |
24592 | -/* | |
24593 | - * High resolution timer enabled ? | |
24594 | - */ | |
24595 | -static bool hrtimer_hres_enabled __read_mostly = true; | |
24596 | -unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; | |
24597 | -EXPORT_SYMBOL_GPL(hrtimer_resolution); | |
24598 | - | |
24599 | -/* | |
24600 | - * Enable / Disable high resolution mode | |
24601 | - */ | |
24602 | -static int __init setup_hrtimer_hres(char *str) | |
24603 | -{ | |
24604 | - return (kstrtobool(str, &hrtimer_hres_enabled) == 0); | |
24605 | -} | |
24606 | ||
24607 | -__setup("highres=", setup_hrtimer_hres); | |
24608 | + base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; | |
24609 | + base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; | |
24610 | + base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; | |
24611 | ||
24612 | -/* | |
24613 | - * hrtimer_high_res_enabled - query, if the highres mode is enabled | |
24614 | - */ | |
24615 | -static inline int hrtimer_is_hres_enabled(void) | |
24616 | -{ | |
24617 | - return hrtimer_hres_enabled; | |
24618 | + return now; | |
24619 | } | |
24620 | ||
1a6e0f06 | 24621 | /* |
e4b2b4a8 | 24622 | @@ -539,7 +584,8 @@ |
1a6e0f06 | 24623 | */ |
e4b2b4a8 | 24624 | static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) |
1a6e0f06 | 24625 | { |
e4b2b4a8 JK |
24626 | - return cpu_base->hres_active; |
24627 | + return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? | |
24628 | + cpu_base->hres_active : 0; | |
24629 | } | |
1a6e0f06 | 24630 | |
e4b2b4a8 JK |
24631 | static inline int hrtimer_hres_active(void) |
24632 | @@ -557,10 +603,23 @@ | |
24633 | { | |
24634 | ktime_t expires_next; | |
1a6e0f06 | 24635 | |
e4b2b4a8 JK |
24636 | - if (!cpu_base->hres_active) |
24637 | - return; | |
24638 | + /* | |
24639 | + * Find the current next expiration time. | |
24640 | + */ | |
24641 | + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); | |
1a6e0f06 | 24642 | |
e4b2b4a8 JK |
24643 | - expires_next = __hrtimer_get_next_event(cpu_base); |
24644 | + if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { | |
24645 | + /* | |
24646 | + * When the softirq is activated, hrtimer has to be | |
24647 | + * programmed with the first hard hrtimer because soft | |
24648 | + * timer interrupt could occur too late. | |
24649 | + */ | |
24650 | + if (cpu_base->softirq_activated) | |
24651 | + expires_next = __hrtimer_get_next_event(cpu_base, | |
24652 | + HRTIMER_ACTIVE_HARD); | |
24653 | + else | |
24654 | + cpu_base->softirq_expires_next = expires_next; | |
1a6e0f06 JK |
24655 | + } |
24656 | ||
e4b2b4a8 JK |
24657 | if (skip_equal && expires_next == cpu_base->expires_next) |
24658 | return; | |
24659 | @@ -568,6 +627,9 @@ | |
24660 | cpu_base->expires_next = expires_next; | |
1a6e0f06 JK |
24661 | |
24662 | /* | |
e4b2b4a8 JK |
24663 | + * If hres is not active, hardware does not have to be |
24664 | + * reprogrammed yet. | |
24665 | + * | |
24666 | * If a hang was detected in the last timer interrupt then we | |
24667 | * leave the hang delay active in the hardware. We want the | |
24668 | * system to make progress. That also prevents the following | |
24669 | @@ -581,83 +643,38 @@ | |
24670 | * set. So we'd effectivly block all timers until the T2 event | |
24671 | * fires. | |
1a6e0f06 | 24672 | */ |
e4b2b4a8 JK |
24673 | - if (cpu_base->hang_detected) |
24674 | + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) | |
24675 | return; | |
1a6e0f06 | 24676 | |
e4b2b4a8 JK |
24677 | tick_program_event(cpu_base->expires_next, 1); |
24678 | } | |
1a6e0f06 | 24679 | |
e4b2b4a8 JK |
24680 | +/* High resolution timer related functions */ |
24681 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
24682 | + | |
24683 | /* | |
24684 | - * When a timer is enqueued and expires earlier than the already enqueued | |
24685 | - * timers, we have to check, whether it expires earlier than the timer for | |
24686 | - * which the clock event device was armed. | |
24687 | - * | |
24688 | - * Called with interrupts disabled and base->cpu_base.lock held | |
24689 | + * High resolution timer enabled ? | |
1a6e0f06 | 24690 | */ |
e4b2b4a8 JK |
24691 | -static void hrtimer_reprogram(struct hrtimer *timer, |
24692 | - struct hrtimer_clock_base *base) | |
24693 | -{ | |
24694 | - struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); | |
24695 | - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | |
24696 | - | |
24697 | - WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); | |
24698 | - | |
24699 | - /* | |
24700 | - * If the timer is not on the current cpu, we cannot reprogram | |
24701 | - * the other cpus clock event device. | |
24702 | - */ | |
24703 | - if (base->cpu_base != cpu_base) | |
24704 | - return; | |
24705 | - | |
24706 | - /* | |
24707 | - * If the hrtimer interrupt is running, then it will | |
24708 | - * reevaluate the clock bases and reprogram the clock event | |
24709 | - * device. The callbacks are always executed in hard interrupt | |
24710 | - * context so we don't need an extra check for a running | |
24711 | - * callback. | |
24712 | - */ | |
24713 | - if (cpu_base->in_hrtirq) | |
24714 | - return; | |
24715 | - | |
24716 | - /* | |
24717 | - * CLOCK_REALTIME timer might be requested with an absolute | |
24718 | - * expiry time which is less than base->offset. Set it to 0. | |
24719 | - */ | |
24720 | - if (expires < 0) | |
24721 | - expires = 0; | |
24722 | - | |
24723 | - if (expires >= cpu_base->expires_next) | |
24724 | - return; | |
24725 | - | |
24726 | - /* Update the pointer to the next expiring timer */ | |
24727 | - cpu_base->next_timer = timer; | |
24728 | - | |
24729 | - /* | |
24730 | - * If a hang was detected in the last timer interrupt then we | |
24731 | - * do not schedule a timer which is earlier than the expiry | |
24732 | - * which we enforced in the hang detection. We want the system | |
24733 | - * to make progress. | |
24734 | - */ | |
24735 | - if (cpu_base->hang_detected) | |
24736 | - return; | |
24737 | +static bool hrtimer_hres_enabled __read_mostly = true; | |
24738 | +unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; | |
24739 | +EXPORT_SYMBOL_GPL(hrtimer_resolution); | |
24740 | ||
24741 | - /* | |
24742 | - * Program the timer hardware. We enforce the expiry for | |
24743 | - * events which are already in the past. | |
24744 | - */ | |
24745 | - cpu_base->expires_next = expires; | |
24746 | - tick_program_event(expires, 1); | |
24747 | +/* | |
24748 | + * Enable / Disable high resolution mode | |
24749 | + */ | |
24750 | +static int __init setup_hrtimer_hres(char *str) | |
24751 | +{ | |
24752 | + return (kstrtobool(str, &hrtimer_hres_enabled) == 0); | |
1a6e0f06 JK |
24753 | } |
24754 | ||
e4b2b4a8 JK |
24755 | +__setup("highres=", setup_hrtimer_hres); |
24756 | + | |
24757 | /* | |
24758 | - * Initialize the high resolution related parts of cpu_base | |
24759 | + * hrtimer_high_res_enabled - query, if the highres mode is enabled | |
24760 | */ | |
24761 | -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) | |
24762 | +static inline int hrtimer_is_hres_enabled(void) | |
1a6e0f06 | 24763 | { |
e4b2b4a8 JK |
24764 | - base->expires_next = KTIME_MAX; |
24765 | - base->hang_detected = 0; | |
24766 | - base->hres_active = 0; | |
24767 | - base->next_timer = NULL; | |
24768 | + return hrtimer_hres_enabled; | |
1a6e0f06 JK |
24769 | } |
24770 | ||
e4b2b4a8 JK |
24771 | /* |
24772 | @@ -669,7 +686,7 @@ | |
1a6e0f06 | 24773 | { |
e4b2b4a8 | 24774 | struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); |
1a6e0f06 | 24775 | |
e4b2b4a8 JK |
24776 | - if (!base->hres_active) |
24777 | + if (!__hrtimer_hres_active(base)) | |
24778 | return; | |
1a6e0f06 | 24779 | |
e4b2b4a8 JK |
24780 | raw_spin_lock(&base->lock); |
24781 | @@ -698,6 +715,29 @@ | |
24782 | retrigger_next_event(NULL); | |
24783 | } | |
1a6e0f06 | 24784 | |
e4b2b4a8 JK |
24785 | +#ifdef CONFIG_PREEMPT_RT_FULL |
24786 | + | |
24787 | +static struct swork_event clock_set_delay_work; | |
24788 | + | |
24789 | +static void run_clock_set_delay(struct swork_event *event) | |
24790 | +{ | |
24791 | + clock_was_set(); | |
24792 | +} | |
24793 | + | |
24794 | +void clock_was_set_delayed(void) | |
24795 | +{ | |
24796 | + swork_queue(&clock_set_delay_work); | |
24797 | +} | |
24798 | + | |
24799 | +static __init int create_clock_set_delay_thread(void) | |
24800 | +{ | |
24801 | + WARN_ON(swork_get()); | |
24802 | + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); | |
24803 | + return 0; | |
24804 | +} | |
24805 | +early_initcall(create_clock_set_delay_thread); | |
24806 | +#else /* PREEMPT_RT_FULL */ | |
24807 | + | |
24808 | static void clock_was_set_work(struct work_struct *work) | |
1a6e0f06 | 24809 | { |
e4b2b4a8 JK |
24810 | clock_was_set(); |
24811 | @@ -713,26 +753,106 @@ | |
24812 | { | |
24813 | schedule_work(&hrtimer_work); | |
1a6e0f06 | 24814 | } |
e4b2b4a8 | 24815 | +#endif |
1a6e0f06 | 24816 | |
e4b2b4a8 | 24817 | #else |
1a6e0f06 | 24818 | |
e4b2b4a8 JK |
24819 | -static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *b) { return 0; } |
24820 | -static inline int hrtimer_hres_active(void) { return 0; } | |
24821 | static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
24822 | static inline void hrtimer_switch_to_hres(void) { } | |
24823 | -static inline void | |
24824 | -hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
24825 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
24826 | - struct hrtimer_clock_base *base) | |
24827 | -{ | |
24828 | - return 0; | |
24829 | -} | |
24830 | -static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
24831 | static inline void retrigger_next_event(void *arg) { } | |
1a6e0f06 | 24832 | |
e4b2b4a8 | 24833 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
1a6e0f06 | 24834 | |
e4b2b4a8 JK |
24835 | /* |
24836 | + * When a timer is enqueued and expires earlier than the already enqueued | |
24837 | + * timers, we have to check, whether it expires earlier than the timer for | |
24838 | + * which the clock event device was armed. | |
24839 | + * | |
24840 | + * Called with interrupts disabled and base->cpu_base.lock held | |
24841 | + */ | |
24842 | +static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) | |
24843 | +{ | |
24844 | + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); | |
24845 | + struct hrtimer_clock_base *base = timer->base; | |
24846 | + ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); | |
24847 | + | |
24848 | + WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); | |
24849 | + | |
24850 | + /* | |
24851 | + * CLOCK_REALTIME timer might be requested with an absolute | |
24852 | + * expiry time which is less than base->offset. Set it to 0. | |
24853 | + */ | |
24854 | + if (expires < 0) | |
24855 | + expires = 0; | |
24856 | + | |
24857 | + if (timer->is_soft) { | |
24858 | + /* | |
24859 | + * soft hrtimer could be started on a remote CPU. In this | |
24860 | + * case softirq_expires_next needs to be updated on the | |
24861 | + * remote CPU. The soft hrtimer will not expire before the | |
24862 | + * first hard hrtimer on the remote CPU - | |
24863 | + * hrtimer_check_target() prevents this case. | |
24864 | + */ | |
24865 | + struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; | |
24866 | + | |
24867 | + if (timer_cpu_base->softirq_activated) | |
24868 | + return; | |
24869 | + | |
24870 | + if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) | |
24871 | + return; | |
24872 | + | |
24873 | + timer_cpu_base->softirq_next_timer = timer; | |
24874 | + timer_cpu_base->softirq_expires_next = expires; | |
24875 | + | |
24876 | + if (!ktime_before(expires, timer_cpu_base->expires_next) || | |
24877 | + !reprogram) | |
24878 | + return; | |
24879 | + } | |
24880 | + | |
24881 | + /* | |
24882 | + * If the timer is not on the current cpu, we cannot reprogram | |
24883 | + * the other cpus clock event device. | |
24884 | + */ | |
24885 | + if (base->cpu_base != cpu_base) | |
24886 | + return; | |
24887 | + | |
24888 | + /* | |
24889 | + * If the hrtimer interrupt is running, then it will | |
24890 | + * reevaluate the clock bases and reprogram the clock event | |
24891 | + * device. The callbacks are always executed in hard interrupt | |
24892 | + * context so we don't need an extra check for a running | |
24893 | + * callback. | |
24894 | + */ | |
24895 | + if (cpu_base->in_hrtirq) | |
24896 | + return; | |
24897 | + | |
24898 | + if (expires >= cpu_base->expires_next) | |
24899 | + return; | |
24900 | + | |
24901 | + /* Update the pointer to the next expiring timer */ | |
24902 | + cpu_base->next_timer = timer; | |
24903 | + cpu_base->expires_next = expires; | |
24904 | + | |
24905 | + /* | |
24906 | + * If hres is not active, hardware does not have to be | |
24907 | + * programmed yet. | |
24908 | + * | |
24909 | + * If a hang was detected in the last timer interrupt then we | |
24910 | + * do not schedule a timer which is earlier than the expiry | |
24911 | + * which we enforced in the hang detection. We want the system | |
24912 | + * to make progress. | |
24913 | + */ | |
24914 | + if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) | |
24915 | + return; | |
24916 | + | |
24917 | + /* | |
24918 | + * Program the timer hardware. We enforce the expiry for | |
24919 | + * events which are already in the past. | |
24920 | + */ | |
24921 | + tick_program_event(expires, 1); | |
24922 | +} | |
24923 | + | |
24924 | +/* | |
24925 | * Clock realtime was set | |
24926 | * | |
24927 | * Change the offset of the realtime clock vs. the monotonic | |
24928 | @@ -830,6 +950,33 @@ | |
1a6e0f06 | 24929 | } |
e4b2b4a8 | 24930 | EXPORT_SYMBOL_GPL(hrtimer_forward); |
1a6e0f06 | 24931 | |
e4b2b4a8 JK |
24932 | +#ifdef CONFIG_PREEMPT_RT_BASE |
24933 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
24934 | + | |
24935 | +/** | |
24936 | + * hrtimer_wait_for_timer - Wait for a running timer | |
1a6e0f06 | 24937 | + * |
e4b2b4a8 | 24938 | + * @timer: timer to wait for |
1a6e0f06 | 24939 | + * |
e4b2b4a8 JK |
24940 | + * The function waits in case the timers callback function is |
24941 | + * currently executed on the waitqueue of the timer base. The | |
24942 | + * waitqueue is woken up after the timer callback function has | |
24943 | + * finished execution. | |
1a6e0f06 | 24944 | + */ |
e4b2b4a8 | 24945 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) |
1a6e0f06 | 24946 | +{ |
e4b2b4a8 | 24947 | + struct hrtimer_clock_base *base = timer->base; |
1a6e0f06 | 24948 | + |
e4b2b4a8 JK |
24949 | + if (base && base->cpu_base && |
24950 | + base->index >= HRTIMER_BASE_MONOTONIC_SOFT) | |
24951 | + wait_event(base->cpu_base->wait, | |
24952 | + !(hrtimer_callback_running(timer))); | |
1a6e0f06 | 24953 | +} |
1a6e0f06 | 24954 | + |
1a6e0f06 | 24955 | +#else |
e4b2b4a8 | 24956 | +# define wake_up_timer_waiters(b) do { } while (0) |
1a6e0f06 | 24957 | +#endif |
e4b2b4a8 JK |
24958 | + |
24959 | /* | |
24960 | * enqueue_hrtimer - internal function to (re)start a timer | |
24961 | * | |
24962 | @@ -839,9 +986,10 @@ | |
24963 | * Returns 1 when the new timer is the leftmost timer in the tree. | |
1a6e0f06 | 24964 | */ |
e4b2b4a8 JK |
24965 | static int enqueue_hrtimer(struct hrtimer *timer, |
24966 | - struct hrtimer_clock_base *base) | |
24967 | + struct hrtimer_clock_base *base, | |
24968 | + enum hrtimer_mode mode) | |
1a6e0f06 | 24969 | { |
e4b2b4a8 JK |
24970 | - debug_activate(timer); |
24971 | + debug_activate(timer, mode); | |
1a6e0f06 | 24972 | |
e4b2b4a8 JK |
24973 | base->cpu_base->active_bases |= 1 << base->index; |
24974 | ||
24975 | @@ -874,7 +1022,6 @@ | |
24976 | if (!timerqueue_del(&base->active, &timer->node)) | |
24977 | cpu_base->active_bases &= ~(1 << base->index); | |
24978 | ||
24979 | -#ifdef CONFIG_HIGH_RES_TIMERS | |
24980 | /* | |
24981 | * Note: If reprogram is false we do not update | |
24982 | * cpu_base->next_timer. This happens when we remove the first | |
24983 | @@ -885,7 +1032,6 @@ | |
24984 | */ | |
24985 | if (reprogram && timer == cpu_base->next_timer) | |
24986 | hrtimer_force_reprogram(cpu_base, 1); | |
24987 | -#endif | |
24988 | } | |
1a6e0f06 | 24989 | |
e4b2b4a8 JK |
24990 | /* |
24991 | @@ -934,22 +1080,36 @@ | |
24992 | return tim; | |
1a6e0f06 | 24993 | } |
1a6e0f06 | 24994 | |
e4b2b4a8 JK |
24995 | -/** |
24996 | - * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU | |
24997 | - * @timer: the timer to be added | |
24998 | - * @tim: expiry time | |
24999 | - * @delta_ns: "slack" range for the timer | |
25000 | - * @mode: expiry mode: absolute (HRTIMER_MODE_ABS) or | |
25001 | - * relative (HRTIMER_MODE_REL) | |
25002 | - */ | |
25003 | -void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
25004 | - u64 delta_ns, const enum hrtimer_mode mode) | |
25005 | +static void | |
25006 | +hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) | |
1a6e0f06 | 25007 | { |
e4b2b4a8 JK |
25008 | - struct hrtimer_clock_base *base, *new_base; |
25009 | - unsigned long flags; | |
25010 | - int leftmost; | |
25011 | + ktime_t expires; | |
1a6e0f06 | 25012 | |
e4b2b4a8 | 25013 | - base = lock_hrtimer_base(timer, &flags); |
1a6e0f06 | 25014 | + /* |
e4b2b4a8 | 25015 | + * Find the next SOFT expiration. |
1a6e0f06 | 25016 | + */ |
e4b2b4a8 | 25017 | + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); |
1a6e0f06 | 25018 | + |
e4b2b4a8 JK |
25019 | + /* |
25020 | + * reprogramming needs to be triggered, even if the next soft | |
25021 | + * hrtimer expires at the same time than the next hard | |
25022 | + * hrtimer. cpu_base->softirq_expires_next needs to be updated! | |
25023 | + */ | |
25024 | + if (expires == KTIME_MAX) | |
25025 | + return; | |
1a6e0f06 | 25026 | + |
e4b2b4a8 JK |
25027 | + /* |
25028 | + * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() | |
25029 | + * cpu_base->*expires_next is only set by hrtimer_reprogram() | |
25030 | + */ | |
25031 | + hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); | |
1a6e0f06 | 25032 | +} |
1a6e0f06 | 25033 | + |
e4b2b4a8 JK |
25034 | +static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, |
25035 | + u64 delta_ns, const enum hrtimer_mode mode, | |
25036 | + struct hrtimer_clock_base *base) | |
1a6e0f06 | 25037 | +{ |
e4b2b4a8 JK |
25038 | + struct hrtimer_clock_base *new_base; |
25039 | ||
25040 | /* Remove an active timer from the queue: */ | |
25041 | remove_hrtimer(timer, base, true); | |
25042 | @@ -964,21 +1124,37 @@ | |
25043 | /* Switch the timer base, if necessary: */ | |
25044 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
25045 | ||
25046 | - leftmost = enqueue_hrtimer(timer, new_base); | |
25047 | - if (!leftmost) | |
25048 | - goto unlock; | |
25049 | + return enqueue_hrtimer(timer, new_base, mode); | |
1a6e0f06 | 25050 | +} |
1a6e0f06 | 25051 | + |
e4b2b4a8 JK |
25052 | +/** |
25053 | + * hrtimer_start_range_ns - (re)start an hrtimer | |
25054 | + * @timer: the timer to be added | |
25055 | + * @tim: expiry time | |
25056 | + * @delta_ns: "slack" range for the timer | |
25057 | + * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or | |
25058 | + * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); | |
25059 | + * softirq based mode is considered for debug purpose only! | |
25060 | + */ | |
25061 | +void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
25062 | + u64 delta_ns, const enum hrtimer_mode mode) | |
1a6e0f06 | 25063 | +{ |
e4b2b4a8 JK |
25064 | + struct hrtimer_clock_base *base; |
25065 | + unsigned long flags; | |
1a6e0f06 JK |
25066 | + |
25067 | + /* | |
e4b2b4a8 JK |
25068 | + * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft |
25069 | + * match. | |
1a6e0f06 | 25070 | + */ |
e4b2b4a8 JK |
25071 | +#ifndef CONFIG_PREEMPT_RT_BASE |
25072 | + WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); | |
1a6e0f06 | 25073 | +#endif |
1a6e0f06 | 25074 | + |
e4b2b4a8 JK |
25075 | + base = lock_hrtimer_base(timer, &flags); |
25076 | + | |
25077 | + if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) | |
25078 | + hrtimer_reprogram(timer, true); | |
25079 | ||
25080 | - if (!hrtimer_is_hres_active(timer)) { | |
25081 | - /* | |
25082 | - * Kick to reschedule the next tick to handle the new timer | |
25083 | - * on dynticks target. | |
25084 | - */ | |
25085 | - if (new_base->cpu_base->nohz_active) | |
25086 | - wake_up_nohz_cpu(new_base->cpu_base->cpu); | |
25087 | - } else { | |
25088 | - hrtimer_reprogram(timer, new_base); | |
25089 | - } | |
25090 | -unlock: | |
25091 | unlock_hrtimer_base(timer, &flags); | |
25092 | } | |
25093 | EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); | |
25094 | @@ -1035,7 +1211,7 @@ | |
25095 | ||
25096 | if (ret >= 0) | |
25097 | return ret; | |
25098 | - cpu_relax(); | |
25099 | + hrtimer_wait_for_timer(timer); | |
25100 | } | |
25101 | } | |
25102 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
25103 | @@ -1076,7 +1252,7 @@ | |
25104 | raw_spin_lock_irqsave(&cpu_base->lock, flags); | |
25105 | ||
25106 | if (!__hrtimer_hres_active(cpu_base)) | |
25107 | - expires = __hrtimer_get_next_event(cpu_base); | |
25108 | + expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); | |
25109 | ||
25110 | raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
25111 | ||
25112 | @@ -1099,8 +1275,16 @@ | |
25113 | static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
25114 | enum hrtimer_mode mode) | |
25115 | { | |
25116 | - struct hrtimer_cpu_base *cpu_base; | |
25117 | + bool softtimer; | |
25118 | int base; | |
25119 | + struct hrtimer_cpu_base *cpu_base; | |
25120 | + | |
25121 | + softtimer = !!(mode & HRTIMER_MODE_SOFT); | |
25122 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25123 | + if (!softtimer && !(mode & HRTIMER_MODE_HARD)) | |
25124 | + softtimer = true; | |
1a6e0f06 | 25125 | +#endif |
e4b2b4a8 JK |
25126 | + base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; |
25127 | ||
25128 | memset(timer, 0, sizeof(struct hrtimer)); | |
25129 | ||
25130 | @@ -1114,7 +1298,8 @@ | |
25131 | if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) | |
25132 | clock_id = CLOCK_MONOTONIC; | |
25133 | ||
25134 | - base = hrtimer_clockid_to_base(clock_id); | |
25135 | + base += hrtimer_clockid_to_base(clock_id); | |
25136 | + timer->is_soft = softtimer; | |
25137 | timer->base = &cpu_base->clock_base[base]; | |
25138 | timerqueue_init(&timer->node); | |
25139 | } | |
25140 | @@ -1123,7 +1308,13 @@ | |
25141 | * hrtimer_init - initialize a timer to the given clock | |
25142 | * @timer: the timer to be initialized | |
25143 | * @clock_id: the clock to be used | |
25144 | - * @mode: timer mode abs/rel | |
25145 | + * @mode: The modes which are relevant for intitialization: | |
25146 | + * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, | |
25147 | + * HRTIMER_MODE_REL_SOFT | |
25148 | + * | |
25149 | + * The PINNED variants of the above can be handed in, | |
25150 | + * but the PINNED bit is ignored as pinning happens | |
25151 | + * when the hrtimer is started | |
1a6e0f06 | 25152 | */ |
e4b2b4a8 JK |
25153 | void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, |
25154 | enum hrtimer_mode mode) | |
25155 | @@ -1142,19 +1333,19 @@ | |
25156 | */ | |
25157 | bool hrtimer_active(const struct hrtimer *timer) | |
25158 | { | |
25159 | - struct hrtimer_cpu_base *cpu_base; | |
25160 | + struct hrtimer_clock_base *base; | |
25161 | unsigned int seq; | |
25162 | ||
25163 | do { | |
25164 | - cpu_base = READ_ONCE(timer->base->cpu_base); | |
25165 | - seq = raw_read_seqcount_begin(&cpu_base->seq); | |
25166 | + base = READ_ONCE(timer->base); | |
25167 | + seq = raw_read_seqcount_begin(&base->seq); | |
25168 | ||
25169 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
25170 | - cpu_base->running == timer) | |
25171 | + base->running == timer) | |
25172 | return true; | |
25173 | ||
25174 | - } while (read_seqcount_retry(&cpu_base->seq, seq) || | |
25175 | - cpu_base != READ_ONCE(timer->base->cpu_base)); | |
25176 | + } while (read_seqcount_retry(&base->seq, seq) || | |
25177 | + base != READ_ONCE(timer->base)); | |
25178 | ||
25179 | return false; | |
25180 | } | |
25181 | @@ -1180,7 +1371,8 @@ | |
25182 | ||
25183 | static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
25184 | struct hrtimer_clock_base *base, | |
25185 | - struct hrtimer *timer, ktime_t *now) | |
25186 | + struct hrtimer *timer, ktime_t *now, | |
25187 | + unsigned long flags) | |
25188 | { | |
25189 | enum hrtimer_restart (*fn)(struct hrtimer *); | |
25190 | int restart; | |
25191 | @@ -1188,16 +1380,16 @@ | |
25192 | lockdep_assert_held(&cpu_base->lock); | |
25193 | ||
25194 | debug_deactivate(timer); | |
25195 | - cpu_base->running = timer; | |
25196 | + base->running = timer; | |
25197 | ||
25198 | /* | |
25199 | * Separate the ->running assignment from the ->state assignment. | |
25200 | * | |
25201 | * As with a regular write barrier, this ensures the read side in | |
25202 | - * hrtimer_active() cannot observe cpu_base->running == NULL && | |
25203 | + * hrtimer_active() cannot observe base->running == NULL && | |
25204 | * timer->state == INACTIVE. | |
25205 | */ | |
25206 | - raw_write_seqcount_barrier(&cpu_base->seq); | |
25207 | + raw_write_seqcount_barrier(&base->seq); | |
25208 | ||
25209 | __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
25210 | fn = timer->function; | |
25211 | @@ -1211,15 +1403,15 @@ | |
25212 | timer->is_rel = false; | |
25213 | ||
25214 | /* | |
25215 | - * Because we run timers from hardirq context, there is no chance | |
25216 | - * they get migrated to another cpu, therefore its safe to unlock | |
25217 | - * the timer base. | |
25218 | + * The timer is marked as running in the cpu base, so it is | |
25219 | + * protected against migration to a different CPU even if the lock | |
25220 | + * is dropped. | |
25221 | */ | |
25222 | - raw_spin_unlock(&cpu_base->lock); | |
25223 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
25224 | trace_hrtimer_expire_entry(timer, now); | |
25225 | restart = fn(timer); | |
25226 | trace_hrtimer_expire_exit(timer); | |
25227 | - raw_spin_lock(&cpu_base->lock); | |
25228 | + raw_spin_lock_irq(&cpu_base->lock); | |
25229 | ||
25230 | /* | |
25231 | * Note: We clear the running state after enqueue_hrtimer and | |
25232 | @@ -1232,33 +1424,31 @@ | |
25233 | */ | |
25234 | if (restart != HRTIMER_NORESTART && | |
25235 | !(timer->state & HRTIMER_STATE_ENQUEUED)) | |
25236 | - enqueue_hrtimer(timer, base); | |
25237 | + enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); | |
25238 | ||
25239 | /* | |
25240 | * Separate the ->running assignment from the ->state assignment. | |
25241 | * | |
25242 | * As with a regular write barrier, this ensures the read side in | |
25243 | - * hrtimer_active() cannot observe cpu_base->running == NULL && | |
25244 | + * hrtimer_active() cannot observe base->running.timer == NULL && | |
25245 | * timer->state == INACTIVE. | |
25246 | */ | |
25247 | - raw_write_seqcount_barrier(&cpu_base->seq); | |
25248 | + raw_write_seqcount_barrier(&base->seq); | |
1a6e0f06 | 25249 | |
e4b2b4a8 JK |
25250 | - WARN_ON_ONCE(cpu_base->running != timer); |
25251 | - cpu_base->running = NULL; | |
25252 | + WARN_ON_ONCE(base->running != timer); | |
25253 | + base->running = NULL; | |
25254 | } | |
1a6e0f06 | 25255 | |
e4b2b4a8 JK |
25256 | -static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) |
25257 | +static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, | |
25258 | + unsigned long flags, unsigned int active_mask) | |
25259 | { | |
25260 | - struct hrtimer_clock_base *base = cpu_base->clock_base; | |
25261 | - unsigned int active = cpu_base->active_bases; | |
25262 | + struct hrtimer_clock_base *base; | |
25263 | + unsigned int active = cpu_base->active_bases & active_mask; | |
25264 | ||
25265 | - for (; active; base++, active >>= 1) { | |
25266 | + for_each_active_base(base, cpu_base, active) { | |
25267 | struct timerqueue_node *node; | |
25268 | ktime_t basenow; | |
25269 | ||
25270 | - if (!(active & 0x01)) | |
25271 | - continue; | |
25272 | - | |
25273 | basenow = ktime_add(now, base->offset); | |
25274 | ||
25275 | while ((node = timerqueue_getnext(&base->active))) { | |
25276 | @@ -1281,11 +1471,29 @@ | |
25277 | if (basenow < hrtimer_get_softexpires_tv64(timer)) | |
25278 | break; | |
25279 | ||
25280 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
25281 | + __run_hrtimer(cpu_base, base, timer, &basenow, flags); | |
25282 | } | |
25283 | } | |
25284 | } | |
25285 | ||
25286 | +static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h) | |
1a6e0f06 | 25287 | +{ |
e4b2b4a8 JK |
25288 | + struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); |
25289 | + unsigned long flags; | |
25290 | + ktime_t now; | |
1a6e0f06 | 25291 | + |
e4b2b4a8 | 25292 | + raw_spin_lock_irqsave(&cpu_base->lock, flags); |
1a6e0f06 | 25293 | + |
e4b2b4a8 JK |
25294 | + now = hrtimer_update_base(cpu_base); |
25295 | + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); | |
25296 | + | |
25297 | + cpu_base->softirq_activated = 0; | |
25298 | + hrtimer_update_softirq_timer(cpu_base, true); | |
25299 | + | |
25300 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
25301 | + wake_up_timer_waiters(cpu_base); | |
25302 | +} | |
25303 | + | |
25304 | #ifdef CONFIG_HIGH_RES_TIMERS | |
1a6e0f06 | 25305 | |
e4b2b4a8 JK |
25306 | /* |
25307 | @@ -1296,13 +1504,14 @@ | |
25308 | { | |
25309 | struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); | |
25310 | ktime_t expires_next, now, entry_time, delta; | |
25311 | + unsigned long flags; | |
25312 | int retries = 0; | |
1a6e0f06 | 25313 | |
e4b2b4a8 JK |
25314 | BUG_ON(!cpu_base->hres_active); |
25315 | cpu_base->nr_events++; | |
25316 | dev->next_event = KTIME_MAX; | |
1a6e0f06 | 25317 | |
e4b2b4a8 JK |
25318 | - raw_spin_lock(&cpu_base->lock); |
25319 | + raw_spin_lock_irqsave(&cpu_base->lock, flags); | |
25320 | entry_time = now = hrtimer_update_base(cpu_base); | |
25321 | retry: | |
25322 | cpu_base->in_hrtirq = 1; | |
25323 | @@ -1315,17 +1524,23 @@ | |
25324 | */ | |
25325 | cpu_base->expires_next = KTIME_MAX; | |
1a6e0f06 | 25326 | |
e4b2b4a8 JK |
25327 | - __hrtimer_run_queues(cpu_base, now); |
25328 | + if (!ktime_before(now, cpu_base->softirq_expires_next)) { | |
25329 | + cpu_base->softirq_expires_next = KTIME_MAX; | |
25330 | + cpu_base->softirq_activated = 1; | |
25331 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
25332 | + } | |
1a6e0f06 | 25333 | + |
e4b2b4a8 | 25334 | + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); |
1a6e0f06 | 25335 | |
e4b2b4a8 JK |
25336 | /* Reevaluate the clock bases for the next expiry */ |
25337 | - expires_next = __hrtimer_get_next_event(cpu_base); | |
25338 | + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); | |
25339 | /* | |
25340 | * Store the new expiry value so the migration code can verify | |
25341 | * against it. | |
25342 | */ | |
25343 | cpu_base->expires_next = expires_next; | |
25344 | cpu_base->in_hrtirq = 0; | |
25345 | - raw_spin_unlock(&cpu_base->lock); | |
25346 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
25347 | ||
25348 | /* Reprogramming necessary ? */ | |
25349 | if (!tick_program_event(expires_next, 0)) { | |
25350 | @@ -1346,7 +1561,7 @@ | |
25351 | * Acquire base lock for updating the offsets and retrieving | |
25352 | * the current time. | |
25353 | */ | |
25354 | - raw_spin_lock(&cpu_base->lock); | |
25355 | + raw_spin_lock_irqsave(&cpu_base->lock, flags); | |
25356 | now = hrtimer_update_base(cpu_base); | |
25357 | cpu_base->nr_retries++; | |
25358 | if (++retries < 3) | |
25359 | @@ -1359,7 +1574,8 @@ | |
25360 | */ | |
25361 | cpu_base->nr_hangs++; | |
25362 | cpu_base->hang_detected = 1; | |
25363 | - raw_spin_unlock(&cpu_base->lock); | |
25364 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
25365 | + | |
25366 | delta = ktime_sub(now, entry_time); | |
25367 | if ((unsigned int)delta > cpu_base->max_hang_time) | |
25368 | cpu_base->max_hang_time = (unsigned int) delta; | |
25369 | @@ -1401,6 +1617,7 @@ | |
25370 | void hrtimer_run_queues(void) | |
25371 | { | |
25372 | struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); | |
25373 | + unsigned long flags; | |
25374 | ktime_t now; | |
1a6e0f06 | 25375 | |
e4b2b4a8 JK |
25376 | if (__hrtimer_hres_active(cpu_base)) |
25377 | @@ -1418,10 +1635,17 @@ | |
25378 | return; | |
25379 | } | |
1a6e0f06 | 25380 | |
e4b2b4a8 JK |
25381 | - raw_spin_lock(&cpu_base->lock); |
25382 | + raw_spin_lock_irqsave(&cpu_base->lock, flags); | |
25383 | now = hrtimer_update_base(cpu_base); | |
25384 | - __hrtimer_run_queues(cpu_base, now); | |
25385 | - raw_spin_unlock(&cpu_base->lock); | |
25386 | + | |
25387 | + if (!ktime_before(now, cpu_base->softirq_expires_next)) { | |
25388 | + cpu_base->softirq_expires_next = KTIME_MAX; | |
25389 | + cpu_base->softirq_activated = 1; | |
25390 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
25391 | + } | |
25392 | + | |
25393 | + __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); | |
25394 | + raw_spin_unlock_irqrestore(&cpu_base->lock, flags); | |
1a6e0f06 JK |
25395 | } |
25396 | ||
e4b2b4a8 JK |
25397 | /* |
25398 | @@ -1440,13 +1664,65 @@ | |
25399 | return HRTIMER_NORESTART; | |
1a6e0f06 JK |
25400 | } |
25401 | ||
e4b2b4a8 JK |
25402 | -void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) |
25403 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25404 | +static bool task_is_realtime(struct task_struct *tsk) | |
1a6e0f06 | 25405 | { |
e4b2b4a8 | 25406 | + int policy = tsk->policy; |
1a6e0f06 | 25407 | + |
e4b2b4a8 JK |
25408 | + if (policy == SCHED_FIFO || policy == SCHED_RR) |
25409 | + return true; | |
25410 | + if (policy == SCHED_DEADLINE) | |
25411 | + return true; | |
25412 | + return false; | |
25413 | +} | |
1a6e0f06 | 25414 | +#endif |
e4b2b4a8 JK |
25415 | + |
25416 | +static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl, | |
25417 | + clockid_t clock_id, | |
25418 | + enum hrtimer_mode mode, | |
25419 | + struct task_struct *task) | |
25420 | +{ | |
25421 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25422 | + if (!(mode & (HRTIMER_MODE_SOFT | HRTIMER_MODE_HARD))) { | |
25423 | + if (task_is_realtime(current) || system_state != SYSTEM_RUNNING) | |
25424 | + mode |= HRTIMER_MODE_HARD; | |
25425 | + else | |
25426 | + mode |= HRTIMER_MODE_SOFT; | |
25427 | + } | |
1a6e0f06 | 25428 | +#endif |
e4b2b4a8 JK |
25429 | + __hrtimer_init(&sl->timer, clock_id, mode); |
25430 | sl->timer.function = hrtimer_wakeup; | |
25431 | sl->task = task; | |
25432 | } | |
25433 | + | |
25434 | +/** | |
25435 | + * hrtimer_init_sleeper - initialize sleeper to the given clock | |
25436 | + * @sl: sleeper to be initialized | |
25437 | + * @clock_id: the clock to be used | |
25438 | + * @mode: timer mode abs/rel | |
25439 | + * @task: the task to wake up | |
25440 | + */ | |
25441 | +void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, | |
25442 | + enum hrtimer_mode mode, struct task_struct *task) | |
25443 | +{ | |
25444 | + debug_init(&sl->timer, clock_id, mode); | |
25445 | + __hrtimer_init_sleeper(sl, clock_id, mode, task); | |
25446 | + | |
25447 | +} | |
25448 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
1a6e0f06 | 25449 | |
e4b2b4a8 JK |
25450 | +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS |
25451 | +void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl, | |
25452 | + clockid_t clock_id, | |
25453 | + enum hrtimer_mode mode, | |
25454 | + struct task_struct *task) | |
25455 | +{ | |
25456 | + debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr); | |
25457 | + __hrtimer_init_sleeper(sl, clock_id, mode, task); | |
25458 | +} | |
25459 | +EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack); | |
25460 | +#endif | |
1a6e0f06 | 25461 | + |
e4b2b4a8 JK |
25462 | int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) |
25463 | { | |
25464 | switch(restart->nanosleep.type) { | |
25465 | @@ -1470,8 +1746,6 @@ | |
25466 | { | |
25467 | struct restart_block *restart; | |
1a6e0f06 | 25468 | |
e4b2b4a8 JK |
25469 | - hrtimer_init_sleeper(t, current); |
25470 | - | |
25471 | do { | |
25472 | set_current_state(TASK_INTERRUPTIBLE); | |
25473 | hrtimer_start_expires(&t->timer, mode); | |
25474 | @@ -1508,10 +1782,9 @@ | |
25475 | struct hrtimer_sleeper t; | |
25476 | int ret; | |
1a6e0f06 | 25477 | |
e4b2b4a8 JK |
25478 | - hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, |
25479 | - HRTIMER_MODE_ABS); | |
25480 | + hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid, | |
25481 | + HRTIMER_MODE_ABS, current); | |
25482 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
25483 | - | |
25484 | ret = do_nanosleep(&t, HRTIMER_MODE_ABS); | |
25485 | destroy_hrtimer_on_stack(&t.timer); | |
25486 | return ret; | |
25487 | @@ -1529,7 +1802,7 @@ | |
25488 | if (dl_task(current) || rt_task(current)) | |
25489 | slack = 0; | |
1a6e0f06 | 25490 | |
e4b2b4a8 JK |
25491 | - hrtimer_init_on_stack(&t.timer, clockid, mode); |
25492 | + hrtimer_init_sleeper_on_stack(&t, clockid, mode, current); | |
25493 | hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); | |
25494 | ret = do_nanosleep(&t, mode); | |
25495 | if (ret != -ERESTART_RESTARTBLOCK) | |
25496 | @@ -1585,6 +1858,27 @@ | |
25497 | } | |
25498 | #endif | |
1a6e0f06 | 25499 | |
e4b2b4a8 JK |
25500 | +#ifdef CONFIG_PREEMPT_RT_FULL |
25501 | +/* | |
25502 | + * Sleep for 1 ms in hope whoever holds what we want will let it go. | |
25503 | + */ | |
25504 | +void cpu_chill(void) | |
25505 | +{ | |
25506 | + ktime_t chill_time; | |
25507 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
25508 | + | |
25509 | + chill_time = ktime_set(0, NSEC_PER_MSEC); | |
25510 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
25511 | + current->flags |= PF_NOFREEZE; | |
25512 | + sleeping_lock_inc(); | |
25513 | + schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL_HARD); | |
25514 | + sleeping_lock_dec(); | |
25515 | + if (!freeze_flag) | |
25516 | + current->flags &= ~PF_NOFREEZE; | |
25517 | +} | |
25518 | +EXPORT_SYMBOL(cpu_chill); | |
25519 | +#endif | |
25520 | + | |
25521 | /* | |
25522 | * Functions related to boot-time initialization: | |
25523 | */ | |
25524 | @@ -1598,9 +1892,17 @@ | |
25525 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
25526 | } | |
1a6e0f06 | 25527 | |
e4b2b4a8 JK |
25528 | - cpu_base->active_bases = 0; |
25529 | cpu_base->cpu = cpu; | |
25530 | - hrtimer_init_hres(cpu_base); | |
25531 | + cpu_base->active_bases = 0; | |
25532 | + cpu_base->hres_active = 0; | |
25533 | + cpu_base->hang_detected = 0; | |
25534 | + cpu_base->next_timer = NULL; | |
25535 | + cpu_base->softirq_next_timer = NULL; | |
25536 | + cpu_base->expires_next = KTIME_MAX; | |
25537 | + cpu_base->softirq_expires_next = KTIME_MAX; | |
25538 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25539 | + init_waitqueue_head(&cpu_base->wait); | |
25540 | +#endif | |
25541 | return 0; | |
25542 | } | |
1a6e0f06 | 25543 | |
e4b2b4a8 JK |
25544 | @@ -1632,7 +1934,7 @@ |
25545 | * sort out already expired timers and reprogram the | |
25546 | * event device. | |
25547 | */ | |
25548 | - enqueue_hrtimer(timer, new_base); | |
25549 | + enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); | |
25550 | } | |
25551 | } | |
1a6e0f06 | 25552 | |
e4b2b4a8 JK |
25553 | @@ -1644,6 +1946,12 @@ |
25554 | BUG_ON(cpu_online(scpu)); | |
25555 | tick_cancel_sched_timer(scpu); | |
25556 | ||
25557 | + /* | |
25558 | + * this BH disable ensures that raise_softirq_irqoff() does | |
25559 | + * not wakeup ksoftirqd (and acquire the pi-lock) while | |
25560 | + * holding the cpu_base lock | |
25561 | + */ | |
25562 | + local_bh_disable(); | |
1a6e0f06 | 25563 | local_irq_disable(); |
e4b2b4a8 JK |
25564 | old_base = &per_cpu(hrtimer_bases, scpu); |
25565 | new_base = this_cpu_ptr(&hrtimer_bases); | |
25566 | @@ -1659,12 +1967,19 @@ | |
25567 | &new_base->clock_base[i]); | |
25568 | } | |
1a6e0f06 | 25569 | |
e4b2b4a8 JK |
25570 | + /* |
25571 | + * The migration might have changed the first expiring softirq | |
25572 | + * timer on this CPU. Update it. | |
25573 | + */ | |
25574 | + hrtimer_update_softirq_timer(new_base, false); | |
25575 | + | |
25576 | raw_spin_unlock(&old_base->lock); | |
25577 | raw_spin_unlock(&new_base->lock); | |
25578 | ||
25579 | /* Check, if we got expired work to do */ | |
25580 | __hrtimer_peek_ahead_timers(); | |
1a6e0f06 | 25581 | local_irq_enable(); |
e4b2b4a8 JK |
25582 | + local_bh_enable(); |
25583 | return 0; | |
25584 | } | |
1a6e0f06 | 25585 | |
e4b2b4a8 JK |
25586 | @@ -1673,18 +1988,19 @@ |
25587 | void __init hrtimers_init(void) | |
25588 | { | |
25589 | hrtimers_prepare_cpu(smp_processor_id()); | |
25590 | + open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); | |
1a6e0f06 JK |
25591 | } |
25592 | ||
1a6e0f06 | 25593 | /** |
e4b2b4a8 JK |
25594 | * schedule_hrtimeout_range_clock - sleep until timeout |
25595 | * @expires: timeout value (ktime_t) | |
25596 | * @delta: slack in expires timeout (ktime_t) | |
25597 | - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | |
25598 | - * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME | |
25599 | + * @mode: timer mode | |
25600 | + * @clock_id: timer clock to be used | |
1a6e0f06 | 25601 | */ |
e4b2b4a8 JK |
25602 | int __sched |
25603 | schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, | |
25604 | - const enum hrtimer_mode mode, int clock) | |
25605 | + const enum hrtimer_mode mode, clockid_t clock_id) | |
25606 | { | |
25607 | struct hrtimer_sleeper t; | |
25608 | ||
25609 | @@ -1705,11 +2021,9 @@ | |
25610 | return -EINTR; | |
1a6e0f06 JK |
25611 | } |
25612 | ||
e4b2b4a8 JK |
25613 | - hrtimer_init_on_stack(&t.timer, clock, mode); |
25614 | + hrtimer_init_sleeper_on_stack(&t, clock_id, mode, current); | |
25615 | hrtimer_set_expires_range_ns(&t.timer, *expires, delta); | |
25616 | ||
25617 | - hrtimer_init_sleeper(&t, current); | |
25618 | - | |
25619 | hrtimer_start_expires(&t.timer, mode); | |
25620 | ||
25621 | if (likely(t.task)) | |
25622 | @@ -1727,7 +2041,7 @@ | |
25623 | * schedule_hrtimeout_range - sleep until timeout | |
25624 | * @expires: timeout value (ktime_t) | |
25625 | * @delta: slack in expires timeout (ktime_t) | |
25626 | - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | |
25627 | + * @mode: timer mode | |
25628 | * | |
25629 | * Make the current task sleep until the given expiry time has | |
25630 | * elapsed. The routine will return immediately unless | |
25631 | @@ -1766,7 +2080,7 @@ | |
25632 | /** | |
25633 | * schedule_hrtimeout - sleep until timeout | |
25634 | * @expires: timeout value (ktime_t) | |
25635 | - * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL | |
25636 | + * @mode: timer mode | |
25637 | * | |
25638 | * Make the current task sleep until the given expiry time has | |
25639 | * elapsed. The routine will return immediately unless | |
25640 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/itimer.c linux-4.14/kernel/time/itimer.c | |
25641 | --- linux-4.14.orig/kernel/time/itimer.c 2017-11-12 19:46:13.000000000 +0100 | |
25642 | +++ linux-4.14/kernel/time/itimer.c 2018-09-05 11:05:07.000000000 +0200 | |
25643 | @@ -214,6 +214,7 @@ | |
25644 | /* We are sharing ->siglock with it_real_fn() */ | |
25645 | if (hrtimer_try_to_cancel(timer) < 0) { | |
25646 | spin_unlock_irq(&tsk->sighand->siglock); | |
25647 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
25648 | goto again; | |
25649 | } | |
25650 | expires = timeval_to_ktime(value->it_value); | |
25651 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/jiffies.c linux-4.14/kernel/time/jiffies.c | |
25652 | --- linux-4.14.orig/kernel/time/jiffies.c 2017-11-12 19:46:13.000000000 +0100 | |
25653 | +++ linux-4.14/kernel/time/jiffies.c 2018-09-05 11:05:07.000000000 +0200 | |
25654 | @@ -74,7 +74,8 @@ | |
25655 | .max_cycles = 10, | |
25656 | }; | |
25657 | ||
25658 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
25659 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
25660 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
25661 | ||
25662 | #if (BITS_PER_LONG < 64) | |
25663 | u64 get_jiffies_64(void) | |
25664 | @@ -83,9 +84,9 @@ | |
25665 | u64 ret; | |
25666 | ||
25667 | do { | |
25668 | - seq = read_seqbegin(&jiffies_lock); | |
25669 | + seq = read_seqcount_begin(&jiffies_seq); | |
25670 | ret = jiffies_64; | |
25671 | - } while (read_seqretry(&jiffies_lock, seq)); | |
25672 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
25673 | return ret; | |
1a6e0f06 | 25674 | } |
e4b2b4a8 JK |
25675 | EXPORT_SYMBOL(get_jiffies_64); |
25676 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/posix-cpu-timers.c linux-4.14/kernel/time/posix-cpu-timers.c | |
25677 | --- linux-4.14.orig/kernel/time/posix-cpu-timers.c 2017-11-12 19:46:13.000000000 +0100 | |
25678 | +++ linux-4.14/kernel/time/posix-cpu-timers.c 2018-09-05 11:05:07.000000000 +0200 | |
25679 | @@ -3,8 +3,10 @@ | |
25680 | * Implement CPU time clocks for the POSIX clock interface. | |
25681 | */ | |
1a6e0f06 | 25682 | |
e4b2b4a8 JK |
25683 | +#include <uapi/linux/sched/types.h> |
25684 | #include <linux/sched/signal.h> | |
25685 | #include <linux/sched/cputime.h> | |
25686 | +#include <linux/sched/rt.h> | |
25687 | #include <linux/posix-timers.h> | |
25688 | #include <linux/errno.h> | |
25689 | #include <linux/math64.h> | |
25690 | @@ -14,6 +16,7 @@ | |
25691 | #include <linux/tick.h> | |
25692 | #include <linux/workqueue.h> | |
25693 | #include <linux/compat.h> | |
25694 | +#include <linux/smpboot.h> | |
1a6e0f06 | 25695 | |
e4b2b4a8 | 25696 | #include "posix-timers.h" |
1a6e0f06 | 25697 | |
e4b2b4a8 JK |
25698 | @@ -603,7 +606,7 @@ |
25699 | /* | |
25700 | * Disarm any old timer after extracting its expiry time. | |
25701 | */ | |
25702 | - WARN_ON_ONCE(!irqs_disabled()); | |
25703 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
1a6e0f06 | 25704 | |
e4b2b4a8 JK |
25705 | ret = 0; |
25706 | old_incr = timer->it.cpu.incr; | |
25707 | @@ -1034,7 +1037,7 @@ | |
25708 | /* | |
25709 | * Now re-arm for the new expiry time. | |
25710 | */ | |
25711 | - WARN_ON_ONCE(!irqs_disabled()); | |
25712 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
25713 | arm_timer(timer); | |
25714 | unlock: | |
25715 | unlock_task_sighand(p, &flags); | |
25716 | @@ -1119,13 +1122,13 @@ | |
25717 | * already updated our counts. We need to check if any timers fire now. | |
25718 | * Interrupts are disabled. | |
25719 | */ | |
25720 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
25721 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
25722 | { | |
25723 | LIST_HEAD(firing); | |
25724 | struct k_itimer *timer, *next; | |
25725 | unsigned long flags; | |
1a6e0f06 | 25726 | |
e4b2b4a8 JK |
25727 | - WARN_ON_ONCE(!irqs_disabled()); |
25728 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
1a6e0f06 | 25729 | |
e4b2b4a8 JK |
25730 | /* |
25731 | * The fast path checks that there are no expired thread or thread | |
25732 | @@ -1179,6 +1182,152 @@ | |
1a6e0f06 | 25733 | } |
1a6e0f06 | 25734 | } |
1a6e0f06 | 25735 | |
e4b2b4a8 JK |
25736 | +#ifdef CONFIG_PREEMPT_RT_BASE |
25737 | +#include <linux/kthread.h> | |
25738 | +#include <linux/cpu.h> | |
25739 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
25740 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
25741 | +DEFINE_PER_CPU(bool, posix_timer_th_active); | |
1a6e0f06 | 25742 | + |
e4b2b4a8 | 25743 | +static void posix_cpu_kthread_fn(unsigned int cpu) |
1a6e0f06 | 25744 | +{ |
e4b2b4a8 JK |
25745 | + struct task_struct *tsk = NULL; |
25746 | + struct task_struct *next = NULL; | |
1a6e0f06 | 25747 | + |
e4b2b4a8 JK |
25748 | + BUG_ON(per_cpu(posix_timer_task, cpu) != current); |
25749 | + | |
25750 | + /* grab task list */ | |
25751 | + raw_local_irq_disable(); | |
25752 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
25753 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
25754 | + raw_local_irq_enable(); | |
25755 | + | |
25756 | + /* its possible the list is empty, just return */ | |
25757 | + if (!tsk) | |
25758 | + return; | |
25759 | + | |
25760 | + /* Process task list */ | |
25761 | + while (1) { | |
25762 | + /* save next */ | |
25763 | + next = tsk->posix_timer_list; | |
25764 | + | |
25765 | + /* run the task timers, clear its ptr and | |
25766 | + * unreference it | |
25767 | + */ | |
25768 | + __run_posix_cpu_timers(tsk); | |
25769 | + tsk->posix_timer_list = NULL; | |
25770 | + put_task_struct(tsk); | |
25771 | + | |
25772 | + /* check if this is the last on the list */ | |
25773 | + if (next == tsk) | |
25774 | + break; | |
25775 | + tsk = next; | |
1a6e0f06 JK |
25776 | + } |
25777 | +} | |
25778 | + | |
e4b2b4a8 | 25779 | +static inline int __fastpath_timer_check(struct task_struct *tsk) |
1a6e0f06 | 25780 | +{ |
e4b2b4a8 JK |
25781 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ |
25782 | + if (unlikely(tsk->exit_state)) | |
25783 | + return 0; | |
1a6e0f06 | 25784 | + |
e4b2b4a8 JK |
25785 | + if (!task_cputime_zero(&tsk->cputime_expires)) |
25786 | + return 1; | |
25787 | + | |
25788 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
25789 | + return 1; | |
25790 | + | |
25791 | + return 0; | |
1a6e0f06 JK |
25792 | +} |
25793 | + | |
e4b2b4a8 JK |
25794 | +void run_posix_cpu_timers(struct task_struct *tsk) |
25795 | +{ | |
25796 | + unsigned int cpu = smp_processor_id(); | |
25797 | + struct task_struct *tasklist; | |
1a6e0f06 | 25798 | + |
e4b2b4a8 JK |
25799 | + BUG_ON(!irqs_disabled()); |
25800 | + | |
25801 | + if (per_cpu(posix_timer_th_active, cpu) != true) | |
25802 | + return; | |
25803 | + | |
25804 | + /* get per-cpu references */ | |
25805 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
25806 | + | |
25807 | + /* check to see if we're already queued */ | |
25808 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
25809 | + get_task_struct(tsk); | |
25810 | + if (tasklist) { | |
25811 | + tsk->posix_timer_list = tasklist; | |
25812 | + } else { | |
25813 | + /* | |
25814 | + * The list is terminated by a self-pointing | |
25815 | + * task_struct | |
25816 | + */ | |
25817 | + tsk->posix_timer_list = tsk; | |
25818 | + } | |
25819 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
25820 | + | |
25821 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
25822 | + } | |
25823 | +} | |
25824 | + | |
25825 | +static int posix_cpu_kthread_should_run(unsigned int cpu) | |
1a6e0f06 | 25826 | +{ |
e4b2b4a8 | 25827 | + return __this_cpu_read(posix_timer_tasklist) != NULL; |
1a6e0f06 | 25828 | +} |
1a6e0f06 | 25829 | + |
e4b2b4a8 | 25830 | +static void posix_cpu_kthread_park(unsigned int cpu) |
1a6e0f06 | 25831 | +{ |
e4b2b4a8 | 25832 | + this_cpu_write(posix_timer_th_active, false); |
1a6e0f06 JK |
25833 | +} |
25834 | + | |
e4b2b4a8 | 25835 | +static void posix_cpu_kthread_unpark(unsigned int cpu) |
1a6e0f06 | 25836 | +{ |
e4b2b4a8 | 25837 | + this_cpu_write(posix_timer_th_active, true); |
1a6e0f06 | 25838 | +} |
1a6e0f06 | 25839 | + |
e4b2b4a8 JK |
25840 | +static void posix_cpu_kthread_setup(unsigned int cpu) |
25841 | +{ | |
25842 | + struct sched_param sp; | |
25843 | + | |
25844 | + sp.sched_priority = MAX_RT_PRIO - 1; | |
25845 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
25846 | + posix_cpu_kthread_unpark(cpu); | |
25847 | +} | |
25848 | + | |
25849 | +static struct smp_hotplug_thread posix_cpu_thread = { | |
25850 | + .store = &posix_timer_task, | |
25851 | + .thread_should_run = posix_cpu_kthread_should_run, | |
25852 | + .thread_fn = posix_cpu_kthread_fn, | |
25853 | + .thread_comm = "posixcputmr/%u", | |
25854 | + .setup = posix_cpu_kthread_setup, | |
25855 | + .park = posix_cpu_kthread_park, | |
25856 | + .unpark = posix_cpu_kthread_unpark, | |
25857 | +}; | |
25858 | + | |
25859 | +static int __init posix_cpu_thread_init(void) | |
1a6e0f06 | 25860 | +{ |
e4b2b4a8 JK |
25861 | + /* Start one for boot CPU. */ |
25862 | + unsigned long cpu; | |
25863 | + int ret; | |
25864 | + | |
25865 | + /* init the per-cpu posix_timer_tasklets */ | |
25866 | + for_each_possible_cpu(cpu) | |
25867 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
25868 | + | |
25869 | + ret = smpboot_register_percpu_thread(&posix_cpu_thread); | |
25870 | + WARN_ON(ret); | |
25871 | + | |
1a6e0f06 JK |
25872 | + return 0; |
25873 | +} | |
e4b2b4a8 JK |
25874 | +early_initcall(posix_cpu_thread_init); |
25875 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
25876 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
25877 | +{ | |
25878 | + __run_posix_cpu_timers(tsk); | |
25879 | +} | |
25880 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
25881 | + | |
25882 | /* | |
25883 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
25884 | * The tsk->sighand->siglock must be held by the caller. | |
25885 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/posix-timers.c linux-4.14/kernel/time/posix-timers.c | |
25886 | --- linux-4.14.orig/kernel/time/posix-timers.c 2018-09-05 11:03:22.000000000 +0200 | |
25887 | +++ linux-4.14/kernel/time/posix-timers.c 2018-09-05 11:05:07.000000000 +0200 | |
25888 | @@ -434,6 +434,7 @@ | |
25889 | static struct pid *good_sigevent(sigevent_t * event) | |
25890 | { | |
25891 | struct task_struct *rtn = current->group_leader; | |
25892 | + int sig = event->sigev_signo; | |
25893 | ||
25894 | switch (event->sigev_notify) { | |
25895 | case SIGEV_SIGNAL | SIGEV_THREAD_ID: | |
25896 | @@ -443,7 +444,8 @@ | |
25897 | /* FALLTHRU */ | |
25898 | case SIGEV_SIGNAL: | |
25899 | case SIGEV_THREAD: | |
25900 | - if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) | |
25901 | + if (sig <= 0 || sig > SIGRTMAX || | |
25902 | + sig_kernel_only(sig) || sig_kernel_coredump(sig)) | |
25903 | return NULL; | |
25904 | /* FALLTHRU */ | |
25905 | case SIGEV_NONE: | |
25906 | @@ -469,7 +471,7 @@ | |
25907 | ||
25908 | static void k_itimer_rcu_free(struct rcu_head *head) | |
25909 | { | |
25910 | - struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); | |
25911 | + struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); | |
25912 | ||
25913 | kmem_cache_free(posix_timers_cache, tmr); | |
25914 | } | |
25915 | @@ -486,7 +488,7 @@ | |
25916 | } | |
25917 | put_pid(tmr->it_pid); | |
25918 | sigqueue_free(tmr->sigq); | |
25919 | - call_rcu(&tmr->it.rcu, k_itimer_rcu_free); | |
25920 | + call_rcu(&tmr->rcu, k_itimer_rcu_free); | |
25921 | } | |
25922 | ||
25923 | static int common_timer_create(struct k_itimer *new_timer) | |
25924 | @@ -825,6 +827,22 @@ | |
25925 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
25926 | } | |
25927 | ||
25928 | +/* | |
25929 | + * Protected by RCU! | |
25930 | + */ | |
25931 | +static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr) | |
25932 | +{ | |
25933 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25934 | + if (kc->timer_arm == common_hrtimer_arm) | |
25935 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
25936 | + else if (kc == &alarm_clock) | |
25937 | + hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer); | |
25938 | + else | |
25939 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
25940 | + schedule_timeout(1); | |
1a6e0f06 | 25941 | +#endif |
e4b2b4a8 | 25942 | +} |
1a6e0f06 | 25943 | + |
e4b2b4a8 JK |
25944 | static int common_hrtimer_try_to_cancel(struct k_itimer *timr) |
25945 | { | |
25946 | return hrtimer_try_to_cancel(&timr->it.real.timer); | |
25947 | @@ -889,6 +907,7 @@ | |
25948 | if (!timr) | |
25949 | return -EINVAL; | |
25950 | ||
25951 | + rcu_read_lock(); | |
25952 | kc = timr->kclock; | |
25953 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
25954 | error = -EINVAL; | |
25955 | @@ -897,9 +916,12 @@ | |
25956 | ||
25957 | unlock_timer(timr, flag); | |
25958 | if (error == TIMER_RETRY) { | |
25959 | + timer_wait_for_callback(kc, timr); | |
25960 | old_spec64 = NULL; // We already got the old time... | |
25961 | + rcu_read_unlock(); | |
25962 | goto retry; | |
25963 | } | |
25964 | + rcu_read_unlock(); | |
25965 | ||
25966 | return error; | |
25967 | } | |
25968 | @@ -981,10 +1003,15 @@ | |
25969 | if (!timer) | |
25970 | return -EINVAL; | |
25971 | ||
25972 | + rcu_read_lock(); | |
25973 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
25974 | unlock_timer(timer, flags); | |
25975 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
25976 | + timer); | |
25977 | + rcu_read_unlock(); | |
25978 | goto retry_delete; | |
25979 | } | |
25980 | + rcu_read_unlock(); | |
25981 | ||
25982 | spin_lock(¤t->sighand->siglock); | |
25983 | list_del(&timer->list); | |
25984 | @@ -1010,8 +1037,18 @@ | |
25985 | retry_delete: | |
25986 | spin_lock_irqsave(&timer->it_lock, flags); | |
25987 | ||
25988 | + /* On RT we can race with a deletion */ | |
25989 | + if (!timer->it_signal) { | |
25990 | + unlock_timer(timer, flags); | |
25991 | + return; | |
25992 | + } | |
25993 | + | |
25994 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
25995 | + rcu_read_lock(); | |
25996 | unlock_timer(timer, flags); | |
25997 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
25998 | + timer); | |
25999 | + rcu_read_unlock(); | |
26000 | goto retry_delete; | |
26001 | } | |
26002 | list_del(&timer->list); | |
26003 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/tick-broadcast-hrtimer.c linux-4.14/kernel/time/tick-broadcast-hrtimer.c | |
26004 | --- linux-4.14.orig/kernel/time/tick-broadcast-hrtimer.c 2017-11-12 19:46:13.000000000 +0100 | |
26005 | +++ linux-4.14/kernel/time/tick-broadcast-hrtimer.c 2018-09-05 11:05:07.000000000 +0200 | |
26006 | @@ -106,7 +106,7 @@ | |
26007 | ||
26008 | void tick_setup_hrtimer_broadcast(void) | |
1a6e0f06 | 26009 | { |
e4b2b4a8 JK |
26010 | - hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); |
26011 | + hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); | |
26012 | bctimer.function = bc_handler; | |
26013 | clockevents_register_device(&ce_broadcast_hrtimer); | |
26014 | } | |
26015 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/tick-common.c linux-4.14/kernel/time/tick-common.c | |
26016 | --- linux-4.14.orig/kernel/time/tick-common.c 2017-11-12 19:46:13.000000000 +0100 | |
26017 | +++ linux-4.14/kernel/time/tick-common.c 2018-09-05 11:05:07.000000000 +0200 | |
26018 | @@ -79,13 +79,15 @@ | |
26019 | static void tick_periodic(int cpu) | |
26020 | { | |
26021 | if (tick_do_timer_cpu == cpu) { | |
26022 | - write_seqlock(&jiffies_lock); | |
26023 | + raw_spin_lock(&jiffies_lock); | |
26024 | + write_seqcount_begin(&jiffies_seq); | |
1a6e0f06 | 26025 | |
e4b2b4a8 JK |
26026 | /* Keep track of the next tick event */ |
26027 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
1a6e0f06 | 26028 | |
e4b2b4a8 JK |
26029 | do_timer(1); |
26030 | - write_sequnlock(&jiffies_lock); | |
26031 | + write_seqcount_end(&jiffies_seq); | |
26032 | + raw_spin_unlock(&jiffies_lock); | |
26033 | update_wall_time(); | |
26034 | } | |
1a6e0f06 | 26035 | |
e4b2b4a8 JK |
26036 | @@ -157,9 +159,9 @@ |
26037 | ktime_t next; | |
1a6e0f06 | 26038 | |
e4b2b4a8 JK |
26039 | do { |
26040 | - seq = read_seqbegin(&jiffies_lock); | |
26041 | + seq = read_seqcount_begin(&jiffies_seq); | |
26042 | next = tick_next_period; | |
26043 | - } while (read_seqretry(&jiffies_lock, seq)); | |
26044 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
1a6e0f06 | 26045 | |
e4b2b4a8 JK |
26046 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); |
26047 | ||
26048 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/tick-internal.h linux-4.14/kernel/time/tick-internal.h | |
26049 | --- linux-4.14.orig/kernel/time/tick-internal.h 2017-11-12 19:46:13.000000000 +0100 | |
26050 | +++ linux-4.14/kernel/time/tick-internal.h 2018-09-05 11:05:07.000000000 +0200 | |
26051 | @@ -150,16 +150,15 @@ | |
26052 | ||
26053 | #ifdef CONFIG_NO_HZ_COMMON | |
26054 | extern unsigned long tick_nohz_active; | |
26055 | -#else | |
26056 | +extern void timers_update_nohz(void); | |
26057 | +# ifdef CONFIG_SMP | |
26058 | +extern struct static_key_false timers_migration_enabled; | |
26059 | +# endif | |
26060 | +#else /* CONFIG_NO_HZ_COMMON */ | |
26061 | +static inline void timers_update_nohz(void) { } | |
26062 | #define tick_nohz_active (0) | |
26063 | #endif | |
26064 | ||
26065 | -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | |
26066 | -extern void timers_update_migration(bool update_nohz); | |
26067 | -#else | |
26068 | -static inline void timers_update_migration(bool update_nohz) { } | |
26069 | -#endif | |
26070 | - | |
26071 | DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); | |
26072 | ||
26073 | extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); | |
26074 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/tick-sched.c linux-4.14/kernel/time/tick-sched.c | |
26075 | --- linux-4.14.orig/kernel/time/tick-sched.c 2018-09-05 11:03:22.000000000 +0200 | |
26076 | +++ linux-4.14/kernel/time/tick-sched.c 2018-09-05 11:05:07.000000000 +0200 | |
26077 | @@ -66,7 +66,8 @@ | |
1a6e0f06 JK |
26078 | return; |
26079 | ||
e4b2b4a8 JK |
26080 | /* Reevaluate with jiffies_lock held */ |
26081 | - write_seqlock(&jiffies_lock); | |
26082 | + raw_spin_lock(&jiffies_lock); | |
26083 | + write_seqcount_begin(&jiffies_seq); | |
26084 | ||
26085 | delta = ktime_sub(now, last_jiffies_update); | |
26086 | if (delta >= tick_period) { | |
26087 | @@ -89,10 +90,12 @@ | |
26088 | /* Keep the tick_next_period variable up to date */ | |
26089 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
26090 | } else { | |
26091 | - write_sequnlock(&jiffies_lock); | |
26092 | + write_seqcount_end(&jiffies_seq); | |
26093 | + raw_spin_unlock(&jiffies_lock); | |
26094 | return; | |
1a6e0f06 | 26095 | } |
e4b2b4a8 JK |
26096 | - write_sequnlock(&jiffies_lock); |
26097 | + write_seqcount_end(&jiffies_seq); | |
26098 | + raw_spin_unlock(&jiffies_lock); | |
26099 | update_wall_time(); | |
26100 | } | |
26101 | ||
26102 | @@ -103,12 +106,14 @@ | |
26103 | { | |
26104 | ktime_t period; | |
26105 | ||
26106 | - write_seqlock(&jiffies_lock); | |
26107 | + raw_spin_lock(&jiffies_lock); | |
26108 | + write_seqcount_begin(&jiffies_seq); | |
26109 | /* Did we start the jiffies update yet ? */ | |
26110 | if (last_jiffies_update == 0) | |
26111 | last_jiffies_update = tick_next_period; | |
26112 | period = last_jiffies_update; | |
26113 | - write_sequnlock(&jiffies_lock); | |
26114 | + write_seqcount_end(&jiffies_seq); | |
26115 | + raw_spin_unlock(&jiffies_lock); | |
26116 | return period; | |
1a6e0f06 JK |
26117 | } |
26118 | ||
e4b2b4a8 JK |
26119 | @@ -225,6 +230,7 @@ |
26120 | ||
26121 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
26122 | .func = nohz_full_kick_func, | |
26123 | + .flags = IRQ_WORK_HARD_IRQ, | |
26124 | }; | |
26125 | ||
1a6e0f06 | 26126 | /* |
e4b2b4a8 | 26127 | @@ -689,10 +695,10 @@ |
1a6e0f06 | 26128 | |
e4b2b4a8 JK |
26129 | /* Read jiffies and the time when jiffies were updated last */ |
26130 | do { | |
26131 | - seq = read_seqbegin(&jiffies_lock); | |
26132 | + seq = read_seqcount_begin(&jiffies_seq); | |
26133 | basemono = last_jiffies_update; | |
26134 | basejiff = jiffies; | |
26135 | - } while (read_seqretry(&jiffies_lock, seq)); | |
26136 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
26137 | ts->last_jiffies = basejiff; | |
1a6e0f06 | 26138 | |
e4b2b4a8 JK |
26139 | /* |
26140 | @@ -906,14 +912,7 @@ | |
26141 | return false; | |
26142 | ||
26143 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
26144 | - static int ratelimit; | |
26145 | - | |
26146 | - if (ratelimit < 10 && | |
26147 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
26148 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
26149 | - (unsigned int) local_softirq_pending()); | |
26150 | - ratelimit++; | |
26151 | - } | |
26152 | + softirq_check_pending_idle(); | |
26153 | return false; | |
1a6e0f06 | 26154 | } |
1a6e0f06 | 26155 | |
e4b2b4a8 JK |
26156 | @@ -1132,7 +1131,7 @@ |
26157 | ts->nohz_mode = mode; | |
26158 | /* One update is enough */ | |
26159 | if (!test_and_set_bit(0, &tick_nohz_active)) | |
26160 | - timers_update_migration(true); | |
26161 | + timers_update_nohz(); | |
26162 | } | |
1a6e0f06 | 26163 | |
e4b2b4a8 JK |
26164 | /** |
26165 | @@ -1250,7 +1249,7 @@ | |
26166 | /* | |
26167 | * Emulate tick processing via per-CPU hrtimers: | |
26168 | */ | |
26169 | - hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
26170 | + hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); | |
26171 | ts->sched_timer.function = tick_sched_timer; | |
26172 | ||
26173 | /* Get the next period (per-CPU) */ | |
26174 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/timekeeping.c linux-4.14/kernel/time/timekeeping.c | |
26175 | --- linux-4.14.orig/kernel/time/timekeeping.c 2017-11-12 19:46:13.000000000 +0100 | |
26176 | +++ linux-4.14/kernel/time/timekeeping.c 2018-09-05 11:05:07.000000000 +0200 | |
26177 | @@ -2326,8 +2326,10 @@ | |
26178 | */ | |
26179 | void xtime_update(unsigned long ticks) | |
1a6e0f06 | 26180 | { |
e4b2b4a8 JK |
26181 | - write_seqlock(&jiffies_lock); |
26182 | + raw_spin_lock(&jiffies_lock); | |
26183 | + write_seqcount_begin(&jiffies_seq); | |
26184 | do_timer(ticks); | |
26185 | - write_sequnlock(&jiffies_lock); | |
26186 | + write_seqcount_end(&jiffies_seq); | |
26187 | + raw_spin_unlock(&jiffies_lock); | |
26188 | update_wall_time(); | |
26189 | } | |
26190 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/timekeeping.h linux-4.14/kernel/time/timekeeping.h | |
26191 | --- linux-4.14.orig/kernel/time/timekeeping.h 2017-11-12 19:46:13.000000000 +0100 | |
26192 | +++ linux-4.14/kernel/time/timekeeping.h 2018-09-05 11:05:07.000000000 +0200 | |
26193 | @@ -18,7 +18,8 @@ | |
26194 | extern void do_timer(unsigned long ticks); | |
26195 | extern void update_wall_time(void); | |
1a6e0f06 | 26196 | |
e4b2b4a8 JK |
26197 | -extern seqlock_t jiffies_lock; |
26198 | +extern raw_spinlock_t jiffies_lock; | |
26199 | +extern seqcount_t jiffies_seq; | |
1a6e0f06 | 26200 | |
e4b2b4a8 JK |
26201 | #define CS_NAME_LEN 32 |
26202 | ||
26203 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/time/timer.c linux-4.14/kernel/time/timer.c | |
26204 | --- linux-4.14.orig/kernel/time/timer.c 2018-09-05 11:03:22.000000000 +0200 | |
26205 | +++ linux-4.14/kernel/time/timer.c 2018-09-05 11:05:07.000000000 +0200 | |
26206 | @@ -44,6 +44,7 @@ | |
26207 | #include <linux/sched/debug.h> | |
26208 | #include <linux/slab.h> | |
26209 | #include <linux/compat.h> | |
26210 | +#include <linux/swait.h> | |
26211 | ||
26212 | #include <linux/uaccess.h> | |
26213 | #include <asm/unistd.h> | |
26214 | @@ -197,11 +198,12 @@ | |
26215 | struct timer_base { | |
26216 | raw_spinlock_t lock; | |
26217 | struct timer_list *running_timer; | |
26218 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26219 | + struct swait_queue_head wait_for_running_timer; | |
1a6e0f06 | 26220 | +#endif |
e4b2b4a8 JK |
26221 | unsigned long clk; |
26222 | unsigned long next_expiry; | |
26223 | unsigned int cpu; | |
26224 | - bool migration_enabled; | |
26225 | - bool nohz_active; | |
26226 | bool is_idle; | |
26227 | bool must_forward_clk; | |
26228 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); | |
26229 | @@ -210,45 +212,73 @@ | |
1a6e0f06 | 26230 | |
e4b2b4a8 | 26231 | static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); |
1a6e0f06 | 26232 | |
e4b2b4a8 JK |
26233 | -#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) |
26234 | +#ifdef CONFIG_NO_HZ_COMMON | |
1a6e0f06 | 26235 | + |
e4b2b4a8 JK |
26236 | +static DEFINE_STATIC_KEY_FALSE(timers_nohz_active); |
26237 | +static DEFINE_MUTEX(timer_keys_mutex); | |
26238 | + | |
26239 | +static struct swork_event timer_update_swork; | |
26240 | + | |
26241 | +#ifdef CONFIG_SMP | |
26242 | unsigned int sysctl_timer_migration = 1; | |
1a6e0f06 | 26243 | |
e4b2b4a8 JK |
26244 | -void timers_update_migration(bool update_nohz) |
26245 | +DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); | |
1a6e0f06 | 26246 | + |
e4b2b4a8 JK |
26247 | +static void timers_update_migration(void) |
26248 | { | |
26249 | bool on = sysctl_timer_migration && tick_nohz_active; | |
26250 | - unsigned int cpu; | |
1a6e0f06 | 26251 | |
e4b2b4a8 JK |
26252 | - /* Avoid the loop, if nothing to update */ |
26253 | - if (this_cpu_read(timer_bases[BASE_STD].migration_enabled) == on) | |
26254 | - return; | |
26255 | + if (on) | |
26256 | + static_branch_enable(&timers_migration_enabled); | |
26257 | + else | |
26258 | + static_branch_disable(&timers_migration_enabled); | |
26259 | +} | |
26260 | +#else | |
26261 | +static inline void timers_update_migration(void) { } | |
26262 | +#endif /* !CONFIG_SMP */ | |
26263 | ||
26264 | - for_each_possible_cpu(cpu) { | |
26265 | - per_cpu(timer_bases[BASE_STD].migration_enabled, cpu) = on; | |
26266 | - per_cpu(timer_bases[BASE_DEF].migration_enabled, cpu) = on; | |
26267 | - per_cpu(hrtimer_bases.migration_enabled, cpu) = on; | |
26268 | - if (!update_nohz) | |
26269 | - continue; | |
26270 | - per_cpu(timer_bases[BASE_STD].nohz_active, cpu) = true; | |
26271 | - per_cpu(timer_bases[BASE_DEF].nohz_active, cpu) = true; | |
26272 | - per_cpu(hrtimer_bases.nohz_active, cpu) = true; | |
26273 | - } | |
26274 | +static void timer_update_keys(struct swork_event *event) | |
26275 | +{ | |
26276 | + mutex_lock(&timer_keys_mutex); | |
26277 | + timers_update_migration(); | |
26278 | + static_branch_enable(&timers_nohz_active); | |
26279 | + mutex_unlock(&timer_keys_mutex); | |
1a6e0f06 JK |
26280 | } |
26281 | ||
e4b2b4a8 JK |
26282 | +void timers_update_nohz(void) |
26283 | +{ | |
26284 | + swork_queue(&timer_update_swork); | |
26285 | +} | |
1a6e0f06 | 26286 | + |
e4b2b4a8 | 26287 | +static __init int hrtimer_init_thread(void) |
1a6e0f06 | 26288 | +{ |
e4b2b4a8 JK |
26289 | + WARN_ON(swork_get()); |
26290 | + INIT_SWORK(&timer_update_swork, timer_update_keys); | |
26291 | + return 0; | |
26292 | +} | |
26293 | +early_initcall(hrtimer_init_thread); | |
1a6e0f06 | 26294 | + |
e4b2b4a8 JK |
26295 | int timer_migration_handler(struct ctl_table *table, int write, |
26296 | void __user *buffer, size_t *lenp, | |
26297 | loff_t *ppos) | |
26298 | { | |
26299 | - static DEFINE_MUTEX(mutex); | |
26300 | int ret; | |
26301 | ||
26302 | - mutex_lock(&mutex); | |
26303 | + mutex_lock(&timer_keys_mutex); | |
26304 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | |
26305 | if (!ret && write) | |
26306 | - timers_update_migration(false); | |
26307 | - mutex_unlock(&mutex); | |
26308 | + timers_update_migration(); | |
26309 | + mutex_unlock(&timer_keys_mutex); | |
26310 | return ret; | |
26311 | } | |
26312 | -#endif | |
26313 | + | |
26314 | +static inline bool is_timers_nohz_active(void) | |
26315 | +{ | |
26316 | + return static_branch_unlikely(&timers_nohz_active); | |
1a6e0f06 JK |
26317 | +} |
26318 | +#else | |
e4b2b4a8 JK |
26319 | +static inline bool is_timers_nohz_active(void) { return false; } |
26320 | +#endif /* NO_HZ_COMMON */ | |
26321 | ||
26322 | static unsigned long round_jiffies_common(unsigned long j, int cpu, | |
26323 | bool force_up) | |
26324 | @@ -534,7 +564,7 @@ | |
26325 | static void | |
26326 | trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer) | |
1a6e0f06 | 26327 | { |
e4b2b4a8 JK |
26328 | - if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) |
26329 | + if (!is_timers_nohz_active()) | |
26330 | return; | |
1a6e0f06 | 26331 | |
e4b2b4a8 JK |
26332 | /* |
26333 | @@ -840,21 +870,20 @@ | |
26334 | return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK); | |
26335 | } | |
1a6e0f06 | 26336 | |
e4b2b4a8 JK |
26337 | -#ifdef CONFIG_NO_HZ_COMMON |
26338 | static inline struct timer_base * | |
26339 | get_target_base(struct timer_base *base, unsigned tflags) | |
1a6e0f06 | 26340 | { |
e4b2b4a8 JK |
26341 | -#ifdef CONFIG_SMP |
26342 | - if ((tflags & TIMER_PINNED) || !base->migration_enabled) | |
26343 | - return get_timer_this_cpu_base(tflags); | |
26344 | - return get_timer_cpu_base(tflags, get_nohz_timer_target()); | |
26345 | -#else | |
26346 | - return get_timer_this_cpu_base(tflags); | |
26347 | +#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) | |
26348 | + if (static_branch_unlikely(&timers_migration_enabled) && | |
26349 | + !(tflags & TIMER_PINNED)) | |
26350 | + return get_timer_cpu_base(tflags, get_nohz_timer_target()); | |
26351 | #endif | |
26352 | + return get_timer_this_cpu_base(tflags); | |
1a6e0f06 | 26353 | } |
1a6e0f06 | 26354 | |
e4b2b4a8 JK |
26355 | static inline void forward_timer_base(struct timer_base *base) |
26356 | { | |
26357 | +#ifdef CONFIG_NO_HZ_COMMON | |
26358 | unsigned long jnow; | |
1a6e0f06 | 26359 | |
e4b2b4a8 JK |
26360 | /* |
26361 | @@ -878,16 +907,8 @@ | |
26362 | base->clk = jnow; | |
26363 | else | |
26364 | base->clk = base->next_expiry; | |
26365 | -} | |
26366 | -#else | |
26367 | -static inline struct timer_base * | |
26368 | -get_target_base(struct timer_base *base, unsigned tflags) | |
26369 | -{ | |
26370 | - return get_timer_this_cpu_base(tflags); | |
26371 | -} | |
26372 | - | |
26373 | -static inline void forward_timer_base(struct timer_base *base) { } | |
26374 | #endif | |
26375 | +} | |
1a6e0f06 | 26376 | |
1a6e0f06 | 26377 | |
1a6e0f06 | 26378 | /* |
e4b2b4a8 | 26379 | @@ -1130,6 +1151,33 @@ |
1a6e0f06 | 26380 | } |
e4b2b4a8 | 26381 | EXPORT_SYMBOL_GPL(add_timer_on); |
1a6e0f06 | 26382 | |
e4b2b4a8 JK |
26383 | +#ifdef CONFIG_PREEMPT_RT_FULL |
26384 | +/* | |
26385 | + * Wait for a running timer | |
26386 | + */ | |
26387 | +static void wait_for_running_timer(struct timer_list *timer) | |
26388 | +{ | |
26389 | + struct timer_base *base; | |
26390 | + u32 tf = timer->flags; | |
26391 | + | |
26392 | + if (tf & TIMER_MIGRATING) | |
26393 | + return; | |
26394 | + | |
26395 | + base = get_timer_base(tf); | |
26396 | + swait_event(base->wait_for_running_timer, | |
26397 | + base->running_timer != timer); | |
26398 | +} | |
26399 | + | |
26400 | +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) | |
1a6e0f06 | 26401 | +#else |
e4b2b4a8 | 26402 | +static inline void wait_for_running_timer(struct timer_list *timer) |
1a6e0f06 | 26403 | +{ |
e4b2b4a8 | 26404 | + cpu_relax(); |
1a6e0f06 | 26405 | +} |
e4b2b4a8 JK |
26406 | + |
26407 | +# define wakeup_timer_waiters(b) do { } while (0) | |
1a6e0f06 JK |
26408 | +#endif |
26409 | + | |
e4b2b4a8 JK |
26410 | /** |
26411 | * del_timer - deactivate a timer. | |
26412 | * @timer: the timer to be deactivated | |
26413 | @@ -1185,7 +1233,7 @@ | |
26414 | } | |
26415 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
26416 | ||
26417 | -#ifdef CONFIG_SMP | |
26418 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
26419 | /** | |
26420 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
26421 | * @timer: the timer to be deactivated | |
26422 | @@ -1245,7 +1293,7 @@ | |
26423 | int ret = try_to_del_timer_sync(timer); | |
26424 | if (ret >= 0) | |
26425 | return ret; | |
26426 | - cpu_relax(); | |
26427 | + wait_for_running_timer(timer); | |
26428 | } | |
26429 | } | |
26430 | EXPORT_SYMBOL(del_timer_sync); | |
26431 | @@ -1309,13 +1357,16 @@ | |
26432 | fn = timer->function; | |
26433 | data = timer->data; | |
26434 | ||
26435 | - if (timer->flags & TIMER_IRQSAFE) { | |
26436 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && | |
26437 | + timer->flags & TIMER_IRQSAFE) { | |
26438 | raw_spin_unlock(&base->lock); | |
26439 | call_timer_fn(timer, fn, data); | |
26440 | + base->running_timer = NULL; | |
26441 | raw_spin_lock(&base->lock); | |
26442 | } else { | |
26443 | raw_spin_unlock_irq(&base->lock); | |
26444 | call_timer_fn(timer, fn, data); | |
26445 | + base->running_timer = NULL; | |
26446 | raw_spin_lock_irq(&base->lock); | |
26447 | } | |
26448 | } | |
26449 | @@ -1584,13 +1635,13 @@ | |
26450 | ||
26451 | /* Note: this timer irq context must be accounted for as well. */ | |
26452 | account_process_tick(p, user_tick); | |
26453 | + scheduler_tick(); | |
26454 | run_local_timers(); | |
26455 | rcu_check_callbacks(user_tick); | |
26456 | -#ifdef CONFIG_IRQ_WORK | |
26457 | +#if defined(CONFIG_IRQ_WORK) | |
26458 | if (in_irq()) | |
26459 | irq_work_tick(); | |
26460 | #endif | |
26461 | - scheduler_tick(); | |
26462 | if (IS_ENABLED(CONFIG_POSIX_TIMERS)) | |
26463 | run_posix_cpu_timers(p); | |
26464 | } | |
26465 | @@ -1617,8 +1668,8 @@ | |
26466 | while (levels--) | |
26467 | expire_timers(base, heads + levels); | |
26468 | } | |
26469 | - base->running_timer = NULL; | |
26470 | raw_spin_unlock_irq(&base->lock); | |
26471 | + wakeup_timer_waiters(base); | |
26472 | } | |
26473 | ||
1a6e0f06 | 26474 | /* |
e4b2b4a8 JK |
26475 | @@ -1628,6 +1679,7 @@ |
26476 | { | |
26477 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | |
26478 | ||
26479 | + irq_work_tick_soft(); | |
26480 | /* | |
26481 | * must_forward_clk must be cleared before running timers so that any | |
26482 | * timer functions that call mod_timer will not try to forward the | |
26483 | @@ -1864,6 +1916,9 @@ | |
26484 | base->cpu = cpu; | |
26485 | raw_spin_lock_init(&base->lock); | |
26486 | base->clk = jiffies; | |
26487 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26488 | + init_swait_queue_head(&base->wait_for_running_timer); | |
1a6e0f06 | 26489 | +#endif |
e4b2b4a8 JK |
26490 | } |
26491 | } | |
26492 | ||
26493 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/Kconfig linux-4.14/kernel/trace/Kconfig | |
26494 | --- linux-4.14.orig/kernel/trace/Kconfig 2018-09-05 11:03:22.000000000 +0200 | |
26495 | +++ linux-4.14/kernel/trace/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
26496 | @@ -585,7 +585,10 @@ | |
26497 | event activity as an initial guide for further investigation | |
26498 | using more advanced tools. | |
26499 | ||
26500 | - See Documentation/trace/events.txt. | |
26501 | + Inter-event tracing of quantities such as latencies is also | |
26502 | + supported using hist triggers under this option. | |
26503 | + | |
26504 | + See Documentation/trace/histogram.txt. | |
26505 | If in doubt, say N. | |
26506 | ||
26507 | config MMIOTRACE_TEST | |
26508 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/ring_buffer.c linux-4.14/kernel/trace/ring_buffer.c | |
26509 | --- linux-4.14.orig/kernel/trace/ring_buffer.c 2018-09-05 11:03:22.000000000 +0200 | |
26510 | +++ linux-4.14/kernel/trace/ring_buffer.c 2018-09-05 11:05:07.000000000 +0200 | |
26511 | @@ -41,6 +41,8 @@ | |
26512 | RINGBUF_TYPE_PADDING); | |
26513 | trace_seq_printf(s, "\ttime_extend : type == %d\n", | |
26514 | RINGBUF_TYPE_TIME_EXTEND); | |
26515 | + trace_seq_printf(s, "\ttime_stamp : type == %d\n", | |
26516 | + RINGBUF_TYPE_TIME_STAMP); | |
26517 | trace_seq_printf(s, "\tdata max type_len == %d\n", | |
26518 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | |
26519 | ||
26520 | @@ -140,12 +142,15 @@ | |
26521 | ||
26522 | enum { | |
26523 | RB_LEN_TIME_EXTEND = 8, | |
26524 | - RB_LEN_TIME_STAMP = 16, | |
26525 | + RB_LEN_TIME_STAMP = 8, | |
26526 | }; | |
26527 | ||
26528 | #define skip_time_extend(event) \ | |
26529 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | |
26530 | ||
26531 | +#define extended_time(event) \ | |
26532 | + (event->type_len >= RINGBUF_TYPE_TIME_EXTEND) | |
26533 | + | |
26534 | static inline int rb_null_event(struct ring_buffer_event *event) | |
1a6e0f06 | 26535 | { |
e4b2b4a8 JK |
26536 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
26537 | @@ -209,7 +214,7 @@ | |
26538 | { | |
26539 | unsigned len = 0; | |
1a6e0f06 | 26540 | |
e4b2b4a8 JK |
26541 | - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { |
26542 | + if (extended_time(event)) { | |
26543 | /* time extends include the data event after it */ | |
26544 | len = RB_LEN_TIME_EXTEND; | |
26545 | event = skip_time_extend(event); | |
26546 | @@ -231,7 +236,7 @@ | |
26547 | { | |
26548 | unsigned length; | |
1a6e0f06 | 26549 | |
e4b2b4a8 JK |
26550 | - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) |
26551 | + if (extended_time(event)) | |
26552 | event = skip_time_extend(event); | |
26553 | ||
26554 | length = rb_event_length(event); | |
26555 | @@ -248,7 +253,7 @@ | |
26556 | static __always_inline void * | |
26557 | rb_event_data(struct ring_buffer_event *event) | |
1a6e0f06 | 26558 | { |
e4b2b4a8 JK |
26559 | - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) |
26560 | + if (extended_time(event)) | |
26561 | event = skip_time_extend(event); | |
26562 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | |
26563 | /* If length is in len field, then array[0] has the data */ | |
26564 | @@ -275,6 +280,27 @@ | |
26565 | #define TS_MASK ((1ULL << TS_SHIFT) - 1) | |
26566 | #define TS_DELTA_TEST (~TS_MASK) | |
1a6e0f06 | 26567 | |
e4b2b4a8 JK |
26568 | +/** |
26569 | + * ring_buffer_event_time_stamp - return the event's extended timestamp | |
26570 | + * @event: the event to get the timestamp of | |
26571 | + * | |
26572 | + * Returns the extended timestamp associated with a data event. | |
26573 | + * An extended time_stamp is a 64-bit timestamp represented | |
26574 | + * internally in a special way that makes the best use of space | |
26575 | + * contained within a ring buffer event. This function decodes | |
26576 | + * it and maps it to a straight u64 value. | |
26577 | + */ | |
26578 | +u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event) | |
1a6e0f06 | 26579 | +{ |
e4b2b4a8 | 26580 | + u64 ts; |
1a6e0f06 | 26581 | + |
e4b2b4a8 JK |
26582 | + ts = event->array[0]; |
26583 | + ts <<= TS_SHIFT; | |
26584 | + ts += event->time_delta; | |
1a6e0f06 | 26585 | + |
e4b2b4a8 JK |
26586 | + return ts; |
26587 | +} | |
26588 | + | |
26589 | /* Flag when events were overwritten */ | |
26590 | #define RB_MISSED_EVENTS (1 << 31) | |
26591 | /* Missed count stored at end */ | |
26592 | @@ -451,6 +477,7 @@ | |
26593 | struct buffer_page *reader_page; | |
26594 | unsigned long lost_events; | |
26595 | unsigned long last_overrun; | |
26596 | + unsigned long nest; | |
26597 | local_t entries_bytes; | |
26598 | local_t entries; | |
26599 | local_t overrun; | |
26600 | @@ -488,6 +515,7 @@ | |
26601 | u64 (*clock)(void); | |
26602 | ||
26603 | struct rb_irq_work irq_work; | |
26604 | + bool time_stamp_abs; | |
26605 | }; | |
26606 | ||
26607 | struct ring_buffer_iter { | |
26608 | @@ -1387,6 +1415,16 @@ | |
26609 | buffer->clock = clock; | |
1a6e0f06 JK |
26610 | } |
26611 | ||
e4b2b4a8 | 26612 | +void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) |
1a6e0f06 | 26613 | +{ |
e4b2b4a8 | 26614 | + buffer->time_stamp_abs = abs; |
1a6e0f06 JK |
26615 | +} |
26616 | + | |
e4b2b4a8 | 26617 | +bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) |
1a6e0f06 | 26618 | +{ |
e4b2b4a8 | 26619 | + return buffer->time_stamp_abs; |
1a6e0f06 JK |
26620 | +} |
26621 | + | |
e4b2b4a8 JK |
26622 | static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); |
26623 | ||
26624 | static inline unsigned long rb_page_entries(struct buffer_page *bpage) | |
26625 | @@ -2217,12 +2255,15 @@ | |
26626 | ||
26627 | /* Slow path, do not inline */ | |
26628 | static noinline struct ring_buffer_event * | |
26629 | -rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | |
26630 | +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) | |
26631 | { | |
26632 | - event->type_len = RINGBUF_TYPE_TIME_EXTEND; | |
26633 | + if (abs) | |
26634 | + event->type_len = RINGBUF_TYPE_TIME_STAMP; | |
26635 | + else | |
26636 | + event->type_len = RINGBUF_TYPE_TIME_EXTEND; | |
26637 | ||
26638 | - /* Not the first event on the page? */ | |
26639 | - if (rb_event_index(event)) { | |
26640 | + /* Not the first event on the page, or not delta? */ | |
26641 | + if (abs || rb_event_index(event)) { | |
26642 | event->time_delta = delta & TS_MASK; | |
26643 | event->array[0] = delta >> TS_SHIFT; | |
26644 | } else { | |
26645 | @@ -2265,7 +2306,9 @@ | |
26646 | * add it to the start of the resevered space. | |
26647 | */ | |
26648 | if (unlikely(info->add_timestamp)) { | |
26649 | - event = rb_add_time_stamp(event, delta); | |
26650 | + bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); | |
26651 | + | |
26652 | + event = rb_add_time_stamp(event, info->delta, abs); | |
26653 | length -= RB_LEN_TIME_EXTEND; | |
26654 | delta = 0; | |
26655 | } | |
26656 | @@ -2453,7 +2496,7 @@ | |
26657 | ||
26658 | static inline void rb_event_discard(struct ring_buffer_event *event) | |
26659 | { | |
26660 | - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | |
26661 | + if (extended_time(event)) | |
26662 | event = skip_time_extend(event); | |
26663 | ||
26664 | /* array[0] holds the actual length for the discarded event */ | |
26665 | @@ -2497,10 +2540,11 @@ | |
26666 | cpu_buffer->write_stamp = | |
26667 | cpu_buffer->commit_page->page->time_stamp; | |
26668 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | |
26669 | - delta = event->array[0]; | |
26670 | - delta <<= TS_SHIFT; | |
26671 | - delta += event->time_delta; | |
26672 | + delta = ring_buffer_event_time_stamp(event); | |
26673 | cpu_buffer->write_stamp += delta; | |
26674 | + } else if (event->type_len == RINGBUF_TYPE_TIME_STAMP) { | |
26675 | + delta = ring_buffer_event_time_stamp(event); | |
26676 | + cpu_buffer->write_stamp = delta; | |
26677 | } else | |
26678 | cpu_buffer->write_stamp += event->time_delta; | |
26679 | } | |
26680 | @@ -2583,22 +2627,19 @@ | |
26681 | trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) | |
26682 | { | |
26683 | unsigned int val = cpu_buffer->current_context; | |
26684 | + unsigned long pc = preempt_count(); | |
26685 | int bit; | |
26686 | ||
26687 | - if (in_interrupt()) { | |
26688 | - if (in_nmi()) | |
26689 | - bit = RB_CTX_NMI; | |
26690 | - else if (in_irq()) | |
26691 | - bit = RB_CTX_IRQ; | |
26692 | - else | |
26693 | - bit = RB_CTX_SOFTIRQ; | |
26694 | - } else | |
26695 | + if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) | |
26696 | bit = RB_CTX_NORMAL; | |
26697 | + else | |
26698 | + bit = pc & NMI_MASK ? RB_CTX_NMI : | |
26699 | + pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; | |
26700 | ||
26701 | - if (unlikely(val & (1 << bit))) | |
26702 | + if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) | |
26703 | return 1; | |
26704 | ||
26705 | - val |= (1 << bit); | |
26706 | + val |= (1 << (bit + cpu_buffer->nest)); | |
26707 | cpu_buffer->current_context = val; | |
26708 | ||
26709 | return 0; | |
26710 | @@ -2607,7 +2648,57 @@ | |
26711 | static __always_inline void | |
26712 | trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer) | |
26713 | { | |
26714 | - cpu_buffer->current_context &= cpu_buffer->current_context - 1; | |
26715 | + cpu_buffer->current_context &= | |
26716 | + cpu_buffer->current_context - (1 << cpu_buffer->nest); | |
26717 | +} | |
26718 | + | |
26719 | +/* The recursive locking above uses 4 bits */ | |
26720 | +#define NESTED_BITS 4 | |
26721 | + | |
26722 | +/** | |
26723 | + * ring_buffer_nest_start - Allow to trace while nested | |
26724 | + * @buffer: The ring buffer to modify | |
26725 | + * | |
26726 | + * The ring buffer has a safty mechanism to prevent recursion. | |
26727 | + * But there may be a case where a trace needs to be done while | |
26728 | + * tracing something else. In this case, calling this function | |
26729 | + * will allow this function to nest within a currently active | |
26730 | + * ring_buffer_lock_reserve(). | |
26731 | + * | |
26732 | + * Call this function before calling another ring_buffer_lock_reserve() and | |
26733 | + * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit(). | |
1a6e0f06 | 26734 | + */ |
e4b2b4a8 | 26735 | +void ring_buffer_nest_start(struct ring_buffer *buffer) |
1a6e0f06 | 26736 | +{ |
e4b2b4a8 JK |
26737 | + struct ring_buffer_per_cpu *cpu_buffer; |
26738 | + int cpu; | |
1a6e0f06 | 26739 | + |
e4b2b4a8 JK |
26740 | + /* Enabled by ring_buffer_nest_end() */ |
26741 | + preempt_disable_notrace(); | |
26742 | + cpu = raw_smp_processor_id(); | |
26743 | + cpu_buffer = buffer->buffers[cpu]; | |
26744 | + /* This is the shift value for the above recusive locking */ | |
26745 | + cpu_buffer->nest += NESTED_BITS; | |
1a6e0f06 JK |
26746 | +} |
26747 | + | |
e4b2b4a8 JK |
26748 | +/** |
26749 | + * ring_buffer_nest_end - Allow to trace while nested | |
26750 | + * @buffer: The ring buffer to modify | |
26751 | + * | |
26752 | + * Must be called after ring_buffer_nest_start() and after the | |
26753 | + * ring_buffer_unlock_commit(). | |
1a6e0f06 | 26754 | + */ |
e4b2b4a8 | 26755 | +void ring_buffer_nest_end(struct ring_buffer *buffer) |
1a6e0f06 | 26756 | +{ |
e4b2b4a8 | 26757 | + struct ring_buffer_per_cpu *cpu_buffer; |
1a6e0f06 JK |
26758 | + int cpu; |
26759 | + | |
e4b2b4a8 JK |
26760 | + /* disabled by ring_buffer_nest_start() */ |
26761 | + cpu = raw_smp_processor_id(); | |
26762 | + cpu_buffer = buffer->buffers[cpu]; | |
26763 | + /* This is the shift value for the above recusive locking */ | |
26764 | + cpu_buffer->nest -= NESTED_BITS; | |
26765 | + preempt_enable_notrace(); | |
26766 | } | |
26767 | ||
26768 | /** | |
26769 | @@ -2683,7 +2774,7 @@ | |
26770 | * If this is the first commit on the page, then it has the same | |
26771 | * timestamp as the page itself. | |
26772 | */ | |
26773 | - if (!tail) | |
26774 | + if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer)) | |
26775 | info->delta = 0; | |
26776 | ||
26777 | /* See if we shot pass the end of this buffer page */ | |
26778 | @@ -2760,8 +2851,11 @@ | |
26779 | /* make sure this diff is calculated here */ | |
26780 | barrier(); | |
26781 | ||
26782 | - /* Did the write stamp get updated already? */ | |
26783 | - if (likely(info.ts >= cpu_buffer->write_stamp)) { | |
26784 | + if (ring_buffer_time_stamp_abs(buffer)) { | |
26785 | + info.delta = info.ts; | |
26786 | + rb_handle_timestamp(cpu_buffer, &info); | |
26787 | + } else /* Did the write stamp get updated already? */ | |
26788 | + if (likely(info.ts >= cpu_buffer->write_stamp)) { | |
26789 | info.delta = diff; | |
26790 | if (unlikely(test_time_stamp(info.delta))) | |
26791 | rb_handle_timestamp(cpu_buffer, &info); | |
26792 | @@ -3459,14 +3553,13 @@ | |
26793 | return; | |
26794 | ||
26795 | case RINGBUF_TYPE_TIME_EXTEND: | |
26796 | - delta = event->array[0]; | |
26797 | - delta <<= TS_SHIFT; | |
26798 | - delta += event->time_delta; | |
26799 | + delta = ring_buffer_event_time_stamp(event); | |
26800 | cpu_buffer->read_stamp += delta; | |
26801 | return; | |
26802 | ||
26803 | case RINGBUF_TYPE_TIME_STAMP: | |
26804 | - /* FIXME: not implemented */ | |
26805 | + delta = ring_buffer_event_time_stamp(event); | |
26806 | + cpu_buffer->read_stamp = delta; | |
26807 | return; | |
26808 | ||
26809 | case RINGBUF_TYPE_DATA: | |
26810 | @@ -3490,14 +3583,13 @@ | |
26811 | return; | |
26812 | ||
26813 | case RINGBUF_TYPE_TIME_EXTEND: | |
26814 | - delta = event->array[0]; | |
26815 | - delta <<= TS_SHIFT; | |
26816 | - delta += event->time_delta; | |
26817 | + delta = ring_buffer_event_time_stamp(event); | |
26818 | iter->read_stamp += delta; | |
26819 | return; | |
26820 | ||
26821 | case RINGBUF_TYPE_TIME_STAMP: | |
26822 | - /* FIXME: not implemented */ | |
26823 | + delta = ring_buffer_event_time_stamp(event); | |
26824 | + iter->read_stamp = delta; | |
26825 | return; | |
26826 | ||
26827 | case RINGBUF_TYPE_DATA: | |
26828 | @@ -3721,6 +3813,8 @@ | |
26829 | struct buffer_page *reader; | |
26830 | int nr_loops = 0; | |
26831 | ||
26832 | + if (ts) | |
26833 | + *ts = 0; | |
26834 | again: | |
26835 | /* | |
26836 | * We repeat when a time extend is encountered. | |
26837 | @@ -3757,12 +3851,17 @@ | |
26838 | goto again; | |
26839 | ||
26840 | case RINGBUF_TYPE_TIME_STAMP: | |
26841 | - /* FIXME: not implemented */ | |
26842 | + if (ts) { | |
26843 | + *ts = ring_buffer_event_time_stamp(event); | |
26844 | + ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | |
26845 | + cpu_buffer->cpu, ts); | |
26846 | + } | |
26847 | + /* Internal data, OK to advance */ | |
26848 | rb_advance_reader(cpu_buffer); | |
26849 | goto again; | |
26850 | ||
26851 | case RINGBUF_TYPE_DATA: | |
26852 | - if (ts) { | |
26853 | + if (ts && !(*ts)) { | |
26854 | *ts = cpu_buffer->read_stamp + event->time_delta; | |
26855 | ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | |
26856 | cpu_buffer->cpu, ts); | |
26857 | @@ -3787,6 +3886,9 @@ | |
26858 | struct ring_buffer_event *event; | |
26859 | int nr_loops = 0; | |
26860 | ||
26861 | + if (ts) | |
26862 | + *ts = 0; | |
26863 | + | |
26864 | cpu_buffer = iter->cpu_buffer; | |
26865 | buffer = cpu_buffer->buffer; | |
26866 | ||
26867 | @@ -3839,12 +3941,17 @@ | |
26868 | goto again; | |
26869 | ||
26870 | case RINGBUF_TYPE_TIME_STAMP: | |
26871 | - /* FIXME: not implemented */ | |
26872 | + if (ts) { | |
26873 | + *ts = ring_buffer_event_time_stamp(event); | |
26874 | + ring_buffer_normalize_time_stamp(cpu_buffer->buffer, | |
26875 | + cpu_buffer->cpu, ts); | |
26876 | + } | |
26877 | + /* Internal data, OK to advance */ | |
26878 | rb_advance_iter(iter); | |
26879 | goto again; | |
26880 | ||
26881 | case RINGBUF_TYPE_DATA: | |
26882 | - if (ts) { | |
26883 | + if (ts && !(*ts)) { | |
26884 | *ts = iter->read_stamp + event->time_delta; | |
26885 | ring_buffer_normalize_time_stamp(buffer, | |
26886 | cpu_buffer->cpu, ts); | |
26887 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace.c linux-4.14/kernel/trace/trace.c | |
26888 | --- linux-4.14.orig/kernel/trace/trace.c 2018-09-05 11:03:22.000000000 +0200 | |
26889 | +++ linux-4.14/kernel/trace/trace.c 2018-09-05 11:05:07.000000000 +0200 | |
26890 | @@ -1170,6 +1170,14 @@ | |
26891 | ARCH_TRACE_CLOCKS | |
26892 | }; | |
26893 | ||
26894 | +bool trace_clock_in_ns(struct trace_array *tr) | |
26895 | +{ | |
26896 | + if (trace_clocks[tr->clock_id].in_ns) | |
26897 | + return true; | |
26898 | + | |
26899 | + return false; | |
1a6e0f06 | 26900 | +} |
1a6e0f06 JK |
26901 | + |
26902 | /* | |
e4b2b4a8 | 26903 | * trace_parser_get_init - gets the buffer for trace parser |
1a6e0f06 | 26904 | */ |
e4b2b4a8 JK |
26905 | @@ -2127,6 +2135,7 @@ |
26906 | struct task_struct *tsk = current; | |
26907 | ||
26908 | entry->preempt_count = pc & 0xff; | |
26909 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
26910 | entry->pid = (tsk) ? tsk->pid : 0; | |
26911 | entry->flags = | |
26912 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
26913 | @@ -2137,8 +2146,11 @@ | |
26914 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | | |
26915 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
26916 | ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | | |
26917 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
26918 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
26919 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
26920 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
26921 | + | |
26922 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
1a6e0f06 | 26923 | } |
e4b2b4a8 | 26924 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); |
1a6e0f06 | 26925 | |
e4b2b4a8 JK |
26926 | @@ -2275,7 +2287,7 @@ |
26927 | ||
26928 | *current_rb = trace_file->tr->trace_buffer.buffer; | |
26929 | ||
26930 | - if ((trace_file->flags & | |
26931 | + if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & | |
26932 | (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && | |
26933 | (entry = this_cpu_read(trace_buffered_event))) { | |
26934 | /* Try to use the per cpu buffer first */ | |
26935 | @@ -3342,14 +3354,17 @@ | |
26936 | ||
26937 | static void print_lat_help_header(struct seq_file *m) | |
26938 | { | |
26939 | - seq_puts(m, "# _------=> CPU# \n" | |
26940 | - "# / _-----=> irqs-off \n" | |
26941 | - "# | / _----=> need-resched \n" | |
26942 | - "# || / _---=> hardirq/softirq \n" | |
26943 | - "# ||| / _--=> preempt-depth \n" | |
26944 | - "# |||| / delay \n" | |
26945 | - "# cmd pid ||||| time | caller \n" | |
26946 | - "# \\ / ||||| \\ | / \n"); | |
26947 | + seq_puts(m, "# _--------=> CPU# \n" | |
26948 | + "# / _-------=> irqs-off \n" | |
26949 | + "# | / _------=> need-resched \n" | |
26950 | + "# || / _-----=> need-resched_lazy \n" | |
26951 | + "# ||| / _----=> hardirq/softirq \n" | |
26952 | + "# |||| / _---=> preempt-depth \n" | |
26953 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
26954 | + "# |||||| / _-=> migrate-disable \n" | |
26955 | + "# ||||||| / delay \n" | |
26956 | + "# cmd pid |||||||| time | caller \n" | |
26957 | + "# \\ / |||||||| \\ | / \n"); | |
1a6e0f06 | 26958 | } |
1a6e0f06 | 26959 | |
e4b2b4a8 JK |
26960 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) |
26961 | @@ -3385,15 +3400,17 @@ | |
26962 | tgid ? tgid_space : space); | |
26963 | seq_printf(m, "# %s / _----=> need-resched\n", | |
26964 | tgid ? tgid_space : space); | |
26965 | - seq_printf(m, "# %s| / _---=> hardirq/softirq\n", | |
26966 | + seq_printf(m, "# %s| / _----=> need-resched_lazy\n", | |
26967 | tgid ? tgid_space : space); | |
26968 | - seq_printf(m, "# %s|| / _--=> preempt-depth\n", | |
26969 | + seq_printf(m, "# %s|| / _---=> hardirq/softirq\n", | |
26970 | tgid ? tgid_space : space); | |
26971 | - seq_printf(m, "# %s||| / delay\n", | |
26972 | + seq_printf(m, "# %s||| / _--=> preempt-depth\n", | |
26973 | tgid ? tgid_space : space); | |
26974 | - seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n", | |
26975 | + seq_printf(m, "# %s|||| / delay\n", | |
26976 | + tgid ? tgid_space : space); | |
26977 | + seq_printf(m, "# TASK-PID %sCPU# ||||| TIMESTAMP FUNCTION\n", | |
26978 | tgid ? " TGID " : space); | |
26979 | - seq_printf(m, "# | | %s | |||| | |\n", | |
26980 | + seq_printf(m, "# | | %s | ||||| | |\n", | |
26981 | tgid ? " | " : space); | |
26982 | } | |
26983 | ||
26984 | @@ -4531,6 +4548,9 @@ | |
26985 | #ifdef CONFIG_X86_64 | |
26986 | " x86-tsc: TSC cycle counter\n" | |
26987 | #endif | |
26988 | + "\n timestamp_mode\t-view the mode used to timestamp events\n" | |
26989 | + " delta: Delta difference against a buffer-wide timestamp\n" | |
26990 | + " absolute: Absolute (standalone) timestamp\n" | |
26991 | "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n" | |
26992 | "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n" | |
26993 | " tracing_cpumask\t- Limit which CPUs to trace\n" | |
26994 | @@ -4707,8 +4727,9 @@ | |
26995 | "\t .sym display an address as a symbol\n" | |
26996 | "\t .sym-offset display an address as a symbol and offset\n" | |
26997 | "\t .execname display a common_pid as a program name\n" | |
26998 | - "\t .syscall display a syscall id as a syscall name\n\n" | |
26999 | - "\t .log2 display log2 value rather than raw number\n\n" | |
27000 | + "\t .syscall display a syscall id as a syscall name\n" | |
27001 | + "\t .log2 display log2 value rather than raw number\n" | |
27002 | + "\t .usecs display a common_timestamp in microseconds\n\n" | |
27003 | "\t The 'pause' parameter can be used to pause an existing hist\n" | |
27004 | "\t trigger or to start a hist trigger but not log any events\n" | |
27005 | "\t until told to do so. 'continue' can be used to start or\n" | |
27006 | @@ -6218,7 +6239,7 @@ | |
27007 | return 0; | |
1a6e0f06 | 27008 | } |
1a6e0f06 | 27009 | |
e4b2b4a8 JK |
27010 | -static int tracing_set_clock(struct trace_array *tr, const char *clockstr) |
27011 | +int tracing_set_clock(struct trace_array *tr, const char *clockstr) | |
27012 | { | |
27013 | int i; | |
27014 | ||
27015 | @@ -6298,6 +6319,71 @@ | |
27016 | return ret; | |
1a6e0f06 | 27017 | } |
e4b2b4a8 JK |
27018 | |
27019 | +static int tracing_time_stamp_mode_show(struct seq_file *m, void *v) | |
27020 | +{ | |
27021 | + struct trace_array *tr = m->private; | |
27022 | + | |
27023 | + mutex_lock(&trace_types_lock); | |
27024 | + | |
27025 | + if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer)) | |
27026 | + seq_puts(m, "delta [absolute]\n"); | |
27027 | + else | |
27028 | + seq_puts(m, "[delta] absolute\n"); | |
27029 | + | |
27030 | + mutex_unlock(&trace_types_lock); | |
27031 | + | |
27032 | + return 0; | |
27033 | +} | |
27034 | + | |
27035 | +static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file) | |
27036 | +{ | |
27037 | + struct trace_array *tr = inode->i_private; | |
27038 | + int ret; | |
27039 | + | |
27040 | + if (tracing_disabled) | |
27041 | + return -ENODEV; | |
27042 | + | |
27043 | + if (trace_array_get(tr)) | |
27044 | + return -ENODEV; | |
27045 | + | |
27046 | + ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private); | |
27047 | + if (ret < 0) | |
27048 | + trace_array_put(tr); | |
27049 | + | |
27050 | + return ret; | |
27051 | +} | |
27052 | + | |
27053 | +int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs) | |
27054 | +{ | |
27055 | + int ret = 0; | |
27056 | + | |
27057 | + mutex_lock(&trace_types_lock); | |
27058 | + | |
27059 | + if (abs && tr->time_stamp_abs_ref++) | |
27060 | + goto out; | |
27061 | + | |
27062 | + if (!abs) { | |
27063 | + if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) { | |
27064 | + ret = -EINVAL; | |
27065 | + goto out; | |
27066 | + } | |
27067 | + | |
27068 | + if (--tr->time_stamp_abs_ref) | |
27069 | + goto out; | |
27070 | + } | |
27071 | + | |
27072 | + ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs); | |
27073 | + | |
27074 | +#ifdef CONFIG_TRACER_MAX_TRACE | |
27075 | + if (tr->max_buffer.buffer) | |
27076 | + ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs); | |
1a6e0f06 | 27077 | +#endif |
e4b2b4a8 JK |
27078 | + out: |
27079 | + mutex_unlock(&trace_types_lock); | |
27080 | + | |
27081 | + return ret; | |
27082 | +} | |
27083 | + | |
27084 | struct ftrace_buffer_info { | |
27085 | struct trace_iterator iter; | |
27086 | void *spare; | |
27087 | @@ -6545,6 +6631,13 @@ | |
27088 | .write = tracing_clock_write, | |
27089 | }; | |
1a6e0f06 | 27090 | |
e4b2b4a8 JK |
27091 | +static const struct file_operations trace_time_stamp_mode_fops = { |
27092 | + .open = tracing_time_stamp_mode_open, | |
27093 | + .read = seq_read, | |
27094 | + .llseek = seq_lseek, | |
27095 | + .release = tracing_single_release_tr, | |
27096 | +}; | |
27097 | + | |
27098 | #ifdef CONFIG_TRACER_SNAPSHOT | |
27099 | static const struct file_operations snapshot_fops = { | |
27100 | .open = tracing_snapshot_open, | |
27101 | @@ -7682,6 +7775,7 @@ | |
27102 | struct trace_array *tr; | |
27103 | int ret; | |
1a6e0f06 | 27104 | |
e4b2b4a8 JK |
27105 | + mutex_lock(&event_mutex); |
27106 | mutex_lock(&trace_types_lock); | |
1a6e0f06 | 27107 | |
e4b2b4a8 JK |
27108 | ret = -EEXIST; |
27109 | @@ -7714,6 +7808,7 @@ | |
1a6e0f06 | 27110 | |
e4b2b4a8 JK |
27111 | INIT_LIST_HEAD(&tr->systems); |
27112 | INIT_LIST_HEAD(&tr->events); | |
27113 | + INIT_LIST_HEAD(&tr->hist_vars); | |
1a6e0f06 | 27114 | |
e4b2b4a8 JK |
27115 | if (allocate_trace_buffers(tr, trace_buf_size) < 0) |
27116 | goto out_free_tr; | |
27117 | @@ -7737,6 +7832,7 @@ | |
27118 | list_add(&tr->list, &ftrace_trace_arrays); | |
1a6e0f06 | 27119 | |
e4b2b4a8 JK |
27120 | mutex_unlock(&trace_types_lock); |
27121 | + mutex_unlock(&event_mutex); | |
1a6e0f06 | 27122 | |
e4b2b4a8 | 27123 | return 0; |
1a6e0f06 | 27124 | |
e4b2b4a8 | 27125 | @@ -7748,6 +7844,7 @@ |
1a6e0f06 | 27126 | |
e4b2b4a8 JK |
27127 | out_unlock: |
27128 | mutex_unlock(&trace_types_lock); | |
27129 | + mutex_unlock(&event_mutex); | |
1a6e0f06 | 27130 | |
e4b2b4a8 | 27131 | return ret; |
1a6e0f06 | 27132 | |
e4b2b4a8 JK |
27133 | @@ -7760,6 +7857,7 @@ |
27134 | int ret; | |
27135 | int i; | |
1a6e0f06 | 27136 | |
e4b2b4a8 JK |
27137 | + mutex_lock(&event_mutex); |
27138 | mutex_lock(&trace_types_lock); | |
1a6e0f06 | 27139 | |
e4b2b4a8 JK |
27140 | ret = -ENODEV; |
27141 | @@ -7805,6 +7903,7 @@ | |
1a6e0f06 | 27142 | |
e4b2b4a8 JK |
27143 | out_unlock: |
27144 | mutex_unlock(&trace_types_lock); | |
27145 | + mutex_unlock(&event_mutex); | |
1a6e0f06 | 27146 | |
e4b2b4a8 JK |
27147 | return ret; |
27148 | } | |
27149 | @@ -7862,6 +7961,9 @@ | |
27150 | trace_create_file("tracing_on", 0644, d_tracer, | |
27151 | tr, &rb_simple_fops); | |
1a6e0f06 | 27152 | |
e4b2b4a8 JK |
27153 | + trace_create_file("timestamp_mode", 0444, d_tracer, tr, |
27154 | + &trace_time_stamp_mode_fops); | |
1a6e0f06 | 27155 | + |
e4b2b4a8 | 27156 | create_trace_options_dir(tr); |
1a6e0f06 | 27157 | |
e4b2b4a8 JK |
27158 | #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) |
27159 | @@ -8271,6 +8373,92 @@ | |
1a6e0f06 | 27160 | } |
e4b2b4a8 | 27161 | EXPORT_SYMBOL_GPL(ftrace_dump); |
1a6e0f06 | 27162 | |
e4b2b4a8 | 27163 | +int trace_run_command(const char *buf, int (*createfn)(int, char **)) |
1a6e0f06 | 27164 | +{ |
e4b2b4a8 JK |
27165 | + char **argv; |
27166 | + int argc, ret; | |
1a6e0f06 | 27167 | + |
e4b2b4a8 JK |
27168 | + argc = 0; |
27169 | + ret = 0; | |
27170 | + argv = argv_split(GFP_KERNEL, buf, &argc); | |
27171 | + if (!argv) | |
27172 | + return -ENOMEM; | |
27173 | + | |
27174 | + if (argc) | |
27175 | + ret = createfn(argc, argv); | |
27176 | + | |
27177 | + argv_free(argv); | |
27178 | + | |
27179 | + return ret; | |
1a6e0f06 JK |
27180 | +} |
27181 | + | |
e4b2b4a8 JK |
27182 | +#define WRITE_BUFSIZE 4096 |
27183 | + | |
27184 | +ssize_t trace_parse_run_command(struct file *file, const char __user *buffer, | |
27185 | + size_t count, loff_t *ppos, | |
27186 | + int (*createfn)(int, char **)) | |
27187 | +{ | |
27188 | + char *kbuf, *buf, *tmp; | |
27189 | + int ret = 0; | |
27190 | + size_t done = 0; | |
27191 | + size_t size; | |
27192 | + | |
27193 | + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); | |
27194 | + if (!kbuf) | |
27195 | + return -ENOMEM; | |
27196 | + | |
27197 | + while (done < count) { | |
27198 | + size = count - done; | |
27199 | + | |
27200 | + if (size >= WRITE_BUFSIZE) | |
27201 | + size = WRITE_BUFSIZE - 1; | |
27202 | + | |
27203 | + if (copy_from_user(kbuf, buffer + done, size)) { | |
27204 | + ret = -EFAULT; | |
27205 | + goto out; | |
27206 | + } | |
27207 | + kbuf[size] = '\0'; | |
27208 | + buf = kbuf; | |
27209 | + do { | |
27210 | + tmp = strchr(buf, '\n'); | |
27211 | + if (tmp) { | |
27212 | + *tmp = '\0'; | |
27213 | + size = tmp - buf + 1; | |
27214 | + } else { | |
27215 | + size = strlen(buf); | |
27216 | + if (done + size < count) { | |
27217 | + if (buf != kbuf) | |
27218 | + break; | |
27219 | + /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ | |
27220 | + pr_warn("Line length is too long: Should be less than %d\n", | |
27221 | + WRITE_BUFSIZE - 2); | |
27222 | + ret = -EINVAL; | |
27223 | + goto out; | |
27224 | + } | |
27225 | + } | |
27226 | + done += size; | |
27227 | + | |
27228 | + /* Remove comments */ | |
27229 | + tmp = strchr(buf, '#'); | |
27230 | + | |
27231 | + if (tmp) | |
27232 | + *tmp = '\0'; | |
27233 | + | |
27234 | + ret = trace_run_command(buf, createfn); | |
27235 | + if (ret) | |
27236 | + goto out; | |
27237 | + buf += size; | |
27238 | + | |
27239 | + } while (done < count); | |
27240 | + } | |
27241 | + ret = done; | |
27242 | + | |
27243 | +out: | |
27244 | + kfree(kbuf); | |
27245 | + | |
27246 | + return ret; | |
27247 | +} | |
27248 | + | |
27249 | __init static int tracer_alloc_buffers(void) | |
27250 | { | |
27251 | int ring_buf_size; | |
27252 | @@ -8371,6 +8559,7 @@ | |
27253 | ||
27254 | INIT_LIST_HEAD(&global_trace.systems); | |
27255 | INIT_LIST_HEAD(&global_trace.events); | |
27256 | + INIT_LIST_HEAD(&global_trace.hist_vars); | |
27257 | list_add(&global_trace.list, &ftrace_trace_arrays); | |
27258 | ||
27259 | apply_trace_boot_options(); | |
27260 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_events.c linux-4.14/kernel/trace/trace_events.c | |
27261 | --- linux-4.14.orig/kernel/trace/trace_events.c 2018-09-05 11:03:22.000000000 +0200 | |
27262 | +++ linux-4.14/kernel/trace/trace_events.c 2018-09-05 11:05:07.000000000 +0200 | |
27263 | @@ -187,6 +187,8 @@ | |
27264 | __common_field(unsigned char, flags); | |
27265 | __common_field(unsigned char, preempt_count); | |
27266 | __common_field(int, pid); | |
27267 | + __common_field(unsigned short, migrate_disable); | |
27268 | + __common_field(unsigned short, padding); | |
27269 | ||
27270 | return ret; | |
1a6e0f06 | 27271 | } |
e4b2b4a8 JK |
27272 | @@ -1406,8 +1408,8 @@ |
27273 | return -ENODEV; | |
1a6e0f06 | 27274 | |
e4b2b4a8 JK |
27275 | /* Make sure the system still exists */ |
27276 | - mutex_lock(&trace_types_lock); | |
27277 | mutex_lock(&event_mutex); | |
27278 | + mutex_lock(&trace_types_lock); | |
27279 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { | |
27280 | list_for_each_entry(dir, &tr->systems, list) { | |
27281 | if (dir == inode->i_private) { | |
27282 | @@ -1421,8 +1423,8 @@ | |
27283 | } | |
27284 | } | |
27285 | exit_loop: | |
27286 | - mutex_unlock(&event_mutex); | |
27287 | mutex_unlock(&trace_types_lock); | |
27288 | + mutex_unlock(&event_mutex); | |
27289 | ||
27290 | if (!system) | |
27291 | return -ENODEV; | |
27292 | @@ -2308,15 +2310,15 @@ | |
27293 | int trace_add_event_call(struct trace_event_call *call) | |
1a6e0f06 | 27294 | { |
e4b2b4a8 JK |
27295 | int ret; |
27296 | - mutex_lock(&trace_types_lock); | |
27297 | mutex_lock(&event_mutex); | |
27298 | + mutex_lock(&trace_types_lock); | |
27299 | ||
27300 | ret = __register_event(call, NULL); | |
27301 | if (ret >= 0) | |
27302 | __add_event_to_tracers(call); | |
27303 | ||
27304 | - mutex_unlock(&event_mutex); | |
27305 | mutex_unlock(&trace_types_lock); | |
27306 | + mutex_unlock(&event_mutex); | |
27307 | return ret; | |
1a6e0f06 JK |
27308 | } |
27309 | ||
e4b2b4a8 | 27310 | @@ -2370,13 +2372,13 @@ |
1a6e0f06 | 27311 | { |
e4b2b4a8 | 27312 | int ret; |
1a6e0f06 | 27313 | |
e4b2b4a8 JK |
27314 | - mutex_lock(&trace_types_lock); |
27315 | mutex_lock(&event_mutex); | |
27316 | + mutex_lock(&trace_types_lock); | |
27317 | down_write(&trace_event_sem); | |
27318 | ret = probe_remove_event_call(call); | |
27319 | up_write(&trace_event_sem); | |
27320 | - mutex_unlock(&event_mutex); | |
27321 | mutex_unlock(&trace_types_lock); | |
27322 | + mutex_unlock(&event_mutex); | |
1a6e0f06 | 27323 | |
e4b2b4a8 | 27324 | return ret; |
1a6e0f06 | 27325 | } |
e4b2b4a8 JK |
27326 | @@ -2438,8 +2440,8 @@ |
27327 | { | |
27328 | struct module *mod = data; | |
1a6e0f06 | 27329 | |
e4b2b4a8 JK |
27330 | - mutex_lock(&trace_types_lock); |
27331 | mutex_lock(&event_mutex); | |
27332 | + mutex_lock(&trace_types_lock); | |
27333 | switch (val) { | |
27334 | case MODULE_STATE_COMING: | |
27335 | trace_module_add_events(mod); | |
27336 | @@ -2448,8 +2450,8 @@ | |
27337 | trace_module_remove_events(mod); | |
27338 | break; | |
27339 | } | |
27340 | - mutex_unlock(&event_mutex); | |
27341 | mutex_unlock(&trace_types_lock); | |
27342 | + mutex_unlock(&event_mutex); | |
1a6e0f06 | 27343 | |
1a6e0f06 JK |
27344 | return 0; |
27345 | } | |
e4b2b4a8 JK |
27346 | @@ -2964,24 +2966,24 @@ |
27347 | * creates the event hierachry in the @parent/events directory. | |
27348 | * | |
27349 | * Returns 0 on success. | |
27350 | + * | |
27351 | + * Must be called with event_mutex held. | |
27352 | */ | |
27353 | int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr) | |
27354 | { | |
27355 | int ret; | |
1a6e0f06 | 27356 | |
e4b2b4a8 JK |
27357 | - mutex_lock(&event_mutex); |
27358 | + lockdep_assert_held(&event_mutex); | |
c7c16703 | 27359 | |
e4b2b4a8 JK |
27360 | ret = create_event_toplevel_files(parent, tr); |
27361 | if (ret) | |
27362 | - goto out_unlock; | |
27363 | + goto out; | |
c7c16703 | 27364 | |
e4b2b4a8 JK |
27365 | down_write(&trace_event_sem); |
27366 | __trace_add_event_dirs(tr); | |
27367 | up_write(&trace_event_sem); | |
c7c16703 | 27368 | |
e4b2b4a8 JK |
27369 | - out_unlock: |
27370 | - mutex_unlock(&event_mutex); | |
27371 | - | |
27372 | + out: | |
27373 | return ret; | |
1a6e0f06 | 27374 | } |
1a6e0f06 | 27375 | |
e4b2b4a8 JK |
27376 | @@ -3010,9 +3012,10 @@ |
27377 | return ret; | |
1a6e0f06 | 27378 | } |
1a6e0f06 | 27379 | |
e4b2b4a8 JK |
27380 | +/* Must be called with event_mutex held */ |
27381 | int event_trace_del_tracer(struct trace_array *tr) | |
27382 | { | |
27383 | - mutex_lock(&event_mutex); | |
27384 | + lockdep_assert_held(&event_mutex); | |
1a6e0f06 | 27385 | |
e4b2b4a8 JK |
27386 | /* Disable any event triggers and associated soft-disabled events */ |
27387 | clear_event_triggers(tr); | |
27388 | @@ -3033,8 +3036,6 @@ | |
1a6e0f06 | 27389 | |
e4b2b4a8 | 27390 | tr->event_dir = NULL; |
1a6e0f06 | 27391 | |
e4b2b4a8 JK |
27392 | - mutex_unlock(&event_mutex); |
27393 | - | |
27394 | return 0; | |
1a6e0f06 | 27395 | } |
1a6e0f06 | 27396 | |
e4b2b4a8 JK |
27397 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_events_hist.c linux-4.14/kernel/trace/trace_events_hist.c |
27398 | --- linux-4.14.orig/kernel/trace/trace_events_hist.c 2018-09-05 11:03:22.000000000 +0200 | |
27399 | +++ linux-4.14/kernel/trace/trace_events_hist.c 2018-09-05 11:05:07.000000000 +0200 | |
27400 | @@ -20,13 +20,39 @@ | |
27401 | #include <linux/slab.h> | |
27402 | #include <linux/stacktrace.h> | |
27403 | #include <linux/rculist.h> | |
27404 | +#include <linux/tracefs.h> | |
1a6e0f06 | 27405 | |
e4b2b4a8 JK |
27406 | #include "tracing_map.h" |
27407 | #include "trace.h" | |
1a6e0f06 | 27408 | |
e4b2b4a8 JK |
27409 | +#define SYNTH_SYSTEM "synthetic" |
27410 | +#define SYNTH_FIELDS_MAX 16 | |
27411 | + | |
27412 | +#define STR_VAR_LEN_MAX 32 /* must be multiple of sizeof(u64) */ | |
27413 | + | |
27414 | struct hist_field; | |
1a6e0f06 | 27415 | |
e4b2b4a8 JK |
27416 | -typedef u64 (*hist_field_fn_t) (struct hist_field *field, void *event); |
27417 | +typedef u64 (*hist_field_fn_t) (struct hist_field *field, | |
27418 | + struct tracing_map_elt *elt, | |
27419 | + struct ring_buffer_event *rbe, | |
27420 | + void *event); | |
27421 | + | |
27422 | +#define HIST_FIELD_OPERANDS_MAX 2 | |
27423 | +#define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) | |
27424 | +#define HIST_ACTIONS_MAX 8 | |
27425 | + | |
27426 | +enum field_op_id { | |
27427 | + FIELD_OP_NONE, | |
27428 | + FIELD_OP_PLUS, | |
27429 | + FIELD_OP_MINUS, | |
27430 | + FIELD_OP_UNARY_MINUS, | |
27431 | +}; | |
27432 | + | |
27433 | +struct hist_var { | |
27434 | + char *name; | |
27435 | + struct hist_trigger_data *hist_data; | |
27436 | + unsigned int idx; | |
27437 | +}; | |
27438 | ||
27439 | struct hist_field { | |
27440 | struct ftrace_event_field *field; | |
27441 | @@ -34,26 +60,50 @@ | |
27442 | hist_field_fn_t fn; | |
27443 | unsigned int size; | |
27444 | unsigned int offset; | |
27445 | + unsigned int is_signed; | |
27446 | + const char *type; | |
27447 | + struct hist_field *operands[HIST_FIELD_OPERANDS_MAX]; | |
27448 | + struct hist_trigger_data *hist_data; | |
27449 | + struct hist_var var; | |
27450 | + enum field_op_id operator; | |
27451 | + char *system; | |
27452 | + char *event_name; | |
27453 | + char *name; | |
27454 | + unsigned int var_idx; | |
27455 | + unsigned int var_ref_idx; | |
27456 | + bool read_once; | |
27457 | }; | |
27458 | ||
27459 | -static u64 hist_field_none(struct hist_field *field, void *event) | |
27460 | +static u64 hist_field_none(struct hist_field *field, | |
27461 | + struct tracing_map_elt *elt, | |
27462 | + struct ring_buffer_event *rbe, | |
27463 | + void *event) | |
1a6e0f06 | 27464 | { |
e4b2b4a8 JK |
27465 | return 0; |
27466 | } | |
1a6e0f06 | 27467 | |
e4b2b4a8 JK |
27468 | -static u64 hist_field_counter(struct hist_field *field, void *event) |
27469 | +static u64 hist_field_counter(struct hist_field *field, | |
27470 | + struct tracing_map_elt *elt, | |
27471 | + struct ring_buffer_event *rbe, | |
27472 | + void *event) | |
27473 | { | |
27474 | return 1; | |
1a6e0f06 JK |
27475 | } |
27476 | ||
e4b2b4a8 JK |
27477 | -static u64 hist_field_string(struct hist_field *hist_field, void *event) |
27478 | +static u64 hist_field_string(struct hist_field *hist_field, | |
27479 | + struct tracing_map_elt *elt, | |
27480 | + struct ring_buffer_event *rbe, | |
27481 | + void *event) | |
27482 | { | |
27483 | char *addr = (char *)(event + hist_field->field->offset); | |
1a6e0f06 | 27484 | |
e4b2b4a8 | 27485 | return (u64)(unsigned long)addr; |
1a6e0f06 | 27486 | } |
e4b2b4a8 JK |
27487 | |
27488 | -static u64 hist_field_dynstring(struct hist_field *hist_field, void *event) | |
27489 | +static u64 hist_field_dynstring(struct hist_field *hist_field, | |
27490 | + struct tracing_map_elt *elt, | |
27491 | + struct ring_buffer_event *rbe, | |
27492 | + void *event) | |
27493 | { | |
27494 | u32 str_item = *(u32 *)(event + hist_field->field->offset); | |
27495 | int str_loc = str_item & 0xffff; | |
27496 | @@ -62,22 +112,74 @@ | |
27497 | return (u64)(unsigned long)addr; | |
1a6e0f06 | 27498 | } |
1a6e0f06 | 27499 | |
e4b2b4a8 JK |
27500 | -static u64 hist_field_pstring(struct hist_field *hist_field, void *event) |
27501 | +static u64 hist_field_pstring(struct hist_field *hist_field, | |
27502 | + struct tracing_map_elt *elt, | |
27503 | + struct ring_buffer_event *rbe, | |
27504 | + void *event) | |
27505 | { | |
27506 | char **addr = (char **)(event + hist_field->field->offset); | |
1a6e0f06 | 27507 | |
e4b2b4a8 | 27508 | return (u64)(unsigned long)*addr; |
1a6e0f06 JK |
27509 | } |
27510 | ||
e4b2b4a8 JK |
27511 | -static u64 hist_field_log2(struct hist_field *hist_field, void *event) |
27512 | +static u64 hist_field_log2(struct hist_field *hist_field, | |
27513 | + struct tracing_map_elt *elt, | |
27514 | + struct ring_buffer_event *rbe, | |
27515 | + void *event) | |
1a6e0f06 | 27516 | { |
e4b2b4a8 JK |
27517 | - u64 val = *(u64 *)(event + hist_field->field->offset); |
27518 | + struct hist_field *operand = hist_field->operands[0]; | |
27519 | + | |
27520 | + u64 val = operand->fn(operand, elt, rbe, event); | |
1a6e0f06 | 27521 | |
e4b2b4a8 | 27522 | return (u64) ilog2(roundup_pow_of_two(val)); |
1a6e0f06 JK |
27523 | } |
27524 | ||
e4b2b4a8 JK |
27525 | +static u64 hist_field_plus(struct hist_field *hist_field, |
27526 | + struct tracing_map_elt *elt, | |
27527 | + struct ring_buffer_event *rbe, | |
27528 | + void *event) | |
1a6e0f06 | 27529 | +{ |
e4b2b4a8 JK |
27530 | + struct hist_field *operand1 = hist_field->operands[0]; |
27531 | + struct hist_field *operand2 = hist_field->operands[1]; | |
27532 | + | |
27533 | + u64 val1 = operand1->fn(operand1, elt, rbe, event); | |
27534 | + u64 val2 = operand2->fn(operand2, elt, rbe, event); | |
27535 | + | |
27536 | + return val1 + val2; | |
27537 | +} | |
27538 | + | |
27539 | +static u64 hist_field_minus(struct hist_field *hist_field, | |
27540 | + struct tracing_map_elt *elt, | |
27541 | + struct ring_buffer_event *rbe, | |
27542 | + void *event) | |
27543 | +{ | |
27544 | + struct hist_field *operand1 = hist_field->operands[0]; | |
27545 | + struct hist_field *operand2 = hist_field->operands[1]; | |
27546 | + | |
27547 | + u64 val1 = operand1->fn(operand1, elt, rbe, event); | |
27548 | + u64 val2 = operand2->fn(operand2, elt, rbe, event); | |
27549 | + | |
27550 | + return val1 - val2; | |
27551 | +} | |
27552 | + | |
27553 | +static u64 hist_field_unary_minus(struct hist_field *hist_field, | |
27554 | + struct tracing_map_elt *elt, | |
27555 | + struct ring_buffer_event *rbe, | |
27556 | + void *event) | |
27557 | +{ | |
27558 | + struct hist_field *operand = hist_field->operands[0]; | |
27559 | + | |
27560 | + s64 sval = (s64)operand->fn(operand, elt, rbe, event); | |
27561 | + u64 val = (u64)-sval; | |
27562 | + | |
27563 | + return val; | |
27564 | +} | |
27565 | + | |
27566 | #define DEFINE_HIST_FIELD_FN(type) \ | |
27567 | -static u64 hist_field_##type(struct hist_field *hist_field, void *event)\ | |
27568 | + static u64 hist_field_##type(struct hist_field *hist_field, \ | |
27569 | + struct tracing_map_elt *elt, \ | |
27570 | + struct ring_buffer_event *rbe, \ | |
27571 | + void *event) \ | |
27572 | { \ | |
27573 | type *addr = (type *)(event + hist_field->field->offset); \ | |
27574 | \ | |
27575 | @@ -110,16 +212,29 @@ | |
27576 | #define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE) | |
27577 | ||
27578 | enum hist_field_flags { | |
27579 | - HIST_FIELD_FL_HITCOUNT = 1, | |
27580 | - HIST_FIELD_FL_KEY = 2, | |
27581 | - HIST_FIELD_FL_STRING = 4, | |
27582 | - HIST_FIELD_FL_HEX = 8, | |
27583 | - HIST_FIELD_FL_SYM = 16, | |
27584 | - HIST_FIELD_FL_SYM_OFFSET = 32, | |
27585 | - HIST_FIELD_FL_EXECNAME = 64, | |
27586 | - HIST_FIELD_FL_SYSCALL = 128, | |
27587 | - HIST_FIELD_FL_STACKTRACE = 256, | |
27588 | - HIST_FIELD_FL_LOG2 = 512, | |
27589 | + HIST_FIELD_FL_HITCOUNT = 1 << 0, | |
27590 | + HIST_FIELD_FL_KEY = 1 << 1, | |
27591 | + HIST_FIELD_FL_STRING = 1 << 2, | |
27592 | + HIST_FIELD_FL_HEX = 1 << 3, | |
27593 | + HIST_FIELD_FL_SYM = 1 << 4, | |
27594 | + HIST_FIELD_FL_SYM_OFFSET = 1 << 5, | |
27595 | + HIST_FIELD_FL_EXECNAME = 1 << 6, | |
27596 | + HIST_FIELD_FL_SYSCALL = 1 << 7, | |
27597 | + HIST_FIELD_FL_STACKTRACE = 1 << 8, | |
27598 | + HIST_FIELD_FL_LOG2 = 1 << 9, | |
27599 | + HIST_FIELD_FL_TIMESTAMP = 1 << 10, | |
27600 | + HIST_FIELD_FL_TIMESTAMP_USECS = 1 << 11, | |
27601 | + HIST_FIELD_FL_VAR = 1 << 12, | |
27602 | + HIST_FIELD_FL_EXPR = 1 << 13, | |
27603 | + HIST_FIELD_FL_VAR_REF = 1 << 14, | |
27604 | + HIST_FIELD_FL_CPU = 1 << 15, | |
27605 | + HIST_FIELD_FL_ALIAS = 1 << 16, | |
27606 | +}; | |
27607 | + | |
27608 | +struct var_defs { | |
27609 | + unsigned int n_vars; | |
27610 | + char *name[TRACING_MAP_VARS_MAX]; | |
27611 | + char *expr[TRACING_MAP_VARS_MAX]; | |
27612 | }; | |
27613 | ||
27614 | struct hist_trigger_attrs { | |
27615 | @@ -127,25 +242,1474 @@ | |
27616 | char *vals_str; | |
27617 | char *sort_key_str; | |
27618 | char *name; | |
27619 | + char *clock; | |
27620 | bool pause; | |
27621 | bool cont; | |
27622 | bool clear; | |
27623 | + bool ts_in_usecs; | |
27624 | unsigned int map_bits; | |
27625 | + | |
27626 | + char *assignment_str[TRACING_MAP_VARS_MAX]; | |
27627 | + unsigned int n_assignments; | |
27628 | + | |
27629 | + char *action_str[HIST_ACTIONS_MAX]; | |
27630 | + unsigned int n_actions; | |
27631 | + | |
27632 | + struct var_defs var_defs; | |
27633 | +}; | |
27634 | + | |
27635 | +struct field_var { | |
27636 | + struct hist_field *var; | |
27637 | + struct hist_field *val; | |
27638 | +}; | |
1a6e0f06 | 27639 | + |
e4b2b4a8 JK |
27640 | +struct field_var_hist { |
27641 | + struct hist_trigger_data *hist_data; | |
27642 | + char *cmd; | |
27643 | }; | |
27644 | ||
27645 | struct hist_trigger_data { | |
27646 | - struct hist_field *fields[TRACING_MAP_FIELDS_MAX]; | |
27647 | + struct hist_field *fields[HIST_FIELDS_MAX]; | |
27648 | unsigned int n_vals; | |
27649 | unsigned int n_keys; | |
27650 | unsigned int n_fields; | |
27651 | + unsigned int n_vars; | |
27652 | unsigned int key_size; | |
27653 | struct tracing_map_sort_key sort_keys[TRACING_MAP_SORT_KEYS_MAX]; | |
27654 | unsigned int n_sort_keys; | |
27655 | struct trace_event_file *event_file; | |
27656 | struct hist_trigger_attrs *attrs; | |
27657 | struct tracing_map *map; | |
27658 | + bool enable_timestamps; | |
27659 | + bool remove; | |
27660 | + struct hist_field *var_refs[TRACING_MAP_VARS_MAX]; | |
27661 | + unsigned int n_var_refs; | |
27662 | + | |
27663 | + struct action_data *actions[HIST_ACTIONS_MAX]; | |
27664 | + unsigned int n_actions; | |
27665 | + | |
27666 | + struct hist_field *synth_var_refs[SYNTH_FIELDS_MAX]; | |
27667 | + unsigned int n_synth_var_refs; | |
27668 | + struct field_var *field_vars[SYNTH_FIELDS_MAX]; | |
27669 | + unsigned int n_field_vars; | |
27670 | + unsigned int n_field_var_str; | |
27671 | + struct field_var_hist *field_var_hists[SYNTH_FIELDS_MAX]; | |
27672 | + unsigned int n_field_var_hists; | |
27673 | + | |
27674 | + struct field_var *max_vars[SYNTH_FIELDS_MAX]; | |
27675 | + unsigned int n_max_vars; | |
27676 | + unsigned int n_max_var_str; | |
27677 | +}; | |
1a6e0f06 | 27678 | + |
e4b2b4a8 JK |
27679 | +struct synth_field { |
27680 | + char *type; | |
27681 | + char *name; | |
27682 | + size_t size; | |
27683 | + bool is_signed; | |
27684 | + bool is_string; | |
27685 | +}; | |
1a6e0f06 | 27686 | + |
e4b2b4a8 JK |
27687 | +struct synth_event { |
27688 | + struct list_head list; | |
27689 | + int ref; | |
27690 | + char *name; | |
27691 | + struct synth_field **fields; | |
27692 | + unsigned int n_fields; | |
27693 | + unsigned int n_u64; | |
27694 | + struct trace_event_class class; | |
27695 | + struct trace_event_call call; | |
27696 | + struct tracepoint *tp; | |
27697 | +}; | |
1a6e0f06 | 27698 | + |
e4b2b4a8 | 27699 | +struct action_data; |
1a6e0f06 | 27700 | + |
e4b2b4a8 JK |
27701 | +typedef void (*action_fn_t) (struct hist_trigger_data *hist_data, |
27702 | + struct tracing_map_elt *elt, void *rec, | |
27703 | + struct ring_buffer_event *rbe, | |
27704 | + struct action_data *data, u64 *var_ref_vals); | |
1a6e0f06 | 27705 | + |
e4b2b4a8 JK |
27706 | +struct action_data { |
27707 | + action_fn_t fn; | |
27708 | + unsigned int n_params; | |
27709 | + char *params[SYNTH_FIELDS_MAX]; | |
27710 | + | |
27711 | + union { | |
27712 | + struct { | |
27713 | + unsigned int var_ref_idx; | |
27714 | + char *match_event; | |
27715 | + char *match_event_system; | |
27716 | + char *synth_event_name; | |
27717 | + struct synth_event *synth_event; | |
27718 | + } onmatch; | |
27719 | + | |
27720 | + struct { | |
27721 | + char *var_str; | |
27722 | + char *fn_name; | |
27723 | + unsigned int max_var_ref_idx; | |
27724 | + struct hist_field *max_var; | |
27725 | + struct hist_field *var; | |
27726 | + } onmax; | |
27727 | + }; | |
27728 | +}; | |
27729 | + | |
27730 | + | |
27731 | +static char last_hist_cmd[MAX_FILTER_STR_VAL]; | |
27732 | +static char hist_err_str[MAX_FILTER_STR_VAL]; | |
27733 | + | |
27734 | +static void last_cmd_set(char *str) | |
27735 | +{ | |
27736 | + if (!str) | |
1a6e0f06 JK |
27737 | + return; |
27738 | + | |
e4b2b4a8 | 27739 | + strncpy(last_hist_cmd, str, MAX_FILTER_STR_VAL - 1); |
1a6e0f06 | 27740 | +} |
1a6e0f06 | 27741 | + |
e4b2b4a8 JK |
27742 | +static void hist_err(char *str, char *var) |
27743 | +{ | |
27744 | + int maxlen = MAX_FILTER_STR_VAL - 1; | |
1a6e0f06 | 27745 | + |
e4b2b4a8 | 27746 | + if (!str) |
1a6e0f06 | 27747 | + return; |
1a6e0f06 | 27748 | + |
e4b2b4a8 JK |
27749 | + if (strlen(hist_err_str)) |
27750 | + return; | |
27751 | + | |
27752 | + if (!var) | |
27753 | + var = ""; | |
27754 | + | |
27755 | + if (strlen(hist_err_str) + strlen(str) + strlen(var) > maxlen) | |
27756 | + return; | |
1a6e0f06 | 27757 | + |
e4b2b4a8 JK |
27758 | + strcat(hist_err_str, str); |
27759 | + strcat(hist_err_str, var); | |
27760 | +} | |
27761 | + | |
27762 | +static void hist_err_event(char *str, char *system, char *event, char *var) | |
1a6e0f06 | 27763 | +{ |
e4b2b4a8 JK |
27764 | + char err[MAX_FILTER_STR_VAL]; |
27765 | + | |
27766 | + if (system && var) | |
27767 | + snprintf(err, MAX_FILTER_STR_VAL, "%s.%s.%s", system, event, var); | |
27768 | + else if (system) | |
27769 | + snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); | |
27770 | + else | |
27771 | + strncpy(err, var, MAX_FILTER_STR_VAL); | |
27772 | + | |
27773 | + hist_err(str, err); | |
1a6e0f06 JK |
27774 | +} |
27775 | + | |
e4b2b4a8 | 27776 | +static void hist_err_clear(void) |
1a6e0f06 | 27777 | +{ |
e4b2b4a8 | 27778 | + hist_err_str[0] = '\0'; |
1a6e0f06 JK |
27779 | +} |
27780 | + | |
e4b2b4a8 | 27781 | +static bool have_hist_err(void) |
1a6e0f06 | 27782 | +{ |
e4b2b4a8 JK |
27783 | + if (strlen(hist_err_str)) |
27784 | + return true; | |
1a6e0f06 | 27785 | + |
e4b2b4a8 JK |
27786 | + return false; |
27787 | +} | |
1a6e0f06 | 27788 | + |
e4b2b4a8 JK |
27789 | +static LIST_HEAD(synth_event_list); |
27790 | +static DEFINE_MUTEX(synth_event_mutex); | |
1a6e0f06 | 27791 | + |
e4b2b4a8 JK |
27792 | +struct synth_trace_event { |
27793 | + struct trace_entry ent; | |
27794 | + u64 fields[]; | |
27795 | +}; | |
1a6e0f06 | 27796 | + |
e4b2b4a8 JK |
27797 | +static int synth_event_define_fields(struct trace_event_call *call) |
27798 | +{ | |
27799 | + struct synth_trace_event trace; | |
27800 | + int offset = offsetof(typeof(trace), fields); | |
27801 | + struct synth_event *event = call->data; | |
27802 | + unsigned int i, size, n_u64; | |
27803 | + char *name, *type; | |
27804 | + bool is_signed; | |
27805 | + int ret = 0; | |
27806 | + | |
27807 | + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { | |
27808 | + size = event->fields[i]->size; | |
27809 | + is_signed = event->fields[i]->is_signed; | |
27810 | + type = event->fields[i]->type; | |
27811 | + name = event->fields[i]->name; | |
27812 | + ret = trace_define_field(call, type, name, offset, size, | |
27813 | + is_signed, FILTER_OTHER); | |
27814 | + if (ret) | |
27815 | + break; | |
1a6e0f06 | 27816 | + |
e4b2b4a8 JK |
27817 | + if (event->fields[i]->is_string) { |
27818 | + offset += STR_VAR_LEN_MAX; | |
27819 | + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | |
27820 | + } else { | |
27821 | + offset += sizeof(u64); | |
27822 | + n_u64++; | |
27823 | + } | |
1a6e0f06 JK |
27824 | + } |
27825 | + | |
e4b2b4a8 JK |
27826 | + event->n_u64 = n_u64; |
27827 | + | |
27828 | + return ret; | |
27829 | +} | |
1a6e0f06 | 27830 | + |
e4b2b4a8 JK |
27831 | +static bool synth_field_signed(char *type) |
27832 | +{ | |
27833 | + if (strncmp(type, "u", 1) == 0) | |
27834 | + return false; | |
1a6e0f06 | 27835 | + |
e4b2b4a8 JK |
27836 | + return true; |
27837 | +} | |
1a6e0f06 | 27838 | + |
e4b2b4a8 JK |
27839 | +static int synth_field_is_string(char *type) |
27840 | +{ | |
27841 | + if (strstr(type, "char[") != NULL) | |
27842 | + return true; | |
1a6e0f06 | 27843 | + |
e4b2b4a8 | 27844 | + return false; |
1a6e0f06 JK |
27845 | +} |
27846 | + | |
e4b2b4a8 | 27847 | +static int synth_field_string_size(char *type) |
1a6e0f06 | 27848 | +{ |
e4b2b4a8 JK |
27849 | + char buf[4], *end, *start; |
27850 | + unsigned int len; | |
27851 | + int size, err; | |
1a6e0f06 | 27852 | + |
e4b2b4a8 JK |
27853 | + start = strstr(type, "char["); |
27854 | + if (start == NULL) | |
27855 | + return -EINVAL; | |
27856 | + start += strlen("char["); | |
1a6e0f06 | 27857 | + |
e4b2b4a8 JK |
27858 | + end = strchr(type, ']'); |
27859 | + if (!end || end < start) | |
27860 | + return -EINVAL; | |
27861 | + | |
27862 | + len = end - start; | |
27863 | + if (len > 3) | |
27864 | + return -EINVAL; | |
27865 | + | |
27866 | + strncpy(buf, start, len); | |
27867 | + buf[len] = '\0'; | |
27868 | + | |
27869 | + err = kstrtouint(buf, 0, &size); | |
27870 | + if (err) | |
27871 | + return err; | |
27872 | + | |
27873 | + if (size > STR_VAR_LEN_MAX) | |
27874 | + return -EINVAL; | |
27875 | + | |
27876 | + return size; | |
1a6e0f06 JK |
27877 | +} |
27878 | + | |
e4b2b4a8 JK |
27879 | +static int synth_field_size(char *type) |
27880 | +{ | |
27881 | + int size = 0; | |
27882 | + | |
27883 | + if (strcmp(type, "s64") == 0) | |
27884 | + size = sizeof(s64); | |
27885 | + else if (strcmp(type, "u64") == 0) | |
27886 | + size = sizeof(u64); | |
27887 | + else if (strcmp(type, "s32") == 0) | |
27888 | + size = sizeof(s32); | |
27889 | + else if (strcmp(type, "u32") == 0) | |
27890 | + size = sizeof(u32); | |
27891 | + else if (strcmp(type, "s16") == 0) | |
27892 | + size = sizeof(s16); | |
27893 | + else if (strcmp(type, "u16") == 0) | |
27894 | + size = sizeof(u16); | |
27895 | + else if (strcmp(type, "s8") == 0) | |
27896 | + size = sizeof(s8); | |
27897 | + else if (strcmp(type, "u8") == 0) | |
27898 | + size = sizeof(u8); | |
27899 | + else if (strcmp(type, "char") == 0) | |
27900 | + size = sizeof(char); | |
27901 | + else if (strcmp(type, "unsigned char") == 0) | |
27902 | + size = sizeof(unsigned char); | |
27903 | + else if (strcmp(type, "int") == 0) | |
27904 | + size = sizeof(int); | |
27905 | + else if (strcmp(type, "unsigned int") == 0) | |
27906 | + size = sizeof(unsigned int); | |
27907 | + else if (strcmp(type, "long") == 0) | |
27908 | + size = sizeof(long); | |
27909 | + else if (strcmp(type, "unsigned long") == 0) | |
27910 | + size = sizeof(unsigned long); | |
27911 | + else if (strcmp(type, "pid_t") == 0) | |
27912 | + size = sizeof(pid_t); | |
27913 | + else if (synth_field_is_string(type)) | |
27914 | + size = synth_field_string_size(type); | |
1a6e0f06 | 27915 | + |
e4b2b4a8 JK |
27916 | + return size; |
27917 | +} | |
27918 | + | |
27919 | +static const char *synth_field_fmt(char *type) | |
27920 | +{ | |
27921 | + const char *fmt = "%llu"; | |
27922 | + | |
27923 | + if (strcmp(type, "s64") == 0) | |
27924 | + fmt = "%lld"; | |
27925 | + else if (strcmp(type, "u64") == 0) | |
27926 | + fmt = "%llu"; | |
27927 | + else if (strcmp(type, "s32") == 0) | |
27928 | + fmt = "%d"; | |
27929 | + else if (strcmp(type, "u32") == 0) | |
27930 | + fmt = "%u"; | |
27931 | + else if (strcmp(type, "s16") == 0) | |
27932 | + fmt = "%d"; | |
27933 | + else if (strcmp(type, "u16") == 0) | |
27934 | + fmt = "%u"; | |
27935 | + else if (strcmp(type, "s8") == 0) | |
27936 | + fmt = "%d"; | |
27937 | + else if (strcmp(type, "u8") == 0) | |
27938 | + fmt = "%u"; | |
27939 | + else if (strcmp(type, "char") == 0) | |
27940 | + fmt = "%d"; | |
27941 | + else if (strcmp(type, "unsigned char") == 0) | |
27942 | + fmt = "%u"; | |
27943 | + else if (strcmp(type, "int") == 0) | |
27944 | + fmt = "%d"; | |
27945 | + else if (strcmp(type, "unsigned int") == 0) | |
27946 | + fmt = "%u"; | |
27947 | + else if (strcmp(type, "long") == 0) | |
27948 | + fmt = "%ld"; | |
27949 | + else if (strcmp(type, "unsigned long") == 0) | |
27950 | + fmt = "%lu"; | |
27951 | + else if (strcmp(type, "pid_t") == 0) | |
27952 | + fmt = "%d"; | |
27953 | + else if (synth_field_is_string(type)) | |
27954 | + fmt = "%s"; | |
27955 | + | |
27956 | + return fmt; | |
27957 | +} | |
27958 | + | |
27959 | +static enum print_line_t print_synth_event(struct trace_iterator *iter, | |
27960 | + int flags, | |
27961 | + struct trace_event *event) | |
27962 | +{ | |
27963 | + struct trace_array *tr = iter->tr; | |
27964 | + struct trace_seq *s = &iter->seq; | |
27965 | + struct synth_trace_event *entry; | |
27966 | + struct synth_event *se; | |
27967 | + unsigned int i, n_u64; | |
27968 | + char print_fmt[32]; | |
27969 | + const char *fmt; | |
27970 | + | |
27971 | + entry = (struct synth_trace_event *)iter->ent; | |
27972 | + se = container_of(event, struct synth_event, call.event); | |
27973 | + | |
27974 | + trace_seq_printf(s, "%s: ", se->name); | |
27975 | + | |
27976 | + for (i = 0, n_u64 = 0; i < se->n_fields; i++) { | |
27977 | + if (trace_seq_has_overflowed(s)) | |
27978 | + goto end; | |
27979 | + | |
27980 | + fmt = synth_field_fmt(se->fields[i]->type); | |
27981 | + | |
27982 | + /* parameter types */ | |
27983 | + if (tr->trace_flags & TRACE_ITER_VERBOSE) | |
27984 | + trace_seq_printf(s, "%s ", fmt); | |
27985 | + | |
27986 | + snprintf(print_fmt, sizeof(print_fmt), "%%s=%s%%s", fmt); | |
27987 | + | |
27988 | + /* parameter values */ | |
27989 | + if (se->fields[i]->is_string) { | |
27990 | + trace_seq_printf(s, print_fmt, se->fields[i]->name, | |
27991 | + (char *)&entry->fields[n_u64], | |
27992 | + i == se->n_fields - 1 ? "" : " "); | |
27993 | + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | |
27994 | + } else { | |
27995 | + trace_seq_printf(s, print_fmt, se->fields[i]->name, | |
27996 | + entry->fields[n_u64], | |
27997 | + i == se->n_fields - 1 ? "" : " "); | |
27998 | + n_u64++; | |
27999 | + } | |
28000 | + } | |
28001 | +end: | |
28002 | + trace_seq_putc(s, '\n'); | |
28003 | + | |
28004 | + return trace_handle_return(s); | |
1a6e0f06 JK |
28005 | +} |
28006 | + | |
e4b2b4a8 JK |
28007 | +static struct trace_event_functions synth_event_funcs = { |
28008 | + .trace = print_synth_event | |
28009 | +}; | |
1a6e0f06 | 28010 | + |
e4b2b4a8 JK |
28011 | +static notrace void trace_event_raw_event_synth(void *__data, |
28012 | + u64 *var_ref_vals, | |
28013 | + unsigned int var_ref_idx) | |
1a6e0f06 | 28014 | +{ |
e4b2b4a8 JK |
28015 | + struct trace_event_file *trace_file = __data; |
28016 | + struct synth_trace_event *entry; | |
28017 | + struct trace_event_buffer fbuffer; | |
28018 | + struct ring_buffer *buffer; | |
28019 | + struct synth_event *event; | |
28020 | + unsigned int i, n_u64; | |
28021 | + int fields_size = 0; | |
1a6e0f06 | 28022 | + |
e4b2b4a8 JK |
28023 | + event = trace_file->event_call->data; |
28024 | + | |
28025 | + if (trace_trigger_soft_disabled(trace_file)) | |
1a6e0f06 | 28026 | + return; |
1a6e0f06 | 28027 | + |
e4b2b4a8 | 28028 | + fields_size = event->n_u64 * sizeof(u64); |
1a6e0f06 | 28029 | + |
e4b2b4a8 JK |
28030 | + /* |
28031 | + * Avoid ring buffer recursion detection, as this event | |
28032 | + * is being performed within another event. | |
28033 | + */ | |
28034 | + buffer = trace_file->tr->trace_buffer.buffer; | |
28035 | + ring_buffer_nest_start(buffer); | |
28036 | + | |
28037 | + entry = trace_event_buffer_reserve(&fbuffer, trace_file, | |
28038 | + sizeof(*entry) + fields_size); | |
28039 | + if (!entry) | |
28040 | + goto out; | |
28041 | + | |
28042 | + for (i = 0, n_u64 = 0; i < event->n_fields; i++) { | |
28043 | + if (event->fields[i]->is_string) { | |
28044 | + char *str_val = (char *)(long)var_ref_vals[var_ref_idx + i]; | |
28045 | + char *str_field = (char *)&entry->fields[n_u64]; | |
28046 | + | |
28047 | + strscpy(str_field, str_val, STR_VAR_LEN_MAX); | |
28048 | + n_u64 += STR_VAR_LEN_MAX / sizeof(u64); | |
28049 | + } else { | |
28050 | + entry->fields[n_u64] = var_ref_vals[var_ref_idx + i]; | |
28051 | + n_u64++; | |
28052 | + } | |
1a6e0f06 JK |
28053 | + } |
28054 | + | |
e4b2b4a8 JK |
28055 | + trace_event_buffer_commit(&fbuffer); |
28056 | +out: | |
28057 | + ring_buffer_nest_end(buffer); | |
1a6e0f06 | 28058 | +} |
1a6e0f06 | 28059 | + |
e4b2b4a8 | 28060 | +static void free_synth_event_print_fmt(struct trace_event_call *call) |
1a6e0f06 | 28061 | +{ |
e4b2b4a8 JK |
28062 | + if (call) { |
28063 | + kfree(call->print_fmt); | |
28064 | + call->print_fmt = NULL; | |
1a6e0f06 | 28065 | + } |
e4b2b4a8 | 28066 | +} |
1a6e0f06 | 28067 | + |
e4b2b4a8 JK |
28068 | +static int __set_synth_event_print_fmt(struct synth_event *event, |
28069 | + char *buf, int len) | |
28070 | +{ | |
28071 | + const char *fmt; | |
28072 | + int pos = 0; | |
28073 | + int i; | |
28074 | + | |
28075 | + /* When len=0, we just calculate the needed length */ | |
28076 | +#define LEN_OR_ZERO (len ? len - pos : 0) | |
28077 | + | |
28078 | + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); | |
28079 | + for (i = 0; i < event->n_fields; i++) { | |
28080 | + fmt = synth_field_fmt(event->fields[i]->type); | |
28081 | + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", | |
28082 | + event->fields[i]->name, fmt, | |
28083 | + i == event->n_fields - 1 ? "" : ", "); | |
1a6e0f06 | 28084 | + } |
e4b2b4a8 | 28085 | + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); |
1a6e0f06 | 28086 | + |
e4b2b4a8 JK |
28087 | + for (i = 0; i < event->n_fields; i++) { |
28088 | + pos += snprintf(buf + pos, LEN_OR_ZERO, | |
28089 | + ", REC->%s", event->fields[i]->name); | |
1a6e0f06 JK |
28090 | + } |
28091 | + | |
e4b2b4a8 | 28092 | +#undef LEN_OR_ZERO |
1a6e0f06 | 28093 | + |
e4b2b4a8 JK |
28094 | + /* return the length of print_fmt */ |
28095 | + return pos; | |
1a6e0f06 | 28096 | +} |
1a6e0f06 | 28097 | + |
e4b2b4a8 JK |
28098 | +static int set_synth_event_print_fmt(struct trace_event_call *call) |
28099 | +{ | |
28100 | + struct synth_event *event = call->data; | |
28101 | + char *print_fmt; | |
28102 | + int len; | |
1a6e0f06 | 28103 | + |
e4b2b4a8 JK |
28104 | + /* First: called with 0 length to calculate the needed length */ |
28105 | + len = __set_synth_event_print_fmt(event, NULL, 0); | |
1a6e0f06 | 28106 | + |
e4b2b4a8 JK |
28107 | + print_fmt = kmalloc(len + 1, GFP_KERNEL); |
28108 | + if (!print_fmt) | |
28109 | + return -ENOMEM; | |
1a6e0f06 | 28110 | + |
e4b2b4a8 JK |
28111 | + /* Second: actually write the @print_fmt */ |
28112 | + __set_synth_event_print_fmt(event, print_fmt, len + 1); | |
28113 | + call->print_fmt = print_fmt; | |
28114 | + | |
28115 | + return 0; | |
1a6e0f06 JK |
28116 | +} |
28117 | + | |
e4b2b4a8 | 28118 | +static void free_synth_field(struct synth_field *field) |
1a6e0f06 | 28119 | +{ |
e4b2b4a8 JK |
28120 | + kfree(field->type); |
28121 | + kfree(field->name); | |
28122 | + kfree(field); | |
1a6e0f06 JK |
28123 | +} |
28124 | + | |
e4b2b4a8 JK |
28125 | +static struct synth_field *parse_synth_field(char *field_type, |
28126 | + char *field_name) | |
1a6e0f06 | 28127 | +{ |
e4b2b4a8 JK |
28128 | + struct synth_field *field; |
28129 | + int len, ret = 0; | |
28130 | + char *array; | |
1a6e0f06 | 28131 | + |
e4b2b4a8 JK |
28132 | + if (field_type[0] == ';') |
28133 | + field_type++; | |
1a6e0f06 | 28134 | + |
e4b2b4a8 JK |
28135 | + len = strlen(field_name); |
28136 | + if (field_name[len - 1] == ';') | |
28137 | + field_name[len - 1] = '\0'; | |
1a6e0f06 | 28138 | + |
e4b2b4a8 JK |
28139 | + field = kzalloc(sizeof(*field), GFP_KERNEL); |
28140 | + if (!field) | |
28141 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 28142 | + |
e4b2b4a8 JK |
28143 | + len = strlen(field_type) + 1; |
28144 | + array = strchr(field_name, '['); | |
28145 | + if (array) | |
28146 | + len += strlen(array); | |
28147 | + field->type = kzalloc(len, GFP_KERNEL); | |
28148 | + if (!field->type) { | |
28149 | + ret = -ENOMEM; | |
28150 | + goto free; | |
28151 | + } | |
28152 | + strcat(field->type, field_type); | |
28153 | + if (array) { | |
28154 | + strcat(field->type, array); | |
28155 | + *array = '\0'; | |
28156 | + } | |
1a6e0f06 | 28157 | + |
e4b2b4a8 JK |
28158 | + field->size = synth_field_size(field->type); |
28159 | + if (!field->size) { | |
28160 | + ret = -EINVAL; | |
28161 | + goto free; | |
1a6e0f06 | 28162 | + } |
1a6e0f06 | 28163 | + |
e4b2b4a8 JK |
28164 | + if (synth_field_is_string(field->type)) |
28165 | + field->is_string = true; | |
28166 | + | |
28167 | + field->is_signed = synth_field_signed(field->type); | |
28168 | + | |
28169 | + field->name = kstrdup(field_name, GFP_KERNEL); | |
28170 | + if (!field->name) { | |
28171 | + ret = -ENOMEM; | |
28172 | + goto free; | |
28173 | + } | |
28174 | + out: | |
28175 | + return field; | |
28176 | + free: | |
28177 | + free_synth_field(field); | |
28178 | + field = ERR_PTR(ret); | |
28179 | + goto out; | |
28180 | +} | |
28181 | + | |
28182 | +static void free_synth_tracepoint(struct tracepoint *tp) | |
1a6e0f06 | 28183 | +{ |
e4b2b4a8 JK |
28184 | + if (!tp) |
28185 | + return; | |
28186 | + | |
28187 | + kfree(tp->name); | |
28188 | + kfree(tp); | |
1a6e0f06 | 28189 | +} |
1a6e0f06 | 28190 | + |
e4b2b4a8 | 28191 | +static struct tracepoint *alloc_synth_tracepoint(char *name) |
1a6e0f06 | 28192 | +{ |
e4b2b4a8 | 28193 | + struct tracepoint *tp; |
1a6e0f06 | 28194 | + |
e4b2b4a8 JK |
28195 | + tp = kzalloc(sizeof(*tp), GFP_KERNEL); |
28196 | + if (!tp) | |
28197 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 28198 | + |
e4b2b4a8 JK |
28199 | + tp->name = kstrdup(name, GFP_KERNEL); |
28200 | + if (!tp->name) { | |
28201 | + kfree(tp); | |
28202 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 28203 | + } |
e4b2b4a8 JK |
28204 | + |
28205 | + return tp; | |
1a6e0f06 | 28206 | +} |
1a6e0f06 | 28207 | + |
e4b2b4a8 JK |
28208 | +typedef void (*synth_probe_func_t) (void *__data, u64 *var_ref_vals, |
28209 | + unsigned int var_ref_idx); | |
1a6e0f06 | 28210 | + |
e4b2b4a8 JK |
28211 | +static inline void trace_synth(struct synth_event *event, u64 *var_ref_vals, |
28212 | + unsigned int var_ref_idx) | |
28213 | +{ | |
28214 | + struct tracepoint *tp = event->tp; | |
28215 | + | |
28216 | + if (unlikely(atomic_read(&tp->key.enabled) > 0)) { | |
28217 | + struct tracepoint_func *probe_func_ptr; | |
28218 | + synth_probe_func_t probe_func; | |
28219 | + void *__data; | |
28220 | + | |
28221 | + if (!(cpu_online(raw_smp_processor_id()))) | |
28222 | + return; | |
28223 | + | |
28224 | + probe_func_ptr = rcu_dereference_sched((tp)->funcs); | |
28225 | + if (probe_func_ptr) { | |
28226 | + do { | |
28227 | + probe_func = probe_func_ptr->func; | |
28228 | + __data = probe_func_ptr->data; | |
28229 | + probe_func(__data, var_ref_vals, var_ref_idx); | |
28230 | + } while ((++probe_func_ptr)->func); | |
28231 | + } | |
28232 | + } | |
28233 | +} | |
28234 | + | |
28235 | +static struct synth_event *find_synth_event(const char *name) | |
28236 | +{ | |
28237 | + struct synth_event *event; | |
28238 | + | |
28239 | + list_for_each_entry(event, &synth_event_list, list) { | |
28240 | + if (strcmp(event->name, name) == 0) | |
28241 | + return event; | |
28242 | + } | |
28243 | + | |
28244 | + return NULL; | |
28245 | +} | |
28246 | + | |
28247 | +static int register_synth_event(struct synth_event *event) | |
28248 | +{ | |
28249 | + struct trace_event_call *call = &event->call; | |
28250 | + int ret = 0; | |
28251 | + | |
28252 | + event->call.class = &event->class; | |
28253 | + event->class.system = kstrdup(SYNTH_SYSTEM, GFP_KERNEL); | |
28254 | + if (!event->class.system) { | |
28255 | + ret = -ENOMEM; | |
28256 | + goto out; | |
28257 | + } | |
28258 | + | |
28259 | + event->tp = alloc_synth_tracepoint(event->name); | |
28260 | + if (IS_ERR(event->tp)) { | |
28261 | + ret = PTR_ERR(event->tp); | |
28262 | + event->tp = NULL; | |
28263 | + goto out; | |
28264 | + } | |
28265 | + | |
28266 | + INIT_LIST_HEAD(&call->class->fields); | |
28267 | + call->event.funcs = &synth_event_funcs; | |
28268 | + call->class->define_fields = synth_event_define_fields; | |
28269 | + | |
28270 | + ret = register_trace_event(&call->event); | |
28271 | + if (!ret) { | |
28272 | + ret = -ENODEV; | |
28273 | + goto out; | |
28274 | + } | |
28275 | + call->flags = TRACE_EVENT_FL_TRACEPOINT; | |
28276 | + call->class->reg = trace_event_reg; | |
28277 | + call->class->probe = trace_event_raw_event_synth; | |
28278 | + call->data = event; | |
28279 | + call->tp = event->tp; | |
28280 | + | |
28281 | + ret = trace_add_event_call(call); | |
28282 | + if (ret) { | |
28283 | + pr_warn("Failed to register synthetic event: %s\n", | |
28284 | + trace_event_name(call)); | |
28285 | + goto err; | |
28286 | + } | |
28287 | + | |
28288 | + ret = set_synth_event_print_fmt(call); | |
28289 | + if (ret < 0) { | |
28290 | + trace_remove_event_call(call); | |
28291 | + goto err; | |
28292 | + } | |
28293 | + out: | |
28294 | + return ret; | |
28295 | + err: | |
28296 | + unregister_trace_event(&call->event); | |
28297 | + goto out; | |
28298 | +} | |
28299 | + | |
28300 | +static int unregister_synth_event(struct synth_event *event) | |
28301 | +{ | |
28302 | + struct trace_event_call *call = &event->call; | |
28303 | + int ret; | |
28304 | + | |
28305 | + ret = trace_remove_event_call(call); | |
28306 | + | |
28307 | + return ret; | |
28308 | +} | |
28309 | + | |
28310 | +static void free_synth_event(struct synth_event *event) | |
28311 | +{ | |
28312 | + unsigned int i; | |
28313 | + | |
28314 | + if (!event) | |
28315 | + return; | |
28316 | + | |
28317 | + for (i = 0; i < event->n_fields; i++) | |
28318 | + free_synth_field(event->fields[i]); | |
28319 | + | |
28320 | + kfree(event->fields); | |
28321 | + kfree(event->name); | |
28322 | + kfree(event->class.system); | |
28323 | + free_synth_tracepoint(event->tp); | |
28324 | + free_synth_event_print_fmt(&event->call); | |
28325 | + kfree(event); | |
28326 | +} | |
28327 | + | |
28328 | +static struct synth_event *alloc_synth_event(char *event_name, int n_fields, | |
28329 | + struct synth_field **fields) | |
28330 | +{ | |
28331 | + struct synth_event *event; | |
28332 | + unsigned int i; | |
1a6e0f06 | 28333 | + |
e4b2b4a8 JK |
28334 | + event = kzalloc(sizeof(*event), GFP_KERNEL); |
28335 | + if (!event) { | |
28336 | + event = ERR_PTR(-ENOMEM); | |
28337 | + goto out; | |
28338 | + } | |
1a6e0f06 | 28339 | + |
e4b2b4a8 JK |
28340 | + event->name = kstrdup(event_name, GFP_KERNEL); |
28341 | + if (!event->name) { | |
28342 | + kfree(event); | |
28343 | + event = ERR_PTR(-ENOMEM); | |
28344 | + goto out; | |
28345 | + } | |
1a6e0f06 | 28346 | + |
e4b2b4a8 JK |
28347 | + event->fields = kcalloc(n_fields, sizeof(*event->fields), GFP_KERNEL); |
28348 | + if (!event->fields) { | |
28349 | + free_synth_event(event); | |
28350 | + event = ERR_PTR(-ENOMEM); | |
28351 | + goto out; | |
28352 | + } | |
1a6e0f06 | 28353 | + |
e4b2b4a8 JK |
28354 | + for (i = 0; i < n_fields; i++) |
28355 | + event->fields[i] = fields[i]; | |
1a6e0f06 | 28356 | + |
e4b2b4a8 JK |
28357 | + event->n_fields = n_fields; |
28358 | + out: | |
28359 | + return event; | |
28360 | +} | |
1a6e0f06 | 28361 | + |
e4b2b4a8 JK |
28362 | +static void action_trace(struct hist_trigger_data *hist_data, |
28363 | + struct tracing_map_elt *elt, void *rec, | |
28364 | + struct ring_buffer_event *rbe, | |
28365 | + struct action_data *data, u64 *var_ref_vals) | |
1a6e0f06 | 28366 | +{ |
e4b2b4a8 | 28367 | + struct synth_event *event = data->onmatch.synth_event; |
1a6e0f06 | 28368 | + |
e4b2b4a8 JK |
28369 | + trace_synth(event, var_ref_vals, data->onmatch.var_ref_idx); |
28370 | +} | |
1a6e0f06 | 28371 | + |
e4b2b4a8 JK |
28372 | +struct hist_var_data { |
28373 | + struct list_head list; | |
28374 | + struct hist_trigger_data *hist_data; | |
28375 | +}; | |
1a6e0f06 | 28376 | + |
e4b2b4a8 JK |
28377 | +static void add_or_delete_synth_event(struct synth_event *event, int delete) |
28378 | +{ | |
28379 | + if (delete) | |
28380 | + free_synth_event(event); | |
28381 | + else { | |
28382 | + mutex_lock(&synth_event_mutex); | |
28383 | + if (!find_synth_event(event->name)) | |
28384 | + list_add(&event->list, &synth_event_list); | |
28385 | + else | |
28386 | + free_synth_event(event); | |
28387 | + mutex_unlock(&synth_event_mutex); | |
28388 | + } | |
1a6e0f06 JK |
28389 | +} |
28390 | + | |
e4b2b4a8 | 28391 | +static int create_synth_event(int argc, char **argv) |
1a6e0f06 | 28392 | +{ |
e4b2b4a8 JK |
28393 | + struct synth_field *field, *fields[SYNTH_FIELDS_MAX]; |
28394 | + struct synth_event *event = NULL; | |
28395 | + bool delete_event = false; | |
28396 | + int i, n_fields = 0, ret = 0; | |
28397 | + char *name; | |
1a6e0f06 | 28398 | + |
e4b2b4a8 | 28399 | + mutex_lock(&synth_event_mutex); |
1a6e0f06 | 28400 | + |
e4b2b4a8 JK |
28401 | + /* |
28402 | + * Argument syntax: | |
28403 | + * - Add synthetic event: <event_name> field[;field] ... | |
28404 | + * - Remove synthetic event: !<event_name> field[;field] ... | |
28405 | + * where 'field' = type field_name | |
28406 | + */ | |
28407 | + if (argc < 1) { | |
28408 | + ret = -EINVAL; | |
28409 | + goto out; | |
28410 | + } | |
1a6e0f06 | 28411 | + |
e4b2b4a8 JK |
28412 | + name = argv[0]; |
28413 | + if (name[0] == '!') { | |
28414 | + delete_event = true; | |
28415 | + name++; | |
28416 | + } | |
1a6e0f06 | 28417 | + |
e4b2b4a8 JK |
28418 | + event = find_synth_event(name); |
28419 | + if (event) { | |
28420 | + if (delete_event) { | |
28421 | + if (event->ref) { | |
28422 | + event = NULL; | |
28423 | + ret = -EBUSY; | |
28424 | + goto out; | |
28425 | + } | |
28426 | + list_del(&event->list); | |
28427 | + goto out; | |
1a6e0f06 | 28428 | + } |
e4b2b4a8 JK |
28429 | + event = NULL; |
28430 | + ret = -EEXIST; | |
28431 | + goto out; | |
28432 | + } else if (delete_event) | |
28433 | + goto out; | |
28434 | + | |
28435 | + if (argc < 2) { | |
28436 | + ret = -EINVAL; | |
28437 | + goto out; | |
1a6e0f06 | 28438 | + } |
1a6e0f06 | 28439 | + |
e4b2b4a8 JK |
28440 | + for (i = 1; i < argc - 1; i++) { |
28441 | + if (strcmp(argv[i], ";") == 0) | |
28442 | + continue; | |
28443 | + if (n_fields == SYNTH_FIELDS_MAX) { | |
28444 | + ret = -EINVAL; | |
28445 | + goto err; | |
28446 | + } | |
1a6e0f06 | 28447 | + |
e4b2b4a8 JK |
28448 | + field = parse_synth_field(argv[i], argv[i + 1]); |
28449 | + if (IS_ERR(field)) { | |
28450 | + ret = PTR_ERR(field); | |
28451 | + goto err; | |
28452 | + } | |
28453 | + fields[n_fields] = field; | |
28454 | + i++; n_fields++; | |
28455 | + } | |
1a6e0f06 | 28456 | + |
e4b2b4a8 JK |
28457 | + if (i < argc) { |
28458 | + ret = -EINVAL; | |
28459 | + goto err; | |
28460 | + } | |
1a6e0f06 | 28461 | + |
e4b2b4a8 JK |
28462 | + event = alloc_synth_event(name, n_fields, fields); |
28463 | + if (IS_ERR(event)) { | |
28464 | + ret = PTR_ERR(event); | |
28465 | + event = NULL; | |
28466 | + goto err; | |
1a6e0f06 | 28467 | + } |
e4b2b4a8 JK |
28468 | + out: |
28469 | + mutex_unlock(&synth_event_mutex); | |
1a6e0f06 | 28470 | + |
e4b2b4a8 JK |
28471 | + if (event) { |
28472 | + if (delete_event) { | |
28473 | + ret = unregister_synth_event(event); | |
28474 | + add_or_delete_synth_event(event, !ret); | |
28475 | + } else { | |
28476 | + ret = register_synth_event(event); | |
28477 | + add_or_delete_synth_event(event, ret); | |
28478 | + } | |
28479 | + } | |
28480 | + | |
28481 | + return ret; | |
28482 | + err: | |
28483 | + mutex_unlock(&synth_event_mutex); | |
28484 | + | |
28485 | + for (i = 0; i < n_fields; i++) | |
28486 | + free_synth_field(fields[i]); | |
28487 | + free_synth_event(event); | |
28488 | + | |
28489 | + return ret; | |
1a6e0f06 JK |
28490 | +} |
28491 | + | |
e4b2b4a8 | 28492 | +static int release_all_synth_events(void) |
1a6e0f06 | 28493 | +{ |
e4b2b4a8 JK |
28494 | + struct list_head release_events; |
28495 | + struct synth_event *event, *e; | |
28496 | + int ret = 0; | |
1a6e0f06 | 28497 | + |
e4b2b4a8 JK |
28498 | + INIT_LIST_HEAD(&release_events); |
28499 | + | |
28500 | + mutex_lock(&synth_event_mutex); | |
28501 | + | |
28502 | + list_for_each_entry(event, &synth_event_list, list) { | |
28503 | + if (event->ref) { | |
28504 | + mutex_unlock(&synth_event_mutex); | |
28505 | + return -EBUSY; | |
28506 | + } | |
28507 | + } | |
28508 | + | |
28509 | + list_splice_init(&event->list, &release_events); | |
28510 | + | |
28511 | + mutex_unlock(&synth_event_mutex); | |
28512 | + | |
28513 | + list_for_each_entry_safe(event, e, &release_events, list) { | |
28514 | + list_del(&event->list); | |
28515 | + | |
28516 | + ret = unregister_synth_event(event); | |
28517 | + add_or_delete_synth_event(event, !ret); | |
28518 | + } | |
28519 | + | |
28520 | + return ret; | |
1a6e0f06 JK |
28521 | +} |
28522 | + | |
e4b2b4a8 JK |
28523 | + |
28524 | +static void *synth_events_seq_start(struct seq_file *m, loff_t *pos) | |
1a6e0f06 | 28525 | +{ |
e4b2b4a8 | 28526 | + mutex_lock(&synth_event_mutex); |
1a6e0f06 | 28527 | + |
e4b2b4a8 JK |
28528 | + return seq_list_start(&synth_event_list, *pos); |
28529 | +} | |
1a6e0f06 | 28530 | + |
e4b2b4a8 JK |
28531 | +static void *synth_events_seq_next(struct seq_file *m, void *v, loff_t *pos) |
28532 | +{ | |
28533 | + return seq_list_next(v, &synth_event_list, pos); | |
28534 | +} | |
1a6e0f06 | 28535 | + |
e4b2b4a8 JK |
28536 | +static void synth_events_seq_stop(struct seq_file *m, void *v) |
28537 | +{ | |
28538 | + mutex_unlock(&synth_event_mutex); | |
1a6e0f06 | 28539 | +} |
1a6e0f06 | 28540 | + |
e4b2b4a8 | 28541 | +static int synth_events_seq_show(struct seq_file *m, void *v) |
1a6e0f06 | 28542 | +{ |
e4b2b4a8 JK |
28543 | + struct synth_field *field; |
28544 | + struct synth_event *event = v; | |
28545 | + unsigned int i; | |
1a6e0f06 | 28546 | + |
e4b2b4a8 | 28547 | + seq_printf(m, "%s\t", event->name); |
1a6e0f06 | 28548 | + |
e4b2b4a8 JK |
28549 | + for (i = 0; i < event->n_fields; i++) { |
28550 | + field = event->fields[i]; | |
28551 | + | |
28552 | + /* parameter values */ | |
28553 | + seq_printf(m, "%s %s%s", field->type, field->name, | |
28554 | + i == event->n_fields - 1 ? "" : "; "); | |
1a6e0f06 JK |
28555 | + } |
28556 | + | |
e4b2b4a8 | 28557 | + seq_putc(m, '\n'); |
1a6e0f06 JK |
28558 | + |
28559 | + return 0; | |
28560 | +} | |
1a6e0f06 | 28561 | + |
e4b2b4a8 JK |
28562 | +static const struct seq_operations synth_events_seq_op = { |
28563 | + .start = synth_events_seq_start, | |
28564 | + .next = synth_events_seq_next, | |
28565 | + .stop = synth_events_seq_stop, | |
28566 | + .show = synth_events_seq_show | |
28567 | +}; | |
28568 | + | |
28569 | +static int synth_events_open(struct inode *inode, struct file *file) | |
1a6e0f06 | 28570 | +{ |
e4b2b4a8 | 28571 | + int ret; |
1a6e0f06 | 28572 | + |
e4b2b4a8 JK |
28573 | + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { |
28574 | + ret = release_all_synth_events(); | |
28575 | + if (ret < 0) | |
28576 | + return ret; | |
28577 | + } | |
1a6e0f06 | 28578 | + |
e4b2b4a8 | 28579 | + return seq_open(file, &synth_events_seq_op); |
1a6e0f06 | 28580 | +} |
e4b2b4a8 JK |
28581 | + |
28582 | +static ssize_t synth_events_write(struct file *file, | |
28583 | + const char __user *buffer, | |
28584 | + size_t count, loff_t *ppos) | |
1a6e0f06 | 28585 | +{ |
e4b2b4a8 JK |
28586 | + return trace_parse_run_command(file, buffer, count, ppos, |
28587 | + create_synth_event); | |
28588 | +} | |
1a6e0f06 | 28589 | + |
e4b2b4a8 JK |
28590 | +static const struct file_operations synth_events_fops = { |
28591 | + .open = synth_events_open, | |
28592 | + .write = synth_events_write, | |
28593 | + .read = seq_read, | |
28594 | + .llseek = seq_lseek, | |
28595 | + .release = seq_release, | |
28596 | +}; | |
28597 | + | |
28598 | +static u64 hist_field_timestamp(struct hist_field *hist_field, | |
28599 | + struct tracing_map_elt *elt, | |
28600 | + struct ring_buffer_event *rbe, | |
28601 | + void *event) | |
28602 | +{ | |
28603 | + struct hist_trigger_data *hist_data = hist_field->hist_data; | |
28604 | + struct trace_array *tr = hist_data->event_file->tr; | |
28605 | + | |
28606 | + u64 ts = ring_buffer_event_time_stamp(rbe); | |
28607 | + | |
28608 | + if (hist_data->attrs->ts_in_usecs && trace_clock_in_ns(tr)) | |
28609 | + ts = ns2usecs(ts); | |
28610 | + | |
28611 | + return ts; | |
1a6e0f06 JK |
28612 | +} |
28613 | + | |
e4b2b4a8 JK |
28614 | +static u64 hist_field_cpu(struct hist_field *hist_field, |
28615 | + struct tracing_map_elt *elt, | |
28616 | + struct ring_buffer_event *rbe, | |
28617 | + void *event) | |
1a6e0f06 | 28618 | +{ |
e4b2b4a8 JK |
28619 | + int cpu = smp_processor_id(); |
28620 | + | |
28621 | + return cpu; | |
1a6e0f06 JK |
28622 | +} |
28623 | + | |
e4b2b4a8 JK |
28624 | +static struct hist_field * |
28625 | +check_field_for_var_ref(struct hist_field *hist_field, | |
28626 | + struct hist_trigger_data *var_data, | |
28627 | + unsigned int var_idx) | |
1a6e0f06 | 28628 | +{ |
e4b2b4a8 JK |
28629 | + struct hist_field *found = NULL; |
28630 | + | |
28631 | + if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR_REF) { | |
28632 | + if (hist_field->var.idx == var_idx && | |
28633 | + hist_field->var.hist_data == var_data) { | |
28634 | + found = hist_field; | |
28635 | + } | |
28636 | + } | |
28637 | + | |
28638 | + return found; | |
1a6e0f06 JK |
28639 | +} |
28640 | + | |
e4b2b4a8 JK |
28641 | +static struct hist_field * |
28642 | +check_field_for_var_refs(struct hist_trigger_data *hist_data, | |
28643 | + struct hist_field *hist_field, | |
28644 | + struct hist_trigger_data *var_data, | |
28645 | + unsigned int var_idx, | |
28646 | + unsigned int level) | |
28647 | +{ | |
28648 | + struct hist_field *found = NULL; | |
28649 | + unsigned int i; | |
28650 | + | |
28651 | + if (level > 3) | |
28652 | + return found; | |
28653 | + | |
28654 | + if (!hist_field) | |
28655 | + return found; | |
28656 | + | |
28657 | + found = check_field_for_var_ref(hist_field, var_data, var_idx); | |
28658 | + if (found) | |
28659 | + return found; | |
28660 | + | |
28661 | + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { | |
28662 | + struct hist_field *operand; | |
28663 | + | |
28664 | + operand = hist_field->operands[i]; | |
28665 | + found = check_field_for_var_refs(hist_data, operand, var_data, | |
28666 | + var_idx, level + 1); | |
28667 | + if (found) | |
28668 | + return found; | |
28669 | + } | |
28670 | + | |
28671 | + return found; | |
28672 | +} | |
28673 | + | |
28674 | +static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data, | |
28675 | + struct hist_trigger_data *var_data, | |
28676 | + unsigned int var_idx) | |
28677 | +{ | |
28678 | + struct hist_field *hist_field, *found = NULL; | |
28679 | + unsigned int i; | |
28680 | + | |
28681 | + for_each_hist_field(i, hist_data) { | |
28682 | + hist_field = hist_data->fields[i]; | |
28683 | + found = check_field_for_var_refs(hist_data, hist_field, | |
28684 | + var_data, var_idx, 0); | |
28685 | + if (found) | |
28686 | + return found; | |
28687 | + } | |
28688 | + | |
28689 | + for (i = 0; i < hist_data->n_synth_var_refs; i++) { | |
28690 | + hist_field = hist_data->synth_var_refs[i]; | |
28691 | + found = check_field_for_var_refs(hist_data, hist_field, | |
28692 | + var_data, var_idx, 0); | |
28693 | + if (found) | |
28694 | + return found; | |
28695 | + } | |
28696 | + | |
28697 | + return found; | |
28698 | +} | |
28699 | + | |
28700 | +static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, | |
28701 | + unsigned int var_idx) | |
1a6e0f06 | 28702 | +{ |
e4b2b4a8 JK |
28703 | + struct trace_array *tr = hist_data->event_file->tr; |
28704 | + struct hist_field *found = NULL; | |
28705 | + struct hist_var_data *var_data; | |
1a6e0f06 | 28706 | + |
e4b2b4a8 JK |
28707 | + list_for_each_entry(var_data, &tr->hist_vars, list) { |
28708 | + if (var_data->hist_data == hist_data) | |
28709 | + continue; | |
28710 | + found = find_var_ref(var_data->hist_data, hist_data, var_idx); | |
28711 | + if (found) | |
28712 | + break; | |
28713 | + } | |
1a6e0f06 | 28714 | + |
e4b2b4a8 | 28715 | + return found; |
1a6e0f06 JK |
28716 | +} |
28717 | + | |
e4b2b4a8 | 28718 | +static bool check_var_refs(struct hist_trigger_data *hist_data) |
1a6e0f06 | 28719 | +{ |
e4b2b4a8 JK |
28720 | + struct hist_field *field; |
28721 | + bool found = false; | |
28722 | + int i; | |
1a6e0f06 | 28723 | + |
e4b2b4a8 JK |
28724 | + for_each_hist_field(i, hist_data) { |
28725 | + field = hist_data->fields[i]; | |
28726 | + if (field && field->flags & HIST_FIELD_FL_VAR) { | |
28727 | + if (find_any_var_ref(hist_data, field->var.idx)) { | |
28728 | + found = true; | |
28729 | + break; | |
28730 | + } | |
28731 | + } | |
28732 | + } | |
1a6e0f06 | 28733 | + |
e4b2b4a8 | 28734 | + return found; |
1a6e0f06 JK |
28735 | +} |
28736 | + | |
e4b2b4a8 | 28737 | +static struct hist_var_data *find_hist_vars(struct hist_trigger_data *hist_data) |
1a6e0f06 | 28738 | +{ |
e4b2b4a8 JK |
28739 | + struct trace_array *tr = hist_data->event_file->tr; |
28740 | + struct hist_var_data *var_data, *found = NULL; | |
1a6e0f06 | 28741 | + |
e4b2b4a8 JK |
28742 | + list_for_each_entry(var_data, &tr->hist_vars, list) { |
28743 | + if (var_data->hist_data == hist_data) { | |
28744 | + found = var_data; | |
28745 | + break; | |
1a6e0f06 | 28746 | + } |
e4b2b4a8 | 28747 | + } |
1a6e0f06 | 28748 | + |
e4b2b4a8 JK |
28749 | + return found; |
28750 | +} | |
28751 | + | |
28752 | +static bool field_has_hist_vars(struct hist_field *hist_field, | |
28753 | + unsigned int level) | |
28754 | +{ | |
28755 | + int i; | |
28756 | + | |
28757 | + if (level > 3) | |
28758 | + return false; | |
28759 | + | |
28760 | + if (!hist_field) | |
28761 | + return false; | |
28762 | + | |
28763 | + if (hist_field->flags & HIST_FIELD_FL_VAR || | |
28764 | + hist_field->flags & HIST_FIELD_FL_VAR_REF) | |
28765 | + return true; | |
28766 | + | |
28767 | + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) { | |
28768 | + struct hist_field *operand; | |
28769 | + | |
28770 | + operand = hist_field->operands[i]; | |
28771 | + if (field_has_hist_vars(operand, level + 1)) | |
28772 | + return true; | |
1a6e0f06 | 28773 | + } |
e4b2b4a8 JK |
28774 | + |
28775 | + return false; | |
1a6e0f06 JK |
28776 | +} |
28777 | + | |
e4b2b4a8 JK |
28778 | +static bool has_hist_vars(struct hist_trigger_data *hist_data) |
28779 | +{ | |
28780 | + struct hist_field *hist_field; | |
28781 | + int i; | |
1a6e0f06 | 28782 | + |
e4b2b4a8 JK |
28783 | + for_each_hist_field(i, hist_data) { |
28784 | + hist_field = hist_data->fields[i]; | |
28785 | + if (field_has_hist_vars(hist_field, 0)) | |
28786 | + return true; | |
28787 | + } | |
1a6e0f06 | 28788 | + |
e4b2b4a8 JK |
28789 | + return false; |
28790 | +} | |
1a6e0f06 | 28791 | + |
e4b2b4a8 | 28792 | +static int save_hist_vars(struct hist_trigger_data *hist_data) |
1a6e0f06 | 28793 | +{ |
e4b2b4a8 JK |
28794 | + struct trace_array *tr = hist_data->event_file->tr; |
28795 | + struct hist_var_data *var_data; | |
1a6e0f06 | 28796 | + |
e4b2b4a8 JK |
28797 | + var_data = find_hist_vars(hist_data); |
28798 | + if (var_data) | |
28799 | + return 0; | |
28800 | + | |
28801 | + if (trace_array_get(tr) < 0) | |
28802 | + return -ENODEV; | |
28803 | + | |
28804 | + var_data = kzalloc(sizeof(*var_data), GFP_KERNEL); | |
28805 | + if (!var_data) { | |
28806 | + trace_array_put(tr); | |
28807 | + return -ENOMEM; | |
28808 | + } | |
28809 | + | |
28810 | + var_data->hist_data = hist_data; | |
28811 | + list_add(&var_data->list, &tr->hist_vars); | |
28812 | + | |
28813 | + return 0; | |
1a6e0f06 JK |
28814 | +} |
28815 | + | |
e4b2b4a8 | 28816 | +static void remove_hist_vars(struct hist_trigger_data *hist_data) |
1a6e0f06 | 28817 | +{ |
e4b2b4a8 JK |
28818 | + struct trace_array *tr = hist_data->event_file->tr; |
28819 | + struct hist_var_data *var_data; | |
1a6e0f06 | 28820 | + |
e4b2b4a8 JK |
28821 | + var_data = find_hist_vars(hist_data); |
28822 | + if (!var_data) | |
28823 | + return; | |
28824 | + | |
28825 | + if (WARN_ON(check_var_refs(hist_data))) | |
28826 | + return; | |
28827 | + | |
28828 | + list_del(&var_data->list); | |
28829 | + | |
28830 | + kfree(var_data); | |
28831 | + | |
28832 | + trace_array_put(tr); | |
1a6e0f06 JK |
28833 | +} |
28834 | + | |
e4b2b4a8 JK |
28835 | +static struct hist_field *find_var_field(struct hist_trigger_data *hist_data, |
28836 | + const char *var_name) | |
1a6e0f06 | 28837 | +{ |
e4b2b4a8 | 28838 | + struct hist_field *hist_field, *found = NULL; |
1a6e0f06 JK |
28839 | + int i; |
28840 | + | |
e4b2b4a8 JK |
28841 | + for_each_hist_field(i, hist_data) { |
28842 | + hist_field = hist_data->fields[i]; | |
28843 | + if (hist_field && hist_field->flags & HIST_FIELD_FL_VAR && | |
28844 | + strcmp(hist_field->var.name, var_name) == 0) { | |
28845 | + found = hist_field; | |
28846 | + break; | |
28847 | + } | |
28848 | + } | |
1a6e0f06 | 28849 | + |
e4b2b4a8 JK |
28850 | + return found; |
28851 | +} | |
1a6e0f06 | 28852 | + |
e4b2b4a8 JK |
28853 | +static struct hist_field *find_var(struct hist_trigger_data *hist_data, |
28854 | + struct trace_event_file *file, | |
28855 | + const char *var_name) | |
28856 | +{ | |
28857 | + struct hist_trigger_data *test_data; | |
28858 | + struct event_trigger_data *test; | |
28859 | + struct hist_field *hist_field; | |
28860 | + | |
28861 | + hist_field = find_var_field(hist_data, var_name); | |
28862 | + if (hist_field) | |
28863 | + return hist_field; | |
28864 | + | |
28865 | + list_for_each_entry_rcu(test, &file->triggers, list) { | |
28866 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
28867 | + test_data = test->private_data; | |
28868 | + hist_field = find_var_field(test_data, var_name); | |
28869 | + if (hist_field) | |
28870 | + return hist_field; | |
28871 | + } | |
28872 | + } | |
28873 | + | |
28874 | + return NULL; | |
28875 | +} | |
28876 | + | |
28877 | +static struct trace_event_file *find_var_file(struct trace_array *tr, | |
28878 | + char *system, | |
28879 | + char *event_name, | |
28880 | + char *var_name) | |
28881 | +{ | |
28882 | + struct hist_trigger_data *var_hist_data; | |
28883 | + struct hist_var_data *var_data; | |
28884 | + struct trace_event_file *file, *found = NULL; | |
28885 | + | |
28886 | + if (system) | |
28887 | + return find_event_file(tr, system, event_name); | |
28888 | + | |
28889 | + list_for_each_entry(var_data, &tr->hist_vars, list) { | |
28890 | + var_hist_data = var_data->hist_data; | |
28891 | + file = var_hist_data->event_file; | |
28892 | + if (file == found) | |
28893 | + continue; | |
28894 | + | |
28895 | + if (find_var_field(var_hist_data, var_name)) { | |
28896 | + if (found) { | |
28897 | + hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); | |
28898 | + return NULL; | |
1a6e0f06 | 28899 | + } |
e4b2b4a8 JK |
28900 | + |
28901 | + found = file; | |
1a6e0f06 JK |
28902 | + } |
28903 | + } | |
28904 | + | |
e4b2b4a8 JK |
28905 | + return found; |
28906 | +} | |
28907 | + | |
28908 | +static struct hist_field *find_file_var(struct trace_event_file *file, | |
28909 | + const char *var_name) | |
28910 | +{ | |
28911 | + struct hist_trigger_data *test_data; | |
28912 | + struct event_trigger_data *test; | |
28913 | + struct hist_field *hist_field; | |
28914 | + | |
28915 | + list_for_each_entry_rcu(test, &file->triggers, list) { | |
28916 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
28917 | + test_data = test->private_data; | |
28918 | + hist_field = find_var_field(test_data, var_name); | |
28919 | + if (hist_field) | |
28920 | + return hist_field; | |
28921 | + } | |
1a6e0f06 | 28922 | + } |
e4b2b4a8 JK |
28923 | + |
28924 | + return NULL; | |
1a6e0f06 | 28925 | +} |
e4b2b4a8 JK |
28926 | + |
28927 | +static struct hist_field * | |
28928 | +find_match_var(struct hist_trigger_data *hist_data, char *var_name) | |
1a6e0f06 | 28929 | +{ |
e4b2b4a8 JK |
28930 | + struct trace_array *tr = hist_data->event_file->tr; |
28931 | + struct hist_field *hist_field, *found = NULL; | |
28932 | + struct trace_event_file *file; | |
28933 | + unsigned int i; | |
1a6e0f06 | 28934 | + |
e4b2b4a8 JK |
28935 | + for (i = 0; i < hist_data->n_actions; i++) { |
28936 | + struct action_data *data = hist_data->actions[i]; | |
28937 | + | |
28938 | + if (data->fn == action_trace) { | |
28939 | + char *system = data->onmatch.match_event_system; | |
28940 | + char *event_name = data->onmatch.match_event; | |
28941 | + | |
28942 | + file = find_var_file(tr, system, event_name, var_name); | |
28943 | + if (!file) | |
28944 | + continue; | |
28945 | + hist_field = find_file_var(file, var_name); | |
28946 | + if (hist_field) { | |
28947 | + if (found) { | |
28948 | + hist_err_event("Variable name not unique, need to use fully qualified name (subsys.event.var) for variable: ", system, event_name, var_name); | |
28949 | + return ERR_PTR(-EINVAL); | |
28950 | + } | |
28951 | + | |
28952 | + found = hist_field; | |
28953 | + } | |
28954 | + } | |
1a6e0f06 | 28955 | + } |
e4b2b4a8 | 28956 | + return found; |
1a6e0f06 | 28957 | +} |
1a6e0f06 | 28958 | + |
e4b2b4a8 JK |
28959 | +static struct hist_field *find_event_var(struct hist_trigger_data *hist_data, |
28960 | + char *system, | |
28961 | + char *event_name, | |
28962 | + char *var_name) | |
28963 | +{ | |
28964 | + struct trace_array *tr = hist_data->event_file->tr; | |
28965 | + struct hist_field *hist_field = NULL; | |
28966 | + struct trace_event_file *file; | |
1a6e0f06 | 28967 | + |
e4b2b4a8 JK |
28968 | + if (!system || !event_name) { |
28969 | + hist_field = find_match_var(hist_data, var_name); | |
28970 | + if (IS_ERR(hist_field)) | |
28971 | + return NULL; | |
28972 | + if (hist_field) | |
28973 | + return hist_field; | |
28974 | + } | |
28975 | + | |
28976 | + file = find_var_file(tr, system, event_name, var_name); | |
28977 | + if (!file) | |
28978 | + return NULL; | |
28979 | + | |
28980 | + hist_field = find_file_var(file, var_name); | |
28981 | + | |
28982 | + return hist_field; | |
28983 | +} | |
28984 | + | |
28985 | +struct hist_elt_data { | |
28986 | + char *comm; | |
28987 | + u64 *var_ref_vals; | |
28988 | + char *field_var_str[SYNTH_FIELDS_MAX]; | |
28989 | }; | |
1a6e0f06 | 28990 | |
e4b2b4a8 JK |
28991 | +static u64 hist_field_var_ref(struct hist_field *hist_field, |
28992 | + struct tracing_map_elt *elt, | |
28993 | + struct ring_buffer_event *rbe, | |
28994 | + void *event) | |
1a6e0f06 | 28995 | +{ |
e4b2b4a8 JK |
28996 | + struct hist_elt_data *elt_data; |
28997 | + u64 var_val = 0; | |
1a6e0f06 | 28998 | + |
e4b2b4a8 JK |
28999 | + elt_data = elt->private_data; |
29000 | + var_val = elt_data->var_ref_vals[hist_field->var_ref_idx]; | |
29001 | + | |
29002 | + return var_val; | |
1a6e0f06 | 29003 | +} |
1a6e0f06 | 29004 | + |
e4b2b4a8 JK |
29005 | +static bool resolve_var_refs(struct hist_trigger_data *hist_data, void *key, |
29006 | + u64 *var_ref_vals, bool self) | |
1a6e0f06 | 29007 | +{ |
e4b2b4a8 JK |
29008 | + struct hist_trigger_data *var_data; |
29009 | + struct tracing_map_elt *var_elt; | |
29010 | + struct hist_field *hist_field; | |
29011 | + unsigned int i, var_idx; | |
29012 | + bool resolved = true; | |
29013 | + u64 var_val = 0; | |
1a6e0f06 | 29014 | + |
e4b2b4a8 JK |
29015 | + for (i = 0; i < hist_data->n_var_refs; i++) { |
29016 | + hist_field = hist_data->var_refs[i]; | |
29017 | + var_idx = hist_field->var.idx; | |
29018 | + var_data = hist_field->var.hist_data; | |
1a6e0f06 | 29019 | + |
e4b2b4a8 JK |
29020 | + if (var_data == NULL) { |
29021 | + resolved = false; | |
29022 | + break; | |
29023 | + } | |
1a6e0f06 | 29024 | + |
e4b2b4a8 JK |
29025 | + if ((self && var_data != hist_data) || |
29026 | + (!self && var_data == hist_data)) | |
29027 | + continue; | |
29028 | + | |
29029 | + var_elt = tracing_map_lookup(var_data->map, key); | |
29030 | + if (!var_elt) { | |
29031 | + resolved = false; | |
29032 | + break; | |
29033 | + } | |
29034 | + | |
29035 | + if (!tracing_map_var_set(var_elt, var_idx)) { | |
29036 | + resolved = false; | |
29037 | + break; | |
29038 | + } | |
29039 | + | |
29040 | + if (self || !hist_field->read_once) | |
29041 | + var_val = tracing_map_read_var(var_elt, var_idx); | |
29042 | + else | |
29043 | + var_val = tracing_map_read_var_once(var_elt, var_idx); | |
29044 | + | |
29045 | + var_ref_vals[i] = var_val; | |
1a6e0f06 | 29046 | + } |
e4b2b4a8 JK |
29047 | + |
29048 | + return resolved; | |
1a6e0f06 JK |
29049 | +} |
29050 | + | |
e4b2b4a8 JK |
29051 | +static const char *hist_field_name(struct hist_field *field, |
29052 | + unsigned int level) | |
1a6e0f06 | 29053 | +{ |
e4b2b4a8 JK |
29054 | + const char *field_name = ""; |
29055 | + | |
29056 | + if (level > 1) | |
29057 | + return field_name; | |
29058 | + | |
29059 | + if (field->field) | |
29060 | + field_name = field->field->name; | |
29061 | + else if (field->flags & HIST_FIELD_FL_LOG2 || | |
29062 | + field->flags & HIST_FIELD_FL_ALIAS) | |
29063 | + field_name = hist_field_name(field->operands[0], ++level); | |
29064 | + else if (field->flags & HIST_FIELD_FL_CPU) | |
29065 | + field_name = "cpu"; | |
29066 | + else if (field->flags & HIST_FIELD_FL_EXPR || | |
29067 | + field->flags & HIST_FIELD_FL_VAR_REF) { | |
29068 | + if (field->system) { | |
29069 | + static char full_name[MAX_FILTER_STR_VAL]; | |
29070 | + | |
29071 | + strcat(full_name, field->system); | |
29072 | + strcat(full_name, "."); | |
29073 | + strcat(full_name, field->event_name); | |
29074 | + strcat(full_name, "."); | |
29075 | + strcat(full_name, field->name); | |
29076 | + field_name = full_name; | |
29077 | + } else | |
29078 | + field_name = field->name; | |
29079 | + } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) | |
29080 | + field_name = "common_timestamp"; | |
29081 | + | |
29082 | + if (field_name == NULL) | |
29083 | + field_name = ""; | |
29084 | + | |
29085 | + return field_name; | |
1a6e0f06 JK |
29086 | +} |
29087 | + | |
e4b2b4a8 JK |
29088 | static hist_field_fn_t select_value_fn(int field_size, int field_is_signed) |
29089 | { | |
29090 | hist_field_fn_t fn = NULL; | |
29091 | @@ -207,16 +1771,119 @@ | |
29092 | ||
29093 | static void destroy_hist_trigger_attrs(struct hist_trigger_attrs *attrs) | |
29094 | { | |
29095 | + unsigned int i; | |
1a6e0f06 | 29096 | + |
e4b2b4a8 JK |
29097 | if (!attrs) |
29098 | return; | |
29099 | ||
29100 | + for (i = 0; i < attrs->n_assignments; i++) | |
29101 | + kfree(attrs->assignment_str[i]); | |
1a6e0f06 | 29102 | + |
e4b2b4a8 JK |
29103 | + for (i = 0; i < attrs->n_actions; i++) |
29104 | + kfree(attrs->action_str[i]); | |
1a6e0f06 | 29105 | + |
e4b2b4a8 JK |
29106 | kfree(attrs->name); |
29107 | kfree(attrs->sort_key_str); | |
29108 | kfree(attrs->keys_str); | |
29109 | kfree(attrs->vals_str); | |
29110 | + kfree(attrs->clock); | |
29111 | kfree(attrs); | |
29112 | } | |
29113 | ||
29114 | +static int parse_action(char *str, struct hist_trigger_attrs *attrs) | |
29115 | +{ | |
29116 | + int ret = -EINVAL; | |
1a6e0f06 | 29117 | + |
e4b2b4a8 JK |
29118 | + if (attrs->n_actions >= HIST_ACTIONS_MAX) |
29119 | + return ret; | |
1a6e0f06 | 29120 | + |
e4b2b4a8 JK |
29121 | + if ((strncmp(str, "onmatch(", strlen("onmatch(")) == 0) || |
29122 | + (strncmp(str, "onmax(", strlen("onmax(")) == 0)) { | |
29123 | + attrs->action_str[attrs->n_actions] = kstrdup(str, GFP_KERNEL); | |
29124 | + if (!attrs->action_str[attrs->n_actions]) { | |
29125 | + ret = -ENOMEM; | |
29126 | + return ret; | |
29127 | + } | |
29128 | + attrs->n_actions++; | |
29129 | + ret = 0; | |
1a6e0f06 JK |
29130 | + } |
29131 | + | |
e4b2b4a8 | 29132 | + return ret; |
1a6e0f06 JK |
29133 | +} |
29134 | + | |
e4b2b4a8 | 29135 | +static int parse_assignment(char *str, struct hist_trigger_attrs *attrs) |
1a6e0f06 | 29136 | +{ |
e4b2b4a8 JK |
29137 | + int ret = 0; |
29138 | + | |
29139 | + if ((strncmp(str, "key=", strlen("key=")) == 0) || | |
29140 | + (strncmp(str, "keys=", strlen("keys=")) == 0)) { | |
29141 | + attrs->keys_str = kstrdup(str, GFP_KERNEL); | |
29142 | + if (!attrs->keys_str) { | |
29143 | + ret = -ENOMEM; | |
29144 | + goto out; | |
29145 | + } | |
29146 | + } else if ((strncmp(str, "val=", strlen("val=")) == 0) || | |
29147 | + (strncmp(str, "vals=", strlen("vals=")) == 0) || | |
29148 | + (strncmp(str, "values=", strlen("values=")) == 0)) { | |
29149 | + attrs->vals_str = kstrdup(str, GFP_KERNEL); | |
29150 | + if (!attrs->vals_str) { | |
29151 | + ret = -ENOMEM; | |
29152 | + goto out; | |
29153 | + } | |
29154 | + } else if (strncmp(str, "sort=", strlen("sort=")) == 0) { | |
29155 | + attrs->sort_key_str = kstrdup(str, GFP_KERNEL); | |
29156 | + if (!attrs->sort_key_str) { | |
29157 | + ret = -ENOMEM; | |
29158 | + goto out; | |
29159 | + } | |
29160 | + } else if (strncmp(str, "name=", strlen("name=")) == 0) { | |
29161 | + attrs->name = kstrdup(str, GFP_KERNEL); | |
29162 | + if (!attrs->name) { | |
29163 | + ret = -ENOMEM; | |
29164 | + goto out; | |
29165 | + } | |
29166 | + } else if (strncmp(str, "clock=", strlen("clock=")) == 0) { | |
29167 | + strsep(&str, "="); | |
29168 | + if (!str) { | |
29169 | + ret = -EINVAL; | |
29170 | + goto out; | |
29171 | + } | |
29172 | + | |
29173 | + str = strstrip(str); | |
29174 | + attrs->clock = kstrdup(str, GFP_KERNEL); | |
29175 | + if (!attrs->clock) { | |
29176 | + ret = -ENOMEM; | |
29177 | + goto out; | |
29178 | + } | |
29179 | + } else if (strncmp(str, "size=", strlen("size=")) == 0) { | |
29180 | + int map_bits = parse_map_size(str); | |
29181 | + | |
29182 | + if (map_bits < 0) { | |
29183 | + ret = map_bits; | |
29184 | + goto out; | |
29185 | + } | |
29186 | + attrs->map_bits = map_bits; | |
29187 | + } else { | |
29188 | + char *assignment; | |
29189 | + | |
29190 | + if (attrs->n_assignments == TRACING_MAP_VARS_MAX) { | |
29191 | + hist_err("Too many variables defined: ", str); | |
29192 | + ret = -EINVAL; | |
29193 | + goto out; | |
29194 | + } | |
29195 | + | |
29196 | + assignment = kstrdup(str, GFP_KERNEL); | |
29197 | + if (!assignment) { | |
29198 | + ret = -ENOMEM; | |
29199 | + goto out; | |
29200 | + } | |
29201 | + | |
29202 | + attrs->assignment_str[attrs->n_assignments++] = assignment; | |
1a6e0f06 | 29203 | + } |
e4b2b4a8 JK |
29204 | + out: |
29205 | + return ret; | |
1a6e0f06 JK |
29206 | +} |
29207 | + | |
e4b2b4a8 JK |
29208 | static struct hist_trigger_attrs *parse_hist_trigger_attrs(char *trigger_str) |
29209 | { | |
29210 | struct hist_trigger_attrs *attrs; | |
29211 | @@ -229,35 +1896,21 @@ | |
29212 | while (trigger_str) { | |
29213 | char *str = strsep(&trigger_str, ":"); | |
29214 | ||
29215 | - if ((strncmp(str, "key=", strlen("key=")) == 0) || | |
29216 | - (strncmp(str, "keys=", strlen("keys=")) == 0)) | |
29217 | - attrs->keys_str = kstrdup(str, GFP_KERNEL); | |
29218 | - else if ((strncmp(str, "val=", strlen("val=")) == 0) || | |
29219 | - (strncmp(str, "vals=", strlen("vals=")) == 0) || | |
29220 | - (strncmp(str, "values=", strlen("values=")) == 0)) | |
29221 | - attrs->vals_str = kstrdup(str, GFP_KERNEL); | |
29222 | - else if (strncmp(str, "sort=", strlen("sort=")) == 0) | |
29223 | - attrs->sort_key_str = kstrdup(str, GFP_KERNEL); | |
29224 | - else if (strncmp(str, "name=", strlen("name=")) == 0) | |
29225 | - attrs->name = kstrdup(str, GFP_KERNEL); | |
29226 | - else if (strcmp(str, "pause") == 0) | |
29227 | + if (strchr(str, '=')) { | |
29228 | + ret = parse_assignment(str, attrs); | |
29229 | + if (ret) | |
29230 | + goto free; | |
29231 | + } else if (strcmp(str, "pause") == 0) | |
29232 | attrs->pause = true; | |
29233 | else if ((strcmp(str, "cont") == 0) || | |
29234 | (strcmp(str, "continue") == 0)) | |
29235 | attrs->cont = true; | |
29236 | else if (strcmp(str, "clear") == 0) | |
29237 | attrs->clear = true; | |
29238 | - else if (strncmp(str, "size=", strlen("size=")) == 0) { | |
29239 | - int map_bits = parse_map_size(str); | |
29240 | - | |
29241 | - if (map_bits < 0) { | |
29242 | - ret = map_bits; | |
29243 | + else { | |
29244 | + ret = parse_action(str, attrs); | |
29245 | + if (ret) | |
29246 | goto free; | |
29247 | - } | |
29248 | - attrs->map_bits = map_bits; | |
29249 | - } else { | |
29250 | - ret = -EINVAL; | |
29251 | - goto free; | |
29252 | } | |
29253 | } | |
1a6e0f06 | 29254 | |
e4b2b4a8 JK |
29255 | @@ -266,6 +1919,14 @@ |
29256 | goto free; | |
29257 | } | |
1a6e0f06 | 29258 | |
e4b2b4a8 JK |
29259 | + if (!attrs->clock) { |
29260 | + attrs->clock = kstrdup("global", GFP_KERNEL); | |
29261 | + if (!attrs->clock) { | |
29262 | + ret = -ENOMEM; | |
29263 | + goto free; | |
29264 | + } | |
29265 | + } | |
29266 | + | |
29267 | return attrs; | |
29268 | free: | |
29269 | destroy_hist_trigger_attrs(attrs); | |
29270 | @@ -288,65 +1949,222 @@ | |
29271 | memcpy(comm, task->comm, TASK_COMM_LEN); | |
29272 | } | |
1a6e0f06 | 29273 | |
e4b2b4a8 JK |
29274 | -static void hist_trigger_elt_comm_free(struct tracing_map_elt *elt) |
29275 | +static void hist_elt_data_free(struct hist_elt_data *elt_data) | |
29276 | { | |
29277 | - kfree((char *)elt->private_data); | |
29278 | + unsigned int i; | |
29279 | + | |
29280 | + for (i = 0; i < SYNTH_FIELDS_MAX; i++) | |
29281 | + kfree(elt_data->field_var_str[i]); | |
29282 | + | |
29283 | + kfree(elt_data->comm); | |
29284 | + kfree(elt_data); | |
1a6e0f06 JK |
29285 | } |
29286 | ||
e4b2b4a8 JK |
29287 | -static int hist_trigger_elt_comm_alloc(struct tracing_map_elt *elt) |
29288 | +static void hist_trigger_elt_data_free(struct tracing_map_elt *elt) | |
1a6e0f06 | 29289 | +{ |
e4b2b4a8 | 29290 | + struct hist_elt_data *elt_data = elt->private_data; |
1a6e0f06 | 29291 | + |
e4b2b4a8 | 29292 | + hist_elt_data_free(elt_data); |
1a6e0f06 JK |
29293 | +} |
29294 | + | |
e4b2b4a8 JK |
29295 | +static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt) |
29296 | { | |
29297 | struct hist_trigger_data *hist_data = elt->map->private_data; | |
29298 | + unsigned int size = TASK_COMM_LEN; | |
29299 | + struct hist_elt_data *elt_data; | |
29300 | struct hist_field *key_field; | |
29301 | - unsigned int i; | |
29302 | + unsigned int i, n_str; | |
1a6e0f06 | 29303 | + |
e4b2b4a8 JK |
29304 | + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); |
29305 | + if (!elt_data) | |
29306 | + return -ENOMEM; | |
29307 | ||
29308 | for_each_hist_key_field(i, hist_data) { | |
29309 | key_field = hist_data->fields[i]; | |
29310 | ||
29311 | if (key_field->flags & HIST_FIELD_FL_EXECNAME) { | |
29312 | - unsigned int size = TASK_COMM_LEN + 1; | |
29313 | - | |
29314 | - elt->private_data = kzalloc(size, GFP_KERNEL); | |
29315 | - if (!elt->private_data) | |
29316 | + elt_data->comm = kzalloc(size, GFP_KERNEL); | |
29317 | + if (!elt_data->comm) { | |
29318 | + kfree(elt_data); | |
29319 | return -ENOMEM; | |
29320 | + } | |
29321 | break; | |
29322 | } | |
29323 | } | |
29324 | ||
29325 | + n_str = hist_data->n_field_var_str + hist_data->n_max_var_str; | |
29326 | + | |
29327 | + size = STR_VAR_LEN_MAX; | |
1a6e0f06 | 29328 | + |
e4b2b4a8 JK |
29329 | + for (i = 0; i < n_str; i++) { |
29330 | + elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL); | |
29331 | + if (!elt_data->field_var_str[i]) { | |
29332 | + hist_elt_data_free(elt_data); | |
29333 | + return -ENOMEM; | |
29334 | + } | |
29335 | + } | |
1a6e0f06 | 29336 | + |
e4b2b4a8 | 29337 | + elt->private_data = elt_data; |
1a6e0f06 | 29338 | + |
e4b2b4a8 JK |
29339 | return 0; |
29340 | } | |
29341 | ||
29342 | -static void hist_trigger_elt_comm_copy(struct tracing_map_elt *to, | |
29343 | - struct tracing_map_elt *from) | |
29344 | +static void hist_trigger_elt_data_init(struct tracing_map_elt *elt) | |
29345 | { | |
29346 | - char *comm_from = from->private_data; | |
29347 | - char *comm_to = to->private_data; | |
29348 | + struct hist_elt_data *elt_data = elt->private_data; | |
29349 | ||
29350 | - if (comm_from) | |
29351 | - memcpy(comm_to, comm_from, TASK_COMM_LEN + 1); | |
29352 | + if (elt_data->comm) | |
29353 | + save_comm(elt_data->comm, current); | |
29354 | } | |
29355 | ||
29356 | -static void hist_trigger_elt_comm_init(struct tracing_map_elt *elt) | |
29357 | +static const struct tracing_map_ops hist_trigger_elt_data_ops = { | |
29358 | + .elt_alloc = hist_trigger_elt_data_alloc, | |
29359 | + .elt_free = hist_trigger_elt_data_free, | |
29360 | + .elt_init = hist_trigger_elt_data_init, | |
29361 | +}; | |
29362 | + | |
29363 | +static const char *get_hist_field_flags(struct hist_field *hist_field) | |
29364 | { | |
29365 | - char *comm = elt->private_data; | |
29366 | + const char *flags_str = NULL; | |
29367 | ||
29368 | - if (comm) | |
29369 | - save_comm(comm, current); | |
29370 | + if (hist_field->flags & HIST_FIELD_FL_HEX) | |
29371 | + flags_str = "hex"; | |
29372 | + else if (hist_field->flags & HIST_FIELD_FL_SYM) | |
29373 | + flags_str = "sym"; | |
29374 | + else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) | |
29375 | + flags_str = "sym-offset"; | |
29376 | + else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) | |
29377 | + flags_str = "execname"; | |
29378 | + else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) | |
29379 | + flags_str = "syscall"; | |
29380 | + else if (hist_field->flags & HIST_FIELD_FL_LOG2) | |
29381 | + flags_str = "log2"; | |
29382 | + else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS) | |
29383 | + flags_str = "usecs"; | |
29384 | + | |
29385 | + return flags_str; | |
29386 | } | |
29387 | ||
29388 | -static const struct tracing_map_ops hist_trigger_elt_comm_ops = { | |
29389 | - .elt_alloc = hist_trigger_elt_comm_alloc, | |
29390 | - .elt_copy = hist_trigger_elt_comm_copy, | |
29391 | - .elt_free = hist_trigger_elt_comm_free, | |
29392 | - .elt_init = hist_trigger_elt_comm_init, | |
29393 | -}; | |
29394 | +static void expr_field_str(struct hist_field *field, char *expr) | |
1a6e0f06 | 29395 | +{ |
e4b2b4a8 JK |
29396 | + if (field->flags & HIST_FIELD_FL_VAR_REF) |
29397 | + strcat(expr, "$"); | |
29398 | ||
29399 | -static void destroy_hist_field(struct hist_field *hist_field) | |
29400 | + strcat(expr, hist_field_name(field, 0)); | |
1a6e0f06 | 29401 | + |
e4b2b4a8 JK |
29402 | + if (field->flags && !(field->flags & HIST_FIELD_FL_VAR_REF)) { |
29403 | + const char *flags_str = get_hist_field_flags(field); | |
1a6e0f06 | 29404 | + |
e4b2b4a8 JK |
29405 | + if (flags_str) { |
29406 | + strcat(expr, "."); | |
29407 | + strcat(expr, flags_str); | |
29408 | + } | |
29409 | + } | |
1a6e0f06 JK |
29410 | +} |
29411 | + | |
e4b2b4a8 | 29412 | +static char *expr_str(struct hist_field *field, unsigned int level) |
1a6e0f06 | 29413 | +{ |
e4b2b4a8 | 29414 | + char *expr; |
1a6e0f06 | 29415 | + |
e4b2b4a8 JK |
29416 | + if (level > 1) |
29417 | + return NULL; | |
1a6e0f06 | 29418 | + |
e4b2b4a8 JK |
29419 | + expr = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); |
29420 | + if (!expr) | |
29421 | + return NULL; | |
1a6e0f06 | 29422 | + |
e4b2b4a8 JK |
29423 | + if (!field->operands[0]) { |
29424 | + expr_field_str(field, expr); | |
29425 | + return expr; | |
29426 | + } | |
1a6e0f06 | 29427 | + |
e4b2b4a8 JK |
29428 | + if (field->operator == FIELD_OP_UNARY_MINUS) { |
29429 | + char *subexpr; | |
1a6e0f06 | 29430 | + |
e4b2b4a8 JK |
29431 | + strcat(expr, "-("); |
29432 | + subexpr = expr_str(field->operands[0], ++level); | |
29433 | + if (!subexpr) { | |
29434 | + kfree(expr); | |
29435 | + return NULL; | |
1a6e0f06 | 29436 | + } |
e4b2b4a8 JK |
29437 | + strcat(expr, subexpr); |
29438 | + strcat(expr, ")"); | |
1a6e0f06 | 29439 | + |
e4b2b4a8 | 29440 | + kfree(subexpr); |
1a6e0f06 | 29441 | + |
e4b2b4a8 JK |
29442 | + return expr; |
29443 | + } | |
1a6e0f06 | 29444 | + |
e4b2b4a8 | 29445 | + expr_field_str(field->operands[0], expr); |
1a6e0f06 | 29446 | + |
e4b2b4a8 JK |
29447 | + switch (field->operator) { |
29448 | + case FIELD_OP_MINUS: | |
29449 | + strcat(expr, "-"); | |
29450 | + break; | |
29451 | + case FIELD_OP_PLUS: | |
29452 | + strcat(expr, "+"); | |
29453 | + break; | |
29454 | + default: | |
29455 | + kfree(expr); | |
29456 | + return NULL; | |
29457 | + } | |
1a6e0f06 | 29458 | + |
e4b2b4a8 | 29459 | + expr_field_str(field->operands[1], expr); |
1a6e0f06 | 29460 | + |
e4b2b4a8 | 29461 | + return expr; |
1a6e0f06 | 29462 | +} |
1a6e0f06 | 29463 | + |
e4b2b4a8 | 29464 | +static int contains_operator(char *str) |
1a6e0f06 | 29465 | +{ |
e4b2b4a8 JK |
29466 | + enum field_op_id field_op = FIELD_OP_NONE; |
29467 | + char *op; | |
1a6e0f06 | 29468 | + |
e4b2b4a8 JK |
29469 | + op = strpbrk(str, "+-"); |
29470 | + if (!op) | |
29471 | + return FIELD_OP_NONE; | |
1a6e0f06 | 29472 | + |
e4b2b4a8 JK |
29473 | + switch (*op) { |
29474 | + case '-': | |
29475 | + if (*str == '-') | |
29476 | + field_op = FIELD_OP_UNARY_MINUS; | |
29477 | + else | |
29478 | + field_op = FIELD_OP_MINUS; | |
29479 | + break; | |
29480 | + case '+': | |
29481 | + field_op = FIELD_OP_PLUS; | |
29482 | + break; | |
29483 | + default: | |
29484 | + break; | |
1a6e0f06 | 29485 | + } |
1a6e0f06 | 29486 | + |
e4b2b4a8 JK |
29487 | + return field_op; |
29488 | +} | |
1a6e0f06 | 29489 | + |
e4b2b4a8 JK |
29490 | +static void destroy_hist_field(struct hist_field *hist_field, |
29491 | + unsigned int level) | |
29492 | { | |
29493 | + unsigned int i; | |
1a6e0f06 | 29494 | + |
e4b2b4a8 JK |
29495 | + if (level > 3) |
29496 | + return; | |
1a6e0f06 | 29497 | + |
e4b2b4a8 | 29498 | + if (!hist_field) |
1a6e0f06 JK |
29499 | + return; |
29500 | + | |
e4b2b4a8 JK |
29501 | + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) |
29502 | + destroy_hist_field(hist_field->operands[i], level + 1); | |
1a6e0f06 | 29503 | + |
e4b2b4a8 JK |
29504 | + kfree(hist_field->var.name); |
29505 | + kfree(hist_field->name); | |
29506 | + kfree(hist_field->type); | |
1a6e0f06 | 29507 | + |
e4b2b4a8 JK |
29508 | kfree(hist_field); |
29509 | } | |
29510 | ||
29511 | -static struct hist_field *create_hist_field(struct ftrace_event_field *field, | |
29512 | - unsigned long flags) | |
29513 | +static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, | |
29514 | + struct ftrace_event_field *field, | |
29515 | + unsigned long flags, | |
29516 | + char *var_name) | |
29517 | { | |
29518 | struct hist_field *hist_field; | |
29519 | ||
29520 | @@ -357,8 +2175,22 @@ | |
29521 | if (!hist_field) | |
29522 | return NULL; | |
29523 | ||
29524 | + hist_field->hist_data = hist_data; | |
1a6e0f06 | 29525 | + |
e4b2b4a8 JK |
29526 | + if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) |
29527 | + goto out; /* caller will populate */ | |
1a6e0f06 | 29528 | + |
e4b2b4a8 JK |
29529 | + if (flags & HIST_FIELD_FL_VAR_REF) { |
29530 | + hist_field->fn = hist_field_var_ref; | |
29531 | + goto out; | |
29532 | + } | |
1a6e0f06 | 29533 | + |
e4b2b4a8 JK |
29534 | if (flags & HIST_FIELD_FL_HITCOUNT) { |
29535 | hist_field->fn = hist_field_counter; | |
29536 | + hist_field->size = sizeof(u64); | |
29537 | + hist_field->type = kstrdup("u64", GFP_KERNEL); | |
29538 | + if (!hist_field->type) | |
29539 | + goto free; | |
29540 | goto out; | |
29541 | } | |
29542 | ||
29543 | @@ -368,7 +2200,31 @@ | |
29544 | } | |
29545 | ||
29546 | if (flags & HIST_FIELD_FL_LOG2) { | |
29547 | + unsigned long fl = flags & ~HIST_FIELD_FL_LOG2; | |
29548 | hist_field->fn = hist_field_log2; | |
29549 | + hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); | |
29550 | + hist_field->size = hist_field->operands[0]->size; | |
29551 | + hist_field->type = kstrdup(hist_field->operands[0]->type, GFP_KERNEL); | |
29552 | + if (!hist_field->type) | |
29553 | + goto free; | |
29554 | + goto out; | |
29555 | + } | |
1a6e0f06 | 29556 | + |
e4b2b4a8 JK |
29557 | + if (flags & HIST_FIELD_FL_TIMESTAMP) { |
29558 | + hist_field->fn = hist_field_timestamp; | |
29559 | + hist_field->size = sizeof(u64); | |
29560 | + hist_field->type = kstrdup("u64", GFP_KERNEL); | |
29561 | + if (!hist_field->type) | |
29562 | + goto free; | |
29563 | + goto out; | |
29564 | + } | |
1a6e0f06 | 29565 | + |
e4b2b4a8 JK |
29566 | + if (flags & HIST_FIELD_FL_CPU) { |
29567 | + hist_field->fn = hist_field_cpu; | |
29568 | + hist_field->size = sizeof(int); | |
29569 | + hist_field->type = kstrdup("unsigned int", GFP_KERNEL); | |
29570 | + if (!hist_field->type) | |
29571 | + goto free; | |
29572 | goto out; | |
29573 | } | |
29574 | ||
29575 | @@ -378,6 +2234,11 @@ | |
29576 | if (is_string_field(field)) { | |
29577 | flags |= HIST_FIELD_FL_STRING; | |
29578 | ||
29579 | + hist_field->size = MAX_FILTER_STR_VAL; | |
29580 | + hist_field->type = kstrdup(field->type, GFP_KERNEL); | |
29581 | + if (!hist_field->type) | |
29582 | + goto free; | |
29583 | + | |
29584 | if (field->filter_type == FILTER_STATIC_STRING) | |
29585 | hist_field->fn = hist_field_string; | |
29586 | else if (field->filter_type == FILTER_DYN_STRING) | |
29587 | @@ -385,10 +2246,16 @@ | |
29588 | else | |
29589 | hist_field->fn = hist_field_pstring; | |
29590 | } else { | |
29591 | + hist_field->size = field->size; | |
29592 | + hist_field->is_signed = field->is_signed; | |
29593 | + hist_field->type = kstrdup(field->type, GFP_KERNEL); | |
29594 | + if (!hist_field->type) | |
29595 | + goto free; | |
29596 | + | |
29597 | hist_field->fn = select_value_fn(field->size, | |
29598 | field->is_signed); | |
29599 | if (!hist_field->fn) { | |
29600 | - destroy_hist_field(hist_field); | |
29601 | + destroy_hist_field(hist_field, 0); | |
29602 | return NULL; | |
29603 | } | |
29604 | } | |
29605 | @@ -396,84 +2263,1636 @@ | |
29606 | hist_field->field = field; | |
29607 | hist_field->flags = flags; | |
29608 | ||
29609 | + if (var_name) { | |
29610 | + hist_field->var.name = kstrdup(var_name, GFP_KERNEL); | |
29611 | + if (!hist_field->var.name) | |
29612 | + goto free; | |
29613 | + } | |
29614 | + | |
29615 | return hist_field; | |
29616 | + free: | |
29617 | + destroy_hist_field(hist_field, 0); | |
29618 | + return NULL; | |
29619 | } | |
29620 | ||
29621 | static void destroy_hist_fields(struct hist_trigger_data *hist_data) | |
29622 | { | |
29623 | unsigned int i; | |
29624 | ||
29625 | - for (i = 0; i < TRACING_MAP_FIELDS_MAX; i++) { | |
29626 | + for (i = 0; i < HIST_FIELDS_MAX; i++) { | |
29627 | if (hist_data->fields[i]) { | |
29628 | - destroy_hist_field(hist_data->fields[i]); | |
29629 | + destroy_hist_field(hist_data->fields[i], 0); | |
29630 | hist_data->fields[i] = NULL; | |
29631 | } | |
29632 | } | |
29633 | } | |
29634 | ||
29635 | -static int create_hitcount_val(struct hist_trigger_data *hist_data) | |
29636 | +static int init_var_ref(struct hist_field *ref_field, | |
29637 | + struct hist_field *var_field, | |
29638 | + char *system, char *event_name) | |
29639 | { | |
29640 | - hist_data->fields[HITCOUNT_IDX] = | |
29641 | - create_hist_field(NULL, HIST_FIELD_FL_HITCOUNT); | |
29642 | - if (!hist_data->fields[HITCOUNT_IDX]) | |
29643 | - return -ENOMEM; | |
29644 | + int err = 0; | |
29645 | ||
29646 | - hist_data->n_vals++; | |
29647 | + ref_field->var.idx = var_field->var.idx; | |
29648 | + ref_field->var.hist_data = var_field->hist_data; | |
29649 | + ref_field->size = var_field->size; | |
29650 | + ref_field->is_signed = var_field->is_signed; | |
29651 | + ref_field->flags |= var_field->flags & | |
29652 | + (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | |
29653 | ||
29654 | - if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) | |
29655 | + if (system) { | |
29656 | + ref_field->system = kstrdup(system, GFP_KERNEL); | |
29657 | + if (!ref_field->system) | |
29658 | + return -ENOMEM; | |
29659 | + } | |
1a6e0f06 | 29660 | + |
e4b2b4a8 JK |
29661 | + if (event_name) { |
29662 | + ref_field->event_name = kstrdup(event_name, GFP_KERNEL); | |
29663 | + if (!ref_field->event_name) { | |
29664 | + err = -ENOMEM; | |
29665 | + goto free; | |
29666 | + } | |
29667 | + } | |
1a6e0f06 | 29668 | + |
e4b2b4a8 JK |
29669 | + if (var_field->var.name) { |
29670 | + ref_field->name = kstrdup(var_field->var.name, GFP_KERNEL); | |
29671 | + if (!ref_field->name) { | |
29672 | + err = -ENOMEM; | |
29673 | + goto free; | |
29674 | + } | |
29675 | + } else if (var_field->name) { | |
29676 | + ref_field->name = kstrdup(var_field->name, GFP_KERNEL); | |
29677 | + if (!ref_field->name) { | |
29678 | + err = -ENOMEM; | |
29679 | + goto free; | |
29680 | + } | |
29681 | + } | |
1a6e0f06 | 29682 | + |
e4b2b4a8 JK |
29683 | + ref_field->type = kstrdup(var_field->type, GFP_KERNEL); |
29684 | + if (!ref_field->type) { | |
29685 | + err = -ENOMEM; | |
29686 | + goto free; | |
29687 | + } | |
29688 | + out: | |
29689 | + return err; | |
29690 | + free: | |
29691 | + kfree(ref_field->system); | |
29692 | + kfree(ref_field->event_name); | |
29693 | + kfree(ref_field->name); | |
29694 | + | |
29695 | + goto out; | |
1a6e0f06 JK |
29696 | +} |
29697 | + | |
e4b2b4a8 JK |
29698 | +static struct hist_field *create_var_ref(struct hist_field *var_field, |
29699 | + char *system, char *event_name) | |
1a6e0f06 | 29700 | +{ |
e4b2b4a8 JK |
29701 | + unsigned long flags = HIST_FIELD_FL_VAR_REF; |
29702 | + struct hist_field *ref_field; | |
1a6e0f06 | 29703 | + |
e4b2b4a8 JK |
29704 | + ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); |
29705 | + if (ref_field) { | |
29706 | + if (init_var_ref(ref_field, var_field, system, event_name)) { | |
29707 | + destroy_hist_field(ref_field, 0); | |
29708 | + return NULL; | |
29709 | + } | |
29710 | + } | |
1a6e0f06 | 29711 | + |
e4b2b4a8 | 29712 | + return ref_field; |
1a6e0f06 JK |
29713 | +} |
29714 | + | |
e4b2b4a8 | 29715 | +static bool is_var_ref(char *var_name) |
1a6e0f06 | 29716 | +{ |
e4b2b4a8 JK |
29717 | + if (!var_name || strlen(var_name) < 2 || var_name[0] != '$') |
29718 | + return false; | |
1a6e0f06 | 29719 | + |
e4b2b4a8 | 29720 | + return true; |
1a6e0f06 JK |
29721 | +} |
29722 | + | |
e4b2b4a8 JK |
29723 | +static char *field_name_from_var(struct hist_trigger_data *hist_data, |
29724 | + char *var_name) | |
1a6e0f06 | 29725 | +{ |
e4b2b4a8 JK |
29726 | + char *name, *field; |
29727 | + unsigned int i; | |
1a6e0f06 | 29728 | + |
e4b2b4a8 JK |
29729 | + for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { |
29730 | + name = hist_data->attrs->var_defs.name[i]; | |
1a6e0f06 | 29731 | + |
e4b2b4a8 JK |
29732 | + if (strcmp(var_name, name) == 0) { |
29733 | + field = hist_data->attrs->var_defs.expr[i]; | |
29734 | + if (contains_operator(field) || is_var_ref(field)) | |
29735 | + continue; | |
29736 | + return field; | |
1a6e0f06 JK |
29737 | + } |
29738 | + } | |
e4b2b4a8 JK |
29739 | + |
29740 | + return NULL; | |
1a6e0f06 JK |
29741 | +} |
29742 | + | |
e4b2b4a8 JK |
29743 | +static char *local_field_var_ref(struct hist_trigger_data *hist_data, |
29744 | + char *system, char *event_name, | |
29745 | + char *var_name) | |
29746 | +{ | |
29747 | + struct trace_event_call *call; | |
29748 | + | |
29749 | + if (system && event_name) { | |
29750 | + call = hist_data->event_file->event_call; | |
29751 | + | |
29752 | + if (strcmp(system, call->class->system) != 0) | |
29753 | + return NULL; | |
29754 | + | |
29755 | + if (strcmp(event_name, trace_event_name(call)) != 0) | |
29756 | + return NULL; | |
29757 | + } | |
29758 | + | |
29759 | + if (!!system != !!event_name) | |
29760 | + return NULL; | |
29761 | + | |
29762 | + if (!is_var_ref(var_name)) | |
29763 | + return NULL; | |
29764 | + | |
29765 | + var_name++; | |
29766 | + | |
29767 | + return field_name_from_var(hist_data, var_name); | |
1a6e0f06 | 29768 | +} |
e4b2b4a8 JK |
29769 | + |
29770 | +static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data, | |
29771 | + char *system, char *event_name, | |
29772 | + char *var_name) | |
1a6e0f06 | 29773 | +{ |
e4b2b4a8 JK |
29774 | + struct hist_field *var_field = NULL, *ref_field = NULL; |
29775 | + | |
29776 | + if (!is_var_ref(var_name)) | |
29777 | + return NULL; | |
29778 | + | |
29779 | + var_name++; | |
29780 | + | |
29781 | + var_field = find_event_var(hist_data, system, event_name, var_name); | |
29782 | + if (var_field) | |
29783 | + ref_field = create_var_ref(var_field, system, event_name); | |
29784 | + | |
29785 | + if (!ref_field) | |
29786 | + hist_err_event("Couldn't find variable: $", | |
29787 | + system, event_name, var_name); | |
29788 | + | |
29789 | + return ref_field; | |
29790 | +} | |
29791 | + | |
29792 | +static struct ftrace_event_field * | |
29793 | +parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, | |
29794 | + char *field_str, unsigned long *flags) | |
29795 | +{ | |
29796 | + struct ftrace_event_field *field = NULL; | |
29797 | + char *field_name, *modifier, *str; | |
29798 | + | |
29799 | + modifier = str = kstrdup(field_str, GFP_KERNEL); | |
29800 | + if (!modifier) | |
29801 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 29802 | + |
e4b2b4a8 JK |
29803 | + field_name = strsep(&modifier, "."); |
29804 | + if (modifier) { | |
29805 | + if (strcmp(modifier, "hex") == 0) | |
29806 | + *flags |= HIST_FIELD_FL_HEX; | |
29807 | + else if (strcmp(modifier, "sym") == 0) | |
29808 | + *flags |= HIST_FIELD_FL_SYM; | |
29809 | + else if (strcmp(modifier, "sym-offset") == 0) | |
29810 | + *flags |= HIST_FIELD_FL_SYM_OFFSET; | |
29811 | + else if ((strcmp(modifier, "execname") == 0) && | |
29812 | + (strcmp(field_name, "common_pid") == 0)) | |
29813 | + *flags |= HIST_FIELD_FL_EXECNAME; | |
29814 | + else if (strcmp(modifier, "syscall") == 0) | |
29815 | + *flags |= HIST_FIELD_FL_SYSCALL; | |
29816 | + else if (strcmp(modifier, "log2") == 0) | |
29817 | + *flags |= HIST_FIELD_FL_LOG2; | |
29818 | + else if (strcmp(modifier, "usecs") == 0) | |
29819 | + *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; | |
29820 | + else { | |
29821 | + hist_err("Invalid field modifier: ", modifier); | |
29822 | + field = ERR_PTR(-EINVAL); | |
29823 | + goto out; | |
1a6e0f06 | 29824 | + } |
e4b2b4a8 | 29825 | + } |
1a6e0f06 | 29826 | + |
e4b2b4a8 JK |
29827 | + if (strcmp(field_name, "common_timestamp") == 0) { |
29828 | + *flags |= HIST_FIELD_FL_TIMESTAMP; | |
29829 | + hist_data->enable_timestamps = true; | |
29830 | + if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) | |
29831 | + hist_data->attrs->ts_in_usecs = true; | |
29832 | + } else if (strcmp(field_name, "cpu") == 0) | |
29833 | + *flags |= HIST_FIELD_FL_CPU; | |
29834 | + else { | |
29835 | + field = trace_find_event_field(file->event_call, field_name); | |
29836 | + if (!field || !field->size) { | |
29837 | + hist_err("Couldn't find field: ", field_name); | |
29838 | + field = ERR_PTR(-EINVAL); | |
29839 | + goto out; | |
29840 | + } | |
29841 | + } | |
29842 | + out: | |
29843 | + kfree(str); | |
1a6e0f06 | 29844 | + |
e4b2b4a8 JK |
29845 | + return field; |
29846 | +} | |
1a6e0f06 | 29847 | + |
e4b2b4a8 JK |
29848 | +static struct hist_field *create_alias(struct hist_trigger_data *hist_data, |
29849 | + struct hist_field *var_ref, | |
29850 | + char *var_name) | |
1a6e0f06 | 29851 | +{ |
e4b2b4a8 JK |
29852 | + struct hist_field *alias = NULL; |
29853 | + unsigned long flags = HIST_FIELD_FL_ALIAS | HIST_FIELD_FL_VAR; | |
1a6e0f06 | 29854 | + |
e4b2b4a8 JK |
29855 | + alias = create_hist_field(hist_data, NULL, flags, var_name); |
29856 | + if (!alias) | |
29857 | + return NULL; | |
1a6e0f06 | 29858 | + |
e4b2b4a8 JK |
29859 | + alias->fn = var_ref->fn; |
29860 | + alias->operands[0] = var_ref; | |
1a6e0f06 | 29861 | + |
e4b2b4a8 JK |
29862 | + if (init_var_ref(alias, var_ref, var_ref->system, var_ref->event_name)) { |
29863 | + destroy_hist_field(alias, 0); | |
29864 | + return NULL; | |
29865 | + } | |
1a6e0f06 | 29866 | + |
e4b2b4a8 | 29867 | + return alias; |
1a6e0f06 JK |
29868 | +} |
29869 | + | |
e4b2b4a8 JK |
29870 | +static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, |
29871 | + struct trace_event_file *file, char *str, | |
29872 | + unsigned long *flags, char *var_name) | |
29873 | +{ | |
29874 | + char *s, *ref_system = NULL, *ref_event = NULL, *ref_var = str; | |
29875 | + struct ftrace_event_field *field = NULL; | |
29876 | + struct hist_field *hist_field = NULL; | |
29877 | + int ret = 0; | |
1a6e0f06 | 29878 | + |
e4b2b4a8 JK |
29879 | + s = strchr(str, '.'); |
29880 | + if (s) { | |
29881 | + s = strchr(++s, '.'); | |
29882 | + if (s) { | |
29883 | + ref_system = strsep(&str, "."); | |
29884 | + if (!str) { | |
29885 | + ret = -EINVAL; | |
29886 | + goto out; | |
29887 | + } | |
29888 | + ref_event = strsep(&str, "."); | |
29889 | + if (!str) { | |
29890 | + ret = -EINVAL; | |
29891 | + goto out; | |
29892 | + } | |
29893 | + ref_var = str; | |
29894 | + } | |
29895 | + } | |
1a6e0f06 | 29896 | + |
e4b2b4a8 JK |
29897 | + s = local_field_var_ref(hist_data, ref_system, ref_event, ref_var); |
29898 | + if (!s) { | |
29899 | + hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); | |
29900 | + if (hist_field) { | |
29901 | + hist_data->var_refs[hist_data->n_var_refs] = hist_field; | |
29902 | + hist_field->var_ref_idx = hist_data->n_var_refs++; | |
29903 | + if (var_name) { | |
29904 | + hist_field = create_alias(hist_data, hist_field, var_name); | |
29905 | + if (!hist_field) { | |
29906 | + ret = -ENOMEM; | |
29907 | + goto out; | |
29908 | + } | |
29909 | + } | |
29910 | + return hist_field; | |
29911 | + } | |
29912 | + } else | |
29913 | + str = s; | |
29914 | + | |
29915 | + field = parse_field(hist_data, file, str, flags); | |
29916 | + if (IS_ERR(field)) { | |
29917 | + ret = PTR_ERR(field); | |
29918 | + goto out; | |
29919 | + } | |
29920 | + | |
29921 | + hist_field = create_hist_field(hist_data, field, *flags, var_name); | |
29922 | + if (!hist_field) { | |
29923 | + ret = -ENOMEM; | |
29924 | + goto out; | |
29925 | + } | |
29926 | + | |
29927 | + return hist_field; | |
29928 | + out: | |
29929 | + return ERR_PTR(ret); | |
29930 | +} | |
29931 | + | |
29932 | +static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, | |
29933 | + struct trace_event_file *file, | |
29934 | + char *str, unsigned long flags, | |
29935 | + char *var_name, unsigned int level); | |
29936 | + | |
29937 | +static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, | |
29938 | + struct trace_event_file *file, | |
29939 | + char *str, unsigned long flags, | |
29940 | + char *var_name, unsigned int level) | |
1a6e0f06 | 29941 | +{ |
e4b2b4a8 JK |
29942 | + struct hist_field *operand1, *expr = NULL; |
29943 | + unsigned long operand_flags; | |
29944 | + int ret = 0; | |
29945 | + char *s; | |
29946 | + | |
29947 | + // we support only -(xxx) i.e. explicit parens required | |
29948 | + | |
29949 | + if (level > 3) { | |
29950 | + hist_err("Too many subexpressions (3 max): ", str); | |
29951 | + ret = -EINVAL; | |
29952 | + goto free; | |
1a6e0f06 | 29953 | + } |
e4b2b4a8 JK |
29954 | + |
29955 | + str++; // skip leading '-' | |
29956 | + | |
29957 | + s = strchr(str, '('); | |
29958 | + if (s) | |
29959 | + str++; | |
29960 | + else { | |
29961 | + ret = -EINVAL; | |
29962 | + goto free; | |
29963 | + } | |
29964 | + | |
29965 | + s = strrchr(str, ')'); | |
29966 | + if (s) | |
29967 | + *s = '\0'; | |
29968 | + else { | |
29969 | + ret = -EINVAL; // no closing ')' | |
29970 | + goto free; | |
29971 | + } | |
29972 | + | |
29973 | + flags |= HIST_FIELD_FL_EXPR; | |
29974 | + expr = create_hist_field(hist_data, NULL, flags, var_name); | |
29975 | + if (!expr) { | |
29976 | + ret = -ENOMEM; | |
29977 | + goto free; | |
29978 | + } | |
29979 | + | |
29980 | + operand_flags = 0; | |
29981 | + operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); | |
29982 | + if (IS_ERR(operand1)) { | |
29983 | + ret = PTR_ERR(operand1); | |
29984 | + goto free; | |
29985 | + } | |
29986 | + | |
29987 | + expr->flags |= operand1->flags & | |
29988 | + (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | |
29989 | + expr->fn = hist_field_unary_minus; | |
29990 | + expr->operands[0] = operand1; | |
29991 | + expr->operator = FIELD_OP_UNARY_MINUS; | |
29992 | + expr->name = expr_str(expr, 0); | |
29993 | + expr->type = kstrdup(operand1->type, GFP_KERNEL); | |
29994 | + if (!expr->type) { | |
29995 | + ret = -ENOMEM; | |
29996 | + goto free; | |
29997 | + } | |
29998 | + | |
29999 | + return expr; | |
30000 | + free: | |
30001 | + destroy_hist_field(expr, 0); | |
30002 | + return ERR_PTR(ret); | |
1a6e0f06 | 30003 | +} |
1a6e0f06 | 30004 | + |
e4b2b4a8 JK |
30005 | +static int check_expr_operands(struct hist_field *operand1, |
30006 | + struct hist_field *operand2) | |
30007 | +{ | |
30008 | + unsigned long operand1_flags = operand1->flags; | |
30009 | + unsigned long operand2_flags = operand2->flags; | |
1a6e0f06 | 30010 | + |
e4b2b4a8 JK |
30011 | + if ((operand1_flags & HIST_FIELD_FL_VAR_REF) || |
30012 | + (operand1_flags & HIST_FIELD_FL_ALIAS)) { | |
30013 | + struct hist_field *var; | |
30014 | + | |
30015 | + var = find_var_field(operand1->var.hist_data, operand1->name); | |
30016 | + if (!var) | |
30017 | + return -EINVAL; | |
30018 | + operand1_flags = var->flags; | |
30019 | + } | |
30020 | + | |
30021 | + if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || | |
30022 | + (operand2_flags & HIST_FIELD_FL_ALIAS)) { | |
30023 | + struct hist_field *var; | |
30024 | + | |
30025 | + var = find_var_field(operand2->var.hist_data, operand2->name); | |
30026 | + if (!var) | |
30027 | + return -EINVAL; | |
30028 | + operand2_flags = var->flags; | |
30029 | + } | |
30030 | + | |
30031 | + if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != | |
30032 | + (operand2_flags & HIST_FIELD_FL_TIMESTAMP_USECS)) { | |
30033 | + hist_err("Timestamp units in expression don't match", NULL); | |
30034 | return -EINVAL; | |
30035 | + } | |
1a6e0f06 | 30036 | |
e4b2b4a8 | 30037 | return 0; |
1a6e0f06 JK |
30038 | } |
30039 | ||
e4b2b4a8 JK |
30040 | -static int create_val_field(struct hist_trigger_data *hist_data, |
30041 | - unsigned int val_idx, | |
30042 | - struct trace_event_file *file, | |
30043 | - char *field_str) | |
30044 | +static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, | |
30045 | + struct trace_event_file *file, | |
30046 | + char *str, unsigned long flags, | |
30047 | + char *var_name, unsigned int level) | |
30048 | { | |
30049 | - struct ftrace_event_field *field = NULL; | |
30050 | - unsigned long flags = 0; | |
30051 | - char *field_name; | |
30052 | + struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; | |
30053 | + unsigned long operand_flags; | |
30054 | + int field_op, ret = -EINVAL; | |
30055 | + char *sep, *operand1_str; | |
30056 | + | |
30057 | + if (level > 3) { | |
30058 | + hist_err("Too many subexpressions (3 max): ", str); | |
30059 | + return ERR_PTR(-EINVAL); | |
30060 | + } | |
30061 | + | |
30062 | + field_op = contains_operator(str); | |
30063 | + | |
30064 | + if (field_op == FIELD_OP_NONE) | |
30065 | + return parse_atom(hist_data, file, str, &flags, var_name); | |
30066 | + | |
30067 | + if (field_op == FIELD_OP_UNARY_MINUS) | |
30068 | + return parse_unary(hist_data, file, str, flags, var_name, ++level); | |
30069 | + | |
30070 | + switch (field_op) { | |
30071 | + case FIELD_OP_MINUS: | |
30072 | + sep = "-"; | |
30073 | + break; | |
30074 | + case FIELD_OP_PLUS: | |
30075 | + sep = "+"; | |
30076 | + break; | |
30077 | + default: | |
30078 | + goto free; | |
30079 | + } | |
30080 | + | |
30081 | + operand1_str = strsep(&str, sep); | |
30082 | + if (!operand1_str || !str) | |
30083 | + goto free; | |
30084 | + | |
30085 | + operand_flags = 0; | |
30086 | + operand1 = parse_atom(hist_data, file, operand1_str, | |
30087 | + &operand_flags, NULL); | |
30088 | + if (IS_ERR(operand1)) { | |
30089 | + ret = PTR_ERR(operand1); | |
30090 | + operand1 = NULL; | |
30091 | + goto free; | |
30092 | + } | |
30093 | + | |
30094 | + // rest of string could be another expression e.g. b+c in a+b+c | |
30095 | + operand_flags = 0; | |
30096 | + operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); | |
30097 | + if (IS_ERR(operand2)) { | |
30098 | + ret = PTR_ERR(operand2); | |
30099 | + operand2 = NULL; | |
30100 | + goto free; | |
30101 | + } | |
30102 | + | |
30103 | + ret = check_expr_operands(operand1, operand2); | |
30104 | + if (ret) | |
30105 | + goto free; | |
30106 | + | |
30107 | + flags |= HIST_FIELD_FL_EXPR; | |
30108 | + | |
30109 | + flags |= operand1->flags & | |
30110 | + (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); | |
1a6e0f06 | 30111 | + |
e4b2b4a8 JK |
30112 | + expr = create_hist_field(hist_data, NULL, flags, var_name); |
30113 | + if (!expr) { | |
30114 | + ret = -ENOMEM; | |
30115 | + goto free; | |
30116 | + } | |
1a6e0f06 | 30117 | + |
e4b2b4a8 JK |
30118 | + operand1->read_once = true; |
30119 | + operand2->read_once = true; | |
30120 | + | |
30121 | + expr->operands[0] = operand1; | |
30122 | + expr->operands[1] = operand2; | |
30123 | + expr->operator = field_op; | |
30124 | + expr->name = expr_str(expr, 0); | |
30125 | + expr->type = kstrdup(operand1->type, GFP_KERNEL); | |
30126 | + if (!expr->type) { | |
30127 | + ret = -ENOMEM; | |
30128 | + goto free; | |
30129 | + } | |
1a6e0f06 | 30130 | + |
e4b2b4a8 JK |
30131 | + switch (field_op) { |
30132 | + case FIELD_OP_MINUS: | |
30133 | + expr->fn = hist_field_minus; | |
30134 | + break; | |
30135 | + case FIELD_OP_PLUS: | |
30136 | + expr->fn = hist_field_plus; | |
30137 | + break; | |
30138 | + default: | |
30139 | + ret = -EINVAL; | |
30140 | + goto free; | |
30141 | + } | |
30142 | + | |
30143 | + return expr; | |
30144 | + free: | |
30145 | + destroy_hist_field(operand1, 0); | |
30146 | + destroy_hist_field(operand2, 0); | |
30147 | + destroy_hist_field(expr, 0); | |
30148 | + | |
30149 | + return ERR_PTR(ret); | |
30150 | +} | |
30151 | + | |
30152 | +static char *find_trigger_filter(struct hist_trigger_data *hist_data, | |
30153 | + struct trace_event_file *file) | |
1a6e0f06 | 30154 | +{ |
e4b2b4a8 JK |
30155 | + struct event_trigger_data *test; |
30156 | + | |
30157 | + list_for_each_entry_rcu(test, &file->triggers, list) { | |
30158 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
30159 | + if (test->private_data == hist_data) | |
30160 | + return test->filter_str; | |
30161 | + } | |
30162 | + } | |
30163 | + | |
30164 | + return NULL; | |
1a6e0f06 JK |
30165 | +} |
30166 | + | |
e4b2b4a8 JK |
30167 | +static struct event_command trigger_hist_cmd; |
30168 | +static int event_hist_trigger_func(struct event_command *cmd_ops, | |
30169 | + struct trace_event_file *file, | |
30170 | + char *glob, char *cmd, char *param); | |
30171 | + | |
30172 | +static bool compatible_keys(struct hist_trigger_data *target_hist_data, | |
30173 | + struct hist_trigger_data *hist_data, | |
30174 | + unsigned int n_keys) | |
1a6e0f06 | 30175 | +{ |
e4b2b4a8 JK |
30176 | + struct hist_field *target_hist_field, *hist_field; |
30177 | + unsigned int n, i, j; | |
30178 | + | |
30179 | + if (hist_data->n_fields - hist_data->n_vals != n_keys) | |
30180 | + return false; | |
30181 | + | |
30182 | + i = hist_data->n_vals; | |
30183 | + j = target_hist_data->n_vals; | |
30184 | + | |
30185 | + for (n = 0; n < n_keys; n++) { | |
30186 | + hist_field = hist_data->fields[i + n]; | |
30187 | + target_hist_field = target_hist_data->fields[j + n]; | |
30188 | + | |
30189 | + if (strcmp(hist_field->type, target_hist_field->type) != 0) | |
30190 | + return false; | |
30191 | + if (hist_field->size != target_hist_field->size) | |
30192 | + return false; | |
30193 | + if (hist_field->is_signed != target_hist_field->is_signed) | |
30194 | + return false; | |
30195 | + } | |
30196 | + | |
30197 | + return true; | |
1a6e0f06 JK |
30198 | +} |
30199 | + | |
e4b2b4a8 JK |
30200 | +static struct hist_trigger_data * |
30201 | +find_compatible_hist(struct hist_trigger_data *target_hist_data, | |
30202 | + struct trace_event_file *file) | |
1a6e0f06 | 30203 | +{ |
e4b2b4a8 JK |
30204 | + struct hist_trigger_data *hist_data; |
30205 | + struct event_trigger_data *test; | |
30206 | + unsigned int n_keys; | |
30207 | + | |
30208 | + n_keys = target_hist_data->n_fields - target_hist_data->n_vals; | |
30209 | + | |
30210 | + list_for_each_entry_rcu(test, &file->triggers, list) { | |
30211 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
30212 | + hist_data = test->private_data; | |
30213 | + | |
30214 | + if (compatible_keys(target_hist_data, hist_data, n_keys)) | |
30215 | + return hist_data; | |
30216 | + } | |
30217 | + } | |
30218 | + | |
30219 | + return NULL; | |
1a6e0f06 | 30220 | +} |
1a6e0f06 | 30221 | + |
e4b2b4a8 JK |
30222 | +static struct trace_event_file *event_file(struct trace_array *tr, |
30223 | + char *system, char *event_name) | |
30224 | +{ | |
30225 | + struct trace_event_file *file; | |
30226 | + | |
30227 | + file = find_event_file(tr, system, event_name); | |
30228 | + if (!file) | |
30229 | + return ERR_PTR(-EINVAL); | |
30230 | + | |
30231 | + return file; | |
30232 | +} | |
30233 | + | |
30234 | +static struct hist_field * | |
30235 | +find_synthetic_field_var(struct hist_trigger_data *target_hist_data, | |
30236 | + char *system, char *event_name, char *field_name) | |
30237 | +{ | |
30238 | + struct hist_field *event_var; | |
30239 | + char *synthetic_name; | |
30240 | + | |
30241 | + synthetic_name = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); | |
30242 | + if (!synthetic_name) | |
30243 | + return ERR_PTR(-ENOMEM); | |
30244 | + | |
30245 | + strcpy(synthetic_name, "synthetic_"); | |
30246 | + strcat(synthetic_name, field_name); | |
30247 | + | |
30248 | + event_var = find_event_var(target_hist_data, system, event_name, synthetic_name); | |
30249 | + | |
30250 | + kfree(synthetic_name); | |
30251 | + | |
30252 | + return event_var; | |
30253 | +} | |
1a6e0f06 JK |
30254 | + |
30255 | +/** | |
e4b2b4a8 JK |
30256 | + * create_field_var_hist - Automatically create a histogram and var for a field |
30257 | + * @target_hist_data: The target hist trigger | |
30258 | + * @subsys_name: Optional subsystem name | |
30259 | + * @event_name: Optional event name | |
30260 | + * @field_name: The name of the field (and the resulting variable) | |
1a6e0f06 | 30261 | + * |
e4b2b4a8 JK |
30262 | + * Hist trigger actions fetch data from variables, not directly from |
30263 | + * events. However, for convenience, users are allowed to directly | |
30264 | + * specify an event field in an action, which will be automatically | |
30265 | + * converted into a variable on their behalf. | |
30266 | + | |
30267 | + * If a user specifies a field on an event that isn't the event the | |
30268 | + * histogram currently being defined (the target event histogram), the | |
30269 | + * only way that can be accomplished is if a new hist trigger is | |
30270 | + * created and the field variable defined on that. | |
1a6e0f06 | 30271 | + * |
e4b2b4a8 JK |
30272 | + * This function creates a new histogram compatible with the target |
30273 | + * event (meaning a histogram with the same key as the target | |
30274 | + * histogram), and creates a variable for the specified field, but | |
30275 | + * with 'synthetic_' prepended to the variable name in order to avoid | |
30276 | + * collision with normal field variables. | |
30277 | + * | |
30278 | + * Return: The variable created for the field. | |
1a6e0f06 | 30279 | + */ |
e4b2b4a8 JK |
30280 | +static struct hist_field * |
30281 | +create_field_var_hist(struct hist_trigger_data *target_hist_data, | |
30282 | + char *subsys_name, char *event_name, char *field_name) | |
30283 | +{ | |
30284 | + struct trace_array *tr = target_hist_data->event_file->tr; | |
30285 | + struct hist_field *event_var = ERR_PTR(-EINVAL); | |
30286 | + struct hist_trigger_data *hist_data; | |
30287 | + unsigned int i, n, first = true; | |
30288 | + struct field_var_hist *var_hist; | |
30289 | + struct trace_event_file *file; | |
30290 | + struct hist_field *key_field; | |
30291 | + char *saved_filter; | |
30292 | + char *cmd; | |
30293 | + int ret; | |
1a6e0f06 | 30294 | + |
e4b2b4a8 JK |
30295 | + if (target_hist_data->n_field_var_hists >= SYNTH_FIELDS_MAX) { |
30296 | + hist_err_event("onmatch: Too many field variables defined: ", | |
30297 | + subsys_name, event_name, field_name); | |
30298 | + return ERR_PTR(-EINVAL); | |
30299 | + } | |
1a6e0f06 | 30300 | + |
e4b2b4a8 | 30301 | + file = event_file(tr, subsys_name, event_name); |
1a6e0f06 | 30302 | + |
e4b2b4a8 JK |
30303 | + if (IS_ERR(file)) { |
30304 | + hist_err_event("onmatch: Event file not found: ", | |
30305 | + subsys_name, event_name, field_name); | |
30306 | + ret = PTR_ERR(file); | |
30307 | + return ERR_PTR(ret); | |
1a6e0f06 JK |
30308 | + } |
30309 | + | |
e4b2b4a8 JK |
30310 | + /* |
30311 | + * Look for a histogram compatible with target. We'll use the | |
30312 | + * found histogram specification to create a new matching | |
30313 | + * histogram with our variable on it. target_hist_data is not | |
30314 | + * yet a registered histogram so we can't use that. | |
30315 | + */ | |
30316 | + hist_data = find_compatible_hist(target_hist_data, file); | |
30317 | + if (!hist_data) { | |
30318 | + hist_err_event("onmatch: Matching event histogram not found: ", | |
30319 | + subsys_name, event_name, field_name); | |
30320 | + return ERR_PTR(-EINVAL); | |
1a6e0f06 | 30321 | + } |
1a6e0f06 | 30322 | + |
e4b2b4a8 JK |
30323 | + /* See if a synthetic field variable has already been created */ |
30324 | + event_var = find_synthetic_field_var(target_hist_data, subsys_name, | |
30325 | + event_name, field_name); | |
30326 | + if (!IS_ERR_OR_NULL(event_var)) | |
30327 | + return event_var; | |
1a6e0f06 | 30328 | + |
e4b2b4a8 JK |
30329 | + var_hist = kzalloc(sizeof(*var_hist), GFP_KERNEL); |
30330 | + if (!var_hist) | |
30331 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 30332 | + |
e4b2b4a8 JK |
30333 | + cmd = kzalloc(MAX_FILTER_STR_VAL, GFP_KERNEL); |
30334 | + if (!cmd) { | |
30335 | + kfree(var_hist); | |
30336 | + return ERR_PTR(-ENOMEM); | |
30337 | + } | |
30338 | + | |
30339 | + /* Use the same keys as the compatible histogram */ | |
30340 | + strcat(cmd, "keys="); | |
30341 | + | |
30342 | + for_each_hist_key_field(i, hist_data) { | |
30343 | + key_field = hist_data->fields[i]; | |
30344 | + if (!first) | |
30345 | + strcat(cmd, ","); | |
30346 | + strcat(cmd, key_field->field->name); | |
30347 | + first = false; | |
30348 | + } | |
30349 | + | |
30350 | + /* Create the synthetic field variable specification */ | |
30351 | + strcat(cmd, ":synthetic_"); | |
30352 | + strcat(cmd, field_name); | |
30353 | + strcat(cmd, "="); | |
30354 | + strcat(cmd, field_name); | |
30355 | + | |
30356 | + /* Use the same filter as the compatible histogram */ | |
30357 | + saved_filter = find_trigger_filter(hist_data, file); | |
30358 | + if (saved_filter) { | |
30359 | + strcat(cmd, " if "); | |
30360 | + strcat(cmd, saved_filter); | |
30361 | + } | |
30362 | + | |
30363 | + var_hist->cmd = kstrdup(cmd, GFP_KERNEL); | |
30364 | + if (!var_hist->cmd) { | |
30365 | + kfree(cmd); | |
30366 | + kfree(var_hist); | |
30367 | + return ERR_PTR(-ENOMEM); | |
30368 | + } | |
30369 | + | |
30370 | + /* Save the compatible histogram information */ | |
30371 | + var_hist->hist_data = hist_data; | |
30372 | + | |
30373 | + /* Create the new histogram with our variable */ | |
30374 | + ret = event_hist_trigger_func(&trigger_hist_cmd, file, | |
30375 | + "", "hist", cmd); | |
30376 | + if (ret) { | |
30377 | + kfree(cmd); | |
30378 | + kfree(var_hist->cmd); | |
30379 | + kfree(var_hist); | |
30380 | + hist_err_event("onmatch: Couldn't create histogram for field: ", | |
30381 | + subsys_name, event_name, field_name); | |
30382 | + return ERR_PTR(ret); | |
30383 | + } | |
30384 | + | |
30385 | + kfree(cmd); | |
30386 | + | |
30387 | + /* If we can't find the variable, something went wrong */ | |
30388 | + event_var = find_synthetic_field_var(target_hist_data, subsys_name, | |
30389 | + event_name, field_name); | |
30390 | + if (IS_ERR_OR_NULL(event_var)) { | |
30391 | + kfree(var_hist->cmd); | |
30392 | + kfree(var_hist); | |
30393 | + hist_err_event("onmatch: Couldn't find synthetic variable: ", | |
30394 | + subsys_name, event_name, field_name); | |
30395 | + return ERR_PTR(-EINVAL); | |
1a6e0f06 | 30396 | + } |
e4b2b4a8 JK |
30397 | + |
30398 | + n = target_hist_data->n_field_var_hists; | |
30399 | + target_hist_data->field_var_hists[n] = var_hist; | |
30400 | + target_hist_data->n_field_var_hists++; | |
30401 | + | |
30402 | + return event_var; | |
1a6e0f06 JK |
30403 | +} |
30404 | + | |
e4b2b4a8 JK |
30405 | +static struct hist_field * |
30406 | +find_target_event_var(struct hist_trigger_data *hist_data, | |
30407 | + char *subsys_name, char *event_name, char *var_name) | |
1a6e0f06 | 30408 | +{ |
e4b2b4a8 JK |
30409 | + struct trace_event_file *file = hist_data->event_file; |
30410 | + struct hist_field *hist_field = NULL; | |
1a6e0f06 | 30411 | + |
e4b2b4a8 JK |
30412 | + if (subsys_name) { |
30413 | + struct trace_event_call *call; | |
1a6e0f06 | 30414 | + |
e4b2b4a8 JK |
30415 | + if (!event_name) |
30416 | + return NULL; | |
1a6e0f06 | 30417 | + |
e4b2b4a8 | 30418 | + call = file->event_call; |
1a6e0f06 | 30419 | + |
e4b2b4a8 JK |
30420 | + if (strcmp(subsys_name, call->class->system) != 0) |
30421 | + return NULL; | |
1a6e0f06 | 30422 | + |
e4b2b4a8 JK |
30423 | + if (strcmp(event_name, trace_event_name(call)) != 0) |
30424 | + return NULL; | |
30425 | + } | |
30426 | + | |
30427 | + hist_field = find_var_field(hist_data, var_name); | |
30428 | + | |
30429 | + return hist_field; | |
30430 | +} | |
30431 | + | |
30432 | +static inline void __update_field_vars(struct tracing_map_elt *elt, | |
30433 | + struct ring_buffer_event *rbe, | |
30434 | + void *rec, | |
30435 | + struct field_var **field_vars, | |
30436 | + unsigned int n_field_vars, | |
30437 | + unsigned int field_var_str_start) | |
30438 | +{ | |
30439 | + struct hist_elt_data *elt_data = elt->private_data; | |
30440 | + unsigned int i, j, var_idx; | |
30441 | + u64 var_val; | |
1a6e0f06 | 30442 | + |
e4b2b4a8 JK |
30443 | + for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { |
30444 | + struct field_var *field_var = field_vars[i]; | |
30445 | + struct hist_field *var = field_var->var; | |
30446 | + struct hist_field *val = field_var->val; | |
1a6e0f06 | 30447 | + |
e4b2b4a8 JK |
30448 | + var_val = val->fn(val, elt, rbe, rec); |
30449 | + var_idx = var->var.idx; | |
1a6e0f06 | 30450 | + |
e4b2b4a8 JK |
30451 | + if (val->flags & HIST_FIELD_FL_STRING) { |
30452 | + char *str = elt_data->field_var_str[j++]; | |
30453 | + char *val_str = (char *)(uintptr_t)var_val; | |
1a6e0f06 | 30454 | + |
e4b2b4a8 JK |
30455 | + strscpy(str, val_str, STR_VAR_LEN_MAX); |
30456 | + var_val = (u64)(uintptr_t)str; | |
1a6e0f06 | 30457 | + } |
e4b2b4a8 | 30458 | + tracing_map_set_var(elt, var_idx, var_val); |
1a6e0f06 | 30459 | + } |
1a6e0f06 JK |
30460 | +} |
30461 | + | |
e4b2b4a8 JK |
30462 | +static void update_field_vars(struct hist_trigger_data *hist_data, |
30463 | + struct tracing_map_elt *elt, | |
30464 | + struct ring_buffer_event *rbe, | |
30465 | + void *rec) | |
1a6e0f06 | 30466 | +{ |
e4b2b4a8 JK |
30467 | + __update_field_vars(elt, rbe, rec, hist_data->field_vars, |
30468 | + hist_data->n_field_vars, 0); | |
30469 | +} | |
1a6e0f06 | 30470 | + |
e4b2b4a8 JK |
30471 | +static void update_max_vars(struct hist_trigger_data *hist_data, |
30472 | + struct tracing_map_elt *elt, | |
30473 | + struct ring_buffer_event *rbe, | |
30474 | + void *rec) | |
30475 | +{ | |
30476 | + __update_field_vars(elt, rbe, rec, hist_data->max_vars, | |
30477 | + hist_data->n_max_vars, hist_data->n_field_var_str); | |
1a6e0f06 JK |
30478 | +} |
30479 | + | |
e4b2b4a8 JK |
30480 | +static struct hist_field *create_var(struct hist_trigger_data *hist_data, |
30481 | + struct trace_event_file *file, | |
30482 | + char *name, int size, const char *type) | |
30483 | +{ | |
30484 | + struct hist_field *var; | |
30485 | + int idx; | |
1a6e0f06 | 30486 | + |
e4b2b4a8 JK |
30487 | + if (find_var(hist_data, file, name) && !hist_data->remove) { |
30488 | + var = ERR_PTR(-EINVAL); | |
30489 | + goto out; | |
30490 | + } | |
1a6e0f06 | 30491 | + |
e4b2b4a8 JK |
30492 | + var = kzalloc(sizeof(struct hist_field), GFP_KERNEL); |
30493 | + if (!var) { | |
30494 | + var = ERR_PTR(-ENOMEM); | |
30495 | + goto out; | |
30496 | + } | |
1a6e0f06 | 30497 | + |
e4b2b4a8 JK |
30498 | + idx = tracing_map_add_var(hist_data->map); |
30499 | + if (idx < 0) { | |
30500 | + kfree(var); | |
30501 | + var = ERR_PTR(-EINVAL); | |
30502 | + goto out; | |
30503 | + } | |
1a6e0f06 | 30504 | + |
e4b2b4a8 JK |
30505 | + var->flags = HIST_FIELD_FL_VAR; |
30506 | + var->var.idx = idx; | |
30507 | + var->var.hist_data = var->hist_data = hist_data; | |
30508 | + var->size = size; | |
30509 | + var->var.name = kstrdup(name, GFP_KERNEL); | |
30510 | + var->type = kstrdup(type, GFP_KERNEL); | |
30511 | + if (!var->var.name || !var->type) { | |
30512 | + kfree(var->var.name); | |
30513 | + kfree(var->type); | |
30514 | + kfree(var); | |
30515 | + var = ERR_PTR(-ENOMEM); | |
30516 | + } | |
30517 | + out: | |
30518 | + return var; | |
30519 | +} | |
1a6e0f06 | 30520 | + |
e4b2b4a8 JK |
30521 | +static struct field_var *create_field_var(struct hist_trigger_data *hist_data, |
30522 | + struct trace_event_file *file, | |
30523 | + char *field_name) | |
1a6e0f06 | 30524 | +{ |
e4b2b4a8 JK |
30525 | + struct hist_field *val = NULL, *var = NULL; |
30526 | + unsigned long flags = HIST_FIELD_FL_VAR; | |
30527 | + struct field_var *field_var; | |
30528 | int ret = 0; | |
30529 | ||
30530 | - if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) | |
30531 | + if (hist_data->n_field_vars >= SYNTH_FIELDS_MAX) { | |
30532 | + hist_err("Too many field variables defined: ", field_name); | |
30533 | + ret = -EINVAL; | |
30534 | + goto err; | |
30535 | + } | |
30536 | + | |
30537 | + val = parse_atom(hist_data, file, field_name, &flags, NULL); | |
30538 | + if (IS_ERR(val)) { | |
30539 | + hist_err("Couldn't parse field variable: ", field_name); | |
30540 | + ret = PTR_ERR(val); | |
30541 | + goto err; | |
30542 | + } | |
30543 | + | |
30544 | + var = create_var(hist_data, file, field_name, val->size, val->type); | |
30545 | + if (IS_ERR(var)) { | |
30546 | + hist_err("Couldn't create or find variable: ", field_name); | |
30547 | + kfree(val); | |
30548 | + ret = PTR_ERR(var); | |
30549 | + goto err; | |
30550 | + } | |
30551 | + | |
30552 | + field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); | |
30553 | + if (!field_var) { | |
30554 | + kfree(val); | |
30555 | + kfree(var); | |
30556 | + ret = -ENOMEM; | |
30557 | + goto err; | |
30558 | + } | |
30559 | + | |
30560 | + field_var->var = var; | |
30561 | + field_var->val = val; | |
30562 | + out: | |
30563 | + return field_var; | |
30564 | + err: | |
30565 | + field_var = ERR_PTR(ret); | |
30566 | + goto out; | |
1a6e0f06 JK |
30567 | +} |
30568 | + | |
e4b2b4a8 JK |
30569 | +/** |
30570 | + * create_target_field_var - Automatically create a variable for a field | |
30571 | + * @target_hist_data: The target hist trigger | |
30572 | + * @subsys_name: Optional subsystem name | |
30573 | + * @event_name: Optional event name | |
30574 | + * @var_name: The name of the field (and the resulting variable) | |
30575 | + * | |
30576 | + * Hist trigger actions fetch data from variables, not directly from | |
30577 | + * events. However, for convenience, users are allowed to directly | |
30578 | + * specify an event field in an action, which will be automatically | |
30579 | + * converted into a variable on their behalf. | |
30580 | + | |
30581 | + * This function creates a field variable with the name var_name on | |
30582 | + * the hist trigger currently being defined on the target event. If | |
30583 | + * subsys_name and event_name are specified, this function simply | |
30584 | + * verifies that they do in fact match the target event subsystem and | |
30585 | + * event name. | |
30586 | + * | |
30587 | + * Return: The variable created for the field. | |
1a6e0f06 | 30588 | + */ |
e4b2b4a8 JK |
30589 | +static struct field_var * |
30590 | +create_target_field_var(struct hist_trigger_data *target_hist_data, | |
30591 | + char *subsys_name, char *event_name, char *var_name) | |
1a6e0f06 | 30592 | +{ |
e4b2b4a8 | 30593 | + struct trace_event_file *file = target_hist_data->event_file; |
1a6e0f06 | 30594 | + |
e4b2b4a8 JK |
30595 | + if (subsys_name) { |
30596 | + struct trace_event_call *call; | |
1a6e0f06 | 30597 | + |
e4b2b4a8 JK |
30598 | + if (!event_name) |
30599 | + return NULL; | |
1a6e0f06 | 30600 | + |
e4b2b4a8 JK |
30601 | + call = file->event_call; |
30602 | + | |
30603 | + if (strcmp(subsys_name, call->class->system) != 0) | |
30604 | + return NULL; | |
30605 | + | |
30606 | + if (strcmp(event_name, trace_event_name(call)) != 0) | |
30607 | + return NULL; | |
30608 | + } | |
30609 | + | |
30610 | + return create_field_var(target_hist_data, file, var_name); | |
1a6e0f06 JK |
30611 | +} |
30612 | + | |
e4b2b4a8 JK |
30613 | +static void onmax_print(struct seq_file *m, |
30614 | + struct hist_trigger_data *hist_data, | |
30615 | + struct tracing_map_elt *elt, | |
30616 | + struct action_data *data) | |
1a6e0f06 | 30617 | +{ |
e4b2b4a8 | 30618 | + unsigned int i, save_var_idx, max_idx = data->onmax.max_var->var.idx; |
1a6e0f06 | 30619 | + |
e4b2b4a8 | 30620 | + seq_printf(m, "\n\tmax: %10llu", tracing_map_read_var(elt, max_idx)); |
1a6e0f06 | 30621 | + |
e4b2b4a8 JK |
30622 | + for (i = 0; i < hist_data->n_max_vars; i++) { |
30623 | + struct hist_field *save_val = hist_data->max_vars[i]->val; | |
30624 | + struct hist_field *save_var = hist_data->max_vars[i]->var; | |
30625 | + u64 val; | |
1a6e0f06 | 30626 | + |
e4b2b4a8 | 30627 | + save_var_idx = save_var->var.idx; |
1a6e0f06 | 30628 | + |
e4b2b4a8 | 30629 | + val = tracing_map_read_var(elt, save_var_idx); |
1a6e0f06 | 30630 | + |
e4b2b4a8 JK |
30631 | + if (save_val->flags & HIST_FIELD_FL_STRING) { |
30632 | + seq_printf(m, " %s: %-32s", save_var->var.name, | |
30633 | + (char *)(uintptr_t)(val)); | |
30634 | + } else | |
30635 | + seq_printf(m, " %s: %10llu", save_var->var.name, val); | |
30636 | + } | |
1a6e0f06 JK |
30637 | +} |
30638 | + | |
e4b2b4a8 JK |
30639 | +static void onmax_save(struct hist_trigger_data *hist_data, |
30640 | + struct tracing_map_elt *elt, void *rec, | |
30641 | + struct ring_buffer_event *rbe, | |
30642 | + struct action_data *data, u64 *var_ref_vals) | |
1a6e0f06 | 30643 | +{ |
e4b2b4a8 JK |
30644 | + unsigned int max_idx = data->onmax.max_var->var.idx; |
30645 | + unsigned int max_var_ref_idx = data->onmax.max_var_ref_idx; | |
1a6e0f06 | 30646 | + |
e4b2b4a8 | 30647 | + u64 var_val, max_val; |
1a6e0f06 | 30648 | + |
e4b2b4a8 JK |
30649 | + var_val = var_ref_vals[max_var_ref_idx]; |
30650 | + max_val = tracing_map_read_var(elt, max_idx); | |
30651 | + | |
30652 | + if (var_val <= max_val) | |
30653 | + return; | |
30654 | + | |
30655 | + tracing_map_set_var(elt, max_idx, var_val); | |
30656 | + | |
30657 | + update_max_vars(hist_data, elt, rbe, rec); | |
30658 | +} | |
1a6e0f06 | 30659 | + |
e4b2b4a8 | 30660 | +static void onmax_destroy(struct action_data *data) |
1a6e0f06 | 30661 | +{ |
e4b2b4a8 | 30662 | + unsigned int i; |
1a6e0f06 | 30663 | + |
e4b2b4a8 JK |
30664 | + destroy_hist_field(data->onmax.max_var, 0); |
30665 | + destroy_hist_field(data->onmax.var, 0); | |
1a6e0f06 | 30666 | + |
e4b2b4a8 JK |
30667 | + kfree(data->onmax.var_str); |
30668 | + kfree(data->onmax.fn_name); | |
1a6e0f06 | 30669 | + |
e4b2b4a8 JK |
30670 | + for (i = 0; i < data->n_params; i++) |
30671 | + kfree(data->params[i]); | |
1a6e0f06 | 30672 | + |
e4b2b4a8 JK |
30673 | + kfree(data); |
30674 | +} | |
1a6e0f06 | 30675 | + |
e4b2b4a8 JK |
30676 | +static int onmax_create(struct hist_trigger_data *hist_data, |
30677 | + struct action_data *data) | |
30678 | +{ | |
30679 | + struct trace_event_file *file = hist_data->event_file; | |
30680 | + struct hist_field *var_field, *ref_field, *max_var; | |
30681 | + unsigned int var_ref_idx = hist_data->n_var_refs; | |
30682 | + struct field_var *field_var; | |
30683 | + char *onmax_var_str, *param; | |
30684 | + unsigned long flags; | |
30685 | + unsigned int i; | |
30686 | + int ret = 0; | |
1a6e0f06 | 30687 | + |
e4b2b4a8 JK |
30688 | + onmax_var_str = data->onmax.var_str; |
30689 | + if (onmax_var_str[0] != '$') { | |
30690 | + hist_err("onmax: For onmax(x), x must be a variable: ", onmax_var_str); | |
30691 | return -EINVAL; | |
30692 | + } | |
30693 | + onmax_var_str++; | |
30694 | ||
30695 | - field_name = strsep(&field_str, "."); | |
30696 | - if (field_str) { | |
30697 | - if (strcmp(field_str, "hex") == 0) | |
30698 | - flags |= HIST_FIELD_FL_HEX; | |
30699 | - else { | |
30700 | + var_field = find_target_event_var(hist_data, NULL, NULL, onmax_var_str); | |
30701 | + if (!var_field) { | |
30702 | + hist_err("onmax: Couldn't find onmax variable: ", onmax_var_str); | |
30703 | + return -EINVAL; | |
30704 | + } | |
1a6e0f06 | 30705 | + |
e4b2b4a8 JK |
30706 | + flags = HIST_FIELD_FL_VAR_REF; |
30707 | + ref_field = create_hist_field(hist_data, NULL, flags, NULL); | |
30708 | + if (!ref_field) | |
30709 | + return -ENOMEM; | |
1a6e0f06 | 30710 | + |
e4b2b4a8 JK |
30711 | + if (init_var_ref(ref_field, var_field, NULL, NULL)) { |
30712 | + destroy_hist_field(ref_field, 0); | |
30713 | + ret = -ENOMEM; | |
30714 | + goto out; | |
1a6e0f06 | 30715 | + } |
e4b2b4a8 JK |
30716 | + hist_data->var_refs[hist_data->n_var_refs] = ref_field; |
30717 | + ref_field->var_ref_idx = hist_data->n_var_refs++; | |
30718 | + data->onmax.var = ref_field; | |
30719 | + | |
30720 | + data->fn = onmax_save; | |
30721 | + data->onmax.max_var_ref_idx = var_ref_idx; | |
30722 | + max_var = create_var(hist_data, file, "max", sizeof(u64), "u64"); | |
30723 | + if (IS_ERR(max_var)) { | |
30724 | + hist_err("onmax: Couldn't create onmax variable: ", "max"); | |
30725 | + ret = PTR_ERR(max_var); | |
30726 | + goto out; | |
30727 | + } | |
30728 | + data->onmax.max_var = max_var; | |
1a6e0f06 | 30729 | + |
e4b2b4a8 JK |
30730 | + for (i = 0; i < data->n_params; i++) { |
30731 | + param = kstrdup(data->params[i], GFP_KERNEL); | |
30732 | + if (!param) { | |
30733 | + ret = -ENOMEM; | |
30734 | + goto out; | |
30735 | + } | |
30736 | + | |
30737 | + field_var = create_target_field_var(hist_data, NULL, NULL, param); | |
30738 | + if (IS_ERR(field_var)) { | |
30739 | + hist_err("onmax: Couldn't create field variable: ", param); | |
30740 | + ret = PTR_ERR(field_var); | |
30741 | + kfree(param); | |
30742 | + goto out; | |
30743 | + } | |
30744 | + | |
30745 | + hist_data->max_vars[hist_data->n_max_vars++] = field_var; | |
30746 | + if (field_var->val->flags & HIST_FIELD_FL_STRING) | |
30747 | + hist_data->n_max_var_str++; | |
30748 | + | |
30749 | + kfree(param); | |
1a6e0f06 | 30750 | + } |
e4b2b4a8 JK |
30751 | + out: |
30752 | + return ret; | |
1a6e0f06 JK |
30753 | +} |
30754 | + | |
e4b2b4a8 | 30755 | +static int parse_action_params(char *params, struct action_data *data) |
1a6e0f06 | 30756 | +{ |
e4b2b4a8 JK |
30757 | + char *param, *saved_param; |
30758 | + int ret = 0; | |
1a6e0f06 | 30759 | + |
e4b2b4a8 JK |
30760 | + while (params) { |
30761 | + if (data->n_params >= SYNTH_FIELDS_MAX) | |
30762 | + goto out; | |
1a6e0f06 | 30763 | + |
e4b2b4a8 JK |
30764 | + param = strsep(¶ms, ","); |
30765 | + if (!param) { | |
30766 | + ret = -EINVAL; | |
30767 | + goto out; | |
30768 | + } | |
1a6e0f06 | 30769 | + |
e4b2b4a8 JK |
30770 | + param = strstrip(param); |
30771 | + if (strlen(param) < 2) { | |
30772 | + hist_err("Invalid action param: ", param); | |
30773 | ret = -EINVAL; | |
30774 | goto out; | |
30775 | } | |
1a6e0f06 | 30776 | + |
e4b2b4a8 JK |
30777 | + saved_param = kstrdup(param, GFP_KERNEL); |
30778 | + if (!saved_param) { | |
30779 | + ret = -ENOMEM; | |
30780 | + goto out; | |
30781 | + } | |
30782 | + | |
30783 | + data->params[data->n_params++] = saved_param; | |
30784 | } | |
30785 | + out: | |
30786 | + return ret; | |
30787 | +} | |
30788 | ||
30789 | - field = trace_find_event_field(file->event_call, field_name); | |
30790 | - if (!field || !field->size) { | |
30791 | +static struct action_data *onmax_parse(char *str) | |
1a6e0f06 | 30792 | +{ |
e4b2b4a8 JK |
30793 | + char *onmax_fn_name, *onmax_var_str; |
30794 | + struct action_data *data; | |
30795 | + int ret = -EINVAL; | |
1a6e0f06 | 30796 | + |
e4b2b4a8 JK |
30797 | + data = kzalloc(sizeof(*data), GFP_KERNEL); |
30798 | + if (!data) | |
30799 | + return ERR_PTR(-ENOMEM); | |
1a6e0f06 | 30800 | + |
e4b2b4a8 JK |
30801 | + onmax_var_str = strsep(&str, ")"); |
30802 | + if (!onmax_var_str || !str) { | |
30803 | ret = -EINVAL; | |
30804 | - goto out; | |
30805 | + goto free; | |
30806 | } | |
30807 | ||
30808 | - hist_data->fields[val_idx] = create_hist_field(field, flags); | |
30809 | - if (!hist_data->fields[val_idx]) { | |
30810 | + data->onmax.var_str = kstrdup(onmax_var_str, GFP_KERNEL); | |
30811 | + if (!data->onmax.var_str) { | |
30812 | + ret = -ENOMEM; | |
30813 | + goto free; | |
30814 | + } | |
30815 | + | |
30816 | + strsep(&str, "."); | |
30817 | + if (!str) | |
30818 | + goto free; | |
30819 | + | |
30820 | + onmax_fn_name = strsep(&str, "("); | |
30821 | + if (!onmax_fn_name || !str) | |
30822 | + goto free; | |
30823 | + | |
30824 | + if (strncmp(onmax_fn_name, "save", strlen("save")) == 0) { | |
30825 | + char *params = strsep(&str, ")"); | |
30826 | + | |
30827 | + if (!params) { | |
30828 | + ret = -EINVAL; | |
30829 | + goto free; | |
1a6e0f06 | 30830 | + } |
1a6e0f06 | 30831 | + |
e4b2b4a8 JK |
30832 | + ret = parse_action_params(params, data); |
30833 | + if (ret) | |
30834 | + goto free; | |
30835 | + } else | |
30836 | + goto free; | |
30837 | + | |
30838 | + data->onmax.fn_name = kstrdup(onmax_fn_name, GFP_KERNEL); | |
30839 | + if (!data->onmax.fn_name) { | |
30840 | + ret = -ENOMEM; | |
30841 | + goto free; | |
1a6e0f06 | 30842 | + } |
e4b2b4a8 JK |
30843 | + out: |
30844 | + return data; | |
30845 | + free: | |
30846 | + onmax_destroy(data); | |
30847 | + data = ERR_PTR(ret); | |
30848 | + goto out; | |
1a6e0f06 JK |
30849 | +} |
30850 | + | |
e4b2b4a8 JK |
30851 | +static void onmatch_destroy(struct action_data *data) |
30852 | +{ | |
30853 | + unsigned int i; | |
30854 | + | |
30855 | + mutex_lock(&synth_event_mutex); | |
30856 | + | |
30857 | + kfree(data->onmatch.match_event); | |
30858 | + kfree(data->onmatch.match_event_system); | |
30859 | + kfree(data->onmatch.synth_event_name); | |
30860 | + | |
30861 | + for (i = 0; i < data->n_params; i++) | |
30862 | + kfree(data->params[i]); | |
30863 | + | |
30864 | + if (data->onmatch.synth_event) | |
30865 | + data->onmatch.synth_event->ref--; | |
30866 | + | |
30867 | + kfree(data); | |
30868 | + | |
30869 | + mutex_unlock(&synth_event_mutex); | |
30870 | +} | |
30871 | + | |
30872 | +static void destroy_field_var(struct field_var *field_var) | |
30873 | +{ | |
30874 | + if (!field_var) | |
30875 | + return; | |
30876 | + | |
30877 | + destroy_hist_field(field_var->var, 0); | |
30878 | + destroy_hist_field(field_var->val, 0); | |
30879 | + | |
30880 | + kfree(field_var); | |
1a6e0f06 JK |
30881 | +} |
30882 | + | |
e4b2b4a8 JK |
30883 | +static void destroy_field_vars(struct hist_trigger_data *hist_data) |
30884 | +{ | |
30885 | + unsigned int i; | |
1a6e0f06 | 30886 | + |
e4b2b4a8 JK |
30887 | + for (i = 0; i < hist_data->n_field_vars; i++) |
30888 | + destroy_field_var(hist_data->field_vars[i]); | |
30889 | +} | |
30890 | + | |
30891 | +static void save_field_var(struct hist_trigger_data *hist_data, | |
30892 | + struct field_var *field_var) | |
1a6e0f06 | 30893 | +{ |
e4b2b4a8 | 30894 | + hist_data->field_vars[hist_data->n_field_vars++] = field_var; |
1a6e0f06 | 30895 | + |
e4b2b4a8 JK |
30896 | + if (field_var->val->flags & HIST_FIELD_FL_STRING) |
30897 | + hist_data->n_field_var_str++; | |
30898 | +} | |
1a6e0f06 | 30899 | + |
e4b2b4a8 JK |
30900 | + |
30901 | +static void destroy_synth_var_refs(struct hist_trigger_data *hist_data) | |
30902 | +{ | |
30903 | + unsigned int i; | |
30904 | + | |
30905 | + for (i = 0; i < hist_data->n_synth_var_refs; i++) | |
30906 | + destroy_hist_field(hist_data->synth_var_refs[i], 0); | |
1a6e0f06 | 30907 | +} |
e4b2b4a8 JK |
30908 | + |
30909 | +static void save_synth_var_ref(struct hist_trigger_data *hist_data, | |
30910 | + struct hist_field *var_ref) | |
1a6e0f06 | 30911 | +{ |
e4b2b4a8 JK |
30912 | + hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; |
30913 | + | |
30914 | + hist_data->var_refs[hist_data->n_var_refs] = var_ref; | |
30915 | + var_ref->var_ref_idx = hist_data->n_var_refs++; | |
1a6e0f06 | 30916 | +} |
1a6e0f06 | 30917 | + |
e4b2b4a8 JK |
30918 | +static int check_synth_field(struct synth_event *event, |
30919 | + struct hist_field *hist_field, | |
30920 | + unsigned int field_pos) | |
1a6e0f06 | 30921 | +{ |
e4b2b4a8 JK |
30922 | + struct synth_field *field; |
30923 | + | |
30924 | + if (field_pos >= event->n_fields) | |
30925 | + return -EINVAL; | |
30926 | + | |
30927 | + field = event->fields[field_pos]; | |
30928 | + | |
30929 | + if (strcmp(field->type, hist_field->type) != 0) | |
30930 | + return -EINVAL; | |
30931 | + | |
30932 | + return 0; | |
1a6e0f06 JK |
30933 | +} |
30934 | + | |
e4b2b4a8 JK |
30935 | +static struct hist_field * |
30936 | +onmatch_find_var(struct hist_trigger_data *hist_data, struct action_data *data, | |
30937 | + char *system, char *event, char *var) | |
30938 | +{ | |
30939 | + struct hist_field *hist_field; | |
30940 | + | |
30941 | + var++; /* skip '$' */ | |
30942 | + | |
30943 | + hist_field = find_target_event_var(hist_data, system, event, var); | |
30944 | + if (!hist_field) { | |
30945 | + if (!system) { | |
30946 | + system = data->onmatch.match_event_system; | |
30947 | + event = data->onmatch.match_event; | |
30948 | + } | |
30949 | + | |
30950 | + hist_field = find_event_var(hist_data, system, event, var); | |
1a6e0f06 JK |
30951 | + } |
30952 | + | |
e4b2b4a8 JK |
30953 | + if (!hist_field) |
30954 | + hist_err_event("onmatch: Couldn't find onmatch param: $", system, event, var); | |
30955 | + | |
30956 | + return hist_field; | |
30957 | +} | |
30958 | + | |
30959 | +static struct hist_field * | |
30960 | +onmatch_create_field_var(struct hist_trigger_data *hist_data, | |
30961 | + struct action_data *data, char *system, | |
30962 | + char *event, char *var) | |
30963 | +{ | |
30964 | + struct hist_field *hist_field = NULL; | |
30965 | + struct field_var *field_var; | |
30966 | + | |
30967 | + /* | |
30968 | + * First try to create a field var on the target event (the | |
30969 | + * currently being defined). This will create a variable for | |
30970 | + * unqualified fields on the target event, or if qualified, | |
30971 | + * target fields that have qualified names matching the target. | |
30972 | + */ | |
30973 | + field_var = create_target_field_var(hist_data, system, event, var); | |
30974 | + | |
30975 | + if (field_var && !IS_ERR(field_var)) { | |
30976 | + save_field_var(hist_data, field_var); | |
30977 | + hist_field = field_var->var; | |
30978 | + } else { | |
30979 | + field_var = NULL; | |
30980 | + /* | |
30981 | + * If no explicit system.event is specfied, default to | |
30982 | + * looking for fields on the onmatch(system.event.xxx) | |
30983 | + * event. | |
30984 | + */ | |
30985 | + if (!system) { | |
30986 | + system = data->onmatch.match_event_system; | |
30987 | + event = data->onmatch.match_event; | |
30988 | + } | |
30989 | + | |
30990 | + /* | |
30991 | + * At this point, we're looking at a field on another | |
30992 | + * event. Because we can't modify a hist trigger on | |
30993 | + * another event to add a variable for a field, we need | |
30994 | + * to create a new trigger on that event and create the | |
30995 | + * variable at the same time. | |
30996 | + */ | |
30997 | + hist_field = create_field_var_hist(hist_data, system, event, var); | |
30998 | + if (IS_ERR(hist_field)) | |
30999 | + goto free; | |
31000 | + } | |
31001 | + out: | |
31002 | + return hist_field; | |
31003 | + free: | |
31004 | + destroy_field_var(field_var); | |
31005 | + hist_field = NULL; | |
31006 | + goto out; | |
31007 | +} | |
31008 | + | |
31009 | +static int onmatch_create(struct hist_trigger_data *hist_data, | |
31010 | + struct trace_event_file *file, | |
31011 | + struct action_data *data) | |
31012 | +{ | |
31013 | + char *event_name, *param, *system = NULL; | |
31014 | + struct hist_field *hist_field, *var_ref; | |
31015 | + unsigned int i, var_ref_idx; | |
31016 | + unsigned int field_pos = 0; | |
31017 | + struct synth_event *event; | |
31018 | + int ret = 0; | |
31019 | + | |
31020 | + mutex_lock(&synth_event_mutex); | |
31021 | + event = find_synth_event(data->onmatch.synth_event_name); | |
31022 | + if (!event) { | |
31023 | + hist_err("onmatch: Couldn't find synthetic event: ", data->onmatch.synth_event_name); | |
31024 | + mutex_unlock(&synth_event_mutex); | |
31025 | + return -EINVAL; | |
31026 | + } | |
31027 | + event->ref++; | |
31028 | + mutex_unlock(&synth_event_mutex); | |
31029 | + | |
31030 | + var_ref_idx = hist_data->n_var_refs; | |
31031 | + | |
31032 | + for (i = 0; i < data->n_params; i++) { | |
31033 | + char *p; | |
31034 | + | |
31035 | + p = param = kstrdup(data->params[i], GFP_KERNEL); | |
31036 | + if (!param) { | |
31037 | + ret = -ENOMEM; | |
31038 | + goto err; | |
31039 | + } | |
31040 | + | |
31041 | + system = strsep(¶m, "."); | |
31042 | + if (!param) { | |
31043 | + param = (char *)system; | |
31044 | + system = event_name = NULL; | |
31045 | + } else { | |
31046 | + event_name = strsep(¶m, "."); | |
31047 | + if (!param) { | |
31048 | + kfree(p); | |
31049 | + ret = -EINVAL; | |
31050 | + goto err; | |
31051 | + } | |
31052 | + } | |
31053 | + | |
31054 | + if (param[0] == '$') | |
31055 | + hist_field = onmatch_find_var(hist_data, data, system, | |
31056 | + event_name, param); | |
31057 | + else | |
31058 | + hist_field = onmatch_create_field_var(hist_data, data, | |
31059 | + system, | |
31060 | + event_name, | |
31061 | + param); | |
31062 | + | |
31063 | + if (!hist_field) { | |
31064 | + kfree(p); | |
31065 | + ret = -EINVAL; | |
31066 | + goto err; | |
31067 | + } | |
31068 | + | |
31069 | + if (check_synth_field(event, hist_field, field_pos) == 0) { | |
31070 | + var_ref = create_var_ref(hist_field, system, event_name); | |
31071 | + if (!var_ref) { | |
31072 | + kfree(p); | |
31073 | + ret = -ENOMEM; | |
31074 | + goto err; | |
31075 | + } | |
31076 | + | |
31077 | + save_synth_var_ref(hist_data, var_ref); | |
31078 | + field_pos++; | |
31079 | + kfree(p); | |
31080 | + continue; | |
31081 | + } | |
31082 | + | |
31083 | + hist_err_event("onmatch: Param type doesn't match synthetic event field type: ", | |
31084 | + system, event_name, param); | |
31085 | + kfree(p); | |
31086 | + ret = -EINVAL; | |
31087 | + goto err; | |
31088 | + } | |
31089 | + | |
31090 | + if (field_pos != event->n_fields) { | |
31091 | + hist_err("onmatch: Param count doesn't match synthetic event field count: ", event->name); | |
31092 | + ret = -EINVAL; | |
31093 | + goto err; | |
31094 | + } | |
31095 | + | |
31096 | + data->fn = action_trace; | |
31097 | + data->onmatch.synth_event = event; | |
31098 | + data->onmatch.var_ref_idx = var_ref_idx; | |
31099 | + out: | |
31100 | + return ret; | |
31101 | + err: | |
31102 | + mutex_lock(&synth_event_mutex); | |
31103 | + event->ref--; | |
31104 | + mutex_unlock(&synth_event_mutex); | |
31105 | + | |
31106 | + goto out; | |
31107 | +} | |
31108 | + | |
31109 | +static struct action_data *onmatch_parse(struct trace_array *tr, char *str) | |
31110 | +{ | |
31111 | + char *match_event, *match_event_system; | |
31112 | + char *synth_event_name, *params; | |
31113 | + struct action_data *data; | |
31114 | + int ret = -EINVAL; | |
31115 | + | |
31116 | + data = kzalloc(sizeof(*data), GFP_KERNEL); | |
31117 | + if (!data) | |
31118 | + return ERR_PTR(-ENOMEM); | |
31119 | + | |
31120 | + match_event = strsep(&str, ")"); | |
31121 | + if (!match_event || !str) { | |
31122 | + hist_err("onmatch: Missing closing paren: ", match_event); | |
31123 | + goto free; | |
31124 | + } | |
31125 | + | |
31126 | + match_event_system = strsep(&match_event, "."); | |
31127 | + if (!match_event) { | |
31128 | + hist_err("onmatch: Missing subsystem for match event: ", match_event_system); | |
31129 | + goto free; | |
31130 | + } | |
31131 | + | |
31132 | + if (IS_ERR(event_file(tr, match_event_system, match_event))) { | |
31133 | + hist_err_event("onmatch: Invalid subsystem or event name: ", | |
31134 | + match_event_system, match_event, NULL); | |
31135 | + goto free; | |
31136 | + } | |
31137 | + | |
31138 | + data->onmatch.match_event = kstrdup(match_event, GFP_KERNEL); | |
31139 | + if (!data->onmatch.match_event) { | |
31140 | + ret = -ENOMEM; | |
31141 | + goto free; | |
31142 | + } | |
31143 | + | |
31144 | + data->onmatch.match_event_system = kstrdup(match_event_system, GFP_KERNEL); | |
31145 | + if (!data->onmatch.match_event_system) { | |
31146 | + ret = -ENOMEM; | |
31147 | + goto free; | |
31148 | + } | |
31149 | + | |
31150 | + strsep(&str, "."); | |
31151 | + if (!str) { | |
31152 | + hist_err("onmatch: Missing . after onmatch(): ", str); | |
31153 | + goto free; | |
31154 | + } | |
31155 | + | |
31156 | + synth_event_name = strsep(&str, "("); | |
31157 | + if (!synth_event_name || !str) { | |
31158 | + hist_err("onmatch: Missing opening paramlist paren: ", synth_event_name); | |
31159 | + goto free; | |
31160 | + } | |
31161 | + | |
31162 | + data->onmatch.synth_event_name = kstrdup(synth_event_name, GFP_KERNEL); | |
31163 | + if (!data->onmatch.synth_event_name) { | |
31164 | ret = -ENOMEM; | |
31165 | + goto free; | |
31166 | + } | |
31167 | + | |
31168 | + params = strsep(&str, ")"); | |
31169 | + if (!params || !str || (str && strlen(str))) { | |
31170 | + hist_err("onmatch: Missing closing paramlist paren: ", params); | |
31171 | + goto free; | |
31172 | + } | |
31173 | + | |
31174 | + ret = parse_action_params(params, data); | |
31175 | + if (ret) | |
31176 | + goto free; | |
31177 | + out: | |
31178 | + return data; | |
31179 | + free: | |
31180 | + onmatch_destroy(data); | |
31181 | + data = ERR_PTR(ret); | |
31182 | + goto out; | |
31183 | +} | |
31184 | + | |
31185 | +static int create_hitcount_val(struct hist_trigger_data *hist_data) | |
31186 | +{ | |
31187 | + hist_data->fields[HITCOUNT_IDX] = | |
31188 | + create_hist_field(hist_data, NULL, HIST_FIELD_FL_HITCOUNT, NULL); | |
31189 | + if (!hist_data->fields[HITCOUNT_IDX]) | |
31190 | + return -ENOMEM; | |
31191 | + | |
31192 | + hist_data->n_vals++; | |
31193 | + hist_data->n_fields++; | |
31194 | + | |
31195 | + if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) | |
31196 | + return -EINVAL; | |
31197 | + | |
31198 | + return 0; | |
31199 | +} | |
31200 | + | |
31201 | +static int __create_val_field(struct hist_trigger_data *hist_data, | |
31202 | + unsigned int val_idx, | |
31203 | + struct trace_event_file *file, | |
31204 | + char *var_name, char *field_str, | |
31205 | + unsigned long flags) | |
31206 | +{ | |
31207 | + struct hist_field *hist_field; | |
31208 | + int ret = 0; | |
31209 | + | |
31210 | + hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); | |
31211 | + if (IS_ERR(hist_field)) { | |
31212 | + ret = PTR_ERR(hist_field); | |
31213 | goto out; | |
1a6e0f06 JK |
31214 | } |
31215 | ||
e4b2b4a8 JK |
31216 | + hist_data->fields[val_idx] = hist_field; |
31217 | + | |
31218 | ++hist_data->n_vals; | |
31219 | + ++hist_data->n_fields; | |
1a6e0f06 | 31220 | |
e4b2b4a8 JK |
31221 | - if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX)) |
31222 | + if (WARN_ON(hist_data->n_vals > TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) | |
31223 | ret = -EINVAL; | |
31224 | out: | |
1a6e0f06 JK |
31225 | return ret; |
31226 | } | |
1a6e0f06 | 31227 | |
e4b2b4a8 JK |
31228 | +static int create_val_field(struct hist_trigger_data *hist_data, |
31229 | + unsigned int val_idx, | |
31230 | + struct trace_event_file *file, | |
31231 | + char *field_str) | |
1a6e0f06 | 31232 | +{ |
e4b2b4a8 JK |
31233 | + if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX)) |
31234 | + return -EINVAL; | |
1a6e0f06 | 31235 | + |
e4b2b4a8 | 31236 | + return __create_val_field(hist_data, val_idx, file, NULL, field_str, 0); |
1a6e0f06 JK |
31237 | +} |
31238 | + | |
e4b2b4a8 JK |
31239 | +static int create_var_field(struct hist_trigger_data *hist_data, |
31240 | + unsigned int val_idx, | |
31241 | + struct trace_event_file *file, | |
31242 | + char *var_name, char *expr_str) | |
1a6e0f06 | 31243 | +{ |
e4b2b4a8 | 31244 | + unsigned long flags = 0; |
1a6e0f06 | 31245 | + |
e4b2b4a8 JK |
31246 | + if (WARN_ON(val_idx >= TRACING_MAP_VALS_MAX + TRACING_MAP_VARS_MAX)) |
31247 | + return -EINVAL; | |
1a6e0f06 | 31248 | + |
e4b2b4a8 JK |
31249 | + if (find_var(hist_data, file, var_name) && !hist_data->remove) { |
31250 | + hist_err("Variable already defined: ", var_name); | |
31251 | + return -EINVAL; | |
31252 | + } | |
31253 | + | |
31254 | + flags |= HIST_FIELD_FL_VAR; | |
31255 | + hist_data->n_vars++; | |
31256 | + if (WARN_ON(hist_data->n_vars > TRACING_MAP_VARS_MAX)) | |
31257 | + return -EINVAL; | |
31258 | + | |
31259 | + return __create_val_field(hist_data, val_idx, file, var_name, expr_str, flags); | |
31260 | +} | |
31261 | + | |
31262 | static int create_val_fields(struct hist_trigger_data *hist_data, | |
31263 | struct trace_event_file *file) | |
31264 | { | |
31265 | char *fields_str, *field_str; | |
31266 | - unsigned int i, j; | |
31267 | + unsigned int i, j = 1; | |
31268 | int ret; | |
1a6e0f06 | 31269 | |
e4b2b4a8 JK |
31270 | ret = create_hitcount_val(hist_data); |
31271 | @@ -493,12 +3912,15 @@ | |
31272 | field_str = strsep(&fields_str, ","); | |
31273 | if (!field_str) | |
31274 | break; | |
31275 | + | |
31276 | if (strcmp(field_str, "hitcount") == 0) | |
31277 | continue; | |
31278 | + | |
31279 | ret = create_val_field(hist_data, j++, file, field_str); | |
31280 | if (ret) | |
31281 | goto out; | |
1a6e0f06 | 31282 | } |
e4b2b4a8 JK |
31283 | + |
31284 | if (fields_str && (strcmp(fields_str, "hitcount") != 0)) | |
31285 | ret = -EINVAL; | |
31286 | out: | |
31287 | @@ -511,12 +3933,13 @@ | |
31288 | struct trace_event_file *file, | |
31289 | char *field_str) | |
31290 | { | |
31291 | - struct ftrace_event_field *field = NULL; | |
31292 | + struct hist_field *hist_field = NULL; | |
31293 | + | |
31294 | unsigned long flags = 0; | |
31295 | unsigned int key_size; | |
31296 | int ret = 0; | |
1a6e0f06 | 31297 | |
e4b2b4a8 JK |
31298 | - if (WARN_ON(key_idx >= TRACING_MAP_FIELDS_MAX)) |
31299 | + if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) | |
31300 | return -EINVAL; | |
1a6e0f06 | 31301 | |
e4b2b4a8 JK |
31302 | flags |= HIST_FIELD_FL_KEY; |
31303 | @@ -524,57 +3947,40 @@ | |
31304 | if (strcmp(field_str, "stacktrace") == 0) { | |
31305 | flags |= HIST_FIELD_FL_STACKTRACE; | |
31306 | key_size = sizeof(unsigned long) * HIST_STACKTRACE_DEPTH; | |
31307 | + hist_field = create_hist_field(hist_data, NULL, flags, NULL); | |
31308 | } else { | |
31309 | - char *field_name = strsep(&field_str, "."); | |
31310 | - | |
31311 | - if (field_str) { | |
31312 | - if (strcmp(field_str, "hex") == 0) | |
31313 | - flags |= HIST_FIELD_FL_HEX; | |
31314 | - else if (strcmp(field_str, "sym") == 0) | |
31315 | - flags |= HIST_FIELD_FL_SYM; | |
31316 | - else if (strcmp(field_str, "sym-offset") == 0) | |
31317 | - flags |= HIST_FIELD_FL_SYM_OFFSET; | |
31318 | - else if ((strcmp(field_str, "execname") == 0) && | |
31319 | - (strcmp(field_name, "common_pid") == 0)) | |
31320 | - flags |= HIST_FIELD_FL_EXECNAME; | |
31321 | - else if (strcmp(field_str, "syscall") == 0) | |
31322 | - flags |= HIST_FIELD_FL_SYSCALL; | |
31323 | - else if (strcmp(field_str, "log2") == 0) | |
31324 | - flags |= HIST_FIELD_FL_LOG2; | |
31325 | - else { | |
31326 | - ret = -EINVAL; | |
31327 | - goto out; | |
31328 | - } | |
31329 | + hist_field = parse_expr(hist_data, file, field_str, flags, | |
31330 | + NULL, 0); | |
31331 | + if (IS_ERR(hist_field)) { | |
31332 | + ret = PTR_ERR(hist_field); | |
31333 | + goto out; | |
31334 | } | |
1a6e0f06 | 31335 | |
e4b2b4a8 JK |
31336 | - field = trace_find_event_field(file->event_call, field_name); |
31337 | - if (!field || !field->size) { | |
31338 | + if (hist_field->flags & HIST_FIELD_FL_VAR_REF) { | |
31339 | + hist_err("Using variable references as keys not supported: ", field_str); | |
31340 | + destroy_hist_field(hist_field, 0); | |
31341 | ret = -EINVAL; | |
31342 | goto out; | |
1a6e0f06 | 31343 | } |
e4b2b4a8 JK |
31344 | |
31345 | - if (is_string_field(field)) | |
31346 | - key_size = MAX_FILTER_STR_VAL; | |
31347 | - else | |
31348 | - key_size = field->size; | |
31349 | + key_size = hist_field->size; | |
1a6e0f06 | 31350 | } |
1a6e0f06 | 31351 | |
e4b2b4a8 JK |
31352 | - hist_data->fields[key_idx] = create_hist_field(field, flags); |
31353 | - if (!hist_data->fields[key_idx]) { | |
31354 | - ret = -ENOMEM; | |
31355 | - goto out; | |
31356 | - } | |
31357 | + hist_data->fields[key_idx] = hist_field; | |
31358 | ||
31359 | key_size = ALIGN(key_size, sizeof(u64)); | |
31360 | hist_data->fields[key_idx]->size = key_size; | |
31361 | hist_data->fields[key_idx]->offset = key_offset; | |
31362 | + | |
31363 | hist_data->key_size += key_size; | |
31364 | + | |
31365 | if (hist_data->key_size > HIST_KEY_SIZE_MAX) { | |
31366 | ret = -EINVAL; | |
31367 | goto out; | |
1a6e0f06 | 31368 | } |
1a6e0f06 | 31369 | |
e4b2b4a8 JK |
31370 | hist_data->n_keys++; |
31371 | + hist_data->n_fields++; | |
1a6e0f06 | 31372 | |
e4b2b4a8 JK |
31373 | if (WARN_ON(hist_data->n_keys > TRACING_MAP_KEYS_MAX)) |
31374 | return -EINVAL; | |
31375 | @@ -618,21 +4024,113 @@ | |
31376 | return ret; | |
1a6e0f06 JK |
31377 | } |
31378 | ||
e4b2b4a8 JK |
31379 | +static int create_var_fields(struct hist_trigger_data *hist_data, |
31380 | + struct trace_event_file *file) | |
31381 | +{ | |
31382 | + unsigned int i, j = hist_data->n_vals; | |
31383 | + int ret = 0; | |
31384 | + | |
31385 | + unsigned int n_vars = hist_data->attrs->var_defs.n_vars; | |
31386 | + | |
31387 | + for (i = 0; i < n_vars; i++) { | |
31388 | + char *var_name = hist_data->attrs->var_defs.name[i]; | |
31389 | + char *expr = hist_data->attrs->var_defs.expr[i]; | |
31390 | + | |
31391 | + ret = create_var_field(hist_data, j++, file, var_name, expr); | |
31392 | + if (ret) | |
31393 | + goto out; | |
31394 | + } | |
31395 | + out: | |
31396 | + return ret; | |
31397 | +} | |
31398 | + | |
31399 | +static void free_var_defs(struct hist_trigger_data *hist_data) | |
31400 | +{ | |
31401 | + unsigned int i; | |
31402 | + | |
31403 | + for (i = 0; i < hist_data->attrs->var_defs.n_vars; i++) { | |
31404 | + kfree(hist_data->attrs->var_defs.name[i]); | |
31405 | + kfree(hist_data->attrs->var_defs.expr[i]); | |
31406 | + } | |
31407 | + | |
31408 | + hist_data->attrs->var_defs.n_vars = 0; | |
31409 | +} | |
31410 | + | |
31411 | +static int parse_var_defs(struct hist_trigger_data *hist_data) | |
31412 | +{ | |
31413 | + char *s, *str, *var_name, *field_str; | |
31414 | + unsigned int i, j, n_vars = 0; | |
31415 | + int ret = 0; | |
31416 | + | |
31417 | + for (i = 0; i < hist_data->attrs->n_assignments; i++) { | |
31418 | + str = hist_data->attrs->assignment_str[i]; | |
31419 | + for (j = 0; j < TRACING_MAP_VARS_MAX; j++) { | |
31420 | + field_str = strsep(&str, ","); | |
31421 | + if (!field_str) | |
31422 | + break; | |
31423 | + | |
31424 | + var_name = strsep(&field_str, "="); | |
31425 | + if (!var_name || !field_str) { | |
31426 | + hist_err("Malformed assignment: ", var_name); | |
31427 | + ret = -EINVAL; | |
31428 | + goto free; | |
31429 | + } | |
31430 | + | |
31431 | + if (n_vars == TRACING_MAP_VARS_MAX) { | |
31432 | + hist_err("Too many variables defined: ", var_name); | |
31433 | + ret = -EINVAL; | |
31434 | + goto free; | |
31435 | + } | |
31436 | + | |
31437 | + s = kstrdup(var_name, GFP_KERNEL); | |
31438 | + if (!s) { | |
31439 | + ret = -ENOMEM; | |
31440 | + goto free; | |
31441 | + } | |
31442 | + hist_data->attrs->var_defs.name[n_vars] = s; | |
31443 | + | |
31444 | + s = kstrdup(field_str, GFP_KERNEL); | |
31445 | + if (!s) { | |
31446 | + kfree(hist_data->attrs->var_defs.name[n_vars]); | |
31447 | + ret = -ENOMEM; | |
31448 | + goto free; | |
31449 | + } | |
31450 | + hist_data->attrs->var_defs.expr[n_vars++] = s; | |
31451 | + | |
31452 | + hist_data->attrs->var_defs.n_vars = n_vars; | |
31453 | + } | |
31454 | + } | |
31455 | + | |
31456 | + return ret; | |
31457 | + free: | |
31458 | + free_var_defs(hist_data); | |
31459 | + | |
31460 | + return ret; | |
31461 | +} | |
31462 | + | |
31463 | static int create_hist_fields(struct hist_trigger_data *hist_data, | |
31464 | struct trace_event_file *file) | |
31465 | { | |
31466 | int ret; | |
1a6e0f06 | 31467 | |
e4b2b4a8 JK |
31468 | + ret = parse_var_defs(hist_data); |
31469 | + if (ret) | |
31470 | + goto out; | |
31471 | + | |
31472 | ret = create_val_fields(hist_data, file); | |
31473 | if (ret) | |
31474 | goto out; | |
1a6e0f06 | 31475 | |
e4b2b4a8 JK |
31476 | - ret = create_key_fields(hist_data, file); |
31477 | + ret = create_var_fields(hist_data, file); | |
31478 | if (ret) | |
31479 | goto out; | |
1a6e0f06 | 31480 | |
e4b2b4a8 JK |
31481 | - hist_data->n_fields = hist_data->n_vals + hist_data->n_keys; |
31482 | + ret = create_key_fields(hist_data, file); | |
31483 | + if (ret) | |
31484 | + goto out; | |
31485 | out: | |
31486 | + free_var_defs(hist_data); | |
31487 | + | |
31488 | return ret; | |
1a6e0f06 JK |
31489 | } |
31490 | ||
e4b2b4a8 JK |
31491 | @@ -653,10 +4151,9 @@ |
31492 | static int create_sort_keys(struct hist_trigger_data *hist_data) | |
1a6e0f06 | 31493 | { |
e4b2b4a8 JK |
31494 | char *fields_str = hist_data->attrs->sort_key_str; |
31495 | - struct ftrace_event_field *field = NULL; | |
31496 | struct tracing_map_sort_key *sort_key; | |
31497 | int descending, ret = 0; | |
31498 | - unsigned int i, j; | |
31499 | + unsigned int i, j, k; | |
1a6e0f06 | 31500 | |
e4b2b4a8 | 31501 | hist_data->n_sort_keys = 1; /* we always have at least one, hitcount */ |
1a6e0f06 | 31502 | |
e4b2b4a8 JK |
31503 | @@ -670,7 +4167,9 @@ |
31504 | } | |
1a6e0f06 | 31505 | |
e4b2b4a8 JK |
31506 | for (i = 0; i < TRACING_MAP_SORT_KEYS_MAX; i++) { |
31507 | + struct hist_field *hist_field; | |
31508 | char *field_str, *field_name; | |
31509 | + const char *test_name; | |
1a6e0f06 | 31510 | |
e4b2b4a8 | 31511 | sort_key = &hist_data->sort_keys[i]; |
1a6e0f06 | 31512 | |
e4b2b4a8 JK |
31513 | @@ -702,10 +4201,19 @@ |
31514 | continue; | |
31515 | } | |
1a6e0f06 | 31516 | |
e4b2b4a8 JK |
31517 | - for (j = 1; j < hist_data->n_fields; j++) { |
31518 | - field = hist_data->fields[j]->field; | |
31519 | - if (field && (strcmp(field_name, field->name) == 0)) { | |
31520 | - sort_key->field_idx = j; | |
31521 | + for (j = 1, k = 1; j < hist_data->n_fields; j++) { | |
31522 | + unsigned int idx; | |
1a6e0f06 | 31523 | + |
e4b2b4a8 JK |
31524 | + hist_field = hist_data->fields[j]; |
31525 | + if (hist_field->flags & HIST_FIELD_FL_VAR) | |
31526 | + continue; | |
1a6e0f06 | 31527 | + |
e4b2b4a8 | 31528 | + idx = k++; |
1a6e0f06 | 31529 | + |
e4b2b4a8 | 31530 | + test_name = hist_field_name(hist_field, 0); |
1a6e0f06 | 31531 | + |
e4b2b4a8 JK |
31532 | + if (strcmp(field_name, test_name) == 0) { |
31533 | + sort_key->field_idx = idx; | |
31534 | descending = is_descending(field_str); | |
31535 | if (descending < 0) { | |
31536 | ret = descending; | |
31537 | @@ -720,16 +4228,230 @@ | |
31538 | break; | |
31539 | } | |
31540 | } | |
31541 | + | |
31542 | hist_data->n_sort_keys = i; | |
31543 | out: | |
31544 | return ret; | |
31545 | } | |
1a6e0f06 | 31546 | |
e4b2b4a8 JK |
31547 | +static void destroy_actions(struct hist_trigger_data *hist_data) |
31548 | +{ | |
31549 | + unsigned int i; | |
1a6e0f06 | 31550 | + |
e4b2b4a8 JK |
31551 | + for (i = 0; i < hist_data->n_actions; i++) { |
31552 | + struct action_data *data = hist_data->actions[i]; | |
1a6e0f06 | 31553 | + |
e4b2b4a8 JK |
31554 | + if (data->fn == action_trace) |
31555 | + onmatch_destroy(data); | |
31556 | + else if (data->fn == onmax_save) | |
31557 | + onmax_destroy(data); | |
31558 | + else | |
31559 | + kfree(data); | |
31560 | + } | |
31561 | +} | |
1a6e0f06 | 31562 | + |
e4b2b4a8 JK |
31563 | +static int parse_actions(struct hist_trigger_data *hist_data) |
31564 | +{ | |
31565 | + struct trace_array *tr = hist_data->event_file->tr; | |
31566 | + struct action_data *data; | |
31567 | + unsigned int i; | |
31568 | + int ret = 0; | |
31569 | + char *str; | |
1a6e0f06 | 31570 | + |
e4b2b4a8 JK |
31571 | + for (i = 0; i < hist_data->attrs->n_actions; i++) { |
31572 | + str = hist_data->attrs->action_str[i]; | |
1a6e0f06 | 31573 | + |
e4b2b4a8 JK |
31574 | + if (strncmp(str, "onmatch(", strlen("onmatch(")) == 0) { |
31575 | + char *action_str = str + strlen("onmatch("); | |
1a6e0f06 | 31576 | + |
e4b2b4a8 JK |
31577 | + data = onmatch_parse(tr, action_str); |
31578 | + if (IS_ERR(data)) { | |
31579 | + ret = PTR_ERR(data); | |
31580 | + break; | |
31581 | + } | |
31582 | + data->fn = action_trace; | |
31583 | + } else if (strncmp(str, "onmax(", strlen("onmax(")) == 0) { | |
31584 | + char *action_str = str + strlen("onmax("); | |
1a6e0f06 | 31585 | + |
e4b2b4a8 JK |
31586 | + data = onmax_parse(action_str); |
31587 | + if (IS_ERR(data)) { | |
31588 | + ret = PTR_ERR(data); | |
31589 | + break; | |
31590 | + } | |
31591 | + data->fn = onmax_save; | |
31592 | + } else { | |
31593 | + ret = -EINVAL; | |
31594 | + break; | |
31595 | + } | |
1a6e0f06 | 31596 | + |
e4b2b4a8 JK |
31597 | + hist_data->actions[hist_data->n_actions++] = data; |
31598 | + } | |
1a6e0f06 | 31599 | + |
e4b2b4a8 JK |
31600 | + return ret; |
31601 | +} | |
1a6e0f06 | 31602 | + |
e4b2b4a8 JK |
31603 | +static int create_actions(struct hist_trigger_data *hist_data, |
31604 | + struct trace_event_file *file) | |
31605 | +{ | |
31606 | + struct action_data *data; | |
31607 | + unsigned int i; | |
31608 | + int ret = 0; | |
1a6e0f06 | 31609 | + |
e4b2b4a8 JK |
31610 | + for (i = 0; i < hist_data->attrs->n_actions; i++) { |
31611 | + data = hist_data->actions[i]; | |
1a6e0f06 | 31612 | + |
e4b2b4a8 JK |
31613 | + if (data->fn == action_trace) { |
31614 | + ret = onmatch_create(hist_data, file, data); | |
31615 | + if (ret) | |
31616 | + return ret; | |
31617 | + } else if (data->fn == onmax_save) { | |
31618 | + ret = onmax_create(hist_data, data); | |
31619 | + if (ret) | |
31620 | + return ret; | |
31621 | + } | |
31622 | + } | |
1a6e0f06 | 31623 | + |
e4b2b4a8 JK |
31624 | + return ret; |
31625 | +} | |
1a6e0f06 | 31626 | + |
e4b2b4a8 JK |
31627 | +static void print_actions(struct seq_file *m, |
31628 | + struct hist_trigger_data *hist_data, | |
31629 | + struct tracing_map_elt *elt) | |
31630 | +{ | |
31631 | + unsigned int i; | |
1a6e0f06 | 31632 | + |
e4b2b4a8 JK |
31633 | + for (i = 0; i < hist_data->n_actions; i++) { |
31634 | + struct action_data *data = hist_data->actions[i]; | |
1a6e0f06 | 31635 | + |
e4b2b4a8 JK |
31636 | + if (data->fn == onmax_save) |
31637 | + onmax_print(m, hist_data, elt, data); | |
31638 | + } | |
31639 | +} | |
1a6e0f06 | 31640 | + |
e4b2b4a8 JK |
31641 | +static void print_onmax_spec(struct seq_file *m, |
31642 | + struct hist_trigger_data *hist_data, | |
31643 | + struct action_data *data) | |
31644 | +{ | |
31645 | + unsigned int i; | |
1a6e0f06 | 31646 | + |
e4b2b4a8 JK |
31647 | + seq_puts(m, ":onmax("); |
31648 | + seq_printf(m, "%s", data->onmax.var_str); | |
31649 | + seq_printf(m, ").%s(", data->onmax.fn_name); | |
1a6e0f06 | 31650 | + |
e4b2b4a8 JK |
31651 | + for (i = 0; i < hist_data->n_max_vars; i++) { |
31652 | + seq_printf(m, "%s", hist_data->max_vars[i]->var->var.name); | |
31653 | + if (i < hist_data->n_max_vars - 1) | |
31654 | + seq_puts(m, ","); | |
31655 | + } | |
31656 | + seq_puts(m, ")"); | |
31657 | +} | |
1a6e0f06 | 31658 | + |
e4b2b4a8 JK |
31659 | +static void print_onmatch_spec(struct seq_file *m, |
31660 | + struct hist_trigger_data *hist_data, | |
31661 | + struct action_data *data) | |
31662 | +{ | |
31663 | + unsigned int i; | |
1a6e0f06 | 31664 | + |
e4b2b4a8 JK |
31665 | + seq_printf(m, ":onmatch(%s.%s).", data->onmatch.match_event_system, |
31666 | + data->onmatch.match_event); | |
1a6e0f06 | 31667 | + |
e4b2b4a8 | 31668 | + seq_printf(m, "%s(", data->onmatch.synth_event->name); |
1a6e0f06 | 31669 | + |
e4b2b4a8 JK |
31670 | + for (i = 0; i < data->n_params; i++) { |
31671 | + if (i) | |
31672 | + seq_puts(m, ","); | |
31673 | + seq_printf(m, "%s", data->params[i]); | |
31674 | + } | |
1a6e0f06 | 31675 | + |
e4b2b4a8 JK |
31676 | + seq_puts(m, ")"); |
31677 | +} | |
1a6e0f06 | 31678 | + |
e4b2b4a8 JK |
31679 | +static bool actions_match(struct hist_trigger_data *hist_data, |
31680 | + struct hist_trigger_data *hist_data_test) | |
1a6e0f06 | 31681 | +{ |
e4b2b4a8 | 31682 | + unsigned int i, j; |
1a6e0f06 | 31683 | + |
e4b2b4a8 JK |
31684 | + if (hist_data->n_actions != hist_data_test->n_actions) |
31685 | + return false; | |
1a6e0f06 | 31686 | + |
e4b2b4a8 JK |
31687 | + for (i = 0; i < hist_data->n_actions; i++) { |
31688 | + struct action_data *data = hist_data->actions[i]; | |
31689 | + struct action_data *data_test = hist_data_test->actions[i]; | |
1a6e0f06 | 31690 | + |
e4b2b4a8 JK |
31691 | + if (data->fn != data_test->fn) |
31692 | + return false; | |
1a6e0f06 | 31693 | + |
e4b2b4a8 JK |
31694 | + if (data->n_params != data_test->n_params) |
31695 | + return false; | |
1a6e0f06 | 31696 | + |
e4b2b4a8 JK |
31697 | + for (j = 0; j < data->n_params; j++) { |
31698 | + if (strcmp(data->params[j], data_test->params[j]) != 0) | |
31699 | + return false; | |
31700 | + } | |
1a6e0f06 | 31701 | + |
e4b2b4a8 JK |
31702 | + if (data->fn == action_trace) { |
31703 | + if (strcmp(data->onmatch.synth_event_name, | |
31704 | + data_test->onmatch.synth_event_name) != 0) | |
31705 | + return false; | |
31706 | + if (strcmp(data->onmatch.match_event_system, | |
31707 | + data_test->onmatch.match_event_system) != 0) | |
31708 | + return false; | |
31709 | + if (strcmp(data->onmatch.match_event, | |
31710 | + data_test->onmatch.match_event) != 0) | |
31711 | + return false; | |
31712 | + } else if (data->fn == onmax_save) { | |
31713 | + if (strcmp(data->onmax.var_str, | |
31714 | + data_test->onmax.var_str) != 0) | |
31715 | + return false; | |
31716 | + if (strcmp(data->onmax.fn_name, | |
31717 | + data_test->onmax.fn_name) != 0) | |
31718 | + return false; | |
1a6e0f06 | 31719 | + } |
1a6e0f06 | 31720 | + } |
1a6e0f06 | 31721 | + |
e4b2b4a8 JK |
31722 | + return true; |
31723 | +} | |
1a6e0f06 | 31724 | + |
1a6e0f06 | 31725 | + |
e4b2b4a8 JK |
31726 | +static void print_actions_spec(struct seq_file *m, |
31727 | + struct hist_trigger_data *hist_data) | |
31728 | +{ | |
31729 | + unsigned int i; | |
1a6e0f06 | 31730 | + |
e4b2b4a8 JK |
31731 | + for (i = 0; i < hist_data->n_actions; i++) { |
31732 | + struct action_data *data = hist_data->actions[i]; | |
1a6e0f06 | 31733 | + |
e4b2b4a8 JK |
31734 | + if (data->fn == action_trace) |
31735 | + print_onmatch_spec(m, hist_data, data); | |
31736 | + else if (data->fn == onmax_save) | |
31737 | + print_onmax_spec(m, hist_data, data); | |
1a6e0f06 | 31738 | + } |
1a6e0f06 JK |
31739 | +} |
31740 | + | |
e4b2b4a8 | 31741 | +static void destroy_field_var_hists(struct hist_trigger_data *hist_data) |
1a6e0f06 | 31742 | +{ |
e4b2b4a8 | 31743 | + unsigned int i; |
1a6e0f06 | 31744 | + |
e4b2b4a8 JK |
31745 | + for (i = 0; i < hist_data->n_field_var_hists; i++) { |
31746 | + kfree(hist_data->field_var_hists[i]->cmd); | |
31747 | + kfree(hist_data->field_var_hists[i]); | |
1a6e0f06 | 31748 | + } |
1a6e0f06 JK |
31749 | +} |
31750 | + | |
e4b2b4a8 JK |
31751 | static void destroy_hist_data(struct hist_trigger_data *hist_data) |
31752 | { | |
31753 | + if (!hist_data) | |
31754 | + return; | |
1a6e0f06 | 31755 | + |
e4b2b4a8 JK |
31756 | destroy_hist_trigger_attrs(hist_data->attrs); |
31757 | destroy_hist_fields(hist_data); | |
31758 | tracing_map_destroy(hist_data->map); | |
1a6e0f06 | 31759 | + |
e4b2b4a8 JK |
31760 | + destroy_actions(hist_data); |
31761 | + destroy_field_vars(hist_data); | |
31762 | + destroy_field_var_hists(hist_data); | |
31763 | + destroy_synth_var_refs(hist_data); | |
31764 | + | |
31765 | kfree(hist_data); | |
31766 | } | |
31767 | ||
31768 | @@ -738,7 +4460,7 @@ | |
31769 | struct tracing_map *map = hist_data->map; | |
31770 | struct ftrace_event_field *field; | |
31771 | struct hist_field *hist_field; | |
31772 | - int i, idx; | |
31773 | + int i, idx = 0; | |
31774 | ||
31775 | for_each_hist_field(i, hist_data) { | |
31776 | hist_field = hist_data->fields[i]; | |
31777 | @@ -749,6 +4471,9 @@ | |
31778 | ||
31779 | if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) | |
31780 | cmp_fn = tracing_map_cmp_none; | |
31781 | + else if (!field) | |
31782 | + cmp_fn = tracing_map_cmp_num(hist_field->size, | |
31783 | + hist_field->is_signed); | |
31784 | else if (is_string_field(field)) | |
31785 | cmp_fn = tracing_map_cmp_string; | |
31786 | else | |
31787 | @@ -757,36 +4482,29 @@ | |
31788 | idx = tracing_map_add_key_field(map, | |
31789 | hist_field->offset, | |
31790 | cmp_fn); | |
31791 | - | |
31792 | - } else | |
31793 | + } else if (!(hist_field->flags & HIST_FIELD_FL_VAR)) | |
31794 | idx = tracing_map_add_sum_field(map); | |
31795 | ||
31796 | if (idx < 0) | |
31797 | return idx; | |
31798 | - } | |
31799 | - | |
31800 | - return 0; | |
31801 | -} | |
31802 | - | |
31803 | -static bool need_tracing_map_ops(struct hist_trigger_data *hist_data) | |
31804 | -{ | |
31805 | - struct hist_field *key_field; | |
31806 | - unsigned int i; | |
31807 | - | |
31808 | - for_each_hist_key_field(i, hist_data) { | |
31809 | - key_field = hist_data->fields[i]; | |
31810 | ||
31811 | - if (key_field->flags & HIST_FIELD_FL_EXECNAME) | |
31812 | - return true; | |
31813 | + if (hist_field->flags & HIST_FIELD_FL_VAR) { | |
31814 | + idx = tracing_map_add_var(map); | |
31815 | + if (idx < 0) | |
31816 | + return idx; | |
31817 | + hist_field->var.idx = idx; | |
31818 | + hist_field->var.hist_data = hist_data; | |
31819 | + } | |
31820 | } | |
31821 | ||
31822 | - return false; | |
1a6e0f06 | 31823 | + return 0; |
e4b2b4a8 JK |
31824 | } |
31825 | ||
31826 | static struct hist_trigger_data * | |
31827 | create_hist_data(unsigned int map_bits, | |
31828 | struct hist_trigger_attrs *attrs, | |
31829 | - struct trace_event_file *file) | |
31830 | + struct trace_event_file *file, | |
31831 | + bool remove) | |
31832 | { | |
31833 | const struct tracing_map_ops *map_ops = NULL; | |
31834 | struct hist_trigger_data *hist_data; | |
31835 | @@ -797,6 +4515,12 @@ | |
31836 | return ERR_PTR(-ENOMEM); | |
31837 | ||
31838 | hist_data->attrs = attrs; | |
31839 | + hist_data->remove = remove; | |
31840 | + hist_data->event_file = file; | |
1a6e0f06 | 31841 | + |
e4b2b4a8 JK |
31842 | + ret = parse_actions(hist_data); |
31843 | + if (ret) | |
31844 | + goto free; | |
31845 | ||
31846 | ret = create_hist_fields(hist_data, file); | |
31847 | if (ret) | |
31848 | @@ -806,8 +4530,7 @@ | |
31849 | if (ret) | |
31850 | goto free; | |
31851 | ||
31852 | - if (need_tracing_map_ops(hist_data)) | |
31853 | - map_ops = &hist_trigger_elt_comm_ops; | |
31854 | + map_ops = &hist_trigger_elt_data_ops; | |
31855 | ||
31856 | hist_data->map = tracing_map_create(map_bits, hist_data->key_size, | |
31857 | map_ops, hist_data); | |
31858 | @@ -820,12 +4543,6 @@ | |
31859 | ret = create_tracing_map_fields(hist_data); | |
31860 | if (ret) | |
31861 | goto free; | |
31862 | - | |
31863 | - ret = tracing_map_init(hist_data->map); | |
31864 | - if (ret) | |
31865 | - goto free; | |
31866 | - | |
31867 | - hist_data->event_file = file; | |
31868 | out: | |
31869 | return hist_data; | |
31870 | free: | |
31871 | @@ -839,18 +4556,39 @@ | |
31872 | } | |
31873 | ||
31874 | static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, | |
31875 | - struct tracing_map_elt *elt, | |
31876 | - void *rec) | |
31877 | + struct tracing_map_elt *elt, void *rec, | |
31878 | + struct ring_buffer_event *rbe, | |
31879 | + u64 *var_ref_vals) | |
31880 | { | |
31881 | + struct hist_elt_data *elt_data; | |
31882 | struct hist_field *hist_field; | |
31883 | - unsigned int i; | |
31884 | + unsigned int i, var_idx; | |
31885 | u64 hist_val; | |
31886 | ||
31887 | + elt_data = elt->private_data; | |
31888 | + elt_data->var_ref_vals = var_ref_vals; | |
31889 | + | |
31890 | for_each_hist_val_field(i, hist_data) { | |
31891 | hist_field = hist_data->fields[i]; | |
31892 | - hist_val = hist_field->fn(hist_field, rec); | |
31893 | + hist_val = hist_field->fn(hist_field, elt, rbe, rec); | |
31894 | + if (hist_field->flags & HIST_FIELD_FL_VAR) { | |
31895 | + var_idx = hist_field->var.idx; | |
31896 | + tracing_map_set_var(elt, var_idx, hist_val); | |
31897 | + continue; | |
31898 | + } | |
31899 | tracing_map_update_sum(elt, i, hist_val); | |
31900 | } | |
31901 | + | |
31902 | + for_each_hist_key_field(i, hist_data) { | |
31903 | + hist_field = hist_data->fields[i]; | |
31904 | + if (hist_field->flags & HIST_FIELD_FL_VAR) { | |
31905 | + hist_val = hist_field->fn(hist_field, elt, rbe, rec); | |
31906 | + var_idx = hist_field->var.idx; | |
31907 | + tracing_map_set_var(elt, var_idx, hist_val); | |
31908 | + } | |
31909 | + } | |
1a6e0f06 | 31910 | + |
e4b2b4a8 JK |
31911 | + update_field_vars(hist_data, elt, rbe, rec); |
31912 | } | |
31913 | ||
31914 | static inline void add_to_key(char *compound_key, void *key, | |
31915 | @@ -877,15 +4615,31 @@ | |
31916 | memcpy(compound_key + key_field->offset, key, size); | |
31917 | } | |
31918 | ||
31919 | -static void event_hist_trigger(struct event_trigger_data *data, void *rec) | |
31920 | +static void | |
31921 | +hist_trigger_actions(struct hist_trigger_data *hist_data, | |
31922 | + struct tracing_map_elt *elt, void *rec, | |
31923 | + struct ring_buffer_event *rbe, u64 *var_ref_vals) | |
1a6e0f06 | 31924 | +{ |
e4b2b4a8 JK |
31925 | + struct action_data *data; |
31926 | + unsigned int i; | |
1a6e0f06 | 31927 | + |
e4b2b4a8 JK |
31928 | + for (i = 0; i < hist_data->n_actions; i++) { |
31929 | + data = hist_data->actions[i]; | |
31930 | + data->fn(hist_data, elt, rec, rbe, data, var_ref_vals); | |
1a6e0f06 | 31931 | + } |
1a6e0f06 JK |
31932 | +} |
31933 | + | |
e4b2b4a8 JK |
31934 | +static void event_hist_trigger(struct event_trigger_data *data, void *rec, |
31935 | + struct ring_buffer_event *rbe) | |
31936 | { | |
31937 | struct hist_trigger_data *hist_data = data->private_data; | |
31938 | bool use_compound_key = (hist_data->n_keys > 1); | |
31939 | unsigned long entries[HIST_STACKTRACE_DEPTH]; | |
31940 | + u64 var_ref_vals[TRACING_MAP_VARS_MAX]; | |
31941 | char compound_key[HIST_KEY_SIZE_MAX]; | |
31942 | + struct tracing_map_elt *elt = NULL; | |
31943 | struct stack_trace stacktrace; | |
31944 | struct hist_field *key_field; | |
31945 | - struct tracing_map_elt *elt; | |
31946 | u64 field_contents; | |
31947 | void *key = NULL; | |
31948 | unsigned int i; | |
31949 | @@ -906,7 +4660,7 @@ | |
31950 | ||
31951 | key = entries; | |
31952 | } else { | |
31953 | - field_contents = key_field->fn(key_field, rec); | |
31954 | + field_contents = key_field->fn(key_field, elt, rbe, rec); | |
31955 | if (key_field->flags & HIST_FIELD_FL_STRING) { | |
31956 | key = (void *)(unsigned long)field_contents; | |
31957 | use_compound_key = true; | |
31958 | @@ -921,9 +4675,18 @@ | |
31959 | if (use_compound_key) | |
31960 | key = compound_key; | |
31961 | ||
31962 | + if (hist_data->n_var_refs && | |
31963 | + !resolve_var_refs(hist_data, key, var_ref_vals, false)) | |
31964 | + return; | |
1a6e0f06 | 31965 | + |
e4b2b4a8 JK |
31966 | elt = tracing_map_insert(hist_data->map, key); |
31967 | - if (elt) | |
31968 | - hist_trigger_elt_update(hist_data, elt, rec); | |
31969 | + if (!elt) | |
31970 | + return; | |
1a6e0f06 | 31971 | + |
e4b2b4a8 JK |
31972 | + hist_trigger_elt_update(hist_data, elt, rec, rbe, var_ref_vals); |
31973 | + | |
31974 | + if (resolve_var_refs(hist_data, key, var_ref_vals, true)) | |
31975 | + hist_trigger_actions(hist_data, elt, rec, rbe, var_ref_vals); | |
31976 | } | |
31977 | ||
31978 | static void hist_trigger_stacktrace_print(struct seq_file *m, | |
31979 | @@ -952,6 +4715,7 @@ | |
31980 | struct hist_field *key_field; | |
31981 | char str[KSYM_SYMBOL_LEN]; | |
31982 | bool multiline = false; | |
31983 | + const char *field_name; | |
31984 | unsigned int i; | |
31985 | u64 uval; | |
31986 | ||
31987 | @@ -963,26 +4727,33 @@ | |
31988 | if (i > hist_data->n_vals) | |
31989 | seq_puts(m, ", "); | |
31990 | ||
31991 | + field_name = hist_field_name(key_field, 0); | |
31992 | + | |
31993 | if (key_field->flags & HIST_FIELD_FL_HEX) { | |
31994 | uval = *(u64 *)(key + key_field->offset); | |
31995 | - seq_printf(m, "%s: %llx", | |
31996 | - key_field->field->name, uval); | |
31997 | + seq_printf(m, "%s: %llx", field_name, uval); | |
31998 | } else if (key_field->flags & HIST_FIELD_FL_SYM) { | |
31999 | uval = *(u64 *)(key + key_field->offset); | |
32000 | sprint_symbol_no_offset(str, uval); | |
32001 | - seq_printf(m, "%s: [%llx] %-45s", | |
32002 | - key_field->field->name, uval, str); | |
32003 | + seq_printf(m, "%s: [%llx] %-45s", field_name, | |
32004 | + uval, str); | |
32005 | } else if (key_field->flags & HIST_FIELD_FL_SYM_OFFSET) { | |
32006 | uval = *(u64 *)(key + key_field->offset); | |
32007 | sprint_symbol(str, uval); | |
32008 | - seq_printf(m, "%s: [%llx] %-55s", | |
32009 | - key_field->field->name, uval, str); | |
32010 | + seq_printf(m, "%s: [%llx] %-55s", field_name, | |
32011 | + uval, str); | |
32012 | } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { | |
32013 | - char *comm = elt->private_data; | |
32014 | + struct hist_elt_data *elt_data = elt->private_data; | |
32015 | + char *comm; | |
32016 | + | |
32017 | + if (WARN_ON_ONCE(!elt_data)) | |
32018 | + return; | |
1a6e0f06 | 32019 | + |
e4b2b4a8 JK |
32020 | + comm = elt_data->comm; |
32021 | ||
32022 | uval = *(u64 *)(key + key_field->offset); | |
32023 | - seq_printf(m, "%s: %-16s[%10llu]", | |
32024 | - key_field->field->name, comm, uval); | |
32025 | + seq_printf(m, "%s: %-16s[%10llu]", field_name, | |
32026 | + comm, uval); | |
32027 | } else if (key_field->flags & HIST_FIELD_FL_SYSCALL) { | |
32028 | const char *syscall_name; | |
32029 | ||
32030 | @@ -991,8 +4762,8 @@ | |
32031 | if (!syscall_name) | |
32032 | syscall_name = "unknown_syscall"; | |
32033 | ||
32034 | - seq_printf(m, "%s: %-30s[%3llu]", | |
32035 | - key_field->field->name, syscall_name, uval); | |
32036 | + seq_printf(m, "%s: %-30s[%3llu]", field_name, | |
32037 | + syscall_name, uval); | |
32038 | } else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { | |
32039 | seq_puts(m, "stacktrace:\n"); | |
32040 | hist_trigger_stacktrace_print(m, | |
32041 | @@ -1000,15 +4771,14 @@ | |
32042 | HIST_STACKTRACE_DEPTH); | |
32043 | multiline = true; | |
32044 | } else if (key_field->flags & HIST_FIELD_FL_LOG2) { | |
32045 | - seq_printf(m, "%s: ~ 2^%-2llu", key_field->field->name, | |
32046 | + seq_printf(m, "%s: ~ 2^%-2llu", field_name, | |
32047 | *(u64 *)(key + key_field->offset)); | |
32048 | } else if (key_field->flags & HIST_FIELD_FL_STRING) { | |
32049 | - seq_printf(m, "%s: %-50s", key_field->field->name, | |
32050 | + seq_printf(m, "%s: %-50s", field_name, | |
32051 | (char *)(key + key_field->offset)); | |
32052 | } else { | |
32053 | uval = *(u64 *)(key + key_field->offset); | |
32054 | - seq_printf(m, "%s: %10llu", key_field->field->name, | |
32055 | - uval); | |
32056 | + seq_printf(m, "%s: %10llu", field_name, uval); | |
32057 | } | |
32058 | } | |
32059 | ||
32060 | @@ -1021,17 +4791,23 @@ | |
32061 | tracing_map_read_sum(elt, HITCOUNT_IDX)); | |
32062 | ||
32063 | for (i = 1; i < hist_data->n_vals; i++) { | |
32064 | + field_name = hist_field_name(hist_data->fields[i], 0); | |
1a6e0f06 | 32065 | + |
e4b2b4a8 JK |
32066 | + if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR || |
32067 | + hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR) | |
32068 | + continue; | |
1a6e0f06 | 32069 | + |
e4b2b4a8 JK |
32070 | if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) { |
32071 | - seq_printf(m, " %s: %10llx", | |
32072 | - hist_data->fields[i]->field->name, | |
32073 | + seq_printf(m, " %s: %10llx", field_name, | |
32074 | tracing_map_read_sum(elt, i)); | |
32075 | } else { | |
32076 | - seq_printf(m, " %s: %10llu", | |
32077 | - hist_data->fields[i]->field->name, | |
32078 | + seq_printf(m, " %s: %10llu", field_name, | |
32079 | tracing_map_read_sum(elt, i)); | |
32080 | } | |
32081 | } | |
32082 | ||
32083 | + print_actions(m, hist_data, elt); | |
1a6e0f06 | 32084 | + |
e4b2b4a8 JK |
32085 | seq_puts(m, "\n"); |
32086 | } | |
32087 | ||
32088 | @@ -1102,6 +4878,11 @@ | |
32089 | hist_trigger_show(m, data, n++); | |
32090 | } | |
32091 | ||
32092 | + if (have_hist_err()) { | |
32093 | + seq_printf(m, "\nERROR: %s\n", hist_err_str); | |
32094 | + seq_printf(m, " Last command: %s\n", last_hist_cmd); | |
32095 | + } | |
1a6e0f06 | 32096 | + |
e4b2b4a8 JK |
32097 | out_unlock: |
32098 | mutex_unlock(&event_mutex); | |
32099 | ||
32100 | @@ -1120,34 +4901,31 @@ | |
32101 | .release = single_release, | |
32102 | }; | |
32103 | ||
32104 | -static const char *get_hist_field_flags(struct hist_field *hist_field) | |
32105 | +static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) | |
32106 | { | |
32107 | - const char *flags_str = NULL; | |
32108 | + const char *field_name = hist_field_name(hist_field, 0); | |
32109 | ||
32110 | - if (hist_field->flags & HIST_FIELD_FL_HEX) | |
32111 | - flags_str = "hex"; | |
32112 | - else if (hist_field->flags & HIST_FIELD_FL_SYM) | |
32113 | - flags_str = "sym"; | |
32114 | - else if (hist_field->flags & HIST_FIELD_FL_SYM_OFFSET) | |
32115 | - flags_str = "sym-offset"; | |
32116 | - else if (hist_field->flags & HIST_FIELD_FL_EXECNAME) | |
32117 | - flags_str = "execname"; | |
32118 | - else if (hist_field->flags & HIST_FIELD_FL_SYSCALL) | |
32119 | - flags_str = "syscall"; | |
32120 | - else if (hist_field->flags & HIST_FIELD_FL_LOG2) | |
32121 | - flags_str = "log2"; | |
32122 | + if (hist_field->var.name) | |
32123 | + seq_printf(m, "%s=", hist_field->var.name); | |
32124 | ||
32125 | - return flags_str; | |
32126 | -} | |
32127 | + if (hist_field->flags & HIST_FIELD_FL_CPU) | |
32128 | + seq_puts(m, "cpu"); | |
32129 | + else if (field_name) { | |
32130 | + if (hist_field->flags & HIST_FIELD_FL_VAR_REF || | |
32131 | + hist_field->flags & HIST_FIELD_FL_ALIAS) | |
32132 | + seq_putc(m, '$'); | |
32133 | + seq_printf(m, "%s", field_name); | |
32134 | + } else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP) | |
32135 | + seq_puts(m, "common_timestamp"); | |
32136 | ||
32137 | -static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) | |
32138 | -{ | |
32139 | - seq_printf(m, "%s", hist_field->field->name); | |
32140 | if (hist_field->flags) { | |
32141 | - const char *flags_str = get_hist_field_flags(hist_field); | |
32142 | + if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) && | |
32143 | + !(hist_field->flags & HIST_FIELD_FL_EXPR)) { | |
32144 | + const char *flags = get_hist_field_flags(hist_field); | |
32145 | ||
32146 | - if (flags_str) | |
32147 | - seq_printf(m, ".%s", flags_str); | |
32148 | + if (flags) | |
32149 | + seq_printf(m, ".%s", flags); | |
32150 | + } | |
32151 | } | |
32152 | } | |
32153 | ||
32154 | @@ -1156,7 +4934,8 @@ | |
32155 | struct event_trigger_data *data) | |
32156 | { | |
32157 | struct hist_trigger_data *hist_data = data->private_data; | |
32158 | - struct hist_field *key_field; | |
32159 | + struct hist_field *field; | |
32160 | + bool have_var = false; | |
32161 | unsigned int i; | |
32162 | ||
32163 | seq_puts(m, "hist:"); | |
32164 | @@ -1167,25 +4946,47 @@ | |
32165 | seq_puts(m, "keys="); | |
32166 | ||
32167 | for_each_hist_key_field(i, hist_data) { | |
32168 | - key_field = hist_data->fields[i]; | |
32169 | + field = hist_data->fields[i]; | |
32170 | ||
32171 | if (i > hist_data->n_vals) | |
32172 | seq_puts(m, ","); | |
32173 | ||
32174 | - if (key_field->flags & HIST_FIELD_FL_STACKTRACE) | |
32175 | + if (field->flags & HIST_FIELD_FL_STACKTRACE) | |
32176 | seq_puts(m, "stacktrace"); | |
32177 | else | |
32178 | - hist_field_print(m, key_field); | |
32179 | + hist_field_print(m, field); | |
32180 | } | |
32181 | ||
32182 | seq_puts(m, ":vals="); | |
32183 | ||
32184 | for_each_hist_val_field(i, hist_data) { | |
32185 | + field = hist_data->fields[i]; | |
32186 | + if (field->flags & HIST_FIELD_FL_VAR) { | |
32187 | + have_var = true; | |
32188 | + continue; | |
1a6e0f06 JK |
32189 | + } |
32190 | + | |
e4b2b4a8 JK |
32191 | if (i == HITCOUNT_IDX) |
32192 | seq_puts(m, "hitcount"); | |
32193 | else { | |
32194 | seq_puts(m, ","); | |
32195 | - hist_field_print(m, hist_data->fields[i]); | |
32196 | + hist_field_print(m, field); | |
32197 | + } | |
1a6e0f06 JK |
32198 | + } |
32199 | + | |
e4b2b4a8 JK |
32200 | + if (have_var) { |
32201 | + unsigned int n = 0; | |
32202 | + | |
32203 | + seq_puts(m, ":"); | |
32204 | + | |
32205 | + for_each_hist_val_field(i, hist_data) { | |
32206 | + field = hist_data->fields[i]; | |
32207 | + | |
32208 | + if (field->flags & HIST_FIELD_FL_VAR) { | |
32209 | + if (n++) | |
32210 | + seq_puts(m, ","); | |
32211 | + hist_field_print(m, field); | |
32212 | + } | |
32213 | } | |
32214 | } | |
32215 | ||
32216 | @@ -1193,28 +4994,36 @@ | |
32217 | ||
32218 | for (i = 0; i < hist_data->n_sort_keys; i++) { | |
32219 | struct tracing_map_sort_key *sort_key; | |
32220 | + unsigned int idx, first_key_idx; | |
32221 | + | |
32222 | + /* skip VAR vals */ | |
32223 | + first_key_idx = hist_data->n_vals - hist_data->n_vars; | |
32224 | ||
32225 | sort_key = &hist_data->sort_keys[i]; | |
32226 | + idx = sort_key->field_idx; | |
32227 | + | |
32228 | + if (WARN_ON(idx >= HIST_FIELDS_MAX)) | |
32229 | + return -EINVAL; | |
32230 | ||
32231 | if (i > 0) | |
32232 | seq_puts(m, ","); | |
32233 | ||
32234 | - if (sort_key->field_idx == HITCOUNT_IDX) | |
32235 | + if (idx == HITCOUNT_IDX) | |
32236 | seq_puts(m, "hitcount"); | |
32237 | else { | |
32238 | - unsigned int idx = sort_key->field_idx; | |
32239 | - | |
32240 | - if (WARN_ON(idx >= TRACING_MAP_FIELDS_MAX)) | |
32241 | - return -EINVAL; | |
32242 | - | |
32243 | + if (idx >= first_key_idx) | |
32244 | + idx += hist_data->n_vars; | |
32245 | hist_field_print(m, hist_data->fields[idx]); | |
32246 | } | |
32247 | ||
32248 | if (sort_key->descending) | |
32249 | seq_puts(m, ".descending"); | |
32250 | } | |
32251 | - | |
32252 | seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits)); | |
32253 | + if (hist_data->enable_timestamps) | |
32254 | + seq_printf(m, ":clock=%s", hist_data->attrs->clock); | |
1a6e0f06 | 32255 | + |
e4b2b4a8 JK |
32256 | + print_actions_spec(m, hist_data); |
32257 | ||
32258 | if (data->filter_str) | |
32259 | seq_printf(m, " if %s", data->filter_str); | |
32260 | @@ -1242,6 +5051,21 @@ | |
32261 | return 0; | |
32262 | } | |
32263 | ||
32264 | +static void unregister_field_var_hists(struct hist_trigger_data *hist_data) | |
1a6e0f06 | 32265 | +{ |
e4b2b4a8 JK |
32266 | + struct trace_event_file *file; |
32267 | + unsigned int i; | |
32268 | + char *cmd; | |
32269 | + int ret; | |
1a6e0f06 | 32270 | + |
e4b2b4a8 JK |
32271 | + for (i = 0; i < hist_data->n_field_var_hists; i++) { |
32272 | + file = hist_data->field_var_hists[i]->hist_data->event_file; | |
32273 | + cmd = hist_data->field_var_hists[i]->cmd; | |
32274 | + ret = event_hist_trigger_func(&trigger_hist_cmd, file, | |
32275 | + "!hist", "hist", cmd); | |
32276 | + } | |
1a6e0f06 JK |
32277 | +} |
32278 | + | |
e4b2b4a8 JK |
32279 | static void event_hist_trigger_free(struct event_trigger_ops *ops, |
32280 | struct event_trigger_data *data) | |
32281 | { | |
32282 | @@ -1254,7 +5078,13 @@ | |
32283 | if (!data->ref) { | |
32284 | if (data->name) | |
32285 | del_named_trigger(data); | |
1a6e0f06 | 32286 | + |
e4b2b4a8 | 32287 | trigger_data_free(data); |
1a6e0f06 | 32288 | + |
e4b2b4a8 | 32289 | + remove_hist_vars(hist_data); |
1a6e0f06 | 32290 | + |
e4b2b4a8 | 32291 | + unregister_field_var_hists(hist_data); |
1a6e0f06 | 32292 | + |
e4b2b4a8 JK |
32293 | destroy_hist_data(hist_data); |
32294 | } | |
32295 | } | |
32296 | @@ -1381,6 +5211,15 @@ | |
32297 | return false; | |
32298 | if (key_field->offset != key_field_test->offset) | |
32299 | return false; | |
32300 | + if (key_field->size != key_field_test->size) | |
32301 | + return false; | |
32302 | + if (key_field->is_signed != key_field_test->is_signed) | |
32303 | + return false; | |
32304 | + if (!!key_field->var.name != !!key_field_test->var.name) | |
32305 | + return false; | |
32306 | + if (key_field->var.name && | |
32307 | + strcmp(key_field->var.name, key_field_test->var.name) != 0) | |
32308 | + return false; | |
32309 | } | |
32310 | ||
32311 | for (i = 0; i < hist_data->n_sort_keys; i++) { | |
32312 | @@ -1396,6 +5235,9 @@ | |
32313 | (strcmp(data->filter_str, data_test->filter_str) != 0)) | |
32314 | return false; | |
32315 | ||
32316 | + if (!actions_match(hist_data, hist_data_test)) | |
32317 | + return false; | |
1a6e0f06 | 32318 | + |
e4b2b4a8 JK |
32319 | return true; |
32320 | } | |
32321 | ||
32322 | @@ -1412,6 +5254,7 @@ | |
32323 | if (named_data) { | |
32324 | if (!hist_trigger_match(data, named_data, named_data, | |
32325 | true)) { | |
32326 | + hist_err("Named hist trigger doesn't match existing named trigger (includes variables): ", hist_data->attrs->name); | |
32327 | ret = -EINVAL; | |
32328 | goto out; | |
32329 | } | |
32330 | @@ -1431,13 +5274,16 @@ | |
32331 | test->paused = false; | |
32332 | else if (hist_data->attrs->clear) | |
32333 | hist_clear(test); | |
32334 | - else | |
32335 | + else { | |
32336 | + hist_err("Hist trigger already exists", NULL); | |
32337 | ret = -EEXIST; | |
32338 | + } | |
32339 | goto out; | |
32340 | } | |
32341 | } | |
32342 | new: | |
32343 | if (hist_data->attrs->cont || hist_data->attrs->clear) { | |
32344 | + hist_err("Can't clear or continue a nonexistent hist trigger", NULL); | |
32345 | ret = -ENOENT; | |
32346 | goto out; | |
32347 | } | |
32348 | @@ -1446,7 +5292,6 @@ | |
32349 | data->paused = true; | |
32350 | ||
32351 | if (named_data) { | |
32352 | - destroy_hist_data(data->private_data); | |
32353 | data->private_data = named_data->private_data; | |
32354 | set_named_trigger_data(data, named_data); | |
32355 | data->ops = &event_hist_trigger_named_ops; | |
32356 | @@ -1458,8 +5303,32 @@ | |
32357 | goto out; | |
32358 | } | |
32359 | ||
32360 | - list_add_rcu(&data->list, &file->triggers); | |
32361 | + if (hist_data->enable_timestamps) { | |
32362 | + char *clock = hist_data->attrs->clock; | |
1a6e0f06 | 32363 | + |
e4b2b4a8 JK |
32364 | + ret = tracing_set_clock(file->tr, hist_data->attrs->clock); |
32365 | + if (ret) { | |
32366 | + hist_err("Couldn't set trace_clock: ", clock); | |
32367 | + goto out; | |
32368 | + } | |
1a6e0f06 | 32369 | + |
e4b2b4a8 | 32370 | + tracing_set_time_stamp_abs(file->tr, true); |
1a6e0f06 JK |
32371 | + } |
32372 | + | |
e4b2b4a8 JK |
32373 | + if (named_data) |
32374 | + destroy_hist_data(hist_data); | |
1a6e0f06 | 32375 | + |
e4b2b4a8 JK |
32376 | ret++; |
32377 | + out: | |
32378 | + return ret; | |
1a6e0f06 | 32379 | +} |
1a6e0f06 | 32380 | + |
e4b2b4a8 JK |
32381 | +static int hist_trigger_enable(struct event_trigger_data *data, |
32382 | + struct trace_event_file *file) | |
1a6e0f06 | 32383 | +{ |
e4b2b4a8 | 32384 | + int ret = 0; |
1a6e0f06 | 32385 | + |
e4b2b4a8 JK |
32386 | + list_add_tail_rcu(&data->list, &file->triggers); |
32387 | ||
32388 | update_cond_flag(file); | |
32389 | ||
32390 | @@ -1468,10 +5337,55 @@ | |
32391 | update_cond_flag(file); | |
32392 | ret--; | |
32393 | } | |
32394 | - out: | |
1a6e0f06 | 32395 | + |
e4b2b4a8 JK |
32396 | return ret; |
32397 | } | |
32398 | ||
32399 | +static bool have_hist_trigger_match(struct event_trigger_data *data, | |
32400 | + struct trace_event_file *file) | |
1a6e0f06 | 32401 | +{ |
e4b2b4a8 JK |
32402 | + struct hist_trigger_data *hist_data = data->private_data; |
32403 | + struct event_trigger_data *test, *named_data = NULL; | |
32404 | + bool match = false; | |
1a6e0f06 | 32405 | + |
e4b2b4a8 JK |
32406 | + if (hist_data->attrs->name) |
32407 | + named_data = find_named_trigger(hist_data->attrs->name); | |
1a6e0f06 | 32408 | + |
e4b2b4a8 JK |
32409 | + list_for_each_entry_rcu(test, &file->triggers, list) { |
32410 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
32411 | + if (hist_trigger_match(data, test, named_data, false)) { | |
32412 | + match = true; | |
32413 | + break; | |
1a6e0f06 | 32414 | + } |
1a6e0f06 JK |
32415 | + } |
32416 | + } | |
1a6e0f06 | 32417 | + |
e4b2b4a8 JK |
32418 | + return match; |
32419 | +} | |
1a6e0f06 | 32420 | + |
e4b2b4a8 JK |
32421 | +static bool hist_trigger_check_refs(struct event_trigger_data *data, |
32422 | + struct trace_event_file *file) | |
1a6e0f06 | 32423 | +{ |
e4b2b4a8 JK |
32424 | + struct hist_trigger_data *hist_data = data->private_data; |
32425 | + struct event_trigger_data *test, *named_data = NULL; | |
1a6e0f06 | 32426 | + |
e4b2b4a8 JK |
32427 | + if (hist_data->attrs->name) |
32428 | + named_data = find_named_trigger(hist_data->attrs->name); | |
1a6e0f06 | 32429 | + |
e4b2b4a8 JK |
32430 | + list_for_each_entry_rcu(test, &file->triggers, list) { |
32431 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
32432 | + if (!hist_trigger_match(data, test, named_data, false)) | |
32433 | + continue; | |
32434 | + hist_data = test->private_data; | |
32435 | + if (check_var_refs(hist_data)) | |
32436 | + return true; | |
32437 | + break; | |
1a6e0f06 | 32438 | + } |
e4b2b4a8 | 32439 | + } |
1a6e0f06 | 32440 | + |
e4b2b4a8 JK |
32441 | + return false; |
32442 | +} | |
1a6e0f06 | 32443 | + |
e4b2b4a8 JK |
32444 | static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, |
32445 | struct event_trigger_data *data, | |
32446 | struct trace_event_file *file) | |
32447 | @@ -1497,17 +5411,55 @@ | |
32448 | ||
32449 | if (unregistered && test->ops->free) | |
32450 | test->ops->free(test->ops, test); | |
1a6e0f06 | 32451 | + |
e4b2b4a8 JK |
32452 | + if (hist_data->enable_timestamps) { |
32453 | + if (!hist_data->remove || unregistered) | |
32454 | + tracing_set_time_stamp_abs(file->tr, false); | |
32455 | + } | |
32456 | +} | |
1a6e0f06 | 32457 | + |
e4b2b4a8 JK |
32458 | +static bool hist_file_check_refs(struct trace_event_file *file) |
32459 | +{ | |
32460 | + struct hist_trigger_data *hist_data; | |
32461 | + struct event_trigger_data *test; | |
1a6e0f06 | 32462 | + |
e4b2b4a8 JK |
32463 | + list_for_each_entry_rcu(test, &file->triggers, list) { |
32464 | + if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
32465 | + hist_data = test->private_data; | |
32466 | + if (check_var_refs(hist_data)) | |
32467 | + return true; | |
1a6e0f06 | 32468 | + } |
e4b2b4a8 | 32469 | + } |
1a6e0f06 | 32470 | + |
e4b2b4a8 JK |
32471 | + return false; |
32472 | } | |
32473 | ||
32474 | static void hist_unreg_all(struct trace_event_file *file) | |
32475 | { | |
32476 | struct event_trigger_data *test, *n; | |
32477 | + struct hist_trigger_data *hist_data; | |
32478 | + struct synth_event *se; | |
32479 | + const char *se_name; | |
1a6e0f06 | 32480 | + |
e4b2b4a8 JK |
32481 | + if (hist_file_check_refs(file)) |
32482 | + return; | |
32483 | ||
32484 | list_for_each_entry_safe(test, n, &file->triggers, list) { | |
32485 | if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { | |
32486 | + hist_data = test->private_data; | |
32487 | list_del_rcu(&test->list); | |
32488 | trace_event_trigger_enable_disable(file, 0); | |
32489 | + | |
32490 | + mutex_lock(&synth_event_mutex); | |
32491 | + se_name = trace_event_name(file->event_call); | |
32492 | + se = find_synth_event(se_name); | |
32493 | + if (se) | |
32494 | + se->ref--; | |
32495 | + mutex_unlock(&synth_event_mutex); | |
32496 | + | |
32497 | update_cond_flag(file); | |
32498 | + if (hist_data->enable_timestamps) | |
32499 | + tracing_set_time_stamp_abs(file->tr, false); | |
32500 | if (test->ops->free) | |
32501 | test->ops->free(test->ops, test); | |
32502 | } | |
32503 | @@ -1523,16 +5475,54 @@ | |
32504 | struct hist_trigger_attrs *attrs; | |
32505 | struct event_trigger_ops *trigger_ops; | |
32506 | struct hist_trigger_data *hist_data; | |
32507 | - char *trigger; | |
32508 | + struct synth_event *se; | |
32509 | + const char *se_name; | |
32510 | + bool remove = false; | |
32511 | + char *trigger, *p; | |
32512 | int ret = 0; | |
32513 | ||
32514 | + if (glob && strlen(glob)) { | |
32515 | + last_cmd_set(param); | |
32516 | + hist_err_clear(); | |
32517 | + } | |
1a6e0f06 | 32518 | + |
e4b2b4a8 JK |
32519 | if (!param) |
32520 | return -EINVAL; | |
32521 | ||
32522 | - /* separate the trigger from the filter (k:v [if filter]) */ | |
32523 | - trigger = strsep(¶m, " \t"); | |
32524 | - if (!trigger) | |
32525 | - return -EINVAL; | |
32526 | + if (glob[0] == '!') | |
32527 | + remove = true; | |
1a6e0f06 | 32528 | + |
e4b2b4a8 JK |
32529 | + /* |
32530 | + * separate the trigger from the filter (k:v [if filter]) | |
32531 | + * allowing for whitespace in the trigger | |
32532 | + */ | |
32533 | + p = trigger = param; | |
32534 | + do { | |
32535 | + p = strstr(p, "if"); | |
32536 | + if (!p) | |
32537 | + break; | |
32538 | + if (p == param) | |
32539 | + return -EINVAL; | |
32540 | + if (*(p - 1) != ' ' && *(p - 1) != '\t') { | |
32541 | + p++; | |
32542 | + continue; | |
1a6e0f06 | 32543 | + } |
e4b2b4a8 JK |
32544 | + if (p >= param + strlen(param) - strlen("if") - 1) |
32545 | + return -EINVAL; | |
32546 | + if (*(p + strlen("if")) != ' ' && *(p + strlen("if")) != '\t') { | |
32547 | + p++; | |
32548 | + continue; | |
32549 | + } | |
32550 | + break; | |
32551 | + } while (p); | |
32552 | + | |
32553 | + if (!p) | |
32554 | + param = NULL; | |
32555 | + else { | |
32556 | + *(p - 1) = '\0'; | |
32557 | + param = strstrip(p); | |
32558 | + trigger = strstrip(trigger); | |
1a6e0f06 | 32559 | + } |
e4b2b4a8 JK |
32560 | |
32561 | attrs = parse_hist_trigger_attrs(trigger); | |
32562 | if (IS_ERR(attrs)) | |
32563 | @@ -1541,7 +5531,7 @@ | |
32564 | if (attrs->map_bits) | |
32565 | hist_trigger_bits = attrs->map_bits; | |
32566 | ||
32567 | - hist_data = create_hist_data(hist_trigger_bits, attrs, file); | |
32568 | + hist_data = create_hist_data(hist_trigger_bits, attrs, file, remove); | |
32569 | if (IS_ERR(hist_data)) { | |
32570 | destroy_hist_trigger_attrs(attrs); | |
32571 | return PTR_ERR(hist_data); | |
32572 | @@ -1549,10 +5539,11 @@ | |
32573 | ||
32574 | trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger); | |
32575 | ||
32576 | - ret = -ENOMEM; | |
32577 | trigger_data = kzalloc(sizeof(*trigger_data), GFP_KERNEL); | |
32578 | - if (!trigger_data) | |
32579 | + if (!trigger_data) { | |
32580 | + ret = -ENOMEM; | |
32581 | goto out_free; | |
32582 | + } | |
32583 | ||
32584 | trigger_data->count = -1; | |
32585 | trigger_data->ops = trigger_ops; | |
32586 | @@ -1570,8 +5561,24 @@ | |
32587 | goto out_free; | |
32588 | } | |
32589 | ||
32590 | - if (glob[0] == '!') { | |
32591 | + if (remove) { | |
32592 | + if (!have_hist_trigger_match(trigger_data, file)) | |
32593 | + goto out_free; | |
1a6e0f06 | 32594 | + |
e4b2b4a8 JK |
32595 | + if (hist_trigger_check_refs(trigger_data, file)) { |
32596 | + ret = -EBUSY; | |
32597 | + goto out_free; | |
1a6e0f06 JK |
32598 | + } |
32599 | + | |
e4b2b4a8 | 32600 | cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); |
1a6e0f06 | 32601 | + |
e4b2b4a8 JK |
32602 | + mutex_lock(&synth_event_mutex); |
32603 | + se_name = trace_event_name(file->event_call); | |
32604 | + se = find_synth_event(se_name); | |
32605 | + if (se) | |
32606 | + se->ref--; | |
32607 | + mutex_unlock(&synth_event_mutex); | |
1a6e0f06 | 32608 | + |
e4b2b4a8 JK |
32609 | ret = 0; |
32610 | goto out_free; | |
32611 | } | |
32612 | @@ -1588,14 +5595,47 @@ | |
32613 | goto out_free; | |
32614 | } else if (ret < 0) | |
32615 | goto out_free; | |
1a6e0f06 | 32616 | + |
e4b2b4a8 JK |
32617 | + if (get_named_trigger_data(trigger_data)) |
32618 | + goto enable; | |
1a6e0f06 | 32619 | + |
e4b2b4a8 JK |
32620 | + if (has_hist_vars(hist_data)) |
32621 | + save_hist_vars(hist_data); | |
1a6e0f06 | 32622 | + |
e4b2b4a8 JK |
32623 | + ret = create_actions(hist_data, file); |
32624 | + if (ret) | |
32625 | + goto out_unreg; | |
1a6e0f06 | 32626 | + |
e4b2b4a8 JK |
32627 | + ret = tracing_map_init(hist_data->map); |
32628 | + if (ret) | |
32629 | + goto out_unreg; | |
32630 | +enable: | |
32631 | + ret = hist_trigger_enable(trigger_data, file); | |
32632 | + if (ret) | |
32633 | + goto out_unreg; | |
1a6e0f06 | 32634 | + |
e4b2b4a8 JK |
32635 | + mutex_lock(&synth_event_mutex); |
32636 | + se_name = trace_event_name(file->event_call); | |
32637 | + se = find_synth_event(se_name); | |
32638 | + if (se) | |
32639 | + se->ref++; | |
32640 | + mutex_unlock(&synth_event_mutex); | |
1a6e0f06 | 32641 | + |
e4b2b4a8 JK |
32642 | /* Just return zero, not the number of registered triggers */ |
32643 | ret = 0; | |
32644 | out: | |
32645 | + if (ret == 0) | |
32646 | + hist_err_clear(); | |
1a6e0f06 | 32647 | + |
e4b2b4a8 JK |
32648 | return ret; |
32649 | + out_unreg: | |
32650 | + cmd_ops->unreg(glob+1, trigger_ops, trigger_data, file); | |
32651 | out_free: | |
32652 | if (cmd_ops->set_filter) | |
32653 | cmd_ops->set_filter(NULL, trigger_data, NULL); | |
32654 | ||
32655 | + remove_hist_vars(hist_data); | |
1a6e0f06 | 32656 | + |
e4b2b4a8 JK |
32657 | kfree(trigger_data); |
32658 | ||
32659 | destroy_hist_data(hist_data); | |
32660 | @@ -1625,7 +5665,8 @@ | |
32661 | } | |
32662 | ||
32663 | static void | |
32664 | -hist_enable_trigger(struct event_trigger_data *data, void *rec) | |
32665 | +hist_enable_trigger(struct event_trigger_data *data, void *rec, | |
32666 | + struct ring_buffer_event *event) | |
32667 | { | |
32668 | struct enable_trigger_data *enable_data = data->private_data; | |
32669 | struct event_trigger_data *test; | |
32670 | @@ -1641,7 +5682,8 @@ | |
32671 | } | |
32672 | ||
32673 | static void | |
32674 | -hist_enable_count_trigger(struct event_trigger_data *data, void *rec) | |
32675 | +hist_enable_count_trigger(struct event_trigger_data *data, void *rec, | |
32676 | + struct ring_buffer_event *event) | |
32677 | { | |
32678 | if (!data->count) | |
32679 | return; | |
32680 | @@ -1649,7 +5691,7 @@ | |
32681 | if (data->count != -1) | |
32682 | (data->count)--; | |
32683 | ||
32684 | - hist_enable_trigger(data, rec); | |
32685 | + hist_enable_trigger(data, rec, event); | |
32686 | } | |
32687 | ||
32688 | static struct event_trigger_ops hist_enable_trigger_ops = { | |
32689 | @@ -1754,3 +5796,31 @@ | |
32690 | ||
32691 | return ret; | |
32692 | } | |
1a6e0f06 | 32693 | + |
e4b2b4a8 JK |
32694 | +static __init int trace_events_hist_init(void) |
32695 | +{ | |
32696 | + struct dentry *entry = NULL; | |
32697 | + struct dentry *d_tracer; | |
32698 | + int err = 0; | |
1a6e0f06 | 32699 | + |
e4b2b4a8 JK |
32700 | + d_tracer = tracing_init_dentry(); |
32701 | + if (IS_ERR(d_tracer)) { | |
32702 | + err = PTR_ERR(d_tracer); | |
32703 | + goto err; | |
1a6e0f06 JK |
32704 | + } |
32705 | + | |
e4b2b4a8 JK |
32706 | + entry = tracefs_create_file("synthetic_events", 0644, d_tracer, |
32707 | + NULL, &synth_events_fops); | |
32708 | + if (!entry) { | |
32709 | + err = -ENODEV; | |
32710 | + goto err; | |
1a6e0f06 JK |
32711 | + } |
32712 | + | |
e4b2b4a8 JK |
32713 | + return err; |
32714 | + err: | |
32715 | + pr_warn("Could not create tracefs 'synthetic_events' entry\n"); | |
32716 | + | |
32717 | + return err; | |
1a6e0f06 | 32718 | +} |
1a6e0f06 | 32719 | + |
e4b2b4a8 JK |
32720 | +fs_initcall(trace_events_hist_init); |
32721 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_events_trigger.c linux-4.14/kernel/trace/trace_events_trigger.c | |
32722 | --- linux-4.14.orig/kernel/trace/trace_events_trigger.c 2018-09-05 11:03:22.000000000 +0200 | |
32723 | +++ linux-4.14/kernel/trace/trace_events_trigger.c 2018-09-05 11:05:07.000000000 +0200 | |
32724 | @@ -63,7 +63,8 @@ | |
32725 | * any trigger that should be deferred, ETT_NONE if nothing to defer. | |
32726 | */ | |
32727 | enum event_trigger_type | |
32728 | -event_triggers_call(struct trace_event_file *file, void *rec) | |
32729 | +event_triggers_call(struct trace_event_file *file, void *rec, | |
32730 | + struct ring_buffer_event *event) | |
32731 | { | |
32732 | struct event_trigger_data *data; | |
32733 | enum event_trigger_type tt = ETT_NONE; | |
32734 | @@ -76,7 +77,7 @@ | |
32735 | if (data->paused) | |
32736 | continue; | |
32737 | if (!rec) { | |
32738 | - data->ops->func(data, rec); | |
32739 | + data->ops->func(data, rec, event); | |
32740 | continue; | |
32741 | } | |
32742 | filter = rcu_dereference_sched(data->filter); | |
32743 | @@ -86,7 +87,7 @@ | |
32744 | tt |= data->cmd_ops->trigger_type; | |
32745 | continue; | |
32746 | } | |
32747 | - data->ops->func(data, rec); | |
32748 | + data->ops->func(data, rec, event); | |
32749 | } | |
32750 | return tt; | |
32751 | } | |
32752 | @@ -108,7 +109,7 @@ | |
32753 | void | |
32754 | event_triggers_post_call(struct trace_event_file *file, | |
32755 | enum event_trigger_type tt, | |
32756 | - void *rec) | |
32757 | + void *rec, struct ring_buffer_event *event) | |
32758 | { | |
32759 | struct event_trigger_data *data; | |
32760 | ||
32761 | @@ -116,7 +117,7 @@ | |
32762 | if (data->paused) | |
32763 | continue; | |
32764 | if (data->cmd_ops->trigger_type & tt) | |
32765 | - data->ops->func(data, rec); | |
32766 | + data->ops->func(data, rec, event); | |
32767 | } | |
32768 | } | |
32769 | EXPORT_SYMBOL_GPL(event_triggers_post_call); | |
32770 | @@ -914,8 +915,15 @@ | |
32771 | data->named_data = named_data; | |
32772 | } | |
32773 | ||
32774 | +struct event_trigger_data * | |
32775 | +get_named_trigger_data(struct event_trigger_data *data) | |
1a6e0f06 | 32776 | +{ |
e4b2b4a8 | 32777 | + return data->named_data; |
1a6e0f06 JK |
32778 | +} |
32779 | + | |
e4b2b4a8 JK |
32780 | static void |
32781 | -traceon_trigger(struct event_trigger_data *data, void *rec) | |
32782 | +traceon_trigger(struct event_trigger_data *data, void *rec, | |
32783 | + struct ring_buffer_event *event) | |
32784 | { | |
32785 | if (tracing_is_on()) | |
32786 | return; | |
32787 | @@ -924,7 +932,8 @@ | |
32788 | } | |
32789 | ||
32790 | static void | |
32791 | -traceon_count_trigger(struct event_trigger_data *data, void *rec) | |
32792 | +traceon_count_trigger(struct event_trigger_data *data, void *rec, | |
32793 | + struct ring_buffer_event *event) | |
32794 | { | |
32795 | if (tracing_is_on()) | |
32796 | return; | |
32797 | @@ -939,7 +948,8 @@ | |
32798 | } | |
32799 | ||
32800 | static void | |
32801 | -traceoff_trigger(struct event_trigger_data *data, void *rec) | |
32802 | +traceoff_trigger(struct event_trigger_data *data, void *rec, | |
32803 | + struct ring_buffer_event *event) | |
32804 | { | |
32805 | if (!tracing_is_on()) | |
32806 | return; | |
32807 | @@ -948,7 +958,8 @@ | |
32808 | } | |
32809 | ||
32810 | static void | |
32811 | -traceoff_count_trigger(struct event_trigger_data *data, void *rec) | |
32812 | +traceoff_count_trigger(struct event_trigger_data *data, void *rec, | |
32813 | + struct ring_buffer_event *event) | |
32814 | { | |
32815 | if (!tracing_is_on()) | |
32816 | return; | |
32817 | @@ -1045,7 +1056,8 @@ | |
1a6e0f06 | 32818 | |
e4b2b4a8 JK |
32819 | #ifdef CONFIG_TRACER_SNAPSHOT |
32820 | static void | |
32821 | -snapshot_trigger(struct event_trigger_data *data, void *rec) | |
32822 | +snapshot_trigger(struct event_trigger_data *data, void *rec, | |
32823 | + struct ring_buffer_event *event) | |
32824 | { | |
32825 | struct trace_event_file *file = data->private_data; | |
32826 | ||
32827 | @@ -1056,7 +1068,8 @@ | |
1a6e0f06 | 32828 | } |
1a6e0f06 | 32829 | |
e4b2b4a8 JK |
32830 | static void |
32831 | -snapshot_count_trigger(struct event_trigger_data *data, void *rec) | |
32832 | +snapshot_count_trigger(struct event_trigger_data *data, void *rec, | |
32833 | + struct ring_buffer_event *event) | |
32834 | { | |
32835 | if (!data->count) | |
32836 | return; | |
32837 | @@ -1064,7 +1077,7 @@ | |
32838 | if (data->count != -1) | |
32839 | (data->count)--; | |
1a6e0f06 | 32840 | |
e4b2b4a8 JK |
32841 | - snapshot_trigger(data, rec); |
32842 | + snapshot_trigger(data, rec, event); | |
32843 | } | |
32844 | ||
32845 | static int | |
32846 | @@ -1143,13 +1156,15 @@ | |
32847 | #define STACK_SKIP 3 | |
32848 | ||
32849 | static void | |
32850 | -stacktrace_trigger(struct event_trigger_data *data, void *rec) | |
32851 | +stacktrace_trigger(struct event_trigger_data *data, void *rec, | |
32852 | + struct ring_buffer_event *event) | |
1a6e0f06 | 32853 | { |
e4b2b4a8 | 32854 | trace_dump_stack(STACK_SKIP); |
1a6e0f06 JK |
32855 | } |
32856 | ||
e4b2b4a8 JK |
32857 | static void |
32858 | -stacktrace_count_trigger(struct event_trigger_data *data, void *rec) | |
32859 | +stacktrace_count_trigger(struct event_trigger_data *data, void *rec, | |
32860 | + struct ring_buffer_event *event) | |
32861 | { | |
32862 | if (!data->count) | |
32863 | return; | |
32864 | @@ -1157,7 +1172,7 @@ | |
32865 | if (data->count != -1) | |
32866 | (data->count)--; | |
32867 | ||
32868 | - stacktrace_trigger(data, rec); | |
32869 | + stacktrace_trigger(data, rec, event); | |
1a6e0f06 JK |
32870 | } |
32871 | ||
e4b2b4a8 JK |
32872 | static int |
32873 | @@ -1219,7 +1234,8 @@ | |
32874 | } | |
32875 | ||
32876 | static void | |
32877 | -event_enable_trigger(struct event_trigger_data *data, void *rec) | |
32878 | +event_enable_trigger(struct event_trigger_data *data, void *rec, | |
32879 | + struct ring_buffer_event *event) | |
32880 | { | |
32881 | struct enable_trigger_data *enable_data = data->private_data; | |
32882 | ||
32883 | @@ -1230,7 +1246,8 @@ | |
32884 | } | |
32885 | ||
32886 | static void | |
32887 | -event_enable_count_trigger(struct event_trigger_data *data, void *rec) | |
32888 | +event_enable_count_trigger(struct event_trigger_data *data, void *rec, | |
32889 | + struct ring_buffer_event *event) | |
32890 | { | |
32891 | struct enable_trigger_data *enable_data = data->private_data; | |
32892 | ||
32893 | @@ -1244,7 +1261,7 @@ | |
32894 | if (data->count != -1) | |
32895 | (data->count)--; | |
32896 | ||
32897 | - event_enable_trigger(data, rec); | |
32898 | + event_enable_trigger(data, rec, event); | |
32899 | } | |
32900 | ||
32901 | int event_enable_trigger_print(struct seq_file *m, | |
32902 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace.h linux-4.14/kernel/trace/trace.h | |
32903 | --- linux-4.14.orig/kernel/trace/trace.h 2018-09-05 11:03:22.000000000 +0200 | |
32904 | +++ linux-4.14/kernel/trace/trace.h 2018-09-05 11:05:07.000000000 +0200 | |
32905 | @@ -127,6 +127,7 @@ | |
1a6e0f06 JK |
32906 | * NEED_RESCHED - reschedule is requested |
32907 | * HARDIRQ - inside an interrupt handler | |
32908 | * SOFTIRQ - inside a softirq handler | |
32909 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
32910 | */ | |
32911 | enum trace_flag_type { | |
32912 | TRACE_FLAG_IRQS_OFF = 0x01, | |
e4b2b4a8 | 32913 | @@ -136,6 +137,7 @@ |
1a6e0f06 JK |
32914 | TRACE_FLAG_SOFTIRQ = 0x10, |
32915 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
32916 | TRACE_FLAG_NMI = 0x40, | |
32917 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, | |
32918 | }; | |
32919 | ||
32920 | #define TRACE_BUF_SIZE 1024 | |
e4b2b4a8 JK |
32921 | @@ -273,6 +275,8 @@ |
32922 | /* function tracing enabled */ | |
32923 | int function_enabled; | |
32924 | #endif | |
32925 | + int time_stamp_abs_ref; | |
32926 | + struct list_head hist_vars; | |
32927 | }; | |
1a6e0f06 | 32928 | |
e4b2b4a8 JK |
32929 | enum { |
32930 | @@ -286,6 +290,11 @@ | |
32931 | extern int trace_array_get(struct trace_array *tr); | |
32932 | extern void trace_array_put(struct trace_array *tr); | |
1a6e0f06 | 32933 | |
e4b2b4a8 JK |
32934 | +extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs); |
32935 | +extern int tracing_set_clock(struct trace_array *tr, const char *clockstr); | |
32936 | + | |
32937 | +extern bool trace_clock_in_ns(struct trace_array *tr); | |
32938 | + | |
32939 | /* | |
32940 | * The global tracer (top) should be the first trace array added, | |
32941 | * but we check the flag anyway. | |
32942 | @@ -1293,7 +1302,7 @@ | |
32943 | unsigned long eflags = file->flags; | |
1a6e0f06 | 32944 | |
e4b2b4a8 JK |
32945 | if (eflags & EVENT_FILE_FL_TRIGGER_COND) |
32946 | - *tt = event_triggers_call(file, entry); | |
32947 | + *tt = event_triggers_call(file, entry, event); | |
1a6e0f06 | 32948 | |
e4b2b4a8 JK |
32949 | if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || |
32950 | (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && | |
32951 | @@ -1330,7 +1339,7 @@ | |
32952 | trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); | |
32953 | ||
32954 | if (tt) | |
32955 | - event_triggers_post_call(file, tt, entry); | |
32956 | + event_triggers_post_call(file, tt, entry, event); | |
1a6e0f06 JK |
32957 | } |
32958 | ||
e4b2b4a8 JK |
32959 | /** |
32960 | @@ -1363,7 +1372,7 @@ | |
32961 | irq_flags, pc, regs); | |
32962 | ||
32963 | if (tt) | |
32964 | - event_triggers_post_call(file, tt, entry); | |
32965 | + event_triggers_post_call(file, tt, entry, event); | |
32966 | } | |
32967 | ||
32968 | #define FILTER_PRED_INVALID ((unsigned short)-1) | |
32969 | @@ -1545,6 +1554,8 @@ | |
32970 | extern void unpause_named_trigger(struct event_trigger_data *data); | |
32971 | extern void set_named_trigger_data(struct event_trigger_data *data, | |
32972 | struct event_trigger_data *named_data); | |
32973 | +extern struct event_trigger_data * | |
32974 | +get_named_trigger_data(struct event_trigger_data *data); | |
32975 | extern int register_event_command(struct event_command *cmd); | |
32976 | extern int unregister_event_command(struct event_command *cmd); | |
32977 | extern int register_trigger_hist_enable_disable_cmds(void); | |
32978 | @@ -1588,7 +1599,8 @@ | |
1a6e0f06 | 32979 | */ |
e4b2b4a8 JK |
32980 | struct event_trigger_ops { |
32981 | void (*func)(struct event_trigger_data *data, | |
32982 | - void *rec); | |
32983 | + void *rec, | |
32984 | + struct ring_buffer_event *rbe); | |
32985 | int (*init)(struct event_trigger_ops *ops, | |
32986 | struct event_trigger_data *data); | |
32987 | void (*free)(struct event_trigger_ops *ops, | |
32988 | @@ -1755,6 +1767,13 @@ | |
32989 | int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set); | |
32990 | int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled); | |
32991 | ||
32992 | +#define MAX_EVENT_NAME_LEN 64 | |
32993 | + | |
32994 | +extern int trace_run_command(const char *buf, int (*createfn)(int, char**)); | |
32995 | +extern ssize_t trace_parse_run_command(struct file *file, | |
32996 | + const char __user *buffer, size_t count, loff_t *ppos, | |
32997 | + int (*createfn)(int, char**)); | |
32998 | + | |
32999 | /* | |
33000 | * Normal trace_printk() and friends allocates special buffers | |
33001 | * to do the manipulation, as well as saves the print formats | |
33002 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_hwlat.c linux-4.14/kernel/trace/trace_hwlat.c | |
33003 | --- linux-4.14.orig/kernel/trace/trace_hwlat.c 2017-11-12 19:46:13.000000000 +0100 | |
33004 | +++ linux-4.14/kernel/trace/trace_hwlat.c 2018-09-05 11:05:07.000000000 +0200 | |
33005 | @@ -279,7 +279,7 @@ | |
33006 | * of this thread, than stop migrating for the duration | |
33007 | * of the current test. | |
33008 | */ | |
33009 | - if (!cpumask_equal(current_mask, ¤t->cpus_allowed)) | |
33010 | + if (!cpumask_equal(current_mask, current->cpus_ptr)) | |
33011 | goto disable; | |
1a6e0f06 | 33012 | |
e4b2b4a8 JK |
33013 | get_online_cpus(); |
33014 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_kprobe.c linux-4.14/kernel/trace/trace_kprobe.c | |
33015 | --- linux-4.14.orig/kernel/trace/trace_kprobe.c 2018-09-05 11:03:22.000000000 +0200 | |
33016 | +++ linux-4.14/kernel/trace/trace_kprobe.c 2018-09-05 11:05:07.000000000 +0200 | |
33017 | @@ -918,8 +918,8 @@ | |
33018 | static ssize_t probes_write(struct file *file, const char __user *buffer, | |
33019 | size_t count, loff_t *ppos) | |
33020 | { | |
33021 | - return traceprobe_probes_write(file, buffer, count, ppos, | |
33022 | - create_trace_kprobe); | |
33023 | + return trace_parse_run_command(file, buffer, count, ppos, | |
33024 | + create_trace_kprobe); | |
33025 | } | |
33026 | ||
33027 | static const struct file_operations kprobe_events_ops = { | |
33028 | @@ -1444,9 +1444,9 @@ | |
33029 | ||
33030 | pr_info("Testing kprobe tracing: "); | |
33031 | ||
33032 | - ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target " | |
33033 | - "$stack $stack0 +0($stack)", | |
33034 | - create_trace_kprobe); | |
33035 | + ret = trace_run_command("p:testprobe kprobe_trace_selftest_target " | |
33036 | + "$stack $stack0 +0($stack)", | |
33037 | + create_trace_kprobe); | |
33038 | if (WARN_ON_ONCE(ret)) { | |
33039 | pr_warn("error on probing function entry.\n"); | |
33040 | warn++; | |
33041 | @@ -1466,8 +1466,8 @@ | |
33042 | } | |
33043 | } | |
1a6e0f06 | 33044 | |
e4b2b4a8 JK |
33045 | - ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target " |
33046 | - "$retval", create_trace_kprobe); | |
33047 | + ret = trace_run_command("r:testprobe2 kprobe_trace_selftest_target " | |
33048 | + "$retval", create_trace_kprobe); | |
33049 | if (WARN_ON_ONCE(ret)) { | |
33050 | pr_warn("error on probing function return.\n"); | |
33051 | warn++; | |
33052 | @@ -1537,13 +1537,13 @@ | |
33053 | disable_trace_kprobe(tk, file); | |
33054 | } | |
1a6e0f06 | 33055 | |
e4b2b4a8 JK |
33056 | - ret = traceprobe_command("-:testprobe", create_trace_kprobe); |
33057 | + ret = trace_run_command("-:testprobe", create_trace_kprobe); | |
33058 | if (WARN_ON_ONCE(ret)) { | |
33059 | pr_warn("error on deleting a probe.\n"); | |
33060 | warn++; | |
33061 | } | |
33062 | ||
33063 | - ret = traceprobe_command("-:testprobe2", create_trace_kprobe); | |
33064 | + ret = trace_run_command("-:testprobe2", create_trace_kprobe); | |
33065 | if (WARN_ON_ONCE(ret)) { | |
33066 | pr_warn("error on deleting a probe.\n"); | |
33067 | warn++; | |
33068 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_output.c linux-4.14/kernel/trace/trace_output.c | |
33069 | --- linux-4.14.orig/kernel/trace/trace_output.c 2018-09-05 11:03:22.000000000 +0200 | |
33070 | +++ linux-4.14/kernel/trace/trace_output.c 2018-09-05 11:05:07.000000000 +0200 | |
33071 | @@ -447,6 +447,7 @@ | |
1a6e0f06 JK |
33072 | { |
33073 | char hardsoft_irq; | |
33074 | char need_resched; | |
33075 | + char need_resched_lazy; | |
33076 | char irqs_off; | |
33077 | int hardirq; | |
33078 | int softirq; | |
e4b2b4a8 | 33079 | @@ -477,6 +478,9 @@ |
1a6e0f06 JK |
33080 | break; |
33081 | } | |
33082 | ||
33083 | + need_resched_lazy = | |
33084 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
33085 | + | |
33086 | hardsoft_irq = | |
33087 | (nmi && hardirq) ? 'Z' : | |
33088 | nmi ? 'z' : | |
e4b2b4a8 | 33089 | @@ -485,14 +489,25 @@ |
1a6e0f06 JK |
33090 | softirq ? 's' : |
33091 | '.' ; | |
33092 | ||
e4b2b4a8 JK |
33093 | - trace_seq_printf(s, "%c%c%c", |
33094 | - irqs_off, need_resched, hardsoft_irq); | |
33095 | + trace_seq_printf(s, "%c%c%c%c", | |
33096 | + irqs_off, need_resched, need_resched_lazy, | |
33097 | + hardsoft_irq); | |
33098 | ||
33099 | if (entry->preempt_count) | |
33100 | trace_seq_printf(s, "%x", entry->preempt_count); | |
33101 | else | |
33102 | trace_seq_putc(s, '.'); | |
33103 | ||
33104 | + if (entry->preempt_lazy_count) | |
33105 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
33106 | + else | |
33107 | + trace_seq_putc(s, '.'); | |
33108 | + | |
33109 | + if (entry->migrate_disable) | |
33110 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
33111 | + else | |
33112 | + trace_seq_putc(s, '.'); | |
33113 | + | |
33114 | return !trace_seq_has_overflowed(s); | |
33115 | } | |
33116 | ||
33117 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_probe.c linux-4.14/kernel/trace/trace_probe.c | |
33118 | --- linux-4.14.orig/kernel/trace/trace_probe.c 2018-09-05 11:03:22.000000000 +0200 | |
33119 | +++ linux-4.14/kernel/trace/trace_probe.c 2018-09-05 11:05:07.000000000 +0200 | |
33120 | @@ -621,92 +621,6 @@ | |
33121 | kfree(arg->comm); | |
33122 | } | |
33123 | ||
33124 | -int traceprobe_command(const char *buf, int (*createfn)(int, char **)) | |
33125 | -{ | |
33126 | - char **argv; | |
33127 | - int argc, ret; | |
33128 | - | |
33129 | - argc = 0; | |
33130 | - ret = 0; | |
33131 | - argv = argv_split(GFP_KERNEL, buf, &argc); | |
33132 | - if (!argv) | |
33133 | - return -ENOMEM; | |
33134 | - | |
33135 | - if (argc) | |
33136 | - ret = createfn(argc, argv); | |
33137 | - | |
33138 | - argv_free(argv); | |
33139 | - | |
33140 | - return ret; | |
33141 | -} | |
33142 | - | |
33143 | -#define WRITE_BUFSIZE 4096 | |
33144 | - | |
33145 | -ssize_t traceprobe_probes_write(struct file *file, const char __user *buffer, | |
33146 | - size_t count, loff_t *ppos, | |
33147 | - int (*createfn)(int, char **)) | |
33148 | -{ | |
33149 | - char *kbuf, *buf, *tmp; | |
33150 | - int ret = 0; | |
33151 | - size_t done = 0; | |
33152 | - size_t size; | |
33153 | - | |
33154 | - kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); | |
33155 | - if (!kbuf) | |
33156 | - return -ENOMEM; | |
33157 | - | |
33158 | - while (done < count) { | |
33159 | - size = count - done; | |
33160 | - | |
33161 | - if (size >= WRITE_BUFSIZE) | |
33162 | - size = WRITE_BUFSIZE - 1; | |
33163 | - | |
33164 | - if (copy_from_user(kbuf, buffer + done, size)) { | |
33165 | - ret = -EFAULT; | |
33166 | - goto out; | |
33167 | - } | |
33168 | - kbuf[size] = '\0'; | |
33169 | - buf = kbuf; | |
33170 | - do { | |
33171 | - tmp = strchr(buf, '\n'); | |
33172 | - if (tmp) { | |
33173 | - *tmp = '\0'; | |
33174 | - size = tmp - buf + 1; | |
33175 | - } else { | |
33176 | - size = strlen(buf); | |
33177 | - if (done + size < count) { | |
33178 | - if (buf != kbuf) | |
33179 | - break; | |
33180 | - /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */ | |
33181 | - pr_warn("Line length is too long: Should be less than %d\n", | |
33182 | - WRITE_BUFSIZE - 2); | |
33183 | - ret = -EINVAL; | |
33184 | - goto out; | |
33185 | - } | |
33186 | - } | |
33187 | - done += size; | |
33188 | - | |
33189 | - /* Remove comments */ | |
33190 | - tmp = strchr(buf, '#'); | |
33191 | - | |
33192 | - if (tmp) | |
33193 | - *tmp = '\0'; | |
33194 | - | |
33195 | - ret = traceprobe_command(buf, createfn); | |
33196 | - if (ret) | |
33197 | - goto out; | |
33198 | - buf += size; | |
33199 | - | |
33200 | - } while (done < count); | |
33201 | - } | |
33202 | - ret = done; | |
33203 | - | |
33204 | -out: | |
33205 | - kfree(kbuf); | |
33206 | - | |
33207 | - return ret; | |
33208 | -} | |
33209 | - | |
33210 | static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, | |
33211 | bool is_return) | |
33212 | { | |
33213 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_probe.h linux-4.14/kernel/trace/trace_probe.h | |
33214 | --- linux-4.14.orig/kernel/trace/trace_probe.h 2018-09-05 11:03:22.000000000 +0200 | |
33215 | +++ linux-4.14/kernel/trace/trace_probe.h 2018-09-05 11:05:07.000000000 +0200 | |
33216 | @@ -42,7 +42,6 @@ | |
33217 | ||
33218 | #define MAX_TRACE_ARGS 128 | |
33219 | #define MAX_ARGSTR_LEN 63 | |
33220 | -#define MAX_EVENT_NAME_LEN 64 | |
33221 | #define MAX_STRING_SIZE PATH_MAX | |
33222 | ||
33223 | /* Reserved field names */ | |
33224 | @@ -356,12 +355,6 @@ | |
33225 | ||
33226 | extern int traceprobe_split_symbol_offset(char *symbol, long *offset); | |
33227 | ||
33228 | -extern ssize_t traceprobe_probes_write(struct file *file, | |
33229 | - const char __user *buffer, size_t count, loff_t *ppos, | |
33230 | - int (*createfn)(int, char**)); | |
33231 | - | |
33232 | -extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); | |
33233 | - | |
33234 | /* Sum up total data length for dynamic arraies (strings) */ | |
33235 | static nokprobe_inline int | |
33236 | __get_data_size(struct trace_probe *tp, struct pt_regs *regs) | |
33237 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/trace_uprobe.c linux-4.14/kernel/trace/trace_uprobe.c | |
33238 | --- linux-4.14.orig/kernel/trace/trace_uprobe.c 2018-09-05 11:03:22.000000000 +0200 | |
33239 | +++ linux-4.14/kernel/trace/trace_uprobe.c 2018-09-05 11:05:07.000000000 +0200 | |
33240 | @@ -647,7 +647,7 @@ | |
33241 | static ssize_t probes_write(struct file *file, const char __user *buffer, | |
33242 | size_t count, loff_t *ppos) | |
33243 | { | |
33244 | - return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe); | |
33245 | + return trace_parse_run_command(file, buffer, count, ppos, create_trace_uprobe); | |
33246 | } | |
33247 | ||
33248 | static const struct file_operations uprobe_events_ops = { | |
33249 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/tracing_map.c linux-4.14/kernel/trace/tracing_map.c | |
33250 | --- linux-4.14.orig/kernel/trace/tracing_map.c 2017-11-12 19:46:13.000000000 +0100 | |
33251 | +++ linux-4.14/kernel/trace/tracing_map.c 2018-09-05 11:05:07.000000000 +0200 | |
33252 | @@ -66,6 +66,73 @@ | |
33253 | return (u64)atomic64_read(&elt->fields[i].sum); | |
33254 | } | |
33255 | ||
33256 | +/** | |
33257 | + * tracing_map_set_var - Assign a tracing_map_elt's variable field | |
33258 | + * @elt: The tracing_map_elt | |
33259 | + * @i: The index of the given variable associated with the tracing_map_elt | |
33260 | + * @n: The value to assign | |
33261 | + * | |
33262 | + * Assign n to variable i associated with the specified tracing_map_elt | |
33263 | + * instance. The index i is the index returned by the call to | |
33264 | + * tracing_map_add_var() when the tracing map was set up. | |
33265 | + */ | |
33266 | +void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
33267 | +{ | |
33268 | + atomic64_set(&elt->vars[i], n); | |
33269 | + elt->var_set[i] = true; | |
33270 | +} | |
33271 | + | |
33272 | +/** | |
33273 | + * tracing_map_var_set - Return whether or not a variable has been set | |
33274 | + * @elt: The tracing_map_elt | |
33275 | + * @i: The index of the given variable associated with the tracing_map_elt | |
33276 | + * | |
33277 | + * Return true if the variable has been set, false otherwise. The | |
33278 | + * index i is the index returned by the call to tracing_map_add_var() | |
33279 | + * when the tracing map was set up. | |
33280 | + */ | |
33281 | +bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) | |
33282 | +{ | |
33283 | + return elt->var_set[i]; | |
33284 | +} | |
33285 | + | |
33286 | +/** | |
33287 | + * tracing_map_read_var - Return the value of a tracing_map_elt's variable field | |
33288 | + * @elt: The tracing_map_elt | |
33289 | + * @i: The index of the given variable associated with the tracing_map_elt | |
33290 | + * | |
33291 | + * Retrieve the value of the variable i associated with the specified | |
33292 | + * tracing_map_elt instance. The index i is the index returned by the | |
33293 | + * call to tracing_map_add_var() when the tracing map was set | |
33294 | + * up. | |
33295 | + * | |
33296 | + * Return: The variable value associated with field i for elt. | |
33297 | + */ | |
33298 | +u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) | |
33299 | +{ | |
33300 | + return (u64)atomic64_read(&elt->vars[i]); | |
33301 | +} | |
33302 | + | |
33303 | +/** | |
33304 | + * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field | |
33305 | + * @elt: The tracing_map_elt | |
33306 | + * @i: The index of the given variable associated with the tracing_map_elt | |
33307 | + * | |
33308 | + * Retrieve the value of the variable i associated with the specified | |
33309 | + * tracing_map_elt instance, and reset the variable to the 'not set' | |
33310 | + * state. The index i is the index returned by the call to | |
33311 | + * tracing_map_add_var() when the tracing map was set up. The reset | |
33312 | + * essentially makes the variable a read-once variable if it's only | |
33313 | + * accessed using this function. | |
33314 | + * | |
33315 | + * Return: The variable value associated with field i for elt. | |
33316 | + */ | |
33317 | +u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) | |
33318 | +{ | |
33319 | + elt->var_set[i] = false; | |
33320 | + return (u64)atomic64_read(&elt->vars[i]); | |
33321 | +} | |
33322 | + | |
33323 | int tracing_map_cmp_string(void *val_a, void *val_b) | |
33324 | { | |
33325 | char *a = val_a; | |
33326 | @@ -171,6 +238,28 @@ | |
33327 | } | |
33328 | ||
33329 | /** | |
33330 | + * tracing_map_add_var - Add a field describing a tracing_map var | |
33331 | + * @map: The tracing_map | |
33332 | + * | |
33333 | + * Add a var to the map and return the index identifying it in the map | |
33334 | + * and associated tracing_map_elts. This is the index used for | |
33335 | + * instance to update a var for a particular tracing_map_elt using | |
33336 | + * tracing_map_update_var() or reading it via tracing_map_read_var(). | |
33337 | + * | |
33338 | + * Return: The index identifying the var in the map and associated | |
33339 | + * tracing_map_elts, or -EINVAL on error. | |
33340 | + */ | |
33341 | +int tracing_map_add_var(struct tracing_map *map) | |
33342 | +{ | |
33343 | + int ret = -EINVAL; | |
33344 | + | |
33345 | + if (map->n_vars < TRACING_MAP_VARS_MAX) | |
33346 | + ret = map->n_vars++; | |
33347 | + | |
33348 | + return ret; | |
33349 | +} | |
33350 | + | |
33351 | +/** | |
33352 | * tracing_map_add_key_field - Add a field describing a tracing_map key | |
33353 | * @map: The tracing_map | |
33354 | * @offset: The offset within the key | |
33355 | @@ -280,6 +369,11 @@ | |
33356 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | |
33357 | atomic64_set(&elt->fields[i].sum, 0); | |
33358 | ||
33359 | + for (i = 0; i < elt->map->n_vars; i++) { | |
33360 | + atomic64_set(&elt->vars[i], 0); | |
33361 | + elt->var_set[i] = false; | |
33362 | + } | |
33363 | + | |
33364 | if (elt->map->ops && elt->map->ops->elt_clear) | |
33365 | elt->map->ops->elt_clear(elt); | |
33366 | } | |
33367 | @@ -306,6 +400,8 @@ | |
33368 | if (elt->map->ops && elt->map->ops->elt_free) | |
33369 | elt->map->ops->elt_free(elt); | |
33370 | kfree(elt->fields); | |
33371 | + kfree(elt->vars); | |
33372 | + kfree(elt->var_set); | |
33373 | kfree(elt->key); | |
33374 | kfree(elt); | |
33375 | } | |
33376 | @@ -333,6 +429,18 @@ | |
33377 | goto free; | |
33378 | } | |
33379 | ||
33380 | + elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); | |
33381 | + if (!elt->vars) { | |
33382 | + err = -ENOMEM; | |
33383 | + goto free; | |
33384 | + } | |
33385 | + | |
33386 | + elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); | |
33387 | + if (!elt->var_set) { | |
33388 | + err = -ENOMEM; | |
33389 | + goto free; | |
33390 | + } | |
33391 | + | |
33392 | tracing_map_elt_init_fields(elt); | |
33393 | ||
33394 | if (map->ops && map->ops->elt_alloc) { | |
33395 | @@ -414,7 +522,9 @@ | |
33396 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |
33397 | { | |
33398 | u32 idx, key_hash, test_key; | |
33399 | + int dup_try = 0; | |
33400 | struct tracing_map_entry *entry; | |
33401 | + struct tracing_map_elt *val; | |
33402 | ||
33403 | key_hash = jhash(key, map->key_size, 0); | |
33404 | if (key_hash == 0) | |
33405 | @@ -426,10 +536,33 @@ | |
33406 | entry = TRACING_MAP_ENTRY(map->map, idx); | |
33407 | test_key = entry->key; | |
33408 | ||
33409 | - if (test_key && test_key == key_hash && entry->val && | |
33410 | - keys_match(key, entry->val->key, map->key_size)) { | |
33411 | - atomic64_inc(&map->hits); | |
33412 | - return entry->val; | |
33413 | + if (test_key && test_key == key_hash) { | |
33414 | + val = READ_ONCE(entry->val); | |
33415 | + if (val && | |
33416 | + keys_match(key, val->key, map->key_size)) { | |
33417 | + if (!lookup_only) | |
33418 | + atomic64_inc(&map->hits); | |
33419 | + return val; | |
33420 | + } else if (unlikely(!val)) { | |
33421 | + /* | |
33422 | + * The key is present. But, val (pointer to elt | |
33423 | + * struct) is still NULL. which means some other | |
33424 | + * thread is in the process of inserting an | |
33425 | + * element. | |
33426 | + * | |
33427 | + * On top of that, it's key_hash is same as the | |
33428 | + * one being inserted right now. So, it's | |
33429 | + * possible that the element has the same | |
33430 | + * key as well. | |
33431 | + */ | |
33432 | + | |
33433 | + dup_try++; | |
33434 | + if (dup_try > map->map_size) { | |
33435 | + atomic64_inc(&map->drops); | |
33436 | + break; | |
33437 | + } | |
33438 | + continue; | |
33439 | + } | |
33440 | } | |
33441 | ||
33442 | if (!test_key) { | |
33443 | @@ -451,6 +584,13 @@ | |
33444 | atomic64_inc(&map->hits); | |
33445 | ||
33446 | return entry->val; | |
33447 | + } else { | |
33448 | + /* | |
33449 | + * cmpxchg() failed. Loop around once | |
33450 | + * more to check what key was inserted. | |
33451 | + */ | |
33452 | + dup_try++; | |
33453 | + continue; | |
33454 | } | |
33455 | } | |
33456 | ||
33457 | @@ -815,67 +955,15 @@ | |
33458 | return sort_entry; | |
33459 | } | |
33460 | ||
33461 | -static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt) | |
33462 | -{ | |
33463 | - struct tracing_map_elt *dup_elt; | |
33464 | - unsigned int i; | |
33465 | - | |
33466 | - dup_elt = tracing_map_elt_alloc(elt->map); | |
33467 | - if (IS_ERR(dup_elt)) | |
33468 | - return NULL; | |
33469 | - | |
33470 | - if (elt->map->ops && elt->map->ops->elt_copy) | |
33471 | - elt->map->ops->elt_copy(dup_elt, elt); | |
33472 | - | |
33473 | - dup_elt->private_data = elt->private_data; | |
33474 | - memcpy(dup_elt->key, elt->key, elt->map->key_size); | |
33475 | - | |
33476 | - for (i = 0; i < elt->map->n_fields; i++) { | |
33477 | - atomic64_set(&dup_elt->fields[i].sum, | |
33478 | - atomic64_read(&elt->fields[i].sum)); | |
33479 | - dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; | |
33480 | - } | |
33481 | - | |
33482 | - return dup_elt; | |
33483 | -} | |
33484 | - | |
33485 | -static int merge_dup(struct tracing_map_sort_entry **sort_entries, | |
33486 | - unsigned int target, unsigned int dup) | |
33487 | -{ | |
33488 | - struct tracing_map_elt *target_elt, *elt; | |
33489 | - bool first_dup = (target - dup) == 1; | |
33490 | - int i; | |
33491 | - | |
33492 | - if (first_dup) { | |
33493 | - elt = sort_entries[target]->elt; | |
33494 | - target_elt = copy_elt(elt); | |
33495 | - if (!target_elt) | |
33496 | - return -ENOMEM; | |
33497 | - sort_entries[target]->elt = target_elt; | |
33498 | - sort_entries[target]->elt_copied = true; | |
33499 | - } else | |
33500 | - target_elt = sort_entries[target]->elt; | |
33501 | - | |
33502 | - elt = sort_entries[dup]->elt; | |
33503 | - | |
33504 | - for (i = 0; i < elt->map->n_fields; i++) | |
33505 | - atomic64_add(atomic64_read(&elt->fields[i].sum), | |
33506 | - &target_elt->fields[i].sum); | |
33507 | - | |
33508 | - sort_entries[dup]->dup = true; | |
33509 | - | |
33510 | - return 0; | |
33511 | -} | |
33512 | - | |
33513 | -static int merge_dups(struct tracing_map_sort_entry **sort_entries, | |
33514 | +static void detect_dups(struct tracing_map_sort_entry **sort_entries, | |
33515 | int n_entries, unsigned int key_size) | |
33516 | { | |
33517 | unsigned int dups = 0, total_dups = 0; | |
33518 | - int err, i, j; | |
33519 | + int i; | |
33520 | void *key; | |
33521 | ||
33522 | if (n_entries < 2) | |
33523 | - return total_dups; | |
33524 | + return; | |
1a6e0f06 | 33525 | |
e4b2b4a8 JK |
33526 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), |
33527 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | |
33528 | @@ -884,30 +972,14 @@ | |
33529 | for (i = 1; i < n_entries; i++) { | |
33530 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | |
33531 | dups++; total_dups++; | |
33532 | - err = merge_dup(sort_entries, i - dups, i); | |
33533 | - if (err) | |
33534 | - return err; | |
33535 | continue; | |
33536 | } | |
33537 | key = sort_entries[i]->key; | |
33538 | dups = 0; | |
33539 | } | |
1a6e0f06 | 33540 | |
e4b2b4a8 JK |
33541 | - if (!total_dups) |
33542 | - return total_dups; | |
33543 | - | |
33544 | - for (i = 0, j = 0; i < n_entries; i++) { | |
33545 | - if (!sort_entries[i]->dup) { | |
33546 | - sort_entries[j] = sort_entries[i]; | |
33547 | - if (j++ != i) | |
33548 | - sort_entries[i] = NULL; | |
33549 | - } else { | |
33550 | - destroy_sort_entry(sort_entries[i]); | |
33551 | - sort_entries[i] = NULL; | |
33552 | - } | |
33553 | - } | |
33554 | - | |
33555 | - return total_dups; | |
33556 | + WARN_ONCE(total_dups > 0, | |
33557 | + "Duplicates detected: %d\n", total_dups); | |
1a6e0f06 JK |
33558 | } |
33559 | ||
e4b2b4a8 JK |
33560 | static bool is_key(struct tracing_map *map, unsigned int field_idx) |
33561 | @@ -1033,10 +1105,7 @@ | |
33562 | return 1; | |
33563 | } | |
33564 | ||
33565 | - ret = merge_dups(entries, n_entries, map->key_size); | |
33566 | - if (ret < 0) | |
33567 | - goto free; | |
33568 | - n_entries -= ret; | |
33569 | + detect_dups(entries, n_entries, map->key_size); | |
33570 | ||
33571 | if (is_key(map, sort_keys[0].field_idx)) | |
33572 | cmp_entries_fn = cmp_entries_key; | |
33573 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/trace/tracing_map.h linux-4.14/kernel/trace/tracing_map.h | |
33574 | --- linux-4.14.orig/kernel/trace/tracing_map.h 2017-11-12 19:46:13.000000000 +0100 | |
33575 | +++ linux-4.14/kernel/trace/tracing_map.h 2018-09-05 11:05:07.000000000 +0200 | |
33576 | @@ -6,10 +6,11 @@ | |
33577 | #define TRACING_MAP_BITS_MAX 17 | |
33578 | #define TRACING_MAP_BITS_MIN 7 | |
33579 | ||
33580 | -#define TRACING_MAP_KEYS_MAX 2 | |
33581 | +#define TRACING_MAP_KEYS_MAX 3 | |
33582 | #define TRACING_MAP_VALS_MAX 3 | |
33583 | #define TRACING_MAP_FIELDS_MAX (TRACING_MAP_KEYS_MAX + \ | |
33584 | TRACING_MAP_VALS_MAX) | |
33585 | +#define TRACING_MAP_VARS_MAX 16 | |
33586 | #define TRACING_MAP_SORT_KEYS_MAX 2 | |
33587 | ||
33588 | typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); | |
33589 | @@ -137,6 +138,8 @@ | |
33590 | struct tracing_map_elt { | |
33591 | struct tracing_map *map; | |
33592 | struct tracing_map_field *fields; | |
33593 | + atomic64_t *vars; | |
33594 | + bool *var_set; | |
33595 | void *key; | |
33596 | void *private_data; | |
33597 | }; | |
33598 | @@ -192,6 +195,7 @@ | |
33599 | int key_idx[TRACING_MAP_KEYS_MAX]; | |
33600 | unsigned int n_keys; | |
33601 | struct tracing_map_sort_key sort_key; | |
33602 | + unsigned int n_vars; | |
33603 | atomic64_t hits; | |
33604 | atomic64_t drops; | |
33605 | }; | |
33606 | @@ -215,11 +219,6 @@ | |
33607 | * Element allocation occurs before tracing begins, when the | |
33608 | * tracing_map_init() call is made by client code. | |
33609 | * | |
33610 | - * @elt_copy: At certain points in the lifetime of an element, it may | |
33611 | - * need to be copied. The copy should include a copy of the | |
33612 | - * client-allocated data, which can be copied into the 'to' | |
33613 | - * element from the 'from' element. | |
33614 | - * | |
33615 | * @elt_free: When a tracing_map_elt is freed, this function is called | |
33616 | * and allows client-allocated per-element data to be freed. | |
33617 | * | |
33618 | @@ -233,8 +232,6 @@ | |
33619 | */ | |
33620 | struct tracing_map_ops { | |
33621 | int (*elt_alloc)(struct tracing_map_elt *elt); | |
33622 | - void (*elt_copy)(struct tracing_map_elt *to, | |
33623 | - struct tracing_map_elt *from); | |
33624 | void (*elt_free)(struct tracing_map_elt *elt); | |
33625 | void (*elt_clear)(struct tracing_map_elt *elt); | |
33626 | void (*elt_init)(struct tracing_map_elt *elt); | |
33627 | @@ -248,6 +245,7 @@ | |
33628 | extern int tracing_map_init(struct tracing_map *map); | |
33629 | ||
33630 | extern int tracing_map_add_sum_field(struct tracing_map *map); | |
33631 | +extern int tracing_map_add_var(struct tracing_map *map); | |
33632 | extern int tracing_map_add_key_field(struct tracing_map *map, | |
33633 | unsigned int offset, | |
33634 | tracing_map_cmp_fn_t cmp_fn); | |
33635 | @@ -267,7 +265,13 @@ | |
33636 | ||
33637 | extern void tracing_map_update_sum(struct tracing_map_elt *elt, | |
33638 | unsigned int i, u64 n); | |
33639 | +extern void tracing_map_set_var(struct tracing_map_elt *elt, | |
33640 | + unsigned int i, u64 n); | |
33641 | +extern bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i); | |
33642 | extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i); | |
33643 | +extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i); | |
33644 | +extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i); | |
33645 | + | |
33646 | extern void tracing_map_set_field_descr(struct tracing_map *map, | |
33647 | unsigned int i, | |
33648 | unsigned int key_offset, | |
33649 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/user.c linux-4.14/kernel/user.c | |
33650 | --- linux-4.14.orig/kernel/user.c 2017-11-12 19:46:13.000000000 +0100 | |
33651 | +++ linux-4.14/kernel/user.c 2018-09-05 11:05:07.000000000 +0200 | |
33652 | @@ -162,11 +162,11 @@ | |
1a6e0f06 JK |
33653 | if (!up) |
33654 | return; | |
33655 | ||
33656 | - local_irq_save(flags); | |
33657 | + local_irq_save_nort(flags); | |
33658 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
33659 | free_user(up, flags); | |
33660 | else | |
33661 | - local_irq_restore(flags); | |
33662 | + local_irq_restore_nort(flags); | |
33663 | } | |
33664 | ||
33665 | struct user_struct *alloc_uid(kuid_t uid) | |
e4b2b4a8 JK |
33666 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/watchdog.c linux-4.14/kernel/watchdog.c |
33667 | --- linux-4.14.orig/kernel/watchdog.c 2017-11-12 19:46:13.000000000 +0100 | |
33668 | +++ linux-4.14/kernel/watchdog.c 2018-09-05 11:05:07.000000000 +0200 | |
33669 | @@ -462,7 +462,7 @@ | |
33670 | * Start the timer first to prevent the NMI watchdog triggering | |
33671 | * before the timer has a chance to fire. | |
33672 | */ | |
33673 | - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
33674 | + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); | |
33675 | hrtimer->function = watchdog_timer_fn; | |
33676 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | |
33677 | HRTIMER_MODE_REL_PINNED); | |
33678 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/watchdog_hld.c linux-4.14/kernel/watchdog_hld.c | |
33679 | --- linux-4.14.orig/kernel/watchdog_hld.c 2017-11-12 19:46:13.000000000 +0100 | |
33680 | +++ linux-4.14/kernel/watchdog_hld.c 2018-09-05 11:05:07.000000000 +0200 | |
33681 | @@ -24,6 +24,8 @@ | |
33682 | static DEFINE_PER_CPU(bool, watchdog_nmi_touch); | |
33683 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | |
33684 | static DEFINE_PER_CPU(struct perf_event *, dead_event); | |
1a6e0f06 JK |
33685 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); |
33686 | + | |
e4b2b4a8 JK |
33687 | static struct cpumask dead_events_mask; |
33688 | ||
33689 | static unsigned long hardlockup_allcpu_dumped; | |
33690 | @@ -134,6 +136,13 @@ | |
1a6e0f06 JK |
33691 | /* only print hardlockups once */ |
33692 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
33693 | return; | |
33694 | + /* | |
33695 | + * If early-printk is enabled then make sure we do not | |
33696 | + * lock up in printk() and kill console logging: | |
33697 | + */ | |
33698 | + printk_kill(); | |
33699 | + | |
33700 | + raw_spin_lock(&watchdog_output_lock); | |
33701 | ||
33702 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
33703 | print_modules(); | |
e4b2b4a8 | 33704 | @@ -151,6 +160,7 @@ |
1a6e0f06 JK |
33705 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) |
33706 | trigger_allbutself_cpu_backtrace(); | |
33707 | ||
33708 | + raw_spin_unlock(&watchdog_output_lock); | |
33709 | if (hardlockup_panic) | |
33710 | nmi_panic(regs, "Hard LOCKUP"); | |
33711 | ||
e4b2b4a8 JK |
33712 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/workqueue.c linux-4.14/kernel/workqueue.c |
33713 | --- linux-4.14.orig/kernel/workqueue.c 2018-09-05 11:03:22.000000000 +0200 | |
33714 | +++ linux-4.14/kernel/workqueue.c 2018-09-05 11:05:07.000000000 +0200 | |
33715 | @@ -49,6 +49,8 @@ | |
1a6e0f06 JK |
33716 | #include <linux/moduleparam.h> |
33717 | #include <linux/uaccess.h> | |
e4b2b4a8 | 33718 | #include <linux/nmi.h> |
1a6e0f06 JK |
33719 | +#include <linux/locallock.h> |
33720 | +#include <linux/delay.h> | |
33721 | ||
33722 | #include "workqueue_internal.h" | |
33723 | ||
e4b2b4a8 | 33724 | @@ -123,11 +125,16 @@ |
1a6e0f06 JK |
33725 | * cpu or grabbing pool->lock is enough for read access. If |
33726 | * POOL_DISASSOCIATED is set, it's identical to L. | |
33727 | * | |
33728 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
33729 | + * the list manipulations against read access from | |
33730 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
33731 | + * pool->lock. | |
33732 | + * | |
33733 | * A: pool->attach_mutex protected. | |
33734 | * | |
33735 | * PL: wq_pool_mutex protected. | |
33736 | * | |
33737 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
33738 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
33739 | * | |
33740 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
33741 | * | |
e4b2b4a8 | 33742 | @@ -136,7 +143,7 @@ |
1a6e0f06 JK |
33743 | * |
33744 | * WQ: wq->mutex protected. | |
33745 | * | |
33746 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
33747 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
33748 | * | |
33749 | * MD: wq_mayday_lock protected. | |
33750 | */ | |
e4b2b4a8 | 33751 | @@ -186,7 +193,7 @@ |
1a6e0f06 JK |
33752 | atomic_t nr_running ____cacheline_aligned_in_smp; |
33753 | ||
33754 | /* | |
33755 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
33756 | + * Destruction of pool is RCU protected to allow dereferences | |
33757 | * from get_work_pool(). | |
33758 | */ | |
33759 | struct rcu_head rcu; | |
e4b2b4a8 | 33760 | @@ -215,7 +222,7 @@ |
1a6e0f06 JK |
33761 | /* |
33762 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
33763 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
33764 | - * itself is also sched-RCU protected so that the first pwq can be | |
33765 | + * itself is also RCU protected so that the first pwq can be | |
33766 | * determined without grabbing wq->mutex. | |
33767 | */ | |
33768 | struct work_struct unbound_release_work; | |
e4b2b4a8 | 33769 | @@ -352,6 +359,8 @@ |
1a6e0f06 JK |
33770 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; |
33771 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
33772 | ||
33773 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
33774 | + | |
33775 | static int worker_thread(void *__worker); | |
33776 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
33777 | ||
e4b2b4a8 | 33778 | @@ -359,20 +368,20 @@ |
1a6e0f06 JK |
33779 | #include <trace/events/workqueue.h> |
33780 | ||
33781 | #define assert_rcu_or_pool_mutex() \ | |
33782 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
33783 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
33784 | !lockdep_is_held(&wq_pool_mutex), \ | |
33785 | - "sched RCU or wq_pool_mutex should be held") | |
33786 | + "RCU or wq_pool_mutex should be held") | |
33787 | ||
33788 | #define assert_rcu_or_wq_mutex(wq) \ | |
33789 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
33790 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
33791 | !lockdep_is_held(&wq->mutex), \ | |
33792 | - "sched RCU or wq->mutex should be held") | |
33793 | + "RCU or wq->mutex should be held") | |
33794 | ||
33795 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
33796 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
33797 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
33798 | !lockdep_is_held(&wq->mutex) && \ | |
33799 | !lockdep_is_held(&wq_pool_mutex), \ | |
33800 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
33801 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
33802 | ||
33803 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
33804 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
e4b2b4a8 | 33805 | @@ -384,7 +393,7 @@ |
1a6e0f06 JK |
33806 | * @pool: iteration cursor |
33807 | * @pi: integer used for iteration | |
33808 | * | |
33809 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
33810 | + * This must be called either with wq_pool_mutex held or RCU read | |
33811 | * locked. If the pool needs to be used beyond the locking in effect, the | |
33812 | * caller is responsible for guaranteeing that the pool stays online. | |
33813 | * | |
e4b2b4a8 | 33814 | @@ -416,7 +425,7 @@ |
1a6e0f06 JK |
33815 | * @pwq: iteration cursor |
33816 | * @wq: the target workqueue | |
33817 | * | |
33818 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
33819 | + * This must be called either with wq->mutex held or RCU read locked. | |
33820 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
33821 | * responsible for guaranteeing that the pwq stays online. | |
33822 | * | |
e4b2b4a8 | 33823 | @@ -428,6 +437,31 @@ |
1a6e0f06 JK |
33824 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ |
33825 | else | |
33826 | ||
33827 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
33828 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
33829 | +{ | |
33830 | + preempt_disable(); | |
33831 | +} | |
33832 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
33833 | +{ | |
33834 | + preempt_enable(); | |
33835 | +} | |
33836 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
33837 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
33838 | +#else | |
33839 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
33840 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
33841 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
33842 | +{ | |
33843 | + spin_lock_irq(&pool->lock); | |
33844 | +} | |
33845 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
33846 | +{ | |
33847 | + spin_unlock_irq(&pool->lock); | |
33848 | +} | |
33849 | +#endif | |
33850 | + | |
33851 | + | |
33852 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
33853 | ||
33854 | static struct debug_obj_descr work_debug_descr; | |
e4b2b4a8 | 33855 | @@ -552,7 +586,7 @@ |
1a6e0f06 JK |
33856 | * @wq: the target workqueue |
33857 | * @node: the node ID | |
33858 | * | |
33859 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
33860 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
33861 | * read locked. | |
33862 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
33863 | * responsible for guaranteeing that the pwq stays online. | |
e4b2b4a8 | 33864 | @@ -696,8 +730,8 @@ |
1a6e0f06 JK |
33865 | * @work: the work item of interest |
33866 | * | |
33867 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
33868 | - * access under sched-RCU read lock. As such, this function should be | |
33869 | - * called under wq_pool_mutex or with preemption disabled. | |
33870 | + * access under RCU read lock. As such, this function should be | |
33871 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
33872 | * | |
33873 | * All fields of the returned pool are accessible as long as the above | |
33874 | * mentioned locking is in effect. If the returned pool needs to be used | |
e4b2b4a8 | 33875 | @@ -834,50 +868,45 @@ |
1a6e0f06 JK |
33876 | */ |
33877 | static void wake_up_worker(struct worker_pool *pool) | |
33878 | { | |
33879 | - struct worker *worker = first_idle_worker(pool); | |
33880 | + struct worker *worker; | |
33881 | + | |
33882 | + rt_lock_idle_list(pool); | |
33883 | + | |
33884 | + worker = first_idle_worker(pool); | |
33885 | ||
33886 | if (likely(worker)) | |
33887 | wake_up_process(worker->task); | |
33888 | + | |
33889 | + rt_unlock_idle_list(pool); | |
33890 | } | |
33891 | ||
33892 | /** | |
33893 | - * wq_worker_waking_up - a worker is waking up | |
33894 | + * wq_worker_running - a worker is running again | |
33895 | * @task: task waking up | |
33896 | - * @cpu: CPU @task is waking up to | |
e4b2b4a8 | 33897 | - * |
1a6e0f06 JK |
33898 | - * This function is called during try_to_wake_up() when a worker is |
33899 | - * being awoken. | |
e4b2b4a8 | 33900 | * |
1a6e0f06 JK |
33901 | - * CONTEXT: |
33902 | - * spin_lock_irq(rq->lock) | |
33903 | + * This function is called when a worker returns from schedule() | |
33904 | */ | |
33905 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
33906 | +void wq_worker_running(struct task_struct *task) | |
33907 | { | |
33908 | struct worker *worker = kthread_data(task); | |
33909 | ||
33910 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
33911 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
33912 | + if (!worker->sleeping) | |
33913 | + return; | |
33914 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
33915 | atomic_inc(&worker->pool->nr_running); | |
33916 | - } | |
33917 | + worker->sleeping = 0; | |
33918 | } | |
33919 | ||
33920 | /** | |
33921 | * wq_worker_sleeping - a worker is going to sleep | |
33922 | * @task: task going to sleep | |
33923 | * | |
33924 | - * This function is called during schedule() when a busy worker is | |
33925 | - * going to sleep. Worker on the same cpu can be woken up by | |
33926 | - * returning pointer to its task. | |
33927 | - * | |
33928 | - * CONTEXT: | |
33929 | - * spin_lock_irq(rq->lock) | |
33930 | - * | |
33931 | - * Return: | |
33932 | - * Worker task on @cpu to wake up, %NULL if none. | |
33933 | + * This function is called from schedule() when a busy worker is | |
33934 | + * going to sleep. | |
33935 | */ | |
33936 | -struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
33937 | +void wq_worker_sleeping(struct task_struct *task) | |
33938 | { | |
33939 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
33940 | + struct worker *worker = kthread_data(task); | |
33941 | struct worker_pool *pool; | |
33942 | ||
33943 | /* | |
e4b2b4a8 | 33944 | @@ -886,29 +915,26 @@ |
1a6e0f06 JK |
33945 | * checking NOT_RUNNING. |
33946 | */ | |
33947 | if (worker->flags & WORKER_NOT_RUNNING) | |
33948 | - return NULL; | |
33949 | + return; | |
33950 | ||
33951 | pool = worker->pool; | |
33952 | ||
33953 | - /* this can only happen on the local cpu */ | |
33954 | - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) | |
33955 | - return NULL; | |
33956 | + if (WARN_ON_ONCE(worker->sleeping)) | |
33957 | + return; | |
33958 | + | |
33959 | + worker->sleeping = 1; | |
33960 | ||
33961 | /* | |
33962 | * The counterpart of the following dec_and_test, implied mb, | |
33963 | * worklist not empty test sequence is in insert_work(). | |
33964 | * Please read comment there. | |
33965 | - * | |
33966 | - * NOT_RUNNING is clear. This means that we're bound to and | |
33967 | - * running on the local cpu w/ rq lock held and preemption | |
33968 | - * disabled, which in turn means that none else could be | |
33969 | - * manipulating idle_list, so dereferencing idle_list without pool | |
33970 | - * lock is safe. | |
33971 | */ | |
33972 | if (atomic_dec_and_test(&pool->nr_running) && | |
33973 | - !list_empty(&pool->worklist)) | |
33974 | - to_wakeup = first_idle_worker(pool); | |
33975 | - return to_wakeup ? to_wakeup->task : NULL; | |
33976 | + !list_empty(&pool->worklist)) { | |
33977 | + sched_lock_idle_list(pool); | |
33978 | + wake_up_worker(pool); | |
33979 | + sched_unlock_idle_list(pool); | |
33980 | + } | |
33981 | } | |
33982 | ||
33983 | /** | |
e4b2b4a8 | 33984 | @@ -1102,12 +1128,14 @@ |
1a6e0f06 JK |
33985 | { |
33986 | if (pwq) { | |
33987 | /* | |
33988 | - * As both pwqs and pools are sched-RCU protected, the | |
33989 | + * As both pwqs and pools are RCU protected, the | |
33990 | * following lock operations are safe. | |
33991 | */ | |
33992 | - spin_lock_irq(&pwq->pool->lock); | |
c7c16703 | 33993 | + rcu_read_lock(); |
1a6e0f06 JK |
33994 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); |
33995 | put_pwq(pwq); | |
33996 | - spin_unlock_irq(&pwq->pool->lock); | |
33997 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
c7c16703 | 33998 | + rcu_read_unlock(); |
1a6e0f06 JK |
33999 | } |
34000 | } | |
34001 | ||
e4b2b4a8 | 34002 | @@ -1211,7 +1239,7 @@ |
1a6e0f06 JK |
34003 | struct worker_pool *pool; |
34004 | struct pool_workqueue *pwq; | |
34005 | ||
34006 | - local_irq_save(*flags); | |
34007 | + local_lock_irqsave(pendingb_lock, *flags); | |
34008 | ||
34009 | /* try to steal the timer if it exists */ | |
34010 | if (is_dwork) { | |
e4b2b4a8 | 34011 | @@ -1230,6 +1258,7 @@ |
1a6e0f06 JK |
34012 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) |
34013 | return 0; | |
34014 | ||
34015 | + rcu_read_lock(); | |
34016 | /* | |
34017 | * The queueing is in progress, or it is already queued. Try to | |
34018 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
e4b2b4a8 | 34019 | @@ -1268,14 +1297,16 @@ |
1a6e0f06 JK |
34020 | set_work_pool_and_keep_pending(work, pool->id); |
34021 | ||
34022 | spin_unlock(&pool->lock); | |
34023 | + rcu_read_unlock(); | |
34024 | return 1; | |
34025 | } | |
34026 | spin_unlock(&pool->lock); | |
34027 | fail: | |
34028 | - local_irq_restore(*flags); | |
34029 | + rcu_read_unlock(); | |
34030 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
34031 | if (work_is_canceling(work)) | |
34032 | return -ENOENT; | |
34033 | - cpu_relax(); | |
34034 | + cpu_chill(); | |
34035 | return -EAGAIN; | |
34036 | } | |
34037 | ||
e4b2b4a8 | 34038 | @@ -1377,7 +1408,7 @@ |
1a6e0f06 JK |
34039 | * queued or lose PENDING. Grabbing PENDING and queueing should |
34040 | * happen with IRQ disabled. | |
34041 | */ | |
34042 | - WARN_ON_ONCE(!irqs_disabled()); | |
34043 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
34044 | ||
34045 | debug_work_activate(work); | |
34046 | ||
e4b2b4a8 | 34047 | @@ -1385,6 +1416,7 @@ |
1a6e0f06 JK |
34048 | if (unlikely(wq->flags & __WQ_DRAINING) && |
34049 | WARN_ON_ONCE(!is_chained_work(wq))) | |
34050 | return; | |
34051 | + rcu_read_lock(); | |
34052 | retry: | |
34053 | if (req_cpu == WORK_CPU_UNBOUND) | |
34054 | cpu = wq_select_unbound_cpu(raw_smp_processor_id()); | |
e4b2b4a8 | 34055 | @@ -1441,10 +1473,8 @@ |
1a6e0f06 JK |
34056 | /* pwq determined, queue */ |
34057 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
34058 | ||
34059 | - if (WARN_ON(!list_empty(&work->entry))) { | |
34060 | - spin_unlock(&pwq->pool->lock); | |
34061 | - return; | |
34062 | - } | |
34063 | + if (WARN_ON(!list_empty(&work->entry))) | |
34064 | + goto out; | |
34065 | ||
34066 | pwq->nr_in_flight[pwq->work_color]++; | |
34067 | work_flags = work_color_to_flags(pwq->work_color); | |
e4b2b4a8 | 34068 | @@ -1462,7 +1492,9 @@ |
1a6e0f06 JK |
34069 | |
34070 | insert_work(pwq, work, worklist, work_flags); | |
34071 | ||
34072 | +out: | |
34073 | spin_unlock(&pwq->pool->lock); | |
34074 | + rcu_read_unlock(); | |
34075 | } | |
34076 | ||
34077 | /** | |
e4b2b4a8 | 34078 | @@ -1482,14 +1514,14 @@ |
1a6e0f06 JK |
34079 | bool ret = false; |
34080 | unsigned long flags; | |
34081 | ||
34082 | - local_irq_save(flags); | |
34083 | + local_lock_irqsave(pendingb_lock,flags); | |
34084 | ||
34085 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
34086 | __queue_work(cpu, wq, work); | |
34087 | ret = true; | |
34088 | } | |
34089 | ||
34090 | - local_irq_restore(flags); | |
34091 | + local_unlock_irqrestore(pendingb_lock, flags); | |
34092 | return ret; | |
34093 | } | |
34094 | EXPORT_SYMBOL(queue_work_on); | |
e4b2b4a8 JK |
34095 | @@ -1498,8 +1530,11 @@ |
34096 | { | |
34097 | struct delayed_work *dwork = (struct delayed_work *)__data; | |
34098 | ||
34099 | + /* XXX */ | |
34100 | + /* local_lock(pendingb_lock); */ | |
34101 | /* should have been called from irqsafe timer with irq already off */ | |
34102 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
34103 | + /* local_unlock(pendingb_lock); */ | |
34104 | } | |
34105 | EXPORT_SYMBOL(delayed_work_timer_fn); | |
34106 | ||
34107 | @@ -1555,14 +1590,14 @@ | |
1a6e0f06 JK |
34108 | unsigned long flags; |
34109 | ||
34110 | /* read the comment in __queue_work() */ | |
34111 | - local_irq_save(flags); | |
34112 | + local_lock_irqsave(pendingb_lock, flags); | |
34113 | ||
34114 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
34115 | __queue_delayed_work(cpu, wq, dwork, delay); | |
34116 | ret = true; | |
34117 | } | |
34118 | ||
34119 | - local_irq_restore(flags); | |
34120 | + local_unlock_irqrestore(pendingb_lock, flags); | |
34121 | return ret; | |
34122 | } | |
34123 | EXPORT_SYMBOL(queue_delayed_work_on); | |
e4b2b4a8 | 34124 | @@ -1597,7 +1632,7 @@ |
1a6e0f06 JK |
34125 | |
34126 | if (likely(ret >= 0)) { | |
34127 | __queue_delayed_work(cpu, wq, dwork, delay); | |
34128 | - local_irq_restore(flags); | |
34129 | + local_unlock_irqrestore(pendingb_lock, flags); | |
34130 | } | |
34131 | ||
34132 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
e4b2b4a8 | 34133 | @@ -1630,7 +1665,9 @@ |
1a6e0f06 JK |
34134 | worker->last_active = jiffies; |
34135 | ||
34136 | /* idle_list is LIFO */ | |
34137 | + rt_lock_idle_list(pool); | |
34138 | list_add(&worker->entry, &pool->idle_list); | |
34139 | + rt_unlock_idle_list(pool); | |
34140 | ||
34141 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
34142 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
e4b2b4a8 | 34143 | @@ -1663,7 +1700,9 @@ |
1a6e0f06 JK |
34144 | return; |
34145 | worker_clr_flags(worker, WORKER_IDLE); | |
34146 | pool->nr_idle--; | |
34147 | + rt_lock_idle_list(pool); | |
34148 | list_del_init(&worker->entry); | |
34149 | + rt_unlock_idle_list(pool); | |
34150 | } | |
34151 | ||
34152 | static struct worker *alloc_worker(int node) | |
e4b2b4a8 | 34153 | @@ -1829,7 +1868,9 @@ |
1a6e0f06 JK |
34154 | pool->nr_workers--; |
34155 | pool->nr_idle--; | |
34156 | ||
34157 | + rt_lock_idle_list(pool); | |
34158 | list_del_init(&worker->entry); | |
34159 | + rt_unlock_idle_list(pool); | |
34160 | worker->flags |= WORKER_DIE; | |
34161 | wake_up_process(worker->task); | |
34162 | } | |
e4b2b4a8 | 34163 | @@ -2815,14 +2856,14 @@ |
1a6e0f06 JK |
34164 | |
34165 | might_sleep(); | |
34166 | ||
34167 | - local_irq_disable(); | |
34168 | + rcu_read_lock(); | |
34169 | pool = get_work_pool(work); | |
34170 | if (!pool) { | |
34171 | - local_irq_enable(); | |
34172 | + rcu_read_unlock(); | |
34173 | return false; | |
34174 | } | |
34175 | ||
34176 | - spin_lock(&pool->lock); | |
34177 | + spin_lock_irq(&pool->lock); | |
34178 | /* see the comment in try_to_grab_pending() with the same code */ | |
34179 | pwq = get_work_pwq(work); | |
34180 | if (pwq) { | |
e4b2b4a8 JK |
34181 | @@ -2853,10 +2894,11 @@ |
34182 | lock_map_acquire(&pwq->wq->lockdep_map); | |
34183 | lock_map_release(&pwq->wq->lockdep_map); | |
34184 | } | |
1a6e0f06 JK |
34185 | - |
34186 | + rcu_read_unlock(); | |
34187 | return true; | |
34188 | already_gone: | |
34189 | spin_unlock_irq(&pool->lock); | |
34190 | + rcu_read_unlock(); | |
34191 | return false; | |
34192 | } | |
34193 | ||
e4b2b4a8 | 34194 | @@ -2946,7 +2988,7 @@ |
1a6e0f06 JK |
34195 | |
34196 | /* tell other tasks trying to grab @work to back off */ | |
34197 | mark_work_canceling(work); | |
34198 | - local_irq_restore(flags); | |
34199 | + local_unlock_irqrestore(pendingb_lock, flags); | |
34200 | ||
e4b2b4a8 JK |
34201 | /* |
34202 | * This allows canceling during early boot. We know that @work | |
34203 | @@ -3007,10 +3049,10 @@ | |
1a6e0f06 JK |
34204 | */ |
34205 | bool flush_delayed_work(struct delayed_work *dwork) | |
34206 | { | |
34207 | - local_irq_disable(); | |
34208 | + local_lock_irq(pendingb_lock); | |
34209 | if (del_timer_sync(&dwork->timer)) | |
34210 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
34211 | - local_irq_enable(); | |
34212 | + local_unlock_irq(pendingb_lock); | |
34213 | return flush_work(&dwork->work); | |
34214 | } | |
34215 | EXPORT_SYMBOL(flush_delayed_work); | |
e4b2b4a8 | 34216 | @@ -3028,7 +3070,7 @@ |
c7c16703 | 34217 | return false; |
1a6e0f06 | 34218 | |
c7c16703 | 34219 | set_work_pool_and_clear_pending(work, get_work_pool_id(work)); |
1a6e0f06 JK |
34220 | - local_irq_restore(flags); |
34221 | + local_unlock_irqrestore(pendingb_lock, flags); | |
34222 | return ret; | |
34223 | } | |
c7c16703 | 34224 | |
e4b2b4a8 | 34225 | @@ -3284,7 +3326,7 @@ |
1a6e0f06 JK |
34226 | * put_unbound_pool - put a worker_pool |
34227 | * @pool: worker_pool to put | |
34228 | * | |
34229 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
34230 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
34231 | * safe manner. get_unbound_pool() calls this function on its failure path | |
34232 | * and this function should be able to release pools which went through, | |
34233 | * successfully or not, init_worker_pool(). | |
e4b2b4a8 | 34234 | @@ -3338,8 +3380,8 @@ |
1a6e0f06 JK |
34235 | del_timer_sync(&pool->idle_timer); |
34236 | del_timer_sync(&pool->mayday_timer); | |
34237 | ||
34238 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
34239 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
34240 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
34241 | + call_rcu(&pool->rcu, rcu_free_pool); | |
34242 | } | |
34243 | ||
34244 | /** | |
e4b2b4a8 | 34245 | @@ -3446,14 +3488,14 @@ |
1a6e0f06 JK |
34246 | put_unbound_pool(pool); |
34247 | mutex_unlock(&wq_pool_mutex); | |
34248 | ||
34249 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
34250 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
34251 | ||
34252 | /* | |
34253 | * If we're the last pwq going away, @wq is already dead and no one | |
34254 | * is gonna access it anymore. Schedule RCU free. | |
34255 | */ | |
34256 | if (is_last) | |
34257 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
34258 | + call_rcu(&wq->rcu, rcu_free_wq); | |
34259 | } | |
34260 | ||
34261 | /** | |
e4b2b4a8 | 34262 | @@ -4128,7 +4170,7 @@ |
1a6e0f06 JK |
34263 | * The base ref is never dropped on per-cpu pwqs. Directly |
34264 | * schedule RCU free. | |
34265 | */ | |
34266 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
34267 | + call_rcu(&wq->rcu, rcu_free_wq); | |
34268 | } else { | |
34269 | /* | |
34270 | * We're the sole accessor of @wq at this point. Directly | |
e4b2b4a8 | 34271 | @@ -4238,7 +4280,8 @@ |
1a6e0f06 JK |
34272 | struct pool_workqueue *pwq; |
34273 | bool ret; | |
34274 | ||
34275 | - rcu_read_lock_sched(); | |
34276 | + rcu_read_lock(); | |
34277 | + preempt_disable(); | |
34278 | ||
34279 | if (cpu == WORK_CPU_UNBOUND) | |
34280 | cpu = smp_processor_id(); | |
e4b2b4a8 | 34281 | @@ -4249,7 +4292,8 @@ |
1a6e0f06 JK |
34282 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); |
34283 | ||
34284 | ret = !list_empty(&pwq->delayed_works); | |
34285 | - rcu_read_unlock_sched(); | |
34286 | + preempt_enable(); | |
34287 | + rcu_read_unlock(); | |
34288 | ||
34289 | return ret; | |
34290 | } | |
e4b2b4a8 | 34291 | @@ -4275,15 +4319,15 @@ |
1a6e0f06 JK |
34292 | if (work_pending(work)) |
34293 | ret |= WORK_BUSY_PENDING; | |
34294 | ||
34295 | - local_irq_save(flags); | |
34296 | + rcu_read_lock(); | |
34297 | pool = get_work_pool(work); | |
34298 | if (pool) { | |
34299 | - spin_lock(&pool->lock); | |
34300 | + spin_lock_irqsave(&pool->lock, flags); | |
34301 | if (find_worker_executing_work(pool, work)) | |
34302 | ret |= WORK_BUSY_RUNNING; | |
34303 | - spin_unlock(&pool->lock); | |
34304 | + spin_unlock_irqrestore(&pool->lock, flags); | |
34305 | } | |
34306 | - local_irq_restore(flags); | |
34307 | + rcu_read_unlock(); | |
34308 | ||
34309 | return ret; | |
34310 | } | |
e4b2b4a8 | 34311 | @@ -4472,7 +4516,7 @@ |
1a6e0f06 JK |
34312 | unsigned long flags; |
34313 | int pi; | |
34314 | ||
34315 | - rcu_read_lock_sched(); | |
34316 | + rcu_read_lock(); | |
34317 | ||
34318 | pr_info("Showing busy workqueues and worker pools:\n"); | |
34319 | ||
e4b2b4a8 JK |
34320 | @@ -4537,7 +4581,7 @@ |
34321 | touch_nmi_watchdog(); | |
1a6e0f06 JK |
34322 | } |
34323 | ||
34324 | - rcu_read_unlock_sched(); | |
34325 | + rcu_read_unlock(); | |
34326 | } | |
34327 | ||
34328 | /* | |
e4b2b4a8 | 34329 | @@ -4898,16 +4942,16 @@ |
1a6e0f06 JK |
34330 | * nr_active is monotonically decreasing. It's safe |
34331 | * to peek without lock. | |
34332 | */ | |
34333 | - rcu_read_lock_sched(); | |
34334 | + rcu_read_lock(); | |
34335 | for_each_pwq(pwq, wq) { | |
34336 | WARN_ON_ONCE(pwq->nr_active < 0); | |
34337 | if (pwq->nr_active) { | |
34338 | busy = true; | |
34339 | - rcu_read_unlock_sched(); | |
34340 | + rcu_read_unlock(); | |
34341 | goto out_unlock; | |
34342 | } | |
34343 | } | |
34344 | - rcu_read_unlock_sched(); | |
34345 | + rcu_read_unlock(); | |
34346 | } | |
34347 | out_unlock: | |
34348 | mutex_unlock(&wq_pool_mutex); | |
e4b2b4a8 | 34349 | @@ -5097,7 +5141,8 @@ |
1a6e0f06 JK |
34350 | const char *delim = ""; |
34351 | int node, written = 0; | |
34352 | ||
34353 | - rcu_read_lock_sched(); | |
34354 | + get_online_cpus(); | |
34355 | + rcu_read_lock(); | |
34356 | for_each_node(node) { | |
34357 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
34358 | "%s%d:%d", delim, node, | |
e4b2b4a8 | 34359 | @@ -5105,7 +5150,8 @@ |
1a6e0f06 JK |
34360 | delim = " "; |
34361 | } | |
34362 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
34363 | - rcu_read_unlock_sched(); | |
34364 | + rcu_read_unlock(); | |
34365 | + put_online_cpus(); | |
34366 | ||
34367 | return written; | |
34368 | } | |
e4b2b4a8 JK |
34369 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/kernel/workqueue_internal.h linux-4.14/kernel/workqueue_internal.h |
34370 | --- linux-4.14.orig/kernel/workqueue_internal.h 2017-11-12 19:46:13.000000000 +0100 | |
34371 | +++ linux-4.14/kernel/workqueue_internal.h 2018-09-05 11:05:07.000000000 +0200 | |
34372 | @@ -45,6 +45,7 @@ | |
1a6e0f06 JK |
34373 | unsigned long last_active; /* L: last active timestamp */ |
34374 | unsigned int flags; /* X: flags */ | |
34375 | int id; /* I: worker id */ | |
34376 | + int sleeping; /* None */ | |
34377 | ||
34378 | /* | |
34379 | * Opaque string set with work_set_desc(). Printed out with task | |
e4b2b4a8 | 34380 | @@ -70,7 +71,7 @@ |
1a6e0f06 JK |
34381 | * Scheduler hooks for concurrency managed workqueue. Only to be used from |
34382 | * sched/core.c and workqueue.c. | |
34383 | */ | |
34384 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
34385 | -struct task_struct *wq_worker_sleeping(struct task_struct *task); | |
34386 | +void wq_worker_running(struct task_struct *task); | |
34387 | +void wq_worker_sleeping(struct task_struct *task); | |
34388 | ||
34389 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
e4b2b4a8 JK |
34390 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/debugobjects.c linux-4.14/lib/debugobjects.c |
34391 | --- linux-4.14.orig/lib/debugobjects.c 2017-11-12 19:46:13.000000000 +0100 | |
34392 | +++ linux-4.14/lib/debugobjects.c 2018-09-05 11:05:07.000000000 +0200 | |
34393 | @@ -336,7 +336,10 @@ | |
1a6e0f06 JK |
34394 | struct debug_obj *obj; |
34395 | unsigned long flags; | |
34396 | ||
34397 | - fill_pool(); | |
34398 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
34399 | + if (preempt_count() == 0 && !irqs_disabled()) | |
34400 | +#endif | |
34401 | + fill_pool(); | |
34402 | ||
34403 | db = get_bucket((unsigned long) addr); | |
34404 | ||
e4b2b4a8 JK |
34405 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/irq_poll.c linux-4.14/lib/irq_poll.c |
34406 | --- linux-4.14.orig/lib/irq_poll.c 2017-11-12 19:46:13.000000000 +0100 | |
34407 | +++ linux-4.14/lib/irq_poll.c 2018-09-05 11:05:07.000000000 +0200 | |
34408 | @@ -37,6 +37,7 @@ | |
1a6e0f06 JK |
34409 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); |
34410 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
34411 | local_irq_restore(flags); | |
34412 | + preempt_check_resched_rt(); | |
34413 | } | |
34414 | EXPORT_SYMBOL(irq_poll_sched); | |
34415 | ||
e4b2b4a8 | 34416 | @@ -72,6 +73,7 @@ |
1a6e0f06 JK |
34417 | local_irq_save(flags); |
34418 | __irq_poll_complete(iop); | |
34419 | local_irq_restore(flags); | |
34420 | + preempt_check_resched_rt(); | |
34421 | } | |
34422 | EXPORT_SYMBOL(irq_poll_complete); | |
34423 | ||
e4b2b4a8 | 34424 | @@ -96,6 +98,7 @@ |
1a6e0f06 JK |
34425 | } |
34426 | ||
34427 | local_irq_enable(); | |
34428 | + preempt_check_resched_rt(); | |
34429 | ||
34430 | /* Even though interrupts have been re-enabled, this | |
34431 | * access is safe because interrupts can only add new | |
e4b2b4a8 | 34432 | @@ -133,6 +136,7 @@ |
1a6e0f06 JK |
34433 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); |
34434 | ||
34435 | local_irq_enable(); | |
34436 | + preempt_check_resched_rt(); | |
34437 | } | |
34438 | ||
34439 | /** | |
e4b2b4a8 | 34440 | @@ -196,6 +200,7 @@ |
c7c16703 JK |
34441 | this_cpu_ptr(&blk_cpu_iopoll)); |
34442 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
34443 | local_irq_enable(); | |
34444 | + preempt_check_resched_rt(); | |
1a6e0f06 | 34445 | |
c7c16703 JK |
34446 | return 0; |
34447 | } | |
e4b2b4a8 JK |
34448 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/Kconfig linux-4.14/lib/Kconfig |
34449 | --- linux-4.14.orig/lib/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
34450 | +++ linux-4.14/lib/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
34451 | @@ -428,6 +428,7 @@ | |
34452 | ||
34453 | config CPUMASK_OFFSTACK | |
34454 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
34455 | + depends on !PREEMPT_RT_FULL | |
34456 | help | |
34457 | Use dynamic allocation for cpumask_var_t, instead of putting | |
34458 | them on the stack. This is a bit more expensive, but avoids | |
34459 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/Kconfig.debug linux-4.14/lib/Kconfig.debug | |
34460 | --- linux-4.14.orig/lib/Kconfig.debug 2018-09-05 11:03:22.000000000 +0200 | |
34461 | +++ linux-4.14/lib/Kconfig.debug 2018-09-05 11:05:07.000000000 +0200 | |
34462 | @@ -1197,7 +1197,7 @@ | |
34463 | ||
34464 | config DEBUG_LOCKING_API_SELFTESTS | |
34465 | bool "Locking API boot-time self-tests" | |
34466 | - depends on DEBUG_KERNEL | |
34467 | + depends on DEBUG_KERNEL && !PREEMPT_RT_FULL | |
34468 | help | |
34469 | Say Y here if you want the kernel to run a short self-test during | |
34470 | bootup. The self-test checks whether common types of locking bugs | |
34471 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/locking-selftest.c linux-4.14/lib/locking-selftest.c | |
34472 | --- linux-4.14.orig/lib/locking-selftest.c 2017-11-12 19:46:13.000000000 +0100 | |
34473 | +++ linux-4.14/lib/locking-selftest.c 2018-09-05 11:05:07.000000000 +0200 | |
34474 | @@ -742,6 +742,8 @@ | |
1a6e0f06 JK |
34475 | #include "locking-selftest-spin-hardirq.h" |
34476 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
34477 | ||
34478 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34479 | + | |
34480 | #include "locking-selftest-rlock-hardirq.h" | |
34481 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
34482 | ||
e4b2b4a8 | 34483 | @@ -757,9 +759,12 @@ |
1a6e0f06 JK |
34484 | #include "locking-selftest-wlock-softirq.h" |
34485 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
34486 | ||
34487 | +#endif | |
34488 | + | |
34489 | #undef E1 | |
34490 | #undef E2 | |
34491 | ||
34492 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34493 | /* | |
34494 | * Enabling hardirqs with a softirq-safe lock held: | |
34495 | */ | |
e4b2b4a8 | 34496 | @@ -792,6 +797,8 @@ |
1a6e0f06 JK |
34497 | #undef E1 |
34498 | #undef E2 | |
34499 | ||
34500 | +#endif | |
34501 | + | |
34502 | /* | |
34503 | * Enabling irqs with an irq-safe lock held: | |
34504 | */ | |
e4b2b4a8 | 34505 | @@ -815,6 +822,8 @@ |
1a6e0f06 JK |
34506 | #include "locking-selftest-spin-hardirq.h" |
34507 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
34508 | ||
34509 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34510 | + | |
34511 | #include "locking-selftest-rlock-hardirq.h" | |
34512 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
34513 | ||
e4b2b4a8 | 34514 | @@ -830,6 +839,8 @@ |
1a6e0f06 JK |
34515 | #include "locking-selftest-wlock-softirq.h" |
34516 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
34517 | ||
34518 | +#endif | |
34519 | + | |
34520 | #undef E1 | |
34521 | #undef E2 | |
34522 | ||
e4b2b4a8 | 34523 | @@ -861,6 +872,8 @@ |
1a6e0f06 JK |
34524 | #include "locking-selftest-spin-hardirq.h" |
34525 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
34526 | ||
34527 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34528 | + | |
34529 | #include "locking-selftest-rlock-hardirq.h" | |
34530 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
34531 | ||
e4b2b4a8 | 34532 | @@ -876,6 +889,8 @@ |
1a6e0f06 JK |
34533 | #include "locking-selftest-wlock-softirq.h" |
34534 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
34535 | ||
34536 | +#endif | |
34537 | + | |
34538 | #undef E1 | |
34539 | #undef E2 | |
34540 | #undef E3 | |
e4b2b4a8 | 34541 | @@ -909,6 +924,8 @@ |
1a6e0f06 JK |
34542 | #include "locking-selftest-spin-hardirq.h" |
34543 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
34544 | ||
34545 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34546 | + | |
34547 | #include "locking-selftest-rlock-hardirq.h" | |
34548 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
34549 | ||
e4b2b4a8 | 34550 | @@ -924,10 +941,14 @@ |
1a6e0f06 JK |
34551 | #include "locking-selftest-wlock-softirq.h" |
34552 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
34553 | ||
34554 | +#endif | |
34555 | + | |
34556 | #undef E1 | |
34557 | #undef E2 | |
34558 | #undef E3 | |
34559 | ||
34560 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34561 | + | |
34562 | /* | |
34563 | * read-lock / write-lock irq inversion. | |
34564 | * | |
e4b2b4a8 | 34565 | @@ -990,6 +1011,10 @@ |
1a6e0f06 JK |
34566 | #undef E2 |
34567 | #undef E3 | |
34568 | ||
34569 | +#endif | |
34570 | + | |
34571 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34572 | + | |
34573 | /* | |
34574 | * read-lock / write-lock recursion that is actually safe. | |
34575 | */ | |
e4b2b4a8 | 34576 | @@ -1028,6 +1053,8 @@ |
1a6e0f06 JK |
34577 | #undef E2 |
34578 | #undef E3 | |
34579 | ||
34580 | +#endif | |
34581 | + | |
34582 | /* | |
34583 | * read-lock / write-lock recursion that is unsafe. | |
34584 | */ | |
e4b2b4a8 | 34585 | @@ -2057,6 +2084,7 @@ |
1a6e0f06 JK |
34586 | |
34587 | printk(" --------------------------------------------------------------------------\n"); | |
34588 | ||
34589 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34590 | /* | |
34591 | * irq-context testcases: | |
34592 | */ | |
e4b2b4a8 | 34593 | @@ -2069,6 +2097,28 @@ |
1a6e0f06 JK |
34594 | |
34595 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
34596 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
34597 | +#else | |
34598 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
34599 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
34600 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
34601 | + | |
34602 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
34603 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
34604 | + | |
34605 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
34606 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
34607 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
34608 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
34609 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
34610 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
34611 | + | |
34612 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
34613 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
34614 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
34615 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
34616 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
34617 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
34618 | +#endif | |
34619 | ||
34620 | ww_tests(); | |
34621 | ||
e4b2b4a8 JK |
34622 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/percpu_ida.c linux-4.14/lib/percpu_ida.c |
34623 | --- linux-4.14.orig/lib/percpu_ida.c 2017-11-12 19:46:13.000000000 +0100 | |
34624 | +++ linux-4.14/lib/percpu_ida.c 2018-09-05 11:05:07.000000000 +0200 | |
34625 | @@ -27,6 +27,9 @@ | |
1a6e0f06 JK |
34626 | #include <linux/string.h> |
34627 | #include <linux/spinlock.h> | |
34628 | #include <linux/percpu_ida.h> | |
34629 | +#include <linux/locallock.h> | |
34630 | + | |
34631 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
34632 | ||
34633 | struct percpu_ida_cpu { | |
34634 | /* | |
e4b2b4a8 | 34635 | @@ -149,13 +152,13 @@ |
1a6e0f06 JK |
34636 | unsigned long flags; |
34637 | int tag; | |
34638 | ||
34639 | - local_irq_save(flags); | |
34640 | + local_lock_irqsave(irq_off_lock, flags); | |
34641 | tags = this_cpu_ptr(pool->tag_cpu); | |
34642 | ||
34643 | /* Fastpath */ | |
34644 | tag = alloc_local_tag(tags); | |
34645 | if (likely(tag >= 0)) { | |
34646 | - local_irq_restore(flags); | |
34647 | + local_unlock_irqrestore(irq_off_lock, flags); | |
34648 | return tag; | |
34649 | } | |
34650 | ||
e4b2b4a8 | 34651 | @@ -174,6 +177,7 @@ |
1a6e0f06 JK |
34652 | |
34653 | if (!tags->nr_free) | |
34654 | alloc_global_tags(pool, tags); | |
34655 | + | |
34656 | if (!tags->nr_free) | |
34657 | steal_tags(pool, tags); | |
34658 | ||
e4b2b4a8 | 34659 | @@ -185,7 +189,7 @@ |
1a6e0f06 JK |
34660 | } |
34661 | ||
34662 | spin_unlock(&pool->lock); | |
34663 | - local_irq_restore(flags); | |
34664 | + local_unlock_irqrestore(irq_off_lock, flags); | |
34665 | ||
34666 | if (tag >= 0 || state == TASK_RUNNING) | |
34667 | break; | |
e4b2b4a8 | 34668 | @@ -197,7 +201,7 @@ |
1a6e0f06 JK |
34669 | |
34670 | schedule(); | |
34671 | ||
34672 | - local_irq_save(flags); | |
34673 | + local_lock_irqsave(irq_off_lock, flags); | |
34674 | tags = this_cpu_ptr(pool->tag_cpu); | |
34675 | } | |
34676 | if (state != TASK_RUNNING) | |
e4b2b4a8 | 34677 | @@ -222,7 +226,7 @@ |
1a6e0f06 JK |
34678 | |
34679 | BUG_ON(tag >= pool->nr_tags); | |
34680 | ||
34681 | - local_irq_save(flags); | |
34682 | + local_lock_irqsave(irq_off_lock, flags); | |
34683 | tags = this_cpu_ptr(pool->tag_cpu); | |
34684 | ||
34685 | spin_lock(&tags->lock); | |
e4b2b4a8 | 34686 | @@ -254,7 +258,7 @@ |
1a6e0f06 JK |
34687 | spin_unlock(&pool->lock); |
34688 | } | |
34689 | ||
34690 | - local_irq_restore(flags); | |
34691 | + local_unlock_irqrestore(irq_off_lock, flags); | |
34692 | } | |
34693 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
34694 | ||
e4b2b4a8 | 34695 | @@ -346,7 +350,7 @@ |
1a6e0f06 JK |
34696 | struct percpu_ida_cpu *remote; |
34697 | unsigned cpu, i, err = 0; | |
34698 | ||
34699 | - local_irq_save(flags); | |
34700 | + local_lock_irqsave(irq_off_lock, flags); | |
34701 | for_each_possible_cpu(cpu) { | |
34702 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
34703 | spin_lock(&remote->lock); | |
e4b2b4a8 | 34704 | @@ -368,7 +372,7 @@ |
1a6e0f06 JK |
34705 | } |
34706 | spin_unlock(&pool->lock); | |
34707 | out: | |
34708 | - local_irq_restore(flags); | |
34709 | + local_unlock_irqrestore(irq_off_lock, flags); | |
34710 | return err; | |
34711 | } | |
34712 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
e4b2b4a8 JK |
34713 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/radix-tree.c linux-4.14/lib/radix-tree.c |
34714 | --- linux-4.14.orig/lib/radix-tree.c 2018-09-05 11:03:25.000000000 +0200 | |
34715 | +++ linux-4.14/lib/radix-tree.c 2018-09-05 11:05:07.000000000 +0200 | |
34716 | @@ -37,7 +37,7 @@ | |
1f39f580 | 34717 | #include <linux/rcupdate.h> |
e4b2b4a8 JK |
34718 | #include <linux/slab.h> |
34719 | #include <linux/string.h> | |
1f39f580 JK |
34720 | - |
34721 | +#include <linux/locallock.h> | |
34722 | ||
34723 | /* Number of nodes in fully populated tree of given height */ | |
34724 | static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; | |
e4b2b4a8 | 34725 | @@ -86,6 +86,7 @@ |
1f39f580 JK |
34726 | struct radix_tree_node *nodes; |
34727 | }; | |
34728 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | |
34729 | +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock); | |
34730 | ||
e4b2b4a8 | 34731 | static inline struct radix_tree_node *entry_to_node(void *ptr) |
1f39f580 | 34732 | { |
e4b2b4a8 | 34733 | @@ -404,12 +405,13 @@ |
1a6e0f06 JK |
34734 | * succeed in getting a node here (and never reach |
34735 | * kmem_cache_alloc) | |
34736 | */ | |
34737 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
1f39f580 | 34738 | + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads); |
1a6e0f06 JK |
34739 | if (rtp->nr) { |
34740 | ret = rtp->nodes; | |
e4b2b4a8 | 34741 | rtp->nodes = ret->parent; |
1a6e0f06 JK |
34742 | rtp->nr--; |
34743 | } | |
1f39f580 | 34744 | + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads); |
1a6e0f06 JK |
34745 | /* |
34746 | * Update the allocation stack trace as this is more useful | |
34747 | * for debugging. | |
e4b2b4a8 | 34748 | @@ -475,14 +477,14 @@ |
1f39f580 JK |
34749 | */ |
34750 | gfp_mask &= ~__GFP_ACCOUNT; | |
34751 | ||
34752 | - preempt_disable(); | |
34753 | + local_lock(radix_tree_preloads_lock); | |
34754 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
34755 | while (rtp->nr < nr) { | |
34756 | - preempt_enable(); | |
34757 | + local_unlock(radix_tree_preloads_lock); | |
34758 | node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); | |
34759 | if (node == NULL) | |
34760 | goto out; | |
34761 | - preempt_disable(); | |
34762 | + local_lock(radix_tree_preloads_lock); | |
34763 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
34764 | if (rtp->nr < nr) { | |
e4b2b4a8 JK |
34765 | node->parent = rtp->nodes; |
34766 | @@ -524,7 +526,7 @@ | |
1f39f580 JK |
34767 | if (gfpflags_allow_blocking(gfp_mask)) |
34768 | return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); | |
34769 | /* Preloading doesn't help anything with this gfp mask, skip it */ | |
34770 | - preempt_disable(); | |
34771 | + local_lock(radix_tree_preloads_lock); | |
34772 | return 0; | |
1a6e0f06 | 34773 | } |
1f39f580 | 34774 | EXPORT_SYMBOL(radix_tree_maybe_preload); |
e4b2b4a8 | 34775 | @@ -562,7 +564,7 @@ |
1a6e0f06 | 34776 | |
1f39f580 JK |
34777 | /* Preloading doesn't help anything with this gfp mask, skip it */ |
34778 | if (!gfpflags_allow_blocking(gfp_mask)) { | |
34779 | - preempt_disable(); | |
34780 | + local_lock(radix_tree_preloads_lock); | |
34781 | return 0; | |
34782 | } | |
1a6e0f06 | 34783 | |
e4b2b4a8 | 34784 | @@ -596,6 +598,12 @@ |
1a6e0f06 JK |
34785 | return __radix_tree_preload(gfp_mask, nr_nodes); |
34786 | } | |
1a6e0f06 | 34787 | |
1f39f580 JK |
34788 | +void radix_tree_preload_end(void) |
34789 | +{ | |
34790 | + local_unlock(radix_tree_preloads_lock); | |
34791 | +} | |
34792 | +EXPORT_SYMBOL(radix_tree_preload_end); | |
34793 | + | |
e4b2b4a8 JK |
34794 | static unsigned radix_tree_load_root(const struct radix_tree_root *root, |
34795 | struct radix_tree_node **nodep, unsigned long *maxindex) | |
34796 | { | |
34797 | @@ -2105,10 +2113,16 @@ | |
34798 | void idr_preload(gfp_t gfp_mask) | |
34799 | { | |
34800 | if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE)) | |
34801 | - preempt_disable(); | |
34802 | + local_lock(radix_tree_preloads_lock); | |
34803 | } | |
34804 | EXPORT_SYMBOL(idr_preload); | |
34805 | ||
34806 | +void idr_preload_end(void) | |
34807 | +{ | |
34808 | + local_unlock(radix_tree_preloads_lock); | |
34809 | +} | |
34810 | +EXPORT_SYMBOL(idr_preload_end); | |
34811 | + | |
34812 | /** | |
34813 | * ida_pre_get - reserve resources for ida allocation | |
34814 | * @ida: ida handle | |
34815 | @@ -2125,7 +2139,7 @@ | |
34816 | * to return to the ida_pre_get() step. | |
34817 | */ | |
34818 | if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) | |
34819 | - preempt_enable(); | |
34820 | + local_unlock(radix_tree_preloads_lock); | |
34821 | ||
34822 | if (!this_cpu_read(ida_bitmap)) { | |
34823 | struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); | |
34824 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/scatterlist.c linux-4.14/lib/scatterlist.c | |
34825 | --- linux-4.14.orig/lib/scatterlist.c 2017-11-12 19:46:13.000000000 +0100 | |
34826 | +++ linux-4.14/lib/scatterlist.c 2018-09-05 11:05:07.000000000 +0200 | |
34827 | @@ -620,7 +620,7 @@ | |
1a6e0f06 JK |
34828 | flush_kernel_dcache_page(miter->page); |
34829 | ||
34830 | if (miter->__flags & SG_MITER_ATOMIC) { | |
34831 | - WARN_ON_ONCE(preemptible()); | |
34832 | + WARN_ON_ONCE(!pagefault_disabled()); | |
34833 | kunmap_atomic(miter->addr); | |
34834 | } else | |
34835 | kunmap(miter->page); | |
e4b2b4a8 JK |
34836 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/smp_processor_id.c linux-4.14/lib/smp_processor_id.c |
34837 | --- linux-4.14.orig/lib/smp_processor_id.c 2017-11-12 19:46:13.000000000 +0100 | |
34838 | +++ linux-4.14/lib/smp_processor_id.c 2018-09-05 11:05:07.000000000 +0200 | |
34839 | @@ -23,7 +23,7 @@ | |
34840 | * Kernel threads bound to a single CPU can safely use | |
34841 | * smp_processor_id(): | |
34842 | */ | |
34843 | - if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) | |
34844 | + if (cpumask_equal(current->cpus_ptr, cpumask_of(this_cpu))) | |
34845 | goto out; | |
1a6e0f06 | 34846 | |
e4b2b4a8 JK |
34847 | /* |
34848 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/lib/timerqueue.c linux-4.14/lib/timerqueue.c | |
34849 | --- linux-4.14.orig/lib/timerqueue.c 2017-11-12 19:46:13.000000000 +0100 | |
34850 | +++ linux-4.14/lib/timerqueue.c 2018-09-05 11:05:07.000000000 +0200 | |
34851 | @@ -33,8 +33,9 @@ | |
34852 | * @head: head of timerqueue | |
34853 | * @node: timer node to be added | |
34854 | * | |
34855 | - * Adds the timer node to the timerqueue, sorted by the | |
34856 | - * node's expires value. | |
34857 | + * Adds the timer node to the timerqueue, sorted by the node's expires | |
34858 | + * value. Returns true if the newly added timer is the first expiring timer in | |
34859 | + * the queue. | |
34860 | */ | |
34861 | bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node) | |
34862 | { | |
34863 | @@ -70,7 +71,8 @@ | |
34864 | * @head: head of timerqueue | |
34865 | * @node: timer node to be removed | |
34866 | * | |
34867 | - * Removes the timer node from the timerqueue. | |
34868 | + * Removes the timer node from the timerqueue. Returns true if the queue is | |
34869 | + * not empty after the remove. | |
34870 | */ | |
34871 | bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) | |
34872 | { | |
34873 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/localversion-rt linux-4.14/localversion-rt | |
34874 | --- linux-4.14.orig/localversion-rt 1970-01-01 01:00:00.000000000 +0100 | |
34875 | +++ linux-4.14/localversion-rt 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 | 34876 | @@ -0,0 +1 @@ |
e4b2b4a8 JK |
34877 | +-rt40 |
34878 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/backing-dev.c linux-4.14/mm/backing-dev.c | |
34879 | --- linux-4.14.orig/mm/backing-dev.c 2018-09-05 11:03:25.000000000 +0200 | |
34880 | +++ linux-4.14/mm/backing-dev.c 2018-09-05 11:05:07.000000000 +0200 | |
34881 | @@ -470,9 +470,9 @@ | |
1a6e0f06 JK |
34882 | { |
34883 | unsigned long flags; | |
34884 | ||
34885 | - local_irq_save(flags); | |
34886 | + local_irq_save_nort(flags); | |
34887 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
34888 | - local_irq_restore(flags); | |
34889 | + local_irq_restore_nort(flags); | |
34890 | return; | |
34891 | } | |
34892 | ||
e4b2b4a8 JK |
34893 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/compaction.c linux-4.14/mm/compaction.c |
34894 | --- linux-4.14.orig/mm/compaction.c 2017-11-12 19:46:13.000000000 +0100 | |
34895 | +++ linux-4.14/mm/compaction.c 2018-09-05 11:05:07.000000000 +0200 | |
34896 | @@ -1634,10 +1634,12 @@ | |
1a6e0f06 JK |
34897 | block_start_pfn(cc->migrate_pfn, cc->order); |
34898 | ||
34899 | if (cc->last_migrated_pfn < current_block_start) { | |
34900 | - cpu = get_cpu(); | |
34901 | + cpu = get_cpu_light(); | |
34902 | + local_lock_irq(swapvec_lock); | |
34903 | lru_add_drain_cpu(cpu); | |
34904 | + local_unlock_irq(swapvec_lock); | |
34905 | drain_local_pages(zone); | |
34906 | - put_cpu(); | |
34907 | + put_cpu_light(); | |
34908 | /* No more flushing until we migrate again */ | |
34909 | cc->last_migrated_pfn = 0; | |
34910 | } | |
e4b2b4a8 JK |
34911 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/filemap.c linux-4.14/mm/filemap.c |
34912 | --- linux-4.14.orig/mm/filemap.c 2018-09-05 11:03:28.000000000 +0200 | |
34913 | +++ linux-4.14/mm/filemap.c 2018-09-05 11:05:07.000000000 +0200 | |
34914 | @@ -110,6 +110,7 @@ | |
34915 | * ->i_mmap_rwsem | |
34916 | * ->tasklist_lock (memory_failure, collect_procs_ao) | |
34917 | */ | |
34918 | +DECLARE_LOCAL_IRQ_LOCK(shadow_nodes_lock); | |
34919 | ||
34920 | static int page_cache_tree_insert(struct address_space *mapping, | |
34921 | struct page *page, void **shadowp) | |
34922 | @@ -133,8 +134,10 @@ | |
34923 | if (shadowp) | |
34924 | *shadowp = p; | |
1a6e0f06 | 34925 | } |
e4b2b4a8 JK |
34926 | + local_lock(shadow_nodes_lock); |
34927 | __radix_tree_replace(&mapping->page_tree, node, slot, page, | |
34928 | - workingset_update_node, mapping); | |
34929 | + __workingset_update_node, mapping); | |
34930 | + local_unlock(shadow_nodes_lock); | |
34931 | mapping->nrpages++; | |
1a6e0f06 JK |
34932 | return 0; |
34933 | } | |
e4b2b4a8 JK |
34934 | @@ -151,6 +154,7 @@ |
34935 | VM_BUG_ON_PAGE(PageTail(page), page); | |
34936 | VM_BUG_ON_PAGE(nr != 1 && shadow, page); | |
34937 | ||
34938 | + local_lock(shadow_nodes_lock); | |
34939 | for (i = 0; i < nr; i++) { | |
34940 | struct radix_tree_node *node; | |
34941 | void **slot; | |
34942 | @@ -162,8 +166,9 @@ | |
1a6e0f06 | 34943 | |
e4b2b4a8 JK |
34944 | radix_tree_clear_tags(&mapping->page_tree, node, slot); |
34945 | __radix_tree_replace(&mapping->page_tree, node, slot, shadow, | |
34946 | - workingset_update_node, mapping); | |
34947 | + __workingset_update_node, mapping); | |
34948 | } | |
34949 | + local_unlock(shadow_nodes_lock); | |
34950 | ||
34951 | if (shadow) { | |
34952 | mapping->nrexceptional += nr; | |
34953 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/highmem.c linux-4.14/mm/highmem.c | |
34954 | --- linux-4.14.orig/mm/highmem.c 2017-11-12 19:46:13.000000000 +0100 | |
34955 | +++ linux-4.14/mm/highmem.c 2018-09-05 11:05:07.000000000 +0200 | |
34956 | @@ -30,10 +30,11 @@ | |
1a6e0f06 JK |
34957 | #include <linux/kgdb.h> |
34958 | #include <asm/tlbflush.h> | |
34959 | ||
34960 | - | |
34961 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34962 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
34963 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
34964 | #endif | |
34965 | +#endif | |
34966 | ||
34967 | /* | |
34968 | * Virtual_count is not a pure "count". | |
e4b2b4a8 | 34969 | @@ -108,8 +109,9 @@ |
1a6e0f06 JK |
34970 | unsigned long totalhigh_pages __read_mostly; |
34971 | EXPORT_SYMBOL(totalhigh_pages); | |
34972 | ||
34973 | - | |
34974 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
34975 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
34976 | +#endif | |
34977 | ||
34978 | unsigned int nr_free_highpages (void) | |
34979 | { | |
e4b2b4a8 JK |
34980 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/Kconfig linux-4.14/mm/Kconfig |
34981 | --- linux-4.14.orig/mm/Kconfig 2018-09-05 11:03:25.000000000 +0200 | |
34982 | +++ linux-4.14/mm/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
34983 | @@ -385,7 +385,7 @@ | |
34984 | ||
34985 | config TRANSPARENT_HUGEPAGE | |
34986 | bool "Transparent Hugepage Support" | |
34987 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
34988 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
34989 | select COMPACTION | |
34990 | select RADIX_TREE_MULTIORDER | |
34991 | help | |
34992 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/memcontrol.c linux-4.14/mm/memcontrol.c | |
34993 | --- linux-4.14.orig/mm/memcontrol.c 2018-09-05 11:03:25.000000000 +0200 | |
34994 | +++ linux-4.14/mm/memcontrol.c 2018-09-05 11:05:07.000000000 +0200 | |
34995 | @@ -69,6 +69,7 @@ | |
1a6e0f06 JK |
34996 | #include <net/sock.h> |
34997 | #include <net/ip.h> | |
34998 | #include "slab.h" | |
34999 | +#include <linux/locallock.h> | |
35000 | ||
e4b2b4a8 | 35001 | #include <linux/uaccess.h> |
1a6e0f06 | 35002 | |
e4b2b4a8 | 35003 | @@ -94,6 +95,8 @@ |
1a6e0f06 JK |
35004 | #define do_swap_account 0 |
35005 | #endif | |
35006 | ||
35007 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
35008 | + | |
35009 | /* Whether legacy memory+swap accounting is active */ | |
35010 | static bool do_memsw_account(void) | |
35011 | { | |
e4b2b4a8 JK |
35012 | @@ -1831,7 +1834,7 @@ |
35013 | * as well as workers from this path always operate on the local | |
35014 | * per-cpu data. CPU up doesn't touch memcg_stock at all. | |
35015 | */ | |
1a6e0f06 JK |
35016 | - curcpu = get_cpu(); |
35017 | + curcpu = get_cpu_light(); | |
35018 | for_each_online_cpu(cpu) { | |
35019 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
35020 | struct mem_cgroup *memcg; | |
e4b2b4a8 | 35021 | @@ -1851,7 +1854,7 @@ |
1a6e0f06 | 35022 | } |
e4b2b4a8 | 35023 | css_put(&memcg->css); |
1a6e0f06 JK |
35024 | } |
35025 | - put_cpu(); | |
35026 | + put_cpu_light(); | |
1a6e0f06 JK |
35027 | mutex_unlock(&percpu_charge_mutex); |
35028 | } | |
e4b2b4a8 JK |
35029 | |
35030 | @@ -4624,12 +4627,12 @@ | |
1a6e0f06 JK |
35031 | |
35032 | ret = 0; | |
35033 | ||
35034 | - local_irq_disable(); | |
35035 | + local_lock_irq(event_lock); | |
35036 | mem_cgroup_charge_statistics(to, page, compound, nr_pages); | |
35037 | memcg_check_events(to, page); | |
35038 | mem_cgroup_charge_statistics(from, page, compound, -nr_pages); | |
35039 | memcg_check_events(from, page); | |
35040 | - local_irq_enable(); | |
35041 | + local_unlock_irq(event_lock); | |
35042 | out_unlock: | |
35043 | unlock_page(page); | |
35044 | out: | |
e4b2b4a8 | 35045 | @@ -5572,10 +5575,10 @@ |
1a6e0f06 JK |
35046 | |
35047 | commit_charge(page, memcg, lrucare); | |
35048 | ||
35049 | - local_irq_disable(); | |
35050 | + local_lock_irq(event_lock); | |
35051 | mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); | |
35052 | memcg_check_events(memcg, page); | |
35053 | - local_irq_enable(); | |
35054 | + local_unlock_irq(event_lock); | |
35055 | ||
35056 | if (do_memsw_account() && PageSwapCache(page)) { | |
35057 | swp_entry_t entry = { .val = page_private(page) }; | |
e4b2b4a8 JK |
35058 | @@ -5644,7 +5647,7 @@ |
35059 | memcg_oom_recover(ug->memcg); | |
1a6e0f06 JK |
35060 | } |
35061 | ||
35062 | - local_irq_save(flags); | |
35063 | + local_lock_irqsave(event_lock, flags); | |
e4b2b4a8 JK |
35064 | __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon); |
35065 | __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file); | |
35066 | __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge); | |
35067 | @@ -5652,7 +5655,7 @@ | |
35068 | __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout); | |
35069 | __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages); | |
35070 | memcg_check_events(ug->memcg, ug->dummy_page); | |
1a6e0f06 JK |
35071 | - local_irq_restore(flags); |
35072 | + local_unlock_irqrestore(event_lock, flags); | |
35073 | ||
e4b2b4a8 JK |
35074 | if (!mem_cgroup_is_root(ug->memcg)) |
35075 | css_put_many(&ug->memcg->css, nr_pages); | |
35076 | @@ -5815,10 +5818,10 @@ | |
1a6e0f06 JK |
35077 | |
35078 | commit_charge(newpage, memcg, false); | |
35079 | ||
35080 | - local_irq_save(flags); | |
35081 | + local_lock_irqsave(event_lock, flags); | |
35082 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | |
35083 | memcg_check_events(memcg, newpage); | |
35084 | - local_irq_restore(flags); | |
35085 | + local_unlock_irqrestore(event_lock, flags); | |
35086 | } | |
35087 | ||
35088 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | |
e4b2b4a8 | 35089 | @@ -6010,6 +6013,7 @@ |
1a6e0f06 | 35090 | struct mem_cgroup *memcg, *swap_memcg; |
e4b2b4a8 | 35091 | unsigned int nr_entries; |
1a6e0f06 JK |
35092 | unsigned short oldid; |
35093 | + unsigned long flags; | |
35094 | ||
35095 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
35096 | VM_BUG_ON_PAGE(page_count(page), page); | |
e4b2b4a8 | 35097 | @@ -6055,13 +6059,17 @@ |
1a6e0f06 JK |
35098 | * important here to have the interrupts disabled because it is the |
35099 | * only synchronisation we have for udpating the per-CPU variables. | |
35100 | */ | |
35101 | + local_lock_irqsave(event_lock, flags); | |
35102 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
35103 | VM_BUG_ON(!irqs_disabled()); | |
35104 | +#endif | |
e4b2b4a8 JK |
35105 | mem_cgroup_charge_statistics(memcg, page, PageTransHuge(page), |
35106 | -nr_entries); | |
1a6e0f06 JK |
35107 | memcg_check_events(memcg, page); |
35108 | ||
35109 | if (!mem_cgroup_is_root(memcg)) | |
e4b2b4a8 | 35110 | css_put_many(&memcg->css, nr_entries); |
1a6e0f06 JK |
35111 | + local_unlock_irqrestore(event_lock, flags); |
35112 | } | |
35113 | ||
e4b2b4a8 JK |
35114 | /** |
35115 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/mmu_context.c linux-4.14/mm/mmu_context.c | |
35116 | --- linux-4.14.orig/mm/mmu_context.c 2017-11-12 19:46:13.000000000 +0100 | |
35117 | +++ linux-4.14/mm/mmu_context.c 2018-09-05 11:05:07.000000000 +0200 | |
35118 | @@ -25,6 +25,7 @@ | |
1a6e0f06 JK |
35119 | struct task_struct *tsk = current; |
35120 | ||
35121 | task_lock(tsk); | |
35122 | + preempt_disable_rt(); | |
35123 | active_mm = tsk->active_mm; | |
35124 | if (active_mm != mm) { | |
e4b2b4a8 JK |
35125 | mmgrab(mm); |
35126 | @@ -32,6 +33,7 @@ | |
1a6e0f06 JK |
35127 | } |
35128 | tsk->mm = mm; | |
35129 | switch_mm(active_mm, mm, tsk); | |
35130 | + preempt_enable_rt(); | |
35131 | task_unlock(tsk); | |
35132 | #ifdef finish_arch_post_lock_switch | |
35133 | finish_arch_post_lock_switch(); | |
e4b2b4a8 JK |
35134 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/page_alloc.c linux-4.14/mm/page_alloc.c |
35135 | --- linux-4.14.orig/mm/page_alloc.c 2018-09-05 11:03:25.000000000 +0200 | |
35136 | +++ linux-4.14/mm/page_alloc.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 | 35137 | @@ -61,6 +61,7 @@ |
1a6e0f06 JK |
35138 | #include <linux/hugetlb.h> |
35139 | #include <linux/sched/rt.h> | |
e4b2b4a8 | 35140 | #include <linux/sched/mm.h> |
1a6e0f06 JK |
35141 | +#include <linux/locallock.h> |
35142 | #include <linux/page_owner.h> | |
35143 | #include <linux/kthread.h> | |
35144 | #include <linux/memcontrol.h> | |
e4b2b4a8 | 35145 | @@ -286,6 +287,18 @@ |
1a6e0f06 JK |
35146 | EXPORT_SYMBOL(nr_online_nodes); |
35147 | #endif | |
35148 | ||
35149 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
35150 | + | |
35151 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
35152 | +# define cpu_lock_irqsave(cpu, flags) \ | |
35153 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
35154 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
35155 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
35156 | +#else | |
35157 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
35158 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
35159 | +#endif | |
35160 | + | |
35161 | int page_group_by_mobility_disabled __read_mostly; | |
35162 | ||
35163 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
e4b2b4a8 | 35164 | @@ -1094,7 +1107,7 @@ |
1a6e0f06 JK |
35165 | #endif /* CONFIG_DEBUG_VM */ |
35166 | ||
35167 | /* | |
35168 | - * Frees a number of pages from the PCP lists | |
35169 | + * Frees a number of pages which have been collected from the pcp lists. | |
35170 | * Assumes all pages on list are in same zone, and of same order. | |
35171 | * count is the number of pages to free. | |
35172 | * | |
e4b2b4a8 | 35173 | @@ -1105,15 +1118,53 @@ |
1a6e0f06 JK |
35174 | * pinned" detection logic. |
35175 | */ | |
35176 | static void free_pcppages_bulk(struct zone *zone, int count, | |
35177 | - struct per_cpu_pages *pcp) | |
35178 | + struct list_head *list) | |
35179 | { | |
35180 | - int migratetype = 0; | |
35181 | - int batch_free = 0; | |
1a6e0f06 JK |
35182 | bool isolated_pageblocks; |
35183 | + unsigned long flags; | |
1a6e0f06 JK |
35184 | |
35185 | - spin_lock(&zone->lock); | |
e4b2b4a8 | 35186 | + spin_lock_irqsave(&zone->lock, flags); |
1a6e0f06 | 35187 | isolated_pageblocks = has_isolate_pageblock(zone); |
1a6e0f06 JK |
35188 | |
35189 | + while (!list_empty(list)) { | |
35190 | + struct page *page; | |
e4b2b4a8 | 35191 | + int mt; /* migratetype of the to-be-freed page */ |
1a6e0f06 JK |
35192 | + |
35193 | + page = list_first_entry(list, struct page, lru); | |
35194 | + /* must delete as __free_one_page list manipulates */ | |
35195 | + list_del(&page->lru); | |
35196 | + | |
35197 | + mt = get_pcppage_migratetype(page); | |
35198 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
35199 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
35200 | + /* Pageblock could have been isolated meanwhile */ | |
35201 | + if (unlikely(isolated_pageblocks)) | |
35202 | + mt = get_pageblock_migratetype(page); | |
35203 | + | |
35204 | + if (bulkfree_pcp_prepare(page)) | |
35205 | + continue; | |
35206 | + | |
35207 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
35208 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
35209 | + count--; | |
35210 | + } | |
35211 | + WARN_ON(count != 0); | |
35212 | + spin_unlock_irqrestore(&zone->lock, flags); | |
35213 | +} | |
35214 | + | |
35215 | +/* | |
35216 | + * Moves a number of pages from the PCP lists to free list which | |
35217 | + * is freed outside of the locked region. | |
35218 | + * | |
35219 | + * Assumes all pages on list are in same zone, and of same order. | |
35220 | + * count is the number of pages to free. | |
35221 | + */ | |
35222 | +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, | |
35223 | + struct list_head *dst) | |
35224 | +{ | |
35225 | + int migratetype = 0; | |
35226 | + int batch_free = 0; | |
35227 | + | |
35228 | while (count) { | |
35229 | struct page *page; | |
35230 | struct list_head *list; | |
e4b2b4a8 | 35231 | @@ -1129,7 +1180,7 @@ |
1a6e0f06 JK |
35232 | batch_free++; |
35233 | if (++migratetype == MIGRATE_PCPTYPES) | |
35234 | migratetype = 0; | |
35235 | - list = &pcp->lists[migratetype]; | |
35236 | + list = &src->lists[migratetype]; | |
35237 | } while (list_empty(list)); | |
35238 | ||
35239 | /* This is the only non-empty list. Free them all. */ | |
e4b2b4a8 | 35240 | @@ -1137,27 +1188,12 @@ |
1a6e0f06 JK |
35241 | batch_free = count; |
35242 | ||
35243 | do { | |
35244 | - int mt; /* migratetype of the to-be-freed page */ | |
35245 | - | |
35246 | page = list_last_entry(list, struct page, lru); | |
35247 | - /* must delete as __free_one_page list manipulates */ | |
35248 | list_del(&page->lru); | |
35249 | ||
35250 | - mt = get_pcppage_migratetype(page); | |
35251 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
35252 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
35253 | - /* Pageblock could have been isolated meanwhile */ | |
35254 | - if (unlikely(isolated_pageblocks)) | |
35255 | - mt = get_pageblock_migratetype(page); | |
35256 | - | |
35257 | - if (bulkfree_pcp_prepare(page)) | |
35258 | - continue; | |
35259 | - | |
35260 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
35261 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
35262 | + list_add(&page->lru, dst); | |
35263 | } while (--count && --batch_free && !list_empty(list)); | |
35264 | } | |
35265 | - spin_unlock(&zone->lock); | |
35266 | } | |
35267 | ||
35268 | static void free_one_page(struct zone *zone, | |
e4b2b4a8 JK |
35269 | @@ -1165,13 +1201,15 @@ |
35270 | unsigned int order, | |
1a6e0f06 JK |
35271 | int migratetype) |
35272 | { | |
1a6e0f06 JK |
35273 | - spin_lock(&zone->lock); |
35274 | + unsigned long flags; | |
35275 | + | |
35276 | + spin_lock_irqsave(&zone->lock, flags); | |
e4b2b4a8 JK |
35277 | if (unlikely(has_isolate_pageblock(zone) || |
35278 | is_migrate_isolate(migratetype))) { | |
1a6e0f06 JK |
35279 | migratetype = get_pfnblock_migratetype(page, pfn); |
35280 | } | |
35281 | __free_one_page(page, pfn, zone, order, migratetype); | |
35282 | - spin_unlock(&zone->lock); | |
35283 | + spin_unlock_irqrestore(&zone->lock, flags); | |
35284 | } | |
35285 | ||
35286 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |
e4b2b4a8 | 35287 | @@ -1257,10 +1295,10 @@ |
1a6e0f06 JK |
35288 | return; |
35289 | ||
35290 | migratetype = get_pfnblock_migratetype(page, pfn); | |
35291 | - local_irq_save(flags); | |
35292 | + local_lock_irqsave(pa_lock, flags); | |
35293 | __count_vm_events(PGFREE, 1 << order); | |
35294 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
35295 | - local_irq_restore(flags); | |
35296 | + local_unlock_irqrestore(pa_lock, flags); | |
35297 | } | |
35298 | ||
35299 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |
e4b2b4a8 | 35300 | @@ -2378,16 +2416,18 @@ |
1a6e0f06 JK |
35301 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
35302 | { | |
35303 | unsigned long flags; | |
35304 | + LIST_HEAD(dst); | |
35305 | int to_drain, batch; | |
35306 | ||
35307 | - local_irq_save(flags); | |
35308 | + local_lock_irqsave(pa_lock, flags); | |
35309 | batch = READ_ONCE(pcp->batch); | |
35310 | to_drain = min(pcp->count, batch); | |
35311 | if (to_drain > 0) { | |
35312 | - free_pcppages_bulk(zone, to_drain, pcp); | |
35313 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
35314 | pcp->count -= to_drain; | |
35315 | } | |
35316 | - local_irq_restore(flags); | |
35317 | + local_unlock_irqrestore(pa_lock, flags); | |
35318 | + free_pcppages_bulk(zone, to_drain, &dst); | |
35319 | } | |
35320 | #endif | |
35321 | ||
e4b2b4a8 | 35322 | @@ -2403,16 +2443,21 @@ |
1a6e0f06 JK |
35323 | unsigned long flags; |
35324 | struct per_cpu_pageset *pset; | |
35325 | struct per_cpu_pages *pcp; | |
35326 | + LIST_HEAD(dst); | |
35327 | + int count; | |
35328 | ||
35329 | - local_irq_save(flags); | |
35330 | + cpu_lock_irqsave(cpu, flags); | |
35331 | pset = per_cpu_ptr(zone->pageset, cpu); | |
35332 | ||
35333 | pcp = &pset->pcp; | |
35334 | - if (pcp->count) { | |
35335 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
35336 | + count = pcp->count; | |
35337 | + if (count) { | |
35338 | + isolate_pcp_pages(count, pcp, &dst); | |
35339 | pcp->count = 0; | |
35340 | } | |
35341 | - local_irq_restore(flags); | |
35342 | + cpu_unlock_irqrestore(cpu, flags); | |
35343 | + if (count) | |
35344 | + free_pcppages_bulk(zone, count, &dst); | |
35345 | } | |
35346 | ||
35347 | /* | |
e4b2b4a8 JK |
35348 | @@ -2447,6 +2492,7 @@ |
35349 | drain_pages(cpu); | |
35350 | } | |
35351 | ||
35352 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
35353 | static void drain_local_pages_wq(struct work_struct *work) | |
35354 | { | |
35355 | /* | |
35356 | @@ -2460,6 +2506,7 @@ | |
35357 | drain_local_pages(NULL); | |
35358 | preempt_enable(); | |
35359 | } | |
35360 | +#endif | |
35361 | ||
35362 | /* | |
35363 | * Spill all the per-cpu pages from all CPUs back into the buddy allocator. | |
35364 | @@ -2526,7 +2573,14 @@ | |
1a6e0f06 JK |
35365 | else |
35366 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
35367 | } | |
e4b2b4a8 JK |
35368 | - |
35369 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
1a6e0f06 JK |
35370 | + for_each_cpu(cpu, &cpus_with_pcps) { |
35371 | + if (zone) | |
35372 | + drain_pages_zone(cpu, zone); | |
35373 | + else | |
35374 | + drain_pages(cpu); | |
35375 | + } | |
e4b2b4a8 JK |
35376 | +#else |
35377 | for_each_cpu(cpu, &cpus_with_pcps) { | |
35378 | struct work_struct *work = per_cpu_ptr(&pcpu_drain, cpu); | |
35379 | INIT_WORK(work, drain_local_pages_wq); | |
35380 | @@ -2534,6 +2588,7 @@ | |
35381 | } | |
35382 | for_each_cpu(cpu, &cpus_with_pcps) | |
35383 | flush_work(per_cpu_ptr(&pcpu_drain, cpu)); | |
1a6e0f06 | 35384 | +#endif |
1a6e0f06 | 35385 | |
e4b2b4a8 JK |
35386 | mutex_unlock(&pcpu_drain_mutex); |
35387 | } | |
35388 | @@ -2610,7 +2665,7 @@ | |
1a6e0f06 JK |
35389 | |
35390 | migratetype = get_pfnblock_migratetype(page, pfn); | |
35391 | set_pcppage_migratetype(page, migratetype); | |
35392 | - local_irq_save(flags); | |
35393 | + local_lock_irqsave(pa_lock, flags); | |
35394 | __count_vm_event(PGFREE); | |
35395 | ||
35396 | /* | |
e4b2b4a8 | 35397 | @@ -2636,12 +2691,17 @@ |
1a6e0f06 JK |
35398 | pcp->count++; |
35399 | if (pcp->count >= pcp->high) { | |
35400 | unsigned long batch = READ_ONCE(pcp->batch); | |
35401 | - free_pcppages_bulk(zone, batch, pcp); | |
35402 | + LIST_HEAD(dst); | |
35403 | + | |
35404 | + isolate_pcp_pages(batch, pcp, &dst); | |
35405 | pcp->count -= batch; | |
35406 | + local_unlock_irqrestore(pa_lock, flags); | |
35407 | + free_pcppages_bulk(zone, batch, &dst); | |
35408 | + return; | |
35409 | } | |
35410 | ||
35411 | out: | |
35412 | - local_irq_restore(flags); | |
35413 | + local_unlock_irqrestore(pa_lock, flags); | |
35414 | } | |
35415 | ||
35416 | /* | |
e4b2b4a8 JK |
35417 | @@ -2789,7 +2849,7 @@ |
35418 | struct page *page; | |
35419 | unsigned long flags; | |
1a6e0f06 | 35420 | |
e4b2b4a8 JK |
35421 | - local_irq_save(flags); |
35422 | + local_lock_irqsave(pa_lock, flags); | |
35423 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
35424 | list = &pcp->lists[migratetype]; | |
35425 | page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list); | |
35426 | @@ -2797,7 +2857,7 @@ | |
35427 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
35428 | zone_statistics(preferred_zone, zone); | |
1a6e0f06 | 35429 | } |
e4b2b4a8 JK |
35430 | - local_irq_restore(flags); |
35431 | + local_unlock_irqrestore(pa_lock, flags); | |
35432 | return page; | |
35433 | } | |
35434 | ||
35435 | @@ -2824,7 +2884,7 @@ | |
35436 | * allocate greater than order-1 page units with __GFP_NOFAIL. | |
35437 | */ | |
35438 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | |
35439 | - spin_lock_irqsave(&zone->lock, flags); | |
35440 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
35441 | ||
35442 | do { | |
35443 | page = NULL; | |
35444 | @@ -2844,14 +2904,14 @@ | |
1a6e0f06 JK |
35445 | |
35446 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
e4b2b4a8 | 35447 | zone_statistics(preferred_zone, zone); |
1a6e0f06 JK |
35448 | - local_irq_restore(flags); |
35449 | + local_unlock_irqrestore(pa_lock, flags); | |
35450 | ||
e4b2b4a8 JK |
35451 | out: |
35452 | VM_BUG_ON_PAGE(page && bad_range(zone, page), page); | |
1a6e0f06 JK |
35453 | return page; |
35454 | ||
35455 | failed: | |
35456 | - local_irq_restore(flags); | |
35457 | + local_unlock_irqrestore(pa_lock, flags); | |
35458 | return NULL; | |
35459 | } | |
35460 | ||
e4b2b4a8 | 35461 | @@ -6778,8 +6838,9 @@ |
1a6e0f06 | 35462 | |
e4b2b4a8 | 35463 | static int page_alloc_cpu_dead(unsigned int cpu) |
1a6e0f06 | 35464 | { |
e4b2b4a8 JK |
35465 | - |
35466 | + local_lock_irq_on(swapvec_lock, cpu); | |
35467 | lru_add_drain_cpu(cpu); | |
35468 | + local_unlock_irq_on(swapvec_lock, cpu); | |
35469 | drain_pages(cpu); | |
1a6e0f06 | 35470 | |
e4b2b4a8 JK |
35471 | /* |
35472 | @@ -7683,7 +7744,7 @@ | |
1a6e0f06 JK |
35473 | struct per_cpu_pageset *pset; |
35474 | ||
35475 | /* avoid races with drain_pages() */ | |
35476 | - local_irq_save(flags); | |
35477 | + local_lock_irqsave(pa_lock, flags); | |
35478 | if (zone->pageset != &boot_pageset) { | |
35479 | for_each_online_cpu(cpu) { | |
35480 | pset = per_cpu_ptr(zone->pageset, cpu); | |
e4b2b4a8 | 35481 | @@ -7692,7 +7753,7 @@ |
1a6e0f06 JK |
35482 | free_percpu(zone->pageset); |
35483 | zone->pageset = &boot_pageset; | |
35484 | } | |
35485 | - local_irq_restore(flags); | |
35486 | + local_unlock_irqrestore(pa_lock, flags); | |
35487 | } | |
35488 | ||
35489 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
e4b2b4a8 JK |
35490 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/slab.h linux-4.14/mm/slab.h |
35491 | --- linux-4.14.orig/mm/slab.h 2018-09-05 11:03:25.000000000 +0200 | |
35492 | +++ linux-4.14/mm/slab.h 2018-09-05 11:05:07.000000000 +0200 | |
35493 | @@ -451,7 +451,11 @@ | |
1a6e0f06 JK |
35494 | * The slab lists for all objects. |
35495 | */ | |
35496 | struct kmem_cache_node { | |
35497 | +#ifdef CONFIG_SLUB | |
35498 | + raw_spinlock_t list_lock; | |
35499 | +#else | |
35500 | spinlock_t list_lock; | |
35501 | +#endif | |
35502 | ||
35503 | #ifdef CONFIG_SLAB | |
35504 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
e4b2b4a8 JK |
35505 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/slub.c linux-4.14/mm/slub.c |
35506 | --- linux-4.14.orig/mm/slub.c 2018-09-05 11:03:25.000000000 +0200 | |
35507 | +++ linux-4.14/mm/slub.c 2018-09-05 11:05:07.000000000 +0200 | |
35508 | @@ -1179,7 +1179,7 @@ | |
1a6e0f06 JK |
35509 | unsigned long uninitialized_var(flags); |
35510 | int ret = 0; | |
35511 | ||
35512 | - spin_lock_irqsave(&n->list_lock, flags); | |
35513 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35514 | slab_lock(page); | |
35515 | ||
35516 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | |
e4b2b4a8 | 35517 | @@ -1214,7 +1214,7 @@ |
1a6e0f06 JK |
35518 | bulk_cnt, cnt); |
35519 | ||
35520 | slab_unlock(page); | |
35521 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35522 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35523 | if (!ret) | |
35524 | slab_fix(s, "Object at 0x%p not freed", object); | |
35525 | return ret; | |
e4b2b4a8 | 35526 | @@ -1342,6 +1342,12 @@ |
1a6e0f06 JK |
35527 | |
35528 | #endif /* CONFIG_SLUB_DEBUG */ | |
35529 | ||
35530 | +struct slub_free_list { | |
35531 | + raw_spinlock_t lock; | |
35532 | + struct list_head list; | |
35533 | +}; | |
35534 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
35535 | + | |
35536 | /* | |
35537 | * Hooks for other subsystems that check memory allocations. In a typical | |
35538 | * production configuration these hooks all should produce no code at all. | |
e4b2b4a8 | 35539 | @@ -1561,10 +1567,17 @@ |
1a6e0f06 JK |
35540 | void *start, *p; |
35541 | int idx, order; | |
35542 | bool shuffle; | |
35543 | + bool enableirqs = false; | |
35544 | ||
35545 | flags &= gfp_allowed_mask; | |
35546 | ||
35547 | if (gfpflags_allow_blocking(flags)) | |
35548 | + enableirqs = true; | |
35549 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
e4b2b4a8 | 35550 | + if (system_state > SYSTEM_BOOTING) |
1a6e0f06 JK |
35551 | + enableirqs = true; |
35552 | +#endif | |
35553 | + if (enableirqs) | |
35554 | local_irq_enable(); | |
35555 | ||
35556 | flags |= s->allocflags; | |
e4b2b4a8 | 35557 | @@ -1623,7 +1636,7 @@ |
1a6e0f06 JK |
35558 | page->frozen = 1; |
35559 | ||
35560 | out: | |
35561 | - if (gfpflags_allow_blocking(flags)) | |
35562 | + if (enableirqs) | |
35563 | local_irq_disable(); | |
35564 | if (!page) | |
35565 | return NULL; | |
e4b2b4a8 | 35566 | @@ -1681,6 +1694,16 @@ |
1a6e0f06 JK |
35567 | __free_pages(page, order); |
35568 | } | |
35569 | ||
35570 | +static void free_delayed(struct list_head *h) | |
35571 | +{ | |
35572 | + while(!list_empty(h)) { | |
35573 | + struct page *page = list_first_entry(h, struct page, lru); | |
35574 | + | |
35575 | + list_del(&page->lru); | |
35576 | + __free_slab(page->slab_cache, page); | |
35577 | + } | |
35578 | +} | |
35579 | + | |
35580 | #define need_reserve_slab_rcu \ | |
35581 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
35582 | ||
e4b2b4a8 | 35583 | @@ -1712,6 +1735,12 @@ |
1a6e0f06 JK |
35584 | } |
35585 | ||
35586 | call_rcu(head, rcu_free_slab); | |
35587 | + } else if (irqs_disabled()) { | |
35588 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
35589 | + | |
35590 | + raw_spin_lock(&f->lock); | |
35591 | + list_add(&page->lru, &f->list); | |
35592 | + raw_spin_unlock(&f->lock); | |
35593 | } else | |
35594 | __free_slab(s, page); | |
35595 | } | |
e4b2b4a8 | 35596 | @@ -1819,7 +1848,7 @@ |
1a6e0f06 JK |
35597 | if (!n || !n->nr_partial) |
35598 | return NULL; | |
35599 | ||
35600 | - spin_lock(&n->list_lock); | |
35601 | + raw_spin_lock(&n->list_lock); | |
35602 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
35603 | void *t; | |
35604 | ||
e4b2b4a8 | 35605 | @@ -1844,7 +1873,7 @@ |
1a6e0f06 JK |
35606 | break; |
35607 | ||
35608 | } | |
35609 | - spin_unlock(&n->list_lock); | |
35610 | + raw_spin_unlock(&n->list_lock); | |
35611 | return object; | |
35612 | } | |
35613 | ||
e4b2b4a8 | 35614 | @@ -2090,7 +2119,7 @@ |
1a6e0f06 JK |
35615 | * that acquire_slab() will see a slab page that |
35616 | * is frozen | |
35617 | */ | |
35618 | - spin_lock(&n->list_lock); | |
35619 | + raw_spin_lock(&n->list_lock); | |
35620 | } | |
35621 | } else { | |
35622 | m = M_FULL; | |
e4b2b4a8 | 35623 | @@ -2101,7 +2130,7 @@ |
1a6e0f06 JK |
35624 | * slabs from diagnostic functions will not see |
35625 | * any frozen slabs. | |
35626 | */ | |
35627 | - spin_lock(&n->list_lock); | |
35628 | + raw_spin_lock(&n->list_lock); | |
35629 | } | |
35630 | } | |
35631 | ||
e4b2b4a8 | 35632 | @@ -2136,7 +2165,7 @@ |
1a6e0f06 JK |
35633 | goto redo; |
35634 | ||
35635 | if (lock) | |
35636 | - spin_unlock(&n->list_lock); | |
35637 | + raw_spin_unlock(&n->list_lock); | |
35638 | ||
35639 | if (m == M_FREE) { | |
35640 | stat(s, DEACTIVATE_EMPTY); | |
e4b2b4a8 | 35641 | @@ -2171,10 +2200,10 @@ |
1a6e0f06 JK |
35642 | n2 = get_node(s, page_to_nid(page)); |
35643 | if (n != n2) { | |
35644 | if (n) | |
35645 | - spin_unlock(&n->list_lock); | |
35646 | + raw_spin_unlock(&n->list_lock); | |
35647 | ||
35648 | n = n2; | |
35649 | - spin_lock(&n->list_lock); | |
35650 | + raw_spin_lock(&n->list_lock); | |
35651 | } | |
35652 | ||
35653 | do { | |
e4b2b4a8 | 35654 | @@ -2203,7 +2232,7 @@ |
1a6e0f06 JK |
35655 | } |
35656 | ||
35657 | if (n) | |
35658 | - spin_unlock(&n->list_lock); | |
35659 | + raw_spin_unlock(&n->list_lock); | |
35660 | ||
35661 | while (discard_page) { | |
35662 | page = discard_page; | |
e4b2b4a8 | 35663 | @@ -2242,14 +2271,21 @@ |
1a6e0f06 JK |
35664 | pobjects = oldpage->pobjects; |
35665 | pages = oldpage->pages; | |
35666 | if (drain && pobjects > s->cpu_partial) { | |
35667 | + struct slub_free_list *f; | |
35668 | unsigned long flags; | |
35669 | + LIST_HEAD(tofree); | |
35670 | /* | |
35671 | * partial array is full. Move the existing | |
35672 | * set to the per node partial list. | |
35673 | */ | |
35674 | local_irq_save(flags); | |
35675 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
35676 | + f = this_cpu_ptr(&slub_free_list); | |
35677 | + raw_spin_lock(&f->lock); | |
35678 | + list_splice_init(&f->list, &tofree); | |
35679 | + raw_spin_unlock(&f->lock); | |
35680 | local_irq_restore(flags); | |
35681 | + free_delayed(&tofree); | |
35682 | oldpage = NULL; | |
35683 | pobjects = 0; | |
35684 | pages = 0; | |
e4b2b4a8 | 35685 | @@ -2319,7 +2355,22 @@ |
1a6e0f06 JK |
35686 | |
35687 | static void flush_all(struct kmem_cache *s) | |
35688 | { | |
35689 | + LIST_HEAD(tofree); | |
35690 | + int cpu; | |
35691 | + | |
35692 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
35693 | + for_each_online_cpu(cpu) { | |
35694 | + struct slub_free_list *f; | |
35695 | + | |
35696 | + if (!has_cpu_slab(cpu, s)) | |
35697 | + continue; | |
35698 | + | |
35699 | + f = &per_cpu(slub_free_list, cpu); | |
35700 | + raw_spin_lock_irq(&f->lock); | |
35701 | + list_splice_init(&f->list, &tofree); | |
35702 | + raw_spin_unlock_irq(&f->lock); | |
35703 | + free_delayed(&tofree); | |
35704 | + } | |
35705 | } | |
35706 | ||
35707 | /* | |
e4b2b4a8 | 35708 | @@ -2374,10 +2425,10 @@ |
1a6e0f06 JK |
35709 | unsigned long x = 0; |
35710 | struct page *page; | |
35711 | ||
35712 | - spin_lock_irqsave(&n->list_lock, flags); | |
35713 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35714 | list_for_each_entry(page, &n->partial, lru) | |
35715 | x += get_count(page); | |
35716 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35717 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35718 | return x; | |
35719 | } | |
35720 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
e4b2b4a8 | 35721 | @@ -2515,8 +2566,10 @@ |
1a6e0f06 JK |
35722 | * already disabled (which is the case for bulk allocation). |
35723 | */ | |
35724 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
35725 | - unsigned long addr, struct kmem_cache_cpu *c) | |
35726 | + unsigned long addr, struct kmem_cache_cpu *c, | |
35727 | + struct list_head *to_free) | |
35728 | { | |
35729 | + struct slub_free_list *f; | |
35730 | void *freelist; | |
35731 | struct page *page; | |
35732 | ||
e4b2b4a8 | 35733 | @@ -2572,6 +2625,13 @@ |
1a6e0f06 JK |
35734 | VM_BUG_ON(!c->page->frozen); |
35735 | c->freelist = get_freepointer(s, freelist); | |
35736 | c->tid = next_tid(c->tid); | |
35737 | + | |
35738 | +out: | |
35739 | + f = this_cpu_ptr(&slub_free_list); | |
35740 | + raw_spin_lock(&f->lock); | |
35741 | + list_splice_init(&f->list, to_free); | |
35742 | + raw_spin_unlock(&f->lock); | |
35743 | + | |
35744 | return freelist; | |
35745 | ||
35746 | new_slab: | |
e4b2b4a8 JK |
35747 | @@ -2587,7 +2647,7 @@ |
35748 | ||
35749 | if (unlikely(!freelist)) { | |
35750 | slab_out_of_memory(s, gfpflags, node); | |
35751 | - return NULL; | |
35752 | + goto out; | |
35753 | } | |
35754 | ||
35755 | page = c->page; | |
35756 | @@ -2600,7 +2660,7 @@ | |
35757 | goto new_slab; /* Slab failed checks. Next slab needed */ | |
35758 | ||
35759 | deactivate_slab(s, page, get_freepointer(s, freelist), c); | |
1a6e0f06 JK |
35760 | - return freelist; |
35761 | + goto out; | |
35762 | } | |
35763 | ||
35764 | /* | |
e4b2b4a8 | 35765 | @@ -2612,6 +2672,7 @@ |
1a6e0f06 JK |
35766 | { |
35767 | void *p; | |
35768 | unsigned long flags; | |
35769 | + LIST_HEAD(tofree); | |
35770 | ||
35771 | local_irq_save(flags); | |
35772 | #ifdef CONFIG_PREEMPT | |
e4b2b4a8 | 35773 | @@ -2623,8 +2684,9 @@ |
1a6e0f06 JK |
35774 | c = this_cpu_ptr(s->cpu_slab); |
35775 | #endif | |
35776 | ||
35777 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
35778 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
35779 | local_irq_restore(flags); | |
35780 | + free_delayed(&tofree); | |
35781 | return p; | |
35782 | } | |
35783 | ||
e4b2b4a8 | 35784 | @@ -2810,7 +2872,7 @@ |
1a6e0f06 JK |
35785 | |
35786 | do { | |
35787 | if (unlikely(n)) { | |
35788 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35789 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35790 | n = NULL; | |
35791 | } | |
35792 | prior = page->freelist; | |
e4b2b4a8 | 35793 | @@ -2842,7 +2904,7 @@ |
1a6e0f06 JK |
35794 | * Otherwise the list_lock will synchronize with |
35795 | * other processors updating the list of slabs. | |
35796 | */ | |
35797 | - spin_lock_irqsave(&n->list_lock, flags); | |
35798 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35799 | ||
35800 | } | |
35801 | } | |
e4b2b4a8 | 35802 | @@ -2884,7 +2946,7 @@ |
1a6e0f06 JK |
35803 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
35804 | stat(s, FREE_ADD_PARTIAL); | |
35805 | } | |
35806 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35807 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35808 | return; | |
35809 | ||
35810 | slab_empty: | |
e4b2b4a8 | 35811 | @@ -2899,7 +2961,7 @@ |
1a6e0f06 JK |
35812 | remove_full(s, n, page); |
35813 | } | |
35814 | ||
35815 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35816 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35817 | stat(s, FREE_SLAB); | |
35818 | discard_slab(s, page); | |
35819 | } | |
e4b2b4a8 | 35820 | @@ -3104,6 +3166,7 @@ |
1a6e0f06 JK |
35821 | void **p) |
35822 | { | |
35823 | struct kmem_cache_cpu *c; | |
35824 | + LIST_HEAD(to_free); | |
35825 | int i; | |
35826 | ||
35827 | /* memcg and kmem_cache debug support */ | |
e4b2b4a8 | 35828 | @@ -3127,7 +3190,7 @@ |
1a6e0f06 JK |
35829 | * of re-populating per CPU c->freelist |
35830 | */ | |
35831 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
35832 | - _RET_IP_, c); | |
35833 | + _RET_IP_, c, &to_free); | |
35834 | if (unlikely(!p[i])) | |
35835 | goto error; | |
35836 | ||
e4b2b4a8 | 35837 | @@ -3139,6 +3202,7 @@ |
1a6e0f06 JK |
35838 | } |
35839 | c->tid = next_tid(c->tid); | |
35840 | local_irq_enable(); | |
35841 | + free_delayed(&to_free); | |
35842 | ||
35843 | /* Clear memory outside IRQ disabled fastpath loop */ | |
35844 | if (unlikely(flags & __GFP_ZERO)) { | |
e4b2b4a8 JK |
35845 | @@ -3153,6 +3217,7 @@ |
35846 | return i; | |
35847 | error: | |
35848 | local_irq_enable(); | |
35849 | + free_delayed(&to_free); | |
35850 | slab_post_alloc_hook(s, flags, i, p); | |
35851 | __kmem_cache_free_bulk(s, i, p); | |
35852 | return 0; | |
35853 | @@ -3286,7 +3351,7 @@ | |
1a6e0f06 JK |
35854 | init_kmem_cache_node(struct kmem_cache_node *n) |
35855 | { | |
35856 | n->nr_partial = 0; | |
35857 | - spin_lock_init(&n->list_lock); | |
35858 | + raw_spin_lock_init(&n->list_lock); | |
35859 | INIT_LIST_HEAD(&n->partial); | |
35860 | #ifdef CONFIG_SLUB_DEBUG | |
35861 | atomic_long_set(&n->nr_slabs, 0); | |
e4b2b4a8 | 35862 | @@ -3640,6 +3705,10 @@ |
1a6e0f06 JK |
35863 | const char *text) |
35864 | { | |
35865 | #ifdef CONFIG_SLUB_DEBUG | |
35866 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
35867 | + /* XXX move out of irq-off section */ | |
35868 | + slab_err(s, page, text, s->name); | |
35869 | +#else | |
35870 | void *addr = page_address(page); | |
35871 | void *p; | |
35872 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * | |
e4b2b4a8 | 35873 | @@ -3660,6 +3729,7 @@ |
1a6e0f06 JK |
35874 | slab_unlock(page); |
35875 | kfree(map); | |
35876 | #endif | |
35877 | +#endif | |
35878 | } | |
35879 | ||
35880 | /* | |
e4b2b4a8 | 35881 | @@ -3673,7 +3743,7 @@ |
1a6e0f06 JK |
35882 | struct page *page, *h; |
35883 | ||
35884 | BUG_ON(irqs_disabled()); | |
35885 | - spin_lock_irq(&n->list_lock); | |
35886 | + raw_spin_lock_irq(&n->list_lock); | |
35887 | list_for_each_entry_safe(page, h, &n->partial, lru) { | |
35888 | if (!page->inuse) { | |
35889 | remove_partial(n, page); | |
e4b2b4a8 | 35890 | @@ -3683,7 +3753,7 @@ |
1a6e0f06 JK |
35891 | "Objects remaining in %s on __kmem_cache_shutdown()"); |
35892 | } | |
35893 | } | |
35894 | - spin_unlock_irq(&n->list_lock); | |
35895 | + raw_spin_unlock_irq(&n->list_lock); | |
35896 | ||
35897 | list_for_each_entry_safe(page, h, &discard, lru) | |
35898 | discard_slab(s, page); | |
e4b2b4a8 | 35899 | @@ -3927,7 +3997,7 @@ |
1a6e0f06 JK |
35900 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) |
35901 | INIT_LIST_HEAD(promote + i); | |
35902 | ||
35903 | - spin_lock_irqsave(&n->list_lock, flags); | |
35904 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35905 | ||
35906 | /* | |
35907 | * Build lists of slabs to discard or promote. | |
e4b2b4a8 | 35908 | @@ -3958,7 +4028,7 @@ |
1a6e0f06 JK |
35909 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) |
35910 | list_splice(promote + i, &n->partial); | |
35911 | ||
35912 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35913 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35914 | ||
35915 | /* Release empty slabs */ | |
35916 | list_for_each_entry_safe(page, t, &discard, lru) | |
e4b2b4a8 | 35917 | @@ -4171,6 +4241,12 @@ |
1a6e0f06 JK |
35918 | { |
35919 | static __initdata struct kmem_cache boot_kmem_cache, | |
35920 | boot_kmem_cache_node; | |
35921 | + int cpu; | |
35922 | + | |
35923 | + for_each_possible_cpu(cpu) { | |
35924 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
35925 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
35926 | + } | |
35927 | ||
35928 | if (debug_guardpage_minorder()) | |
35929 | slub_max_order = 0; | |
e4b2b4a8 | 35930 | @@ -4379,7 +4455,7 @@ |
1a6e0f06 JK |
35931 | struct page *page; |
35932 | unsigned long flags; | |
35933 | ||
35934 | - spin_lock_irqsave(&n->list_lock, flags); | |
35935 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35936 | ||
35937 | list_for_each_entry(page, &n->partial, lru) { | |
35938 | validate_slab_slab(s, page, map); | |
e4b2b4a8 | 35939 | @@ -4401,7 +4477,7 @@ |
1a6e0f06 JK |
35940 | s->name, count, atomic_long_read(&n->nr_slabs)); |
35941 | ||
35942 | out: | |
35943 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35944 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35945 | return count; | |
35946 | } | |
35947 | ||
e4b2b4a8 | 35948 | @@ -4589,12 +4665,12 @@ |
1a6e0f06 JK |
35949 | if (!atomic_long_read(&n->nr_slabs)) |
35950 | continue; | |
35951 | ||
35952 | - spin_lock_irqsave(&n->list_lock, flags); | |
35953 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
35954 | list_for_each_entry(page, &n->partial, lru) | |
35955 | process_slab(&t, s, page, alloc, map); | |
35956 | list_for_each_entry(page, &n->full, lru) | |
35957 | process_slab(&t, s, page, alloc, map); | |
35958 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
35959 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
35960 | } | |
35961 | ||
35962 | for (i = 0; i < t.count; i++) { | |
e4b2b4a8 JK |
35963 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/swap.c linux-4.14/mm/swap.c |
35964 | --- linux-4.14.orig/mm/swap.c 2017-11-12 19:46:13.000000000 +0100 | |
35965 | +++ linux-4.14/mm/swap.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
35966 | @@ -32,6 +32,7 @@ |
35967 | #include <linux/memcontrol.h> | |
35968 | #include <linux/gfp.h> | |
35969 | #include <linux/uio.h> | |
35970 | +#include <linux/locallock.h> | |
35971 | #include <linux/hugetlb.h> | |
35972 | #include <linux/page_idle.h> | |
35973 | ||
e4b2b4a8 | 35974 | @@ -50,6 +51,8 @@ |
1a6e0f06 JK |
35975 | #ifdef CONFIG_SMP |
35976 | static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
35977 | #endif | |
35978 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
35979 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
35980 | ||
35981 | /* | |
35982 | * This path almost never happens for VM activity - pages are normally | |
e4b2b4a8 | 35983 | @@ -252,11 +255,11 @@ |
1a6e0f06 JK |
35984 | unsigned long flags; |
35985 | ||
35986 | get_page(page); | |
35987 | - local_irq_save(flags); | |
35988 | + local_lock_irqsave(rotate_lock, flags); | |
35989 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
35990 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
35991 | pagevec_move_tail(pvec); | |
35992 | - local_irq_restore(flags); | |
35993 | + local_unlock_irqrestore(rotate_lock, flags); | |
35994 | } | |
35995 | } | |
35996 | ||
e4b2b4a8 | 35997 | @@ -306,12 +309,13 @@ |
1a6e0f06 JK |
35998 | { |
35999 | page = compound_head(page); | |
36000 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
36001 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
36002 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
36003 | + activate_page_pvecs); | |
36004 | ||
36005 | get_page(page); | |
36006 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
36007 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
36008 | - put_cpu_var(activate_page_pvecs); | |
36009 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
36010 | } | |
36011 | } | |
36012 | ||
e4b2b4a8 | 36013 | @@ -338,7 +342,7 @@ |
1a6e0f06 JK |
36014 | |
36015 | static void __lru_cache_activate_page(struct page *page) | |
36016 | { | |
36017 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
36018 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
36019 | int i; | |
36020 | ||
36021 | /* | |
e4b2b4a8 | 36022 | @@ -360,7 +364,7 @@ |
1a6e0f06 JK |
36023 | } |
36024 | } | |
36025 | ||
36026 | - put_cpu_var(lru_add_pvec); | |
36027 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
36028 | } | |
36029 | ||
36030 | /* | |
e4b2b4a8 | 36031 | @@ -402,12 +406,12 @@ |
1a6e0f06 JK |
36032 | |
36033 | static void __lru_cache_add(struct page *page) | |
36034 | { | |
36035 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
36036 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
36037 | ||
36038 | get_page(page); | |
36039 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
36040 | __pagevec_lru_add(pvec); | |
36041 | - put_cpu_var(lru_add_pvec); | |
36042 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
36043 | } | |
36044 | ||
36045 | /** | |
e4b2b4a8 | 36046 | @@ -613,9 +617,15 @@ |
1a6e0f06 JK |
36047 | unsigned long flags; |
36048 | ||
36049 | /* No harm done if a racing interrupt already did this */ | |
36050 | - local_irq_save(flags); | |
36051 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
36052 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
36053 | pagevec_move_tail(pvec); | |
36054 | - local_irq_restore(flags); | |
36055 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
36056 | +#else | |
36057 | + local_lock_irqsave(rotate_lock, flags); | |
36058 | + pagevec_move_tail(pvec); | |
36059 | + local_unlock_irqrestore(rotate_lock, flags); | |
36060 | +#endif | |
36061 | } | |
36062 | ||
36063 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
e4b2b4a8 | 36064 | @@ -647,11 +657,12 @@ |
1a6e0f06 JK |
36065 | return; |
36066 | ||
36067 | if (likely(get_page_unless_zero(page))) { | |
36068 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
36069 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
36070 | + lru_deactivate_file_pvecs); | |
36071 | ||
36072 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
36073 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
36074 | - put_cpu_var(lru_deactivate_file_pvecs); | |
36075 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
36076 | } | |
36077 | } | |
36078 | ||
e4b2b4a8 | 36079 | @@ -666,21 +677,32 @@ |
1a6e0f06 | 36080 | { |
e4b2b4a8 JK |
36081 | if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && |
36082 | !PageSwapCache(page) && !PageUnevictable(page)) { | |
36083 | - struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs); | |
1a6e0f06 | 36084 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, |
e4b2b4a8 | 36085 | + lru_lazyfree_pvecs); |
1a6e0f06 JK |
36086 | |
36087 | get_page(page); | |
36088 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
e4b2b4a8 JK |
36089 | pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL); |
36090 | - put_cpu_var(lru_lazyfree_pvecs); | |
36091 | + put_locked_var(swapvec_lock, lru_lazyfree_pvecs); | |
1a6e0f06 JK |
36092 | } |
36093 | } | |
36094 | ||
36095 | void lru_add_drain(void) | |
36096 | { | |
36097 | - lru_add_drain_cpu(get_cpu()); | |
36098 | - put_cpu(); | |
36099 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
36100 | + local_unlock_cpu(swapvec_lock); | |
36101 | } | |
36102 | ||
1a6e0f06 JK |
36103 | +#ifdef CONFIG_PREEMPT_RT_BASE |
36104 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
e4b2b4a8 | 36105 | +{ |
1a6e0f06 JK |
36106 | + local_lock_on(swapvec_lock, cpu); |
36107 | + lru_add_drain_cpu(cpu); | |
36108 | + local_unlock_on(swapvec_lock, cpu); | |
e4b2b4a8 JK |
36109 | +} |
36110 | + | |
1a6e0f06 | 36111 | +#else |
e4b2b4a8 JK |
36112 | + |
36113 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
36114 | { | |
36115 | lru_add_drain(); | |
36116 | @@ -688,6 +710,16 @@ | |
1a6e0f06 | 36117 | |
e4b2b4a8 | 36118 | static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); |
1a6e0f06 | 36119 | |
1a6e0f06 JK |
36120 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) |
36121 | +{ | |
36122 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
36123 | + | |
36124 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
e4b2b4a8 | 36125 | + queue_work_on(cpu, mm_percpu_wq, work); |
1a6e0f06 JK |
36126 | + cpumask_set_cpu(cpu, has_work); |
36127 | +} | |
36128 | +#endif | |
36129 | + | |
e4b2b4a8 | 36130 | void lru_add_drain_all_cpuslocked(void) |
1a6e0f06 JK |
36131 | { |
36132 | static DEFINE_MUTEX(lock); | |
e4b2b4a8 | 36133 | @@ -705,21 +737,19 @@ |
1a6e0f06 JK |
36134 | cpumask_clear(&has_work); |
36135 | ||
36136 | for_each_online_cpu(cpu) { | |
36137 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
e4b2b4a8 | 36138 | |
1a6e0f06 JK |
36139 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || |
36140 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
36141 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
e4b2b4a8 | 36142 | pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) || |
1a6e0f06 JK |
36143 | - need_activate_page_drain(cpu)) { |
36144 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
e4b2b4a8 | 36145 | - queue_work_on(cpu, mm_percpu_wq, work); |
1a6e0f06 JK |
36146 | - cpumask_set_cpu(cpu, &has_work); |
36147 | - } | |
36148 | + need_activate_page_drain(cpu)) | |
36149 | + remote_lru_add_drain(cpu, &has_work); | |
36150 | } | |
36151 | ||
36152 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
36153 | for_each_cpu(cpu, &has_work) | |
36154 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
36155 | +#endif | |
36156 | ||
1a6e0f06 | 36157 | mutex_unlock(&lock); |
e4b2b4a8 JK |
36158 | } |
36159 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/truncate.c linux-4.14/mm/truncate.c | |
36160 | --- linux-4.14.orig/mm/truncate.c 2017-11-12 19:46:13.000000000 +0100 | |
36161 | +++ linux-4.14/mm/truncate.c 2018-09-05 11:05:07.000000000 +0200 | |
36162 | @@ -41,8 +41,10 @@ | |
36163 | goto unlock; | |
36164 | if (*slot != entry) | |
36165 | goto unlock; | |
36166 | + local_lock(shadow_nodes_lock); | |
36167 | __radix_tree_replace(&mapping->page_tree, node, slot, NULL, | |
36168 | - workingset_update_node, mapping); | |
36169 | + __workingset_update_node, mapping); | |
36170 | + local_unlock(shadow_nodes_lock); | |
36171 | mapping->nrexceptional--; | |
1a6e0f06 JK |
36172 | unlock: |
36173 | spin_unlock_irq(&mapping->tree_lock); | |
e4b2b4a8 JK |
36174 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/vmalloc.c linux-4.14/mm/vmalloc.c |
36175 | --- linux-4.14.orig/mm/vmalloc.c 2018-09-05 11:03:25.000000000 +0200 | |
36176 | +++ linux-4.14/mm/vmalloc.c 2018-09-05 11:05:07.000000000 +0200 | |
36177 | @@ -865,7 +865,7 @@ | |
1a6e0f06 JK |
36178 | struct vmap_block *vb; |
36179 | struct vmap_area *va; | |
36180 | unsigned long vb_idx; | |
36181 | - int node, err; | |
36182 | + int node, err, cpu; | |
36183 | void *vaddr; | |
36184 | ||
36185 | node = numa_node_id(); | |
e4b2b4a8 | 36186 | @@ -908,11 +908,12 @@ |
1a6e0f06 JK |
36187 | BUG_ON(err); |
36188 | radix_tree_preload_end(); | |
36189 | ||
36190 | - vbq = &get_cpu_var(vmap_block_queue); | |
36191 | + cpu = get_cpu_light(); | |
36192 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
36193 | spin_lock(&vbq->lock); | |
36194 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
36195 | spin_unlock(&vbq->lock); | |
36196 | - put_cpu_var(vmap_block_queue); | |
36197 | + put_cpu_light(); | |
36198 | ||
36199 | return vaddr; | |
36200 | } | |
e4b2b4a8 | 36201 | @@ -981,6 +982,7 @@ |
1a6e0f06 JK |
36202 | struct vmap_block *vb; |
36203 | void *vaddr = NULL; | |
36204 | unsigned int order; | |
36205 | + int cpu; | |
36206 | ||
36207 | BUG_ON(offset_in_page(size)); | |
36208 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
e4b2b4a8 | 36209 | @@ -995,7 +997,8 @@ |
1a6e0f06 JK |
36210 | order = get_order(size); |
36211 | ||
36212 | rcu_read_lock(); | |
36213 | - vbq = &get_cpu_var(vmap_block_queue); | |
36214 | + cpu = get_cpu_light(); | |
36215 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
36216 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
36217 | unsigned long pages_off; | |
36218 | ||
e4b2b4a8 | 36219 | @@ -1018,7 +1021,7 @@ |
1a6e0f06 JK |
36220 | break; |
36221 | } | |
36222 | ||
36223 | - put_cpu_var(vmap_block_queue); | |
36224 | + put_cpu_light(); | |
36225 | rcu_read_unlock(); | |
36226 | ||
36227 | /* Allocate new block if nothing was found */ | |
e4b2b4a8 JK |
36228 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/vmstat.c linux-4.14/mm/vmstat.c |
36229 | --- linux-4.14.orig/mm/vmstat.c 2017-11-12 19:46:13.000000000 +0100 | |
36230 | +++ linux-4.14/mm/vmstat.c 2018-09-05 11:05:07.000000000 +0200 | |
36231 | @@ -249,6 +249,7 @@ | |
1a6e0f06 JK |
36232 | long x; |
36233 | long t; | |
36234 | ||
36235 | + preempt_disable_rt(); | |
36236 | x = delta + __this_cpu_read(*p); | |
36237 | ||
36238 | t = __this_cpu_read(pcp->stat_threshold); | |
e4b2b4a8 | 36239 | @@ -258,6 +259,7 @@ |
1a6e0f06 JK |
36240 | x = 0; |
36241 | } | |
36242 | __this_cpu_write(*p, x); | |
36243 | + preempt_enable_rt(); | |
36244 | } | |
36245 | EXPORT_SYMBOL(__mod_zone_page_state); | |
36246 | ||
e4b2b4a8 | 36247 | @@ -269,6 +271,7 @@ |
1a6e0f06 JK |
36248 | long x; |
36249 | long t; | |
36250 | ||
36251 | + preempt_disable_rt(); | |
36252 | x = delta + __this_cpu_read(*p); | |
36253 | ||
36254 | t = __this_cpu_read(pcp->stat_threshold); | |
e4b2b4a8 | 36255 | @@ -278,6 +281,7 @@ |
1a6e0f06 JK |
36256 | x = 0; |
36257 | } | |
36258 | __this_cpu_write(*p, x); | |
36259 | + preempt_enable_rt(); | |
36260 | } | |
36261 | EXPORT_SYMBOL(__mod_node_page_state); | |
36262 | ||
e4b2b4a8 | 36263 | @@ -310,6 +314,7 @@ |
1a6e0f06 JK |
36264 | s8 __percpu *p = pcp->vm_stat_diff + item; |
36265 | s8 v, t; | |
36266 | ||
36267 | + preempt_disable_rt(); | |
36268 | v = __this_cpu_inc_return(*p); | |
36269 | t = __this_cpu_read(pcp->stat_threshold); | |
36270 | if (unlikely(v > t)) { | |
e4b2b4a8 | 36271 | @@ -318,6 +323,7 @@ |
1a6e0f06 JK |
36272 | zone_page_state_add(v + overstep, zone, item); |
36273 | __this_cpu_write(*p, -overstep); | |
36274 | } | |
36275 | + preempt_enable_rt(); | |
36276 | } | |
36277 | ||
36278 | void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
e4b2b4a8 | 36279 | @@ -326,6 +332,7 @@ |
1a6e0f06 JK |
36280 | s8 __percpu *p = pcp->vm_node_stat_diff + item; |
36281 | s8 v, t; | |
36282 | ||
36283 | + preempt_disable_rt(); | |
36284 | v = __this_cpu_inc_return(*p); | |
36285 | t = __this_cpu_read(pcp->stat_threshold); | |
36286 | if (unlikely(v > t)) { | |
e4b2b4a8 | 36287 | @@ -334,6 +341,7 @@ |
1a6e0f06 JK |
36288 | node_page_state_add(v + overstep, pgdat, item); |
36289 | __this_cpu_write(*p, -overstep); | |
36290 | } | |
36291 | + preempt_enable_rt(); | |
36292 | } | |
36293 | ||
36294 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
e4b2b4a8 | 36295 | @@ -354,6 +362,7 @@ |
1a6e0f06 JK |
36296 | s8 __percpu *p = pcp->vm_stat_diff + item; |
36297 | s8 v, t; | |
36298 | ||
36299 | + preempt_disable_rt(); | |
36300 | v = __this_cpu_dec_return(*p); | |
36301 | t = __this_cpu_read(pcp->stat_threshold); | |
36302 | if (unlikely(v < - t)) { | |
e4b2b4a8 | 36303 | @@ -362,6 +371,7 @@ |
1a6e0f06 JK |
36304 | zone_page_state_add(v - overstep, zone, item); |
36305 | __this_cpu_write(*p, overstep); | |
36306 | } | |
36307 | + preempt_enable_rt(); | |
36308 | } | |
36309 | ||
36310 | void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
e4b2b4a8 | 36311 | @@ -370,6 +380,7 @@ |
1a6e0f06 JK |
36312 | s8 __percpu *p = pcp->vm_node_stat_diff + item; |
36313 | s8 v, t; | |
36314 | ||
36315 | + preempt_disable_rt(); | |
36316 | v = __this_cpu_dec_return(*p); | |
36317 | t = __this_cpu_read(pcp->stat_threshold); | |
36318 | if (unlikely(v < - t)) { | |
e4b2b4a8 | 36319 | @@ -378,6 +389,7 @@ |
1a6e0f06 JK |
36320 | node_page_state_add(v - overstep, pgdat, item); |
36321 | __this_cpu_write(*p, overstep); | |
36322 | } | |
36323 | + preempt_enable_rt(); | |
36324 | } | |
36325 | ||
36326 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
e4b2b4a8 JK |
36327 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/workingset.c linux-4.14/mm/workingset.c |
36328 | --- linux-4.14.orig/mm/workingset.c 2017-11-12 19:46:13.000000000 +0100 | |
36329 | +++ linux-4.14/mm/workingset.c 2018-09-05 11:05:07.000000000 +0200 | |
36330 | @@ -338,9 +338,10 @@ | |
1a6e0f06 JK |
36331 | * point where they would still be useful. |
36332 | */ | |
36333 | ||
e4b2b4a8 JK |
36334 | -static struct list_lru shadow_nodes; |
36335 | +static struct list_lru __shadow_nodes; | |
36336 | +DEFINE_LOCAL_IRQ_LOCK(shadow_nodes_lock); | |
36337 | ||
36338 | -void workingset_update_node(struct radix_tree_node *node, void *private) | |
36339 | +void __workingset_update_node(struct radix_tree_node *node, void *private) | |
36340 | { | |
36341 | struct address_space *mapping = private; | |
36342 | ||
36343 | @@ -358,10 +359,10 @@ | |
36344 | */ | |
36345 | if (node->count && node->count == node->exceptional) { | |
36346 | if (list_empty(&node->private_list)) | |
36347 | - list_lru_add(&shadow_nodes, &node->private_list); | |
36348 | + list_lru_add(&__shadow_nodes, &node->private_list); | |
36349 | } else { | |
36350 | if (!list_empty(&node->private_list)) | |
36351 | - list_lru_del(&shadow_nodes, &node->private_list); | |
36352 | + list_lru_del(&__shadow_nodes, &node->private_list); | |
36353 | } | |
36354 | } | |
1a6e0f06 | 36355 | |
e4b2b4a8 JK |
36356 | @@ -373,9 +374,9 @@ |
36357 | unsigned long cache; | |
1a6e0f06 JK |
36358 | |
36359 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
36360 | - local_irq_disable(); | |
e4b2b4a8 | 36361 | - nodes = list_lru_shrink_count(&shadow_nodes, sc); |
1a6e0f06 | 36362 | - local_irq_enable(); |
e4b2b4a8 JK |
36363 | + local_lock_irq(shadow_nodes_lock); |
36364 | + nodes = list_lru_shrink_count(&__shadow_nodes, sc); | |
36365 | + local_unlock_irq(shadow_nodes_lock); | |
1a6e0f06 | 36366 | |
e4b2b4a8 JK |
36367 | /* |
36368 | * Approximate a reasonable limit for the radix tree nodes | |
36369 | @@ -475,15 +476,15 @@ | |
36370 | goto out_invalid; | |
36371 | inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM); | |
36372 | __radix_tree_delete_node(&mapping->page_tree, node, | |
36373 | - workingset_update_node, mapping); | |
36374 | + __workingset_update_node, mapping); | |
36375 | ||
36376 | out_invalid: | |
1a6e0f06 JK |
36377 | spin_unlock(&mapping->tree_lock); |
36378 | ret = LRU_REMOVED_RETRY; | |
36379 | out: | |
36380 | - local_irq_enable(); | |
e4b2b4a8 | 36381 | + local_unlock_irq(shadow_nodes_lock); |
1a6e0f06 JK |
36382 | cond_resched(); |
36383 | - local_irq_disable(); | |
e4b2b4a8 | 36384 | + local_lock_irq(shadow_nodes_lock); |
1a6e0f06 JK |
36385 | spin_lock(lru_lock); |
36386 | return ret; | |
36387 | } | |
e4b2b4a8 | 36388 | @@ -494,9 +495,9 @@ |
1a6e0f06 JK |
36389 | unsigned long ret; |
36390 | ||
36391 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
36392 | - local_irq_disable(); | |
e4b2b4a8 | 36393 | - ret = list_lru_shrink_walk(&shadow_nodes, sc, shadow_lru_isolate, NULL); |
1a6e0f06 | 36394 | - local_irq_enable(); |
e4b2b4a8 JK |
36395 | + local_lock_irq(shadow_nodes_lock); |
36396 | + ret = list_lru_shrink_walk(&__shadow_nodes, sc, shadow_lru_isolate, NULL); | |
36397 | + local_unlock_irq(shadow_nodes_lock); | |
1a6e0f06 JK |
36398 | return ret; |
36399 | } | |
36400 | ||
e4b2b4a8 | 36401 | @@ -534,7 +535,7 @@ |
1a6e0f06 JK |
36402 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", |
36403 | timestamp_bits, max_order, bucket_order); | |
36404 | ||
e4b2b4a8 JK |
36405 | - ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key); |
36406 | + ret = __list_lru_init(&__shadow_nodes, true, &shadow_nodes_key); | |
1a6e0f06 JK |
36407 | if (ret) |
36408 | goto err; | |
36409 | ret = register_shrinker(&workingset_shadow_shrinker); | |
e4b2b4a8 | 36410 | @@ -542,7 +543,7 @@ |
1a6e0f06 JK |
36411 | goto err_list_lru; |
36412 | return 0; | |
36413 | err_list_lru: | |
e4b2b4a8 JK |
36414 | - list_lru_destroy(&shadow_nodes); |
36415 | + list_lru_destroy(&__shadow_nodes); | |
1a6e0f06 JK |
36416 | err: |
36417 | return ret; | |
36418 | } | |
e4b2b4a8 JK |
36419 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/mm/zsmalloc.c linux-4.14/mm/zsmalloc.c |
36420 | --- linux-4.14.orig/mm/zsmalloc.c 2018-09-05 11:03:25.000000000 +0200 | |
36421 | +++ linux-4.14/mm/zsmalloc.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
36422 | @@ -53,6 +53,7 @@ |
36423 | #include <linux/mount.h> | |
36424 | #include <linux/migrate.h> | |
36425 | #include <linux/pagemap.h> | |
36426 | +#include <linux/locallock.h> | |
36427 | ||
36428 | #define ZSPAGE_MAGIC 0x58 | |
36429 | ||
36430 | @@ -70,9 +71,22 @@ | |
36431 | */ | |
36432 | #define ZS_MAX_ZSPAGE_ORDER 2 | |
36433 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | |
36434 | - | |
36435 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | |
36436 | ||
36437 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36438 | + | |
36439 | +struct zsmalloc_handle { | |
36440 | + unsigned long addr; | |
36441 | + struct mutex lock; | |
36442 | +}; | |
36443 | + | |
36444 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) | |
36445 | + | |
36446 | +#else | |
36447 | + | |
36448 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) | |
36449 | +#endif | |
36450 | + | |
36451 | /* | |
36452 | * Object location (<PFN>, <obj_idx>) is encoded as | |
36453 | * as single (unsigned long) handle value. | |
e4b2b4a8 | 36454 | @@ -320,7 +334,7 @@ |
1a6e0f06 JK |
36455 | |
36456 | static int create_cache(struct zs_pool *pool) | |
36457 | { | |
36458 | - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | |
36459 | + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, | |
36460 | 0, 0, NULL); | |
36461 | if (!pool->handle_cachep) | |
36462 | return 1; | |
e4b2b4a8 | 36463 | @@ -344,9 +358,26 @@ |
1a6e0f06 JK |
36464 | |
36465 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) | |
36466 | { | |
36467 | - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | |
36468 | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
36469 | + void *p; | |
36470 | + | |
36471 | + p = kmem_cache_alloc(pool->handle_cachep, | |
36472 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
36473 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36474 | + if (p) { | |
36475 | + struct zsmalloc_handle *zh = p; | |
36476 | + | |
36477 | + mutex_init(&zh->lock); | |
36478 | + } | |
36479 | +#endif | |
36480 | + return (unsigned long)p; | |
e4b2b4a8 JK |
36481 | +} |
36482 | + | |
1a6e0f06 JK |
36483 | +#ifdef CONFIG_PREEMPT_RT_FULL |
36484 | +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) | |
36485 | +{ | |
36486 | + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); | |
e4b2b4a8 | 36487 | } |
1a6e0f06 | 36488 | +#endif |
e4b2b4a8 | 36489 | |
1a6e0f06 JK |
36490 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) |
36491 | { | |
e4b2b4a8 | 36492 | @@ -366,12 +397,18 @@ |
1a6e0f06 JK |
36493 | |
36494 | static void record_obj(unsigned long handle, unsigned long obj) | |
36495 | { | |
36496 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36497 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36498 | + | |
36499 | + WRITE_ONCE(zh->addr, obj); | |
36500 | +#else | |
36501 | /* | |
36502 | * lsb of @obj represents handle lock while other bits | |
36503 | * represent object value the handle is pointing so | |
36504 | * updating shouldn't do store tearing. | |
36505 | */ | |
36506 | WRITE_ONCE(*(unsigned long *)handle, obj); | |
36507 | +#endif | |
36508 | } | |
36509 | ||
36510 | /* zpool driver */ | |
e4b2b4a8 | 36511 | @@ -460,6 +497,7 @@ |
1a6e0f06 JK |
36512 | |
36513 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
36514 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
36515 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
36516 | ||
36517 | static bool is_zspage_isolated(struct zspage *zspage) | |
36518 | { | |
e4b2b4a8 | 36519 | @@ -898,7 +936,13 @@ |
1a6e0f06 JK |
36520 | |
36521 | static unsigned long handle_to_obj(unsigned long handle) | |
36522 | { | |
36523 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36524 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36525 | + | |
36526 | + return zh->addr; | |
36527 | +#else | |
36528 | return *(unsigned long *)handle; | |
36529 | +#endif | |
36530 | } | |
36531 | ||
36532 | static unsigned long obj_to_head(struct page *page, void *obj) | |
e4b2b4a8 | 36533 | @@ -912,22 +956,46 @@ |
1a6e0f06 JK |
36534 | |
36535 | static inline int testpin_tag(unsigned long handle) | |
36536 | { | |
36537 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36538 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36539 | + | |
36540 | + return mutex_is_locked(&zh->lock); | |
36541 | +#else | |
36542 | return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); | |
36543 | +#endif | |
36544 | } | |
36545 | ||
36546 | static inline int trypin_tag(unsigned long handle) | |
36547 | { | |
36548 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36549 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36550 | + | |
36551 | + return mutex_trylock(&zh->lock); | |
36552 | +#else | |
36553 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
36554 | +#endif | |
36555 | } | |
36556 | ||
36557 | static void pin_tag(unsigned long handle) | |
36558 | { | |
36559 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36560 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36561 | + | |
36562 | + return mutex_lock(&zh->lock); | |
36563 | +#else | |
36564 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
36565 | +#endif | |
36566 | } | |
36567 | ||
36568 | static void unpin_tag(unsigned long handle) | |
36569 | { | |
36570 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
36571 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
36572 | + | |
36573 | + return mutex_unlock(&zh->lock); | |
36574 | +#else | |
36575 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
36576 | +#endif | |
36577 | } | |
36578 | ||
36579 | static void reset_page(struct page *page) | |
e4b2b4a8 | 36580 | @@ -1365,7 +1433,7 @@ |
1a6e0f06 JK |
36581 | class = pool->size_class[class_idx]; |
36582 | off = (class->size * obj_idx) & ~PAGE_MASK; | |
36583 | ||
36584 | - area = &get_cpu_var(zs_map_area); | |
36585 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); | |
36586 | area->vm_mm = mm; | |
36587 | if (off + class->size <= PAGE_SIZE) { | |
36588 | /* this object is contained entirely within a page */ | |
e4b2b4a8 | 36589 | @@ -1419,7 +1487,7 @@ |
1a6e0f06 JK |
36590 | |
36591 | __zs_unmap_object(area, pages, off, class->size); | |
36592 | } | |
36593 | - put_cpu_var(zs_map_area); | |
36594 | + put_locked_var(zs_map_area_lock, zs_map_area); | |
36595 | ||
36596 | migrate_read_unlock(zspage); | |
36597 | unpin_tag(handle); | |
e4b2b4a8 JK |
36598 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/9p/trans_xen.c linux-4.14/net/9p/trans_xen.c |
36599 | --- linux-4.14.orig/net/9p/trans_xen.c 2018-09-05 11:03:25.000000000 +0200 | |
36600 | +++ linux-4.14/net/9p/trans_xen.c 2018-09-05 11:05:07.000000000 +0200 | |
36601 | @@ -38,7 +38,6 @@ | |
36602 | ||
36603 | #include <linux/module.h> | |
36604 | #include <linux/spinlock.h> | |
36605 | -#include <linux/rwlock.h> | |
36606 | #include <net/9p/9p.h> | |
36607 | #include <net/9p/client.h> | |
36608 | #include <net/9p/transport.h> | |
36609 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/bluetooth/hci_sock.c linux-4.14/net/bluetooth/hci_sock.c | |
36610 | --- linux-4.14.orig/net/bluetooth/hci_sock.c 2017-11-12 19:46:13.000000000 +0100 | |
36611 | +++ linux-4.14/net/bluetooth/hci_sock.c 2018-09-05 11:05:07.000000000 +0200 | |
36612 | @@ -251,15 +251,13 @@ | |
36613 | } | |
36614 | ||
36615 | /* Send frame to sockets with specific channel */ | |
36616 | -void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, | |
36617 | - int flag, struct sock *skip_sk) | |
36618 | +static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, | |
36619 | + int flag, struct sock *skip_sk) | |
36620 | { | |
36621 | struct sock *sk; | |
36622 | ||
36623 | BT_DBG("channel %u len %d", channel, skb->len); | |
36624 | ||
36625 | - read_lock(&hci_sk_list.lock); | |
36626 | - | |
36627 | sk_for_each(sk, &hci_sk_list.head) { | |
36628 | struct sk_buff *nskb; | |
36629 | ||
36630 | @@ -285,6 +283,13 @@ | |
36631 | kfree_skb(nskb); | |
36632 | } | |
36633 | ||
36634 | +} | |
36635 | + | |
36636 | +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, | |
36637 | + int flag, struct sock *skip_sk) | |
36638 | +{ | |
36639 | + read_lock(&hci_sk_list.lock); | |
36640 | + __hci_send_to_channel(channel, skb, flag, skip_sk); | |
36641 | read_unlock(&hci_sk_list.lock); | |
36642 | } | |
36643 | ||
36644 | @@ -388,8 +393,8 @@ | |
36645 | hdr->index = index; | |
36646 | hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); | |
36647 | ||
36648 | - hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, | |
36649 | - HCI_SOCK_TRUSTED, NULL); | |
36650 | + __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, | |
36651 | + HCI_SOCK_TRUSTED, NULL); | |
36652 | kfree_skb(skb); | |
36653 | } | |
36654 | ||
36655 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/can/bcm.c linux-4.14/net/can/bcm.c | |
36656 | --- linux-4.14.orig/net/can/bcm.c 2017-11-12 19:46:13.000000000 +0100 | |
36657 | +++ linux-4.14/net/can/bcm.c 2018-09-05 11:05:07.000000000 +0200 | |
36658 | @@ -102,7 +102,6 @@ | |
36659 | unsigned long frames_abs, frames_filtered; | |
36660 | struct bcm_timeval ival1, ival2; | |
36661 | struct hrtimer timer, thrtimer; | |
36662 | - struct tasklet_struct tsklet, thrtsklet; | |
36663 | ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; | |
36664 | int rx_ifindex; | |
36665 | int cfsiz; | |
36666 | @@ -364,25 +363,34 @@ | |
36667 | } | |
36668 | } | |
36669 | ||
36670 | -static void bcm_tx_start_timer(struct bcm_op *op) | |
36671 | +static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) | |
36672 | { | |
36673 | + ktime_t ival; | |
36674 | + | |
36675 | if (op->kt_ival1 && op->count) | |
36676 | - hrtimer_start(&op->timer, | |
36677 | - ktime_add(ktime_get(), op->kt_ival1), | |
36678 | - HRTIMER_MODE_ABS); | |
36679 | + ival = op->kt_ival1; | |
36680 | else if (op->kt_ival2) | |
36681 | - hrtimer_start(&op->timer, | |
36682 | - ktime_add(ktime_get(), op->kt_ival2), | |
36683 | - HRTIMER_MODE_ABS); | |
36684 | + ival = op->kt_ival2; | |
36685 | + else | |
36686 | + return false; | |
36687 | + | |
36688 | + hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); | |
36689 | + return true; | |
36690 | } | |
36691 | ||
36692 | -static void bcm_tx_timeout_tsklet(unsigned long data) | |
36693 | +static void bcm_tx_start_timer(struct bcm_op *op) | |
36694 | { | |
36695 | - struct bcm_op *op = (struct bcm_op *)data; | |
36696 | + if (bcm_tx_set_expiry(op, &op->timer)) | |
36697 | + hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); | |
36698 | +} | |
36699 | + | |
36700 | +/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ | |
36701 | +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) | |
36702 | +{ | |
36703 | + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); | |
36704 | struct bcm_msg_head msg_head; | |
36705 | ||
36706 | if (op->kt_ival1 && (op->count > 0)) { | |
36707 | - | |
36708 | op->count--; | |
36709 | if (!op->count && (op->flags & TX_COUNTEVT)) { | |
36710 | ||
36711 | @@ -399,22 +407,12 @@ | |
36712 | } | |
36713 | bcm_can_tx(op); | |
36714 | ||
36715 | - } else if (op->kt_ival2) | |
36716 | + } else if (op->kt_ival2) { | |
36717 | bcm_can_tx(op); | |
36718 | + } | |
36719 | ||
36720 | - bcm_tx_start_timer(op); | |
36721 | -} | |
36722 | - | |
36723 | -/* | |
36724 | - * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions | |
36725 | - */ | |
36726 | -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) | |
36727 | -{ | |
36728 | - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); | |
36729 | - | |
36730 | - tasklet_schedule(&op->tsklet); | |
36731 | - | |
36732 | - return HRTIMER_NORESTART; | |
36733 | + return bcm_tx_set_expiry(op, &op->timer) ? | |
36734 | + HRTIMER_RESTART : HRTIMER_NORESTART; | |
36735 | } | |
36736 | ||
36737 | /* | |
36738 | @@ -480,7 +478,7 @@ | |
36739 | /* do not send the saved data - only start throttle timer */ | |
36740 | hrtimer_start(&op->thrtimer, | |
36741 | ktime_add(op->kt_lastmsg, op->kt_ival2), | |
36742 | - HRTIMER_MODE_ABS); | |
36743 | + HRTIMER_MODE_ABS_SOFT); | |
36744 | return; | |
36745 | } | |
36746 | ||
36747 | @@ -539,14 +537,21 @@ | |
36748 | return; | |
36749 | ||
36750 | if (op->kt_ival1) | |
36751 | - hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); | |
36752 | + hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); | |
36753 | } | |
36754 | ||
36755 | -static void bcm_rx_timeout_tsklet(unsigned long data) | |
36756 | +/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ | |
36757 | +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) | |
36758 | { | |
36759 | - struct bcm_op *op = (struct bcm_op *)data; | |
36760 | + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); | |
36761 | struct bcm_msg_head msg_head; | |
36762 | ||
36763 | + /* if user wants to be informed, when cyclic CAN-Messages come back */ | |
36764 | + if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { | |
36765 | + /* clear received CAN frames to indicate 'nothing received' */ | |
36766 | + memset(op->last_frames, 0, op->nframes * op->cfsiz); | |
36767 | + } | |
36768 | + | |
36769 | /* create notification to user */ | |
36770 | msg_head.opcode = RX_TIMEOUT; | |
36771 | msg_head.flags = op->flags; | |
36772 | @@ -557,25 +562,6 @@ | |
36773 | msg_head.nframes = 0; | |
36774 | ||
36775 | bcm_send_to_user(op, &msg_head, NULL, 0); | |
36776 | -} | |
36777 | - | |
36778 | -/* | |
36779 | - * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out | |
36780 | - */ | |
36781 | -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) | |
36782 | -{ | |
36783 | - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); | |
36784 | - | |
36785 | - /* schedule before NET_RX_SOFTIRQ */ | |
36786 | - tasklet_hi_schedule(&op->tsklet); | |
36787 | - | |
36788 | - /* no restart of the timer is done here! */ | |
36789 | - | |
36790 | - /* if user wants to be informed, when cyclic CAN-Messages come back */ | |
36791 | - if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { | |
36792 | - /* clear received CAN frames to indicate 'nothing received' */ | |
36793 | - memset(op->last_frames, 0, op->nframes * op->cfsiz); | |
36794 | - } | |
36795 | ||
36796 | return HRTIMER_NORESTART; | |
36797 | } | |
36798 | @@ -583,14 +569,12 @@ | |
36799 | /* | |
36800 | * bcm_rx_do_flush - helper for bcm_rx_thr_flush | |
36801 | */ | |
36802 | -static inline int bcm_rx_do_flush(struct bcm_op *op, int update, | |
36803 | - unsigned int index) | |
36804 | +static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) | |
36805 | { | |
36806 | struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; | |
36807 | ||
36808 | if ((op->last_frames) && (lcf->flags & RX_THR)) { | |
36809 | - if (update) | |
36810 | - bcm_rx_changed(op, lcf); | |
36811 | + bcm_rx_changed(op, lcf); | |
36812 | return 1; | |
36813 | } | |
36814 | return 0; | |
36815 | @@ -598,11 +582,8 @@ | |
36816 | ||
36817 | /* | |
36818 | * bcm_rx_thr_flush - Check for throttled data and send it to the userspace | |
36819 | - * | |
36820 | - * update == 0 : just check if throttled data is available (any irq context) | |
36821 | - * update == 1 : check and send throttled data to userspace (soft_irq context) | |
36822 | */ | |
36823 | -static int bcm_rx_thr_flush(struct bcm_op *op, int update) | |
36824 | +static int bcm_rx_thr_flush(struct bcm_op *op) | |
36825 | { | |
36826 | int updated = 0; | |
36827 | ||
36828 | @@ -611,24 +592,16 @@ | |
36829 | ||
36830 | /* for MUX filter we start at index 1 */ | |
36831 | for (i = 1; i < op->nframes; i++) | |
36832 | - updated += bcm_rx_do_flush(op, update, i); | |
36833 | + updated += bcm_rx_do_flush(op, i); | |
36834 | ||
36835 | } else { | |
36836 | /* for RX_FILTER_ID and simple filter */ | |
36837 | - updated += bcm_rx_do_flush(op, update, 0); | |
36838 | + updated += bcm_rx_do_flush(op, 0); | |
36839 | } | |
36840 | ||
36841 | return updated; | |
36842 | } | |
36843 | ||
36844 | -static void bcm_rx_thr_tsklet(unsigned long data) | |
36845 | -{ | |
36846 | - struct bcm_op *op = (struct bcm_op *)data; | |
36847 | - | |
36848 | - /* push the changed data to the userspace */ | |
36849 | - bcm_rx_thr_flush(op, 1); | |
36850 | -} | |
36851 | - | |
36852 | /* | |
36853 | * bcm_rx_thr_handler - the time for blocked content updates is over now: | |
36854 | * Check for throttled data and send it to the userspace | |
36855 | @@ -637,9 +610,7 @@ | |
36856 | { | |
36857 | struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); | |
36858 | ||
36859 | - tasklet_schedule(&op->thrtsklet); | |
36860 | - | |
36861 | - if (bcm_rx_thr_flush(op, 0)) { | |
36862 | + if (bcm_rx_thr_flush(op)) { | |
36863 | hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); | |
36864 | return HRTIMER_RESTART; | |
36865 | } else { | |
36866 | @@ -735,23 +706,8 @@ | |
36867 | ||
36868 | static void bcm_remove_op(struct bcm_op *op) | |
36869 | { | |
36870 | - if (op->tsklet.func) { | |
36871 | - while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || | |
36872 | - test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || | |
36873 | - hrtimer_active(&op->timer)) { | |
36874 | - hrtimer_cancel(&op->timer); | |
36875 | - tasklet_kill(&op->tsklet); | |
36876 | - } | |
36877 | - } | |
36878 | - | |
36879 | - if (op->thrtsklet.func) { | |
36880 | - while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || | |
36881 | - test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || | |
36882 | - hrtimer_active(&op->thrtimer)) { | |
36883 | - hrtimer_cancel(&op->thrtimer); | |
36884 | - tasklet_kill(&op->thrtsklet); | |
36885 | - } | |
36886 | - } | |
36887 | + hrtimer_cancel(&op->timer); | |
36888 | + hrtimer_cancel(&op->thrtimer); | |
36889 | ||
36890 | if ((op->frames) && (op->frames != &op->sframe)) | |
36891 | kfree(op->frames); | |
36892 | @@ -979,15 +935,13 @@ | |
36893 | op->ifindex = ifindex; | |
36894 | ||
36895 | /* initialize uninitialized (kzalloc) structure */ | |
36896 | - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
36897 | + hrtimer_init(&op->timer, CLOCK_MONOTONIC, | |
36898 | + HRTIMER_MODE_REL_SOFT); | |
36899 | op->timer.function = bcm_tx_timeout_handler; | |
36900 | ||
36901 | - /* initialize tasklet for tx countevent notification */ | |
36902 | - tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, | |
36903 | - (unsigned long) op); | |
36904 | - | |
36905 | /* currently unused in tx_ops */ | |
36906 | - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
36907 | + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, | |
36908 | + HRTIMER_MODE_REL_SOFT); | |
36909 | ||
36910 | /* add this bcm_op to the list of the tx_ops */ | |
36911 | list_add(&op->list, &bo->tx_ops); | |
36912 | @@ -1150,20 +1104,14 @@ | |
36913 | op->rx_ifindex = ifindex; | |
36914 | ||
36915 | /* initialize uninitialized (kzalloc) structure */ | |
36916 | - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
36917 | + hrtimer_init(&op->timer, CLOCK_MONOTONIC, | |
36918 | + HRTIMER_MODE_REL_SOFT); | |
36919 | op->timer.function = bcm_rx_timeout_handler; | |
36920 | ||
36921 | - /* initialize tasklet for rx timeout notification */ | |
36922 | - tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, | |
36923 | - (unsigned long) op); | |
36924 | - | |
36925 | - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
36926 | + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, | |
36927 | + HRTIMER_MODE_REL_SOFT); | |
36928 | op->thrtimer.function = bcm_rx_thr_handler; | |
36929 | ||
36930 | - /* initialize tasklet for rx throttle handling */ | |
36931 | - tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, | |
36932 | - (unsigned long) op); | |
36933 | - | |
36934 | /* add this bcm_op to the list of the rx_ops */ | |
36935 | list_add(&op->list, &bo->rx_ops); | |
36936 | ||
36937 | @@ -1209,12 +1157,12 @@ | |
36938 | */ | |
36939 | op->kt_lastmsg = 0; | |
36940 | hrtimer_cancel(&op->thrtimer); | |
36941 | - bcm_rx_thr_flush(op, 1); | |
36942 | + bcm_rx_thr_flush(op); | |
36943 | } | |
36944 | ||
36945 | if ((op->flags & STARTTIMER) && op->kt_ival1) | |
36946 | hrtimer_start(&op->timer, op->kt_ival1, | |
36947 | - HRTIMER_MODE_REL); | |
36948 | + HRTIMER_MODE_REL_SOFT); | |
36949 | } | |
36950 | ||
36951 | /* now we can register for can_ids, if we added a new bcm_op */ | |
36952 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/dev.c linux-4.14/net/core/dev.c | |
36953 | --- linux-4.14.orig/net/core/dev.c 2018-09-05 11:03:25.000000000 +0200 | |
36954 | +++ linux-4.14/net/core/dev.c 2018-09-05 11:05:07.000000000 +0200 | |
36955 | @@ -195,6 +195,7 @@ | |
1a6e0f06 JK |
36956 | static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); |
36957 | ||
36958 | static seqcount_t devnet_rename_seq; | |
36959 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
36960 | ||
36961 | static inline void dev_base_seq_inc(struct net *net) | |
36962 | { | |
e4b2b4a8 | 36963 | @@ -217,14 +218,14 @@ |
1a6e0f06 JK |
36964 | static inline void rps_lock(struct softnet_data *sd) |
36965 | { | |
36966 | #ifdef CONFIG_RPS | |
36967 | - spin_lock(&sd->input_pkt_queue.lock); | |
36968 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
36969 | #endif | |
36970 | } | |
36971 | ||
36972 | static inline void rps_unlock(struct softnet_data *sd) | |
36973 | { | |
36974 | #ifdef CONFIG_RPS | |
36975 | - spin_unlock(&sd->input_pkt_queue.lock); | |
36976 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
36977 | #endif | |
36978 | } | |
36979 | ||
e4b2b4a8 | 36980 | @@ -920,7 +921,8 @@ |
1a6e0f06 JK |
36981 | strcpy(name, dev->name); |
36982 | rcu_read_unlock(); | |
36983 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
36984 | - cond_resched(); | |
36985 | + mutex_lock(&devnet_rename_mutex); | |
36986 | + mutex_unlock(&devnet_rename_mutex); | |
36987 | goto retry; | |
36988 | } | |
36989 | ||
e4b2b4a8 | 36990 | @@ -1189,20 +1191,17 @@ |
1a6e0f06 JK |
36991 | if (dev->flags & IFF_UP) |
36992 | return -EBUSY; | |
36993 | ||
36994 | - write_seqcount_begin(&devnet_rename_seq); | |
36995 | + mutex_lock(&devnet_rename_mutex); | |
36996 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
36997 | ||
36998 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
36999 | - write_seqcount_end(&devnet_rename_seq); | |
37000 | - return 0; | |
37001 | - } | |
37002 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
37003 | + goto outunlock; | |
37004 | ||
37005 | memcpy(oldname, dev->name, IFNAMSIZ); | |
37006 | ||
37007 | err = dev_get_valid_name(net, dev, newname); | |
37008 | - if (err < 0) { | |
37009 | - write_seqcount_end(&devnet_rename_seq); | |
37010 | - return err; | |
37011 | - } | |
37012 | + if (err < 0) | |
37013 | + goto outunlock; | |
37014 | ||
37015 | if (oldname[0] && !strchr(oldname, '%')) | |
37016 | netdev_info(dev, "renamed from %s\n", oldname); | |
e4b2b4a8 | 37017 | @@ -1215,11 +1214,12 @@ |
1a6e0f06 JK |
37018 | if (ret) { |
37019 | memcpy(dev->name, oldname, IFNAMSIZ); | |
37020 | dev->name_assign_type = old_assign_type; | |
37021 | - write_seqcount_end(&devnet_rename_seq); | |
37022 | - return ret; | |
37023 | + err = ret; | |
37024 | + goto outunlock; | |
37025 | } | |
37026 | ||
37027 | - write_seqcount_end(&devnet_rename_seq); | |
37028 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
37029 | + mutex_unlock(&devnet_rename_mutex); | |
37030 | ||
37031 | netdev_adjacent_rename_links(dev, oldname); | |
37032 | ||
e4b2b4a8 | 37033 | @@ -1240,7 +1240,8 @@ |
1a6e0f06 JK |
37034 | /* err >= 0 after dev_alloc_name() or stores the first errno */ |
37035 | if (err >= 0) { | |
37036 | err = ret; | |
37037 | - write_seqcount_begin(&devnet_rename_seq); | |
37038 | + mutex_lock(&devnet_rename_mutex); | |
37039 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
37040 | memcpy(dev->name, oldname, IFNAMSIZ); | |
37041 | memcpy(oldname, newname, IFNAMSIZ); | |
37042 | dev->name_assign_type = old_assign_type; | |
e4b2b4a8 | 37043 | @@ -1253,6 +1254,11 @@ |
1a6e0f06 JK |
37044 | } |
37045 | ||
37046 | return err; | |
37047 | + | |
37048 | +outunlock: | |
37049 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
37050 | + mutex_unlock(&devnet_rename_mutex); | |
37051 | + return err; | |
37052 | } | |
37053 | ||
37054 | /** | |
e4b2b4a8 | 37055 | @@ -2438,6 +2444,7 @@ |
1a6e0f06 JK |
37056 | sd->output_queue_tailp = &q->next_sched; |
37057 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
37058 | local_irq_restore(flags); | |
37059 | + preempt_check_resched_rt(); | |
37060 | } | |
37061 | ||
37062 | void __netif_schedule(struct Qdisc *q) | |
e4b2b4a8 | 37063 | @@ -2500,6 +2507,7 @@ |
1a6e0f06 JK |
37064 | __this_cpu_write(softnet_data.completion_queue, skb); |
37065 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
37066 | local_irq_restore(flags); | |
37067 | + preempt_check_resched_rt(); | |
37068 | } | |
37069 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
37070 | ||
e4b2b4a8 | 37071 | @@ -3175,7 +3183,11 @@ |
1a6e0f06 JK |
37072 | * This permits qdisc->running owner to get the lock more |
37073 | * often and dequeue packets faster. | |
37074 | */ | |
37075 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
37076 | + contended = true; | |
37077 | +#else | |
37078 | contended = qdisc_is_running(q); | |
37079 | +#endif | |
37080 | if (unlikely(contended)) | |
37081 | spin_lock(&q->busylock); | |
37082 | ||
e4b2b4a8 | 37083 | @@ -3246,8 +3258,10 @@ |
1a6e0f06 JK |
37084 | #define skb_update_prio(skb) |
37085 | #endif | |
37086 | ||
37087 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
37088 | DEFINE_PER_CPU(int, xmit_recursion); | |
37089 | EXPORT_SYMBOL(xmit_recursion); | |
37090 | +#endif | |
37091 | ||
37092 | /** | |
37093 | * dev_loopback_xmit - loop back @skb | |
e4b2b4a8 JK |
37094 | @@ -3487,9 +3501,12 @@ |
37095 | if (dev->flags & IFF_UP) { | |
1a6e0f06 JK |
37096 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
37097 | ||
e4b2b4a8 JK |
37098 | +#ifdef CONFIG_PREEMPT_RT_FULL |
37099 | + if (txq->xmit_lock_owner != current) { | |
37100 | +#else | |
1a6e0f06 JK |
37101 | if (txq->xmit_lock_owner != cpu) { |
37102 | - if (unlikely(__this_cpu_read(xmit_recursion) > | |
37103 | - XMIT_RECURSION_LIMIT)) | |
e4b2b4a8 | 37104 | +#endif |
1a6e0f06 JK |
37105 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) |
37106 | goto recursion_alert; | |
37107 | ||
37108 | skb = validate_xmit_skb(skb, dev); | |
e4b2b4a8 | 37109 | @@ -3499,9 +3516,9 @@ |
1a6e0f06 JK |
37110 | HARD_TX_LOCK(dev, txq, cpu); |
37111 | ||
37112 | if (!netif_xmit_stopped(txq)) { | |
37113 | - __this_cpu_inc(xmit_recursion); | |
37114 | + xmit_rec_inc(); | |
37115 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
37116 | - __this_cpu_dec(xmit_recursion); | |
37117 | + xmit_rec_dec(); | |
37118 | if (dev_xmit_complete(rc)) { | |
37119 | HARD_TX_UNLOCK(dev, txq); | |
37120 | goto out; | |
e4b2b4a8 | 37121 | @@ -3882,6 +3899,7 @@ |
1a6e0f06 JK |
37122 | rps_unlock(sd); |
37123 | ||
37124 | local_irq_restore(flags); | |
37125 | + preempt_check_resched_rt(); | |
37126 | ||
37127 | atomic_long_inc(&skb->dev->rx_dropped); | |
37128 | kfree_skb(skb); | |
e4b2b4a8 | 37129 | @@ -4034,7 +4052,7 @@ |
1a6e0f06 JK |
37130 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
37131 | int cpu; | |
37132 | ||
37133 | - preempt_disable(); | |
37134 | + migrate_disable(); | |
37135 | rcu_read_lock(); | |
37136 | ||
37137 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
e4b2b4a8 | 37138 | @@ -4044,14 +4062,14 @@ |
1a6e0f06 JK |
37139 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
37140 | ||
37141 | rcu_read_unlock(); | |
37142 | - preempt_enable(); | |
37143 | + migrate_enable(); | |
37144 | } else | |
37145 | #endif | |
37146 | { | |
37147 | unsigned int qtail; | |
e4b2b4a8 | 37148 | |
1a6e0f06 JK |
37149 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); |
37150 | - put_cpu(); | |
37151 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
37152 | + put_cpu_light(); | |
37153 | } | |
37154 | return ret; | |
37155 | } | |
e4b2b4a8 | 37156 | @@ -4085,11 +4103,9 @@ |
1a6e0f06 JK |
37157 | |
37158 | trace_netif_rx_ni_entry(skb); | |
37159 | ||
37160 | - preempt_disable(); | |
37161 | + local_bh_disable(); | |
37162 | err = netif_rx_internal(skb); | |
37163 | - if (local_softirq_pending()) | |
37164 | - do_softirq(); | |
37165 | - preempt_enable(); | |
37166 | + local_bh_enable(); | |
37167 | ||
37168 | return err; | |
37169 | } | |
e4b2b4a8 | 37170 | @@ -4607,7 +4623,7 @@ |
1a6e0f06 | 37171 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { |
c7c16703 | 37172 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
37173 | __skb_unlink(skb, &sd->input_pkt_queue); |
37174 | - kfree_skb(skb); | |
37175 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
37176 | input_queue_head_incr(sd); | |
37177 | } | |
37178 | } | |
e4b2b4a8 | 37179 | @@ -4617,11 +4633,14 @@ |
1a6e0f06 | 37180 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { |
c7c16703 | 37181 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
37182 | __skb_unlink(skb, &sd->process_queue); |
37183 | - kfree_skb(skb); | |
37184 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
37185 | input_queue_head_incr(sd); | |
37186 | } | |
37187 | } | |
1a6e0f06 JK |
37188 | + if (!skb_queue_empty(&sd->tofree_queue)) |
37189 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
c7c16703 JK |
37190 | local_bh_enable(); |
37191 | + | |
1a6e0f06 JK |
37192 | } |
37193 | ||
c7c16703 | 37194 | static void flush_all_backlogs(void) |
e4b2b4a8 | 37195 | @@ -5131,12 +5150,14 @@ |
1a6e0f06 JK |
37196 | sd->rps_ipi_list = NULL; |
37197 | ||
37198 | local_irq_enable(); | |
37199 | + preempt_check_resched_rt(); | |
37200 | ||
37201 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
e4b2b4a8 | 37202 | net_rps_send_ipi(remsd); |
1a6e0f06 JK |
37203 | } else |
37204 | #endif | |
37205 | local_irq_enable(); | |
37206 | + preempt_check_resched_rt(); | |
37207 | } | |
37208 | ||
37209 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
e4b2b4a8 | 37210 | @@ -5166,7 +5187,9 @@ |
c7c16703 JK |
37211 | while (again) { |
37212 | struct sk_buff *skb; | |
37213 | ||
37214 | + local_irq_disable(); | |
37215 | while ((skb = __skb_dequeue(&sd->process_queue))) { | |
37216 | + local_irq_enable(); | |
37217 | rcu_read_lock(); | |
37218 | __netif_receive_skb(skb); | |
37219 | rcu_read_unlock(); | |
e4b2b4a8 | 37220 | @@ -5174,9 +5197,9 @@ |
c7c16703 JK |
37221 | if (++work >= quota) |
37222 | return work; | |
37223 | ||
37224 | + local_irq_disable(); | |
37225 | } | |
37226 | ||
37227 | - local_irq_disable(); | |
37228 | rps_lock(sd); | |
37229 | if (skb_queue_empty(&sd->input_pkt_queue)) { | |
37230 | /* | |
e4b2b4a8 | 37231 | @@ -5214,6 +5237,7 @@ |
1a6e0f06 JK |
37232 | local_irq_save(flags); |
37233 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
37234 | local_irq_restore(flags); | |
37235 | + preempt_check_resched_rt(); | |
37236 | } | |
37237 | EXPORT_SYMBOL(__napi_schedule); | |
37238 | ||
e4b2b4a8 JK |
37239 | @@ -5250,6 +5274,7 @@ |
37240 | } | |
37241 | EXPORT_SYMBOL(napi_schedule_prep); | |
37242 | ||
c7c16703 JK |
37243 | +#ifndef CONFIG_PREEMPT_RT_FULL |
37244 | /** | |
37245 | * __napi_schedule_irqoff - schedule for receive | |
37246 | * @n: entry to schedule | |
e4b2b4a8 | 37247 | @@ -5261,6 +5286,7 @@ |
c7c16703 JK |
37248 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); |
37249 | } | |
37250 | EXPORT_SYMBOL(__napi_schedule_irqoff); | |
37251 | +#endif | |
37252 | ||
e4b2b4a8 | 37253 | bool napi_complete_done(struct napi_struct *n, int work_done) |
c7c16703 | 37254 | { |
e4b2b4a8 JK |
37255 | @@ -5615,13 +5641,21 @@ |
37256 | unsigned long time_limit = jiffies + | |
37257 | usecs_to_jiffies(netdev_budget_usecs); | |
c7c16703 JK |
37258 | int budget = netdev_budget; |
37259 | + struct sk_buff_head tofree_q; | |
37260 | + struct sk_buff *skb; | |
37261 | LIST_HEAD(list); | |
37262 | LIST_HEAD(repoll); | |
37263 | ||
37264 | + __skb_queue_head_init(&tofree_q); | |
37265 | + | |
37266 | local_irq_disable(); | |
37267 | + skb_queue_splice_init(&sd->tofree_queue, &tofree_q); | |
37268 | list_splice_init(&sd->poll_list, &list); | |
37269 | local_irq_enable(); | |
37270 | ||
37271 | + while ((skb = __skb_dequeue(&tofree_q))) | |
37272 | + kfree_skb(skb); | |
37273 | + | |
37274 | for (;;) { | |
37275 | struct napi_struct *n; | |
37276 | ||
e4b2b4a8 | 37277 | @@ -5651,7 +5685,7 @@ |
1a6e0f06 JK |
37278 | list_splice_tail(&repoll, &list); |
37279 | list_splice(&list, &sd->poll_list); | |
37280 | if (!list_empty(&sd->poll_list)) | |
37281 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
37282 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
37283 | ||
37284 | net_rps_action_and_irq_enable(sd); | |
e4b2b4a8 JK |
37285 | out: |
37286 | @@ -7478,7 +7512,7 @@ | |
37287 | /* Initialize queue lock */ | |
37288 | spin_lock_init(&queue->_xmit_lock); | |
37289 | netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type); | |
37290 | - queue->xmit_lock_owner = -1; | |
37291 | + netdev_queue_clear_owner(queue); | |
37292 | netdev_queue_numa_node_write(queue, NUMA_NO_NODE); | |
37293 | queue->dev = dev; | |
37294 | #ifdef CONFIG_BQL | |
37295 | @@ -8418,6 +8452,7 @@ | |
1a6e0f06 JK |
37296 | |
37297 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
37298 | local_irq_enable(); | |
37299 | + preempt_check_resched_rt(); | |
37300 | ||
e4b2b4a8 JK |
37301 | #ifdef CONFIG_RPS |
37302 | remsd = oldsd->rps_ipi_list; | |
37303 | @@ -8431,10 +8466,13 @@ | |
1a6e0f06 JK |
37304 | netif_rx_ni(skb); |
37305 | input_queue_head_incr(oldsd); | |
37306 | } | |
37307 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
37308 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
37309 | netif_rx_ni(skb); | |
37310 | input_queue_head_incr(oldsd); | |
37311 | } | |
37312 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
37313 | + kfree_skb(skb); | |
37314 | + } | |
37315 | ||
e4b2b4a8 | 37316 | return 0; |
1a6e0f06 | 37317 | } |
e4b2b4a8 | 37318 | @@ -8738,8 +8776,9 @@ |
c7c16703 JK |
37319 | |
37320 | INIT_WORK(flush, flush_backlog); | |
1a6e0f06 JK |
37321 | |
37322 | - skb_queue_head_init(&sd->input_pkt_queue); | |
37323 | - skb_queue_head_init(&sd->process_queue); | |
37324 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
37325 | + skb_queue_head_init_raw(&sd->process_queue); | |
37326 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
37327 | INIT_LIST_HEAD(&sd->poll_list); | |
37328 | sd->output_queue_tailp = &sd->output_queue; | |
37329 | #ifdef CONFIG_RPS | |
e4b2b4a8 JK |
37330 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/filter.c linux-4.14/net/core/filter.c |
37331 | --- linux-4.14.orig/net/core/filter.c 2018-09-05 11:03:25.000000000 +0200 | |
37332 | +++ linux-4.14/net/core/filter.c 2018-09-05 11:05:07.000000000 +0200 | |
37333 | @@ -1696,7 +1696,7 @@ | |
1a6e0f06 JK |
37334 | { |
37335 | int ret; | |
37336 | ||
37337 | - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { | |
37338 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) { | |
37339 | net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); | |
37340 | kfree_skb(skb); | |
37341 | return -ENETDOWN; | |
e4b2b4a8 | 37342 | @@ -1704,9 +1704,9 @@ |
1a6e0f06 JK |
37343 | |
37344 | skb->dev = dev; | |
37345 | ||
37346 | - __this_cpu_inc(xmit_recursion); | |
37347 | + xmit_rec_inc(); | |
37348 | ret = dev_queue_xmit(skb); | |
37349 | - __this_cpu_dec(xmit_recursion); | |
37350 | + xmit_rec_dec(); | |
37351 | ||
37352 | return ret; | |
37353 | } | |
e4b2b4a8 JK |
37354 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/gen_estimator.c linux-4.14/net/core/gen_estimator.c |
37355 | --- linux-4.14.orig/net/core/gen_estimator.c 2018-09-05 11:03:25.000000000 +0200 | |
37356 | +++ linux-4.14/net/core/gen_estimator.c 2018-09-05 11:05:07.000000000 +0200 | |
37357 | @@ -46,7 +46,7 @@ | |
37358 | struct net_rate_estimator { | |
1a6e0f06 | 37359 | struct gnet_stats_basic_packed *bstats; |
1a6e0f06 JK |
37360 | spinlock_t *stats_lock; |
37361 | - seqcount_t *running; | |
37362 | + net_seqlock_t *running; | |
e4b2b4a8 JK |
37363 | struct gnet_stats_basic_cpu __percpu *cpu_bstats; |
37364 | u8 ewma_log; | |
37365 | u8 intvl_log; /* period : (250ms << intvl_log) */ | |
37366 | @@ -129,7 +129,7 @@ | |
1a6e0f06 | 37367 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
e4b2b4a8 | 37368 | struct net_rate_estimator __rcu **rate_est, |
1a6e0f06 JK |
37369 | spinlock_t *stats_lock, |
37370 | - seqcount_t *running, | |
37371 | + net_seqlock_t *running, | |
37372 | struct nlattr *opt) | |
37373 | { | |
e4b2b4a8 JK |
37374 | struct gnet_estimator *parm = nla_data(opt); |
37375 | @@ -222,7 +222,7 @@ | |
1a6e0f06 | 37376 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, |
e4b2b4a8 | 37377 | struct net_rate_estimator __rcu **rate_est, |
1a6e0f06 JK |
37378 | spinlock_t *stats_lock, |
37379 | - seqcount_t *running, struct nlattr *opt) | |
37380 | + net_seqlock_t *running, struct nlattr *opt) | |
37381 | { | |
e4b2b4a8 JK |
37382 | return gen_new_estimator(bstats, cpu_bstats, rate_est, |
37383 | stats_lock, running, opt); | |
37384 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/gen_stats.c linux-4.14/net/core/gen_stats.c | |
37385 | --- linux-4.14.orig/net/core/gen_stats.c 2018-09-05 11:03:25.000000000 +0200 | |
37386 | +++ linux-4.14/net/core/gen_stats.c 2018-09-05 11:05:07.000000000 +0200 | |
37387 | @@ -142,7 +142,7 @@ | |
1a6e0f06 JK |
37388 | } |
37389 | ||
37390 | void | |
37391 | -__gnet_stats_copy_basic(const seqcount_t *running, | |
37392 | +__gnet_stats_copy_basic(net_seqlock_t *running, | |
37393 | struct gnet_stats_basic_packed *bstats, | |
37394 | struct gnet_stats_basic_cpu __percpu *cpu, | |
37395 | struct gnet_stats_basic_packed *b) | |
e4b2b4a8 | 37396 | @@ -155,10 +155,10 @@ |
1a6e0f06 JK |
37397 | } |
37398 | do { | |
37399 | if (running) | |
37400 | - seq = read_seqcount_begin(running); | |
37401 | + seq = net_seq_begin(running); | |
37402 | bstats->bytes = b->bytes; | |
37403 | bstats->packets = b->packets; | |
37404 | - } while (running && read_seqcount_retry(running, seq)); | |
37405 | + } while (running && net_seq_retry(running, seq)); | |
37406 | } | |
37407 | EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
37408 | ||
e4b2b4a8 | 37409 | @@ -176,7 +176,7 @@ |
1a6e0f06 JK |
37410 | * if the room in the socket buffer was not sufficient. |
37411 | */ | |
37412 | int | |
37413 | -gnet_stats_copy_basic(const seqcount_t *running, | |
37414 | +gnet_stats_copy_basic(net_seqlock_t *running, | |
37415 | struct gnet_dump *d, | |
37416 | struct gnet_stats_basic_cpu __percpu *cpu, | |
37417 | struct gnet_stats_basic_packed *b) | |
e4b2b4a8 JK |
37418 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/pktgen.c linux-4.14/net/core/pktgen.c |
37419 | --- linux-4.14.orig/net/core/pktgen.c 2017-11-12 19:46:13.000000000 +0100 | |
37420 | +++ linux-4.14/net/core/pktgen.c 2018-09-05 11:05:07.000000000 +0200 | |
37421 | @@ -2252,7 +2252,8 @@ | |
37422 | s64 remaining; | |
37423 | struct hrtimer_sleeper t; | |
37424 | ||
37425 | - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
37426 | + hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS, | |
37427 | + current); | |
37428 | hrtimer_set_expires(&t.timer, spin_until); | |
37429 | ||
37430 | remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer)); | |
37431 | @@ -2267,7 +2268,6 @@ | |
37432 | } while (ktime_compare(end_time, spin_until) < 0); | |
37433 | } else { | |
37434 | /* see do_nanosleep */ | |
37435 | - hrtimer_init_sleeper(&t, current); | |
37436 | do { | |
37437 | set_current_state(TASK_INTERRUPTIBLE); | |
37438 | hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); | |
37439 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/skbuff.c linux-4.14/net/core/skbuff.c | |
37440 | --- linux-4.14.orig/net/core/skbuff.c 2018-09-05 11:03:25.000000000 +0200 | |
37441 | +++ linux-4.14/net/core/skbuff.c 2018-09-05 11:05:07.000000000 +0200 | |
37442 | @@ -63,6 +63,7 @@ | |
1a6e0f06 JK |
37443 | #include <linux/errqueue.h> |
37444 | #include <linux/prefetch.h> | |
37445 | #include <linux/if_vlan.h> | |
37446 | +#include <linux/locallock.h> | |
37447 | ||
37448 | #include <net/protocol.h> | |
37449 | #include <net/dst.h> | |
e4b2b4a8 | 37450 | @@ -330,6 +331,8 @@ |
1a6e0f06 JK |
37451 | |
37452 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
37453 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); | |
37454 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
37455 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
37456 | ||
37457 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
37458 | { | |
e4b2b4a8 | 37459 | @@ -337,10 +340,10 @@ |
1a6e0f06 JK |
37460 | unsigned long flags; |
37461 | void *data; | |
37462 | ||
37463 | - local_irq_save(flags); | |
37464 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
37465 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
e4b2b4a8 | 37466 | data = page_frag_alloc(nc, fragsz, gfp_mask); |
1a6e0f06 JK |
37467 | - local_irq_restore(flags); |
37468 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
37469 | return data; | |
37470 | } | |
37471 | ||
e4b2b4a8 | 37472 | @@ -359,9 +362,13 @@ |
1a6e0f06 JK |
37473 | |
37474 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
37475 | { | |
37476 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
37477 | + struct napi_alloc_cache *nc; | |
37478 | + void *data; | |
37479 | ||
e4b2b4a8 | 37480 | - return page_frag_alloc(&nc->page, fragsz, gfp_mask); |
1a6e0f06 | 37481 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); |
e4b2b4a8 | 37482 | + data = page_frag_alloc(&nc->page, fragsz, gfp_mask); |
1a6e0f06 JK |
37483 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); |
37484 | + return data; | |
37485 | } | |
37486 | ||
37487 | void *napi_alloc_frag(unsigned int fragsz) | |
e4b2b4a8 | 37488 | @@ -408,13 +415,13 @@ |
1a6e0f06 JK |
37489 | if (sk_memalloc_socks()) |
37490 | gfp_mask |= __GFP_MEMALLOC; | |
37491 | ||
37492 | - local_irq_save(flags); | |
37493 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
37494 | ||
37495 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
e4b2b4a8 | 37496 | data = page_frag_alloc(nc, len, gfp_mask); |
1a6e0f06 JK |
37497 | pfmemalloc = nc->pfmemalloc; |
37498 | ||
37499 | - local_irq_restore(flags); | |
37500 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
37501 | ||
37502 | if (unlikely(!data)) | |
37503 | return NULL; | |
e4b2b4a8 | 37504 | @@ -455,9 +462,10 @@ |
1a6e0f06 JK |
37505 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, |
37506 | gfp_t gfp_mask) | |
37507 | { | |
37508 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
37509 | + struct napi_alloc_cache *nc; | |
37510 | struct sk_buff *skb; | |
37511 | void *data; | |
37512 | + bool pfmemalloc; | |
37513 | ||
37514 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
37515 | ||
e4b2b4a8 | 37516 | @@ -475,7 +483,10 @@ |
1a6e0f06 JK |
37517 | if (sk_memalloc_socks()) |
37518 | gfp_mask |= __GFP_MEMALLOC; | |
37519 | ||
37520 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
e4b2b4a8 | 37521 | data = page_frag_alloc(&nc->page, len, gfp_mask); |
1a6e0f06 JK |
37522 | + pfmemalloc = nc->page.pfmemalloc; |
37523 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
37524 | if (unlikely(!data)) | |
37525 | return NULL; | |
37526 | ||
e4b2b4a8 | 37527 | @@ -486,7 +497,7 @@ |
1a6e0f06 JK |
37528 | } |
37529 | ||
37530 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
37531 | - if (nc->page.pfmemalloc) | |
37532 | + if (pfmemalloc) | |
37533 | skb->pfmemalloc = 1; | |
37534 | skb->head_frag = 1; | |
37535 | ||
e4b2b4a8 | 37536 | @@ -718,23 +729,26 @@ |
1a6e0f06 JK |
37537 | |
37538 | void __kfree_skb_flush(void) | |
37539 | { | |
37540 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
37541 | + struct napi_alloc_cache *nc; | |
37542 | ||
37543 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
37544 | /* flush skb_cache if containing objects */ | |
37545 | if (nc->skb_count) { | |
37546 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | |
37547 | nc->skb_cache); | |
37548 | nc->skb_count = 0; | |
37549 | } | |
37550 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
37551 | } | |
37552 | ||
37553 | static inline void _kfree_skb_defer(struct sk_buff *skb) | |
37554 | { | |
37555 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
37556 | + struct napi_alloc_cache *nc; | |
37557 | ||
37558 | /* drop skb->head and call any destructors for packet */ | |
37559 | skb_release_all(skb); | |
37560 | ||
37561 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
37562 | /* record skb to CPU local list */ | |
37563 | nc->skb_cache[nc->skb_count++] = skb; | |
37564 | ||
e4b2b4a8 | 37565 | @@ -749,6 +763,7 @@ |
1a6e0f06 JK |
37566 | nc->skb_cache); |
37567 | nc->skb_count = 0; | |
37568 | } | |
37569 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
37570 | } | |
37571 | void __kfree_skb_defer(struct sk_buff *skb) | |
37572 | { | |
e4b2b4a8 JK |
37573 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/core/sock.c linux-4.14/net/core/sock.c |
37574 | --- linux-4.14.orig/net/core/sock.c 2018-09-05 11:03:25.000000000 +0200 | |
37575 | +++ linux-4.14/net/core/sock.c 2018-09-05 11:05:07.000000000 +0200 | |
37576 | @@ -2757,12 +2757,11 @@ | |
1a6e0f06 JK |
37577 | if (sk->sk_lock.owned) |
37578 | __lock_sock(sk); | |
37579 | sk->sk_lock.owned = 1; | |
37580 | - spin_unlock(&sk->sk_lock.slock); | |
37581 | + spin_unlock_bh(&sk->sk_lock.slock); | |
37582 | /* | |
37583 | * The sk_lock has mutex_lock() semantics here: | |
37584 | */ | |
37585 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
37586 | - local_bh_enable(); | |
37587 | } | |
37588 | EXPORT_SYMBOL(lock_sock_nested); | |
37589 | ||
e4b2b4a8 JK |
37590 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/ipv4/icmp.c linux-4.14/net/ipv4/icmp.c |
37591 | --- linux-4.14.orig/net/ipv4/icmp.c 2018-09-05 11:03:25.000000000 +0200 | |
37592 | +++ linux-4.14/net/ipv4/icmp.c 2018-09-05 11:05:07.000000000 +0200 | |
37593 | @@ -77,6 +77,7 @@ | |
1a6e0f06 JK |
37594 | #include <linux/string.h> |
37595 | #include <linux/netfilter_ipv4.h> | |
37596 | #include <linux/slab.h> | |
37597 | +#include <linux/locallock.h> | |
37598 | #include <net/snmp.h> | |
37599 | #include <net/ip.h> | |
37600 | #include <net/route.h> | |
e4b2b4a8 | 37601 | @@ -204,6 +205,8 @@ |
1a6e0f06 JK |
37602 | * |
37603 | * On SMP we have one ICMP socket per-cpu. | |
37604 | */ | |
37605 | +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); | |
37606 | + | |
37607 | static struct sock *icmp_sk(struct net *net) | |
37608 | { | |
37609 | return *this_cpu_ptr(net->ipv4.icmp_sk); | |
e4b2b4a8 JK |
37610 | @@ -214,12 +217,16 @@ |
37611 | { | |
37612 | struct sock *sk; | |
1a6e0f06 | 37613 | |
e4b2b4a8 JK |
37614 | + if (!local_trylock(icmp_sk_lock)) |
37615 | + return NULL; | |
37616 | + | |
1a6e0f06 JK |
37617 | sk = icmp_sk(net); |
37618 | ||
37619 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { | |
37620 | /* This can happen if the output path signals a | |
37621 | * dst_link_failure() for an outgoing ICMP packet. | |
37622 | */ | |
37623 | + local_unlock(icmp_sk_lock); | |
1a6e0f06 JK |
37624 | return NULL; |
37625 | } | |
e4b2b4a8 JK |
37626 | return sk; |
37627 | @@ -228,6 +235,7 @@ | |
1a6e0f06 JK |
37628 | static inline void icmp_xmit_unlock(struct sock *sk) |
37629 | { | |
e4b2b4a8 | 37630 | spin_unlock(&sk->sk_lock.slock); |
1a6e0f06 JK |
37631 | + local_unlock(icmp_sk_lock); |
37632 | } | |
37633 | ||
37634 | int sysctl_icmp_msgs_per_sec __read_mostly = 1000; | |
e4b2b4a8 JK |
37635 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/ipv4/tcp_ipv4.c linux-4.14/net/ipv4/tcp_ipv4.c |
37636 | --- linux-4.14.orig/net/ipv4/tcp_ipv4.c 2018-09-05 11:03:25.000000000 +0200 | |
37637 | +++ linux-4.14/net/ipv4/tcp_ipv4.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
37638 | @@ -62,6 +62,7 @@ |
37639 | #include <linux/init.h> | |
37640 | #include <linux/times.h> | |
37641 | #include <linux/slab.h> | |
37642 | +#include <linux/locallock.h> | |
37643 | ||
37644 | #include <net/net_namespace.h> | |
37645 | #include <net/icmp.h> | |
e4b2b4a8 | 37646 | @@ -580,6 +581,7 @@ |
1a6e0f06 JK |
37647 | } |
37648 | EXPORT_SYMBOL(tcp_v4_send_check); | |
37649 | ||
37650 | +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); | |
37651 | /* | |
37652 | * This routine will send an RST to the other tcp. | |
37653 | * | |
e4b2b4a8 | 37654 | @@ -710,6 +712,7 @@ |
1a6e0f06 | 37655 | arg.tos = ip_hdr(skb)->tos; |
e4b2b4a8 | 37656 | arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); |
1a6e0f06 | 37657 | local_bh_disable(); |
e4b2b4a8 | 37658 | + local_lock(tcp_sk_lock); |
1a6e0f06 JK |
37659 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), |
37660 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
e4b2b4a8 JK |
37661 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, |
37662 | @@ -717,6 +720,7 @@ | |
37663 | ||
1a6e0f06 JK |
37664 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); |
37665 | __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); | |
1a6e0f06 | 37666 | + local_unlock(tcp_sk_lock); |
e4b2b4a8 | 37667 | local_bh_enable(); |
1a6e0f06 JK |
37668 | |
37669 | #ifdef CONFIG_TCP_MD5SIG | |
e4b2b4a8 | 37670 | @@ -796,12 +800,14 @@ |
1a6e0f06 | 37671 | arg.tos = tos; |
e4b2b4a8 | 37672 | arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); |
1a6e0f06 | 37673 | local_bh_disable(); |
e4b2b4a8 | 37674 | + local_lock(tcp_sk_lock); |
1a6e0f06 JK |
37675 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), |
37676 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
e4b2b4a8 JK |
37677 | ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, |
37678 | &arg, arg.iov[0].iov_len); | |
1a6e0f06 JK |
37679 | |
37680 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
1a6e0f06 | 37681 | + local_unlock(tcp_sk_lock); |
e4b2b4a8 | 37682 | local_bh_enable(); |
1a6e0f06 JK |
37683 | } |
37684 | ||
e4b2b4a8 JK |
37685 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/Kconfig linux-4.14/net/Kconfig |
37686 | --- linux-4.14.orig/net/Kconfig 2017-11-12 19:46:13.000000000 +0100 | |
37687 | +++ linux-4.14/net/Kconfig 2018-09-05 11:05:07.000000000 +0200 | |
37688 | @@ -272,7 +272,7 @@ | |
37689 | ||
37690 | config NET_RX_BUSY_POLL | |
37691 | bool | |
37692 | - default y | |
37693 | + default y if !PREEMPT_RT_FULL | |
37694 | ||
37695 | config BQL | |
37696 | bool | |
37697 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/mac80211/rx.c linux-4.14/net/mac80211/rx.c | |
37698 | --- linux-4.14.orig/net/mac80211/rx.c 2018-09-05 11:03:25.000000000 +0200 | |
37699 | +++ linux-4.14/net/mac80211/rx.c 2018-09-05 11:05:07.000000000 +0200 | |
37700 | @@ -4252,7 +4252,7 @@ | |
1a6e0f06 JK |
37701 | struct ieee80211_supported_band *sband; |
37702 | struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); | |
37703 | ||
37704 | - WARN_ON_ONCE(softirq_count() == 0); | |
37705 | + WARN_ON_ONCE_NONRT(softirq_count() == 0); | |
37706 | ||
37707 | if (WARN_ON(status->band >= NUM_NL80211_BANDS)) | |
37708 | goto drop; | |
e4b2b4a8 JK |
37709 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/netfilter/core.c linux-4.14/net/netfilter/core.c |
37710 | --- linux-4.14.orig/net/netfilter/core.c 2017-11-12 19:46:13.000000000 +0100 | |
37711 | +++ linux-4.14/net/netfilter/core.c 2018-09-05 11:05:07.000000000 +0200 | |
37712 | @@ -21,6 +21,7 @@ | |
37713 | #include <linux/inetdevice.h> | |
1a6e0f06 JK |
37714 | #include <linux/proc_fs.h> |
37715 | #include <linux/mutex.h> | |
1a6e0f06 | 37716 | +#include <linux/locallock.h> |
e4b2b4a8 | 37717 | #include <linux/mm.h> |
c7c16703 | 37718 | #include <linux/rcupdate.h> |
1a6e0f06 | 37719 | #include <net/net_namespace.h> |
e4b2b4a8 | 37720 | @@ -28,6 +29,11 @@ |
1a6e0f06 JK |
37721 | |
37722 | #include "nf_internals.h" | |
37723 | ||
37724 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
37725 | +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock); | |
37726 | +EXPORT_PER_CPU_SYMBOL(xt_write_lock); | |
37727 | +#endif | |
37728 | + | |
37729 | static DEFINE_MUTEX(afinfo_mutex); | |
37730 | ||
37731 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
e4b2b4a8 JK |
37732 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/packet/af_packet.c linux-4.14/net/packet/af_packet.c |
37733 | --- linux-4.14.orig/net/packet/af_packet.c 2018-09-05 11:03:25.000000000 +0200 | |
37734 | +++ linux-4.14/net/packet/af_packet.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
37735 | @@ -63,6 +63,7 @@ |
37736 | #include <linux/if_packet.h> | |
37737 | #include <linux/wireless.h> | |
37738 | #include <linux/kernel.h> | |
37739 | +#include <linux/delay.h> | |
37740 | #include <linux/kmod.h> | |
37741 | #include <linux/slab.h> | |
37742 | #include <linux/vmalloc.h> | |
e4b2b4a8 | 37743 | @@ -707,7 +708,7 @@ |
1a6e0f06 JK |
37744 | if (BLOCK_NUM_PKTS(pbd)) { |
37745 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
37746 | /* Waiting for skb_copy_bits to finish... */ | |
37747 | - cpu_relax(); | |
37748 | + cpu_chill(); | |
37749 | } | |
37750 | } | |
37751 | ||
e4b2b4a8 | 37752 | @@ -969,7 +970,7 @@ |
1a6e0f06 JK |
37753 | if (!(status & TP_STATUS_BLK_TMO)) { |
37754 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
37755 | /* Waiting for skb_copy_bits to finish... */ | |
37756 | - cpu_relax(); | |
37757 | + cpu_chill(); | |
37758 | } | |
37759 | } | |
37760 | prb_close_block(pkc, pbd, po, status); | |
e4b2b4a8 JK |
37761 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/rds/ib_rdma.c linux-4.14/net/rds/ib_rdma.c |
37762 | --- linux-4.14.orig/net/rds/ib_rdma.c 2017-11-12 19:46:13.000000000 +0100 | |
37763 | +++ linux-4.14/net/rds/ib_rdma.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
37764 | @@ -34,6 +34,7 @@ |
37765 | #include <linux/slab.h> | |
37766 | #include <linux/rculist.h> | |
37767 | #include <linux/llist.h> | |
37768 | +#include <linux/delay.h> | |
37769 | ||
37770 | #include "rds_single_path.h" | |
37771 | #include "ib_mr.h" | |
e4b2b4a8 | 37772 | @@ -210,7 +211,7 @@ |
1a6e0f06 JK |
37773 | for_each_online_cpu(cpu) { |
37774 | flag = &per_cpu(clean_list_grace, cpu); | |
37775 | while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) | |
37776 | - cpu_relax(); | |
37777 | + cpu_chill(); | |
37778 | } | |
37779 | } | |
37780 | ||
e4b2b4a8 JK |
37781 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/rxrpc/security.c linux-4.14/net/rxrpc/security.c |
37782 | --- linux-4.14.orig/net/rxrpc/security.c 2017-11-12 19:46:13.000000000 +0100 | |
37783 | +++ linux-4.14/net/rxrpc/security.c 2018-09-05 11:05:07.000000000 +0200 | |
1a6e0f06 JK |
37784 | @@ -19,9 +19,6 @@ |
37785 | #include <keys/rxrpc-type.h> | |
37786 | #include "ar-internal.h" | |
37787 | ||
37788 | -static LIST_HEAD(rxrpc_security_methods); | |
37789 | -static DECLARE_RWSEM(rxrpc_security_sem); | |
37790 | - | |
37791 | static const struct rxrpc_security *rxrpc_security_types[] = { | |
37792 | [RXRPC_SECURITY_NONE] = &rxrpc_no_security, | |
37793 | #ifdef CONFIG_RXKAD | |
e4b2b4a8 JK |
37794 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/sched/sch_api.c linux-4.14/net/sched/sch_api.c |
37795 | --- linux-4.14.orig/net/sched/sch_api.c 2017-11-12 19:46:13.000000000 +0100 | |
37796 | +++ linux-4.14/net/sched/sch_api.c 2018-09-05 11:05:07.000000000 +0200 | |
37797 | @@ -1081,7 +1081,7 @@ | |
1a6e0f06 JK |
37798 | rcu_assign_pointer(sch->stab, stab); |
37799 | } | |
37800 | if (tca[TCA_RATE]) { | |
37801 | - seqcount_t *running; | |
37802 | + net_seqlock_t *running; | |
37803 | ||
37804 | err = -EOPNOTSUPP; | |
37805 | if (sch->flags & TCQ_F_MQROOT) | |
e4b2b4a8 JK |
37806 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/sched/sch_generic.c linux-4.14/net/sched/sch_generic.c |
37807 | --- linux-4.14.orig/net/sched/sch_generic.c 2018-09-05 11:03:25.000000000 +0200 | |
37808 | +++ linux-4.14/net/sched/sch_generic.c 2018-09-05 11:05:07.000000000 +0200 | |
37809 | @@ -429,7 +429,11 @@ | |
c7c16703 | 37810 | .ops = &noop_qdisc_ops, |
1a6e0f06 JK |
37811 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), |
37812 | .dev_queue = &noop_netdev_queue, | |
37813 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
37814 | + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), | |
37815 | +#else | |
37816 | .running = SEQCNT_ZERO(noop_qdisc.running), | |
37817 | +#endif | |
37818 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | |
37819 | }; | |
37820 | EXPORT_SYMBOL(noop_qdisc); | |
e4b2b4a8 | 37821 | @@ -628,9 +632,17 @@ |
1a6e0f06 JK |
37822 | lockdep_set_class(&sch->busylock, |
37823 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | |
37824 | ||
37825 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
37826 | + seqlock_init(&sch->running); | |
37827 | + lockdep_set_class(&sch->running.seqcount, | |
37828 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
37829 | + lockdep_set_class(&sch->running.lock, | |
37830 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
37831 | +#else | |
37832 | seqcount_init(&sch->running); | |
37833 | lockdep_set_class(&sch->running, | |
37834 | dev->qdisc_running_key ?: &qdisc_running_key); | |
37835 | +#endif | |
37836 | ||
37837 | sch->ops = ops; | |
37838 | sch->enqueue = ops->enqueue; | |
e4b2b4a8 | 37839 | @@ -933,7 +945,7 @@ |
1a6e0f06 | 37840 | /* Wait for outstanding qdisc_run calls. */ |
e4b2b4a8 | 37841 | list_for_each_entry(dev, head, close_list) { |
1a6e0f06 JK |
37842 | while (some_qdisc_is_busy(dev)) |
37843 | - yield(); | |
37844 | + msleep(1); | |
e4b2b4a8 JK |
37845 | /* The new qdisc is assigned at this point so we can safely |
37846 | * unwind stale skb lists and qdisc statistics | |
37847 | */ | |
37848 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/sunrpc/svc_xprt.c linux-4.14/net/sunrpc/svc_xprt.c | |
37849 | --- linux-4.14.orig/net/sunrpc/svc_xprt.c 2017-11-12 19:46:13.000000000 +0100 | |
37850 | +++ linux-4.14/net/sunrpc/svc_xprt.c 2018-09-05 11:05:07.000000000 +0200 | |
37851 | @@ -396,7 +396,7 @@ | |
1a6e0f06 JK |
37852 | goto out; |
37853 | } | |
37854 | ||
37855 | - cpu = get_cpu(); | |
37856 | + cpu = get_cpu_light(); | |
37857 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
37858 | ||
37859 | atomic_long_inc(&pool->sp_stats.packets); | |
e4b2b4a8 | 37860 | @@ -432,7 +432,7 @@ |
1a6e0f06 JK |
37861 | |
37862 | atomic_long_inc(&pool->sp_stats.threads_woken); | |
37863 | wake_up_process(rqstp->rq_task); | |
37864 | - put_cpu(); | |
37865 | + put_cpu_light(); | |
37866 | goto out; | |
37867 | } | |
37868 | rcu_read_unlock(); | |
e4b2b4a8 | 37869 | @@ -453,7 +453,7 @@ |
1a6e0f06 JK |
37870 | goto redo_search; |
37871 | } | |
37872 | rqstp = NULL; | |
37873 | - put_cpu(); | |
37874 | + put_cpu_light(); | |
37875 | out: | |
37876 | trace_svc_xprt_do_enqueue(xprt, rqstp); | |
37877 | } | |
e4b2b4a8 JK |
37878 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/net/xfrm/xfrm_state.c linux-4.14/net/xfrm/xfrm_state.c |
37879 | --- linux-4.14.orig/net/xfrm/xfrm_state.c 2018-09-05 11:03:25.000000000 +0200 | |
37880 | +++ linux-4.14/net/xfrm/xfrm_state.c 2018-09-05 11:05:07.000000000 +0200 | |
37881 | @@ -427,7 +427,7 @@ | |
37882 | ||
37883 | static void xfrm_state_gc_destroy(struct xfrm_state *x) | |
37884 | { | |
37885 | - tasklet_hrtimer_cancel(&x->mtimer); | |
37886 | + hrtimer_cancel(&x->mtimer); | |
37887 | del_timer_sync(&x->rtimer); | |
37888 | kfree(x->aead); | |
37889 | kfree(x->aalg); | |
37890 | @@ -472,8 +472,8 @@ | |
37891 | ||
37892 | static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me) | |
37893 | { | |
37894 | - struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer); | |
37895 | - struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer); | |
37896 | + struct xfrm_state *x = container_of(me, struct xfrm_state, mtimer); | |
37897 | + enum hrtimer_restart ret = HRTIMER_NORESTART; | |
37898 | unsigned long now = get_seconds(); | |
37899 | long next = LONG_MAX; | |
37900 | int warn = 0; | |
37901 | @@ -537,7 +537,8 @@ | |
37902 | km_state_expired(x, 0, 0); | |
37903 | resched: | |
37904 | if (next != LONG_MAX) { | |
37905 | - tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL); | |
37906 | + hrtimer_forward_now(&x->mtimer, ktime_set(next, 0)); | |
37907 | + ret = HRTIMER_RESTART; | |
37908 | } | |
37909 | ||
37910 | goto out; | |
37911 | @@ -554,7 +555,7 @@ | |
37912 | ||
37913 | out: | |
37914 | spin_unlock(&x->lock); | |
37915 | - return HRTIMER_NORESTART; | |
37916 | + return ret; | |
37917 | } | |
37918 | ||
37919 | static void xfrm_replay_timer_handler(unsigned long data); | |
37920 | @@ -573,8 +574,8 @@ | |
37921 | INIT_HLIST_NODE(&x->bydst); | |
37922 | INIT_HLIST_NODE(&x->bysrc); | |
37923 | INIT_HLIST_NODE(&x->byspi); | |
37924 | - tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler, | |
37925 | - CLOCK_BOOTTIME, HRTIMER_MODE_ABS); | |
37926 | + hrtimer_init(&x->mtimer, CLOCK_BOOTTIME, HRTIMER_MODE_ABS_SOFT); | |
37927 | + x->mtimer.function = xfrm_timer_handler; | |
37928 | setup_timer(&x->rtimer, xfrm_replay_timer_handler, | |
37929 | (unsigned long)x); | |
37930 | x->curlft.add_time = get_seconds(); | |
37931 | @@ -1031,7 +1032,9 @@ | |
37932 | hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); | |
37933 | } | |
37934 | x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; | |
37935 | - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); | |
37936 | + hrtimer_start(&x->mtimer, | |
37937 | + ktime_set(net->xfrm.sysctl_acq_expires, 0), | |
37938 | + HRTIMER_MODE_REL_SOFT); | |
37939 | net->xfrm.state_num++; | |
37940 | xfrm_hash_grow_check(net, x->bydst.next != NULL); | |
37941 | spin_unlock_bh(&net->xfrm.xfrm_state_lock); | |
37942 | @@ -1142,7 +1145,7 @@ | |
37943 | hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); | |
37944 | } | |
37945 | ||
37946 | - tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); | |
37947 | + hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL_SOFT); | |
37948 | if (x->replay_maxage) | |
37949 | mod_timer(&x->rtimer, jiffies + x->replay_maxage); | |
37950 | ||
37951 | @@ -1246,7 +1249,9 @@ | |
37952 | x->mark.m = m->m; | |
37953 | x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; | |
37954 | xfrm_state_hold(x); | |
37955 | - tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); | |
37956 | + hrtimer_start(&x->mtimer, | |
37957 | + ktime_set(net->xfrm.sysctl_acq_expires, 0), | |
37958 | + HRTIMER_MODE_REL_SOFT); | |
37959 | list_add(&x->km.all, &net->xfrm.state_all); | |
37960 | hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); | |
37961 | h = xfrm_src_hash(net, daddr, saddr, family); | |
37962 | @@ -1546,7 +1551,8 @@ | |
37963 | memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); | |
37964 | x1->km.dying = 0; | |
37965 | ||
37966 | - tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); | |
37967 | + hrtimer_start(&x1->mtimer, ktime_set(1, 0), | |
37968 | + HRTIMER_MODE_REL_SOFT); | |
37969 | if (x1->curlft.use_time) | |
37970 | xfrm_state_check_expire(x1); | |
37971 | ||
37972 | @@ -1570,7 +1576,7 @@ | |
37973 | if (x->curlft.bytes >= x->lft.hard_byte_limit || | |
37974 | x->curlft.packets >= x->lft.hard_packet_limit) { | |
37975 | x->km.state = XFRM_STATE_EXPIRED; | |
37976 | - tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL); | |
37977 | + hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL_SOFT); | |
37978 | return -EINVAL; | |
37979 | } | |
37980 | ||
37981 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/samples/trace_events/trace-events-sample.c linux-4.14/samples/trace_events/trace-events-sample.c | |
37982 | --- linux-4.14.orig/samples/trace_events/trace-events-sample.c 2017-11-12 19:46:13.000000000 +0100 | |
37983 | +++ linux-4.14/samples/trace_events/trace-events-sample.c 2018-09-05 11:05:07.000000000 +0200 | |
37984 | @@ -33,7 +33,7 @@ | |
37985 | ||
37986 | /* Silly tracepoints */ | |
37987 | trace_foo_bar("hello", cnt, array, random_strings[len], | |
37988 | - ¤t->cpus_allowed); | |
37989 | + current->cpus_ptr); | |
37990 | ||
37991 | trace_foo_with_template_simple("HELLO", cnt); | |
37992 | ||
37993 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/scripts/mkcompile_h linux-4.14/scripts/mkcompile_h | |
37994 | --- linux-4.14.orig/scripts/mkcompile_h 2017-11-12 19:46:13.000000000 +0100 | |
37995 | +++ linux-4.14/scripts/mkcompile_h 2018-09-05 11:05:07.000000000 +0200 | |
37996 | @@ -5,7 +5,8 @@ | |
1a6e0f06 JK |
37997 | ARCH=$2 |
37998 | SMP=$3 | |
37999 | PREEMPT=$4 | |
38000 | -CC=$5 | |
38001 | +RT=$5 | |
38002 | +CC=$6 | |
38003 | ||
38004 | vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } | |
38005 | ||
e4b2b4a8 | 38006 | @@ -58,6 +59,7 @@ |
1a6e0f06 JK |
38007 | CONFIG_FLAGS="" |
38008 | if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi | |
38009 | if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi | |
38010 | +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi | |
38011 | UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" | |
38012 | ||
38013 | # Truncate to maximum length | |
e4b2b4a8 JK |
38014 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/security/apparmor/include/path.h linux-4.14/security/apparmor/include/path.h |
38015 | --- linux-4.14.orig/security/apparmor/include/path.h 2017-11-12 19:46:13.000000000 +0100 | |
38016 | +++ linux-4.14/security/apparmor/include/path.h 2018-09-05 11:05:07.000000000 +0200 | |
38017 | @@ -39,9 +39,10 @@ | |
38018 | }; | |
38019 | ||
38020 | #include <linux/percpu.h> | |
38021 | -#include <linux/preempt.h> | |
38022 | +#include <linux/locallock.h> | |
38023 | ||
38024 | DECLARE_PER_CPU(struct aa_buffers, aa_buffers); | |
38025 | +DECLARE_LOCAL_IRQ_LOCK(aa_buffers_lock); | |
38026 | ||
38027 | #define COUNT_ARGS(X...) COUNT_ARGS_HELPER(, ##X, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) | |
38028 | #define COUNT_ARGS_HELPER(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, n, X...) n | |
38029 | @@ -55,12 +56,24 @@ | |
38030 | ||
38031 | #define for_each_cpu_buffer(I) for ((I) = 0; (I) < MAX_PATH_BUFFERS; (I)++) | |
38032 | ||
38033 | -#ifdef CONFIG_DEBUG_PREEMPT | |
38034 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
38035 | + | |
38036 | +static inline void AA_BUG_PREEMPT_ENABLED(const char *s) | |
38037 | +{ | |
38038 | + struct local_irq_lock *lv; | |
38039 | + | |
38040 | + lv = this_cpu_ptr(&aa_buffers_lock); | |
38041 | + WARN_ONCE(lv->owner != current, | |
38042 | + "__get_buffer without aa_buffers_lock\n"); | |
38043 | +} | |
38044 | + | |
38045 | +#elif defined(CONFIG_DEBUG_PREEMPT) | |
38046 | #define AA_BUG_PREEMPT_ENABLED(X) AA_BUG(preempt_count() <= 0, X) | |
38047 | #else | |
38048 | #define AA_BUG_PREEMPT_ENABLED(X) /* nop */ | |
38049 | #endif | |
38050 | ||
38051 | + | |
38052 | #define __get_buffer(N) ({ \ | |
38053 | struct aa_buffers *__cpu_var; \ | |
38054 | AA_BUG_PREEMPT_ENABLED("__get_buffer without preempt disabled"); \ | |
38055 | @@ -73,14 +86,14 @@ | |
38056 | ||
38057 | #define get_buffers(X...) \ | |
38058 | do { \ | |
38059 | - preempt_disable(); \ | |
38060 | + local_lock(aa_buffers_lock); \ | |
38061 | __get_buffers(X); \ | |
38062 | } while (0) | |
38063 | ||
38064 | #define put_buffers(X, Y...) \ | |
38065 | do { \ | |
38066 | __put_buffers(X, Y); \ | |
38067 | - preempt_enable(); \ | |
38068 | + local_unlock(aa_buffers_lock); \ | |
38069 | } while (0) | |
38070 | ||
38071 | #endif /* __AA_PATH_H */ | |
38072 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/security/apparmor/lsm.c linux-4.14/security/apparmor/lsm.c | |
38073 | --- linux-4.14.orig/security/apparmor/lsm.c 2017-11-12 19:46:13.000000000 +0100 | |
38074 | +++ linux-4.14/security/apparmor/lsm.c 2018-09-05 11:05:07.000000000 +0200 | |
38075 | @@ -44,7 +44,7 @@ | |
38076 | int apparmor_initialized; | |
38077 | ||
38078 | DEFINE_PER_CPU(struct aa_buffers, aa_buffers); | |
38079 | - | |
38080 | +DEFINE_LOCAL_IRQ_LOCK(aa_buffers_lock); | |
38081 | ||
38082 | /* | |
38083 | * LSM hook functions | |
38084 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/sound/core/pcm_native.c linux-4.14/sound/core/pcm_native.c | |
38085 | --- linux-4.14.orig/sound/core/pcm_native.c 2018-09-05 11:03:25.000000000 +0200 | |
38086 | +++ linux-4.14/sound/core/pcm_native.c 2018-09-05 11:05:07.000000000 +0200 | |
38087 | @@ -148,7 +148,7 @@ | |
1a6e0f06 JK |
38088 | void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) |
38089 | { | |
38090 | if (!substream->pcm->nonatomic) | |
38091 | - local_irq_disable(); | |
38092 | + local_irq_disable_nort(); | |
38093 | snd_pcm_stream_lock(substream); | |
38094 | } | |
38095 | EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); | |
e4b2b4a8 | 38096 | @@ -163,7 +163,7 @@ |
1a6e0f06 JK |
38097 | { |
38098 | snd_pcm_stream_unlock(substream); | |
38099 | if (!substream->pcm->nonatomic) | |
38100 | - local_irq_enable(); | |
38101 | + local_irq_enable_nort(); | |
38102 | } | |
38103 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); | |
38104 | ||
e4b2b4a8 | 38105 | @@ -171,7 +171,7 @@ |
1a6e0f06 JK |
38106 | { |
38107 | unsigned long flags = 0; | |
38108 | if (!substream->pcm->nonatomic) | |
38109 | - local_irq_save(flags); | |
38110 | + local_irq_save_nort(flags); | |
38111 | snd_pcm_stream_lock(substream); | |
38112 | return flags; | |
38113 | } | |
e4b2b4a8 | 38114 | @@ -189,7 +189,7 @@ |
1a6e0f06 JK |
38115 | { |
38116 | snd_pcm_stream_unlock(substream); | |
38117 | if (!substream->pcm->nonatomic) | |
38118 | - local_irq_restore(flags); | |
38119 | + local_irq_restore_nort(flags); | |
38120 | } | |
38121 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); | |
38122 | ||
e4b2b4a8 JK |
38123 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/sound/drivers/dummy.c linux-4.14/sound/drivers/dummy.c |
38124 | --- linux-4.14.orig/sound/drivers/dummy.c 2017-11-12 19:46:13.000000000 +0100 | |
38125 | +++ linux-4.14/sound/drivers/dummy.c 2018-09-05 11:05:07.000000000 +0200 | |
38126 | @@ -376,17 +376,9 @@ | |
38127 | ktime_t period_time; | |
38128 | atomic_t running; | |
38129 | struct hrtimer timer; | |
38130 | - struct tasklet_struct tasklet; | |
38131 | struct snd_pcm_substream *substream; | |
38132 | }; | |
38133 | ||
38134 | -static void dummy_hrtimer_pcm_elapsed(unsigned long priv) | |
38135 | -{ | |
38136 | - struct dummy_hrtimer_pcm *dpcm = (struct dummy_hrtimer_pcm *)priv; | |
38137 | - if (atomic_read(&dpcm->running)) | |
38138 | - snd_pcm_period_elapsed(dpcm->substream); | |
38139 | -} | |
38140 | - | |
38141 | static enum hrtimer_restart dummy_hrtimer_callback(struct hrtimer *timer) | |
38142 | { | |
38143 | struct dummy_hrtimer_pcm *dpcm; | |
38144 | @@ -394,7 +386,14 @@ | |
38145 | dpcm = container_of(timer, struct dummy_hrtimer_pcm, timer); | |
38146 | if (!atomic_read(&dpcm->running)) | |
38147 | return HRTIMER_NORESTART; | |
38148 | - tasklet_schedule(&dpcm->tasklet); | |
38149 | + /* | |
38150 | + * In cases of XRUN and draining, this calls .trigger to stop PCM | |
38151 | + * substream. | |
38152 | + */ | |
38153 | + snd_pcm_period_elapsed(dpcm->substream); | |
38154 | + if (!atomic_read(&dpcm->running)) | |
38155 | + return HRTIMER_NORESTART; | |
38156 | + | |
38157 | hrtimer_forward_now(timer, dpcm->period_time); | |
38158 | return HRTIMER_RESTART; | |
38159 | } | |
38160 | @@ -404,7 +403,7 @@ | |
38161 | struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; | |
38162 | ||
38163 | dpcm->base_time = hrtimer_cb_get_time(&dpcm->timer); | |
38164 | - hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL); | |
38165 | + hrtimer_start(&dpcm->timer, dpcm->period_time, HRTIMER_MODE_REL_SOFT); | |
38166 | atomic_set(&dpcm->running, 1); | |
38167 | return 0; | |
38168 | } | |
38169 | @@ -414,14 +413,14 @@ | |
38170 | struct dummy_hrtimer_pcm *dpcm = substream->runtime->private_data; | |
38171 | ||
38172 | atomic_set(&dpcm->running, 0); | |
38173 | - hrtimer_cancel(&dpcm->timer); | |
38174 | + if (!hrtimer_callback_running(&dpcm->timer)) | |
38175 | + hrtimer_cancel(&dpcm->timer); | |
38176 | return 0; | |
38177 | } | |
38178 | ||
38179 | static inline void dummy_hrtimer_sync(struct dummy_hrtimer_pcm *dpcm) | |
38180 | { | |
38181 | hrtimer_cancel(&dpcm->timer); | |
38182 | - tasklet_kill(&dpcm->tasklet); | |
38183 | } | |
38184 | ||
38185 | static snd_pcm_uframes_t | |
38186 | @@ -466,12 +465,10 @@ | |
38187 | if (!dpcm) | |
38188 | return -ENOMEM; | |
38189 | substream->runtime->private_data = dpcm; | |
38190 | - hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
38191 | + hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); | |
38192 | dpcm->timer.function = dummy_hrtimer_callback; | |
38193 | dpcm->substream = substream; | |
38194 | atomic_set(&dpcm->running, 0); | |
38195 | - tasklet_init(&dpcm->tasklet, dummy_hrtimer_pcm_elapsed, | |
38196 | - (unsigned long)dpcm); | |
38197 | return 0; | |
38198 | } | |
38199 | ||
38200 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/functions linux-4.14/tools/testing/selftests/ftrace/test.d/functions | |
38201 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/functions 2018-09-05 11:03:25.000000000 +0200 | |
38202 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/functions 2018-09-05 11:05:07.000000000 +0200 | |
38203 | @@ -70,6 +70,13 @@ | |
38204 | echo 0 > events/enable | |
38205 | } | |
38206 | ||
38207 | +clear_synthetic_events() { # reset all current synthetic events | |
38208 | + grep -v ^# synthetic_events | | |
38209 | + while read line; do | |
38210 | + echo "!$line" >> synthetic_events | |
38211 | + done | |
38212 | +} | |
38213 | + | |
38214 | initialize_ftrace() { # Reset ftrace to initial-state | |
38215 | # As the initial state, ftrace will be set to nop tracer, | |
38216 | # no events, no triggers, no filters, no function filters, | |
38217 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc | |
38218 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc 1970-01-01 01:00:00.000000000 +0100 | |
38219 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc 2018-09-05 11:05:07.000000000 +0200 | |
38220 | @@ -0,0 +1,39 @@ | |
38221 | +#!/bin/sh | |
38222 | +# description: event trigger - test extended error support | |
38223 | + | |
38224 | + | |
38225 | +do_reset() { | |
38226 | + reset_trigger | |
38227 | + echo > set_event | |
38228 | + clear_trace | |
38229 | +} | |
38230 | + | |
38231 | +fail() { #msg | |
38232 | + do_reset | |
38233 | + echo $1 | |
38234 | + exit_fail | |
38235 | +} | |
38236 | + | |
38237 | +if [ ! -f set_event ]; then | |
38238 | + echo "event tracing is not supported" | |
38239 | + exit_unsupported | |
38240 | +fi | |
38241 | + | |
38242 | +if [ ! -f synthetic_events ]; then | |
38243 | + echo "synthetic event is not supported" | |
38244 | + exit_unsupported | |
38245 | +fi | |
38246 | + | |
38247 | +reset_tracer | |
38248 | +do_reset | |
38249 | + | |
38250 | +echo "Test extended error support" | |
38251 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | |
38252 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger &>/dev/null | |
38253 | +if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then | |
38254 | + fail "Failed to generate extended error in histogram" | |
38255 | +fi | |
38256 | + | |
38257 | +do_reset | |
38258 | + | |
38259 | +exit 0 | |
38260 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc | |
38261 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc 1970-01-01 01:00:00.000000000 +0100 | |
38262 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc 2018-09-05 11:05:07.000000000 +0200 | |
38263 | @@ -0,0 +1,54 @@ | |
38264 | +#!/bin/sh | |
38265 | +# description: event trigger - test field variable support | |
38266 | + | |
38267 | +do_reset() { | |
38268 | + reset_trigger | |
38269 | + echo > set_event | |
38270 | + clear_trace | |
38271 | +} | |
38272 | + | |
38273 | +fail() { #msg | |
38274 | + do_reset | |
38275 | + echo $1 | |
38276 | + exit_fail | |
38277 | +} | |
38278 | + | |
38279 | +if [ ! -f set_event ]; then | |
38280 | + echo "event tracing is not supported" | |
38281 | + exit_unsupported | |
38282 | +fi | |
38283 | + | |
38284 | +if [ ! -f synthetic_events ]; then | |
38285 | + echo "synthetic event is not supported" | |
38286 | + exit_unsupported | |
38287 | +fi | |
38288 | + | |
38289 | +clear_synthetic_events | |
38290 | +reset_tracer | |
38291 | +do_reset | |
38292 | + | |
38293 | +echo "Test field variable support" | |
38294 | + | |
38295 | +echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events | |
38296 | +echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger | |
38297 | +echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger | |
38298 | +echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger | |
38299 | + | |
38300 | +ping localhost -c 3 | |
38301 | +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then | |
38302 | + fail "Failed to create inter-event histogram" | |
38303 | +fi | |
38304 | + | |
38305 | +if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then | |
38306 | + fail "Failed to create histogram with field variable" | |
38307 | +fi | |
38308 | + | |
38309 | +echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | |
38310 | + | |
38311 | +if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then | |
38312 | + fail "Failed to remove histogram with field variable" | |
38313 | +fi | |
38314 | + | |
38315 | +do_reset | |
38316 | + | |
38317 | +exit 0 | |
38318 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | |
38319 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc 1970-01-01 01:00:00.000000000 +0100 | |
38320 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc 2018-09-05 11:05:07.000000000 +0200 | |
38321 | @@ -0,0 +1,58 @@ | |
38322 | +#!/bin/sh | |
38323 | +# description: event trigger - test inter-event combined histogram trigger | |
38324 | + | |
38325 | +do_reset() { | |
38326 | + reset_trigger | |
38327 | + echo > set_event | |
38328 | + clear_trace | |
38329 | +} | |
38330 | + | |
38331 | +fail() { #msg | |
38332 | + do_reset | |
38333 | + echo $1 | |
38334 | + exit_fail | |
38335 | +} | |
38336 | + | |
38337 | +if [ ! -f set_event ]; then | |
38338 | + echo "event tracing is not supported" | |
38339 | + exit_unsupported | |
38340 | +fi | |
38341 | + | |
38342 | +if [ ! -f synthetic_events ]; then | |
38343 | + echo "synthetic event is not supported" | |
38344 | + exit_unsupported | |
38345 | +fi | |
38346 | + | |
38347 | +reset_tracer | |
38348 | +do_reset | |
38349 | +clear_synthetic_events | |
38350 | + | |
38351 | +echo "Test create synthetic event" | |
38352 | + | |
38353 | +echo 'waking_latency u64 lat pid_t pid' > synthetic_events | |
38354 | +if [ ! -d events/synthetic/waking_latency ]; then | |
38355 | + fail "Failed to create waking_latency synthetic event" | |
38356 | +fi | |
38357 | + | |
38358 | +echo "Test combined histogram" | |
38359 | + | |
38360 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger | |
38361 | +echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger | |
38362 | +echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger | |
38363 | + | |
38364 | +echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events | |
38365 | +echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger | |
38366 | +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger | |
38367 | + | |
38368 | +echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events | |
38369 | +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger | |
38370 | +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger | |
38371 | + | |
38372 | +ping localhost -c 3 | |
38373 | +if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then | |
38374 | + fail "Failed to create combined histogram" | |
38375 | +fi | |
38376 | + | |
38377 | +do_reset | |
38378 | + | |
38379 | +exit 0 | |
38380 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc | |
38381 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc 1970-01-01 01:00:00.000000000 +0100 | |
38382 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc 2018-09-05 11:05:07.000000000 +0200 | |
38383 | @@ -0,0 +1,50 @@ | |
38384 | +#!/bin/sh | |
38385 | +# description: event trigger - test inter-event histogram trigger onmatch action | |
38386 | + | |
38387 | +do_reset() { | |
38388 | + reset_trigger | |
38389 | + echo > set_event | |
38390 | + clear_trace | |
38391 | +} | |
38392 | + | |
38393 | +fail() { #msg | |
38394 | + do_reset | |
38395 | + echo $1 | |
38396 | + exit_fail | |
38397 | +} | |
38398 | + | |
38399 | +if [ ! -f set_event ]; then | |
38400 | + echo "event tracing is not supported" | |
38401 | + exit_unsupported | |
38402 | +fi | |
38403 | + | |
38404 | +if [ ! -f synthetic_events ]; then | |
38405 | + echo "synthetic event is not supported" | |
38406 | + exit_unsupported | |
38407 | +fi | |
38408 | + | |
38409 | +clear_synthetic_events | |
38410 | +reset_tracer | |
38411 | +do_reset | |
38412 | + | |
38413 | +echo "Test create synthetic event" | |
38414 | + | |
38415 | +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | |
38416 | +if [ ! -d events/synthetic/wakeup_latency ]; then | |
38417 | + fail "Failed to create wakeup_latency synthetic event" | |
38418 | +fi | |
38419 | + | |
38420 | +echo "Test create histogram for synthetic event" | |
38421 | +echo "Test histogram variables,simple expression support and onmatch action" | |
38422 | + | |
38423 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | |
38424 | +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger | |
38425 | +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger | |
38426 | +ping localhost -c 5 | |
38427 | +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then | |
38428 | + fail "Failed to create onmatch action inter-event histogram" | |
38429 | +fi | |
38430 | + | |
38431 | +do_reset | |
38432 | + | |
38433 | +exit 0 | |
38434 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc | |
38435 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc 1970-01-01 01:00:00.000000000 +0100 | |
38436 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc 2018-09-05 11:05:07.000000000 +0200 | |
38437 | @@ -0,0 +1,50 @@ | |
38438 | +#!/bin/sh | |
38439 | +# description: event trigger - test inter-event histogram trigger onmatch-onmax action | |
38440 | + | |
38441 | +do_reset() { | |
38442 | + reset_trigger | |
38443 | + echo > set_event | |
38444 | + clear_trace | |
38445 | +} | |
38446 | + | |
38447 | +fail() { #msg | |
38448 | + do_reset | |
38449 | + echo $1 | |
38450 | + exit_fail | |
38451 | +} | |
38452 | + | |
38453 | +if [ ! -f set_event ]; then | |
38454 | + echo "event tracing is not supported" | |
38455 | + exit_unsupported | |
38456 | +fi | |
38457 | + | |
38458 | +if [ ! -f synthetic_events ]; then | |
38459 | + echo "synthetic event is not supported" | |
38460 | + exit_unsupported | |
38461 | +fi | |
38462 | + | |
38463 | +clear_synthetic_events | |
38464 | +reset_tracer | |
38465 | +do_reset | |
38466 | + | |
38467 | +echo "Test create synthetic event" | |
38468 | + | |
38469 | +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | |
38470 | +if [ ! -d events/synthetic/wakeup_latency ]; then | |
38471 | + fail "Failed to create wakeup_latency synthetic event" | |
38472 | +fi | |
38473 | + | |
38474 | +echo "Test create histogram for synthetic event" | |
38475 | +echo "Test histogram variables,simple expression support and onmatch-onmax action" | |
38476 | + | |
38477 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger | |
38478 | +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | |
38479 | +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger | |
38480 | +ping localhost -c 5 | |
38481 | +if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then | |
38482 | + fail "Failed to create onmatch-onmax action inter-event histogram" | |
38483 | +fi | |
38484 | + | |
38485 | +do_reset | |
38486 | + | |
38487 | +exit 0 | |
38488 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc | |
38489 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc 1970-01-01 01:00:00.000000000 +0100 | |
38490 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc 2018-09-05 11:05:07.000000000 +0200 | |
38491 | @@ -0,0 +1,48 @@ | |
38492 | +#!/bin/sh | |
38493 | +# description: event trigger - test inter-event histogram trigger onmax action | |
38494 | + | |
38495 | +do_reset() { | |
38496 | + reset_trigger | |
38497 | + echo > set_event | |
38498 | + clear_trace | |
38499 | +} | |
38500 | + | |
38501 | +fail() { #msg | |
38502 | + do_reset | |
38503 | + echo $1 | |
38504 | + exit_fail | |
38505 | +} | |
38506 | + | |
38507 | +if [ ! -f set_event ]; then | |
38508 | + echo "event tracing is not supported" | |
38509 | + exit_unsupported | |
38510 | +fi | |
38511 | + | |
38512 | +if [ ! -f synthetic_events ]; then | |
38513 | + echo "synthetic event is not supported" | |
38514 | + exit_unsupported | |
38515 | +fi | |
38516 | + | |
38517 | +clear_synthetic_events | |
38518 | +reset_tracer | |
38519 | +do_reset | |
38520 | + | |
38521 | +echo "Test create synthetic event" | |
38522 | + | |
38523 | +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | |
38524 | +if [ ! -d events/synthetic/wakeup_latency ]; then | |
38525 | + fail "Failed to create wakeup_latency synthetic event" | |
38526 | +fi | |
38527 | + | |
38528 | +echo "Test onmax action" | |
38529 | + | |
38530 | +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger | |
38531 | +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger | |
38532 | +ping localhost -c 3 | |
38533 | +if ! grep -q "max:" events/sched/sched_switch/hist; then | |
38534 | + fail "Failed to create onmax action inter-event histogram" | |
38535 | +fi | |
38536 | + | |
38537 | +do_reset | |
38538 | + | |
38539 | +exit 0 | |
38540 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc | |
38541 | --- linux-4.14.orig/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc 1970-01-01 01:00:00.000000000 +0100 | |
38542 | +++ linux-4.14/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc 2018-09-05 11:05:07.000000000 +0200 | |
38543 | @@ -0,0 +1,54 @@ | |
38544 | +#!/bin/sh | |
38545 | +# description: event trigger - test synthetic event create remove | |
38546 | +do_reset() { | |
38547 | + reset_trigger | |
38548 | + echo > set_event | |
38549 | + clear_trace | |
38550 | +} | |
38551 | + | |
38552 | +fail() { #msg | |
38553 | + do_reset | |
38554 | + echo $1 | |
38555 | + exit_fail | |
38556 | +} | |
38557 | + | |
38558 | +if [ ! -f set_event ]; then | |
38559 | + echo "event tracing is not supported" | |
38560 | + exit_unsupported | |
38561 | +fi | |
38562 | + | |
38563 | +if [ ! -f synthetic_events ]; then | |
38564 | + echo "synthetic event is not supported" | |
38565 | + exit_unsupported | |
38566 | +fi | |
38567 | + | |
38568 | +clear_synthetic_events | |
38569 | +reset_tracer | |
38570 | +do_reset | |
38571 | + | |
38572 | +echo "Test create synthetic event" | |
38573 | + | |
38574 | +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | |
38575 | +if [ ! -d events/synthetic/wakeup_latency ]; then | |
38576 | + fail "Failed to create wakeup_latency synthetic event" | |
38577 | +fi | |
38578 | + | |
38579 | +reset_trigger | |
38580 | + | |
38581 | +echo "Test create synthetic event with an error" | |
38582 | +echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null | |
38583 | +if [ -d events/synthetic/wakeup_latency ]; then | |
38584 | + fail "Created wakeup_latency synthetic event with an invalid format" | |
38585 | +fi | |
38586 | + | |
38587 | +reset_trigger | |
38588 | + | |
38589 | +echo "Test remove synthetic event" | |
38590 | +echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events | |
38591 | +if [ -d events/synthetic/wakeup_latency ]; then | |
38592 | + fail "Failed to delete wakeup_latency synthetic event" | |
38593 | +fi | |
38594 | + | |
38595 | +do_reset | |
38596 | + | |
38597 | +exit 0 | |
38598 | diff -durN -x '*~' -x '*.orig' linux-4.14.orig/virt/kvm/arm/arm.c linux-4.14/virt/kvm/arm/arm.c | |
38599 | --- linux-4.14.orig/virt/kvm/arm/arm.c 2018-09-05 11:03:25.000000000 +0200 | |
38600 | +++ linux-4.14/virt/kvm/arm/arm.c 2018-09-05 11:05:07.000000000 +0200 | |
38601 | @@ -69,7 +69,6 @@ | |
38602 | ||
38603 | static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) | |
38604 | { | |
38605 | - BUG_ON(preemptible()); | |
38606 | __this_cpu_write(kvm_arm_running_vcpu, vcpu); | |
38607 | } | |
38608 | ||
38609 | @@ -79,7 +78,6 @@ | |
38610 | */ | |
38611 | struct kvm_vcpu *kvm_arm_get_running_vcpu(void) | |
38612 | { | |
38613 | - BUG_ON(preemptible()); | |
38614 | return __this_cpu_read(kvm_arm_running_vcpu); | |
38615 | } | |
38616 | ||
38617 | @@ -653,7 +651,7 @@ | |
38618 | * involves poking the GIC, which must be done in a | |
38619 | * non-preemptible context. | |
38620 | */ | |
38621 | - preempt_disable(); | |
38622 | + migrate_disable(); | |
38623 | ||
38624 | kvm_pmu_flush_hwstate(vcpu); | |
38625 | ||
38626 | @@ -690,7 +688,7 @@ | |
38627 | kvm_pmu_sync_hwstate(vcpu); | |
38628 | kvm_timer_sync_hwstate(vcpu); | |
38629 | kvm_vgic_sync_hwstate(vcpu); | |
38630 | - preempt_enable(); | |
38631 | + migrate_enable(); | |
38632 | continue; | |
38633 | } | |
38634 | ||
38635 | @@ -745,7 +743,7 @@ | |
38636 | ||
38637 | kvm_vgic_sync_hwstate(vcpu); | |
38638 | ||
38639 | - preempt_enable(); | |
38640 | + migrate_enable(); | |
38641 | ||
38642 | ret = handle_exit(vcpu, run, ret); | |
38643 | } |