]>
Commit | Line | Data |
---|---|---|
1a6e0f06 JK |
1 | diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt |
2 | index 3a3b30ac2a75..9e0745cafbd8 100644 | |
3 | --- a/Documentation/sysrq.txt | |
4 | +++ b/Documentation/sysrq.txt | |
5 | @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>, | |
6 | On other - If you know of the key combos for other architectures, please | |
7 | let me know so I can add them to this section. | |
8 | ||
9 | -On all - write a character to /proc/sysrq-trigger. e.g.: | |
10 | - | |
11 | +On all - write a character to /proc/sysrq-trigger, e.g.: | |
12 | echo t > /proc/sysrq-trigger | |
13 | ||
14 | +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g. | |
15 | + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq | |
16 | + Send an ICMP echo request with this pattern plus the particular | |
17 | + SysRq command key. Example: | |
18 | + # ping -c1 -s57 -p0102030468 | |
19 | + will trigger the SysRq-H (help) command. | |
20 | + | |
21 | + | |
22 | * What are the 'command' keys? | |
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
24 | 'b' - Will immediately reboot the system without syncing or unmounting | |
25 | diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt | |
26 | new file mode 100644 | |
27 | index 000000000000..6f2aeabf7faa | |
28 | --- /dev/null | |
29 | +++ b/Documentation/trace/histograms.txt | |
30 | @@ -0,0 +1,186 @@ | |
31 | + Using the Linux Kernel Latency Histograms | |
32 | + | |
33 | + | |
34 | +This document gives a short explanation how to enable, configure and use | |
35 | +latency histograms. Latency histograms are primarily relevant in the | |
36 | +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) | |
37 | +and are used in the quality management of the Linux real-time | |
38 | +capabilities. | |
39 | + | |
40 | + | |
41 | +* Purpose of latency histograms | |
42 | + | |
43 | +A latency histogram continuously accumulates the frequencies of latency | |
44 | +data. There are two types of histograms | |
45 | +- potential sources of latencies | |
46 | +- effective latencies | |
47 | + | |
48 | + | |
49 | +* Potential sources of latencies | |
50 | + | |
51 | +Potential sources of latencies are code segments where interrupts, | |
52 | +preemption or both are disabled (aka critical sections). To create | |
53 | +histograms of potential sources of latency, the kernel stores the time | |
54 | +stamp at the start of a critical section, determines the time elapsed | |
55 | +when the end of the section is reached, and increments the frequency | |
56 | +counter of that latency value - irrespective of whether any concurrently | |
57 | +running process is affected by latency or not. | |
58 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
59 | + CONFIG_INTERRUPT_OFF_LATENCY | |
60 | + CONFIG_PREEMPT_OFF_LATENCY | |
61 | + | |
62 | + | |
63 | +* Effective latencies | |
64 | + | |
65 | +Effective latencies are actually occurring during wakeup of a process. To | |
66 | +determine effective latencies, the kernel stores the time stamp when a | |
67 | +process is scheduled to be woken up, and determines the duration of the | |
68 | +wakeup time shortly before control is passed over to this process. Note | |
69 | +that the apparent latency in user space may be somewhat longer, since the | |
70 | +process may be interrupted after control is passed over to it but before | |
71 | +the execution in user space takes place. Simply measuring the interval | |
72 | +between enqueuing and wakeup may also not be appropriate in cases when a | |
73 | +process is scheduled as a result of a timer expiration. The timer may have | |
74 | +missed its deadline, e.g. due to disabled interrupts, but this latency | |
75 | +would not be registered. Therefore, the offsets of missed timers are | |
76 | +recorded in a separate histogram. If both wakeup latency and missed timer | |
77 | +offsets are configured and enabled, a third histogram may be enabled that | |
78 | +records the overall latency as a sum of the timer latency, if any, and the | |
79 | +wakeup latency. This histogram is called "timerandwakeup". | |
80 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
81 | + CONFIG_WAKEUP_LATENCY | |
82 | + CONFIG_MISSED_TIMER_OFFSETS | |
83 | + | |
84 | + | |
85 | +* Usage | |
86 | + | |
87 | +The interface to the administration of the latency histograms is located | |
88 | +in the debugfs file system. To mount it, either enter | |
89 | + | |
90 | +mount -t sysfs nodev /sys | |
91 | +mount -t debugfs nodev /sys/kernel/debug | |
92 | + | |
93 | +from shell command line level, or add | |
94 | + | |
95 | +nodev /sys sysfs defaults 0 0 | |
96 | +nodev /sys/kernel/debug debugfs defaults 0 0 | |
97 | + | |
98 | +to the file /etc/fstab. All latency histogram related files are then | |
99 | +available in the directory /sys/kernel/debug/tracing/latency_hist. A | |
100 | +particular histogram type is enabled by writing non-zero to the related | |
101 | +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. | |
102 | +Select "preemptirqsoff" for the histograms of potential sources of | |
103 | +latencies and "wakeup" for histograms of effective latencies etc. The | |
104 | +histogram data - one per CPU - are available in the files | |
105 | + | |
106 | +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx | |
107 | +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx | |
108 | +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx | |
109 | +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx | |
110 | +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx | |
111 | +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx | |
112 | +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx | |
113 | + | |
114 | +The histograms are reset by writing non-zero to the file "reset" in a | |
115 | +particular latency directory. To reset all latency data, use | |
116 | + | |
117 | +#!/bin/sh | |
118 | + | |
119 | +TRACINGDIR=/sys/kernel/debug/tracing | |
120 | +HISTDIR=$TRACINGDIR/latency_hist | |
121 | + | |
122 | +if test -d $HISTDIR | |
123 | +then | |
124 | + cd $HISTDIR | |
125 | + for i in `find . | grep /reset$` | |
126 | + do | |
127 | + echo 1 >$i | |
128 | + done | |
129 | +fi | |
130 | + | |
131 | + | |
132 | +* Data format | |
133 | + | |
134 | +Latency data are stored with a resolution of one microsecond. The | |
135 | +maximum latency is 10,240 microseconds. The data are only valid, if the | |
136 | +overflow register is empty. Every output line contains the latency in | |
137 | +microseconds in the first row and the number of samples in the second | |
138 | +row. To display only lines with a positive latency count, use, for | |
139 | +example, | |
140 | + | |
141 | +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 | |
142 | + | |
143 | +#Minimum latency: 0 microseconds. | |
144 | +#Average latency: 0 microseconds. | |
145 | +#Maximum latency: 25 microseconds. | |
146 | +#Total samples: 3104770694 | |
147 | +#There are 0 samples greater or equal than 10240 microseconds | |
148 | +#usecs samples | |
149 | + 0 2984486876 | |
150 | + 1 49843506 | |
151 | + 2 58219047 | |
152 | + 3 5348126 | |
153 | + 4 2187960 | |
154 | + 5 3388262 | |
155 | + 6 959289 | |
156 | + 7 208294 | |
157 | + 8 40420 | |
158 | + 9 4485 | |
159 | + 10 14918 | |
160 | + 11 18340 | |
161 | + 12 25052 | |
162 | + 13 19455 | |
163 | + 14 5602 | |
164 | + 15 969 | |
165 | + 16 47 | |
166 | + 17 18 | |
167 | + 18 14 | |
168 | + 19 1 | |
169 | + 20 3 | |
170 | + 21 2 | |
171 | + 22 5 | |
172 | + 23 2 | |
173 | + 25 1 | |
174 | + | |
175 | + | |
176 | +* Wakeup latency of a selected process | |
177 | + | |
178 | +To only collect wakeup latency data of a particular process, write the | |
179 | +PID of the requested process to | |
180 | + | |
181 | +/sys/kernel/debug/tracing/latency_hist/wakeup/pid | |
182 | + | |
183 | +PIDs are not considered, if this variable is set to 0. | |
184 | + | |
185 | + | |
186 | +* Details of the process with the highest wakeup latency so far | |
187 | + | |
188 | +Selected data of the process that suffered from the highest wakeup | |
189 | +latency that occurred in a particular CPU are available in the file | |
190 | + | |
191 | +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx. | |
192 | + | |
193 | +In addition, other relevant system data at the time when the | |
194 | +latency occurred are given. | |
195 | + | |
196 | +The format of the data is (all in one line): | |
197 | +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \ | |
198 | +<- <PID> <Priority> <Command> <Timestamp> | |
199 | + | |
200 | +The value of <Timeroffset> is only relevant in the combined timer | |
201 | +and wakeup latency recording. In the wakeup recording, it is | |
202 | +always 0, in the missed_timer_offsets recording, it is the same | |
203 | +as <Latency>. | |
204 | + | |
205 | +When retrospectively searching for the origin of a latency and | |
206 | +tracing was not enabled, it may be helpful to know the name and | |
207 | +some basic data of the task that (finally) was switching to the | |
208 | +late real-time task. In addition to the victim's data, also the | |
209 | +data of the possible culprit are therefore displayed after the | |
210 | +"<-" symbol. | |
211 | + | |
212 | +Finally, the timestamp of the time when the latency occurred | |
213 | +in <seconds>.<microseconds> after the most recent system boot | |
214 | +is provided. | |
215 | + | |
216 | +These data are also reset when the wakeup histogram is reset. | |
1a6e0f06 | 217 | diff --git a/arch/Kconfig b/arch/Kconfig |
c7c16703 | 218 | index 659bdd079277..099fc0f5155e 100644 |
1a6e0f06 JK |
219 | --- a/arch/Kconfig |
220 | +++ b/arch/Kconfig | |
221 | @@ -9,6 +9,7 @@ config OPROFILE | |
222 | tristate "OProfile system profiling" | |
223 | depends on PROFILING | |
224 | depends on HAVE_OPROFILE | |
225 | + depends on !PREEMPT_RT_FULL | |
226 | select RING_BUFFER | |
227 | select RING_BUFFER_ALLOW_SWAP | |
228 | help | |
229 | @@ -52,6 +53,7 @@ config KPROBES | |
230 | config JUMP_LABEL | |
231 | bool "Optimize very unlikely/likely branches" | |
232 | depends on HAVE_ARCH_JUMP_LABEL | |
233 | + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) | |
234 | help | |
235 | This option enables a transparent branch optimization that | |
236 | makes certain almost-always-true or almost-always-false branch | |
237 | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig | |
c7c16703 | 238 | index b5d529fdffab..5715844e83e3 100644 |
1a6e0f06 JK |
239 | --- a/arch/arm/Kconfig |
240 | +++ b/arch/arm/Kconfig | |
241 | @@ -36,7 +36,7 @@ config ARM | |
242 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
243 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
244 | select HAVE_ARCH_HARDENED_USERCOPY | |
245 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU | |
246 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE | |
247 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU | |
248 | select HAVE_ARCH_MMAP_RND_BITS if MMU | |
249 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
250 | @@ -75,6 +75,7 @@ config ARM | |
251 | select HAVE_PERF_EVENTS | |
252 | select HAVE_PERF_REGS | |
253 | select HAVE_PERF_USER_STACK_DUMP | |
254 | + select HAVE_PREEMPT_LAZY | |
255 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
256 | select HAVE_REGS_AND_STACK_ACCESS_API | |
257 | select HAVE_SYSCALL_TRACEPOINTS | |
c7c16703 JK |
258 | diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h |
259 | index e53638c8ed8a..6095a1649865 100644 | |
260 | --- a/arch/arm/include/asm/irq.h | |
261 | +++ b/arch/arm/include/asm/irq.h | |
262 | @@ -22,6 +22,8 @@ | |
263 | #endif | |
264 | ||
265 | #ifndef __ASSEMBLY__ | |
266 | +#include <linux/cpumask.h> | |
267 | + | |
268 | struct irqaction; | |
269 | struct pt_regs; | |
270 | extern void migrate_irqs(void); | |
1a6e0f06 JK |
271 | diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h |
272 | index 12ebfcc1d539..c962084605bc 100644 | |
273 | --- a/arch/arm/include/asm/switch_to.h | |
274 | +++ b/arch/arm/include/asm/switch_to.h | |
275 | @@ -3,6 +3,13 @@ | |
276 | ||
277 | #include <linux/thread_info.h> | |
278 | ||
279 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
280 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
281 | +#else | |
282 | +static inline void | |
283 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
284 | +#endif | |
285 | + | |
286 | /* | |
287 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
288 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
289 | @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info | |
290 | #define switch_to(prev,next,last) \ | |
291 | do { \ | |
292 | __complete_pending_tlbi(); \ | |
293 | + switch_kmaps(prev, next); \ | |
294 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
295 | } while (0) | |
296 | ||
297 | diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h | |
298 | index 776757d1604a..1f36a4eccc72 100644 | |
299 | --- a/arch/arm/include/asm/thread_info.h | |
300 | +++ b/arch/arm/include/asm/thread_info.h | |
301 | @@ -49,6 +49,7 @@ struct cpu_context_save { | |
302 | struct thread_info { | |
303 | unsigned long flags; /* low level flags */ | |
304 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
305 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
306 | mm_segment_t addr_limit; /* address limit */ | |
307 | struct task_struct *task; /* main task structure */ | |
308 | __u32 cpu; /* cpu */ | |
309 | @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
310 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ | |
311 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
312 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
313 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
314 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
315 | +#define TIF_NEED_RESCHED_LAZY 7 | |
316 | ||
317 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
318 | #define TIF_USING_IWMMXT 17 | |
319 | @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
320 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | |
321 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
322 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
323 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
324 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
325 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
326 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
327 | @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
328 | * Change these and you break ASM code in entry-common.S | |
329 | */ | |
330 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
331 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
332 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
333 | + _TIF_NEED_RESCHED_LAZY) | |
334 | ||
335 | #endif /* __KERNEL__ */ | |
336 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
337 | diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c | |
338 | index 608008229c7d..3866da3f7bb7 100644 | |
339 | --- a/arch/arm/kernel/asm-offsets.c | |
340 | +++ b/arch/arm/kernel/asm-offsets.c | |
341 | @@ -65,6 +65,7 @@ int main(void) | |
342 | BLANK(); | |
343 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
344 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
345 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
346 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
347 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
348 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
349 | diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S | |
350 | index 9f157e7c51e7..468e224d76aa 100644 | |
351 | --- a/arch/arm/kernel/entry-armv.S | |
352 | +++ b/arch/arm/kernel/entry-armv.S | |
353 | @@ -220,11 +220,18 @@ ENDPROC(__dabt_svc) | |
354 | ||
355 | #ifdef CONFIG_PREEMPT | |
356 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
357 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
358 | teq r8, #0 @ if preempt count != 0 | |
359 | + bne 1f @ return from exception | |
360 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
361 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
362 | + blne svc_preempt @ preempt! | |
363 | + | |
364 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
365 | + teq r8, #0 @ if preempt lazy count != 0 | |
366 | movne r0, #0 @ force flags to 0 | |
367 | - tst r0, #_TIF_NEED_RESCHED | |
368 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
369 | blne svc_preempt | |
370 | +1: | |
371 | #endif | |
372 | ||
373 | svc_exit r5, irq = 1 @ return from exception | |
374 | @@ -239,8 +246,14 @@ ENDPROC(__irq_svc) | |
375 | 1: bl preempt_schedule_irq @ irq en/disable is done inside | |
376 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
377 | tst r0, #_TIF_NEED_RESCHED | |
378 | + bne 1b | |
379 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
380 | reteq r8 @ go again | |
381 | - b 1b | |
382 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
383 | + teq r0, #0 @ if preempt lazy count != 0 | |
384 | + beq 1b | |
385 | + ret r8 @ go again | |
386 | + | |
387 | #endif | |
388 | ||
389 | __und_fault: | |
390 | diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S | |
391 | index 10c3283d6c19..8872937862cc 100644 | |
392 | --- a/arch/arm/kernel/entry-common.S | |
393 | +++ b/arch/arm/kernel/entry-common.S | |
394 | @@ -36,7 +36,9 @@ | |
395 | UNWIND(.cantunwind ) | |
396 | disable_irq_notrace @ disable interrupts | |
397 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
398 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
399 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
400 | + bne fast_work_pending | |
401 | + tst r1, #_TIF_SECCOMP | |
402 | bne fast_work_pending | |
403 | ||
404 | /* perform architecture specific actions before user return */ | |
405 | @@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall) | |
406 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | |
407 | disable_irq_notrace @ disable interrupts | |
408 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
409 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
410 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
411 | + bne do_slower_path | |
412 | + tst r1, #_TIF_SECCOMP | |
413 | beq no_work_pending | |
414 | +do_slower_path: | |
415 | UNWIND(.fnend ) | |
416 | ENDPROC(ret_fast_syscall) | |
417 | ||
c7c16703 JK |
418 | diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c |
419 | index 69bda1a5707e..1f665acaa6a9 100644 | |
420 | --- a/arch/arm/kernel/patch.c | |
421 | +++ b/arch/arm/kernel/patch.c | |
422 | @@ -15,7 +15,7 @@ struct patch { | |
423 | unsigned int insn; | |
424 | }; | |
425 | ||
426 | -static DEFINE_SPINLOCK(patch_lock); | |
427 | +static DEFINE_RAW_SPINLOCK(patch_lock); | |
428 | ||
429 | static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
430 | __acquires(&patch_lock) | |
431 | @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
432 | return addr; | |
433 | ||
434 | if (flags) | |
435 | - spin_lock_irqsave(&patch_lock, *flags); | |
436 | + raw_spin_lock_irqsave(&patch_lock, *flags); | |
437 | else | |
438 | __acquire(&patch_lock); | |
439 | ||
440 | @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) | |
441 | clear_fixmap(fixmap); | |
442 | ||
443 | if (flags) | |
444 | - spin_unlock_irqrestore(&patch_lock, *flags); | |
445 | + raw_spin_unlock_irqrestore(&patch_lock, *flags); | |
446 | else | |
447 | __release(&patch_lock); | |
448 | } | |
1a6e0f06 | 449 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c |
c7c16703 | 450 | index 91d2d5b01414..750550098b59 100644 |
1a6e0f06 JK |
451 | --- a/arch/arm/kernel/process.c |
452 | +++ b/arch/arm/kernel/process.c | |
c7c16703 | 453 | @@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) |
1a6e0f06 JK |
454 | } |
455 | ||
456 | #ifdef CONFIG_MMU | |
457 | +/* | |
458 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
459 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
460 | + * fail. | |
461 | + */ | |
462 | +static int __init vectors_user_mapping_init_page(void) | |
463 | +{ | |
464 | + struct page *page; | |
465 | + unsigned long addr = 0xffff0000; | |
466 | + pgd_t *pgd; | |
467 | + pud_t *pud; | |
468 | + pmd_t *pmd; | |
469 | + | |
470 | + pgd = pgd_offset_k(addr); | |
471 | + pud = pud_offset(pgd, addr); | |
472 | + pmd = pmd_offset(pud, addr); | |
473 | + page = pmd_page(*(pmd)); | |
474 | + | |
475 | + pgtable_page_ctor(page); | |
476 | + | |
477 | + return 0; | |
478 | +} | |
479 | +late_initcall(vectors_user_mapping_init_page); | |
480 | + | |
481 | #ifdef CONFIG_KUSER_HELPERS | |
482 | /* | |
483 | * The vectors page is always readable from user space for the | |
484 | diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c | |
485 | index 7b8f2141427b..96541e00b74a 100644 | |
486 | --- a/arch/arm/kernel/signal.c | |
487 | +++ b/arch/arm/kernel/signal.c | |
488 | @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | |
489 | */ | |
490 | trace_hardirqs_off(); | |
491 | do { | |
492 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
493 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
494 | + _TIF_NEED_RESCHED_LAZY))) { | |
495 | schedule(); | |
496 | } else { | |
497 | if (unlikely(!user_mode(regs))) | |
498 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
c7c16703 | 499 | index 7dd14e8395e6..4cd7e3d98035 100644 |
1a6e0f06 JK |
500 | --- a/arch/arm/kernel/smp.c |
501 | +++ b/arch/arm/kernel/smp.c | |
502 | @@ -234,8 +234,6 @@ int __cpu_disable(void) | |
503 | flush_cache_louis(); | |
504 | local_flush_tlb_all(); | |
505 | ||
506 | - clear_tasks_mm_cpumask(cpu); | |
507 | - | |
508 | return 0; | |
509 | } | |
510 | ||
511 | @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu) | |
512 | pr_err("CPU%u: cpu didn't die\n", cpu); | |
513 | return; | |
514 | } | |
515 | + | |
516 | + clear_tasks_mm_cpumask(cpu); | |
517 | + | |
518 | pr_notice("CPU%u: shutdown\n", cpu); | |
519 | ||
520 | /* | |
521 | diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c | |
522 | index 0bee233fef9a..314cfb232a63 100644 | |
523 | --- a/arch/arm/kernel/unwind.c | |
524 | +++ b/arch/arm/kernel/unwind.c | |
525 | @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; | |
526 | static const struct unwind_idx *__origin_unwind_idx; | |
527 | extern const struct unwind_idx __stop_unwind_idx[]; | |
528 | ||
529 | -static DEFINE_SPINLOCK(unwind_lock); | |
530 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
531 | static LIST_HEAD(unwind_tables); | |
532 | ||
533 | /* Convert a prel31 symbol to an absolute address */ | |
534 | @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
535 | /* module unwind tables */ | |
536 | struct unwind_table *table; | |
537 | ||
538 | - spin_lock_irqsave(&unwind_lock, flags); | |
539 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
540 | list_for_each_entry(table, &unwind_tables, list) { | |
541 | if (addr >= table->begin_addr && | |
542 | addr < table->end_addr) { | |
543 | @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
544 | break; | |
545 | } | |
546 | } | |
547 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
548 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
549 | } | |
550 | ||
551 | pr_debug("%s: idx = %p\n", __func__, idx); | |
552 | @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, | |
553 | tab->begin_addr = text_addr; | |
554 | tab->end_addr = text_addr + text_size; | |
555 | ||
556 | - spin_lock_irqsave(&unwind_lock, flags); | |
557 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
558 | list_add_tail(&tab->list, &unwind_tables); | |
559 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
560 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
561 | ||
562 | return tab; | |
563 | } | |
564 | @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) | |
565 | if (!tab) | |
566 | return; | |
567 | ||
568 | - spin_lock_irqsave(&unwind_lock, flags); | |
569 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
570 | list_del(&tab->list); | |
571 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
572 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
573 | ||
574 | kfree(tab); | |
575 | } | |
576 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | |
c7c16703 | 577 | index 19b5f5c1c0ff..82aa639e6737 100644 |
1a6e0f06 JK |
578 | --- a/arch/arm/kvm/arm.c |
579 | +++ b/arch/arm/kvm/arm.c | |
c7c16703 | 580 | @@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
581 | * involves poking the GIC, which must be done in a |
582 | * non-preemptible context. | |
583 | */ | |
584 | - preempt_disable(); | |
585 | + migrate_disable(); | |
586 | kvm_pmu_flush_hwstate(vcpu); | |
587 | kvm_timer_flush_hwstate(vcpu); | |
588 | kvm_vgic_flush_hwstate(vcpu); | |
c7c16703 | 589 | @@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
590 | kvm_pmu_sync_hwstate(vcpu); |
591 | kvm_timer_sync_hwstate(vcpu); | |
592 | kvm_vgic_sync_hwstate(vcpu); | |
593 | - preempt_enable(); | |
594 | + migrate_enable(); | |
595 | continue; | |
596 | } | |
597 | ||
c7c16703 | 598 | @@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
599 | |
600 | kvm_vgic_sync_hwstate(vcpu); | |
601 | ||
602 | - preempt_enable(); | |
603 | + migrate_enable(); | |
604 | ||
605 | ret = handle_exit(vcpu, run, ret); | |
606 | } | |
607 | diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c | |
608 | index 98ffe1e62ad5..df9769ddece5 100644 | |
609 | --- a/arch/arm/mach-exynos/platsmp.c | |
610 | +++ b/arch/arm/mach-exynos/platsmp.c | |
611 | @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) | |
612 | return (void __iomem *)(S5P_VA_SCU); | |
613 | } | |
614 | ||
615 | -static DEFINE_SPINLOCK(boot_lock); | |
616 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
617 | ||
618 | static void exynos_secondary_init(unsigned int cpu) | |
619 | { | |
620 | @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu) | |
621 | /* | |
622 | * Synchronise with the boot thread. | |
623 | */ | |
624 | - spin_lock(&boot_lock); | |
625 | - spin_unlock(&boot_lock); | |
626 | + raw_spin_lock(&boot_lock); | |
627 | + raw_spin_unlock(&boot_lock); | |
628 | } | |
629 | ||
630 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
631 | @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
632 | * Set synchronisation state between this boot processor | |
633 | * and the secondary one | |
634 | */ | |
635 | - spin_lock(&boot_lock); | |
636 | + raw_spin_lock(&boot_lock); | |
637 | ||
638 | /* | |
639 | * The secondary processor is waiting to be released from | |
640 | @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
641 | ||
642 | if (timeout == 0) { | |
643 | printk(KERN_ERR "cpu1 power enable failed"); | |
644 | - spin_unlock(&boot_lock); | |
645 | + raw_spin_unlock(&boot_lock); | |
646 | return -ETIMEDOUT; | |
647 | } | |
648 | } | |
649 | @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
650 | * calibrations, then wait for it to finish | |
651 | */ | |
652 | fail: | |
653 | - spin_unlock(&boot_lock); | |
654 | + raw_spin_unlock(&boot_lock); | |
655 | ||
656 | return pen_release != -1 ? ret : 0; | |
657 | } | |
658 | diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c | |
659 | index 4b653a8cb75c..b03d5a922cb1 100644 | |
660 | --- a/arch/arm/mach-hisi/platmcpm.c | |
661 | +++ b/arch/arm/mach-hisi/platmcpm.c | |
662 | @@ -61,7 +61,7 @@ | |
663 | ||
664 | static void __iomem *sysctrl, *fabric; | |
665 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
666 | -static DEFINE_SPINLOCK(boot_lock); | |
667 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
668 | static u32 fabric_phys_addr; | |
669 | /* | |
670 | * [0]: bootwrapper physical address | |
671 | @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
672 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) | |
673 | return -EINVAL; | |
674 | ||
675 | - spin_lock_irq(&boot_lock); | |
676 | + raw_spin_lock_irq(&boot_lock); | |
677 | ||
678 | if (hip04_cpu_table[cluster][cpu]) | |
679 | goto out; | |
680 | @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
681 | ||
682 | out: | |
683 | hip04_cpu_table[cluster][cpu]++; | |
684 | - spin_unlock_irq(&boot_lock); | |
685 | + raw_spin_unlock_irq(&boot_lock); | |
686 | ||
687 | return 0; | |
688 | } | |
689 | @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
690 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); | |
691 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
692 | ||
693 | - spin_lock(&boot_lock); | |
694 | + raw_spin_lock(&boot_lock); | |
695 | hip04_cpu_table[cluster][cpu]--; | |
696 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
697 | /* A power_up request went ahead of us. */ | |
698 | - spin_unlock(&boot_lock); | |
699 | + raw_spin_unlock(&boot_lock); | |
700 | return; | |
701 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
702 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
703 | @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
704 | } | |
705 | ||
706 | last_man = hip04_cluster_is_down(cluster); | |
707 | - spin_unlock(&boot_lock); | |
708 | + raw_spin_unlock(&boot_lock); | |
709 | if (last_man) { | |
710 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
711 | asm volatile( | |
712 | @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
713 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); | |
714 | ||
715 | count = TIMEOUT_MSEC / POLL_MSEC; | |
716 | - spin_lock_irq(&boot_lock); | |
717 | + raw_spin_lock_irq(&boot_lock); | |
718 | for (tries = 0; tries < count; tries++) { | |
719 | if (hip04_cpu_table[cluster][cpu]) | |
720 | goto err; | |
721 | @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
722 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); | |
723 | if (data & CORE_WFI_STATUS(cpu)) | |
724 | break; | |
725 | - spin_unlock_irq(&boot_lock); | |
726 | + raw_spin_unlock_irq(&boot_lock); | |
727 | /* Wait for clean L2 when the whole cluster is down. */ | |
728 | msleep(POLL_MSEC); | |
729 | - spin_lock_irq(&boot_lock); | |
730 | + raw_spin_lock_irq(&boot_lock); | |
731 | } | |
732 | if (tries >= count) | |
733 | goto err; | |
734 | @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
735 | goto err; | |
736 | if (hip04_cluster_is_down(cluster)) | |
737 | hip04_set_snoop_filter(cluster, 0); | |
738 | - spin_unlock_irq(&boot_lock); | |
739 | + raw_spin_unlock_irq(&boot_lock); | |
740 | return 1; | |
741 | err: | |
742 | - spin_unlock_irq(&boot_lock); | |
743 | + raw_spin_unlock_irq(&boot_lock); | |
744 | return 0; | |
745 | } | |
746 | #endif | |
747 | diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c | |
748 | index b4de3da6dffa..b52893319d75 100644 | |
749 | --- a/arch/arm/mach-omap2/omap-smp.c | |
750 | +++ b/arch/arm/mach-omap2/omap-smp.c | |
751 | @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { | |
752 | .startup_addr = omap5_secondary_startup, | |
753 | }; | |
754 | ||
755 | -static DEFINE_SPINLOCK(boot_lock); | |
756 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
757 | ||
758 | void __iomem *omap4_get_scu_base(void) | |
759 | { | |
760 | @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu) | |
761 | /* | |
762 | * Synchronise with the boot thread. | |
763 | */ | |
764 | - spin_lock(&boot_lock); | |
765 | - spin_unlock(&boot_lock); | |
766 | + raw_spin_lock(&boot_lock); | |
767 | + raw_spin_unlock(&boot_lock); | |
768 | } | |
769 | ||
770 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
771 | @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
772 | * Set synchronisation state between this boot processor | |
773 | * and the secondary one | |
774 | */ | |
775 | - spin_lock(&boot_lock); | |
776 | + raw_spin_lock(&boot_lock); | |
777 | ||
778 | /* | |
779 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
780 | @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
781 | * Now the secondary core is starting up let it run its | |
782 | * calibrations, then wait for it to finish | |
783 | */ | |
784 | - spin_unlock(&boot_lock); | |
785 | + raw_spin_unlock(&boot_lock); | |
786 | ||
787 | return 0; | |
788 | } | |
789 | diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c | |
790 | index 0875b99add18..18b6d98d2581 100644 | |
791 | --- a/arch/arm/mach-prima2/platsmp.c | |
792 | +++ b/arch/arm/mach-prima2/platsmp.c | |
793 | @@ -22,7 +22,7 @@ | |
794 | ||
795 | static void __iomem *clk_base; | |
796 | ||
797 | -static DEFINE_SPINLOCK(boot_lock); | |
798 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
799 | ||
800 | static void sirfsoc_secondary_init(unsigned int cpu) | |
801 | { | |
802 | @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) | |
803 | /* | |
804 | * Synchronise with the boot thread. | |
805 | */ | |
806 | - spin_lock(&boot_lock); | |
807 | - spin_unlock(&boot_lock); | |
808 | + raw_spin_lock(&boot_lock); | |
809 | + raw_spin_unlock(&boot_lock); | |
810 | } | |
811 | ||
812 | static const struct of_device_id clk_ids[] = { | |
813 | @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
814 | /* make sure write buffer is drained */ | |
815 | mb(); | |
816 | ||
817 | - spin_lock(&boot_lock); | |
818 | + raw_spin_lock(&boot_lock); | |
819 | ||
820 | /* | |
821 | * The secondary processor is waiting to be released from | |
822 | @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
823 | * now the secondary core is starting up let it run its | |
824 | * calibrations, then wait for it to finish | |
825 | */ | |
826 | - spin_unlock(&boot_lock); | |
827 | + raw_spin_unlock(&boot_lock); | |
828 | ||
829 | return pen_release != -1 ? -ENOSYS : 0; | |
830 | } | |
831 | diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c | |
832 | index 5494c9e0c909..e8ce157d3548 100644 | |
833 | --- a/arch/arm/mach-qcom/platsmp.c | |
834 | +++ b/arch/arm/mach-qcom/platsmp.c | |
835 | @@ -46,7 +46,7 @@ | |
836 | ||
837 | extern void secondary_startup_arm(void); | |
838 | ||
839 | -static DEFINE_SPINLOCK(boot_lock); | |
840 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
841 | ||
842 | #ifdef CONFIG_HOTPLUG_CPU | |
843 | static void qcom_cpu_die(unsigned int cpu) | |
844 | @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) | |
845 | /* | |
846 | * Synchronise with the boot thread. | |
847 | */ | |
848 | - spin_lock(&boot_lock); | |
849 | - spin_unlock(&boot_lock); | |
850 | + raw_spin_lock(&boot_lock); | |
851 | + raw_spin_unlock(&boot_lock); | |
852 | } | |
853 | ||
854 | static int scss_release_secondary(unsigned int cpu) | |
855 | @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
856 | * set synchronisation state between this boot processor | |
857 | * and the secondary one | |
858 | */ | |
859 | - spin_lock(&boot_lock); | |
860 | + raw_spin_lock(&boot_lock); | |
861 | ||
862 | /* | |
863 | * Send the secondary CPU a soft interrupt, thereby causing | |
864 | @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
865 | * now the secondary core is starting up let it run its | |
866 | * calibrations, then wait for it to finish | |
867 | */ | |
868 | - spin_unlock(&boot_lock); | |
869 | + raw_spin_unlock(&boot_lock); | |
870 | ||
871 | return ret; | |
872 | } | |
873 | diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c | |
874 | index 8d1e2d551786..7fa56cc78118 100644 | |
875 | --- a/arch/arm/mach-spear/platsmp.c | |
876 | +++ b/arch/arm/mach-spear/platsmp.c | |
877 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
878 | sync_cache_w(&pen_release); | |
879 | } | |
880 | ||
881 | -static DEFINE_SPINLOCK(boot_lock); | |
882 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
883 | ||
884 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
885 | ||
886 | @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) | |
887 | /* | |
888 | * Synchronise with the boot thread. | |
889 | */ | |
890 | - spin_lock(&boot_lock); | |
891 | - spin_unlock(&boot_lock); | |
892 | + raw_spin_lock(&boot_lock); | |
893 | + raw_spin_unlock(&boot_lock); | |
894 | } | |
895 | ||
896 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
897 | @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
898 | * set synchronisation state between this boot processor | |
899 | * and the secondary one | |
900 | */ | |
901 | - spin_lock(&boot_lock); | |
902 | + raw_spin_lock(&boot_lock); | |
903 | ||
904 | /* | |
905 | * The secondary processor is waiting to be released from | |
906 | @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
907 | * now the secondary core is starting up let it run its | |
908 | * calibrations, then wait for it to finish | |
909 | */ | |
910 | - spin_unlock(&boot_lock); | |
911 | + raw_spin_unlock(&boot_lock); | |
912 | ||
913 | return pen_release != -1 ? -ENOSYS : 0; | |
914 | } | |
915 | diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c | |
916 | index ea5a2277ee46..b988e081ac79 100644 | |
917 | --- a/arch/arm/mach-sti/platsmp.c | |
918 | +++ b/arch/arm/mach-sti/platsmp.c | |
919 | @@ -35,7 +35,7 @@ static void write_pen_release(int val) | |
920 | sync_cache_w(&pen_release); | |
921 | } | |
922 | ||
923 | -static DEFINE_SPINLOCK(boot_lock); | |
924 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
925 | ||
926 | static void sti_secondary_init(unsigned int cpu) | |
927 | { | |
928 | @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) | |
929 | /* | |
930 | * Synchronise with the boot thread. | |
931 | */ | |
932 | - spin_lock(&boot_lock); | |
933 | - spin_unlock(&boot_lock); | |
934 | + raw_spin_lock(&boot_lock); | |
935 | + raw_spin_unlock(&boot_lock); | |
936 | } | |
937 | ||
938 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
939 | @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
940 | * set synchronisation state between this boot processor | |
941 | * and the secondary one | |
942 | */ | |
943 | - spin_lock(&boot_lock); | |
944 | + raw_spin_lock(&boot_lock); | |
945 | ||
946 | /* | |
947 | * The secondary processor is waiting to be released from | |
948 | @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
949 | * now the secondary core is starting up let it run its | |
950 | * calibrations, then wait for it to finish | |
951 | */ | |
952 | - spin_unlock(&boot_lock); | |
953 | + raw_spin_unlock(&boot_lock); | |
954 | ||
955 | return pen_release != -1 ? -ENOSYS : 0; | |
956 | } | |
957 | diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c | |
958 | index 3a2e678b8d30..3ed1e9ba6a01 100644 | |
959 | --- a/arch/arm/mm/fault.c | |
960 | +++ b/arch/arm/mm/fault.c | |
961 | @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
962 | if (addr < TASK_SIZE) | |
963 | return do_page_fault(addr, fsr, regs); | |
964 | ||
965 | + if (interrupts_enabled(regs)) | |
966 | + local_irq_enable(); | |
967 | + | |
968 | if (user_mode(regs)) | |
969 | goto bad_area; | |
970 | ||
971 | @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
972 | static int | |
973 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
974 | { | |
975 | + if (interrupts_enabled(regs)) | |
976 | + local_irq_enable(); | |
977 | + | |
978 | do_bad_area(addr, fsr, regs); | |
979 | return 0; | |
980 | } | |
981 | diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c | |
982 | index d02f8187b1cc..542692dbd40a 100644 | |
983 | --- a/arch/arm/mm/highmem.c | |
984 | +++ b/arch/arm/mm/highmem.c | |
985 | @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) | |
986 | return *ptep; | |
987 | } | |
988 | ||
989 | +static unsigned int fixmap_idx(int type) | |
990 | +{ | |
991 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
992 | +} | |
993 | + | |
994 | void *kmap(struct page *page) | |
995 | { | |
996 | might_sleep(); | |
997 | @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); | |
998 | ||
999 | void *kmap_atomic(struct page *page) | |
1000 | { | |
1001 | + pte_t pte = mk_pte(page, kmap_prot); | |
1002 | unsigned int idx; | |
1003 | unsigned long vaddr; | |
1004 | void *kmap; | |
1005 | int type; | |
1006 | ||
1007 | - preempt_disable(); | |
1008 | + preempt_disable_nort(); | |
1009 | pagefault_disable(); | |
1010 | if (!PageHighMem(page)) | |
1011 | return page_address(page); | |
1012 | @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) | |
1013 | ||
1014 | type = kmap_atomic_idx_push(); | |
1015 | ||
1016 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1017 | + idx = fixmap_idx(type); | |
1018 | vaddr = __fix_to_virt(idx); | |
1019 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1020 | /* | |
1021 | @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) | |
1022 | * in place, so the contained TLB flush ensures the TLB is updated | |
1023 | * with the new mapping. | |
1024 | */ | |
1025 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
1026 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1027 | + current->kmap_pte[type] = pte; | |
1028 | +#endif | |
1029 | + set_fixmap_pte(idx, pte); | |
1030 | ||
1031 | return (void *)vaddr; | |
1032 | } | |
1033 | @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) | |
1034 | ||
1035 | if (kvaddr >= (void *)FIXADDR_START) { | |
1036 | type = kmap_atomic_idx(); | |
1037 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1038 | + idx = fixmap_idx(type); | |
1039 | ||
1040 | if (cache_is_vivt()) | |
1041 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
1042 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1043 | + current->kmap_pte[type] = __pte(0); | |
1044 | +#endif | |
1045 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1046 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
1047 | - set_fixmap_pte(idx, __pte(0)); | |
1048 | #else | |
1049 | (void) idx; /* to kill a warning */ | |
1050 | #endif | |
1051 | + set_fixmap_pte(idx, __pte(0)); | |
1052 | kmap_atomic_idx_pop(); | |
1053 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
1054 | /* this address was obtained through kmap_high_get() */ | |
1055 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
1056 | } | |
1057 | pagefault_enable(); | |
1058 | - preempt_enable(); | |
1059 | + preempt_enable_nort(); | |
1060 | } | |
1061 | EXPORT_SYMBOL(__kunmap_atomic); | |
1062 | ||
1063 | void *kmap_atomic_pfn(unsigned long pfn) | |
1064 | { | |
1065 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
1066 | unsigned long vaddr; | |
1067 | int idx, type; | |
1068 | struct page *page = pfn_to_page(pfn); | |
1069 | ||
1070 | - preempt_disable(); | |
1071 | + preempt_disable_nort(); | |
1072 | pagefault_disable(); | |
1073 | if (!PageHighMem(page)) | |
1074 | return page_address(page); | |
1075 | ||
1076 | type = kmap_atomic_idx_push(); | |
1077 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1078 | + idx = fixmap_idx(type); | |
1079 | vaddr = __fix_to_virt(idx); | |
1080 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1081 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
1082 | #endif | |
1083 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
1084 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1085 | + current->kmap_pte[type] = pte; | |
1086 | +#endif | |
1087 | + set_fixmap_pte(idx, pte); | |
1088 | ||
1089 | return (void *)vaddr; | |
1090 | } | |
1091 | +#if defined CONFIG_PREEMPT_RT_FULL | |
1092 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
1093 | +{ | |
1094 | + int i; | |
1095 | + | |
1096 | + /* | |
1097 | + * Clear @prev's kmap_atomic mappings | |
1098 | + */ | |
1099 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
1100 | + int idx = fixmap_idx(i); | |
1101 | + | |
1102 | + set_fixmap_pte(idx, __pte(0)); | |
1103 | + } | |
1104 | + /* | |
1105 | + * Restore @next_p's kmap_atomic mappings | |
1106 | + */ | |
1107 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
1108 | + int idx = fixmap_idx(i); | |
1109 | + | |
1110 | + if (!pte_none(next_p->kmap_pte[i])) | |
1111 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
1112 | + } | |
1113 | +} | |
1114 | +#endif | |
1115 | diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c | |
1116 | index c2366510187a..6b60f582b738 100644 | |
1117 | --- a/arch/arm/plat-versatile/platsmp.c | |
1118 | +++ b/arch/arm/plat-versatile/platsmp.c | |
1119 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
1120 | sync_cache_w(&pen_release); | |
1121 | } | |
1122 | ||
1123 | -static DEFINE_SPINLOCK(boot_lock); | |
1124 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1125 | ||
1126 | void versatile_secondary_init(unsigned int cpu) | |
1127 | { | |
1128 | @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) | |
1129 | /* | |
1130 | * Synchronise with the boot thread. | |
1131 | */ | |
1132 | - spin_lock(&boot_lock); | |
1133 | - spin_unlock(&boot_lock); | |
1134 | + raw_spin_lock(&boot_lock); | |
1135 | + raw_spin_unlock(&boot_lock); | |
1136 | } | |
1137 | ||
1138 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1139 | @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1140 | * Set synchronisation state between this boot processor | |
1141 | * and the secondary one | |
1142 | */ | |
1143 | - spin_lock(&boot_lock); | |
1144 | + raw_spin_lock(&boot_lock); | |
1145 | ||
1146 | /* | |
1147 | * This is really belt and braces; we hold unintended secondary | |
1148 | @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1149 | * now the secondary core is starting up let it run its | |
1150 | * calibrations, then wait for it to finish | |
1151 | */ | |
1152 | - spin_unlock(&boot_lock); | |
1153 | + raw_spin_unlock(&boot_lock); | |
1154 | ||
1155 | return pen_release != -1 ? -ENOSYS : 0; | |
1156 | } | |
1157 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | |
c7c16703 | 1158 | index 969ef880d234..1182fe883771 100644 |
1a6e0f06 JK |
1159 | --- a/arch/arm64/Kconfig |
1160 | +++ b/arch/arm64/Kconfig | |
c7c16703 | 1161 | @@ -91,6 +91,7 @@ config ARM64 |
1a6e0f06 JK |
1162 | select HAVE_PERF_EVENTS |
1163 | select HAVE_PERF_REGS | |
1164 | select HAVE_PERF_USER_STACK_DUMP | |
1165 | + select HAVE_PREEMPT_LAZY | |
1166 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1167 | select HAVE_RCU_TABLE_FREE | |
1168 | select HAVE_SYSCALL_TRACEPOINTS | |
c7c16703 | 1169 | @@ -694,7 +695,7 @@ config XEN_DOM0 |
1a6e0f06 JK |
1170 | |
1171 | config XEN | |
1172 | bool "Xen guest support on ARM64" | |
1173 | - depends on ARM64 && OF | |
1174 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1175 | select SWIOTLB_XEN | |
1176 | select PARAVIRT | |
1177 | help | |
1178 | diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h | |
c7c16703 | 1179 | index e9ea5a6bd449..6c500ad63c6a 100644 |
1a6e0f06 JK |
1180 | --- a/arch/arm64/include/asm/thread_info.h |
1181 | +++ b/arch/arm64/include/asm/thread_info.h | |
1182 | @@ -49,6 +49,7 @@ struct thread_info { | |
1183 | mm_segment_t addr_limit; /* address limit */ | |
1184 | struct task_struct *task; /* main task structure */ | |
1185 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
1186 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1187 | int cpu; /* cpu */ | |
1188 | }; | |
1189 | ||
c7c16703 | 1190 | @@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1191 | #define TIF_NEED_RESCHED 1 |
1192 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | |
1193 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ | |
1194 | +#define TIF_NEED_RESCHED_LAZY 4 | |
1195 | #define TIF_NOHZ 7 | |
1196 | #define TIF_SYSCALL_TRACE 8 | |
1197 | #define TIF_SYSCALL_AUDIT 9 | |
c7c16703 | 1198 | @@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1199 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) |
1200 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1201 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1202 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1203 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1204 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1205 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
c7c16703 | 1206 | @@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1207 | #define _TIF_32BIT (1 << TIF_32BIT) |
1208 | ||
1209 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
1210 | - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) | |
1211 | + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ | |
1212 | + _TIF_NEED_RESCHED_LAZY) | |
c7c16703 | 1213 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
1a6e0f06 JK |
1214 | |
1215 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1216 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
1217 | diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c | |
c7c16703 | 1218 | index 4a2f0f0fef32..6bf2bc17c400 100644 |
1a6e0f06 JK |
1219 | --- a/arch/arm64/kernel/asm-offsets.c |
1220 | +++ b/arch/arm64/kernel/asm-offsets.c | |
c7c16703 | 1221 | @@ -38,6 +38,7 @@ int main(void) |
1a6e0f06 JK |
1222 | BLANK(); |
1223 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1224 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1225 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1226 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
1227 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1228 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1229 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | |
c7c16703 | 1230 | index 223d54a4d66b..266c0e2141ca 100644 |
1a6e0f06 JK |
1231 | --- a/arch/arm64/kernel/entry.S |
1232 | +++ b/arch/arm64/kernel/entry.S | |
c7c16703 | 1233 | @@ -428,11 +428,16 @@ ENDPROC(el1_sync) |
1a6e0f06 JK |
1234 | |
1235 | #ifdef CONFIG_PREEMPT | |
1236 | ldr w24, [tsk, #TI_PREEMPT] // get preempt count | |
1237 | - cbnz w24, 1f // preempt count != 0 | |
1238 | + cbnz w24, 2f // preempt count != 0 | |
1239 | ldr x0, [tsk, #TI_FLAGS] // get flags | |
1240 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1241 | - bl el1_preempt | |
1242 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1243 | + | |
1244 | + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count | |
1245 | + cbnz w24, 2f // preempt lazy count != 0 | |
1246 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1247 | 1: | |
1248 | + bl el1_preempt | |
1249 | +2: | |
1250 | #endif | |
1251 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1252 | bl trace_hardirqs_on | |
c7c16703 | 1253 | @@ -446,6 +451,7 @@ ENDPROC(el1_irq) |
1a6e0f06 JK |
1254 | 1: bl preempt_schedule_irq // irq en/disable is done inside |
1255 | ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS | |
1256 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? | |
1257 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1258 | ret x24 | |
1259 | #endif | |
1260 | ||
c7c16703 JK |
1261 | diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c |
1262 | index 404dd67080b9..639dc6d12e72 100644 | |
1263 | --- a/arch/arm64/kernel/signal.c | |
1264 | +++ b/arch/arm64/kernel/signal.c | |
1265 | @@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, | |
1266 | */ | |
1267 | trace_hardirqs_off(); | |
1268 | do { | |
1269 | - if (thread_flags & _TIF_NEED_RESCHED) { | |
1270 | + if (thread_flags & _TIF_NEED_RESCHED_MASK) { | |
1271 | schedule(); | |
1272 | } else { | |
1273 | local_irq_enable(); | |
1a6e0f06 | 1274 | diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig |
c7c16703 | 1275 | index b3c5bde43d34..8122bf058de0 100644 |
1a6e0f06 JK |
1276 | --- a/arch/mips/Kconfig |
1277 | +++ b/arch/mips/Kconfig | |
c7c16703 | 1278 | @@ -2514,7 +2514,7 @@ config MIPS_ASID_BITS_VARIABLE |
1a6e0f06 JK |
1279 | # |
1280 | config HIGHMEM | |
1281 | bool "High Memory Support" | |
1282 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1283 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1284 | ||
1285 | config CPU_SUPPORTS_HIGHMEM | |
1286 | bool | |
1287 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | |
c7c16703 | 1288 | index 65fba4c34cd7..4b5ba68910e0 100644 |
1a6e0f06 JK |
1289 | --- a/arch/powerpc/Kconfig |
1290 | +++ b/arch/powerpc/Kconfig | |
c7c16703 | 1291 | @@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT |
1a6e0f06 JK |
1292 | |
1293 | config RWSEM_GENERIC_SPINLOCK | |
1294 | bool | |
1295 | + default y if PREEMPT_RT_FULL | |
1296 | ||
1297 | config RWSEM_XCHGADD_ALGORITHM | |
1298 | bool | |
1299 | - default y | |
1300 | + default y if !PREEMPT_RT_FULL | |
1301 | ||
1302 | config GENERIC_LOCKBREAK | |
1303 | bool | |
c7c16703 | 1304 | @@ -134,6 +135,7 @@ config PPC |
1a6e0f06 JK |
1305 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST |
1306 | select GENERIC_STRNCPY_FROM_USER | |
1307 | select GENERIC_STRNLEN_USER | |
1308 | + select HAVE_PREEMPT_LAZY | |
1309 | select HAVE_MOD_ARCH_SPECIFIC | |
1310 | select MODULES_USE_ELF_RELA | |
1311 | select CLONE_BACKWARDS | |
c7c16703 | 1312 | @@ -321,7 +323,7 @@ menu "Kernel options" |
1a6e0f06 JK |
1313 | |
1314 | config HIGHMEM | |
1315 | bool "High memory support" | |
1316 | - depends on PPC32 | |
1317 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1318 | ||
1319 | source kernel/Kconfig.hz | |
1320 | source kernel/Kconfig.preempt | |
1321 | diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h | |
1322 | index 87e4b2d8dcd4..981e501a4359 100644 | |
1323 | --- a/arch/powerpc/include/asm/thread_info.h | |
1324 | +++ b/arch/powerpc/include/asm/thread_info.h | |
1325 | @@ -43,6 +43,8 @@ struct thread_info { | |
1326 | int cpu; /* cpu we're on */ | |
1327 | int preempt_count; /* 0 => preemptable, | |
1328 | <0 => BUG */ | |
1329 | + int preempt_lazy_count; /* 0 => preemptable, | |
1330 | + <0 => BUG */ | |
1331 | unsigned long local_flags; /* private flags for thread */ | |
1332 | #ifdef CONFIG_LIVEPATCH | |
1333 | unsigned long *livepatch_sp; | |
1334 | @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void) | |
1335 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ | |
1336 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1337 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1338 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1339 | - TIF_NEED_RESCHED */ | |
1340 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1341 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1342 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
1343 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
1344 | @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) | |
1345 | #if defined(CONFIG_PPC64) | |
1346 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1347 | #endif | |
1348 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1349 | + TIF_NEED_RESCHED */ | |
1350 | ||
1351 | /* as above, but as bit values */ | |
1352 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
1353 | @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void) | |
1354 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) | |
1355 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1356 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1357 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1358 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1359 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1360 | _TIF_NOHZ) | |
1361 | ||
1362 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1363 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1364 | - _TIF_RESTORE_TM) | |
1365 | + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) | |
1366 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
1367 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1368 | ||
1369 | /* Bits in local_flags */ | |
1370 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
1371 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | |
c7c16703 | 1372 | index c833d88c423d..96e9fbc3f684 100644 |
1a6e0f06 JK |
1373 | --- a/arch/powerpc/kernel/asm-offsets.c |
1374 | +++ b/arch/powerpc/kernel/asm-offsets.c | |
1375 | @@ -156,6 +156,7 @@ int main(void) | |
1376 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1377 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | |
1378 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1379 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1380 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1381 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1382 | ||
1383 | diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S | |
c7c16703 | 1384 | index 3841d749a430..6dbaeff192b9 100644 |
1a6e0f06 JK |
1385 | --- a/arch/powerpc/kernel/entry_32.S |
1386 | +++ b/arch/powerpc/kernel/entry_32.S | |
1387 | @@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1388 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1389 | bne restore | |
1390 | andi. r8,r8,_TIF_NEED_RESCHED | |
1391 | + bne+ 1f | |
1392 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1393 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1394 | + bne restore | |
1395 | + lwz r0,TI_FLAGS(r9) | |
1396 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1397 | beq+ restore | |
1398 | +1: | |
1399 | lwz r3,_MSR(r1) | |
1400 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1401 | beq restore /* don't schedule if so */ | |
1402 | @@ -846,11 +853,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1403 | */ | |
1404 | bl trace_hardirqs_off | |
1405 | #endif | |
1406 | -1: bl preempt_schedule_irq | |
1407 | +2: bl preempt_schedule_irq | |
1408 | CURRENT_THREAD_INFO(r9, r1) | |
1409 | lwz r3,TI_FLAGS(r9) | |
1410 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1411 | - bne- 1b | |
1412 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1413 | + bne- 2b | |
1414 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1415 | /* And now, to properly rebalance the above, we tell lockdep they | |
1416 | * are being turned back on, which will happen when we return | |
1417 | @@ -1171,7 +1178,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) | |
1418 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ | |
1419 | ||
1420 | do_work: /* r10 contains MSR_KERNEL here */ | |
1421 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1422 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1423 | beq do_user_signal | |
1424 | ||
1425 | do_resched: /* r10 contains MSR_KERNEL here */ | |
1426 | @@ -1192,7 +1199,7 @@ do_resched: /* r10 contains MSR_KERNEL here */ | |
1427 | MTMSRD(r10) /* disable interrupts */ | |
1428 | CURRENT_THREAD_INFO(r9, r1) | |
1429 | lwz r9,TI_FLAGS(r9) | |
1430 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1431 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1432 | bne- do_resched | |
1433 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1434 | beq restore_user | |
1435 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | |
c7c16703 | 1436 | index 6432d4bf08c8..5509a26f1070 100644 |
1a6e0f06 JK |
1437 | --- a/arch/powerpc/kernel/entry_64.S |
1438 | +++ b/arch/powerpc/kernel/entry_64.S | |
c7c16703 | 1439 | @@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1440 | bl restore_math |
1441 | b restore | |
1442 | #endif | |
1443 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1444 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1445 | beq 2f | |
1446 | bl restore_interrupts | |
1447 | SCHEDULE_USER | |
c7c16703 | 1448 | @@ -718,10 +718,18 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1449 | |
1450 | #ifdef CONFIG_PREEMPT | |
1451 | /* Check if we need to preempt */ | |
1452 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1453 | - beq+ restore | |
1454 | - /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1455 | lwz r8,TI_PREEMPT(r9) | |
1456 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ | |
1457 | + bne restore | |
1458 | + andi. r0,r4,_TIF_NEED_RESCHED | |
1459 | + bne+ check_count | |
1460 | + | |
1461 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
1462 | + beq+ restore | |
1463 | + lwz r8,TI_PREEMPT_LAZY(r9) | |
1464 | + | |
1465 | + /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1466 | +check_count: | |
1467 | cmpwi cr1,r8,0 | |
1468 | ld r0,SOFTE(r1) | |
1469 | cmpdi r0,0 | |
c7c16703 | 1470 | @@ -738,7 +746,7 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1471 | /* Re-test flags and eventually loop */ |
1472 | CURRENT_THREAD_INFO(r9, r1) | |
1473 | ld r4,TI_FLAGS(r9) | |
1474 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1475 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1476 | bne 1b | |
1477 | ||
1478 | /* | |
1479 | diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c | |
c7c16703 | 1480 | index 3c05c311e35e..f83f6ac1274d 100644 |
1a6e0f06 JK |
1481 | --- a/arch/powerpc/kernel/irq.c |
1482 | +++ b/arch/powerpc/kernel/irq.c | |
c7c16703 | 1483 | @@ -638,6 +638,7 @@ void irq_ctx_init(void) |
1a6e0f06 JK |
1484 | } |
1485 | } | |
1486 | ||
1487 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1488 | void do_softirq_own_stack(void) | |
1489 | { | |
1490 | struct thread_info *curtp, *irqtp; | |
c7c16703 | 1491 | @@ -655,6 +656,7 @@ void do_softirq_own_stack(void) |
1a6e0f06 JK |
1492 | if (irqtp->flags) |
1493 | set_bits(irqtp->flags, &curtp->flags); | |
1494 | } | |
1495 | +#endif | |
1496 | ||
1497 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1498 | { | |
1499 | diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S | |
c7c16703 | 1500 | index 030d72df5dd5..b471a709e100 100644 |
1a6e0f06 JK |
1501 | --- a/arch/powerpc/kernel/misc_32.S |
1502 | +++ b/arch/powerpc/kernel/misc_32.S | |
c7c16703 | 1503 | @@ -41,6 +41,7 @@ |
1a6e0f06 JK |
1504 | * We store the saved ksp_limit in the unused part |
1505 | * of the STACK_FRAME_OVERHEAD | |
1506 | */ | |
1507 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1508 | _GLOBAL(call_do_softirq) | |
1509 | mflr r0 | |
1510 | stw r0,4(r1) | |
c7c16703 | 1511 | @@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq) |
1a6e0f06 JK |
1512 | stw r10,THREAD+KSP_LIMIT(r2) |
1513 | mtlr r0 | |
1514 | blr | |
1515 | +#endif | |
1516 | ||
1517 | /* | |
1518 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
1519 | diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S | |
c7c16703 | 1520 | index 4f178671f230..39e7d84a3492 100644 |
1a6e0f06 JK |
1521 | --- a/arch/powerpc/kernel/misc_64.S |
1522 | +++ b/arch/powerpc/kernel/misc_64.S | |
c7c16703 | 1523 | @@ -31,6 +31,7 @@ |
1a6e0f06 JK |
1524 | |
1525 | .text | |
1526 | ||
1527 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1528 | _GLOBAL(call_do_softirq) | |
1529 | mflr r0 | |
1530 | std r0,16(r1) | |
c7c16703 | 1531 | @@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq) |
1a6e0f06 JK |
1532 | ld r0,16(r1) |
1533 | mtlr r0 | |
1534 | blr | |
1535 | +#endif | |
1536 | ||
1537 | _GLOBAL(call_do_irq) | |
1538 | mflr r0 | |
1539 | diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig | |
c7c16703 | 1540 | index 029be26b5a17..9528089ea142 100644 |
1a6e0f06 JK |
1541 | --- a/arch/powerpc/kvm/Kconfig |
1542 | +++ b/arch/powerpc/kvm/Kconfig | |
c7c16703 | 1543 | @@ -175,6 +175,7 @@ config KVM_E500MC |
1a6e0f06 JK |
1544 | config KVM_MPIC |
1545 | bool "KVM in-kernel MPIC emulation" | |
1546 | depends on KVM && E500 | |
1547 | + depends on !PREEMPT_RT_FULL | |
1548 | select HAVE_KVM_IRQCHIP | |
1549 | select HAVE_KVM_IRQFD | |
1550 | select HAVE_KVM_IRQ_ROUTING | |
1551 | diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c | |
c7c16703 | 1552 | index e48462447ff0..2670cee66064 100644 |
1a6e0f06 JK |
1553 | --- a/arch/powerpc/platforms/ps3/device-init.c |
1554 | +++ b/arch/powerpc/platforms/ps3/device-init.c | |
1555 | @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, | |
1556 | } | |
1557 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
1558 | ||
1559 | - res = wait_event_interruptible(dev->done.wait, | |
1560 | + res = swait_event_interruptible(dev->done.wait, | |
1561 | dev->done.done || kthread_should_stop()); | |
1562 | if (kthread_should_stop()) | |
1563 | res = -EINTR; | |
1564 | diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c | |
1565 | index 6c0378c0b8b5..abd58b4dff97 100644 | |
1566 | --- a/arch/sh/kernel/irq.c | |
1567 | +++ b/arch/sh/kernel/irq.c | |
1568 | @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) | |
1569 | hardirq_ctx[cpu] = NULL; | |
1570 | } | |
1571 | ||
1572 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1573 | void do_softirq_own_stack(void) | |
1574 | { | |
1575 | struct thread_info *curctx; | |
1576 | @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) | |
1577 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" | |
1578 | ); | |
1579 | } | |
1580 | +#endif | |
1581 | #else | |
1582 | static inline void handle_one_irq(unsigned int irq) | |
1583 | { | |
1584 | diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig | |
c7c16703 | 1585 | index 165ecdd24d22..b68a464a22be 100644 |
1a6e0f06 JK |
1586 | --- a/arch/sparc/Kconfig |
1587 | +++ b/arch/sparc/Kconfig | |
c7c16703 | 1588 | @@ -194,12 +194,10 @@ config NR_CPUS |
1a6e0f06 JK |
1589 | source kernel/Kconfig.hz |
1590 | ||
1591 | config RWSEM_GENERIC_SPINLOCK | |
1592 | - bool | |
1593 | - default y if SPARC32 | |
1594 | + def_bool PREEMPT_RT_FULL | |
1595 | ||
1596 | config RWSEM_XCHGADD_ALGORITHM | |
1597 | - bool | |
1598 | - default y if SPARC64 | |
1599 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1600 | ||
1601 | config GENERIC_HWEIGHT | |
1602 | bool | |
1603 | diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c | |
1604 | index 34a7930b76ef..773740521008 100644 | |
1605 | --- a/arch/sparc/kernel/irq_64.c | |
1606 | +++ b/arch/sparc/kernel/irq_64.c | |
1607 | @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) | |
1608 | set_irq_regs(old_regs); | |
1609 | } | |
1610 | ||
1611 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1612 | void do_softirq_own_stack(void) | |
1613 | { | |
1614 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
1615 | @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) | |
1616 | __asm__ __volatile__("mov %0, %%sp" | |
1617 | : : "r" (orig_sp)); | |
1618 | } | |
1619 | +#endif | |
1620 | ||
1621 | #ifdef CONFIG_HOTPLUG_CPU | |
1622 | void fixup_irqs(void) | |
1623 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | |
c7c16703 | 1624 | index bada636d1065..f8a995c90c01 100644 |
1a6e0f06 JK |
1625 | --- a/arch/x86/Kconfig |
1626 | +++ b/arch/x86/Kconfig | |
1627 | @@ -17,6 +17,7 @@ config X86_64 | |
1628 | ### Arch settings | |
1629 | config X86 | |
1630 | def_bool y | |
1631 | + select HAVE_PREEMPT_LAZY | |
1632 | select ACPI_LEGACY_TABLES_LOOKUP if ACPI | |
1633 | select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI | |
1634 | select ANON_INODES | |
c7c16703 | 1635 | @@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC |
1a6e0f06 JK |
1636 | def_bool y |
1637 | depends on ISA_DMA_API | |
1638 | ||
1639 | +config RWSEM_GENERIC_SPINLOCK | |
1640 | + def_bool PREEMPT_RT_FULL | |
1641 | + | |
1642 | config RWSEM_XCHGADD_ALGORITHM | |
1643 | - def_bool y | |
1644 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1645 | ||
1646 | config GENERIC_CALIBRATE_DELAY | |
1647 | def_bool y | |
c7c16703 | 1648 | @@ -897,7 +901,7 @@ config IOMMU_HELPER |
1a6e0f06 JK |
1649 | config MAXSMP |
1650 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
1651 | depends on X86_64 && SMP && DEBUG_KERNEL | |
1652 | - select CPUMASK_OFFSTACK | |
1653 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
1654 | ---help--- | |
1655 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
1656 | If unsure, say N. | |
1657 | diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c | |
c7c16703 | 1658 | index aa8b0672f87a..2429414bfc71 100644 |
1a6e0f06 JK |
1659 | --- a/arch/x86/crypto/aesni-intel_glue.c |
1660 | +++ b/arch/x86/crypto/aesni-intel_glue.c | |
1661 | @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |
1662 | err = blkcipher_walk_virt(desc, &walk); | |
1663 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1664 | ||
1665 | - kernel_fpu_begin(); | |
1666 | while ((nbytes = walk.nbytes)) { | |
1667 | + kernel_fpu_begin(); | |
1668 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1669 | - nbytes & AES_BLOCK_MASK); | |
1670 | + nbytes & AES_BLOCK_MASK); | |
1671 | + kernel_fpu_end(); | |
1672 | nbytes &= AES_BLOCK_SIZE - 1; | |
1673 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1674 | } | |
1675 | - kernel_fpu_end(); | |
1676 | ||
1677 | return err; | |
1678 | } | |
1679 | @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |
1680 | err = blkcipher_walk_virt(desc, &walk); | |
1681 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1682 | ||
1683 | - kernel_fpu_begin(); | |
1684 | while ((nbytes = walk.nbytes)) { | |
1685 | + kernel_fpu_begin(); | |
1686 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1687 | nbytes & AES_BLOCK_MASK); | |
1688 | + kernel_fpu_end(); | |
1689 | nbytes &= AES_BLOCK_SIZE - 1; | |
1690 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1691 | } | |
1692 | - kernel_fpu_end(); | |
1693 | ||
1694 | return err; | |
1695 | } | |
1696 | @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |
1697 | err = blkcipher_walk_virt(desc, &walk); | |
1698 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1699 | ||
1700 | - kernel_fpu_begin(); | |
1701 | while ((nbytes = walk.nbytes)) { | |
1702 | + kernel_fpu_begin(); | |
1703 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1704 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1705 | + kernel_fpu_end(); | |
1706 | nbytes &= AES_BLOCK_SIZE - 1; | |
1707 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1708 | } | |
1709 | - kernel_fpu_end(); | |
1710 | ||
1711 | return err; | |
1712 | } | |
1713 | @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |
1714 | err = blkcipher_walk_virt(desc, &walk); | |
1715 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1716 | ||
1717 | - kernel_fpu_begin(); | |
1718 | while ((nbytes = walk.nbytes)) { | |
1719 | + kernel_fpu_begin(); | |
1720 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1721 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1722 | + kernel_fpu_end(); | |
1723 | nbytes &= AES_BLOCK_SIZE - 1; | |
1724 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1725 | } | |
1726 | - kernel_fpu_end(); | |
1727 | ||
1728 | return err; | |
1729 | } | |
1730 | @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |
1731 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | |
1732 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1733 | ||
1734 | - kernel_fpu_begin(); | |
1735 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
1736 | + kernel_fpu_begin(); | |
1737 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1738 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1739 | + kernel_fpu_end(); | |
1740 | nbytes &= AES_BLOCK_SIZE - 1; | |
1741 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1742 | } | |
1743 | if (walk.nbytes) { | |
1744 | + kernel_fpu_begin(); | |
1745 | ctr_crypt_final(ctx, &walk); | |
1746 | + kernel_fpu_end(); | |
1747 | err = blkcipher_walk_done(desc, &walk, 0); | |
1748 | } | |
1749 | - kernel_fpu_end(); | |
1750 | ||
1751 | return err; | |
1752 | } | |
1753 | diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c | |
1754 | index 8648158f3916..d7699130ee36 100644 | |
1755 | --- a/arch/x86/crypto/cast5_avx_glue.c | |
1756 | +++ b/arch/x86/crypto/cast5_avx_glue.c | |
1757 | @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) | |
1758 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1759 | bool enc) | |
1760 | { | |
1761 | - bool fpu_enabled = false; | |
1762 | + bool fpu_enabled; | |
1763 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1764 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
1765 | unsigned int nbytes; | |
1766 | @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1767 | u8 *wsrc = walk->src.virt.addr; | |
1768 | u8 *wdst = walk->dst.virt.addr; | |
1769 | ||
1770 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1771 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1772 | ||
1773 | /* Process multi-block batch */ | |
1774 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
1775 | @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1776 | } while (nbytes >= bsize); | |
1777 | ||
1778 | done: | |
1779 | + cast5_fpu_end(fpu_enabled); | |
1780 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1781 | } | |
1782 | - | |
1783 | - cast5_fpu_end(fpu_enabled); | |
1784 | return err; | |
1785 | } | |
1786 | ||
1787 | @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |
1788 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1789 | struct scatterlist *src, unsigned int nbytes) | |
1790 | { | |
1791 | - bool fpu_enabled = false; | |
1792 | + bool fpu_enabled; | |
1793 | struct blkcipher_walk walk; | |
1794 | int err; | |
1795 | ||
1796 | @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1797 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1798 | ||
1799 | while ((nbytes = walk.nbytes)) { | |
1800 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1801 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1802 | nbytes = __cbc_decrypt(desc, &walk); | |
1803 | + cast5_fpu_end(fpu_enabled); | |
1804 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1805 | } | |
1806 | - | |
1807 | - cast5_fpu_end(fpu_enabled); | |
1808 | return err; | |
1809 | } | |
1810 | ||
1811 | @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | |
1812 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1813 | struct scatterlist *src, unsigned int nbytes) | |
1814 | { | |
1815 | - bool fpu_enabled = false; | |
1816 | + bool fpu_enabled; | |
1817 | struct blkcipher_walk walk; | |
1818 | int err; | |
1819 | ||
1820 | @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1821 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1822 | ||
1823 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
1824 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1825 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1826 | nbytes = __ctr_crypt(desc, &walk); | |
1827 | + cast5_fpu_end(fpu_enabled); | |
1828 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1829 | } | |
1830 | ||
1831 | - cast5_fpu_end(fpu_enabled); | |
1832 | - | |
1833 | if (walk.nbytes) { | |
1834 | ctr_crypt_final(desc, &walk); | |
1835 | err = blkcipher_walk_done(desc, &walk, 0); | |
1836 | diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c | |
1837 | index 6a85598931b5..3a506ce7ed93 100644 | |
1838 | --- a/arch/x86/crypto/glue_helper.c | |
1839 | +++ b/arch/x86/crypto/glue_helper.c | |
1840 | @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1841 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1842 | const unsigned int bsize = 128 / 8; | |
1843 | unsigned int nbytes, i, func_bytes; | |
1844 | - bool fpu_enabled = false; | |
1845 | + bool fpu_enabled; | |
1846 | int err; | |
1847 | ||
1848 | err = blkcipher_walk_virt(desc, walk); | |
1849 | @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1850 | u8 *wdst = walk->dst.virt.addr; | |
1851 | ||
1852 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1853 | - desc, fpu_enabled, nbytes); | |
1854 | + desc, false, nbytes); | |
1855 | ||
1856 | for (i = 0; i < gctx->num_funcs; i++) { | |
1857 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
1858 | @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1859 | } | |
1860 | ||
1861 | done: | |
1862 | + glue_fpu_end(fpu_enabled); | |
1863 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1864 | } | |
1865 | ||
1866 | - glue_fpu_end(fpu_enabled); | |
1867 | return err; | |
1868 | } | |
1869 | ||
1870 | @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1871 | struct scatterlist *src, unsigned int nbytes) | |
1872 | { | |
1873 | const unsigned int bsize = 128 / 8; | |
1874 | - bool fpu_enabled = false; | |
1875 | + bool fpu_enabled; | |
1876 | struct blkcipher_walk walk; | |
1877 | int err; | |
1878 | ||
1879 | @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1880 | ||
1881 | while ((nbytes = walk.nbytes)) { | |
1882 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1883 | - desc, fpu_enabled, nbytes); | |
1884 | + desc, false, nbytes); | |
1885 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
1886 | + glue_fpu_end(fpu_enabled); | |
1887 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1888 | } | |
1889 | ||
1890 | - glue_fpu_end(fpu_enabled); | |
1891 | return err; | |
1892 | } | |
1893 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
1894 | @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1895 | struct scatterlist *src, unsigned int nbytes) | |
1896 | { | |
1897 | const unsigned int bsize = 128 / 8; | |
1898 | - bool fpu_enabled = false; | |
1899 | + bool fpu_enabled; | |
1900 | struct blkcipher_walk walk; | |
1901 | int err; | |
1902 | ||
1903 | @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1904 | ||
1905 | while ((nbytes = walk.nbytes) >= bsize) { | |
1906 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1907 | - desc, fpu_enabled, nbytes); | |
1908 | + desc, false, nbytes); | |
1909 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
1910 | + glue_fpu_end(fpu_enabled); | |
1911 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1912 | } | |
1913 | ||
1914 | - glue_fpu_end(fpu_enabled); | |
1915 | - | |
1916 | if (walk.nbytes) { | |
1917 | glue_ctr_crypt_final_128bit( | |
1918 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
1919 | @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1920 | void *tweak_ctx, void *crypt_ctx) | |
1921 | { | |
1922 | const unsigned int bsize = 128 / 8; | |
1923 | - bool fpu_enabled = false; | |
1924 | + bool fpu_enabled; | |
1925 | struct blkcipher_walk walk; | |
1926 | int err; | |
1927 | ||
1928 | @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1929 | ||
1930 | /* set minimum length to bsize, for tweak_fn */ | |
1931 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1932 | - desc, fpu_enabled, | |
1933 | + desc, false, | |
1934 | nbytes < bsize ? bsize : nbytes); | |
1935 | - | |
1936 | /* calculate first value of T */ | |
1937 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
1938 | + glue_fpu_end(fpu_enabled); | |
1939 | ||
1940 | while (nbytes) { | |
1941 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1942 | + desc, false, nbytes); | |
1943 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
1944 | ||
1945 | + glue_fpu_end(fpu_enabled); | |
1946 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1947 | nbytes = walk.nbytes; | |
1948 | } | |
1949 | - | |
1950 | - glue_fpu_end(fpu_enabled); | |
1951 | - | |
1952 | return err; | |
1953 | } | |
1954 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
1955 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | |
c7c16703 | 1956 | index bdd9cc59d20f..56d01a339ba4 100644 |
1a6e0f06 JK |
1957 | --- a/arch/x86/entry/common.c |
1958 | +++ b/arch/x86/entry/common.c | |
c7c16703 | 1959 | @@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs) |
1a6e0f06 JK |
1960 | |
1961 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
1962 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1963 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) | |
1964 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) | |
1965 | ||
1966 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
1967 | { | |
c7c16703 | 1968 | @@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) |
1a6e0f06 JK |
1969 | /* We have work to do. */ |
1970 | local_irq_enable(); | |
1971 | ||
1972 | - if (cached_flags & _TIF_NEED_RESCHED) | |
1973 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
1974 | schedule(); | |
1975 | ||
1976 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
1977 | + if (unlikely(current->forced_info.si_signo)) { | |
1978 | + struct task_struct *t = current; | |
1979 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
1980 | + t->forced_info.si_signo = 0; | |
1981 | + } | |
1982 | +#endif | |
1983 | if (cached_flags & _TIF_UPROBE) | |
1984 | uprobe_notify_resume(regs); | |
1985 | ||
1986 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
c7c16703 | 1987 | index edba8606b99a..4a3389535fc6 100644 |
1a6e0f06 JK |
1988 | --- a/arch/x86/entry/entry_32.S |
1989 | +++ b/arch/x86/entry/entry_32.S | |
c7c16703 | 1990 | @@ -308,8 +308,25 @@ END(ret_from_exception) |
1a6e0f06 JK |
1991 | ENTRY(resume_kernel) |
1992 | DISABLE_INTERRUPTS(CLBR_ANY) | |
1993 | need_resched: | |
1994 | + # preempt count == 0 + NEED_RS set? | |
1995 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
1996 | +#ifndef CONFIG_PREEMPT_LAZY | |
1997 | jnz restore_all | |
1998 | +#else | |
1999 | + jz test_int_off | |
2000 | + | |
2001 | + # atleast preempt count == 0 ? | |
2002 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2003 | + jne restore_all | |
2004 | + | |
c7c16703 JK |
2005 | + movl PER_CPU_VAR(current_task), %ebp |
2006 | + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
1a6e0f06 JK |
2007 | + jnz restore_all |
2008 | + | |
c7c16703 | 2009 | + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) |
1a6e0f06 JK |
2010 | + jz restore_all |
2011 | +test_int_off: | |
2012 | +#endif | |
2013 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2014 | jz restore_all | |
2015 | call preempt_schedule_irq | |
2016 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
c7c16703 | 2017 | index ef766a358b37..28401f826ab1 100644 |
1a6e0f06 JK |
2018 | --- a/arch/x86/entry/entry_64.S |
2019 | +++ b/arch/x86/entry/entry_64.S | |
c7c16703 | 2020 | @@ -546,7 +546,23 @@ GLOBAL(retint_user) |
1a6e0f06 JK |
2021 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ |
2022 | jnc 1f | |
2023 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2024 | +#ifndef CONFIG_PREEMPT_LAZY | |
2025 | jnz 1f | |
2026 | +#else | |
2027 | + jz do_preempt_schedule_irq | |
2028 | + | |
2029 | + # atleast preempt count == 0 ? | |
2030 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2031 | + jnz 1f | |
2032 | + | |
c7c16703 JK |
2033 | + movq PER_CPU_VAR(current_task), %rcx |
2034 | + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) | |
1a6e0f06 JK |
2035 | + jnz 1f |
2036 | + | |
c7c16703 | 2037 | + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) |
1a6e0f06 JK |
2038 | + jnc 1f |
2039 | +do_preempt_schedule_irq: | |
2040 | +#endif | |
2041 | call preempt_schedule_irq | |
2042 | jmp 0b | |
2043 | 1: | |
c7c16703 | 2044 | @@ -894,6 +910,7 @@ EXPORT_SYMBOL(native_load_gs_index) |
1a6e0f06 JK |
2045 | jmp 2b |
2046 | .previous | |
2047 | ||
2048 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2049 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2050 | ENTRY(do_softirq_own_stack) | |
2051 | pushq %rbp | |
c7c16703 | 2052 | @@ -906,6 +923,7 @@ ENTRY(do_softirq_own_stack) |
1a6e0f06 JK |
2053 | decl PER_CPU_VAR(irq_count) |
2054 | ret | |
2055 | END(do_softirq_own_stack) | |
2056 | +#endif | |
2057 | ||
2058 | #ifdef CONFIG_XEN | |
2059 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | |
2060 | diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h | |
2061 | index 17f218645701..11bd1b7ee6eb 100644 | |
2062 | --- a/arch/x86/include/asm/preempt.h | |
2063 | +++ b/arch/x86/include/asm/preempt.h | |
2064 | @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) | |
2065 | * a decrement which hits zero means we have no preempt_count and should | |
2066 | * reschedule. | |
2067 | */ | |
2068 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2069 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2070 | { | |
2071 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); | |
2072 | } | |
2073 | ||
2074 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2075 | +{ | |
2076 | + if (____preempt_count_dec_and_test()) | |
2077 | + return true; | |
2078 | +#ifdef CONFIG_PREEMPT_LAZY | |
2079 | + if (current_thread_info()->preempt_lazy_count) | |
2080 | + return false; | |
2081 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2082 | +#else | |
2083 | + return false; | |
2084 | +#endif | |
2085 | +} | |
2086 | + | |
2087 | /* | |
2088 | * Returns true when we need to resched and can (barring IRQ state). | |
2089 | */ | |
2090 | static __always_inline bool should_resched(int preempt_offset) | |
2091 | { | |
2092 | +#ifdef CONFIG_PREEMPT_LAZY | |
2093 | + u32 tmp; | |
2094 | + | |
2095 | + tmp = raw_cpu_read_4(__preempt_count); | |
2096 | + if (tmp == preempt_offset) | |
2097 | + return true; | |
2098 | + | |
2099 | + /* preempt count == 0 ? */ | |
2100 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2101 | + if (tmp) | |
2102 | + return false; | |
2103 | + if (current_thread_info()->preempt_lazy_count) | |
2104 | + return false; | |
2105 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2106 | +#else | |
2107 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2108 | +#endif | |
2109 | } | |
2110 | ||
2111 | #ifdef CONFIG_PREEMPT | |
2112 | diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h | |
c7c16703 | 2113 | index 8af22be0fe61..d1328789b759 100644 |
1a6e0f06 JK |
2114 | --- a/arch/x86/include/asm/signal.h |
2115 | +++ b/arch/x86/include/asm/signal.h | |
c7c16703 JK |
2116 | @@ -27,6 +27,19 @@ typedef struct { |
2117 | #define SA_IA32_ABI 0x02000000u | |
2118 | #define SA_X32_ABI 0x01000000u | |
1a6e0f06 JK |
2119 | |
2120 | +/* | |
2121 | + * Because some traps use the IST stack, we must keep preemption | |
2122 | + * disabled while calling do_trap(), but do_trap() may call | |
2123 | + * force_sig_info() which will grab the signal spin_locks for the | |
2124 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2125 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2126 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2127 | + * trap. | |
2128 | + */ | |
2129 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2130 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2131 | +#endif | |
2132 | + | |
2133 | #ifndef CONFIG_COMPAT | |
2134 | typedef sigset_t compat_sigset_t; | |
2135 | #endif | |
2136 | diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h | |
2137 | index 58505f01962f..02fa39652cd6 100644 | |
2138 | --- a/arch/x86/include/asm/stackprotector.h | |
2139 | +++ b/arch/x86/include/asm/stackprotector.h | |
2140 | @@ -59,7 +59,7 @@ | |
2141 | */ | |
2142 | static __always_inline void boot_init_stack_canary(void) | |
2143 | { | |
2144 | - u64 canary; | |
2145 | + u64 uninitialized_var(canary); | |
2146 | u64 tsc; | |
2147 | ||
2148 | #ifdef CONFIG_X86_64 | |
2149 | @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) | |
2150 | * of randomness. The TSC only matters for very early init, | |
2151 | * there it already has some randomness on most systems. Later | |
2152 | * on during the bootup the random pool has true entropy too. | |
2153 | + * | |
2154 | + * For preempt-rt we need to weaken the randomness a bit, as | |
2155 | + * we can't call into the random generator from atomic context | |
2156 | + * due to locking constraints. We just leave canary | |
2157 | + * uninitialized and use the TSC based randomness on top of it. | |
2158 | */ | |
2159 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2160 | get_random_bytes(&canary, sizeof(canary)); | |
2161 | +#endif | |
2162 | tsc = rdtsc(); | |
2163 | canary += tsc + (tsc << 32UL); | |
2164 | ||
2165 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | |
c7c16703 | 2166 | index ad6f5eb07a95..5ceb3a1c2b1a 100644 |
1a6e0f06 JK |
2167 | --- a/arch/x86/include/asm/thread_info.h |
2168 | +++ b/arch/x86/include/asm/thread_info.h | |
c7c16703 JK |
2169 | @@ -54,11 +54,14 @@ struct task_struct; |
2170 | ||
2171 | struct thread_info { | |
2172 | unsigned long flags; /* low level flags */ | |
2173 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
1a6e0f06 JK |
2174 | + <0 => BUG */ |
2175 | }; | |
2176 | ||
2177 | #define INIT_THREAD_INFO(tsk) \ | |
c7c16703 JK |
2178 | { \ |
2179 | .flags = 0, \ | |
2180 | + .preempt_lazy_count = 0, \ | |
2181 | } | |
2182 | ||
2183 | #define init_stack (init_thread_union.stack) | |
2184 | @@ -67,6 +70,10 @@ struct thread_info { | |
1a6e0f06 JK |
2185 | |
2186 | #include <asm/asm-offsets.h> | |
2187 | ||
2188 | +#define GET_THREAD_INFO(reg) \ | |
2189 | + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ | |
2190 | + _ASM_SUB $(THREAD_SIZE),reg ; | |
2191 | + | |
2192 | #endif | |
2193 | ||
2194 | /* | |
c7c16703 | 2195 | @@ -85,6 +92,7 @@ struct thread_info { |
1a6e0f06 JK |
2196 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ |
2197 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2198 | #define TIF_SECCOMP 8 /* secure computing */ | |
2199 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2200 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2201 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
2202 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
c7c16703 | 2203 | @@ -108,6 +116,7 @@ struct thread_info { |
1a6e0f06 JK |
2204 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) |
2205 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2206 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2207 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2208 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2209 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
2210 | #define _TIF_NOTSC (1 << TIF_NOTSC) | |
c7c16703 | 2211 | @@ -143,6 +152,8 @@ struct thread_info { |
1a6e0f06 JK |
2212 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
2213 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2214 | ||
2215 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2216 | + | |
2217 | #define STACK_WARN (THREAD_SIZE/8) | |
2218 | ||
2219 | /* | |
2220 | diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h | |
c7c16703 | 2221 | index 57ab86d94d64..35d25e27180f 100644 |
1a6e0f06 JK |
2222 | --- a/arch/x86/include/asm/uv/uv_bau.h |
2223 | +++ b/arch/x86/include/asm/uv/uv_bau.h | |
c7c16703 | 2224 | @@ -624,9 +624,9 @@ struct bau_control { |
1a6e0f06 JK |
2225 | cycles_t send_message; |
2226 | cycles_t period_end; | |
2227 | cycles_t period_time; | |
2228 | - spinlock_t uvhub_lock; | |
2229 | - spinlock_t queue_lock; | |
2230 | - spinlock_t disable_lock; | |
2231 | + raw_spinlock_t uvhub_lock; | |
2232 | + raw_spinlock_t queue_lock; | |
2233 | + raw_spinlock_t disable_lock; | |
2234 | /* tunables */ | |
2235 | int max_concurr; | |
2236 | int max_concurr_const; | |
c7c16703 | 2237 | @@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v) |
1a6e0f06 JK |
2238 | * to be lowered below the current 'v'. atomic_add_unless can only stop |
2239 | * on equal. | |
2240 | */ | |
2241 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2242 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2243 | { | |
2244 | - spin_lock(lock); | |
2245 | + raw_spin_lock(lock); | |
2246 | if (atomic_read(v) >= u) { | |
2247 | - spin_unlock(lock); | |
2248 | + raw_spin_unlock(lock); | |
2249 | return 0; | |
2250 | } | |
2251 | atomic_inc(v); | |
2252 | - spin_unlock(lock); | |
2253 | + raw_spin_unlock(lock); | |
2254 | return 1; | |
2255 | } | |
2256 | ||
2257 | diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c | |
c7c16703 | 2258 | index 931ced8ca345..167975ac8af7 100644 |
1a6e0f06 JK |
2259 | --- a/arch/x86/kernel/acpi/boot.c |
2260 | +++ b/arch/x86/kernel/acpi/boot.c | |
2261 | @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |
2262 | * ->ioapic_mutex | |
2263 | * ->ioapic_lock | |
2264 | */ | |
2265 | +#ifdef CONFIG_X86_IO_APIC | |
2266 | static DEFINE_MUTEX(acpi_ioapic_lock); | |
2267 | +#endif | |
2268 | ||
2269 | /* -------------------------------------------------------------------------- | |
2270 | Boot-time Configuration | |
2271 | diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c | |
2272 | index 48e6d84f173e..0b5a8b994f65 100644 | |
2273 | --- a/arch/x86/kernel/apic/io_apic.c | |
2274 | +++ b/arch/x86/kernel/apic/io_apic.c | |
2275 | @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) | |
2276 | static inline bool ioapic_irqd_mask(struct irq_data *data) | |
2277 | { | |
2278 | /* If we are moving the irq we need to mask it */ | |
2279 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2280 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2281 | + !irqd_irq_inprogress(data))) { | |
2282 | mask_ioapic_irq(data); | |
2283 | return true; | |
2284 | } | |
2285 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
c7c16703 | 2286 | index c62e015b126c..0cc71257fca6 100644 |
1a6e0f06 JK |
2287 | --- a/arch/x86/kernel/asm-offsets.c |
2288 | +++ b/arch/x86/kernel/asm-offsets.c | |
c7c16703 | 2289 | @@ -36,6 +36,7 @@ void common(void) { |
1a6e0f06 JK |
2290 | |
2291 | BLANK(); | |
c7c16703 JK |
2292 | OFFSET(TASK_TI_flags, task_struct, thread_info.flags); |
2293 | + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); | |
1a6e0f06 | 2294 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); |
c7c16703 JK |
2295 | |
2296 | BLANK(); | |
2297 | @@ -91,4 +92,5 @@ void common(void) { | |
1a6e0f06 JK |
2298 | |
2299 | BLANK(); | |
2300 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2301 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
2302 | } | |
2303 | diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c | |
c7c16703 | 2304 | index a7fdf453d895..e3a0e969a66e 100644 |
1a6e0f06 JK |
2305 | --- a/arch/x86/kernel/cpu/mcheck/mce.c |
2306 | +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |
2307 | @@ -41,6 +41,8 @@ | |
2308 | #include <linux/debugfs.h> | |
2309 | #include <linux/irq_work.h> | |
2310 | #include <linux/export.h> | |
2311 | +#include <linux/jiffies.h> | |
2312 | +#include <linux/swork.h> | |
c7c16703 | 2313 | #include <linux/jump_label.h> |
1a6e0f06 JK |
2314 | |
2315 | #include <asm/processor.h> | |
c7c16703 | 2316 | @@ -1317,7 +1319,7 @@ void mce_log_therm_throt_event(__u64 status) |
1a6e0f06 JK |
2317 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; |
2318 | ||
2319 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2320 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2321 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2322 | ||
2323 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2324 | { | |
c7c16703 | 2325 | @@ -1326,32 +1328,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) |
1a6e0f06 JK |
2326 | |
2327 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2328 | ||
2329 | -static void __restart_timer(struct timer_list *t, unsigned long interval) | |
2330 | +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) | |
2331 | { | |
2332 | - unsigned long when = jiffies + interval; | |
2333 | - unsigned long flags; | |
2334 | - | |
2335 | - local_irq_save(flags); | |
2336 | - | |
2337 | - if (timer_pending(t)) { | |
2338 | - if (time_before(when, t->expires)) | |
2339 | - mod_timer(t, when); | |
2340 | - } else { | |
2341 | - t->expires = round_jiffies(when); | |
2342 | - add_timer_on(t, smp_processor_id()); | |
2343 | - } | |
2344 | - | |
2345 | - local_irq_restore(flags); | |
2346 | + if (!interval) | |
2347 | + return HRTIMER_NORESTART; | |
2348 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); | |
2349 | + return HRTIMER_RESTART; | |
2350 | } | |
2351 | ||
2352 | -static void mce_timer_fn(unsigned long data) | |
2353 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2354 | { | |
2355 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2356 | - int cpu = smp_processor_id(); | |
2357 | unsigned long iv; | |
2358 | ||
2359 | - WARN_ON(cpu != data); | |
2360 | - | |
2361 | iv = __this_cpu_read(mce_next_interval); | |
2362 | ||
2363 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
c7c16703 | 2364 | @@ -1374,7 +1362,7 @@ static void mce_timer_fn(unsigned long data) |
1a6e0f06 JK |
2365 | |
2366 | done: | |
2367 | __this_cpu_write(mce_next_interval, iv); | |
2368 | - __restart_timer(t, iv); | |
2369 | + return __restart_timer(timer, iv); | |
2370 | } | |
2371 | ||
2372 | /* | |
c7c16703 | 2373 | @@ -1382,7 +1370,7 @@ static void mce_timer_fn(unsigned long data) |
1a6e0f06 JK |
2374 | */ |
2375 | void mce_timer_kick(unsigned long interval) | |
2376 | { | |
2377 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2378 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2379 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
2380 | ||
2381 | __restart_timer(t, interval); | |
c7c16703 | 2382 | @@ -1397,7 +1385,7 @@ static void mce_timer_delete_all(void) |
1a6e0f06 JK |
2383 | int cpu; |
2384 | ||
2385 | for_each_online_cpu(cpu) | |
2386 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2387 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2388 | } | |
2389 | ||
2390 | static void mce_do_trigger(struct work_struct *work) | |
c7c16703 | 2391 | @@ -1407,6 +1395,56 @@ static void mce_do_trigger(struct work_struct *work) |
1a6e0f06 JK |
2392 | |
2393 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2394 | ||
2395 | +static void __mce_notify_work(struct swork_event *event) | |
2396 | +{ | |
2397 | + /* Not more than two messages every minute */ | |
2398 | + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2399 | + | |
2400 | + /* wake processes polling /dev/mcelog */ | |
2401 | + wake_up_interruptible(&mce_chrdev_wait); | |
2402 | + | |
2403 | + /* | |
2404 | + * There is no risk of missing notifications because | |
2405 | + * work_pending is always cleared before the function is | |
2406 | + * executed. | |
2407 | + */ | |
2408 | + if (mce_helper[0] && !work_pending(&mce_trigger_work)) | |
2409 | + schedule_work(&mce_trigger_work); | |
2410 | + | |
2411 | + if (__ratelimit(&ratelimit)) | |
2412 | + pr_info(HW_ERR "Machine check events logged\n"); | |
2413 | +} | |
2414 | + | |
2415 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2416 | +static bool notify_work_ready __read_mostly; | |
2417 | +static struct swork_event notify_work; | |
2418 | + | |
2419 | +static int mce_notify_work_init(void) | |
2420 | +{ | |
2421 | + int err; | |
2422 | + | |
2423 | + err = swork_get(); | |
2424 | + if (err) | |
2425 | + return err; | |
2426 | + | |
2427 | + INIT_SWORK(¬ify_work, __mce_notify_work); | |
2428 | + notify_work_ready = true; | |
2429 | + return 0; | |
2430 | +} | |
2431 | + | |
2432 | +static void mce_notify_work(void) | |
2433 | +{ | |
2434 | + if (notify_work_ready) | |
2435 | + swork_queue(¬ify_work); | |
2436 | +} | |
2437 | +#else | |
2438 | +static void mce_notify_work(void) | |
2439 | +{ | |
2440 | + __mce_notify_work(NULL); | |
2441 | +} | |
2442 | +static inline int mce_notify_work_init(void) { return 0; } | |
2443 | +#endif | |
2444 | + | |
2445 | /* | |
2446 | * Notify the user(s) about new machine check events. | |
2447 | * Can be called from interrupt context, but not from machine check/NMI | |
c7c16703 | 2448 | @@ -1414,19 +1452,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
1a6e0f06 JK |
2449 | */ |
2450 | int mce_notify_irq(void) | |
2451 | { | |
2452 | - /* Not more than two messages every minute */ | |
2453 | - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2454 | - | |
2455 | if (test_and_clear_bit(0, &mce_need_notify)) { | |
2456 | - /* wake processes polling /dev/mcelog */ | |
2457 | - wake_up_interruptible(&mce_chrdev_wait); | |
2458 | - | |
2459 | - if (mce_helper[0]) | |
2460 | - schedule_work(&mce_trigger_work); | |
2461 | - | |
2462 | - if (__ratelimit(&ratelimit)) | |
2463 | - pr_info(HW_ERR "Machine check events logged\n"); | |
2464 | - | |
2465 | + mce_notify_work(); | |
2466 | return 1; | |
2467 | } | |
2468 | return 0; | |
c7c16703 | 2469 | @@ -1732,7 +1759,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) |
1a6e0f06 JK |
2470 | } |
2471 | } | |
2472 | ||
2473 | -static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2474 | +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) | |
2475 | { | |
2476 | unsigned long iv = check_interval * HZ; | |
2477 | ||
c7c16703 | 2478 | @@ -1741,16 +1768,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1a6e0f06 JK |
2479 | |
2480 | per_cpu(mce_next_interval, cpu) = iv; | |
2481 | ||
2482 | - t->expires = round_jiffies(jiffies + iv); | |
2483 | - add_timer_on(t, cpu); | |
2484 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
2485 | + 0, HRTIMER_MODE_REL_PINNED); | |
2486 | } | |
2487 | ||
2488 | static void __mcheck_cpu_init_timer(void) | |
2489 | { | |
2490 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2491 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2492 | unsigned int cpu = smp_processor_id(); | |
2493 | ||
2494 | - setup_pinned_timer(t, mce_timer_fn, cpu); | |
2495 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2496 | + t->function = mce_timer_fn; | |
2497 | mce_start_timer(cpu, t); | |
2498 | } | |
2499 | ||
c7c16703 | 2500 | @@ -2475,6 +2503,8 @@ static void mce_disable_cpu(void *h) |
1a6e0f06 JK |
2501 | if (!mce_available(raw_cpu_ptr(&cpu_info))) |
2502 | return; | |
2503 | ||
2504 | + hrtimer_cancel(this_cpu_ptr(&mce_timer)); | |
2505 | + | |
2506 | if (!(action & CPU_TASKS_FROZEN)) | |
2507 | cmci_clear(); | |
2508 | ||
c7c16703 | 2509 | @@ -2497,6 +2527,7 @@ static void mce_reenable_cpu(void *h) |
1a6e0f06 JK |
2510 | if (b->init) |
2511 | wrmsrl(msr_ops.ctl(i), b->ctl); | |
2512 | } | |
2513 | + __mcheck_cpu_init_timer(); | |
2514 | } | |
2515 | ||
2516 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
c7c16703 | 2517 | @@ -2504,7 +2535,6 @@ static int |
1a6e0f06 JK |
2518 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
2519 | { | |
2520 | unsigned int cpu = (unsigned long)hcpu; | |
2521 | - struct timer_list *t = &per_cpu(mce_timer, cpu); | |
2522 | ||
2523 | switch (action & ~CPU_TASKS_FROZEN) { | |
2524 | case CPU_ONLINE: | |
c7c16703 | 2525 | @@ -2524,11 +2554,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
1a6e0f06 JK |
2526 | break; |
2527 | case CPU_DOWN_PREPARE: | |
2528 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | |
2529 | - del_timer_sync(t); | |
2530 | break; | |
2531 | case CPU_DOWN_FAILED: | |
2532 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | |
2533 | - mce_start_timer(cpu, t); | |
2534 | break; | |
2535 | } | |
2536 | ||
c7c16703 | 2537 | @@ -2567,6 +2595,10 @@ static __init int mcheck_init_device(void) |
1a6e0f06 JK |
2538 | goto err_out; |
2539 | } | |
2540 | ||
2541 | + err = mce_notify_work_init(); | |
2542 | + if (err) | |
2543 | + goto err_out; | |
2544 | + | |
2545 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { | |
2546 | err = -ENOMEM; | |
2547 | goto err_out; | |
1a6e0f06 JK |
2548 | diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c |
2549 | index 1f38d9a4d9de..053bf3b2ef39 100644 | |
2550 | --- a/arch/x86/kernel/irq_32.c | |
2551 | +++ b/arch/x86/kernel/irq_32.c | |
2552 | @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) | |
2553 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | |
2554 | } | |
2555 | ||
2556 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2557 | void do_softirq_own_stack(void) | |
2558 | { | |
2559 | struct irq_stack *irqstk; | |
2560 | @@ -143,6 +144,7 @@ void do_softirq_own_stack(void) | |
2561 | ||
2562 | call_on_stack(__do_softirq, isp); | |
2563 | } | |
2564 | +#endif | |
2565 | ||
2566 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
2567 | { | |
2568 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | |
c7c16703 | 2569 | index bd7be8efdc4c..b3b0a7f7b1ca 100644 |
1a6e0f06 JK |
2570 | --- a/arch/x86/kernel/process_32.c |
2571 | +++ b/arch/x86/kernel/process_32.c | |
2572 | @@ -35,6 +35,7 @@ | |
2573 | #include <linux/uaccess.h> | |
2574 | #include <linux/io.h> | |
2575 | #include <linux/kdebug.h> | |
2576 | +#include <linux/highmem.h> | |
2577 | ||
2578 | #include <asm/pgtable.h> | |
2579 | #include <asm/ldt.h> | |
c7c16703 | 2580 | @@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
1a6e0f06 JK |
2581 | } |
2582 | EXPORT_SYMBOL_GPL(start_thread); | |
2583 | ||
2584 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2585 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
2586 | +{ | |
2587 | + int i; | |
2588 | + | |
2589 | + /* | |
2590 | + * Clear @prev's kmap_atomic mappings | |
2591 | + */ | |
2592 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
2593 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2594 | + pte_t *ptep = kmap_pte - idx; | |
2595 | + | |
2596 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
2597 | + } | |
2598 | + /* | |
2599 | + * Restore @next_p's kmap_atomic mappings | |
2600 | + */ | |
2601 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
2602 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2603 | + | |
2604 | + if (!pte_none(next_p->kmap_pte[i])) | |
2605 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
2606 | + } | |
2607 | +} | |
2608 | +#else | |
2609 | +static inline void | |
2610 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
2611 | +#endif | |
2612 | + | |
2613 | ||
2614 | /* | |
2615 | * switch_to(x,y) should switch tasks from x to y. | |
c7c16703 | 2616 | @@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
1a6e0f06 JK |
2617 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
2618 | __switch_to_xtra(prev_p, next_p, tss); | |
2619 | ||
2620 | + switch_kmaps(prev_p, next_p); | |
2621 | + | |
2622 | /* | |
2623 | * Leave lazy mode, flushing any hypercalls made here. | |
2624 | * This must be done before restoring TLS segments so | |
2625 | diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c | |
c7c16703 | 2626 | index 6f69340f9fa3..d47f204a0fbe 100644 |
1a6e0f06 JK |
2627 | --- a/arch/x86/kvm/lapic.c |
2628 | +++ b/arch/x86/kvm/lapic.c | |
c7c16703 | 2629 | @@ -1939,6 +1939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) |
1a6e0f06 JK |
2630 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
2631 | HRTIMER_MODE_ABS_PINNED); | |
2632 | apic->lapic_timer.timer.function = apic_timer_fn; | |
2633 | + apic->lapic_timer.timer.irqsafe = 1; | |
2634 | ||
2635 | /* | |
2636 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | |
2637 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | |
c7c16703 | 2638 | index f3648c978d2f..d0d0901d1c56 100644 |
1a6e0f06 JK |
2639 | --- a/arch/x86/kvm/x86.c |
2640 | +++ b/arch/x86/kvm/x86.c | |
c7c16703 | 2641 | @@ -5930,6 +5930,13 @@ int kvm_arch_init(void *opaque) |
1a6e0f06 JK |
2642 | goto out; |
2643 | } | |
2644 | ||
2645 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2646 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
2647 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
2648 | + return -EOPNOTSUPP; | |
2649 | + } | |
2650 | +#endif | |
2651 | + | |
2652 | r = kvm_mmu_module_init(); | |
2653 | if (r) | |
2654 | goto out_free_percpu; | |
2655 | diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c | |
2656 | index 6d18b70ed5a9..f752724c22e8 100644 | |
2657 | --- a/arch/x86/mm/highmem_32.c | |
2658 | +++ b/arch/x86/mm/highmem_32.c | |
2659 | @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); | |
2660 | */ | |
2661 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2662 | { | |
2663 | + pte_t pte = mk_pte(page, prot); | |
2664 | unsigned long vaddr; | |
2665 | int idx, type; | |
2666 | ||
2667 | - preempt_disable(); | |
2668 | + preempt_disable_nort(); | |
2669 | pagefault_disable(); | |
2670 | ||
2671 | if (!PageHighMem(page)) | |
2672 | @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2673 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
2674 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2675 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
2676 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
2677 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2678 | + current->kmap_pte[type] = pte; | |
2679 | +#endif | |
2680 | + set_pte(kmap_pte-idx, pte); | |
2681 | arch_flush_lazy_mmu_mode(); | |
2682 | ||
2683 | return (void *)vaddr; | |
2684 | @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) | |
2685 | * is a bad idea also, in case the page changes cacheability | |
2686 | * attributes or becomes a protected page in a hypervisor. | |
2687 | */ | |
2688 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2689 | + current->kmap_pte[type] = __pte(0); | |
2690 | +#endif | |
2691 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2692 | kmap_atomic_idx_pop(); | |
2693 | arch_flush_lazy_mmu_mode(); | |
2694 | @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) | |
2695 | #endif | |
2696 | ||
2697 | pagefault_enable(); | |
2698 | - preempt_enable(); | |
2699 | + preempt_enable_nort(); | |
2700 | } | |
2701 | EXPORT_SYMBOL(__kunmap_atomic); | |
2702 | ||
2703 | diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c | |
2704 | index ada98b39b8ad..585f6829653b 100644 | |
2705 | --- a/arch/x86/mm/iomap_32.c | |
2706 | +++ b/arch/x86/mm/iomap_32.c | |
2707 | @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); | |
2708 | ||
2709 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2710 | { | |
2711 | + pte_t pte = pfn_pte(pfn, prot); | |
2712 | unsigned long vaddr; | |
2713 | int idx, type; | |
2714 | ||
2715 | @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2716 | type = kmap_atomic_idx_push(); | |
2717 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
2718 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2719 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
2720 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
2721 | + | |
2722 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2723 | + current->kmap_pte[type] = pte; | |
2724 | +#endif | |
2725 | + set_pte(kmap_pte - idx, pte); | |
2726 | arch_flush_lazy_mmu_mode(); | |
2727 | ||
2728 | return (void *)vaddr; | |
2729 | @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) | |
2730 | * is a bad idea also, in case the page changes cacheability | |
2731 | * attributes or becomes a protected page in a hypervisor. | |
2732 | */ | |
2733 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2734 | + current->kmap_pte[type] = __pte(0); | |
2735 | +#endif | |
2736 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2737 | kmap_atomic_idx_pop(); | |
2738 | } | |
2739 | diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c | |
c7c16703 | 2740 | index 9e42842e924a..5398f97172f9 100644 |
1a6e0f06 JK |
2741 | --- a/arch/x86/platform/uv/tlb_uv.c |
2742 | +++ b/arch/x86/platform/uv/tlb_uv.c | |
c7c16703 | 2743 | @@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc, |
1a6e0f06 JK |
2744 | |
2745 | quiesce_local_uvhub(hmaster); | |
2746 | ||
2747 | - spin_lock(&hmaster->queue_lock); | |
2748 | + raw_spin_lock(&hmaster->queue_lock); | |
2749 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2750 | - spin_unlock(&hmaster->queue_lock); | |
2751 | + raw_spin_unlock(&hmaster->queue_lock); | |
2752 | ||
2753 | end_uvhub_quiesce(hmaster); | |
2754 | ||
c7c16703 | 2755 | @@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc, |
1a6e0f06 JK |
2756 | |
2757 | quiesce_local_uvhub(hmaster); | |
2758 | ||
2759 | - spin_lock(&hmaster->queue_lock); | |
2760 | + raw_spin_lock(&hmaster->queue_lock); | |
2761 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2762 | - spin_unlock(&hmaster->queue_lock); | |
2763 | + raw_spin_unlock(&hmaster->queue_lock); | |
2764 | ||
2765 | end_uvhub_quiesce(hmaster); | |
2766 | ||
c7c16703 | 2767 | @@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2768 | cycles_t tm1; |
2769 | ||
2770 | hmaster = bcp->uvhub_master; | |
2771 | - spin_lock(&hmaster->disable_lock); | |
2772 | + raw_spin_lock(&hmaster->disable_lock); | |
2773 | if (!bcp->baudisabled) { | |
2774 | stat->s_bau_disabled++; | |
2775 | tm1 = get_cycles(); | |
c7c16703 | 2776 | @@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2777 | } |
2778 | } | |
2779 | } | |
2780 | - spin_unlock(&hmaster->disable_lock); | |
2781 | + raw_spin_unlock(&hmaster->disable_lock); | |
2782 | } | |
2783 | ||
2784 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
c7c16703 | 2785 | @@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, |
1a6e0f06 JK |
2786 | */ |
2787 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
2788 | { | |
2789 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
2790 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
2791 | atomic_t *v; | |
2792 | ||
2793 | v = &hmaster->active_descriptor_count; | |
c7c16703 | 2794 | @@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2795 | struct bau_control *hmaster; |
2796 | ||
2797 | hmaster = bcp->uvhub_master; | |
2798 | - spin_lock(&hmaster->disable_lock); | |
2799 | + raw_spin_lock(&hmaster->disable_lock); | |
2800 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
2801 | stat->s_bau_reenabled++; | |
2802 | for_each_present_cpu(tcpu) { | |
c7c16703 | 2803 | @@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2804 | tbcp->period_giveups = 0; |
2805 | } | |
2806 | } | |
2807 | - spin_unlock(&hmaster->disable_lock); | |
2808 | + raw_spin_unlock(&hmaster->disable_lock); | |
2809 | return 0; | |
2810 | } | |
2811 | - spin_unlock(&hmaster->disable_lock); | |
2812 | + raw_spin_unlock(&hmaster->disable_lock); | |
2813 | return -1; | |
2814 | } | |
2815 | ||
c7c16703 | 2816 | @@ -1940,9 +1940,9 @@ static void __init init_per_cpu_tunables(void) |
1a6e0f06 | 2817 | bcp->cong_reps = congested_reps; |
c7c16703 JK |
2818 | bcp->disabled_period = sec_2_cycles(disabled_period); |
2819 | bcp->giveup_limit = giveup_limit; | |
1a6e0f06 JK |
2820 | - spin_lock_init(&bcp->queue_lock); |
2821 | - spin_lock_init(&bcp->uvhub_lock); | |
2822 | - spin_lock_init(&bcp->disable_lock); | |
2823 | + raw_spin_lock_init(&bcp->queue_lock); | |
2824 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
2825 | + raw_spin_lock_init(&bcp->disable_lock); | |
2826 | } | |
2827 | } | |
2828 | ||
2829 | diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c | |
2830 | index b333fc45f9ec..8b85916e6986 100644 | |
2831 | --- a/arch/x86/platform/uv/uv_time.c | |
2832 | +++ b/arch/x86/platform/uv/uv_time.c | |
2833 | @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | |
2834 | ||
2835 | /* There is one of these allocated per node */ | |
2836 | struct uv_rtc_timer_head { | |
2837 | - spinlock_t lock; | |
2838 | + raw_spinlock_t lock; | |
2839 | /* next cpu waiting for timer, local node relative: */ | |
2840 | int next_cpu; | |
2841 | /* number of cpus on this node: */ | |
2842 | @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) | |
2843 | uv_rtc_deallocate_timers(); | |
2844 | return -ENOMEM; | |
2845 | } | |
2846 | - spin_lock_init(&head->lock); | |
2847 | + raw_spin_lock_init(&head->lock); | |
2848 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
2849 | head->next_cpu = -1; | |
2850 | blade_info[bid] = head; | |
2851 | @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2852 | unsigned long flags; | |
2853 | int next_cpu; | |
2854 | ||
2855 | - spin_lock_irqsave(&head->lock, flags); | |
2856 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2857 | ||
2858 | next_cpu = head->next_cpu; | |
2859 | *t = expires; | |
2860 | @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2861 | if (uv_setup_intr(cpu, expires)) { | |
2862 | *t = ULLONG_MAX; | |
2863 | uv_rtc_find_next_timer(head, pnode); | |
2864 | - spin_unlock_irqrestore(&head->lock, flags); | |
2865 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2866 | return -ETIME; | |
2867 | } | |
2868 | } | |
2869 | ||
2870 | - spin_unlock_irqrestore(&head->lock, flags); | |
2871 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2872 | return 0; | |
2873 | } | |
2874 | ||
2875 | @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2876 | unsigned long flags; | |
2877 | int rc = 0; | |
2878 | ||
2879 | - spin_lock_irqsave(&head->lock, flags); | |
2880 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2881 | ||
2882 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
2883 | rc = 1; | |
2884 | @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2885 | uv_rtc_find_next_timer(head, pnode); | |
2886 | } | |
2887 | ||
2888 | - spin_unlock_irqrestore(&head->lock, flags); | |
2889 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2890 | ||
2891 | return rc; | |
2892 | } | |
2893 | @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2894 | static cycle_t uv_read_rtc(struct clocksource *cs) | |
2895 | { | |
2896 | unsigned long offset; | |
2897 | + cycle_t cycles; | |
2898 | ||
2899 | + preempt_disable(); | |
2900 | if (uv_get_min_hub_revision_id() == 1) | |
2901 | offset = 0; | |
2902 | else | |
2903 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
2904 | ||
2905 | - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2906 | + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2907 | + preempt_enable(); | |
2908 | + | |
2909 | + return cycles; | |
2910 | } | |
2911 | ||
2912 | /* | |
2913 | diff --git a/block/blk-core.c b/block/blk-core.c | |
c7c16703 | 2914 | index 14d7c0740dc0..dfd905bea77c 100644 |
1a6e0f06 JK |
2915 | --- a/block/blk-core.c |
2916 | +++ b/block/blk-core.c | |
2917 | @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |
2918 | ||
2919 | INIT_LIST_HEAD(&rq->queuelist); | |
2920 | INIT_LIST_HEAD(&rq->timeout_list); | |
2921 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2922 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
2923 | +#endif | |
2924 | rq->cpu = -1; | |
2925 | rq->q = q; | |
2926 | rq->__sector = (sector_t) -1; | |
2927 | @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async); | |
2928 | **/ | |
2929 | void blk_start_queue(struct request_queue *q) | |
2930 | { | |
2931 | - WARN_ON(!irqs_disabled()); | |
2932 | + WARN_ON_NONRT(!irqs_disabled()); | |
2933 | ||
2934 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
2935 | __blk_run_queue(q); | |
2936 | @@ -659,7 +662,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait) | |
2937 | if (nowait) | |
2938 | return -EBUSY; | |
2939 | ||
2940 | - ret = wait_event_interruptible(q->mq_freeze_wq, | |
2941 | + ret = swait_event_interruptible(q->mq_freeze_wq, | |
2942 | !atomic_read(&q->mq_freeze_depth) || | |
2943 | blk_queue_dying(q)); | |
2944 | if (blk_queue_dying(q)) | |
2945 | @@ -679,7 +682,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
2946 | struct request_queue *q = | |
2947 | container_of(ref, struct request_queue, q_usage_counter); | |
2948 | ||
2949 | - wake_up_all(&q->mq_freeze_wq); | |
2950 | + swake_up_all(&q->mq_freeze_wq); | |
2951 | } | |
2952 | ||
2953 | static void blk_rq_timed_out_timer(unsigned long data) | |
2954 | @@ -748,7 +751,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
2955 | q->bypass_depth = 1; | |
2956 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | |
2957 | ||
2958 | - init_waitqueue_head(&q->mq_freeze_wq); | |
2959 | + init_swait_queue_head(&q->mq_freeze_wq); | |
2960 | ||
2961 | /* | |
2962 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
c7c16703 | 2963 | @@ -3177,7 +3180,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, |
1a6e0f06 JK |
2964 | blk_run_queue_async(q); |
2965 | else | |
2966 | __blk_run_queue(q); | |
2967 | - spin_unlock(q->queue_lock); | |
2968 | + spin_unlock_irq(q->queue_lock); | |
2969 | } | |
2970 | ||
2971 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
c7c16703 | 2972 | @@ -3225,7 +3228,6 @@ EXPORT_SYMBOL(blk_check_plugged); |
1a6e0f06 JK |
2973 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
2974 | { | |
2975 | struct request_queue *q; | |
2976 | - unsigned long flags; | |
2977 | struct request *rq; | |
2978 | LIST_HEAD(list); | |
2979 | unsigned int depth; | |
c7c16703 | 2980 | @@ -3245,11 +3247,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
2981 | q = NULL; |
2982 | depth = 0; | |
2983 | ||
2984 | - /* | |
2985 | - * Save and disable interrupts here, to avoid doing it for every | |
2986 | - * queue lock we have to take. | |
2987 | - */ | |
2988 | - local_irq_save(flags); | |
2989 | while (!list_empty(&list)) { | |
2990 | rq = list_entry_rq(list.next); | |
2991 | list_del_init(&rq->queuelist); | |
c7c16703 | 2992 | @@ -3262,7 +3259,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
2993 | queue_unplugged(q, depth, from_schedule); |
2994 | q = rq->q; | |
2995 | depth = 0; | |
2996 | - spin_lock(q->queue_lock); | |
2997 | + spin_lock_irq(q->queue_lock); | |
2998 | } | |
2999 | ||
3000 | /* | |
c7c16703 | 3001 | @@ -3289,8 +3286,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
3002 | */ |
3003 | if (q) | |
3004 | queue_unplugged(q, depth, from_schedule); | |
3005 | - | |
3006 | - local_irq_restore(flags); | |
3007 | } | |
3008 | ||
3009 | void blk_finish_plug(struct blk_plug *plug) | |
3010 | diff --git a/block/blk-ioc.c b/block/blk-ioc.c | |
3011 | index 381cb50a673c..dc8785233d94 100644 | |
3012 | --- a/block/blk-ioc.c | |
3013 | +++ b/block/blk-ioc.c | |
3014 | @@ -7,6 +7,7 @@ | |
3015 | #include <linux/bio.h> | |
3016 | #include <linux/blkdev.h> | |
3017 | #include <linux/slab.h> | |
3018 | +#include <linux/delay.h> | |
3019 | ||
3020 | #include "blk.h" | |
3021 | ||
3022 | @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work) | |
3023 | spin_unlock(q->queue_lock); | |
3024 | } else { | |
3025 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3026 | - cpu_relax(); | |
3027 | + cpu_chill(); | |
3028 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3029 | } | |
3030 | } | |
3031 | @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc) | |
3032 | spin_unlock(icq->q->queue_lock); | |
3033 | } else { | |
3034 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3035 | - cpu_relax(); | |
3036 | + cpu_chill(); | |
3037 | goto retry; | |
3038 | } | |
3039 | } | |
1a6e0f06 | 3040 | diff --git a/block/blk-mq.c b/block/blk-mq.c |
c7c16703 | 3041 | index ad459e4e8071..1bfacb205bfa 100644 |
1a6e0f06 JK |
3042 | --- a/block/blk-mq.c |
3043 | +++ b/block/blk-mq.c | |
c7c16703 | 3044 | @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); |
1a6e0f06 JK |
3045 | |
3046 | static void blk_mq_freeze_queue_wait(struct request_queue *q) | |
3047 | { | |
3048 | - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3049 | + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3050 | } | |
3051 | ||
3052 | /* | |
c7c16703 | 3053 | @@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) |
1a6e0f06 JK |
3054 | WARN_ON_ONCE(freeze_depth < 0); |
3055 | if (!freeze_depth) { | |
3056 | percpu_ref_reinit(&q->q_usage_counter); | |
3057 | - wake_up_all(&q->mq_freeze_wq); | |
3058 | + swake_up_all(&q->mq_freeze_wq); | |
3059 | } | |
3060 | } | |
3061 | EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); | |
c7c16703 | 3062 | @@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q) |
1a6e0f06 JK |
3063 | * dying, we need to ensure that processes currently waiting on |
3064 | * the queue are notified as well. | |
3065 | */ | |
3066 | - wake_up_all(&q->mq_freeze_wq); | |
3067 | + swake_up_all(&q->mq_freeze_wq); | |
3068 | } | |
3069 | ||
3070 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |
c7c16703 | 3071 | @@ -177,6 +177,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, |
1a6e0f06 JK |
3072 | rq->resid_len = 0; |
3073 | rq->sense = NULL; | |
3074 | ||
3075 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3076 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3077 | +#endif | |
3078 | INIT_LIST_HEAD(&rq->timeout_list); | |
3079 | rq->timeout = 0; | |
3080 | ||
c7c16703 | 3081 | @@ -345,6 +348,17 @@ void blk_mq_end_request(struct request *rq, int error) |
1a6e0f06 JK |
3082 | } |
3083 | EXPORT_SYMBOL(blk_mq_end_request); | |
3084 | ||
3085 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3086 | + | |
3087 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
3088 | +{ | |
3089 | + struct request *rq = container_of(work, struct request, work); | |
3090 | + | |
3091 | + rq->q->softirq_done_fn(rq); | |
3092 | +} | |
3093 | + | |
3094 | +#else | |
3095 | + | |
3096 | static void __blk_mq_complete_request_remote(void *data) | |
3097 | { | |
3098 | struct request *rq = data; | |
c7c16703 | 3099 | @@ -352,6 +366,8 @@ static void __blk_mq_complete_request_remote(void *data) |
1a6e0f06 JK |
3100 | rq->q->softirq_done_fn(rq); |
3101 | } | |
3102 | ||
3103 | +#endif | |
3104 | + | |
3105 | static void blk_mq_ipi_complete_request(struct request *rq) | |
3106 | { | |
3107 | struct blk_mq_ctx *ctx = rq->mq_ctx; | |
c7c16703 | 3108 | @@ -363,19 +379,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) |
1a6e0f06 JK |
3109 | return; |
3110 | } | |
3111 | ||
3112 | - cpu = get_cpu(); | |
3113 | + cpu = get_cpu_light(); | |
3114 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
3115 | shared = cpus_share_cache(cpu, ctx->cpu); | |
3116 | ||
3117 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
3118 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3119 | + schedule_work_on(ctx->cpu, &rq->work); | |
3120 | +#else | |
3121 | rq->csd.func = __blk_mq_complete_request_remote; | |
3122 | rq->csd.info = rq; | |
3123 | rq->csd.flags = 0; | |
3124 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
3125 | +#endif | |
3126 | } else { | |
3127 | rq->q->softirq_done_fn(rq); | |
3128 | } | |
3129 | - put_cpu(); | |
3130 | + put_cpu_light(); | |
3131 | } | |
3132 | ||
3133 | static void __blk_mq_complete_request(struct request *rq) | |
c7c16703 | 3134 | @@ -917,14 +937,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) |
1a6e0f06 JK |
3135 | return; |
3136 | ||
c7c16703 | 3137 | if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { |
1a6e0f06 JK |
3138 | - int cpu = get_cpu(); |
3139 | + int cpu = get_cpu_light(); | |
3140 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
3141 | __blk_mq_run_hw_queue(hctx); | |
3142 | - put_cpu(); | |
3143 | + put_cpu_light(); | |
3144 | return; | |
3145 | } | |
3146 | ||
3147 | - put_cpu(); | |
3148 | + put_cpu_light(); | |
3149 | } | |
3150 | ||
c7c16703 | 3151 | kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); |
1a6e0f06 | 3152 | diff --git a/block/blk-mq.h b/block/blk-mq.h |
c7c16703 | 3153 | index e5d25249028c..1e846b842eab 100644 |
1a6e0f06 JK |
3154 | --- a/block/blk-mq.h |
3155 | +++ b/block/blk-mq.h | |
c7c16703 | 3156 | @@ -72,12 +72,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, |
1a6e0f06 JK |
3157 | */ |
3158 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
3159 | { | |
3160 | - return __blk_mq_get_ctx(q, get_cpu()); | |
3161 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
3162 | } | |
3163 | ||
3164 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
3165 | { | |
3166 | - put_cpu(); | |
3167 | + put_cpu_light(); | |
3168 | } | |
3169 | ||
3170 | struct blk_mq_alloc_data { | |
3171 | diff --git a/block/blk-softirq.c b/block/blk-softirq.c | |
c7c16703 | 3172 | index 06cf9807f49a..c40342643ca0 100644 |
1a6e0f06 JK |
3173 | --- a/block/blk-softirq.c |
3174 | +++ b/block/blk-softirq.c | |
3175 | @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) | |
3176 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3177 | ||
3178 | local_irq_restore(flags); | |
3179 | + preempt_check_resched_rt(); | |
3180 | } | |
3181 | ||
3182 | /* | |
c7c16703 JK |
3183 | @@ -89,6 +90,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) |
3184 | this_cpu_ptr(&blk_cpu_done)); | |
3185 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3186 | local_irq_enable(); | |
3187 | + preempt_check_resched_rt(); | |
1a6e0f06 | 3188 | |
c7c16703 JK |
3189 | return 0; |
3190 | } | |
3191 | @@ -141,6 +143,7 @@ void __blk_complete_request(struct request *req) | |
1a6e0f06 JK |
3192 | goto do_local; |
3193 | ||
3194 | local_irq_restore(flags); | |
3195 | + preempt_check_resched_rt(); | |
3196 | } | |
3197 | ||
3198 | /** | |
3199 | diff --git a/block/bounce.c b/block/bounce.c | |
3200 | index 1cb5dd3a5da1..2f1ec8a67cbe 100644 | |
3201 | --- a/block/bounce.c | |
3202 | +++ b/block/bounce.c | |
3203 | @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | |
3204 | unsigned long flags; | |
3205 | unsigned char *vto; | |
3206 | ||
3207 | - local_irq_save(flags); | |
3208 | + local_irq_save_nort(flags); | |
3209 | vto = kmap_atomic(to->bv_page); | |
3210 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
3211 | kunmap_atomic(vto); | |
3212 | - local_irq_restore(flags); | |
3213 | + local_irq_restore_nort(flags); | |
3214 | } | |
3215 | ||
3216 | #else /* CONFIG_HIGHMEM */ | |
3217 | diff --git a/crypto/algapi.c b/crypto/algapi.c | |
3218 | index df939b54b09f..efe5e06adcf7 100644 | |
3219 | --- a/crypto/algapi.c | |
3220 | +++ b/crypto/algapi.c | |
3221 | @@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); | |
3222 | ||
3223 | int crypto_register_notifier(struct notifier_block *nb) | |
3224 | { | |
3225 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
3226 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
3227 | } | |
3228 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
3229 | ||
3230 | int crypto_unregister_notifier(struct notifier_block *nb) | |
3231 | { | |
3232 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
3233 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
3234 | } | |
3235 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
3236 | ||
3237 | diff --git a/crypto/api.c b/crypto/api.c | |
3238 | index bbc147cb5dec..bc1a848f02ec 100644 | |
3239 | --- a/crypto/api.c | |
3240 | +++ b/crypto/api.c | |
3241 | @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); | |
3242 | DECLARE_RWSEM(crypto_alg_sem); | |
3243 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
3244 | ||
3245 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
3246 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
3247 | EXPORT_SYMBOL_GPL(crypto_chain); | |
3248 | ||
3249 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
3250 | @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v) | |
3251 | { | |
3252 | int ok; | |
3253 | ||
3254 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3255 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3256 | if (ok == NOTIFY_DONE) { | |
3257 | request_module("cryptomgr"); | |
3258 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3259 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3260 | } | |
3261 | ||
3262 | return ok; | |
3263 | diff --git a/crypto/internal.h b/crypto/internal.h | |
3264 | index 7eefcdb00227..0ecc7f5a2f40 100644 | |
3265 | --- a/crypto/internal.h | |
3266 | +++ b/crypto/internal.h | |
3267 | @@ -47,7 +47,7 @@ struct crypto_larval { | |
3268 | ||
3269 | extern struct list_head crypto_alg_list; | |
3270 | extern struct rw_semaphore crypto_alg_sem; | |
3271 | -extern struct blocking_notifier_head crypto_chain; | |
3272 | +extern struct srcu_notifier_head crypto_chain; | |
3273 | ||
3274 | #ifdef CONFIG_PROC_FS | |
3275 | void __init crypto_init_proc(void); | |
3276 | @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg) | |
3277 | ||
3278 | static inline void crypto_notify(unsigned long val, void *v) | |
3279 | { | |
3280 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
3281 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
3282 | } | |
3283 | ||
3284 | #endif /* _CRYPTO_INTERNAL_H */ | |
3285 | diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h | |
c7c16703 | 3286 | index 750fa824d42c..441edf51484a 100644 |
1a6e0f06 JK |
3287 | --- a/drivers/acpi/acpica/acglobal.h |
3288 | +++ b/drivers/acpi/acpica/acglobal.h | |
3289 | @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); | |
3290 | * interrupt level | |
3291 | */ | |
3292 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
3293 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3294 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3295 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
3296 | ||
3297 | /* Mutex for _OSI support */ | |
3298 | diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c | |
3299 | index 3b7fb99362b6..696bf8e62afb 100644 | |
3300 | --- a/drivers/acpi/acpica/hwregs.c | |
3301 | +++ b/drivers/acpi/acpica/hwregs.c | |
3302 | @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void) | |
3303 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
3304 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
3305 | ||
3306 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3307 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3308 | ||
3309 | /* Clear the fixed events in PM1 A/B */ | |
3310 | ||
3311 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, | |
3312 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
3313 | ||
3314 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3315 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3316 | ||
3317 | if (ACPI_FAILURE(status)) { | |
3318 | goto exit; | |
3319 | diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c | |
3320 | index 98c26ff39409..6e236f2ea791 100644 | |
3321 | --- a/drivers/acpi/acpica/hwxface.c | |
3322 | +++ b/drivers/acpi/acpica/hwxface.c | |
3323 | @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3324 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
3325 | } | |
3326 | ||
3327 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3328 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3329 | ||
3330 | /* | |
3331 | * At this point, we know that the parent register is one of the | |
3332 | @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3333 | ||
3334 | unlock_and_exit: | |
3335 | ||
3336 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3337 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3338 | return_ACPI_STATUS(status); | |
3339 | } | |
3340 | ||
3341 | diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c | |
3342 | index 15073375bd00..357e7ca5a587 100644 | |
3343 | --- a/drivers/acpi/acpica/utmutex.c | |
3344 | +++ b/drivers/acpi/acpica/utmutex.c | |
3345 | @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) | |
3346 | return_ACPI_STATUS (status); | |
3347 | } | |
3348 | ||
3349 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
3350 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
3351 | if (ACPI_FAILURE (status)) { | |
3352 | return_ACPI_STATUS (status); | |
3353 | } | |
3354 | @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) | |
3355 | /* Delete the spinlocks */ | |
3356 | ||
3357 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
3358 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
3359 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
3360 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
3361 | ||
3362 | /* Delete the reader/writer lock */ | |
3363 | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c | |
3364 | index 051b6158d1b7..7ad293bef6ed 100644 | |
3365 | --- a/drivers/ata/libata-sff.c | |
3366 | +++ b/drivers/ata/libata-sff.c | |
3367 | @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, | |
3368 | unsigned long flags; | |
3369 | unsigned int consumed; | |
3370 | ||
3371 | - local_irq_save(flags); | |
3372 | + local_irq_save_nort(flags); | |
3373 | consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); | |
3374 | - local_irq_restore(flags); | |
3375 | + local_irq_restore_nort(flags); | |
3376 | ||
3377 | return consumed; | |
3378 | } | |
3379 | @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3380 | unsigned long flags; | |
3381 | ||
3382 | /* FIXME: use a bounce buffer */ | |
3383 | - local_irq_save(flags); | |
3384 | + local_irq_save_nort(flags); | |
3385 | buf = kmap_atomic(page); | |
3386 | ||
3387 | /* do the actual data transfer */ | |
3388 | @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3389 | do_write); | |
3390 | ||
3391 | kunmap_atomic(buf); | |
3392 | - local_irq_restore(flags); | |
3393 | + local_irq_restore_nort(flags); | |
3394 | } else { | |
3395 | buf = page_address(page); | |
3396 | ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, | |
3397 | @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3398 | unsigned long flags; | |
3399 | ||
3400 | /* FIXME: use bounce buffer */ | |
3401 | - local_irq_save(flags); | |
3402 | + local_irq_save_nort(flags); | |
3403 | buf = kmap_atomic(page); | |
3404 | ||
3405 | /* do the actual data transfer */ | |
3406 | @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3407 | count, rw); | |
3408 | ||
3409 | kunmap_atomic(buf); | |
3410 | - local_irq_restore(flags); | |
3411 | + local_irq_restore_nort(flags); | |
3412 | } else { | |
3413 | buf = page_address(page); | |
3414 | consumed = ap->ops->sff_data_xfer(dev, buf + offset, | |
3415 | diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c | |
3416 | index 4b5cd3a7b2b6..fa8329ad79fd 100644 | |
3417 | --- a/drivers/block/zram/zcomp.c | |
3418 | +++ b/drivers/block/zram/zcomp.c | |
3419 | @@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf) | |
3420 | ||
3421 | struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) | |
3422 | { | |
3423 | - return *get_cpu_ptr(comp->stream); | |
3424 | + struct zcomp_strm *zstrm; | |
3425 | + | |
3426 | + zstrm = *this_cpu_ptr(comp->stream); | |
3427 | + spin_lock(&zstrm->zcomp_lock); | |
3428 | + return zstrm; | |
3429 | } | |
3430 | ||
3431 | void zcomp_stream_put(struct zcomp *comp) | |
3432 | { | |
3433 | - put_cpu_ptr(comp->stream); | |
3434 | + struct zcomp_strm *zstrm; | |
3435 | + | |
3436 | + zstrm = *this_cpu_ptr(comp->stream); | |
3437 | + spin_unlock(&zstrm->zcomp_lock); | |
3438 | } | |
3439 | ||
3440 | int zcomp_compress(struct zcomp_strm *zstrm, | |
3441 | @@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, | |
3442 | pr_err("Can't allocate a compression stream\n"); | |
3443 | return NOTIFY_BAD; | |
3444 | } | |
3445 | + spin_lock_init(&zstrm->zcomp_lock); | |
3446 | *per_cpu_ptr(comp->stream, cpu) = zstrm; | |
3447 | break; | |
3448 | case CPU_DEAD: | |
3449 | diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h | |
3450 | index 478cac2ed465..f7a6efdc3285 100644 | |
3451 | --- a/drivers/block/zram/zcomp.h | |
3452 | +++ b/drivers/block/zram/zcomp.h | |
3453 | @@ -14,6 +14,7 @@ struct zcomp_strm { | |
3454 | /* compression/decompression buffer */ | |
3455 | void *buffer; | |
3456 | struct crypto_comp *tfm; | |
3457 | + spinlock_t zcomp_lock; | |
3458 | }; | |
3459 | ||
3460 | /* dynamic per-device compression frontend */ | |
3461 | diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c | |
c7c16703 | 3462 | index 5497f7fc44d0..3826072a23c5 100644 |
1a6e0f06 JK |
3463 | --- a/drivers/block/zram/zram_drv.c |
3464 | +++ b/drivers/block/zram/zram_drv.c | |
3465 | @@ -519,6 +519,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) | |
3466 | goto out_error; | |
3467 | } | |
3468 | ||
3469 | + zram_meta_init_table_locks(meta, disksize); | |
3470 | + | |
3471 | return meta; | |
3472 | ||
3473 | out_error: | |
3474 | @@ -566,28 +568,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) | |
3475 | struct zram_meta *meta = zram->meta; | |
3476 | unsigned long handle; | |
3477 | unsigned int size; | |
3478 | + struct zcomp_strm *zstrm; | |
3479 | ||
3480 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3481 | + zram_lock_table(&meta->table[index]); | |
3482 | handle = meta->table[index].handle; | |
3483 | size = zram_get_obj_size(meta, index); | |
3484 | ||
3485 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3486 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3487 | + zram_unlock_table(&meta->table[index]); | |
3488 | clear_page(mem); | |
3489 | return 0; | |
3490 | } | |
3491 | ||
3492 | + zstrm = zcomp_stream_get(zram->comp); | |
3493 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); | |
3494 | if (size == PAGE_SIZE) { | |
3495 | copy_page(mem, cmem); | |
3496 | } else { | |
3497 | - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
3498 | - | |
3499 | ret = zcomp_decompress(zstrm, cmem, size, mem); | |
3500 | - zcomp_stream_put(zram->comp); | |
3501 | } | |
3502 | zs_unmap_object(meta->mem_pool, handle); | |
3503 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3504 | + zcomp_stream_put(zram->comp); | |
3505 | + zram_unlock_table(&meta->table[index]); | |
3506 | ||
3507 | /* Should NEVER happen. Return bio error if it does. */ | |
3508 | if (unlikely(ret)) { | |
3509 | @@ -607,14 +609,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, | |
3510 | struct zram_meta *meta = zram->meta; | |
3511 | page = bvec->bv_page; | |
3512 | ||
3513 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3514 | + zram_lock_table(&meta->table[index]); | |
3515 | if (unlikely(!meta->table[index].handle) || | |
3516 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3517 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3518 | + zram_unlock_table(&meta->table[index]); | |
3519 | handle_zero_page(bvec); | |
3520 | return 0; | |
3521 | } | |
3522 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3523 | + zram_unlock_table(&meta->table[index]); | |
3524 | ||
3525 | if (is_partial_io(bvec)) | |
3526 | /* Use a temporary buffer to decompress the page */ | |
3527 | @@ -691,10 +693,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3528 | if (user_mem) | |
3529 | kunmap_atomic(user_mem); | |
3530 | /* Free memory associated with this sector now. */ | |
3531 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3532 | + zram_lock_table(&meta->table[index]); | |
3533 | zram_free_page(zram, index); | |
3534 | zram_set_flag(meta, index, ZRAM_ZERO); | |
3535 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3536 | + zram_unlock_table(&meta->table[index]); | |
3537 | ||
3538 | atomic64_inc(&zram->stats.zero_pages); | |
3539 | ret = 0; | |
3540 | @@ -785,12 +787,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3541 | * Free memory associated with this sector | |
3542 | * before overwriting unused sectors. | |
3543 | */ | |
3544 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3545 | + zram_lock_table(&meta->table[index]); | |
3546 | zram_free_page(zram, index); | |
3547 | ||
3548 | meta->table[index].handle = handle; | |
3549 | zram_set_obj_size(meta, index, clen); | |
3550 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3551 | + zram_unlock_table(&meta->table[index]); | |
3552 | ||
3553 | /* Update stats */ | |
3554 | atomic64_add(clen, &zram->stats.compr_data_size); | |
3555 | @@ -833,9 +835,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, | |
3556 | } | |
3557 | ||
3558 | while (n >= PAGE_SIZE) { | |
3559 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3560 | + zram_lock_table(&meta->table[index]); | |
3561 | zram_free_page(zram, index); | |
3562 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3563 | + zram_unlock_table(&meta->table[index]); | |
3564 | atomic64_inc(&zram->stats.notify_free); | |
3565 | index++; | |
3566 | n -= PAGE_SIZE; | |
3567 | @@ -964,9 +966,9 @@ static void zram_slot_free_notify(struct block_device *bdev, | |
3568 | zram = bdev->bd_disk->private_data; | |
3569 | meta = zram->meta; | |
3570 | ||
3571 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3572 | + zram_lock_table(&meta->table[index]); | |
3573 | zram_free_page(zram, index); | |
3574 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3575 | + zram_unlock_table(&meta->table[index]); | |
3576 | atomic64_inc(&zram->stats.notify_free); | |
3577 | } | |
3578 | ||
3579 | diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h | |
3580 | index 74fcf10da374..fd4020c99b9e 100644 | |
3581 | --- a/drivers/block/zram/zram_drv.h | |
3582 | +++ b/drivers/block/zram/zram_drv.h | |
3583 | @@ -73,6 +73,9 @@ enum zram_pageflags { | |
3584 | struct zram_table_entry { | |
3585 | unsigned long handle; | |
3586 | unsigned long value; | |
3587 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
3588 | + spinlock_t lock; | |
3589 | +#endif | |
3590 | }; | |
3591 | ||
3592 | struct zram_stats { | |
3593 | @@ -120,4 +123,42 @@ struct zram { | |
3594 | */ | |
3595 | bool claim; /* Protected by bdev->bd_mutex */ | |
3596 | }; | |
3597 | + | |
3598 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
3599 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3600 | +{ | |
3601 | + bit_spin_lock(ZRAM_ACCESS, &table->value); | |
3602 | +} | |
3603 | + | |
3604 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3605 | +{ | |
3606 | + bit_spin_unlock(ZRAM_ACCESS, &table->value); | |
3607 | +} | |
3608 | + | |
3609 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } | |
3610 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
3611 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3612 | +{ | |
3613 | + spin_lock(&table->lock); | |
3614 | + __set_bit(ZRAM_ACCESS, &table->value); | |
3615 | +} | |
3616 | + | |
3617 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3618 | +{ | |
3619 | + __clear_bit(ZRAM_ACCESS, &table->value); | |
3620 | + spin_unlock(&table->lock); | |
3621 | +} | |
3622 | + | |
3623 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) | |
3624 | +{ | |
3625 | + size_t num_pages = disksize >> PAGE_SHIFT; | |
3626 | + size_t index; | |
3627 | + | |
3628 | + for (index = 0; index < num_pages; index++) { | |
3629 | + spinlock_t *lock = &meta->table[index].lock; | |
3630 | + spin_lock_init(lock); | |
3631 | + } | |
3632 | +} | |
3633 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
3634 | + | |
3635 | #endif | |
3636 | diff --git a/drivers/char/random.c b/drivers/char/random.c | |
c7c16703 | 3637 | index d6876d506220..0c60b1e54579 100644 |
1a6e0f06 JK |
3638 | --- a/drivers/char/random.c |
3639 | +++ b/drivers/char/random.c | |
3640 | @@ -1028,8 +1028,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3641 | } sample; | |
3642 | long delta, delta2, delta3; | |
3643 | ||
3644 | - preempt_disable(); | |
3645 | - | |
3646 | sample.jiffies = jiffies; | |
3647 | sample.cycles = random_get_entropy(); | |
3648 | sample.num = num; | |
3649 | @@ -1070,7 +1068,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3650 | */ | |
3651 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
3652 | } | |
3653 | - preempt_enable(); | |
3654 | } | |
3655 | ||
3656 | void add_input_randomness(unsigned int type, unsigned int code, | |
3657 | @@ -1123,28 +1120,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) | |
3658 | return *(ptr + f->reg_idx++); | |
3659 | } | |
3660 | ||
3661 | -void add_interrupt_randomness(int irq, int irq_flags) | |
3662 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
3663 | { | |
3664 | struct entropy_store *r; | |
3665 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
3666 | - struct pt_regs *regs = get_irq_regs(); | |
3667 | unsigned long now = jiffies; | |
3668 | cycles_t cycles = random_get_entropy(); | |
3669 | __u32 c_high, j_high; | |
3670 | - __u64 ip; | |
3671 | unsigned long seed; | |
3672 | int credit = 0; | |
3673 | ||
3674 | if (cycles == 0) | |
3675 | - cycles = get_reg(fast_pool, regs); | |
3676 | + cycles = get_reg(fast_pool, NULL); | |
3677 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
3678 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
3679 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
3680 | fast_pool->pool[1] ^= now ^ c_high; | |
3681 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
3682 | + if (!ip) | |
3683 | + ip = _RET_IP_; | |
3684 | fast_pool->pool[2] ^= ip; | |
3685 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
3686 | - get_reg(fast_pool, regs); | |
3687 | + get_reg(fast_pool, NULL); | |
3688 | ||
3689 | fast_mix(fast_pool); | |
3690 | add_interrupt_bench(cycles); | |
3691 | diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c | |
3692 | index 4da2af9694a2..5b6f57f500b8 100644 | |
3693 | --- a/drivers/clocksource/tcb_clksrc.c | |
3694 | +++ b/drivers/clocksource/tcb_clksrc.c | |
3695 | @@ -23,8 +23,7 @@ | |
3696 | * this 32 bit free-running counter. the second channel is not used. | |
3697 | * | |
3698 | * - The third channel may be used to provide a 16-bit clockevent | |
3699 | - * source, used in either periodic or oneshot mode. This runs | |
3700 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
3701 | + * source, used in either periodic or oneshot mode. | |
3702 | * | |
3703 | * A boot clocksource and clockevent source are also currently needed, | |
3704 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
3705 | @@ -74,6 +73,8 @@ static struct clocksource clksrc = { | |
3706 | struct tc_clkevt_device { | |
3707 | struct clock_event_device clkevt; | |
3708 | struct clk *clk; | |
3709 | + bool clk_enabled; | |
3710 | + u32 freq; | |
3711 | void __iomem *regs; | |
3712 | }; | |
3713 | ||
3714 | @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) | |
3715 | return container_of(clkevt, struct tc_clkevt_device, clkevt); | |
3716 | } | |
3717 | ||
3718 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
3719 | - * because using one of the divided clocks would usually mean the | |
3720 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
3721 | - * | |
3722 | - * A divided clock could be good for high resolution timers, since | |
3723 | - * 30.5 usec resolution can seem "low". | |
3724 | - */ | |
3725 | static u32 timer_clock; | |
3726 | ||
3727 | +static void tc_clk_disable(struct clock_event_device *d) | |
3728 | +{ | |
3729 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3730 | + | |
3731 | + clk_disable(tcd->clk); | |
3732 | + tcd->clk_enabled = false; | |
3733 | +} | |
3734 | + | |
3735 | +static void tc_clk_enable(struct clock_event_device *d) | |
3736 | +{ | |
3737 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3738 | + | |
3739 | + if (tcd->clk_enabled) | |
3740 | + return; | |
3741 | + clk_enable(tcd->clk); | |
3742 | + tcd->clk_enabled = true; | |
3743 | +} | |
3744 | + | |
3745 | static int tc_shutdown(struct clock_event_device *d) | |
3746 | { | |
3747 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3748 | @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) | |
3749 | ||
3750 | __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); | |
3751 | __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
3752 | + return 0; | |
3753 | +} | |
3754 | + | |
3755 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
3756 | +{ | |
3757 | + tc_shutdown(d); | |
3758 | if (!clockevent_state_detached(d)) | |
3759 | - clk_disable(tcd->clk); | |
3760 | + tc_clk_disable(d); | |
3761 | ||
3762 | return 0; | |
3763 | } | |
3764 | @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) | |
3765 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) | |
3766 | tc_shutdown(d); | |
3767 | ||
3768 | - clk_enable(tcd->clk); | |
3769 | + tc_clk_enable(d); | |
3770 | ||
3771 | - /* slow clock, count up to RC, then irq and stop */ | |
3772 | + /* count up to RC, then irq and stop */ | |
3773 | __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | | |
3774 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); | |
3775 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3776 | @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) | |
3777 | /* By not making the gentime core emulate periodic mode on top | |
3778 | * of oneshot, we get lower overhead and improved accuracy. | |
3779 | */ | |
3780 | - clk_enable(tcd->clk); | |
3781 | + tc_clk_enable(d); | |
3782 | ||
3783 | - /* slow clock, count up to RC, then irq and restart */ | |
3784 | + /* count up to RC, then irq and restart */ | |
3785 | __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, | |
3786 | regs + ATMEL_TC_REG(2, CMR)); | |
3787 | - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3788 | + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3789 | ||
3790 | /* Enable clock and interrupts on RC compare */ | |
3791 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3792 | @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { | |
3793 | .features = CLOCK_EVT_FEAT_PERIODIC | | |
3794 | CLOCK_EVT_FEAT_ONESHOT, | |
3795 | /* Should be lower than at91rm9200's system timer */ | |
3796 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3797 | .rating = 125, | |
3798 | +#else | |
3799 | + .rating = 200, | |
3800 | +#endif | |
3801 | .set_next_event = tc_next_event, | |
3802 | - .set_state_shutdown = tc_shutdown, | |
3803 | + .set_state_shutdown = tc_shutdown_clk_off, | |
3804 | .set_state_periodic = tc_set_periodic, | |
3805 | .set_state_oneshot = tc_set_oneshot, | |
3806 | }, | |
3807 | @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) | |
3808 | return IRQ_NONE; | |
3809 | } | |
3810 | ||
3811 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3812 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
3813 | { | |
3814 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
3815 | int ret; | |
3816 | struct clk *t2_clk = tc->clk[2]; | |
3817 | int irq = tc->irq[2]; | |
3818 | @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3819 | clkevt.regs = tc->regs; | |
3820 | clkevt.clk = t2_clk; | |
3821 | ||
3822 | - timer_clock = clk32k_divisor_idx; | |
3823 | + timer_clock = divisor_idx; | |
3824 | + if (!divisor) | |
3825 | + clkevt.freq = 32768; | |
3826 | + else | |
3827 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
3828 | ||
3829 | clkevt.clkevt.cpumask = cpumask_of(0); | |
3830 | ||
3831 | @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3832 | return ret; | |
3833 | } | |
3834 | ||
3835 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
3836 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
3837 | ||
3838 | return ret; | |
3839 | } | |
3840 | @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) | |
3841 | goto err_disable_t1; | |
3842 | ||
3843 | /* channel 2: periodic and oneshot timer support */ | |
3844 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3845 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
3846 | +#else | |
3847 | + ret = setup_clkevents(tc, best_divisor_idx); | |
3848 | +#endif | |
3849 | if (ret) | |
3850 | goto err_unregister_clksrc; | |
3851 | ||
3852 | diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c | |
c7c16703 | 3853 | index 6555821bbdae..93288849b2bd 100644 |
1a6e0f06 JK |
3854 | --- a/drivers/clocksource/timer-atmel-pit.c |
3855 | +++ b/drivers/clocksource/timer-atmel-pit.c | |
3856 | @@ -46,6 +46,7 @@ struct pit_data { | |
3857 | u32 cycle; | |
3858 | u32 cnt; | |
3859 | unsigned int irq; | |
3860 | + bool irq_requested; | |
3861 | struct clk *mck; | |
3862 | }; | |
3863 | ||
3864 | @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) | |
3865 | ||
3866 | /* disable irq, leaving the clocksource active */ | |
3867 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
3868 | + if (data->irq_requested) { | |
3869 | + free_irq(data->irq, data); | |
3870 | + data->irq_requested = false; | |
3871 | + } | |
3872 | return 0; | |
3873 | } | |
3874 | ||
3875 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
3876 | /* | |
3877 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
3878 | */ | |
3879 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
3880 | { | |
3881 | struct pit_data *data = clkevt_to_pit_data(dev); | |
3882 | + int ret; | |
3883 | + | |
3884 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
3885 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3886 | + "at91_tick", data); | |
3887 | + if (ret) | |
3888 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3889 | + | |
3890 | + data->irq_requested = true; | |
3891 | ||
3892 | /* update clocksource counter */ | |
3893 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
c7c16703 | 3894 | @@ -230,15 +245,6 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) |
1a6e0f06 JK |
3895 | return ret; |
3896 | } | |
3897 | ||
3898 | - /* Set up irq handler */ | |
3899 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
3900 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3901 | - "at91_tick", data); | |
3902 | - if (ret) { | |
3903 | - pr_err("Unable to setup IRQ\n"); | |
3904 | - return ret; | |
3905 | - } | |
3906 | - | |
3907 | /* Set up and register clockevents */ | |
3908 | data->clkevt.name = "pit"; | |
3909 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
3910 | diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c | |
3911 | index e90ab5b63a90..9e124087c55f 100644 | |
3912 | --- a/drivers/clocksource/timer-atmel-st.c | |
3913 | +++ b/drivers/clocksource/timer-atmel-st.c | |
3914 | @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) | |
3915 | last_crtr = read_CRTR(); | |
3916 | } | |
3917 | ||
3918 | +static int atmel_st_irq; | |
3919 | + | |
3920 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
3921 | { | |
3922 | clkdev32k_disable_and_flush_irq(); | |
3923 | irqmask = 0; | |
3924 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
3925 | + free_irq(atmel_st_irq, regmap_st); | |
3926 | return 0; | |
3927 | } | |
3928 | ||
3929 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
3930 | { | |
3931 | + int ret; | |
3932 | + | |
3933 | clkdev32k_disable_and_flush_irq(); | |
3934 | ||
3935 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
3936 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3937 | + "at91_tick", regmap_st); | |
3938 | + if (ret) | |
3939 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3940 | + | |
3941 | /* | |
3942 | * ALM for oneshot irqs, set by next_event() | |
3943 | * before 32 seconds have passed. | |
3944 | @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
3945 | ||
3946 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
3947 | { | |
3948 | + int ret; | |
3949 | + | |
3950 | clkdev32k_disable_and_flush_irq(); | |
3951 | ||
3952 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
3953 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3954 | + "at91_tick", regmap_st); | |
3955 | + if (ret) | |
3956 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3957 | + | |
3958 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
3959 | irqmask = AT91_ST_PITS; | |
3960 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
3961 | @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
3962 | { | |
3963 | struct clk *sclk; | |
3964 | unsigned int sclk_rate, val; | |
3965 | - int irq, ret; | |
3966 | + int ret; | |
3967 | ||
3968 | regmap_st = syscon_node_to_regmap(node); | |
3969 | if (IS_ERR(regmap_st)) { | |
3970 | @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
3971 | regmap_read(regmap_st, AT91_ST_SR, &val); | |
3972 | ||
3973 | /* Get the interrupts property */ | |
3974 | - irq = irq_of_parse_and_map(node, 0); | |
3975 | - if (!irq) { | |
3976 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
3977 | + if (!atmel_st_irq) { | |
3978 | pr_err("Unable to get IRQ from DT\n"); | |
3979 | return -EINVAL; | |
3980 | } | |
3981 | ||
3982 | - /* Make IRQs happen for the system timer */ | |
3983 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
3984 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3985 | - "at91_tick", regmap_st); | |
3986 | - if (ret) { | |
3987 | - pr_err("Unable to setup IRQ\n"); | |
3988 | - return ret; | |
3989 | - } | |
3990 | - | |
3991 | sclk = of_clk_get(node, 0); | |
3992 | if (IS_ERR(sclk)) { | |
3993 | pr_err("Unable to get slow clock\n"); | |
3994 | diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c | |
3995 | index a782ce87715c..19d265948526 100644 | |
3996 | --- a/drivers/connector/cn_proc.c | |
3997 | +++ b/drivers/connector/cn_proc.c | |
3998 | @@ -32,6 +32,7 @@ | |
3999 | #include <linux/pid_namespace.h> | |
4000 | ||
4001 | #include <linux/cn_proc.h> | |
4002 | +#include <linux/locallock.h> | |
4003 | ||
4004 | /* | |
4005 | * Size of a cn_msg followed by a proc_event structure. Since the | |
4006 | @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; | |
4007 | ||
4008 | /* proc_event_counts is used as the sequence number of the netlink message */ | |
4009 | static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; | |
4010 | +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); | |
4011 | ||
4012 | static inline void send_msg(struct cn_msg *msg) | |
4013 | { | |
4014 | - preempt_disable(); | |
4015 | + local_lock(send_msg_lock); | |
4016 | ||
4017 | msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; | |
4018 | ((struct proc_event *)msg->data)->cpu = smp_processor_id(); | |
4019 | @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg) | |
4020 | */ | |
4021 | cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); | |
4022 | ||
4023 | - preempt_enable(); | |
4024 | + local_unlock(send_msg_lock); | |
4025 | } | |
4026 | ||
4027 | void proc_fork_connector(struct task_struct *task) | |
4028 | diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 | |
4029 | index adbd1de1cea5..1fac5074f2cf 100644 | |
4030 | --- a/drivers/cpufreq/Kconfig.x86 | |
4031 | +++ b/drivers/cpufreq/Kconfig.x86 | |
4032 | @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI | |
4033 | ||
4034 | config X86_POWERNOW_K8 | |
4035 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
4036 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
4037 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
4038 | help | |
4039 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
4040 | Support for K10 and newer processors is now in acpi-cpufreq. | |
4041 | diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
c7c16703 | 4042 | index a218c2e395e7..5273d8f1d5dd 100644 |
1a6e0f06 JK |
4043 | --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c |
4044 | +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
c7c16703 | 4045 | @@ -1537,7 +1537,9 @@ execbuf_submit(struct i915_execbuffer_params *params, |
1a6e0f06 JK |
4046 | if (ret) |
4047 | return ret; | |
4048 | ||
4049 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
4050 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); | |
4051 | +#endif | |
4052 | ||
4053 | i915_gem_execbuffer_move_to_active(vmas, params->request); | |
4054 | ||
4055 | diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
c7c16703 | 4056 | index 1c237d02f30b..9e9b4404c0d7 100644 |
1a6e0f06 JK |
4057 | --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c |
4058 | +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4059 | @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4060 | if (!mutex_is_locked(mutex)) | |
4061 | return false; | |
4062 | ||
4063 | -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) | |
4064 | +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4065 | return mutex->owner == task; | |
4066 | #else | |
4067 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4068 | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c | |
c7c16703 | 4069 | index 3fc286cd1157..252a1117b103 100644 |
1a6e0f06 JK |
4070 | --- a/drivers/gpu/drm/i915/i915_irq.c |
4071 | +++ b/drivers/gpu/drm/i915/i915_irq.c | |
4072 | @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4073 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); | |
4074 | ||
4075 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4076 | + preempt_disable_rt(); | |
4077 | ||
4078 | /* Get optional system timestamp before query. */ | |
4079 | if (stime) | |
4080 | @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4081 | *etime = ktime_get(); | |
4082 | ||
4083 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4084 | + preempt_enable_rt(); | |
4085 | ||
4086 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
4087 | ||
4088 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c | |
c7c16703 | 4089 | index c9e83f39ec0a..6b0caae38076 100644 |
1a6e0f06 JK |
4090 | --- a/drivers/gpu/drm/i915/intel_display.c |
4091 | +++ b/drivers/gpu/drm/i915/intel_display.c | |
c7c16703 | 4092 | @@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) |
1a6e0f06 JK |
4093 | struct intel_crtc *intel_crtc = to_intel_crtc(crtc); |
4094 | struct intel_flip_work *work; | |
4095 | ||
4096 | - WARN_ON(!in_interrupt()); | |
4097 | + WARN_ON_NONRT(!in_interrupt()); | |
4098 | ||
4099 | if (crtc == NULL) | |
4100 | return; | |
4101 | diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c | |
c7c16703 | 4102 | index dbed12c484c9..5c540b78e8b5 100644 |
1a6e0f06 JK |
4103 | --- a/drivers/gpu/drm/i915/intel_sprite.c |
4104 | +++ b/drivers/gpu/drm/i915/intel_sprite.c | |
c7c16703 JK |
4105 | @@ -35,6 +35,7 @@ |
4106 | #include <drm/drm_rect.h> | |
4107 | #include <drm/drm_atomic.h> | |
4108 | #include <drm/drm_plane_helper.h> | |
4109 | +#include <linux/locallock.h> | |
1a6e0f06 | 4110 | #include "intel_drv.h" |
c7c16703 | 4111 | #include "intel_frontbuffer.h" |
1a6e0f06 | 4112 | #include <drm/i915_drm.h> |
c7c16703 | 4113 | @@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, |
1a6e0f06 JK |
4114 | 1000 * adjusted_mode->crtc_htotal); |
4115 | } | |
4116 | ||
4117 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); | |
4118 | + | |
4119 | /** | |
4120 | * intel_pipe_update_start() - start update of a set of display registers | |
4121 | * @crtc: the crtc of which the registers are going to be updated | |
c7c16703 | 4122 | @@ -95,7 +98,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) |
1a6e0f06 JK |
4123 | min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); |
4124 | max = vblank_start - 1; | |
4125 | ||
4126 | - local_irq_disable(); | |
4127 | + local_lock_irq(pipe_update_lock); | |
4128 | ||
4129 | if (min <= 0 || max <= 0) | |
4130 | return; | |
c7c16703 | 4131 | @@ -125,11 +128,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) |
1a6e0f06 JK |
4132 | break; |
4133 | } | |
4134 | ||
4135 | - local_irq_enable(); | |
4136 | + local_unlock_irq(pipe_update_lock); | |
4137 | ||
4138 | timeout = schedule_timeout(timeout); | |
4139 | ||
4140 | - local_irq_disable(); | |
4141 | + local_lock_irq(pipe_update_lock); | |
4142 | } | |
4143 | ||
4144 | finish_wait(wq, &wait); | |
c7c16703 | 4145 | @@ -181,7 +184,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work |
1a6e0f06 JK |
4146 | crtc->base.state->event = NULL; |
4147 | } | |
4148 | ||
4149 | - local_irq_enable(); | |
4150 | + local_unlock_irq(pipe_update_lock); | |
4151 | ||
4152 | if (crtc->debug.start_vbl_count && | |
4153 | crtc->debug.start_vbl_count != end_vbl_count) { | |
4154 | diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
c7c16703 | 4155 | index 192b2d3a79cb..d5372a207326 100644 |
1a6e0f06 JK |
4156 | --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c |
4157 | +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4158 | @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4159 | if (!mutex_is_locked(mutex)) | |
4160 | return false; | |
4161 | ||
4162 | -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) | |
4163 | +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4164 | return mutex->owner == task; | |
4165 | #else | |
4166 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4167 | diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c | |
c7c16703 | 4168 | index cdb8cb568c15..b6d7fd964cbc 100644 |
1a6e0f06 JK |
4169 | --- a/drivers/gpu/drm/radeon/radeon_display.c |
4170 | +++ b/drivers/gpu/drm/radeon/radeon_display.c | |
c7c16703 | 4171 | @@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, |
1a6e0f06 JK |
4172 | struct radeon_device *rdev = dev->dev_private; |
4173 | ||
4174 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4175 | + preempt_disable_rt(); | |
4176 | ||
4177 | /* Get optional system timestamp before query. */ | |
4178 | if (stime) | |
c7c16703 | 4179 | @@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, |
1a6e0f06 JK |
4180 | *etime = ktime_get(); |
4181 | ||
4182 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4183 | + preempt_enable_rt(); | |
4184 | ||
4185 | /* Decode into vertical and horizontal scanout position. */ | |
4186 | *vpos = position & 0x1fff; | |
4187 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | |
c7c16703 | 4188 | index 0276d2ef06ee..8868045eabde 100644 |
1a6e0f06 JK |
4189 | --- a/drivers/hv/vmbus_drv.c |
4190 | +++ b/drivers/hv/vmbus_drv.c | |
4191 | @@ -761,6 +761,8 @@ static void vmbus_isr(void) | |
4192 | void *page_addr; | |
4193 | struct hv_message *msg; | |
4194 | union hv_synic_event_flags *event; | |
4195 | + struct pt_regs *regs = get_irq_regs(); | |
4196 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
4197 | bool handled = false; | |
4198 | ||
4199 | page_addr = hv_context.synic_event_page[cpu]; | |
4200 | @@ -808,7 +810,7 @@ static void vmbus_isr(void) | |
4201 | tasklet_schedule(hv_context.msg_dpc[cpu]); | |
4202 | } | |
4203 | ||
4204 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
4205 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); | |
4206 | } | |
4207 | ||
4208 | ||
4209 | diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c | |
4210 | index 36f76e28a0bf..394f142f90c7 100644 | |
4211 | --- a/drivers/ide/alim15x3.c | |
4212 | +++ b/drivers/ide/alim15x3.c | |
4213 | @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4214 | ||
4215 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
4216 | ||
4217 | - local_irq_save(flags); | |
4218 | + local_irq_save_nort(flags); | |
4219 | ||
4220 | if (m5229_revision < 0xC2) { | |
4221 | /* | |
4222 | @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4223 | } | |
4224 | pci_dev_put(north); | |
4225 | pci_dev_put(isa_dev); | |
4226 | - local_irq_restore(flags); | |
4227 | + local_irq_restore_nort(flags); | |
4228 | return 0; | |
4229 | } | |
4230 | ||
4231 | diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c | |
4232 | index 0ceae5cbd89a..c212e85d7f3e 100644 | |
4233 | --- a/drivers/ide/hpt366.c | |
4234 | +++ b/drivers/ide/hpt366.c | |
4235 | @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4236 | ||
4237 | dma_old = inb(base + 2); | |
4238 | ||
4239 | - local_irq_save(flags); | |
4240 | + local_irq_save_nort(flags); | |
4241 | ||
4242 | dma_new = dma_old; | |
4243 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
4244 | @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4245 | if (dma_new != dma_old) | |
4246 | outb(dma_new, base + 2); | |
4247 | ||
4248 | - local_irq_restore(flags); | |
4249 | + local_irq_restore_nort(flags); | |
4250 | ||
4251 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
4252 | hwif->name, base, base + 7); | |
4253 | diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c | |
4254 | index 19763977568c..4169433faab5 100644 | |
4255 | --- a/drivers/ide/ide-io-std.c | |
4256 | +++ b/drivers/ide/ide-io-std.c | |
4257 | @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4258 | unsigned long uninitialized_var(flags); | |
4259 | ||
4260 | if ((io_32bit & 2) && !mmio) { | |
4261 | - local_irq_save(flags); | |
4262 | + local_irq_save_nort(flags); | |
4263 | ata_vlb_sync(io_ports->nsect_addr); | |
4264 | } | |
4265 | ||
4266 | @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4267 | insl(data_addr, buf, words); | |
4268 | ||
4269 | if ((io_32bit & 2) && !mmio) | |
4270 | - local_irq_restore(flags); | |
4271 | + local_irq_restore_nort(flags); | |
4272 | ||
4273 | if (((len + 1) & 3) < 2) | |
4274 | return; | |
4275 | @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4276 | unsigned long uninitialized_var(flags); | |
4277 | ||
4278 | if ((io_32bit & 2) && !mmio) { | |
4279 | - local_irq_save(flags); | |
4280 | + local_irq_save_nort(flags); | |
4281 | ata_vlb_sync(io_ports->nsect_addr); | |
4282 | } | |
4283 | ||
4284 | @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4285 | outsl(data_addr, buf, words); | |
4286 | ||
4287 | if ((io_32bit & 2) && !mmio) | |
4288 | - local_irq_restore(flags); | |
4289 | + local_irq_restore_nort(flags); | |
4290 | ||
4291 | if (((len + 1) & 3) < 2) | |
4292 | return; | |
4293 | diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c | |
4294 | index 669ea1e45795..e12e43e62245 100644 | |
4295 | --- a/drivers/ide/ide-io.c | |
4296 | +++ b/drivers/ide/ide-io.c | |
4297 | @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) | |
4298 | /* disable_irq_nosync ?? */ | |
4299 | disable_irq(hwif->irq); | |
4300 | /* local CPU only, as if we were handling an interrupt */ | |
4301 | - local_irq_disable(); | |
4302 | + local_irq_disable_nort(); | |
4303 | if (hwif->polling) { | |
4304 | startstop = handler(drive); | |
4305 | } else if (drive_is_ready(drive)) { | |
4306 | diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c | |
4307 | index 376f2dc410c5..f014dd1b73dc 100644 | |
4308 | --- a/drivers/ide/ide-iops.c | |
4309 | +++ b/drivers/ide/ide-iops.c | |
4310 | @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, | |
4311 | if ((stat & ATA_BUSY) == 0) | |
4312 | break; | |
4313 | ||
4314 | - local_irq_restore(flags); | |
4315 | + local_irq_restore_nort(flags); | |
4316 | *rstat = stat; | |
4317 | return -EBUSY; | |
4318 | } | |
4319 | } | |
4320 | - local_irq_restore(flags); | |
4321 | + local_irq_restore_nort(flags); | |
4322 | } | |
4323 | /* | |
4324 | * Allow status to settle, then read it again. | |
4325 | diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c | |
4326 | index 0b63facd1d87..4ceba37afc0c 100644 | |
4327 | --- a/drivers/ide/ide-probe.c | |
4328 | +++ b/drivers/ide/ide-probe.c | |
4329 | @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) | |
4330 | int bswap = 1; | |
4331 | ||
4332 | /* local CPU only; some systems need this */ | |
4333 | - local_irq_save(flags); | |
4334 | + local_irq_save_nort(flags); | |
4335 | /* read 512 bytes of id info */ | |
4336 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
4337 | - local_irq_restore(flags); | |
4338 | + local_irq_restore_nort(flags); | |
4339 | ||
4340 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
4341 | #ifdef DEBUG | |
4342 | diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c | |
4343 | index a716693417a3..be0568c722d6 100644 | |
4344 | --- a/drivers/ide/ide-taskfile.c | |
4345 | +++ b/drivers/ide/ide-taskfile.c | |
4346 | @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4347 | ||
4348 | page_is_high = PageHighMem(page); | |
4349 | if (page_is_high) | |
4350 | - local_irq_save(flags); | |
4351 | + local_irq_save_nort(flags); | |
4352 | ||
4353 | buf = kmap_atomic(page) + offset; | |
4354 | ||
4355 | @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4356 | kunmap_atomic(buf); | |
4357 | ||
4358 | if (page_is_high) | |
4359 | - local_irq_restore(flags); | |
4360 | + local_irq_restore_nort(flags); | |
4361 | ||
4362 | len -= nr_bytes; | |
4363 | } | |
4364 | @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, | |
4365 | } | |
4366 | ||
4367 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
4368 | - local_irq_disable(); | |
4369 | + local_irq_disable_nort(); | |
4370 | ||
4371 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
4372 | ||
4373 | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
c7c16703 | 4374 | index fddff403d5d2..cca1bb4fbfe3 100644 |
1a6e0f06 JK |
4375 | --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c |
4376 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
c7c16703 | 4377 | @@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) |
1a6e0f06 JK |
4378 | |
4379 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
4380 | ||
4381 | - local_irq_save(flags); | |
4382 | + local_irq_save_nort(flags); | |
4383 | netif_addr_lock(dev); | |
4384 | spin_lock(&priv->lock); | |
4385 | ||
c7c16703 | 4386 | @@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) |
1a6e0f06 JK |
4387 | |
4388 | spin_unlock(&priv->lock); | |
4389 | netif_addr_unlock(dev); | |
4390 | - local_irq_restore(flags); | |
4391 | + local_irq_restore_nort(flags); | |
4392 | ||
4393 | /* | |
4394 | * make sure the in-flight joins have finished before we attempt | |
4395 | diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c | |
4396 | index 4a2a9e370be7..e970d9afd179 100644 | |
4397 | --- a/drivers/input/gameport/gameport.c | |
4398 | +++ b/drivers/input/gameport/gameport.c | |
4399 | @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) | |
4400 | tx = ~0; | |
4401 | ||
4402 | for (i = 0; i < 50; i++) { | |
4403 | - local_irq_save(flags); | |
4404 | + local_irq_save_nort(flags); | |
4405 | t1 = ktime_get_ns(); | |
4406 | for (t = 0; t < 50; t++) | |
4407 | gameport_read(gameport); | |
4408 | t2 = ktime_get_ns(); | |
4409 | t3 = ktime_get_ns(); | |
4410 | - local_irq_restore(flags); | |
4411 | + local_irq_restore_nort(flags); | |
4412 | udelay(i * 10); | |
4413 | t = (t2 - t1) - (t3 - t2); | |
4414 | if (t < tx) | |
4415 | @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4416 | tx = 1 << 30; | |
4417 | ||
4418 | for(i = 0; i < 50; i++) { | |
4419 | - local_irq_save(flags); | |
4420 | + local_irq_save_nort(flags); | |
4421 | GET_TIME(t1); | |
4422 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4423 | GET_TIME(t2); | |
4424 | GET_TIME(t3); | |
4425 | - local_irq_restore(flags); | |
4426 | + local_irq_restore_nort(flags); | |
4427 | udelay(i * 10); | |
4428 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
4429 | } | |
4430 | @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4431 | tx = 1 << 30; | |
4432 | ||
4433 | for(i = 0; i < 50; i++) { | |
4434 | - local_irq_save(flags); | |
4435 | + local_irq_save_nort(flags); | |
4436 | t1 = rdtsc(); | |
4437 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4438 | t2 = rdtsc(); | |
4439 | - local_irq_restore(flags); | |
4440 | + local_irq_restore_nort(flags); | |
4441 | udelay(i * 10); | |
4442 | if (t2 - t1 < tx) tx = t2 - t1; | |
4443 | } | |
4444 | diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c | |
c7c16703 | 4445 | index 11a13b5be73a..baaed0ac274b 100644 |
1a6e0f06 JK |
4446 | --- a/drivers/iommu/amd_iommu.c |
4447 | +++ b/drivers/iommu/amd_iommu.c | |
c7c16703 | 4448 | @@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, |
1a6e0f06 JK |
4449 | int ret; |
4450 | ||
4451 | /* | |
4452 | - * Must be called with IRQs disabled. Warn here to detect early | |
4453 | - * when its not. | |
4454 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4455 | + * detect early when its not. | |
4456 | */ | |
4457 | - WARN_ON(!irqs_disabled()); | |
4458 | + WARN_ON_NONRT(!irqs_disabled()); | |
4459 | ||
4460 | /* lock domain */ | |
4461 | spin_lock(&domain->lock); | |
c7c16703 | 4462 | @@ -2094,10 +2094,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) |
1a6e0f06 JK |
4463 | struct protection_domain *domain; |
4464 | ||
4465 | /* | |
4466 | - * Must be called with IRQs disabled. Warn here to detect early | |
4467 | - * when its not. | |
4468 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4469 | + * detect early when its not. | |
4470 | */ | |
4471 | - WARN_ON(!irqs_disabled()); | |
4472 | + WARN_ON_NONRT(!irqs_disabled()); | |
4473 | ||
4474 | if (WARN_ON(!dev_data->domain)) | |
4475 | return; | |
4476 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c | |
c7c16703 | 4477 | index d82637ab09fd..ebe41d30c093 100644 |
1a6e0f06 JK |
4478 | --- a/drivers/iommu/intel-iommu.c |
4479 | +++ b/drivers/iommu/intel-iommu.c | |
4480 | @@ -479,7 +479,7 @@ struct deferred_flush_data { | |
4481 | struct deferred_flush_table *tables; | |
4482 | }; | |
4483 | ||
4484 | -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4485 | +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4486 | ||
4487 | /* bitmap for indexing intel_iommus */ | |
4488 | static int g_num_of_iommus; | |
c7c16703 | 4489 | @@ -3715,10 +3715,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, |
1a6e0f06 JK |
4490 | struct intel_iommu *iommu; |
4491 | struct deferred_flush_entry *entry; | |
4492 | struct deferred_flush_data *flush_data; | |
4493 | - unsigned int cpuid; | |
4494 | ||
4495 | - cpuid = get_cpu(); | |
4496 | - flush_data = per_cpu_ptr(&deferred_flush, cpuid); | |
4497 | + flush_data = raw_cpu_ptr(&deferred_flush); | |
4498 | ||
4499 | /* Flush all CPUs' entries to avoid deferring too much. If | |
4500 | * this becomes a bottleneck, can just flush us, and rely on | |
c7c16703 | 4501 | @@ -3751,8 +3749,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, |
1a6e0f06 JK |
4502 | } |
4503 | flush_data->size++; | |
4504 | spin_unlock_irqrestore(&flush_data->lock, flags); | |
4505 | - | |
4506 | - put_cpu(); | |
4507 | } | |
4508 | ||
4509 | static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) | |
4510 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c | |
4511 | index e23001bfcfee..359d5d169ec0 100644 | |
4512 | --- a/drivers/iommu/iova.c | |
4513 | +++ b/drivers/iommu/iova.c | |
4514 | @@ -22,6 +22,7 @@ | |
4515 | #include <linux/slab.h> | |
4516 | #include <linux/smp.h> | |
4517 | #include <linux/bitops.h> | |
4518 | +#include <linux/cpu.h> | |
4519 | ||
4520 | static bool iova_rcache_insert(struct iova_domain *iovad, | |
4521 | unsigned long pfn, | |
4522 | @@ -420,10 +421,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, | |
4523 | ||
4524 | /* Try replenishing IOVAs by flushing rcache. */ | |
4525 | flushed_rcache = true; | |
4526 | - preempt_disable(); | |
4527 | for_each_online_cpu(cpu) | |
4528 | free_cpu_cached_iovas(cpu, iovad); | |
4529 | - preempt_enable(); | |
4530 | goto retry; | |
4531 | } | |
4532 | ||
4533 | @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4534 | bool can_insert = false; | |
4535 | unsigned long flags; | |
4536 | ||
4537 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4538 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4539 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4540 | ||
4541 | if (!iova_magazine_full(cpu_rcache->loaded)) { | |
4542 | @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4543 | iova_magazine_push(cpu_rcache->loaded, iova_pfn); | |
4544 | ||
4545 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4546 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4547 | ||
4548 | if (mag_to_free) { | |
4549 | iova_magazine_free_pfns(mag_to_free, iovad); | |
4550 | @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4551 | bool has_pfn = false; | |
4552 | unsigned long flags; | |
4553 | ||
4554 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4555 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4556 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4557 | ||
4558 | if (!iova_magazine_empty(cpu_rcache->loaded)) { | |
4559 | @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4560 | iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); | |
4561 | ||
4562 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4563 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4564 | ||
4565 | return iova_pfn; | |
4566 | } | |
4567 | diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig | |
4568 | index 3f9ddb9fafa7..09da5b6b44a1 100644 | |
4569 | --- a/drivers/leds/trigger/Kconfig | |
4570 | +++ b/drivers/leds/trigger/Kconfig | |
4571 | @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT | |
4572 | ||
4573 | config LEDS_TRIGGER_CPU | |
4574 | bool "LED CPU Trigger" | |
4575 | - depends on LEDS_TRIGGERS | |
4576 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
4577 | help | |
4578 | This allows LEDs to be controlled by active CPUs. This shows | |
4579 | the active CPUs across an array of LEDs so you can see which | |
4580 | diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig | |
4581 | index 4d200883c505..98b64ed5cb81 100644 | |
4582 | --- a/drivers/md/bcache/Kconfig | |
4583 | +++ b/drivers/md/bcache/Kconfig | |
4584 | @@ -1,6 +1,7 @@ | |
4585 | ||
4586 | config BCACHE | |
4587 | tristate "Block device as cache" | |
4588 | + depends on !PREEMPT_RT_FULL | |
4589 | ---help--- | |
4590 | Allows a block device to be used as cache for other devices; uses | |
4591 | a btree for indexing and the layout is optimized for SSDs. | |
4592 | diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c | |
c7c16703 | 4593 | index 31a89c8832c0..c3a7e8a9f761 100644 |
1a6e0f06 JK |
4594 | --- a/drivers/md/dm-rq.c |
4595 | +++ b/drivers/md/dm-rq.c | |
c7c16703 | 4596 | @@ -838,7 +838,7 @@ static void dm_old_request_fn(struct request_queue *q) |
1a6e0f06 JK |
4597 | /* Establish tio->ti before queuing work (map_tio_request) */ |
4598 | tio->ti = ti; | |
c7c16703 | 4599 | kthread_queue_work(&md->kworker, &tio->work); |
1a6e0f06 JK |
4600 | - BUG_ON(!irqs_disabled()); |
4601 | + BUG_ON_NONRT(!irqs_disabled()); | |
4602 | } | |
4603 | } | |
4604 | ||
4605 | diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c | |
c7c16703 | 4606 | index cce6057b9aca..fa2c4de32a64 100644 |
1a6e0f06 JK |
4607 | --- a/drivers/md/raid5.c |
4608 | +++ b/drivers/md/raid5.c | |
4609 | @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4610 | struct raid5_percpu *percpu; | |
4611 | unsigned long cpu; | |
4612 | ||
4613 | - cpu = get_cpu(); | |
4614 | + cpu = get_cpu_light(); | |
4615 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
4616 | + spin_lock(&percpu->lock); | |
4617 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
4618 | ops_run_biofill(sh); | |
4619 | overlap_clear++; | |
4620 | @@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4621 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
4622 | wake_up(&sh->raid_conf->wait_for_overlap); | |
4623 | } | |
4624 | - put_cpu(); | |
4625 | + spin_unlock(&percpu->lock); | |
4626 | + put_cpu_light(); | |
4627 | } | |
4628 | ||
4629 | static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, | |
c7c16703 JK |
4630 | @@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) |
4631 | __func__, cpu); | |
4632 | return -ENOMEM; | |
1a6e0f06 | 4633 | } |
c7c16703 JK |
4634 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); |
4635 | return 0; | |
4636 | } | |
1a6e0f06 | 4637 | |
c7c16703 JK |
4638 | @@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5conf *conf) |
4639 | conf->percpu = alloc_percpu(struct raid5_percpu); | |
4640 | if (!conf->percpu) | |
4641 | return -ENOMEM; | |
4642 | - | |
4643 | err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); | |
4644 | if (!err) { | |
4645 | conf->scribble_disks = max(conf->raid_disks, | |
1a6e0f06 | 4646 | diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h |
c7c16703 | 4647 | index 57ec49f0839e..0739604990b7 100644 |
1a6e0f06 JK |
4648 | --- a/drivers/md/raid5.h |
4649 | +++ b/drivers/md/raid5.h | |
4650 | @@ -504,6 +504,7 @@ struct r5conf { | |
4651 | int recovery_disabled; | |
4652 | /* per cpu variables */ | |
4653 | struct raid5_percpu { | |
c7c16703 JK |
4654 | + spinlock_t lock; /* Protection for -RT */ |
4655 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
4656 | struct flex_array *scribble; /* space for constructing buffer | |
4657 | * lists and performing address | |
4658 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig | |
4659 | index 64971baf11fa..215e91e36198 100644 | |
4660 | --- a/drivers/misc/Kconfig | |
4661 | +++ b/drivers/misc/Kconfig | |
4662 | @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI | |
4663 | config ATMEL_TCLIB | |
4664 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
4665 | depends on (AVR32 || ARCH_AT91) | |
4666 | + default y if PREEMPT_RT_FULL | |
4667 | help | |
4668 | Select this if you want a library to allocate the Timer/Counter | |
4669 | blocks found on many Atmel processors. This facilitates using | |
4670 | @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC | |
4671 | are combined to make a single 32-bit timer. | |
4672 | ||
4673 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
4674 | - may be used as a clock event device supporting oneshot mode | |
4675 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
4676 | + may be used as a clock event device supporting oneshot mode. | |
4677 | ||
4678 | config ATMEL_TCB_CLKSRC_BLOCK | |
4679 | int | |
4680 | @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK | |
4681 | TC can be used for other purposes, such as PWM generation and | |
4682 | interval timing. | |
4683 | ||
4684 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
4685 | + bool "TC Block use 32 KiHz clock" | |
4686 | + depends on ATMEL_TCB_CLKSRC | |
4687 | + default y if !PREEMPT_RT_FULL | |
4688 | + help | |
4689 | + Select this to use 32 KiHz base clock rate as TC block clock | |
4690 | + source for clock events. | |
1a6e0f06 | 4691 | + |
1a6e0f06 | 4692 | + |
c7c16703 JK |
4693 | config DUMMY_IRQ |
4694 | tristate "Dummy IRQ handler" | |
4695 | default n | |
1a6e0f06 JK |
4696 | diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c |
4697 | index df990bb8c873..1a162709a85e 100644 | |
4698 | --- a/drivers/mmc/host/mmci.c | |
4699 | +++ b/drivers/mmc/host/mmci.c | |
4700 | @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4701 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
4702 | struct variant_data *variant = host->variant; | |
4703 | void __iomem *base = host->base; | |
4704 | - unsigned long flags; | |
4705 | u32 status; | |
4706 | ||
4707 | status = readl(base + MMCISTATUS); | |
4708 | ||
4709 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
4710 | ||
4711 | - local_irq_save(flags); | |
4712 | - | |
4713 | do { | |
4714 | unsigned int remain, len; | |
4715 | char *buffer; | |
4716 | @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4717 | ||
4718 | sg_miter_stop(sg_miter); | |
4719 | ||
4720 | - local_irq_restore(flags); | |
4721 | - | |
4722 | /* | |
4723 | * If we have less than the fifo 'half-full' threshold to transfer, | |
4724 | * trigger a PIO interrupt as soon as any data is available. | |
4725 | diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c | |
c7c16703 | 4726 | index 9133e7926da5..63afb921ed40 100644 |
1a6e0f06 JK |
4727 | --- a/drivers/net/ethernet/3com/3c59x.c |
4728 | +++ b/drivers/net/ethernet/3com/3c59x.c | |
4729 | @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) | |
4730 | { | |
4731 | struct vortex_private *vp = netdev_priv(dev); | |
4732 | unsigned long flags; | |
4733 | - local_irq_save(flags); | |
4734 | + local_irq_save_nort(flags); | |
4735 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
4736 | - local_irq_restore(flags); | |
4737 | + local_irq_restore_nort(flags); | |
4738 | } | |
4739 | #endif | |
4740 | ||
4741 | @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev) | |
4742 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
4743 | */ | |
4744 | unsigned long flags; | |
4745 | - local_irq_save(flags); | |
4746 | + local_irq_save_nort(flags); | |
4747 | if (vp->full_bus_master_tx) | |
4748 | boomerang_interrupt(dev->irq, dev); | |
4749 | else | |
4750 | vortex_interrupt(dev->irq, dev); | |
4751 | - local_irq_restore(flags); | |
4752 | + local_irq_restore_nort(flags); | |
4753 | } | |
4754 | } | |
4755 | ||
4756 | diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c | |
4757 | index da4c2d8a4173..1420dfb56bac 100644 | |
4758 | --- a/drivers/net/ethernet/realtek/8139too.c | |
4759 | +++ b/drivers/net/ethernet/realtek/8139too.c | |
4760 | @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev) | |
4761 | struct rtl8139_private *tp = netdev_priv(dev); | |
4762 | const int irq = tp->pci_dev->irq; | |
4763 | ||
4764 | - disable_irq(irq); | |
4765 | + disable_irq_nosync(irq); | |
4766 | rtl8139_interrupt(irq, dev); | |
4767 | enable_irq(irq); | |
4768 | } | |
4769 | diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
c7c16703 | 4770 | index bca6935a94db..d7a35ee34d03 100644 |
1a6e0f06 JK |
4771 | --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c |
4772 | +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
4773 | @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, | |
4774 | while (!ctx->done.done && msecs--) | |
4775 | udelay(1000); | |
4776 | } else { | |
4777 | - wait_event_interruptible(ctx->done.wait, | |
4778 | + swait_event_interruptible(ctx->done.wait, | |
4779 | ctx->done.done); | |
4780 | } | |
4781 | break; | |
4782 | diff --git a/drivers/pci/access.c b/drivers/pci/access.c | |
4783 | index d11cdbb8fba3..223bbb9acb03 100644 | |
4784 | --- a/drivers/pci/access.c | |
4785 | +++ b/drivers/pci/access.c | |
4786 | @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) | |
4787 | WARN_ON(!dev->block_cfg_access); | |
4788 | ||
4789 | dev->block_cfg_access = 0; | |
4790 | - wake_up_all(&pci_cfg_wait); | |
4791 | + wake_up_all_locked(&pci_cfg_wait); | |
4792 | raw_spin_unlock_irqrestore(&pci_lock, flags); | |
4793 | } | |
4794 | EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); | |
4795 | diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c | |
4796 | index 9bd41a35a78a..8e2d436c2e3f 100644 | |
4797 | --- a/drivers/scsi/fcoe/fcoe.c | |
4798 | +++ b/drivers/scsi/fcoe/fcoe.c | |
4799 | @@ -1455,11 +1455,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, | |
4800 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
4801 | { | |
4802 | struct fcoe_percpu_s *fps; | |
4803 | - int rc; | |
4804 | + int rc, cpu = get_cpu_light(); | |
4805 | ||
4806 | - fps = &get_cpu_var(fcoe_percpu); | |
4807 | + fps = &per_cpu(fcoe_percpu, cpu); | |
4808 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
4809 | - put_cpu_var(fcoe_percpu); | |
4810 | + put_cpu_light(); | |
4811 | ||
4812 | return rc; | |
4813 | } | |
4814 | @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, | |
4815 | return 0; | |
4816 | } | |
4817 | ||
4818 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
4819 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
4820 | stats->InvalidCRCCount++; | |
4821 | if (stats->InvalidCRCCount < 5) | |
4822 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
4823 | - put_cpu(); | |
4824 | + put_cpu_light(); | |
4825 | return -EINVAL; | |
4826 | } | |
4827 | ||
4828 | @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
4829 | */ | |
4830 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
4831 | ||
4832 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
4833 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
4834 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
4835 | if (stats->ErrorFrames < 5) | |
4836 | printk(KERN_WARNING "fcoe: FCoE version " | |
4837 | @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
4838 | goto drop; | |
4839 | ||
4840 | if (!fcoe_filter_frames(lport, fp)) { | |
4841 | - put_cpu(); | |
4842 | + put_cpu_light(); | |
4843 | fc_exch_recv(lport, fp); | |
4844 | return; | |
4845 | } | |
4846 | drop: | |
4847 | stats->ErrorFrames++; | |
4848 | - put_cpu(); | |
4849 | + put_cpu_light(); | |
4850 | kfree_skb(skb); | |
4851 | } | |
4852 | ||
4853 | diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c | |
4854 | index dcf36537a767..1a1f2e46452c 100644 | |
4855 | --- a/drivers/scsi/fcoe/fcoe_ctlr.c | |
4856 | +++ b/drivers/scsi/fcoe/fcoe_ctlr.c | |
4857 | @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
4858 | ||
4859 | INIT_LIST_HEAD(&del_list); | |
4860 | ||
4861 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
4862 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
4863 | ||
4864 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
4865 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
4866 | @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
4867 | sel_time = fcf->time; | |
4868 | } | |
4869 | } | |
4870 | - put_cpu(); | |
4871 | + put_cpu_light(); | |
4872 | ||
4873 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
4874 | /* Removes fcf from current list */ | |
4875 | diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c | |
c7c16703 | 4876 | index 16ca31ad5ec0..c3987347e762 100644 |
1a6e0f06 JK |
4877 | --- a/drivers/scsi/libfc/fc_exch.c |
4878 | +++ b/drivers/scsi/libfc/fc_exch.c | |
4879 | @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, | |
4880 | } | |
4881 | memset(ep, 0, sizeof(*ep)); | |
4882 | ||
4883 | - cpu = get_cpu(); | |
4884 | + cpu = get_cpu_light(); | |
4885 | pool = per_cpu_ptr(mp->pool, cpu); | |
4886 | spin_lock_bh(&pool->lock); | |
4887 | - put_cpu(); | |
4888 | + put_cpu_light(); | |
4889 | ||
4890 | /* peek cache of free slot */ | |
4891 | if (pool->left != FC_XID_UNKNOWN) { | |
4892 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | |
4893 | index 763f012fdeca..d0f61b595470 100644 | |
4894 | --- a/drivers/scsi/libsas/sas_ata.c | |
4895 | +++ b/drivers/scsi/libsas/sas_ata.c | |
4896 | @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
4897 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
4898 | * unconditionally re-enable interrupts | |
4899 | */ | |
4900 | - local_irq_save(flags); | |
4901 | + local_irq_save_nort(flags); | |
4902 | spin_unlock(ap->lock); | |
4903 | ||
4904 | /* If the device fell off, no sense in issuing commands */ | |
4905 | @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
4906 | ||
4907 | out: | |
4908 | spin_lock(ap->lock); | |
4909 | - local_irq_restore(flags); | |
4910 | + local_irq_restore_nort(flags); | |
4911 | return ret; | |
4912 | } | |
4913 | ||
4914 | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h | |
4915 | index edc48f3b8230..ee5c6f9dfb6f 100644 | |
4916 | --- a/drivers/scsi/qla2xxx/qla_inline.h | |
4917 | +++ b/drivers/scsi/qla2xxx/qla_inline.h | |
4918 | @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) | |
4919 | { | |
4920 | unsigned long flags; | |
4921 | struct qla_hw_data *ha = rsp->hw; | |
4922 | - local_irq_save(flags); | |
4923 | + local_irq_save_nort(flags); | |
4924 | if (IS_P3P_TYPE(ha)) | |
4925 | qla82xx_poll(0, rsp); | |
4926 | else | |
4927 | ha->isp_ops->intr_handler(0, rsp); | |
4928 | - local_irq_restore(flags); | |
4929 | + local_irq_restore_nort(flags); | |
4930 | } | |
4931 | ||
4932 | static inline uint8_t * | |
4933 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c | |
c7c16703 | 4934 | index 068c4e47fac9..a2090f640397 100644 |
1a6e0f06 JK |
4935 | --- a/drivers/scsi/qla2xxx/qla_isr.c |
4936 | +++ b/drivers/scsi/qla2xxx/qla_isr.c | |
4937 | @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) | |
4938 | * kref_put(). | |
4939 | */ | |
4940 | kref_get(&qentry->irq_notify.kref); | |
4941 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
4942 | + swork_queue(&qentry->irq_notify.swork); | |
4943 | +#else | |
4944 | schedule_work(&qentry->irq_notify.work); | |
4945 | +#endif | |
4946 | } | |
4947 | ||
4948 | /* | |
4949 | diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c | |
c7c16703 | 4950 | index 95f4c1bcdb4c..0be934799bff 100644 |
1a6e0f06 JK |
4951 | --- a/drivers/thermal/x86_pkg_temp_thermal.c |
4952 | +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |
4953 | @@ -29,6 +29,7 @@ | |
4954 | #include <linux/pm.h> | |
4955 | #include <linux/thermal.h> | |
4956 | #include <linux/debugfs.h> | |
4957 | +#include <linux/swork.h> | |
4958 | #include <asm/cpu_device_id.h> | |
4959 | #include <asm/mce.h> | |
4960 | ||
c7c16703 | 4961 | @@ -353,7 +354,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) |
1a6e0f06 JK |
4962 | } |
4963 | } | |
4964 | ||
4965 | -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
4966 | +static void platform_thermal_notify_work(struct swork_event *event) | |
4967 | { | |
4968 | unsigned long flags; | |
4969 | int cpu = smp_processor_id(); | |
c7c16703 | 4970 | @@ -370,7 +371,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) |
1a6e0f06 JK |
4971 | pkg_work_scheduled[phy_id]) { |
4972 | disable_pkg_thres_interrupt(); | |
4973 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
4974 | - return -EINVAL; | |
4975 | + return; | |
4976 | } | |
4977 | pkg_work_scheduled[phy_id] = 1; | |
4978 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
c7c16703 | 4979 | @@ -379,9 +380,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) |
1a6e0f06 JK |
4980 | schedule_delayed_work_on(cpu, |
4981 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | |
4982 | msecs_to_jiffies(notify_delay_ms)); | |
4983 | +} | |
4984 | + | |
4985 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
4986 | +static struct swork_event notify_work; | |
4987 | + | |
4988 | +static int thermal_notify_work_init(void) | |
4989 | +{ | |
4990 | + int err; | |
4991 | + | |
4992 | + err = swork_get(); | |
4993 | + if (err) | |
4994 | + return err; | |
4995 | + | |
4996 | + INIT_SWORK(¬ify_work, platform_thermal_notify_work); | |
4997 | return 0; | |
4998 | } | |
4999 | ||
5000 | +static void thermal_notify_work_cleanup(void) | |
5001 | +{ | |
5002 | + swork_put(); | |
5003 | +} | |
5004 | + | |
5005 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5006 | +{ | |
5007 | + swork_queue(¬ify_work); | |
5008 | + return 0; | |
5009 | +} | |
5010 | + | |
5011 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
5012 | + | |
5013 | +static int thermal_notify_work_init(void) { return 0; } | |
5014 | + | |
5015 | +static void thermal_notify_work_cleanup(void) { } | |
5016 | + | |
5017 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5018 | +{ | |
5019 | + platform_thermal_notify_work(NULL); | |
5020 | + | |
5021 | + return 0; | |
5022 | +} | |
5023 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
5024 | + | |
5025 | static int find_siblings_cpu(int cpu) | |
5026 | { | |
5027 | int i; | |
c7c16703 | 5028 | @@ -585,6 +625,9 @@ static int __init pkg_temp_thermal_init(void) |
1a6e0f06 JK |
5029 | if (!x86_match_cpu(pkg_temp_thermal_ids)) |
5030 | return -ENODEV; | |
5031 | ||
5032 | + if (!thermal_notify_work_init()) | |
5033 | + return -ENODEV; | |
5034 | + | |
5035 | spin_lock_init(&pkg_work_lock); | |
5036 | platform_thermal_package_notify = | |
5037 | pkg_temp_thermal_platform_thermal_notify; | |
c7c16703 | 5038 | @@ -609,7 +652,7 @@ static int __init pkg_temp_thermal_init(void) |
1a6e0f06 JK |
5039 | kfree(pkg_work_scheduled); |
5040 | platform_thermal_package_notify = NULL; | |
5041 | platform_thermal_package_rate_control = NULL; | |
5042 | - | |
5043 | + thermal_notify_work_cleanup(); | |
5044 | return -ENODEV; | |
5045 | } | |
5046 | ||
c7c16703 | 5047 | @@ -634,6 +677,7 @@ static void __exit pkg_temp_thermal_exit(void) |
1a6e0f06 JK |
5048 | mutex_unlock(&phy_dev_list_mutex); |
5049 | platform_thermal_package_notify = NULL; | |
5050 | platform_thermal_package_rate_control = NULL; | |
5051 | + thermal_notify_work_cleanup(); | |
5052 | for_each_online_cpu(i) | |
5053 | cancel_delayed_work_sync( | |
5054 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | |
5055 | diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c | |
c7c16703 | 5056 | index 240a361b674f..55e249267144 100644 |
1a6e0f06 JK |
5057 | --- a/drivers/tty/serial/8250/8250_core.c |
5058 | +++ b/drivers/tty/serial/8250/8250_core.c | |
5059 | @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; | |
5060 | ||
5061 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
5062 | ||
5063 | -#define PASS_LIMIT 512 | |
5064 | +/* | |
5065 | + * On -rt we can have a more delays, and legitimately | |
5066 | + * so - so don't drop work spuriously and spam the | |
5067 | + * syslog: | |
5068 | + */ | |
5069 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5070 | +# define PASS_LIMIT 1000000 | |
5071 | +#else | |
5072 | +# define PASS_LIMIT 512 | |
5073 | +#endif | |
5074 | ||
5075 | #include <asm/serial.h> | |
5076 | /* | |
5077 | diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c | |
c7c16703 | 5078 | index 1731b98d2471..5cc62301e840 100644 |
1a6e0f06 JK |
5079 | --- a/drivers/tty/serial/8250/8250_port.c |
5080 | +++ b/drivers/tty/serial/8250/8250_port.c | |
5081 | @@ -35,6 +35,7 @@ | |
5082 | #include <linux/nmi.h> | |
5083 | #include <linux/mutex.h> | |
5084 | #include <linux/slab.h> | |
5085 | +#include <linux/kdb.h> | |
5086 | #include <linux/uaccess.h> | |
5087 | #include <linux/pm_runtime.h> | |
5088 | #include <linux/timer.h> | |
c7c16703 | 5089 | @@ -3144,9 +3145,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
1a6e0f06 JK |
5090 | |
5091 | serial8250_rpm_get(up); | |
5092 | ||
5093 | - if (port->sysrq) | |
5094 | + if (port->sysrq || oops_in_progress) | |
5095 | locked = 0; | |
5096 | - else if (oops_in_progress) | |
5097 | + else if (in_kdb_printk()) | |
5098 | locked = spin_trylock_irqsave(&port->lock, flags); | |
5099 | else | |
5100 | spin_lock_irqsave(&port->lock, flags); | |
5101 | diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c | |
c7c16703 | 5102 | index e2c33b9528d8..53af53c43e8c 100644 |
1a6e0f06 JK |
5103 | --- a/drivers/tty/serial/amba-pl011.c |
5104 | +++ b/drivers/tty/serial/amba-pl011.c | |
c7c16703 | 5105 | @@ -2194,13 +2194,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
1a6e0f06 JK |
5106 | |
5107 | clk_enable(uap->clk); | |
5108 | ||
5109 | - local_irq_save(flags); | |
5110 | + /* | |
5111 | + * local_irq_save(flags); | |
5112 | + * | |
5113 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
5114 | + * handling then interrupts are already disabled. Aside of | |
5115 | + * that the port.sysrq check is racy on SMP regardless. | |
5116 | + */ | |
5117 | if (uap->port.sysrq) | |
5118 | locked = 0; | |
5119 | else if (oops_in_progress) | |
5120 | - locked = spin_trylock(&uap->port.lock); | |
5121 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
5122 | else | |
5123 | - spin_lock(&uap->port.lock); | |
5124 | + spin_lock_irqsave(&uap->port.lock, flags); | |
5125 | ||
5126 | /* | |
5127 | * First save the CR then disable the interrupts | |
c7c16703 | 5128 | @@ -2224,8 +2230,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
1a6e0f06 JK |
5129 | pl011_write(old_cr, uap, REG_CR); |
5130 | ||
5131 | if (locked) | |
5132 | - spin_unlock(&uap->port.lock); | |
5133 | - local_irq_restore(flags); | |
5134 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
5135 | ||
5136 | clk_disable(uap->clk); | |
5137 | } | |
5138 | diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c | |
5139 | index a2a529994ba5..0ee7c4c518df 100644 | |
5140 | --- a/drivers/tty/serial/omap-serial.c | |
5141 | +++ b/drivers/tty/serial/omap-serial.c | |
5142 | @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, | |
5143 | ||
5144 | pm_runtime_get_sync(up->dev); | |
5145 | ||
5146 | - local_irq_save(flags); | |
5147 | - if (up->port.sysrq) | |
5148 | - locked = 0; | |
5149 | - else if (oops_in_progress) | |
5150 | - locked = spin_trylock(&up->port.lock); | |
5151 | + if (up->port.sysrq || oops_in_progress) | |
5152 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
5153 | else | |
5154 | - spin_lock(&up->port.lock); | |
5155 | + spin_lock_irqsave(&up->port.lock, flags); | |
5156 | ||
5157 | /* | |
5158 | * First save the IER then disable the interrupts | |
5159 | @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, | |
5160 | pm_runtime_mark_last_busy(up->dev); | |
5161 | pm_runtime_put_autosuspend(up->dev); | |
5162 | if (locked) | |
5163 | - spin_unlock(&up->port.lock); | |
5164 | - local_irq_restore(flags); | |
5165 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
5166 | } | |
5167 | ||
5168 | static int __init | |
1a6e0f06 | 5169 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c |
c7c16703 | 5170 | index 479e223f9cff..3418a54b4131 100644 |
1a6e0f06 JK |
5171 | --- a/drivers/usb/core/hcd.c |
5172 | +++ b/drivers/usb/core/hcd.c | |
c7c16703 | 5173 | @@ -1761,9 +1761,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) |
1a6e0f06 JK |
5174 | * and no one may trigger the above deadlock situation when |
5175 | * running complete() in tasklet. | |
5176 | */ | |
5177 | - local_irq_save(flags); | |
5178 | + local_irq_save_nort(flags); | |
5179 | urb->complete(urb); | |
5180 | - local_irq_restore(flags); | |
5181 | + local_irq_restore_nort(flags); | |
5182 | ||
5183 | usb_anchor_resume_wakeups(anchor); | |
5184 | atomic_dec(&urb->use_count); | |
5185 | diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c | |
c7c16703 | 5186 | index 17989b72cdae..88c6574b5992 100644 |
1a6e0f06 JK |
5187 | --- a/drivers/usb/gadget/function/f_fs.c |
5188 | +++ b/drivers/usb/gadget/function/f_fs.c | |
c7c16703 | 5189 | @@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs) |
1a6e0f06 JK |
5190 | pr_info("%s(): freeing\n", __func__); |
5191 | ffs_data_clear(ffs); | |
5192 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
5193 | - waitqueue_active(&ffs->ep0req_completion.wait)); | |
5194 | + swait_active(&ffs->ep0req_completion.wait)); | |
5195 | kfree(ffs->dev_name); | |
5196 | kfree(ffs); | |
5197 | } | |
5198 | diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c | |
c7c16703 | 5199 | index 1468d8f085a3..6aae3ae25c18 100644 |
1a6e0f06 JK |
5200 | --- a/drivers/usb/gadget/legacy/inode.c |
5201 | +++ b/drivers/usb/gadget/legacy/inode.c | |
5202 | @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5203 | spin_unlock_irq (&epdata->dev->lock); | |
5204 | ||
5205 | if (likely (value == 0)) { | |
5206 | - value = wait_event_interruptible (done.wait, done.done); | |
5207 | + value = swait_event_interruptible (done.wait, done.done); | |
5208 | if (value != 0) { | |
5209 | spin_lock_irq (&epdata->dev->lock); | |
5210 | if (likely (epdata->ep != NULL)) { | |
5211 | @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5212 | usb_ep_dequeue (epdata->ep, epdata->req); | |
5213 | spin_unlock_irq (&epdata->dev->lock); | |
5214 | ||
5215 | - wait_event (done.wait, done.done); | |
5216 | + swait_event (done.wait, done.done); | |
5217 | if (epdata->status == -ECONNRESET) | |
5218 | epdata->status = -EINTR; | |
5219 | } else { | |
5220 | diff --git a/fs/aio.c b/fs/aio.c | |
c7c16703 | 5221 | index 428484f2f841..2b02e2eb2158 100644 |
1a6e0f06 JK |
5222 | --- a/fs/aio.c |
5223 | +++ b/fs/aio.c | |
5224 | @@ -40,6 +40,7 @@ | |
5225 | #include <linux/ramfs.h> | |
5226 | #include <linux/percpu-refcount.h> | |
5227 | #include <linux/mount.h> | |
5228 | +#include <linux/swork.h> | |
5229 | ||
5230 | #include <asm/kmap_types.h> | |
5231 | #include <asm/uaccess.h> | |
5232 | @@ -115,7 +116,7 @@ struct kioctx { | |
5233 | struct page **ring_pages; | |
5234 | long nr_pages; | |
5235 | ||
5236 | - struct work_struct free_work; | |
5237 | + struct swork_event free_work; | |
5238 | ||
5239 | /* | |
5240 | * signals when all in-flight requests are done | |
5241 | @@ -258,6 +259,7 @@ static int __init aio_setup(void) | |
5242 | .mount = aio_mount, | |
5243 | .kill_sb = kill_anon_super, | |
5244 | }; | |
5245 | + BUG_ON(swork_get()); | |
5246 | aio_mnt = kern_mount(&aio_fs); | |
5247 | if (IS_ERR(aio_mnt)) | |
5248 | panic("Failed to create aio fs mount."); | |
c7c16703 | 5249 | @@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) |
1a6e0f06 JK |
5250 | return cancel(&kiocb->common); |
5251 | } | |
5252 | ||
5253 | -static void free_ioctx(struct work_struct *work) | |
5254 | +static void free_ioctx(struct swork_event *sev) | |
5255 | { | |
5256 | - struct kioctx *ctx = container_of(work, struct kioctx, free_work); | |
5257 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5258 | ||
5259 | pr_debug("freeing %p\n", ctx); | |
5260 | ||
c7c16703 | 5261 | @@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) |
1a6e0f06 JK |
5262 | if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) |
5263 | complete(&ctx->rq_wait->comp); | |
5264 | ||
5265 | - INIT_WORK(&ctx->free_work, free_ioctx); | |
5266 | - schedule_work(&ctx->free_work); | |
5267 | + INIT_SWORK(&ctx->free_work, free_ioctx); | |
5268 | + swork_queue(&ctx->free_work); | |
5269 | } | |
5270 | ||
5271 | /* | |
c7c16703 | 5272 | @@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) |
1a6e0f06 JK |
5273 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - |
5274 | * now it's safe to cancel any that need to be. | |
5275 | */ | |
5276 | -static void free_ioctx_users(struct percpu_ref *ref) | |
5277 | +static void free_ioctx_users_work(struct swork_event *sev) | |
5278 | { | |
5279 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5280 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5281 | struct aio_kiocb *req; | |
5282 | ||
5283 | spin_lock_irq(&ctx->ctx_lock); | |
c7c16703 | 5284 | @@ -632,6 +634,14 @@ static void free_ioctx_users(struct percpu_ref *ref) |
1a6e0f06 JK |
5285 | percpu_ref_put(&ctx->reqs); |
5286 | } | |
5287 | ||
5288 | +static void free_ioctx_users(struct percpu_ref *ref) | |
5289 | +{ | |
5290 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5291 | + | |
5292 | + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); | |
5293 | + swork_queue(&ctx->free_work); | |
5294 | +} | |
5295 | + | |
5296 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
5297 | { | |
5298 | unsigned i, new_nr; | |
5299 | diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h | |
c7c16703 | 5300 | index a1fba4285277..3796769b4cd1 100644 |
1a6e0f06 JK |
5301 | --- a/fs/autofs4/autofs_i.h |
5302 | +++ b/fs/autofs4/autofs_i.h | |
c7c16703 | 5303 | @@ -31,6 +31,7 @@ |
1a6e0f06 JK |
5304 | #include <linux/sched.h> |
5305 | #include <linux/mount.h> | |
5306 | #include <linux/namei.h> | |
5307 | +#include <linux/delay.h> | |
5308 | #include <asm/current.h> | |
5309 | #include <linux/uaccess.h> | |
5310 | ||
5311 | diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c | |
5312 | index d8e6d421c27f..2e689ab1306b 100644 | |
5313 | --- a/fs/autofs4/expire.c | |
5314 | +++ b/fs/autofs4/expire.c | |
5315 | @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, | |
5316 | parent = p->d_parent; | |
5317 | if (!spin_trylock(&parent->d_lock)) { | |
5318 | spin_unlock(&p->d_lock); | |
5319 | - cpu_relax(); | |
5320 | + cpu_chill(); | |
5321 | goto relock; | |
5322 | } | |
5323 | spin_unlock(&p->d_lock); | |
c7c16703 JK |
5324 | diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c |
5325 | index 63d197724519..b8e479c5ad83 100644 | |
5326 | --- a/fs/btrfs/async-thread.c | |
5327 | +++ b/fs/btrfs/async-thread.c | |
5328 | @@ -306,8 +306,8 @@ | |
5329 | * because the callback could free the structure. | |
5330 | */ | |
5331 | wtag = work; | |
5332 | - work->ordered_free(work); | |
5333 | trace_btrfs_all_work_done(wq->fs_info, wtag); | |
5334 | + work->ordered_free(work); | |
5335 | } | |
5336 | spin_unlock_irqrestore(lock, flags); | |
5337 | } | |
5338 | @@ -339,8 +339,6 @@ | |
5339 | set_bit(WORK_DONE_BIT, &work->flags); | |
5340 | run_ordered_work(wq); | |
5341 | } | |
5342 | - if (!need_order) | |
5343 | - trace_btrfs_all_work_done(wq->fs_info, wtag); | |
5344 | } | |
5345 | ||
5346 | void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func, | |
1a6e0f06 | 5347 | diff --git a/fs/buffer.c b/fs/buffer.c |
c7c16703 | 5348 | index b205a629001d..5646afc022ba 100644 |
1a6e0f06 JK |
5349 | --- a/fs/buffer.c |
5350 | +++ b/fs/buffer.c | |
5351 | @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5352 | * decide that the page is now completely done. | |
5353 | */ | |
5354 | first = page_buffers(page); | |
5355 | - local_irq_save(flags); | |
5356 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5357 | + flags = bh_uptodate_lock_irqsave(first); | |
5358 | clear_buffer_async_read(bh); | |
5359 | unlock_buffer(bh); | |
5360 | tmp = bh; | |
5361 | @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5362 | } | |
5363 | tmp = tmp->b_this_page; | |
5364 | } while (tmp != bh); | |
5365 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5366 | - local_irq_restore(flags); | |
5367 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5368 | ||
5369 | /* | |
5370 | * If none of the buffers had errors and they are all | |
5371 | @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5372 | return; | |
5373 | ||
5374 | still_busy: | |
5375 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5376 | - local_irq_restore(flags); | |
5377 | - return; | |
5378 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5379 | } | |
5380 | ||
5381 | /* | |
5382 | @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5383 | } | |
5384 | ||
5385 | first = page_buffers(page); | |
5386 | - local_irq_save(flags); | |
5387 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5388 | + flags = bh_uptodate_lock_irqsave(first); | |
5389 | ||
5390 | clear_buffer_async_write(bh); | |
5391 | unlock_buffer(bh); | |
5392 | @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5393 | } | |
5394 | tmp = tmp->b_this_page; | |
5395 | } | |
5396 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5397 | - local_irq_restore(flags); | |
5398 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5399 | end_page_writeback(page); | |
5400 | return; | |
5401 | ||
5402 | still_busy: | |
5403 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5404 | - local_irq_restore(flags); | |
5405 | - return; | |
5406 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5407 | } | |
5408 | EXPORT_SYMBOL(end_buffer_async_write); | |
5409 | ||
c7c16703 | 5410 | @@ -3383,6 +3375,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) |
1a6e0f06 JK |
5411 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); |
5412 | if (ret) { | |
5413 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
5414 | + buffer_head_init_locks(ret); | |
5415 | preempt_disable(); | |
5416 | __this_cpu_inc(bh_accounting.nr); | |
5417 | recalc_bh_state(); | |
5418 | diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c | |
5419 | index 8f6a2a5863b9..4217828d0b68 100644 | |
5420 | --- a/fs/cifs/readdir.c | |
5421 | +++ b/fs/cifs/readdir.c | |
5422 | @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, | |
5423 | struct inode *inode; | |
5424 | struct super_block *sb = parent->d_sb; | |
5425 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | |
5426 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5427 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5428 | ||
5429 | cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); | |
5430 | ||
5431 | diff --git a/fs/dcache.c b/fs/dcache.c | |
5432 | index 5c7cc953ac81..a9bb31f1c1af 100644 | |
5433 | --- a/fs/dcache.c | |
5434 | +++ b/fs/dcache.c | |
5435 | @@ -19,6 +19,7 @@ | |
5436 | #include <linux/mm.h> | |
5437 | #include <linux/fs.h> | |
5438 | #include <linux/fsnotify.h> | |
5439 | +#include <linux/delay.h> | |
5440 | #include <linux/slab.h> | |
5441 | #include <linux/init.h> | |
5442 | #include <linux/hash.h> | |
5443 | @@ -750,6 +751,8 @@ static inline bool fast_dput(struct dentry *dentry) | |
5444 | */ | |
5445 | void dput(struct dentry *dentry) | |
5446 | { | |
5447 | + struct dentry *parent; | |
5448 | + | |
5449 | if (unlikely(!dentry)) | |
5450 | return; | |
5451 | ||
5452 | @@ -788,9 +791,18 @@ void dput(struct dentry *dentry) | |
5453 | return; | |
5454 | ||
5455 | kill_it: | |
5456 | - dentry = dentry_kill(dentry); | |
5457 | - if (dentry) { | |
5458 | - cond_resched(); | |
5459 | + parent = dentry_kill(dentry); | |
5460 | + if (parent) { | |
5461 | + int r; | |
5462 | + | |
5463 | + if (parent == dentry) { | |
5464 | + /* the task with the highest priority won't schedule */ | |
5465 | + r = cond_resched(); | |
5466 | + if (!r) | |
5467 | + cpu_chill(); | |
5468 | + } else { | |
5469 | + dentry = parent; | |
5470 | + } | |
5471 | goto repeat; | |
5472 | } | |
5473 | } | |
5474 | @@ -2321,7 +2333,7 @@ void d_delete(struct dentry * dentry) | |
5475 | if (dentry->d_lockref.count == 1) { | |
5476 | if (!spin_trylock(&inode->i_lock)) { | |
5477 | spin_unlock(&dentry->d_lock); | |
5478 | - cpu_relax(); | |
5479 | + cpu_chill(); | |
5480 | goto again; | |
5481 | } | |
5482 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
5483 | @@ -2381,21 +2393,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) | |
5484 | ||
5485 | static void d_wait_lookup(struct dentry *dentry) | |
5486 | { | |
5487 | - if (d_in_lookup(dentry)) { | |
5488 | - DECLARE_WAITQUEUE(wait, current); | |
5489 | - add_wait_queue(dentry->d_wait, &wait); | |
5490 | - do { | |
5491 | - set_current_state(TASK_UNINTERRUPTIBLE); | |
5492 | - spin_unlock(&dentry->d_lock); | |
5493 | - schedule(); | |
5494 | - spin_lock(&dentry->d_lock); | |
5495 | - } while (d_in_lookup(dentry)); | |
5496 | - } | |
5497 | + struct swait_queue __wait; | |
5498 | + | |
5499 | + if (!d_in_lookup(dentry)) | |
5500 | + return; | |
5501 | + | |
5502 | + INIT_LIST_HEAD(&__wait.task_list); | |
5503 | + do { | |
5504 | + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); | |
5505 | + spin_unlock(&dentry->d_lock); | |
5506 | + schedule(); | |
5507 | + spin_lock(&dentry->d_lock); | |
5508 | + } while (d_in_lookup(dentry)); | |
5509 | + finish_swait(dentry->d_wait, &__wait); | |
5510 | } | |
5511 | ||
5512 | struct dentry *d_alloc_parallel(struct dentry *parent, | |
5513 | const struct qstr *name, | |
5514 | - wait_queue_head_t *wq) | |
5515 | + struct swait_queue_head *wq) | |
5516 | { | |
5517 | unsigned int hash = name->hash; | |
5518 | struct hlist_bl_head *b = in_lookup_hash(parent, hash); | |
5519 | @@ -2504,7 +2519,7 @@ void __d_lookup_done(struct dentry *dentry) | |
5520 | hlist_bl_lock(b); | |
5521 | dentry->d_flags &= ~DCACHE_PAR_LOOKUP; | |
5522 | __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); | |
5523 | - wake_up_all(dentry->d_wait); | |
5524 | + swake_up_all(dentry->d_wait); | |
5525 | dentry->d_wait = NULL; | |
5526 | hlist_bl_unlock(b); | |
5527 | INIT_HLIST_NODE(&dentry->d_u.d_alias); | |
5528 | @@ -3601,6 +3616,11 @@ EXPORT_SYMBOL(d_genocide); | |
5529 | ||
5530 | void __init vfs_caches_init_early(void) | |
5531 | { | |
5532 | + int i; | |
5533 | + | |
5534 | + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) | |
5535 | + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); | |
5536 | + | |
5537 | dcache_init_early(); | |
5538 | inode_init_early(); | |
5539 | } | |
5540 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
5541 | index 10db91218933..42af0a06f657 100644 | |
5542 | --- a/fs/eventpoll.c | |
5543 | +++ b/fs/eventpoll.c | |
5544 | @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | |
5545 | */ | |
5546 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
5547 | { | |
5548 | - int this_cpu = get_cpu(); | |
5549 | + int this_cpu = get_cpu_light(); | |
5550 | ||
5551 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
5552 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
5553 | ||
5554 | - put_cpu(); | |
5555 | + put_cpu_light(); | |
5556 | } | |
5557 | ||
5558 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
5559 | diff --git a/fs/exec.c b/fs/exec.c | |
c7c16703 | 5560 | index 67e86571685a..fe14cdd84016 100644 |
1a6e0f06 JK |
5561 | --- a/fs/exec.c |
5562 | +++ b/fs/exec.c | |
c7c16703 | 5563 | @@ -1017,12 +1017,14 @@ static int exec_mmap(struct mm_struct *mm) |
1a6e0f06 JK |
5564 | } |
5565 | } | |
5566 | task_lock(tsk); | |
5567 | + preempt_disable_rt(); | |
5568 | active_mm = tsk->active_mm; | |
5569 | tsk->mm = mm; | |
5570 | tsk->active_mm = mm; | |
5571 | activate_mm(active_mm, mm); | |
5572 | tsk->mm->vmacache_seqnum = 0; | |
5573 | vmacache_flush(tsk); | |
5574 | + preempt_enable_rt(); | |
5575 | task_unlock(tsk); | |
5576 | if (old_mm) { | |
5577 | up_read(&old_mm->mmap_sem); | |
5578 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c | |
c7c16703 | 5579 | index 096f79997f75..310e2aabbb0d 100644 |
1a6e0f06 JK |
5580 | --- a/fs/fuse/dir.c |
5581 | +++ b/fs/fuse/dir.c | |
c7c16703 | 5582 | @@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file, |
1a6e0f06 JK |
5583 | struct inode *dir = d_inode(parent); |
5584 | struct fuse_conn *fc; | |
5585 | struct inode *inode; | |
5586 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5587 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5588 | ||
5589 | if (!o->nodeid) { | |
5590 | /* | |
5591 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c | |
5592 | index 684996c8a3a4..6e18a06aaabe 100644 | |
5593 | --- a/fs/jbd2/checkpoint.c | |
5594 | +++ b/fs/jbd2/checkpoint.c | |
5595 | @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |
5596 | nblocks = jbd2_space_needed(journal); | |
5597 | while (jbd2_log_space_left(journal) < nblocks) { | |
5598 | write_unlock(&journal->j_state_lock); | |
5599 | + if (current->plug) | |
5600 | + io_schedule(); | |
5601 | mutex_lock(&journal->j_checkpoint_mutex); | |
5602 | ||
5603 | /* | |
c7c16703 JK |
5604 | diff --git a/fs/locks.c b/fs/locks.c |
5605 | index 22c5b4aa4961..269c6a44449a 100644 | |
5606 | --- a/fs/locks.c | |
5607 | +++ b/fs/locks.c | |
5608 | @@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5609 | return -ENOMEM; | |
5610 | } | |
5611 | ||
5612 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5613 | + percpu_down_read(&file_rwsem); | |
5614 | spin_lock(&ctx->flc_lock); | |
5615 | if (request->fl_flags & FL_ACCESS) | |
5616 | goto find_conflict; | |
5617 | @@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5618 | ||
5619 | out: | |
5620 | spin_unlock(&ctx->flc_lock); | |
5621 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5622 | + percpu_up_read(&file_rwsem); | |
5623 | if (new_fl) | |
5624 | locks_free_lock(new_fl); | |
5625 | locks_dispose_list(&dispose); | |
5626 | @@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5627 | new_fl2 = locks_alloc_lock(); | |
5628 | } | |
5629 | ||
5630 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5631 | + percpu_down_read(&file_rwsem); | |
5632 | spin_lock(&ctx->flc_lock); | |
5633 | /* | |
5634 | * New lock request. Walk all POSIX locks and look for conflicts. If | |
5635 | @@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5636 | } | |
5637 | out: | |
5638 | spin_unlock(&ctx->flc_lock); | |
5639 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5640 | + percpu_up_read(&file_rwsem); | |
5641 | /* | |
5642 | * Free any unused locks. | |
5643 | */ | |
5644 | @@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5645 | return error; | |
5646 | } | |
5647 | ||
5648 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5649 | + percpu_down_read(&file_rwsem); | |
5650 | spin_lock(&ctx->flc_lock); | |
5651 | ||
5652 | time_out_leases(inode, &dispose); | |
5653 | @@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5654 | locks_insert_block(fl, new_fl); | |
5655 | trace_break_lease_block(inode, new_fl); | |
5656 | spin_unlock(&ctx->flc_lock); | |
5657 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5658 | + percpu_up_read(&file_rwsem); | |
5659 | ||
5660 | locks_dispose_list(&dispose); | |
5661 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | |
5662 | !new_fl->fl_next, break_time); | |
5663 | ||
5664 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5665 | + percpu_down_read(&file_rwsem); | |
5666 | spin_lock(&ctx->flc_lock); | |
5667 | trace_break_lease_unblock(inode, new_fl); | |
5668 | locks_delete_block(new_fl); | |
5669 | @@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5670 | } | |
5671 | out: | |
5672 | spin_unlock(&ctx->flc_lock); | |
5673 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5674 | + percpu_up_read(&file_rwsem); | |
5675 | locks_dispose_list(&dispose); | |
5676 | locks_free_lock(new_fl); | |
5677 | return error; | |
5678 | @@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp) | |
5679 | ||
5680 | ctx = smp_load_acquire(&inode->i_flctx); | |
5681 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { | |
5682 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5683 | + percpu_down_read(&file_rwsem); | |
5684 | spin_lock(&ctx->flc_lock); | |
5685 | time_out_leases(inode, &dispose); | |
5686 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
5687 | @@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) | |
5688 | break; | |
5689 | } | |
5690 | spin_unlock(&ctx->flc_lock); | |
5691 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5692 | + percpu_up_read(&file_rwsem); | |
5693 | ||
5694 | locks_dispose_list(&dispose); | |
5695 | } | |
5696 | @@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5697 | return -EINVAL; | |
5698 | } | |
5699 | ||
5700 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5701 | + percpu_down_read(&file_rwsem); | |
5702 | spin_lock(&ctx->flc_lock); | |
5703 | time_out_leases(inode, &dispose); | |
5704 | error = check_conflicting_open(dentry, arg, lease->fl_flags); | |
5705 | @@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5706 | lease->fl_lmops->lm_setup(lease, priv); | |
5707 | out: | |
5708 | spin_unlock(&ctx->flc_lock); | |
5709 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5710 | + percpu_up_read(&file_rwsem); | |
5711 | locks_dispose_list(&dispose); | |
5712 | if (is_deleg) | |
5713 | inode_unlock(inode); | |
5714 | @@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
5715 | return error; | |
5716 | } | |
5717 | ||
5718 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5719 | + percpu_down_read(&file_rwsem); | |
5720 | spin_lock(&ctx->flc_lock); | |
5721 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
5722 | if (fl->fl_file == filp && | |
5723 | @@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
5724 | if (victim) | |
5725 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); | |
5726 | spin_unlock(&ctx->flc_lock); | |
5727 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5728 | + percpu_up_read(&file_rwsem); | |
5729 | locks_dispose_list(&dispose); | |
5730 | return error; | |
5731 | } | |
5732 | @@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx) | |
5733 | if (list_empty(&ctx->flc_lease)) | |
5734 | return; | |
5735 | ||
5736 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5737 | + percpu_down_read(&file_rwsem); | |
5738 | spin_lock(&ctx->flc_lock); | |
5739 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) | |
5740 | if (filp == fl->fl_file) | |
5741 | lease_modify(fl, F_UNLCK, &dispose); | |
5742 | spin_unlock(&ctx->flc_lock); | |
5743 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5744 | + percpu_up_read(&file_rwsem); | |
5745 | ||
5746 | locks_dispose_list(&dispose); | |
5747 | } | |
1a6e0f06 | 5748 | diff --git a/fs/namei.c b/fs/namei.c |
c7c16703 | 5749 | index 5b4eed221530..9c8dd3c83a80 100644 |
1a6e0f06 JK |
5750 | --- a/fs/namei.c |
5751 | +++ b/fs/namei.c | |
5752 | @@ -1629,7 +1629,7 @@ static struct dentry *lookup_slow(const struct qstr *name, | |
5753 | { | |
5754 | struct dentry *dentry = ERR_PTR(-ENOENT), *old; | |
5755 | struct inode *inode = dir->d_inode; | |
5756 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5757 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5758 | ||
5759 | inode_lock_shared(inode); | |
5760 | /* Don't go there if it's already dead */ | |
5761 | @@ -3086,7 +3086,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |
5762 | struct dentry *dentry; | |
5763 | int error, create_error = 0; | |
5764 | umode_t mode = op->mode; | |
5765 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5766 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5767 | ||
5768 | if (unlikely(IS_DEADDIR(dir_inode))) | |
5769 | return -ENOENT; | |
5770 | diff --git a/fs/namespace.c b/fs/namespace.c | |
c7c16703 | 5771 | index e6c234b1a645..c9dbe5e56347 100644 |
1a6e0f06 JK |
5772 | --- a/fs/namespace.c |
5773 | +++ b/fs/namespace.c | |
5774 | @@ -14,6 +14,7 @@ | |
5775 | #include <linux/mnt_namespace.h> | |
5776 | #include <linux/user_namespace.h> | |
5777 | #include <linux/namei.h> | |
5778 | +#include <linux/delay.h> | |
5779 | #include <linux/security.h> | |
5780 | #include <linux/idr.h> | |
5781 | #include <linux/init.h> /* init_rootfs */ | |
c7c16703 | 5782 | @@ -356,8 +357,11 @@ int __mnt_want_write(struct vfsmount *m) |
1a6e0f06 JK |
5783 | * incremented count after it has set MNT_WRITE_HOLD. |
5784 | */ | |
5785 | smp_mb(); | |
5786 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
5787 | - cpu_relax(); | |
5788 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
5789 | + preempt_enable(); | |
5790 | + cpu_chill(); | |
5791 | + preempt_disable(); | |
5792 | + } | |
5793 | /* | |
5794 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
5795 | * be set to match its requirements. So we must not load that until | |
5796 | diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c | |
c7c16703 | 5797 | index dff600ae0d74..d726d2e09353 100644 |
1a6e0f06 JK |
5798 | --- a/fs/nfs/delegation.c |
5799 | +++ b/fs/nfs/delegation.c | |
5800 | @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode, | |
5801 | sp = state->owner; | |
5802 | /* Block nfs4_proc_unlck */ | |
5803 | mutex_lock(&sp->so_delegreturn_mutex); | |
5804 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
5805 | + seq = read_seqbegin(&sp->so_reclaim_seqlock); | |
5806 | err = nfs4_open_delegation_recall(ctx, state, stateid, type); | |
5807 | if (!err) | |
5808 | err = nfs_delegation_claim_locks(ctx, state, stateid); | |
5809 | - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
5810 | + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
5811 | err = -EAGAIN; | |
5812 | mutex_unlock(&sp->so_delegreturn_mutex); | |
5813 | put_nfs_open_context(ctx); | |
5814 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | |
c7c16703 | 5815 | index 5f1af4cd1a33..436c27eb9d4f 100644 |
1a6e0f06 JK |
5816 | --- a/fs/nfs/dir.c |
5817 | +++ b/fs/nfs/dir.c | |
5818 | @@ -485,7 +485,7 @@ static | |
5819 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |
5820 | { | |
5821 | struct qstr filename = QSTR_INIT(entry->name, entry->len); | |
5822 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5823 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5824 | struct dentry *dentry; | |
5825 | struct dentry *alias; | |
5826 | struct inode *dir = d_inode(parent); | |
c7c16703 | 5827 | @@ -1498,7 +1498,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, |
1a6e0f06 JK |
5828 | struct file *file, unsigned open_flags, |
5829 | umode_t mode, int *opened) | |
5830 | { | |
5831 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5832 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5833 | struct nfs_open_context *ctx; | |
5834 | struct dentry *res; | |
5835 | struct iattr attr = { .ia_valid = ATTR_OPEN }; | |
c7c16703 | 5836 | @@ -1813,7 +1813,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) |
1a6e0f06 JK |
5837 | |
5838 | trace_nfs_rmdir_enter(dir, dentry); | |
5839 | if (d_really_is_positive(dentry)) { | |
5840 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5841 | + down(&NFS_I(d_inode(dentry))->rmdir_sem); | |
5842 | +#else | |
5843 | down_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
5844 | +#endif | |
5845 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
5846 | /* Ensure the VFS deletes this inode */ | |
5847 | switch (error) { | |
c7c16703 | 5848 | @@ -1823,7 +1827,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) |
1a6e0f06 JK |
5849 | case -ENOENT: |
5850 | nfs_dentry_handle_enoent(dentry); | |
5851 | } | |
5852 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5853 | + up(&NFS_I(d_inode(dentry))->rmdir_sem); | |
5854 | +#else | |
5855 | up_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
5856 | +#endif | |
5857 | } else | |
5858 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
5859 | trace_nfs_rmdir_exit(dir, dentry, error); | |
5860 | diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c | |
5861 | index bf4ec5ecc97e..36cd5fc9192c 100644 | |
5862 | --- a/fs/nfs/inode.c | |
5863 | +++ b/fs/nfs/inode.c | |
5864 | @@ -1957,7 +1957,11 @@ static void init_once(void *foo) | |
5865 | nfsi->nrequests = 0; | |
5866 | nfsi->commit_info.ncommit = 0; | |
5867 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | |
5868 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5869 | + sema_init(&nfsi->rmdir_sem, 1); | |
5870 | +#else | |
5871 | init_rwsem(&nfsi->rmdir_sem); | |
5872 | +#endif | |
5873 | nfs4_init_once(nfsi); | |
5874 | } | |
5875 | ||
5876 | diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h | |
c7c16703 | 5877 | index 1452177c822d..f43b01d54c59 100644 |
1a6e0f06 JK |
5878 | --- a/fs/nfs/nfs4_fs.h |
5879 | +++ b/fs/nfs/nfs4_fs.h | |
c7c16703 | 5880 | @@ -111,7 +111,7 @@ struct nfs4_state_owner { |
1a6e0f06 JK |
5881 | unsigned long so_flags; |
5882 | struct list_head so_states; | |
5883 | struct nfs_seqid_counter so_seqid; | |
5884 | - seqcount_t so_reclaim_seqcount; | |
5885 | + seqlock_t so_reclaim_seqlock; | |
5886 | struct mutex so_delegreturn_mutex; | |
5887 | }; | |
5888 | ||
5889 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | |
c7c16703 | 5890 | index 241da19b7da4..8f9636cc298f 100644 |
1a6e0f06 JK |
5891 | --- a/fs/nfs/nfs4proc.c |
5892 | +++ b/fs/nfs/nfs4proc.c | |
c7c16703 | 5893 | @@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
1a6e0f06 JK |
5894 | unsigned int seq; |
5895 | int ret; | |
5896 | ||
5897 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
5898 | + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
5899 | ||
5900 | ret = _nfs4_proc_open(opendata); | |
5901 | if (ret != 0) | |
c7c16703 | 5902 | @@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
1a6e0f06 JK |
5903 | ctx->state = state; |
5904 | if (d_inode(dentry) == state->inode) { | |
5905 | nfs_inode_attach_open_context(ctx); | |
5906 | - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
5907 | + if (read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
5908 | nfs4_schedule_stateid_recovery(server, state); | |
5909 | } | |
5910 | out: | |
5911 | diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c | |
c7c16703 | 5912 | index 0959c9661662..dabd834d7686 100644 |
1a6e0f06 JK |
5913 | --- a/fs/nfs/nfs4state.c |
5914 | +++ b/fs/nfs/nfs4state.c | |
5915 | @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, | |
5916 | nfs4_init_seqid_counter(&sp->so_seqid); | |
5917 | atomic_set(&sp->so_count, 1); | |
5918 | INIT_LIST_HEAD(&sp->so_lru); | |
5919 | - seqcount_init(&sp->so_reclaim_seqcount); | |
5920 | + seqlock_init(&sp->so_reclaim_seqlock); | |
5921 | mutex_init(&sp->so_delegreturn_mutex); | |
5922 | return sp; | |
5923 | } | |
c7c16703 | 5924 | @@ -1497,8 +1497,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs |
1a6e0f06 JK |
5925 | * recovering after a network partition or a reboot from a |
5926 | * server that doesn't support a grace period. | |
5927 | */ | |
5928 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5929 | + write_seqlock(&sp->so_reclaim_seqlock); | |
5930 | +#else | |
5931 | + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
5932 | +#endif | |
5933 | spin_lock(&sp->so_lock); | |
5934 | - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); | |
5935 | restart: | |
5936 | list_for_each_entry(state, &sp->so_states, open_states) { | |
5937 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | |
c7c16703 | 5938 | @@ -1567,14 +1571,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs |
1a6e0f06 JK |
5939 | spin_lock(&sp->so_lock); |
5940 | goto restart; | |
5941 | } | |
5942 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
5943 | spin_unlock(&sp->so_lock); | |
5944 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5945 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
5946 | +#else | |
5947 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
5948 | +#endif | |
5949 | return 0; | |
5950 | out_err: | |
5951 | nfs4_put_open_state(state); | |
5952 | - spin_lock(&sp->so_lock); | |
5953 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
5954 | - spin_unlock(&sp->so_lock); | |
5955 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5956 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
5957 | +#else | |
5958 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
5959 | +#endif | |
5960 | return status; | |
5961 | } | |
5962 | ||
5963 | diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c | |
5964 | index 191aa577dd1f..58990c8f52e0 100644 | |
5965 | --- a/fs/nfs/unlink.c | |
5966 | +++ b/fs/nfs/unlink.c | |
5967 | @@ -12,7 +12,7 @@ | |
5968 | #include <linux/sunrpc/clnt.h> | |
5969 | #include <linux/nfs_fs.h> | |
5970 | #include <linux/sched.h> | |
5971 | -#include <linux/wait.h> | |
5972 | +#include <linux/swait.h> | |
5973 | #include <linux/namei.h> | |
5974 | #include <linux/fsnotify.h> | |
5975 | ||
5976 | @@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |
5977 | rpc_restart_call_prepare(task); | |
5978 | } | |
5979 | ||
5980 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5981 | +static void nfs_down_anon(struct semaphore *sema) | |
5982 | +{ | |
5983 | + down(sema); | |
5984 | +} | |
5985 | + | |
5986 | +static void nfs_up_anon(struct semaphore *sema) | |
5987 | +{ | |
5988 | + up(sema); | |
5989 | +} | |
5990 | + | |
5991 | +#else | |
5992 | +static void nfs_down_anon(struct rw_semaphore *rwsem) | |
5993 | +{ | |
5994 | + down_read_non_owner(rwsem); | |
5995 | +} | |
5996 | + | |
5997 | +static void nfs_up_anon(struct rw_semaphore *rwsem) | |
5998 | +{ | |
5999 | + up_read_non_owner(rwsem); | |
6000 | +} | |
6001 | +#endif | |
6002 | + | |
6003 | /** | |
6004 | * nfs_async_unlink_release - Release the sillydelete data. | |
6005 | * @task: rpc_task of the sillydelete | |
6006 | @@ -64,7 +87,7 @@ static void nfs_async_unlink_release(void *calldata) | |
6007 | struct dentry *dentry = data->dentry; | |
6008 | struct super_block *sb = dentry->d_sb; | |
6009 | ||
6010 | - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6011 | + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6012 | d_lookup_done(dentry); | |
6013 | nfs_free_unlinkdata(data); | |
6014 | dput(dentry); | |
6015 | @@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6016 | struct inode *dir = d_inode(dentry->d_parent); | |
6017 | struct dentry *alias; | |
6018 | ||
6019 | - down_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6020 | + nfs_down_anon(&NFS_I(dir)->rmdir_sem); | |
6021 | alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); | |
6022 | if (IS_ERR(alias)) { | |
6023 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6024 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6025 | return 0; | |
6026 | } | |
6027 | if (!d_in_lookup(alias)) { | |
6028 | @@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6029 | ret = 0; | |
6030 | spin_unlock(&alias->d_lock); | |
6031 | dput(alias); | |
6032 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6033 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6034 | /* | |
6035 | * If we'd displaced old cached devname, free it. At that | |
6036 | * point dentry is definitely not a root, so we won't need | |
6037 | @@ -182,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) | |
6038 | goto out_free_name; | |
6039 | } | |
6040 | data->res.dir_attr = &data->dir_attr; | |
6041 | - init_waitqueue_head(&data->wq); | |
6042 | + init_swait_queue_head(&data->wq); | |
6043 | ||
6044 | status = -EBUSY; | |
6045 | spin_lock(&dentry->d_lock); | |
6046 | diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c | |
6047 | index fe251f187ff8..e89da4fb14c2 100644 | |
6048 | --- a/fs/ntfs/aops.c | |
6049 | +++ b/fs/ntfs/aops.c | |
6050 | @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6051 | ofs = 0; | |
6052 | if (file_ofs < init_size) | |
6053 | ofs = init_size - file_ofs; | |
6054 | - local_irq_save(flags); | |
6055 | + local_irq_save_nort(flags); | |
6056 | kaddr = kmap_atomic(page); | |
6057 | memset(kaddr + bh_offset(bh) + ofs, 0, | |
6058 | bh->b_size - ofs); | |
6059 | flush_dcache_page(page); | |
6060 | kunmap_atomic(kaddr); | |
6061 | - local_irq_restore(flags); | |
6062 | + local_irq_restore_nort(flags); | |
6063 | } | |
6064 | } else { | |
6065 | clear_buffer_uptodate(bh); | |
6066 | @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6067 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
6068 | } | |
6069 | first = page_buffers(page); | |
6070 | - local_irq_save(flags); | |
6071 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
6072 | + flags = bh_uptodate_lock_irqsave(first); | |
6073 | clear_buffer_async_read(bh); | |
6074 | unlock_buffer(bh); | |
6075 | tmp = bh; | |
6076 | @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6077 | } | |
6078 | tmp = tmp->b_this_page; | |
6079 | } while (tmp != bh); | |
6080 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6081 | - local_irq_restore(flags); | |
6082 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6083 | /* | |
6084 | * If none of the buffers had errors then we can set the page uptodate, | |
6085 | * but we first have to perform the post read mst fixups, if the | |
6086 | @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6087 | recs = PAGE_SIZE / rec_size; | |
6088 | /* Should have been verified before we got here... */ | |
6089 | BUG_ON(!recs); | |
6090 | - local_irq_save(flags); | |
6091 | + local_irq_save_nort(flags); | |
6092 | kaddr = kmap_atomic(page); | |
6093 | for (i = 0; i < recs; i++) | |
6094 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
6095 | i * rec_size), rec_size); | |
6096 | kunmap_atomic(kaddr); | |
6097 | - local_irq_restore(flags); | |
6098 | + local_irq_restore_nort(flags); | |
6099 | flush_dcache_page(page); | |
6100 | if (likely(page_uptodate && !PageError(page))) | |
6101 | SetPageUptodate(page); | |
6102 | @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6103 | unlock_page(page); | |
6104 | return; | |
6105 | still_busy: | |
6106 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6107 | - local_irq_restore(flags); | |
6108 | - return; | |
6109 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6110 | } | |
6111 | ||
6112 | /** | |
6113 | diff --git a/fs/proc/base.c b/fs/proc/base.c | |
c7c16703 | 6114 | index ca651ac00660..41d9dc789285 100644 |
1a6e0f06 JK |
6115 | --- a/fs/proc/base.c |
6116 | +++ b/fs/proc/base.c | |
c7c16703 | 6117 | @@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
1a6e0f06 JK |
6118 | |
6119 | child = d_hash_and_lookup(dir, &qname); | |
6120 | if (!child) { | |
6121 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6122 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6123 | child = d_alloc_parallel(dir, &qname, &wq); | |
6124 | if (IS_ERR(child)) | |
6125 | goto end_instantiate; | |
6126 | diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c | |
c7c16703 | 6127 | index 55313d994895..bdfc493721e9 100644 |
1a6e0f06 JK |
6128 | --- a/fs/proc/proc_sysctl.c |
6129 | +++ b/fs/proc/proc_sysctl.c | |
c7c16703 | 6130 | @@ -632,7 +632,7 @@ static bool proc_sys_fill_cache(struct file *file, |
1a6e0f06 JK |
6131 | |
6132 | child = d_lookup(dir, &qname); | |
6133 | if (!child) { | |
6134 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6135 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6136 | child = d_alloc_parallel(dir, &qname, &wq); | |
6137 | if (IS_ERR(child)) | |
6138 | return false; | |
6139 | diff --git a/fs/timerfd.c b/fs/timerfd.c | |
6140 | index 9ae4abb4110b..8644b67c48fd 100644 | |
6141 | --- a/fs/timerfd.c | |
6142 | +++ b/fs/timerfd.c | |
6143 | @@ -460,7 +460,10 @@ static int do_timerfd_settime(int ufd, int flags, | |
6144 | break; | |
6145 | } | |
6146 | spin_unlock_irq(&ctx->wqh.lock); | |
6147 | - cpu_relax(); | |
6148 | + if (isalarm(ctx)) | |
6149 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
6150 | + else | |
6151 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
6152 | } | |
6153 | ||
6154 | /* | |
6155 | diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h | |
c7c16703 | 6156 | index e861a24f06f2..b5c97d3059c7 100644 |
1a6e0f06 JK |
6157 | --- a/include/acpi/platform/aclinux.h |
6158 | +++ b/include/acpi/platform/aclinux.h | |
c7c16703 | 6159 | @@ -133,6 +133,7 @@ |
1a6e0f06 JK |
6160 | |
6161 | #define acpi_cache_t struct kmem_cache | |
6162 | #define acpi_spinlock spinlock_t * | |
6163 | +#define acpi_raw_spinlock raw_spinlock_t * | |
6164 | #define acpi_cpu_flags unsigned long | |
6165 | ||
6166 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
c7c16703 | 6167 | @@ -151,6 +152,20 @@ |
1a6e0f06 JK |
6168 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id |
6169 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
6170 | ||
6171 | +#define acpi_os_create_raw_lock(__handle) \ | |
6172 | +({ \ | |
6173 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
6174 | + \ | |
6175 | + if (lock) { \ | |
6176 | + *(__handle) = lock; \ | |
6177 | + raw_spin_lock_init(*(__handle)); \ | |
6178 | + } \ | |
6179 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
6180 | + }) | |
6181 | + | |
6182 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
6183 | + | |
6184 | + | |
6185 | /* | |
6186 | * OSL interfaces used by debugger/disassembler | |
6187 | */ | |
6188 | diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h | |
6189 | index 6f96247226a4..fa53a21263c2 100644 | |
6190 | --- a/include/asm-generic/bug.h | |
6191 | +++ b/include/asm-generic/bug.h | |
6192 | @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint, | |
6193 | # define WARN_ON_SMP(x) ({0;}) | |
6194 | #endif | |
6195 | ||
6196 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6197 | +# define BUG_ON_RT(c) BUG_ON(c) | |
6198 | +# define BUG_ON_NONRT(c) do { } while (0) | |
6199 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
6200 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
6201 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
6202 | +#else | |
6203 | +# define BUG_ON_RT(c) do { } while (0) | |
6204 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
6205 | +# define WARN_ON_RT(condition) do { } while (0) | |
6206 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
6207 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
6208 | +#endif | |
6209 | + | |
6210 | #endif /* __ASSEMBLY__ */ | |
6211 | ||
6212 | #endif | |
6213 | diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h | |
c7c16703 | 6214 | index 535ab2e13d2e..cfc246899473 100644 |
1a6e0f06 JK |
6215 | --- a/include/linux/blk-mq.h |
6216 | +++ b/include/linux/blk-mq.h | |
c7c16703 JK |
6217 | @@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) |
6218 | return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; | |
6219 | } | |
1a6e0f06 | 6220 | |
c7c16703 | 6221 | - |
1a6e0f06 | 6222 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); |
1a6e0f06 JK |
6223 | int blk_mq_request_started(struct request *rq); |
6224 | void blk_mq_start_request(struct request *rq); | |
c7c16703 | 6225 | void blk_mq_end_request(struct request *rq, int error); |
1a6e0f06 | 6226 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h |
c7c16703 | 6227 | index c47c358ba052..a99c23735725 100644 |
1a6e0f06 JK |
6228 | --- a/include/linux/blkdev.h |
6229 | +++ b/include/linux/blkdev.h | |
6230 | @@ -89,6 +89,7 @@ struct request { | |
6231 | struct list_head queuelist; | |
6232 | union { | |
6233 | struct call_single_data csd; | |
6234 | + struct work_struct work; | |
6235 | u64 fifo_time; | |
6236 | }; | |
6237 | ||
6238 | @@ -467,7 +468,7 @@ struct request_queue { | |
6239 | struct throtl_data *td; | |
6240 | #endif | |
6241 | struct rcu_head rcu_head; | |
6242 | - wait_queue_head_t mq_freeze_wq; | |
6243 | + struct swait_queue_head mq_freeze_wq; | |
6244 | struct percpu_ref q_usage_counter; | |
6245 | struct list_head all_q_node; | |
6246 | ||
6247 | diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h | |
6248 | index 8fdcb783197d..d07dbeec7bc1 100644 | |
6249 | --- a/include/linux/bottom_half.h | |
6250 | +++ b/include/linux/bottom_half.h | |
6251 | @@ -3,6 +3,39 @@ | |
6252 | ||
6253 | #include <linux/preempt.h> | |
6254 | ||
6255 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6256 | + | |
6257 | +extern void __local_bh_disable(void); | |
6258 | +extern void _local_bh_enable(void); | |
6259 | +extern void __local_bh_enable(void); | |
6260 | + | |
6261 | +static inline void local_bh_disable(void) | |
6262 | +{ | |
6263 | + __local_bh_disable(); | |
6264 | +} | |
6265 | + | |
6266 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
6267 | +{ | |
6268 | + __local_bh_disable(); | |
6269 | +} | |
6270 | + | |
6271 | +static inline void local_bh_enable(void) | |
6272 | +{ | |
6273 | + __local_bh_enable(); | |
6274 | +} | |
6275 | + | |
6276 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
6277 | +{ | |
6278 | + __local_bh_enable(); | |
6279 | +} | |
6280 | + | |
6281 | +static inline void local_bh_enable_ip(unsigned long ip) | |
6282 | +{ | |
6283 | + __local_bh_enable(); | |
6284 | +} | |
6285 | + | |
6286 | +#else | |
6287 | + | |
6288 | #ifdef CONFIG_TRACE_IRQFLAGS | |
6289 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
6290 | #else | |
6291 | @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) | |
6292 | { | |
6293 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
6294 | } | |
6295 | +#endif | |
6296 | ||
6297 | #endif /* _LINUX_BH_H */ | |
6298 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
6299 | index ebbacd14d450..be5e87f6360a 100644 | |
6300 | --- a/include/linux/buffer_head.h | |
6301 | +++ b/include/linux/buffer_head.h | |
6302 | @@ -75,8 +75,50 @@ struct buffer_head { | |
6303 | struct address_space *b_assoc_map; /* mapping this buffer is | |
6304 | associated with */ | |
6305 | atomic_t b_count; /* users using this buffer_head */ | |
6306 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6307 | + spinlock_t b_uptodate_lock; | |
6308 | +#if IS_ENABLED(CONFIG_JBD2) | |
6309 | + spinlock_t b_state_lock; | |
6310 | + spinlock_t b_journal_head_lock; | |
6311 | +#endif | |
6312 | +#endif | |
6313 | }; | |
6314 | ||
6315 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
6316 | +{ | |
6317 | + unsigned long flags; | |
6318 | + | |
6319 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6320 | + local_irq_save(flags); | |
6321 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
6322 | +#else | |
6323 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
6324 | +#endif | |
6325 | + return flags; | |
6326 | +} | |
6327 | + | |
6328 | +static inline void | |
6329 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
6330 | +{ | |
6331 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6332 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
6333 | + local_irq_restore(flags); | |
6334 | +#else | |
6335 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
6336 | +#endif | |
6337 | +} | |
6338 | + | |
6339 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
6340 | +{ | |
6341 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6342 | + spin_lock_init(&bh->b_uptodate_lock); | |
6343 | +#if IS_ENABLED(CONFIG_JBD2) | |
6344 | + spin_lock_init(&bh->b_state_lock); | |
6345 | + spin_lock_init(&bh->b_journal_head_lock); | |
6346 | +#endif | |
6347 | +#endif | |
6348 | +} | |
6349 | + | |
6350 | /* | |
6351 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
6352 | * and buffer_foo() functions. | |
6353 | diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h | |
6354 | index 5b17de62c962..56027cc01a56 100644 | |
6355 | --- a/include/linux/cgroup-defs.h | |
6356 | +++ b/include/linux/cgroup-defs.h | |
6357 | @@ -16,6 +16,7 @@ | |
6358 | #include <linux/percpu-refcount.h> | |
6359 | #include <linux/percpu-rwsem.h> | |
6360 | #include <linux/workqueue.h> | |
6361 | +#include <linux/swork.h> | |
6362 | ||
6363 | #ifdef CONFIG_CGROUPS | |
6364 | ||
6365 | @@ -137,6 +138,7 @@ struct cgroup_subsys_state { | |
6366 | /* percpu_ref killing and RCU release */ | |
6367 | struct rcu_head rcu_head; | |
6368 | struct work_struct destroy_work; | |
6369 | + struct swork_event destroy_swork; | |
6370 | }; | |
6371 | ||
6372 | /* | |
6373 | diff --git a/include/linux/completion.h b/include/linux/completion.h | |
6374 | index 5d5aaae3af43..3bca1590e29f 100644 | |
6375 | --- a/include/linux/completion.h | |
6376 | +++ b/include/linux/completion.h | |
6377 | @@ -7,8 +7,7 @@ | |
6378 | * Atomic wait-for-completion handler data structures. | |
6379 | * See kernel/sched/completion.c for details. | |
6380 | */ | |
6381 | - | |
6382 | -#include <linux/wait.h> | |
6383 | +#include <linux/swait.h> | |
6384 | ||
6385 | /* | |
6386 | * struct completion - structure used to maintain state for a "completion" | |
6387 | @@ -24,11 +23,11 @@ | |
6388 | */ | |
6389 | struct completion { | |
6390 | unsigned int done; | |
6391 | - wait_queue_head_t wait; | |
6392 | + struct swait_queue_head wait; | |
6393 | }; | |
6394 | ||
6395 | #define COMPLETION_INITIALIZER(work) \ | |
6396 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6397 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6398 | ||
6399 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
6400 | ({ init_completion(&work); work; }) | |
6401 | @@ -73,7 +72,7 @@ struct completion { | |
6402 | static inline void init_completion(struct completion *x) | |
6403 | { | |
6404 | x->done = 0; | |
6405 | - init_waitqueue_head(&x->wait); | |
6406 | + init_swait_queue_head(&x->wait); | |
6407 | } | |
6408 | ||
6409 | /** | |
6410 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | |
c7c16703 | 6411 | index e571128ad99a..5e52d28c20c1 100644 |
1a6e0f06 JK |
6412 | --- a/include/linux/cpu.h |
6413 | +++ b/include/linux/cpu.h | |
c7c16703 | 6414 | @@ -182,6 +182,8 @@ extern void get_online_cpus(void); |
1a6e0f06 JK |
6415 | extern void put_online_cpus(void); |
6416 | extern void cpu_hotplug_disable(void); | |
6417 | extern void cpu_hotplug_enable(void); | |
6418 | +extern void pin_current_cpu(void); | |
6419 | +extern void unpin_current_cpu(void); | |
6420 | #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) | |
6421 | #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) | |
6422 | #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) | |
c7c16703 | 6423 | @@ -199,6 +201,8 @@ static inline void cpu_hotplug_done(void) {} |
1a6e0f06 JK |
6424 | #define put_online_cpus() do { } while (0) |
6425 | #define cpu_hotplug_disable() do { } while (0) | |
6426 | #define cpu_hotplug_enable() do { } while (0) | |
6427 | +static inline void pin_current_cpu(void) { } | |
6428 | +static inline void unpin_current_cpu(void) { } | |
6429 | #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6430 | #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6431 | /* These aren't inline functions due to a GCC bug. */ | |
6432 | diff --git a/include/linux/dcache.h b/include/linux/dcache.h | |
c7c16703 | 6433 | index 5beed7b30561..61cab7ef458e 100644 |
1a6e0f06 JK |
6434 | --- a/include/linux/dcache.h |
6435 | +++ b/include/linux/dcache.h | |
6436 | @@ -11,6 +11,7 @@ | |
6437 | #include <linux/rcupdate.h> | |
6438 | #include <linux/lockref.h> | |
6439 | #include <linux/stringhash.h> | |
6440 | +#include <linux/wait.h> | |
6441 | ||
6442 | struct path; | |
6443 | struct vfsmount; | |
6444 | @@ -100,7 +101,7 @@ struct dentry { | |
6445 | ||
6446 | union { | |
6447 | struct list_head d_lru; /* LRU list */ | |
6448 | - wait_queue_head_t *d_wait; /* in-lookup ones only */ | |
6449 | + struct swait_queue_head *d_wait; /* in-lookup ones only */ | |
6450 | }; | |
6451 | struct list_head d_child; /* child of parent list */ | |
6452 | struct list_head d_subdirs; /* our children */ | |
6453 | @@ -230,7 +231,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op | |
6454 | extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
6455 | extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
6456 | extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, | |
6457 | - wait_queue_head_t *); | |
6458 | + struct swait_queue_head *); | |
6459 | extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
6460 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
6461 | extern struct dentry * d_exact_alias(struct dentry *, struct inode *); | |
6462 | diff --git a/include/linux/delay.h b/include/linux/delay.h | |
6463 | index a6ecb34cf547..37caab306336 100644 | |
6464 | --- a/include/linux/delay.h | |
6465 | +++ b/include/linux/delay.h | |
6466 | @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) | |
6467 | msleep(seconds * 1000); | |
6468 | } | |
6469 | ||
6470 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6471 | +extern void cpu_chill(void); | |
6472 | +#else | |
6473 | +# define cpu_chill() cpu_relax() | |
6474 | +#endif | |
6475 | + | |
6476 | #endif /* defined(_LINUX_DELAY_H) */ | |
1a6e0f06 JK |
6477 | diff --git a/include/linux/highmem.h b/include/linux/highmem.h |
6478 | index bb3f3297062a..a117a33ef72c 100644 | |
6479 | --- a/include/linux/highmem.h | |
6480 | +++ b/include/linux/highmem.h | |
6481 | @@ -7,6 +7,7 @@ | |
6482 | #include <linux/mm.h> | |
6483 | #include <linux/uaccess.h> | |
6484 | #include <linux/hardirq.h> | |
6485 | +#include <linux/sched.h> | |
6486 | ||
6487 | #include <asm/cacheflush.h> | |
6488 | ||
6489 | @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) | |
6490 | ||
6491 | static inline void *kmap_atomic(struct page *page) | |
6492 | { | |
6493 | - preempt_disable(); | |
6494 | + preempt_disable_nort(); | |
6495 | pagefault_disable(); | |
6496 | return page_address(page); | |
6497 | } | |
6498 | @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |
6499 | static inline void __kunmap_atomic(void *addr) | |
6500 | { | |
6501 | pagefault_enable(); | |
6502 | - preempt_enable(); | |
6503 | + preempt_enable_nort(); | |
6504 | } | |
6505 | ||
6506 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
6507 | @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) | |
6508 | ||
6509 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
6510 | ||
6511 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6512 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
6513 | +#endif | |
6514 | ||
6515 | static inline int kmap_atomic_idx_push(void) | |
6516 | { | |
6517 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6518 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
6519 | ||
6520 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6521 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6522 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
6523 | BUG_ON(idx >= KM_TYPE_NR); | |
6524 | -#endif | |
6525 | +# endif | |
6526 | return idx; | |
6527 | +#else | |
6528 | + current->kmap_idx++; | |
6529 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
6530 | + return current->kmap_idx - 1; | |
6531 | +#endif | |
6532 | } | |
6533 | ||
6534 | static inline int kmap_atomic_idx(void) | |
6535 | { | |
6536 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6537 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
6538 | +#else | |
6539 | + return current->kmap_idx - 1; | |
6540 | +#endif | |
6541 | } | |
6542 | ||
6543 | static inline void kmap_atomic_idx_pop(void) | |
6544 | { | |
6545 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6546 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6547 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6548 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
6549 | ||
6550 | BUG_ON(idx < 0); | |
6551 | -#else | |
6552 | +# else | |
6553 | __this_cpu_dec(__kmap_atomic_idx); | |
6554 | +# endif | |
6555 | +#else | |
6556 | + current->kmap_idx--; | |
6557 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6558 | + BUG_ON(current->kmap_idx < 0); | |
6559 | +# endif | |
6560 | #endif | |
6561 | } | |
6562 | ||
6563 | diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h | |
6564 | index 5e00f80b1535..65d0671f20b4 100644 | |
6565 | --- a/include/linux/hrtimer.h | |
6566 | +++ b/include/linux/hrtimer.h | |
6567 | @@ -87,6 +87,9 @@ enum hrtimer_restart { | |
6568 | * @function: timer expiry callback function | |
6569 | * @base: pointer to the timer base (per cpu and per clock) | |
6570 | * @state: state information (See bit values above) | |
6571 | + * @cb_entry: list entry to defer timers from hardirq context | |
6572 | + * @irqsafe: timer can run in hardirq context | |
6573 | + * @praecox: timer expiry time if expired at the time of programming | |
6574 | * @is_rel: Set if the timer was armed relative | |
6575 | * @start_pid: timer statistics field to store the pid of the task which | |
6576 | * started the timer | |
6577 | @@ -103,6 +106,11 @@ struct hrtimer { | |
6578 | enum hrtimer_restart (*function)(struct hrtimer *); | |
6579 | struct hrtimer_clock_base *base; | |
6580 | u8 state; | |
6581 | + struct list_head cb_entry; | |
6582 | + int irqsafe; | |
6583 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
6584 | + ktime_t praecox; | |
6585 | +#endif | |
6586 | u8 is_rel; | |
6587 | #ifdef CONFIG_TIMER_STATS | |
6588 | int start_pid; | |
6589 | @@ -123,11 +131,7 @@ struct hrtimer_sleeper { | |
6590 | struct task_struct *task; | |
6591 | }; | |
6592 | ||
6593 | -#ifdef CONFIG_64BIT | |
6594 | # define HRTIMER_CLOCK_BASE_ALIGN 64 | |
6595 | -#else | |
6596 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
6597 | -#endif | |
6598 | ||
6599 | /** | |
6600 | * struct hrtimer_clock_base - the timer base for a specific clock | |
6601 | @@ -136,6 +140,7 @@ struct hrtimer_sleeper { | |
6602 | * timer to a base on another cpu. | |
6603 | * @clockid: clock id for per_cpu support | |
6604 | * @active: red black tree root node for the active timers | |
6605 | + * @expired: list head for deferred timers. | |
6606 | * @get_time: function to retrieve the current time of the clock | |
6607 | * @offset: offset of this clock to the monotonic base | |
6608 | */ | |
6609 | @@ -144,6 +149,7 @@ struct hrtimer_clock_base { | |
6610 | int index; | |
6611 | clockid_t clockid; | |
6612 | struct timerqueue_head active; | |
6613 | + struct list_head expired; | |
6614 | ktime_t (*get_time)(void); | |
6615 | ktime_t offset; | |
6616 | } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
6617 | @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { | |
6618 | raw_spinlock_t lock; | |
6619 | seqcount_t seq; | |
6620 | struct hrtimer *running; | |
6621 | + struct hrtimer *running_soft; | |
6622 | unsigned int cpu; | |
6623 | unsigned int active_bases; | |
6624 | unsigned int clock_was_set_seq; | |
6625 | @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { | |
6626 | unsigned int nr_hangs; | |
6627 | unsigned int max_hang_time; | |
6628 | #endif | |
6629 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6630 | + wait_queue_head_t wait; | |
6631 | +#endif | |
6632 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
6633 | } ____cacheline_aligned; | |
6634 | ||
6635 | @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) | |
6636 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
6637 | } | |
6638 | ||
6639 | +/* Softirq preemption could deadlock timer removal */ | |
6640 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6641 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
6642 | +#else | |
6643 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
6644 | +#endif | |
6645 | + | |
6646 | /* Query timers: */ | |
6647 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
6648 | ||
6649 | @@ -436,7 +453,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) | |
6650 | * Helper function to check, whether the timer is running the callback | |
6651 | * function | |
6652 | */ | |
6653 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
6654 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
6655 | { | |
6656 | return timer->base->cpu_base->running == timer; | |
6657 | } | |
6658 | diff --git a/include/linux/idr.h b/include/linux/idr.h | |
6659 | index 083d61e92706..5899796f50cb 100644 | |
6660 | --- a/include/linux/idr.h | |
6661 | +++ b/include/linux/idr.h | |
6662 | @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); | |
6663 | * Each idr_preload() should be matched with an invocation of this | |
6664 | * function. See idr_preload() for details. | |
6665 | */ | |
6666 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6667 | +void idr_preload_end(void); | |
6668 | +#else | |
6669 | static inline void idr_preload_end(void) | |
6670 | { | |
6671 | preempt_enable(); | |
6672 | } | |
6673 | +#endif | |
6674 | ||
6675 | /** | |
6676 | * idr_find - return pointer for given id | |
6677 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | |
c7c16703 | 6678 | index 325f649d77ff..8af70bcc799b 100644 |
1a6e0f06 JK |
6679 | --- a/include/linux/init_task.h |
6680 | +++ b/include/linux/init_task.h | |
c7c16703 | 6681 | @@ -150,6 +150,12 @@ extern struct task_group root_task_group; |
1a6e0f06 JK |
6682 | # define INIT_PERF_EVENTS(tsk) |
6683 | #endif | |
6684 | ||
6685 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6686 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
6687 | +#else | |
6688 | +# define INIT_TIMER_LIST | |
6689 | +#endif | |
6690 | + | |
6691 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
6692 | # define INIT_VTIME(tsk) \ | |
6693 | .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ | |
c7c16703 | 6694 | @@ -250,6 +256,7 @@ extern struct task_group root_task_group; |
1a6e0f06 JK |
6695 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ |
6696 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
6697 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
6698 | + INIT_TIMER_LIST \ | |
6699 | .pids = { \ | |
6700 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
6701 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
6702 | diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h | |
c7c16703 | 6703 | index 72f0721f75e7..480972ae47d3 100644 |
1a6e0f06 JK |
6704 | --- a/include/linux/interrupt.h |
6705 | +++ b/include/linux/interrupt.h | |
6706 | @@ -14,6 +14,7 @@ | |
6707 | #include <linux/hrtimer.h> | |
6708 | #include <linux/kref.h> | |
6709 | #include <linux/workqueue.h> | |
6710 | +#include <linux/swork.h> | |
6711 | ||
6712 | #include <linux/atomic.h> | |
6713 | #include <asm/ptrace.h> | |
6714 | @@ -61,6 +62,7 @@ | |
6715 | * interrupt handler after suspending interrupts. For system | |
6716 | * wakeup devices users need to implement wakeup detection in | |
6717 | * their interrupt handlers. | |
6718 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
6719 | */ | |
6720 | #define IRQF_SHARED 0x00000080 | |
6721 | #define IRQF_PROBE_SHARED 0x00000100 | |
6722 | @@ -74,6 +76,7 @@ | |
6723 | #define IRQF_NO_THREAD 0x00010000 | |
6724 | #define IRQF_EARLY_RESUME 0x00020000 | |
6725 | #define IRQF_COND_SUSPEND 0x00040000 | |
6726 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
6727 | ||
6728 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
6729 | ||
6730 | @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); | |
6731 | #ifdef CONFIG_LOCKDEP | |
6732 | # define local_irq_enable_in_hardirq() do { } while (0) | |
6733 | #else | |
6734 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
6735 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
6736 | #endif | |
6737 | ||
6738 | extern void disable_irq_nosync(unsigned int irq); | |
6739 | @@ -216,6 +219,7 @@ extern void resume_device_irqs(void); | |
6740 | * struct irq_affinity_notify - context for notification of IRQ affinity changes | |
6741 | * @irq: Interrupt to which notification applies | |
6742 | * @kref: Reference count, for internal use | |
6743 | + * @swork: Swork item, for internal use | |
6744 | * @work: Work item, for internal use | |
6745 | * @notify: Function to be called on change. This will be | |
6746 | * called in process context. | |
6747 | @@ -227,7 +231,11 @@ extern void resume_device_irqs(void); | |
6748 | struct irq_affinity_notify { | |
6749 | unsigned int irq; | |
6750 | struct kref kref; | |
6751 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6752 | + struct swork_event swork; | |
6753 | +#else | |
6754 | struct work_struct work; | |
6755 | +#endif | |
6756 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
6757 | void (*release)(struct kref *ref); | |
6758 | }; | |
c7c16703 | 6759 | @@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, |
1a6e0f06 JK |
6760 | bool state); |
6761 | ||
6762 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
6763 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
6764 | extern bool force_irqthreads; | |
6765 | +# else | |
6766 | +# define force_irqthreads (true) | |
6767 | +# endif | |
6768 | #else | |
6769 | -#define force_irqthreads (0) | |
6770 | +#define force_irqthreads (false) | |
6771 | #endif | |
6772 | ||
6773 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
c7c16703 | 6774 | @@ -465,9 +477,10 @@ struct softirq_action |
1a6e0f06 JK |
6775 | void (*action)(struct softirq_action *); |
6776 | }; | |
6777 | ||
6778 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6779 | asmlinkage void do_softirq(void); | |
6780 | asmlinkage void __do_softirq(void); | |
6781 | - | |
6782 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
6783 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
6784 | void do_softirq_own_stack(void); | |
6785 | #else | |
c7c16703 | 6786 | @@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void) |
1a6e0f06 JK |
6787 | __do_softirq(); |
6788 | } | |
6789 | #endif | |
6790 | +#else | |
6791 | +extern void thread_do_softirq(void); | |
6792 | +#endif | |
6793 | ||
6794 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
6795 | extern void softirq_init(void); | |
6796 | extern void __raise_softirq_irqoff(unsigned int nr); | |
6797 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6798 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
6799 | +#else | |
6800 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
6801 | +{ | |
6802 | + __raise_softirq_irqoff(nr); | |
6803 | +} | |
6804 | +#endif | |
6805 | ||
6806 | extern void raise_softirq_irqoff(unsigned int nr); | |
6807 | extern void raise_softirq(unsigned int nr); | |
6808 | +extern void softirq_check_pending_idle(void); | |
6809 | ||
6810 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
6811 | ||
c7c16703 | 6812 | @@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) |
1a6e0f06 JK |
6813 | to be executed on some cpu at least once after this. |
6814 | * If the tasklet is already scheduled, but its execution is still not | |
6815 | started, it will be executed only once. | |
6816 | - * If this tasklet is already running on another CPU (or schedule is called | |
6817 | - from tasklet itself), it is rescheduled for later. | |
6818 | + * If this tasklet is already running on another CPU, it is rescheduled | |
6819 | + for later. | |
6820 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
6821 | * Tasklet is strictly serialized wrt itself, but not | |
6822 | wrt another tasklets. If client needs some intertask synchronization, | |
6823 | he makes it with spinlocks. | |
c7c16703 | 6824 | @@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } |
1a6e0f06 JK |
6825 | enum |
6826 | { | |
6827 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
6828 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
6829 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
6830 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
6831 | }; | |
6832 | ||
6833 | -#ifdef CONFIG_SMP | |
6834 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
6835 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
6836 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
6837 | + | |
6838 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
6839 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
6840 | { | |
6841 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
6842 | } | |
6843 | ||
6844 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
6845 | +{ | |
6846 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
6847 | +} | |
6848 | + | |
6849 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
6850 | { | |
6851 | smp_mb__before_atomic(); | |
6852 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
6853 | } | |
6854 | ||
6855 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
6856 | -{ | |
6857 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
6858 | -} | |
6859 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
6860 | + | |
6861 | #else | |
6862 | #define tasklet_trylock(t) 1 | |
6863 | +#define tasklet_tryunlock(t) 1 | |
6864 | #define tasklet_unlock_wait(t) do { } while (0) | |
6865 | #define tasklet_unlock(t) do { } while (0) | |
6866 | #endif | |
c7c16703 | 6867 | @@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) |
1a6e0f06 JK |
6868 | smp_mb(); |
6869 | } | |
6870 | ||
6871 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
6872 | -{ | |
6873 | - smp_mb__before_atomic(); | |
6874 | - atomic_dec(&t->count); | |
6875 | -} | |
6876 | - | |
6877 | +extern void tasklet_enable(struct tasklet_struct *t); | |
6878 | extern void tasklet_kill(struct tasklet_struct *t); | |
6879 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
6880 | extern void tasklet_init(struct tasklet_struct *t, | |
c7c16703 | 6881 | @@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) |
1a6e0f06 JK |
6882 | tasklet_kill(&ttimer->tasklet); |
6883 | } | |
6884 | ||
6885 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6886 | +extern void softirq_early_init(void); | |
6887 | +#else | |
6888 | +static inline void softirq_early_init(void) { } | |
6889 | +#endif | |
6890 | + | |
6891 | /* | |
6892 | * Autoprobing for irqs: | |
6893 | * | |
6894 | diff --git a/include/linux/irq.h b/include/linux/irq.h | |
c7c16703 | 6895 | index e79875574b39..177cee0c3305 100644 |
1a6e0f06 JK |
6896 | --- a/include/linux/irq.h |
6897 | +++ b/include/linux/irq.h | |
6898 | @@ -72,6 +72,7 @@ enum irqchip_irq_state; | |
6899 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
6900 | * it from the spurious interrupt detection | |
6901 | * mechanism and from core side polling. | |
6902 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
6903 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
6904 | */ | |
6905 | enum { | |
6906 | @@ -99,13 +100,14 @@ enum { | |
6907 | IRQ_PER_CPU_DEVID = (1 << 17), | |
6908 | IRQ_IS_POLLED = (1 << 18), | |
6909 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
6910 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
6911 | }; | |
6912 | ||
6913 | #define IRQF_MODIFY_MASK \ | |
6914 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
6915 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
6916 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
6917 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
6918 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
6919 | ||
6920 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
6921 | ||
6922 | diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h | |
6923 | index 47b9ebd4a74f..2543aab05daa 100644 | |
6924 | --- a/include/linux/irq_work.h | |
6925 | +++ b/include/linux/irq_work.h | |
6926 | @@ -16,6 +16,7 @@ | |
6927 | #define IRQ_WORK_BUSY 2UL | |
6928 | #define IRQ_WORK_FLAGS 3UL | |
6929 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
6930 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
6931 | ||
6932 | struct irq_work { | |
6933 | unsigned long flags; | |
6934 | @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } | |
6935 | static inline void irq_work_run(void) { } | |
6936 | #endif | |
6937 | ||
6938 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
6939 | +void irq_work_tick_soft(void); | |
6940 | +#else | |
6941 | +static inline void irq_work_tick_soft(void) { } | |
6942 | +#endif | |
6943 | + | |
6944 | #endif /* _LINUX_IRQ_WORK_H */ | |
6945 | diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h | |
c7c16703 | 6946 | index c9be57931b58..eeeb540971ae 100644 |
1a6e0f06 JK |
6947 | --- a/include/linux/irqdesc.h |
6948 | +++ b/include/linux/irqdesc.h | |
c7c16703 | 6949 | @@ -66,6 +66,7 @@ struct irq_desc { |
1a6e0f06 JK |
6950 | unsigned int irqs_unhandled; |
6951 | atomic_t threads_handled; | |
6952 | int threads_handled_last; | |
6953 | + u64 random_ip; | |
6954 | raw_spinlock_t lock; | |
6955 | struct cpumask *percpu_enabled; | |
6956 | const struct cpumask *percpu_affinity; | |
6957 | diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h | |
6958 | index 5dd1272d1ab2..9b77034f7c5e 100644 | |
6959 | --- a/include/linux/irqflags.h | |
6960 | +++ b/include/linux/irqflags.h | |
6961 | @@ -25,8 +25,6 @@ | |
6962 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | |
6963 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | |
6964 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | |
6965 | -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
6966 | -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
6967 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
6968 | #else | |
6969 | # define trace_hardirqs_on() do { } while (0) | |
6970 | @@ -39,9 +37,15 @@ | |
6971 | # define trace_softirqs_enabled(p) 0 | |
6972 | # define trace_hardirq_enter() do { } while (0) | |
6973 | # define trace_hardirq_exit() do { } while (0) | |
6974 | +# define INIT_TRACE_IRQFLAGS | |
6975 | +#endif | |
6976 | + | |
6977 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
6978 | +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
6979 | +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
6980 | +#else | |
6981 | # define lockdep_softirq_enter() do { } while (0) | |
6982 | # define lockdep_softirq_exit() do { } while (0) | |
6983 | -# define INIT_TRACE_IRQFLAGS | |
6984 | #endif | |
6985 | ||
6986 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
6987 | @@ -148,4 +152,23 @@ | |
6988 | ||
6989 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
6990 | ||
6991 | +/* | |
6992 | + * local_irq* variants depending on RT/!RT | |
6993 | + */ | |
6994 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6995 | +# define local_irq_disable_nort() do { } while (0) | |
6996 | +# define local_irq_enable_nort() do { } while (0) | |
6997 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
6998 | +# define local_irq_restore_nort(flags) (void)(flags) | |
6999 | +# define local_irq_disable_rt() local_irq_disable() | |
7000 | +# define local_irq_enable_rt() local_irq_enable() | |
7001 | +#else | |
7002 | +# define local_irq_disable_nort() local_irq_disable() | |
7003 | +# define local_irq_enable_nort() local_irq_enable() | |
7004 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
7005 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
7006 | +# define local_irq_disable_rt() do { } while (0) | |
7007 | +# define local_irq_enable_rt() do { } while (0) | |
7008 | +#endif | |
7009 | + | |
7010 | #endif | |
7011 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | |
7012 | index dfaa1f4dcb0c..d57dd06544a1 100644 | |
7013 | --- a/include/linux/jbd2.h | |
7014 | +++ b/include/linux/jbd2.h | |
7015 | @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) | |
7016 | ||
7017 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
7018 | { | |
7019 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7020 | bit_spin_lock(BH_State, &bh->b_state); | |
7021 | +#else | |
7022 | + spin_lock(&bh->b_state_lock); | |
7023 | +#endif | |
7024 | } | |
7025 | ||
7026 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
7027 | { | |
7028 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7029 | return bit_spin_trylock(BH_State, &bh->b_state); | |
7030 | +#else | |
7031 | + return spin_trylock(&bh->b_state_lock); | |
7032 | +#endif | |
7033 | } | |
7034 | ||
7035 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
7036 | { | |
7037 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7038 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
7039 | +#else | |
7040 | + return spin_is_locked(&bh->b_state_lock); | |
7041 | +#endif | |
7042 | } | |
7043 | ||
7044 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
7045 | { | |
7046 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7047 | bit_spin_unlock(BH_State, &bh->b_state); | |
7048 | +#else | |
7049 | + spin_unlock(&bh->b_state_lock); | |
7050 | +#endif | |
7051 | } | |
7052 | ||
7053 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
7054 | { | |
7055 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7056 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
7057 | +#else | |
7058 | + spin_lock(&bh->b_journal_head_lock); | |
7059 | +#endif | |
7060 | } | |
7061 | ||
7062 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
7063 | { | |
7064 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7065 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
7066 | +#else | |
7067 | + spin_unlock(&bh->b_journal_head_lock); | |
7068 | +#endif | |
7069 | } | |
7070 | ||
7071 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
7072 | diff --git a/include/linux/kdb.h b/include/linux/kdb.h | |
7073 | index 410decacff8f..0861bebfc188 100644 | |
7074 | --- a/include/linux/kdb.h | |
7075 | +++ b/include/linux/kdb.h | |
7076 | @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, | |
7077 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
7078 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
7079 | ||
7080 | +#define in_kdb_printk() (kdb_trap_printk) | |
7081 | extern void kdb_init(int level); | |
7082 | ||
7083 | /* Access to kdb specific polling devices */ | |
7084 | @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, | |
7085 | extern int kdb_unregister(char *); | |
7086 | #else /* ! CONFIG_KGDB_KDB */ | |
7087 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
7088 | +#define in_kdb_printk() (0) | |
7089 | static inline void kdb_init(int level) {} | |
7090 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
7091 | char *help, short minlen) { return 0; } | |
7092 | diff --git a/include/linux/kernel.h b/include/linux/kernel.h | |
c7c16703 | 7093 | index bc6ed52a39b9..7894d55e4998 100644 |
1a6e0f06 JK |
7094 | --- a/include/linux/kernel.h |
7095 | +++ b/include/linux/kernel.h | |
7096 | @@ -194,6 +194,9 @@ extern int _cond_resched(void); | |
7097 | */ | |
7098 | # define might_sleep() \ | |
7099 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7100 | + | |
7101 | +# define might_sleep_no_state_check() \ | |
7102 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7103 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
7104 | #else | |
7105 | static inline void ___might_sleep(const char *file, int line, | |
7106 | @@ -201,6 +204,7 @@ extern int _cond_resched(void); | |
7107 | static inline void __might_sleep(const char *file, int line, | |
7108 | int preempt_offset) { } | |
7109 | # define might_sleep() do { might_resched(); } while (0) | |
7110 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
7111 | # define sched_annotate_sleep() do { } while (0) | |
7112 | #endif | |
7113 | ||
c7c16703 | 7114 | @@ -488,6 +492,7 @@ extern enum system_states { |
1a6e0f06 JK |
7115 | SYSTEM_HALT, |
7116 | SYSTEM_POWER_OFF, | |
7117 | SYSTEM_RESTART, | |
7118 | + SYSTEM_SUSPEND, | |
7119 | } system_state; | |
7120 | ||
7121 | #define TAINT_PROPRIETARY_MODULE 0 | |
1a6e0f06 JK |
7122 | diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h |
7123 | index cb483305e1f5..4e5062316bb6 100644 | |
7124 | --- a/include/linux/list_bl.h | |
7125 | +++ b/include/linux/list_bl.h | |
7126 | @@ -2,6 +2,7 @@ | |
7127 | #define _LINUX_LIST_BL_H | |
7128 | ||
7129 | #include <linux/list.h> | |
7130 | +#include <linux/spinlock.h> | |
7131 | #include <linux/bit_spinlock.h> | |
7132 | ||
7133 | /* | |
7134 | @@ -32,13 +33,24 @@ | |
7135 | ||
7136 | struct hlist_bl_head { | |
7137 | struct hlist_bl_node *first; | |
7138 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7139 | + raw_spinlock_t lock; | |
7140 | +#endif | |
7141 | }; | |
7142 | ||
7143 | struct hlist_bl_node { | |
7144 | struct hlist_bl_node *next, **pprev; | |
7145 | }; | |
7146 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
7147 | - ((ptr)->first = NULL) | |
7148 | + | |
7149 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7150 | +#define INIT_HLIST_BL_HEAD(h) \ | |
7151 | +do { \ | |
7152 | + (h)->first = NULL; \ | |
7153 | + raw_spin_lock_init(&(h)->lock); \ | |
7154 | +} while (0) | |
7155 | +#else | |
7156 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
7157 | +#endif | |
7158 | ||
7159 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
7160 | { | |
7161 | @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) | |
7162 | ||
7163 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
7164 | { | |
7165 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7166 | bit_spin_lock(0, (unsigned long *)b); | |
7167 | +#else | |
7168 | + raw_spin_lock(&b->lock); | |
7169 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7170 | + __set_bit(0, (unsigned long *)b); | |
7171 | +#endif | |
7172 | +#endif | |
7173 | } | |
7174 | ||
7175 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
7176 | { | |
7177 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7178 | __bit_spin_unlock(0, (unsigned long *)b); | |
7179 | +#else | |
7180 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7181 | + __clear_bit(0, (unsigned long *)b); | |
7182 | +#endif | |
7183 | + raw_spin_unlock(&b->lock); | |
7184 | +#endif | |
7185 | } | |
7186 | ||
7187 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
7188 | diff --git a/include/linux/locallock.h b/include/linux/locallock.h | |
7189 | new file mode 100644 | |
7190 | index 000000000000..845c77f1a5ca | |
7191 | --- /dev/null | |
7192 | +++ b/include/linux/locallock.h | |
7193 | @@ -0,0 +1,278 @@ | |
7194 | +#ifndef _LINUX_LOCALLOCK_H | |
7195 | +#define _LINUX_LOCALLOCK_H | |
7196 | + | |
7197 | +#include <linux/percpu.h> | |
7198 | +#include <linux/spinlock.h> | |
7199 | + | |
7200 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7201 | + | |
7202 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
7203 | +# define LL_WARN(cond) WARN_ON(cond) | |
7204 | +#else | |
7205 | +# define LL_WARN(cond) do { } while (0) | |
7206 | +#endif | |
7207 | + | |
7208 | +/* | |
7209 | + * per cpu lock based substitute for local_irq_*() | |
7210 | + */ | |
7211 | +struct local_irq_lock { | |
7212 | + spinlock_t lock; | |
7213 | + struct task_struct *owner; | |
7214 | + int nestcnt; | |
7215 | + unsigned long flags; | |
7216 | +}; | |
7217 | + | |
7218 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
7219 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
7220 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
7221 | + | |
7222 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
7223 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
7224 | + | |
7225 | +#define local_irq_lock_init(lvar) \ | |
7226 | + do { \ | |
7227 | + int __cpu; \ | |
7228 | + for_each_possible_cpu(__cpu) \ | |
7229 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
7230 | + } while (0) | |
7231 | + | |
7232 | +/* | |
7233 | + * spin_lock|trylock|unlock_local flavour that does not migrate disable | |
7234 | + * used for __local_lock|trylock|unlock where get_local_var/put_local_var | |
7235 | + * already takes care of the migrate_disable/enable | |
7236 | + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. | |
7237 | + */ | |
7238 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7239 | +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) | |
7240 | +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) | |
7241 | +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) | |
7242 | +#else | |
7243 | +# define spin_lock_local(lock) spin_lock(lock) | |
7244 | +# define spin_trylock_local(lock) spin_trylock(lock) | |
7245 | +# define spin_unlock_local(lock) spin_unlock(lock) | |
7246 | +#endif | |
7247 | + | |
7248 | +static inline void __local_lock(struct local_irq_lock *lv) | |
7249 | +{ | |
7250 | + if (lv->owner != current) { | |
7251 | + spin_lock_local(&lv->lock); | |
7252 | + LL_WARN(lv->owner); | |
7253 | + LL_WARN(lv->nestcnt); | |
7254 | + lv->owner = current; | |
7255 | + } | |
7256 | + lv->nestcnt++; | |
7257 | +} | |
7258 | + | |
7259 | +#define local_lock(lvar) \ | |
7260 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
7261 | + | |
7262 | +#define local_lock_on(lvar, cpu) \ | |
7263 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
7264 | + | |
7265 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
7266 | +{ | |
7267 | + if (lv->owner != current && spin_trylock_local(&lv->lock)) { | |
7268 | + LL_WARN(lv->owner); | |
7269 | + LL_WARN(lv->nestcnt); | |
7270 | + lv->owner = current; | |
7271 | + lv->nestcnt = 1; | |
7272 | + return 1; | |
7273 | + } | |
7274 | + return 0; | |
7275 | +} | |
7276 | + | |
7277 | +#define local_trylock(lvar) \ | |
7278 | + ({ \ | |
7279 | + int __locked; \ | |
7280 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
7281 | + if (!__locked) \ | |
7282 | + put_local_var(lvar); \ | |
7283 | + __locked; \ | |
7284 | + }) | |
7285 | + | |
7286 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
7287 | +{ | |
7288 | + LL_WARN(lv->nestcnt == 0); | |
7289 | + LL_WARN(lv->owner != current); | |
7290 | + if (--lv->nestcnt) | |
7291 | + return; | |
7292 | + | |
7293 | + lv->owner = NULL; | |
7294 | + spin_unlock_local(&lv->lock); | |
7295 | +} | |
7296 | + | |
7297 | +#define local_unlock(lvar) \ | |
7298 | + do { \ | |
7299 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
7300 | + put_local_var(lvar); \ | |
7301 | + } while (0) | |
7302 | + | |
7303 | +#define local_unlock_on(lvar, cpu) \ | |
7304 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
7305 | + | |
7306 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
7307 | +{ | |
7308 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
7309 | + LL_WARN(lv->owner); | |
7310 | + LL_WARN(lv->nestcnt); | |
7311 | + lv->owner = current; | |
7312 | + lv->nestcnt = 1; | |
7313 | +} | |
7314 | + | |
7315 | +#define local_lock_irq(lvar) \ | |
7316 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
7317 | + | |
7318 | +#define local_lock_irq_on(lvar, cpu) \ | |
7319 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
7320 | + | |
7321 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
7322 | +{ | |
7323 | + LL_WARN(!lv->nestcnt); | |
7324 | + LL_WARN(lv->owner != current); | |
7325 | + lv->owner = NULL; | |
7326 | + lv->nestcnt = 0; | |
7327 | + spin_unlock_irq(&lv->lock); | |
7328 | +} | |
7329 | + | |
7330 | +#define local_unlock_irq(lvar) \ | |
7331 | + do { \ | |
7332 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
7333 | + put_local_var(lvar); \ | |
7334 | + } while (0) | |
7335 | + | |
7336 | +#define local_unlock_irq_on(lvar, cpu) \ | |
7337 | + do { \ | |
7338 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
7339 | + } while (0) | |
7340 | + | |
7341 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
7342 | +{ | |
7343 | + if (lv->owner != current) { | |
7344 | + __local_lock_irq(lv); | |
7345 | + return 0; | |
7346 | + } else { | |
7347 | + lv->nestcnt++; | |
7348 | + return 1; | |
7349 | + } | |
7350 | +} | |
7351 | + | |
7352 | +#define local_lock_irqsave(lvar, _flags) \ | |
7353 | + do { \ | |
7354 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
7355 | + put_local_var(lvar); \ | |
7356 | + _flags = __this_cpu_read(lvar.flags); \ | |
7357 | + } while (0) | |
7358 | + | |
7359 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
7360 | + do { \ | |
7361 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
7362 | + _flags = per_cpu(lvar, cpu).flags; \ | |
7363 | + } while (0) | |
7364 | + | |
7365 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
7366 | + unsigned long flags) | |
7367 | +{ | |
7368 | + LL_WARN(!lv->nestcnt); | |
7369 | + LL_WARN(lv->owner != current); | |
7370 | + if (--lv->nestcnt) | |
7371 | + return 0; | |
7372 | + | |
7373 | + lv->owner = NULL; | |
7374 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
7375 | + return 1; | |
7376 | +} | |
7377 | + | |
7378 | +#define local_unlock_irqrestore(lvar, flags) \ | |
7379 | + do { \ | |
7380 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
7381 | + put_local_var(lvar); \ | |
7382 | + } while (0) | |
7383 | + | |
7384 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
7385 | + do { \ | |
7386 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
7387 | + } while (0) | |
7388 | + | |
7389 | +#define local_spin_trylock_irq(lvar, lock) \ | |
7390 | + ({ \ | |
7391 | + int __locked; \ | |
7392 | + local_lock_irq(lvar); \ | |
7393 | + __locked = spin_trylock(lock); \ | |
7394 | + if (!__locked) \ | |
7395 | + local_unlock_irq(lvar); \ | |
7396 | + __locked; \ | |
7397 | + }) | |
7398 | + | |
7399 | +#define local_spin_lock_irq(lvar, lock) \ | |
7400 | + do { \ | |
7401 | + local_lock_irq(lvar); \ | |
7402 | + spin_lock(lock); \ | |
7403 | + } while (0) | |
7404 | + | |
7405 | +#define local_spin_unlock_irq(lvar, lock) \ | |
7406 | + do { \ | |
7407 | + spin_unlock(lock); \ | |
7408 | + local_unlock_irq(lvar); \ | |
7409 | + } while (0) | |
7410 | + | |
7411 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7412 | + do { \ | |
7413 | + local_lock_irqsave(lvar, flags); \ | |
7414 | + spin_lock(lock); \ | |
7415 | + } while (0) | |
7416 | + | |
7417 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7418 | + do { \ | |
7419 | + spin_unlock(lock); \ | |
7420 | + local_unlock_irqrestore(lvar, flags); \ | |
7421 | + } while (0) | |
7422 | + | |
7423 | +#define get_locked_var(lvar, var) \ | |
7424 | + (*({ \ | |
7425 | + local_lock(lvar); \ | |
7426 | + this_cpu_ptr(&var); \ | |
7427 | + })) | |
7428 | + | |
7429 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
7430 | + | |
7431 | +#define local_lock_cpu(lvar) \ | |
7432 | + ({ \ | |
7433 | + local_lock(lvar); \ | |
7434 | + smp_processor_id(); \ | |
7435 | + }) | |
7436 | + | |
7437 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
7438 | + | |
7439 | +#else /* PREEMPT_RT_BASE */ | |
7440 | + | |
7441 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
7442 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
7443 | + | |
7444 | +static inline void local_irq_lock_init(int lvar) { } | |
7445 | + | |
7446 | +#define local_lock(lvar) preempt_disable() | |
7447 | +#define local_unlock(lvar) preempt_enable() | |
7448 | +#define local_lock_irq(lvar) local_irq_disable() | |
7449 | +#define local_lock_irq_on(lvar, cpu) local_irq_disable() | |
7450 | +#define local_unlock_irq(lvar) local_irq_enable() | |
7451 | +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() | |
7452 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
7453 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
7454 | + | |
7455 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
7456 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
7457 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
7458 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7459 | + spin_lock_irqsave(lock, flags) | |
7460 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7461 | + spin_unlock_irqrestore(lock, flags) | |
7462 | + | |
7463 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
7464 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
7465 | + | |
7466 | +#define local_lock_cpu(lvar) get_cpu() | |
7467 | +#define local_unlock_cpu(lvar) put_cpu() | |
7468 | + | |
7469 | +#endif | |
7470 | + | |
7471 | +#endif | |
7472 | diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h | |
c7c16703 | 7473 | index 08d947fc4c59..705fb564a605 100644 |
1a6e0f06 JK |
7474 | --- a/include/linux/mm_types.h |
7475 | +++ b/include/linux/mm_types.h | |
7476 | @@ -11,6 +11,7 @@ | |
7477 | #include <linux/completion.h> | |
7478 | #include <linux/cpumask.h> | |
7479 | #include <linux/uprobes.h> | |
7480 | +#include <linux/rcupdate.h> | |
7481 | #include <linux/page-flags-layout.h> | |
7482 | #include <linux/workqueue.h> | |
7483 | #include <asm/page.h> | |
c7c16703 | 7484 | @@ -509,6 +510,9 @@ struct mm_struct { |
1a6e0f06 JK |
7485 | bool tlb_flush_pending; |
7486 | #endif | |
7487 | struct uprobes_state uprobes_state; | |
7488 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7489 | + struct rcu_head delayed_drop; | |
7490 | +#endif | |
7491 | #ifdef CONFIG_X86_INTEL_MPX | |
7492 | /* address of the bounds directory */ | |
7493 | void __user *bd_addr; | |
7494 | diff --git a/include/linux/mutex.h b/include/linux/mutex.h | |
7495 | index 2cb7531e7d7a..b3fdfc820216 100644 | |
7496 | --- a/include/linux/mutex.h | |
7497 | +++ b/include/linux/mutex.h | |
7498 | @@ -19,6 +19,17 @@ | |
7499 | #include <asm/processor.h> | |
7500 | #include <linux/osq_lock.h> | |
7501 | ||
7502 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7503 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7504 | + , .dep_map = { .name = #lockname } | |
7505 | +#else | |
7506 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7507 | +#endif | |
7508 | + | |
7509 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7510 | +# include <linux/mutex_rt.h> | |
7511 | +#else | |
7512 | + | |
7513 | /* | |
7514 | * Simple, straightforward mutexes with strict semantics: | |
7515 | * | |
7516 | @@ -99,13 +110,6 @@ do { \ | |
7517 | static inline void mutex_destroy(struct mutex *lock) {} | |
7518 | #endif | |
7519 | ||
7520 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7521 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7522 | - , .dep_map = { .name = #lockname } | |
7523 | -#else | |
7524 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7525 | -#endif | |
7526 | - | |
7527 | #define __MUTEX_INITIALIZER(lockname) \ | |
7528 | { .count = ATOMIC_INIT(1) \ | |
7529 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
7530 | @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |
7531 | extern int mutex_trylock(struct mutex *lock); | |
7532 | extern void mutex_unlock(struct mutex *lock); | |
7533 | ||
7534 | +#endif /* !PREEMPT_RT_FULL */ | |
7535 | + | |
7536 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
7537 | ||
7538 | #endif /* __LINUX_MUTEX_H */ | |
7539 | diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h | |
7540 | new file mode 100644 | |
7541 | index 000000000000..c38a44b14da5 | |
7542 | --- /dev/null | |
7543 | +++ b/include/linux/mutex_rt.h | |
7544 | @@ -0,0 +1,84 @@ | |
7545 | +#ifndef __LINUX_MUTEX_RT_H | |
7546 | +#define __LINUX_MUTEX_RT_H | |
7547 | + | |
7548 | +#ifndef __LINUX_MUTEX_H | |
7549 | +#error "Please include mutex.h" | |
7550 | +#endif | |
7551 | + | |
7552 | +#include <linux/rtmutex.h> | |
7553 | + | |
7554 | +/* FIXME: Just for __lockfunc */ | |
7555 | +#include <linux/spinlock.h> | |
7556 | + | |
7557 | +struct mutex { | |
7558 | + struct rt_mutex lock; | |
7559 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7560 | + struct lockdep_map dep_map; | |
7561 | +#endif | |
7562 | +}; | |
7563 | + | |
7564 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
7565 | + { \ | |
7566 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
7567 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
7568 | + } | |
7569 | + | |
7570 | +#define DEFINE_MUTEX(mutexname) \ | |
7571 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
7572 | + | |
7573 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
7574 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
7575 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
7576 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
7577 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
7578 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
7579 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
7580 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
7581 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
7582 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
7583 | + | |
7584 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
7585 | +#define mutex_lock(l) _mutex_lock(l) | |
7586 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
7587 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
7588 | +#define mutex_trylock(l) _mutex_trylock(l) | |
7589 | +#define mutex_unlock(l) _mutex_unlock(l) | |
7590 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) | |
7591 | + | |
7592 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7593 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
7594 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7595 | + _mutex_lock_interruptible_nested(l, s) | |
7596 | +# define mutex_lock_killable_nested(l, s) \ | |
7597 | + _mutex_lock_killable_nested(l, s) | |
7598 | + | |
7599 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
7600 | +do { \ | |
7601 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
7602 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
7603 | +} while (0) | |
7604 | + | |
7605 | +#else | |
7606 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
7607 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7608 | + _mutex_lock_interruptible(l) | |
7609 | +# define mutex_lock_killable_nested(l, s) \ | |
7610 | + _mutex_lock_killable(l) | |
7611 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
7612 | +#endif | |
7613 | + | |
7614 | +# define mutex_init(mutex) \ | |
7615 | +do { \ | |
7616 | + static struct lock_class_key __key; \ | |
7617 | + \ | |
7618 | + rt_mutex_init(&(mutex)->lock); \ | |
7619 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
7620 | +} while (0) | |
7621 | + | |
7622 | +# define __mutex_init(mutex, name, key) \ | |
7623 | +do { \ | |
7624 | + rt_mutex_init(&(mutex)->lock); \ | |
7625 | + __mutex_do_init((mutex), name, key); \ | |
7626 | +} while (0) | |
7627 | + | |
7628 | +#endif | |
7629 | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h | |
c7c16703 | 7630 | index d83590ef74a1..0ae3b6cf430c 100644 |
1a6e0f06 JK |
7631 | --- a/include/linux/netdevice.h |
7632 | +++ b/include/linux/netdevice.h | |
c7c16703 JK |
7633 | @@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t; |
7634 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); | |
7635 | ||
7636 | void __napi_schedule(struct napi_struct *n); | |
7637 | + | |
7638 | +/* | |
7639 | + * When PREEMPT_RT_FULL is defined, all device interrupt handlers | |
7640 | + * run as threads, and they can also be preempted (without PREEMPT_RT | |
7641 | + * interrupt threads can not be preempted). Which means that calling | |
7642 | + * __napi_schedule_irqoff() from an interrupt handler can be preempted | |
7643 | + * and can corrupt the napi->poll_list. | |
7644 | + */ | |
7645 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7646 | +#define __napi_schedule_irqoff(n) __napi_schedule(n) | |
7647 | +#else | |
7648 | void __napi_schedule_irqoff(struct napi_struct *n); | |
7649 | +#endif | |
7650 | ||
7651 | static inline bool napi_disable_pending(struct napi_struct *n) | |
7652 | { | |
7653 | @@ -2461,14 +2473,53 @@ void netdev_freemem(struct net_device *dev); | |
1a6e0f06 JK |
7654 | void synchronize_net(void); |
7655 | int init_dummy_netdev(struct net_device *dev); | |
7656 | ||
7657 | -DECLARE_PER_CPU(int, xmit_recursion); | |
7658 | #define XMIT_RECURSION_LIMIT 10 | |
7659 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7660 | +static inline int dev_recursion_level(void) | |
7661 | +{ | |
7662 | + return current->xmit_recursion; | |
7663 | +} | |
7664 | + | |
7665 | +static inline int xmit_rec_read(void) | |
7666 | +{ | |
7667 | + return current->xmit_recursion; | |
7668 | +} | |
7669 | + | |
7670 | +static inline void xmit_rec_inc(void) | |
7671 | +{ | |
7672 | + current->xmit_recursion++; | |
7673 | +} | |
7674 | + | |
7675 | +static inline void xmit_rec_dec(void) | |
7676 | +{ | |
7677 | + current->xmit_recursion--; | |
7678 | +} | |
7679 | + | |
7680 | +#else | |
7681 | + | |
7682 | +DECLARE_PER_CPU(int, xmit_recursion); | |
7683 | ||
7684 | static inline int dev_recursion_level(void) | |
7685 | { | |
7686 | return this_cpu_read(xmit_recursion); | |
7687 | } | |
7688 | ||
7689 | +static inline int xmit_rec_read(void) | |
7690 | +{ | |
7691 | + return __this_cpu_read(xmit_recursion); | |
7692 | +} | |
7693 | + | |
7694 | +static inline void xmit_rec_inc(void) | |
7695 | +{ | |
7696 | + __this_cpu_inc(xmit_recursion); | |
7697 | +} | |
7698 | + | |
7699 | +static inline void xmit_rec_dec(void) | |
7700 | +{ | |
7701 | + __this_cpu_dec(xmit_recursion); | |
7702 | +} | |
7703 | +#endif | |
7704 | + | |
7705 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
7706 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
7707 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | |
c7c16703 | 7708 | @@ -2851,6 +2902,7 @@ struct softnet_data { |
1a6e0f06 JK |
7709 | unsigned int dropped; |
7710 | struct sk_buff_head input_pkt_queue; | |
7711 | struct napi_struct backlog; | |
7712 | + struct sk_buff_head tofree_queue; | |
7713 | ||
7714 | }; | |
7715 | ||
7716 | diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h | |
7717 | index 2ad1a2b289b5..b4d10155af54 100644 | |
7718 | --- a/include/linux/netfilter/x_tables.h | |
7719 | +++ b/include/linux/netfilter/x_tables.h | |
7720 | @@ -4,6 +4,7 @@ | |
7721 | ||
7722 | #include <linux/netdevice.h> | |
7723 | #include <linux/static_key.h> | |
7724 | +#include <linux/locallock.h> | |
7725 | #include <uapi/linux/netfilter/x_tables.h> | |
7726 | ||
7727 | /* Test a struct->invflags and a boolean for inequality */ | |
7728 | @@ -300,6 +301,8 @@ void xt_free_table_info(struct xt_table_info *info); | |
7729 | */ | |
7730 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
7731 | ||
7732 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); | |
7733 | + | |
7734 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
7735 | * | |
7736 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
7737 | @@ -320,6 +323,9 @@ static inline unsigned int xt_write_recseq_begin(void) | |
7738 | { | |
7739 | unsigned int addend; | |
7740 | ||
7741 | + /* RT protection */ | |
7742 | + local_lock(xt_write_lock); | |
7743 | + | |
7744 | /* | |
7745 | * Low order bit of sequence is set if we already | |
7746 | * called xt_write_recseq_begin(). | |
7747 | @@ -350,6 +356,7 @@ static inline void xt_write_recseq_end(unsigned int addend) | |
7748 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
7749 | smp_wmb(); | |
7750 | __this_cpu_add(xt_recseq.sequence, addend); | |
7751 | + local_unlock(xt_write_lock); | |
7752 | } | |
7753 | ||
7754 | /* | |
7755 | diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h | |
7756 | index 810124b33327..d54ca43d571f 100644 | |
7757 | --- a/include/linux/nfs_fs.h | |
7758 | +++ b/include/linux/nfs_fs.h | |
7759 | @@ -165,7 +165,11 @@ struct nfs_inode { | |
7760 | ||
7761 | /* Readers: in-flight sillydelete RPC calls */ | |
7762 | /* Writers: rmdir */ | |
7763 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7764 | + struct semaphore rmdir_sem; | |
7765 | +#else | |
7766 | struct rw_semaphore rmdir_sem; | |
7767 | +#endif | |
7768 | ||
7769 | #if IS_ENABLED(CONFIG_NFS_V4) | |
7770 | struct nfs4_cached_acl *nfs4_acl; | |
7771 | diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h | |
c7c16703 | 7772 | index beb1e10f446e..ebaf2e7bfe29 100644 |
1a6e0f06 JK |
7773 | --- a/include/linux/nfs_xdr.h |
7774 | +++ b/include/linux/nfs_xdr.h | |
c7c16703 | 7775 | @@ -1490,7 +1490,7 @@ struct nfs_unlinkdata { |
1a6e0f06 JK |
7776 | struct nfs_removeargs args; |
7777 | struct nfs_removeres res; | |
7778 | struct dentry *dentry; | |
7779 | - wait_queue_head_t wq; | |
7780 | + struct swait_queue_head wq; | |
7781 | struct rpc_cred *cred; | |
7782 | struct nfs_fattr dir_attr; | |
7783 | long timeout; | |
7784 | diff --git a/include/linux/notifier.h b/include/linux/notifier.h | |
7785 | index 4149868de4e6..babe5b9bcb91 100644 | |
7786 | --- a/include/linux/notifier.h | |
7787 | +++ b/include/linux/notifier.h | |
7788 | @@ -6,7 +6,7 @@ | |
7789 | * | |
7790 | * Alan Cox <Alan.Cox@linux.org> | |
7791 | */ | |
7792 | - | |
7793 | + | |
7794 | #ifndef _LINUX_NOTIFIER_H | |
7795 | #define _LINUX_NOTIFIER_H | |
7796 | #include <linux/errno.h> | |
7797 | @@ -42,9 +42,7 @@ | |
7798 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
7799 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
7800 | * SRCU notifier chains should be used when the chain will be called very | |
7801 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
7802 | - * chains are slightly more difficult to use because they require special | |
7803 | - * runtime initialization. | |
7804 | + * often but notifier_blocks will seldom be removed. | |
7805 | */ | |
7806 | ||
7807 | struct notifier_block; | |
7808 | @@ -90,7 +88,7 @@ struct srcu_notifier_head { | |
7809 | (name)->head = NULL; \ | |
7810 | } while (0) | |
7811 | ||
7812 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ | |
7813 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
7814 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
7815 | #define srcu_cleanup_notifier_head(name) \ | |
7816 | cleanup_srcu_struct(&(name)->srcu); | |
7817 | @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
7818 | .head = NULL } | |
7819 | #define RAW_NOTIFIER_INIT(name) { \ | |
7820 | .head = NULL } | |
7821 | -/* srcu_notifier_heads cannot be initialized statically */ | |
7822 | + | |
7823 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
7824 | + { \ | |
7825 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
7826 | + .head = NULL, \ | |
7827 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
7828 | + } | |
7829 | ||
7830 | #define ATOMIC_NOTIFIER_HEAD(name) \ | |
7831 | struct atomic_notifier_head name = \ | |
7832 | @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
7833 | struct raw_notifier_head name = \ | |
7834 | RAW_NOTIFIER_INIT(name) | |
7835 | ||
7836 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
7837 | + static DEFINE_PER_CPU(struct srcu_struct_array, \ | |
7838 | + name##_head_srcu_array); \ | |
7839 | + mod struct srcu_notifier_head name = \ | |
7840 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) | |
7841 | + | |
7842 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
7843 | + _SRCU_NOTIFIER_HEAD(name, ) | |
7844 | + | |
7845 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
7846 | + _SRCU_NOTIFIER_HEAD(name, static) | |
7847 | + | |
7848 | #ifdef __KERNEL__ | |
7849 | ||
7850 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | |
7851 | @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret) | |
7852 | ||
7853 | /* | |
7854 | * Declared notifiers so far. I can imagine quite a few more chains | |
7855 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
7856 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
7857 | * device units up), device [un]mount chain, module load/unload chain, | |
7858 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
7859 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
7860 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
7861 | */ | |
7862 | - | |
7863 | + | |
7864 | /* CPU notfiers are defined in include/linux/cpu.h. */ | |
7865 | ||
7866 | /* netdevice notifiers are defined in include/linux/netdevice.h */ | |
c7c16703 JK |
7867 | diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h |
7868 | index 5b2e6159b744..ea940f451606 100644 | |
7869 | --- a/include/linux/percpu-rwsem.h | |
7870 | +++ b/include/linux/percpu-rwsem.h | |
7871 | @@ -4,7 +4,7 @@ | |
7872 | #include <linux/atomic.h> | |
7873 | #include <linux/rwsem.h> | |
7874 | #include <linux/percpu.h> | |
7875 | -#include <linux/wait.h> | |
7876 | +#include <linux/swait.h> | |
7877 | #include <linux/rcu_sync.h> | |
7878 | #include <linux/lockdep.h> | |
7879 | ||
7880 | @@ -12,7 +12,7 @@ struct percpu_rw_semaphore { | |
7881 | struct rcu_sync rss; | |
7882 | unsigned int __percpu *read_count; | |
7883 | struct rw_semaphore rw_sem; | |
7884 | - wait_queue_head_t writer; | |
7885 | + struct swait_queue_head writer; | |
7886 | int readers_block; | |
7887 | }; | |
7888 | ||
7889 | @@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \ | |
7890 | .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ | |
7891 | .read_count = &__percpu_rwsem_rc_##name, \ | |
7892 | .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ | |
7893 | - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
7894 | + .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
7895 | } | |
7896 | ||
7897 | extern int __percpu_down_read(struct percpu_rw_semaphore *, int); | |
7898 | extern void __percpu_up_read(struct percpu_rw_semaphore *); | |
7899 | ||
7900 | -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) | |
7901 | +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
7902 | { | |
7903 | might_sleep(); | |
7904 | ||
7905 | @@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * | |
7906 | __this_cpu_inc(*sem->read_count); | |
7907 | if (unlikely(!rcu_sync_is_idle(&sem->rss))) | |
7908 | __percpu_down_read(sem, false); /* Unconditional memory barrier */ | |
7909 | - barrier(); | |
7910 | /* | |
7911 | - * The barrier() prevents the compiler from | |
7912 | + * The preempt_enable() prevents the compiler from | |
7913 | * bleeding the critical section out. | |
7914 | */ | |
7915 | -} | |
7916 | - | |
7917 | -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
7918 | -{ | |
7919 | - percpu_down_read_preempt_disable(sem); | |
7920 | preempt_enable(); | |
7921 | } | |
7922 | ||
7923 | @@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) | |
7924 | return ret; | |
7925 | } | |
7926 | ||
7927 | -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) | |
7928 | +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
7929 | { | |
7930 | - /* | |
7931 | - * The barrier() prevents the compiler from | |
7932 | - * bleeding the critical section out. | |
7933 | - */ | |
7934 | - barrier(); | |
7935 | + preempt_disable(); | |
7936 | /* | |
7937 | * Same as in percpu_down_read(). | |
7938 | */ | |
7939 | @@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem | |
7940 | rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); | |
7941 | } | |
7942 | ||
7943 | -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
7944 | -{ | |
7945 | - preempt_disable(); | |
7946 | - percpu_up_read_preempt_enable(sem); | |
7947 | -} | |
7948 | - | |
7949 | extern void percpu_down_write(struct percpu_rw_semaphore *); | |
7950 | extern void percpu_up_write(struct percpu_rw_semaphore *); | |
7951 | ||
1a6e0f06 JK |
7952 | diff --git a/include/linux/percpu.h b/include/linux/percpu.h |
7953 | index 56939d3f6e53..1c7e33fc83e4 100644 | |
7954 | --- a/include/linux/percpu.h | |
7955 | +++ b/include/linux/percpu.h | |
7956 | @@ -18,6 +18,35 @@ | |
7957 | #define PERCPU_MODULE_RESERVE 0 | |
7958 | #endif | |
7959 | ||
7960 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7961 | + | |
7962 | +#define get_local_var(var) (*({ \ | |
7963 | + migrate_disable(); \ | |
7964 | + this_cpu_ptr(&var); })) | |
7965 | + | |
7966 | +#define put_local_var(var) do { \ | |
7967 | + (void)&(var); \ | |
7968 | + migrate_enable(); \ | |
7969 | +} while (0) | |
7970 | + | |
7971 | +# define get_local_ptr(var) ({ \ | |
7972 | + migrate_disable(); \ | |
7973 | + this_cpu_ptr(var); }) | |
7974 | + | |
7975 | +# define put_local_ptr(var) do { \ | |
7976 | + (void)(var); \ | |
7977 | + migrate_enable(); \ | |
7978 | +} while (0) | |
7979 | + | |
7980 | +#else | |
7981 | + | |
7982 | +#define get_local_var(var) get_cpu_var(var) | |
7983 | +#define put_local_var(var) put_cpu_var(var) | |
7984 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
7985 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
7986 | + | |
7987 | +#endif | |
7988 | + | |
7989 | /* minimum unit size, also is the maximum supported allocation size */ | |
7990 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
7991 | ||
7992 | diff --git a/include/linux/pid.h b/include/linux/pid.h | |
7993 | index 23705a53abba..2cc64b779f03 100644 | |
7994 | --- a/include/linux/pid.h | |
7995 | +++ b/include/linux/pid.h | |
7996 | @@ -2,6 +2,7 @@ | |
7997 | #define _LINUX_PID_H | |
7998 | ||
7999 | #include <linux/rcupdate.h> | |
8000 | +#include <linux/atomic.h> | |
8001 | ||
8002 | enum pid_type | |
8003 | { | |
8004 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h | |
8005 | index 75e4e30677f1..1cfb1cb72354 100644 | |
8006 | --- a/include/linux/preempt.h | |
8007 | +++ b/include/linux/preempt.h | |
8008 | @@ -50,7 +50,11 @@ | |
8009 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
8010 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
8011 | ||
8012 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8013 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8014 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8015 | +#else | |
8016 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
8017 | +#endif | |
8018 | ||
8019 | /* We use the MSB mostly because its available */ | |
8020 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
8021 | @@ -59,9 +63,15 @@ | |
8022 | #include <asm/preempt.h> | |
8023 | ||
8024 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | |
8025 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8026 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
8027 | | NMI_MASK)) | |
8028 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8029 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8030 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8031 | +#else | |
8032 | +# define softirq_count() (0UL) | |
8033 | +extern int in_serving_softirq(void); | |
8034 | +#endif | |
8035 | ||
8036 | /* | |
8037 | * Are we doing bottom half or hardware interrupt processing? | |
8038 | @@ -72,7 +82,6 @@ | |
8039 | #define in_irq() (hardirq_count()) | |
8040 | #define in_softirq() (softirq_count()) | |
8041 | #define in_interrupt() (irq_count()) | |
8042 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8043 | ||
8044 | /* | |
8045 | * Are we in NMI context? | |
8046 | @@ -91,7 +100,11 @@ | |
8047 | /* | |
8048 | * The preempt_count offset after spin_lock() | |
8049 | */ | |
8050 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
8051 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
8052 | +#else | |
8053 | +#define PREEMPT_LOCK_OFFSET 0 | |
8054 | +#endif | |
8055 | ||
8056 | /* | |
8057 | * The preempt_count offset needed for things like: | |
8058 | @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val); | |
8059 | #define preempt_count_inc() preempt_count_add(1) | |
8060 | #define preempt_count_dec() preempt_count_sub(1) | |
8061 | ||
8062 | +#ifdef CONFIG_PREEMPT_LAZY | |
8063 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
8064 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
8065 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
8066 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
8067 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
8068 | +#else | |
8069 | +#define add_preempt_lazy_count(val) do { } while (0) | |
8070 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
8071 | +#define inc_preempt_lazy_count() do { } while (0) | |
8072 | +#define dec_preempt_lazy_count() do { } while (0) | |
8073 | +#define preempt_lazy_count() (0) | |
8074 | +#endif | |
8075 | + | |
8076 | #ifdef CONFIG_PREEMPT_COUNT | |
8077 | ||
8078 | #define preempt_disable() \ | |
8079 | @@ -148,13 +175,25 @@ do { \ | |
8080 | barrier(); \ | |
8081 | } while (0) | |
8082 | ||
8083 | +#define preempt_lazy_disable() \ | |
8084 | +do { \ | |
8085 | + inc_preempt_lazy_count(); \ | |
8086 | + barrier(); \ | |
8087 | +} while (0) | |
8088 | + | |
8089 | #define sched_preempt_enable_no_resched() \ | |
8090 | do { \ | |
8091 | barrier(); \ | |
8092 | preempt_count_dec(); \ | |
8093 | } while (0) | |
8094 | ||
8095 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8096 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8097 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8098 | +# define preempt_check_resched_rt() preempt_check_resched() | |
8099 | +#else | |
8100 | +# define preempt_enable_no_resched() preempt_enable() | |
8101 | +# define preempt_check_resched_rt() barrier(); | |
8102 | +#endif | |
8103 | ||
8104 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | |
8105 | ||
8106 | @@ -179,6 +218,13 @@ do { \ | |
8107 | __preempt_schedule(); \ | |
8108 | } while (0) | |
8109 | ||
8110 | +#define preempt_lazy_enable() \ | |
8111 | +do { \ | |
8112 | + dec_preempt_lazy_count(); \ | |
8113 | + barrier(); \ | |
8114 | + preempt_check_resched(); \ | |
8115 | +} while (0) | |
8116 | + | |
8117 | #else /* !CONFIG_PREEMPT */ | |
8118 | #define preempt_enable() \ | |
8119 | do { \ | |
8120 | @@ -224,6 +270,7 @@ do { \ | |
8121 | #define preempt_disable_notrace() barrier() | |
8122 | #define preempt_enable_no_resched_notrace() barrier() | |
8123 | #define preempt_enable_notrace() barrier() | |
8124 | +#define preempt_check_resched_rt() barrier() | |
8125 | #define preemptible() 0 | |
8126 | ||
8127 | #endif /* CONFIG_PREEMPT_COUNT */ | |
8128 | @@ -244,10 +291,31 @@ do { \ | |
8129 | } while (0) | |
8130 | #define preempt_fold_need_resched() \ | |
8131 | do { \ | |
8132 | - if (tif_need_resched()) \ | |
8133 | + if (tif_need_resched_now()) \ | |
8134 | set_preempt_need_resched(); \ | |
8135 | } while (0) | |
8136 | ||
8137 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8138 | +# define preempt_disable_rt() preempt_disable() | |
8139 | +# define preempt_enable_rt() preempt_enable() | |
8140 | +# define preempt_disable_nort() barrier() | |
8141 | +# define preempt_enable_nort() barrier() | |
8142 | +# ifdef CONFIG_SMP | |
8143 | + extern void migrate_disable(void); | |
8144 | + extern void migrate_enable(void); | |
8145 | +# else /* CONFIG_SMP */ | |
8146 | +# define migrate_disable() barrier() | |
8147 | +# define migrate_enable() barrier() | |
8148 | +# endif /* CONFIG_SMP */ | |
8149 | +#else | |
8150 | +# define preempt_disable_rt() barrier() | |
8151 | +# define preempt_enable_rt() barrier() | |
8152 | +# define preempt_disable_nort() preempt_disable() | |
8153 | +# define preempt_enable_nort() preempt_enable() | |
8154 | +# define migrate_disable() preempt_disable() | |
8155 | +# define migrate_enable() preempt_enable() | |
8156 | +#endif | |
8157 | + | |
8158 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
8159 | ||
8160 | struct preempt_notifier; | |
8161 | diff --git a/include/linux/printk.h b/include/linux/printk.h | |
c7c16703 | 8162 | index eac1af8502bb..37e647af0b0b 100644 |
1a6e0f06 JK |
8163 | --- a/include/linux/printk.h |
8164 | +++ b/include/linux/printk.h | |
c7c16703 | 8165 | @@ -126,9 +126,11 @@ struct va_format { |
1a6e0f06 JK |
8166 | #ifdef CONFIG_EARLY_PRINTK |
8167 | extern asmlinkage __printf(1, 2) | |
8168 | void early_printk(const char *fmt, ...); | |
8169 | +extern void printk_kill(void); | |
8170 | #else | |
8171 | static inline __printf(1, 2) __cold | |
8172 | void early_printk(const char *s, ...) { } | |
8173 | +static inline void printk_kill(void) { } | |
8174 | #endif | |
8175 | ||
8176 | #ifdef CONFIG_PRINTK_NMI | |
8177 | diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h | |
c7c16703 | 8178 | index af3581b8a451..f87f87dec84c 100644 |
1a6e0f06 JK |
8179 | --- a/include/linux/radix-tree.h |
8180 | +++ b/include/linux/radix-tree.h | |
8181 | @@ -289,9 +289,19 @@ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, | |
8182 | unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, | |
8183 | void ***results, unsigned long *indices, | |
8184 | unsigned long first_index, unsigned int max_items); | |
8185 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8186 | +static inline int radix_tree_preload(gfp_t gm) { return 0; } | |
8187 | +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; } | |
8188 | +static inline int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
8189 | +{ | |
8190 | + return 0; | |
8191 | +}; | |
8192 | + | |
8193 | +#else | |
8194 | int radix_tree_preload(gfp_t gfp_mask); | |
8195 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
8196 | int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); | |
8197 | +#endif | |
8198 | void radix_tree_init(void); | |
8199 | void *radix_tree_tag_set(struct radix_tree_root *root, | |
8200 | unsigned long index, unsigned int tag); | |
8201 | @@ -316,7 +326,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); | |
8202 | ||
8203 | static inline void radix_tree_preload_end(void) | |
8204 | { | |
8205 | - preempt_enable(); | |
8206 | + preempt_enable_nort(); | |
8207 | } | |
8208 | ||
8209 | /** | |
8210 | diff --git a/include/linux/random.h b/include/linux/random.h | |
c7c16703 | 8211 | index 7bd2403e4fef..b2df7148a42b 100644 |
1a6e0f06 JK |
8212 | --- a/include/linux/random.h |
8213 | +++ b/include/linux/random.h | |
c7c16703 JK |
8214 | @@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {} |
8215 | ||
1a6e0f06 | 8216 | extern void add_input_randomness(unsigned int type, unsigned int code, |
c7c16703 JK |
8217 | unsigned int value) __latent_entropy; |
8218 | -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; | |
8219 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; | |
1a6e0f06 JK |
8220 | |
8221 | extern void get_random_bytes(void *buf, int nbytes); | |
8222 | extern int add_random_ready_callback(struct random_ready_callback *rdy); | |
8223 | diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h | |
8224 | index e585018498d5..25c64474fc27 100644 | |
8225 | --- a/include/linux/rbtree.h | |
8226 | +++ b/include/linux/rbtree.h | |
8227 | @@ -31,7 +31,7 @@ | |
8228 | ||
8229 | #include <linux/kernel.h> | |
8230 | #include <linux/stddef.h> | |
8231 | -#include <linux/rcupdate.h> | |
8232 | +#include <linux/rcu_assign_pointer.h> | |
8233 | ||
8234 | struct rb_node { | |
8235 | unsigned long __rb_parent_color; | |
8236 | diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h | |
8237 | index d076183e49be..36bfb4dd57ae 100644 | |
8238 | --- a/include/linux/rbtree_augmented.h | |
8239 | +++ b/include/linux/rbtree_augmented.h | |
8240 | @@ -26,6 +26,7 @@ | |
8241 | ||
8242 | #include <linux/compiler.h> | |
8243 | #include <linux/rbtree.h> | |
8244 | +#include <linux/rcupdate.h> | |
8245 | ||
8246 | /* | |
8247 | * Please note - only struct rb_augment_callbacks and the prototypes for | |
8248 | diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h | |
8249 | new file mode 100644 | |
8250 | index 000000000000..7066962a4379 | |
8251 | --- /dev/null | |
8252 | +++ b/include/linux/rcu_assign_pointer.h | |
8253 | @@ -0,0 +1,54 @@ | |
8254 | +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ | |
8255 | +#define __LINUX_RCU_ASSIGN_POINTER_H__ | |
8256 | +#include <linux/compiler.h> | |
8257 | +#include <asm/barrier.h> | |
8258 | + | |
8259 | +/** | |
8260 | + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8261 | + * @v: The value to statically initialize with. | |
8262 | + */ | |
8263 | +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8264 | + | |
8265 | +/** | |
8266 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
8267 | + * @p: pointer to assign to | |
8268 | + * @v: value to assign (publish) | |
8269 | + * | |
8270 | + * Assigns the specified value to the specified RCU-protected | |
8271 | + * pointer, ensuring that any concurrent RCU readers will see | |
8272 | + * any prior initialization. | |
8273 | + * | |
8274 | + * Inserts memory barriers on architectures that require them | |
8275 | + * (which is most of them), and also prevents the compiler from | |
8276 | + * reordering the code that initializes the structure after the pointer | |
8277 | + * assignment. More importantly, this call documents which pointers | |
8278 | + * will be dereferenced by RCU read-side code. | |
8279 | + * | |
8280 | + * In some special cases, you may use RCU_INIT_POINTER() instead | |
8281 | + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8282 | + * to the fact that it does not constrain either the CPU or the compiler. | |
8283 | + * That said, using RCU_INIT_POINTER() when you should have used | |
8284 | + * rcu_assign_pointer() is a very bad thing that results in | |
8285 | + * impossible-to-diagnose memory corruption. So please be careful. | |
8286 | + * See the RCU_INIT_POINTER() comment header for details. | |
8287 | + * | |
8288 | + * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8289 | + * once, appearances notwithstanding. One of the "extra" evaluations | |
8290 | + * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8291 | + * neither of which actually execute the argument. As with most cpp | |
8292 | + * macros, this execute-arguments-only-once property is important, so | |
8293 | + * please be careful when making changes to rcu_assign_pointer() and the | |
8294 | + * other macros that it invokes. | |
8295 | + */ | |
8296 | +#define rcu_assign_pointer(p, v) \ | |
8297 | +({ \ | |
8298 | + uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8299 | + \ | |
8300 | + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8301 | + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8302 | + else \ | |
8303 | + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8304 | + _r_a_p__v; \ | |
8305 | +}) | |
8306 | + | |
8307 | +#endif | |
8308 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | |
c7c16703 | 8309 | index 321f9ed552a9..a52a110bf815 100644 |
1a6e0f06 JK |
8310 | --- a/include/linux/rcupdate.h |
8311 | +++ b/include/linux/rcupdate.h | |
8312 | @@ -46,6 +46,7 @@ | |
8313 | #include <linux/compiler.h> | |
8314 | #include <linux/ktime.h> | |
8315 | #include <linux/irqflags.h> | |
8316 | +#include <linux/rcu_assign_pointer.h> | |
8317 | ||
8318 | #include <asm/barrier.h> | |
8319 | ||
8320 | @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head, | |
8321 | ||
8322 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8323 | ||
8324 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8325 | +#define call_rcu_bh call_rcu | |
8326 | +#else | |
8327 | /** | |
8328 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
8329 | * @head: structure to be used for queueing the RCU updates. | |
8330 | @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head, | |
8331 | */ | |
8332 | void call_rcu_bh(struct rcu_head *head, | |
8333 | rcu_callback_t func); | |
8334 | +#endif | |
8335 | ||
8336 | /** | |
8337 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | |
8338 | @@ -301,6 +306,11 @@ void synchronize_rcu(void); | |
8339 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
8340 | */ | |
8341 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
8342 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8343 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8344 | +#else | |
8345 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
8346 | +#endif | |
8347 | ||
8348 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | |
8349 | ||
8350 | @@ -326,6 +336,8 @@ static inline int rcu_preempt_depth(void) | |
8351 | return 0; | |
8352 | } | |
8353 | ||
8354 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8355 | + | |
8356 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8357 | ||
8358 | /* Internal to kernel */ | |
c7c16703 | 8359 | @@ -501,7 +513,14 @@ extern struct lockdep_map rcu_callback_map; |
1a6e0f06 JK |
8360 | int debug_lockdep_rcu_enabled(void); |
8361 | ||
8362 | int rcu_read_lock_held(void); | |
8363 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8364 | +static inline int rcu_read_lock_bh_held(void) | |
8365 | +{ | |
8366 | + return rcu_read_lock_held(); | |
8367 | +} | |
8368 | +#else | |
8369 | int rcu_read_lock_bh_held(void); | |
8370 | +#endif | |
8371 | ||
8372 | /** | |
8373 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
c7c16703 | 8374 | @@ -622,54 +641,6 @@ static inline void rcu_preempt_sleep_check(void) |
1a6e0f06 JK |
8375 | }) |
8376 | ||
8377 | /** | |
8378 | - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8379 | - * @v: The value to statically initialize with. | |
8380 | - */ | |
8381 | -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8382 | - | |
8383 | -/** | |
8384 | - * rcu_assign_pointer() - assign to RCU-protected pointer | |
8385 | - * @p: pointer to assign to | |
8386 | - * @v: value to assign (publish) | |
8387 | - * | |
8388 | - * Assigns the specified value to the specified RCU-protected | |
8389 | - * pointer, ensuring that any concurrent RCU readers will see | |
8390 | - * any prior initialization. | |
8391 | - * | |
8392 | - * Inserts memory barriers on architectures that require them | |
8393 | - * (which is most of them), and also prevents the compiler from | |
8394 | - * reordering the code that initializes the structure after the pointer | |
8395 | - * assignment. More importantly, this call documents which pointers | |
8396 | - * will be dereferenced by RCU read-side code. | |
8397 | - * | |
8398 | - * In some special cases, you may use RCU_INIT_POINTER() instead | |
8399 | - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8400 | - * to the fact that it does not constrain either the CPU or the compiler. | |
8401 | - * That said, using RCU_INIT_POINTER() when you should have used | |
8402 | - * rcu_assign_pointer() is a very bad thing that results in | |
8403 | - * impossible-to-diagnose memory corruption. So please be careful. | |
8404 | - * See the RCU_INIT_POINTER() comment header for details. | |
8405 | - * | |
8406 | - * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8407 | - * once, appearances notwithstanding. One of the "extra" evaluations | |
8408 | - * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8409 | - * neither of which actually execute the argument. As with most cpp | |
8410 | - * macros, this execute-arguments-only-once property is important, so | |
8411 | - * please be careful when making changes to rcu_assign_pointer() and the | |
8412 | - * other macros that it invokes. | |
8413 | - */ | |
8414 | -#define rcu_assign_pointer(p, v) \ | |
8415 | -({ \ | |
8416 | - uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8417 | - \ | |
8418 | - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8419 | - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8420 | - else \ | |
8421 | - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8422 | - _r_a_p__v; \ | |
8423 | -}) | |
8424 | - | |
8425 | -/** | |
8426 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | |
8427 | * @p: The pointer to read | |
8428 | * | |
c7c16703 | 8429 | @@ -947,10 +918,14 @@ static inline void rcu_read_unlock(void) |
1a6e0f06 JK |
8430 | static inline void rcu_read_lock_bh(void) |
8431 | { | |
8432 | local_bh_disable(); | |
8433 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8434 | + rcu_read_lock(); | |
8435 | +#else | |
8436 | __acquire(RCU_BH); | |
8437 | rcu_lock_acquire(&rcu_bh_lock_map); | |
8438 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8439 | "rcu_read_lock_bh() used illegally while idle"); | |
8440 | +#endif | |
8441 | } | |
8442 | ||
8443 | /* | |
c7c16703 | 8444 | @@ -960,10 +935,14 @@ static inline void rcu_read_lock_bh(void) |
1a6e0f06 JK |
8445 | */ |
8446 | static inline void rcu_read_unlock_bh(void) | |
8447 | { | |
8448 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8449 | + rcu_read_unlock(); | |
8450 | +#else | |
8451 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8452 | "rcu_read_unlock_bh() used illegally while idle"); | |
8453 | rcu_lock_release(&rcu_bh_lock_map); | |
8454 | __release(RCU_BH); | |
8455 | +#endif | |
8456 | local_bh_enable(); | |
8457 | } | |
8458 | ||
8459 | diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h | |
8460 | index 63a4e4cf40a5..08ab12df2863 100644 | |
8461 | --- a/include/linux/rcutree.h | |
8462 | +++ b/include/linux/rcutree.h | |
8463 | @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) | |
8464 | rcu_note_context_switch(); | |
8465 | } | |
8466 | ||
8467 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8468 | +# define synchronize_rcu_bh synchronize_rcu | |
8469 | +#else | |
8470 | void synchronize_rcu_bh(void); | |
8471 | +#endif | |
8472 | void synchronize_sched_expedited(void); | |
8473 | void synchronize_rcu_expedited(void); | |
8474 | ||
8475 | @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |
8476 | } | |
8477 | ||
8478 | void rcu_barrier(void); | |
8479 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8480 | +# define rcu_barrier_bh rcu_barrier | |
8481 | +#else | |
8482 | void rcu_barrier_bh(void); | |
8483 | +#endif | |
8484 | void rcu_barrier_sched(void); | |
8485 | unsigned long get_state_synchronize_rcu(void); | |
8486 | void cond_synchronize_rcu(unsigned long oldstate); | |
8487 | @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate); | |
8488 | extern unsigned long rcutorture_testseq; | |
8489 | extern unsigned long rcutorture_vernum; | |
8490 | unsigned long rcu_batches_started(void); | |
8491 | -unsigned long rcu_batches_started_bh(void); | |
8492 | unsigned long rcu_batches_started_sched(void); | |
8493 | unsigned long rcu_batches_completed(void); | |
8494 | -unsigned long rcu_batches_completed_bh(void); | |
8495 | unsigned long rcu_batches_completed_sched(void); | |
8496 | unsigned long rcu_exp_batches_completed(void); | |
8497 | unsigned long rcu_exp_batches_completed_sched(void); | |
8498 | void show_rcu_gp_kthreads(void); | |
8499 | ||
8500 | void rcu_force_quiescent_state(void); | |
8501 | -void rcu_bh_force_quiescent_state(void); | |
8502 | void rcu_sched_force_quiescent_state(void); | |
8503 | ||
8504 | void rcu_idle_enter(void); | |
8505 | @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly; | |
8506 | ||
8507 | bool rcu_is_watching(void); | |
8508 | ||
8509 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8510 | +void rcu_bh_force_quiescent_state(void); | |
8511 | +unsigned long rcu_batches_started_bh(void); | |
8512 | +unsigned long rcu_batches_completed_bh(void); | |
8513 | +#else | |
8514 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state | |
8515 | +# define rcu_batches_completed_bh rcu_batches_completed | |
8516 | +# define rcu_batches_started_bh rcu_batches_completed | |
8517 | +#endif | |
8518 | + | |
8519 | void rcu_all_qs(void); | |
8520 | ||
8521 | /* RCUtree hotplug events */ | |
8522 | diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h | |
8523 | index 1abba5ce2a2f..30211c627511 100644 | |
8524 | --- a/include/linux/rtmutex.h | |
8525 | +++ b/include/linux/rtmutex.h | |
8526 | @@ -13,11 +13,15 @@ | |
8527 | #define __LINUX_RT_MUTEX_H | |
8528 | ||
8529 | #include <linux/linkage.h> | |
8530 | +#include <linux/spinlock_types_raw.h> | |
8531 | #include <linux/rbtree.h> | |
8532 | -#include <linux/spinlock_types.h> | |
8533 | ||
8534 | extern int max_lock_depth; /* for sysctl */ | |
8535 | ||
8536 | +#ifdef CONFIG_DEBUG_MUTEXES | |
8537 | +#include <linux/debug_locks.h> | |
8538 | +#endif | |
8539 | + | |
8540 | /** | |
8541 | * The rt_mutex structure | |
8542 | * | |
8543 | @@ -31,8 +35,8 @@ struct rt_mutex { | |
8544 | struct rb_root waiters; | |
8545 | struct rb_node *waiters_leftmost; | |
8546 | struct task_struct *owner; | |
8547 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8548 | int save_state; | |
8549 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8550 | const char *name, *file; | |
8551 | int line; | |
8552 | void *magic; | |
8553 | @@ -55,22 +59,33 @@ struct hrtimer_sleeper; | |
8554 | # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) | |
8555 | #endif | |
8556 | ||
8557 | +# define rt_mutex_init(mutex) \ | |
8558 | + do { \ | |
8559 | + raw_spin_lock_init(&(mutex)->wait_lock); \ | |
8560 | + __rt_mutex_init(mutex, #mutex); \ | |
8561 | + } while (0) | |
8562 | + | |
8563 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
8564 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
8565 | , .name = #mutexname, .file = __FILE__, .line = __LINE__ | |
8566 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) | |
8567 | extern void rt_mutex_debug_task_free(struct task_struct *tsk); | |
8568 | #else | |
8569 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8570 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) | |
8571 | # define rt_mutex_debug_task_free(t) do { } while (0) | |
8572 | #endif | |
8573 | ||
8574 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8575 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8576 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8577 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8578 | , .waiters = RB_ROOT \ | |
8579 | , .owner = NULL \ | |
8580 | - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} | |
8581 | + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8582 | + | |
8583 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8584 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
8585 | + | |
8586 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ | |
8587 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8588 | + , .save_state = 1 } | |
8589 | ||
8590 | #define DEFINE_RT_MUTEX(mutexname) \ | |
8591 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
8592 | @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); | |
8593 | ||
8594 | extern void rt_mutex_lock(struct rt_mutex *lock); | |
8595 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); | |
8596 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
8597 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
8598 | struct hrtimer_sleeper *timeout); | |
8599 | ||
8600 | diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h | |
8601 | new file mode 100644 | |
8602 | index 000000000000..49ed2d45d3be | |
8603 | --- /dev/null | |
8604 | +++ b/include/linux/rwlock_rt.h | |
8605 | @@ -0,0 +1,99 @@ | |
8606 | +#ifndef __LINUX_RWLOCK_RT_H | |
8607 | +#define __LINUX_RWLOCK_RT_H | |
8608 | + | |
8609 | +#ifndef __LINUX_SPINLOCK_H | |
8610 | +#error Do not include directly. Use spinlock.h | |
8611 | +#endif | |
8612 | + | |
8613 | +#define rwlock_init(rwl) \ | |
8614 | +do { \ | |
8615 | + static struct lock_class_key __key; \ | |
8616 | + \ | |
8617 | + rt_mutex_init(&(rwl)->lock); \ | |
8618 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
8619 | +} while (0) | |
8620 | + | |
8621 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
8622 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
8623 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
8624 | +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); | |
8625 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
8626 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
8627 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
8628 | +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); | |
8629 | +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); | |
8630 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
8631 | + | |
8632 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
8633 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
8634 | + | |
8635 | +#define write_trylock_irqsave(lock, flags) \ | |
8636 | + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) | |
8637 | + | |
8638 | +#define read_lock_irqsave(lock, flags) \ | |
8639 | + do { \ | |
8640 | + typecheck(unsigned long, flags); \ | |
8641 | + flags = rt_read_lock_irqsave(lock); \ | |
8642 | + } while (0) | |
8643 | + | |
8644 | +#define write_lock_irqsave(lock, flags) \ | |
8645 | + do { \ | |
8646 | + typecheck(unsigned long, flags); \ | |
8647 | + flags = rt_write_lock_irqsave(lock); \ | |
8648 | + } while (0) | |
8649 | + | |
8650 | +#define read_lock(lock) rt_read_lock(lock) | |
8651 | + | |
8652 | +#define read_lock_bh(lock) \ | |
8653 | + do { \ | |
8654 | + local_bh_disable(); \ | |
8655 | + rt_read_lock(lock); \ | |
8656 | + } while (0) | |
8657 | + | |
8658 | +#define read_lock_irq(lock) read_lock(lock) | |
8659 | + | |
8660 | +#define write_lock(lock) rt_write_lock(lock) | |
8661 | + | |
8662 | +#define write_lock_bh(lock) \ | |
8663 | + do { \ | |
8664 | + local_bh_disable(); \ | |
8665 | + rt_write_lock(lock); \ | |
8666 | + } while (0) | |
8667 | + | |
8668 | +#define write_lock_irq(lock) write_lock(lock) | |
8669 | + | |
8670 | +#define read_unlock(lock) rt_read_unlock(lock) | |
8671 | + | |
8672 | +#define read_unlock_bh(lock) \ | |
8673 | + do { \ | |
8674 | + rt_read_unlock(lock); \ | |
8675 | + local_bh_enable(); \ | |
8676 | + } while (0) | |
8677 | + | |
8678 | +#define read_unlock_irq(lock) read_unlock(lock) | |
8679 | + | |
8680 | +#define write_unlock(lock) rt_write_unlock(lock) | |
8681 | + | |
8682 | +#define write_unlock_bh(lock) \ | |
8683 | + do { \ | |
8684 | + rt_write_unlock(lock); \ | |
8685 | + local_bh_enable(); \ | |
8686 | + } while (0) | |
8687 | + | |
8688 | +#define write_unlock_irq(lock) write_unlock(lock) | |
8689 | + | |
8690 | +#define read_unlock_irqrestore(lock, flags) \ | |
8691 | + do { \ | |
8692 | + typecheck(unsigned long, flags); \ | |
8693 | + (void) flags; \ | |
8694 | + rt_read_unlock(lock); \ | |
8695 | + } while (0) | |
8696 | + | |
8697 | +#define write_unlock_irqrestore(lock, flags) \ | |
8698 | + do { \ | |
8699 | + typecheck(unsigned long, flags); \ | |
8700 | + (void) flags; \ | |
8701 | + rt_write_unlock(lock); \ | |
8702 | + } while (0) | |
8703 | + | |
8704 | +#endif | |
8705 | diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h | |
8706 | index cc0072e93e36..5317cd957292 100644 | |
8707 | --- a/include/linux/rwlock_types.h | |
8708 | +++ b/include/linux/rwlock_types.h | |
8709 | @@ -1,6 +1,10 @@ | |
8710 | #ifndef __LINUX_RWLOCK_TYPES_H | |
8711 | #define __LINUX_RWLOCK_TYPES_H | |
8712 | ||
8713 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) | |
8714 | +# error "Do not include directly, include spinlock_types.h" | |
8715 | +#endif | |
8716 | + | |
8717 | /* | |
8718 | * include/linux/rwlock_types.h - generic rwlock type definitions | |
8719 | * and initializers | |
8720 | diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h | |
8721 | new file mode 100644 | |
8722 | index 000000000000..51b28d775fe1 | |
8723 | --- /dev/null | |
8724 | +++ b/include/linux/rwlock_types_rt.h | |
8725 | @@ -0,0 +1,33 @@ | |
8726 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
8727 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
8728 | + | |
8729 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
8730 | +#error "Do not include directly. Include spinlock_types.h instead" | |
8731 | +#endif | |
8732 | + | |
8733 | +/* | |
8734 | + * rwlocks - rtmutex which allows single reader recursion | |
8735 | + */ | |
8736 | +typedef struct { | |
8737 | + struct rt_mutex lock; | |
8738 | + int read_depth; | |
8739 | + unsigned int break_lock; | |
8740 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8741 | + struct lockdep_map dep_map; | |
8742 | +#endif | |
8743 | +} rwlock_t; | |
8744 | + | |
8745 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8746 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
8747 | +#else | |
8748 | +# define RW_DEP_MAP_INIT(lockname) | |
8749 | +#endif | |
8750 | + | |
8751 | +#define __RW_LOCK_UNLOCKED(name) \ | |
8752 | + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ | |
8753 | + RW_DEP_MAP_INIT(name) } | |
8754 | + | |
8755 | +#define DEFINE_RWLOCK(name) \ | |
8756 | + rwlock_t name = __RW_LOCK_UNLOCKED(name) | |
8757 | + | |
8758 | +#endif | |
8759 | diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h | |
8760 | index dd1d14250340..8e1f44ff1f2f 100644 | |
8761 | --- a/include/linux/rwsem.h | |
8762 | +++ b/include/linux/rwsem.h | |
8763 | @@ -19,6 +19,10 @@ | |
8764 | #include <linux/osq_lock.h> | |
8765 | #endif | |
8766 | ||
8767 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8768 | +#include <linux/rwsem_rt.h> | |
8769 | +#else /* PREEMPT_RT_FULL */ | |
8770 | + | |
8771 | struct rw_semaphore; | |
8772 | ||
8773 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | |
8774 | @@ -184,4 +188,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem); | |
8775 | # define up_read_non_owner(sem) up_read(sem) | |
8776 | #endif | |
8777 | ||
8778 | +#endif /* !PREEMPT_RT_FULL */ | |
8779 | + | |
8780 | #endif /* _LINUX_RWSEM_H */ | |
8781 | diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h | |
8782 | new file mode 100644 | |
8783 | index 000000000000..e26bd95a57c3 | |
8784 | --- /dev/null | |
8785 | +++ b/include/linux/rwsem_rt.h | |
8786 | @@ -0,0 +1,167 @@ | |
8787 | +#ifndef _LINUX_RWSEM_RT_H | |
8788 | +#define _LINUX_RWSEM_RT_H | |
8789 | + | |
8790 | +#ifndef _LINUX_RWSEM_H | |
8791 | +#error "Include rwsem.h" | |
8792 | +#endif | |
8793 | + | |
8794 | +/* | |
8795 | + * RW-semaphores are a spinlock plus a reader-depth count. | |
8796 | + * | |
8797 | + * Note that the semantics are different from the usual | |
8798 | + * Linux rw-sems, in PREEMPT_RT mode we do not allow | |
8799 | + * multiple readers to hold the lock at once, we only allow | |
8800 | + * a read-lock owner to read-lock recursively. This is | |
8801 | + * better for latency, makes the implementation inherently | |
8802 | + * fair and makes it simpler as well. | |
8803 | + */ | |
8804 | + | |
8805 | +#include <linux/rtmutex.h> | |
8806 | + | |
8807 | +struct rw_semaphore { | |
8808 | + struct rt_mutex lock; | |
8809 | + int read_depth; | |
8810 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8811 | + struct lockdep_map dep_map; | |
8812 | +#endif | |
8813 | +}; | |
8814 | + | |
8815 | +#define __RWSEM_INITIALIZER(name) \ | |
8816 | + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ | |
8817 | + RW_DEP_MAP_INIT(name) } | |
8818 | + | |
8819 | +#define DECLARE_RWSEM(lockname) \ | |
8820 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
8821 | + | |
8822 | +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
8823 | + struct lock_class_key *key); | |
8824 | + | |
8825 | +#define __rt_init_rwsem(sem, name, key) \ | |
8826 | + do { \ | |
8827 | + rt_mutex_init(&(sem)->lock); \ | |
8828 | + __rt_rwsem_init((sem), (name), (key));\ | |
8829 | + } while (0) | |
8830 | + | |
8831 | +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) | |
8832 | + | |
8833 | +# define rt_init_rwsem(sem) \ | |
8834 | +do { \ | |
8835 | + static struct lock_class_key __key; \ | |
8836 | + \ | |
8837 | + __rt_init_rwsem((sem), #sem, &__key); \ | |
8838 | +} while (0) | |
8839 | + | |
8840 | +extern void rt_down_write(struct rw_semaphore *rwsem); | |
8841 | +extern int rt_down_write_killable(struct rw_semaphore *rwsem); | |
8842 | +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); | |
8843 | +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); | |
8844 | +extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem, | |
8845 | + int subclass); | |
8846 | +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
8847 | + struct lockdep_map *nest); | |
8848 | +extern void rt__down_read(struct rw_semaphore *rwsem); | |
8849 | +extern void rt_down_read(struct rw_semaphore *rwsem); | |
8850 | +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); | |
8851 | +extern int rt__down_read_trylock(struct rw_semaphore *rwsem); | |
8852 | +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); | |
8853 | +extern void __rt_up_read(struct rw_semaphore *rwsem); | |
8854 | +extern void rt_up_read(struct rw_semaphore *rwsem); | |
8855 | +extern void rt_up_write(struct rw_semaphore *rwsem); | |
8856 | +extern void rt_downgrade_write(struct rw_semaphore *rwsem); | |
8857 | + | |
8858 | +#define init_rwsem(sem) rt_init_rwsem(sem) | |
8859 | +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) | |
8860 | + | |
8861 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) | |
8862 | +{ | |
8863 | + /* rt_mutex_has_waiters() */ | |
8864 | + return !RB_EMPTY_ROOT(&sem->lock.waiters); | |
8865 | +} | |
8866 | + | |
8867 | +static inline void __down_read(struct rw_semaphore *sem) | |
8868 | +{ | |
8869 | + rt__down_read(sem); | |
8870 | +} | |
8871 | + | |
8872 | +static inline void down_read(struct rw_semaphore *sem) | |
8873 | +{ | |
8874 | + rt_down_read(sem); | |
8875 | +} | |
8876 | + | |
8877 | +static inline int __down_read_trylock(struct rw_semaphore *sem) | |
8878 | +{ | |
8879 | + return rt__down_read_trylock(sem); | |
8880 | +} | |
8881 | + | |
8882 | +static inline int down_read_trylock(struct rw_semaphore *sem) | |
8883 | +{ | |
8884 | + return rt_down_read_trylock(sem); | |
8885 | +} | |
8886 | + | |
8887 | +static inline void down_write(struct rw_semaphore *sem) | |
8888 | +{ | |
8889 | + rt_down_write(sem); | |
8890 | +} | |
8891 | + | |
8892 | +static inline int down_write_killable(struct rw_semaphore *sem) | |
8893 | +{ | |
8894 | + return rt_down_write_killable(sem); | |
8895 | +} | |
8896 | + | |
8897 | +static inline int down_write_trylock(struct rw_semaphore *sem) | |
8898 | +{ | |
8899 | + return rt_down_write_trylock(sem); | |
8900 | +} | |
8901 | + | |
8902 | +static inline void __up_read(struct rw_semaphore *sem) | |
8903 | +{ | |
8904 | + __rt_up_read(sem); | |
8905 | +} | |
8906 | + | |
8907 | +static inline void up_read(struct rw_semaphore *sem) | |
8908 | +{ | |
8909 | + rt_up_read(sem); | |
8910 | +} | |
8911 | + | |
8912 | +static inline void up_write(struct rw_semaphore *sem) | |
8913 | +{ | |
8914 | + rt_up_write(sem); | |
8915 | +} | |
8916 | + | |
8917 | +static inline void downgrade_write(struct rw_semaphore *sem) | |
8918 | +{ | |
8919 | + rt_downgrade_write(sem); | |
8920 | +} | |
8921 | + | |
8922 | +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) | |
8923 | +{ | |
8924 | + return rt_down_read_nested(sem, subclass); | |
8925 | +} | |
8926 | + | |
8927 | +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) | |
8928 | +{ | |
8929 | + rt_down_write_nested(sem, subclass); | |
8930 | +} | |
8931 | + | |
8932 | +static inline int down_write_killable_nested(struct rw_semaphore *sem, | |
8933 | + int subclass) | |
8934 | +{ | |
8935 | + return rt_down_write_killable_nested(sem, subclass); | |
8936 | +} | |
8937 | + | |
8938 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8939 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
8940 | + struct rw_semaphore *nest_lock) | |
8941 | +{ | |
8942 | + rt_down_write_nested_lock(sem, &nest_lock->dep_map); | |
8943 | +} | |
8944 | + | |
8945 | +#else | |
8946 | + | |
8947 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
8948 | + struct rw_semaphore *nest_lock) | |
8949 | +{ | |
8950 | + rt_down_write_nested_lock(sem, NULL); | |
8951 | +} | |
8952 | +#endif | |
8953 | +#endif | |
8954 | diff --git a/include/linux/sched.h b/include/linux/sched.h | |
c7c16703 | 8955 | index 75d9a57e212e..8cb7df0f56e3 100644 |
1a6e0f06 JK |
8956 | --- a/include/linux/sched.h |
8957 | +++ b/include/linux/sched.h | |
8958 | @@ -26,6 +26,7 @@ struct sched_param { | |
8959 | #include <linux/nodemask.h> | |
8960 | #include <linux/mm_types.h> | |
8961 | #include <linux/preempt.h> | |
8962 | +#include <asm/kmap_types.h> | |
8963 | ||
8964 | #include <asm/page.h> | |
8965 | #include <asm/ptrace.h> | |
8966 | @@ -243,10 +244,7 @@ extern char ___assert_task_state[1 - 2*!!( | |
8967 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | |
8968 | __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) | |
8969 | ||
8970 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | |
8971 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
8972 | -#define task_is_stopped_or_traced(task) \ | |
8973 | - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | |
8974 | #define task_contributes_to_load(task) \ | |
8975 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
8976 | (task->flags & PF_FROZEN) == 0 && \ | |
8977 | @@ -312,6 +310,11 @@ extern char ___assert_task_state[1 - 2*!!( | |
8978 | ||
8979 | #endif | |
8980 | ||
8981 | +#define __set_current_state_no_track(state_value) \ | |
8982 | + do { current->state = (state_value); } while (0) | |
8983 | +#define set_current_state_no_track(state_value) \ | |
8984 | + set_mb(current->state, (state_value)) | |
8985 | + | |
8986 | /* Task command name length */ | |
8987 | #define TASK_COMM_LEN 16 | |
8988 | ||
c7c16703 | 8989 | @@ -1013,8 +1016,18 @@ struct wake_q_head { |
1a6e0f06 JK |
8990 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } |
8991 | ||
8992 | extern void wake_q_add(struct wake_q_head *head, | |
8993 | - struct task_struct *task); | |
8994 | -extern void wake_up_q(struct wake_q_head *head); | |
8995 | + struct task_struct *task); | |
8996 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); | |
8997 | + | |
8998 | +static inline void wake_up_q(struct wake_q_head *head) | |
8999 | +{ | |
9000 | + __wake_up_q(head, false); | |
9001 | +} | |
9002 | + | |
9003 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) | |
9004 | +{ | |
9005 | + __wake_up_q(head, true); | |
9006 | +} | |
9007 | ||
9008 | /* | |
9009 | * sched-domains (multiprocessor balancing) declarations: | |
c7c16703 JK |
9010 | @@ -1481,6 +1494,7 @@ struct task_struct { |
9011 | struct thread_info thread_info; | |
9012 | #endif | |
1a6e0f06 | 9013 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
c7c16703 | 9014 | + volatile long saved_state; /* saved state for "spinlock sleepers" */ |
1a6e0f06 JK |
9015 | void *stack; |
9016 | atomic_t usage; | |
9017 | unsigned int flags; /* per process flags, defined below */ | |
c7c16703 | 9018 | @@ -1520,6 +1534,12 @@ struct task_struct { |
1a6e0f06 JK |
9019 | #endif |
9020 | ||
9021 | unsigned int policy; | |
9022 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9023 | + int migrate_disable; | |
9024 | +# ifdef CONFIG_SCHED_DEBUG | |
9025 | + int migrate_disable_atomic; | |
9026 | +# endif | |
9027 | +#endif | |
9028 | int nr_cpus_allowed; | |
9029 | cpumask_t cpus_allowed; | |
9030 | ||
c7c16703 | 9031 | @@ -1654,6 +1674,9 @@ struct task_struct { |
1a6e0f06 JK |
9032 | |
9033 | struct task_cputime cputime_expires; | |
9034 | struct list_head cpu_timers[3]; | |
9035 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9036 | + struct task_struct *posix_timer_list; | |
9037 | +#endif | |
9038 | ||
9039 | /* process credentials */ | |
c7c16703 JK |
9040 | const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ |
9041 | @@ -1685,10 +1708,15 @@ struct task_struct { | |
1a6e0f06 JK |
9042 | /* signal handlers */ |
9043 | struct signal_struct *signal; | |
9044 | struct sighand_struct *sighand; | |
9045 | + struct sigqueue *sigqueue_cache; | |
9046 | ||
9047 | sigset_t blocked, real_blocked; | |
9048 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | |
9049 | struct sigpending pending; | |
9050 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9051 | + /* TODO: move me into ->restart_block ? */ | |
9052 | + struct siginfo forced_info; | |
9053 | +#endif | |
9054 | ||
9055 | unsigned long sas_ss_sp; | |
9056 | size_t sas_ss_size; | |
c7c16703 | 9057 | @@ -1917,6 +1945,12 @@ struct task_struct { |
1a6e0f06 JK |
9058 | /* bitmask and counter of trace recursion */ |
9059 | unsigned long trace_recursion; | |
9060 | #endif /* CONFIG_TRACING */ | |
9061 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
9062 | + u64 preempt_timestamp_hist; | |
9063 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
9064 | + long timer_offset; | |
9065 | +#endif | |
9066 | +#endif | |
9067 | #ifdef CONFIG_KCOV | |
9068 | /* Coverage collection mode enabled for this task (0 if disabled). */ | |
9069 | enum kcov_mode kcov_mode; | |
c7c16703 | 9070 | @@ -1942,9 +1976,23 @@ struct task_struct { |
1a6e0f06 JK |
9071 | unsigned int sequential_io; |
9072 | unsigned int sequential_io_avg; | |
9073 | #endif | |
9074 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9075 | + struct rcu_head put_rcu; | |
9076 | + int softirq_nestcnt; | |
9077 | + unsigned int softirqs_raised; | |
9078 | +#endif | |
9079 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9080 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
9081 | + int kmap_idx; | |
9082 | + pte_t kmap_pte[KM_TYPE_NR]; | |
9083 | +# endif | |
9084 | +#endif | |
9085 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
9086 | unsigned long task_state_change; | |
9087 | #endif | |
9088 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9089 | + int xmit_recursion; | |
9090 | +#endif | |
9091 | int pagefault_disabled; | |
9092 | #ifdef CONFIG_MMU | |
9093 | struct task_struct *oom_reaper_list; | |
c7c16703 JK |
9094 | @@ -1984,14 +2032,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) |
9095 | } | |
1a6e0f06 JK |
9096 | #endif |
9097 | ||
9098 | -/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9099 | -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | |
9100 | - | |
9101 | -static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9102 | -{ | |
9103 | - return p->nr_cpus_allowed; | |
9104 | -} | |
9105 | - | |
9106 | #define TNF_MIGRATED 0x01 | |
9107 | #define TNF_NO_GROUP 0x02 | |
9108 | #define TNF_SHARED 0x04 | |
c7c16703 | 9109 | @@ -2207,6 +2247,15 @@ extern struct pid *cad_pid; |
1a6e0f06 JK |
9110 | extern void free_task(struct task_struct *tsk); |
9111 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | |
9112 | ||
9113 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9114 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
9115 | + | |
9116 | +static inline void put_task_struct(struct task_struct *t) | |
9117 | +{ | |
9118 | + if (atomic_dec_and_test(&t->usage)) | |
9119 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
9120 | +} | |
9121 | +#else | |
9122 | extern void __put_task_struct(struct task_struct *t); | |
9123 | ||
9124 | static inline void put_task_struct(struct task_struct *t) | |
c7c16703 | 9125 | @@ -2214,6 +2263,7 @@ static inline void put_task_struct(struct task_struct *t) |
1a6e0f06 JK |
9126 | if (atomic_dec_and_test(&t->usage)) |
9127 | __put_task_struct(t); | |
9128 | } | |
9129 | +#endif | |
9130 | ||
9131 | struct task_struct *task_rcu_dereference(struct task_struct **ptask); | |
9132 | struct task_struct *try_get_task_struct(struct task_struct **ptask); | |
c7c16703 | 9133 | @@ -2255,6 +2305,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, |
1a6e0f06 JK |
9134 | /* |
9135 | * Per process flags | |
9136 | */ | |
9137 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
9138 | #define PF_EXITING 0x00000004 /* getting shut down */ | |
9139 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | |
9140 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | |
c7c16703 | 9141 | @@ -2423,6 +2474,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, |
1a6e0f06 JK |
9142 | |
9143 | extern int set_cpus_allowed_ptr(struct task_struct *p, | |
9144 | const struct cpumask *new_mask); | |
9145 | +int migrate_me(void); | |
9146 | +void tell_sched_cpu_down_begin(int cpu); | |
9147 | +void tell_sched_cpu_down_done(int cpu); | |
9148 | + | |
9149 | #else | |
9150 | static inline void do_set_cpus_allowed(struct task_struct *p, | |
9151 | const struct cpumask *new_mask) | |
c7c16703 | 9152 | @@ -2435,6 +2490,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, |
1a6e0f06 JK |
9153 | return -EINVAL; |
9154 | return 0; | |
9155 | } | |
9156 | +static inline int migrate_me(void) { return 0; } | |
9157 | +static inline void tell_sched_cpu_down_begin(int cpu) { } | |
9158 | +static inline void tell_sched_cpu_down_done(int cpu) { } | |
9159 | #endif | |
9160 | ||
9161 | #ifdef CONFIG_NO_HZ_COMMON | |
c7c16703 | 9162 | @@ -2673,6 +2731,7 @@ extern void xtime_update(unsigned long ticks); |
1a6e0f06 JK |
9163 | |
9164 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
9165 | extern int wake_up_process(struct task_struct *tsk); | |
9166 | +extern int wake_up_lock_sleeper(struct task_struct * tsk); | |
9167 | extern void wake_up_new_task(struct task_struct *tsk); | |
9168 | #ifdef CONFIG_SMP | |
9169 | extern void kick_process(struct task_struct *tsk); | |
c7c16703 | 9170 | @@ -2881,6 +2940,17 @@ static inline void mmdrop(struct mm_struct *mm) |
1a6e0f06 JK |
9171 | __mmdrop(mm); |
9172 | } | |
9173 | ||
9174 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9175 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
9176 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
9177 | +{ | |
9178 | + if (atomic_dec_and_test(&mm->mm_count)) | |
9179 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
9180 | +} | |
9181 | +#else | |
9182 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
9183 | +#endif | |
9184 | + | |
c7c16703 | 9185 | static inline void mmdrop_async_fn(struct work_struct *work) |
1a6e0f06 | 9186 | { |
c7c16703 JK |
9187 | struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); |
9188 | @@ -3273,6 +3343,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) | |
1a6e0f06 JK |
9189 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); |
9190 | } | |
9191 | ||
9192 | +#ifdef CONFIG_PREEMPT_LAZY | |
9193 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
9194 | +{ | |
9195 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9196 | +} | |
9197 | + | |
9198 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
9199 | +{ | |
9200 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9201 | +} | |
9202 | + | |
9203 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
9204 | +{ | |
9205 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
9206 | +} | |
9207 | + | |
9208 | +static inline int need_resched_lazy(void) | |
9209 | +{ | |
9210 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
9211 | +} | |
9212 | + | |
9213 | +static inline int need_resched_now(void) | |
9214 | +{ | |
9215 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9216 | +} | |
9217 | + | |
9218 | +#else | |
9219 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
9220 | +static inline int need_resched_lazy(void) { return 0; } | |
9221 | + | |
9222 | +static inline int need_resched_now(void) | |
9223 | +{ | |
9224 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9225 | +} | |
9226 | + | |
9227 | +#endif | |
9228 | + | |
9229 | static inline int restart_syscall(void) | |
9230 | { | |
9231 | set_tsk_thread_flag(current, TIF_SIGPENDING); | |
c7c16703 | 9232 | @@ -3304,6 +3411,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) |
1a6e0f06 JK |
9233 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); |
9234 | } | |
9235 | ||
9236 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
9237 | +{ | |
9238 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
9239 | + return true; | |
9240 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9241 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
9242 | + return true; | |
9243 | +#endif | |
9244 | + return false; | |
9245 | +} | |
9246 | + | |
9247 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
9248 | +{ | |
9249 | + bool traced_stopped; | |
9250 | + | |
9251 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9252 | + unsigned long flags; | |
9253 | + | |
9254 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
9255 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9256 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
9257 | +#else | |
9258 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9259 | +#endif | |
9260 | + return traced_stopped; | |
9261 | +} | |
9262 | + | |
9263 | +static inline bool task_is_traced(struct task_struct *task) | |
9264 | +{ | |
9265 | + bool traced = false; | |
9266 | + | |
9267 | + if (task->state & __TASK_TRACED) | |
9268 | + return true; | |
9269 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9270 | + /* in case the task is sleeping on tasklist_lock */ | |
9271 | + raw_spin_lock_irq(&task->pi_lock); | |
9272 | + if (task->state & __TASK_TRACED) | |
9273 | + traced = true; | |
9274 | + else if (task->saved_state & __TASK_TRACED) | |
9275 | + traced = true; | |
9276 | + raw_spin_unlock_irq(&task->pi_lock); | |
9277 | +#endif | |
9278 | + return traced; | |
9279 | +} | |
9280 | + | |
9281 | /* | |
9282 | * cond_resched() and cond_resched_lock(): latency reduction via | |
9283 | * explicit rescheduling in places that are safe. The return | |
c7c16703 | 9284 | @@ -3329,12 +3481,16 @@ extern int __cond_resched_lock(spinlock_t *lock); |
1a6e0f06 JK |
9285 | __cond_resched_lock(lock); \ |
9286 | }) | |
9287 | ||
9288 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9289 | extern int __cond_resched_softirq(void); | |
9290 | ||
9291 | #define cond_resched_softirq() ({ \ | |
9292 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
9293 | __cond_resched_softirq(); \ | |
9294 | }) | |
9295 | +#else | |
9296 | +# define cond_resched_softirq() cond_resched() | |
9297 | +#endif | |
9298 | ||
9299 | static inline void cond_resched_rcu(void) | |
9300 | { | |
c7c16703 | 9301 | @@ -3509,6 +3665,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) |
1a6e0f06 JK |
9302 | |
9303 | #endif /* CONFIG_SMP */ | |
9304 | ||
9305 | +static inline int __migrate_disabled(struct task_struct *p) | |
9306 | +{ | |
9307 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9308 | + return p->migrate_disable; | |
9309 | +#else | |
9310 | + return 0; | |
9311 | +#endif | |
9312 | +} | |
9313 | + | |
9314 | +/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9315 | +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) | |
9316 | +{ | |
9317 | + if (__migrate_disabled(p)) | |
9318 | + return cpumask_of(task_cpu(p)); | |
9319 | + | |
9320 | + return &p->cpus_allowed; | |
9321 | +} | |
9322 | + | |
9323 | +static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9324 | +{ | |
9325 | + if (__migrate_disabled(p)) | |
9326 | + return 1; | |
9327 | + return p->nr_cpus_allowed; | |
9328 | +} | |
9329 | + | |
9330 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | |
9331 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |
9332 | ||
9333 | diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h | |
9334 | index ead97654c4e9..3d7223ffdd3b 100644 | |
9335 | --- a/include/linux/seqlock.h | |
9336 | +++ b/include/linux/seqlock.h | |
9337 | @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | |
9338 | return __read_seqcount_retry(s, start); | |
9339 | } | |
9340 | ||
9341 | - | |
9342 | - | |
9343 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9344 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
9345 | { | |
9346 | s->sequence++; | |
9347 | smp_wmb(); | |
9348 | } | |
9349 | ||
9350 | -static inline void raw_write_seqcount_end(seqcount_t *s) | |
9351 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9352 | +{ | |
9353 | + preempt_disable_rt(); | |
9354 | + __raw_write_seqcount_begin(s); | |
9355 | +} | |
9356 | + | |
9357 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
9358 | { | |
9359 | smp_wmb(); | |
9360 | s->sequence++; | |
9361 | } | |
9362 | ||
9363 | +static inline void raw_write_seqcount_end(seqcount_t *s) | |
9364 | +{ | |
9365 | + __raw_write_seqcount_end(s); | |
9366 | + preempt_enable_rt(); | |
9367 | +} | |
9368 | + | |
9369 | /** | |
9370 | * raw_write_seqcount_barrier - do a seq write barrier | |
9371 | * @s: pointer to seqcount_t | |
9372 | @@ -428,10 +438,32 @@ typedef struct { | |
9373 | /* | |
9374 | * Read side functions for starting and finalizing a read side section. | |
9375 | */ | |
9376 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9377 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
9378 | { | |
9379 | return read_seqcount_begin(&sl->seqcount); | |
9380 | } | |
9381 | +#else | |
9382 | +/* | |
9383 | + * Starvation safe read side for RT | |
9384 | + */ | |
9385 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
9386 | +{ | |
9387 | + unsigned ret; | |
9388 | + | |
9389 | +repeat: | |
9390 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
9391 | + if (unlikely(ret & 1)) { | |
9392 | + /* | |
9393 | + * Take the lock and let the writer proceed (i.e. evtl | |
9394 | + * boost it), otherwise we could loop here forever. | |
9395 | + */ | |
9396 | + spin_unlock_wait(&sl->lock); | |
9397 | + goto repeat; | |
9398 | + } | |
9399 | + return ret; | |
9400 | +} | |
9401 | +#endif | |
9402 | ||
9403 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9404 | { | |
9405 | @@ -446,36 +478,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9406 | static inline void write_seqlock(seqlock_t *sl) | |
9407 | { | |
9408 | spin_lock(&sl->lock); | |
9409 | - write_seqcount_begin(&sl->seqcount); | |
9410 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9411 | +} | |
9412 | + | |
9413 | +static inline int try_write_seqlock(seqlock_t *sl) | |
9414 | +{ | |
9415 | + if (spin_trylock(&sl->lock)) { | |
9416 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9417 | + return 1; | |
9418 | + } | |
9419 | + return 0; | |
9420 | } | |
9421 | ||
9422 | static inline void write_sequnlock(seqlock_t *sl) | |
9423 | { | |
9424 | - write_seqcount_end(&sl->seqcount); | |
9425 | + __raw_write_seqcount_end(&sl->seqcount); | |
9426 | spin_unlock(&sl->lock); | |
9427 | } | |
9428 | ||
9429 | static inline void write_seqlock_bh(seqlock_t *sl) | |
9430 | { | |
9431 | spin_lock_bh(&sl->lock); | |
9432 | - write_seqcount_begin(&sl->seqcount); | |
9433 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9434 | } | |
9435 | ||
9436 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
9437 | { | |
9438 | - write_seqcount_end(&sl->seqcount); | |
9439 | + __raw_write_seqcount_end(&sl->seqcount); | |
9440 | spin_unlock_bh(&sl->lock); | |
9441 | } | |
9442 | ||
9443 | static inline void write_seqlock_irq(seqlock_t *sl) | |
9444 | { | |
9445 | spin_lock_irq(&sl->lock); | |
9446 | - write_seqcount_begin(&sl->seqcount); | |
9447 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9448 | } | |
9449 | ||
9450 | static inline void write_sequnlock_irq(seqlock_t *sl) | |
9451 | { | |
9452 | - write_seqcount_end(&sl->seqcount); | |
9453 | + __raw_write_seqcount_end(&sl->seqcount); | |
9454 | spin_unlock_irq(&sl->lock); | |
9455 | } | |
9456 | ||
9457 | @@ -484,7 +525,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9458 | unsigned long flags; | |
9459 | ||
9460 | spin_lock_irqsave(&sl->lock, flags); | |
9461 | - write_seqcount_begin(&sl->seqcount); | |
9462 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9463 | return flags; | |
9464 | } | |
9465 | ||
9466 | @@ -494,7 +535,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9467 | static inline void | |
9468 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
9469 | { | |
9470 | - write_seqcount_end(&sl->seqcount); | |
9471 | + __raw_write_seqcount_end(&sl->seqcount); | |
9472 | spin_unlock_irqrestore(&sl->lock, flags); | |
9473 | } | |
9474 | ||
9475 | diff --git a/include/linux/signal.h b/include/linux/signal.h | |
9476 | index b63f63eaa39c..295540fdfc72 100644 | |
9477 | --- a/include/linux/signal.h | |
9478 | +++ b/include/linux/signal.h | |
9479 | @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig) | |
9480 | } | |
9481 | ||
9482 | extern void flush_sigqueue(struct sigpending *queue); | |
9483 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
9484 | ||
9485 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
9486 | static inline int valid_signal(unsigned long sig) | |
9487 | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h | |
c7c16703 | 9488 | index 32810f279f8e..0db6e31161f6 100644 |
1a6e0f06 JK |
9489 | --- a/include/linux/skbuff.h |
9490 | +++ b/include/linux/skbuff.h | |
9491 | @@ -284,6 +284,7 @@ struct sk_buff_head { | |
9492 | ||
9493 | __u32 qlen; | |
9494 | spinlock_t lock; | |
9495 | + raw_spinlock_t raw_lock; | |
9496 | }; | |
9497 | ||
9498 | struct sk_buff; | |
c7c16703 | 9499 | @@ -1573,6 +1574,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) |
1a6e0f06 JK |
9500 | __skb_queue_head_init(list); |
9501 | } | |
9502 | ||
9503 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) | |
9504 | +{ | |
9505 | + raw_spin_lock_init(&list->raw_lock); | |
9506 | + __skb_queue_head_init(list); | |
9507 | +} | |
9508 | + | |
9509 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
9510 | struct lock_class_key *class) | |
9511 | { | |
9512 | diff --git a/include/linux/smp.h b/include/linux/smp.h | |
c7c16703 | 9513 | index 8e0cb7a0f836..b16ca967ad80 100644 |
1a6e0f06 JK |
9514 | --- a/include/linux/smp.h |
9515 | +++ b/include/linux/smp.h | |
9516 | @@ -185,6 +185,9 @@ static inline void smp_init(void) { } | |
9517 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | |
9518 | #define put_cpu() preempt_enable() | |
9519 | ||
9520 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) | |
9521 | +#define put_cpu_light() migrate_enable() | |
9522 | + | |
9523 | /* | |
9524 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
9525 | * boot command line: | |
9526 | diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h | |
9527 | index 47dd0cebd204..02928fa5499d 100644 | |
9528 | --- a/include/linux/spinlock.h | |
9529 | +++ b/include/linux/spinlock.h | |
9530 | @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9531 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
9532 | ||
9533 | /* Include rwlock functions */ | |
9534 | -#include <linux/rwlock.h> | |
9535 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9536 | +# include <linux/rwlock_rt.h> | |
9537 | +#else | |
9538 | +# include <linux/rwlock.h> | |
9539 | +#endif | |
9540 | ||
9541 | /* | |
9542 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
9543 | @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9544 | # include <linux/spinlock_api_up.h> | |
9545 | #endif | |
9546 | ||
9547 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9548 | +# include <linux/spinlock_rt.h> | |
9549 | +#else /* PREEMPT_RT_FULL */ | |
9550 | + | |
9551 | /* | |
9552 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
9553 | */ | |
9554 | @@ -347,6 +355,12 @@ static __always_inline void spin_unlock(spinlock_t *lock) | |
9555 | raw_spin_unlock(&lock->rlock); | |
9556 | } | |
9557 | ||
9558 | +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock) | |
9559 | +{ | |
9560 | + raw_spin_unlock(&lock->rlock); | |
9561 | + return 0; | |
9562 | +} | |
9563 | + | |
9564 | static __always_inline void spin_unlock_bh(spinlock_t *lock) | |
9565 | { | |
9566 | raw_spin_unlock_bh(&lock->rlock); | |
9567 | @@ -416,4 +430,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); | |
9568 | #define atomic_dec_and_lock(atomic, lock) \ | |
9569 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
9570 | ||
9571 | +#endif /* !PREEMPT_RT_FULL */ | |
9572 | + | |
9573 | #endif /* __LINUX_SPINLOCK_H */ | |
9574 | diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h | |
9575 | index 5344268e6e62..043263f30e81 100644 | |
9576 | --- a/include/linux/spinlock_api_smp.h | |
9577 | +++ b/include/linux/spinlock_api_smp.h | |
9578 | @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) | |
9579 | return 0; | |
9580 | } | |
9581 | ||
9582 | -#include <linux/rwlock_api_smp.h> | |
9583 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9584 | +# include <linux/rwlock_api_smp.h> | |
9585 | +#endif | |
9586 | ||
9587 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
9588 | diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h | |
9589 | new file mode 100644 | |
c7c16703 | 9590 | index 000000000000..3534cff3dd08 |
1a6e0f06 JK |
9591 | --- /dev/null |
9592 | +++ b/include/linux/spinlock_rt.h | |
c7c16703 | 9593 | @@ -0,0 +1,164 @@ |
1a6e0f06 JK |
9594 | +#ifndef __LINUX_SPINLOCK_RT_H |
9595 | +#define __LINUX_SPINLOCK_RT_H | |
9596 | + | |
9597 | +#ifndef __LINUX_SPINLOCK_H | |
9598 | +#error Do not include directly. Use spinlock.h | |
9599 | +#endif | |
9600 | + | |
9601 | +#include <linux/bug.h> | |
9602 | + | |
9603 | +extern void | |
9604 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); | |
9605 | + | |
9606 | +#define spin_lock_init(slock) \ | |
9607 | +do { \ | |
9608 | + static struct lock_class_key __key; \ | |
9609 | + \ | |
9610 | + rt_mutex_init(&(slock)->lock); \ | |
9611 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
9612 | +} while (0) | |
9613 | + | |
9614 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); | |
9615 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); | |
9616 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); | |
9617 | + | |
9618 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
9619 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
9620 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
9621 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
9622 | +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock); | |
9623 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); | |
9624 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
9625 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
9626 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
9627 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
9628 | + | |
9629 | +/* | |
9630 | + * lockdep-less calls, for derived types like rwlock: | |
9631 | + * (for trylock they can use rt_mutex_trylock() directly. | |
9632 | + */ | |
9633 | +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); | |
9634 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
9635 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
1a6e0f06 JK |
9636 | + |
9637 | +#define spin_lock(lock) rt_spin_lock(lock) | |
9638 | + | |
9639 | +#define spin_lock_bh(lock) \ | |
9640 | + do { \ | |
9641 | + local_bh_disable(); \ | |
9642 | + rt_spin_lock(lock); \ | |
9643 | + } while (0) | |
9644 | + | |
9645 | +#define spin_lock_irq(lock) spin_lock(lock) | |
9646 | + | |
9647 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
9648 | + | |
9649 | +#define spin_trylock(lock) \ | |
9650 | +({ \ | |
9651 | + int __locked; \ | |
9652 | + __locked = spin_do_trylock(lock); \ | |
9653 | + __locked; \ | |
9654 | +}) | |
9655 | + | |
9656 | +#ifdef CONFIG_LOCKDEP | |
9657 | +# define spin_lock_nested(lock, subclass) \ | |
9658 | + do { \ | |
9659 | + rt_spin_lock_nested(lock, subclass); \ | |
9660 | + } while (0) | |
9661 | + | |
9662 | +#define spin_lock_bh_nested(lock, subclass) \ | |
9663 | + do { \ | |
9664 | + local_bh_disable(); \ | |
9665 | + rt_spin_lock_nested(lock, subclass); \ | |
9666 | + } while (0) | |
9667 | + | |
9668 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9669 | + do { \ | |
9670 | + typecheck(unsigned long, flags); \ | |
9671 | + flags = 0; \ | |
9672 | + rt_spin_lock_nested(lock, subclass); \ | |
9673 | + } while (0) | |
9674 | +#else | |
9675 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
9676 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
9677 | + | |
9678 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9679 | + do { \ | |
9680 | + typecheck(unsigned long, flags); \ | |
9681 | + flags = 0; \ | |
9682 | + spin_lock(lock); \ | |
9683 | + } while (0) | |
9684 | +#endif | |
9685 | + | |
9686 | +#define spin_lock_irqsave(lock, flags) \ | |
9687 | + do { \ | |
9688 | + typecheck(unsigned long, flags); \ | |
9689 | + flags = 0; \ | |
9690 | + spin_lock(lock); \ | |
9691 | + } while (0) | |
9692 | + | |
9693 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
9694 | +{ | |
9695 | + unsigned long flags = 0; | |
9696 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
9697 | + flags = rt_spin_lock_trace_flags(lock); | |
9698 | +#else | |
9699 | + spin_lock(lock); /* lock_local */ | |
9700 | +#endif | |
9701 | + return flags; | |
9702 | +} | |
9703 | + | |
9704 | +/* FIXME: we need rt_spin_lock_nest_lock */ | |
9705 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
9706 | + | |
9707 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
9708 | +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock) | |
9709 | + | |
9710 | +#define spin_unlock_bh(lock) \ | |
9711 | + do { \ | |
9712 | + rt_spin_unlock(lock); \ | |
9713 | + local_bh_enable(); \ | |
9714 | + } while (0) | |
9715 | + | |
9716 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
9717 | + | |
9718 | +#define spin_unlock_irqrestore(lock, flags) \ | |
9719 | + do { \ | |
9720 | + typecheck(unsigned long, flags); \ | |
9721 | + (void) flags; \ | |
9722 | + spin_unlock(lock); \ | |
9723 | + } while (0) | |
9724 | + | |
9725 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
9726 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
9727 | + | |
9728 | +#define spin_trylock_irqsave(lock, flags) \ | |
9729 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
9730 | + | |
9731 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
9732 | + | |
9733 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
9734 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
9735 | +#else | |
9736 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
9737 | +#endif | |
9738 | + | |
9739 | +static inline int spin_can_lock(spinlock_t *lock) | |
9740 | +{ | |
9741 | + return !rt_mutex_is_locked(&lock->lock); | |
9742 | +} | |
9743 | + | |
9744 | +static inline int spin_is_locked(spinlock_t *lock) | |
9745 | +{ | |
9746 | + return rt_mutex_is_locked(&lock->lock); | |
9747 | +} | |
9748 | + | |
9749 | +static inline void assert_spin_locked(spinlock_t *lock) | |
9750 | +{ | |
9751 | + BUG_ON(!spin_is_locked(lock)); | |
9752 | +} | |
9753 | + | |
9754 | +#define atomic_dec_and_lock(atomic, lock) \ | |
9755 | + atomic_dec_and_spin_lock(atomic, lock) | |
9756 | + | |
9757 | +#endif | |
9758 | diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h | |
9759 | index 73548eb13a5d..10bac715ea96 100644 | |
9760 | --- a/include/linux/spinlock_types.h | |
9761 | +++ b/include/linux/spinlock_types.h | |
9762 | @@ -9,80 +9,15 @@ | |
9763 | * Released under the General Public License (GPL). | |
9764 | */ | |
9765 | ||
9766 | -#if defined(CONFIG_SMP) | |
9767 | -# include <asm/spinlock_types.h> | |
9768 | +#include <linux/spinlock_types_raw.h> | |
9769 | + | |
9770 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9771 | +# include <linux/spinlock_types_nort.h> | |
9772 | +# include <linux/rwlock_types.h> | |
9773 | #else | |
9774 | -# include <linux/spinlock_types_up.h> | |
9775 | +# include <linux/rtmutex.h> | |
9776 | +# include <linux/spinlock_types_rt.h> | |
9777 | +# include <linux/rwlock_types_rt.h> | |
9778 | #endif | |
9779 | ||
9780 | -#include <linux/lockdep.h> | |
9781 | - | |
9782 | -typedef struct raw_spinlock { | |
9783 | - arch_spinlock_t raw_lock; | |
9784 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
9785 | - unsigned int break_lock; | |
9786 | -#endif | |
9787 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
9788 | - unsigned int magic, owner_cpu; | |
9789 | - void *owner; | |
9790 | -#endif | |
9791 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9792 | - struct lockdep_map dep_map; | |
9793 | -#endif | |
9794 | -} raw_spinlock_t; | |
9795 | - | |
9796 | -#define SPINLOCK_MAGIC 0xdead4ead | |
9797 | - | |
9798 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
9799 | - | |
9800 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9801 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
9802 | -#else | |
9803 | -# define SPIN_DEP_MAP_INIT(lockname) | |
9804 | -#endif | |
9805 | - | |
9806 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
9807 | -# define SPIN_DEBUG_INIT(lockname) \ | |
9808 | - .magic = SPINLOCK_MAGIC, \ | |
9809 | - .owner_cpu = -1, \ | |
9810 | - .owner = SPINLOCK_OWNER_INIT, | |
9811 | -#else | |
9812 | -# define SPIN_DEBUG_INIT(lockname) | |
9813 | -#endif | |
9814 | - | |
9815 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
9816 | - { \ | |
9817 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
9818 | - SPIN_DEBUG_INIT(lockname) \ | |
9819 | - SPIN_DEP_MAP_INIT(lockname) } | |
9820 | - | |
9821 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
9822 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
9823 | - | |
9824 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
9825 | - | |
9826 | -typedef struct spinlock { | |
9827 | - union { | |
9828 | - struct raw_spinlock rlock; | |
9829 | - | |
9830 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9831 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
9832 | - struct { | |
9833 | - u8 __padding[LOCK_PADSIZE]; | |
9834 | - struct lockdep_map dep_map; | |
9835 | - }; | |
9836 | -#endif | |
9837 | - }; | |
9838 | -} spinlock_t; | |
9839 | - | |
9840 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
9841 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
9842 | - | |
9843 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
9844 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
9845 | - | |
9846 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
9847 | - | |
9848 | -#include <linux/rwlock_types.h> | |
9849 | - | |
9850 | #endif /* __LINUX_SPINLOCK_TYPES_H */ | |
9851 | diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h | |
9852 | new file mode 100644 | |
9853 | index 000000000000..f1dac1fb1d6a | |
9854 | --- /dev/null | |
9855 | +++ b/include/linux/spinlock_types_nort.h | |
9856 | @@ -0,0 +1,33 @@ | |
9857 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
9858 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
9859 | + | |
9860 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
9861 | +#error "Do not include directly. Include spinlock_types.h instead" | |
9862 | +#endif | |
9863 | + | |
9864 | +/* | |
9865 | + * The non RT version maps spinlocks to raw_spinlocks | |
9866 | + */ | |
9867 | +typedef struct spinlock { | |
9868 | + union { | |
9869 | + struct raw_spinlock rlock; | |
9870 | + | |
9871 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9872 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
9873 | + struct { | |
9874 | + u8 __padding[LOCK_PADSIZE]; | |
9875 | + struct lockdep_map dep_map; | |
9876 | + }; | |
9877 | +#endif | |
9878 | + }; | |
9879 | +} spinlock_t; | |
9880 | + | |
9881 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
9882 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
9883 | + | |
9884 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
9885 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
9886 | + | |
9887 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
9888 | + | |
9889 | +#endif | |
9890 | diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h | |
9891 | new file mode 100644 | |
9892 | index 000000000000..edffc4d53fc9 | |
9893 | --- /dev/null | |
9894 | +++ b/include/linux/spinlock_types_raw.h | |
9895 | @@ -0,0 +1,56 @@ | |
9896 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
9897 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
9898 | + | |
9899 | +#if defined(CONFIG_SMP) | |
9900 | +# include <asm/spinlock_types.h> | |
9901 | +#else | |
9902 | +# include <linux/spinlock_types_up.h> | |
9903 | +#endif | |
9904 | + | |
9905 | +#include <linux/lockdep.h> | |
9906 | + | |
9907 | +typedef struct raw_spinlock { | |
9908 | + arch_spinlock_t raw_lock; | |
9909 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
9910 | + unsigned int break_lock; | |
9911 | +#endif | |
9912 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
9913 | + unsigned int magic, owner_cpu; | |
9914 | + void *owner; | |
9915 | +#endif | |
9916 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9917 | + struct lockdep_map dep_map; | |
9918 | +#endif | |
9919 | +} raw_spinlock_t; | |
9920 | + | |
9921 | +#define SPINLOCK_MAGIC 0xdead4ead | |
9922 | + | |
9923 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
9924 | + | |
9925 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9926 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
9927 | +#else | |
9928 | +# define SPIN_DEP_MAP_INIT(lockname) | |
9929 | +#endif | |
9930 | + | |
9931 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
9932 | +# define SPIN_DEBUG_INIT(lockname) \ | |
9933 | + .magic = SPINLOCK_MAGIC, \ | |
9934 | + .owner_cpu = -1, \ | |
9935 | + .owner = SPINLOCK_OWNER_INIT, | |
9936 | +#else | |
9937 | +# define SPIN_DEBUG_INIT(lockname) | |
9938 | +#endif | |
9939 | + | |
9940 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
9941 | + { \ | |
9942 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
9943 | + SPIN_DEBUG_INIT(lockname) \ | |
9944 | + SPIN_DEP_MAP_INIT(lockname) } | |
9945 | + | |
9946 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
9947 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
9948 | + | |
9949 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
9950 | + | |
9951 | +#endif | |
9952 | diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h | |
9953 | new file mode 100644 | |
9954 | index 000000000000..3e3d8c5f7a9a | |
9955 | --- /dev/null | |
9956 | +++ b/include/linux/spinlock_types_rt.h | |
9957 | @@ -0,0 +1,48 @@ | |
9958 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
9959 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
9960 | + | |
9961 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
9962 | +#error "Do not include directly. Include spinlock_types.h instead" | |
9963 | +#endif | |
9964 | + | |
9965 | +#include <linux/cache.h> | |
9966 | + | |
9967 | +/* | |
9968 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
9969 | + */ | |
9970 | +typedef struct spinlock { | |
9971 | + struct rt_mutex lock; | |
9972 | + unsigned int break_lock; | |
9973 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9974 | + struct lockdep_map dep_map; | |
9975 | +#endif | |
9976 | +} spinlock_t; | |
9977 | + | |
9978 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
9979 | +# define __RT_SPIN_INITIALIZER(name) \ | |
9980 | + { \ | |
9981 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
9982 | + .save_state = 1, \ | |
9983 | + .file = __FILE__, \ | |
9984 | + .line = __LINE__ , \ | |
9985 | + } | |
9986 | +#else | |
9987 | +# define __RT_SPIN_INITIALIZER(name) \ | |
9988 | + { \ | |
9989 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
9990 | + .save_state = 1, \ | |
9991 | + } | |
9992 | +#endif | |
9993 | + | |
9994 | +/* | |
9995 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
9996 | +*/ | |
9997 | + | |
9998 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
9999 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
10000 | + SPIN_DEP_MAP_INIT(name) } | |
10001 | + | |
10002 | +#define DEFINE_SPINLOCK(name) \ | |
10003 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
10004 | + | |
10005 | +#endif | |
10006 | diff --git a/include/linux/srcu.h b/include/linux/srcu.h | |
10007 | index dc8eb63c6568..e793d3a257da 100644 | |
10008 | --- a/include/linux/srcu.h | |
10009 | +++ b/include/linux/srcu.h | |
10010 | @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); | |
10011 | ||
10012 | void process_srcu(struct work_struct *work); | |
10013 | ||
10014 | -#define __SRCU_STRUCT_INIT(name) \ | |
10015 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
10016 | { \ | |
10017 | .completed = -300, \ | |
10018 | - .per_cpu_ref = &name##_srcu_array, \ | |
10019 | + .per_cpu_ref = &pcpu_name, \ | |
10020 | .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ | |
10021 | .running = false, \ | |
10022 | .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ | |
10023 | @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work); | |
10024 | */ | |
10025 | #define __DEFINE_SRCU(name, is_static) \ | |
10026 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | |
10027 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
10028 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) | |
10029 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
10030 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
10031 | ||
10032 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
c7c16703 | 10033 | index d9718378a8be..e81e6dc7dcb1 100644 |
1a6e0f06 JK |
10034 | --- a/include/linux/suspend.h |
10035 | +++ b/include/linux/suspend.h | |
10036 | @@ -193,6 +193,12 @@ struct platform_freeze_ops { | |
10037 | void (*end)(void); | |
10038 | }; | |
10039 | ||
10040 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | |
10041 | +extern bool pm_in_action; | |
10042 | +#else | |
10043 | +# define pm_in_action false | |
10044 | +#endif | |
10045 | + | |
10046 | #ifdef CONFIG_SUSPEND | |
10047 | /** | |
10048 | * suspend_set_ops - set platform dependent suspend operations | |
10049 | diff --git a/include/linux/swait.h b/include/linux/swait.h | |
10050 | index c1f9c62a8a50..83f004a72320 100644 | |
10051 | --- a/include/linux/swait.h | |
10052 | +++ b/include/linux/swait.h | |
10053 | @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q) | |
10054 | extern void swake_up(struct swait_queue_head *q); | |
10055 | extern void swake_up_all(struct swait_queue_head *q); | |
10056 | extern void swake_up_locked(struct swait_queue_head *q); | |
10057 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
10058 | ||
10059 | extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
10060 | extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
10061 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
c7c16703 | 10062 | index a56523cefb9b..c59a9f0d8ca1 100644 |
1a6e0f06 JK |
10063 | --- a/include/linux/swap.h |
10064 | +++ b/include/linux/swap.h | |
10065 | @@ -11,6 +11,7 @@ | |
10066 | #include <linux/fs.h> | |
10067 | #include <linux/atomic.h> | |
10068 | #include <linux/page-flags.h> | |
10069 | +#include <linux/locallock.h> | |
10070 | #include <asm/page.h> | |
10071 | ||
10072 | struct notifier_block; | |
c7c16703 | 10073 | @@ -246,7 +247,8 @@ struct swap_info_struct { |
1a6e0f06 JK |
10074 | void *workingset_eviction(struct address_space *mapping, struct page *page); |
10075 | bool workingset_refault(void *shadow); | |
10076 | void workingset_activation(struct page *page); | |
10077 | -extern struct list_lru workingset_shadow_nodes; | |
10078 | +extern struct list_lru __workingset_shadow_nodes; | |
10079 | +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
10080 | ||
10081 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | |
10082 | { | |
c7c16703 | 10083 | @@ -291,6 +293,7 @@ extern unsigned long nr_free_pagecache_pages(void); |
1a6e0f06 JK |
10084 | |
10085 | ||
10086 | /* linux/mm/swap.c */ | |
10087 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
10088 | extern void lru_cache_add(struct page *); | |
10089 | extern void lru_cache_add_anon(struct page *page); | |
10090 | extern void lru_cache_add_file(struct page *page); | |
10091 | diff --git a/include/linux/swork.h b/include/linux/swork.h | |
10092 | new file mode 100644 | |
10093 | index 000000000000..f175fa9a6016 | |
10094 | --- /dev/null | |
10095 | +++ b/include/linux/swork.h | |
10096 | @@ -0,0 +1,24 @@ | |
10097 | +#ifndef _LINUX_SWORK_H | |
10098 | +#define _LINUX_SWORK_H | |
10099 | + | |
10100 | +#include <linux/list.h> | |
10101 | + | |
10102 | +struct swork_event { | |
10103 | + struct list_head item; | |
10104 | + unsigned long flags; | |
10105 | + void (*func)(struct swork_event *); | |
10106 | +}; | |
10107 | + | |
10108 | +static inline void INIT_SWORK(struct swork_event *event, | |
10109 | + void (*func)(struct swork_event *)) | |
10110 | +{ | |
10111 | + event->flags = 0; | |
10112 | + event->func = func; | |
10113 | +} | |
10114 | + | |
10115 | +bool swork_queue(struct swork_event *sev); | |
10116 | + | |
10117 | +int swork_get(void); | |
10118 | +void swork_put(void); | |
10119 | + | |
10120 | +#endif /* _LINUX_SWORK_H */ | |
10121 | diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h | |
c7c16703 | 10122 | index 2873baf5372a..eb1a108f17ca 100644 |
1a6e0f06 JK |
10123 | --- a/include/linux/thread_info.h |
10124 | +++ b/include/linux/thread_info.h | |
c7c16703 | 10125 | @@ -107,7 +107,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) |
1a6e0f06 JK |
10126 | #define test_thread_flag(flag) \ |
10127 | test_ti_thread_flag(current_thread_info(), flag) | |
10128 | ||
10129 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10130 | +#ifdef CONFIG_PREEMPT_LAZY | |
10131 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
10132 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10133 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
10134 | +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10135 | + | |
10136 | +#else | |
10137 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10138 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
10139 | +#define tif_need_resched_lazy() 0 | |
10140 | +#endif | |
10141 | ||
10142 | #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES | |
10143 | static inline int arch_within_stack_frames(const void * const stack, | |
10144 | diff --git a/include/linux/timer.h b/include/linux/timer.h | |
10145 | index 51d601f192d4..83cea629efe1 100644 | |
10146 | --- a/include/linux/timer.h | |
10147 | +++ b/include/linux/timer.h | |
10148 | @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); | |
10149 | ||
10150 | extern int try_to_del_timer_sync(struct timer_list *timer); | |
10151 | ||
10152 | -#ifdef CONFIG_SMP | |
10153 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
10154 | extern int del_timer_sync(struct timer_list *timer); | |
10155 | #else | |
10156 | # define del_timer_sync(t) del_timer(t) | |
10157 | diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h | |
10158 | index be007610ceb0..15154b13a53b 100644 | |
10159 | --- a/include/linux/trace_events.h | |
10160 | +++ b/include/linux/trace_events.h | |
10161 | @@ -56,6 +56,9 @@ struct trace_entry { | |
10162 | unsigned char flags; | |
10163 | unsigned char preempt_count; | |
10164 | int pid; | |
10165 | + unsigned short migrate_disable; | |
10166 | + unsigned short padding; | |
10167 | + unsigned char preempt_lazy_count; | |
10168 | }; | |
10169 | ||
10170 | #define TRACE_EVENT_TYPE_MAX \ | |
10171 | diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h | |
10172 | index f30c187ed785..83bf0f798426 100644 | |
10173 | --- a/include/linux/uaccess.h | |
10174 | +++ b/include/linux/uaccess.h | |
10175 | @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void) | |
10176 | */ | |
10177 | static inline void pagefault_disable(void) | |
10178 | { | |
10179 | + migrate_disable(); | |
10180 | pagefault_disabled_inc(); | |
10181 | /* | |
10182 | * make sure to have issued the store before a pagefault | |
10183 | @@ -40,6 +41,7 @@ static inline void pagefault_enable(void) | |
10184 | */ | |
10185 | barrier(); | |
10186 | pagefault_disabled_dec(); | |
10187 | + migrate_enable(); | |
10188 | } | |
10189 | ||
10190 | /* | |
10191 | diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h | |
10192 | index 4a29c75b146e..0a294e950df8 100644 | |
10193 | --- a/include/linux/uprobes.h | |
10194 | +++ b/include/linux/uprobes.h | |
10195 | @@ -27,6 +27,7 @@ | |
10196 | #include <linux/errno.h> | |
10197 | #include <linux/rbtree.h> | |
10198 | #include <linux/types.h> | |
10199 | +#include <linux/wait.h> | |
10200 | ||
10201 | struct vm_area_struct; | |
10202 | struct mm_struct; | |
10203 | diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h | |
10204 | index 613771909b6e..e28c5a43229d 100644 | |
10205 | --- a/include/linux/vmstat.h | |
10206 | +++ b/include/linux/vmstat.h | |
10207 | @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); | |
10208 | */ | |
10209 | static inline void __count_vm_event(enum vm_event_item item) | |
10210 | { | |
10211 | + preempt_disable_rt(); | |
10212 | raw_cpu_inc(vm_event_states.event[item]); | |
10213 | + preempt_enable_rt(); | |
10214 | } | |
10215 | ||
10216 | static inline void count_vm_event(enum vm_event_item item) | |
10217 | @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) | |
10218 | ||
10219 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
10220 | { | |
10221 | + preempt_disable_rt(); | |
10222 | raw_cpu_add(vm_event_states.event[item], delta); | |
10223 | + preempt_enable_rt(); | |
10224 | } | |
10225 | ||
10226 | static inline void count_vm_events(enum vm_event_item item, long delta) | |
10227 | diff --git a/include/linux/wait.h b/include/linux/wait.h | |
c7c16703 | 10228 | index 2408e8d5c05c..db50d6609195 100644 |
1a6e0f06 JK |
10229 | --- a/include/linux/wait.h |
10230 | +++ b/include/linux/wait.h | |
10231 | @@ -8,6 +8,7 @@ | |
10232 | #include <linux/spinlock.h> | |
10233 | #include <asm/current.h> | |
10234 | #include <uapi/linux/wait.h> | |
10235 | +#include <linux/atomic.h> | |
10236 | ||
10237 | typedef struct __wait_queue wait_queue_t; | |
10238 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); | |
10239 | diff --git a/include/net/dst.h b/include/net/dst.h | |
10240 | index 6835d224d47b..55a5a9698f14 100644 | |
10241 | --- a/include/net/dst.h | |
10242 | +++ b/include/net/dst.h | |
10243 | @@ -446,7 +446,7 @@ static inline void dst_confirm(struct dst_entry *dst) | |
10244 | static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, | |
10245 | struct sk_buff *skb) | |
10246 | { | |
10247 | - const struct hh_cache *hh; | |
10248 | + struct hh_cache *hh; | |
10249 | ||
10250 | if (dst->pending_confirm) { | |
10251 | unsigned long now = jiffies; | |
10252 | diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h | |
10253 | index 231e121cc7d9..d125222b979d 100644 | |
10254 | --- a/include/net/gen_stats.h | |
10255 | +++ b/include/net/gen_stats.h | |
10256 | @@ -5,6 +5,7 @@ | |
10257 | #include <linux/socket.h> | |
10258 | #include <linux/rtnetlink.h> | |
10259 | #include <linux/pkt_sched.h> | |
10260 | +#include <net/net_seq_lock.h> | |
10261 | ||
10262 | struct gnet_stats_basic_cpu { | |
10263 | struct gnet_stats_basic_packed bstats; | |
10264 | @@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, | |
10265 | spinlock_t *lock, struct gnet_dump *d, | |
10266 | int padattr); | |
10267 | ||
10268 | -int gnet_stats_copy_basic(const seqcount_t *running, | |
10269 | +int gnet_stats_copy_basic(net_seqlock_t *running, | |
10270 | struct gnet_dump *d, | |
10271 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10272 | struct gnet_stats_basic_packed *b); | |
10273 | -void __gnet_stats_copy_basic(const seqcount_t *running, | |
10274 | +void __gnet_stats_copy_basic(net_seqlock_t *running, | |
10275 | struct gnet_stats_basic_packed *bstats, | |
10276 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10277 | struct gnet_stats_basic_packed *b); | |
10278 | @@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
10279 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10280 | struct gnet_stats_rate_est64 *rate_est, | |
10281 | spinlock_t *stats_lock, | |
10282 | - seqcount_t *running, struct nlattr *opt); | |
10283 | + net_seqlock_t *running, struct nlattr *opt); | |
10284 | void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, | |
10285 | struct gnet_stats_rate_est64 *rate_est); | |
10286 | int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
10287 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10288 | struct gnet_stats_rate_est64 *rate_est, | |
10289 | spinlock_t *stats_lock, | |
10290 | - seqcount_t *running, struct nlattr *opt); | |
10291 | + net_seqlock_t *running, struct nlattr *opt); | |
10292 | bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, | |
10293 | const struct gnet_stats_rate_est64 *rate_est); | |
10294 | #endif | |
10295 | diff --git a/include/net/neighbour.h b/include/net/neighbour.h | |
10296 | index 8b683841e574..bf656008f6e7 100644 | |
10297 | --- a/include/net/neighbour.h | |
10298 | +++ b/include/net/neighbour.h | |
10299 | @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) | |
10300 | } | |
10301 | #endif | |
10302 | ||
10303 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) | |
10304 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
10305 | { | |
10306 | unsigned int seq; | |
10307 | int hh_len; | |
10308 | @@ -501,7 +501,7 @@ struct neighbour_cb { | |
10309 | ||
10310 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | |
10311 | ||
10312 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | |
10313 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
10314 | const struct net_device *dev) | |
10315 | { | |
10316 | unsigned int seq; | |
10317 | diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h | |
10318 | new file mode 100644 | |
10319 | index 000000000000..a7034298a82a | |
10320 | --- /dev/null | |
10321 | +++ b/include/net/net_seq_lock.h | |
10322 | @@ -0,0 +1,15 @@ | |
10323 | +#ifndef __NET_NET_SEQ_LOCK_H__ | |
10324 | +#define __NET_NET_SEQ_LOCK_H__ | |
10325 | + | |
10326 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10327 | +# define net_seqlock_t seqlock_t | |
10328 | +# define net_seq_begin(__r) read_seqbegin(__r) | |
10329 | +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) | |
10330 | + | |
10331 | +#else | |
10332 | +# define net_seqlock_t seqcount_t | |
10333 | +# define net_seq_begin(__r) read_seqcount_begin(__r) | |
10334 | +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) | |
10335 | +#endif | |
10336 | + | |
10337 | +#endif | |
10338 | diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
c7c16703 | 10339 | index 7adf4386ac8f..d3fd5c357268 100644 |
1a6e0f06 JK |
10340 | --- a/include/net/netns/ipv4.h |
10341 | +++ b/include/net/netns/ipv4.h | |
c7c16703 | 10342 | @@ -69,6 +69,7 @@ struct netns_ipv4 { |
1a6e0f06 JK |
10343 | |
10344 | int sysctl_icmp_echo_ignore_all; | |
10345 | int sysctl_icmp_echo_ignore_broadcasts; | |
10346 | + int sysctl_icmp_echo_sysrq; | |
10347 | int sysctl_icmp_ignore_bogus_error_responses; | |
10348 | int sysctl_icmp_ratelimit; | |
10349 | int sysctl_icmp_ratemask; | |
10350 | diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h | |
c7c16703 | 10351 | index e6aa0a249672..b57736f2a8a3 100644 |
1a6e0f06 JK |
10352 | --- a/include/net/sch_generic.h |
10353 | +++ b/include/net/sch_generic.h | |
10354 | @@ -10,6 +10,7 @@ | |
10355 | #include <linux/dynamic_queue_limits.h> | |
10356 | #include <net/gen_stats.h> | |
10357 | #include <net/rtnetlink.h> | |
10358 | +#include <net/net_seq_lock.h> | |
10359 | ||
10360 | struct Qdisc_ops; | |
10361 | struct qdisc_walker; | |
c7c16703 | 10362 | @@ -86,7 +87,7 @@ struct Qdisc { |
1a6e0f06 | 10363 | struct sk_buff *gso_skb ____cacheline_aligned_in_smp; |
c7c16703 | 10364 | struct qdisc_skb_head q; |
1a6e0f06 JK |
10365 | struct gnet_stats_basic_packed bstats; |
10366 | - seqcount_t running; | |
10367 | + net_seqlock_t running; | |
10368 | struct gnet_stats_queue qstats; | |
10369 | unsigned long state; | |
10370 | struct Qdisc *next_sched; | |
c7c16703 | 10371 | @@ -98,13 +99,22 @@ struct Qdisc { |
1a6e0f06 JK |
10372 | spinlock_t busylock ____cacheline_aligned_in_smp; |
10373 | }; | |
10374 | ||
10375 | -static inline bool qdisc_is_running(const struct Qdisc *qdisc) | |
10376 | +static inline bool qdisc_is_running(struct Qdisc *qdisc) | |
10377 | { | |
10378 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10379 | + return spin_is_locked(&qdisc->running.lock) ? true : false; | |
10380 | +#else | |
10381 | return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; | |
10382 | +#endif | |
10383 | } | |
10384 | ||
10385 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
10386 | { | |
10387 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10388 | + if (try_write_seqlock(&qdisc->running)) | |
10389 | + return true; | |
10390 | + return false; | |
10391 | +#else | |
10392 | if (qdisc_is_running(qdisc)) | |
10393 | return false; | |
10394 | /* Variant of write_seqcount_begin() telling lockdep a trylock | |
c7c16703 | 10395 | @@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
1a6e0f06 JK |
10396 | raw_write_seqcount_begin(&qdisc->running); |
10397 | seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); | |
10398 | return true; | |
10399 | +#endif | |
10400 | } | |
10401 | ||
10402 | static inline void qdisc_run_end(struct Qdisc *qdisc) | |
10403 | { | |
10404 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10405 | + write_sequnlock(&qdisc->running); | |
10406 | +#else | |
10407 | write_seqcount_end(&qdisc->running); | |
10408 | +#endif | |
10409 | } | |
10410 | ||
10411 | static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) | |
c7c16703 | 10412 | @@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) |
1a6e0f06 JK |
10413 | return qdisc_lock(root); |
10414 | } | |
10415 | ||
10416 | -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10417 | +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10418 | { | |
10419 | struct Qdisc *root = qdisc_root_sleeping(qdisc); | |
10420 | ||
10421 | diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h | |
10422 | new file mode 100644 | |
10423 | index 000000000000..f7710de1b1f3 | |
10424 | --- /dev/null | |
10425 | +++ b/include/trace/events/hist.h | |
10426 | @@ -0,0 +1,73 @@ | |
10427 | +#undef TRACE_SYSTEM | |
10428 | +#define TRACE_SYSTEM hist | |
10429 | + | |
10430 | +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) | |
10431 | +#define _TRACE_HIST_H | |
10432 | + | |
10433 | +#include "latency_hist.h" | |
10434 | +#include <linux/tracepoint.h> | |
10435 | + | |
10436 | +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) | |
10437 | +#define trace_preemptirqsoff_hist(a, b) | |
10438 | +#define trace_preemptirqsoff_hist_rcuidle(a, b) | |
10439 | +#else | |
10440 | +TRACE_EVENT(preemptirqsoff_hist, | |
10441 | + | |
10442 | + TP_PROTO(int reason, int starthist), | |
10443 | + | |
10444 | + TP_ARGS(reason, starthist), | |
10445 | + | |
10446 | + TP_STRUCT__entry( | |
10447 | + __field(int, reason) | |
10448 | + __field(int, starthist) | |
10449 | + ), | |
10450 | + | |
10451 | + TP_fast_assign( | |
10452 | + __entry->reason = reason; | |
10453 | + __entry->starthist = starthist; | |
10454 | + ), | |
10455 | + | |
10456 | + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), | |
10457 | + __entry->starthist ? "start" : "stop") | |
10458 | +); | |
10459 | +#endif | |
10460 | + | |
10461 | +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
10462 | +#define trace_hrtimer_interrupt(a, b, c, d) | |
10463 | +#else | |
10464 | +TRACE_EVENT(hrtimer_interrupt, | |
10465 | + | |
10466 | + TP_PROTO(int cpu, long long offset, struct task_struct *curr, | |
10467 | + struct task_struct *task), | |
10468 | + | |
10469 | + TP_ARGS(cpu, offset, curr, task), | |
10470 | + | |
10471 | + TP_STRUCT__entry( | |
10472 | + __field(int, cpu) | |
10473 | + __field(long long, offset) | |
10474 | + __array(char, ccomm, TASK_COMM_LEN) | |
10475 | + __field(int, cprio) | |
10476 | + __array(char, tcomm, TASK_COMM_LEN) | |
10477 | + __field(int, tprio) | |
10478 | + ), | |
10479 | + | |
10480 | + TP_fast_assign( | |
10481 | + __entry->cpu = cpu; | |
10482 | + __entry->offset = offset; | |
10483 | + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); | |
10484 | + __entry->cprio = curr->prio; | |
10485 | + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", | |
10486 | + task != NULL ? TASK_COMM_LEN : 7); | |
10487 | + __entry->tprio = task != NULL ? task->prio : -1; | |
10488 | + ), | |
10489 | + | |
10490 | + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", | |
10491 | + __entry->cpu, __entry->offset, __entry->ccomm, | |
10492 | + __entry->cprio, __entry->tcomm, __entry->tprio) | |
10493 | +); | |
10494 | +#endif | |
10495 | + | |
10496 | +#endif /* _TRACE_HIST_H */ | |
10497 | + | |
10498 | +/* This part must be outside protection */ | |
10499 | +#include <trace/define_trace.h> | |
10500 | diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h | |
10501 | new file mode 100644 | |
10502 | index 000000000000..d3f2fbd560b1 | |
10503 | --- /dev/null | |
10504 | +++ b/include/trace/events/latency_hist.h | |
10505 | @@ -0,0 +1,29 @@ | |
10506 | +#ifndef _LATENCY_HIST_H | |
10507 | +#define _LATENCY_HIST_H | |
10508 | + | |
10509 | +enum hist_action { | |
10510 | + IRQS_ON, | |
10511 | + PREEMPT_ON, | |
10512 | + TRACE_STOP, | |
10513 | + IRQS_OFF, | |
10514 | + PREEMPT_OFF, | |
10515 | + TRACE_START, | |
10516 | +}; | |
10517 | + | |
10518 | +static char *actions[] = { | |
10519 | + "IRQS_ON", | |
10520 | + "PREEMPT_ON", | |
10521 | + "TRACE_STOP", | |
10522 | + "IRQS_OFF", | |
10523 | + "PREEMPT_OFF", | |
10524 | + "TRACE_START", | |
10525 | +}; | |
10526 | + | |
10527 | +static inline char *getaction(int action) | |
10528 | +{ | |
10529 | + if (action >= 0 && action <= sizeof(actions)/sizeof(actions[0])) | |
10530 | + return actions[action]; | |
10531 | + return "unknown"; | |
10532 | +} | |
10533 | + | |
10534 | +#endif /* _LATENCY_HIST_H */ | |
10535 | diff --git a/init/Kconfig b/init/Kconfig | |
c7c16703 | 10536 | index 34407f15e6d3..2ce33a32e65d 100644 |
1a6e0f06 JK |
10537 | --- a/init/Kconfig |
10538 | +++ b/init/Kconfig | |
c7c16703 | 10539 | @@ -506,7 +506,7 @@ config TINY_RCU |
1a6e0f06 JK |
10540 | |
10541 | config RCU_EXPERT | |
10542 | bool "Make expert-level adjustments to RCU configuration" | |
10543 | - default n | |
10544 | + default y if PREEMPT_RT_FULL | |
10545 | help | |
10546 | This option needs to be enabled if you wish to make | |
10547 | expert-level adjustments to RCU configuration. By default, | |
c7c16703 | 10548 | @@ -623,7 +623,7 @@ config RCU_FANOUT_LEAF |
1a6e0f06 JK |
10549 | |
10550 | config RCU_FAST_NO_HZ | |
10551 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
10552 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
10553 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
10554 | default n | |
10555 | help | |
10556 | This option permits CPUs to enter dynticks-idle state even if | |
c7c16703 | 10557 | @@ -650,7 +650,7 @@ config TREE_RCU_TRACE |
1a6e0f06 JK |
10558 | config RCU_BOOST |
10559 | bool "Enable RCU priority boosting" | |
10560 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
10561 | - default n | |
10562 | + default y if PREEMPT_RT_FULL | |
10563 | help | |
10564 | This option boosts the priority of preempted RCU readers that | |
10565 | block the current preemptible RCU grace period for too long. | |
c7c16703 JK |
10566 | @@ -781,19 +781,6 @@ config RCU_NOCB_CPU_ALL |
10567 | ||
10568 | endchoice | |
10569 | ||
10570 | -config RCU_EXPEDITE_BOOT | |
10571 | - bool | |
10572 | - default n | |
10573 | - help | |
10574 | - This option enables expedited grace periods at boot time, | |
10575 | - as if rcu_expedite_gp() had been invoked early in boot. | |
10576 | - The corresponding rcu_unexpedite_gp() is invoked from | |
10577 | - rcu_end_inkernel_boot(), which is intended to be invoked | |
10578 | - at the end of the kernel-only boot sequence, just before | |
10579 | - init is exec'ed. | |
10580 | - | |
10581 | - Accept the default if unsure. | |
10582 | - | |
10583 | endmenu # "RCU Subsystem" | |
10584 | ||
10585 | config BUILD_BIN2C | |
10586 | @@ -1064,6 +1051,7 @@ config CFS_BANDWIDTH | |
1a6e0f06 JK |
10587 | config RT_GROUP_SCHED |
10588 | bool "Group scheduling for SCHED_RR/FIFO" | |
10589 | depends on CGROUP_SCHED | |
10590 | + depends on !PREEMPT_RT_FULL | |
10591 | default n | |
10592 | help | |
10593 | This feature lets you explicitly allocate real CPU bandwidth | |
c7c16703 | 10594 | @@ -1772,6 +1760,7 @@ choice |
1a6e0f06 JK |
10595 | |
10596 | config SLAB | |
10597 | bool "SLAB" | |
10598 | + depends on !PREEMPT_RT_FULL | |
10599 | select HAVE_HARDENED_USERCOPY_ALLOCATOR | |
10600 | help | |
10601 | The regular slab allocator that is established and known to work | |
c7c16703 | 10602 | @@ -1792,6 +1781,7 @@ config SLUB |
1a6e0f06 JK |
10603 | config SLOB |
10604 | depends on EXPERT | |
10605 | bool "SLOB (Simple Allocator)" | |
c7c16703 JK |
10606 | + depends on !PREEMPT_RT_FULL |
10607 | help | |
10608 | SLOB replaces the stock allocator with a drastically simpler | |
10609 | allocator. SLOB is generally more space efficient but | |
10610 | @@ -1810,7 +1800,7 @@ config SLAB_FREELIST_RANDOM | |
10611 | ||
10612 | config SLUB_CPU_PARTIAL | |
10613 | default y | |
10614 | - depends on SLUB && SMP | |
10615 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
10616 | bool "SLUB per cpu partial cache" | |
10617 | help | |
10618 | Per cpu partial caches accellerate objects allocation and freeing | |
10619 | diff --git a/init/Makefile b/init/Makefile | |
10620 | index c4fb45525d08..821190dfaa75 100644 | |
10621 | --- a/init/Makefile | |
10622 | +++ b/init/Makefile | |
10623 | @@ -35,4 +35,4 @@ $(obj)/version.o: include/generated/compile.h | |
10624 | include/generated/compile.h: FORCE | |
10625 | @$($(quiet)chk_compile.h) | |
10626 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
10627 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
10628 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
10629 | diff --git a/init/main.c b/init/main.c | |
10630 | index 2858be732f6d..3c97c3c91d88 100644 | |
10631 | --- a/init/main.c | |
10632 | +++ b/init/main.c | |
10633 | @@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) | |
10634 | setup_command_line(command_line); | |
10635 | setup_nr_cpu_ids(); | |
10636 | setup_per_cpu_areas(); | |
10637 | + softirq_early_init(); | |
10638 | boot_cpu_state_init(); | |
10639 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
1a6e0f06 | 10640 | |
1a6e0f06 | 10641 | diff --git a/ipc/sem.c b/ipc/sem.c |
c7c16703 | 10642 | index 10b94bc59d4a..b8360eaacc7a 100644 |
1a6e0f06 JK |
10643 | --- a/ipc/sem.c |
10644 | +++ b/ipc/sem.c | |
10645 | @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) | |
10646 | static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10647 | struct sem_queue *q, int error) | |
10648 | { | |
10649 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10650 | + struct task_struct *p = q->sleeper; | |
10651 | + get_task_struct(p); | |
10652 | + q->status = error; | |
10653 | + wake_up_process(p); | |
10654 | + put_task_struct(p); | |
10655 | +#else | |
10656 | if (list_empty(pt)) { | |
10657 | /* | |
10658 | * Hold preempt off so that we don't get preempted and have the | |
10659 | @@ -723,6 +730,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10660 | q->pid = error; | |
10661 | ||
10662 | list_add_tail(&q->list, pt); | |
10663 | +#endif | |
10664 | } | |
10665 | ||
10666 | /** | |
10667 | @@ -736,6 +744,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10668 | */ | |
10669 | static void wake_up_sem_queue_do(struct list_head *pt) | |
10670 | { | |
10671 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
10672 | struct sem_queue *q, *t; | |
10673 | int did_something; | |
10674 | ||
10675 | @@ -748,6 +757,7 @@ static void wake_up_sem_queue_do(struct list_head *pt) | |
10676 | } | |
10677 | if (did_something) | |
10678 | preempt_enable(); | |
10679 | +#endif | |
10680 | } | |
10681 | ||
10682 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |
10683 | diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks | |
10684 | index ebdb0043203a..b9e6aa7e5aa6 100644 | |
10685 | --- a/kernel/Kconfig.locks | |
10686 | +++ b/kernel/Kconfig.locks | |
10687 | @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW | |
10688 | ||
10689 | config MUTEX_SPIN_ON_OWNER | |
10690 | def_bool y | |
10691 | - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW | |
10692 | + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
10693 | ||
10694 | config RWSEM_SPIN_ON_OWNER | |
10695 | def_bool y | |
10696 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
10697 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
10698 | ||
10699 | config LOCK_SPIN_ON_OWNER | |
10700 | def_bool y | |
10701 | diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt | |
10702 | index 3f9c97419f02..11dbe26a8279 100644 | |
10703 | --- a/kernel/Kconfig.preempt | |
10704 | +++ b/kernel/Kconfig.preempt | |
10705 | @@ -1,3 +1,16 @@ | |
10706 | +config PREEMPT | |
10707 | + bool | |
10708 | + select PREEMPT_COUNT | |
10709 | + | |
10710 | +config PREEMPT_RT_BASE | |
10711 | + bool | |
10712 | + select PREEMPT | |
10713 | + | |
10714 | +config HAVE_PREEMPT_LAZY | |
10715 | + bool | |
10716 | + | |
10717 | +config PREEMPT_LAZY | |
10718 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
10719 | ||
10720 | choice | |
10721 | prompt "Preemption Model" | |
10722 | @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY | |
10723 | ||
10724 | Select this if you are building a kernel for a desktop system. | |
10725 | ||
10726 | -config PREEMPT | |
10727 | +config PREEMPT__LL | |
10728 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
10729 | - select PREEMPT_COUNT | |
10730 | + select PREEMPT | |
10731 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
10732 | help | |
10733 | This option reduces the latency of the kernel by making | |
10734 | @@ -52,6 +65,22 @@ config PREEMPT | |
10735 | embedded system with latency requirements in the milliseconds | |
10736 | range. | |
10737 | ||
10738 | +config PREEMPT_RTB | |
10739 | + bool "Preemptible Kernel (Basic RT)" | |
10740 | + select PREEMPT_RT_BASE | |
10741 | + help | |
10742 | + This option is basically the same as (Low-Latency Desktop) but | |
10743 | + enables changes which are preliminary for the full preemptible | |
10744 | + RT kernel. | |
10745 | + | |
10746 | +config PREEMPT_RT_FULL | |
10747 | + bool "Fully Preemptible Kernel (RT)" | |
10748 | + depends on IRQ_FORCED_THREADING | |
10749 | + select PREEMPT_RT_BASE | |
10750 | + select PREEMPT_RCU | |
10751 | + help | |
10752 | + All and everything | |
10753 | + | |
10754 | endchoice | |
10755 | ||
10756 | config PREEMPT_COUNT | |
1a6e0f06 | 10757 | diff --git a/kernel/cgroup.c b/kernel/cgroup.c |
c7c16703 | 10758 | index 85bc9beb046d..3b8da75ba2e0 100644 |
1a6e0f06 JK |
10759 | --- a/kernel/cgroup.c |
10760 | +++ b/kernel/cgroup.c | |
c7c16703 | 10761 | @@ -5040,10 +5040,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) |
1a6e0f06 JK |
10762 | queue_work(cgroup_destroy_wq, &css->destroy_work); |
10763 | } | |
10764 | ||
10765 | -static void css_release_work_fn(struct work_struct *work) | |
10766 | +static void css_release_work_fn(struct swork_event *sev) | |
10767 | { | |
10768 | struct cgroup_subsys_state *css = | |
10769 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
10770 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
10771 | struct cgroup_subsys *ss = css->ss; | |
10772 | struct cgroup *cgrp = css->cgroup; | |
10773 | ||
c7c16703 | 10774 | @@ -5086,8 +5086,8 @@ static void css_release(struct percpu_ref *ref) |
1a6e0f06 JK |
10775 | struct cgroup_subsys_state *css = |
10776 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
10777 | ||
10778 | - INIT_WORK(&css->destroy_work, css_release_work_fn); | |
10779 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
10780 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
10781 | + swork_queue(&css->destroy_swork); | |
10782 | } | |
10783 | ||
10784 | static void init_and_link_css(struct cgroup_subsys_state *css, | |
c7c16703 | 10785 | @@ -5742,6 +5742,7 @@ static int __init cgroup_wq_init(void) |
1a6e0f06 JK |
10786 | */ |
10787 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
10788 | BUG_ON(!cgroup_destroy_wq); | |
10789 | + BUG_ON(swork_get()); | |
10790 | ||
10791 | /* | |
10792 | * Used to destroy pidlists and separate to serve as flush domain. | |
10793 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
c7c16703 | 10794 | index 217fd2e7f435..69444f1bc924 100644 |
1a6e0f06 JK |
10795 | --- a/kernel/cpu.c |
10796 | +++ b/kernel/cpu.c | |
c7c16703 | 10797 | @@ -239,6 +239,289 @@ static struct { |
1a6e0f06 JK |
10798 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) |
10799 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | |
10800 | ||
10801 | +/** | |
10802 | + * hotplug_pcp - per cpu hotplug descriptor | |
10803 | + * @unplug: set when pin_current_cpu() needs to sync tasks | |
10804 | + * @sync_tsk: the task that waits for tasks to finish pinned sections | |
10805 | + * @refcount: counter of tasks in pinned sections | |
10806 | + * @grab_lock: set when the tasks entering pinned sections should wait | |
10807 | + * @synced: notifier for @sync_tsk to tell cpu_down it's finished | |
10808 | + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) | |
10809 | + * @mutex_init: zero if the mutex hasn't been initialized yet. | |
10810 | + * | |
10811 | + * Although @unplug and @sync_tsk may point to the same task, the @unplug | |
10812 | + * is used as a flag and still exists after @sync_tsk has exited and | |
10813 | + * @sync_tsk set to NULL. | |
10814 | + */ | |
10815 | +struct hotplug_pcp { | |
10816 | + struct task_struct *unplug; | |
10817 | + struct task_struct *sync_tsk; | |
10818 | + int refcount; | |
10819 | + int grab_lock; | |
10820 | + struct completion synced; | |
10821 | + struct completion unplug_wait; | |
10822 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10823 | + /* | |
10824 | + * Note, on PREEMPT_RT, the hotplug lock must save the state of | |
10825 | + * the task, otherwise the mutex will cause the task to fail | |
10826 | + * to sleep when required. (Because it's called from migrate_disable()) | |
10827 | + * | |
10828 | + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's | |
10829 | + * state. | |
10830 | + */ | |
10831 | + spinlock_t lock; | |
10832 | +#else | |
10833 | + struct mutex mutex; | |
10834 | +#endif | |
10835 | + int mutex_init; | |
10836 | +}; | |
10837 | + | |
10838 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10839 | +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) | |
10840 | +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) | |
10841 | +#else | |
10842 | +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) | |
10843 | +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) | |
10844 | +#endif | |
10845 | + | |
10846 | +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); | |
10847 | + | |
10848 | +/** | |
10849 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
10850 | + * | |
10851 | + * Lightweight version of get_online_cpus() to prevent cpu from being | |
10852 | + * unplugged when code runs in a migration disabled region. | |
10853 | + * | |
10854 | + * Must be called with preemption disabled (preempt_count = 1)! | |
10855 | + */ | |
10856 | +void pin_current_cpu(void) | |
10857 | +{ | |
10858 | + struct hotplug_pcp *hp; | |
10859 | + int force = 0; | |
10860 | + | |
10861 | +retry: | |
10862 | + hp = this_cpu_ptr(&hotplug_pcp); | |
10863 | + | |
10864 | + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || | |
10865 | + hp->unplug == current) { | |
10866 | + hp->refcount++; | |
10867 | + return; | |
10868 | + } | |
10869 | + if (hp->grab_lock) { | |
10870 | + preempt_enable(); | |
10871 | + hotplug_lock(hp); | |
10872 | + hotplug_unlock(hp); | |
10873 | + } else { | |
10874 | + preempt_enable(); | |
10875 | + /* | |
10876 | + * Try to push this task off of this CPU. | |
10877 | + */ | |
10878 | + if (!migrate_me()) { | |
10879 | + preempt_disable(); | |
10880 | + hp = this_cpu_ptr(&hotplug_pcp); | |
10881 | + if (!hp->grab_lock) { | |
10882 | + /* | |
10883 | + * Just let it continue it's already pinned | |
10884 | + * or about to sleep. | |
10885 | + */ | |
10886 | + force = 1; | |
10887 | + goto retry; | |
10888 | + } | |
10889 | + preempt_enable(); | |
10890 | + } | |
10891 | + } | |
10892 | + preempt_disable(); | |
10893 | + goto retry; | |
10894 | +} | |
10895 | + | |
10896 | +/** | |
10897 | + * unpin_current_cpu - Allow unplug of current cpu | |
10898 | + * | |
10899 | + * Must be called with preemption or interrupts disabled! | |
10900 | + */ | |
10901 | +void unpin_current_cpu(void) | |
10902 | +{ | |
10903 | + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); | |
10904 | + | |
10905 | + WARN_ON(hp->refcount <= 0); | |
10906 | + | |
10907 | + /* This is safe. sync_unplug_thread is pinned to this cpu */ | |
10908 | + if (!--hp->refcount && hp->unplug && hp->unplug != current) | |
10909 | + wake_up_process(hp->unplug); | |
10910 | +} | |
10911 | + | |
10912 | +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | |
10913 | +{ | |
10914 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10915 | + while (hp->refcount) { | |
10916 | + schedule_preempt_disabled(); | |
10917 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10918 | + } | |
10919 | +} | |
10920 | + | |
10921 | +static int sync_unplug_thread(void *data) | |
10922 | +{ | |
10923 | + struct hotplug_pcp *hp = data; | |
10924 | + | |
10925 | + wait_for_completion(&hp->unplug_wait); | |
10926 | + preempt_disable(); | |
10927 | + hp->unplug = current; | |
10928 | + wait_for_pinned_cpus(hp); | |
10929 | + | |
10930 | + /* | |
10931 | + * This thread will synchronize the cpu_down() with threads | |
10932 | + * that have pinned the CPU. When the pinned CPU count reaches | |
10933 | + * zero, we inform the cpu_down code to continue to the next step. | |
10934 | + */ | |
10935 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10936 | + preempt_enable(); | |
10937 | + complete(&hp->synced); | |
10938 | + | |
10939 | + /* | |
10940 | + * If all succeeds, the next step will need tasks to wait till | |
10941 | + * the CPU is offline before continuing. To do this, the grab_lock | |
10942 | + * is set and tasks going into pin_current_cpu() will block on the | |
10943 | + * mutex. But we still need to wait for those that are already in | |
10944 | + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() | |
10945 | + * will kick this thread out. | |
10946 | + */ | |
10947 | + while (!hp->grab_lock && !kthread_should_stop()) { | |
10948 | + schedule(); | |
10949 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10950 | + } | |
10951 | + | |
10952 | + /* Make sure grab_lock is seen before we see a stale completion */ | |
10953 | + smp_mb(); | |
10954 | + | |
10955 | + /* | |
10956 | + * Now just before cpu_down() enters stop machine, we need to make | |
10957 | + * sure all tasks that are in pinned CPU sections are out, and new | |
10958 | + * tasks will now grab the lock, keeping them from entering pinned | |
10959 | + * CPU sections. | |
10960 | + */ | |
10961 | + if (!kthread_should_stop()) { | |
10962 | + preempt_disable(); | |
10963 | + wait_for_pinned_cpus(hp); | |
10964 | + preempt_enable(); | |
10965 | + complete(&hp->synced); | |
10966 | + } | |
10967 | + | |
10968 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10969 | + while (!kthread_should_stop()) { | |
10970 | + schedule(); | |
10971 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
10972 | + } | |
10973 | + set_current_state(TASK_RUNNING); | |
10974 | + | |
10975 | + /* | |
10976 | + * Force this thread off this CPU as it's going down and | |
10977 | + * we don't want any more work on this CPU. | |
10978 | + */ | |
10979 | + current->flags &= ~PF_NO_SETAFFINITY; | |
10980 | + set_cpus_allowed_ptr(current, cpu_present_mask); | |
10981 | + migrate_me(); | |
10982 | + return 0; | |
10983 | +} | |
10984 | + | |
10985 | +static void __cpu_unplug_sync(struct hotplug_pcp *hp) | |
10986 | +{ | |
10987 | + wake_up_process(hp->sync_tsk); | |
10988 | + wait_for_completion(&hp->synced); | |
10989 | +} | |
10990 | + | |
10991 | +static void __cpu_unplug_wait(unsigned int cpu) | |
10992 | +{ | |
10993 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
10994 | + | |
10995 | + complete(&hp->unplug_wait); | |
10996 | + wait_for_completion(&hp->synced); | |
10997 | +} | |
10998 | + | |
10999 | +/* | |
11000 | + * Start the sync_unplug_thread on the target cpu and wait for it to | |
11001 | + * complete. | |
11002 | + */ | |
11003 | +static int cpu_unplug_begin(unsigned int cpu) | |
11004 | +{ | |
11005 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11006 | + int err; | |
11007 | + | |
11008 | + /* Protected by cpu_hotplug.lock */ | |
11009 | + if (!hp->mutex_init) { | |
11010 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11011 | + spin_lock_init(&hp->lock); | |
11012 | +#else | |
11013 | + mutex_init(&hp->mutex); | |
11014 | +#endif | |
11015 | + hp->mutex_init = 1; | |
11016 | + } | |
11017 | + | |
11018 | + /* Inform the scheduler to migrate tasks off this CPU */ | |
11019 | + tell_sched_cpu_down_begin(cpu); | |
11020 | + | |
11021 | + init_completion(&hp->synced); | |
11022 | + init_completion(&hp->unplug_wait); | |
11023 | + | |
11024 | + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); | |
11025 | + if (IS_ERR(hp->sync_tsk)) { | |
11026 | + err = PTR_ERR(hp->sync_tsk); | |
11027 | + hp->sync_tsk = NULL; | |
11028 | + return err; | |
11029 | + } | |
11030 | + kthread_bind(hp->sync_tsk, cpu); | |
11031 | + | |
11032 | + /* | |
11033 | + * Wait for tasks to get out of the pinned sections, | |
11034 | + * it's still OK if new tasks enter. Some CPU notifiers will | |
11035 | + * wait for tasks that are going to enter these sections and | |
11036 | + * we must not have them block. | |
11037 | + */ | |
11038 | + wake_up_process(hp->sync_tsk); | |
11039 | + return 0; | |
11040 | +} | |
11041 | + | |
11042 | +static void cpu_unplug_sync(unsigned int cpu) | |
11043 | +{ | |
11044 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11045 | + | |
11046 | + init_completion(&hp->synced); | |
11047 | + /* The completion needs to be initialzied before setting grab_lock */ | |
11048 | + smp_wmb(); | |
11049 | + | |
11050 | + /* Grab the mutex before setting grab_lock */ | |
11051 | + hotplug_lock(hp); | |
11052 | + hp->grab_lock = 1; | |
11053 | + | |
11054 | + /* | |
11055 | + * The CPU notifiers have been completed. | |
11056 | + * Wait for tasks to get out of pinned CPU sections and have new | |
11057 | + * tasks block until the CPU is completely down. | |
11058 | + */ | |
11059 | + __cpu_unplug_sync(hp); | |
11060 | + | |
11061 | + /* All done with the sync thread */ | |
11062 | + kthread_stop(hp->sync_tsk); | |
11063 | + hp->sync_tsk = NULL; | |
11064 | +} | |
11065 | + | |
11066 | +static void cpu_unplug_done(unsigned int cpu) | |
11067 | +{ | |
11068 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11069 | + | |
11070 | + hp->unplug = NULL; | |
11071 | + /* Let all tasks know cpu unplug is finished before cleaning up */ | |
11072 | + smp_wmb(); | |
11073 | + | |
11074 | + if (hp->sync_tsk) | |
11075 | + kthread_stop(hp->sync_tsk); | |
11076 | + | |
11077 | + if (hp->grab_lock) { | |
11078 | + hotplug_unlock(hp); | |
11079 | + /* protected by cpu_hotplug.lock */ | |
11080 | + hp->grab_lock = 0; | |
11081 | + } | |
11082 | + tell_sched_cpu_down_done(cpu); | |
11083 | +} | |
11084 | ||
11085 | void get_online_cpus(void) | |
11086 | { | |
c7c16703 | 11087 | @@ -789,10 +1072,14 @@ static int takedown_cpu(unsigned int cpu) |
1a6e0f06 JK |
11088 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
11089 | int err; | |
11090 | ||
11091 | + __cpu_unplug_wait(cpu); | |
11092 | /* Park the smpboot threads */ | |
11093 | kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); | |
11094 | smpboot_park_threads(cpu); | |
11095 | ||
11096 | + /* Notifiers are done. Don't let any more tasks pin this CPU. */ | |
11097 | + cpu_unplug_sync(cpu); | |
11098 | + | |
11099 | /* | |
11100 | * Prevent irq alloc/free while the dying cpu reorganizes the | |
11101 | * interrupt affinities. | |
c7c16703 | 11102 | @@ -877,6 +1164,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11103 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
11104 | int prev_state, ret = 0; | |
11105 | bool hasdied = false; | |
11106 | + int mycpu; | |
11107 | + cpumask_var_t cpumask; | |
11108 | + cpumask_var_t cpumask_org; | |
11109 | ||
11110 | if (num_online_cpus() == 1) | |
11111 | return -EBUSY; | |
c7c16703 | 11112 | @@ -884,7 +1174,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11113 | if (!cpu_present(cpu)) |
11114 | return -EINVAL; | |
11115 | ||
11116 | + /* Move the downtaker off the unplug cpu */ | |
11117 | + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) | |
11118 | + return -ENOMEM; | |
11119 | + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { | |
11120 | + free_cpumask_var(cpumask); | |
11121 | + return -ENOMEM; | |
11122 | + } | |
11123 | + | |
11124 | + cpumask_copy(cpumask_org, tsk_cpus_allowed(current)); | |
11125 | + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); | |
11126 | + set_cpus_allowed_ptr(current, cpumask); | |
11127 | + free_cpumask_var(cpumask); | |
11128 | + migrate_disable(); | |
11129 | + mycpu = smp_processor_id(); | |
11130 | + if (mycpu == cpu) { | |
11131 | + printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); | |
11132 | + migrate_enable(); | |
11133 | + ret = -EBUSY; | |
11134 | + goto restore_cpus; | |
11135 | + } | |
11136 | + | |
11137 | + migrate_enable(); | |
11138 | cpu_hotplug_begin(); | |
11139 | + ret = cpu_unplug_begin(cpu); | |
11140 | + if (ret) { | |
11141 | + printk("cpu_unplug_begin(%d) failed\n", cpu); | |
11142 | + goto out_cancel; | |
11143 | + } | |
11144 | ||
11145 | cpuhp_tasks_frozen = tasks_frozen; | |
11146 | ||
c7c16703 | 11147 | @@ -923,10 +1240,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11148 | |
11149 | hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; | |
11150 | out: | |
11151 | + cpu_unplug_done(cpu); | |
11152 | +out_cancel: | |
11153 | cpu_hotplug_done(); | |
11154 | /* This post dead nonsense must die */ | |
11155 | if (!ret && hasdied) | |
11156 | cpu_notify_nofail(CPU_POST_DEAD, cpu); | |
11157 | +restore_cpus: | |
11158 | + set_cpus_allowed_ptr(current, cpumask_org); | |
11159 | + free_cpumask_var(cpumask_org); | |
11160 | return ret; | |
11161 | } | |
11162 | ||
11163 | diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c | |
11164 | index fc1ef736253c..83c666537a7a 100644 | |
11165 | --- a/kernel/debug/kdb/kdb_io.c | |
11166 | +++ b/kernel/debug/kdb/kdb_io.c | |
11167 | @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11168 | int linecount; | |
11169 | int colcount; | |
11170 | int logging, saved_loglevel = 0; | |
11171 | - int saved_trap_printk; | |
11172 | int got_printf_lock = 0; | |
11173 | int retlen = 0; | |
11174 | int fnd, len; | |
11175 | @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11176 | unsigned long uninitialized_var(flags); | |
11177 | ||
11178 | preempt_disable(); | |
11179 | - saved_trap_printk = kdb_trap_printk; | |
11180 | - kdb_trap_printk = 0; | |
11181 | ||
11182 | /* Serialize kdb_printf if multiple cpus try to write at once. | |
11183 | * But if any cpu goes recursive in kdb, just print the output, | |
11184 | @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11185 | } else { | |
11186 | __release(kdb_printf_lock); | |
11187 | } | |
11188 | - kdb_trap_printk = saved_trap_printk; | |
11189 | preempt_enable(); | |
11190 | return retlen; | |
11191 | } | |
11192 | @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...) | |
11193 | va_list ap; | |
11194 | int r; | |
11195 | ||
11196 | + kdb_trap_printk++; | |
11197 | va_start(ap, fmt); | |
11198 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
11199 | va_end(ap); | |
11200 | + kdb_trap_printk--; | |
11201 | ||
11202 | return r; | |
11203 | } | |
11204 | diff --git a/kernel/events/core.c b/kernel/events/core.c | |
c7c16703 | 11205 | index 02c8421f8c01..3748cb7b2d6e 100644 |
1a6e0f06 JK |
11206 | --- a/kernel/events/core.c |
11207 | +++ b/kernel/events/core.c | |
c7c16703 | 11208 | @@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) |
1a6e0f06 JK |
11209 | raw_spin_lock_init(&cpuctx->hrtimer_lock); |
11210 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
11211 | timer->function = perf_mux_hrtimer_handler; | |
11212 | + timer->irqsafe = 1; | |
11213 | } | |
11214 | ||
11215 | static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |
c7c16703 | 11216 | @@ -8335,6 +8336,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) |
1a6e0f06 JK |
11217 | |
11218 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
11219 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
11220 | + hwc->hrtimer.irqsafe = 1; | |
11221 | ||
11222 | /* | |
11223 | * Since hrtimers have a fixed rate, we can do a static freq->period | |
11224 | diff --git a/kernel/exit.c b/kernel/exit.c | |
c7c16703 | 11225 | index 3076f3089919..fb2ebcf3ca7c 100644 |
1a6e0f06 JK |
11226 | --- a/kernel/exit.c |
11227 | +++ b/kernel/exit.c | |
11228 | @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) | |
11229 | * Do this under ->siglock, we can race with another thread | |
11230 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
11231 | */ | |
11232 | - flush_sigqueue(&tsk->pending); | |
11233 | + flush_task_sigqueue(tsk); | |
11234 | tsk->sighand = NULL; | |
11235 | spin_unlock(&sighand->siglock); | |
11236 | ||
11237 | diff --git a/kernel/fork.c b/kernel/fork.c | |
c7c16703 | 11238 | index ba8a01564985..47784f8aed37 100644 |
1a6e0f06 JK |
11239 | --- a/kernel/fork.c |
11240 | +++ b/kernel/fork.c | |
c7c16703 JK |
11241 | @@ -76,6 +76,7 @@ |
11242 | #include <linux/compiler.h> | |
11243 | #include <linux/sysctl.h> | |
11244 | #include <linux/kcov.h> | |
11245 | +#include <linux/kprobes.h> | |
11246 | ||
11247 | #include <asm/pgtable.h> | |
11248 | #include <asm/pgalloc.h> | |
11249 | @@ -376,13 +377,24 @@ static inline void put_signal_struct(struct signal_struct *sig) | |
1a6e0f06 JK |
11250 | if (atomic_dec_and_test(&sig->sigcnt)) |
11251 | free_signal_struct(sig); | |
11252 | } | |
11253 | - | |
11254 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11255 | +static | |
11256 | +#endif | |
11257 | void __put_task_struct(struct task_struct *tsk) | |
11258 | { | |
11259 | WARN_ON(!tsk->exit_state); | |
c7c16703 JK |
11260 | WARN_ON(atomic_read(&tsk->usage)); |
11261 | WARN_ON(tsk == current); | |
11262 | ||
11263 | + /* | |
11264 | + * Remove function-return probe instances associated with this | |
11265 | + * task and put them back on the free list. | |
11266 | + */ | |
11267 | + kprobe_flush_task(tsk); | |
11268 | + | |
11269 | + /* Task is done with its stack. */ | |
11270 | + put_task_stack(tsk); | |
11271 | + | |
11272 | cgroup_free(tsk); | |
11273 | task_numa_free(tsk); | |
11274 | security_task_free(tsk); | |
11275 | @@ -393,7 +405,18 @@ void __put_task_struct(struct task_struct *tsk) | |
1a6e0f06 JK |
11276 | if (!profile_handoff_task(tsk)) |
11277 | free_task(tsk); | |
11278 | } | |
11279 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11280 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
11281 | +#else | |
11282 | +void __put_task_struct_cb(struct rcu_head *rhp) | |
11283 | +{ | |
11284 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
11285 | + | |
11286 | + __put_task_struct(tsk); | |
11287 | + | |
11288 | +} | |
11289 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
11290 | +#endif | |
11291 | ||
11292 | void __init __weak arch_task_cache_init(void) { } | |
11293 | ||
c7c16703 | 11294 | @@ -852,6 +875,19 @@ void __mmdrop(struct mm_struct *mm) |
1a6e0f06 JK |
11295 | } |
11296 | EXPORT_SYMBOL_GPL(__mmdrop); | |
11297 | ||
11298 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11299 | +/* | |
11300 | + * RCU callback for delayed mm drop. Not strictly rcu, but we don't | |
11301 | + * want another facility to make this work. | |
11302 | + */ | |
11303 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
11304 | +{ | |
11305 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
11306 | + | |
11307 | + __mmdrop(mm); | |
11308 | +} | |
11309 | +#endif | |
11310 | + | |
11311 | static inline void __mmput(struct mm_struct *mm) | |
11312 | { | |
11313 | VM_BUG_ON(atomic_read(&mm->mm_users)); | |
c7c16703 | 11314 | @@ -1426,6 +1462,9 @@ static void rt_mutex_init_task(struct task_struct *p) |
1a6e0f06 JK |
11315 | */ |
11316 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
11317 | { | |
11318 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11319 | + tsk->posix_timer_list = NULL; | |
11320 | +#endif | |
11321 | tsk->cputime_expires.prof_exp = 0; | |
11322 | tsk->cputime_expires.virt_exp = 0; | |
11323 | tsk->cputime_expires.sched_exp = 0; | |
c7c16703 | 11324 | @@ -1552,6 +1591,7 @@ static __latent_entropy struct task_struct *copy_process( |
1a6e0f06 JK |
11325 | spin_lock_init(&p->alloc_lock); |
11326 | ||
11327 | init_sigpending(&p->pending); | |
11328 | + p->sigqueue_cache = NULL; | |
11329 | ||
11330 | p->utime = p->stime = p->gtime = 0; | |
11331 | p->utimescaled = p->stimescaled = 0; | |
11332 | diff --git a/kernel/futex.c b/kernel/futex.c | |
c7c16703 | 11333 | index 2c4be467fecd..064917c2d9a5 100644 |
1a6e0f06 JK |
11334 | --- a/kernel/futex.c |
11335 | +++ b/kernel/futex.c | |
c7c16703 | 11336 | @@ -904,7 +904,9 @@ void exit_pi_state_list(struct task_struct *curr) |
1a6e0f06 JK |
11337 | * task still owns the PI-state: |
11338 | */ | |
11339 | if (head->next != next) { | |
11340 | + raw_spin_unlock_irq(&curr->pi_lock); | |
11341 | spin_unlock(&hb->lock); | |
11342 | + raw_spin_lock_irq(&curr->pi_lock); | |
11343 | continue; | |
11344 | } | |
11345 | ||
c7c16703 | 11346 | @@ -1299,6 +1301,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
1a6e0f06 JK |
11347 | struct futex_pi_state *pi_state = this->pi_state; |
11348 | u32 uninitialized_var(curval), newval; | |
11349 | WAKE_Q(wake_q); | |
11350 | + WAKE_Q(wake_sleeper_q); | |
11351 | bool deboost; | |
11352 | int ret = 0; | |
11353 | ||
c7c16703 | 11354 | @@ -1365,7 +1368,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
1a6e0f06 JK |
11355 | |
11356 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
11357 | ||
11358 | - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
11359 | + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, | |
11360 | + &wake_sleeper_q); | |
11361 | ||
11362 | /* | |
11363 | * First unlock HB so the waiter does not spin on it once he got woken | |
c7c16703 | 11364 | @@ -1373,8 +1377,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
1a6e0f06 JK |
11365 | * deboost first (and lose our higher priority), then the task might get |
11366 | * scheduled away before the wake up can take place. | |
11367 | */ | |
11368 | - spin_unlock(&hb->lock); | |
11369 | + deboost |= spin_unlock_no_deboost(&hb->lock); | |
11370 | wake_up_q(&wake_q); | |
11371 | + wake_up_q_sleeper(&wake_sleeper_q); | |
11372 | if (deboost) | |
11373 | rt_mutex_adjust_prio(current); | |
11374 | ||
c7c16703 | 11375 | @@ -1924,6 +1929,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, |
1a6e0f06 JK |
11376 | requeue_pi_wake_futex(this, &key2, hb2); |
11377 | drop_count++; | |
11378 | continue; | |
11379 | + } else if (ret == -EAGAIN) { | |
11380 | + /* | |
11381 | + * Waiter was woken by timeout or | |
11382 | + * signal and has set pi_blocked_on to | |
11383 | + * PI_WAKEUP_INPROGRESS before we | |
11384 | + * tried to enqueue it on the rtmutex. | |
11385 | + */ | |
11386 | + this->pi_state = NULL; | |
11387 | + put_pi_state(pi_state); | |
11388 | + continue; | |
11389 | } else if (ret) { | |
11390 | /* | |
11391 | * rt_mutex_start_proxy_lock() detected a | |
c7c16703 | 11392 | @@ -2814,7 +2829,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
11393 | struct hrtimer_sleeper timeout, *to = NULL; |
11394 | struct rt_mutex_waiter rt_waiter; | |
11395 | struct rt_mutex *pi_mutex = NULL; | |
11396 | - struct futex_hash_bucket *hb; | |
11397 | + struct futex_hash_bucket *hb, *hb2; | |
11398 | union futex_key key2 = FUTEX_KEY_INIT; | |
11399 | struct futex_q q = futex_q_init; | |
11400 | int res, ret; | |
c7c16703 | 11401 | @@ -2839,10 +2854,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
11402 | * The waiter is allocated on our stack, manipulated by the requeue |
11403 | * code while we sleep on uaddr. | |
11404 | */ | |
11405 | - debug_rt_mutex_init_waiter(&rt_waiter); | |
11406 | - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); | |
11407 | - RB_CLEAR_NODE(&rt_waiter.tree_entry); | |
11408 | - rt_waiter.task = NULL; | |
11409 | + rt_mutex_init_waiter(&rt_waiter, false); | |
11410 | ||
11411 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); | |
11412 | if (unlikely(ret != 0)) | |
c7c16703 | 11413 | @@ -2873,20 +2885,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
11414 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ |
11415 | futex_wait_queue_me(hb, &q, to); | |
11416 | ||
11417 | - spin_lock(&hb->lock); | |
11418 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
11419 | - spin_unlock(&hb->lock); | |
11420 | - if (ret) | |
11421 | - goto out_put_keys; | |
11422 | + /* | |
11423 | + * On RT we must avoid races with requeue and trying to block | |
11424 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
11425 | + * serializing access to pi_blocked_on with pi_lock. | |
11426 | + */ | |
11427 | + raw_spin_lock_irq(¤t->pi_lock); | |
11428 | + if (current->pi_blocked_on) { | |
11429 | + /* | |
11430 | + * We have been requeued or are in the process of | |
11431 | + * being requeued. | |
11432 | + */ | |
11433 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11434 | + } else { | |
11435 | + /* | |
11436 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
11437 | + * prevents a concurrent requeue from moving us to the | |
11438 | + * uaddr2 rtmutex. After that we can safely acquire | |
11439 | + * (and possibly block on) hb->lock. | |
11440 | + */ | |
11441 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
11442 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11443 | + | |
11444 | + spin_lock(&hb->lock); | |
11445 | + | |
11446 | + /* | |
11447 | + * Clean up pi_blocked_on. We might leak it otherwise | |
11448 | + * when we succeeded with the hb->lock in the fast | |
11449 | + * path. | |
11450 | + */ | |
11451 | + raw_spin_lock_irq(¤t->pi_lock); | |
11452 | + current->pi_blocked_on = NULL; | |
11453 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11454 | + | |
11455 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
11456 | + spin_unlock(&hb->lock); | |
11457 | + if (ret) | |
11458 | + goto out_put_keys; | |
11459 | + } | |
11460 | ||
11461 | /* | |
11462 | - * In order for us to be here, we know our q.key == key2, and since | |
11463 | - * we took the hb->lock above, we also know that futex_requeue() has | |
11464 | - * completed and we no longer have to concern ourselves with a wakeup | |
11465 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
11466 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
11467 | - * reference count. | |
11468 | + * In order to be here, we have either been requeued, are in | |
11469 | + * the process of being requeued, or requeue successfully | |
11470 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
11471 | + * non-null above, we may be racing with a requeue. Do not | |
11472 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
11473 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
11474 | + * reference and incremented our key2 reference count. | |
11475 | */ | |
11476 | + hb2 = hash_futex(&key2); | |
11477 | ||
11478 | /* Check if the requeue code acquired the second futex for us. */ | |
11479 | if (!q.rt_waiter) { | |
c7c16703 | 11480 | @@ -2895,14 +2942,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
11481 | * did a lock-steal - fix up the PI-state in that case. |
11482 | */ | |
11483 | if (q.pi_state && (q.pi_state->owner != current)) { | |
11484 | - spin_lock(q.lock_ptr); | |
11485 | + spin_lock(&hb2->lock); | |
11486 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
11487 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
11488 | /* | |
11489 | * Drop the reference to the pi state which | |
11490 | * the requeue_pi() code acquired for us. | |
11491 | */ | |
11492 | put_pi_state(q.pi_state); | |
11493 | - spin_unlock(q.lock_ptr); | |
11494 | + spin_unlock(&hb2->lock); | |
11495 | } | |
11496 | } else { | |
11497 | /* | |
c7c16703 | 11498 | @@ -2915,7 +2963,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
11499 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); |
11500 | debug_rt_mutex_free_waiter(&rt_waiter); | |
11501 | ||
11502 | - spin_lock(q.lock_ptr); | |
11503 | + spin_lock(&hb2->lock); | |
11504 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
11505 | /* | |
11506 | * Fixup the pi_state owner and possibly acquire the lock if we | |
11507 | * haven't already. | |
11508 | diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c | |
11509 | index d3f24905852c..f87aa8fdcc51 100644 | |
11510 | --- a/kernel/irq/handle.c | |
11511 | +++ b/kernel/irq/handle.c | |
11512 | @@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
11513 | { | |
11514 | irqreturn_t retval; | |
11515 | unsigned int flags = 0; | |
11516 | + struct pt_regs *regs = get_irq_regs(); | |
11517 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
11518 | ||
11519 | retval = __handle_irq_event_percpu(desc, &flags); | |
11520 | ||
11521 | - add_interrupt_randomness(desc->irq_data.irq, flags); | |
11522 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11523 | + desc->random_ip = ip; | |
11524 | +#else | |
11525 | + add_interrupt_randomness(desc->irq_data.irq, flags, ip); | |
11526 | +#endif | |
11527 | ||
11528 | if (!noirqdebug) | |
11529 | note_interrupt(desc, retval); | |
11530 | diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c | |
c7c16703 | 11531 | index 6b669593e7eb..e357bf6c59d5 100644 |
1a6e0f06 JK |
11532 | --- a/kernel/irq/manage.c |
11533 | +++ b/kernel/irq/manage.c | |
11534 | @@ -22,6 +22,7 @@ | |
11535 | #include "internals.h" | |
11536 | ||
11537 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
11538 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
11539 | __read_mostly bool force_irqthreads; | |
11540 | ||
11541 | static int __init setup_forced_irqthreads(char *arg) | |
11542 | @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg) | |
11543 | return 0; | |
11544 | } | |
11545 | early_param("threadirqs", setup_forced_irqthreads); | |
11546 | +# endif | |
11547 | #endif | |
11548 | ||
11549 | static void __synchronize_hardirq(struct irq_desc *desc) | |
11550 | @@ -233,7 +235,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, | |
11551 | ||
11552 | if (desc->affinity_notify) { | |
11553 | kref_get(&desc->affinity_notify->kref); | |
11554 | + | |
11555 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11556 | + swork_queue(&desc->affinity_notify->swork); | |
11557 | +#else | |
11558 | schedule_work(&desc->affinity_notify->work); | |
11559 | +#endif | |
11560 | } | |
11561 | irqd_set(data, IRQD_AFFINITY_SET); | |
11562 | ||
11563 | @@ -271,10 +278,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |
11564 | } | |
11565 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
11566 | ||
11567 | -static void irq_affinity_notify(struct work_struct *work) | |
11568 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
11569 | { | |
11570 | - struct irq_affinity_notify *notify = | |
11571 | - container_of(work, struct irq_affinity_notify, work); | |
11572 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
11573 | cpumask_var_t cpumask; | |
11574 | unsigned long flags; | |
11575 | @@ -296,6 +301,35 @@ static void irq_affinity_notify(struct work_struct *work) | |
11576 | kref_put(¬ify->kref, notify->release); | |
11577 | } | |
11578 | ||
11579 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11580 | +static void init_helper_thread(void) | |
11581 | +{ | |
11582 | + static int init_sworker_once; | |
11583 | + | |
11584 | + if (init_sworker_once) | |
11585 | + return; | |
11586 | + if (WARN_ON(swork_get())) | |
11587 | + return; | |
11588 | + init_sworker_once = 1; | |
11589 | +} | |
11590 | + | |
11591 | +static void irq_affinity_notify(struct swork_event *swork) | |
11592 | +{ | |
11593 | + struct irq_affinity_notify *notify = | |
11594 | + container_of(swork, struct irq_affinity_notify, swork); | |
11595 | + _irq_affinity_notify(notify); | |
11596 | +} | |
11597 | + | |
11598 | +#else | |
11599 | + | |
11600 | +static void irq_affinity_notify(struct work_struct *work) | |
11601 | +{ | |
11602 | + struct irq_affinity_notify *notify = | |
11603 | + container_of(work, struct irq_affinity_notify, work); | |
11604 | + _irq_affinity_notify(notify); | |
11605 | +} | |
11606 | +#endif | |
11607 | + | |
11608 | /** | |
11609 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
11610 | * @irq: Interrupt for which to enable/disable notification | |
11611 | @@ -324,7 +358,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) | |
11612 | if (notify) { | |
11613 | notify->irq = irq; | |
11614 | kref_init(¬ify->kref); | |
11615 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11616 | + INIT_SWORK(¬ify->swork, irq_affinity_notify); | |
11617 | + init_helper_thread(); | |
11618 | +#else | |
11619 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
11620 | +#endif | |
11621 | } | |
11622 | ||
11623 | raw_spin_lock_irqsave(&desc->lock, flags); | |
11624 | @@ -879,7 +918,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |
11625 | local_bh_disable(); | |
11626 | ret = action->thread_fn(action->irq, action->dev_id); | |
11627 | irq_finalize_oneshot(desc, action); | |
11628 | - local_bh_enable(); | |
11629 | + /* | |
11630 | + * Interrupts which have real time requirements can be set up | |
11631 | + * to avoid softirq processing in the thread handler. This is | |
11632 | + * safe as these interrupts do not raise soft interrupts. | |
11633 | + */ | |
11634 | + if (irq_settings_no_softirq_call(desc)) | |
11635 | + _local_bh_enable(); | |
11636 | + else | |
11637 | + local_bh_enable(); | |
11638 | return ret; | |
11639 | } | |
11640 | ||
11641 | @@ -976,6 +1023,12 @@ static int irq_thread(void *data) | |
11642 | if (action_ret == IRQ_WAKE_THREAD) | |
11643 | irq_wake_secondary(desc, action); | |
11644 | ||
11645 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11646 | + migrate_disable(); | |
11647 | + add_interrupt_randomness(action->irq, 0, | |
11648 | + desc->random_ip ^ (unsigned long) action); | |
11649 | + migrate_enable(); | |
11650 | +#endif | |
11651 | wake_threads_waitq(desc); | |
11652 | } | |
11653 | ||
11654 | @@ -1336,6 +1389,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |
11655 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
11656 | } | |
11657 | ||
11658 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) | |
11659 | + irq_settings_set_no_softirq_call(desc); | |
11660 | + | |
11661 | /* Set default affinity mask once everything is setup */ | |
11662 | setup_affinity(desc, mask); | |
11663 | ||
11664 | @@ -2061,7 +2117,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); | |
11665 | * This call sets the internal irqchip state of an interrupt, | |
11666 | * depending on the value of @which. | |
11667 | * | |
11668 | - * This function should be called with preemption disabled if the | |
11669 | + * This function should be called with migration disabled if the | |
11670 | * interrupt controller has per-cpu registers. | |
11671 | */ | |
11672 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
11673 | diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h | |
11674 | index 320579d89091..2df2d4445b1e 100644 | |
11675 | --- a/kernel/irq/settings.h | |
11676 | +++ b/kernel/irq/settings.h | |
11677 | @@ -16,6 +16,7 @@ enum { | |
11678 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
11679 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
11680 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
11681 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
11682 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
11683 | }; | |
11684 | ||
11685 | @@ -30,6 +31,7 @@ enum { | |
11686 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
11687 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
11688 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
11689 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
11690 | #undef IRQF_MODIFY_MASK | |
11691 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
11692 | ||
11693 | @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | |
11694 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
11695 | } | |
11696 | ||
11697 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) | |
11698 | +{ | |
11699 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
11700 | +} | |
11701 | + | |
11702 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) | |
11703 | +{ | |
11704 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
11705 | +} | |
11706 | + | |
11707 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |
11708 | { | |
11709 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
11710 | diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c | |
11711 | index 5707f97a3e6a..73f38dc7a7fb 100644 | |
11712 | --- a/kernel/irq/spurious.c | |
11713 | +++ b/kernel/irq/spurious.c | |
11714 | @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); | |
11715 | ||
11716 | static int __init irqfixup_setup(char *str) | |
11717 | { | |
11718 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11719 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
11720 | + return 1; | |
11721 | +#endif | |
11722 | irqfixup = 1; | |
11723 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
11724 | printk(KERN_WARNING "This may impact system performance.\n"); | |
11725 | @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644); | |
11726 | ||
11727 | static int __init irqpoll_setup(char *str) | |
11728 | { | |
11729 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11730 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
11731 | + return 1; | |
11732 | +#endif | |
11733 | irqfixup = 2; | |
11734 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
11735 | "enabled\n"); | |
11736 | diff --git a/kernel/irq_work.c b/kernel/irq_work.c | |
11737 | index bcf107ce0854..2899ba0d23d1 100644 | |
11738 | --- a/kernel/irq_work.c | |
11739 | +++ b/kernel/irq_work.c | |
11740 | @@ -17,6 +17,7 @@ | |
11741 | #include <linux/cpu.h> | |
11742 | #include <linux/notifier.h> | |
11743 | #include <linux/smp.h> | |
11744 | +#include <linux/interrupt.h> | |
11745 | #include <asm/processor.h> | |
11746 | ||
11747 | ||
11748 | @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) | |
11749 | */ | |
11750 | bool irq_work_queue_on(struct irq_work *work, int cpu) | |
11751 | { | |
11752 | + struct llist_head *list; | |
11753 | + | |
11754 | /* All work should have been flushed before going offline */ | |
11755 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
11756 | ||
11757 | @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) | |
11758 | if (!irq_work_claim(work)) | |
11759 | return false; | |
11760 | ||
11761 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
11762 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
11763 | + list = &per_cpu(lazy_list, cpu); | |
11764 | + else | |
11765 | + list = &per_cpu(raised_list, cpu); | |
11766 | + | |
11767 | + if (llist_add(&work->llnode, list)) | |
11768 | arch_send_call_function_single_ipi(cpu); | |
11769 | ||
11770 | return true; | |
11771 | @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); | |
11772 | /* Enqueue the irq work @work on the current CPU */ | |
11773 | bool irq_work_queue(struct irq_work *work) | |
11774 | { | |
11775 | + struct llist_head *list; | |
11776 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
11777 | + | |
11778 | /* Only queue if not already pending */ | |
11779 | if (!irq_work_claim(work)) | |
11780 | return false; | |
11781 | @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work) | |
11782 | /* Queue the entry and raise the IPI if needed. */ | |
11783 | preempt_disable(); | |
11784 | ||
11785 | - /* If the work is "lazy", handle it from next tick if any */ | |
11786 | - if (work->flags & IRQ_WORK_LAZY) { | |
11787 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
11788 | - tick_nohz_tick_stopped()) | |
11789 | - arch_irq_work_raise(); | |
11790 | - } else { | |
11791 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
11792 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
11793 | + | |
11794 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
11795 | + list = this_cpu_ptr(&lazy_list); | |
11796 | + else | |
11797 | + list = this_cpu_ptr(&raised_list); | |
11798 | + | |
11799 | + if (llist_add(&work->llnode, list)) { | |
11800 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
11801 | arch_irq_work_raise(); | |
11802 | } | |
11803 | ||
11804 | @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) | |
11805 | raised = this_cpu_ptr(&raised_list); | |
11806 | lazy = this_cpu_ptr(&lazy_list); | |
11807 | ||
11808 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) | |
11809 | - if (llist_empty(lazy)) | |
11810 | - return false; | |
11811 | + if (llist_empty(raised) && llist_empty(lazy)) | |
11812 | + return false; | |
11813 | ||
11814 | /* All work should have been flushed before going offline */ | |
11815 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
11816 | @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list) | |
11817 | struct irq_work *work; | |
11818 | struct llist_node *llnode; | |
11819 | ||
11820 | - BUG_ON(!irqs_disabled()); | |
11821 | + BUG_ON_NONRT(!irqs_disabled()); | |
11822 | ||
11823 | if (llist_empty(list)) | |
11824 | return; | |
11825 | @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list) | |
11826 | void irq_work_run(void) | |
11827 | { | |
11828 | irq_work_run_list(this_cpu_ptr(&raised_list)); | |
11829 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
11830 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
11831 | + /* | |
11832 | + * NOTE: we raise softirq via IPI for safety, | |
11833 | + * and execute in irq_work_tick() to move the | |
11834 | + * overhead from hard to soft irq context. | |
11835 | + */ | |
11836 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
11837 | + raise_softirq(TIMER_SOFTIRQ); | |
11838 | + } else | |
11839 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
11840 | } | |
11841 | EXPORT_SYMBOL_GPL(irq_work_run); | |
11842 | ||
11843 | @@ -179,8 +200,17 @@ void irq_work_tick(void) | |
11844 | ||
11845 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) | |
11846 | irq_work_run_list(raised); | |
11847 | + | |
11848 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
11849 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
11850 | +} | |
11851 | + | |
11852 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
11853 | +void irq_work_tick_soft(void) | |
11854 | +{ | |
11855 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
11856 | } | |
11857 | +#endif | |
11858 | ||
11859 | /* | |
11860 | * Synchronize against the irq_work @entry, ensures the entry is not | |
11861 | diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c | |
11862 | index ee1bc1bb8feb..ddef07958840 100644 | |
11863 | --- a/kernel/ksysfs.c | |
11864 | +++ b/kernel/ksysfs.c | |
11865 | @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); | |
11866 | ||
11867 | #endif /* CONFIG_KEXEC_CORE */ | |
11868 | ||
11869 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
11870 | +static ssize_t realtime_show(struct kobject *kobj, | |
11871 | + struct kobj_attribute *attr, char *buf) | |
11872 | +{ | |
11873 | + return sprintf(buf, "%d\n", 1); | |
11874 | +} | |
11875 | +KERNEL_ATTR_RO(realtime); | |
11876 | +#endif | |
11877 | + | |
11878 | /* whether file capabilities are enabled */ | |
11879 | static ssize_t fscaps_show(struct kobject *kobj, | |
11880 | struct kobj_attribute *attr, char *buf) | |
11881 | @@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] = { | |
11882 | &rcu_expedited_attr.attr, | |
11883 | &rcu_normal_attr.attr, | |
11884 | #endif | |
11885 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11886 | + &realtime_attr.attr, | |
11887 | +#endif | |
11888 | NULL | |
11889 | }; | |
11890 | ||
11891 | diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile | |
c7c16703 | 11892 | index 6f88e352cd4f..5e27fb1079e7 100644 |
1a6e0f06 JK |
11893 | --- a/kernel/locking/Makefile |
11894 | +++ b/kernel/locking/Makefile | |
11895 | @@ -2,7 +2,7 @@ | |
11896 | # and is generally not a function of system call inputs. | |
11897 | KCOV_INSTRUMENT := n | |
11898 | ||
11899 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | |
11900 | +obj-y += semaphore.o percpu-rwsem.o | |
11901 | ||
11902 | ifdef CONFIG_FUNCTION_TRACER | |
11903 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
11904 | @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) | |
11905 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
11906 | endif | |
11907 | ||
11908 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
11909 | +obj-y += mutex.o | |
11910 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
11911 | +obj-y += rwsem.o | |
11912 | +endif | |
11913 | obj-$(CONFIG_LOCKDEP) += lockdep.o | |
11914 | ifeq ($(CONFIG_PROC_FS),y) | |
11915 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
c7c16703 | 11916 | @@ -24,7 +28,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
1a6e0f06 JK |
11917 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
11918 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
11919 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
11920 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
11921 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
11922 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
11923 | +endif | |
11924 | +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o | |
11925 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | |
11926 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
1a6e0f06 | 11927 | diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c |
c7c16703 | 11928 | index 4d7ffc0a0d00..9e52009c192e 100644 |
1a6e0f06 JK |
11929 | --- a/kernel/locking/lockdep.c |
11930 | +++ b/kernel/locking/lockdep.c | |
c7c16703 | 11931 | @@ -3689,6 +3689,7 @@ static void check_flags(unsigned long flags) |
1a6e0f06 JK |
11932 | } |
11933 | } | |
11934 | ||
11935 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11936 | /* | |
11937 | * We dont accurately track softirq state in e.g. | |
11938 | * hardirq contexts (such as on 4KSTACKS), so only | |
c7c16703 | 11939 | @@ -3703,6 +3704,7 @@ static void check_flags(unsigned long flags) |
1a6e0f06 JK |
11940 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); |
11941 | } | |
11942 | } | |
11943 | +#endif | |
11944 | ||
11945 | if (!debug_locks) | |
11946 | print_irqtrace_events(current); | |
11947 | diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c | |
11948 | index f8c5af52a131..788068773e61 100644 | |
11949 | --- a/kernel/locking/locktorture.c | |
11950 | +++ b/kernel/locking/locktorture.c | |
11951 | @@ -26,7 +26,6 @@ | |
11952 | #include <linux/kthread.h> | |
11953 | #include <linux/sched/rt.h> | |
11954 | #include <linux/spinlock.h> | |
11955 | -#include <linux/rwlock.h> | |
11956 | #include <linux/mutex.h> | |
11957 | #include <linux/rwsem.h> | |
11958 | #include <linux/smp.h> | |
c7c16703 JK |
11959 | diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c |
11960 | index ce182599cf2e..2ad3a1e8344c 100644 | |
11961 | --- a/kernel/locking/percpu-rwsem.c | |
11962 | +++ b/kernel/locking/percpu-rwsem.c | |
11963 | @@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, | |
11964 | /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ | |
11965 | rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); | |
11966 | __init_rwsem(&sem->rw_sem, name, rwsem_key); | |
11967 | - init_waitqueue_head(&sem->writer); | |
11968 | + init_swait_queue_head(&sem->writer); | |
11969 | sem->readers_block = 0; | |
11970 | return 0; | |
11971 | } | |
11972 | @@ -103,7 +103,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) | |
11973 | __this_cpu_dec(*sem->read_count); | |
11974 | ||
11975 | /* Prod writer to recheck readers_active */ | |
11976 | - wake_up(&sem->writer); | |
11977 | + swake_up(&sem->writer); | |
11978 | } | |
11979 | EXPORT_SYMBOL_GPL(__percpu_up_read); | |
11980 | ||
11981 | @@ -160,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) | |
11982 | */ | |
11983 | ||
11984 | /* Wait for all now active readers to complete. */ | |
11985 | - wait_event(sem->writer, readers_active_check(sem)); | |
11986 | + swait_event(sem->writer, readers_active_check(sem)); | |
11987 | } | |
11988 | EXPORT_SYMBOL_GPL(percpu_down_write); | |
11989 | ||
1a6e0f06 JK |
11990 | diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c |
11991 | new file mode 100644 | |
11992 | index 000000000000..665754c00e1e | |
11993 | --- /dev/null | |
11994 | +++ b/kernel/locking/rt.c | |
11995 | @@ -0,0 +1,498 @@ | |
11996 | +/* | |
11997 | + * kernel/rt.c | |
11998 | + * | |
11999 | + * Real-Time Preemption Support | |
12000 | + * | |
12001 | + * started by Ingo Molnar: | |
12002 | + * | |
12003 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
12004 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12005 | + * | |
12006 | + * historic credit for proving that Linux spinlocks can be implemented via | |
12007 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
12008 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
12009 | + * research and analysis; TimeSys, for proving that you can implement a | |
12010 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
12011 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
12012 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
12013 | + * | |
12014 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
12015 | + * but the idea of converting spinlocks to mutexes is used here too. | |
12016 | + * | |
12017 | + * lock debugging, locking tree, deadlock detection: | |
12018 | + * | |
12019 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
12020 | + * Released under the General Public License (GPL). | |
12021 | + * | |
12022 | + * Includes portions of the generic R/W semaphore implementation from: | |
12023 | + * | |
12024 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
12025 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
12026 | + * - Derived also from comments by Linus | |
12027 | + * | |
12028 | + * Pending ownership of locks and ownership stealing: | |
12029 | + * | |
12030 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
12031 | + * | |
12032 | + * (also by Steven Rostedt) | |
12033 | + * - Converted single pi_lock to individual task locks. | |
12034 | + * | |
12035 | + * By Esben Nielsen: | |
12036 | + * Doing priority inheritance with help of the scheduler. | |
12037 | + * | |
12038 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12039 | + * - major rework based on Esben Nielsens initial patch | |
12040 | + * - replaced thread_info references by task_struct refs | |
12041 | + * - removed task->pending_owner dependency | |
12042 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
12043 | + * in the scheduler return path as discussed with Steven Rostedt | |
12044 | + * | |
12045 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
12046 | + * Steven Rostedt <rostedt@goodmis.org> | |
12047 | + * - debugged and patched Thomas Gleixner's rework. | |
12048 | + * - added back the cmpxchg to the rework. | |
12049 | + * - turned atomic require back on for SMP. | |
12050 | + */ | |
12051 | + | |
12052 | +#include <linux/spinlock.h> | |
12053 | +#include <linux/rtmutex.h> | |
12054 | +#include <linux/sched.h> | |
12055 | +#include <linux/delay.h> | |
12056 | +#include <linux/module.h> | |
12057 | +#include <linux/kallsyms.h> | |
12058 | +#include <linux/syscalls.h> | |
12059 | +#include <linux/interrupt.h> | |
12060 | +#include <linux/plist.h> | |
12061 | +#include <linux/fs.h> | |
12062 | +#include <linux/futex.h> | |
12063 | +#include <linux/hrtimer.h> | |
12064 | + | |
12065 | +#include "rtmutex_common.h" | |
12066 | + | |
12067 | +/* | |
12068 | + * struct mutex functions | |
12069 | + */ | |
12070 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
12071 | + struct lock_class_key *key) | |
12072 | +{ | |
12073 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12074 | + /* | |
12075 | + * Make sure we are not reinitializing a held lock: | |
12076 | + */ | |
12077 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
12078 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
12079 | +#endif | |
12080 | + mutex->lock.save_state = 0; | |
12081 | +} | |
12082 | +EXPORT_SYMBOL(__mutex_do_init); | |
12083 | + | |
12084 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
12085 | +{ | |
12086 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12087 | + rt_mutex_lock(&lock->lock); | |
12088 | +} | |
12089 | +EXPORT_SYMBOL(_mutex_lock); | |
12090 | + | |
12091 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
12092 | +{ | |
12093 | + int ret; | |
12094 | + | |
12095 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12096 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
12097 | + if (ret) | |
12098 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12099 | + return ret; | |
12100 | +} | |
12101 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
12102 | + | |
12103 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
12104 | +{ | |
12105 | + int ret; | |
12106 | + | |
12107 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12108 | + ret = rt_mutex_lock_killable(&lock->lock); | |
12109 | + if (ret) | |
12110 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12111 | + return ret; | |
12112 | +} | |
12113 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
12114 | + | |
12115 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12116 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
12117 | +{ | |
12118 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
12119 | + rt_mutex_lock(&lock->lock); | |
12120 | +} | |
12121 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
12122 | + | |
12123 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
12124 | +{ | |
12125 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
12126 | + rt_mutex_lock(&lock->lock); | |
12127 | +} | |
12128 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
12129 | + | |
12130 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) | |
12131 | +{ | |
12132 | + int ret; | |
12133 | + | |
12134 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
12135 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
12136 | + if (ret) | |
12137 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12138 | + return ret; | |
12139 | +} | |
12140 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); | |
12141 | + | |
12142 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
12143 | +{ | |
12144 | + int ret; | |
12145 | + | |
12146 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
12147 | + ret = rt_mutex_lock_killable(&lock->lock); | |
12148 | + if (ret) | |
12149 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12150 | + return ret; | |
12151 | +} | |
12152 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
12153 | +#endif | |
12154 | + | |
12155 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
12156 | +{ | |
12157 | + int ret = rt_mutex_trylock(&lock->lock); | |
12158 | + | |
12159 | + if (ret) | |
12160 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
12161 | + | |
12162 | + return ret; | |
12163 | +} | |
12164 | +EXPORT_SYMBOL(_mutex_trylock); | |
12165 | + | |
12166 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
12167 | +{ | |
12168 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12169 | + rt_mutex_unlock(&lock->lock); | |
12170 | +} | |
12171 | +EXPORT_SYMBOL(_mutex_unlock); | |
12172 | + | |
12173 | +/* | |
12174 | + * rwlock_t functions | |
12175 | + */ | |
12176 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) | |
12177 | +{ | |
12178 | + int ret; | |
12179 | + | |
12180 | + migrate_disable(); | |
12181 | + ret = rt_mutex_trylock(&rwlock->lock); | |
12182 | + if (ret) | |
12183 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
12184 | + else | |
12185 | + migrate_enable(); | |
12186 | + | |
12187 | + return ret; | |
12188 | +} | |
12189 | +EXPORT_SYMBOL(rt_write_trylock); | |
12190 | + | |
12191 | +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) | |
12192 | +{ | |
12193 | + int ret; | |
12194 | + | |
12195 | + *flags = 0; | |
12196 | + ret = rt_write_trylock(rwlock); | |
12197 | + return ret; | |
12198 | +} | |
12199 | +EXPORT_SYMBOL(rt_write_trylock_irqsave); | |
12200 | + | |
12201 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) | |
12202 | +{ | |
12203 | + struct rt_mutex *lock = &rwlock->lock; | |
12204 | + int ret = 1; | |
12205 | + | |
12206 | + /* | |
12207 | + * recursive read locks succeed when current owns the lock, | |
12208 | + * but not when read_depth == 0 which means that the lock is | |
12209 | + * write locked. | |
12210 | + */ | |
12211 | + if (rt_mutex_owner(lock) != current) { | |
12212 | + migrate_disable(); | |
12213 | + ret = rt_mutex_trylock(lock); | |
12214 | + if (ret) | |
12215 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
12216 | + else | |
12217 | + migrate_enable(); | |
12218 | + | |
12219 | + } else if (!rwlock->read_depth) { | |
12220 | + ret = 0; | |
12221 | + } | |
12222 | + | |
12223 | + if (ret) | |
12224 | + rwlock->read_depth++; | |
12225 | + | |
12226 | + return ret; | |
12227 | +} | |
12228 | +EXPORT_SYMBOL(rt_read_trylock); | |
12229 | + | |
12230 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) | |
12231 | +{ | |
12232 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
12233 | + __rt_spin_lock(&rwlock->lock); | |
12234 | +} | |
12235 | +EXPORT_SYMBOL(rt_write_lock); | |
12236 | + | |
12237 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) | |
12238 | +{ | |
12239 | + struct rt_mutex *lock = &rwlock->lock; | |
12240 | + | |
12241 | + | |
12242 | + /* | |
12243 | + * recursive read locks succeed when current owns the lock | |
12244 | + */ | |
12245 | + if (rt_mutex_owner(lock) != current) { | |
12246 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
12247 | + __rt_spin_lock(lock); | |
12248 | + } | |
12249 | + rwlock->read_depth++; | |
12250 | +} | |
12251 | + | |
12252 | +EXPORT_SYMBOL(rt_read_lock); | |
12253 | + | |
12254 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) | |
12255 | +{ | |
12256 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
12257 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
12258 | + __rt_spin_unlock(&rwlock->lock); | |
12259 | + migrate_enable(); | |
12260 | +} | |
12261 | +EXPORT_SYMBOL(rt_write_unlock); | |
12262 | + | |
12263 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) | |
12264 | +{ | |
12265 | + /* Release the lock only when read_depth is down to 0 */ | |
12266 | + if (--rwlock->read_depth == 0) { | |
12267 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
12268 | + __rt_spin_unlock(&rwlock->lock); | |
12269 | + migrate_enable(); | |
12270 | + } | |
12271 | +} | |
12272 | +EXPORT_SYMBOL(rt_read_unlock); | |
12273 | + | |
12274 | +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) | |
12275 | +{ | |
12276 | + rt_write_lock(rwlock); | |
12277 | + | |
12278 | + return 0; | |
12279 | +} | |
12280 | +EXPORT_SYMBOL(rt_write_lock_irqsave); | |
12281 | + | |
12282 | +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) | |
12283 | +{ | |
12284 | + rt_read_lock(rwlock); | |
12285 | + | |
12286 | + return 0; | |
12287 | +} | |
12288 | +EXPORT_SYMBOL(rt_read_lock_irqsave); | |
12289 | + | |
12290 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) | |
12291 | +{ | |
12292 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12293 | + /* | |
12294 | + * Make sure we are not reinitializing a held lock: | |
12295 | + */ | |
12296 | + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); | |
12297 | + lockdep_init_map(&rwlock->dep_map, name, key, 0); | |
12298 | +#endif | |
12299 | + rwlock->lock.save_state = 1; | |
12300 | + rwlock->read_depth = 0; | |
12301 | +} | |
12302 | +EXPORT_SYMBOL(__rt_rwlock_init); | |
12303 | + | |
12304 | +/* | |
12305 | + * rw_semaphores | |
12306 | + */ | |
12307 | + | |
12308 | +void rt_up_write(struct rw_semaphore *rwsem) | |
12309 | +{ | |
12310 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12311 | + rt_mutex_unlock(&rwsem->lock); | |
12312 | +} | |
12313 | +EXPORT_SYMBOL(rt_up_write); | |
12314 | + | |
12315 | +void __rt_up_read(struct rw_semaphore *rwsem) | |
12316 | +{ | |
12317 | + if (--rwsem->read_depth == 0) | |
12318 | + rt_mutex_unlock(&rwsem->lock); | |
12319 | +} | |
12320 | + | |
12321 | +void rt_up_read(struct rw_semaphore *rwsem) | |
12322 | +{ | |
12323 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12324 | + __rt_up_read(rwsem); | |
12325 | +} | |
12326 | +EXPORT_SYMBOL(rt_up_read); | |
12327 | + | |
12328 | +/* | |
12329 | + * downgrade a write lock into a read lock | |
12330 | + * - just wake up any readers at the front of the queue | |
12331 | + */ | |
12332 | +void rt_downgrade_write(struct rw_semaphore *rwsem) | |
12333 | +{ | |
12334 | + BUG_ON(rt_mutex_owner(&rwsem->lock) != current); | |
12335 | + rwsem->read_depth = 1; | |
12336 | +} | |
12337 | +EXPORT_SYMBOL(rt_downgrade_write); | |
12338 | + | |
12339 | +int rt_down_write_trylock(struct rw_semaphore *rwsem) | |
12340 | +{ | |
12341 | + int ret = rt_mutex_trylock(&rwsem->lock); | |
12342 | + | |
12343 | + if (ret) | |
12344 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
12345 | + return ret; | |
12346 | +} | |
12347 | +EXPORT_SYMBOL(rt_down_write_trylock); | |
12348 | + | |
12349 | +void rt_down_write(struct rw_semaphore *rwsem) | |
12350 | +{ | |
12351 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
12352 | + rt_mutex_lock(&rwsem->lock); | |
12353 | +} | |
12354 | +EXPORT_SYMBOL(rt_down_write); | |
12355 | + | |
12356 | +int rt_down_write_killable(struct rw_semaphore *rwsem) | |
12357 | +{ | |
12358 | + int ret; | |
12359 | + | |
12360 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
12361 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
12362 | + if (ret) | |
12363 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12364 | + return ret; | |
12365 | +} | |
12366 | +EXPORT_SYMBOL(rt_down_write_killable); | |
12367 | + | |
12368 | +int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass) | |
12369 | +{ | |
12370 | + int ret; | |
12371 | + | |
12372 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12373 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
12374 | + if (ret) | |
12375 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12376 | + return ret; | |
12377 | +} | |
12378 | +EXPORT_SYMBOL(rt_down_write_killable_nested); | |
12379 | + | |
12380 | +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) | |
12381 | +{ | |
12382 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12383 | + rt_mutex_lock(&rwsem->lock); | |
12384 | +} | |
12385 | +EXPORT_SYMBOL(rt_down_write_nested); | |
12386 | + | |
12387 | +void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
12388 | + struct lockdep_map *nest) | |
12389 | +{ | |
12390 | + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); | |
12391 | + rt_mutex_lock(&rwsem->lock); | |
12392 | +} | |
12393 | +EXPORT_SYMBOL(rt_down_write_nested_lock); | |
12394 | + | |
12395 | +int rt__down_read_trylock(struct rw_semaphore *rwsem) | |
12396 | +{ | |
12397 | + struct rt_mutex *lock = &rwsem->lock; | |
12398 | + int ret = 1; | |
12399 | + | |
12400 | + /* | |
12401 | + * recursive read locks succeed when current owns the rwsem, | |
12402 | + * but not when read_depth == 0 which means that the rwsem is | |
12403 | + * write locked. | |
12404 | + */ | |
12405 | + if (rt_mutex_owner(lock) != current) | |
12406 | + ret = rt_mutex_trylock(&rwsem->lock); | |
12407 | + else if (!rwsem->read_depth) | |
12408 | + ret = 0; | |
12409 | + | |
12410 | + if (ret) | |
12411 | + rwsem->read_depth++; | |
12412 | + return ret; | |
12413 | + | |
12414 | +} | |
12415 | + | |
12416 | +int rt_down_read_trylock(struct rw_semaphore *rwsem) | |
12417 | +{ | |
12418 | + int ret; | |
12419 | + | |
12420 | + ret = rt__down_read_trylock(rwsem); | |
12421 | + if (ret) | |
12422 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
12423 | + | |
12424 | + return ret; | |
12425 | +} | |
12426 | +EXPORT_SYMBOL(rt_down_read_trylock); | |
12427 | + | |
12428 | +void rt__down_read(struct rw_semaphore *rwsem) | |
12429 | +{ | |
12430 | + struct rt_mutex *lock = &rwsem->lock; | |
12431 | + | |
12432 | + if (rt_mutex_owner(lock) != current) | |
12433 | + rt_mutex_lock(&rwsem->lock); | |
12434 | + rwsem->read_depth++; | |
12435 | +} | |
12436 | +EXPORT_SYMBOL(rt__down_read); | |
12437 | + | |
12438 | +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) | |
12439 | +{ | |
12440 | + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12441 | + rt__down_read(rwsem); | |
12442 | +} | |
12443 | + | |
12444 | +void rt_down_read(struct rw_semaphore *rwsem) | |
12445 | +{ | |
12446 | + __rt_down_read(rwsem, 0); | |
12447 | +} | |
12448 | +EXPORT_SYMBOL(rt_down_read); | |
12449 | + | |
12450 | +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) | |
12451 | +{ | |
12452 | + __rt_down_read(rwsem, subclass); | |
12453 | +} | |
12454 | +EXPORT_SYMBOL(rt_down_read_nested); | |
12455 | + | |
12456 | +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
12457 | + struct lock_class_key *key) | |
12458 | +{ | |
12459 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12460 | + /* | |
12461 | + * Make sure we are not reinitializing a held lock: | |
12462 | + */ | |
12463 | + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); | |
12464 | + lockdep_init_map(&rwsem->dep_map, name, key, 0); | |
12465 | +#endif | |
12466 | + rwsem->read_depth = 0; | |
12467 | + rwsem->lock.save_state = 0; | |
12468 | +} | |
12469 | +EXPORT_SYMBOL(__rt_rwsem_init); | |
12470 | + | |
12471 | +/** | |
12472 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
12473 | + * @cnt: the atomic which we are to dec | |
12474 | + * @lock: the mutex to return holding if we dec to 0 | |
12475 | + * | |
12476 | + * return true and hold lock if we dec to 0, return false otherwise | |
12477 | + */ | |
12478 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
12479 | +{ | |
12480 | + /* dec if we can't possibly hit 0 */ | |
12481 | + if (atomic_add_unless(cnt, -1, 1)) | |
12482 | + return 0; | |
12483 | + /* we might hit 0, so take the lock */ | |
12484 | + mutex_lock(lock); | |
12485 | + if (!atomic_dec_and_test(cnt)) { | |
12486 | + /* when we actually did the dec, we didn't hit 0 */ | |
12487 | + mutex_unlock(lock); | |
12488 | + return 0; | |
12489 | + } | |
12490 | + /* we hit 0, and we hold the lock */ | |
12491 | + return 1; | |
12492 | +} | |
12493 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
12494 | diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c | |
c7c16703 | 12495 | index 2c49d76f96c3..4f1a7663c34d 100644 |
1a6e0f06 JK |
12496 | --- a/kernel/locking/rtmutex.c |
12497 | +++ b/kernel/locking/rtmutex.c | |
12498 | @@ -7,6 +7,11 @@ | |
12499 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12500 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
12501 | * Copyright (C) 2006 Esben Nielsen | |
12502 | + * Adaptive Spinlocks: | |
12503 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
12504 | + * and Peter Morreale, | |
12505 | + * Adaptive Spinlocks simplification: | |
12506 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
12507 | * | |
12508 | * See Documentation/locking/rt-mutex-design.txt for details. | |
12509 | */ | |
12510 | @@ -16,6 +21,7 @@ | |
12511 | #include <linux/sched/rt.h> | |
12512 | #include <linux/sched/deadline.h> | |
12513 | #include <linux/timer.h> | |
12514 | +#include <linux/ww_mutex.h> | |
12515 | ||
12516 | #include "rtmutex_common.h" | |
12517 | ||
c7c16703 JK |
12518 | @@ -133,6 +139,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) |
12519 | WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); | |
1a6e0f06 JK |
12520 | } |
12521 | ||
12522 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) | |
12523 | +{ | |
12524 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
12525 | + waiter != PI_REQUEUE_INPROGRESS; | |
12526 | +} | |
12527 | + | |
12528 | /* | |
12529 | * We can speed up the acquire/release, if there's no debugging state to be | |
12530 | * set up. | |
c7c16703 | 12531 | @@ -414,6 +426,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, |
1a6e0f06 JK |
12532 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); |
12533 | } | |
12534 | ||
12535 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) | |
12536 | +{ | |
12537 | + if (waiter->savestate) | |
12538 | + wake_up_lock_sleeper(waiter->task); | |
12539 | + else | |
12540 | + wake_up_process(waiter->task); | |
12541 | +} | |
12542 | + | |
12543 | /* | |
12544 | * Max number of times we'll walk the boosting chain: | |
12545 | */ | |
c7c16703 | 12546 | @@ -421,7 +441,8 @@ int max_lock_depth = 1024; |
1a6e0f06 JK |
12547 | |
12548 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | |
12549 | { | |
12550 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
12551 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
12552 | + p->pi_blocked_on->lock : NULL; | |
12553 | } | |
12554 | ||
12555 | /* | |
c7c16703 | 12556 | @@ -557,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
12557 | * reached or the state of the chain has changed while we |
12558 | * dropped the locks. | |
12559 | */ | |
12560 | - if (!waiter) | |
12561 | + if (!rt_mutex_real_waiter(waiter)) | |
12562 | goto out_unlock_pi; | |
12563 | ||
12564 | /* | |
c7c16703 | 12565 | @@ -719,13 +740,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
12566 | * follow here. This is the end of the chain we are walking. |
12567 | */ | |
12568 | if (!rt_mutex_owner(lock)) { | |
12569 | + struct rt_mutex_waiter *lock_top_waiter; | |
12570 | + | |
12571 | /* | |
12572 | * If the requeue [7] above changed the top waiter, | |
12573 | * then we need to wake the new top waiter up to try | |
12574 | * to get the lock. | |
12575 | */ | |
12576 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
12577 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
12578 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
12579 | + if (prerequeue_top_waiter != lock_top_waiter) | |
12580 | + rt_mutex_wake_waiter(lock_top_waiter); | |
12581 | raw_spin_unlock_irq(&lock->wait_lock); | |
12582 | return 0; | |
12583 | } | |
c7c16703 | 12584 | @@ -818,6 +842,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
12585 | return ret; |
12586 | } | |
12587 | ||
12588 | + | |
12589 | +#define STEAL_NORMAL 0 | |
12590 | +#define STEAL_LATERAL 1 | |
12591 | + | |
12592 | +/* | |
12593 | + * Note that RT tasks are excluded from lateral-steals to prevent the | |
12594 | + * introduction of an unbounded latency | |
12595 | + */ | |
12596 | +static inline int lock_is_stealable(struct task_struct *task, | |
12597 | + struct task_struct *pendowner, int mode) | |
12598 | +{ | |
12599 | + if (mode == STEAL_NORMAL || rt_task(task)) { | |
12600 | + if (task->prio >= pendowner->prio) | |
12601 | + return 0; | |
12602 | + } else if (task->prio > pendowner->prio) | |
12603 | + return 0; | |
12604 | + return 1; | |
12605 | +} | |
12606 | + | |
12607 | /* | |
12608 | * Try to take an rt-mutex | |
12609 | * | |
c7c16703 | 12610 | @@ -828,8 +871,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
12611 | * @waiter: The waiter that is queued to the lock's wait tree if the |
12612 | * callsite called task_blocked_on_lock(), otherwise NULL | |
12613 | */ | |
12614 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
12615 | - struct rt_mutex_waiter *waiter) | |
12616 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
12617 | + struct task_struct *task, | |
12618 | + struct rt_mutex_waiter *waiter, int mode) | |
12619 | { | |
12620 | /* | |
12621 | * Before testing whether we can acquire @lock, we set the | |
c7c16703 | 12622 | @@ -866,8 +910,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
1a6e0f06 JK |
12623 | * If waiter is not the highest priority waiter of |
12624 | * @lock, give up. | |
12625 | */ | |
12626 | - if (waiter != rt_mutex_top_waiter(lock)) | |
12627 | + if (waiter != rt_mutex_top_waiter(lock)) { | |
12628 | + /* XXX lock_is_stealable() ? */ | |
12629 | return 0; | |
12630 | + } | |
12631 | ||
12632 | /* | |
12633 | * We can acquire the lock. Remove the waiter from the | |
c7c16703 | 12634 | @@ -885,14 +931,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
1a6e0f06 JK |
12635 | * not need to be dequeued. |
12636 | */ | |
12637 | if (rt_mutex_has_waiters(lock)) { | |
12638 | - /* | |
12639 | - * If @task->prio is greater than or equal to | |
12640 | - * the top waiter priority (kernel view), | |
12641 | - * @task lost. | |
12642 | - */ | |
12643 | - if (task->prio >= rt_mutex_top_waiter(lock)->prio) | |
12644 | - return 0; | |
12645 | + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; | |
12646 | ||
12647 | + if (task != pown && !lock_is_stealable(task, pown, mode)) | |
12648 | + return 0; | |
12649 | /* | |
12650 | * The current top waiter stays enqueued. We | |
12651 | * don't have to change anything in the lock | |
c7c16703 | 12652 | @@ -941,6 +983,433 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
1a6e0f06 JK |
12653 | return 1; |
12654 | } | |
12655 | ||
12656 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12657 | +/* | |
12658 | + * preemptible spin_lock functions: | |
12659 | + */ | |
12660 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
12661 | + void (*slowfn)(struct rt_mutex *lock, | |
12662 | + bool mg_off), | |
12663 | + bool do_mig_dis) | |
12664 | +{ | |
12665 | + might_sleep_no_state_check(); | |
12666 | + | |
12667 | + if (do_mig_dis) | |
12668 | + migrate_disable(); | |
12669 | + | |
12670 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
12671 | + rt_mutex_deadlock_account_lock(lock, current); | |
12672 | + else | |
12673 | + slowfn(lock, do_mig_dis); | |
12674 | +} | |
12675 | + | |
12676 | +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock, | |
12677 | + int (*slowfn)(struct rt_mutex *lock)) | |
12678 | +{ | |
12679 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
12680 | + rt_mutex_deadlock_account_unlock(current); | |
12681 | + return 0; | |
12682 | + } | |
12683 | + return slowfn(lock); | |
12684 | +} | |
12685 | +#ifdef CONFIG_SMP | |
12686 | +/* | |
12687 | + * Note that owner is a speculative pointer and dereferencing relies | |
12688 | + * on rcu_read_lock() and the check against the lock owner. | |
12689 | + */ | |
12690 | +static int adaptive_wait(struct rt_mutex *lock, | |
12691 | + struct task_struct *owner) | |
12692 | +{ | |
12693 | + int res = 0; | |
12694 | + | |
12695 | + rcu_read_lock(); | |
12696 | + for (;;) { | |
12697 | + if (owner != rt_mutex_owner(lock)) | |
12698 | + break; | |
12699 | + /* | |
12700 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
12701 | + * checking the above to be valid. | |
12702 | + */ | |
12703 | + barrier(); | |
12704 | + if (!owner->on_cpu) { | |
12705 | + res = 1; | |
12706 | + break; | |
12707 | + } | |
12708 | + cpu_relax(); | |
12709 | + } | |
12710 | + rcu_read_unlock(); | |
12711 | + return res; | |
12712 | +} | |
12713 | +#else | |
12714 | +static int adaptive_wait(struct rt_mutex *lock, | |
12715 | + struct task_struct *orig_owner) | |
12716 | +{ | |
12717 | + return 1; | |
12718 | +} | |
12719 | +#endif | |
12720 | + | |
12721 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
12722 | + struct rt_mutex_waiter *waiter, | |
12723 | + struct task_struct *task, | |
12724 | + enum rtmutex_chainwalk chwalk); | |
12725 | +/* | |
12726 | + * Slow path lock function spin_lock style: this variant is very | |
12727 | + * careful not to miss any non-lock wakeups. | |
12728 | + * | |
12729 | + * We store the current state under p->pi_lock in p->saved_state and | |
12730 | + * the try_to_wake_up() code handles this accordingly. | |
12731 | + */ | |
12732 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, | |
12733 | + bool mg_off) | |
12734 | +{ | |
12735 | + struct task_struct *lock_owner, *self = current; | |
12736 | + struct rt_mutex_waiter waiter, *top_waiter; | |
12737 | + unsigned long flags; | |
12738 | + int ret; | |
12739 | + | |
12740 | + rt_mutex_init_waiter(&waiter, true); | |
12741 | + | |
12742 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
12743 | + | |
12744 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { | |
12745 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12746 | + return; | |
12747 | + } | |
12748 | + | |
12749 | + BUG_ON(rt_mutex_owner(lock) == self); | |
12750 | + | |
12751 | + /* | |
12752 | + * We save whatever state the task is in and we'll restore it | |
12753 | + * after acquiring the lock taking real wakeups into account | |
12754 | + * as well. We are serialized via pi_lock against wakeups. See | |
12755 | + * try_to_wake_up(). | |
12756 | + */ | |
12757 | + raw_spin_lock(&self->pi_lock); | |
12758 | + self->saved_state = self->state; | |
12759 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
12760 | + raw_spin_unlock(&self->pi_lock); | |
12761 | + | |
12762 | + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); | |
12763 | + BUG_ON(ret); | |
12764 | + | |
12765 | + for (;;) { | |
12766 | + /* Try to acquire the lock again. */ | |
12767 | + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) | |
12768 | + break; | |
12769 | + | |
12770 | + top_waiter = rt_mutex_top_waiter(lock); | |
12771 | + lock_owner = rt_mutex_owner(lock); | |
12772 | + | |
12773 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12774 | + | |
12775 | + debug_rt_mutex_print_deadlock(&waiter); | |
12776 | + | |
12777 | + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { | |
12778 | + if (mg_off) | |
12779 | + migrate_enable(); | |
12780 | + schedule(); | |
12781 | + if (mg_off) | |
12782 | + migrate_disable(); | |
12783 | + } | |
12784 | + | |
12785 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
12786 | + | |
12787 | + raw_spin_lock(&self->pi_lock); | |
12788 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
12789 | + raw_spin_unlock(&self->pi_lock); | |
12790 | + } | |
12791 | + | |
12792 | + /* | |
12793 | + * Restore the task state to current->saved_state. We set it | |
12794 | + * to the original state above and the try_to_wake_up() code | |
12795 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
12796 | + * happened while we were blocked. Clear saved_state so | |
12797 | + * try_to_wakeup() does not get confused. | |
12798 | + */ | |
12799 | + raw_spin_lock(&self->pi_lock); | |
12800 | + __set_current_state_no_track(self->saved_state); | |
12801 | + self->saved_state = TASK_RUNNING; | |
12802 | + raw_spin_unlock(&self->pi_lock); | |
12803 | + | |
12804 | + /* | |
12805 | + * try_to_take_rt_mutex() sets the waiter bit | |
12806 | + * unconditionally. We might have to fix that up: | |
12807 | + */ | |
12808 | + fixup_rt_mutex_waiters(lock); | |
12809 | + | |
12810 | + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); | |
12811 | + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); | |
12812 | + | |
12813 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12814 | + | |
12815 | + debug_rt_mutex_free_waiter(&waiter); | |
12816 | +} | |
12817 | + | |
12818 | +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
12819 | + struct wake_q_head *wake_sleeper_q, | |
12820 | + struct rt_mutex *lock); | |
12821 | +/* | |
12822 | + * Slow path to release a rt_mutex spin_lock style | |
12823 | + */ | |
12824 | +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) | |
12825 | +{ | |
12826 | + unsigned long flags; | |
12827 | + WAKE_Q(wake_q); | |
12828 | + WAKE_Q(wake_sleeper_q); | |
12829 | + | |
12830 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
12831 | + | |
12832 | + debug_rt_mutex_unlock(lock); | |
12833 | + | |
12834 | + rt_mutex_deadlock_account_unlock(current); | |
12835 | + | |
12836 | + if (!rt_mutex_has_waiters(lock)) { | |
12837 | + lock->owner = NULL; | |
12838 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12839 | + return 0; | |
12840 | + } | |
12841 | + | |
12842 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
12843 | + | |
12844 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12845 | + wake_up_q(&wake_q); | |
12846 | + wake_up_q_sleeper(&wake_sleeper_q); | |
12847 | + | |
12848 | + /* Undo pi boosting.when necessary */ | |
12849 | + rt_mutex_adjust_prio(current); | |
12850 | + return 0; | |
12851 | +} | |
12852 | + | |
12853 | +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock) | |
12854 | +{ | |
12855 | + unsigned long flags; | |
12856 | + WAKE_Q(wake_q); | |
12857 | + WAKE_Q(wake_sleeper_q); | |
12858 | + | |
12859 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
12860 | + | |
12861 | + debug_rt_mutex_unlock(lock); | |
12862 | + | |
12863 | + rt_mutex_deadlock_account_unlock(current); | |
12864 | + | |
12865 | + if (!rt_mutex_has_waiters(lock)) { | |
12866 | + lock->owner = NULL; | |
12867 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12868 | + return 0; | |
12869 | + } | |
12870 | + | |
12871 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
12872 | + | |
12873 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
12874 | + wake_up_q(&wake_q); | |
12875 | + wake_up_q_sleeper(&wake_sleeper_q); | |
12876 | + return 1; | |
12877 | +} | |
12878 | + | |
12879 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) | |
12880 | +{ | |
12881 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); | |
12882 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12883 | +} | |
12884 | +EXPORT_SYMBOL(rt_spin_lock__no_mg); | |
12885 | + | |
12886 | +void __lockfunc rt_spin_lock(spinlock_t *lock) | |
12887 | +{ | |
12888 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
12889 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12890 | +} | |
12891 | +EXPORT_SYMBOL(rt_spin_lock); | |
12892 | + | |
12893 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) | |
12894 | +{ | |
12895 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); | |
12896 | +} | |
12897 | +EXPORT_SYMBOL(__rt_spin_lock); | |
12898 | + | |
12899 | +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) | |
12900 | +{ | |
12901 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); | |
12902 | +} | |
12903 | +EXPORT_SYMBOL(__rt_spin_lock__no_mg); | |
12904 | + | |
12905 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12906 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
12907 | +{ | |
12908 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
12909 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
12910 | +} | |
12911 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
12912 | +#endif | |
12913 | + | |
12914 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) | |
12915 | +{ | |
12916 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
12917 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
12918 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
12919 | +} | |
12920 | +EXPORT_SYMBOL(rt_spin_unlock__no_mg); | |
12921 | + | |
12922 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) | |
12923 | +{ | |
12924 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
12925 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
12926 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
12927 | + migrate_enable(); | |
12928 | +} | |
12929 | +EXPORT_SYMBOL(rt_spin_unlock); | |
12930 | + | |
12931 | +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock) | |
12932 | +{ | |
12933 | + int ret; | |
12934 | + | |
12935 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
12936 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
12937 | + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost); | |
12938 | + migrate_enable(); | |
12939 | + return ret; | |
12940 | +} | |
12941 | + | |
12942 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) | |
12943 | +{ | |
12944 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
12945 | +} | |
12946 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
12947 | + | |
12948 | +/* | |
12949 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
12950 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
12951 | + * schedule if there's contention: | |
12952 | + */ | |
12953 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
12954 | +{ | |
12955 | + spin_lock(lock); | |
12956 | + spin_unlock(lock); | |
12957 | +} | |
12958 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
12959 | + | |
1a6e0f06 JK |
12960 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) |
12961 | +{ | |
12962 | + int ret; | |
12963 | + | |
12964 | + ret = rt_mutex_trylock(&lock->lock); | |
12965 | + if (ret) | |
12966 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
12967 | + return ret; | |
12968 | +} | |
12969 | +EXPORT_SYMBOL(rt_spin_trylock__no_mg); | |
12970 | + | |
12971 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
12972 | +{ | |
12973 | + int ret; | |
12974 | + | |
12975 | + migrate_disable(); | |
12976 | + ret = rt_mutex_trylock(&lock->lock); | |
12977 | + if (ret) | |
12978 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
12979 | + else | |
12980 | + migrate_enable(); | |
12981 | + return ret; | |
12982 | +} | |
12983 | +EXPORT_SYMBOL(rt_spin_trylock); | |
12984 | + | |
12985 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
12986 | +{ | |
12987 | + int ret; | |
12988 | + | |
12989 | + local_bh_disable(); | |
12990 | + ret = rt_mutex_trylock(&lock->lock); | |
12991 | + if (ret) { | |
12992 | + migrate_disable(); | |
12993 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
12994 | + } else | |
12995 | + local_bh_enable(); | |
12996 | + return ret; | |
12997 | +} | |
12998 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
12999 | + | |
13000 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
13001 | +{ | |
13002 | + int ret; | |
13003 | + | |
13004 | + *flags = 0; | |
13005 | + ret = rt_mutex_trylock(&lock->lock); | |
13006 | + if (ret) { | |
13007 | + migrate_disable(); | |
13008 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13009 | + } | |
13010 | + return ret; | |
13011 | +} | |
13012 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
13013 | + | |
13014 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) | |
13015 | +{ | |
13016 | + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ | |
13017 | + if (atomic_add_unless(atomic, -1, 1)) | |
13018 | + return 0; | |
13019 | + rt_spin_lock(lock); | |
13020 | + if (atomic_dec_and_test(atomic)) | |
13021 | + return 1; | |
13022 | + rt_spin_unlock(lock); | |
13023 | + return 0; | |
13024 | +} | |
13025 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
13026 | + | |
13027 | + void | |
13028 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) | |
13029 | +{ | |
13030 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13031 | + /* | |
13032 | + * Make sure we are not reinitializing a held lock: | |
13033 | + */ | |
13034 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
13035 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
13036 | +#endif | |
13037 | +} | |
13038 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
13039 | + | |
13040 | +#endif /* PREEMPT_RT_FULL */ | |
13041 | + | |
13042 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13043 | + static inline int __sched | |
13044 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
13045 | +{ | |
13046 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
13047 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
13048 | + | |
13049 | + if (!hold_ctx) | |
13050 | + return 0; | |
13051 | + | |
13052 | + if (unlikely(ctx == hold_ctx)) | |
13053 | + return -EALREADY; | |
13054 | + | |
13055 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | |
13056 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
13057 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13058 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
13059 | + ctx->contending_lock = ww; | |
13060 | +#endif | |
13061 | + return -EDEADLK; | |
13062 | + } | |
13063 | + | |
13064 | + return 0; | |
13065 | +} | |
13066 | +#else | |
13067 | + static inline int __sched | |
13068 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
13069 | +{ | |
13070 | + BUG(); | |
13071 | + return 0; | |
13072 | +} | |
13073 | + | |
13074 | +#endif | |
13075 | + | |
13076 | +static inline int | |
13077 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13078 | + struct rt_mutex_waiter *waiter) | |
13079 | +{ | |
13080 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
13081 | +} | |
13082 | + | |
13083 | /* | |
13084 | * Task blocks on lock. | |
13085 | * | |
c7c16703 | 13086 | @@ -971,6 +1440,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
1a6e0f06 JK |
13087 | return -EDEADLK; |
13088 | ||
13089 | raw_spin_lock(&task->pi_lock); | |
13090 | + | |
13091 | + /* | |
13092 | + * In the case of futex requeue PI, this will be a proxy | |
13093 | + * lock. The task will wake unaware that it is enqueueed on | |
13094 | + * this lock. Avoid blocking on two locks and corrupting | |
13095 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
13096 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
13097 | + * before requeue (due to a signal or timeout). Do not enqueue | |
13098 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
13099 | + */ | |
13100 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
13101 | + raw_spin_unlock(&task->pi_lock); | |
13102 | + return -EAGAIN; | |
13103 | + } | |
13104 | + | |
13105 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); | |
13106 | + | |
13107 | __rt_mutex_adjust_prio(task); | |
13108 | waiter->task = task; | |
13109 | waiter->lock = lock; | |
c7c16703 | 13110 | @@ -994,7 +1480,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
1a6e0f06 JK |
13111 | rt_mutex_enqueue_pi(owner, waiter); |
13112 | ||
13113 | __rt_mutex_adjust_prio(owner); | |
13114 | - if (owner->pi_blocked_on) | |
13115 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
13116 | chain_walk = 1; | |
13117 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
13118 | chain_walk = 1; | |
c7c16703 | 13119 | @@ -1036,6 +1522,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
1a6e0f06 JK |
13120 | * Called with lock->wait_lock held and interrupts disabled. |
13121 | */ | |
13122 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
13123 | + struct wake_q_head *wake_sleeper_q, | |
13124 | struct rt_mutex *lock) | |
13125 | { | |
13126 | struct rt_mutex_waiter *waiter; | |
c7c16703 | 13127 | @@ -1064,7 +1551,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, |
1a6e0f06 JK |
13128 | |
13129 | raw_spin_unlock(¤t->pi_lock); | |
13130 | ||
13131 | - wake_q_add(wake_q, waiter->task); | |
13132 | + if (waiter->savestate) | |
13133 | + wake_q_add(wake_sleeper_q, waiter->task); | |
13134 | + else | |
13135 | + wake_q_add(wake_q, waiter->task); | |
13136 | } | |
13137 | ||
13138 | /* | |
c7c16703 | 13139 | @@ -1078,7 +1568,7 @@ static void remove_waiter(struct rt_mutex *lock, |
1a6e0f06 JK |
13140 | { |
13141 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
13142 | struct task_struct *owner = rt_mutex_owner(lock); | |
13143 | - struct rt_mutex *next_lock; | |
13144 | + struct rt_mutex *next_lock = NULL; | |
13145 | ||
13146 | raw_spin_lock(¤t->pi_lock); | |
13147 | rt_mutex_dequeue(lock, waiter); | |
c7c16703 | 13148 | @@ -1102,7 +1592,8 @@ static void remove_waiter(struct rt_mutex *lock, |
1a6e0f06 JK |
13149 | __rt_mutex_adjust_prio(owner); |
13150 | ||
13151 | /* Store the lock on which owner is blocked or NULL */ | |
13152 | - next_lock = task_blocked_on_lock(owner); | |
13153 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
13154 | + next_lock = task_blocked_on_lock(owner); | |
13155 | ||
13156 | raw_spin_unlock(&owner->pi_lock); | |
13157 | ||
c7c16703 | 13158 | @@ -1138,17 +1629,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) |
1a6e0f06 JK |
13159 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
13160 | ||
13161 | waiter = task->pi_blocked_on; | |
13162 | - if (!waiter || (waiter->prio == task->prio && | |
13163 | + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && | |
13164 | !dl_prio(task->prio))) { | |
13165 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13166 | return; | |
13167 | } | |
13168 | next_lock = waiter->lock; | |
13169 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13170 | ||
13171 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
13172 | get_task_struct(task); | |
13173 | ||
13174 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13175 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
13176 | next_lock, NULL, task); | |
13177 | } | |
c7c16703 | 13178 | @@ -1166,7 +1657,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) |
1a6e0f06 JK |
13179 | static int __sched |
13180 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13181 | struct hrtimer_sleeper *timeout, | |
13182 | - struct rt_mutex_waiter *waiter) | |
13183 | + struct rt_mutex_waiter *waiter, | |
13184 | + struct ww_acquire_ctx *ww_ctx) | |
13185 | { | |
13186 | int ret = 0; | |
13187 | ||
c7c16703 | 13188 | @@ -1189,6 +1681,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, |
1a6e0f06 JK |
13189 | break; |
13190 | } | |
13191 | ||
13192 | + if (ww_ctx && ww_ctx->acquired > 0) { | |
13193 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
13194 | + if (ret) | |
13195 | + break; | |
13196 | + } | |
13197 | + | |
13198 | raw_spin_unlock_irq(&lock->wait_lock); | |
13199 | ||
13200 | debug_rt_mutex_print_deadlock(waiter); | |
c7c16703 | 13201 | @@ -1223,21 +1721,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, |
1a6e0f06 JK |
13202 | } |
13203 | } | |
13204 | ||
13205 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
13206 | + struct ww_acquire_ctx *ww_ctx) | |
13207 | +{ | |
13208 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13209 | + /* | |
13210 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
13211 | + * but released with a normal mutex_unlock in this call. | |
13212 | + * | |
13213 | + * This should never happen, always use ww_mutex_unlock. | |
13214 | + */ | |
13215 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
13216 | + | |
13217 | + /* | |
13218 | + * Not quite done after calling ww_acquire_done() ? | |
13219 | + */ | |
13220 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
13221 | + | |
13222 | + if (ww_ctx->contending_lock) { | |
13223 | + /* | |
13224 | + * After -EDEADLK you tried to | |
13225 | + * acquire a different ww_mutex? Bad! | |
13226 | + */ | |
13227 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
13228 | + | |
13229 | + /* | |
13230 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
13231 | + * but 'forgot' to unlock everything else first? | |
13232 | + */ | |
13233 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
13234 | + ww_ctx->contending_lock = NULL; | |
13235 | + } | |
13236 | + | |
13237 | + /* | |
13238 | + * Naughty, using a different class will lead to undefined behavior! | |
13239 | + */ | |
13240 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
13241 | +#endif | |
13242 | + ww_ctx->acquired++; | |
13243 | +} | |
13244 | + | |
13245 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13246 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
13247 | + struct ww_acquire_ctx *ww_ctx) | |
13248 | +{ | |
13249 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
13250 | + struct rt_mutex_waiter *waiter, *n; | |
13251 | + | |
13252 | + /* | |
13253 | + * This branch gets optimized out for the common case, | |
13254 | + * and is only important for ww_mutex_lock. | |
13255 | + */ | |
13256 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
13257 | + ww->ctx = ww_ctx; | |
13258 | + | |
13259 | + /* | |
13260 | + * Give any possible sleeping processes the chance to wake up, | |
13261 | + * so they can recheck if they have to back off. | |
13262 | + */ | |
13263 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, | |
13264 | + tree_entry) { | |
13265 | + /* XXX debug rt mutex waiter wakeup */ | |
13266 | + | |
13267 | + BUG_ON(waiter->lock != lock); | |
13268 | + rt_mutex_wake_waiter(waiter); | |
13269 | + } | |
13270 | +} | |
13271 | + | |
13272 | +#else | |
13273 | + | |
13274 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
13275 | + struct ww_acquire_ctx *ww_ctx) | |
13276 | +{ | |
13277 | + BUG(); | |
13278 | +} | |
13279 | +#endif | |
13280 | + | |
13281 | /* | |
13282 | * Slow path lock function: | |
13283 | */ | |
13284 | static int __sched | |
13285 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13286 | struct hrtimer_sleeper *timeout, | |
13287 | - enum rtmutex_chainwalk chwalk) | |
13288 | + enum rtmutex_chainwalk chwalk, | |
13289 | + struct ww_acquire_ctx *ww_ctx) | |
13290 | { | |
13291 | struct rt_mutex_waiter waiter; | |
13292 | unsigned long flags; | |
13293 | int ret = 0; | |
13294 | ||
13295 | - debug_rt_mutex_init_waiter(&waiter); | |
13296 | - RB_CLEAR_NODE(&waiter.pi_tree_entry); | |
13297 | - RB_CLEAR_NODE(&waiter.tree_entry); | |
13298 | + rt_mutex_init_waiter(&waiter, false); | |
13299 | ||
13300 | /* | |
13301 | * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
c7c16703 | 13302 | @@ -1251,6 +1824,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
1a6e0f06 JK |
13303 | |
13304 | /* Try to acquire the lock again: */ | |
13305 | if (try_to_take_rt_mutex(lock, current, NULL)) { | |
13306 | + if (ww_ctx) | |
13307 | + ww_mutex_account_lock(lock, ww_ctx); | |
13308 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13309 | return 0; | |
13310 | } | |
c7c16703 | 13311 | @@ -1265,13 +1840,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
1a6e0f06 JK |
13312 | |
13313 | if (likely(!ret)) | |
13314 | /* sleep on the mutex */ | |
13315 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | |
13316 | + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, | |
13317 | + ww_ctx); | |
13318 | + else if (ww_ctx) { | |
13319 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
13320 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
13321 | + BUG_ON(!ret); | |
13322 | + } | |
13323 | ||
13324 | if (unlikely(ret)) { | |
13325 | __set_current_state(TASK_RUNNING); | |
13326 | if (rt_mutex_has_waiters(lock)) | |
13327 | remove_waiter(lock, &waiter); | |
13328 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
13329 | + /* ww_mutex want to report EDEADLK/EALREADY, let them */ | |
13330 | + if (!ww_ctx) | |
13331 | + rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
13332 | + } else if (ww_ctx) { | |
13333 | + ww_mutex_account_lock(lock, ww_ctx); | |
13334 | } | |
13335 | ||
13336 | /* | |
c7c16703 | 13337 | @@ -1331,7 +1916,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) |
1a6e0f06 JK |
13338 | * Return whether the current task needs to undo a potential priority boosting. |
13339 | */ | |
13340 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
13341 | - struct wake_q_head *wake_q) | |
13342 | + struct wake_q_head *wake_q, | |
13343 | + struct wake_q_head *wake_sleeper_q) | |
13344 | { | |
13345 | unsigned long flags; | |
13346 | ||
c7c16703 | 13347 | @@ -1387,7 +1973,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
1a6e0f06 JK |
13348 | * |
13349 | * Queue the next waiter for wakeup once we release the wait_lock. | |
13350 | */ | |
13351 | - mark_wakeup_next_waiter(wake_q, lock); | |
13352 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); | |
13353 | ||
13354 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13355 | ||
c7c16703 | 13356 | @@ -1403,31 +1989,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
1a6e0f06 JK |
13357 | */ |
13358 | static inline int | |
13359 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
13360 | + struct ww_acquire_ctx *ww_ctx, | |
13361 | int (*slowfn)(struct rt_mutex *lock, int state, | |
13362 | struct hrtimer_sleeper *timeout, | |
13363 | - enum rtmutex_chainwalk chwalk)) | |
13364 | + enum rtmutex_chainwalk chwalk, | |
13365 | + struct ww_acquire_ctx *ww_ctx)) | |
13366 | { | |
13367 | if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
13368 | rt_mutex_deadlock_account_lock(lock, current); | |
13369 | return 0; | |
13370 | } else | |
13371 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); | |
13372 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, | |
13373 | + ww_ctx); | |
13374 | } | |
13375 | ||
13376 | static inline int | |
13377 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
13378 | struct hrtimer_sleeper *timeout, | |
13379 | enum rtmutex_chainwalk chwalk, | |
13380 | + struct ww_acquire_ctx *ww_ctx, | |
13381 | int (*slowfn)(struct rt_mutex *lock, int state, | |
13382 | struct hrtimer_sleeper *timeout, | |
13383 | - enum rtmutex_chainwalk chwalk)) | |
13384 | + enum rtmutex_chainwalk chwalk, | |
13385 | + struct ww_acquire_ctx *ww_ctx)) | |
13386 | { | |
13387 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
13388 | likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
13389 | rt_mutex_deadlock_account_lock(lock, current); | |
13390 | return 0; | |
13391 | } else | |
13392 | - return slowfn(lock, state, timeout, chwalk); | |
13393 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); | |
13394 | } | |
13395 | ||
13396 | static inline int | |
c7c16703 | 13397 | @@ -1444,17 +2035,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, |
1a6e0f06 JK |
13398 | static inline void |
13399 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
13400 | bool (*slowfn)(struct rt_mutex *lock, | |
13401 | - struct wake_q_head *wqh)) | |
13402 | + struct wake_q_head *wqh, | |
13403 | + struct wake_q_head *wq_sleeper)) | |
13404 | { | |
13405 | WAKE_Q(wake_q); | |
13406 | + WAKE_Q(wake_sleeper_q); | |
13407 | ||
13408 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
13409 | rt_mutex_deadlock_account_unlock(current); | |
13410 | ||
13411 | } else { | |
13412 | - bool deboost = slowfn(lock, &wake_q); | |
13413 | + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q); | |
13414 | ||
13415 | wake_up_q(&wake_q); | |
13416 | + wake_up_q_sleeper(&wake_sleeper_q); | |
13417 | ||
13418 | /* Undo pi boosting if necessary: */ | |
13419 | if (deboost) | |
c7c16703 | 13420 | @@ -1471,7 +2065,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) |
1a6e0f06 JK |
13421 | { |
13422 | might_sleep(); | |
13423 | ||
13424 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); | |
13425 | + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
13426 | } | |
13427 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
13428 | ||
c7c16703 | 13429 | @@ -1488,7 +2082,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) |
1a6e0f06 JK |
13430 | { |
13431 | might_sleep(); | |
13432 | ||
13433 | - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); | |
13434 | + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
13435 | } | |
13436 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
13437 | ||
c7c16703 | 13438 | @@ -1501,11 +2095,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13439 | might_sleep(); |
13440 | ||
13441 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
13442 | - RT_MUTEX_FULL_CHAINWALK, | |
13443 | + RT_MUTEX_FULL_CHAINWALK, NULL, | |
13444 | rt_mutex_slowlock); | |
13445 | } | |
13446 | ||
13447 | /** | |
13448 | + * rt_mutex_lock_killable - lock a rt_mutex killable | |
13449 | + * | |
13450 | + * @lock: the rt_mutex to be locked | |
13451 | + * @detect_deadlock: deadlock detection on/off | |
13452 | + * | |
13453 | + * Returns: | |
13454 | + * 0 on success | |
13455 | + * -EINTR when interrupted by a signal | |
13456 | + * -EDEADLK when the lock would deadlock (when deadlock detection is on) | |
13457 | + */ | |
13458 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) | |
13459 | +{ | |
13460 | + might_sleep(); | |
13461 | + | |
13462 | + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); | |
13463 | +} | |
13464 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); | |
13465 | + | |
13466 | +/** | |
13467 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | |
13468 | * the timeout structure is provided | |
13469 | * by the caller | |
c7c16703 | 13470 | @@ -1525,6 +2138,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) |
1a6e0f06 JK |
13471 | |
13472 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
13473 | RT_MUTEX_MIN_CHAINWALK, | |
13474 | + NULL, | |
13475 | rt_mutex_slowlock); | |
13476 | } | |
13477 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
c7c16703 | 13478 | @@ -1542,7 +2156,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); |
1a6e0f06 JK |
13479 | */ |
13480 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | |
13481 | { | |
13482 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13483 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
13484 | +#else | |
13485 | if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) | |
13486 | +#endif | |
13487 | return 0; | |
13488 | ||
13489 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
c7c16703 | 13490 | @@ -1568,13 +2186,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); |
1a6e0f06 JK |
13491 | * required or not. |
13492 | */ | |
13493 | bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, | |
13494 | - struct wake_q_head *wqh) | |
13495 | + struct wake_q_head *wqh, | |
13496 | + struct wake_q_head *wq_sleeper) | |
13497 | { | |
13498 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
13499 | rt_mutex_deadlock_account_unlock(current); | |
13500 | return false; | |
13501 | } | |
13502 | - return rt_mutex_slowunlock(lock, wqh); | |
13503 | + return rt_mutex_slowunlock(lock, wqh, wq_sleeper); | |
13504 | } | |
13505 | ||
13506 | /** | |
c7c16703 | 13507 | @@ -1607,13 +2226,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); |
1a6e0f06 JK |
13508 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) |
13509 | { | |
13510 | lock->owner = NULL; | |
13511 | - raw_spin_lock_init(&lock->wait_lock); | |
13512 | lock->waiters = RB_ROOT; | |
13513 | lock->waiters_leftmost = NULL; | |
13514 | ||
13515 | debug_rt_mutex_init(lock, name); | |
13516 | } | |
13517 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
13518 | +EXPORT_SYMBOL(__rt_mutex_init); | |
13519 | ||
13520 | /** | |
13521 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
c7c16703 | 13522 | @@ -1628,7 +2246,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); |
1a6e0f06 JK |
13523 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, |
13524 | struct task_struct *proxy_owner) | |
13525 | { | |
13526 | - __rt_mutex_init(lock, NULL); | |
13527 | + rt_mutex_init(lock); | |
13528 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | |
13529 | rt_mutex_set_owner(lock, proxy_owner); | |
13530 | rt_mutex_deadlock_account_lock(lock, proxy_owner); | |
c7c16703 | 13531 | @@ -1676,6 +2294,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13532 | return 1; |
13533 | } | |
13534 | ||
13535 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13536 | + /* | |
13537 | + * In PREEMPT_RT there's an added race. | |
13538 | + * If the task, that we are about to requeue, times out, | |
13539 | + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue | |
13540 | + * to skip this task. But right after the task sets | |
13541 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
13542 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
13543 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
13544 | + * lock that it blocks on. We *must not* place this task | |
13545 | + * on this proxy lock in that case. | |
13546 | + * | |
13547 | + * To prevent this race, we first take the task's pi_lock | |
13548 | + * and check if it has updated its pi_blocked_on. If it has, | |
13549 | + * we assume that it woke up and we return -EAGAIN. | |
13550 | + * Otherwise, we set the task's pi_blocked_on to | |
13551 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
13552 | + * it will know that we are in the process of requeuing it. | |
13553 | + */ | |
13554 | + raw_spin_lock(&task->pi_lock); | |
13555 | + if (task->pi_blocked_on) { | |
13556 | + raw_spin_unlock(&task->pi_lock); | |
13557 | + raw_spin_unlock_irq(&lock->wait_lock); | |
13558 | + return -EAGAIN; | |
13559 | + } | |
13560 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
13561 | + raw_spin_unlock(&task->pi_lock); | |
13562 | +#endif | |
13563 | + | |
13564 | /* We enforce deadlock detection for futexes */ | |
13565 | ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
13566 | RT_MUTEX_FULL_CHAINWALK); | |
c7c16703 | 13567 | @@ -1690,7 +2337,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13568 | ret = 0; |
13569 | } | |
13570 | ||
13571 | - if (unlikely(ret)) | |
13572 | + if (ret && rt_mutex_has_waiters(lock)) | |
13573 | remove_waiter(lock, waiter); | |
13574 | ||
13575 | raw_spin_unlock_irq(&lock->wait_lock); | |
c7c16703 | 13576 | @@ -1746,7 +2393,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13577 | set_current_state(TASK_INTERRUPTIBLE); |
13578 | ||
13579 | /* sleep on the mutex */ | |
13580 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
13581 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
13582 | ||
13583 | if (unlikely(ret)) | |
13584 | remove_waiter(lock, waiter); | |
c7c16703 | 13585 | @@ -1761,3 +2408,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13586 | |
13587 | return ret; | |
13588 | } | |
13589 | + | |
13590 | +static inline int | |
13591 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
13592 | +{ | |
13593 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
13594 | + unsigned tmp; | |
13595 | + | |
13596 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
13597 | + tmp = ctx->deadlock_inject_interval; | |
13598 | + if (tmp > UINT_MAX/4) | |
13599 | + tmp = UINT_MAX; | |
13600 | + else | |
13601 | + tmp = tmp*2 + tmp + tmp/2; | |
13602 | + | |
13603 | + ctx->deadlock_inject_interval = tmp; | |
13604 | + ctx->deadlock_inject_countdown = tmp; | |
13605 | + ctx->contending_lock = lock; | |
13606 | + | |
13607 | + ww_mutex_unlock(lock); | |
13608 | + | |
13609 | + return -EDEADLK; | |
13610 | + } | |
13611 | +#endif | |
13612 | + | |
13613 | + return 0; | |
13614 | +} | |
13615 | + | |
13616 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13617 | +int __sched | |
13618 | +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
13619 | +{ | |
13620 | + int ret; | |
13621 | + | |
13622 | + might_sleep(); | |
13623 | + | |
13624 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
13625 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); | |
13626 | + if (ret) | |
13627 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
13628 | + else if (!ret && ww_ctx->acquired > 1) | |
13629 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
13630 | + | |
13631 | + return ret; | |
13632 | +} | |
13633 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); | |
13634 | + | |
13635 | +int __sched | |
13636 | +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
13637 | +{ | |
13638 | + int ret; | |
13639 | + | |
13640 | + might_sleep(); | |
13641 | + | |
13642 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
13643 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); | |
13644 | + if (ret) | |
13645 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
13646 | + else if (!ret && ww_ctx->acquired > 1) | |
13647 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
13648 | + | |
13649 | + return ret; | |
13650 | +} | |
13651 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock); | |
13652 | + | |
13653 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
13654 | +{ | |
13655 | + int nest = !!lock->ctx; | |
13656 | + | |
13657 | + /* | |
13658 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
13659 | + * into 'unlocked' state: | |
13660 | + */ | |
13661 | + if (nest) { | |
13662 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13663 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
13664 | +#endif | |
13665 | + if (lock->ctx->acquired > 0) | |
13666 | + lock->ctx->acquired--; | |
13667 | + lock->ctx = NULL; | |
13668 | + } | |
13669 | + | |
13670 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
13671 | + rt_mutex_unlock(&lock->base.lock); | |
13672 | +} | |
13673 | +EXPORT_SYMBOL(ww_mutex_unlock); | |
13674 | +#endif | |
13675 | diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h | |
c7c16703 | 13676 | index e317e1cbb3eb..f457c7574920 100644 |
1a6e0f06 JK |
13677 | --- a/kernel/locking/rtmutex_common.h |
13678 | +++ b/kernel/locking/rtmutex_common.h | |
13679 | @@ -27,6 +27,7 @@ struct rt_mutex_waiter { | |
13680 | struct rb_node pi_tree_entry; | |
13681 | struct task_struct *task; | |
13682 | struct rt_mutex *lock; | |
13683 | + bool savestate; | |
13684 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
13685 | unsigned long ip; | |
13686 | struct pid *deadlock_task_pid; | |
c7c16703 | 13687 | @@ -98,6 +99,9 @@ enum rtmutex_chainwalk { |
1a6e0f06 JK |
13688 | /* |
13689 | * PI-futex support (proxy locking functions, etc.): | |
13690 | */ | |
13691 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
13692 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
13693 | + | |
13694 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
13695 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
13696 | struct task_struct *proxy_owner); | |
c7c16703 | 13697 | @@ -111,7 +115,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
1a6e0f06 JK |
13698 | struct rt_mutex_waiter *waiter); |
13699 | extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); | |
13700 | extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, | |
13701 | - struct wake_q_head *wqh); | |
13702 | + struct wake_q_head *wqh, | |
13703 | + struct wake_q_head *wq_sleeper); | |
13704 | extern void rt_mutex_adjust_prio(struct task_struct *task); | |
13705 | ||
13706 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
c7c16703 | 13707 | @@ -120,4 +125,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); |
1a6e0f06 JK |
13708 | # include "rtmutex.h" |
13709 | #endif | |
13710 | ||
13711 | +static inline void | |
13712 | +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
13713 | +{ | |
13714 | + debug_rt_mutex_init_waiter(waiter); | |
13715 | + waiter->task = NULL; | |
13716 | + waiter->savestate = savestate; | |
13717 | + RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
13718 | + RB_CLEAR_NODE(&waiter->tree_entry); | |
13719 | +} | |
13720 | + | |
13721 | #endif | |
13722 | diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c | |
13723 | index db3ccb1dd614..909779647bd1 100644 | |
13724 | --- a/kernel/locking/spinlock.c | |
13725 | +++ b/kernel/locking/spinlock.c | |
13726 | @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | |
13727 | * __[spin|read|write]_lock_bh() | |
13728 | */ | |
13729 | BUILD_LOCK_OPS(spin, raw_spinlock); | |
13730 | + | |
13731 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13732 | BUILD_LOCK_OPS(read, rwlock); | |
13733 | BUILD_LOCK_OPS(write, rwlock); | |
13734 | +#endif | |
13735 | ||
13736 | #endif | |
13737 | ||
13738 | @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | |
13739 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
13740 | #endif | |
13741 | ||
13742 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13743 | + | |
13744 | #ifndef CONFIG_INLINE_READ_TRYLOCK | |
13745 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
13746 | { | |
13747 | @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | |
13748 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
13749 | #endif | |
13750 | ||
13751 | +#endif /* !PREEMPT_RT_FULL */ | |
13752 | + | |
13753 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13754 | ||
13755 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
13756 | diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c | |
13757 | index 0374a596cffa..94970338d518 100644 | |
13758 | --- a/kernel/locking/spinlock_debug.c | |
13759 | +++ b/kernel/locking/spinlock_debug.c | |
13760 | @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | |
13761 | ||
13762 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
13763 | ||
13764 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13765 | void __rwlock_init(rwlock_t *lock, const char *name, | |
13766 | struct lock_class_key *key) | |
13767 | { | |
13768 | @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, | |
13769 | } | |
13770 | ||
13771 | EXPORT_SYMBOL(__rwlock_init); | |
13772 | +#endif | |
13773 | ||
13774 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
13775 | { | |
13776 | @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) | |
13777 | arch_spin_unlock(&lock->raw_lock); | |
13778 | } | |
13779 | ||
13780 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13781 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
13782 | { | |
13783 | if (!debug_locks_off()) | |
13784 | @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock) | |
13785 | debug_write_unlock(lock); | |
13786 | arch_write_unlock(&lock->raw_lock); | |
13787 | } | |
13788 | + | |
13789 | +#endif | |
13790 | diff --git a/kernel/panic.c b/kernel/panic.c | |
c7c16703 | 13791 | index e6480e20379e..7e9c1918a94e 100644 |
1a6e0f06 JK |
13792 | --- a/kernel/panic.c |
13793 | +++ b/kernel/panic.c | |
c7c16703 | 13794 | @@ -482,9 +482,11 @@ static u64 oops_id; |
1a6e0f06 JK |
13795 | |
13796 | static int init_oops_id(void) | |
13797 | { | |
13798 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13799 | if (!oops_id) | |
13800 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
13801 | else | |
13802 | +#endif | |
13803 | oops_id++; | |
13804 | ||
13805 | return 0; | |
13806 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c | |
c7c16703 | 13807 | index b26dbc48c75b..968255f27a33 100644 |
1a6e0f06 JK |
13808 | --- a/kernel/power/hibernate.c |
13809 | +++ b/kernel/power/hibernate.c | |
13810 | @@ -286,6 +286,8 @@ static int create_image(int platform_mode) | |
13811 | ||
13812 | local_irq_disable(); | |
13813 | ||
13814 | + system_state = SYSTEM_SUSPEND; | |
13815 | + | |
13816 | error = syscore_suspend(); | |
13817 | if (error) { | |
13818 | printk(KERN_ERR "PM: Some system devices failed to power down, " | |
c7c16703 | 13819 | @@ -317,6 +319,7 @@ static int create_image(int platform_mode) |
1a6e0f06 JK |
13820 | syscore_resume(); |
13821 | ||
13822 | Enable_irqs: | |
13823 | + system_state = SYSTEM_RUNNING; | |
13824 | local_irq_enable(); | |
13825 | ||
13826 | Enable_cpus: | |
c7c16703 | 13827 | @@ -446,6 +449,7 @@ static int resume_target_kernel(bool platform_mode) |
1a6e0f06 JK |
13828 | goto Enable_cpus; |
13829 | ||
13830 | local_irq_disable(); | |
13831 | + system_state = SYSTEM_SUSPEND; | |
13832 | ||
13833 | error = syscore_suspend(); | |
13834 | if (error) | |
c7c16703 | 13835 | @@ -479,6 +483,7 @@ static int resume_target_kernel(bool platform_mode) |
1a6e0f06 JK |
13836 | syscore_resume(); |
13837 | ||
13838 | Enable_irqs: | |
13839 | + system_state = SYSTEM_RUNNING; | |
13840 | local_irq_enable(); | |
13841 | ||
13842 | Enable_cpus: | |
c7c16703 | 13843 | @@ -564,6 +569,7 @@ int hibernation_platform_enter(void) |
1a6e0f06 JK |
13844 | goto Enable_cpus; |
13845 | ||
13846 | local_irq_disable(); | |
13847 | + system_state = SYSTEM_SUSPEND; | |
13848 | syscore_suspend(); | |
13849 | if (pm_wakeup_pending()) { | |
13850 | error = -EAGAIN; | |
c7c16703 | 13851 | @@ -576,6 +582,7 @@ int hibernation_platform_enter(void) |
1a6e0f06 JK |
13852 | |
13853 | Power_up: | |
13854 | syscore_resume(); | |
13855 | + system_state = SYSTEM_RUNNING; | |
13856 | local_irq_enable(); | |
13857 | ||
13858 | Enable_cpus: | |
c7c16703 | 13859 | @@ -676,6 +683,10 @@ static int load_image_and_restore(void) |
1a6e0f06 JK |
13860 | return error; |
13861 | } | |
13862 | ||
13863 | +#ifndef CONFIG_SUSPEND | |
13864 | +bool pm_in_action; | |
13865 | +#endif | |
13866 | + | |
13867 | /** | |
13868 | * hibernate - Carry out system hibernation, including saving the image. | |
13869 | */ | |
c7c16703 | 13870 | @@ -689,6 +700,8 @@ int hibernate(void) |
1a6e0f06 JK |
13871 | return -EPERM; |
13872 | } | |
13873 | ||
13874 | + pm_in_action = true; | |
13875 | + | |
13876 | lock_system_sleep(); | |
13877 | /* The snapshot device should not be opened while we're running */ | |
13878 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
c7c16703 | 13879 | @@ -766,6 +779,7 @@ int hibernate(void) |
1a6e0f06 JK |
13880 | atomic_inc(&snapshot_device_available); |
13881 | Unlock: | |
13882 | unlock_system_sleep(); | |
13883 | + pm_in_action = false; | |
13884 | return error; | |
13885 | } | |
13886 | ||
13887 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | |
c7c16703 | 13888 | index 6ccb08f57fcb..c8cbb5ed2fe3 100644 |
1a6e0f06 JK |
13889 | --- a/kernel/power/suspend.c |
13890 | +++ b/kernel/power/suspend.c | |
c7c16703 | 13891 | @@ -369,6 +369,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) |
1a6e0f06 JK |
13892 | arch_suspend_disable_irqs(); |
13893 | BUG_ON(!irqs_disabled()); | |
13894 | ||
13895 | + system_state = SYSTEM_SUSPEND; | |
13896 | + | |
13897 | error = syscore_suspend(); | |
13898 | if (!error) { | |
13899 | *wakeup = pm_wakeup_pending(); | |
c7c16703 | 13900 | @@ -385,6 +387,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) |
1a6e0f06 JK |
13901 | syscore_resume(); |
13902 | } | |
13903 | ||
13904 | + system_state = SYSTEM_RUNNING; | |
13905 | + | |
13906 | arch_suspend_enable_irqs(); | |
13907 | BUG_ON(irqs_disabled()); | |
13908 | ||
c7c16703 | 13909 | @@ -527,6 +531,8 @@ static int enter_state(suspend_state_t state) |
1a6e0f06 JK |
13910 | return error; |
13911 | } | |
13912 | ||
13913 | +bool pm_in_action; | |
13914 | + | |
13915 | /** | |
13916 | * pm_suspend - Externally visible function for suspending the system. | |
13917 | * @state: System sleep state to enter. | |
c7c16703 | 13918 | @@ -541,6 +547,8 @@ int pm_suspend(suspend_state_t state) |
1a6e0f06 JK |
13919 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) |
13920 | return -EINVAL; | |
13921 | ||
13922 | + pm_in_action = true; | |
13923 | + | |
13924 | error = enter_state(state); | |
13925 | if (error) { | |
13926 | suspend_stats.fail++; | |
c7c16703 | 13927 | @@ -548,6 +556,7 @@ int pm_suspend(suspend_state_t state) |
1a6e0f06 JK |
13928 | } else { |
13929 | suspend_stats.success++; | |
13930 | } | |
13931 | + pm_in_action = false; | |
13932 | return error; | |
13933 | } | |
13934 | EXPORT_SYMBOL(pm_suspend); | |
13935 | diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c | |
c7c16703 | 13936 | index f7a55e9ff2f7..9277ee033271 100644 |
1a6e0f06 JK |
13937 | --- a/kernel/printk/printk.c |
13938 | +++ b/kernel/printk/printk.c | |
13939 | @@ -351,6 +351,65 @@ __packed __aligned(4) | |
13940 | */ | |
13941 | DEFINE_RAW_SPINLOCK(logbuf_lock); | |
13942 | ||
13943 | +#ifdef CONFIG_EARLY_PRINTK | |
13944 | +struct console *early_console; | |
13945 | + | |
13946 | +static void early_vprintk(const char *fmt, va_list ap) | |
13947 | +{ | |
13948 | + if (early_console) { | |
13949 | + char buf[512]; | |
13950 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
13951 | + | |
13952 | + early_console->write(early_console, buf, n); | |
13953 | + } | |
13954 | +} | |
13955 | + | |
13956 | +asmlinkage void early_printk(const char *fmt, ...) | |
13957 | +{ | |
13958 | + va_list ap; | |
13959 | + | |
13960 | + va_start(ap, fmt); | |
13961 | + early_vprintk(fmt, ap); | |
13962 | + va_end(ap); | |
13963 | +} | |
13964 | + | |
13965 | +/* | |
13966 | + * This is independent of any log levels - a global | |
13967 | + * kill switch that turns off all of printk. | |
13968 | + * | |
13969 | + * Used by the NMI watchdog if early-printk is enabled. | |
13970 | + */ | |
13971 | +static bool __read_mostly printk_killswitch; | |
13972 | + | |
13973 | +static int __init force_early_printk_setup(char *str) | |
13974 | +{ | |
13975 | + printk_killswitch = true; | |
13976 | + return 0; | |
13977 | +} | |
13978 | +early_param("force_early_printk", force_early_printk_setup); | |
13979 | + | |
13980 | +void printk_kill(void) | |
13981 | +{ | |
13982 | + printk_killswitch = true; | |
13983 | +} | |
13984 | + | |
13985 | +#ifdef CONFIG_PRINTK | |
13986 | +static int forced_early_printk(const char *fmt, va_list ap) | |
13987 | +{ | |
13988 | + if (!printk_killswitch) | |
13989 | + return 0; | |
13990 | + early_vprintk(fmt, ap); | |
13991 | + return 1; | |
13992 | +} | |
13993 | +#endif | |
13994 | + | |
13995 | +#else | |
13996 | +static inline int forced_early_printk(const char *fmt, va_list ap) | |
13997 | +{ | |
13998 | + return 0; | |
13999 | +} | |
14000 | +#endif | |
14001 | + | |
14002 | #ifdef CONFIG_PRINTK | |
14003 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
14004 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
c7c16703 | 14005 | @@ -1337,6 +1396,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
14006 | { |
14007 | char *text; | |
14008 | int len = 0; | |
14009 | + int attempts = 0; | |
14010 | ||
14011 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
14012 | if (!text) | |
c7c16703 | 14013 | @@ -1348,6 +1408,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
14014 | u64 seq; |
14015 | u32 idx; | |
14016 | enum log_flags prev; | |
14017 | + int num_msg; | |
14018 | +try_again: | |
14019 | + attempts++; | |
14020 | + if (attempts > 10) { | |
14021 | + len = -EBUSY; | |
14022 | + goto out; | |
14023 | + } | |
14024 | + num_msg = 0; | |
14025 | ||
14026 | /* | |
14027 | * Find first record that fits, including all following records, | |
c7c16703 | 14028 | @@ -1363,6 +1431,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
14029 | prev = msg->flags; |
14030 | idx = log_next(idx); | |
14031 | seq++; | |
14032 | + num_msg++; | |
14033 | + if (num_msg > 5) { | |
14034 | + num_msg = 0; | |
14035 | + raw_spin_unlock_irq(&logbuf_lock); | |
14036 | + raw_spin_lock_irq(&logbuf_lock); | |
14037 | + if (clear_seq < log_first_seq) | |
14038 | + goto try_again; | |
14039 | + } | |
14040 | } | |
14041 | ||
14042 | /* move first record forward until length fits into the buffer */ | |
c7c16703 | 14043 | @@ -1376,6 +1452,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
14044 | prev = msg->flags; |
14045 | idx = log_next(idx); | |
14046 | seq++; | |
14047 | + num_msg++; | |
14048 | + if (num_msg > 5) { | |
14049 | + num_msg = 0; | |
14050 | + raw_spin_unlock_irq(&logbuf_lock); | |
14051 | + raw_spin_lock_irq(&logbuf_lock); | |
14052 | + if (clear_seq < log_first_seq) | |
14053 | + goto try_again; | |
14054 | + } | |
14055 | } | |
14056 | ||
14057 | /* last message fitting into this dump */ | |
c7c16703 | 14058 | @@ -1416,6 +1500,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
14059 | clear_seq = log_next_seq; |
14060 | clear_idx = log_next_idx; | |
14061 | } | |
14062 | +out: | |
14063 | raw_spin_unlock_irq(&logbuf_lock); | |
14064 | ||
14065 | kfree(text); | |
c7c16703 | 14066 | @@ -1569,6 +1654,12 @@ static void call_console_drivers(int level, |
1a6e0f06 JK |
14067 | if (!console_drivers) |
14068 | return; | |
14069 | ||
14070 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
14071 | + if (in_irq() || in_nmi()) | |
14072 | + return; | |
14073 | + } | |
14074 | + | |
14075 | + migrate_disable(); | |
14076 | for_each_console(con) { | |
14077 | if (exclusive_console && con != exclusive_console) | |
14078 | continue; | |
c7c16703 | 14079 | @@ -1584,6 +1675,7 @@ static void call_console_drivers(int level, |
1a6e0f06 JK |
14080 | else |
14081 | con->write(con, text, len); | |
14082 | } | |
14083 | + migrate_enable(); | |
14084 | } | |
14085 | ||
14086 | /* | |
c7c16703 | 14087 | @@ -1781,6 +1873,13 @@ asmlinkage int vprintk_emit(int facility, int level, |
1a6e0f06 JK |
14088 | /* cpu currently holding logbuf_lock in this function */ |
14089 | static unsigned int logbuf_cpu = UINT_MAX; | |
14090 | ||
14091 | + /* | |
14092 | + * Fall back to early_printk if a debugging subsystem has | |
14093 | + * killed printk output | |
14094 | + */ | |
14095 | + if (unlikely(forced_early_printk(fmt, args))) | |
14096 | + return 1; | |
14097 | + | |
14098 | if (level == LOGLEVEL_SCHED) { | |
14099 | level = LOGLEVEL_DEFAULT; | |
14100 | in_sched = true; | |
c7c16703 | 14101 | @@ -1885,13 +1984,23 @@ asmlinkage int vprintk_emit(int facility, int level, |
1a6e0f06 JK |
14102 | |
14103 | /* If called from the scheduler, we can not call up(). */ | |
14104 | if (!in_sched) { | |
14105 | + int may_trylock = 1; | |
14106 | + | |
14107 | lockdep_off(); | |
14108 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14109 | + /* | |
14110 | + * we can't take a sleeping lock with IRQs or preeption disabled | |
14111 | + * so we can't print in these contexts | |
14112 | + */ | |
14113 | + if (!(preempt_count() == 0 && !irqs_disabled())) | |
14114 | + may_trylock = 0; | |
14115 | +#endif | |
14116 | /* | |
14117 | * Try to acquire and then immediately release the console | |
14118 | * semaphore. The release will print out buffers and wake up | |
14119 | * /dev/kmsg and syslog() users. | |
14120 | */ | |
14121 | - if (console_trylock()) | |
14122 | + if (may_trylock && console_trylock()) | |
14123 | console_unlock(); | |
14124 | lockdep_on(); | |
14125 | } | |
c7c16703 | 14126 | @@ -2014,26 +2123,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); |
1a6e0f06 JK |
14127 | |
14128 | #endif /* CONFIG_PRINTK */ | |
14129 | ||
14130 | -#ifdef CONFIG_EARLY_PRINTK | |
14131 | -struct console *early_console; | |
14132 | - | |
14133 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
14134 | -{ | |
14135 | - va_list ap; | |
14136 | - char buf[512]; | |
14137 | - int n; | |
14138 | - | |
14139 | - if (!early_console) | |
14140 | - return; | |
14141 | - | |
14142 | - va_start(ap, fmt); | |
14143 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
14144 | - va_end(ap); | |
14145 | - | |
14146 | - early_console->write(early_console, buf, n); | |
14147 | -} | |
14148 | -#endif | |
14149 | - | |
14150 | static int __add_preferred_console(char *name, int idx, char *options, | |
14151 | char *brl_options) | |
14152 | { | |
c7c16703 | 14153 | @@ -2303,11 +2392,16 @@ static void console_cont_flush(char *text, size_t size) |
1a6e0f06 JK |
14154 | goto out; |
14155 | ||
14156 | len = cont_print_text(text, size); | |
14157 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14158 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
14159 | + call_console_drivers(cont.level, NULL, 0, text, len); | |
14160 | +#else | |
14161 | raw_spin_unlock(&logbuf_lock); | |
14162 | stop_critical_timings(); | |
14163 | call_console_drivers(cont.level, NULL, 0, text, len); | |
14164 | start_critical_timings(); | |
14165 | local_irq_restore(flags); | |
14166 | +#endif | |
14167 | return; | |
14168 | out: | |
14169 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
c7c16703 | 14170 | @@ -2431,13 +2525,17 @@ void console_unlock(void) |
1a6e0f06 JK |
14171 | console_idx = log_next(console_idx); |
14172 | console_seq++; | |
14173 | console_prev = msg->flags; | |
14174 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14175 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
14176 | + call_console_drivers(level, ext_text, ext_len, text, len); | |
14177 | +#else | |
14178 | raw_spin_unlock(&logbuf_lock); | |
14179 | ||
14180 | stop_critical_timings(); /* don't trace print latency */ | |
14181 | call_console_drivers(level, ext_text, ext_len, text, len); | |
14182 | start_critical_timings(); | |
14183 | local_irq_restore(flags); | |
14184 | - | |
14185 | +#endif | |
14186 | if (do_cond_resched) | |
14187 | cond_resched(); | |
14188 | } | |
c7c16703 | 14189 | @@ -2489,6 +2587,11 @@ void console_unblank(void) |
1a6e0f06 JK |
14190 | { |
14191 | struct console *c; | |
14192 | ||
14193 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
14194 | + if (in_irq() || in_nmi()) | |
14195 | + return; | |
14196 | + } | |
14197 | + | |
14198 | /* | |
14199 | * console_unblank can no longer be called in interrupt context unless | |
14200 | * oops_in_progress is set to 1.. | |
14201 | diff --git a/kernel/ptrace.c b/kernel/ptrace.c | |
c7c16703 | 14202 | index 49ba7c1ade9d..44f44b47ec07 100644 |
1a6e0f06 JK |
14203 | --- a/kernel/ptrace.c |
14204 | +++ b/kernel/ptrace.c | |
c7c16703 | 14205 | @@ -166,7 +166,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) |
1a6e0f06 JK |
14206 | |
14207 | spin_lock_irq(&task->sighand->siglock); | |
14208 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
14209 | - task->state = __TASK_TRACED; | |
14210 | + unsigned long flags; | |
14211 | + | |
14212 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14213 | + if (task->state & __TASK_TRACED) | |
14214 | + task->state = __TASK_TRACED; | |
14215 | + else | |
14216 | + task->saved_state = __TASK_TRACED; | |
14217 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14218 | ret = true; | |
14219 | } | |
14220 | spin_unlock_irq(&task->sighand->siglock); | |
14221 | diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c | |
c7c16703 | 14222 | index bf08fee53dc7..eeb8ce4ad7b6 100644 |
1a6e0f06 JK |
14223 | --- a/kernel/rcu/rcutorture.c |
14224 | +++ b/kernel/rcu/rcutorture.c | |
14225 | @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = { | |
14226 | .name = "rcu" | |
14227 | }; | |
14228 | ||
14229 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14230 | /* | |
14231 | * Definitions for rcu_bh torture testing. | |
14232 | */ | |
14233 | @@ -443,6 +444,12 @@ static struct rcu_torture_ops rcu_bh_ops = { | |
14234 | .name = "rcu_bh" | |
14235 | }; | |
14236 | ||
14237 | +#else | |
14238 | +static struct rcu_torture_ops rcu_bh_ops = { | |
14239 | + .ttype = INVALID_RCU_FLAVOR, | |
14240 | +}; | |
14241 | +#endif | |
14242 | + | |
14243 | /* | |
14244 | * Don't even think about trying any of these in real life!!! | |
14245 | * The names includes "busted", and they really means it! | |
14246 | diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c | |
c7c16703 | 14247 | index 69a5611a7e7c..64d91f306eda 100644 |
1a6e0f06 JK |
14248 | --- a/kernel/rcu/tree.c |
14249 | +++ b/kernel/rcu/tree.c | |
c7c16703 | 14250 | @@ -55,6 +55,11 @@ |
1a6e0f06 JK |
14251 | #include <linux/random.h> |
14252 | #include <linux/trace_events.h> | |
14253 | #include <linux/suspend.h> | |
14254 | +#include <linux/delay.h> | |
14255 | +#include <linux/gfp.h> | |
14256 | +#include <linux/oom.h> | |
14257 | +#include <linux/smpboot.h> | |
14258 | +#include "../time/tick-internal.h" | |
14259 | ||
14260 | #include "tree.h" | |
14261 | #include "rcu.h" | |
c7c16703 | 14262 | @@ -257,6 +262,19 @@ void rcu_sched_qs(void) |
1a6e0f06 JK |
14263 | this_cpu_ptr(&rcu_sched_data), true); |
14264 | } | |
14265 | ||
14266 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14267 | +static void rcu_preempt_qs(void); | |
14268 | + | |
14269 | +void rcu_bh_qs(void) | |
14270 | +{ | |
14271 | + unsigned long flags; | |
14272 | + | |
14273 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ | |
14274 | + local_irq_save(flags); | |
14275 | + rcu_preempt_qs(); | |
14276 | + local_irq_restore(flags); | |
14277 | +} | |
14278 | +#else | |
14279 | void rcu_bh_qs(void) | |
14280 | { | |
14281 | if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { | |
c7c16703 | 14282 | @@ -266,6 +284,7 @@ void rcu_bh_qs(void) |
1a6e0f06 JK |
14283 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); |
14284 | } | |
14285 | } | |
14286 | +#endif | |
14287 | ||
14288 | static DEFINE_PER_CPU(int, rcu_sched_qs_mask); | |
14289 | ||
c7c16703 | 14290 | @@ -446,11 +465,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); |
1a6e0f06 JK |
14291 | /* |
14292 | * Return the number of RCU BH batches started thus far for debug & stats. | |
14293 | */ | |
14294 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14295 | unsigned long rcu_batches_started_bh(void) | |
14296 | { | |
14297 | return rcu_bh_state.gpnum; | |
14298 | } | |
14299 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
14300 | +#endif | |
14301 | ||
14302 | /* | |
14303 | * Return the number of RCU batches completed thus far for debug & stats. | |
c7c16703 | 14304 | @@ -470,6 +491,7 @@ unsigned long rcu_batches_completed_sched(void) |
1a6e0f06 JK |
14305 | } |
14306 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
14307 | ||
14308 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14309 | /* | |
14310 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
14311 | */ | |
c7c16703 | 14312 | @@ -478,6 +500,7 @@ unsigned long rcu_batches_completed_bh(void) |
1a6e0f06 JK |
14313 | return rcu_bh_state.completed; |
14314 | } | |
14315 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
14316 | +#endif | |
14317 | ||
14318 | /* | |
14319 | * Return the number of RCU expedited batches completed thus far for | |
c7c16703 | 14320 | @@ -501,6 +524,7 @@ unsigned long rcu_exp_batches_completed_sched(void) |
1a6e0f06 JK |
14321 | } |
14322 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); | |
14323 | ||
14324 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14325 | /* | |
14326 | * Force a quiescent state. | |
14327 | */ | |
c7c16703 | 14328 | @@ -519,6 +543,13 @@ void rcu_bh_force_quiescent_state(void) |
1a6e0f06 JK |
14329 | } |
14330 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | |
14331 | ||
14332 | +#else | |
14333 | +void rcu_force_quiescent_state(void) | |
14334 | +{ | |
14335 | +} | |
14336 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
14337 | +#endif | |
14338 | + | |
14339 | /* | |
14340 | * Force a quiescent state for RCU-sched. | |
14341 | */ | |
c7c16703 | 14342 | @@ -569,9 +600,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, |
1a6e0f06 JK |
14343 | case RCU_FLAVOR: |
14344 | rsp = rcu_state_p; | |
14345 | break; | |
14346 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14347 | case RCU_BH_FLAVOR: | |
14348 | rsp = &rcu_bh_state; | |
14349 | break; | |
14350 | +#endif | |
14351 | case RCU_SCHED_FLAVOR: | |
14352 | rsp = &rcu_sched_state; | |
14353 | break; | |
14354 | @@ -3013,18 +3046,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |
14355 | /* | |
14356 | * Do RCU core processing for the current CPU. | |
14357 | */ | |
c7c16703 JK |
14358 | -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) |
14359 | +static __latent_entropy void rcu_process_callbacks(void) | |
1a6e0f06 JK |
14360 | { |
14361 | struct rcu_state *rsp; | |
14362 | ||
14363 | if (cpu_is_offline(smp_processor_id())) | |
14364 | return; | |
14365 | - trace_rcu_utilization(TPS("Start RCU core")); | |
14366 | for_each_rcu_flavor(rsp) | |
14367 | __rcu_process_callbacks(rsp); | |
14368 | - trace_rcu_utilization(TPS("End RCU core")); | |
14369 | } | |
14370 | ||
14371 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
14372 | /* | |
14373 | * Schedule RCU callback invocation. If the specified type of RCU | |
14374 | * does not support RCU priority boosting, just do a direct call, | |
14375 | @@ -3036,19 +3068,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |
14376 | { | |
14377 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) | |
14378 | return; | |
14379 | - if (likely(!rsp->boost)) { | |
14380 | - rcu_do_batch(rsp, rdp); | |
14381 | - return; | |
14382 | - } | |
14383 | - invoke_rcu_callbacks_kthread(); | |
14384 | + rcu_do_batch(rsp, rdp); | |
14385 | } | |
14386 | ||
14387 | +static void rcu_wake_cond(struct task_struct *t, int status) | |
14388 | +{ | |
14389 | + /* | |
14390 | + * If the thread is yielding, only wake it when this | |
14391 | + * is invoked from idle | |
14392 | + */ | |
14393 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
14394 | + wake_up_process(t); | |
14395 | +} | |
14396 | + | |
14397 | +/* | |
14398 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
14399 | + */ | |
14400 | static void invoke_rcu_core(void) | |
14401 | { | |
14402 | - if (cpu_online(smp_processor_id())) | |
14403 | - raise_softirq(RCU_SOFTIRQ); | |
14404 | + unsigned long flags; | |
14405 | + struct task_struct *t; | |
14406 | + | |
14407 | + if (!cpu_online(smp_processor_id())) | |
14408 | + return; | |
14409 | + local_irq_save(flags); | |
14410 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
14411 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
14412 | + if (t != NULL && current != t) | |
14413 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
14414 | + local_irq_restore(flags); | |
14415 | } | |
14416 | ||
14417 | +static void rcu_cpu_kthread_park(unsigned int cpu) | |
14418 | +{ | |
14419 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
14420 | +} | |
14421 | + | |
14422 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
14423 | +{ | |
14424 | + return __this_cpu_read(rcu_cpu_has_work); | |
14425 | +} | |
14426 | + | |
14427 | +/* | |
14428 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
14429 | + * RCU softirq used in flavors and configurations of RCU that do not | |
14430 | + * support RCU priority boosting. | |
14431 | + */ | |
14432 | +static void rcu_cpu_kthread(unsigned int cpu) | |
14433 | +{ | |
14434 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
14435 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
14436 | + int spincnt; | |
14437 | + | |
14438 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
14439 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
14440 | + local_bh_disable(); | |
14441 | + *statusp = RCU_KTHREAD_RUNNING; | |
14442 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
14443 | + local_irq_disable(); | |
14444 | + work = *workp; | |
14445 | + *workp = 0; | |
14446 | + local_irq_enable(); | |
14447 | + if (work) | |
14448 | + rcu_process_callbacks(); | |
14449 | + local_bh_enable(); | |
14450 | + if (*workp == 0) { | |
14451 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
14452 | + *statusp = RCU_KTHREAD_WAITING; | |
14453 | + return; | |
14454 | + } | |
14455 | + } | |
14456 | + *statusp = RCU_KTHREAD_YIELDING; | |
14457 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
14458 | + schedule_timeout_interruptible(2); | |
14459 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
14460 | + *statusp = RCU_KTHREAD_WAITING; | |
14461 | +} | |
14462 | + | |
14463 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
14464 | + .store = &rcu_cpu_kthread_task, | |
14465 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
14466 | + .thread_fn = rcu_cpu_kthread, | |
14467 | + .thread_comm = "rcuc/%u", | |
14468 | + .setup = rcu_cpu_kthread_setup, | |
14469 | + .park = rcu_cpu_kthread_park, | |
14470 | +}; | |
14471 | + | |
14472 | +/* | |
14473 | + * Spawn per-CPU RCU core processing kthreads. | |
14474 | + */ | |
14475 | +static int __init rcu_spawn_core_kthreads(void) | |
14476 | +{ | |
14477 | + int cpu; | |
14478 | + | |
14479 | + for_each_possible_cpu(cpu) | |
14480 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
14481 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
14482 | + return 0; | |
14483 | +} | |
14484 | +early_initcall(rcu_spawn_core_kthreads); | |
14485 | + | |
14486 | /* | |
14487 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
14488 | */ | |
14489 | @@ -3192,6 +3311,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) | |
14490 | } | |
14491 | EXPORT_SYMBOL_GPL(call_rcu_sched); | |
14492 | ||
14493 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14494 | /* | |
14495 | * Queue an RCU callback for invocation after a quicker grace period. | |
14496 | */ | |
14497 | @@ -3200,6 +3320,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) | |
14498 | __call_rcu(head, func, &rcu_bh_state, -1, 0); | |
14499 | } | |
14500 | EXPORT_SYMBOL_GPL(call_rcu_bh); | |
14501 | +#endif | |
14502 | ||
14503 | /* | |
14504 | * Queue an RCU callback for lazy invocation after a grace period. | |
14505 | @@ -3291,6 +3412,7 @@ void synchronize_sched(void) | |
14506 | } | |
14507 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
14508 | ||
14509 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14510 | /** | |
14511 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
14512 | * | |
14513 | @@ -3317,6 +3439,7 @@ void synchronize_rcu_bh(void) | |
14514 | wait_rcu_gp(call_rcu_bh); | |
14515 | } | |
14516 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
14517 | +#endif | |
14518 | ||
14519 | /** | |
14520 | * get_state_synchronize_rcu - Snapshot current RCU state | |
14521 | @@ -3695,6 +3818,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |
14522 | mutex_unlock(&rsp->barrier_mutex); | |
14523 | } | |
14524 | ||
14525 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14526 | /** | |
14527 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
14528 | */ | |
14529 | @@ -3703,6 +3827,7 @@ void rcu_barrier_bh(void) | |
14530 | _rcu_barrier(&rcu_bh_state); | |
14531 | } | |
14532 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
14533 | +#endif | |
14534 | ||
14535 | /** | |
14536 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
c7c16703 | 14537 | @@ -4220,12 +4345,13 @@ void __init rcu_init(void) |
1a6e0f06 JK |
14538 | |
14539 | rcu_bootup_announce(); | |
14540 | rcu_init_geometry(); | |
14541 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14542 | rcu_init_one(&rcu_bh_state); | |
14543 | +#endif | |
14544 | rcu_init_one(&rcu_sched_state); | |
14545 | if (dump_tree) | |
14546 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
14547 | __rcu_init_preempt(); | |
14548 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
14549 | ||
14550 | /* | |
14551 | * We don't need protection against CPU-hotplug here because | |
14552 | diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h | |
c7c16703 | 14553 | index e99a5234d9ed..958ac107062c 100644 |
1a6e0f06 JK |
14554 | --- a/kernel/rcu/tree.h |
14555 | +++ b/kernel/rcu/tree.h | |
c7c16703 | 14556 | @@ -588,18 +588,18 @@ extern struct list_head rcu_struct_flavors; |
1a6e0f06 JK |
14557 | */ |
14558 | extern struct rcu_state rcu_sched_state; | |
14559 | ||
14560 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14561 | extern struct rcu_state rcu_bh_state; | |
14562 | +#endif | |
14563 | ||
14564 | #ifdef CONFIG_PREEMPT_RCU | |
14565 | extern struct rcu_state rcu_preempt_state; | |
14566 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
14567 | ||
14568 | -#ifdef CONFIG_RCU_BOOST | |
14569 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
14570 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
14571 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
14572 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
14573 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
14574 | ||
14575 | #ifndef RCU_TREE_NONCORE | |
14576 | ||
c7c16703 | 14577 | @@ -619,10 +619,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); |
1a6e0f06 JK |
14578 | static void __init __rcu_init_preempt(void); |
14579 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
14580 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
14581 | -static void invoke_rcu_callbacks_kthread(void); | |
14582 | static bool rcu_is_callbacks_kthread(void); | |
14583 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
14584 | #ifdef CONFIG_RCU_BOOST | |
14585 | -static void rcu_preempt_do_callbacks(void); | |
14586 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
14587 | struct rcu_node *rnp); | |
14588 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
14589 | diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h | |
c7c16703 | 14590 | index 85c5a883c6e3..dbbda005c1f9 100644 |
1a6e0f06 JK |
14591 | --- a/kernel/rcu/tree_plugin.h |
14592 | +++ b/kernel/rcu/tree_plugin.h | |
14593 | @@ -24,25 +24,10 @@ | |
14594 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
14595 | */ | |
14596 | ||
14597 | -#include <linux/delay.h> | |
14598 | -#include <linux/gfp.h> | |
14599 | -#include <linux/oom.h> | |
14600 | -#include <linux/smpboot.h> | |
14601 | -#include "../time/tick-internal.h" | |
14602 | - | |
14603 | #ifdef CONFIG_RCU_BOOST | |
14604 | ||
14605 | #include "../locking/rtmutex_common.h" | |
14606 | ||
14607 | -/* | |
14608 | - * Control variables for per-CPU and per-rcu_node kthreads. These | |
14609 | - * handle all flavors of RCU. | |
14610 | - */ | |
14611 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
14612 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
14613 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
14614 | -DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
14615 | - | |
14616 | #else /* #ifdef CONFIG_RCU_BOOST */ | |
14617 | ||
14618 | /* | |
14619 | @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
14620 | ||
14621 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
14622 | ||
14623 | +/* | |
14624 | + * Control variables for per-CPU and per-rcu_node kthreads. These | |
14625 | + * handle all flavors of RCU. | |
14626 | + */ | |
14627 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
14628 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
14629 | +DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
14630 | + | |
14631 | #ifdef CONFIG_RCU_NOCB_CPU | |
14632 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
14633 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
14634 | @@ -426,7 +419,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |
14635 | } | |
14636 | ||
14637 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
14638 | - if (in_irq() || in_serving_softirq()) { | |
14639 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
14640 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
14641 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
14642 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
14643 | @@ -632,15 +625,6 @@ static void rcu_preempt_check_callbacks(void) | |
14644 | t->rcu_read_unlock_special.b.need_qs = true; | |
14645 | } | |
14646 | ||
14647 | -#ifdef CONFIG_RCU_BOOST | |
14648 | - | |
14649 | -static void rcu_preempt_do_callbacks(void) | |
14650 | -{ | |
14651 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
14652 | -} | |
14653 | - | |
14654 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
14655 | - | |
14656 | /* | |
14657 | * Queue a preemptible-RCU callback for invocation after a grace period. | |
14658 | */ | |
14659 | @@ -829,6 +813,19 @@ void exit_rcu(void) | |
14660 | ||
14661 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
14662 | ||
14663 | +/* | |
14664 | + * If boosting, set rcuc kthreads to realtime priority. | |
14665 | + */ | |
14666 | +static void rcu_cpu_kthread_setup(unsigned int cpu) | |
14667 | +{ | |
14668 | +#ifdef CONFIG_RCU_BOOST | |
14669 | + struct sched_param sp; | |
14670 | + | |
14671 | + sp.sched_priority = kthread_prio; | |
14672 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
14673 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
14674 | +} | |
14675 | + | |
14676 | #ifdef CONFIG_RCU_BOOST | |
14677 | ||
14678 | #include "../locking/rtmutex_common.h" | |
14679 | @@ -860,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |
14680 | ||
14681 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | |
14682 | ||
14683 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
14684 | -{ | |
14685 | - /* | |
14686 | - * If the thread is yielding, only wake it when this | |
14687 | - * is invoked from idle | |
14688 | - */ | |
14689 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
14690 | - wake_up_process(t); | |
14691 | -} | |
14692 | - | |
14693 | /* | |
14694 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
14695 | * or ->boost_tasks, advancing the pointer to the next task in the | |
14696 | @@ -1013,23 +1000,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
14697 | } | |
14698 | ||
14699 | /* | |
14700 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
14701 | - */ | |
14702 | -static void invoke_rcu_callbacks_kthread(void) | |
14703 | -{ | |
14704 | - unsigned long flags; | |
14705 | - | |
14706 | - local_irq_save(flags); | |
14707 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
14708 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
14709 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
14710 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
14711 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
14712 | - } | |
14713 | - local_irq_restore(flags); | |
14714 | -} | |
14715 | - | |
14716 | -/* | |
14717 | * Is the current CPU running the RCU-callbacks kthread? | |
14718 | * Caller must have preemption disabled. | |
14719 | */ | |
14720 | @@ -1083,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
14721 | return 0; | |
14722 | } | |
14723 | ||
14724 | -static void rcu_kthread_do_work(void) | |
14725 | -{ | |
14726 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
14727 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
14728 | - rcu_preempt_do_callbacks(); | |
14729 | -} | |
14730 | - | |
14731 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
14732 | -{ | |
14733 | - struct sched_param sp; | |
14734 | - | |
14735 | - sp.sched_priority = kthread_prio; | |
14736 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
14737 | -} | |
14738 | - | |
14739 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
14740 | -{ | |
14741 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
14742 | -} | |
14743 | - | |
14744 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
14745 | -{ | |
14746 | - return __this_cpu_read(rcu_cpu_has_work); | |
14747 | -} | |
14748 | - | |
14749 | -/* | |
14750 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
14751 | - * RCU softirq used in flavors and configurations of RCU that do not | |
14752 | - * support RCU priority boosting. | |
14753 | - */ | |
14754 | -static void rcu_cpu_kthread(unsigned int cpu) | |
14755 | -{ | |
14756 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
14757 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
14758 | - int spincnt; | |
14759 | - | |
14760 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
14761 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
14762 | - local_bh_disable(); | |
14763 | - *statusp = RCU_KTHREAD_RUNNING; | |
14764 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
14765 | - local_irq_disable(); | |
14766 | - work = *workp; | |
14767 | - *workp = 0; | |
14768 | - local_irq_enable(); | |
14769 | - if (work) | |
14770 | - rcu_kthread_do_work(); | |
14771 | - local_bh_enable(); | |
14772 | - if (*workp == 0) { | |
14773 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
14774 | - *statusp = RCU_KTHREAD_WAITING; | |
14775 | - return; | |
14776 | - } | |
14777 | - } | |
14778 | - *statusp = RCU_KTHREAD_YIELDING; | |
14779 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
14780 | - schedule_timeout_interruptible(2); | |
14781 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
14782 | - *statusp = RCU_KTHREAD_WAITING; | |
14783 | -} | |
14784 | - | |
14785 | /* | |
14786 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
14787 | * served by the rcu_node in question. The CPU hotplug lock is still | |
14788 | @@ -1174,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |
14789 | free_cpumask_var(cm); | |
14790 | } | |
14791 | ||
14792 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
14793 | - .store = &rcu_cpu_kthread_task, | |
14794 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
14795 | - .thread_fn = rcu_cpu_kthread, | |
14796 | - .thread_comm = "rcuc/%u", | |
14797 | - .setup = rcu_cpu_kthread_setup, | |
14798 | - .park = rcu_cpu_kthread_park, | |
14799 | -}; | |
14800 | - | |
14801 | /* | |
14802 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
14803 | */ | |
14804 | static void __init rcu_spawn_boost_kthreads(void) | |
14805 | { | |
14806 | struct rcu_node *rnp; | |
14807 | - int cpu; | |
14808 | - | |
14809 | - for_each_possible_cpu(cpu) | |
14810 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
14811 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
14812 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
14813 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
14814 | } | |
14815 | @@ -1216,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
14816 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | |
14817 | } | |
14818 | ||
14819 | -static void invoke_rcu_callbacks_kthread(void) | |
14820 | -{ | |
14821 | - WARN_ON_ONCE(1); | |
14822 | -} | |
14823 | - | |
14824 | static bool rcu_is_callbacks_kthread(void) | |
14825 | { | |
14826 | return false; | |
14827 | @@ -1244,7 +1134,7 @@ static void rcu_prepare_kthreads(int cpu) | |
14828 | ||
14829 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
14830 | ||
14831 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
14832 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
14833 | ||
14834 | /* | |
14835 | * Check to see if any future RCU-related work will need to be done | |
14836 | @@ -1261,7 +1151,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
14837 | return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) | |
14838 | ? 0 : rcu_cpu_has_callbacks(NULL); | |
14839 | } | |
14840 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ | |
14841 | ||
14842 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
14843 | /* | |
14844 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
14845 | * after it. | |
14846 | @@ -1357,6 +1249,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |
14847 | return cbs_ready; | |
14848 | } | |
14849 | ||
14850 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14851 | + | |
14852 | /* | |
14853 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
14854 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
14855 | @@ -1402,6 +1296,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
14856 | *nextevt = basemono + dj * TICK_NSEC; | |
14857 | return 0; | |
14858 | } | |
14859 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
14860 | ||
14861 | /* | |
14862 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
14863 | diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c | |
c7c16703 | 14864 | index f19271dce0a9..6b5ab88b6103 100644 |
1a6e0f06 JK |
14865 | --- a/kernel/rcu/update.c |
14866 | +++ b/kernel/rcu/update.c | |
c7c16703 JK |
14867 | @@ -62,7 +62,7 @@ |
14868 | #ifndef CONFIG_TINY_RCU | |
14869 | module_param(rcu_expedited, int, 0); | |
14870 | module_param(rcu_normal, int, 0); | |
14871 | -static int rcu_normal_after_boot; | |
14872 | +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
14873 | module_param(rcu_normal_after_boot, int, 0); | |
14874 | #endif /* #ifndef CONFIG_TINY_RCU */ | |
14875 | ||
14876 | @@ -129,8 +129,7 @@ bool rcu_gp_is_normal(void) | |
14877 | } | |
14878 | EXPORT_SYMBOL_GPL(rcu_gp_is_normal); | |
14879 | ||
14880 | -static atomic_t rcu_expedited_nesting = | |
14881 | - ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | |
14882 | +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); | |
14883 | ||
14884 | /* | |
14885 | * Should normal grace-period primitives be expedited? Intended for | |
14886 | @@ -178,8 +177,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); | |
14887 | */ | |
14888 | void rcu_end_inkernel_boot(void) | |
14889 | { | |
14890 | - if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) | |
14891 | - rcu_unexpedite_gp(); | |
14892 | + rcu_unexpedite_gp(); | |
14893 | if (rcu_normal_after_boot) | |
14894 | WRITE_ONCE(rcu_normal, 1); | |
14895 | } | |
14896 | @@ -294,6 +292,7 @@ int rcu_read_lock_held(void) | |
1a6e0f06 JK |
14897 | } |
14898 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
14899 | ||
14900 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14901 | /** | |
14902 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
14903 | * | |
c7c16703 | 14904 | @@ -320,6 +319,7 @@ int rcu_read_lock_bh_held(void) |
1a6e0f06 JK |
14905 | return in_softirq() || irqs_disabled(); |
14906 | } | |
14907 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
14908 | +#endif | |
14909 | ||
14910 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
14911 | ||
1a6e0f06 JK |
14912 | diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile |
14913 | index 5e59b832ae2b..7337a7f60e3f 100644 | |
14914 | --- a/kernel/sched/Makefile | |
14915 | +++ b/kernel/sched/Makefile | |
14916 | @@ -17,7 +17,7 @@ endif | |
14917 | ||
14918 | obj-y += core.o loadavg.o clock.o cputime.o | |
14919 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | |
14920 | -obj-y += wait.o swait.o completion.o idle.o | |
14921 | +obj-y += wait.o swait.o swork.o completion.o idle.o | |
14922 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | |
14923 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |
14924 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
14925 | diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c | |
14926 | index 8d0f35debf35..b62cf6400fe0 100644 | |
14927 | --- a/kernel/sched/completion.c | |
14928 | +++ b/kernel/sched/completion.c | |
14929 | @@ -30,10 +30,10 @@ void complete(struct completion *x) | |
14930 | { | |
14931 | unsigned long flags; | |
14932 | ||
14933 | - spin_lock_irqsave(&x->wait.lock, flags); | |
14934 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
14935 | x->done++; | |
14936 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
14937 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
14938 | + swake_up_locked(&x->wait); | |
14939 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
14940 | } | |
14941 | EXPORT_SYMBOL(complete); | |
14942 | ||
14943 | @@ -50,10 +50,10 @@ void complete_all(struct completion *x) | |
14944 | { | |
14945 | unsigned long flags; | |
14946 | ||
14947 | - spin_lock_irqsave(&x->wait.lock, flags); | |
14948 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
14949 | x->done += UINT_MAX/2; | |
14950 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
14951 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
14952 | + swake_up_all_locked(&x->wait); | |
14953 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
14954 | } | |
14955 | EXPORT_SYMBOL(complete_all); | |
14956 | ||
14957 | @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x, | |
14958 | long (*action)(long), long timeout, int state) | |
14959 | { | |
14960 | if (!x->done) { | |
14961 | - DECLARE_WAITQUEUE(wait, current); | |
14962 | + DECLARE_SWAITQUEUE(wait); | |
14963 | ||
14964 | - __add_wait_queue_tail_exclusive(&x->wait, &wait); | |
14965 | + __prepare_to_swait(&x->wait, &wait); | |
14966 | do { | |
14967 | if (signal_pending_state(state, current)) { | |
14968 | timeout = -ERESTARTSYS; | |
14969 | break; | |
14970 | } | |
14971 | __set_current_state(state); | |
14972 | - spin_unlock_irq(&x->wait.lock); | |
14973 | + raw_spin_unlock_irq(&x->wait.lock); | |
14974 | timeout = action(timeout); | |
14975 | - spin_lock_irq(&x->wait.lock); | |
14976 | + raw_spin_lock_irq(&x->wait.lock); | |
14977 | } while (!x->done && timeout); | |
14978 | - __remove_wait_queue(&x->wait, &wait); | |
14979 | + __finish_swait(&x->wait, &wait); | |
14980 | if (!x->done) | |
14981 | return timeout; | |
14982 | } | |
14983 | @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x, | |
14984 | { | |
14985 | might_sleep(); | |
14986 | ||
14987 | - spin_lock_irq(&x->wait.lock); | |
14988 | + raw_spin_lock_irq(&x->wait.lock); | |
14989 | timeout = do_wait_for_common(x, action, timeout, state); | |
14990 | - spin_unlock_irq(&x->wait.lock); | |
14991 | + raw_spin_unlock_irq(&x->wait.lock); | |
14992 | return timeout; | |
14993 | } | |
14994 | ||
14995 | @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x) | |
14996 | if (!READ_ONCE(x->done)) | |
14997 | return 0; | |
14998 | ||
14999 | - spin_lock_irqsave(&x->wait.lock, flags); | |
15000 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
15001 | if (!x->done) | |
15002 | ret = 0; | |
15003 | else | |
15004 | x->done--; | |
15005 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
15006 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
15007 | return ret; | |
15008 | } | |
15009 | EXPORT_SYMBOL(try_wait_for_completion); | |
15010 | @@ -311,7 +311,7 @@ bool completion_done(struct completion *x) | |
15011 | * after it's acquired the lock. | |
15012 | */ | |
15013 | smp_rmb(); | |
15014 | - spin_unlock_wait(&x->wait.lock); | |
15015 | + raw_spin_unlock_wait(&x->wait.lock); | |
15016 | return true; | |
15017 | } | |
15018 | EXPORT_SYMBOL(completion_done); | |
15019 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
c7c16703 | 15020 | index 154fd689fe02..a6aa5801b21e 100644 |
1a6e0f06 JK |
15021 | --- a/kernel/sched/core.c |
15022 | +++ b/kernel/sched/core.c | |
15023 | @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features = | |
15024 | * Number of tasks to iterate in a single balance run. | |
15025 | * Limited because this is done with IRQs disabled. | |
15026 | */ | |
15027 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15028 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
15029 | +#else | |
15030 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
15031 | +#endif | |
15032 | ||
15033 | /* | |
15034 | * period over which we average the RT time consumption, measured | |
15035 | @@ -345,6 +349,7 @@ static void init_rq_hrtick(struct rq *rq) | |
15036 | ||
15037 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
15038 | rq->hrtick_timer.function = hrtick; | |
15039 | + rq->hrtick_timer.irqsafe = 1; | |
15040 | } | |
15041 | #else /* CONFIG_SCHED_HRTICK */ | |
15042 | static inline void hrtick_clear(struct rq *rq) | |
15043 | @@ -449,7 +454,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |
15044 | head->lastp = &node->next; | |
15045 | } | |
15046 | ||
15047 | -void wake_up_q(struct wake_q_head *head) | |
15048 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
15049 | { | |
15050 | struct wake_q_node *node = head->first; | |
15051 | ||
15052 | @@ -466,7 +471,10 @@ void wake_up_q(struct wake_q_head *head) | |
15053 | * wake_up_process() implies a wmb() to pair with the queueing | |
15054 | * in wake_q_add() so as not to miss wakeups. | |
15055 | */ | |
15056 | - wake_up_process(task); | |
15057 | + if (sleeper) | |
15058 | + wake_up_lock_sleeper(task); | |
15059 | + else | |
15060 | + wake_up_process(task); | |
15061 | put_task_struct(task); | |
15062 | } | |
15063 | } | |
15064 | @@ -502,6 +510,38 @@ void resched_curr(struct rq *rq) | |
15065 | trace_sched_wake_idle_without_ipi(cpu); | |
15066 | } | |
15067 | ||
15068 | +#ifdef CONFIG_PREEMPT_LAZY | |
15069 | +void resched_curr_lazy(struct rq *rq) | |
15070 | +{ | |
15071 | + struct task_struct *curr = rq->curr; | |
15072 | + int cpu; | |
15073 | + | |
15074 | + if (!sched_feat(PREEMPT_LAZY)) { | |
15075 | + resched_curr(rq); | |
15076 | + return; | |
15077 | + } | |
15078 | + | |
15079 | + lockdep_assert_held(&rq->lock); | |
15080 | + | |
15081 | + if (test_tsk_need_resched(curr)) | |
15082 | + return; | |
15083 | + | |
15084 | + if (test_tsk_need_resched_lazy(curr)) | |
15085 | + return; | |
15086 | + | |
15087 | + set_tsk_need_resched_lazy(curr); | |
15088 | + | |
15089 | + cpu = cpu_of(rq); | |
15090 | + if (cpu == smp_processor_id()) | |
15091 | + return; | |
15092 | + | |
15093 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
15094 | + smp_mb(); | |
15095 | + if (!tsk_is_polling(curr)) | |
15096 | + smp_send_reschedule(cpu); | |
15097 | +} | |
15098 | +#endif | |
15099 | + | |
15100 | void resched_cpu(int cpu) | |
15101 | { | |
15102 | struct rq *rq = cpu_rq(cpu); | |
15103 | @@ -525,11 +565,14 @@ void resched_cpu(int cpu) | |
15104 | */ | |
15105 | int get_nohz_timer_target(void) | |
15106 | { | |
15107 | - int i, cpu = smp_processor_id(); | |
15108 | + int i, cpu; | |
15109 | struct sched_domain *sd; | |
15110 | ||
15111 | + preempt_disable_rt(); | |
15112 | + cpu = smp_processor_id(); | |
15113 | + | |
15114 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
15115 | - return cpu; | |
15116 | + goto preempt_en_rt; | |
15117 | ||
15118 | rcu_read_lock(); | |
15119 | for_each_domain(cpu, sd) { | |
15120 | @@ -548,6 +591,8 @@ int get_nohz_timer_target(void) | |
15121 | cpu = housekeeping_any_cpu(); | |
15122 | unlock: | |
15123 | rcu_read_unlock(); | |
15124 | +preempt_en_rt: | |
15125 | + preempt_enable_rt(); | |
15126 | return cpu; | |
15127 | } | |
15128 | /* | |
c7c16703 | 15129 | @@ -1100,6 +1145,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
1a6e0f06 JK |
15130 | |
15131 | lockdep_assert_held(&p->pi_lock); | |
15132 | ||
15133 | + if (__migrate_disabled(p)) { | |
15134 | + cpumask_copy(&p->cpus_allowed, new_mask); | |
15135 | + return; | |
15136 | + } | |
15137 | + | |
15138 | queued = task_on_rq_queued(p); | |
15139 | running = task_current(rq, p); | |
15140 | ||
c7c16703 JK |
15141 | @@ -1122,6 +1172,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
15142 | set_curr_task(rq, p); | |
1a6e0f06 JK |
15143 | } |
15144 | ||
15145 | +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); | |
15146 | +static DEFINE_MUTEX(sched_down_mutex); | |
15147 | +static cpumask_t sched_down_cpumask; | |
15148 | + | |
15149 | +void tell_sched_cpu_down_begin(int cpu) | |
15150 | +{ | |
15151 | + mutex_lock(&sched_down_mutex); | |
15152 | + cpumask_set_cpu(cpu, &sched_down_cpumask); | |
15153 | + mutex_unlock(&sched_down_mutex); | |
15154 | +} | |
15155 | + | |
15156 | +void tell_sched_cpu_down_done(int cpu) | |
15157 | +{ | |
15158 | + mutex_lock(&sched_down_mutex); | |
15159 | + cpumask_clear_cpu(cpu, &sched_down_cpumask); | |
15160 | + mutex_unlock(&sched_down_mutex); | |
15161 | +} | |
15162 | + | |
15163 | +/** | |
15164 | + * migrate_me - try to move the current task off this cpu | |
15165 | + * | |
15166 | + * Used by the pin_current_cpu() code to try to get tasks | |
15167 | + * to move off the current CPU as it is going down. | |
15168 | + * It will only move the task if the task isn't pinned to | |
15169 | + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) | |
15170 | + * and the task has to be in a RUNNING state. Otherwise the | |
15171 | + * movement of the task will wake it up (change its state | |
15172 | + * to running) when the task did not expect it. | |
15173 | + * | |
15174 | + * Returns 1 if it succeeded in moving the current task | |
15175 | + * 0 otherwise. | |
15176 | + */ | |
15177 | +int migrate_me(void) | |
15178 | +{ | |
15179 | + struct task_struct *p = current; | |
15180 | + struct migration_arg arg; | |
15181 | + struct cpumask *cpumask; | |
15182 | + struct cpumask *mask; | |
15183 | + unsigned int dest_cpu; | |
15184 | + struct rq_flags rf; | |
15185 | + struct rq *rq; | |
15186 | + | |
15187 | + /* | |
15188 | + * We can not migrate tasks bounded to a CPU or tasks not | |
15189 | + * running. The movement of the task will wake it up. | |
15190 | + */ | |
15191 | + if (p->flags & PF_NO_SETAFFINITY || p->state) | |
15192 | + return 0; | |
15193 | + | |
15194 | + mutex_lock(&sched_down_mutex); | |
15195 | + rq = task_rq_lock(p, &rf); | |
15196 | + | |
15197 | + cpumask = this_cpu_ptr(&sched_cpumasks); | |
15198 | + mask = &p->cpus_allowed; | |
15199 | + | |
15200 | + cpumask_andnot(cpumask, mask, &sched_down_cpumask); | |
15201 | + | |
15202 | + if (!cpumask_weight(cpumask)) { | |
15203 | + /* It's only on this CPU? */ | |
15204 | + task_rq_unlock(rq, p, &rf); | |
15205 | + mutex_unlock(&sched_down_mutex); | |
15206 | + return 0; | |
15207 | + } | |
15208 | + | |
15209 | + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); | |
15210 | + | |
15211 | + arg.task = p; | |
15212 | + arg.dest_cpu = dest_cpu; | |
15213 | + | |
15214 | + task_rq_unlock(rq, p, &rf); | |
15215 | + | |
15216 | + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | |
15217 | + tlb_migrate_finish(p->mm); | |
15218 | + mutex_unlock(&sched_down_mutex); | |
15219 | + | |
15220 | + return 1; | |
15221 | +} | |
15222 | + | |
15223 | /* | |
15224 | * Change a given task's CPU affinity. Migrate the thread to a | |
15225 | * proper CPU and schedule it away if the CPU it's executing on | |
c7c16703 | 15226 | @@ -1179,7 +1307,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, |
1a6e0f06 JK |
15227 | } |
15228 | ||
15229 | /* Can the task run on the task's current CPU? If so, we're done */ | |
15230 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
15231 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
15232 | goto out; | |
15233 | ||
15234 | dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); | |
c7c16703 | 15235 | @@ -1366,6 +1494,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) |
1a6e0f06 JK |
15236 | return ret; |
15237 | } | |
15238 | ||
15239 | +static bool check_task_state(struct task_struct *p, long match_state) | |
15240 | +{ | |
15241 | + bool match = false; | |
15242 | + | |
15243 | + raw_spin_lock_irq(&p->pi_lock); | |
15244 | + if (p->state == match_state || p->saved_state == match_state) | |
15245 | + match = true; | |
15246 | + raw_spin_unlock_irq(&p->pi_lock); | |
15247 | + | |
15248 | + return match; | |
15249 | +} | |
15250 | + | |
15251 | /* | |
15252 | * wait_task_inactive - wait for a thread to unschedule. | |
15253 | * | |
c7c16703 | 15254 | @@ -1410,7 +1550,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) |
1a6e0f06 JK |
15255 | * is actually now running somewhere else! |
15256 | */ | |
15257 | while (task_running(rq, p)) { | |
15258 | - if (match_state && unlikely(p->state != match_state)) | |
15259 | + if (match_state && !check_task_state(p, match_state)) | |
15260 | return 0; | |
15261 | cpu_relax(); | |
15262 | } | |
c7c16703 | 15263 | @@ -1425,7 +1565,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) |
1a6e0f06 JK |
15264 | running = task_running(rq, p); |
15265 | queued = task_on_rq_queued(p); | |
15266 | ncsw = 0; | |
15267 | - if (!match_state || p->state == match_state) | |
15268 | + if (!match_state || p->state == match_state || | |
15269 | + p->saved_state == match_state) | |
15270 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
15271 | task_rq_unlock(rq, p, &rf); | |
15272 | ||
c7c16703 | 15273 | @@ -1680,10 +1821,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl |
1a6e0f06 JK |
15274 | { |
15275 | activate_task(rq, p, en_flags); | |
15276 | p->on_rq = TASK_ON_RQ_QUEUED; | |
15277 | - | |
15278 | - /* if a worker is waking up, notify workqueue */ | |
15279 | - if (p->flags & PF_WQ_WORKER) | |
15280 | - wq_worker_waking_up(p, cpu_of(rq)); | |
15281 | } | |
15282 | ||
15283 | /* | |
c7c16703 | 15284 | @@ -2018,8 +2155,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
1a6e0f06 JK |
15285 | */ |
15286 | smp_mb__before_spinlock(); | |
15287 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
15288 | - if (!(p->state & state)) | |
15289 | + if (!(p->state & state)) { | |
15290 | + /* | |
15291 | + * The task might be running due to a spinlock sleeper | |
15292 | + * wakeup. Check the saved state and set it to running | |
15293 | + * if the wakeup condition is true. | |
15294 | + */ | |
15295 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
15296 | + if (p->saved_state & state) { | |
15297 | + p->saved_state = TASK_RUNNING; | |
15298 | + success = 1; | |
15299 | + } | |
15300 | + } | |
15301 | goto out; | |
15302 | + } | |
15303 | + | |
15304 | + /* | |
15305 | + * If this is a regular wakeup, then we can unconditionally | |
15306 | + * clear the saved state of a "lock sleeper". | |
15307 | + */ | |
15308 | + if (!(wake_flags & WF_LOCK_SLEEPER)) | |
15309 | + p->saved_state = TASK_RUNNING; | |
15310 | ||
15311 | trace_sched_waking(p); | |
15312 | ||
c7c16703 | 15313 | @@ -2102,53 +2258,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
1a6e0f06 JK |
15314 | } |
15315 | ||
15316 | /** | |
15317 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
15318 | - * @p: the thread to be awakened | |
c7c16703 | 15319 | - * @cookie: context's cookie for pinning |
1a6e0f06 JK |
15320 | - * |
15321 | - * Put @p on the run-queue if it's not already there. The caller must | |
15322 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
15323 | - * the current task. | |
15324 | - */ | |
15325 | -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie) | |
15326 | -{ | |
15327 | - struct rq *rq = task_rq(p); | |
15328 | - | |
15329 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
15330 | - WARN_ON_ONCE(p == current)) | |
15331 | - return; | |
15332 | - | |
15333 | - lockdep_assert_held(&rq->lock); | |
15334 | - | |
15335 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
15336 | - /* | |
15337 | - * This is OK, because current is on_cpu, which avoids it being | |
15338 | - * picked for load-balance and preemption/IRQs are still | |
15339 | - * disabled avoiding further scheduler activity on it and we've | |
15340 | - * not yet picked a replacement task. | |
15341 | - */ | |
15342 | - lockdep_unpin_lock(&rq->lock, cookie); | |
15343 | - raw_spin_unlock(&rq->lock); | |
15344 | - raw_spin_lock(&p->pi_lock); | |
15345 | - raw_spin_lock(&rq->lock); | |
15346 | - lockdep_repin_lock(&rq->lock, cookie); | |
15347 | - } | |
15348 | - | |
15349 | - if (!(p->state & TASK_NORMAL)) | |
15350 | - goto out; | |
15351 | - | |
15352 | - trace_sched_waking(p); | |
15353 | - | |
15354 | - if (!task_on_rq_queued(p)) | |
15355 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP); | |
15356 | - | |
15357 | - ttwu_do_wakeup(rq, p, 0, cookie); | |
c7c16703 | 15358 | - ttwu_stat(p, smp_processor_id(), 0); |
1a6e0f06 JK |
15359 | -out: |
15360 | - raw_spin_unlock(&p->pi_lock); | |
15361 | -} | |
15362 | - | |
15363 | -/** | |
15364 | * wake_up_process - Wake up a specific process | |
15365 | * @p: The process to be woken up. | |
15366 | * | |
c7c16703 | 15367 | @@ -2166,6 +2275,18 @@ int wake_up_process(struct task_struct *p) |
1a6e0f06 JK |
15368 | } |
15369 | EXPORT_SYMBOL(wake_up_process); | |
15370 | ||
15371 | +/** | |
15372 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
15373 | + * @p: The process to be woken up. | |
15374 | + * | |
15375 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate | |
15376 | + * the nature of the wakeup. | |
15377 | + */ | |
15378 | +int wake_up_lock_sleeper(struct task_struct *p) | |
15379 | +{ | |
15380 | + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); | |
15381 | +} | |
15382 | + | |
15383 | int wake_up_state(struct task_struct *p, unsigned int state) | |
15384 | { | |
15385 | return try_to_wake_up(p, state, 0); | |
c7c16703 | 15386 | @@ -2442,6 +2563,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) |
1a6e0f06 JK |
15387 | p->on_cpu = 0; |
15388 | #endif | |
15389 | init_task_preempt_count(p); | |
15390 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
15391 | + task_thread_info(p)->preempt_lazy_count = 0; | |
15392 | +#endif | |
15393 | #ifdef CONFIG_SMP | |
15394 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
15395 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
c7c16703 | 15396 | @@ -2770,21 +2894,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) |
1a6e0f06 JK |
15397 | finish_arch_post_lock_switch(); |
15398 | ||
15399 | fire_sched_in_preempt_notifiers(current); | |
15400 | + /* | |
15401 | + * We use mmdrop_delayed() here so we don't have to do the | |
15402 | + * full __mmdrop() when we are the last user. | |
15403 | + */ | |
15404 | if (mm) | |
15405 | - mmdrop(mm); | |
15406 | + mmdrop_delayed(mm); | |
15407 | if (unlikely(prev_state == TASK_DEAD)) { | |
15408 | if (prev->sched_class->task_dead) | |
15409 | prev->sched_class->task_dead(prev); | |
c7c16703 JK |
15410 | |
15411 | - /* | |
15412 | - * Remove function-return probe instances associated with this | |
15413 | - * task and put them back on the free list. | |
15414 | - */ | |
15415 | - kprobe_flush_task(prev); | |
15416 | - | |
15417 | - /* Task is done with its stack. */ | |
15418 | - put_task_stack(prev); | |
15419 | - | |
15420 | put_task_struct(prev); | |
15421 | } | |
15422 | ||
15423 | @@ -3252,6 +3371,77 @@ static inline void schedule_debug(struct task_struct *prev) | |
15424 | schedstat_inc(this_rq()->sched_count); | |
1a6e0f06 JK |
15425 | } |
15426 | ||
15427 | +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) | |
15428 | + | |
15429 | +void migrate_disable(void) | |
15430 | +{ | |
15431 | + struct task_struct *p = current; | |
15432 | + | |
15433 | + if (in_atomic() || irqs_disabled()) { | |
15434 | +#ifdef CONFIG_SCHED_DEBUG | |
15435 | + p->migrate_disable_atomic++; | |
15436 | +#endif | |
15437 | + return; | |
15438 | + } | |
15439 | + | |
15440 | +#ifdef CONFIG_SCHED_DEBUG | |
15441 | + if (unlikely(p->migrate_disable_atomic)) { | |
15442 | + tracing_off(); | |
15443 | + WARN_ON_ONCE(1); | |
15444 | + } | |
15445 | +#endif | |
15446 | + | |
15447 | + if (p->migrate_disable) { | |
15448 | + p->migrate_disable++; | |
15449 | + return; | |
15450 | + } | |
15451 | + | |
15452 | + preempt_disable(); | |
15453 | + preempt_lazy_disable(); | |
15454 | + pin_current_cpu(); | |
15455 | + p->migrate_disable = 1; | |
15456 | + preempt_enable(); | |
15457 | +} | |
15458 | +EXPORT_SYMBOL(migrate_disable); | |
15459 | + | |
15460 | +void migrate_enable(void) | |
15461 | +{ | |
15462 | + struct task_struct *p = current; | |
15463 | + | |
15464 | + if (in_atomic() || irqs_disabled()) { | |
15465 | +#ifdef CONFIG_SCHED_DEBUG | |
15466 | + p->migrate_disable_atomic--; | |
15467 | +#endif | |
15468 | + return; | |
15469 | + } | |
15470 | + | |
15471 | +#ifdef CONFIG_SCHED_DEBUG | |
15472 | + if (unlikely(p->migrate_disable_atomic)) { | |
15473 | + tracing_off(); | |
15474 | + WARN_ON_ONCE(1); | |
15475 | + } | |
15476 | +#endif | |
15477 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
15478 | + | |
15479 | + if (p->migrate_disable > 1) { | |
15480 | + p->migrate_disable--; | |
15481 | + return; | |
15482 | + } | |
15483 | + | |
15484 | + preempt_disable(); | |
15485 | + /* | |
15486 | + * Clearing migrate_disable causes tsk_cpus_allowed to | |
15487 | + * show the tasks original cpu affinity. | |
15488 | + */ | |
15489 | + p->migrate_disable = 0; | |
15490 | + | |
15491 | + unpin_current_cpu(); | |
15492 | + preempt_enable(); | |
15493 | + preempt_lazy_enable(); | |
15494 | +} | |
15495 | +EXPORT_SYMBOL(migrate_enable); | |
15496 | +#endif | |
15497 | + | |
15498 | /* | |
15499 | * Pick up the highest-prio task: | |
15500 | */ | |
c7c16703 | 15501 | @@ -3368,19 +3558,6 @@ static void __sched notrace __schedule(bool preempt) |
1a6e0f06 JK |
15502 | } else { |
15503 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | |
15504 | prev->on_rq = 0; | |
15505 | - | |
15506 | - /* | |
15507 | - * If a worker went to sleep, notify and ask workqueue | |
15508 | - * whether it wants to wake up a task to maintain | |
15509 | - * concurrency. | |
15510 | - */ | |
15511 | - if (prev->flags & PF_WQ_WORKER) { | |
15512 | - struct task_struct *to_wakeup; | |
15513 | - | |
15514 | - to_wakeup = wq_worker_sleeping(prev); | |
15515 | - if (to_wakeup) | |
15516 | - try_to_wake_up_local(to_wakeup, cookie); | |
15517 | - } | |
15518 | } | |
15519 | switch_count = &prev->nvcsw; | |
15520 | } | |
c7c16703 | 15521 | @@ -3390,6 +3567,7 @@ static void __sched notrace __schedule(bool preempt) |
1a6e0f06 JK |
15522 | |
15523 | next = pick_next_task(rq, prev, cookie); | |
15524 | clear_tsk_need_resched(prev); | |
15525 | + clear_tsk_need_resched_lazy(prev); | |
15526 | clear_preempt_need_resched(); | |
15527 | rq->clock_skip_update = 0; | |
15528 | ||
c7c16703 | 15529 | @@ -3437,9 +3615,20 @@ void __noreturn do_task_dead(void) |
1a6e0f06 JK |
15530 | |
15531 | static inline void sched_submit_work(struct task_struct *tsk) | |
15532 | { | |
15533 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
15534 | + if (!tsk->state) | |
15535 | return; | |
15536 | /* | |
15537 | + * If a worker went to sleep, notify and ask workqueue whether | |
15538 | + * it wants to wake up a task to maintain concurrency. | |
15539 | + */ | |
15540 | + if (tsk->flags & PF_WQ_WORKER) | |
15541 | + wq_worker_sleeping(tsk); | |
15542 | + | |
15543 | + | |
15544 | + if (tsk_is_pi_blocked(tsk)) | |
15545 | + return; | |
15546 | + | |
15547 | + /* | |
15548 | * If we are going to sleep and we have plugged IO queued, | |
15549 | * make sure to submit it to avoid deadlocks. | |
15550 | */ | |
c7c16703 | 15551 | @@ -3447,6 +3636,12 @@ static inline void sched_submit_work(struct task_struct *tsk) |
1a6e0f06 JK |
15552 | blk_schedule_flush_plug(tsk); |
15553 | } | |
15554 | ||
15555 | +static void sched_update_worker(struct task_struct *tsk) | |
15556 | +{ | |
15557 | + if (tsk->flags & PF_WQ_WORKER) | |
15558 | + wq_worker_running(tsk); | |
15559 | +} | |
15560 | + | |
15561 | asmlinkage __visible void __sched schedule(void) | |
15562 | { | |
15563 | struct task_struct *tsk = current; | |
c7c16703 | 15564 | @@ -3457,6 +3652,7 @@ asmlinkage __visible void __sched schedule(void) |
1a6e0f06 JK |
15565 | __schedule(false); |
15566 | sched_preempt_enable_no_resched(); | |
15567 | } while (need_resched()); | |
15568 | + sched_update_worker(tsk); | |
15569 | } | |
15570 | EXPORT_SYMBOL(schedule); | |
15571 | ||
c7c16703 | 15572 | @@ -3520,6 +3716,30 @@ static void __sched notrace preempt_schedule_common(void) |
1a6e0f06 JK |
15573 | } while (need_resched()); |
15574 | } | |
15575 | ||
15576 | +#ifdef CONFIG_PREEMPT_LAZY | |
15577 | +/* | |
15578 | + * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is | |
15579 | + * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as | |
15580 | + * preempt_lazy_count counter >0. | |
15581 | + */ | |
15582 | +static __always_inline int preemptible_lazy(void) | |
15583 | +{ | |
15584 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
15585 | + return 1; | |
15586 | + if (current_thread_info()->preempt_lazy_count) | |
15587 | + return 0; | |
15588 | + return 1; | |
15589 | +} | |
15590 | + | |
15591 | +#else | |
15592 | + | |
15593 | +static inline int preemptible_lazy(void) | |
15594 | +{ | |
15595 | + return 1; | |
15596 | +} | |
15597 | + | |
15598 | +#endif | |
15599 | + | |
15600 | #ifdef CONFIG_PREEMPT | |
15601 | /* | |
15602 | * this is the entry point to schedule() from in-kernel preemption | |
c7c16703 | 15603 | @@ -3534,7 +3754,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) |
1a6e0f06 JK |
15604 | */ |
15605 | if (likely(!preemptible())) | |
15606 | return; | |
15607 | - | |
15608 | + if (!preemptible_lazy()) | |
15609 | + return; | |
15610 | preempt_schedule_common(); | |
15611 | } | |
15612 | NOKPROBE_SYMBOL(preempt_schedule); | |
c7c16703 | 15613 | @@ -3561,6 +3782,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
1a6e0f06 JK |
15614 | if (likely(!preemptible())) |
15615 | return; | |
15616 | ||
15617 | + if (!preemptible_lazy()) | |
15618 | + return; | |
15619 | + | |
15620 | do { | |
15621 | /* | |
15622 | * Because the function tracer can trace preempt_count_sub() | |
c7c16703 | 15623 | @@ -3583,7 +3807,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
1a6e0f06 JK |
15624 | * an infinite recursion. |
15625 | */ | |
15626 | prev_ctx = exception_enter(); | |
15627 | + /* | |
15628 | + * The add/subtract must not be traced by the function | |
15629 | + * tracer. But we still want to account for the | |
15630 | + * preempt off latency tracer. Since the _notrace versions | |
15631 | + * of add/subtract skip the accounting for latency tracer | |
15632 | + * we must force it manually. | |
15633 | + */ | |
15634 | + start_critical_timings(); | |
15635 | __schedule(true); | |
15636 | + stop_critical_timings(); | |
15637 | exception_exit(prev_ctx); | |
15638 | ||
15639 | preempt_latency_stop(1); | |
c7c16703 | 15640 | @@ -4939,6 +5172,7 @@ int __cond_resched_lock(spinlock_t *lock) |
1a6e0f06 JK |
15641 | } |
15642 | EXPORT_SYMBOL(__cond_resched_lock); | |
15643 | ||
15644 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15645 | int __sched __cond_resched_softirq(void) | |
15646 | { | |
15647 | BUG_ON(!in_softirq()); | |
c7c16703 | 15648 | @@ -4952,6 +5186,7 @@ int __sched __cond_resched_softirq(void) |
1a6e0f06 JK |
15649 | return 0; |
15650 | } | |
15651 | EXPORT_SYMBOL(__cond_resched_softirq); | |
15652 | +#endif | |
15653 | ||
15654 | /** | |
15655 | * yield - yield the current processor to other threads. | |
c7c16703 | 15656 | @@ -5315,7 +5550,9 @@ void init_idle(struct task_struct *idle, int cpu) |
1a6e0f06 JK |
15657 | |
15658 | /* Set the preempt count _outside_ the spinlocks! */ | |
15659 | init_idle_preempt_count(idle, cpu); | |
15660 | - | |
15661 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
15662 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
15663 | +#endif | |
15664 | /* | |
15665 | * The idle tasks have their own, simple scheduling class: | |
15666 | */ | |
c7c16703 | 15667 | @@ -5458,6 +5695,8 @@ void sched_setnuma(struct task_struct *p, int nid) |
1a6e0f06 JK |
15668 | #endif /* CONFIG_NUMA_BALANCING */ |
15669 | ||
15670 | #ifdef CONFIG_HOTPLUG_CPU | |
15671 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
15672 | + | |
15673 | /* | |
15674 | * Ensures that the idle task is using init_mm right before its cpu goes | |
15675 | * offline. | |
c7c16703 | 15676 | @@ -5472,7 +5711,12 @@ void idle_task_exit(void) |
1a6e0f06 JK |
15677 | switch_mm_irqs_off(mm, &init_mm, current); |
15678 | finish_arch_post_lock_switch(); | |
15679 | } | |
15680 | - mmdrop(mm); | |
15681 | + /* | |
15682 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
15683 | + * call mmdrop() nor mmdrop_delayed() from here. | |
15684 | + */ | |
15685 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
15686 | + | |
15687 | } | |
15688 | ||
15689 | /* | |
c7c16703 | 15690 | @@ -7418,6 +7662,10 @@ int sched_cpu_dying(unsigned int cpu) |
1a6e0f06 JK |
15691 | update_max_interval(); |
15692 | nohz_balance_exit_idle(cpu); | |
15693 | hrtick_clear(rq); | |
15694 | + if (per_cpu(idle_last_mm, cpu)) { | |
15695 | + mmdrop_delayed(per_cpu(idle_last_mm, cpu)); | |
15696 | + per_cpu(idle_last_mm, cpu) = NULL; | |
15697 | + } | |
15698 | return 0; | |
15699 | } | |
15700 | #endif | |
c7c16703 | 15701 | @@ -7698,7 +7946,7 @@ void __init sched_init(void) |
1a6e0f06 JK |
15702 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
15703 | static inline int preempt_count_equals(int preempt_offset) | |
15704 | { | |
15705 | - int nested = preempt_count() + rcu_preempt_depth(); | |
15706 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
15707 | ||
15708 | return (nested == preempt_offset); | |
15709 | } | |
15710 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c | |
c7c16703 | 15711 | index 37e2449186c4..26dcaabde8b3 100644 |
1a6e0f06 JK |
15712 | --- a/kernel/sched/deadline.c |
15713 | +++ b/kernel/sched/deadline.c | |
c7c16703 | 15714 | @@ -687,6 +687,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) |
1a6e0f06 JK |
15715 | |
15716 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
15717 | timer->function = dl_task_timer; | |
15718 | + timer->irqsafe = 1; | |
15719 | } | |
15720 | ||
15721 | static | |
15722 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c | |
c7c16703 | 15723 | index fa178b62ea79..935224123441 100644 |
1a6e0f06 JK |
15724 | --- a/kernel/sched/debug.c |
15725 | +++ b/kernel/sched/debug.c | |
c7c16703 | 15726 | @@ -558,6 +558,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
1a6e0f06 JK |
15727 | P(rt_throttled); |
15728 | PN(rt_time); | |
15729 | PN(rt_runtime); | |
15730 | +#ifdef CONFIG_SMP | |
15731 | + P(rt_nr_migratory); | |
15732 | +#endif | |
15733 | ||
15734 | #undef PN | |
15735 | #undef P | |
c7c16703 | 15736 | @@ -953,6 +956,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) |
1a6e0f06 JK |
15737 | #endif |
15738 | P(policy); | |
15739 | P(prio); | |
15740 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15741 | + P(migrate_disable); | |
15742 | +#endif | |
15743 | + P(nr_cpus_allowed); | |
c7c16703 | 15744 | #undef PN_SCHEDSTAT |
1a6e0f06 JK |
15745 | #undef PN |
15746 | #undef __PN | |
1a6e0f06 | 15747 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
c7c16703 | 15748 | index c242944f5cbd..4aeb2e2e41bc 100644 |
1a6e0f06 JK |
15749 | --- a/kernel/sched/fair.c |
15750 | +++ b/kernel/sched/fair.c | |
c7c16703 | 15751 | @@ -3518,7 +3518,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
1a6e0f06 JK |
15752 | ideal_runtime = sched_slice(cfs_rq, curr); |
15753 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
15754 | if (delta_exec > ideal_runtime) { | |
15755 | - resched_curr(rq_of(cfs_rq)); | |
15756 | + resched_curr_lazy(rq_of(cfs_rq)); | |
15757 | /* | |
15758 | * The current task ran long enough, ensure it doesn't get | |
15759 | * re-elected due to buddy favours. | |
c7c16703 | 15760 | @@ -3542,7 +3542,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
1a6e0f06 JK |
15761 | return; |
15762 | ||
15763 | if (delta > ideal_runtime) | |
15764 | - resched_curr(rq_of(cfs_rq)); | |
15765 | + resched_curr_lazy(rq_of(cfs_rq)); | |
15766 | } | |
15767 | ||
15768 | static void | |
c7c16703 | 15769 | @@ -3684,7 +3684,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) |
1a6e0f06 JK |
15770 | * validating it and just reschedule. |
15771 | */ | |
15772 | if (queued) { | |
15773 | - resched_curr(rq_of(cfs_rq)); | |
15774 | + resched_curr_lazy(rq_of(cfs_rq)); | |
15775 | return; | |
15776 | } | |
15777 | /* | |
c7c16703 | 15778 | @@ -3866,7 +3866,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) |
1a6e0f06 JK |
15779 | * hierarchy can be throttled |
15780 | */ | |
15781 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
15782 | - resched_curr(rq_of(cfs_rq)); | |
15783 | + resched_curr_lazy(rq_of(cfs_rq)); | |
15784 | } | |
15785 | ||
15786 | static __always_inline | |
c7c16703 | 15787 | @@ -4494,7 +4494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) |
1a6e0f06 JK |
15788 | |
15789 | if (delta < 0) { | |
15790 | if (rq->curr == p) | |
15791 | - resched_curr(rq); | |
15792 | + resched_curr_lazy(rq); | |
15793 | return; | |
15794 | } | |
15795 | hrtick_start(rq, delta); | |
c7c16703 | 15796 | @@ -5905,7 +5905,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ |
1a6e0f06 JK |
15797 | return; |
15798 | ||
15799 | preempt: | |
15800 | - resched_curr(rq); | |
15801 | + resched_curr_lazy(rq); | |
15802 | /* | |
15803 | * Only set the backward buddy when the current task is still | |
15804 | * on the rq. This can happen when a wakeup gets interleaved | |
c7c16703 | 15805 | @@ -8631,7 +8631,7 @@ static void task_fork_fair(struct task_struct *p) |
1a6e0f06 JK |
15806 | * 'current' within the tree based on its new key value. |
15807 | */ | |
15808 | swap(curr->vruntime, se->vruntime); | |
15809 | - resched_curr(rq); | |
15810 | + resched_curr_lazy(rq); | |
15811 | } | |
15812 | ||
15813 | se->vruntime -= cfs_rq->min_vruntime; | |
c7c16703 | 15814 | @@ -8655,7 +8655,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
1a6e0f06 JK |
15815 | */ |
15816 | if (rq->curr == p) { | |
15817 | if (p->prio > oldprio) | |
15818 | - resched_curr(rq); | |
15819 | + resched_curr_lazy(rq); | |
15820 | } else | |
15821 | check_preempt_curr(rq, p, 0); | |
15822 | } | |
15823 | diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
15824 | index 69631fa46c2f..6d28fcd08872 100644 | |
15825 | --- a/kernel/sched/features.h | |
15826 | +++ b/kernel/sched/features.h | |
15827 | @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true) | |
15828 | */ | |
15829 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
15830 | ||
15831 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15832 | +SCHED_FEAT(TTWU_QUEUE, false) | |
15833 | +# ifdef CONFIG_PREEMPT_LAZY | |
15834 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
15835 | +# endif | |
15836 | +#else | |
15837 | + | |
15838 | /* | |
15839 | * Queue remote wakeups on the target CPU and process them | |
15840 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
15841 | */ | |
15842 | SCHED_FEAT(TTWU_QUEUE, true) | |
15843 | +#endif | |
15844 | ||
15845 | #ifdef HAVE_RT_PUSH_IPI | |
15846 | /* | |
15847 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c | |
c7c16703 | 15848 | index 2516b8df6dbb..2556baa0a97e 100644 |
1a6e0f06 JK |
15849 | --- a/kernel/sched/rt.c |
15850 | +++ b/kernel/sched/rt.c | |
15851 | @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |
15852 | ||
15853 | hrtimer_init(&rt_b->rt_period_timer, | |
15854 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
15855 | + rt_b->rt_period_timer.irqsafe = 1; | |
15856 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
15857 | } | |
15858 | ||
15859 | @@ -101,6 +102,7 @@ void init_rt_rq(struct rt_rq *rt_rq) | |
15860 | rt_rq->push_cpu = nr_cpu_ids; | |
15861 | raw_spin_lock_init(&rt_rq->push_lock); | |
15862 | init_irq_work(&rt_rq->push_work, push_irq_work_func); | |
15863 | + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ; | |
15864 | #endif | |
15865 | #endif /* CONFIG_SMP */ | |
15866 | /* We start is dequeued state, because no RT tasks are queued */ | |
15867 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
c7c16703 | 15868 | index 055f935d4421..19324ac27026 100644 |
1a6e0f06 JK |
15869 | --- a/kernel/sched/sched.h |
15870 | +++ b/kernel/sched/sched.h | |
c7c16703 | 15871 | @@ -1163,6 +1163,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
1a6e0f06 JK |
15872 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ |
15873 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
15874 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
15875 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
15876 | ||
15877 | /* | |
15878 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
c7c16703 | 15879 | @@ -1346,6 +1347,15 @@ extern void init_sched_fair_class(void); |
1a6e0f06 JK |
15880 | extern void resched_curr(struct rq *rq); |
15881 | extern void resched_cpu(int cpu); | |
15882 | ||
15883 | +#ifdef CONFIG_PREEMPT_LAZY | |
15884 | +extern void resched_curr_lazy(struct rq *rq); | |
15885 | +#else | |
15886 | +static inline void resched_curr_lazy(struct rq *rq) | |
15887 | +{ | |
15888 | + resched_curr(rq); | |
15889 | +} | |
15890 | +#endif | |
15891 | + | |
15892 | extern struct rt_bandwidth def_rt_bandwidth; | |
15893 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
15894 | ||
15895 | diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c | |
15896 | index 82f0dff90030..ef027ff3250a 100644 | |
15897 | --- a/kernel/sched/swait.c | |
15898 | +++ b/kernel/sched/swait.c | |
15899 | @@ -1,5 +1,6 @@ | |
15900 | #include <linux/sched.h> | |
15901 | #include <linux/swait.h> | |
15902 | +#include <linux/suspend.h> | |
15903 | ||
15904 | void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
15905 | struct lock_class_key *key) | |
15906 | @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q) | |
15907 | } | |
15908 | EXPORT_SYMBOL(swake_up_locked); | |
15909 | ||
15910 | +void swake_up_all_locked(struct swait_queue_head *q) | |
15911 | +{ | |
15912 | + struct swait_queue *curr; | |
15913 | + int wakes = 0; | |
15914 | + | |
15915 | + while (!list_empty(&q->task_list)) { | |
15916 | + | |
15917 | + curr = list_first_entry(&q->task_list, typeof(*curr), | |
15918 | + task_list); | |
15919 | + wake_up_process(curr->task); | |
15920 | + list_del_init(&curr->task_list); | |
15921 | + wakes++; | |
15922 | + } | |
15923 | + if (pm_in_action) | |
15924 | + return; | |
15925 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
15926 | +} | |
15927 | +EXPORT_SYMBOL(swake_up_all_locked); | |
15928 | + | |
15929 | void swake_up(struct swait_queue_head *q) | |
15930 | { | |
15931 | unsigned long flags; | |
15932 | @@ -54,6 +74,7 @@ void swake_up_all(struct swait_queue_head *q) | |
15933 | if (!swait_active(q)) | |
15934 | return; | |
15935 | ||
15936 | + WARN_ON(irqs_disabled()); | |
15937 | raw_spin_lock_irq(&q->lock); | |
15938 | list_splice_init(&q->task_list, &tmp); | |
15939 | while (!list_empty(&tmp)) { | |
15940 | diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c | |
15941 | new file mode 100644 | |
15942 | index 000000000000..1950f40ca725 | |
15943 | --- /dev/null | |
15944 | +++ b/kernel/sched/swork.c | |
15945 | @@ -0,0 +1,173 @@ | |
15946 | +/* | |
15947 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de | |
15948 | + * | |
15949 | + * Provides a framework for enqueuing callbacks from irq context | |
15950 | + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. | |
15951 | + */ | |
15952 | + | |
15953 | +#include <linux/swait.h> | |
15954 | +#include <linux/swork.h> | |
15955 | +#include <linux/kthread.h> | |
15956 | +#include <linux/slab.h> | |
15957 | +#include <linux/spinlock.h> | |
15958 | +#include <linux/export.h> | |
15959 | + | |
15960 | +#define SWORK_EVENT_PENDING (1 << 0) | |
15961 | + | |
15962 | +static DEFINE_MUTEX(worker_mutex); | |
15963 | +static struct sworker *glob_worker; | |
15964 | + | |
15965 | +struct sworker { | |
15966 | + struct list_head events; | |
15967 | + struct swait_queue_head wq; | |
15968 | + | |
15969 | + raw_spinlock_t lock; | |
15970 | + | |
15971 | + struct task_struct *task; | |
15972 | + int refs; | |
15973 | +}; | |
15974 | + | |
15975 | +static bool swork_readable(struct sworker *worker) | |
15976 | +{ | |
15977 | + bool r; | |
15978 | + | |
15979 | + if (kthread_should_stop()) | |
15980 | + return true; | |
15981 | + | |
15982 | + raw_spin_lock_irq(&worker->lock); | |
15983 | + r = !list_empty(&worker->events); | |
15984 | + raw_spin_unlock_irq(&worker->lock); | |
15985 | + | |
15986 | + return r; | |
15987 | +} | |
15988 | + | |
15989 | +static int swork_kthread(void *arg) | |
15990 | +{ | |
15991 | + struct sworker *worker = arg; | |
15992 | + | |
15993 | + for (;;) { | |
15994 | + swait_event_interruptible(worker->wq, | |
15995 | + swork_readable(worker)); | |
15996 | + if (kthread_should_stop()) | |
15997 | + break; | |
15998 | + | |
15999 | + raw_spin_lock_irq(&worker->lock); | |
16000 | + while (!list_empty(&worker->events)) { | |
16001 | + struct swork_event *sev; | |
16002 | + | |
16003 | + sev = list_first_entry(&worker->events, | |
16004 | + struct swork_event, item); | |
16005 | + list_del(&sev->item); | |
16006 | + raw_spin_unlock_irq(&worker->lock); | |
16007 | + | |
16008 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, | |
16009 | + &sev->flags)); | |
16010 | + sev->func(sev); | |
16011 | + raw_spin_lock_irq(&worker->lock); | |
16012 | + } | |
16013 | + raw_spin_unlock_irq(&worker->lock); | |
16014 | + } | |
16015 | + return 0; | |
16016 | +} | |
16017 | + | |
16018 | +static struct sworker *swork_create(void) | |
16019 | +{ | |
16020 | + struct sworker *worker; | |
16021 | + | |
16022 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); | |
16023 | + if (!worker) | |
16024 | + return ERR_PTR(-ENOMEM); | |
16025 | + | |
16026 | + INIT_LIST_HEAD(&worker->events); | |
16027 | + raw_spin_lock_init(&worker->lock); | |
16028 | + init_swait_queue_head(&worker->wq); | |
16029 | + | |
16030 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); | |
16031 | + if (IS_ERR(worker->task)) { | |
16032 | + kfree(worker); | |
16033 | + return ERR_PTR(-ENOMEM); | |
16034 | + } | |
16035 | + | |
16036 | + return worker; | |
16037 | +} | |
16038 | + | |
16039 | +static void swork_destroy(struct sworker *worker) | |
16040 | +{ | |
16041 | + kthread_stop(worker->task); | |
16042 | + | |
16043 | + WARN_ON(!list_empty(&worker->events)); | |
16044 | + kfree(worker); | |
16045 | +} | |
16046 | + | |
16047 | +/** | |
16048 | + * swork_queue - queue swork | |
16049 | + * | |
16050 | + * Returns %false if @work was already on a queue, %true otherwise. | |
16051 | + * | |
16052 | + * The work is queued and processed on a random CPU | |
16053 | + */ | |
16054 | +bool swork_queue(struct swork_event *sev) | |
16055 | +{ | |
16056 | + unsigned long flags; | |
16057 | + | |
16058 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) | |
16059 | + return false; | |
16060 | + | |
16061 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); | |
16062 | + list_add_tail(&sev->item, &glob_worker->events); | |
16063 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
16064 | + | |
16065 | + swake_up(&glob_worker->wq); | |
16066 | + return true; | |
16067 | +} | |
16068 | +EXPORT_SYMBOL_GPL(swork_queue); | |
16069 | + | |
16070 | +/** | |
16071 | + * swork_get - get an instance of the sworker | |
16072 | + * | |
16073 | + * Returns an negative error code if the initialization if the worker did not | |
16074 | + * work, %0 otherwise. | |
16075 | + * | |
16076 | + */ | |
16077 | +int swork_get(void) | |
16078 | +{ | |
16079 | + struct sworker *worker; | |
16080 | + | |
16081 | + mutex_lock(&worker_mutex); | |
16082 | + if (!glob_worker) { | |
16083 | + worker = swork_create(); | |
16084 | + if (IS_ERR(worker)) { | |
16085 | + mutex_unlock(&worker_mutex); | |
16086 | + return -ENOMEM; | |
16087 | + } | |
16088 | + | |
16089 | + glob_worker = worker; | |
16090 | + } | |
16091 | + | |
16092 | + glob_worker->refs++; | |
16093 | + mutex_unlock(&worker_mutex); | |
16094 | + | |
16095 | + return 0; | |
16096 | +} | |
16097 | +EXPORT_SYMBOL_GPL(swork_get); | |
16098 | + | |
16099 | +/** | |
16100 | + * swork_put - puts an instance of the sworker | |
16101 | + * | |
16102 | + * Will destroy the sworker thread. This function must not be called until all | |
16103 | + * queued events have been completed. | |
16104 | + */ | |
16105 | +void swork_put(void) | |
16106 | +{ | |
16107 | + mutex_lock(&worker_mutex); | |
16108 | + | |
16109 | + glob_worker->refs--; | |
16110 | + if (glob_worker->refs > 0) | |
16111 | + goto out; | |
16112 | + | |
16113 | + swork_destroy(glob_worker); | |
16114 | + glob_worker = NULL; | |
16115 | +out: | |
16116 | + mutex_unlock(&worker_mutex); | |
16117 | +} | |
16118 | +EXPORT_SYMBOL_GPL(swork_put); | |
16119 | diff --git a/kernel/signal.c b/kernel/signal.c | |
c7c16703 | 16120 | index 75761acc77cf..ae0773c76bb0 100644 |
1a6e0f06 JK |
16121 | --- a/kernel/signal.c |
16122 | +++ b/kernel/signal.c | |
16123 | @@ -14,6 +14,7 @@ | |
16124 | #include <linux/export.h> | |
16125 | #include <linux/init.h> | |
16126 | #include <linux/sched.h> | |
16127 | +#include <linux/sched/rt.h> | |
16128 | #include <linux/fs.h> | |
16129 | #include <linux/tty.h> | |
16130 | #include <linux/binfmts.h> | |
16131 | @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task) | |
16132 | return false; | |
16133 | } | |
16134 | ||
16135 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
16136 | +{ | |
16137 | + struct sigqueue *q = t->sigqueue_cache; | |
16138 | + | |
16139 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) | |
16140 | + return NULL; | |
16141 | + return q; | |
16142 | +} | |
16143 | + | |
16144 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) | |
16145 | +{ | |
16146 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) | |
16147 | + return 0; | |
16148 | + return 1; | |
16149 | +} | |
16150 | + | |
16151 | /* | |
16152 | * allocate a new signal queue record | |
16153 | * - this may be called without locks if and only if t == current, otherwise an | |
16154 | * appropriate lock must be held to stop the target task from exiting | |
16155 | */ | |
16156 | static struct sigqueue * | |
16157 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
16158 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
16159 | + int override_rlimit, int fromslab) | |
16160 | { | |
16161 | struct sigqueue *q = NULL; | |
16162 | struct user_struct *user; | |
16163 | @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
16164 | if (override_rlimit || | |
16165 | atomic_read(&user->sigpending) <= | |
16166 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
16167 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
16168 | + if (!fromslab) | |
16169 | + q = get_task_cache(t); | |
16170 | + if (!q) | |
16171 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
16172 | } else { | |
16173 | print_dropped_signal(sig); | |
16174 | } | |
16175 | @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
16176 | return q; | |
16177 | } | |
16178 | ||
16179 | +static struct sigqueue * | |
16180 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
16181 | + int override_rlimit) | |
16182 | +{ | |
16183 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); | |
16184 | +} | |
16185 | + | |
16186 | static void __sigqueue_free(struct sigqueue *q) | |
16187 | { | |
16188 | if (q->flags & SIGQUEUE_PREALLOC) | |
16189 | @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q) | |
16190 | kmem_cache_free(sigqueue_cachep, q); | |
16191 | } | |
16192 | ||
16193 | +static void sigqueue_free_current(struct sigqueue *q) | |
16194 | +{ | |
16195 | + struct user_struct *up; | |
16196 | + | |
16197 | + if (q->flags & SIGQUEUE_PREALLOC) | |
16198 | + return; | |
16199 | + | |
16200 | + up = q->user; | |
16201 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
16202 | + atomic_dec(&up->sigpending); | |
16203 | + free_uid(up); | |
16204 | + } else | |
16205 | + __sigqueue_free(q); | |
16206 | +} | |
16207 | + | |
16208 | void flush_sigqueue(struct sigpending *queue) | |
16209 | { | |
16210 | struct sigqueue *q; | |
16211 | @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue) | |
16212 | } | |
16213 | ||
16214 | /* | |
16215 | + * Called from __exit_signal. Flush tsk->pending and | |
16216 | + * tsk->sigqueue_cache | |
16217 | + */ | |
16218 | +void flush_task_sigqueue(struct task_struct *tsk) | |
16219 | +{ | |
16220 | + struct sigqueue *q; | |
16221 | + | |
16222 | + flush_sigqueue(&tsk->pending); | |
16223 | + | |
16224 | + q = get_task_cache(tsk); | |
16225 | + if (q) | |
16226 | + kmem_cache_free(sigqueue_cachep, q); | |
16227 | +} | |
16228 | + | |
16229 | +/* | |
16230 | * Flush all pending signals for this kthread. | |
16231 | */ | |
16232 | void flush_signals(struct task_struct *t) | |
16233 | @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | |
16234 | still_pending: | |
16235 | list_del_init(&first->list); | |
16236 | copy_siginfo(info, &first->info); | |
16237 | - __sigqueue_free(first); | |
16238 | + sigqueue_free_current(first); | |
16239 | } else { | |
16240 | /* | |
16241 | * Ok, it wasn't in the queue. This must be | |
16242 | @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |
16243 | { | |
16244 | int signr; | |
16245 | ||
16246 | + WARN_ON_ONCE(tsk != current); | |
16247 | + | |
16248 | /* We only dequeue private signals from ourselves, we don't let | |
16249 | * signalfd steal them | |
16250 | */ | |
16251 | @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | |
16252 | * We don't want to have recursive SIGSEGV's etc, for example, | |
16253 | * that is why we also clear SIGNAL_UNKILLABLE. | |
16254 | */ | |
16255 | -int | |
16256 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16257 | +static int | |
16258 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16259 | { | |
16260 | unsigned long int flags; | |
16261 | int ret, blocked, ignored; | |
16262 | @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16263 | return ret; | |
16264 | } | |
16265 | ||
16266 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16267 | +{ | |
16268 | +/* | |
16269 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
16270 | + * since it can not enable preemption, and the signal code's spin_locks | |
16271 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
16272 | + * send the signal on exit of the trap. | |
16273 | + */ | |
16274 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
16275 | + if (in_atomic()) { | |
16276 | + if (WARN_ON_ONCE(t != current)) | |
16277 | + return 0; | |
16278 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
16279 | + return 0; | |
16280 | + | |
16281 | + if (is_si_special(info)) { | |
16282 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
16283 | + t->forced_info.si_signo = sig; | |
16284 | + t->forced_info.si_errno = 0; | |
16285 | + t->forced_info.si_code = SI_KERNEL; | |
16286 | + t->forced_info.si_pid = 0; | |
16287 | + t->forced_info.si_uid = 0; | |
16288 | + } else { | |
16289 | + t->forced_info = *info; | |
16290 | + } | |
16291 | + | |
16292 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | |
16293 | + return 0; | |
16294 | + } | |
16295 | +#endif | |
16296 | + return do_force_sig_info(sig, info, t); | |
16297 | +} | |
16298 | + | |
16299 | /* | |
16300 | * Nuke all other threads in the group. | |
16301 | */ | |
16302 | @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
16303 | * Disable interrupts early to avoid deadlocks. | |
16304 | * See rcu_read_unlock() comment header for details. | |
16305 | */ | |
16306 | - local_irq_save(*flags); | |
16307 | + local_irq_save_nort(*flags); | |
16308 | rcu_read_lock(); | |
16309 | sighand = rcu_dereference(tsk->sighand); | |
16310 | if (unlikely(sighand == NULL)) { | |
16311 | rcu_read_unlock(); | |
16312 | - local_irq_restore(*flags); | |
16313 | + local_irq_restore_nort(*flags); | |
16314 | break; | |
16315 | } | |
16316 | /* | |
16317 | @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
16318 | } | |
16319 | spin_unlock(&sighand->siglock); | |
16320 | rcu_read_unlock(); | |
16321 | - local_irq_restore(*flags); | |
16322 | + local_irq_restore_nort(*flags); | |
16323 | } | |
16324 | ||
16325 | return sighand; | |
16326 | @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid); | |
16327 | */ | |
16328 | struct sigqueue *sigqueue_alloc(void) | |
16329 | { | |
16330 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
16331 | + /* Preallocated sigqueue objects always from the slabcache ! */ | |
16332 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
16333 | ||
16334 | if (q) | |
16335 | q->flags |= SIGQUEUE_PREALLOC; | |
16336 | @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |
16337 | if (gstop_done && ptrace_reparented(current)) | |
16338 | do_notify_parent_cldstop(current, false, why); | |
16339 | ||
16340 | - /* | |
16341 | - * Don't want to allow preemption here, because | |
16342 | - * sys_ptrace() needs this task to be inactive. | |
16343 | - * | |
16344 | - * XXX: implement read_unlock_no_resched(). | |
16345 | - */ | |
16346 | - preempt_disable(); | |
16347 | read_unlock(&tasklist_lock); | |
16348 | - preempt_enable_no_resched(); | |
16349 | freezable_schedule(); | |
16350 | } else { | |
16351 | /* | |
16352 | diff --git a/kernel/softirq.c b/kernel/softirq.c | |
c7c16703 | 16353 | index 744fa611cae0..1431d08e6f21 100644 |
1a6e0f06 JK |
16354 | --- a/kernel/softirq.c |
16355 | +++ b/kernel/softirq.c | |
16356 | @@ -21,10 +21,12 @@ | |
16357 | #include <linux/freezer.h> | |
16358 | #include <linux/kthread.h> | |
16359 | #include <linux/rcupdate.h> | |
16360 | +#include <linux/delay.h> | |
16361 | #include <linux/ftrace.h> | |
16362 | #include <linux/smp.h> | |
16363 | #include <linux/smpboot.h> | |
16364 | #include <linux/tick.h> | |
16365 | +#include <linux/locallock.h> | |
16366 | #include <linux/irq.h> | |
16367 | ||
16368 | #define CREATE_TRACE_POINTS | |
16369 | @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat); | |
16370 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
16371 | ||
16372 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
16373 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
c7c16703 | 16374 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) |
1a6e0f06 JK |
16375 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); |
16376 | +#endif | |
16377 | ||
16378 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
c7c16703 | 16379 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", |
1a6e0f06 JK |
16380 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
16381 | }; | |
16382 | ||
16383 | +#ifdef CONFIG_NO_HZ_COMMON | |
16384 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
16385 | + | |
16386 | +struct softirq_runner { | |
16387 | + struct task_struct *runner[NR_SOFTIRQS]; | |
16388 | +}; | |
16389 | + | |
16390 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); | |
16391 | + | |
16392 | +static inline void softirq_set_runner(unsigned int sirq) | |
16393 | +{ | |
16394 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16395 | + | |
16396 | + sr->runner[sirq] = current; | |
16397 | +} | |
16398 | + | |
16399 | +static inline void softirq_clr_runner(unsigned int sirq) | |
16400 | +{ | |
16401 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16402 | + | |
16403 | + sr->runner[sirq] = NULL; | |
16404 | +} | |
16405 | + | |
16406 | +/* | |
16407 | + * On preempt-rt a softirq running context might be blocked on a | |
16408 | + * lock. There might be no other runnable task on this CPU because the | |
16409 | + * lock owner runs on some other CPU. So we have to go into idle with | |
16410 | + * the pending bit set. Therefor we need to check this otherwise we | |
16411 | + * warn about false positives which confuses users and defeats the | |
16412 | + * whole purpose of this test. | |
16413 | + * | |
16414 | + * This code is called with interrupts disabled. | |
16415 | + */ | |
16416 | +void softirq_check_pending_idle(void) | |
16417 | +{ | |
16418 | + static int rate_limit; | |
16419 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16420 | + u32 warnpending; | |
16421 | + int i; | |
16422 | + | |
16423 | + if (rate_limit >= 10) | |
16424 | + return; | |
16425 | + | |
16426 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
16427 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
16428 | + struct task_struct *tsk = sr->runner[i]; | |
16429 | + | |
16430 | + /* | |
16431 | + * The wakeup code in rtmutex.c wakes up the task | |
16432 | + * _before_ it sets pi_blocked_on to NULL under | |
16433 | + * tsk->pi_lock. So we need to check for both: state | |
16434 | + * and pi_blocked_on. | |
16435 | + */ | |
16436 | + if (tsk) { | |
16437 | + raw_spin_lock(&tsk->pi_lock); | |
16438 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
16439 | + /* Clear all bits pending in that task */ | |
16440 | + warnpending &= ~(tsk->softirqs_raised); | |
16441 | + warnpending &= ~(1 << i); | |
16442 | + } | |
16443 | + raw_spin_unlock(&tsk->pi_lock); | |
16444 | + } | |
16445 | + } | |
16446 | + | |
16447 | + if (warnpending) { | |
16448 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
16449 | + warnpending); | |
16450 | + rate_limit++; | |
16451 | + } | |
16452 | +} | |
16453 | +# else | |
16454 | +/* | |
16455 | + * On !PREEMPT_RT we just printk rate limited: | |
16456 | + */ | |
16457 | +void softirq_check_pending_idle(void) | |
16458 | +{ | |
16459 | + static int rate_limit; | |
16460 | + | |
16461 | + if (rate_limit < 10 && | |
16462 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
16463 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
16464 | + local_softirq_pending()); | |
16465 | + rate_limit++; | |
16466 | + } | |
16467 | +} | |
16468 | +# endif | |
16469 | + | |
16470 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
16471 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
16472 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
16473 | +#endif | |
16474 | + | |
16475 | /* | |
16476 | * we cannot loop indefinitely here to avoid userspace starvation, | |
16477 | * but we also don't want to introduce a worst case 1/HZ latency | |
c7c16703 | 16478 | @@ -77,6 +175,37 @@ static void wakeup_softirqd(void) |
1a6e0f06 JK |
16479 | wake_up_process(tsk); |
16480 | } | |
16481 | ||
16482 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16483 | +static void wakeup_timer_softirqd(void) | |
16484 | +{ | |
16485 | + /* Interrupts are disabled: no need to stop preemption */ | |
16486 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
16487 | + | |
16488 | + if (tsk && tsk->state != TASK_RUNNING) | |
16489 | + wake_up_process(tsk); | |
16490 | +} | |
16491 | +#endif | |
16492 | + | |
16493 | +static void handle_softirq(unsigned int vec_nr) | |
16494 | +{ | |
16495 | + struct softirq_action *h = softirq_vec + vec_nr; | |
16496 | + int prev_count; | |
16497 | + | |
16498 | + prev_count = preempt_count(); | |
16499 | + | |
16500 | + kstat_incr_softirqs_this_cpu(vec_nr); | |
16501 | + | |
16502 | + trace_softirq_entry(vec_nr); | |
16503 | + h->action(h); | |
16504 | + trace_softirq_exit(vec_nr); | |
16505 | + if (unlikely(prev_count != preempt_count())) { | |
16506 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
16507 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
16508 | + prev_count, preempt_count()); | |
16509 | + preempt_count_set(prev_count); | |
16510 | + } | |
16511 | +} | |
16512 | + | |
c7c16703 JK |
16513 | /* |
16514 | * If ksoftirqd is scheduled, we do not want to process pending softirqs | |
16515 | * right now. Let ksoftirqd handle this at its own rate, to get fairness. | |
16516 | @@ -88,6 +217,48 @@ static bool ksoftirqd_running(void) | |
16517 | return tsk && (tsk->state == TASK_RUNNING); | |
16518 | } | |
16519 | ||
1a6e0f06 JK |
16520 | +#ifndef CONFIG_PREEMPT_RT_FULL |
16521 | +static inline int ksoftirqd_softirq_pending(void) | |
16522 | +{ | |
16523 | + return local_softirq_pending(); | |
16524 | +} | |
16525 | + | |
16526 | +static void handle_pending_softirqs(u32 pending) | |
16527 | +{ | |
16528 | + struct softirq_action *h = softirq_vec; | |
16529 | + int softirq_bit; | |
16530 | + | |
16531 | + local_irq_enable(); | |
16532 | + | |
16533 | + h = softirq_vec; | |
16534 | + | |
16535 | + while ((softirq_bit = ffs(pending))) { | |
16536 | + unsigned int vec_nr; | |
16537 | + | |
16538 | + h += softirq_bit - 1; | |
16539 | + vec_nr = h - softirq_vec; | |
16540 | + handle_softirq(vec_nr); | |
16541 | + | |
16542 | + h++; | |
16543 | + pending >>= softirq_bit; | |
16544 | + } | |
16545 | + | |
16546 | + rcu_bh_qs(); | |
16547 | + local_irq_disable(); | |
16548 | +} | |
16549 | + | |
16550 | +static void run_ksoftirqd(unsigned int cpu) | |
16551 | +{ | |
16552 | + local_irq_disable(); | |
16553 | + if (ksoftirqd_softirq_pending()) { | |
16554 | + __do_softirq(); | |
16555 | + local_irq_enable(); | |
16556 | + cond_resched_rcu_qs(); | |
16557 | + return; | |
16558 | + } | |
16559 | + local_irq_enable(); | |
16560 | +} | |
16561 | + | |
16562 | /* | |
16563 | * preempt_count and SOFTIRQ_OFFSET usage: | |
16564 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
c7c16703 | 16565 | @@ -243,10 +414,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) |
1a6e0f06 JK |
16566 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
16567 | unsigned long old_flags = current->flags; | |
16568 | int max_restart = MAX_SOFTIRQ_RESTART; | |
16569 | - struct softirq_action *h; | |
16570 | bool in_hardirq; | |
16571 | __u32 pending; | |
16572 | - int softirq_bit; | |
16573 | ||
16574 | /* | |
16575 | * Mask out PF_MEMALLOC s current task context is borrowed for the | |
c7c16703 | 16576 | @@ -265,36 +434,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) |
1a6e0f06 JK |
16577 | /* Reset the pending bitmask before enabling irqs */ |
16578 | set_softirq_pending(0); | |
16579 | ||
16580 | - local_irq_enable(); | |
16581 | - | |
16582 | - h = softirq_vec; | |
16583 | - | |
16584 | - while ((softirq_bit = ffs(pending))) { | |
16585 | - unsigned int vec_nr; | |
16586 | - int prev_count; | |
16587 | - | |
16588 | - h += softirq_bit - 1; | |
16589 | - | |
16590 | - vec_nr = h - softirq_vec; | |
16591 | - prev_count = preempt_count(); | |
16592 | - | |
16593 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
16594 | - | |
16595 | - trace_softirq_entry(vec_nr); | |
16596 | - h->action(h); | |
16597 | - trace_softirq_exit(vec_nr); | |
16598 | - if (unlikely(prev_count != preempt_count())) { | |
16599 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
16600 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
16601 | - prev_count, preempt_count()); | |
16602 | - preempt_count_set(prev_count); | |
16603 | - } | |
16604 | - h++; | |
16605 | - pending >>= softirq_bit; | |
16606 | - } | |
16607 | - | |
16608 | - rcu_bh_qs(); | |
16609 | - local_irq_disable(); | |
16610 | + handle_pending_softirqs(pending); | |
16611 | ||
16612 | pending = local_softirq_pending(); | |
16613 | if (pending) { | |
c7c16703 | 16614 | @@ -331,6 +471,309 @@ asmlinkage __visible void do_softirq(void) |
1a6e0f06 JK |
16615 | } |
16616 | ||
16617 | /* | |
16618 | + * This function must run with irqs disabled! | |
16619 | + */ | |
16620 | +void raise_softirq_irqoff(unsigned int nr) | |
16621 | +{ | |
16622 | + __raise_softirq_irqoff(nr); | |
16623 | + | |
16624 | + /* | |
16625 | + * If we're in an interrupt or softirq, we're done | |
16626 | + * (this also catches softirq-disabled code). We will | |
16627 | + * actually run the softirq once we return from | |
16628 | + * the irq or softirq. | |
16629 | + * | |
16630 | + * Otherwise we wake up ksoftirqd to make sure we | |
16631 | + * schedule the softirq soon. | |
16632 | + */ | |
16633 | + if (!in_interrupt()) | |
16634 | + wakeup_softirqd(); | |
16635 | +} | |
16636 | + | |
16637 | +void __raise_softirq_irqoff(unsigned int nr) | |
16638 | +{ | |
16639 | + trace_softirq_raise(nr); | |
16640 | + or_softirq_pending(1UL << nr); | |
16641 | +} | |
16642 | + | |
16643 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } | |
16644 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
16645 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
16646 | + | |
16647 | +#else /* !PREEMPT_RT_FULL */ | |
16648 | + | |
16649 | +/* | |
16650 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
16651 | + */ | |
16652 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
16653 | + | |
16654 | +void __init softirq_early_init(void) | |
16655 | +{ | |
16656 | + int i; | |
16657 | + | |
16658 | + for (i = 0; i < NR_SOFTIRQS; i++) | |
16659 | + local_irq_lock_init(local_softirq_locks[i]); | |
16660 | +} | |
16661 | + | |
16662 | +static void lock_softirq(int which) | |
16663 | +{ | |
16664 | + local_lock(local_softirq_locks[which]); | |
16665 | +} | |
16666 | + | |
16667 | +static void unlock_softirq(int which) | |
16668 | +{ | |
16669 | + local_unlock(local_softirq_locks[which]); | |
16670 | +} | |
16671 | + | |
16672 | +static void do_single_softirq(int which) | |
16673 | +{ | |
16674 | + unsigned long old_flags = current->flags; | |
16675 | + | |
16676 | + current->flags &= ~PF_MEMALLOC; | |
16677 | + vtime_account_irq_enter(current); | |
16678 | + current->flags |= PF_IN_SOFTIRQ; | |
16679 | + lockdep_softirq_enter(); | |
16680 | + local_irq_enable(); | |
16681 | + handle_softirq(which); | |
16682 | + local_irq_disable(); | |
16683 | + lockdep_softirq_exit(); | |
16684 | + current->flags &= ~PF_IN_SOFTIRQ; | |
16685 | + vtime_account_irq_enter(current); | |
16686 | + tsk_restore_flags(current, old_flags, PF_MEMALLOC); | |
16687 | +} | |
16688 | + | |
16689 | +/* | |
16690 | + * Called with interrupts disabled. Process softirqs which were raised | |
16691 | + * in current context (or on behalf of ksoftirqd). | |
16692 | + */ | |
16693 | +static void do_current_softirqs(void) | |
16694 | +{ | |
16695 | + while (current->softirqs_raised) { | |
16696 | + int i = __ffs(current->softirqs_raised); | |
16697 | + unsigned int pending, mask = (1U << i); | |
16698 | + | |
16699 | + current->softirqs_raised &= ~mask; | |
16700 | + local_irq_enable(); | |
16701 | + | |
16702 | + /* | |
16703 | + * If the lock is contended, we boost the owner to | |
16704 | + * process the softirq or leave the critical section | |
16705 | + * now. | |
16706 | + */ | |
16707 | + lock_softirq(i); | |
16708 | + local_irq_disable(); | |
16709 | + softirq_set_runner(i); | |
16710 | + /* | |
16711 | + * Check with the local_softirq_pending() bits, | |
16712 | + * whether we need to process this still or if someone | |
16713 | + * else took care of it. | |
16714 | + */ | |
16715 | + pending = local_softirq_pending(); | |
16716 | + if (pending & mask) { | |
16717 | + set_softirq_pending(pending & ~mask); | |
16718 | + do_single_softirq(i); | |
16719 | + } | |
16720 | + softirq_clr_runner(i); | |
16721 | + WARN_ON(current->softirq_nestcnt != 1); | |
16722 | + local_irq_enable(); | |
16723 | + unlock_softirq(i); | |
16724 | + local_irq_disable(); | |
16725 | + } | |
16726 | +} | |
16727 | + | |
16728 | +void __local_bh_disable(void) | |
16729 | +{ | |
16730 | + if (++current->softirq_nestcnt == 1) | |
16731 | + migrate_disable(); | |
16732 | +} | |
16733 | +EXPORT_SYMBOL(__local_bh_disable); | |
16734 | + | |
16735 | +void __local_bh_enable(void) | |
16736 | +{ | |
16737 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
16738 | + return; | |
16739 | + | |
16740 | + local_irq_disable(); | |
16741 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
16742 | + do_current_softirqs(); | |
16743 | + local_irq_enable(); | |
16744 | + | |
16745 | + if (--current->softirq_nestcnt == 0) | |
16746 | + migrate_enable(); | |
16747 | +} | |
16748 | +EXPORT_SYMBOL(__local_bh_enable); | |
16749 | + | |
16750 | +void _local_bh_enable(void) | |
16751 | +{ | |
16752 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
16753 | + return; | |
16754 | + if (--current->softirq_nestcnt == 0) | |
16755 | + migrate_enable(); | |
16756 | +} | |
16757 | +EXPORT_SYMBOL(_local_bh_enable); | |
16758 | + | |
16759 | +int in_serving_softirq(void) | |
16760 | +{ | |
16761 | + return current->flags & PF_IN_SOFTIRQ; | |
16762 | +} | |
16763 | +EXPORT_SYMBOL(in_serving_softirq); | |
16764 | + | |
16765 | +/* Called with preemption disabled */ | |
16766 | +static void run_ksoftirqd(unsigned int cpu) | |
16767 | +{ | |
16768 | + local_irq_disable(); | |
16769 | + current->softirq_nestcnt++; | |
16770 | + | |
16771 | + do_current_softirqs(); | |
16772 | + current->softirq_nestcnt--; | |
16773 | + local_irq_enable(); | |
16774 | + cond_resched_rcu_qs(); | |
16775 | +} | |
16776 | + | |
16777 | +/* | |
16778 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
16779 | + * disabled. So the cpu can't go away under us. | |
16780 | + */ | |
16781 | +void thread_do_softirq(void) | |
16782 | +{ | |
16783 | + if (!in_serving_softirq() && current->softirqs_raised) { | |
16784 | + current->softirq_nestcnt++; | |
16785 | + do_current_softirqs(); | |
16786 | + current->softirq_nestcnt--; | |
16787 | + } | |
16788 | +} | |
16789 | + | |
16790 | +static void do_raise_softirq_irqoff(unsigned int nr) | |
16791 | +{ | |
16792 | + unsigned int mask; | |
16793 | + | |
16794 | + mask = 1UL << nr; | |
16795 | + | |
16796 | + trace_softirq_raise(nr); | |
16797 | + or_softirq_pending(mask); | |
16798 | + | |
16799 | + /* | |
16800 | + * If we are not in a hard interrupt and inside a bh disabled | |
16801 | + * region, we simply raise the flag on current. local_bh_enable() | |
16802 | + * will make sure that the softirq is executed. Otherwise we | |
16803 | + * delegate it to ksoftirqd. | |
16804 | + */ | |
16805 | + if (!in_irq() && current->softirq_nestcnt) | |
16806 | + current->softirqs_raised |= mask; | |
16807 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
16808 | + return; | |
16809 | + | |
16810 | + if (mask & TIMER_SOFTIRQS) | |
16811 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
16812 | + else | |
16813 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
16814 | +} | |
16815 | + | |
16816 | +static void wakeup_proper_softirq(unsigned int nr) | |
16817 | +{ | |
16818 | + if ((1UL << nr) & TIMER_SOFTIRQS) | |
16819 | + wakeup_timer_softirqd(); | |
16820 | + else | |
16821 | + wakeup_softirqd(); | |
16822 | +} | |
16823 | + | |
1a6e0f06 JK |
16824 | +void __raise_softirq_irqoff(unsigned int nr) |
16825 | +{ | |
16826 | + do_raise_softirq_irqoff(nr); | |
16827 | + if (!in_irq() && !current->softirq_nestcnt) | |
16828 | + wakeup_proper_softirq(nr); | |
16829 | +} | |
16830 | + | |
16831 | +/* | |
16832 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
16833 | + */ | |
16834 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
16835 | +{ | |
16836 | + unsigned int mask; | |
16837 | + | |
16838 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || | |
16839 | + !__this_cpu_read(ktimer_softirqd))) | |
16840 | + return; | |
16841 | + mask = 1UL << nr; | |
16842 | + | |
16843 | + trace_softirq_raise(nr); | |
16844 | + or_softirq_pending(mask); | |
16845 | + if (mask & TIMER_SOFTIRQS) | |
16846 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
16847 | + else | |
16848 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
16849 | + wakeup_proper_softirq(nr); | |
16850 | +} | |
16851 | + | |
16852 | +/* | |
16853 | + * This function must run with irqs disabled! | |
16854 | + */ | |
16855 | +void raise_softirq_irqoff(unsigned int nr) | |
16856 | +{ | |
16857 | + do_raise_softirq_irqoff(nr); | |
16858 | + | |
16859 | + /* | |
16860 | + * If we're in an hard interrupt we let irq return code deal | |
16861 | + * with the wakeup of ksoftirqd. | |
16862 | + */ | |
16863 | + if (in_irq()) | |
16864 | + return; | |
16865 | + /* | |
16866 | + * If we are in thread context but outside of a bh disabled | |
16867 | + * region, we need to wake ksoftirqd as well. | |
16868 | + * | |
16869 | + * CHECKME: Some of the places which do that could be wrapped | |
16870 | + * into local_bh_disable/enable pairs. Though it's unclear | |
16871 | + * whether this is worth the effort. To find those places just | |
16872 | + * raise a WARN() if the condition is met. | |
16873 | + */ | |
16874 | + if (!current->softirq_nestcnt) | |
16875 | + wakeup_proper_softirq(nr); | |
16876 | +} | |
16877 | + | |
16878 | +static inline int ksoftirqd_softirq_pending(void) | |
16879 | +{ | |
16880 | + return current->softirqs_raised; | |
16881 | +} | |
16882 | + | |
16883 | +static inline void local_bh_disable_nort(void) { } | |
16884 | +static inline void _local_bh_enable_nort(void) { } | |
16885 | + | |
16886 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
16887 | +{ | |
16888 | + /* Take over all but timer pending softirqs when starting */ | |
16889 | + local_irq_disable(); | |
16890 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
16891 | + local_irq_enable(); | |
16892 | +} | |
16893 | + | |
16894 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) | |
16895 | +{ | |
16896 | + struct sched_param param = { .sched_priority = 1 }; | |
16897 | + | |
16898 | + sched_setscheduler(current, SCHED_FIFO, ¶m); | |
16899 | + | |
16900 | + /* Take over timer pending softirqs when starting */ | |
16901 | + local_irq_disable(); | |
16902 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
16903 | + local_irq_enable(); | |
16904 | +} | |
16905 | + | |
16906 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, | |
16907 | + bool online) | |
16908 | +{ | |
16909 | + struct sched_param param = { .sched_priority = 0 }; | |
16910 | + | |
16911 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); | |
16912 | +} | |
16913 | + | |
16914 | +static int ktimer_softirqd_should_run(unsigned int cpu) | |
16915 | +{ | |
16916 | + return current->softirqs_raised; | |
16917 | +} | |
16918 | + | |
16919 | +#endif /* PREEMPT_RT_FULL */ | |
16920 | +/* | |
16921 | * Enter an interrupt context. | |
16922 | */ | |
16923 | void irq_enter(void) | |
c7c16703 | 16924 | @@ -341,9 +784,9 @@ void irq_enter(void) |
1a6e0f06 JK |
16925 | * Prevent raise_softirq from needlessly waking up ksoftirqd |
16926 | * here, as softirq will be serviced on return from interrupt. | |
16927 | */ | |
16928 | - local_bh_disable(); | |
16929 | + local_bh_disable_nort(); | |
16930 | tick_irq_enter(); | |
16931 | - _local_bh_enable(); | |
16932 | + _local_bh_enable_nort(); | |
16933 | } | |
16934 | ||
16935 | __irq_enter(); | |
c7c16703 | 16936 | @@ -351,9 +794,13 @@ void irq_enter(void) |
1a6e0f06 JK |
16937 | |
16938 | static inline void invoke_softirq(void) | |
16939 | { | |
c7c16703 JK |
16940 | +#ifdef CONFIG_PREEMPT_RT_FULL |
16941 | + unsigned long flags; | |
16942 | +#endif | |
16943 | + | |
16944 | if (ksoftirqd_running()) | |
16945 | return; | |
16946 | - | |
1a6e0f06 JK |
16947 | +#ifndef CONFIG_PREEMPT_RT_FULL |
16948 | if (!force_irqthreads) { | |
16949 | #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK | |
16950 | /* | |
c7c16703 | 16951 | @@ -373,6 +820,17 @@ static inline void invoke_softirq(void) |
1a6e0f06 JK |
16952 | } else { |
16953 | wakeup_softirqd(); | |
16954 | } | |
16955 | +#else /* PREEMPT_RT_FULL */ | |
1a6e0f06 JK |
16956 | + |
16957 | + local_irq_save(flags); | |
16958 | + if (__this_cpu_read(ksoftirqd) && | |
16959 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
16960 | + wakeup_softirqd(); | |
16961 | + if (__this_cpu_read(ktimer_softirqd) && | |
16962 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
16963 | + wakeup_timer_softirqd(); | |
16964 | + local_irq_restore(flags); | |
16965 | +#endif | |
16966 | } | |
16967 | ||
16968 | static inline void tick_irq_exit(void) | |
c7c16703 | 16969 | @@ -409,26 +867,6 @@ void irq_exit(void) |
1a6e0f06 JK |
16970 | trace_hardirq_exit(); /* must be last! */ |
16971 | } | |
16972 | ||
16973 | -/* | |
16974 | - * This function must run with irqs disabled! | |
16975 | - */ | |
16976 | -inline void raise_softirq_irqoff(unsigned int nr) | |
16977 | -{ | |
16978 | - __raise_softirq_irqoff(nr); | |
16979 | - | |
16980 | - /* | |
16981 | - * If we're in an interrupt or softirq, we're done | |
16982 | - * (this also catches softirq-disabled code). We will | |
16983 | - * actually run the softirq once we return from | |
16984 | - * the irq or softirq. | |
16985 | - * | |
16986 | - * Otherwise we wake up ksoftirqd to make sure we | |
16987 | - * schedule the softirq soon. | |
16988 | - */ | |
16989 | - if (!in_interrupt()) | |
16990 | - wakeup_softirqd(); | |
16991 | -} | |
16992 | - | |
16993 | void raise_softirq(unsigned int nr) | |
16994 | { | |
16995 | unsigned long flags; | |
c7c16703 | 16996 | @@ -438,12 +876,6 @@ void raise_softirq(unsigned int nr) |
1a6e0f06 JK |
16997 | local_irq_restore(flags); |
16998 | } | |
16999 | ||
17000 | -void __raise_softirq_irqoff(unsigned int nr) | |
17001 | -{ | |
17002 | - trace_softirq_raise(nr); | |
17003 | - or_softirq_pending(1UL << nr); | |
17004 | -} | |
17005 | - | |
17006 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
17007 | { | |
17008 | softirq_vec[nr].action = action; | |
c7c16703 | 17009 | @@ -460,15 +892,45 @@ struct tasklet_head { |
1a6e0f06 JK |
17010 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); |
17011 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
17012 | ||
17013 | +static void inline | |
17014 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
17015 | +{ | |
17016 | + if (tasklet_trylock(t)) { | |
17017 | +again: | |
17018 | + /* We may have been preempted before tasklet_trylock | |
17019 | + * and __tasklet_action may have already run. | |
17020 | + * So double check the sched bit while the takslet | |
17021 | + * is locked before adding it to the list. | |
17022 | + */ | |
17023 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
17024 | + t->next = NULL; | |
17025 | + *head->tail = t; | |
17026 | + head->tail = &(t->next); | |
17027 | + raise_softirq_irqoff(nr); | |
17028 | + tasklet_unlock(t); | |
17029 | + } else { | |
17030 | + /* This is subtle. If we hit the corner case above | |
17031 | + * It is possible that we get preempted right here, | |
17032 | + * and another task has successfully called | |
17033 | + * tasklet_schedule(), then this function, and | |
17034 | + * failed on the trylock. Thus we must be sure | |
17035 | + * before releasing the tasklet lock, that the | |
17036 | + * SCHED_BIT is clear. Otherwise the tasklet | |
17037 | + * may get its SCHED_BIT set, but not added to the | |
17038 | + * list | |
17039 | + */ | |
17040 | + if (!tasklet_tryunlock(t)) | |
17041 | + goto again; | |
17042 | + } | |
17043 | + } | |
17044 | +} | |
17045 | + | |
17046 | void __tasklet_schedule(struct tasklet_struct *t) | |
17047 | { | |
17048 | unsigned long flags; | |
17049 | ||
17050 | local_irq_save(flags); | |
17051 | - t->next = NULL; | |
17052 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
17053 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
17054 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
17055 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
17056 | local_irq_restore(flags); | |
17057 | } | |
17058 | EXPORT_SYMBOL(__tasklet_schedule); | |
c7c16703 | 17059 | @@ -478,10 +940,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) |
1a6e0f06 JK |
17060 | unsigned long flags; |
17061 | ||
17062 | local_irq_save(flags); | |
17063 | - t->next = NULL; | |
17064 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
17065 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
17066 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
17067 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
17068 | local_irq_restore(flags); | |
17069 | } | |
17070 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
c7c16703 | 17071 | @@ -490,82 +949,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) |
1a6e0f06 JK |
17072 | { |
17073 | BUG_ON(!irqs_disabled()); | |
17074 | ||
17075 | - t->next = __this_cpu_read(tasklet_hi_vec.head); | |
17076 | - __this_cpu_write(tasklet_hi_vec.head, t); | |
17077 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
17078 | + __tasklet_hi_schedule(t); | |
17079 | } | |
17080 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | |
17081 | ||
c7c16703 | 17082 | -static __latent_entropy void tasklet_action(struct softirq_action *a) |
1a6e0f06 JK |
17083 | +void tasklet_enable(struct tasklet_struct *t) |
17084 | { | |
17085 | - struct tasklet_struct *list; | |
17086 | + if (!atomic_dec_and_test(&t->count)) | |
17087 | + return; | |
17088 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
17089 | + tasklet_schedule(t); | |
17090 | +} | |
17091 | +EXPORT_SYMBOL(tasklet_enable); | |
17092 | ||
17093 | - local_irq_disable(); | |
17094 | - list = __this_cpu_read(tasklet_vec.head); | |
17095 | - __this_cpu_write(tasklet_vec.head, NULL); | |
17096 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
17097 | - local_irq_enable(); | |
17098 | +static void __tasklet_action(struct softirq_action *a, | |
17099 | + struct tasklet_struct *list) | |
17100 | +{ | |
17101 | + int loops = 1000000; | |
17102 | ||
17103 | while (list) { | |
17104 | struct tasklet_struct *t = list; | |
17105 | ||
17106 | list = list->next; | |
17107 | ||
17108 | - if (tasklet_trylock(t)) { | |
17109 | - if (!atomic_read(&t->count)) { | |
17110 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
17111 | - &t->state)) | |
17112 | - BUG(); | |
17113 | - t->func(t->data); | |
17114 | - tasklet_unlock(t); | |
17115 | - continue; | |
17116 | - } | |
17117 | - tasklet_unlock(t); | |
17118 | + /* | |
17119 | + * Should always succeed - after a tasklist got on the | |
17120 | + * list (after getting the SCHED bit set from 0 to 1), | |
17121 | + * nothing but the tasklet softirq it got queued to can | |
17122 | + * lock it: | |
17123 | + */ | |
17124 | + if (!tasklet_trylock(t)) { | |
17125 | + WARN_ON(1); | |
17126 | + continue; | |
17127 | } | |
17128 | ||
17129 | - local_irq_disable(); | |
17130 | t->next = NULL; | |
17131 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
17132 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
17133 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
17134 | - local_irq_enable(); | |
17135 | + | |
17136 | + /* | |
17137 | + * If we cannot handle the tasklet because it's disabled, | |
17138 | + * mark it as pending. tasklet_enable() will later | |
17139 | + * re-schedule the tasklet. | |
17140 | + */ | |
17141 | + if (unlikely(atomic_read(&t->count))) { | |
17142 | +out_disabled: | |
17143 | + /* implicit unlock: */ | |
17144 | + wmb(); | |
17145 | + t->state = TASKLET_STATEF_PENDING; | |
17146 | + continue; | |
17147 | + } | |
17148 | + | |
17149 | + /* | |
17150 | + * After this point on the tasklet might be rescheduled | |
17151 | + * on another CPU, but it can only be added to another | |
17152 | + * CPU's tasklet list if we unlock the tasklet (which we | |
17153 | + * dont do yet). | |
17154 | + */ | |
17155 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
17156 | + WARN_ON(1); | |
17157 | + | |
17158 | +again: | |
17159 | + t->func(t->data); | |
17160 | + | |
17161 | + /* | |
17162 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
17163 | + * another CPU might have scheduled or disabled the tasklet. | |
17164 | + * We only allow the STATE_RUN -> 0 transition here. | |
17165 | + */ | |
17166 | + while (!tasklet_tryunlock(t)) { | |
17167 | + /* | |
17168 | + * If it got disabled meanwhile, bail out: | |
17169 | + */ | |
17170 | + if (atomic_read(&t->count)) | |
17171 | + goto out_disabled; | |
17172 | + /* | |
17173 | + * If it got scheduled meanwhile, re-execute | |
17174 | + * the tasklet function: | |
17175 | + */ | |
17176 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
17177 | + goto again; | |
17178 | + if (!--loops) { | |
17179 | + printk("hm, tasklet state: %08lx\n", t->state); | |
17180 | + WARN_ON(1); | |
17181 | + tasklet_unlock(t); | |
17182 | + break; | |
17183 | + } | |
17184 | + } | |
17185 | } | |
17186 | } | |
17187 | ||
17188 | +static void tasklet_action(struct softirq_action *a) | |
17189 | +{ | |
17190 | + struct tasklet_struct *list; | |
17191 | + | |
17192 | + local_irq_disable(); | |
17193 | + | |
17194 | + list = __this_cpu_read(tasklet_vec.head); | |
17195 | + __this_cpu_write(tasklet_vec.head, NULL); | |
17196 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
17197 | + | |
17198 | + local_irq_enable(); | |
17199 | + | |
17200 | + __tasklet_action(a, list); | |
17201 | +} | |
17202 | + | |
c7c16703 | 17203 | static __latent_entropy void tasklet_hi_action(struct softirq_action *a) |
1a6e0f06 JK |
17204 | { |
17205 | struct tasklet_struct *list; | |
17206 | ||
17207 | local_irq_disable(); | |
17208 | + | |
17209 | list = __this_cpu_read(tasklet_hi_vec.head); | |
17210 | __this_cpu_write(tasklet_hi_vec.head, NULL); | |
17211 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
17212 | + | |
17213 | local_irq_enable(); | |
17214 | ||
17215 | - while (list) { | |
17216 | - struct tasklet_struct *t = list; | |
17217 | - | |
17218 | - list = list->next; | |
17219 | - | |
17220 | - if (tasklet_trylock(t)) { | |
17221 | - if (!atomic_read(&t->count)) { | |
17222 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
17223 | - &t->state)) | |
17224 | - BUG(); | |
17225 | - t->func(t->data); | |
17226 | - tasklet_unlock(t); | |
17227 | - continue; | |
17228 | - } | |
17229 | - tasklet_unlock(t); | |
17230 | - } | |
17231 | - | |
17232 | - local_irq_disable(); | |
17233 | - t->next = NULL; | |
17234 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
17235 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
17236 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
17237 | - local_irq_enable(); | |
17238 | - } | |
17239 | + __tasklet_action(a, list); | |
17240 | } | |
17241 | ||
17242 | void tasklet_init(struct tasklet_struct *t, | |
c7c16703 | 17243 | @@ -586,7 +1085,7 @@ void tasklet_kill(struct tasklet_struct *t) |
1a6e0f06 JK |
17244 | |
17245 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
17246 | do { | |
17247 | - yield(); | |
17248 | + msleep(1); | |
17249 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
17250 | } | |
17251 | tasklet_unlock_wait(t); | |
c7c16703 | 17252 | @@ -660,25 +1159,26 @@ void __init softirq_init(void) |
1a6e0f06 JK |
17253 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); |
17254 | } | |
17255 | ||
17256 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
17257 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
17258 | +{ | |
17259 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
17260 | + /* | |
17261 | + * Hack for now to avoid this busy-loop: | |
17262 | + */ | |
17263 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17264 | + msleep(1); | |
17265 | +#else | |
17266 | + barrier(); | |
17267 | +#endif | |
17268 | + } | |
17269 | +} | |
17270 | +EXPORT_SYMBOL(tasklet_unlock_wait); | |
17271 | +#endif | |
17272 | + | |
17273 | static int ksoftirqd_should_run(unsigned int cpu) | |
17274 | { | |
17275 | - return local_softirq_pending(); | |
17276 | -} | |
17277 | - | |
17278 | -static void run_ksoftirqd(unsigned int cpu) | |
17279 | -{ | |
17280 | - local_irq_disable(); | |
17281 | - if (local_softirq_pending()) { | |
17282 | - /* | |
17283 | - * We can safely run softirq on inline stack, as we are not deep | |
17284 | - * in the task stack here. | |
17285 | - */ | |
17286 | - __do_softirq(); | |
17287 | - local_irq_enable(); | |
17288 | - cond_resched_rcu_qs(); | |
17289 | - return; | |
17290 | - } | |
17291 | - local_irq_enable(); | |
17292 | + return ksoftirqd_softirq_pending(); | |
17293 | } | |
17294 | ||
17295 | #ifdef CONFIG_HOTPLUG_CPU | |
c7c16703 | 17296 | @@ -745,17 +1245,31 @@ static int takeover_tasklets(unsigned int cpu) |
1a6e0f06 JK |
17297 | |
17298 | static struct smp_hotplug_thread softirq_threads = { | |
17299 | .store = &ksoftirqd, | |
17300 | + .setup = ksoftirqd_set_sched_params, | |
17301 | .thread_should_run = ksoftirqd_should_run, | |
17302 | .thread_fn = run_ksoftirqd, | |
17303 | .thread_comm = "ksoftirqd/%u", | |
17304 | }; | |
17305 | ||
17306 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17307 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
17308 | + .store = &ktimer_softirqd, | |
17309 | + .setup = ktimer_softirqd_set_sched_params, | |
17310 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
17311 | + .thread_should_run = ktimer_softirqd_should_run, | |
17312 | + .thread_fn = run_ksoftirqd, | |
17313 | + .thread_comm = "ktimersoftd/%u", | |
17314 | +}; | |
17315 | +#endif | |
17316 | + | |
17317 | static __init int spawn_ksoftirqd(void) | |
17318 | { | |
c7c16703 JK |
17319 | cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, |
17320 | takeover_tasklets); | |
1a6e0f06 | 17321 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); |
c7c16703 | 17322 | - |
1a6e0f06 JK |
17323 | +#ifdef CONFIG_PREEMPT_RT_FULL |
17324 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
17325 | +#endif | |
1a6e0f06 JK |
17326 | return 0; |
17327 | } | |
c7c16703 | 17328 | early_initcall(spawn_ksoftirqd); |
1a6e0f06 | 17329 | diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c |
c7c16703 | 17330 | index ec9ab2f01489..8b89dbedeaff 100644 |
1a6e0f06 JK |
17331 | --- a/kernel/stop_machine.c |
17332 | +++ b/kernel/stop_machine.c | |
c7c16703 | 17333 | @@ -36,7 +36,7 @@ struct cpu_stop_done { |
1a6e0f06 JK |
17334 | struct cpu_stopper { |
17335 | struct task_struct *thread; | |
17336 | ||
17337 | - spinlock_t lock; | |
17338 | + raw_spinlock_t lock; | |
17339 | bool enabled; /* is this stopper enabled? */ | |
17340 | struct list_head works; /* list of pending works */ | |
17341 | ||
c7c16703 | 17342 | @@ -78,14 +78,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) |
1a6e0f06 JK |
17343 | unsigned long flags; |
17344 | bool enabled; | |
17345 | ||
17346 | - spin_lock_irqsave(&stopper->lock, flags); | |
17347 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
17348 | enabled = stopper->enabled; | |
17349 | if (enabled) | |
17350 | __cpu_stop_queue_work(stopper, work); | |
17351 | else if (work->done) | |
17352 | cpu_stop_signal_done(work->done); | |
17353 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
17354 | ||
17355 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
17356 | return enabled; | |
17357 | } | |
17358 | ||
c7c16703 JK |
17359 | @@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, |
17360 | struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); | |
1a6e0f06 | 17361 | int err; |
c7c16703 | 17362 | retry: |
1a6e0f06 JK |
17363 | - spin_lock_irq(&stopper1->lock); |
17364 | - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
17365 | + raw_spin_lock_irq(&stopper1->lock); | |
17366 | + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
17367 | ||
17368 | err = -ENOENT; | |
17369 | if (!stopper1->enabled || !stopper2->enabled) | |
c7c16703 | 17370 | @@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, |
1a6e0f06 JK |
17371 | __cpu_stop_queue_work(stopper1, work1); |
17372 | __cpu_stop_queue_work(stopper2, work2); | |
17373 | unlock: | |
17374 | - spin_unlock(&stopper2->lock); | |
17375 | - spin_unlock_irq(&stopper1->lock); | |
17376 | + raw_spin_unlock(&stopper2->lock); | |
17377 | + raw_spin_unlock_irq(&stopper1->lock); | |
1a6e0f06 | 17378 | |
c7c16703 JK |
17379 | if (unlikely(err == -EDEADLK)) { |
17380 | while (stop_cpus_in_progress) | |
17381 | @@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |
1a6e0f06 JK |
17382 | unsigned long flags; |
17383 | int run; | |
17384 | ||
17385 | - spin_lock_irqsave(&stopper->lock, flags); | |
17386 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
17387 | run = !list_empty(&stopper->works); | |
17388 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
17389 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
17390 | return run; | |
17391 | } | |
17392 | ||
c7c16703 | 17393 | @@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu) |
1a6e0f06 JK |
17394 | |
17395 | repeat: | |
17396 | work = NULL; | |
17397 | - spin_lock_irq(&stopper->lock); | |
17398 | + raw_spin_lock_irq(&stopper->lock); | |
17399 | if (!list_empty(&stopper->works)) { | |
17400 | work = list_first_entry(&stopper->works, | |
17401 | struct cpu_stop_work, list); | |
17402 | list_del_init(&work->list); | |
17403 | } | |
17404 | - spin_unlock_irq(&stopper->lock); | |
17405 | + raw_spin_unlock_irq(&stopper->lock); | |
17406 | ||
17407 | if (work) { | |
17408 | cpu_stop_fn_t fn = work->fn; | |
c7c16703 | 17409 | @@ -475,6 +475,8 @@ static void cpu_stopper_thread(unsigned int cpu) |
1a6e0f06 JK |
17410 | struct cpu_stop_done *done = work->done; |
17411 | int ret; | |
17412 | ||
c7c16703 | 17413 | + /* XXX */ |
1a6e0f06 JK |
17414 | + |
17415 | /* cpu stop callbacks must not sleep, make in_atomic() == T */ | |
17416 | preempt_count_inc(); | |
17417 | ret = fn(arg); | |
c7c16703 | 17418 | @@ -541,7 +543,7 @@ static int __init cpu_stop_init(void) |
1a6e0f06 JK |
17419 | for_each_possible_cpu(cpu) { |
17420 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
17421 | ||
17422 | - spin_lock_init(&stopper->lock); | |
17423 | + raw_spin_lock_init(&stopper->lock); | |
17424 | INIT_LIST_HEAD(&stopper->works); | |
17425 | } | |
17426 | ||
1a6e0f06 | 17427 | diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c |
c7c16703 | 17428 | index bb5ec425dfe0..8338b14ed3a3 100644 |
1a6e0f06 JK |
17429 | --- a/kernel/time/hrtimer.c |
17430 | +++ b/kernel/time/hrtimer.c | |
17431 | @@ -53,6 +53,7 @@ | |
17432 | #include <asm/uaccess.h> | |
17433 | ||
17434 | #include <trace/events/timer.h> | |
17435 | +#include <trace/events/hist.h> | |
17436 | ||
17437 | #include "tick-internal.h" | |
17438 | ||
17439 | @@ -695,6 +696,29 @@ static void hrtimer_switch_to_hres(void) | |
17440 | retrigger_next_event(NULL); | |
17441 | } | |
17442 | ||
17443 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17444 | + | |
17445 | +static struct swork_event clock_set_delay_work; | |
17446 | + | |
17447 | +static void run_clock_set_delay(struct swork_event *event) | |
17448 | +{ | |
17449 | + clock_was_set(); | |
17450 | +} | |
17451 | + | |
17452 | +void clock_was_set_delayed(void) | |
17453 | +{ | |
17454 | + swork_queue(&clock_set_delay_work); | |
17455 | +} | |
17456 | + | |
17457 | +static __init int create_clock_set_delay_thread(void) | |
17458 | +{ | |
17459 | + WARN_ON(swork_get()); | |
17460 | + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); | |
17461 | + return 0; | |
17462 | +} | |
17463 | +early_initcall(create_clock_set_delay_thread); | |
17464 | +#else /* PREEMPT_RT_FULL */ | |
17465 | + | |
17466 | static void clock_was_set_work(struct work_struct *work) | |
17467 | { | |
17468 | clock_was_set(); | |
17469 | @@ -710,6 +734,7 @@ void clock_was_set_delayed(void) | |
17470 | { | |
17471 | schedule_work(&hrtimer_work); | |
17472 | } | |
17473 | +#endif | |
17474 | ||
17475 | #else | |
17476 | ||
17477 | @@ -719,11 +744,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
17478 | static inline void hrtimer_switch_to_hres(void) { } | |
17479 | static inline void | |
17480 | hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
17481 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
17482 | - struct hrtimer_clock_base *base) | |
17483 | -{ | |
17484 | - return 0; | |
17485 | -} | |
17486 | +static inline void hrtimer_reprogram(struct hrtimer *timer, | |
17487 | + struct hrtimer_clock_base *base) { } | |
17488 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
17489 | static inline void retrigger_next_event(void *arg) { } | |
17490 | ||
17491 | @@ -855,6 +877,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |
17492 | } | |
17493 | EXPORT_SYMBOL_GPL(hrtimer_forward); | |
17494 | ||
17495 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17496 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
17497 | + | |
17498 | +/** | |
17499 | + * hrtimer_wait_for_timer - Wait for a running timer | |
17500 | + * | |
17501 | + * @timer: timer to wait for | |
17502 | + * | |
17503 | + * The function waits in case the timers callback function is | |
17504 | + * currently executed on the waitqueue of the timer base. The | |
17505 | + * waitqueue is woken up after the timer callback function has | |
17506 | + * finished execution. | |
17507 | + */ | |
17508 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) | |
17509 | +{ | |
17510 | + struct hrtimer_clock_base *base = timer->base; | |
17511 | + | |
17512 | + if (base && base->cpu_base && !timer->irqsafe) | |
17513 | + wait_event(base->cpu_base->wait, | |
17514 | + !(hrtimer_callback_running(timer))); | |
17515 | +} | |
17516 | + | |
17517 | +#else | |
17518 | +# define wake_up_timer_waiters(b) do { } while (0) | |
17519 | +#endif | |
17520 | + | |
17521 | /* | |
17522 | * enqueue_hrtimer - internal function to (re)start a timer | |
17523 | * | |
17524 | @@ -896,6 +944,11 @@ static void __remove_hrtimer(struct hrtimer *timer, | |
17525 | if (!(state & HRTIMER_STATE_ENQUEUED)) | |
17526 | return; | |
17527 | ||
17528 | + if (unlikely(!list_empty(&timer->cb_entry))) { | |
17529 | + list_del_init(&timer->cb_entry); | |
17530 | + return; | |
17531 | + } | |
17532 | + | |
17533 | if (!timerqueue_del(&base->active, &timer->node)) | |
17534 | cpu_base->active_bases &= ~(1 << base->index); | |
17535 | ||
17536 | @@ -991,7 +1044,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
17537 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
17538 | ||
17539 | timer_stats_hrtimer_set_start_info(timer); | |
17540 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
17541 | + { | |
17542 | + ktime_t now = new_base->get_time(); | |
17543 | ||
17544 | + if (ktime_to_ns(tim) < ktime_to_ns(now)) | |
17545 | + timer->praecox = now; | |
17546 | + else | |
17547 | + timer->praecox = ktime_set(0, 0); | |
17548 | + } | |
17549 | +#endif | |
17550 | leftmost = enqueue_hrtimer(timer, new_base); | |
17551 | if (!leftmost) | |
17552 | goto unlock; | |
17553 | @@ -1063,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer) | |
17554 | ||
17555 | if (ret >= 0) | |
17556 | return ret; | |
17557 | - cpu_relax(); | |
17558 | + hrtimer_wait_for_timer(timer); | |
17559 | } | |
17560 | } | |
17561 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
17562 | @@ -1127,6 +1189,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
17563 | ||
17564 | base = hrtimer_clockid_to_base(clock_id); | |
17565 | timer->base = &cpu_base->clock_base[base]; | |
17566 | + INIT_LIST_HEAD(&timer->cb_entry); | |
17567 | timerqueue_init(&timer->node); | |
17568 | ||
17569 | #ifdef CONFIG_TIMER_STATS | |
17570 | @@ -1167,6 +1230,7 @@ bool hrtimer_active(const struct hrtimer *timer) | |
17571 | seq = raw_read_seqcount_begin(&cpu_base->seq); | |
17572 | ||
17573 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
17574 | + cpu_base->running_soft == timer || | |
17575 | cpu_base->running == timer) | |
17576 | return true; | |
17577 | ||
17578 | @@ -1265,10 +1329,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
17579 | cpu_base->running = NULL; | |
17580 | } | |
17581 | ||
17582 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17583 | +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, | |
17584 | + struct hrtimer_clock_base *base) | |
17585 | +{ | |
17586 | + int leftmost; | |
17587 | + | |
17588 | + if (restart != HRTIMER_NORESTART && | |
17589 | + !(timer->state & HRTIMER_STATE_ENQUEUED)) { | |
17590 | + | |
17591 | + leftmost = enqueue_hrtimer(timer, base); | |
17592 | + if (!leftmost) | |
17593 | + return; | |
17594 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
17595 | + if (!hrtimer_is_hres_active(timer)) { | |
17596 | + /* | |
17597 | + * Kick to reschedule the next tick to handle the new timer | |
17598 | + * on dynticks target. | |
17599 | + */ | |
17600 | + if (base->cpu_base->nohz_active) | |
17601 | + wake_up_nohz_cpu(base->cpu_base->cpu); | |
17602 | + } else { | |
17603 | + | |
17604 | + hrtimer_reprogram(timer, base); | |
17605 | + } | |
17606 | +#endif | |
17607 | + } | |
17608 | +} | |
17609 | + | |
17610 | +/* | |
17611 | + * The changes in mainline which removed the callback modes from | |
17612 | + * hrtimer are not yet working with -rt. The non wakeup_process() | |
17613 | + * based callbacks which involve sleeping locks need to be treated | |
17614 | + * seperately. | |
17615 | + */ | |
17616 | +static void hrtimer_rt_run_pending(void) | |
17617 | +{ | |
17618 | + enum hrtimer_restart (*fn)(struct hrtimer *); | |
17619 | + struct hrtimer_cpu_base *cpu_base; | |
17620 | + struct hrtimer_clock_base *base; | |
17621 | + struct hrtimer *timer; | |
17622 | + int index, restart; | |
17623 | + | |
17624 | + local_irq_disable(); | |
17625 | + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); | |
17626 | + | |
17627 | + raw_spin_lock(&cpu_base->lock); | |
17628 | + | |
17629 | + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | |
17630 | + base = &cpu_base->clock_base[index]; | |
17631 | + | |
17632 | + while (!list_empty(&base->expired)) { | |
17633 | + timer = list_first_entry(&base->expired, | |
17634 | + struct hrtimer, cb_entry); | |
17635 | + | |
17636 | + /* | |
17637 | + * Same as the above __run_hrtimer function | |
17638 | + * just we run with interrupts enabled. | |
17639 | + */ | |
17640 | + debug_deactivate(timer); | |
17641 | + cpu_base->running_soft = timer; | |
17642 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
17643 | + | |
17644 | + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
17645 | + timer_stats_account_hrtimer(timer); | |
17646 | + fn = timer->function; | |
17647 | + | |
17648 | + raw_spin_unlock_irq(&cpu_base->lock); | |
17649 | + restart = fn(timer); | |
17650 | + raw_spin_lock_irq(&cpu_base->lock); | |
17651 | + | |
17652 | + hrtimer_rt_reprogram(restart, timer, base); | |
17653 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
17654 | + | |
17655 | + WARN_ON_ONCE(cpu_base->running_soft != timer); | |
17656 | + cpu_base->running_soft = NULL; | |
17657 | + } | |
17658 | + } | |
17659 | + | |
17660 | + raw_spin_unlock_irq(&cpu_base->lock); | |
17661 | + | |
17662 | + wake_up_timer_waiters(cpu_base); | |
17663 | +} | |
17664 | + | |
17665 | +static int hrtimer_rt_defer(struct hrtimer *timer) | |
17666 | +{ | |
17667 | + if (timer->irqsafe) | |
17668 | + return 0; | |
17669 | + | |
17670 | + __remove_hrtimer(timer, timer->base, timer->state, 0); | |
17671 | + list_add_tail(&timer->cb_entry, &timer->base->expired); | |
17672 | + return 1; | |
17673 | +} | |
17674 | + | |
17675 | +#else | |
17676 | + | |
17677 | +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } | |
17678 | + | |
17679 | +#endif | |
17680 | + | |
17681 | +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); | |
17682 | + | |
17683 | static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
17684 | { | |
17685 | struct hrtimer_clock_base *base = cpu_base->clock_base; | |
17686 | unsigned int active = cpu_base->active_bases; | |
17687 | + int raise = 0; | |
17688 | ||
17689 | for (; active; base++, active >>= 1) { | |
17690 | struct timerqueue_node *node; | |
17691 | @@ -1284,6 +1450,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
17692 | ||
17693 | timer = container_of(node, struct hrtimer, node); | |
17694 | ||
17695 | + trace_hrtimer_interrupt(raw_smp_processor_id(), | |
17696 | + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? | |
17697 | + timer->praecox : hrtimer_get_expires(timer), | |
17698 | + basenow)), | |
17699 | + current, | |
17700 | + timer->function == hrtimer_wakeup ? | |
17701 | + container_of(timer, struct hrtimer_sleeper, | |
17702 | + timer)->task : NULL); | |
17703 | + | |
17704 | /* | |
17705 | * The immediate goal for using the softexpires is | |
17706 | * minimizing wakeups, not running timers at the | |
17707 | @@ -1299,9 +1474,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
17708 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
17709 | break; | |
17710 | ||
17711 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
17712 | + if (!hrtimer_rt_defer(timer)) | |
17713 | + __run_hrtimer(cpu_base, base, timer, &basenow); | |
17714 | + else | |
17715 | + raise = 1; | |
17716 | } | |
17717 | } | |
17718 | + if (raise) | |
17719 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
17720 | } | |
17721 | ||
17722 | #ifdef CONFIG_HIGH_RES_TIMERS | |
17723 | @@ -1464,16 +1644,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |
17724 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |
17725 | { | |
17726 | sl->timer.function = hrtimer_wakeup; | |
17727 | + sl->timer.irqsafe = 1; | |
17728 | sl->task = task; | |
17729 | } | |
17730 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
17731 | ||
17732 | -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | |
17733 | +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, | |
17734 | + unsigned long state) | |
17735 | { | |
17736 | hrtimer_init_sleeper(t, current); | |
17737 | ||
17738 | do { | |
17739 | - set_current_state(TASK_INTERRUPTIBLE); | |
17740 | + set_current_state(state); | |
17741 | hrtimer_start_expires(&t->timer, mode); | |
17742 | ||
17743 | if (likely(t->task)) | |
17744 | @@ -1515,7 +1697,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
17745 | HRTIMER_MODE_ABS); | |
17746 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
17747 | ||
17748 | - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | |
17749 | + /* cpu_chill() does not care about restart state. */ | |
17750 | + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) | |
17751 | goto out; | |
17752 | ||
17753 | rmtp = restart->nanosleep.rmtp; | |
17754 | @@ -1532,8 +1715,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
17755 | return ret; | |
17756 | } | |
17757 | ||
17758 | -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
17759 | - const enum hrtimer_mode mode, const clockid_t clockid) | |
17760 | +static long | |
17761 | +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
17762 | + const enum hrtimer_mode mode, const clockid_t clockid, | |
17763 | + unsigned long state) | |
17764 | { | |
17765 | struct restart_block *restart; | |
17766 | struct hrtimer_sleeper t; | |
17767 | @@ -1546,7 +1731,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
17768 | ||
17769 | hrtimer_init_on_stack(&t.timer, clockid, mode); | |
17770 | hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); | |
17771 | - if (do_nanosleep(&t, mode)) | |
17772 | + if (do_nanosleep(&t, mode, state)) | |
17773 | goto out; | |
17774 | ||
17775 | /* Absolute timers do not update the rmtp value and restart: */ | |
17776 | @@ -1573,6 +1758,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
17777 | return ret; | |
17778 | } | |
17779 | ||
17780 | +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
17781 | + const enum hrtimer_mode mode, const clockid_t clockid) | |
17782 | +{ | |
17783 | + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); | |
17784 | +} | |
17785 | + | |
17786 | SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
17787 | struct timespec __user *, rmtp) | |
17788 | { | |
17789 | @@ -1587,6 +1778,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
17790 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | |
17791 | } | |
17792 | ||
17793 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17794 | +/* | |
17795 | + * Sleep for 1 ms in hope whoever holds what we want will let it go. | |
17796 | + */ | |
17797 | +void cpu_chill(void) | |
17798 | +{ | |
17799 | + struct timespec tu = { | |
17800 | + .tv_nsec = NSEC_PER_MSEC, | |
17801 | + }; | |
17802 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
17803 | + | |
17804 | + current->flags |= PF_NOFREEZE; | |
17805 | + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, | |
17806 | + TASK_UNINTERRUPTIBLE); | |
17807 | + if (!freeze_flag) | |
17808 | + current->flags &= ~PF_NOFREEZE; | |
17809 | +} | |
17810 | +EXPORT_SYMBOL(cpu_chill); | |
17811 | +#endif | |
17812 | + | |
17813 | /* | |
17814 | * Functions related to boot-time initialization: | |
17815 | */ | |
17816 | @@ -1598,10 +1809,14 @@ int hrtimers_prepare_cpu(unsigned int cpu) | |
17817 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | |
17818 | cpu_base->clock_base[i].cpu_base = cpu_base; | |
17819 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
17820 | + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); | |
17821 | } | |
17822 | ||
17823 | cpu_base->cpu = cpu; | |
17824 | hrtimer_init_hres(cpu_base); | |
17825 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17826 | + init_waitqueue_head(&cpu_base->wait); | |
17827 | +#endif | |
17828 | return 0; | |
17829 | } | |
17830 | ||
17831 | @@ -1671,9 +1886,26 @@ int hrtimers_dead_cpu(unsigned int scpu) | |
17832 | ||
17833 | #endif /* CONFIG_HOTPLUG_CPU */ | |
17834 | ||
17835 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17836 | + | |
17837 | +static void run_hrtimer_softirq(struct softirq_action *h) | |
17838 | +{ | |
17839 | + hrtimer_rt_run_pending(); | |
17840 | +} | |
17841 | + | |
17842 | +static void hrtimers_open_softirq(void) | |
17843 | +{ | |
17844 | + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | |
17845 | +} | |
17846 | + | |
17847 | +#else | |
17848 | +static void hrtimers_open_softirq(void) { } | |
17849 | +#endif | |
17850 | + | |
17851 | void __init hrtimers_init(void) | |
17852 | { | |
17853 | hrtimers_prepare_cpu(smp_processor_id()); | |
17854 | + hrtimers_open_softirq(); | |
17855 | } | |
17856 | ||
17857 | /** | |
17858 | diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c | |
17859 | index 1d5c7204ddc9..184de6751180 100644 | |
17860 | --- a/kernel/time/itimer.c | |
17861 | +++ b/kernel/time/itimer.c | |
17862 | @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) | |
17863 | /* We are sharing ->siglock with it_real_fn() */ | |
17864 | if (hrtimer_try_to_cancel(timer) < 0) { | |
17865 | spin_unlock_irq(&tsk->sighand->siglock); | |
17866 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
17867 | goto again; | |
17868 | } | |
17869 | expires = timeval_to_ktime(value->it_value); | |
17870 | diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c | |
17871 | index 555e21f7b966..a5d6435fabbb 100644 | |
17872 | --- a/kernel/time/jiffies.c | |
17873 | +++ b/kernel/time/jiffies.c | |
17874 | @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = { | |
17875 | .max_cycles = 10, | |
17876 | }; | |
17877 | ||
17878 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
17879 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
17880 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
17881 | ||
17882 | #if (BITS_PER_LONG < 64) | |
17883 | u64 get_jiffies_64(void) | |
17884 | @@ -83,9 +84,9 @@ u64 get_jiffies_64(void) | |
17885 | u64 ret; | |
17886 | ||
17887 | do { | |
17888 | - seq = read_seqbegin(&jiffies_lock); | |
17889 | + seq = read_seqcount_begin(&jiffies_seq); | |
17890 | ret = jiffies_64; | |
17891 | - } while (read_seqretry(&jiffies_lock, seq)); | |
17892 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
17893 | return ret; | |
17894 | } | |
17895 | EXPORT_SYMBOL(get_jiffies_64); | |
17896 | diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c | |
17897 | index 6df8927c58a5..05b7391bf9bd 100644 | |
17898 | --- a/kernel/time/ntp.c | |
17899 | +++ b/kernel/time/ntp.c | |
17900 | @@ -17,6 +17,7 @@ | |
17901 | #include <linux/module.h> | |
17902 | #include <linux/rtc.h> | |
17903 | #include <linux/math64.h> | |
17904 | +#include <linux/swork.h> | |
17905 | ||
17906 | #include "ntp_internal.h" | |
17907 | #include "timekeeping_internal.h" | |
17908 | @@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_struct *work) | |
17909 | &sync_cmos_work, timespec64_to_jiffies(&next)); | |
17910 | } | |
17911 | ||
17912 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17913 | + | |
17914 | +static void run_clock_set_delay(struct swork_event *event) | |
17915 | +{ | |
17916 | + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
17917 | +} | |
17918 | + | |
17919 | +static struct swork_event ntp_cmos_swork; | |
17920 | + | |
17921 | +void ntp_notify_cmos_timer(void) | |
17922 | +{ | |
17923 | + swork_queue(&ntp_cmos_swork); | |
17924 | +} | |
17925 | + | |
17926 | +static __init int create_cmos_delay_thread(void) | |
17927 | +{ | |
17928 | + WARN_ON(swork_get()); | |
17929 | + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay); | |
17930 | + return 0; | |
17931 | +} | |
17932 | +early_initcall(create_cmos_delay_thread); | |
17933 | + | |
17934 | +#else | |
17935 | + | |
17936 | void ntp_notify_cmos_timer(void) | |
17937 | { | |
17938 | queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
17939 | } | |
17940 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
17941 | ||
17942 | #else | |
17943 | void ntp_notify_cmos_timer(void) { } | |
17944 | diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c | |
17945 | index 39008d78927a..633f4eaca9e7 100644 | |
17946 | --- a/kernel/time/posix-cpu-timers.c | |
17947 | +++ b/kernel/time/posix-cpu-timers.c | |
17948 | @@ -3,6 +3,7 @@ | |
17949 | */ | |
17950 | ||
17951 | #include <linux/sched.h> | |
17952 | +#include <linux/sched/rt.h> | |
17953 | #include <linux/posix-timers.h> | |
17954 | #include <linux/errno.h> | |
17955 | #include <linux/math64.h> | |
17956 | @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |
17957 | /* | |
17958 | * Disarm any old timer after extracting its expiry time. | |
17959 | */ | |
17960 | - WARN_ON_ONCE(!irqs_disabled()); | |
17961 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
17962 | ||
17963 | ret = 0; | |
17964 | old_incr = timer->it.cpu.incr; | |
17965 | @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |
17966 | /* | |
17967 | * Now re-arm for the new expiry time. | |
17968 | */ | |
17969 | - WARN_ON_ONCE(!irqs_disabled()); | |
17970 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
17971 | arm_timer(timer); | |
17972 | unlock_task_sighand(p, &flags); | |
17973 | ||
17974 | @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |
17975 | * already updated our counts. We need to check if any timers fire now. | |
17976 | * Interrupts are disabled. | |
17977 | */ | |
17978 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
17979 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
17980 | { | |
17981 | LIST_HEAD(firing); | |
17982 | struct k_itimer *timer, *next; | |
17983 | unsigned long flags; | |
17984 | ||
17985 | - WARN_ON_ONCE(!irqs_disabled()); | |
17986 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
17987 | ||
17988 | /* | |
17989 | * The fast path checks that there are no expired thread or thread | |
17990 | @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |
17991 | } | |
17992 | } | |
17993 | ||
17994 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17995 | +#include <linux/kthread.h> | |
17996 | +#include <linux/cpu.h> | |
17997 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
17998 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
17999 | + | |
18000 | +static int posix_cpu_timers_thread(void *data) | |
18001 | +{ | |
18002 | + int cpu = (long)data; | |
18003 | + | |
18004 | + BUG_ON(per_cpu(posix_timer_task,cpu) != current); | |
18005 | + | |
18006 | + while (!kthread_should_stop()) { | |
18007 | + struct task_struct *tsk = NULL; | |
18008 | + struct task_struct *next = NULL; | |
18009 | + | |
18010 | + if (cpu_is_offline(cpu)) | |
18011 | + goto wait_to_die; | |
18012 | + | |
18013 | + /* grab task list */ | |
18014 | + raw_local_irq_disable(); | |
18015 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
18016 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
18017 | + raw_local_irq_enable(); | |
18018 | + | |
18019 | + /* its possible the list is empty, just return */ | |
18020 | + if (!tsk) { | |
18021 | + set_current_state(TASK_INTERRUPTIBLE); | |
18022 | + schedule(); | |
18023 | + __set_current_state(TASK_RUNNING); | |
18024 | + continue; | |
18025 | + } | |
18026 | + | |
18027 | + /* Process task list */ | |
18028 | + while (1) { | |
18029 | + /* save next */ | |
18030 | + next = tsk->posix_timer_list; | |
18031 | + | |
18032 | + /* run the task timers, clear its ptr and | |
18033 | + * unreference it | |
18034 | + */ | |
18035 | + __run_posix_cpu_timers(tsk); | |
18036 | + tsk->posix_timer_list = NULL; | |
18037 | + put_task_struct(tsk); | |
18038 | + | |
18039 | + /* check if this is the last on the list */ | |
18040 | + if (next == tsk) | |
18041 | + break; | |
18042 | + tsk = next; | |
18043 | + } | |
18044 | + } | |
18045 | + return 0; | |
18046 | + | |
18047 | +wait_to_die: | |
18048 | + /* Wait for kthread_stop */ | |
18049 | + set_current_state(TASK_INTERRUPTIBLE); | |
18050 | + while (!kthread_should_stop()) { | |
18051 | + schedule(); | |
18052 | + set_current_state(TASK_INTERRUPTIBLE); | |
18053 | + } | |
18054 | + __set_current_state(TASK_RUNNING); | |
18055 | + return 0; | |
18056 | +} | |
18057 | + | |
18058 | +static inline int __fastpath_timer_check(struct task_struct *tsk) | |
18059 | +{ | |
18060 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ | |
18061 | + if (unlikely(tsk->exit_state)) | |
18062 | + return 0; | |
18063 | + | |
18064 | + if (!task_cputime_zero(&tsk->cputime_expires)) | |
18065 | + return 1; | |
18066 | + | |
18067 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
18068 | + return 1; | |
18069 | + | |
18070 | + return 0; | |
18071 | +} | |
18072 | + | |
18073 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
18074 | +{ | |
18075 | + unsigned long cpu = smp_processor_id(); | |
18076 | + struct task_struct *tasklist; | |
18077 | + | |
18078 | + BUG_ON(!irqs_disabled()); | |
18079 | + if(!per_cpu(posix_timer_task, cpu)) | |
18080 | + return; | |
18081 | + /* get per-cpu references */ | |
18082 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
18083 | + | |
18084 | + /* check to see if we're already queued */ | |
18085 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
18086 | + get_task_struct(tsk); | |
18087 | + if (tasklist) { | |
18088 | + tsk->posix_timer_list = tasklist; | |
18089 | + } else { | |
18090 | + /* | |
18091 | + * The list is terminated by a self-pointing | |
18092 | + * task_struct | |
18093 | + */ | |
18094 | + tsk->posix_timer_list = tsk; | |
18095 | + } | |
18096 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
18097 | + | |
18098 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
18099 | + } | |
18100 | +} | |
18101 | + | |
18102 | +/* | |
18103 | + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. | |
18104 | + * Here we can start up the necessary migration thread for the new CPU. | |
18105 | + */ | |
18106 | +static int posix_cpu_thread_call(struct notifier_block *nfb, | |
18107 | + unsigned long action, void *hcpu) | |
18108 | +{ | |
18109 | + int cpu = (long)hcpu; | |
18110 | + struct task_struct *p; | |
18111 | + struct sched_param param; | |
18112 | + | |
18113 | + switch (action) { | |
18114 | + case CPU_UP_PREPARE: | |
18115 | + p = kthread_create(posix_cpu_timers_thread, hcpu, | |
18116 | + "posixcputmr/%d",cpu); | |
18117 | + if (IS_ERR(p)) | |
18118 | + return NOTIFY_BAD; | |
18119 | + p->flags |= PF_NOFREEZE; | |
18120 | + kthread_bind(p, cpu); | |
18121 | + /* Must be high prio to avoid getting starved */ | |
18122 | + param.sched_priority = MAX_RT_PRIO-1; | |
18123 | + sched_setscheduler(p, SCHED_FIFO, ¶m); | |
18124 | + per_cpu(posix_timer_task,cpu) = p; | |
18125 | + break; | |
18126 | + case CPU_ONLINE: | |
18127 | + /* Strictly unneccessary, as first user will wake it. */ | |
18128 | + wake_up_process(per_cpu(posix_timer_task,cpu)); | |
18129 | + break; | |
18130 | +#ifdef CONFIG_HOTPLUG_CPU | |
18131 | + case CPU_UP_CANCELED: | |
18132 | + /* Unbind it from offline cpu so it can run. Fall thru. */ | |
18133 | + kthread_bind(per_cpu(posix_timer_task, cpu), | |
18134 | + cpumask_any(cpu_online_mask)); | |
18135 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
18136 | + per_cpu(posix_timer_task,cpu) = NULL; | |
18137 | + break; | |
18138 | + case CPU_DEAD: | |
18139 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
18140 | + per_cpu(posix_timer_task,cpu) = NULL; | |
18141 | + break; | |
18142 | +#endif | |
18143 | + } | |
18144 | + return NOTIFY_OK; | |
18145 | +} | |
18146 | + | |
18147 | +/* Register at highest priority so that task migration (migrate_all_tasks) | |
18148 | + * happens before everything else. | |
18149 | + */ | |
18150 | +static struct notifier_block posix_cpu_thread_notifier = { | |
18151 | + .notifier_call = posix_cpu_thread_call, | |
18152 | + .priority = 10 | |
18153 | +}; | |
18154 | + | |
18155 | +static int __init posix_cpu_thread_init(void) | |
18156 | +{ | |
18157 | + void *hcpu = (void *)(long)smp_processor_id(); | |
18158 | + /* Start one for boot CPU. */ | |
18159 | + unsigned long cpu; | |
18160 | + | |
18161 | + /* init the per-cpu posix_timer_tasklets */ | |
18162 | + for_each_possible_cpu(cpu) | |
18163 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
18164 | + | |
18165 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); | |
18166 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); | |
18167 | + register_cpu_notifier(&posix_cpu_thread_notifier); | |
18168 | + return 0; | |
18169 | +} | |
18170 | +early_initcall(posix_cpu_thread_init); | |
18171 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
18172 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
18173 | +{ | |
18174 | + __run_posix_cpu_timers(tsk); | |
18175 | +} | |
18176 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
18177 | + | |
18178 | /* | |
18179 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
18180 | * The tsk->sighand->siglock must be held by the caller. | |
18181 | diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c | |
18182 | index f2826c35e918..464a98155a0e 100644 | |
18183 | --- a/kernel/time/posix-timers.c | |
18184 | +++ b/kernel/time/posix-timers.c | |
18185 | @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |
18186 | static struct pid *good_sigevent(sigevent_t * event) | |
18187 | { | |
18188 | struct task_struct *rtn = current->group_leader; | |
18189 | + int sig = event->sigev_signo; | |
18190 | ||
18191 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | |
18192 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | |
18193 | @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event) | |
18194 | return NULL; | |
18195 | ||
18196 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | |
18197 | - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | |
18198 | + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || | |
18199 | + sig_kernel_coredump(sig))) | |
18200 | return NULL; | |
18201 | ||
18202 | return task_pid(rtn); | |
18203 | @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) | |
18204 | return overrun; | |
18205 | } | |
18206 | ||
18207 | +/* | |
18208 | + * Protected by RCU! | |
18209 | + */ | |
18210 | +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) | |
18211 | +{ | |
18212 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18213 | + if (kc->timer_set == common_timer_set) | |
18214 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
18215 | + else | |
18216 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
18217 | + schedule_timeout(1); | |
18218 | +#endif | |
18219 | +} | |
18220 | + | |
18221 | /* Set a POSIX.1b interval timer. */ | |
18222 | /* timr->it_lock is taken. */ | |
18223 | static int | |
18224 | @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
18225 | if (!timr) | |
18226 | return -EINVAL; | |
18227 | ||
18228 | + rcu_read_lock(); | |
18229 | kc = clockid_to_kclock(timr->it_clock); | |
18230 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
18231 | error = -EINVAL; | |
18232 | @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
18233 | ||
18234 | unlock_timer(timr, flag); | |
18235 | if (error == TIMER_RETRY) { | |
18236 | + timer_wait_for_callback(kc, timr); | |
18237 | rtn = NULL; // We already got the old time... | |
18238 | + rcu_read_unlock(); | |
18239 | goto retry; | |
18240 | } | |
18241 | + rcu_read_unlock(); | |
18242 | ||
18243 | if (old_setting && !error && | |
18244 | copy_to_user(old_setting, &old_spec, sizeof (old_spec))) | |
18245 | @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) | |
18246 | if (!timer) | |
18247 | return -EINVAL; | |
18248 | ||
18249 | + rcu_read_lock(); | |
18250 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18251 | unlock_timer(timer, flags); | |
18252 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
18253 | + timer); | |
18254 | + rcu_read_unlock(); | |
18255 | goto retry_delete; | |
18256 | } | |
18257 | + rcu_read_unlock(); | |
18258 | ||
18259 | spin_lock(¤t->sighand->siglock); | |
18260 | list_del(&timer->list); | |
18261 | @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer) | |
18262 | retry_delete: | |
18263 | spin_lock_irqsave(&timer->it_lock, flags); | |
18264 | ||
18265 | - if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18266 | + /* On RT we can race with a deletion */ | |
18267 | + if (!timer->it_signal) { | |
18268 | unlock_timer(timer, flags); | |
18269 | + return; | |
18270 | + } | |
18271 | + | |
18272 | + if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18273 | + rcu_read_lock(); | |
18274 | + unlock_timer(timer, flags); | |
18275 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
18276 | + timer); | |
18277 | + rcu_read_unlock(); | |
18278 | goto retry_delete; | |
18279 | } | |
18280 | list_del(&timer->list); | |
18281 | diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c | |
18282 | index 690b797f522e..fe8ba1619879 100644 | |
18283 | --- a/kernel/time/tick-broadcast-hrtimer.c | |
18284 | +++ b/kernel/time/tick-broadcast-hrtimer.c | |
18285 | @@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void) | |
18286 | { | |
18287 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
18288 | bctimer.function = bc_handler; | |
18289 | + bctimer.irqsafe = true; | |
18290 | clockevents_register_device(&ce_broadcast_hrtimer); | |
18291 | } | |
18292 | diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c | |
18293 | index 4fcd99e12aa0..5a47f2e98faf 100644 | |
18294 | --- a/kernel/time/tick-common.c | |
18295 | +++ b/kernel/time/tick-common.c | |
18296 | @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void) | |
18297 | static void tick_periodic(int cpu) | |
18298 | { | |
18299 | if (tick_do_timer_cpu == cpu) { | |
18300 | - write_seqlock(&jiffies_lock); | |
18301 | + raw_spin_lock(&jiffies_lock); | |
18302 | + write_seqcount_begin(&jiffies_seq); | |
18303 | ||
18304 | /* Keep track of the next tick event */ | |
18305 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
18306 | ||
18307 | do_timer(1); | |
18308 | - write_sequnlock(&jiffies_lock); | |
18309 | + write_seqcount_end(&jiffies_seq); | |
18310 | + raw_spin_unlock(&jiffies_lock); | |
18311 | update_wall_time(); | |
18312 | } | |
18313 | ||
18314 | @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |
18315 | ktime_t next; | |
18316 | ||
18317 | do { | |
18318 | - seq = read_seqbegin(&jiffies_lock); | |
18319 | + seq = read_seqcount_begin(&jiffies_seq); | |
18320 | next = tick_next_period; | |
18321 | - } while (read_seqretry(&jiffies_lock, seq)); | |
18322 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
18323 | ||
18324 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); | |
18325 | ||
18326 | diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c | |
c7c16703 | 18327 | index 3bcb61b52f6c..66d85482a96e 100644 |
1a6e0f06 JK |
18328 | --- a/kernel/time/tick-sched.c |
18329 | +++ b/kernel/time/tick-sched.c | |
18330 | @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) | |
18331 | return; | |
18332 | ||
18333 | /* Reevaluate with jiffies_lock held */ | |
18334 | - write_seqlock(&jiffies_lock); | |
18335 | + raw_spin_lock(&jiffies_lock); | |
18336 | + write_seqcount_begin(&jiffies_seq); | |
18337 | ||
18338 | delta = ktime_sub(now, last_jiffies_update); | |
18339 | if (delta.tv64 >= tick_period.tv64) { | |
18340 | @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now) | |
18341 | /* Keep the tick_next_period variable up to date */ | |
18342 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
18343 | } else { | |
18344 | - write_sequnlock(&jiffies_lock); | |
18345 | + write_seqcount_end(&jiffies_seq); | |
18346 | + raw_spin_unlock(&jiffies_lock); | |
18347 | return; | |
18348 | } | |
18349 | - write_sequnlock(&jiffies_lock); | |
18350 | + write_seqcount_end(&jiffies_seq); | |
18351 | + raw_spin_unlock(&jiffies_lock); | |
18352 | update_wall_time(); | |
18353 | } | |
18354 | ||
18355 | @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void) | |
18356 | { | |
18357 | ktime_t period; | |
18358 | ||
18359 | - write_seqlock(&jiffies_lock); | |
18360 | + raw_spin_lock(&jiffies_lock); | |
18361 | + write_seqcount_begin(&jiffies_seq); | |
18362 | /* Did we start the jiffies update yet ? */ | |
18363 | if (last_jiffies_update.tv64 == 0) | |
18364 | last_jiffies_update = tick_next_period; | |
18365 | period = last_jiffies_update; | |
18366 | - write_sequnlock(&jiffies_lock); | |
18367 | + write_seqcount_end(&jiffies_seq); | |
18368 | + raw_spin_unlock(&jiffies_lock); | |
18369 | return period; | |
18370 | } | |
18371 | ||
c7c16703 | 18372 | @@ -215,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work) |
1a6e0f06 JK |
18373 | |
18374 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
18375 | .func = nohz_full_kick_func, | |
18376 | + .flags = IRQ_WORK_HARD_IRQ, | |
18377 | }; | |
18378 | ||
18379 | /* | |
c7c16703 | 18380 | @@ -673,10 +679,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
1a6e0f06 JK |
18381 | |
18382 | /* Read jiffies and the time when jiffies were updated last */ | |
18383 | do { | |
18384 | - seq = read_seqbegin(&jiffies_lock); | |
18385 | + seq = read_seqcount_begin(&jiffies_seq); | |
18386 | basemono = last_jiffies_update.tv64; | |
18387 | basejiff = jiffies; | |
18388 | - } while (read_seqretry(&jiffies_lock, seq)); | |
18389 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
18390 | ts->last_jiffies = basejiff; | |
18391 | ||
18392 | if (rcu_needs_cpu(basemono, &next_rcu) || | |
c7c16703 | 18393 | @@ -877,14 +883,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) |
1a6e0f06 JK |
18394 | return false; |
18395 | ||
18396 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
18397 | - static int ratelimit; | |
18398 | - | |
18399 | - if (ratelimit < 10 && | |
18400 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
18401 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
18402 | - (unsigned int) local_softirq_pending()); | |
18403 | - ratelimit++; | |
18404 | - } | |
18405 | + softirq_check_pending_idle(); | |
18406 | return false; | |
18407 | } | |
18408 | ||
c7c16703 | 18409 | @@ -1193,6 +1192,7 @@ void tick_setup_sched_timer(void) |
1a6e0f06 JK |
18410 | * Emulate tick processing via per-CPU hrtimers: |
18411 | */ | |
18412 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
18413 | + ts->sched_timer.irqsafe = 1; | |
18414 | ts->sched_timer.function = tick_sched_timer; | |
18415 | ||
18416 | /* Get the next period (per-CPU) */ | |
18417 | diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c | |
c7c16703 | 18418 | index 46e312e9be38..fa75cf5d9253 100644 |
1a6e0f06 JK |
18419 | --- a/kernel/time/timekeeping.c |
18420 | +++ b/kernel/time/timekeeping.c | |
18421 | @@ -2328,8 +2328,10 @@ EXPORT_SYMBOL(hardpps); | |
18422 | */ | |
18423 | void xtime_update(unsigned long ticks) | |
18424 | { | |
18425 | - write_seqlock(&jiffies_lock); | |
18426 | + raw_spin_lock(&jiffies_lock); | |
18427 | + write_seqcount_begin(&jiffies_seq); | |
18428 | do_timer(ticks); | |
18429 | - write_sequnlock(&jiffies_lock); | |
18430 | + write_seqcount_end(&jiffies_seq); | |
18431 | + raw_spin_unlock(&jiffies_lock); | |
18432 | update_wall_time(); | |
18433 | } | |
18434 | diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h | |
18435 | index 704f595ce83f..763a3e5121ff 100644 | |
18436 | --- a/kernel/time/timekeeping.h | |
18437 | +++ b/kernel/time/timekeeping.h | |
18438 | @@ -19,7 +19,8 @@ extern void timekeeping_resume(void); | |
18439 | extern void do_timer(unsigned long ticks); | |
18440 | extern void update_wall_time(void); | |
18441 | ||
18442 | -extern seqlock_t jiffies_lock; | |
18443 | +extern raw_spinlock_t jiffies_lock; | |
18444 | +extern seqcount_t jiffies_seq; | |
18445 | ||
18446 | #define CS_NAME_LEN 32 | |
18447 | ||
18448 | diff --git a/kernel/time/timer.c b/kernel/time/timer.c | |
c7c16703 | 18449 | index c611c47de884..08a5ab762495 100644 |
1a6e0f06 JK |
18450 | --- a/kernel/time/timer.c |
18451 | +++ b/kernel/time/timer.c | |
18452 | @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64); | |
18453 | #endif | |
18454 | ||
18455 | struct timer_base { | |
18456 | - spinlock_t lock; | |
18457 | + raw_spinlock_t lock; | |
18458 | struct timer_list *running_timer; | |
18459 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18460 | + struct swait_queue_head wait_for_running_timer; | |
18461 | +#endif | |
18462 | unsigned long clk; | |
18463 | unsigned long next_expiry; | |
18464 | unsigned int cpu; | |
c7c16703 | 18465 | @@ -948,10 +951,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, |
1a6e0f06 JK |
18466 | |
18467 | if (!(tf & TIMER_MIGRATING)) { | |
18468 | base = get_timer_base(tf); | |
18469 | - spin_lock_irqsave(&base->lock, *flags); | |
18470 | + raw_spin_lock_irqsave(&base->lock, *flags); | |
18471 | if (timer->flags == tf) | |
18472 | return base; | |
18473 | - spin_unlock_irqrestore(&base->lock, *flags); | |
18474 | + raw_spin_unlock_irqrestore(&base->lock, *flags); | |
18475 | } | |
18476 | cpu_relax(); | |
18477 | } | |
c7c16703 | 18478 | @@ -1023,9 +1026,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) |
1a6e0f06 JK |
18479 | /* See the comment in lock_timer_base() */ |
18480 | timer->flags |= TIMER_MIGRATING; | |
18481 | ||
18482 | - spin_unlock(&base->lock); | |
18483 | + raw_spin_unlock(&base->lock); | |
18484 | base = new_base; | |
18485 | - spin_lock(&base->lock); | |
18486 | + raw_spin_lock(&base->lock); | |
18487 | WRITE_ONCE(timer->flags, | |
18488 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); | |
18489 | } | |
c7c16703 | 18490 | @@ -1050,7 +1053,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) |
1a6e0f06 JK |
18491 | } |
18492 | ||
18493 | out_unlock: | |
18494 | - spin_unlock_irqrestore(&base->lock, flags); | |
18495 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18496 | ||
18497 | return ret; | |
18498 | } | |
c7c16703 | 18499 | @@ -1144,19 +1147,46 @@ void add_timer_on(struct timer_list *timer, int cpu) |
1a6e0f06 JK |
18500 | if (base != new_base) { |
18501 | timer->flags |= TIMER_MIGRATING; | |
18502 | ||
18503 | - spin_unlock(&base->lock); | |
18504 | + raw_spin_unlock(&base->lock); | |
18505 | base = new_base; | |
18506 | - spin_lock(&base->lock); | |
18507 | + raw_spin_lock(&base->lock); | |
18508 | WRITE_ONCE(timer->flags, | |
18509 | (timer->flags & ~TIMER_BASEMASK) | cpu); | |
18510 | } | |
18511 | ||
18512 | debug_activate(timer, timer->expires); | |
18513 | internal_add_timer(base, timer); | |
18514 | - spin_unlock_irqrestore(&base->lock, flags); | |
18515 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18516 | } | |
18517 | EXPORT_SYMBOL_GPL(add_timer_on); | |
18518 | ||
18519 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18520 | +/* | |
18521 | + * Wait for a running timer | |
18522 | + */ | |
18523 | +static void wait_for_running_timer(struct timer_list *timer) | |
18524 | +{ | |
18525 | + struct timer_base *base; | |
18526 | + u32 tf = timer->flags; | |
18527 | + | |
18528 | + if (tf & TIMER_MIGRATING) | |
18529 | + return; | |
18530 | + | |
18531 | + base = get_timer_base(tf); | |
18532 | + swait_event(base->wait_for_running_timer, | |
18533 | + base->running_timer != timer); | |
18534 | +} | |
18535 | + | |
18536 | +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) | |
18537 | +#else | |
18538 | +static inline void wait_for_running_timer(struct timer_list *timer) | |
18539 | +{ | |
18540 | + cpu_relax(); | |
18541 | +} | |
18542 | + | |
18543 | +# define wakeup_timer_waiters(b) do { } while (0) | |
18544 | +#endif | |
18545 | + | |
18546 | /** | |
18547 | * del_timer - deactive a timer. | |
18548 | * @timer: the timer to be deactivated | |
c7c16703 | 18549 | @@ -1180,7 +1210,7 @@ int del_timer(struct timer_list *timer) |
1a6e0f06 JK |
18550 | if (timer_pending(timer)) { |
18551 | base = lock_timer_base(timer, &flags); | |
18552 | ret = detach_if_pending(timer, base, true); | |
18553 | - spin_unlock_irqrestore(&base->lock, flags); | |
18554 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18555 | } | |
18556 | ||
18557 | return ret; | |
c7c16703 | 18558 | @@ -1208,13 +1238,13 @@ int try_to_del_timer_sync(struct timer_list *timer) |
1a6e0f06 JK |
18559 | timer_stats_timer_clear_start_info(timer); |
18560 | ret = detach_if_pending(timer, base, true); | |
18561 | } | |
18562 | - spin_unlock_irqrestore(&base->lock, flags); | |
18563 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18564 | ||
18565 | return ret; | |
18566 | } | |
18567 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
18568 | ||
18569 | -#ifdef CONFIG_SMP | |
18570 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
18571 | /** | |
18572 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
18573 | * @timer: the timer to be deactivated | |
c7c16703 | 18574 | @@ -1274,7 +1304,7 @@ int del_timer_sync(struct timer_list *timer) |
1a6e0f06 JK |
18575 | int ret = try_to_del_timer_sync(timer); |
18576 | if (ret >= 0) | |
18577 | return ret; | |
18578 | - cpu_relax(); | |
18579 | + wait_for_running_timer(timer); | |
18580 | } | |
18581 | } | |
18582 | EXPORT_SYMBOL(del_timer_sync); | |
c7c16703 | 18583 | @@ -1339,14 +1369,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) |
1a6e0f06 JK |
18584 | fn = timer->function; |
18585 | data = timer->data; | |
18586 | ||
18587 | - if (timer->flags & TIMER_IRQSAFE) { | |
18588 | - spin_unlock(&base->lock); | |
18589 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && | |
18590 | + timer->flags & TIMER_IRQSAFE) { | |
18591 | + raw_spin_unlock(&base->lock); | |
18592 | call_timer_fn(timer, fn, data); | |
18593 | - spin_lock(&base->lock); | |
18594 | + base->running_timer = NULL; | |
18595 | + raw_spin_lock(&base->lock); | |
18596 | } else { | |
18597 | - spin_unlock_irq(&base->lock); | |
18598 | + raw_spin_unlock_irq(&base->lock); | |
18599 | call_timer_fn(timer, fn, data); | |
18600 | - spin_lock_irq(&base->lock); | |
18601 | + base->running_timer = NULL; | |
18602 | + raw_spin_lock_irq(&base->lock); | |
18603 | } | |
18604 | } | |
18605 | } | |
c7c16703 | 18606 | @@ -1515,7 +1548,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) |
1a6e0f06 JK |
18607 | if (cpu_is_offline(smp_processor_id())) |
18608 | return expires; | |
18609 | ||
18610 | - spin_lock(&base->lock); | |
18611 | + raw_spin_lock(&base->lock); | |
18612 | nextevt = __next_timer_interrupt(base); | |
18613 | is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); | |
18614 | base->next_expiry = nextevt; | |
c7c16703 | 18615 | @@ -1543,7 +1576,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) |
1a6e0f06 JK |
18616 | if ((expires - basem) > TICK_NSEC) |
18617 | base->is_idle = true; | |
18618 | } | |
18619 | - spin_unlock(&base->lock); | |
18620 | + raw_spin_unlock(&base->lock); | |
18621 | ||
18622 | return cmp_next_hrtimer_event(basem, expires); | |
18623 | } | |
c7c16703 | 18624 | @@ -1608,13 +1641,13 @@ void update_process_times(int user_tick) |
1a6e0f06 JK |
18625 | |
18626 | /* Note: this timer irq context must be accounted for as well. */ | |
18627 | account_process_tick(p, user_tick); | |
18628 | + scheduler_tick(); | |
18629 | run_local_timers(); | |
18630 | rcu_check_callbacks(user_tick); | |
18631 | -#ifdef CONFIG_IRQ_WORK | |
18632 | +#if defined(CONFIG_IRQ_WORK) | |
18633 | if (in_irq()) | |
18634 | irq_work_tick(); | |
18635 | #endif | |
18636 | - scheduler_tick(); | |
18637 | run_posix_cpu_timers(p); | |
18638 | } | |
18639 | ||
c7c16703 | 18640 | @@ -1630,7 +1663,7 @@ static inline void __run_timers(struct timer_base *base) |
1a6e0f06 JK |
18641 | if (!time_after_eq(jiffies, base->clk)) |
18642 | return; | |
18643 | ||
18644 | - spin_lock_irq(&base->lock); | |
18645 | + raw_spin_lock_irq(&base->lock); | |
18646 | ||
18647 | while (time_after_eq(jiffies, base->clk)) { | |
18648 | ||
c7c16703 | 18649 | @@ -1640,8 +1673,8 @@ static inline void __run_timers(struct timer_base *base) |
1a6e0f06 JK |
18650 | while (levels--) |
18651 | expire_timers(base, heads + levels); | |
18652 | } | |
18653 | - base->running_timer = NULL; | |
18654 | - spin_unlock_irq(&base->lock); | |
18655 | + raw_spin_unlock_irq(&base->lock); | |
18656 | + wakeup_timer_waiters(base); | |
18657 | } | |
18658 | ||
18659 | /* | |
c7c16703 | 18660 | @@ -1651,6 +1684,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) |
1a6e0f06 JK |
18661 | { |
18662 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | |
18663 | ||
18664 | + irq_work_tick_soft(); | |
18665 | + | |
18666 | __run_timers(base); | |
18667 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | |
18668 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | |
c7c16703 | 18669 | @@ -1836,16 +1871,16 @@ int timers_dead_cpu(unsigned int cpu) |
1a6e0f06 JK |
18670 | * The caller is globally serialized and nobody else |
18671 | * takes two locks at once, deadlock is not possible. | |
18672 | */ | |
18673 | - spin_lock_irq(&new_base->lock); | |
18674 | - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
18675 | + raw_spin_lock_irq(&new_base->lock); | |
18676 | + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
18677 | ||
18678 | BUG_ON(old_base->running_timer); | |
18679 | ||
18680 | for (i = 0; i < WHEEL_SIZE; i++) | |
18681 | migrate_timer_list(new_base, old_base->vectors + i); | |
18682 | ||
18683 | - spin_unlock(&old_base->lock); | |
18684 | - spin_unlock_irq(&new_base->lock); | |
18685 | + raw_spin_unlock(&old_base->lock); | |
18686 | + raw_spin_unlock_irq(&new_base->lock); | |
18687 | put_cpu_ptr(&timer_bases); | |
18688 | } | |
18689 | return 0; | |
c7c16703 | 18690 | @@ -1861,8 +1896,11 @@ static void __init init_timer_cpu(int cpu) |
1a6e0f06 JK |
18691 | for (i = 0; i < NR_BASES; i++) { |
18692 | base = per_cpu_ptr(&timer_bases[i], cpu); | |
18693 | base->cpu = cpu; | |
18694 | - spin_lock_init(&base->lock); | |
18695 | + raw_spin_lock_init(&base->lock); | |
18696 | base->clk = jiffies; | |
18697 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18698 | + init_swait_queue_head(&base->wait_for_running_timer); | |
18699 | +#endif | |
18700 | } | |
18701 | } | |
18702 | ||
18703 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig | |
c7c16703 | 18704 | index 2a96b063d659..812e37237eb8 100644 |
1a6e0f06 JK |
18705 | --- a/kernel/trace/Kconfig |
18706 | +++ b/kernel/trace/Kconfig | |
c7c16703 | 18707 | @@ -182,6 +182,24 @@ config IRQSOFF_TRACER |
1a6e0f06 JK |
18708 | enabled. This option and the preempt-off timing option can be |
18709 | used together or separately.) | |
18710 | ||
18711 | +config INTERRUPT_OFF_HIST | |
18712 | + bool "Interrupts-off Latency Histogram" | |
18713 | + depends on IRQSOFF_TRACER | |
18714 | + help | |
18715 | + This option generates continuously updated histograms (one per cpu) | |
18716 | + of the duration of time periods with interrupts disabled. The | |
18717 | + histograms are disabled by default. To enable them, write a non-zero | |
18718 | + number to | |
18719 | + | |
18720 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
18721 | + | |
18722 | + If PREEMPT_OFF_HIST is also selected, additional histograms (one | |
18723 | + per cpu) are generated that accumulate the duration of time periods | |
18724 | + when both interrupts and preemption are disabled. The histogram data | |
18725 | + will be located in the debug file system at | |
18726 | + | |
18727 | + /sys/kernel/debug/tracing/latency_hist/irqsoff | |
18728 | + | |
18729 | config PREEMPT_TRACER | |
18730 | bool "Preemption-off Latency Tracer" | |
18731 | default n | |
c7c16703 | 18732 | @@ -206,6 +224,24 @@ config PREEMPT_TRACER |
1a6e0f06 JK |
18733 | enabled. This option and the irqs-off timing option can be |
18734 | used together or separately.) | |
18735 | ||
18736 | +config PREEMPT_OFF_HIST | |
18737 | + bool "Preemption-off Latency Histogram" | |
18738 | + depends on PREEMPT_TRACER | |
18739 | + help | |
18740 | + This option generates continuously updated histograms (one per cpu) | |
18741 | + of the duration of time periods with preemption disabled. The | |
18742 | + histograms are disabled by default. To enable them, write a non-zero | |
18743 | + number to | |
18744 | + | |
18745 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
18746 | + | |
18747 | + If INTERRUPT_OFF_HIST is also selected, additional histograms (one | |
18748 | + per cpu) are generated that accumulate the duration of time periods | |
18749 | + when both interrupts and preemption are disabled. The histogram data | |
18750 | + will be located in the debug file system at | |
18751 | + | |
18752 | + /sys/kernel/debug/tracing/latency_hist/preemptoff | |
18753 | + | |
18754 | config SCHED_TRACER | |
18755 | bool "Scheduling Latency Tracer" | |
18756 | select GENERIC_TRACER | |
c7c16703 JK |
18757 | @@ -251,6 +287,74 @@ config HWLAT_TRACER |
18758 | file. Every time a latency is greater than tracing_thresh, it will | |
18759 | be recorded into the ring buffer. | |
1a6e0f06 JK |
18760 | |
18761 | +config WAKEUP_LATENCY_HIST | |
18762 | + bool "Scheduling Latency Histogram" | |
18763 | + depends on SCHED_TRACER | |
18764 | + help | |
18765 | + This option generates continuously updated histograms (one per cpu) | |
18766 | + of the scheduling latency of the highest priority task. | |
18767 | + The histograms are disabled by default. To enable them, write a | |
18768 | + non-zero number to | |
18769 | + | |
18770 | + /sys/kernel/debug/tracing/latency_hist/enable/wakeup | |
18771 | + | |
18772 | + Two different algorithms are used, one to determine the latency of | |
18773 | + processes that exclusively use the highest priority of the system and | |
18774 | + another one to determine the latency of processes that share the | |
18775 | + highest system priority with other processes. The former is used to | |
18776 | + improve hardware and system software, the latter to optimize the | |
18777 | + priority design of a given system. The histogram data will be | |
18778 | + located in the debug file system at | |
18779 | + | |
18780 | + /sys/kernel/debug/tracing/latency_hist/wakeup | |
18781 | + | |
18782 | + and | |
18783 | + | |
18784 | + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio | |
18785 | + | |
18786 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
18787 | + Histogram are selected, additional histogram data will be collected | |
18788 | + that contain, in addition to the wakeup latency, the timer latency, in | |
18789 | + case the wakeup was triggered by an expired timer. These histograms | |
18790 | + are available in the | |
18791 | + | |
18792 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
18793 | + | |
18794 | + directory. They reflect the apparent interrupt and scheduling latency | |
18795 | + and are best suitable to determine the worst-case latency of a given | |
18796 | + system. To enable these histograms, write a non-zero number to | |
18797 | + | |
18798 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
18799 | + | |
18800 | +config MISSED_TIMER_OFFSETS_HIST | |
18801 | + depends on HIGH_RES_TIMERS | |
18802 | + select GENERIC_TRACER | |
18803 | + bool "Missed Timer Offsets Histogram" | |
18804 | + help | |
18805 | + Generate a histogram of missed timer offsets in microseconds. The | |
18806 | + histograms are disabled by default. To enable them, write a non-zero | |
18807 | + number to | |
18808 | + | |
18809 | + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets | |
18810 | + | |
18811 | + The histogram data will be located in the debug file system at | |
18812 | + | |
18813 | + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets | |
18814 | + | |
18815 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
18816 | + Histogram are selected, additional histogram data will be collected | |
18817 | + that contain, in addition to the wakeup latency, the timer latency, in | |
18818 | + case the wakeup was triggered by an expired timer. These histograms | |
18819 | + are available in the | |
18820 | + | |
18821 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
18822 | + | |
18823 | + directory. They reflect the apparent interrupt and scheduling latency | |
18824 | + and are best suitable to determine the worst-case latency of a given | |
18825 | + system. To enable these histograms, write a non-zero number to | |
18826 | + | |
18827 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
18828 | + | |
18829 | config ENABLE_DEFAULT_TRACERS | |
18830 | bool "Trace process context switches and events" | |
18831 | depends on !GENERIC_TRACER | |
18832 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile | |
c7c16703 | 18833 | index e57980845549..83af000b783c 100644 |
1a6e0f06 JK |
18834 | --- a/kernel/trace/Makefile |
18835 | +++ b/kernel/trace/Makefile | |
c7c16703 | 18836 | @@ -38,6 +38,10 @@ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o |
1a6e0f06 JK |
18837 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o |
18838 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |
c7c16703 | 18839 | obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o |
1a6e0f06 JK |
18840 | +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o |
18841 | +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o | |
18842 | +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o | |
18843 | +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o | |
18844 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | |
18845 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |
18846 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | |
18847 | diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c | |
18848 | new file mode 100644 | |
18849 | index 000000000000..7f6ee70dea41 | |
18850 | --- /dev/null | |
18851 | +++ b/kernel/trace/latency_hist.c | |
18852 | @@ -0,0 +1,1178 @@ | |
18853 | +/* | |
18854 | + * kernel/trace/latency_hist.c | |
18855 | + * | |
18856 | + * Add support for histograms of preemption-off latency and | |
18857 | + * interrupt-off latency and wakeup latency, it depends on | |
18858 | + * Real-Time Preemption Support. | |
18859 | + * | |
18860 | + * Copyright (C) 2005 MontaVista Software, Inc. | |
18861 | + * Yi Yang <yyang@ch.mvista.com> | |
18862 | + * | |
18863 | + * Converted to work with the new latency tracer. | |
18864 | + * Copyright (C) 2008 Red Hat, Inc. | |
18865 | + * Steven Rostedt <srostedt@redhat.com> | |
18866 | + * | |
18867 | + */ | |
18868 | +#include <linux/module.h> | |
18869 | +#include <linux/debugfs.h> | |
18870 | +#include <linux/seq_file.h> | |
18871 | +#include <linux/percpu.h> | |
18872 | +#include <linux/kallsyms.h> | |
18873 | +#include <linux/uaccess.h> | |
18874 | +#include <linux/sched.h> | |
18875 | +#include <linux/sched/rt.h> | |
18876 | +#include <linux/slab.h> | |
18877 | +#include <linux/atomic.h> | |
18878 | +#include <asm/div64.h> | |
18879 | + | |
18880 | +#include "trace.h" | |
18881 | +#include <trace/events/sched.h> | |
18882 | + | |
18883 | +#define NSECS_PER_USECS 1000L | |
18884 | + | |
18885 | +#define CREATE_TRACE_POINTS | |
18886 | +#include <trace/events/hist.h> | |
18887 | + | |
18888 | +enum { | |
18889 | + IRQSOFF_LATENCY = 0, | |
18890 | + PREEMPTOFF_LATENCY, | |
18891 | + PREEMPTIRQSOFF_LATENCY, | |
18892 | + WAKEUP_LATENCY, | |
18893 | + WAKEUP_LATENCY_SHAREDPRIO, | |
18894 | + MISSED_TIMER_OFFSETS, | |
18895 | + TIMERANDWAKEUP_LATENCY, | |
18896 | + MAX_LATENCY_TYPE, | |
18897 | +}; | |
18898 | + | |
18899 | +#define MAX_ENTRY_NUM 10240 | |
18900 | + | |
18901 | +struct hist_data { | |
18902 | + atomic_t hist_mode; /* 0 log, 1 don't log */ | |
18903 | + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */ | |
18904 | + long min_lat; | |
18905 | + long max_lat; | |
18906 | + unsigned long long below_hist_bound_samples; | |
18907 | + unsigned long long above_hist_bound_samples; | |
18908 | + long long accumulate_lat; | |
18909 | + unsigned long long total_samples; | |
18910 | + unsigned long long hist_array[MAX_ENTRY_NUM]; | |
18911 | +}; | |
18912 | + | |
18913 | +struct enable_data { | |
18914 | + int latency_type; | |
18915 | + int enabled; | |
18916 | +}; | |
18917 | + | |
18918 | +static char *latency_hist_dir_root = "latency_hist"; | |
18919 | + | |
18920 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
18921 | +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist); | |
18922 | +static char *irqsoff_hist_dir = "irqsoff"; | |
18923 | +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); | |
18924 | +static DEFINE_PER_CPU(int, hist_irqsoff_counting); | |
18925 | +#endif | |
18926 | + | |
18927 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
18928 | +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist); | |
18929 | +static char *preemptoff_hist_dir = "preemptoff"; | |
18930 | +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); | |
18931 | +static DEFINE_PER_CPU(int, hist_preemptoff_counting); | |
18932 | +#endif | |
18933 | + | |
18934 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
18935 | +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist); | |
18936 | +static char *preemptirqsoff_hist_dir = "preemptirqsoff"; | |
18937 | +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); | |
18938 | +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting); | |
18939 | +#endif | |
18940 | + | |
18941 | +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST) | |
18942 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start); | |
18943 | +static struct enable_data preemptirqsoff_enabled_data = { | |
18944 | + .latency_type = PREEMPTIRQSOFF_LATENCY, | |
18945 | + .enabled = 0, | |
18946 | +}; | |
18947 | +#endif | |
18948 | + | |
18949 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
18950 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
18951 | +struct maxlatproc_data { | |
18952 | + char comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
18953 | + char current_comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
18954 | + int pid; | |
18955 | + int current_pid; | |
18956 | + int prio; | |
18957 | + int current_prio; | |
18958 | + long latency; | |
18959 | + long timeroffset; | |
18960 | + cycle_t timestamp; | |
18961 | +}; | |
18962 | +#endif | |
18963 | + | |
18964 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
18965 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); | |
18966 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); | |
18967 | +static char *wakeup_latency_hist_dir = "wakeup"; | |
18968 | +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; | |
18969 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
18970 | + struct task_struct *p); | |
18971 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
18972 | + bool preempt, struct task_struct *prev, struct task_struct *next); | |
18973 | +static notrace void probe_sched_migrate_task(void *, | |
18974 | + struct task_struct *task, int cpu); | |
18975 | +static struct enable_data wakeup_latency_enabled_data = { | |
18976 | + .latency_type = WAKEUP_LATENCY, | |
18977 | + .enabled = 0, | |
18978 | +}; | |
18979 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc); | |
18980 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio); | |
18981 | +static DEFINE_PER_CPU(struct task_struct *, wakeup_task); | |
18982 | +static DEFINE_PER_CPU(int, wakeup_sharedprio); | |
18983 | +static unsigned long wakeup_pid; | |
18984 | +#endif | |
18985 | + | |
18986 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
18987 | +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets); | |
18988 | +static char *missed_timer_offsets_dir = "missed_timer_offsets"; | |
18989 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
18990 | + long long offset, struct task_struct *curr, struct task_struct *task); | |
18991 | +static struct enable_data missed_timer_offsets_enabled_data = { | |
18992 | + .latency_type = MISSED_TIMER_OFFSETS, | |
18993 | + .enabled = 0, | |
18994 | +}; | |
18995 | +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); | |
18996 | +static unsigned long missed_timer_offsets_pid; | |
18997 | +#endif | |
18998 | + | |
18999 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19000 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19001 | +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); | |
19002 | +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; | |
19003 | +static struct enable_data timerandwakeup_enabled_data = { | |
19004 | + .latency_type = TIMERANDWAKEUP_LATENCY, | |
19005 | + .enabled = 0, | |
19006 | +}; | |
19007 | +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); | |
19008 | +#endif | |
19009 | + | |
19010 | +void notrace latency_hist(int latency_type, int cpu, long latency, | |
19011 | + long timeroffset, cycle_t stop, | |
19012 | + struct task_struct *p) | |
19013 | +{ | |
19014 | + struct hist_data *my_hist; | |
19015 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19016 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19017 | + struct maxlatproc_data *mp = NULL; | |
19018 | +#endif | |
19019 | + | |
19020 | + if (!cpu_possible(cpu) || latency_type < 0 || | |
19021 | + latency_type >= MAX_LATENCY_TYPE) | |
19022 | + return; | |
19023 | + | |
19024 | + switch (latency_type) { | |
19025 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19026 | + case IRQSOFF_LATENCY: | |
19027 | + my_hist = &per_cpu(irqsoff_hist, cpu); | |
19028 | + break; | |
19029 | +#endif | |
19030 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19031 | + case PREEMPTOFF_LATENCY: | |
19032 | + my_hist = &per_cpu(preemptoff_hist, cpu); | |
19033 | + break; | |
19034 | +#endif | |
19035 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
19036 | + case PREEMPTIRQSOFF_LATENCY: | |
19037 | + my_hist = &per_cpu(preemptirqsoff_hist, cpu); | |
19038 | + break; | |
19039 | +#endif | |
19040 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19041 | + case WAKEUP_LATENCY: | |
19042 | + my_hist = &per_cpu(wakeup_latency_hist, cpu); | |
19043 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
19044 | + break; | |
19045 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
19046 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
19047 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
19048 | + break; | |
19049 | +#endif | |
19050 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19051 | + case MISSED_TIMER_OFFSETS: | |
19052 | + my_hist = &per_cpu(missed_timer_offsets, cpu); | |
19053 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
19054 | + break; | |
19055 | +#endif | |
19056 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19057 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19058 | + case TIMERANDWAKEUP_LATENCY: | |
19059 | + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
19060 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
19061 | + break; | |
19062 | +#endif | |
19063 | + | |
19064 | + default: | |
19065 | + return; | |
19066 | + } | |
19067 | + | |
19068 | + latency += my_hist->offset; | |
19069 | + | |
19070 | + if (atomic_read(&my_hist->hist_mode) == 0) | |
19071 | + return; | |
19072 | + | |
19073 | + if (latency < 0 || latency >= MAX_ENTRY_NUM) { | |
19074 | + if (latency < 0) | |
19075 | + my_hist->below_hist_bound_samples++; | |
19076 | + else | |
19077 | + my_hist->above_hist_bound_samples++; | |
19078 | + } else | |
19079 | + my_hist->hist_array[latency]++; | |
19080 | + | |
19081 | + if (unlikely(latency > my_hist->max_lat || | |
19082 | + my_hist->min_lat == LONG_MAX)) { | |
19083 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19084 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19085 | + if (latency_type == WAKEUP_LATENCY || | |
19086 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
19087 | + latency_type == MISSED_TIMER_OFFSETS || | |
19088 | + latency_type == TIMERANDWAKEUP_LATENCY) { | |
19089 | + strncpy(mp->comm, p->comm, sizeof(mp->comm)); | |
19090 | + strncpy(mp->current_comm, current->comm, | |
19091 | + sizeof(mp->current_comm)); | |
19092 | + mp->pid = task_pid_nr(p); | |
19093 | + mp->current_pid = task_pid_nr(current); | |
19094 | + mp->prio = p->prio; | |
19095 | + mp->current_prio = current->prio; | |
19096 | + mp->latency = latency; | |
19097 | + mp->timeroffset = timeroffset; | |
19098 | + mp->timestamp = stop; | |
19099 | + } | |
19100 | +#endif | |
19101 | + my_hist->max_lat = latency; | |
19102 | + } | |
19103 | + if (unlikely(latency < my_hist->min_lat)) | |
19104 | + my_hist->min_lat = latency; | |
19105 | + my_hist->total_samples++; | |
19106 | + my_hist->accumulate_lat += latency; | |
19107 | +} | |
19108 | + | |
19109 | +static void *l_start(struct seq_file *m, loff_t *pos) | |
19110 | +{ | |
19111 | + loff_t *index_ptr = NULL; | |
19112 | + loff_t index = *pos; | |
19113 | + struct hist_data *my_hist = m->private; | |
19114 | + | |
19115 | + if (index == 0) { | |
19116 | + char minstr[32], avgstr[32], maxstr[32]; | |
19117 | + | |
19118 | + atomic_dec(&my_hist->hist_mode); | |
19119 | + | |
19120 | + if (likely(my_hist->total_samples)) { | |
19121 | + long avg = (long) div64_s64(my_hist->accumulate_lat, | |
19122 | + my_hist->total_samples); | |
19123 | + snprintf(minstr, sizeof(minstr), "%ld", | |
19124 | + my_hist->min_lat - my_hist->offset); | |
19125 | + snprintf(avgstr, sizeof(avgstr), "%ld", | |
19126 | + avg - my_hist->offset); | |
19127 | + snprintf(maxstr, sizeof(maxstr), "%ld", | |
19128 | + my_hist->max_lat - my_hist->offset); | |
19129 | + } else { | |
19130 | + strcpy(minstr, "<undef>"); | |
19131 | + strcpy(avgstr, minstr); | |
19132 | + strcpy(maxstr, minstr); | |
19133 | + } | |
19134 | + | |
19135 | + seq_printf(m, "#Minimum latency: %s microseconds\n" | |
19136 | + "#Average latency: %s microseconds\n" | |
19137 | + "#Maximum latency: %s microseconds\n" | |
19138 | + "#Total samples: %llu\n" | |
19139 | + "#There are %llu samples lower than %ld" | |
19140 | + " microseconds.\n" | |
19141 | + "#There are %llu samples greater or equal" | |
19142 | + " than %ld microseconds.\n" | |
19143 | + "#usecs\t%16s\n", | |
19144 | + minstr, avgstr, maxstr, | |
19145 | + my_hist->total_samples, | |
19146 | + my_hist->below_hist_bound_samples, | |
19147 | + -my_hist->offset, | |
19148 | + my_hist->above_hist_bound_samples, | |
19149 | + MAX_ENTRY_NUM - my_hist->offset, | |
19150 | + "samples"); | |
19151 | + } | |
19152 | + if (index < MAX_ENTRY_NUM) { | |
19153 | + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL); | |
19154 | + if (index_ptr) | |
19155 | + *index_ptr = index; | |
19156 | + } | |
19157 | + | |
19158 | + return index_ptr; | |
19159 | +} | |
19160 | + | |
19161 | +static void *l_next(struct seq_file *m, void *p, loff_t *pos) | |
19162 | +{ | |
19163 | + loff_t *index_ptr = p; | |
19164 | + struct hist_data *my_hist = m->private; | |
19165 | + | |
19166 | + if (++*pos >= MAX_ENTRY_NUM) { | |
19167 | + atomic_inc(&my_hist->hist_mode); | |
19168 | + return NULL; | |
19169 | + } | |
19170 | + *index_ptr = *pos; | |
19171 | + return index_ptr; | |
19172 | +} | |
19173 | + | |
19174 | +static void l_stop(struct seq_file *m, void *p) | |
19175 | +{ | |
19176 | + kfree(p); | |
19177 | +} | |
19178 | + | |
19179 | +static int l_show(struct seq_file *m, void *p) | |
19180 | +{ | |
19181 | + int index = *(loff_t *) p; | |
19182 | + struct hist_data *my_hist = m->private; | |
19183 | + | |
19184 | + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset, | |
19185 | + my_hist->hist_array[index]); | |
19186 | + return 0; | |
19187 | +} | |
19188 | + | |
19189 | +static const struct seq_operations latency_hist_seq_op = { | |
19190 | + .start = l_start, | |
19191 | + .next = l_next, | |
19192 | + .stop = l_stop, | |
19193 | + .show = l_show | |
19194 | +}; | |
19195 | + | |
19196 | +static int latency_hist_open(struct inode *inode, struct file *file) | |
19197 | +{ | |
19198 | + int ret; | |
19199 | + | |
19200 | + ret = seq_open(file, &latency_hist_seq_op); | |
19201 | + if (!ret) { | |
19202 | + struct seq_file *seq = file->private_data; | |
19203 | + seq->private = inode->i_private; | |
19204 | + } | |
19205 | + return ret; | |
19206 | +} | |
19207 | + | |
19208 | +static const struct file_operations latency_hist_fops = { | |
19209 | + .open = latency_hist_open, | |
19210 | + .read = seq_read, | |
19211 | + .llseek = seq_lseek, | |
19212 | + .release = seq_release, | |
19213 | +}; | |
19214 | + | |
19215 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19216 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19217 | +static void clear_maxlatprocdata(struct maxlatproc_data *mp) | |
19218 | +{ | |
19219 | + mp->comm[0] = mp->current_comm[0] = '\0'; | |
19220 | + mp->prio = mp->current_prio = mp->pid = mp->current_pid = | |
19221 | + mp->latency = mp->timeroffset = -1; | |
19222 | + mp->timestamp = 0; | |
19223 | +} | |
19224 | +#endif | |
19225 | + | |
19226 | +static void hist_reset(struct hist_data *hist) | |
19227 | +{ | |
19228 | + atomic_dec(&hist->hist_mode); | |
19229 | + | |
19230 | + memset(hist->hist_array, 0, sizeof(hist->hist_array)); | |
19231 | + hist->below_hist_bound_samples = 0ULL; | |
19232 | + hist->above_hist_bound_samples = 0ULL; | |
19233 | + hist->min_lat = LONG_MAX; | |
19234 | + hist->max_lat = LONG_MIN; | |
19235 | + hist->total_samples = 0ULL; | |
19236 | + hist->accumulate_lat = 0LL; | |
19237 | + | |
19238 | + atomic_inc(&hist->hist_mode); | |
19239 | +} | |
19240 | + | |
19241 | +static ssize_t | |
19242 | +latency_hist_reset(struct file *file, const char __user *a, | |
19243 | + size_t size, loff_t *off) | |
19244 | +{ | |
19245 | + int cpu; | |
19246 | + struct hist_data *hist = NULL; | |
19247 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19248 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19249 | + struct maxlatproc_data *mp = NULL; | |
19250 | +#endif | |
19251 | + off_t latency_type = (off_t) file->private_data; | |
19252 | + | |
19253 | + for_each_online_cpu(cpu) { | |
19254 | + | |
19255 | + switch (latency_type) { | |
19256 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19257 | + case PREEMPTOFF_LATENCY: | |
19258 | + hist = &per_cpu(preemptoff_hist, cpu); | |
19259 | + break; | |
19260 | +#endif | |
19261 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19262 | + case IRQSOFF_LATENCY: | |
19263 | + hist = &per_cpu(irqsoff_hist, cpu); | |
19264 | + break; | |
19265 | +#endif | |
19266 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19267 | + case PREEMPTIRQSOFF_LATENCY: | |
19268 | + hist = &per_cpu(preemptirqsoff_hist, cpu); | |
19269 | + break; | |
19270 | +#endif | |
19271 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19272 | + case WAKEUP_LATENCY: | |
19273 | + hist = &per_cpu(wakeup_latency_hist, cpu); | |
19274 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
19275 | + break; | |
19276 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
19277 | + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
19278 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
19279 | + break; | |
19280 | +#endif | |
19281 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19282 | + case MISSED_TIMER_OFFSETS: | |
19283 | + hist = &per_cpu(missed_timer_offsets, cpu); | |
19284 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
19285 | + break; | |
19286 | +#endif | |
19287 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19288 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19289 | + case TIMERANDWAKEUP_LATENCY: | |
19290 | + hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
19291 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
19292 | + break; | |
19293 | +#endif | |
19294 | + } | |
19295 | + | |
19296 | + hist_reset(hist); | |
19297 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19298 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19299 | + if (latency_type == WAKEUP_LATENCY || | |
19300 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
19301 | + latency_type == MISSED_TIMER_OFFSETS || | |
19302 | + latency_type == TIMERANDWAKEUP_LATENCY) | |
19303 | + clear_maxlatprocdata(mp); | |
19304 | +#endif | |
19305 | + } | |
19306 | + | |
19307 | + return size; | |
19308 | +} | |
19309 | + | |
19310 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19311 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19312 | +static ssize_t | |
19313 | +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19314 | +{ | |
19315 | + char buf[64]; | |
19316 | + int r; | |
19317 | + unsigned long *this_pid = file->private_data; | |
19318 | + | |
19319 | + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid); | |
19320 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19321 | +} | |
19322 | + | |
19323 | +static ssize_t do_pid(struct file *file, const char __user *ubuf, | |
19324 | + size_t cnt, loff_t *ppos) | |
19325 | +{ | |
19326 | + char buf[64]; | |
19327 | + unsigned long pid; | |
19328 | + unsigned long *this_pid = file->private_data; | |
19329 | + | |
19330 | + if (cnt >= sizeof(buf)) | |
19331 | + return -EINVAL; | |
19332 | + | |
19333 | + if (copy_from_user(&buf, ubuf, cnt)) | |
19334 | + return -EFAULT; | |
19335 | + | |
19336 | + buf[cnt] = '\0'; | |
19337 | + | |
19338 | + if (kstrtoul(buf, 10, &pid)) | |
19339 | + return -EINVAL; | |
19340 | + | |
19341 | + *this_pid = pid; | |
19342 | + | |
19343 | + return cnt; | |
19344 | +} | |
19345 | +#endif | |
19346 | + | |
19347 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19348 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19349 | +static ssize_t | |
19350 | +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19351 | +{ | |
19352 | + int r; | |
19353 | + struct maxlatproc_data *mp = file->private_data; | |
19354 | + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8); | |
19355 | + unsigned long long t; | |
19356 | + unsigned long usecs, secs; | |
19357 | + char *buf; | |
19358 | + | |
19359 | + if (mp->pid == -1 || mp->current_pid == -1) { | |
19360 | + buf = "(none)\n"; | |
19361 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, | |
19362 | + strlen(buf)); | |
19363 | + } | |
19364 | + | |
19365 | + buf = kmalloc(strmaxlen, GFP_KERNEL); | |
19366 | + if (buf == NULL) | |
19367 | + return -ENOMEM; | |
19368 | + | |
19369 | + t = ns2usecs(mp->timestamp); | |
19370 | + usecs = do_div(t, USEC_PER_SEC); | |
19371 | + secs = (unsigned long) t; | |
19372 | + r = snprintf(buf, strmaxlen, | |
19373 | + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid, | |
19374 | + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm, | |
19375 | + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm, | |
19376 | + secs, usecs); | |
19377 | + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19378 | + kfree(buf); | |
19379 | + return r; | |
19380 | +} | |
19381 | +#endif | |
19382 | + | |
19383 | +static ssize_t | |
19384 | +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19385 | +{ | |
19386 | + char buf[64]; | |
19387 | + struct enable_data *ed = file->private_data; | |
19388 | + int r; | |
19389 | + | |
19390 | + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled); | |
19391 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19392 | +} | |
19393 | + | |
19394 | +static ssize_t | |
19395 | +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) | |
19396 | +{ | |
19397 | + char buf[64]; | |
19398 | + long enable; | |
19399 | + struct enable_data *ed = file->private_data; | |
19400 | + | |
19401 | + if (cnt >= sizeof(buf)) | |
19402 | + return -EINVAL; | |
19403 | + | |
19404 | + if (copy_from_user(&buf, ubuf, cnt)) | |
19405 | + return -EFAULT; | |
19406 | + | |
19407 | + buf[cnt] = 0; | |
19408 | + | |
19409 | + if (kstrtoul(buf, 10, &enable)) | |
19410 | + return -EINVAL; | |
19411 | + | |
19412 | + if ((enable && ed->enabled) || (!enable && !ed->enabled)) | |
19413 | + return cnt; | |
19414 | + | |
19415 | + if (enable) { | |
19416 | + int ret; | |
19417 | + | |
19418 | + switch (ed->latency_type) { | |
19419 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19420 | + case PREEMPTIRQSOFF_LATENCY: | |
19421 | + ret = register_trace_preemptirqsoff_hist( | |
19422 | + probe_preemptirqsoff_hist, NULL); | |
19423 | + if (ret) { | |
19424 | + pr_info("wakeup trace: Couldn't assign " | |
19425 | + "probe_preemptirqsoff_hist " | |
19426 | + "to trace_preemptirqsoff_hist\n"); | |
19427 | + return ret; | |
19428 | + } | |
19429 | + break; | |
19430 | +#endif | |
19431 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19432 | + case WAKEUP_LATENCY: | |
19433 | + ret = register_trace_sched_wakeup( | |
19434 | + probe_wakeup_latency_hist_start, NULL); | |
19435 | + if (ret) { | |
19436 | + pr_info("wakeup trace: Couldn't assign " | |
19437 | + "probe_wakeup_latency_hist_start " | |
19438 | + "to trace_sched_wakeup\n"); | |
19439 | + return ret; | |
19440 | + } | |
19441 | + ret = register_trace_sched_wakeup_new( | |
19442 | + probe_wakeup_latency_hist_start, NULL); | |
19443 | + if (ret) { | |
19444 | + pr_info("wakeup trace: Couldn't assign " | |
19445 | + "probe_wakeup_latency_hist_start " | |
19446 | + "to trace_sched_wakeup_new\n"); | |
19447 | + unregister_trace_sched_wakeup( | |
19448 | + probe_wakeup_latency_hist_start, NULL); | |
19449 | + return ret; | |
19450 | + } | |
19451 | + ret = register_trace_sched_switch( | |
19452 | + probe_wakeup_latency_hist_stop, NULL); | |
19453 | + if (ret) { | |
19454 | + pr_info("wakeup trace: Couldn't assign " | |
19455 | + "probe_wakeup_latency_hist_stop " | |
19456 | + "to trace_sched_switch\n"); | |
19457 | + unregister_trace_sched_wakeup( | |
19458 | + probe_wakeup_latency_hist_start, NULL); | |
19459 | + unregister_trace_sched_wakeup_new( | |
19460 | + probe_wakeup_latency_hist_start, NULL); | |
19461 | + return ret; | |
19462 | + } | |
19463 | + ret = register_trace_sched_migrate_task( | |
19464 | + probe_sched_migrate_task, NULL); | |
19465 | + if (ret) { | |
19466 | + pr_info("wakeup trace: Couldn't assign " | |
19467 | + "probe_sched_migrate_task " | |
19468 | + "to trace_sched_migrate_task\n"); | |
19469 | + unregister_trace_sched_wakeup( | |
19470 | + probe_wakeup_latency_hist_start, NULL); | |
19471 | + unregister_trace_sched_wakeup_new( | |
19472 | + probe_wakeup_latency_hist_start, NULL); | |
19473 | + unregister_trace_sched_switch( | |
19474 | + probe_wakeup_latency_hist_stop, NULL); | |
19475 | + return ret; | |
19476 | + } | |
19477 | + break; | |
19478 | +#endif | |
19479 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19480 | + case MISSED_TIMER_OFFSETS: | |
19481 | + ret = register_trace_hrtimer_interrupt( | |
19482 | + probe_hrtimer_interrupt, NULL); | |
19483 | + if (ret) { | |
19484 | + pr_info("wakeup trace: Couldn't assign " | |
19485 | + "probe_hrtimer_interrupt " | |
19486 | + "to trace_hrtimer_interrupt\n"); | |
19487 | + return ret; | |
19488 | + } | |
19489 | + break; | |
19490 | +#endif | |
19491 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19492 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19493 | + case TIMERANDWAKEUP_LATENCY: | |
19494 | + if (!wakeup_latency_enabled_data.enabled || | |
19495 | + !missed_timer_offsets_enabled_data.enabled) | |
19496 | + return -EINVAL; | |
19497 | + break; | |
19498 | +#endif | |
19499 | + default: | |
19500 | + break; | |
19501 | + } | |
19502 | + } else { | |
19503 | + switch (ed->latency_type) { | |
19504 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19505 | + case PREEMPTIRQSOFF_LATENCY: | |
19506 | + { | |
19507 | + int cpu; | |
19508 | + | |
19509 | + unregister_trace_preemptirqsoff_hist( | |
19510 | + probe_preemptirqsoff_hist, NULL); | |
19511 | + for_each_online_cpu(cpu) { | |
19512 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19513 | + per_cpu(hist_irqsoff_counting, | |
19514 | + cpu) = 0; | |
19515 | +#endif | |
19516 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19517 | + per_cpu(hist_preemptoff_counting, | |
19518 | + cpu) = 0; | |
19519 | +#endif | |
19520 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19521 | + per_cpu(hist_preemptirqsoff_counting, | |
19522 | + cpu) = 0; | |
19523 | +#endif | |
19524 | + } | |
19525 | + } | |
19526 | + break; | |
19527 | +#endif | |
19528 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19529 | + case WAKEUP_LATENCY: | |
19530 | + { | |
19531 | + int cpu; | |
19532 | + | |
19533 | + unregister_trace_sched_wakeup( | |
19534 | + probe_wakeup_latency_hist_start, NULL); | |
19535 | + unregister_trace_sched_wakeup_new( | |
19536 | + probe_wakeup_latency_hist_start, NULL); | |
19537 | + unregister_trace_sched_switch( | |
19538 | + probe_wakeup_latency_hist_stop, NULL); | |
19539 | + unregister_trace_sched_migrate_task( | |
19540 | + probe_sched_migrate_task, NULL); | |
19541 | + | |
19542 | + for_each_online_cpu(cpu) { | |
19543 | + per_cpu(wakeup_task, cpu) = NULL; | |
19544 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
19545 | + } | |
19546 | + } | |
19547 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19548 | + timerandwakeup_enabled_data.enabled = 0; | |
19549 | +#endif | |
19550 | + break; | |
19551 | +#endif | |
19552 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19553 | + case MISSED_TIMER_OFFSETS: | |
19554 | + unregister_trace_hrtimer_interrupt( | |
19555 | + probe_hrtimer_interrupt, NULL); | |
19556 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19557 | + timerandwakeup_enabled_data.enabled = 0; | |
19558 | +#endif | |
19559 | + break; | |
19560 | +#endif | |
19561 | + default: | |
19562 | + break; | |
19563 | + } | |
19564 | + } | |
19565 | + ed->enabled = enable; | |
19566 | + return cnt; | |
19567 | +} | |
19568 | + | |
19569 | +static const struct file_operations latency_hist_reset_fops = { | |
19570 | + .open = tracing_open_generic, | |
19571 | + .write = latency_hist_reset, | |
19572 | +}; | |
19573 | + | |
19574 | +static const struct file_operations enable_fops = { | |
19575 | + .open = tracing_open_generic, | |
19576 | + .read = show_enable, | |
19577 | + .write = do_enable, | |
19578 | +}; | |
19579 | + | |
19580 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19581 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19582 | +static const struct file_operations pid_fops = { | |
19583 | + .open = tracing_open_generic, | |
19584 | + .read = show_pid, | |
19585 | + .write = do_pid, | |
19586 | +}; | |
19587 | + | |
19588 | +static const struct file_operations maxlatproc_fops = { | |
19589 | + .open = tracing_open_generic, | |
19590 | + .read = show_maxlatproc, | |
19591 | +}; | |
19592 | +#endif | |
19593 | + | |
19594 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19595 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, | |
19596 | + int starthist) | |
19597 | +{ | |
19598 | + int cpu = raw_smp_processor_id(); | |
19599 | + int time_set = 0; | |
19600 | + | |
19601 | + if (starthist) { | |
19602 | + cycle_t uninitialized_var(start); | |
19603 | + | |
19604 | + if (!preempt_count() && !irqs_disabled()) | |
19605 | + return; | |
19606 | + | |
19607 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19608 | + if ((reason == IRQS_OFF || reason == TRACE_START) && | |
19609 | + !per_cpu(hist_irqsoff_counting, cpu)) { | |
19610 | + per_cpu(hist_irqsoff_counting, cpu) = 1; | |
19611 | + start = ftrace_now(cpu); | |
19612 | + time_set++; | |
19613 | + per_cpu(hist_irqsoff_start, cpu) = start; | |
19614 | + } | |
19615 | +#endif | |
19616 | + | |
19617 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19618 | + if ((reason == PREEMPT_OFF || reason == TRACE_START) && | |
19619 | + !per_cpu(hist_preemptoff_counting, cpu)) { | |
19620 | + per_cpu(hist_preemptoff_counting, cpu) = 1; | |
19621 | + if (!(time_set++)) | |
19622 | + start = ftrace_now(cpu); | |
19623 | + per_cpu(hist_preemptoff_start, cpu) = start; | |
19624 | + } | |
19625 | +#endif | |
19626 | + | |
19627 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19628 | + if (per_cpu(hist_irqsoff_counting, cpu) && | |
19629 | + per_cpu(hist_preemptoff_counting, cpu) && | |
19630 | + !per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
19631 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 1; | |
19632 | + if (!time_set) | |
19633 | + start = ftrace_now(cpu); | |
19634 | + per_cpu(hist_preemptirqsoff_start, cpu) = start; | |
19635 | + } | |
19636 | +#endif | |
19637 | + } else { | |
19638 | + cycle_t uninitialized_var(stop); | |
19639 | + | |
19640 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19641 | + if ((reason == IRQS_ON || reason == TRACE_STOP) && | |
19642 | + per_cpu(hist_irqsoff_counting, cpu)) { | |
19643 | + cycle_t start = per_cpu(hist_irqsoff_start, cpu); | |
19644 | + | |
19645 | + stop = ftrace_now(cpu); | |
19646 | + time_set++; | |
19647 | + if (start) { | |
19648 | + long latency = ((long) (stop - start)) / | |
19649 | + NSECS_PER_USECS; | |
19650 | + | |
19651 | + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0, | |
19652 | + stop, NULL); | |
19653 | + } | |
19654 | + per_cpu(hist_irqsoff_counting, cpu) = 0; | |
19655 | + } | |
19656 | +#endif | |
19657 | + | |
19658 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19659 | + if ((reason == PREEMPT_ON || reason == TRACE_STOP) && | |
19660 | + per_cpu(hist_preemptoff_counting, cpu)) { | |
19661 | + cycle_t start = per_cpu(hist_preemptoff_start, cpu); | |
19662 | + | |
19663 | + if (!(time_set++)) | |
19664 | + stop = ftrace_now(cpu); | |
19665 | + if (start) { | |
19666 | + long latency = ((long) (stop - start)) / | |
19667 | + NSECS_PER_USECS; | |
19668 | + | |
19669 | + latency_hist(PREEMPTOFF_LATENCY, cpu, latency, | |
19670 | + 0, stop, NULL); | |
19671 | + } | |
19672 | + per_cpu(hist_preemptoff_counting, cpu) = 0; | |
19673 | + } | |
19674 | +#endif | |
19675 | + | |
19676 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19677 | + if ((!per_cpu(hist_irqsoff_counting, cpu) || | |
19678 | + !per_cpu(hist_preemptoff_counting, cpu)) && | |
19679 | + per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
19680 | + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu); | |
19681 | + | |
19682 | + if (!time_set) | |
19683 | + stop = ftrace_now(cpu); | |
19684 | + if (start) { | |
19685 | + long latency = ((long) (stop - start)) / | |
19686 | + NSECS_PER_USECS; | |
19687 | + | |
19688 | + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu, | |
19689 | + latency, 0, stop, NULL); | |
19690 | + } | |
19691 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 0; | |
19692 | + } | |
19693 | +#endif | |
19694 | + } | |
19695 | +} | |
19696 | +#endif | |
19697 | + | |
19698 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19699 | +static DEFINE_RAW_SPINLOCK(wakeup_lock); | |
19700 | +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task, | |
19701 | + int cpu) | |
19702 | +{ | |
19703 | + int old_cpu = task_cpu(task); | |
19704 | + | |
19705 | + if (cpu != old_cpu) { | |
19706 | + unsigned long flags; | |
19707 | + struct task_struct *cpu_wakeup_task; | |
19708 | + | |
19709 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
19710 | + | |
19711 | + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu); | |
19712 | + if (task == cpu_wakeup_task) { | |
19713 | + put_task_struct(cpu_wakeup_task); | |
19714 | + per_cpu(wakeup_task, old_cpu) = NULL; | |
19715 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task; | |
19716 | + get_task_struct(cpu_wakeup_task); | |
19717 | + } | |
19718 | + | |
19719 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
19720 | + } | |
19721 | +} | |
19722 | + | |
19723 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
19724 | + struct task_struct *p) | |
19725 | +{ | |
19726 | + unsigned long flags; | |
19727 | + struct task_struct *curr = current; | |
19728 | + int cpu = task_cpu(p); | |
19729 | + struct task_struct *cpu_wakeup_task; | |
19730 | + | |
19731 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
19732 | + | |
19733 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
19734 | + | |
19735 | + if (wakeup_pid) { | |
19736 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
19737 | + p->prio == curr->prio) | |
19738 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
19739 | + if (likely(wakeup_pid != task_pid_nr(p))) | |
19740 | + goto out; | |
19741 | + } else { | |
19742 | + if (likely(!rt_task(p)) || | |
19743 | + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) || | |
19744 | + p->prio > curr->prio) | |
19745 | + goto out; | |
19746 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
19747 | + p->prio == curr->prio) | |
19748 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
19749 | + } | |
19750 | + | |
19751 | + if (cpu_wakeup_task) | |
19752 | + put_task_struct(cpu_wakeup_task); | |
19753 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p; | |
19754 | + get_task_struct(cpu_wakeup_task); | |
19755 | + cpu_wakeup_task->preempt_timestamp_hist = | |
19756 | + ftrace_now(raw_smp_processor_id()); | |
19757 | +out: | |
19758 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
19759 | +} | |
19760 | + | |
19761 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
19762 | + bool preempt, struct task_struct *prev, struct task_struct *next) | |
19763 | +{ | |
19764 | + unsigned long flags; | |
19765 | + int cpu = task_cpu(next); | |
19766 | + long latency; | |
19767 | + cycle_t stop; | |
19768 | + struct task_struct *cpu_wakeup_task; | |
19769 | + | |
19770 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
19771 | + | |
19772 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
19773 | + | |
19774 | + if (cpu_wakeup_task == NULL) | |
19775 | + goto out; | |
19776 | + | |
19777 | + /* Already running? */ | |
19778 | + if (unlikely(current == cpu_wakeup_task)) | |
19779 | + goto out_reset; | |
19780 | + | |
19781 | + if (next != cpu_wakeup_task) { | |
19782 | + if (next->prio < cpu_wakeup_task->prio) | |
19783 | + goto out_reset; | |
19784 | + | |
19785 | + if (next->prio == cpu_wakeup_task->prio) | |
19786 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
19787 | + | |
19788 | + goto out; | |
19789 | + } | |
19790 | + | |
19791 | + if (current->prio == cpu_wakeup_task->prio) | |
19792 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
19793 | + | |
19794 | + /* | |
19795 | + * The task we are waiting for is about to be switched to. | |
19796 | + * Calculate latency and store it in histogram. | |
19797 | + */ | |
19798 | + stop = ftrace_now(raw_smp_processor_id()); | |
19799 | + | |
19800 | + latency = ((long) (stop - next->preempt_timestamp_hist)) / | |
19801 | + NSECS_PER_USECS; | |
19802 | + | |
19803 | + if (per_cpu(wakeup_sharedprio, cpu)) { | |
19804 | + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop, | |
19805 | + next); | |
19806 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
19807 | + } else { | |
19808 | + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next); | |
19809 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19810 | + if (timerandwakeup_enabled_data.enabled) { | |
19811 | + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, | |
19812 | + next->timer_offset + latency, next->timer_offset, | |
19813 | + stop, next); | |
19814 | + } | |
19815 | +#endif | |
19816 | + } | |
19817 | + | |
19818 | +out_reset: | |
19819 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19820 | + next->timer_offset = 0; | |
19821 | +#endif | |
19822 | + put_task_struct(cpu_wakeup_task); | |
19823 | + per_cpu(wakeup_task, cpu) = NULL; | |
19824 | +out: | |
19825 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
19826 | +} | |
19827 | +#endif | |
19828 | + | |
19829 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19830 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
19831 | + long long latency_ns, struct task_struct *curr, | |
19832 | + struct task_struct *task) | |
19833 | +{ | |
19834 | + if (latency_ns <= 0 && task != NULL && rt_task(task) && | |
19835 | + (task->prio < curr->prio || | |
19836 | + (task->prio == curr->prio && | |
19837 | + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) { | |
19838 | + long latency; | |
19839 | + cycle_t now; | |
19840 | + | |
19841 | + if (missed_timer_offsets_pid) { | |
19842 | + if (likely(missed_timer_offsets_pid != | |
19843 | + task_pid_nr(task))) | |
19844 | + return; | |
19845 | + } | |
19846 | + | |
19847 | + now = ftrace_now(cpu); | |
19848 | + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS); | |
19849 | + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now, | |
19850 | + task); | |
19851 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19852 | + task->timer_offset = latency; | |
19853 | +#endif | |
19854 | + } | |
19855 | +} | |
19856 | +#endif | |
19857 | + | |
19858 | +static __init int latency_hist_init(void) | |
19859 | +{ | |
19860 | + struct dentry *latency_hist_root = NULL; | |
19861 | + struct dentry *dentry; | |
19862 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19863 | + struct dentry *dentry_sharedprio; | |
19864 | +#endif | |
19865 | + struct dentry *entry; | |
19866 | + struct dentry *enable_root; | |
19867 | + int i = 0; | |
19868 | + struct hist_data *my_hist; | |
19869 | + char name[64]; | |
19870 | + char *cpufmt = "CPU%d"; | |
19871 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19872 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19873 | + char *cpufmt_maxlatproc = "max_latency-CPU%d"; | |
19874 | + struct maxlatproc_data *mp = NULL; | |
19875 | +#endif | |
19876 | + | |
19877 | + dentry = tracing_init_dentry(); | |
19878 | + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry); | |
19879 | + enable_root = debugfs_create_dir("enable", latency_hist_root); | |
19880 | + | |
19881 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19882 | + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root); | |
19883 | + for_each_possible_cpu(i) { | |
19884 | + sprintf(name, cpufmt, i); | |
19885 | + entry = debugfs_create_file(name, 0444, dentry, | |
19886 | + &per_cpu(irqsoff_hist, i), &latency_hist_fops); | |
19887 | + my_hist = &per_cpu(irqsoff_hist, i); | |
19888 | + atomic_set(&my_hist->hist_mode, 1); | |
19889 | + my_hist->min_lat = LONG_MAX; | |
19890 | + } | |
19891 | + entry = debugfs_create_file("reset", 0644, dentry, | |
19892 | + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops); | |
19893 | +#endif | |
19894 | + | |
19895 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19896 | + dentry = debugfs_create_dir(preemptoff_hist_dir, | |
19897 | + latency_hist_root); | |
19898 | + for_each_possible_cpu(i) { | |
19899 | + sprintf(name, cpufmt, i); | |
19900 | + entry = debugfs_create_file(name, 0444, dentry, | |
19901 | + &per_cpu(preemptoff_hist, i), &latency_hist_fops); | |
19902 | + my_hist = &per_cpu(preemptoff_hist, i); | |
19903 | + atomic_set(&my_hist->hist_mode, 1); | |
19904 | + my_hist->min_lat = LONG_MAX; | |
19905 | + } | |
19906 | + entry = debugfs_create_file("reset", 0644, dentry, | |
19907 | + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops); | |
19908 | +#endif | |
19909 | + | |
19910 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19911 | + dentry = debugfs_create_dir(preemptirqsoff_hist_dir, | |
19912 | + latency_hist_root); | |
19913 | + for_each_possible_cpu(i) { | |
19914 | + sprintf(name, cpufmt, i); | |
19915 | + entry = debugfs_create_file(name, 0444, dentry, | |
19916 | + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops); | |
19917 | + my_hist = &per_cpu(preemptirqsoff_hist, i); | |
19918 | + atomic_set(&my_hist->hist_mode, 1); | |
19919 | + my_hist->min_lat = LONG_MAX; | |
19920 | + } | |
19921 | + entry = debugfs_create_file("reset", 0644, dentry, | |
19922 | + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops); | |
19923 | +#endif | |
19924 | + | |
19925 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19926 | + entry = debugfs_create_file("preemptirqsoff", 0644, | |
19927 | + enable_root, (void *)&preemptirqsoff_enabled_data, | |
19928 | + &enable_fops); | |
19929 | +#endif | |
19930 | + | |
19931 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19932 | + dentry = debugfs_create_dir(wakeup_latency_hist_dir, | |
19933 | + latency_hist_root); | |
19934 | + dentry_sharedprio = debugfs_create_dir( | |
19935 | + wakeup_latency_hist_dir_sharedprio, dentry); | |
19936 | + for_each_possible_cpu(i) { | |
19937 | + sprintf(name, cpufmt, i); | |
19938 | + | |
19939 | + entry = debugfs_create_file(name, 0444, dentry, | |
19940 | + &per_cpu(wakeup_latency_hist, i), | |
19941 | + &latency_hist_fops); | |
19942 | + my_hist = &per_cpu(wakeup_latency_hist, i); | |
19943 | + atomic_set(&my_hist->hist_mode, 1); | |
19944 | + my_hist->min_lat = LONG_MAX; | |
19945 | + | |
19946 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, | |
19947 | + &per_cpu(wakeup_latency_hist_sharedprio, i), | |
19948 | + &latency_hist_fops); | |
19949 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i); | |
19950 | + atomic_set(&my_hist->hist_mode, 1); | |
19951 | + my_hist->min_lat = LONG_MAX; | |
19952 | + | |
19953 | + sprintf(name, cpufmt_maxlatproc, i); | |
19954 | + | |
19955 | + mp = &per_cpu(wakeup_maxlatproc, i); | |
19956 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
19957 | + &maxlatproc_fops); | |
19958 | + clear_maxlatprocdata(mp); | |
19959 | + | |
19960 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i); | |
19961 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp, | |
19962 | + &maxlatproc_fops); | |
19963 | + clear_maxlatprocdata(mp); | |
19964 | + } | |
19965 | + entry = debugfs_create_file("pid", 0644, dentry, | |
19966 | + (void *)&wakeup_pid, &pid_fops); | |
19967 | + entry = debugfs_create_file("reset", 0644, dentry, | |
19968 | + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops); | |
19969 | + entry = debugfs_create_file("reset", 0644, dentry_sharedprio, | |
19970 | + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops); | |
19971 | + entry = debugfs_create_file("wakeup", 0644, | |
19972 | + enable_root, (void *)&wakeup_latency_enabled_data, | |
19973 | + &enable_fops); | |
19974 | +#endif | |
19975 | + | |
19976 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19977 | + dentry = debugfs_create_dir(missed_timer_offsets_dir, | |
19978 | + latency_hist_root); | |
19979 | + for_each_possible_cpu(i) { | |
19980 | + sprintf(name, cpufmt, i); | |
19981 | + entry = debugfs_create_file(name, 0444, dentry, | |
19982 | + &per_cpu(missed_timer_offsets, i), &latency_hist_fops); | |
19983 | + my_hist = &per_cpu(missed_timer_offsets, i); | |
19984 | + atomic_set(&my_hist->hist_mode, 1); | |
19985 | + my_hist->min_lat = LONG_MAX; | |
19986 | + | |
19987 | + sprintf(name, cpufmt_maxlatproc, i); | |
19988 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, i); | |
19989 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
19990 | + &maxlatproc_fops); | |
19991 | + clear_maxlatprocdata(mp); | |
19992 | + } | |
19993 | + entry = debugfs_create_file("pid", 0644, dentry, | |
19994 | + (void *)&missed_timer_offsets_pid, &pid_fops); | |
19995 | + entry = debugfs_create_file("reset", 0644, dentry, | |
19996 | + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops); | |
19997 | + entry = debugfs_create_file("missed_timer_offsets", 0644, | |
19998 | + enable_root, (void *)&missed_timer_offsets_enabled_data, | |
19999 | + &enable_fops); | |
20000 | +#endif | |
20001 | + | |
20002 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
20003 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20004 | + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, | |
20005 | + latency_hist_root); | |
20006 | + for_each_possible_cpu(i) { | |
20007 | + sprintf(name, cpufmt, i); | |
20008 | + entry = debugfs_create_file(name, 0444, dentry, | |
20009 | + &per_cpu(timerandwakeup_latency_hist, i), | |
20010 | + &latency_hist_fops); | |
20011 | + my_hist = &per_cpu(timerandwakeup_latency_hist, i); | |
20012 | + atomic_set(&my_hist->hist_mode, 1); | |
20013 | + my_hist->min_lat = LONG_MAX; | |
20014 | + | |
20015 | + sprintf(name, cpufmt_maxlatproc, i); | |
20016 | + mp = &per_cpu(timerandwakeup_maxlatproc, i); | |
20017 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
20018 | + &maxlatproc_fops); | |
20019 | + clear_maxlatprocdata(mp); | |
20020 | + } | |
20021 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20022 | + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); | |
20023 | + entry = debugfs_create_file("timerandwakeup", 0644, | |
20024 | + enable_root, (void *)&timerandwakeup_enabled_data, | |
20025 | + &enable_fops); | |
20026 | +#endif | |
20027 | + return 0; | |
20028 | +} | |
20029 | + | |
20030 | +device_initcall(latency_hist_init); | |
20031 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | |
c7c16703 | 20032 | index 8696ce6bf2f6..277f048a4695 100644 |
1a6e0f06 JK |
20033 | --- a/kernel/trace/trace.c |
20034 | +++ b/kernel/trace/trace.c | |
20035 | @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
20036 | struct task_struct *tsk = current; | |
20037 | ||
20038 | entry->preempt_count = pc & 0xff; | |
20039 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
20040 | entry->pid = (tsk) ? tsk->pid : 0; | |
20041 | entry->flags = | |
20042 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
20043 | @@ -1907,8 +1908,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
20044 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | | |
20045 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
20046 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | |
20047 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
20048 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
20049 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
20050 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
20051 | + | |
20052 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
20053 | } | |
20054 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | |
20055 | ||
20056 | @@ -2892,14 +2896,17 @@ get_total_entries(struct trace_buffer *buf, | |
20057 | ||
20058 | static void print_lat_help_header(struct seq_file *m) | |
20059 | { | |
20060 | - seq_puts(m, "# _------=> CPU# \n" | |
20061 | - "# / _-----=> irqs-off \n" | |
20062 | - "# | / _----=> need-resched \n" | |
20063 | - "# || / _---=> hardirq/softirq \n" | |
20064 | - "# ||| / _--=> preempt-depth \n" | |
20065 | - "# |||| / delay \n" | |
20066 | - "# cmd pid ||||| time | caller \n" | |
20067 | - "# \\ / ||||| \\ | / \n"); | |
20068 | + seq_puts(m, "# _--------=> CPU# \n" | |
20069 | + "# / _-------=> irqs-off \n" | |
20070 | + "# | / _------=> need-resched \n" | |
20071 | + "# || / _-----=> need-resched_lazy \n" | |
20072 | + "# ||| / _----=> hardirq/softirq \n" | |
20073 | + "# |||| / _---=> preempt-depth \n" | |
20074 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
20075 | + "# |||||| / _-=> migrate-disable \n" | |
20076 | + "# ||||||| / delay \n" | |
20077 | + "# cmd pid |||||||| time | caller \n" | |
20078 | + "# \\ / |||||||| \\ | / \n"); | |
20079 | } | |
20080 | ||
20081 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |
20082 | @@ -2925,11 +2932,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |
20083 | print_event_info(buf, m); | |
20084 | seq_puts(m, "# _-----=> irqs-off\n" | |
20085 | "# / _----=> need-resched\n" | |
20086 | - "# | / _---=> hardirq/softirq\n" | |
20087 | - "# || / _--=> preempt-depth\n" | |
20088 | - "# ||| / delay\n" | |
20089 | - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" | |
20090 | - "# | | | |||| | |\n"); | |
20091 | + "# |/ _-----=> need-resched_lazy\n" | |
20092 | + "# || / _---=> hardirq/softirq\n" | |
20093 | + "# ||| / _--=> preempt-depth\n" | |
20094 | + "# |||| / _-=> preempt-lazy-depth\n" | |
20095 | + "# ||||| / _-=> migrate-disable \n" | |
20096 | + "# |||||| / delay\n" | |
20097 | + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" | |
20098 | + "# | | | ||||||| | |\n"); | |
20099 | } | |
20100 | ||
20101 | void | |
20102 | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h | |
c7c16703 | 20103 | index fd24b1f9ac43..852b2c81be25 100644 |
1a6e0f06 JK |
20104 | --- a/kernel/trace/trace.h |
20105 | +++ b/kernel/trace/trace.h | |
c7c16703 | 20106 | @@ -124,6 +124,7 @@ struct kretprobe_trace_entry_head { |
1a6e0f06 JK |
20107 | * NEED_RESCHED - reschedule is requested |
20108 | * HARDIRQ - inside an interrupt handler | |
20109 | * SOFTIRQ - inside a softirq handler | |
20110 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
20111 | */ | |
20112 | enum trace_flag_type { | |
20113 | TRACE_FLAG_IRQS_OFF = 0x01, | |
c7c16703 | 20114 | @@ -133,6 +134,7 @@ enum trace_flag_type { |
1a6e0f06 JK |
20115 | TRACE_FLAG_SOFTIRQ = 0x10, |
20116 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
20117 | TRACE_FLAG_NMI = 0x40, | |
20118 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, | |
20119 | }; | |
20120 | ||
20121 | #define TRACE_BUF_SIZE 1024 | |
20122 | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c | |
20123 | index 03c0a48c3ac4..0b85d516b491 100644 | |
20124 | --- a/kernel/trace/trace_events.c | |
20125 | +++ b/kernel/trace/trace_events.c | |
20126 | @@ -187,6 +187,8 @@ static int trace_define_common_fields(void) | |
20127 | __common_field(unsigned char, flags); | |
20128 | __common_field(unsigned char, preempt_count); | |
20129 | __common_field(int, pid); | |
20130 | + __common_field(unsigned short, migrate_disable); | |
20131 | + __common_field(unsigned short, padding); | |
20132 | ||
20133 | return ret; | |
20134 | } | |
20135 | diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c | |
20136 | index 03cdff84d026..940bd10b4406 100644 | |
20137 | --- a/kernel/trace/trace_irqsoff.c | |
20138 | +++ b/kernel/trace/trace_irqsoff.c | |
20139 | @@ -13,6 +13,7 @@ | |
20140 | #include <linux/uaccess.h> | |
20141 | #include <linux/module.h> | |
20142 | #include <linux/ftrace.h> | |
20143 | +#include <trace/events/hist.h> | |
20144 | ||
20145 | #include "trace.h" | |
20146 | ||
20147 | @@ -424,11 +425,13 @@ void start_critical_timings(void) | |
20148 | { | |
20149 | if (preempt_trace() || irq_trace()) | |
20150 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20151 | + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); | |
20152 | } | |
20153 | EXPORT_SYMBOL_GPL(start_critical_timings); | |
20154 | ||
20155 | void stop_critical_timings(void) | |
20156 | { | |
20157 | + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); | |
20158 | if (preempt_trace() || irq_trace()) | |
20159 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20160 | } | |
20161 | @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); | |
20162 | #ifdef CONFIG_PROVE_LOCKING | |
20163 | void time_hardirqs_on(unsigned long a0, unsigned long a1) | |
20164 | { | |
20165 | + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); | |
20166 | if (!preempt_trace() && irq_trace()) | |
20167 | stop_critical_timing(a0, a1); | |
20168 | } | |
20169 | @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |
20170 | { | |
20171 | if (!preempt_trace() && irq_trace()) | |
20172 | start_critical_timing(a0, a1); | |
20173 | + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); | |
20174 | } | |
20175 | ||
20176 | #else /* !CONFIG_PROVE_LOCKING */ | |
20177 | @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr) | |
20178 | */ | |
20179 | void trace_hardirqs_on(void) | |
20180 | { | |
20181 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
20182 | if (!preempt_trace() && irq_trace()) | |
20183 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20184 | } | |
20185 | @@ -480,11 +486,13 @@ void trace_hardirqs_off(void) | |
20186 | { | |
20187 | if (!preempt_trace() && irq_trace()) | |
20188 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20189 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
20190 | } | |
20191 | EXPORT_SYMBOL(trace_hardirqs_off); | |
20192 | ||
20193 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) | |
20194 | { | |
20195 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
20196 | if (!preempt_trace() && irq_trace()) | |
20197 | stop_critical_timing(CALLER_ADDR0, caller_addr); | |
20198 | } | |
20199 | @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) | |
20200 | { | |
20201 | if (!preempt_trace() && irq_trace()) | |
20202 | start_critical_timing(CALLER_ADDR0, caller_addr); | |
20203 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
20204 | } | |
20205 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
20206 | ||
20207 | @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
20208 | #ifdef CONFIG_PREEMPT_TRACER | |
20209 | void trace_preempt_on(unsigned long a0, unsigned long a1) | |
20210 | { | |
20211 | + trace_preemptirqsoff_hist(PREEMPT_ON, 0); | |
20212 | if (preempt_trace() && !irq_trace()) | |
20213 | stop_critical_timing(a0, a1); | |
20214 | } | |
20215 | ||
20216 | void trace_preempt_off(unsigned long a0, unsigned long a1) | |
20217 | { | |
20218 | + trace_preemptirqsoff_hist(PREEMPT_ON, 1); | |
20219 | if (preempt_trace() && !irq_trace()) | |
20220 | start_critical_timing(a0, a1); | |
20221 | } | |
20222 | diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c | |
c7c16703 | 20223 | index 3fc20422c166..65a6dde71a7d 100644 |
1a6e0f06 JK |
20224 | --- a/kernel/trace/trace_output.c |
20225 | +++ b/kernel/trace/trace_output.c | |
20226 | @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20227 | { | |
20228 | char hardsoft_irq; | |
20229 | char need_resched; | |
20230 | + char need_resched_lazy; | |
20231 | char irqs_off; | |
20232 | int hardirq; | |
20233 | int softirq; | |
20234 | @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20235 | break; | |
20236 | } | |
20237 | ||
20238 | + need_resched_lazy = | |
20239 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
20240 | + | |
20241 | hardsoft_irq = | |
20242 | (nmi && hardirq) ? 'Z' : | |
20243 | nmi ? 'z' : | |
20244 | @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20245 | softirq ? 's' : | |
20246 | '.' ; | |
20247 | ||
20248 | - trace_seq_printf(s, "%c%c%c", | |
20249 | - irqs_off, need_resched, hardsoft_irq); | |
20250 | + trace_seq_printf(s, "%c%c%c%c", | |
20251 | + irqs_off, need_resched, need_resched_lazy, | |
20252 | + hardsoft_irq); | |
20253 | ||
20254 | if (entry->preempt_count) | |
20255 | trace_seq_printf(s, "%x", entry->preempt_count); | |
20256 | else | |
20257 | trace_seq_putc(s, '.'); | |
20258 | ||
20259 | + if (entry->preempt_lazy_count) | |
20260 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
20261 | + else | |
20262 | + trace_seq_putc(s, '.'); | |
20263 | + | |
20264 | + if (entry->migrate_disable) | |
20265 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
20266 | + else | |
20267 | + trace_seq_putc(s, '.'); | |
20268 | + | |
20269 | return !trace_seq_has_overflowed(s); | |
20270 | } | |
20271 | ||
20272 | diff --git a/kernel/user.c b/kernel/user.c | |
20273 | index b069ccbfb0b0..1a2e88e98b5e 100644 | |
20274 | --- a/kernel/user.c | |
20275 | +++ b/kernel/user.c | |
20276 | @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up) | |
20277 | if (!up) | |
20278 | return; | |
20279 | ||
20280 | - local_irq_save(flags); | |
20281 | + local_irq_save_nort(flags); | |
20282 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
20283 | free_user(up, flags); | |
20284 | else | |
20285 | - local_irq_restore(flags); | |
20286 | + local_irq_restore_nort(flags); | |
20287 | } | |
20288 | ||
20289 | struct user_struct *alloc_uid(kuid_t uid) | |
20290 | diff --git a/kernel/watchdog.c b/kernel/watchdog.c | |
c7c16703 | 20291 | index 6d1020c03d41..70c6a2f79f7e 100644 |
1a6e0f06 JK |
20292 | --- a/kernel/watchdog.c |
20293 | +++ b/kernel/watchdog.c | |
20294 | @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts) | |
20295 | ||
20296 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | |
20297 | ||
20298 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); | |
20299 | + | |
20300 | static struct perf_event_attr wd_hw_attr = { | |
20301 | .type = PERF_TYPE_HARDWARE, | |
20302 | .config = PERF_COUNT_HW_CPU_CYCLES, | |
c7c16703 | 20303 | @@ -348,6 +350,13 @@ static void watchdog_overflow_callback(struct perf_event *event, |
1a6e0f06 JK |
20304 | /* only print hardlockups once */ |
20305 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
20306 | return; | |
20307 | + /* | |
20308 | + * If early-printk is enabled then make sure we do not | |
20309 | + * lock up in printk() and kill console logging: | |
20310 | + */ | |
20311 | + printk_kill(); | |
20312 | + | |
20313 | + raw_spin_lock(&watchdog_output_lock); | |
20314 | ||
20315 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
20316 | print_modules(); | |
c7c16703 | 20317 | @@ -365,6 +374,7 @@ static void watchdog_overflow_callback(struct perf_event *event, |
1a6e0f06 JK |
20318 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) |
20319 | trigger_allbutself_cpu_backtrace(); | |
20320 | ||
20321 | + raw_spin_unlock(&watchdog_output_lock); | |
20322 | if (hardlockup_panic) | |
20323 | nmi_panic(regs, "Hard LOCKUP"); | |
20324 | ||
c7c16703 | 20325 | @@ -512,6 +522,7 @@ static void watchdog_enable(unsigned int cpu) |
1a6e0f06 JK |
20326 | /* kick off the timer for the hardlockup detector */ |
20327 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
20328 | hrtimer->function = watchdog_timer_fn; | |
20329 | + hrtimer->irqsafe = 1; | |
20330 | ||
20331 | /* Enable the perf event */ | |
20332 | watchdog_nmi_enable(cpu); | |
20333 | diff --git a/kernel/workqueue.c b/kernel/workqueue.c | |
c7c16703 | 20334 | index 479d840db286..24eba6620a45 100644 |
1a6e0f06 JK |
20335 | --- a/kernel/workqueue.c |
20336 | +++ b/kernel/workqueue.c | |
20337 | @@ -48,6 +48,8 @@ | |
20338 | #include <linux/nodemask.h> | |
20339 | #include <linux/moduleparam.h> | |
20340 | #include <linux/uaccess.h> | |
20341 | +#include <linux/locallock.h> | |
20342 | +#include <linux/delay.h> | |
20343 | ||
20344 | #include "workqueue_internal.h" | |
20345 | ||
20346 | @@ -121,11 +123,16 @@ enum { | |
20347 | * cpu or grabbing pool->lock is enough for read access. If | |
20348 | * POOL_DISASSOCIATED is set, it's identical to L. | |
20349 | * | |
20350 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
20351 | + * the list manipulations against read access from | |
20352 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
20353 | + * pool->lock. | |
20354 | + * | |
20355 | * A: pool->attach_mutex protected. | |
20356 | * | |
20357 | * PL: wq_pool_mutex protected. | |
20358 | * | |
20359 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
20360 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
20361 | * | |
20362 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
20363 | * | |
20364 | @@ -134,7 +141,7 @@ enum { | |
20365 | * | |
20366 | * WQ: wq->mutex protected. | |
20367 | * | |
20368 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
20369 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
20370 | * | |
20371 | * MD: wq_mayday_lock protected. | |
20372 | */ | |
20373 | @@ -185,7 +192,7 @@ struct worker_pool { | |
20374 | atomic_t nr_running ____cacheline_aligned_in_smp; | |
20375 | ||
20376 | /* | |
20377 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
20378 | + * Destruction of pool is RCU protected to allow dereferences | |
20379 | * from get_work_pool(). | |
20380 | */ | |
20381 | struct rcu_head rcu; | |
20382 | @@ -214,7 +221,7 @@ struct pool_workqueue { | |
20383 | /* | |
20384 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
20385 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
20386 | - * itself is also sched-RCU protected so that the first pwq can be | |
20387 | + * itself is also RCU protected so that the first pwq can be | |
20388 | * determined without grabbing wq->mutex. | |
20389 | */ | |
20390 | struct work_struct unbound_release_work; | |
20391 | @@ -348,6 +355,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); | |
20392 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; | |
20393 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
20394 | ||
20395 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
20396 | + | |
20397 | static int worker_thread(void *__worker); | |
20398 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20399 | ||
20400 | @@ -355,20 +364,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20401 | #include <trace/events/workqueue.h> | |
20402 | ||
20403 | #define assert_rcu_or_pool_mutex() \ | |
20404 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20405 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20406 | !lockdep_is_held(&wq_pool_mutex), \ | |
20407 | - "sched RCU or wq_pool_mutex should be held") | |
20408 | + "RCU or wq_pool_mutex should be held") | |
20409 | ||
20410 | #define assert_rcu_or_wq_mutex(wq) \ | |
20411 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20412 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20413 | !lockdep_is_held(&wq->mutex), \ | |
20414 | - "sched RCU or wq->mutex should be held") | |
20415 | + "RCU or wq->mutex should be held") | |
20416 | ||
20417 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
20418 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20419 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20420 | !lockdep_is_held(&wq->mutex) && \ | |
20421 | !lockdep_is_held(&wq_pool_mutex), \ | |
20422 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
20423 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
20424 | ||
20425 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
20426 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
20427 | @@ -380,7 +389,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20428 | * @pool: iteration cursor | |
20429 | * @pi: integer used for iteration | |
20430 | * | |
20431 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
20432 | + * This must be called either with wq_pool_mutex held or RCU read | |
20433 | * locked. If the pool needs to be used beyond the locking in effect, the | |
20434 | * caller is responsible for guaranteeing that the pool stays online. | |
20435 | * | |
20436 | @@ -412,7 +421,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20437 | * @pwq: iteration cursor | |
20438 | * @wq: the target workqueue | |
20439 | * | |
20440 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
20441 | + * This must be called either with wq->mutex held or RCU read locked. | |
20442 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
20443 | * responsible for guaranteeing that the pwq stays online. | |
20444 | * | |
20445 | @@ -424,6 +433,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20446 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ | |
20447 | else | |
20448 | ||
20449 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20450 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
20451 | +{ | |
20452 | + preempt_disable(); | |
20453 | +} | |
20454 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
20455 | +{ | |
20456 | + preempt_enable(); | |
20457 | +} | |
20458 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
20459 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
20460 | +#else | |
20461 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
20462 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
20463 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
20464 | +{ | |
20465 | + spin_lock_irq(&pool->lock); | |
20466 | +} | |
20467 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
20468 | +{ | |
20469 | + spin_unlock_irq(&pool->lock); | |
20470 | +} | |
20471 | +#endif | |
20472 | + | |
20473 | + | |
20474 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
20475 | ||
20476 | static struct debug_obj_descr work_debug_descr; | |
20477 | @@ -548,7 +582,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |
20478 | * @wq: the target workqueue | |
20479 | * @node: the node ID | |
20480 | * | |
20481 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
20482 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
20483 | * read locked. | |
20484 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
20485 | * responsible for guaranteeing that the pwq stays online. | |
20486 | @@ -692,8 +726,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) | |
20487 | * @work: the work item of interest | |
20488 | * | |
20489 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
20490 | - * access under sched-RCU read lock. As such, this function should be | |
20491 | - * called under wq_pool_mutex or with preemption disabled. | |
20492 | + * access under RCU read lock. As such, this function should be | |
20493 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
20494 | * | |
20495 | * All fields of the returned pool are accessible as long as the above | |
20496 | * mentioned locking is in effect. If the returned pool needs to be used | |
20497 | @@ -830,50 +864,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool) | |
20498 | */ | |
20499 | static void wake_up_worker(struct worker_pool *pool) | |
20500 | { | |
20501 | - struct worker *worker = first_idle_worker(pool); | |
20502 | + struct worker *worker; | |
20503 | + | |
20504 | + rt_lock_idle_list(pool); | |
20505 | + | |
20506 | + worker = first_idle_worker(pool); | |
20507 | ||
20508 | if (likely(worker)) | |
20509 | wake_up_process(worker->task); | |
20510 | + | |
20511 | + rt_unlock_idle_list(pool); | |
20512 | } | |
20513 | ||
20514 | /** | |
20515 | - * wq_worker_waking_up - a worker is waking up | |
20516 | + * wq_worker_running - a worker is running again | |
20517 | * @task: task waking up | |
20518 | - * @cpu: CPU @task is waking up to | |
20519 | * | |
20520 | - * This function is called during try_to_wake_up() when a worker is | |
20521 | - * being awoken. | |
20522 | - * | |
20523 | - * CONTEXT: | |
20524 | - * spin_lock_irq(rq->lock) | |
20525 | + * This function is called when a worker returns from schedule() | |
20526 | */ | |
20527 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
20528 | +void wq_worker_running(struct task_struct *task) | |
20529 | { | |
20530 | struct worker *worker = kthread_data(task); | |
20531 | ||
20532 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
20533 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
20534 | + if (!worker->sleeping) | |
20535 | + return; | |
20536 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
20537 | atomic_inc(&worker->pool->nr_running); | |
20538 | - } | |
20539 | + worker->sleeping = 0; | |
20540 | } | |
20541 | ||
20542 | /** | |
20543 | * wq_worker_sleeping - a worker is going to sleep | |
20544 | * @task: task going to sleep | |
20545 | * | |
20546 | - * This function is called during schedule() when a busy worker is | |
20547 | - * going to sleep. Worker on the same cpu can be woken up by | |
20548 | - * returning pointer to its task. | |
20549 | - * | |
20550 | - * CONTEXT: | |
20551 | - * spin_lock_irq(rq->lock) | |
20552 | - * | |
20553 | - * Return: | |
20554 | - * Worker task on @cpu to wake up, %NULL if none. | |
20555 | + * This function is called from schedule() when a busy worker is | |
20556 | + * going to sleep. | |
20557 | */ | |
20558 | -struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
20559 | +void wq_worker_sleeping(struct task_struct *task) | |
20560 | { | |
20561 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
20562 | + struct worker *worker = kthread_data(task); | |
20563 | struct worker_pool *pool; | |
20564 | ||
20565 | /* | |
20566 | @@ -882,29 +911,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
20567 | * checking NOT_RUNNING. | |
20568 | */ | |
20569 | if (worker->flags & WORKER_NOT_RUNNING) | |
20570 | - return NULL; | |
20571 | + return; | |
20572 | ||
20573 | pool = worker->pool; | |
20574 | ||
20575 | - /* this can only happen on the local cpu */ | |
20576 | - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) | |
20577 | - return NULL; | |
20578 | + if (WARN_ON_ONCE(worker->sleeping)) | |
20579 | + return; | |
20580 | + | |
20581 | + worker->sleeping = 1; | |
20582 | ||
20583 | /* | |
20584 | * The counterpart of the following dec_and_test, implied mb, | |
20585 | * worklist not empty test sequence is in insert_work(). | |
20586 | * Please read comment there. | |
20587 | - * | |
20588 | - * NOT_RUNNING is clear. This means that we're bound to and | |
20589 | - * running on the local cpu w/ rq lock held and preemption | |
20590 | - * disabled, which in turn means that none else could be | |
20591 | - * manipulating idle_list, so dereferencing idle_list without pool | |
20592 | - * lock is safe. | |
20593 | */ | |
20594 | if (atomic_dec_and_test(&pool->nr_running) && | |
20595 | - !list_empty(&pool->worklist)) | |
20596 | - to_wakeup = first_idle_worker(pool); | |
20597 | - return to_wakeup ? to_wakeup->task : NULL; | |
20598 | + !list_empty(&pool->worklist)) { | |
20599 | + sched_lock_idle_list(pool); | |
20600 | + wake_up_worker(pool); | |
20601 | + sched_unlock_idle_list(pool); | |
20602 | + } | |
20603 | } | |
20604 | ||
20605 | /** | |
c7c16703 | 20606 | @@ -1098,12 +1124,14 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) |
1a6e0f06 JK |
20607 | { |
20608 | if (pwq) { | |
20609 | /* | |
20610 | - * As both pwqs and pools are sched-RCU protected, the | |
20611 | + * As both pwqs and pools are RCU protected, the | |
20612 | * following lock operations are safe. | |
20613 | */ | |
20614 | - spin_lock_irq(&pwq->pool->lock); | |
c7c16703 | 20615 | + rcu_read_lock(); |
1a6e0f06 JK |
20616 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); |
20617 | put_pwq(pwq); | |
20618 | - spin_unlock_irq(&pwq->pool->lock); | |
20619 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
c7c16703 | 20620 | + rcu_read_unlock(); |
1a6e0f06 JK |
20621 | } |
20622 | } | |
20623 | ||
c7c16703 | 20624 | @@ -1207,7 +1235,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
20625 | struct worker_pool *pool; |
20626 | struct pool_workqueue *pwq; | |
20627 | ||
20628 | - local_irq_save(*flags); | |
20629 | + local_lock_irqsave(pendingb_lock, *flags); | |
20630 | ||
20631 | /* try to steal the timer if it exists */ | |
20632 | if (is_dwork) { | |
c7c16703 | 20633 | @@ -1226,6 +1254,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
20634 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) |
20635 | return 0; | |
20636 | ||
20637 | + rcu_read_lock(); | |
20638 | /* | |
20639 | * The queueing is in progress, or it is already queued. Try to | |
20640 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
c7c16703 | 20641 | @@ -1264,14 +1293,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
20642 | set_work_pool_and_keep_pending(work, pool->id); |
20643 | ||
20644 | spin_unlock(&pool->lock); | |
20645 | + rcu_read_unlock(); | |
20646 | return 1; | |
20647 | } | |
20648 | spin_unlock(&pool->lock); | |
20649 | fail: | |
20650 | - local_irq_restore(*flags); | |
20651 | + rcu_read_unlock(); | |
20652 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
20653 | if (work_is_canceling(work)) | |
20654 | return -ENOENT; | |
20655 | - cpu_relax(); | |
20656 | + cpu_chill(); | |
20657 | return -EAGAIN; | |
20658 | } | |
20659 | ||
c7c16703 | 20660 | @@ -1373,7 +1404,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20661 | * queued or lose PENDING. Grabbing PENDING and queueing should |
20662 | * happen with IRQ disabled. | |
20663 | */ | |
20664 | - WARN_ON_ONCE(!irqs_disabled()); | |
20665 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
20666 | ||
20667 | debug_work_activate(work); | |
20668 | ||
c7c16703 | 20669 | @@ -1381,6 +1412,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20670 | if (unlikely(wq->flags & __WQ_DRAINING) && |
20671 | WARN_ON_ONCE(!is_chained_work(wq))) | |
20672 | return; | |
20673 | + rcu_read_lock(); | |
20674 | retry: | |
20675 | if (req_cpu == WORK_CPU_UNBOUND) | |
20676 | cpu = wq_select_unbound_cpu(raw_smp_processor_id()); | |
c7c16703 | 20677 | @@ -1437,10 +1469,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20678 | /* pwq determined, queue */ |
20679 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
20680 | ||
20681 | - if (WARN_ON(!list_empty(&work->entry))) { | |
20682 | - spin_unlock(&pwq->pool->lock); | |
20683 | - return; | |
20684 | - } | |
20685 | + if (WARN_ON(!list_empty(&work->entry))) | |
20686 | + goto out; | |
20687 | ||
20688 | pwq->nr_in_flight[pwq->work_color]++; | |
20689 | work_flags = work_color_to_flags(pwq->work_color); | |
c7c16703 | 20690 | @@ -1458,7 +1488,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20691 | |
20692 | insert_work(pwq, work, worklist, work_flags); | |
20693 | ||
20694 | +out: | |
20695 | spin_unlock(&pwq->pool->lock); | |
20696 | + rcu_read_unlock(); | |
20697 | } | |
20698 | ||
20699 | /** | |
c7c16703 | 20700 | @@ -1478,14 +1510,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20701 | bool ret = false; |
20702 | unsigned long flags; | |
20703 | ||
20704 | - local_irq_save(flags); | |
20705 | + local_lock_irqsave(pendingb_lock,flags); | |
20706 | ||
20707 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
20708 | __queue_work(cpu, wq, work); | |
20709 | ret = true; | |
20710 | } | |
20711 | ||
20712 | - local_irq_restore(flags); | |
20713 | + local_unlock_irqrestore(pendingb_lock, flags); | |
20714 | return ret; | |
20715 | } | |
20716 | EXPORT_SYMBOL(queue_work_on); | |
c7c16703 | 20717 | @@ -1552,14 +1584,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20718 | unsigned long flags; |
20719 | ||
20720 | /* read the comment in __queue_work() */ | |
20721 | - local_irq_save(flags); | |
20722 | + local_lock_irqsave(pendingb_lock, flags); | |
20723 | ||
20724 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
20725 | __queue_delayed_work(cpu, wq, dwork, delay); | |
20726 | ret = true; | |
20727 | } | |
20728 | ||
20729 | - local_irq_restore(flags); | |
20730 | + local_unlock_irqrestore(pendingb_lock, flags); | |
20731 | return ret; | |
20732 | } | |
20733 | EXPORT_SYMBOL(queue_delayed_work_on); | |
c7c16703 | 20734 | @@ -1594,7 +1626,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
20735 | |
20736 | if (likely(ret >= 0)) { | |
20737 | __queue_delayed_work(cpu, wq, dwork, delay); | |
20738 | - local_irq_restore(flags); | |
20739 | + local_unlock_irqrestore(pendingb_lock, flags); | |
20740 | } | |
20741 | ||
20742 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
c7c16703 | 20743 | @@ -1627,7 +1659,9 @@ static void worker_enter_idle(struct worker *worker) |
1a6e0f06 JK |
20744 | worker->last_active = jiffies; |
20745 | ||
20746 | /* idle_list is LIFO */ | |
20747 | + rt_lock_idle_list(pool); | |
20748 | list_add(&worker->entry, &pool->idle_list); | |
20749 | + rt_unlock_idle_list(pool); | |
20750 | ||
20751 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
20752 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
c7c16703 | 20753 | @@ -1660,7 +1694,9 @@ static void worker_leave_idle(struct worker *worker) |
1a6e0f06 JK |
20754 | return; |
20755 | worker_clr_flags(worker, WORKER_IDLE); | |
20756 | pool->nr_idle--; | |
20757 | + rt_lock_idle_list(pool); | |
20758 | list_del_init(&worker->entry); | |
20759 | + rt_unlock_idle_list(pool); | |
20760 | } | |
20761 | ||
20762 | static struct worker *alloc_worker(int node) | |
c7c16703 | 20763 | @@ -1826,7 +1862,9 @@ static void destroy_worker(struct worker *worker) |
1a6e0f06 JK |
20764 | pool->nr_workers--; |
20765 | pool->nr_idle--; | |
20766 | ||
20767 | + rt_lock_idle_list(pool); | |
20768 | list_del_init(&worker->entry); | |
20769 | + rt_unlock_idle_list(pool); | |
20770 | worker->flags |= WORKER_DIE; | |
20771 | wake_up_process(worker->task); | |
20772 | } | |
c7c16703 | 20773 | @@ -2785,14 +2823,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) |
1a6e0f06 JK |
20774 | |
20775 | might_sleep(); | |
20776 | ||
20777 | - local_irq_disable(); | |
20778 | + rcu_read_lock(); | |
20779 | pool = get_work_pool(work); | |
20780 | if (!pool) { | |
20781 | - local_irq_enable(); | |
20782 | + rcu_read_unlock(); | |
20783 | return false; | |
20784 | } | |
20785 | ||
20786 | - spin_lock(&pool->lock); | |
20787 | + spin_lock_irq(&pool->lock); | |
20788 | /* see the comment in try_to_grab_pending() with the same code */ | |
20789 | pwq = get_work_pwq(work); | |
20790 | if (pwq) { | |
c7c16703 | 20791 | @@ -2821,10 +2859,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) |
1a6e0f06 JK |
20792 | else |
20793 | lock_map_acquire_read(&pwq->wq->lockdep_map); | |
20794 | lock_map_release(&pwq->wq->lockdep_map); | |
20795 | - | |
20796 | + rcu_read_unlock(); | |
20797 | return true; | |
20798 | already_gone: | |
20799 | spin_unlock_irq(&pool->lock); | |
20800 | + rcu_read_unlock(); | |
20801 | return false; | |
20802 | } | |
20803 | ||
c7c16703 | 20804 | @@ -2911,7 +2950,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) |
1a6e0f06 JK |
20805 | |
20806 | /* tell other tasks trying to grab @work to back off */ | |
20807 | mark_work_canceling(work); | |
20808 | - local_irq_restore(flags); | |
20809 | + local_unlock_irqrestore(pendingb_lock, flags); | |
20810 | ||
20811 | flush_work(work); | |
20812 | clear_work_data(work); | |
c7c16703 | 20813 | @@ -2966,10 +3005,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); |
1a6e0f06 JK |
20814 | */ |
20815 | bool flush_delayed_work(struct delayed_work *dwork) | |
20816 | { | |
20817 | - local_irq_disable(); | |
20818 | + local_lock_irq(pendingb_lock); | |
20819 | if (del_timer_sync(&dwork->timer)) | |
20820 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
20821 | - local_irq_enable(); | |
20822 | + local_unlock_irq(pendingb_lock); | |
20823 | return flush_work(&dwork->work); | |
20824 | } | |
20825 | EXPORT_SYMBOL(flush_delayed_work); | |
c7c16703 JK |
20826 | @@ -2987,7 +3026,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) |
20827 | return false; | |
1a6e0f06 | 20828 | |
c7c16703 | 20829 | set_work_pool_and_clear_pending(work, get_work_pool_id(work)); |
1a6e0f06 JK |
20830 | - local_irq_restore(flags); |
20831 | + local_unlock_irqrestore(pendingb_lock, flags); | |
20832 | return ret; | |
20833 | } | |
c7c16703 JK |
20834 | |
20835 | @@ -3245,7 +3284,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |
1a6e0f06 JK |
20836 | * put_unbound_pool - put a worker_pool |
20837 | * @pool: worker_pool to put | |
20838 | * | |
20839 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
20840 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
20841 | * safe manner. get_unbound_pool() calls this function on its failure path | |
20842 | * and this function should be able to release pools which went through, | |
20843 | * successfully or not, init_worker_pool(). | |
c7c16703 | 20844 | @@ -3299,8 +3338,8 @@ static void put_unbound_pool(struct worker_pool *pool) |
1a6e0f06 JK |
20845 | del_timer_sync(&pool->idle_timer); |
20846 | del_timer_sync(&pool->mayday_timer); | |
20847 | ||
20848 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
20849 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
20850 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
20851 | + call_rcu(&pool->rcu, rcu_free_pool); | |
20852 | } | |
20853 | ||
20854 | /** | |
c7c16703 | 20855 | @@ -3407,14 +3446,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) |
1a6e0f06 JK |
20856 | put_unbound_pool(pool); |
20857 | mutex_unlock(&wq_pool_mutex); | |
20858 | ||
20859 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
20860 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
20861 | ||
20862 | /* | |
20863 | * If we're the last pwq going away, @wq is already dead and no one | |
20864 | * is gonna access it anymore. Schedule RCU free. | |
20865 | */ | |
20866 | if (is_last) | |
20867 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
20868 | + call_rcu(&wq->rcu, rcu_free_wq); | |
20869 | } | |
20870 | ||
20871 | /** | |
c7c16703 | 20872 | @@ -4064,7 +4103,7 @@ void destroy_workqueue(struct workqueue_struct *wq) |
1a6e0f06 JK |
20873 | * The base ref is never dropped on per-cpu pwqs. Directly |
20874 | * schedule RCU free. | |
20875 | */ | |
20876 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
20877 | + call_rcu(&wq->rcu, rcu_free_wq); | |
20878 | } else { | |
20879 | /* | |
20880 | * We're the sole accessor of @wq at this point. Directly | |
c7c16703 | 20881 | @@ -4157,7 +4196,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) |
1a6e0f06 JK |
20882 | struct pool_workqueue *pwq; |
20883 | bool ret; | |
20884 | ||
20885 | - rcu_read_lock_sched(); | |
20886 | + rcu_read_lock(); | |
20887 | + preempt_disable(); | |
20888 | ||
20889 | if (cpu == WORK_CPU_UNBOUND) | |
20890 | cpu = smp_processor_id(); | |
c7c16703 | 20891 | @@ -4168,7 +4208,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) |
1a6e0f06 JK |
20892 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); |
20893 | ||
20894 | ret = !list_empty(&pwq->delayed_works); | |
20895 | - rcu_read_unlock_sched(); | |
20896 | + preempt_enable(); | |
20897 | + rcu_read_unlock(); | |
20898 | ||
20899 | return ret; | |
20900 | } | |
c7c16703 | 20901 | @@ -4194,15 +4235,15 @@ unsigned int work_busy(struct work_struct *work) |
1a6e0f06 JK |
20902 | if (work_pending(work)) |
20903 | ret |= WORK_BUSY_PENDING; | |
20904 | ||
20905 | - local_irq_save(flags); | |
20906 | + rcu_read_lock(); | |
20907 | pool = get_work_pool(work); | |
20908 | if (pool) { | |
20909 | - spin_lock(&pool->lock); | |
20910 | + spin_lock_irqsave(&pool->lock, flags); | |
20911 | if (find_worker_executing_work(pool, work)) | |
20912 | ret |= WORK_BUSY_RUNNING; | |
20913 | - spin_unlock(&pool->lock); | |
20914 | + spin_unlock_irqrestore(&pool->lock, flags); | |
20915 | } | |
20916 | - local_irq_restore(flags); | |
20917 | + rcu_read_unlock(); | |
20918 | ||
20919 | return ret; | |
20920 | } | |
c7c16703 | 20921 | @@ -4391,7 +4432,7 @@ void show_workqueue_state(void) |
1a6e0f06 JK |
20922 | unsigned long flags; |
20923 | int pi; | |
20924 | ||
20925 | - rcu_read_lock_sched(); | |
20926 | + rcu_read_lock(); | |
20927 | ||
20928 | pr_info("Showing busy workqueues and worker pools:\n"); | |
20929 | ||
c7c16703 | 20930 | @@ -4444,7 +4485,7 @@ void show_workqueue_state(void) |
1a6e0f06 JK |
20931 | spin_unlock_irqrestore(&pool->lock, flags); |
20932 | } | |
20933 | ||
20934 | - rcu_read_unlock_sched(); | |
20935 | + rcu_read_unlock(); | |
20936 | } | |
20937 | ||
20938 | /* | |
c7c16703 | 20939 | @@ -4782,16 +4823,16 @@ bool freeze_workqueues_busy(void) |
1a6e0f06 JK |
20940 | * nr_active is monotonically decreasing. It's safe |
20941 | * to peek without lock. | |
20942 | */ | |
20943 | - rcu_read_lock_sched(); | |
20944 | + rcu_read_lock(); | |
20945 | for_each_pwq(pwq, wq) { | |
20946 | WARN_ON_ONCE(pwq->nr_active < 0); | |
20947 | if (pwq->nr_active) { | |
20948 | busy = true; | |
20949 | - rcu_read_unlock_sched(); | |
20950 | + rcu_read_unlock(); | |
20951 | goto out_unlock; | |
20952 | } | |
20953 | } | |
20954 | - rcu_read_unlock_sched(); | |
20955 | + rcu_read_unlock(); | |
20956 | } | |
20957 | out_unlock: | |
20958 | mutex_unlock(&wq_pool_mutex); | |
c7c16703 | 20959 | @@ -4981,7 +5022,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, |
1a6e0f06 JK |
20960 | const char *delim = ""; |
20961 | int node, written = 0; | |
20962 | ||
20963 | - rcu_read_lock_sched(); | |
20964 | + get_online_cpus(); | |
20965 | + rcu_read_lock(); | |
20966 | for_each_node(node) { | |
20967 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
20968 | "%s%d:%d", delim, node, | |
c7c16703 | 20969 | @@ -4989,7 +5031,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, |
1a6e0f06 JK |
20970 | delim = " "; |
20971 | } | |
20972 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
20973 | - rcu_read_unlock_sched(); | |
20974 | + rcu_read_unlock(); | |
20975 | + put_online_cpus(); | |
20976 | ||
20977 | return written; | |
20978 | } | |
20979 | diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h | |
20980 | index 8635417c587b..f000c4d6917e 100644 | |
20981 | --- a/kernel/workqueue_internal.h | |
20982 | +++ b/kernel/workqueue_internal.h | |
20983 | @@ -43,6 +43,7 @@ struct worker { | |
20984 | unsigned long last_active; /* L: last active timestamp */ | |
20985 | unsigned int flags; /* X: flags */ | |
20986 | int id; /* I: worker id */ | |
20987 | + int sleeping; /* None */ | |
20988 | ||
20989 | /* | |
20990 | * Opaque string set with work_set_desc(). Printed out with task | |
20991 | @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void) | |
20992 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | |
20993 | * sched/core.c and workqueue.c. | |
20994 | */ | |
20995 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
20996 | -struct task_struct *wq_worker_sleeping(struct task_struct *task); | |
20997 | +void wq_worker_running(struct task_struct *task); | |
20998 | +void wq_worker_sleeping(struct task_struct *task); | |
20999 | ||
21000 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
21001 | diff --git a/lib/Kconfig b/lib/Kconfig | |
c7c16703 | 21002 | index 260a80e313b9..b06becb3f477 100644 |
1a6e0f06 JK |
21003 | --- a/lib/Kconfig |
21004 | +++ b/lib/Kconfig | |
21005 | @@ -400,6 +400,7 @@ config CHECK_SIGNATURE | |
21006 | ||
21007 | config CPUMASK_OFFSTACK | |
21008 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
21009 | + depends on !PREEMPT_RT_FULL | |
21010 | help | |
21011 | Use dynamic allocation for cpumask_var_t, instead of putting | |
21012 | them on the stack. This is a bit more expensive, but avoids | |
1a6e0f06 | 21013 | diff --git a/lib/debugobjects.c b/lib/debugobjects.c |
c7c16703 | 21014 | index 056052dc8e91..d8494e126de8 100644 |
1a6e0f06 JK |
21015 | --- a/lib/debugobjects.c |
21016 | +++ b/lib/debugobjects.c | |
21017 | @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) | |
21018 | struct debug_obj *obj; | |
21019 | unsigned long flags; | |
21020 | ||
21021 | - fill_pool(); | |
21022 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21023 | + if (preempt_count() == 0 && !irqs_disabled()) | |
21024 | +#endif | |
21025 | + fill_pool(); | |
21026 | ||
21027 | db = get_bucket((unsigned long) addr); | |
21028 | ||
21029 | diff --git a/lib/idr.c b/lib/idr.c | |
21030 | index 6098336df267..9decbe914595 100644 | |
21031 | --- a/lib/idr.c | |
21032 | +++ b/lib/idr.c | |
21033 | @@ -30,6 +30,7 @@ | |
21034 | #include <linux/idr.h> | |
21035 | #include <linux/spinlock.h> | |
21036 | #include <linux/percpu.h> | |
21037 | +#include <linux/locallock.h> | |
21038 | ||
21039 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | |
21040 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | |
21041 | @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | |
21042 | static DEFINE_PER_CPU(int, idr_preload_cnt); | |
21043 | static DEFINE_SPINLOCK(simple_ida_lock); | |
21044 | ||
21045 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21046 | +static DEFINE_LOCAL_IRQ_LOCK(idr_lock); | |
21047 | + | |
21048 | +static inline void idr_preload_lock(void) | |
21049 | +{ | |
21050 | + local_lock(idr_lock); | |
21051 | +} | |
21052 | + | |
21053 | +static inline void idr_preload_unlock(void) | |
21054 | +{ | |
21055 | + local_unlock(idr_lock); | |
21056 | +} | |
21057 | + | |
21058 | +void idr_preload_end(void) | |
21059 | +{ | |
21060 | + idr_preload_unlock(); | |
21061 | +} | |
21062 | +EXPORT_SYMBOL(idr_preload_end); | |
21063 | +#else | |
21064 | +static inline void idr_preload_lock(void) | |
21065 | +{ | |
21066 | + preempt_disable(); | |
21067 | +} | |
21068 | + | |
21069 | +static inline void idr_preload_unlock(void) | |
21070 | +{ | |
21071 | + preempt_enable(); | |
21072 | +} | |
21073 | +#endif | |
21074 | + | |
21075 | + | |
21076 | /* the maximum ID which can be allocated given idr->layers */ | |
21077 | static int idr_max(int layers) | |
21078 | { | |
21079 | @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | |
21080 | * context. See idr_preload() for details. | |
21081 | */ | |
21082 | if (!in_interrupt()) { | |
21083 | - preempt_disable(); | |
21084 | + idr_preload_lock(); | |
21085 | new = __this_cpu_read(idr_preload_head); | |
21086 | if (new) { | |
21087 | __this_cpu_write(idr_preload_head, new->ary[0]); | |
21088 | __this_cpu_dec(idr_preload_cnt); | |
21089 | new->ary[0] = NULL; | |
21090 | } | |
21091 | - preempt_enable(); | |
21092 | + idr_preload_unlock(); | |
21093 | if (new) | |
21094 | return new; | |
21095 | } | |
21096 | @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, | |
21097 | idr_mark_full(pa, id); | |
21098 | } | |
21099 | ||
21100 | - | |
21101 | /** | |
21102 | * idr_preload - preload for idr_alloc() | |
21103 | * @gfp_mask: allocation mask to use for preloading | |
21104 | @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask) | |
21105 | WARN_ON_ONCE(in_interrupt()); | |
21106 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | |
21107 | ||
21108 | - preempt_disable(); | |
21109 | + idr_preload_lock(); | |
21110 | ||
21111 | /* | |
21112 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | |
21113 | @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask) | |
21114 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | |
21115 | struct idr_layer *new; | |
21116 | ||
21117 | - preempt_enable(); | |
21118 | + idr_preload_unlock(); | |
21119 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | |
21120 | - preempt_disable(); | |
21121 | + idr_preload_lock(); | |
21122 | if (!new) | |
21123 | break; | |
21124 | ||
21125 | diff --git a/lib/irq_poll.c b/lib/irq_poll.c | |
c7c16703 | 21126 | index 1d6565e81030..b23a79761df7 100644 |
1a6e0f06 JK |
21127 | --- a/lib/irq_poll.c |
21128 | +++ b/lib/irq_poll.c | |
21129 | @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop) | |
21130 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); | |
21131 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
21132 | local_irq_restore(flags); | |
21133 | + preempt_check_resched_rt(); | |
21134 | } | |
21135 | EXPORT_SYMBOL(irq_poll_sched); | |
21136 | ||
21137 | @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop) | |
21138 | local_irq_save(flags); | |
21139 | __irq_poll_complete(iop); | |
21140 | local_irq_restore(flags); | |
21141 | + preempt_check_resched_rt(); | |
21142 | } | |
21143 | EXPORT_SYMBOL(irq_poll_complete); | |
21144 | ||
c7c16703 | 21145 | @@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) |
1a6e0f06 JK |
21146 | } |
21147 | ||
21148 | local_irq_enable(); | |
21149 | + preempt_check_resched_rt(); | |
21150 | ||
21151 | /* Even though interrupts have been re-enabled, this | |
21152 | * access is safe because interrupts can only add new | |
c7c16703 | 21153 | @@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) |
1a6e0f06 JK |
21154 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); |
21155 | ||
21156 | local_irq_enable(); | |
21157 | + preempt_check_resched_rt(); | |
21158 | } | |
21159 | ||
21160 | /** | |
c7c16703 JK |
21161 | @@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned int cpu) |
21162 | this_cpu_ptr(&blk_cpu_iopoll)); | |
21163 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
21164 | local_irq_enable(); | |
21165 | + preempt_check_resched_rt(); | |
1a6e0f06 | 21166 | |
c7c16703 JK |
21167 | return 0; |
21168 | } | |
1a6e0f06 | 21169 | diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c |
c7c16703 | 21170 | index f3a217ea0388..4611b156ef79 100644 |
1a6e0f06 JK |
21171 | --- a/lib/locking-selftest.c |
21172 | +++ b/lib/locking-selftest.c | |
21173 | @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) | |
21174 | #include "locking-selftest-spin-hardirq.h" | |
21175 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
21176 | ||
21177 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21178 | + | |
21179 | #include "locking-selftest-rlock-hardirq.h" | |
21180 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
21181 | ||
21182 | @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) | |
21183 | #include "locking-selftest-wlock-softirq.h" | |
21184 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
21185 | ||
21186 | +#endif | |
21187 | + | |
21188 | #undef E1 | |
21189 | #undef E2 | |
21190 | ||
21191 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21192 | /* | |
21193 | * Enabling hardirqs with a softirq-safe lock held: | |
21194 | */ | |
21195 | @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
21196 | #undef E1 | |
21197 | #undef E2 | |
21198 | ||
21199 | +#endif | |
21200 | + | |
21201 | /* | |
21202 | * Enabling irqs with an irq-safe lock held: | |
21203 | */ | |
21204 | @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
21205 | #include "locking-selftest-spin-hardirq.h" | |
21206 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
21207 | ||
21208 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21209 | + | |
21210 | #include "locking-selftest-rlock-hardirq.h" | |
21211 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
21212 | ||
21213 | @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) | |
21214 | #include "locking-selftest-wlock-softirq.h" | |
21215 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
21216 | ||
21217 | +#endif | |
21218 | + | |
21219 | #undef E1 | |
21220 | #undef E2 | |
21221 | ||
21222 | @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
21223 | #include "locking-selftest-spin-hardirq.h" | |
21224 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
21225 | ||
21226 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21227 | + | |
21228 | #include "locking-selftest-rlock-hardirq.h" | |
21229 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
21230 | ||
21231 | @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) | |
21232 | #include "locking-selftest-wlock-softirq.h" | |
21233 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
21234 | ||
21235 | +#endif | |
21236 | + | |
21237 | #undef E1 | |
21238 | #undef E2 | |
21239 | #undef E3 | |
21240 | @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
21241 | #include "locking-selftest-spin-hardirq.h" | |
21242 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
21243 | ||
21244 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21245 | + | |
21246 | #include "locking-selftest-rlock-hardirq.h" | |
21247 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
21248 | ||
21249 | @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) | |
21250 | #include "locking-selftest-wlock-softirq.h" | |
21251 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
21252 | ||
21253 | +#endif | |
21254 | + | |
21255 | #undef E1 | |
21256 | #undef E2 | |
21257 | #undef E3 | |
21258 | ||
21259 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21260 | + | |
21261 | /* | |
21262 | * read-lock / write-lock irq inversion. | |
21263 | * | |
21264 | @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) | |
21265 | #undef E2 | |
21266 | #undef E3 | |
21267 | ||
21268 | +#endif | |
21269 | + | |
21270 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21271 | + | |
21272 | /* | |
21273 | * read-lock / write-lock recursion that is actually safe. | |
21274 | */ | |
21275 | @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |
21276 | #undef E2 | |
21277 | #undef E3 | |
21278 | ||
21279 | +#endif | |
21280 | + | |
21281 | /* | |
21282 | * read-lock / write-lock recursion that is unsafe. | |
21283 | */ | |
21284 | @@ -1858,6 +1885,7 @@ void locking_selftest(void) | |
21285 | ||
21286 | printk(" --------------------------------------------------------------------------\n"); | |
21287 | ||
21288 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21289 | /* | |
21290 | * irq-context testcases: | |
21291 | */ | |
21292 | @@ -1870,6 +1898,28 @@ void locking_selftest(void) | |
21293 | ||
21294 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
21295 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
21296 | +#else | |
21297 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
21298 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
21299 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
21300 | + | |
21301 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
21302 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
21303 | + | |
21304 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
21305 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
21306 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
21307 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
21308 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
21309 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
21310 | + | |
21311 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
21312 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
21313 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
21314 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
21315 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
21316 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
21317 | +#endif | |
21318 | ||
21319 | ww_tests(); | |
21320 | ||
21321 | diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c | |
21322 | index 6d40944960de..822a2c027e72 100644 | |
21323 | --- a/lib/percpu_ida.c | |
21324 | +++ b/lib/percpu_ida.c | |
21325 | @@ -26,6 +26,9 @@ | |
21326 | #include <linux/string.h> | |
21327 | #include <linux/spinlock.h> | |
21328 | #include <linux/percpu_ida.h> | |
21329 | +#include <linux/locallock.h> | |
21330 | + | |
21331 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
21332 | ||
21333 | struct percpu_ida_cpu { | |
21334 | /* | |
21335 | @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21336 | unsigned long flags; | |
21337 | int tag; | |
21338 | ||
21339 | - local_irq_save(flags); | |
21340 | + local_lock_irqsave(irq_off_lock, flags); | |
21341 | tags = this_cpu_ptr(pool->tag_cpu); | |
21342 | ||
21343 | /* Fastpath */ | |
21344 | tag = alloc_local_tag(tags); | |
21345 | if (likely(tag >= 0)) { | |
21346 | - local_irq_restore(flags); | |
21347 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21348 | return tag; | |
21349 | } | |
21350 | ||
21351 | @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21352 | ||
21353 | if (!tags->nr_free) | |
21354 | alloc_global_tags(pool, tags); | |
21355 | + | |
21356 | if (!tags->nr_free) | |
21357 | steal_tags(pool, tags); | |
21358 | ||
21359 | @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21360 | } | |
21361 | ||
21362 | spin_unlock(&pool->lock); | |
21363 | - local_irq_restore(flags); | |
21364 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21365 | ||
21366 | if (tag >= 0 || state == TASK_RUNNING) | |
21367 | break; | |
21368 | @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21369 | ||
21370 | schedule(); | |
21371 | ||
21372 | - local_irq_save(flags); | |
21373 | + local_lock_irqsave(irq_off_lock, flags); | |
21374 | tags = this_cpu_ptr(pool->tag_cpu); | |
21375 | } | |
21376 | if (state != TASK_RUNNING) | |
21377 | @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
21378 | ||
21379 | BUG_ON(tag >= pool->nr_tags); | |
21380 | ||
21381 | - local_irq_save(flags); | |
21382 | + local_lock_irqsave(irq_off_lock, flags); | |
21383 | tags = this_cpu_ptr(pool->tag_cpu); | |
21384 | ||
21385 | spin_lock(&tags->lock); | |
21386 | @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
21387 | spin_unlock(&pool->lock); | |
21388 | } | |
21389 | ||
21390 | - local_irq_restore(flags); | |
21391 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21392 | } | |
21393 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
21394 | ||
21395 | @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
21396 | struct percpu_ida_cpu *remote; | |
21397 | unsigned cpu, i, err = 0; | |
21398 | ||
21399 | - local_irq_save(flags); | |
21400 | + local_lock_irqsave(irq_off_lock, flags); | |
21401 | for_each_possible_cpu(cpu) { | |
21402 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
21403 | spin_lock(&remote->lock); | |
21404 | @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
21405 | } | |
21406 | spin_unlock(&pool->lock); | |
21407 | out: | |
21408 | - local_irq_restore(flags); | |
21409 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21410 | return err; | |
21411 | } | |
21412 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
21413 | diff --git a/lib/radix-tree.c b/lib/radix-tree.c | |
21414 | index 8e6d552c40dd..881cc195d85f 100644 | |
21415 | --- a/lib/radix-tree.c | |
21416 | +++ b/lib/radix-tree.c | |
21417 | @@ -290,13 +290,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |
21418 | * succeed in getting a node here (and never reach | |
21419 | * kmem_cache_alloc) | |
21420 | */ | |
21421 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
21422 | + rtp = &get_cpu_var(radix_tree_preloads); | |
21423 | if (rtp->nr) { | |
21424 | ret = rtp->nodes; | |
21425 | rtp->nodes = ret->private_data; | |
21426 | ret->private_data = NULL; | |
21427 | rtp->nr--; | |
21428 | } | |
21429 | + put_cpu_var(radix_tree_preloads); | |
21430 | /* | |
21431 | * Update the allocation stack trace as this is more useful | |
21432 | * for debugging. | |
21433 | @@ -336,6 +337,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |
21434 | call_rcu(&node->rcu_head, radix_tree_node_rcu_free); | |
21435 | } | |
21436 | ||
21437 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21438 | /* | |
21439 | * Load up this CPU's radix_tree_node buffer with sufficient objects to | |
21440 | * ensure that the addition of a single element in the tree cannot fail. On | |
21441 | @@ -455,6 +457,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
21442 | ||
21443 | return __radix_tree_preload(gfp_mask, nr_nodes); | |
21444 | } | |
21445 | +#endif | |
21446 | ||
21447 | /* | |
21448 | * The maximum index which can be stored in a radix tree | |
21449 | diff --git a/lib/scatterlist.c b/lib/scatterlist.c | |
21450 | index 004fc70fc56a..ccc46992a517 100644 | |
21451 | --- a/lib/scatterlist.c | |
21452 | +++ b/lib/scatterlist.c | |
21453 | @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) | |
21454 | flush_kernel_dcache_page(miter->page); | |
21455 | ||
21456 | if (miter->__flags & SG_MITER_ATOMIC) { | |
21457 | - WARN_ON_ONCE(preemptible()); | |
21458 | + WARN_ON_ONCE(!pagefault_disabled()); | |
21459 | kunmap_atomic(miter->addr); | |
21460 | } else | |
21461 | kunmap(miter->page); | |
21462 | @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
21463 | if (!sg_miter_skip(&miter, skip)) | |
21464 | return false; | |
21465 | ||
21466 | - local_irq_save(flags); | |
21467 | + local_irq_save_nort(flags); | |
21468 | ||
21469 | while (sg_miter_next(&miter) && offset < buflen) { | |
21470 | unsigned int len; | |
21471 | @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
21472 | ||
21473 | sg_miter_stop(&miter); | |
21474 | ||
21475 | - local_irq_restore(flags); | |
21476 | + local_irq_restore_nort(flags); | |
21477 | return offset; | |
21478 | } | |
21479 | EXPORT_SYMBOL(sg_copy_buffer); | |
21480 | diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c | |
21481 | index 1afec32de6f2..11fa431046a8 100644 | |
21482 | --- a/lib/smp_processor_id.c | |
21483 | +++ b/lib/smp_processor_id.c | |
21484 | @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, | |
21485 | if (!printk_ratelimit()) | |
21486 | goto out_enable; | |
21487 | ||
21488 | - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", | |
21489 | - what1, what2, preempt_count() - 1, current->comm, current->pid); | |
21490 | + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n", | |
21491 | + what1, what2, preempt_count() - 1, __migrate_disabled(current), | |
21492 | + current->comm, current->pid); | |
21493 | ||
21494 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | |
21495 | dump_stack(); | |
21496 | diff --git a/localversion-rt b/localversion-rt | |
21497 | new file mode 100644 | |
c7c16703 | 21498 | index 000000000000..c3054d08a112 |
1a6e0f06 JK |
21499 | --- /dev/null |
21500 | +++ b/localversion-rt | |
21501 | @@ -0,0 +1 @@ | |
c7c16703 | 21502 | +-rt2 |
1a6e0f06 | 21503 | diff --git a/mm/Kconfig b/mm/Kconfig |
c7c16703 | 21504 | index 86e3e0e74d20..77e5862a1ed2 100644 |
1a6e0f06 JK |
21505 | --- a/mm/Kconfig |
21506 | +++ b/mm/Kconfig | |
21507 | @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS | |
21508 | ||
21509 | config TRANSPARENT_HUGEPAGE | |
21510 | bool "Transparent Hugepage Support" | |
21511 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
21512 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
21513 | select COMPACTION | |
21514 | select RADIX_TREE_MULTIORDER | |
21515 | help | |
21516 | diff --git a/mm/backing-dev.c b/mm/backing-dev.c | |
21517 | index 8fde443f36d7..d7a863b0ec20 100644 | |
21518 | --- a/mm/backing-dev.c | |
21519 | +++ b/mm/backing-dev.c | |
21520 | @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) | |
21521 | { | |
21522 | unsigned long flags; | |
21523 | ||
21524 | - local_irq_save(flags); | |
21525 | + local_irq_save_nort(flags); | |
21526 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
21527 | - local_irq_restore(flags); | |
21528 | + local_irq_restore_nort(flags); | |
21529 | return; | |
21530 | } | |
21531 | ||
21532 | diff --git a/mm/compaction.c b/mm/compaction.c | |
c7c16703 | 21533 | index 70e6bec46dc2..6678ed58b7c6 100644 |
1a6e0f06 JK |
21534 | --- a/mm/compaction.c |
21535 | +++ b/mm/compaction.c | |
c7c16703 | 21536 | @@ -1593,10 +1593,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro |
1a6e0f06 JK |
21537 | block_start_pfn(cc->migrate_pfn, cc->order); |
21538 | ||
21539 | if (cc->last_migrated_pfn < current_block_start) { | |
21540 | - cpu = get_cpu(); | |
21541 | + cpu = get_cpu_light(); | |
21542 | + local_lock_irq(swapvec_lock); | |
21543 | lru_add_drain_cpu(cpu); | |
21544 | + local_unlock_irq(swapvec_lock); | |
21545 | drain_local_pages(zone); | |
21546 | - put_cpu(); | |
21547 | + put_cpu_light(); | |
21548 | /* No more flushing until we migrate again */ | |
21549 | cc->last_migrated_pfn = 0; | |
21550 | } | |
21551 | diff --git a/mm/filemap.c b/mm/filemap.c | |
c7c16703 | 21552 | index 9a50acecc473..59f749a0b738 100644 |
1a6e0f06 JK |
21553 | --- a/mm/filemap.c |
21554 | +++ b/mm/filemap.c | |
21555 | @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping, | |
21556 | * node->private_list is protected by | |
21557 | * mapping->tree_lock. | |
21558 | */ | |
21559 | - if (!list_empty(&node->private_list)) | |
21560 | - list_lru_del(&workingset_shadow_nodes, | |
21561 | + if (!list_empty(&node->private_list)) { | |
21562 | + local_lock(workingset_shadow_lock); | |
21563 | + list_lru_del(&__workingset_shadow_nodes, | |
21564 | &node->private_list); | |
21565 | + local_unlock(workingset_shadow_lock); | |
21566 | + } | |
21567 | } | |
21568 | return 0; | |
21569 | } | |
21570 | @@ -217,8 +220,10 @@ static void page_cache_tree_delete(struct address_space *mapping, | |
21571 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | |
21572 | list_empty(&node->private_list)) { | |
21573 | node->private_data = mapping; | |
21574 | - list_lru_add(&workingset_shadow_nodes, | |
21575 | - &node->private_list); | |
21576 | + local_lock(workingset_shadow_lock); | |
21577 | + list_lru_add(&__workingset_shadow_nodes, | |
21578 | + &node->private_list); | |
21579 | + local_unlock(workingset_shadow_lock); | |
21580 | } | |
21581 | } | |
21582 | ||
21583 | diff --git a/mm/highmem.c b/mm/highmem.c | |
21584 | index 50b4ca6787f0..77518a3b35a1 100644 | |
21585 | --- a/mm/highmem.c | |
21586 | +++ b/mm/highmem.c | |
21587 | @@ -29,10 +29,11 @@ | |
21588 | #include <linux/kgdb.h> | |
21589 | #include <asm/tlbflush.h> | |
21590 | ||
21591 | - | |
21592 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21593 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
21594 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
21595 | #endif | |
21596 | +#endif | |
21597 | ||
21598 | /* | |
21599 | * Virtual_count is not a pure "count". | |
21600 | @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) | |
21601 | unsigned long totalhigh_pages __read_mostly; | |
21602 | EXPORT_SYMBOL(totalhigh_pages); | |
21603 | ||
21604 | - | |
21605 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21606 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
21607 | +#endif | |
21608 | ||
21609 | unsigned int nr_free_highpages (void) | |
21610 | { | |
21611 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c | |
c7c16703 | 21612 | index 0f870ba43942..f219b4066e6d 100644 |
1a6e0f06 JK |
21613 | --- a/mm/memcontrol.c |
21614 | +++ b/mm/memcontrol.c | |
21615 | @@ -67,6 +67,7 @@ | |
21616 | #include <net/sock.h> | |
21617 | #include <net/ip.h> | |
21618 | #include "slab.h" | |
21619 | +#include <linux/locallock.h> | |
21620 | ||
21621 | #include <asm/uaccess.h> | |
21622 | ||
21623 | @@ -92,6 +93,8 @@ int do_swap_account __read_mostly; | |
21624 | #define do_swap_account 0 | |
21625 | #endif | |
21626 | ||
21627 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
21628 | + | |
21629 | /* Whether legacy memory+swap accounting is active */ | |
21630 | static bool do_memsw_account(void) | |
21631 | { | |
c7c16703 | 21632 | @@ -1694,6 +1697,7 @@ struct memcg_stock_pcp { |
1a6e0f06 JK |
21633 | #define FLUSHING_CACHED_CHARGE 0 |
21634 | }; | |
21635 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | |
21636 | +static DEFINE_LOCAL_IRQ_LOCK(memcg_stock_ll); | |
21637 | static DEFINE_MUTEX(percpu_charge_mutex); | |
21638 | ||
21639 | /** | |
c7c16703 | 21640 | @@ -1716,7 +1720,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
21641 | if (nr_pages > CHARGE_BATCH) |
21642 | return ret; | |
21643 | ||
21644 | - local_irq_save(flags); | |
21645 | + local_lock_irqsave(memcg_stock_ll, flags); | |
21646 | ||
21647 | stock = this_cpu_ptr(&memcg_stock); | |
21648 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { | |
c7c16703 | 21649 | @@ -1724,7 +1728,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
21650 | ret = true; |
21651 | } | |
21652 | ||
21653 | - local_irq_restore(flags); | |
21654 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
21655 | ||
21656 | return ret; | |
21657 | } | |
c7c16703 | 21658 | @@ -1751,13 +1755,13 @@ static void drain_local_stock(struct work_struct *dummy) |
1a6e0f06 JK |
21659 | struct memcg_stock_pcp *stock; |
21660 | unsigned long flags; | |
21661 | ||
21662 | - local_irq_save(flags); | |
21663 | + local_lock_irqsave(memcg_stock_ll, flags); | |
21664 | ||
21665 | stock = this_cpu_ptr(&memcg_stock); | |
21666 | drain_stock(stock); | |
21667 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | |
21668 | ||
21669 | - local_irq_restore(flags); | |
21670 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
21671 | } | |
21672 | ||
21673 | /* | |
c7c16703 | 21674 | @@ -1769,7 +1773,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
21675 | struct memcg_stock_pcp *stock; |
21676 | unsigned long flags; | |
21677 | ||
21678 | - local_irq_save(flags); | |
21679 | + local_lock_irqsave(memcg_stock_ll, flags); | |
21680 | ||
21681 | stock = this_cpu_ptr(&memcg_stock); | |
21682 | if (stock->cached != memcg) { /* reset if necessary */ | |
c7c16703 | 21683 | @@ -1778,7 +1782,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
21684 | } |
21685 | stock->nr_pages += nr_pages; | |
21686 | ||
21687 | - local_irq_restore(flags); | |
21688 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
21689 | } | |
21690 | ||
21691 | /* | |
c7c16703 | 21692 | @@ -1794,7 +1798,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) |
1a6e0f06 JK |
21693 | return; |
21694 | /* Notify other cpus that system-wide "drain" is running */ | |
21695 | get_online_cpus(); | |
21696 | - curcpu = get_cpu(); | |
21697 | + curcpu = get_cpu_light(); | |
21698 | for_each_online_cpu(cpu) { | |
21699 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
21700 | struct mem_cgroup *memcg; | |
c7c16703 | 21701 | @@ -1811,7 +1815,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) |
1a6e0f06 JK |
21702 | schedule_work_on(cpu, &stock->work); |
21703 | } | |
21704 | } | |
21705 | - put_cpu(); | |
21706 | + put_cpu_light(); | |
21707 | put_online_cpus(); | |
21708 | mutex_unlock(&percpu_charge_mutex); | |
21709 | } | |
c7c16703 | 21710 | @@ -4550,12 +4554,12 @@ static int mem_cgroup_move_account(struct page *page, |
1a6e0f06 JK |
21711 | |
21712 | ret = 0; | |
21713 | ||
21714 | - local_irq_disable(); | |
21715 | + local_lock_irq(event_lock); | |
21716 | mem_cgroup_charge_statistics(to, page, compound, nr_pages); | |
21717 | memcg_check_events(to, page); | |
21718 | mem_cgroup_charge_statistics(from, page, compound, -nr_pages); | |
21719 | memcg_check_events(from, page); | |
21720 | - local_irq_enable(); | |
21721 | + local_unlock_irq(event_lock); | |
21722 | out_unlock: | |
21723 | unlock_page(page); | |
21724 | out: | |
c7c16703 | 21725 | @@ -5430,10 +5434,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, |
1a6e0f06 JK |
21726 | |
21727 | commit_charge(page, memcg, lrucare); | |
21728 | ||
21729 | - local_irq_disable(); | |
21730 | + local_lock_irq(event_lock); | |
21731 | mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); | |
21732 | memcg_check_events(memcg, page); | |
21733 | - local_irq_enable(); | |
21734 | + local_unlock_irq(event_lock); | |
21735 | ||
21736 | if (do_memsw_account() && PageSwapCache(page)) { | |
21737 | swp_entry_t entry = { .val = page_private(page) }; | |
c7c16703 | 21738 | @@ -5489,14 +5493,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, |
1a6e0f06 JK |
21739 | memcg_oom_recover(memcg); |
21740 | } | |
21741 | ||
21742 | - local_irq_save(flags); | |
21743 | + local_lock_irqsave(event_lock, flags); | |
21744 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | |
21745 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); | |
21746 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); | |
21747 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); | |
21748 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); | |
21749 | memcg_check_events(memcg, dummy_page); | |
21750 | - local_irq_restore(flags); | |
21751 | + local_unlock_irqrestore(event_lock, flags); | |
21752 | ||
21753 | if (!mem_cgroup_is_root(memcg)) | |
21754 | css_put_many(&memcg->css, nr_pages); | |
c7c16703 | 21755 | @@ -5651,10 +5655,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) |
1a6e0f06 JK |
21756 | |
21757 | commit_charge(newpage, memcg, false); | |
21758 | ||
21759 | - local_irq_save(flags); | |
21760 | + local_lock_irqsave(event_lock, flags); | |
21761 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | |
21762 | memcg_check_events(memcg, newpage); | |
21763 | - local_irq_restore(flags); | |
21764 | + local_unlock_irqrestore(event_lock, flags); | |
21765 | } | |
21766 | ||
21767 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | |
c7c16703 | 21768 | @@ -5834,6 +5838,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) |
1a6e0f06 JK |
21769 | { |
21770 | struct mem_cgroup *memcg, *swap_memcg; | |
21771 | unsigned short oldid; | |
21772 | + unsigned long flags; | |
21773 | ||
21774 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
21775 | VM_BUG_ON_PAGE(page_count(page), page); | |
c7c16703 | 21776 | @@ -5874,12 +5879,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) |
1a6e0f06 JK |
21777 | * important here to have the interrupts disabled because it is the |
21778 | * only synchronisation we have for udpating the per-CPU variables. | |
21779 | */ | |
21780 | + local_lock_irqsave(event_lock, flags); | |
21781 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
21782 | VM_BUG_ON(!irqs_disabled()); | |
21783 | +#endif | |
21784 | mem_cgroup_charge_statistics(memcg, page, false, -1); | |
21785 | memcg_check_events(memcg, page); | |
21786 | ||
21787 | if (!mem_cgroup_is_root(memcg)) | |
21788 | css_put(&memcg->css); | |
21789 | + local_unlock_irqrestore(event_lock, flags); | |
21790 | } | |
21791 | ||
21792 | /* | |
21793 | diff --git a/mm/mmu_context.c b/mm/mmu_context.c | |
21794 | index 6f4d27c5bb32..5cd25c745a8f 100644 | |
21795 | --- a/mm/mmu_context.c | |
21796 | +++ b/mm/mmu_context.c | |
21797 | @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm) | |
21798 | struct task_struct *tsk = current; | |
21799 | ||
21800 | task_lock(tsk); | |
21801 | + preempt_disable_rt(); | |
21802 | active_mm = tsk->active_mm; | |
21803 | if (active_mm != mm) { | |
21804 | atomic_inc(&mm->mm_count); | |
21805 | @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm) | |
21806 | } | |
21807 | tsk->mm = mm; | |
21808 | switch_mm(active_mm, mm, tsk); | |
21809 | + preempt_enable_rt(); | |
21810 | task_unlock(tsk); | |
21811 | #ifdef finish_arch_post_lock_switch | |
21812 | finish_arch_post_lock_switch(); | |
21813 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
c7c16703 | 21814 | index 34ada718ef47..21f0dc3fe2aa 100644 |
1a6e0f06 JK |
21815 | --- a/mm/page_alloc.c |
21816 | +++ b/mm/page_alloc.c | |
21817 | @@ -61,6 +61,7 @@ | |
21818 | #include <linux/page_ext.h> | |
21819 | #include <linux/hugetlb.h> | |
21820 | #include <linux/sched/rt.h> | |
21821 | +#include <linux/locallock.h> | |
21822 | #include <linux/page_owner.h> | |
21823 | #include <linux/kthread.h> | |
21824 | #include <linux/memcontrol.h> | |
c7c16703 | 21825 | @@ -281,6 +282,18 @@ EXPORT_SYMBOL(nr_node_ids); |
1a6e0f06 JK |
21826 | EXPORT_SYMBOL(nr_online_nodes); |
21827 | #endif | |
21828 | ||
21829 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
21830 | + | |
21831 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
21832 | +# define cpu_lock_irqsave(cpu, flags) \ | |
21833 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
21834 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
21835 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
21836 | +#else | |
21837 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
21838 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
21839 | +#endif | |
21840 | + | |
21841 | int page_group_by_mobility_disabled __read_mostly; | |
21842 | ||
21843 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
c7c16703 | 21844 | @@ -1072,7 +1085,7 @@ static bool bulkfree_pcp_prepare(struct page *page) |
1a6e0f06 JK |
21845 | #endif /* CONFIG_DEBUG_VM */ |
21846 | ||
21847 | /* | |
21848 | - * Frees a number of pages from the PCP lists | |
21849 | + * Frees a number of pages which have been collected from the pcp lists. | |
21850 | * Assumes all pages on list are in same zone, and of same order. | |
21851 | * count is the number of pages to free. | |
21852 | * | |
c7c16703 | 21853 | @@ -1083,19 +1096,58 @@ static bool bulkfree_pcp_prepare(struct page *page) |
1a6e0f06 JK |
21854 | * pinned" detection logic. |
21855 | */ | |
21856 | static void free_pcppages_bulk(struct zone *zone, int count, | |
21857 | - struct per_cpu_pages *pcp) | |
21858 | + struct list_head *list) | |
21859 | { | |
21860 | - int migratetype = 0; | |
21861 | - int batch_free = 0; | |
21862 | unsigned long nr_scanned; | |
21863 | bool isolated_pageblocks; | |
21864 | + unsigned long flags; | |
21865 | + | |
21866 | + spin_lock_irqsave(&zone->lock, flags); | |
21867 | ||
21868 | - spin_lock(&zone->lock); | |
21869 | isolated_pageblocks = has_isolate_pageblock(zone); | |
21870 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
21871 | if (nr_scanned) | |
21872 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
21873 | ||
21874 | + while (!list_empty(list)) { | |
21875 | + struct page *page; | |
21876 | + int mt; /* migratetype of the to-be-freed page */ | |
21877 | + | |
21878 | + page = list_first_entry(list, struct page, lru); | |
21879 | + /* must delete as __free_one_page list manipulates */ | |
21880 | + list_del(&page->lru); | |
21881 | + | |
21882 | + mt = get_pcppage_migratetype(page); | |
21883 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
21884 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
21885 | + /* Pageblock could have been isolated meanwhile */ | |
21886 | + if (unlikely(isolated_pageblocks)) | |
21887 | + mt = get_pageblock_migratetype(page); | |
21888 | + | |
21889 | + if (bulkfree_pcp_prepare(page)) | |
21890 | + continue; | |
21891 | + | |
21892 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
21893 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
21894 | + count--; | |
21895 | + } | |
21896 | + WARN_ON(count != 0); | |
21897 | + spin_unlock_irqrestore(&zone->lock, flags); | |
21898 | +} | |
21899 | + | |
21900 | +/* | |
21901 | + * Moves a number of pages from the PCP lists to free list which | |
21902 | + * is freed outside of the locked region. | |
21903 | + * | |
21904 | + * Assumes all pages on list are in same zone, and of same order. | |
21905 | + * count is the number of pages to free. | |
21906 | + */ | |
21907 | +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, | |
21908 | + struct list_head *dst) | |
21909 | +{ | |
21910 | + int migratetype = 0; | |
21911 | + int batch_free = 0; | |
21912 | + | |
21913 | while (count) { | |
21914 | struct page *page; | |
21915 | struct list_head *list; | |
c7c16703 | 21916 | @@ -1111,7 +1163,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, |
1a6e0f06 JK |
21917 | batch_free++; |
21918 | if (++migratetype == MIGRATE_PCPTYPES) | |
21919 | migratetype = 0; | |
21920 | - list = &pcp->lists[migratetype]; | |
21921 | + list = &src->lists[migratetype]; | |
21922 | } while (list_empty(list)); | |
21923 | ||
21924 | /* This is the only non-empty list. Free them all. */ | |
c7c16703 | 21925 | @@ -1119,27 +1171,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, |
1a6e0f06 JK |
21926 | batch_free = count; |
21927 | ||
21928 | do { | |
21929 | - int mt; /* migratetype of the to-be-freed page */ | |
21930 | - | |
21931 | page = list_last_entry(list, struct page, lru); | |
21932 | - /* must delete as __free_one_page list manipulates */ | |
21933 | list_del(&page->lru); | |
21934 | ||
21935 | - mt = get_pcppage_migratetype(page); | |
21936 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
21937 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
21938 | - /* Pageblock could have been isolated meanwhile */ | |
21939 | - if (unlikely(isolated_pageblocks)) | |
21940 | - mt = get_pageblock_migratetype(page); | |
21941 | - | |
21942 | - if (bulkfree_pcp_prepare(page)) | |
21943 | - continue; | |
21944 | - | |
21945 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
21946 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
21947 | + list_add(&page->lru, dst); | |
21948 | } while (--count && --batch_free && !list_empty(list)); | |
21949 | } | |
21950 | - spin_unlock(&zone->lock); | |
21951 | } | |
21952 | ||
21953 | static void free_one_page(struct zone *zone, | |
c7c16703 | 21954 | @@ -1148,7 +1185,9 @@ static void free_one_page(struct zone *zone, |
1a6e0f06 JK |
21955 | int migratetype) |
21956 | { | |
21957 | unsigned long nr_scanned; | |
21958 | - spin_lock(&zone->lock); | |
21959 | + unsigned long flags; | |
21960 | + | |
21961 | + spin_lock_irqsave(&zone->lock, flags); | |
21962 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
21963 | if (nr_scanned) | |
21964 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
c7c16703 | 21965 | @@ -1158,7 +1197,7 @@ static void free_one_page(struct zone *zone, |
1a6e0f06 JK |
21966 | migratetype = get_pfnblock_migratetype(page, pfn); |
21967 | } | |
21968 | __free_one_page(page, pfn, zone, order, migratetype); | |
21969 | - spin_unlock(&zone->lock); | |
21970 | + spin_unlock_irqrestore(&zone->lock, flags); | |
21971 | } | |
21972 | ||
21973 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |
c7c16703 | 21974 | @@ -1244,10 +1283,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) |
1a6e0f06 JK |
21975 | return; |
21976 | ||
21977 | migratetype = get_pfnblock_migratetype(page, pfn); | |
21978 | - local_irq_save(flags); | |
21979 | + local_lock_irqsave(pa_lock, flags); | |
21980 | __count_vm_events(PGFREE, 1 << order); | |
21981 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
21982 | - local_irq_restore(flags); | |
21983 | + local_unlock_irqrestore(pa_lock, flags); | |
21984 | } | |
21985 | ||
21986 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |
c7c16703 | 21987 | @@ -2246,16 +2285,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, |
1a6e0f06 JK |
21988 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
21989 | { | |
21990 | unsigned long flags; | |
21991 | + LIST_HEAD(dst); | |
21992 | int to_drain, batch; | |
21993 | ||
21994 | - local_irq_save(flags); | |
21995 | + local_lock_irqsave(pa_lock, flags); | |
21996 | batch = READ_ONCE(pcp->batch); | |
21997 | to_drain = min(pcp->count, batch); | |
21998 | if (to_drain > 0) { | |
21999 | - free_pcppages_bulk(zone, to_drain, pcp); | |
22000 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
22001 | pcp->count -= to_drain; | |
22002 | } | |
22003 | - local_irq_restore(flags); | |
22004 | + local_unlock_irqrestore(pa_lock, flags); | |
22005 | + free_pcppages_bulk(zone, to_drain, &dst); | |
22006 | } | |
22007 | #endif | |
22008 | ||
c7c16703 | 22009 | @@ -2271,16 +2312,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) |
1a6e0f06 JK |
22010 | unsigned long flags; |
22011 | struct per_cpu_pageset *pset; | |
22012 | struct per_cpu_pages *pcp; | |
22013 | + LIST_HEAD(dst); | |
22014 | + int count; | |
22015 | ||
22016 | - local_irq_save(flags); | |
22017 | + cpu_lock_irqsave(cpu, flags); | |
22018 | pset = per_cpu_ptr(zone->pageset, cpu); | |
22019 | ||
22020 | pcp = &pset->pcp; | |
22021 | - if (pcp->count) { | |
22022 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
22023 | + count = pcp->count; | |
22024 | + if (count) { | |
22025 | + isolate_pcp_pages(count, pcp, &dst); | |
22026 | pcp->count = 0; | |
22027 | } | |
22028 | - local_irq_restore(flags); | |
22029 | + cpu_unlock_irqrestore(cpu, flags); | |
22030 | + if (count) | |
22031 | + free_pcppages_bulk(zone, count, &dst); | |
22032 | } | |
22033 | ||
22034 | /* | |
c7c16703 | 22035 | @@ -2366,8 +2412,17 @@ void drain_all_pages(struct zone *zone) |
1a6e0f06 JK |
22036 | else |
22037 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
22038 | } | |
22039 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
22040 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | |
22041 | zone, 1); | |
22042 | +#else | |
22043 | + for_each_cpu(cpu, &cpus_with_pcps) { | |
22044 | + if (zone) | |
22045 | + drain_pages_zone(cpu, zone); | |
22046 | + else | |
22047 | + drain_pages(cpu); | |
22048 | + } | |
22049 | +#endif | |
22050 | } | |
22051 | ||
22052 | #ifdef CONFIG_HIBERNATION | |
c7c16703 | 22053 | @@ -2427,7 +2482,7 @@ void free_hot_cold_page(struct page *page, bool cold) |
1a6e0f06 JK |
22054 | |
22055 | migratetype = get_pfnblock_migratetype(page, pfn); | |
22056 | set_pcppage_migratetype(page, migratetype); | |
22057 | - local_irq_save(flags); | |
22058 | + local_lock_irqsave(pa_lock, flags); | |
22059 | __count_vm_event(PGFREE); | |
22060 | ||
22061 | /* | |
c7c16703 | 22062 | @@ -2453,12 +2508,17 @@ void free_hot_cold_page(struct page *page, bool cold) |
1a6e0f06 JK |
22063 | pcp->count++; |
22064 | if (pcp->count >= pcp->high) { | |
22065 | unsigned long batch = READ_ONCE(pcp->batch); | |
22066 | - free_pcppages_bulk(zone, batch, pcp); | |
22067 | + LIST_HEAD(dst); | |
22068 | + | |
22069 | + isolate_pcp_pages(batch, pcp, &dst); | |
22070 | pcp->count -= batch; | |
22071 | + local_unlock_irqrestore(pa_lock, flags); | |
22072 | + free_pcppages_bulk(zone, batch, &dst); | |
22073 | + return; | |
22074 | } | |
22075 | ||
22076 | out: | |
22077 | - local_irq_restore(flags); | |
22078 | + local_unlock_irqrestore(pa_lock, flags); | |
22079 | } | |
22080 | ||
22081 | /* | |
c7c16703 | 22082 | @@ -2600,7 +2660,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
22083 | struct per_cpu_pages *pcp; |
22084 | struct list_head *list; | |
22085 | ||
22086 | - local_irq_save(flags); | |
22087 | + local_lock_irqsave(pa_lock, flags); | |
22088 | do { | |
22089 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
22090 | list = &pcp->lists[migratetype]; | |
c7c16703 | 22091 | @@ -2627,7 +2687,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
22092 | * allocate greater than order-1 page units with __GFP_NOFAIL. |
22093 | */ | |
22094 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | |
22095 | - spin_lock_irqsave(&zone->lock, flags); | |
22096 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
22097 | ||
22098 | do { | |
22099 | page = NULL; | |
c7c16703 | 22100 | @@ -2639,22 +2699,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
22101 | if (!page) |
22102 | page = __rmqueue(zone, order, migratetype); | |
22103 | } while (page && check_new_pages(page, order)); | |
22104 | - spin_unlock(&zone->lock); | |
22105 | - if (!page) | |
22106 | + if (!page) { | |
22107 | + spin_unlock(&zone->lock); | |
22108 | goto failed; | |
22109 | + } | |
22110 | __mod_zone_freepage_state(zone, -(1 << order), | |
22111 | get_pcppage_migratetype(page)); | |
22112 | + spin_unlock(&zone->lock); | |
22113 | } | |
22114 | ||
22115 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
22116 | zone_statistics(preferred_zone, zone, gfp_flags); | |
22117 | - local_irq_restore(flags); | |
22118 | + local_unlock_irqrestore(pa_lock, flags); | |
22119 | ||
22120 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | |
22121 | return page; | |
22122 | ||
22123 | failed: | |
22124 | - local_irq_restore(flags); | |
22125 | + local_unlock_irqrestore(pa_lock, flags); | |
22126 | return NULL; | |
22127 | } | |
22128 | ||
c7c16703 | 22129 | @@ -6505,7 +6567,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, |
1a6e0f06 JK |
22130 | int cpu = (unsigned long)hcpu; |
22131 | ||
22132 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
22133 | + local_lock_irq_on(swapvec_lock, cpu); | |
22134 | lru_add_drain_cpu(cpu); | |
22135 | + local_unlock_irq_on(swapvec_lock, cpu); | |
22136 | drain_pages(cpu); | |
22137 | ||
22138 | /* | |
c7c16703 | 22139 | @@ -6531,6 +6595,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, |
1a6e0f06 JK |
22140 | void __init page_alloc_init(void) |
22141 | { | |
22142 | hotcpu_notifier(page_alloc_cpu_notify, 0); | |
22143 | + local_irq_lock_init(pa_lock); | |
22144 | } | |
22145 | ||
22146 | /* | |
c7c16703 | 22147 | @@ -7359,7 +7424,7 @@ void zone_pcp_reset(struct zone *zone) |
1a6e0f06 JK |
22148 | struct per_cpu_pageset *pset; |
22149 | ||
22150 | /* avoid races with drain_pages() */ | |
22151 | - local_irq_save(flags); | |
22152 | + local_lock_irqsave(pa_lock, flags); | |
22153 | if (zone->pageset != &boot_pageset) { | |
22154 | for_each_online_cpu(cpu) { | |
22155 | pset = per_cpu_ptr(zone->pageset, cpu); | |
c7c16703 | 22156 | @@ -7368,7 +7433,7 @@ void zone_pcp_reset(struct zone *zone) |
1a6e0f06 JK |
22157 | free_percpu(zone->pageset); |
22158 | zone->pageset = &boot_pageset; | |
22159 | } | |
22160 | - local_irq_restore(flags); | |
22161 | + local_unlock_irqrestore(pa_lock, flags); | |
22162 | } | |
22163 | ||
22164 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
22165 | diff --git a/mm/slab.h b/mm/slab.h | |
c7c16703 | 22166 | index bc05fdc3edce..610cf61634f0 100644 |
1a6e0f06 JK |
22167 | --- a/mm/slab.h |
22168 | +++ b/mm/slab.h | |
22169 | @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |
22170 | * The slab lists for all objects. | |
22171 | */ | |
22172 | struct kmem_cache_node { | |
22173 | +#ifdef CONFIG_SLUB | |
22174 | + raw_spinlock_t list_lock; | |
22175 | +#else | |
22176 | spinlock_t list_lock; | |
22177 | +#endif | |
22178 | ||
22179 | #ifdef CONFIG_SLAB | |
22180 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
22181 | diff --git a/mm/slub.c b/mm/slub.c | |
c7c16703 | 22182 | index 2b3e740609e9..1732f9c5d31f 100644 |
1a6e0f06 JK |
22183 | --- a/mm/slub.c |
22184 | +++ b/mm/slub.c | |
c7c16703 | 22185 | @@ -1141,7 +1141,7 @@ static noinline int free_debug_processing( |
1a6e0f06 JK |
22186 | unsigned long uninitialized_var(flags); |
22187 | int ret = 0; | |
22188 | ||
22189 | - spin_lock_irqsave(&n->list_lock, flags); | |
22190 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22191 | slab_lock(page); | |
22192 | ||
22193 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | |
c7c16703 | 22194 | @@ -1176,7 +1176,7 @@ static noinline int free_debug_processing( |
1a6e0f06 JK |
22195 | bulk_cnt, cnt); |
22196 | ||
22197 | slab_unlock(page); | |
22198 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22199 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22200 | if (!ret) | |
22201 | slab_fix(s, "Object at 0x%p not freed", object); | |
22202 | return ret; | |
c7c16703 | 22203 | @@ -1304,6 +1304,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, |
1a6e0f06 JK |
22204 | |
22205 | #endif /* CONFIG_SLUB_DEBUG */ | |
22206 | ||
22207 | +struct slub_free_list { | |
22208 | + raw_spinlock_t lock; | |
22209 | + struct list_head list; | |
22210 | +}; | |
22211 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
22212 | + | |
22213 | /* | |
22214 | * Hooks for other subsystems that check memory allocations. In a typical | |
22215 | * production configuration these hooks all should produce no code at all. | |
c7c16703 | 22216 | @@ -1523,10 +1529,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) |
1a6e0f06 JK |
22217 | void *start, *p; |
22218 | int idx, order; | |
22219 | bool shuffle; | |
22220 | + bool enableirqs = false; | |
22221 | ||
22222 | flags &= gfp_allowed_mask; | |
22223 | ||
22224 | if (gfpflags_allow_blocking(flags)) | |
22225 | + enableirqs = true; | |
22226 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22227 | + if (system_state == SYSTEM_RUNNING) | |
22228 | + enableirqs = true; | |
22229 | +#endif | |
22230 | + if (enableirqs) | |
22231 | local_irq_enable(); | |
22232 | ||
22233 | flags |= s->allocflags; | |
c7c16703 | 22234 | @@ -1601,7 +1614,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) |
1a6e0f06 JK |
22235 | page->frozen = 1; |
22236 | ||
22237 | out: | |
22238 | - if (gfpflags_allow_blocking(flags)) | |
22239 | + if (enableirqs) | |
22240 | local_irq_disable(); | |
22241 | if (!page) | |
22242 | return NULL; | |
c7c16703 | 22243 | @@ -1660,6 +1673,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
22244 | __free_pages(page, order); |
22245 | } | |
22246 | ||
22247 | +static void free_delayed(struct list_head *h) | |
22248 | +{ | |
22249 | + while(!list_empty(h)) { | |
22250 | + struct page *page = list_first_entry(h, struct page, lru); | |
22251 | + | |
22252 | + list_del(&page->lru); | |
22253 | + __free_slab(page->slab_cache, page); | |
22254 | + } | |
22255 | +} | |
22256 | + | |
22257 | #define need_reserve_slab_rcu \ | |
22258 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
22259 | ||
c7c16703 | 22260 | @@ -1691,6 +1714,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
22261 | } |
22262 | ||
22263 | call_rcu(head, rcu_free_slab); | |
22264 | + } else if (irqs_disabled()) { | |
22265 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
22266 | + | |
22267 | + raw_spin_lock(&f->lock); | |
22268 | + list_add(&page->lru, &f->list); | |
22269 | + raw_spin_unlock(&f->lock); | |
22270 | } else | |
22271 | __free_slab(s, page); | |
22272 | } | |
c7c16703 | 22273 | @@ -1798,7 +1827,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, |
1a6e0f06 JK |
22274 | if (!n || !n->nr_partial) |
22275 | return NULL; | |
22276 | ||
22277 | - spin_lock(&n->list_lock); | |
22278 | + raw_spin_lock(&n->list_lock); | |
22279 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
22280 | void *t; | |
22281 | ||
c7c16703 | 22282 | @@ -1823,7 +1852,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, |
1a6e0f06 JK |
22283 | break; |
22284 | ||
22285 | } | |
22286 | - spin_unlock(&n->list_lock); | |
22287 | + raw_spin_unlock(&n->list_lock); | |
22288 | return object; | |
22289 | } | |
22290 | ||
c7c16703 | 22291 | @@ -2069,7 +2098,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22292 | * that acquire_slab() will see a slab page that |
22293 | * is frozen | |
22294 | */ | |
22295 | - spin_lock(&n->list_lock); | |
22296 | + raw_spin_lock(&n->list_lock); | |
22297 | } | |
22298 | } else { | |
22299 | m = M_FULL; | |
c7c16703 | 22300 | @@ -2080,7 +2109,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22301 | * slabs from diagnostic functions will not see |
22302 | * any frozen slabs. | |
22303 | */ | |
22304 | - spin_lock(&n->list_lock); | |
22305 | + raw_spin_lock(&n->list_lock); | |
22306 | } | |
22307 | } | |
22308 | ||
c7c16703 | 22309 | @@ -2115,7 +2144,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22310 | goto redo; |
22311 | ||
22312 | if (lock) | |
22313 | - spin_unlock(&n->list_lock); | |
22314 | + raw_spin_unlock(&n->list_lock); | |
22315 | ||
22316 | if (m == M_FREE) { | |
22317 | stat(s, DEACTIVATE_EMPTY); | |
c7c16703 | 22318 | @@ -2147,10 +2176,10 @@ static void unfreeze_partials(struct kmem_cache *s, |
1a6e0f06 JK |
22319 | n2 = get_node(s, page_to_nid(page)); |
22320 | if (n != n2) { | |
22321 | if (n) | |
22322 | - spin_unlock(&n->list_lock); | |
22323 | + raw_spin_unlock(&n->list_lock); | |
22324 | ||
22325 | n = n2; | |
22326 | - spin_lock(&n->list_lock); | |
22327 | + raw_spin_lock(&n->list_lock); | |
22328 | } | |
22329 | ||
22330 | do { | |
c7c16703 | 22331 | @@ -2179,7 +2208,7 @@ static void unfreeze_partials(struct kmem_cache *s, |
1a6e0f06 JK |
22332 | } |
22333 | ||
22334 | if (n) | |
22335 | - spin_unlock(&n->list_lock); | |
22336 | + raw_spin_unlock(&n->list_lock); | |
22337 | ||
22338 | while (discard_page) { | |
22339 | page = discard_page; | |
c7c16703 | 22340 | @@ -2218,14 +2247,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) |
1a6e0f06 JK |
22341 | pobjects = oldpage->pobjects; |
22342 | pages = oldpage->pages; | |
22343 | if (drain && pobjects > s->cpu_partial) { | |
22344 | + struct slub_free_list *f; | |
22345 | unsigned long flags; | |
22346 | + LIST_HEAD(tofree); | |
22347 | /* | |
22348 | * partial array is full. Move the existing | |
22349 | * set to the per node partial list. | |
22350 | */ | |
22351 | local_irq_save(flags); | |
22352 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
22353 | + f = this_cpu_ptr(&slub_free_list); | |
22354 | + raw_spin_lock(&f->lock); | |
22355 | + list_splice_init(&f->list, &tofree); | |
22356 | + raw_spin_unlock(&f->lock); | |
22357 | local_irq_restore(flags); | |
22358 | + free_delayed(&tofree); | |
22359 | oldpage = NULL; | |
22360 | pobjects = 0; | |
22361 | pages = 0; | |
c7c16703 | 22362 | @@ -2297,7 +2333,22 @@ static bool has_cpu_slab(int cpu, void *info) |
1a6e0f06 JK |
22363 | |
22364 | static void flush_all(struct kmem_cache *s) | |
22365 | { | |
22366 | + LIST_HEAD(tofree); | |
22367 | + int cpu; | |
22368 | + | |
22369 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
22370 | + for_each_online_cpu(cpu) { | |
22371 | + struct slub_free_list *f; | |
22372 | + | |
22373 | + if (!has_cpu_slab(cpu, s)) | |
22374 | + continue; | |
22375 | + | |
22376 | + f = &per_cpu(slub_free_list, cpu); | |
22377 | + raw_spin_lock_irq(&f->lock); | |
22378 | + list_splice_init(&f->list, &tofree); | |
22379 | + raw_spin_unlock_irq(&f->lock); | |
22380 | + free_delayed(&tofree); | |
22381 | + } | |
22382 | } | |
22383 | ||
22384 | /* | |
c7c16703 | 22385 | @@ -2352,10 +2403,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, |
1a6e0f06 JK |
22386 | unsigned long x = 0; |
22387 | struct page *page; | |
22388 | ||
22389 | - spin_lock_irqsave(&n->list_lock, flags); | |
22390 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22391 | list_for_each_entry(page, &n->partial, lru) | |
22392 | x += get_count(page); | |
22393 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22394 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22395 | return x; | |
22396 | } | |
22397 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
c7c16703 | 22398 | @@ -2493,8 +2544,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
22399 | * already disabled (which is the case for bulk allocation). |
22400 | */ | |
22401 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22402 | - unsigned long addr, struct kmem_cache_cpu *c) | |
22403 | + unsigned long addr, struct kmem_cache_cpu *c, | |
22404 | + struct list_head *to_free) | |
22405 | { | |
22406 | + struct slub_free_list *f; | |
22407 | void *freelist; | |
22408 | struct page *page; | |
22409 | ||
c7c16703 | 22410 | @@ -2554,6 +2607,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
22411 | VM_BUG_ON(!c->page->frozen); |
22412 | c->freelist = get_freepointer(s, freelist); | |
22413 | c->tid = next_tid(c->tid); | |
22414 | + | |
22415 | +out: | |
22416 | + f = this_cpu_ptr(&slub_free_list); | |
22417 | + raw_spin_lock(&f->lock); | |
22418 | + list_splice_init(&f->list, to_free); | |
22419 | + raw_spin_unlock(&f->lock); | |
22420 | + | |
22421 | return freelist; | |
22422 | ||
22423 | new_slab: | |
c7c16703 | 22424 | @@ -2585,7 +2645,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
22425 | deactivate_slab(s, page, get_freepointer(s, freelist)); |
22426 | c->page = NULL; | |
22427 | c->freelist = NULL; | |
22428 | - return freelist; | |
22429 | + goto out; | |
22430 | } | |
22431 | ||
22432 | /* | |
c7c16703 | 22433 | @@ -2597,6 +2657,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
22434 | { |
22435 | void *p; | |
22436 | unsigned long flags; | |
22437 | + LIST_HEAD(tofree); | |
22438 | ||
22439 | local_irq_save(flags); | |
22440 | #ifdef CONFIG_PREEMPT | |
c7c16703 | 22441 | @@ -2608,8 +2669,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
22442 | c = this_cpu_ptr(s->cpu_slab); |
22443 | #endif | |
22444 | ||
22445 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
22446 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
22447 | local_irq_restore(flags); | |
22448 | + free_delayed(&tofree); | |
22449 | return p; | |
22450 | } | |
22451 | ||
c7c16703 | 22452 | @@ -2795,7 +2857,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22453 | |
22454 | do { | |
22455 | if (unlikely(n)) { | |
22456 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22457 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22458 | n = NULL; | |
22459 | } | |
22460 | prior = page->freelist; | |
c7c16703 | 22461 | @@ -2827,7 +2889,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22462 | * Otherwise the list_lock will synchronize with |
22463 | * other processors updating the list of slabs. | |
22464 | */ | |
22465 | - spin_lock_irqsave(&n->list_lock, flags); | |
22466 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22467 | ||
22468 | } | |
22469 | } | |
c7c16703 | 22470 | @@ -2869,7 +2931,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22471 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
22472 | stat(s, FREE_ADD_PARTIAL); | |
22473 | } | |
22474 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22475 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22476 | return; | |
22477 | ||
22478 | slab_empty: | |
c7c16703 | 22479 | @@ -2884,7 +2946,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22480 | remove_full(s, n, page); |
22481 | } | |
22482 | ||
22483 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22484 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22485 | stat(s, FREE_SLAB); | |
22486 | discard_slab(s, page); | |
22487 | } | |
c7c16703 | 22488 | @@ -3089,6 +3151,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
22489 | void **p) |
22490 | { | |
22491 | struct kmem_cache_cpu *c; | |
22492 | + LIST_HEAD(to_free); | |
22493 | int i; | |
22494 | ||
22495 | /* memcg and kmem_cache debug support */ | |
c7c16703 | 22496 | @@ -3112,7 +3175,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
22497 | * of re-populating per CPU c->freelist |
22498 | */ | |
22499 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
22500 | - _RET_IP_, c); | |
22501 | + _RET_IP_, c, &to_free); | |
22502 | if (unlikely(!p[i])) | |
22503 | goto error; | |
22504 | ||
c7c16703 | 22505 | @@ -3124,6 +3187,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
22506 | } |
22507 | c->tid = next_tid(c->tid); | |
22508 | local_irq_enable(); | |
22509 | + free_delayed(&to_free); | |
22510 | ||
22511 | /* Clear memory outside IRQ disabled fastpath loop */ | |
22512 | if (unlikely(flags & __GFP_ZERO)) { | |
c7c16703 | 22513 | @@ -3271,7 +3335,7 @@ static void |
1a6e0f06 JK |
22514 | init_kmem_cache_node(struct kmem_cache_node *n) |
22515 | { | |
22516 | n->nr_partial = 0; | |
22517 | - spin_lock_init(&n->list_lock); | |
22518 | + raw_spin_lock_init(&n->list_lock); | |
22519 | INIT_LIST_HEAD(&n->partial); | |
22520 | #ifdef CONFIG_SLUB_DEBUG | |
22521 | atomic_long_set(&n->nr_slabs, 0); | |
c7c16703 | 22522 | @@ -3615,6 +3679,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22523 | const char *text) |
22524 | { | |
22525 | #ifdef CONFIG_SLUB_DEBUG | |
22526 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
22527 | + /* XXX move out of irq-off section */ | |
22528 | + slab_err(s, page, text, s->name); | |
22529 | +#else | |
22530 | void *addr = page_address(page); | |
22531 | void *p; | |
22532 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * | |
c7c16703 | 22533 | @@ -3635,6 +3703,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
22534 | slab_unlock(page); |
22535 | kfree(map); | |
22536 | #endif | |
22537 | +#endif | |
22538 | } | |
22539 | ||
22540 | /* | |
c7c16703 | 22541 | @@ -3648,7 +3717,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
1a6e0f06 JK |
22542 | struct page *page, *h; |
22543 | ||
22544 | BUG_ON(irqs_disabled()); | |
22545 | - spin_lock_irq(&n->list_lock); | |
22546 | + raw_spin_lock_irq(&n->list_lock); | |
22547 | list_for_each_entry_safe(page, h, &n->partial, lru) { | |
22548 | if (!page->inuse) { | |
22549 | remove_partial(n, page); | |
c7c16703 | 22550 | @@ -3658,7 +3727,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
1a6e0f06 JK |
22551 | "Objects remaining in %s on __kmem_cache_shutdown()"); |
22552 | } | |
22553 | } | |
22554 | - spin_unlock_irq(&n->list_lock); | |
22555 | + raw_spin_unlock_irq(&n->list_lock); | |
22556 | ||
22557 | list_for_each_entry_safe(page, h, &discard, lru) | |
22558 | discard_slab(s, page); | |
c7c16703 | 22559 | @@ -3916,7 +3985,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) |
1a6e0f06 JK |
22560 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) |
22561 | INIT_LIST_HEAD(promote + i); | |
22562 | ||
22563 | - spin_lock_irqsave(&n->list_lock, flags); | |
22564 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22565 | ||
22566 | /* | |
22567 | * Build lists of slabs to discard or promote. | |
c7c16703 | 22568 | @@ -3947,7 +4016,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) |
1a6e0f06 JK |
22569 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) |
22570 | list_splice(promote + i, &n->partial); | |
22571 | ||
22572 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22573 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22574 | ||
22575 | /* Release empty slabs */ | |
22576 | list_for_each_entry_safe(page, t, &discard, lru) | |
c7c16703 | 22577 | @@ -4123,6 +4192,12 @@ void __init kmem_cache_init(void) |
1a6e0f06 JK |
22578 | { |
22579 | static __initdata struct kmem_cache boot_kmem_cache, | |
22580 | boot_kmem_cache_node; | |
22581 | + int cpu; | |
22582 | + | |
22583 | + for_each_possible_cpu(cpu) { | |
22584 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
22585 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
22586 | + } | |
22587 | ||
22588 | if (debug_guardpage_minorder()) | |
22589 | slub_max_order = 0; | |
c7c16703 | 22590 | @@ -4331,7 +4406,7 @@ static int validate_slab_node(struct kmem_cache *s, |
1a6e0f06 JK |
22591 | struct page *page; |
22592 | unsigned long flags; | |
22593 | ||
22594 | - spin_lock_irqsave(&n->list_lock, flags); | |
22595 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22596 | ||
22597 | list_for_each_entry(page, &n->partial, lru) { | |
22598 | validate_slab_slab(s, page, map); | |
c7c16703 | 22599 | @@ -4353,7 +4428,7 @@ static int validate_slab_node(struct kmem_cache *s, |
1a6e0f06 JK |
22600 | s->name, count, atomic_long_read(&n->nr_slabs)); |
22601 | ||
22602 | out: | |
22603 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22604 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22605 | return count; | |
22606 | } | |
22607 | ||
c7c16703 | 22608 | @@ -4541,12 +4616,12 @@ static int list_locations(struct kmem_cache *s, char *buf, |
1a6e0f06 JK |
22609 | if (!atomic_long_read(&n->nr_slabs)) |
22610 | continue; | |
22611 | ||
22612 | - spin_lock_irqsave(&n->list_lock, flags); | |
22613 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22614 | list_for_each_entry(page, &n->partial, lru) | |
22615 | process_slab(&t, s, page, alloc, map); | |
22616 | list_for_each_entry(page, &n->full, lru) | |
22617 | process_slab(&t, s, page, alloc, map); | |
22618 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22619 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22620 | } | |
22621 | ||
22622 | for (i = 0; i < t.count; i++) { | |
22623 | diff --git a/mm/swap.c b/mm/swap.c | |
c7c16703 | 22624 | index 4dcf852e1e6d..69c3a5b24060 100644 |
1a6e0f06 JK |
22625 | --- a/mm/swap.c |
22626 | +++ b/mm/swap.c | |
22627 | @@ -32,6 +32,7 @@ | |
22628 | #include <linux/memcontrol.h> | |
22629 | #include <linux/gfp.h> | |
22630 | #include <linux/uio.h> | |
22631 | +#include <linux/locallock.h> | |
22632 | #include <linux/hugetlb.h> | |
22633 | #include <linux/page_idle.h> | |
22634 | ||
22635 | @@ -50,6 +51,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); | |
22636 | #ifdef CONFIG_SMP | |
22637 | static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
22638 | #endif | |
22639 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
22640 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
22641 | ||
22642 | /* | |
22643 | * This path almost never happens for VM activity - pages are normally | |
22644 | @@ -240,11 +243,11 @@ void rotate_reclaimable_page(struct page *page) | |
22645 | unsigned long flags; | |
22646 | ||
22647 | get_page(page); | |
22648 | - local_irq_save(flags); | |
22649 | + local_lock_irqsave(rotate_lock, flags); | |
22650 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
22651 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
22652 | pagevec_move_tail(pvec); | |
22653 | - local_irq_restore(flags); | |
22654 | + local_unlock_irqrestore(rotate_lock, flags); | |
22655 | } | |
22656 | } | |
22657 | ||
22658 | @@ -294,12 +297,13 @@ void activate_page(struct page *page) | |
22659 | { | |
22660 | page = compound_head(page); | |
22661 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
22662 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
22663 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
22664 | + activate_page_pvecs); | |
22665 | ||
22666 | get_page(page); | |
22667 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
22668 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
22669 | - put_cpu_var(activate_page_pvecs); | |
22670 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
22671 | } | |
22672 | } | |
22673 | ||
22674 | @@ -326,7 +330,7 @@ void activate_page(struct page *page) | |
22675 | ||
22676 | static void __lru_cache_activate_page(struct page *page) | |
22677 | { | |
22678 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
22679 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
22680 | int i; | |
22681 | ||
22682 | /* | |
22683 | @@ -348,7 +352,7 @@ static void __lru_cache_activate_page(struct page *page) | |
22684 | } | |
22685 | } | |
22686 | ||
22687 | - put_cpu_var(lru_add_pvec); | |
22688 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
22689 | } | |
22690 | ||
22691 | /* | |
22692 | @@ -390,12 +394,12 @@ EXPORT_SYMBOL(mark_page_accessed); | |
22693 | ||
22694 | static void __lru_cache_add(struct page *page) | |
22695 | { | |
22696 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
22697 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
22698 | ||
22699 | get_page(page); | |
22700 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
22701 | __pagevec_lru_add(pvec); | |
22702 | - put_cpu_var(lru_add_pvec); | |
22703 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
22704 | } | |
22705 | ||
22706 | /** | |
22707 | @@ -593,9 +597,15 @@ void lru_add_drain_cpu(int cpu) | |
22708 | unsigned long flags; | |
22709 | ||
22710 | /* No harm done if a racing interrupt already did this */ | |
22711 | - local_irq_save(flags); | |
22712 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
22713 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
22714 | pagevec_move_tail(pvec); | |
22715 | - local_irq_restore(flags); | |
22716 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
22717 | +#else | |
22718 | + local_lock_irqsave(rotate_lock, flags); | |
22719 | + pagevec_move_tail(pvec); | |
22720 | + local_unlock_irqrestore(rotate_lock, flags); | |
22721 | +#endif | |
22722 | } | |
22723 | ||
22724 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
22725 | @@ -627,11 +637,12 @@ void deactivate_file_page(struct page *page) | |
22726 | return; | |
22727 | ||
22728 | if (likely(get_page_unless_zero(page))) { | |
22729 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
22730 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
22731 | + lru_deactivate_file_pvecs); | |
22732 | ||
22733 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
22734 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
22735 | - put_cpu_var(lru_deactivate_file_pvecs); | |
22736 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
22737 | } | |
22738 | } | |
22739 | ||
22740 | @@ -646,27 +657,31 @@ void deactivate_file_page(struct page *page) | |
22741 | void deactivate_page(struct page *page) | |
22742 | { | |
22743 | if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { | |
22744 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); | |
22745 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
22746 | + lru_deactivate_pvecs); | |
22747 | ||
22748 | get_page(page); | |
22749 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
22750 | pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); | |
22751 | - put_cpu_var(lru_deactivate_pvecs); | |
22752 | + put_locked_var(swapvec_lock, lru_deactivate_pvecs); | |
22753 | } | |
22754 | } | |
22755 | ||
22756 | void lru_add_drain(void) | |
22757 | { | |
22758 | - lru_add_drain_cpu(get_cpu()); | |
22759 | - put_cpu(); | |
22760 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
22761 | + local_unlock_cpu(swapvec_lock); | |
22762 | } | |
22763 | ||
22764 | -static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
22765 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
22766 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
22767 | { | |
22768 | - lru_add_drain(); | |
22769 | + local_lock_on(swapvec_lock, cpu); | |
22770 | + lru_add_drain_cpu(cpu); | |
22771 | + local_unlock_on(swapvec_lock, cpu); | |
22772 | } | |
22773 | ||
22774 | -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
22775 | +#else | |
22776 | ||
22777 | /* | |
22778 | * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM | |
22779 | @@ -686,6 +701,22 @@ static int __init lru_init(void) | |
22780 | } | |
22781 | early_initcall(lru_init); | |
22782 | ||
22783 | +static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
22784 | +{ | |
22785 | + lru_add_drain(); | |
22786 | +} | |
22787 | + | |
22788 | +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
22789 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
22790 | +{ | |
22791 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
22792 | + | |
22793 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
22794 | + queue_work_on(cpu, lru_add_drain_wq, work); | |
22795 | + cpumask_set_cpu(cpu, has_work); | |
22796 | +} | |
22797 | +#endif | |
22798 | + | |
22799 | void lru_add_drain_all(void) | |
22800 | { | |
22801 | static DEFINE_MUTEX(lock); | |
22802 | @@ -697,21 +728,18 @@ void lru_add_drain_all(void) | |
22803 | cpumask_clear(&has_work); | |
22804 | ||
22805 | for_each_online_cpu(cpu) { | |
22806 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
22807 | - | |
22808 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || | |
22809 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
22810 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
22811 | pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || | |
22812 | - need_activate_page_drain(cpu)) { | |
22813 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
22814 | - queue_work_on(cpu, lru_add_drain_wq, work); | |
22815 | - cpumask_set_cpu(cpu, &has_work); | |
22816 | - } | |
22817 | + need_activate_page_drain(cpu)) | |
22818 | + remote_lru_add_drain(cpu, &has_work); | |
22819 | } | |
22820 | ||
22821 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
22822 | for_each_cpu(cpu, &has_work) | |
22823 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
22824 | +#endif | |
22825 | ||
22826 | put_online_cpus(); | |
22827 | mutex_unlock(&lock); | |
22828 | diff --git a/mm/truncate.c b/mm/truncate.c | |
c7c16703 | 22829 | index 8d8c62d89e6d..5bf1bd25d077 100644 |
1a6e0f06 JK |
22830 | --- a/mm/truncate.c |
22831 | +++ b/mm/truncate.c | |
22832 | @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping, | |
22833 | * protected by mapping->tree_lock. | |
22834 | */ | |
22835 | if (!workingset_node_shadows(node) && | |
22836 | - !list_empty(&node->private_list)) | |
22837 | - list_lru_del(&workingset_shadow_nodes, | |
22838 | + !list_empty(&node->private_list)) { | |
22839 | + local_lock(workingset_shadow_lock); | |
22840 | + list_lru_del(&__workingset_shadow_nodes, | |
22841 | &node->private_list); | |
22842 | + local_unlock(workingset_shadow_lock); | |
22843 | + } | |
22844 | __radix_tree_delete_node(&mapping->page_tree, node); | |
22845 | unlock: | |
22846 | spin_unlock_irq(&mapping->tree_lock); | |
22847 | diff --git a/mm/vmalloc.c b/mm/vmalloc.c | |
c7c16703 | 22848 | index f2481cb4e6b2..db4de08fa97c 100644 |
1a6e0f06 JK |
22849 | --- a/mm/vmalloc.c |
22850 | +++ b/mm/vmalloc.c | |
22851 | @@ -845,7 +845,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
22852 | struct vmap_block *vb; | |
22853 | struct vmap_area *va; | |
22854 | unsigned long vb_idx; | |
22855 | - int node, err; | |
22856 | + int node, err, cpu; | |
22857 | void *vaddr; | |
22858 | ||
22859 | node = numa_node_id(); | |
22860 | @@ -888,11 +888,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
22861 | BUG_ON(err); | |
22862 | radix_tree_preload_end(); | |
22863 | ||
22864 | - vbq = &get_cpu_var(vmap_block_queue); | |
22865 | + cpu = get_cpu_light(); | |
22866 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
22867 | spin_lock(&vbq->lock); | |
22868 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
22869 | spin_unlock(&vbq->lock); | |
22870 | - put_cpu_var(vmap_block_queue); | |
22871 | + put_cpu_light(); | |
22872 | ||
22873 | return vaddr; | |
22874 | } | |
22875 | @@ -961,6 +962,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
22876 | struct vmap_block *vb; | |
22877 | void *vaddr = NULL; | |
22878 | unsigned int order; | |
22879 | + int cpu; | |
22880 | ||
22881 | BUG_ON(offset_in_page(size)); | |
22882 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
22883 | @@ -975,7 +977,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
22884 | order = get_order(size); | |
22885 | ||
22886 | rcu_read_lock(); | |
22887 | - vbq = &get_cpu_var(vmap_block_queue); | |
22888 | + cpu = get_cpu_light(); | |
22889 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
22890 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
22891 | unsigned long pages_off; | |
22892 | ||
22893 | @@ -998,7 +1001,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
22894 | break; | |
22895 | } | |
22896 | ||
22897 | - put_cpu_var(vmap_block_queue); | |
22898 | + put_cpu_light(); | |
22899 | rcu_read_unlock(); | |
22900 | ||
22901 | /* Allocate new block if nothing was found */ | |
22902 | diff --git a/mm/vmstat.c b/mm/vmstat.c | |
c7c16703 | 22903 | index 604f26a4f696..312006d2db50 100644 |
1a6e0f06 JK |
22904 | --- a/mm/vmstat.c |
22905 | +++ b/mm/vmstat.c | |
22906 | @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
22907 | long x; | |
22908 | long t; | |
22909 | ||
22910 | + preempt_disable_rt(); | |
22911 | x = delta + __this_cpu_read(*p); | |
22912 | ||
22913 | t = __this_cpu_read(pcp->stat_threshold); | |
22914 | @@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
22915 | x = 0; | |
22916 | } | |
22917 | __this_cpu_write(*p, x); | |
22918 | + preempt_enable_rt(); | |
22919 | } | |
22920 | EXPORT_SYMBOL(__mod_zone_page_state); | |
22921 | ||
22922 | @@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
22923 | long x; | |
22924 | long t; | |
22925 | ||
22926 | + preempt_disable_rt(); | |
22927 | x = delta + __this_cpu_read(*p); | |
22928 | ||
22929 | t = __this_cpu_read(pcp->stat_threshold); | |
22930 | @@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
22931 | x = 0; | |
22932 | } | |
22933 | __this_cpu_write(*p, x); | |
22934 | + preempt_enable_rt(); | |
22935 | } | |
22936 | EXPORT_SYMBOL(__mod_node_page_state); | |
22937 | ||
22938 | @@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
22939 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
22940 | s8 v, t; | |
22941 | ||
22942 | + preempt_disable_rt(); | |
22943 | v = __this_cpu_inc_return(*p); | |
22944 | t = __this_cpu_read(pcp->stat_threshold); | |
22945 | if (unlikely(v > t)) { | |
22946 | @@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
22947 | zone_page_state_add(v + overstep, zone, item); | |
22948 | __this_cpu_write(*p, -overstep); | |
22949 | } | |
22950 | + preempt_enable_rt(); | |
22951 | } | |
22952 | ||
22953 | void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22954 | @@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22955 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
22956 | s8 v, t; | |
22957 | ||
22958 | + preempt_disable_rt(); | |
22959 | v = __this_cpu_inc_return(*p); | |
22960 | t = __this_cpu_read(pcp->stat_threshold); | |
22961 | if (unlikely(v > t)) { | |
22962 | @@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22963 | node_page_state_add(v + overstep, pgdat, item); | |
22964 | __this_cpu_write(*p, -overstep); | |
22965 | } | |
22966 | + preempt_enable_rt(); | |
22967 | } | |
22968 | ||
22969 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
22970 | @@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
22971 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
22972 | s8 v, t; | |
22973 | ||
22974 | + preempt_disable_rt(); | |
22975 | v = __this_cpu_dec_return(*p); | |
22976 | t = __this_cpu_read(pcp->stat_threshold); | |
22977 | if (unlikely(v < - t)) { | |
22978 | @@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
22979 | zone_page_state_add(v - overstep, zone, item); | |
22980 | __this_cpu_write(*p, overstep); | |
22981 | } | |
22982 | + preempt_enable_rt(); | |
22983 | } | |
22984 | ||
22985 | void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22986 | @@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22987 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
22988 | s8 v, t; | |
22989 | ||
22990 | + preempt_disable_rt(); | |
22991 | v = __this_cpu_dec_return(*p); | |
22992 | t = __this_cpu_read(pcp->stat_threshold); | |
22993 | if (unlikely(v < - t)) { | |
22994 | @@ -374,6 +385,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
22995 | node_page_state_add(v - overstep, pgdat, item); | |
22996 | __this_cpu_write(*p, overstep); | |
22997 | } | |
22998 | + preempt_enable_rt(); | |
22999 | } | |
23000 | ||
23001 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
23002 | diff --git a/mm/workingset.c b/mm/workingset.c | |
c7c16703 | 23003 | index fb1f9183d89a..7e6ef1a48cd3 100644 |
1a6e0f06 JK |
23004 | --- a/mm/workingset.c |
23005 | +++ b/mm/workingset.c | |
23006 | @@ -334,7 +334,8 @@ void workingset_activation(struct page *page) | |
23007 | * point where they would still be useful. | |
23008 | */ | |
23009 | ||
23010 | -struct list_lru workingset_shadow_nodes; | |
23011 | +struct list_lru __workingset_shadow_nodes; | |
23012 | +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
23013 | ||
23014 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
23015 | struct shrink_control *sc) | |
23016 | @@ -344,9 +345,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
23017 | unsigned long pages; | |
23018 | ||
23019 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
23020 | - local_irq_disable(); | |
23021 | - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | |
23022 | - local_irq_enable(); | |
23023 | + local_lock_irq(workingset_shadow_lock); | |
23024 | + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); | |
23025 | + local_unlock_irq(workingset_shadow_lock); | |
23026 | ||
c7c16703 | 23027 | if (sc->memcg) { |
1a6e0f06 JK |
23028 | pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, |
23029 | @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |
23030 | spin_unlock(&mapping->tree_lock); | |
23031 | ret = LRU_REMOVED_RETRY; | |
23032 | out: | |
23033 | - local_irq_enable(); | |
23034 | + local_unlock_irq(workingset_shadow_lock); | |
23035 | cond_resched(); | |
23036 | - local_irq_disable(); | |
23037 | + local_lock_irq(workingset_shadow_lock); | |
23038 | spin_lock(lru_lock); | |
23039 | return ret; | |
23040 | } | |
23041 | @@ -451,10 +452,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |
23042 | unsigned long ret; | |
23043 | ||
23044 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
23045 | - local_irq_disable(); | |
23046 | - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | |
23047 | + local_lock_irq(workingset_shadow_lock); | |
23048 | + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc, | |
23049 | shadow_lru_isolate, NULL); | |
23050 | - local_irq_enable(); | |
23051 | + local_unlock_irq(workingset_shadow_lock); | |
23052 | return ret; | |
23053 | } | |
23054 | ||
23055 | @@ -492,7 +493,7 @@ static int __init workingset_init(void) | |
23056 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | |
23057 | timestamp_bits, max_order, bucket_order); | |
23058 | ||
23059 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | |
23060 | + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key); | |
23061 | if (ret) | |
23062 | goto err; | |
23063 | ret = register_shrinker(&workingset_shadow_shrinker); | |
23064 | @@ -500,7 +501,7 @@ static int __init workingset_init(void) | |
23065 | goto err_list_lru; | |
23066 | return 0; | |
23067 | err_list_lru: | |
23068 | - list_lru_destroy(&workingset_shadow_nodes); | |
23069 | + list_lru_destroy(&__workingset_shadow_nodes); | |
23070 | err: | |
23071 | return ret; | |
23072 | } | |
23073 | diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c | |
23074 | index b0bc023d25c5..5af6426fbcbe 100644 | |
23075 | --- a/mm/zsmalloc.c | |
23076 | +++ b/mm/zsmalloc.c | |
23077 | @@ -53,6 +53,7 @@ | |
23078 | #include <linux/mount.h> | |
23079 | #include <linux/migrate.h> | |
23080 | #include <linux/pagemap.h> | |
23081 | +#include <linux/locallock.h> | |
23082 | ||
23083 | #define ZSPAGE_MAGIC 0x58 | |
23084 | ||
23085 | @@ -70,9 +71,22 @@ | |
23086 | */ | |
23087 | #define ZS_MAX_ZSPAGE_ORDER 2 | |
23088 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | |
23089 | - | |
23090 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | |
23091 | ||
23092 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23093 | + | |
23094 | +struct zsmalloc_handle { | |
23095 | + unsigned long addr; | |
23096 | + struct mutex lock; | |
23097 | +}; | |
23098 | + | |
23099 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) | |
23100 | + | |
23101 | +#else | |
23102 | + | |
23103 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) | |
23104 | +#endif | |
23105 | + | |
23106 | /* | |
23107 | * Object location (<PFN>, <obj_idx>) is encoded as | |
23108 | * as single (unsigned long) handle value. | |
23109 | @@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} | |
23110 | ||
23111 | static int create_cache(struct zs_pool *pool) | |
23112 | { | |
23113 | - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | |
23114 | + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, | |
23115 | 0, 0, NULL); | |
23116 | if (!pool->handle_cachep) | |
23117 | return 1; | |
23118 | @@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool *pool) | |
23119 | ||
23120 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) | |
23121 | { | |
23122 | - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | |
23123 | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
23124 | + void *p; | |
23125 | + | |
23126 | + p = kmem_cache_alloc(pool->handle_cachep, | |
23127 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
23128 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23129 | + if (p) { | |
23130 | + struct zsmalloc_handle *zh = p; | |
23131 | + | |
23132 | + mutex_init(&zh->lock); | |
23133 | + } | |
23134 | +#endif | |
23135 | + return (unsigned long)p; | |
23136 | } | |
23137 | ||
23138 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23139 | +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) | |
23140 | +{ | |
23141 | + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); | |
23142 | +} | |
23143 | +#endif | |
23144 | + | |
23145 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) | |
23146 | { | |
23147 | kmem_cache_free(pool->handle_cachep, (void *)handle); | |
23148 | @@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) | |
23149 | ||
23150 | static void record_obj(unsigned long handle, unsigned long obj) | |
23151 | { | |
23152 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23153 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23154 | + | |
23155 | + WRITE_ONCE(zh->addr, obj); | |
23156 | +#else | |
23157 | /* | |
23158 | * lsb of @obj represents handle lock while other bits | |
23159 | * represent object value the handle is pointing so | |
23160 | * updating shouldn't do store tearing. | |
23161 | */ | |
23162 | WRITE_ONCE(*(unsigned long *)handle, obj); | |
23163 | +#endif | |
23164 | } | |
23165 | ||
23166 | /* zpool driver */ | |
23167 | @@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc"); | |
23168 | ||
23169 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
23170 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
23171 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
23172 | ||
23173 | static bool is_zspage_isolated(struct zspage *zspage) | |
23174 | { | |
23175 | @@ -902,7 +940,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) | |
23176 | ||
23177 | static unsigned long handle_to_obj(unsigned long handle) | |
23178 | { | |
23179 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23180 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23181 | + | |
23182 | + return zh->addr; | |
23183 | +#else | |
23184 | return *(unsigned long *)handle; | |
23185 | +#endif | |
23186 | } | |
23187 | ||
23188 | static unsigned long obj_to_head(struct page *page, void *obj) | |
23189 | @@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) | |
23190 | ||
23191 | static inline int testpin_tag(unsigned long handle) | |
23192 | { | |
23193 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23194 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23195 | + | |
23196 | + return mutex_is_locked(&zh->lock); | |
23197 | +#else | |
23198 | return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23199 | +#endif | |
23200 | } | |
23201 | ||
23202 | static inline int trypin_tag(unsigned long handle) | |
23203 | { | |
23204 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23205 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23206 | + | |
23207 | + return mutex_trylock(&zh->lock); | |
23208 | +#else | |
23209 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23210 | +#endif | |
23211 | } | |
23212 | ||
23213 | static void pin_tag(unsigned long handle) | |
23214 | { | |
23215 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23216 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23217 | + | |
23218 | + return mutex_lock(&zh->lock); | |
23219 | +#else | |
23220 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23221 | +#endif | |
23222 | } | |
23223 | ||
23224 | static void unpin_tag(unsigned long handle) | |
23225 | { | |
23226 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23227 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23228 | + | |
23229 | + return mutex_unlock(&zh->lock); | |
23230 | +#else | |
23231 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23232 | +#endif | |
23233 | } | |
23234 | ||
23235 | static void reset_page(struct page *page) | |
23236 | @@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |
23237 | class = pool->size_class[class_idx]; | |
23238 | off = (class->size * obj_idx) & ~PAGE_MASK; | |
23239 | ||
23240 | - area = &get_cpu_var(zs_map_area); | |
23241 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); | |
23242 | area->vm_mm = mm; | |
23243 | if (off + class->size <= PAGE_SIZE) { | |
23244 | /* this object is contained entirely within a page */ | |
23245 | @@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |
23246 | ||
23247 | __zs_unmap_object(area, pages, off, class->size); | |
23248 | } | |
23249 | - put_cpu_var(zs_map_area); | |
23250 | + put_locked_var(zs_map_area_lock, zs_map_area); | |
23251 | ||
23252 | migrate_read_unlock(zspage); | |
23253 | unpin_tag(handle); | |
23254 | diff --git a/net/core/dev.c b/net/core/dev.c | |
c7c16703 | 23255 | index e1d731fdc72c..6ab4b7863755 100644 |
1a6e0f06 JK |
23256 | --- a/net/core/dev.c |
23257 | +++ b/net/core/dev.c | |
23258 | @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS; | |
23259 | static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); | |
23260 | ||
23261 | static seqcount_t devnet_rename_seq; | |
23262 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
23263 | ||
23264 | static inline void dev_base_seq_inc(struct net *net) | |
23265 | { | |
23266 | @@ -211,14 +212,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |
23267 | static inline void rps_lock(struct softnet_data *sd) | |
23268 | { | |
23269 | #ifdef CONFIG_RPS | |
23270 | - spin_lock(&sd->input_pkt_queue.lock); | |
23271 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
23272 | #endif | |
23273 | } | |
23274 | ||
23275 | static inline void rps_unlock(struct softnet_data *sd) | |
23276 | { | |
23277 | #ifdef CONFIG_RPS | |
23278 | - spin_unlock(&sd->input_pkt_queue.lock); | |
23279 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
23280 | #endif | |
23281 | } | |
23282 | ||
23283 | @@ -888,7 +889,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex) | |
23284 | strcpy(name, dev->name); | |
23285 | rcu_read_unlock(); | |
23286 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
23287 | - cond_resched(); | |
23288 | + mutex_lock(&devnet_rename_mutex); | |
23289 | + mutex_unlock(&devnet_rename_mutex); | |
23290 | goto retry; | |
23291 | } | |
23292 | ||
23293 | @@ -1157,20 +1159,17 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23294 | if (dev->flags & IFF_UP) | |
23295 | return -EBUSY; | |
23296 | ||
23297 | - write_seqcount_begin(&devnet_rename_seq); | |
23298 | + mutex_lock(&devnet_rename_mutex); | |
23299 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
23300 | ||
23301 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
23302 | - write_seqcount_end(&devnet_rename_seq); | |
23303 | - return 0; | |
23304 | - } | |
23305 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
23306 | + goto outunlock; | |
23307 | ||
23308 | memcpy(oldname, dev->name, IFNAMSIZ); | |
23309 | ||
23310 | err = dev_get_valid_name(net, dev, newname); | |
23311 | - if (err < 0) { | |
23312 | - write_seqcount_end(&devnet_rename_seq); | |
23313 | - return err; | |
23314 | - } | |
23315 | + if (err < 0) | |
23316 | + goto outunlock; | |
23317 | ||
23318 | if (oldname[0] && !strchr(oldname, '%')) | |
23319 | netdev_info(dev, "renamed from %s\n", oldname); | |
23320 | @@ -1183,11 +1182,12 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23321 | if (ret) { | |
23322 | memcpy(dev->name, oldname, IFNAMSIZ); | |
23323 | dev->name_assign_type = old_assign_type; | |
23324 | - write_seqcount_end(&devnet_rename_seq); | |
23325 | - return ret; | |
23326 | + err = ret; | |
23327 | + goto outunlock; | |
23328 | } | |
23329 | ||
23330 | - write_seqcount_end(&devnet_rename_seq); | |
23331 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
23332 | + mutex_unlock(&devnet_rename_mutex); | |
23333 | ||
23334 | netdev_adjacent_rename_links(dev, oldname); | |
23335 | ||
23336 | @@ -1208,7 +1208,8 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23337 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | |
23338 | if (err >= 0) { | |
23339 | err = ret; | |
23340 | - write_seqcount_begin(&devnet_rename_seq); | |
23341 | + mutex_lock(&devnet_rename_mutex); | |
23342 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
23343 | memcpy(dev->name, oldname, IFNAMSIZ); | |
23344 | memcpy(oldname, newname, IFNAMSIZ); | |
23345 | dev->name_assign_type = old_assign_type; | |
23346 | @@ -1221,6 +1222,11 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23347 | } | |
23348 | ||
23349 | return err; | |
23350 | + | |
23351 | +outunlock: | |
23352 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
23353 | + mutex_unlock(&devnet_rename_mutex); | |
23354 | + return err; | |
23355 | } | |
23356 | ||
23357 | /** | |
c7c16703 | 23358 | @@ -2263,6 +2269,7 @@ static void __netif_reschedule(struct Qdisc *q) |
1a6e0f06 JK |
23359 | sd->output_queue_tailp = &q->next_sched; |
23360 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
23361 | local_irq_restore(flags); | |
23362 | + preempt_check_resched_rt(); | |
23363 | } | |
23364 | ||
23365 | void __netif_schedule(struct Qdisc *q) | |
c7c16703 | 23366 | @@ -2344,6 +2351,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) |
1a6e0f06 JK |
23367 | __this_cpu_write(softnet_data.completion_queue, skb); |
23368 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
23369 | local_irq_restore(flags); | |
23370 | + preempt_check_resched_rt(); | |
23371 | } | |
23372 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
23373 | ||
c7c16703 | 23374 | @@ -3078,7 +3086,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, |
1a6e0f06 JK |
23375 | * This permits qdisc->running owner to get the lock more |
23376 | * often and dequeue packets faster. | |
23377 | */ | |
23378 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23379 | + contended = true; | |
23380 | +#else | |
23381 | contended = qdisc_is_running(q); | |
23382 | +#endif | |
23383 | if (unlikely(contended)) | |
23384 | spin_lock(&q->busylock); | |
23385 | ||
c7c16703 | 23386 | @@ -3141,8 +3153,10 @@ static void skb_update_prio(struct sk_buff *skb) |
1a6e0f06 JK |
23387 | #define skb_update_prio(skb) |
23388 | #endif | |
23389 | ||
23390 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23391 | DEFINE_PER_CPU(int, xmit_recursion); | |
23392 | EXPORT_SYMBOL(xmit_recursion); | |
23393 | +#endif | |
23394 | ||
23395 | /** | |
23396 | * dev_loopback_xmit - loop back @skb | |
c7c16703 | 23397 | @@ -3376,8 +3390,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) |
1a6e0f06 JK |
23398 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
23399 | ||
23400 | if (txq->xmit_lock_owner != cpu) { | |
23401 | - if (unlikely(__this_cpu_read(xmit_recursion) > | |
23402 | - XMIT_RECURSION_LIMIT)) | |
23403 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) | |
23404 | goto recursion_alert; | |
23405 | ||
23406 | skb = validate_xmit_skb(skb, dev); | |
c7c16703 | 23407 | @@ -3387,9 +3400,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) |
1a6e0f06 JK |
23408 | HARD_TX_LOCK(dev, txq, cpu); |
23409 | ||
23410 | if (!netif_xmit_stopped(txq)) { | |
23411 | - __this_cpu_inc(xmit_recursion); | |
23412 | + xmit_rec_inc(); | |
23413 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
23414 | - __this_cpu_dec(xmit_recursion); | |
23415 | + xmit_rec_dec(); | |
23416 | if (dev_xmit_complete(rc)) { | |
23417 | HARD_TX_UNLOCK(dev, txq); | |
23418 | goto out; | |
c7c16703 | 23419 | @@ -3763,6 +3776,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, |
1a6e0f06 JK |
23420 | rps_unlock(sd); |
23421 | ||
23422 | local_irq_restore(flags); | |
23423 | + preempt_check_resched_rt(); | |
23424 | ||
23425 | atomic_long_inc(&skb->dev->rx_dropped); | |
23426 | kfree_skb(skb); | |
c7c16703 | 23427 | @@ -3781,7 +3795,7 @@ static int netif_rx_internal(struct sk_buff *skb) |
1a6e0f06 JK |
23428 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
23429 | int cpu; | |
23430 | ||
23431 | - preempt_disable(); | |
23432 | + migrate_disable(); | |
23433 | rcu_read_lock(); | |
23434 | ||
23435 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
c7c16703 | 23436 | @@ -3791,13 +3805,13 @@ static int netif_rx_internal(struct sk_buff *skb) |
1a6e0f06 JK |
23437 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
23438 | ||
23439 | rcu_read_unlock(); | |
23440 | - preempt_enable(); | |
23441 | + migrate_enable(); | |
23442 | } else | |
23443 | #endif | |
23444 | { | |
23445 | unsigned int qtail; | |
23446 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | |
23447 | - put_cpu(); | |
23448 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
23449 | + put_cpu_light(); | |
23450 | } | |
23451 | return ret; | |
23452 | } | |
c7c16703 | 23453 | @@ -3831,11 +3845,9 @@ int netif_rx_ni(struct sk_buff *skb) |
1a6e0f06 JK |
23454 | |
23455 | trace_netif_rx_ni_entry(skb); | |
23456 | ||
23457 | - preempt_disable(); | |
23458 | + local_bh_disable(); | |
23459 | err = netif_rx_internal(skb); | |
23460 | - if (local_softirq_pending()) | |
23461 | - do_softirq(); | |
23462 | - preempt_enable(); | |
23463 | + local_bh_enable(); | |
23464 | ||
23465 | return err; | |
23466 | } | |
c7c16703 | 23467 | @@ -4314,7 +4326,7 @@ static void flush_backlog(struct work_struct *work) |
1a6e0f06 | 23468 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { |
c7c16703 | 23469 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
23470 | __skb_unlink(skb, &sd->input_pkt_queue); |
23471 | - kfree_skb(skb); | |
23472 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
23473 | input_queue_head_incr(sd); | |
23474 | } | |
23475 | } | |
c7c16703 | 23476 | @@ -4324,11 +4336,14 @@ static void flush_backlog(struct work_struct *work) |
1a6e0f06 | 23477 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { |
c7c16703 | 23478 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
23479 | __skb_unlink(skb, &sd->process_queue); |
23480 | - kfree_skb(skb); | |
23481 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
23482 | input_queue_head_incr(sd); | |
23483 | } | |
23484 | } | |
1a6e0f06 JK |
23485 | + if (!skb_queue_empty(&sd->tofree_queue)) |
23486 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
c7c16703 JK |
23487 | local_bh_enable(); |
23488 | + | |
1a6e0f06 JK |
23489 | } |
23490 | ||
c7c16703 JK |
23491 | static void flush_all_backlogs(void) |
23492 | @@ -4809,6 +4824,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
1a6e0f06 JK |
23493 | sd->rps_ipi_list = NULL; |
23494 | ||
23495 | local_irq_enable(); | |
23496 | + preempt_check_resched_rt(); | |
23497 | ||
23498 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
23499 | while (remsd) { | |
c7c16703 | 23500 | @@ -4822,6 +4838,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) |
1a6e0f06 JK |
23501 | } else |
23502 | #endif | |
23503 | local_irq_enable(); | |
23504 | + preempt_check_resched_rt(); | |
23505 | } | |
23506 | ||
23507 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
c7c16703 JK |
23508 | @@ -4851,7 +4868,9 @@ static int process_backlog(struct napi_struct *napi, int quota) |
23509 | while (again) { | |
23510 | struct sk_buff *skb; | |
23511 | ||
23512 | + local_irq_disable(); | |
23513 | while ((skb = __skb_dequeue(&sd->process_queue))) { | |
23514 | + local_irq_enable(); | |
23515 | rcu_read_lock(); | |
23516 | __netif_receive_skb(skb); | |
23517 | rcu_read_unlock(); | |
23518 | @@ -4859,9 +4878,9 @@ static int process_backlog(struct napi_struct *napi, int quota) | |
23519 | if (++work >= quota) | |
23520 | return work; | |
23521 | ||
23522 | + local_irq_disable(); | |
23523 | } | |
23524 | ||
23525 | - local_irq_disable(); | |
23526 | rps_lock(sd); | |
23527 | if (skb_queue_empty(&sd->input_pkt_queue)) { | |
23528 | /* | |
23529 | @@ -4899,9 +4918,11 @@ void __napi_schedule(struct napi_struct *n) | |
1a6e0f06 JK |
23530 | local_irq_save(flags); |
23531 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
23532 | local_irq_restore(flags); | |
23533 | + preempt_check_resched_rt(); | |
23534 | } | |
23535 | EXPORT_SYMBOL(__napi_schedule); | |
23536 | ||
c7c16703 JK |
23537 | +#ifndef CONFIG_PREEMPT_RT_FULL |
23538 | /** | |
23539 | * __napi_schedule_irqoff - schedule for receive | |
23540 | * @n: entry to schedule | |
23541 | @@ -4913,6 +4934,7 @@ void __napi_schedule_irqoff(struct napi_struct *n) | |
23542 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
23543 | } | |
23544 | EXPORT_SYMBOL(__napi_schedule_irqoff); | |
23545 | +#endif | |
23546 | ||
23547 | void __napi_complete(struct napi_struct *n) | |
23548 | { | |
23549 | @@ -5202,13 +5224,21 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) | |
23550 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); | |
23551 | unsigned long time_limit = jiffies + 2; | |
23552 | int budget = netdev_budget; | |
23553 | + struct sk_buff_head tofree_q; | |
23554 | + struct sk_buff *skb; | |
23555 | LIST_HEAD(list); | |
23556 | LIST_HEAD(repoll); | |
23557 | ||
23558 | + __skb_queue_head_init(&tofree_q); | |
23559 | + | |
23560 | local_irq_disable(); | |
23561 | + skb_queue_splice_init(&sd->tofree_queue, &tofree_q); | |
23562 | list_splice_init(&sd->poll_list, &list); | |
23563 | local_irq_enable(); | |
23564 | ||
23565 | + while ((skb = __skb_dequeue(&tofree_q))) | |
23566 | + kfree_skb(skb); | |
23567 | + | |
23568 | for (;;) { | |
23569 | struct napi_struct *n; | |
23570 | ||
23571 | @@ -5239,7 +5269,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) | |
1a6e0f06 JK |
23572 | list_splice_tail(&repoll, &list); |
23573 | list_splice(&list, &sd->poll_list); | |
23574 | if (!list_empty(&sd->poll_list)) | |
23575 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
23576 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
23577 | ||
23578 | net_rps_action_and_irq_enable(sd); | |
23579 | } | |
c7c16703 | 23580 | @@ -8000,16 +8030,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, |
1a6e0f06 JK |
23581 | |
23582 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
23583 | local_irq_enable(); | |
23584 | + preempt_check_resched_rt(); | |
23585 | ||
23586 | /* Process offline CPU's input_pkt_queue */ | |
23587 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | |
23588 | netif_rx_ni(skb); | |
23589 | input_queue_head_incr(oldsd); | |
23590 | } | |
23591 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
23592 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
23593 | netif_rx_ni(skb); | |
23594 | input_queue_head_incr(oldsd); | |
23595 | } | |
23596 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
23597 | + kfree_skb(skb); | |
23598 | + } | |
23599 | ||
23600 | return NOTIFY_OK; | |
23601 | } | |
c7c16703 JK |
23602 | @@ -8314,8 +8348,9 @@ static int __init net_dev_init(void) |
23603 | ||
23604 | INIT_WORK(flush, flush_backlog); | |
1a6e0f06 JK |
23605 | |
23606 | - skb_queue_head_init(&sd->input_pkt_queue); | |
23607 | - skb_queue_head_init(&sd->process_queue); | |
23608 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
23609 | + skb_queue_head_init_raw(&sd->process_queue); | |
23610 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
23611 | INIT_LIST_HEAD(&sd->poll_list); | |
23612 | sd->output_queue_tailp = &sd->output_queue; | |
23613 | #ifdef CONFIG_RPS | |
23614 | diff --git a/net/core/filter.c b/net/core/filter.c | |
c7c16703 | 23615 | index b391209838ef..b86e9681a88e 100644 |
1a6e0f06 JK |
23616 | --- a/net/core/filter.c |
23617 | +++ b/net/core/filter.c | |
c7c16703 | 23618 | @@ -1645,7 +1645,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) |
1a6e0f06 JK |
23619 | { |
23620 | int ret; | |
23621 | ||
23622 | - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { | |
23623 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) { | |
23624 | net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); | |
23625 | kfree_skb(skb); | |
23626 | return -ENETDOWN; | |
c7c16703 | 23627 | @@ -1653,9 +1653,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) |
1a6e0f06 JK |
23628 | |
23629 | skb->dev = dev; | |
23630 | ||
23631 | - __this_cpu_inc(xmit_recursion); | |
23632 | + xmit_rec_inc(); | |
23633 | ret = dev_queue_xmit(skb); | |
23634 | - __this_cpu_dec(xmit_recursion); | |
23635 | + xmit_rec_dec(); | |
23636 | ||
23637 | return ret; | |
23638 | } | |
23639 | diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c | |
23640 | index cad8e791f28e..2a9364fe62a5 100644 | |
23641 | --- a/net/core/gen_estimator.c | |
23642 | +++ b/net/core/gen_estimator.c | |
23643 | @@ -84,7 +84,7 @@ struct gen_estimator | |
23644 | struct gnet_stats_basic_packed *bstats; | |
23645 | struct gnet_stats_rate_est64 *rate_est; | |
23646 | spinlock_t *stats_lock; | |
23647 | - seqcount_t *running; | |
23648 | + net_seqlock_t *running; | |
23649 | int ewma_log; | |
23650 | u32 last_packets; | |
23651 | unsigned long avpps; | |
23652 | @@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
23653 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
23654 | struct gnet_stats_rate_est64 *rate_est, | |
23655 | spinlock_t *stats_lock, | |
23656 | - seqcount_t *running, | |
23657 | + net_seqlock_t *running, | |
23658 | struct nlattr *opt) | |
23659 | { | |
23660 | struct gen_estimator *est; | |
23661 | @@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
23662 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
23663 | struct gnet_stats_rate_est64 *rate_est, | |
23664 | spinlock_t *stats_lock, | |
23665 | - seqcount_t *running, struct nlattr *opt) | |
23666 | + net_seqlock_t *running, struct nlattr *opt) | |
23667 | { | |
23668 | gen_kill_estimator(bstats, rate_est); | |
23669 | return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); | |
23670 | diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c | |
23671 | index 508e051304fb..bc3b17b78c94 100644 | |
23672 | --- a/net/core/gen_stats.c | |
23673 | +++ b/net/core/gen_stats.c | |
23674 | @@ -130,7 +130,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, | |
23675 | } | |
23676 | ||
23677 | void | |
23678 | -__gnet_stats_copy_basic(const seqcount_t *running, | |
23679 | +__gnet_stats_copy_basic(net_seqlock_t *running, | |
23680 | struct gnet_stats_basic_packed *bstats, | |
23681 | struct gnet_stats_basic_cpu __percpu *cpu, | |
23682 | struct gnet_stats_basic_packed *b) | |
23683 | @@ -143,10 +143,10 @@ __gnet_stats_copy_basic(const seqcount_t *running, | |
23684 | } | |
23685 | do { | |
23686 | if (running) | |
23687 | - seq = read_seqcount_begin(running); | |
23688 | + seq = net_seq_begin(running); | |
23689 | bstats->bytes = b->bytes; | |
23690 | bstats->packets = b->packets; | |
23691 | - } while (running && read_seqcount_retry(running, seq)); | |
23692 | + } while (running && net_seq_retry(running, seq)); | |
23693 | } | |
23694 | EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
23695 | ||
23696 | @@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
23697 | * if the room in the socket buffer was not sufficient. | |
23698 | */ | |
23699 | int | |
23700 | -gnet_stats_copy_basic(const seqcount_t *running, | |
23701 | +gnet_stats_copy_basic(net_seqlock_t *running, | |
23702 | struct gnet_dump *d, | |
23703 | struct gnet_stats_basic_cpu __percpu *cpu, | |
23704 | struct gnet_stats_basic_packed *b) | |
23705 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | |
c7c16703 | 23706 | index 1e3e0087245b..1077b39db717 100644 |
1a6e0f06 JK |
23707 | --- a/net/core/skbuff.c |
23708 | +++ b/net/core/skbuff.c | |
23709 | @@ -64,6 +64,7 @@ | |
23710 | #include <linux/errqueue.h> | |
23711 | #include <linux/prefetch.h> | |
23712 | #include <linux/if_vlan.h> | |
23713 | +#include <linux/locallock.h> | |
23714 | ||
23715 | #include <net/protocol.h> | |
23716 | #include <net/dst.h> | |
23717 | @@ -360,6 +361,8 @@ struct napi_alloc_cache { | |
23718 | ||
23719 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
23720 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); | |
23721 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
23722 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
23723 | ||
23724 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
23725 | { | |
23726 | @@ -367,10 +370,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
23727 | unsigned long flags; | |
23728 | void *data; | |
23729 | ||
23730 | - local_irq_save(flags); | |
23731 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
23732 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
23733 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
23734 | - local_irq_restore(flags); | |
23735 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
23736 | return data; | |
23737 | } | |
23738 | ||
23739 | @@ -389,9 +392,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |
23740 | ||
23741 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
23742 | { | |
23743 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
23744 | + struct napi_alloc_cache *nc; | |
23745 | + void *data; | |
23746 | ||
23747 | - return __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
23748 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23749 | + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
23750 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23751 | + return data; | |
23752 | } | |
23753 | ||
23754 | void *napi_alloc_frag(unsigned int fragsz) | |
23755 | @@ -438,13 +445,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
23756 | if (sk_memalloc_socks()) | |
23757 | gfp_mask |= __GFP_MEMALLOC; | |
23758 | ||
23759 | - local_irq_save(flags); | |
23760 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
23761 | ||
23762 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
23763 | data = __alloc_page_frag(nc, len, gfp_mask); | |
23764 | pfmemalloc = nc->pfmemalloc; | |
23765 | ||
23766 | - local_irq_restore(flags); | |
23767 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
23768 | ||
23769 | if (unlikely(!data)) | |
23770 | return NULL; | |
23771 | @@ -485,9 +492,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |
23772 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
23773 | gfp_t gfp_mask) | |
23774 | { | |
23775 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
23776 | + struct napi_alloc_cache *nc; | |
23777 | struct sk_buff *skb; | |
23778 | void *data; | |
23779 | + bool pfmemalloc; | |
23780 | ||
23781 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
23782 | ||
23783 | @@ -505,7 +513,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
23784 | if (sk_memalloc_socks()) | |
23785 | gfp_mask |= __GFP_MEMALLOC; | |
23786 | ||
23787 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23788 | data = __alloc_page_frag(&nc->page, len, gfp_mask); | |
23789 | + pfmemalloc = nc->page.pfmemalloc; | |
23790 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23791 | if (unlikely(!data)) | |
23792 | return NULL; | |
23793 | ||
23794 | @@ -516,7 +527,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
23795 | } | |
23796 | ||
23797 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
23798 | - if (nc->page.pfmemalloc) | |
23799 | + if (pfmemalloc) | |
23800 | skb->pfmemalloc = 1; | |
23801 | skb->head_frag = 1; | |
23802 | ||
23803 | @@ -760,23 +771,26 @@ EXPORT_SYMBOL(consume_skb); | |
23804 | ||
23805 | void __kfree_skb_flush(void) | |
23806 | { | |
23807 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
23808 | + struct napi_alloc_cache *nc; | |
23809 | ||
23810 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23811 | /* flush skb_cache if containing objects */ | |
23812 | if (nc->skb_count) { | |
23813 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | |
23814 | nc->skb_cache); | |
23815 | nc->skb_count = 0; | |
23816 | } | |
23817 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23818 | } | |
23819 | ||
23820 | static inline void _kfree_skb_defer(struct sk_buff *skb) | |
23821 | { | |
23822 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
23823 | + struct napi_alloc_cache *nc; | |
23824 | ||
23825 | /* drop skb->head and call any destructors for packet */ | |
23826 | skb_release_all(skb); | |
23827 | ||
23828 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23829 | /* record skb to CPU local list */ | |
23830 | nc->skb_cache[nc->skb_count++] = skb; | |
23831 | ||
23832 | @@ -791,6 +805,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb) | |
23833 | nc->skb_cache); | |
23834 | nc->skb_count = 0; | |
23835 | } | |
23836 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
23837 | } | |
23838 | void __kfree_skb_defer(struct sk_buff *skb) | |
23839 | { | |
23840 | diff --git a/net/core/sock.c b/net/core/sock.c | |
c7c16703 | 23841 | index bc6543f7de36..2c32ee79620f 100644 |
1a6e0f06 JK |
23842 | --- a/net/core/sock.c |
23843 | +++ b/net/core/sock.c | |
c7c16703 | 23844 | @@ -2488,12 +2488,11 @@ void lock_sock_nested(struct sock *sk, int subclass) |
1a6e0f06 JK |
23845 | if (sk->sk_lock.owned) |
23846 | __lock_sock(sk); | |
23847 | sk->sk_lock.owned = 1; | |
23848 | - spin_unlock(&sk->sk_lock.slock); | |
23849 | + spin_unlock_bh(&sk->sk_lock.slock); | |
23850 | /* | |
23851 | * The sk_lock has mutex_lock() semantics here: | |
23852 | */ | |
23853 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
23854 | - local_bh_enable(); | |
23855 | } | |
23856 | EXPORT_SYMBOL(lock_sock_nested); | |
23857 | ||
23858 | diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c | |
c7c16703 | 23859 | index 48734ee6293f..e6864ff11352 100644 |
1a6e0f06 JK |
23860 | --- a/net/ipv4/icmp.c |
23861 | +++ b/net/ipv4/icmp.c | |
23862 | @@ -69,6 +69,7 @@ | |
23863 | #include <linux/jiffies.h> | |
23864 | #include <linux/kernel.h> | |
23865 | #include <linux/fcntl.h> | |
23866 | +#include <linux/sysrq.h> | |
23867 | #include <linux/socket.h> | |
23868 | #include <linux/in.h> | |
23869 | #include <linux/inet.h> | |
23870 | @@ -77,6 +78,7 @@ | |
23871 | #include <linux/string.h> | |
23872 | #include <linux/netfilter_ipv4.h> | |
23873 | #include <linux/slab.h> | |
23874 | +#include <linux/locallock.h> | |
23875 | #include <net/snmp.h> | |
23876 | #include <net/ip.h> | |
23877 | #include <net/route.h> | |
23878 | @@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |
23879 | * | |
23880 | * On SMP we have one ICMP socket per-cpu. | |
23881 | */ | |
23882 | +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); | |
23883 | + | |
23884 | static struct sock *icmp_sk(struct net *net) | |
23885 | { | |
23886 | return *this_cpu_ptr(net->ipv4.icmp_sk); | |
23887 | @@ -215,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
23888 | ||
23889 | local_bh_disable(); | |
23890 | ||
23891 | + local_lock(icmp_sk_lock); | |
23892 | sk = icmp_sk(net); | |
23893 | ||
23894 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { | |
23895 | /* This can happen if the output path signals a | |
23896 | * dst_link_failure() for an outgoing ICMP packet. | |
23897 | */ | |
23898 | + local_unlock(icmp_sk_lock); | |
23899 | local_bh_enable(); | |
23900 | return NULL; | |
23901 | } | |
23902 | @@ -230,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
23903 | static inline void icmp_xmit_unlock(struct sock *sk) | |
23904 | { | |
23905 | spin_unlock_bh(&sk->sk_lock.slock); | |
23906 | + local_unlock(icmp_sk_lock); | |
23907 | } | |
23908 | ||
23909 | int sysctl_icmp_msgs_per_sec __read_mostly = 1000; | |
23910 | @@ -358,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
23911 | struct sock *sk; | |
23912 | struct sk_buff *skb; | |
23913 | ||
23914 | + local_lock(icmp_sk_lock); | |
23915 | sk = icmp_sk(dev_net((*rt)->dst.dev)); | |
23916 | if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, | |
23917 | icmp_param->data_len+icmp_param->head_len, | |
23918 | @@ -380,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
23919 | skb->ip_summed = CHECKSUM_NONE; | |
23920 | ip_push_pending_frames(sk, fl4); | |
23921 | } | |
23922 | + local_unlock(icmp_sk_lock); | |
23923 | } | |
23924 | ||
23925 | /* | |
23926 | @@ -891,6 +900,30 @@ static bool icmp_redirect(struct sk_buff *skb) | |
23927 | } | |
23928 | ||
23929 | /* | |
23930 | + * 32bit and 64bit have different timestamp length, so we check for | |
23931 | + * the cookie at offset 20 and verify it is repeated at offset 50 | |
23932 | + */ | |
23933 | +#define CO_POS0 20 | |
23934 | +#define CO_POS1 50 | |
23935 | +#define CO_SIZE sizeof(int) | |
23936 | +#define ICMP_SYSRQ_SIZE 57 | |
23937 | + | |
23938 | +/* | |
23939 | + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie | |
23940 | + * pattern and if it matches send the next byte as a trigger to sysrq. | |
23941 | + */ | |
23942 | +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb) | |
23943 | +{ | |
23944 | + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq); | |
23945 | + char *p = skb->data; | |
23946 | + | |
23947 | + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) && | |
23948 | + !memcmp(&cookie, p + CO_POS1, CO_SIZE) && | |
23949 | + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE]) | |
23950 | + handle_sysrq(p[CO_POS0 + CO_SIZE]); | |
23951 | +} | |
23952 | + | |
23953 | +/* | |
23954 | * Handle ICMP_ECHO ("ping") requests. | |
23955 | * | |
23956 | * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo | |
23957 | @@ -917,6 +950,11 @@ static bool icmp_echo(struct sk_buff *skb) | |
23958 | icmp_param.data_len = skb->len; | |
23959 | icmp_param.head_len = sizeof(struct icmphdr); | |
23960 | icmp_reply(&icmp_param, skb); | |
23961 | + | |
23962 | + if (skb->len == ICMP_SYSRQ_SIZE && | |
23963 | + net->ipv4.sysctl_icmp_echo_sysrq) { | |
23964 | + icmp_check_sysrq(net, skb); | |
23965 | + } | |
23966 | } | |
23967 | /* should there be an ICMP stat for ignored echos? */ | |
23968 | return true; | |
23969 | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | |
c7c16703 | 23970 | index 80bc36b25de2..215b90adfb05 100644 |
1a6e0f06 JK |
23971 | --- a/net/ipv4/sysctl_net_ipv4.c |
23972 | +++ b/net/ipv4/sysctl_net_ipv4.c | |
23973 | @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = { | |
23974 | .proc_handler = proc_dointvec | |
23975 | }, | |
23976 | { | |
23977 | + .procname = "icmp_echo_sysrq", | |
23978 | + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq, | |
23979 | + .maxlen = sizeof(int), | |
23980 | + .mode = 0644, | |
23981 | + .proc_handler = proc_dointvec | |
23982 | + }, | |
23983 | + { | |
23984 | .procname = "icmp_ignore_bogus_error_responses", | |
23985 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | |
23986 | .maxlen = sizeof(int), | |
23987 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | |
c7c16703 | 23988 | index 2259114c7242..829e60985a81 100644 |
1a6e0f06 JK |
23989 | --- a/net/ipv4/tcp_ipv4.c |
23990 | +++ b/net/ipv4/tcp_ipv4.c | |
23991 | @@ -62,6 +62,7 @@ | |
23992 | #include <linux/init.h> | |
23993 | #include <linux/times.h> | |
23994 | #include <linux/slab.h> | |
23995 | +#include <linux/locallock.h> | |
23996 | ||
23997 | #include <net/net_namespace.h> | |
23998 | #include <net/icmp.h> | |
c7c16703 | 23999 | @@ -564,6 +565,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
24000 | } |
24001 | EXPORT_SYMBOL(tcp_v4_send_check); | |
24002 | ||
24003 | +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); | |
24004 | /* | |
24005 | * This routine will send an RST to the other tcp. | |
24006 | * | |
c7c16703 | 24007 | @@ -691,6 +693,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
24008 | offsetof(struct inet_timewait_sock, tw_bound_dev_if)); |
24009 | ||
24010 | arg.tos = ip_hdr(skb)->tos; | |
24011 | + | |
24012 | + local_lock(tcp_sk_lock); | |
24013 | local_bh_disable(); | |
24014 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
24015 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
c7c16703 | 24016 | @@ -700,6 +704,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
24017 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); |
24018 | __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); | |
24019 | local_bh_enable(); | |
24020 | + local_unlock(tcp_sk_lock); | |
24021 | ||
24022 | #ifdef CONFIG_TCP_MD5SIG | |
24023 | out: | |
c7c16703 | 24024 | @@ -775,6 +780,7 @@ static void tcp_v4_send_ack(struct net *net, |
1a6e0f06 JK |
24025 | if (oif) |
24026 | arg.bound_dev_if = oif; | |
24027 | arg.tos = tos; | |
24028 | + local_lock(tcp_sk_lock); | |
24029 | local_bh_disable(); | |
24030 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
24031 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
c7c16703 | 24032 | @@ -783,6 +789,7 @@ static void tcp_v4_send_ack(struct net *net, |
1a6e0f06 JK |
24033 | |
24034 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
24035 | local_bh_enable(); | |
24036 | + local_unlock(tcp_sk_lock); | |
24037 | } | |
24038 | ||
24039 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |
24040 | diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c | |
c7c16703 | 24041 | index a47bbc973f2d..c1c1c64589d9 100644 |
1a6e0f06 JK |
24042 | --- a/net/mac80211/rx.c |
24043 | +++ b/net/mac80211/rx.c | |
c7c16703 | 24044 | @@ -4156,7 +4156,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, |
1a6e0f06 JK |
24045 | struct ieee80211_supported_band *sband; |
24046 | struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); | |
24047 | ||
24048 | - WARN_ON_ONCE(softirq_count() == 0); | |
24049 | + WARN_ON_ONCE_NONRT(softirq_count() == 0); | |
24050 | ||
24051 | if (WARN_ON(status->band >= NUM_NL80211_BANDS)) | |
24052 | goto drop; | |
24053 | diff --git a/net/netfilter/core.c b/net/netfilter/core.c | |
c7c16703 | 24054 | index 004af030ef1a..b64f751bda45 100644 |
1a6e0f06 JK |
24055 | --- a/net/netfilter/core.c |
24056 | +++ b/net/netfilter/core.c | |
c7c16703 | 24057 | @@ -22,12 +22,18 @@ |
1a6e0f06 JK |
24058 | #include <linux/proc_fs.h> |
24059 | #include <linux/mutex.h> | |
24060 | #include <linux/slab.h> | |
24061 | +#include <linux/locallock.h> | |
c7c16703 | 24062 | #include <linux/rcupdate.h> |
1a6e0f06 JK |
24063 | #include <net/net_namespace.h> |
24064 | #include <net/sock.h> | |
24065 | ||
24066 | #include "nf_internals.h" | |
24067 | ||
24068 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24069 | +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock); | |
24070 | +EXPORT_PER_CPU_SYMBOL(xt_write_lock); | |
24071 | +#endif | |
24072 | + | |
24073 | static DEFINE_MUTEX(afinfo_mutex); | |
24074 | ||
24075 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
24076 | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c | |
c7c16703 | 24077 | index dd2332390c45..f6a703b25b6c 100644 |
1a6e0f06 JK |
24078 | --- a/net/packet/af_packet.c |
24079 | +++ b/net/packet/af_packet.c | |
24080 | @@ -63,6 +63,7 @@ | |
24081 | #include <linux/if_packet.h> | |
24082 | #include <linux/wireless.h> | |
24083 | #include <linux/kernel.h> | |
24084 | +#include <linux/delay.h> | |
24085 | #include <linux/kmod.h> | |
24086 | #include <linux/slab.h> | |
24087 | #include <linux/vmalloc.h> | |
c7c16703 | 24088 | @@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) |
1a6e0f06 JK |
24089 | if (BLOCK_NUM_PKTS(pbd)) { |
24090 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
24091 | /* Waiting for skb_copy_bits to finish... */ | |
24092 | - cpu_relax(); | |
24093 | + cpu_chill(); | |
24094 | } | |
24095 | } | |
24096 | ||
c7c16703 | 24097 | @@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, |
1a6e0f06 JK |
24098 | if (!(status & TP_STATUS_BLK_TMO)) { |
24099 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
24100 | /* Waiting for skb_copy_bits to finish... */ | |
24101 | - cpu_relax(); | |
24102 | + cpu_chill(); | |
24103 | } | |
24104 | } | |
24105 | prb_close_block(pkc, pbd, po, status); | |
24106 | diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c | |
24107 | index 977f69886c00..f3e7a36b0396 100644 | |
24108 | --- a/net/rds/ib_rdma.c | |
24109 | +++ b/net/rds/ib_rdma.c | |
24110 | @@ -34,6 +34,7 @@ | |
24111 | #include <linux/slab.h> | |
24112 | #include <linux/rculist.h> | |
24113 | #include <linux/llist.h> | |
24114 | +#include <linux/delay.h> | |
24115 | ||
24116 | #include "rds_single_path.h" | |
24117 | #include "ib_mr.h" | |
24118 | @@ -210,7 +211,7 @@ static inline void wait_clean_list_grace(void) | |
24119 | for_each_online_cpu(cpu) { | |
24120 | flag = &per_cpu(clean_list_grace, cpu); | |
24121 | while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) | |
24122 | - cpu_relax(); | |
24123 | + cpu_chill(); | |
24124 | } | |
24125 | } | |
24126 | ||
24127 | diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c | |
c7c16703 | 24128 | index 7d921e56e715..13df56a738e5 100644 |
1a6e0f06 JK |
24129 | --- a/net/rxrpc/security.c |
24130 | +++ b/net/rxrpc/security.c | |
24131 | @@ -19,9 +19,6 @@ | |
24132 | #include <keys/rxrpc-type.h> | |
24133 | #include "ar-internal.h" | |
24134 | ||
24135 | -static LIST_HEAD(rxrpc_security_methods); | |
24136 | -static DECLARE_RWSEM(rxrpc_security_sem); | |
24137 | - | |
24138 | static const struct rxrpc_security *rxrpc_security_types[] = { | |
24139 | [RXRPC_SECURITY_NONE] = &rxrpc_no_security, | |
24140 | #ifdef CONFIG_RXKAD | |
24141 | diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c | |
c7c16703 | 24142 | index 206dc24add3a..00ea9bde5bb3 100644 |
1a6e0f06 JK |
24143 | --- a/net/sched/sch_api.c |
24144 | +++ b/net/sched/sch_api.c | |
c7c16703 | 24145 | @@ -981,7 +981,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, |
1a6e0f06 JK |
24146 | rcu_assign_pointer(sch->stab, stab); |
24147 | } | |
24148 | if (tca[TCA_RATE]) { | |
24149 | - seqcount_t *running; | |
24150 | + net_seqlock_t *running; | |
24151 | ||
24152 | err = -EOPNOTSUPP; | |
24153 | if (sch->flags & TCQ_F_MQROOT) | |
24154 | diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c | |
c7c16703 | 24155 | index 6cfb6e9038c2..20727e1347de 100644 |
1a6e0f06 JK |
24156 | --- a/net/sched/sch_generic.c |
24157 | +++ b/net/sched/sch_generic.c | |
c7c16703 JK |
24158 | @@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = { |
24159 | .ops = &noop_qdisc_ops, | |
1a6e0f06 JK |
24160 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), |
24161 | .dev_queue = &noop_netdev_queue, | |
24162 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24163 | + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), | |
24164 | +#else | |
24165 | .running = SEQCNT_ZERO(noop_qdisc.running), | |
24166 | +#endif | |
24167 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | |
24168 | }; | |
24169 | EXPORT_SYMBOL(noop_qdisc); | |
c7c16703 | 24170 | @@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
1a6e0f06 JK |
24171 | lockdep_set_class(&sch->busylock, |
24172 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | |
24173 | ||
24174 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24175 | + seqlock_init(&sch->running); | |
24176 | + lockdep_set_class(&sch->running.seqcount, | |
24177 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
24178 | + lockdep_set_class(&sch->running.lock, | |
24179 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
24180 | +#else | |
24181 | seqcount_init(&sch->running); | |
24182 | lockdep_set_class(&sch->running, | |
24183 | dev->qdisc_running_key ?: &qdisc_running_key); | |
24184 | +#endif | |
24185 | ||
24186 | sch->ops = ops; | |
24187 | sch->enqueue = ops->enqueue; | |
c7c16703 | 24188 | @@ -925,7 +937,7 @@ void dev_deactivate_many(struct list_head *head) |
1a6e0f06 JK |
24189 | /* Wait for outstanding qdisc_run calls. */ |
24190 | list_for_each_entry(dev, head, close_list) | |
24191 | while (some_qdisc_is_busy(dev)) | |
24192 | - yield(); | |
24193 | + msleep(1); | |
24194 | } | |
24195 | ||
24196 | void dev_deactivate(struct net_device *dev) | |
24197 | diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c | |
c7c16703 | 24198 | index 3bc1d61694cb..480141d45f49 100644 |
1a6e0f06 JK |
24199 | --- a/net/sunrpc/svc_xprt.c |
24200 | +++ b/net/sunrpc/svc_xprt.c | |
24201 | @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24202 | goto out; | |
24203 | } | |
24204 | ||
24205 | - cpu = get_cpu(); | |
24206 | + cpu = get_cpu_light(); | |
24207 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
24208 | ||
24209 | atomic_long_inc(&pool->sp_stats.packets); | |
24210 | @@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24211 | ||
24212 | atomic_long_inc(&pool->sp_stats.threads_woken); | |
24213 | wake_up_process(rqstp->rq_task); | |
24214 | - put_cpu(); | |
24215 | + put_cpu_light(); | |
24216 | goto out; | |
24217 | } | |
24218 | rcu_read_unlock(); | |
24219 | @@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24220 | goto redo_search; | |
24221 | } | |
24222 | rqstp = NULL; | |
24223 | - put_cpu(); | |
24224 | + put_cpu_light(); | |
24225 | out: | |
24226 | trace_svc_xprt_do_enqueue(xprt, rqstp); | |
24227 | } | |
24228 | diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h | |
24229 | index 6fdc97ef6023..523e0420d7f0 100755 | |
24230 | --- a/scripts/mkcompile_h | |
24231 | +++ b/scripts/mkcompile_h | |
24232 | @@ -4,7 +4,8 @@ TARGET=$1 | |
24233 | ARCH=$2 | |
24234 | SMP=$3 | |
24235 | PREEMPT=$4 | |
24236 | -CC=$5 | |
24237 | +RT=$5 | |
24238 | +CC=$6 | |
24239 | ||
24240 | vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } | |
24241 | ||
24242 | @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION" | |
24243 | CONFIG_FLAGS="" | |
24244 | if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi | |
24245 | if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi | |
24246 | +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi | |
24247 | UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" | |
24248 | ||
24249 | # Truncate to maximum length | |
24250 | diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c | |
c7c16703 | 24251 | index 9d33c1e85c79..3d307bda86f9 100644 |
1a6e0f06 JK |
24252 | --- a/sound/core/pcm_native.c |
24253 | +++ b/sound/core/pcm_native.c | |
24254 | @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); | |
24255 | void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) | |
24256 | { | |
24257 | if (!substream->pcm->nonatomic) | |
24258 | - local_irq_disable(); | |
24259 | + local_irq_disable_nort(); | |
24260 | snd_pcm_stream_lock(substream); | |
24261 | } | |
24262 | EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); | |
24263 | @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) | |
24264 | { | |
24265 | snd_pcm_stream_unlock(substream); | |
24266 | if (!substream->pcm->nonatomic) | |
24267 | - local_irq_enable(); | |
24268 | + local_irq_enable_nort(); | |
24269 | } | |
24270 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); | |
24271 | ||
24272 | @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) | |
24273 | { | |
24274 | unsigned long flags = 0; | |
24275 | if (!substream->pcm->nonatomic) | |
24276 | - local_irq_save(flags); | |
24277 | + local_irq_save_nort(flags); | |
24278 | snd_pcm_stream_lock(substream); | |
24279 | return flags; | |
24280 | } | |
24281 | @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, | |
24282 | { | |
24283 | snd_pcm_stream_unlock(substream); | |
24284 | if (!substream->pcm->nonatomic) | |
24285 | - local_irq_restore(flags); | |
24286 | + local_irq_restore_nort(flags); | |
24287 | } | |
24288 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); | |
24289 |