]>
Commit | Line | Data |
---|---|---|
1 | diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt | |
2 | index 3a3b30ac2a75..9e0745cafbd8 100644 | |
3 | --- a/Documentation/sysrq.txt | |
4 | +++ b/Documentation/sysrq.txt | |
5 | @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>, | |
6 | On other - If you know of the key combos for other architectures, please | |
7 | let me know so I can add them to this section. | |
8 | ||
9 | -On all - write a character to /proc/sysrq-trigger. e.g.: | |
10 | - | |
11 | +On all - write a character to /proc/sysrq-trigger, e.g.: | |
12 | echo t > /proc/sysrq-trigger | |
13 | ||
14 | +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g. | |
15 | + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq | |
16 | + Send an ICMP echo request with this pattern plus the particular | |
17 | + SysRq command key. Example: | |
18 | + # ping -c1 -s57 -p0102030468 | |
19 | + will trigger the SysRq-H (help) command. | |
20 | + | |
21 | + | |
22 | * What are the 'command' keys? | |
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
24 | 'b' - Will immediately reboot the system without syncing or unmounting | |
25 | diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt | |
26 | new file mode 100644 | |
27 | index 000000000000..6f2aeabf7faa | |
28 | --- /dev/null | |
29 | +++ b/Documentation/trace/histograms.txt | |
30 | @@ -0,0 +1,186 @@ | |
31 | + Using the Linux Kernel Latency Histograms | |
32 | + | |
33 | + | |
34 | +This document gives a short explanation how to enable, configure and use | |
35 | +latency histograms. Latency histograms are primarily relevant in the | |
36 | +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) | |
37 | +and are used in the quality management of the Linux real-time | |
38 | +capabilities. | |
39 | + | |
40 | + | |
41 | +* Purpose of latency histograms | |
42 | + | |
43 | +A latency histogram continuously accumulates the frequencies of latency | |
44 | +data. There are two types of histograms | |
45 | +- potential sources of latencies | |
46 | +- effective latencies | |
47 | + | |
48 | + | |
49 | +* Potential sources of latencies | |
50 | + | |
51 | +Potential sources of latencies are code segments where interrupts, | |
52 | +preemption or both are disabled (aka critical sections). To create | |
53 | +histograms of potential sources of latency, the kernel stores the time | |
54 | +stamp at the start of a critical section, determines the time elapsed | |
55 | +when the end of the section is reached, and increments the frequency | |
56 | +counter of that latency value - irrespective of whether any concurrently | |
57 | +running process is affected by latency or not. | |
58 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
59 | + CONFIG_INTERRUPT_OFF_LATENCY | |
60 | + CONFIG_PREEMPT_OFF_LATENCY | |
61 | + | |
62 | + | |
63 | +* Effective latencies | |
64 | + | |
65 | +Effective latencies are actually occuring during wakeup of a process. To | |
66 | +determine effective latencies, the kernel stores the time stamp when a | |
67 | +process is scheduled to be woken up, and determines the duration of the | |
68 | +wakeup time shortly before control is passed over to this process. Note | |
69 | +that the apparent latency in user space may be somewhat longer, since the | |
70 | +process may be interrupted after control is passed over to it but before | |
71 | +the execution in user space takes place. Simply measuring the interval | |
72 | +between enqueuing and wakeup may also not appropriate in cases when a | |
73 | +process is scheduled as a result of a timer expiration. The timer may have | |
74 | +missed its deadline, e.g. due to disabled interrupts, but this latency | |
75 | +would not be registered. Therefore, the offsets of missed timers are | |
76 | +recorded in a separate histogram. If both wakeup latency and missed timer | |
77 | +offsets are configured and enabled, a third histogram may be enabled that | |
78 | +records the overall latency as a sum of the timer latency, if any, and the | |
79 | +wakeup latency. This histogram is called "timerandwakeup". | |
80 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
81 | + CONFIG_WAKEUP_LATENCY | |
82 | + CONFIG_MISSED_TIMER_OFSETS | |
83 | + | |
84 | + | |
85 | +* Usage | |
86 | + | |
87 | +The interface to the administration of the latency histograms is located | |
88 | +in the debugfs file system. To mount it, either enter | |
89 | + | |
90 | +mount -t sysfs nodev /sys | |
91 | +mount -t debugfs nodev /sys/kernel/debug | |
92 | + | |
93 | +from shell command line level, or add | |
94 | + | |
95 | +nodev /sys sysfs defaults 0 0 | |
96 | +nodev /sys/kernel/debug debugfs defaults 0 0 | |
97 | + | |
98 | +to the file /etc/fstab. All latency histogram related files are then | |
99 | +available in the directory /sys/kernel/debug/tracing/latency_hist. A | |
100 | +particular histogram type is enabled by writing non-zero to the related | |
101 | +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. | |
102 | +Select "preemptirqsoff" for the histograms of potential sources of | |
103 | +latencies and "wakeup" for histograms of effective latencies etc. The | |
104 | +histogram data - one per CPU - are available in the files | |
105 | + | |
106 | +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx | |
107 | +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx | |
108 | +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx | |
109 | +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx | |
110 | +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx | |
111 | +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx | |
112 | +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx | |
113 | + | |
114 | +The histograms are reset by writing non-zero to the file "reset" in a | |
115 | +particular latency directory. To reset all latency data, use | |
116 | + | |
117 | +#!/bin/sh | |
118 | + | |
119 | +TRACINGDIR=/sys/kernel/debug/tracing | |
120 | +HISTDIR=$TRACINGDIR/latency_hist | |
121 | + | |
122 | +if test -d $HISTDIR | |
123 | +then | |
124 | + cd $HISTDIR | |
125 | + for i in `find . | grep /reset$` | |
126 | + do | |
127 | + echo 1 >$i | |
128 | + done | |
129 | +fi | |
130 | + | |
131 | + | |
132 | +* Data format | |
133 | + | |
134 | +Latency data are stored with a resolution of one microsecond. The | |
135 | +maximum latency is 10,240 microseconds. The data are only valid, if the | |
136 | +overflow register is empty. Every output line contains the latency in | |
137 | +microseconds in the first row and the number of samples in the second | |
138 | +row. To display only lines with a positive latency count, use, for | |
139 | +example, | |
140 | + | |
141 | +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 | |
142 | + | |
143 | +#Minimum latency: 0 microseconds. | |
144 | +#Average latency: 0 microseconds. | |
145 | +#Maximum latency: 25 microseconds. | |
146 | +#Total samples: 3104770694 | |
147 | +#There are 0 samples greater or equal than 10240 microseconds | |
148 | +#usecs samples | |
149 | + 0 2984486876 | |
150 | + 1 49843506 | |
151 | + 2 58219047 | |
152 | + 3 5348126 | |
153 | + 4 2187960 | |
154 | + 5 3388262 | |
155 | + 6 959289 | |
156 | + 7 208294 | |
157 | + 8 40420 | |
158 | + 9 4485 | |
159 | + 10 14918 | |
160 | + 11 18340 | |
161 | + 12 25052 | |
162 | + 13 19455 | |
163 | + 14 5602 | |
164 | + 15 969 | |
165 | + 16 47 | |
166 | + 17 18 | |
167 | + 18 14 | |
168 | + 19 1 | |
169 | + 20 3 | |
170 | + 21 2 | |
171 | + 22 5 | |
172 | + 23 2 | |
173 | + 25 1 | |
174 | + | |
175 | + | |
176 | +* Wakeup latency of a selected process | |
177 | + | |
178 | +To only collect wakeup latency data of a particular process, write the | |
179 | +PID of the requested process to | |
180 | + | |
181 | +/sys/kernel/debug/tracing/latency_hist/wakeup/pid | |
182 | + | |
183 | +PIDs are not considered, if this variable is set to 0. | |
184 | + | |
185 | + | |
186 | +* Details of the process with the highest wakeup latency so far | |
187 | + | |
188 | +Selected data of the process that suffered from the highest wakeup | |
189 | +latency that occurred in a particular CPU are available in the file | |
190 | + | |
191 | +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx. | |
192 | + | |
193 | +In addition, other relevant system data at the time when the | |
194 | +latency occurred are given. | |
195 | + | |
196 | +The format of the data is (all in one line): | |
197 | +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \ | |
198 | +<- <PID> <Priority> <Command> <Timestamp> | |
199 | + | |
200 | +The value of <Timeroffset> is only relevant in the combined timer | |
201 | +and wakeup latency recording. In the wakeup recording, it is | |
202 | +always 0, in the missed_timer_offsets recording, it is the same | |
203 | +as <Latency>. | |
204 | + | |
205 | +When retrospectively searching for the origin of a latency and | |
206 | +tracing was not enabled, it may be helpful to know the name and | |
207 | +some basic data of the task that (finally) was switching to the | |
208 | +late real-tlme task. In addition to the victim's data, also the | |
209 | +data of the possible culprit are therefore displayed after the | |
210 | +"<-" symbol. | |
211 | + | |
212 | +Finally, the timestamp of the time when the latency occurred | |
213 | +in <seconds>.<microseconds> after the most recent system boot | |
214 | +is provided. | |
215 | + | |
216 | +These data are also reset when the wakeup histogram is reset. | |
217 | diff --git a/arch/Kconfig b/arch/Kconfig | |
218 | index 659bdd079277..099fc0f5155e 100644 | |
219 | --- a/arch/Kconfig | |
220 | +++ b/arch/Kconfig | |
221 | @@ -9,6 +9,7 @@ config OPROFILE | |
222 | tristate "OProfile system profiling" | |
223 | depends on PROFILING | |
224 | depends on HAVE_OPROFILE | |
225 | + depends on !PREEMPT_RT_FULL | |
226 | select RING_BUFFER | |
227 | select RING_BUFFER_ALLOW_SWAP | |
228 | help | |
229 | @@ -52,6 +53,7 @@ config KPROBES | |
230 | config JUMP_LABEL | |
231 | bool "Optimize very unlikely/likely branches" | |
232 | depends on HAVE_ARCH_JUMP_LABEL | |
233 | + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) | |
234 | help | |
235 | This option enables a transparent branch optimization that | |
236 | makes certain almost-always-true or almost-always-false branch | |
237 | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig | |
238 | index b5d529fdffab..5715844e83e3 100644 | |
239 | --- a/arch/arm/Kconfig | |
240 | +++ b/arch/arm/Kconfig | |
241 | @@ -36,7 +36,7 @@ config ARM | |
242 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
243 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
244 | select HAVE_ARCH_HARDENED_USERCOPY | |
245 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU | |
246 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE | |
247 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU | |
248 | select HAVE_ARCH_MMAP_RND_BITS if MMU | |
249 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
250 | @@ -75,6 +75,7 @@ config ARM | |
251 | select HAVE_PERF_EVENTS | |
252 | select HAVE_PERF_REGS | |
253 | select HAVE_PERF_USER_STACK_DUMP | |
254 | + select HAVE_PREEMPT_LAZY | |
255 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
256 | select HAVE_REGS_AND_STACK_ACCESS_API | |
257 | select HAVE_SYSCALL_TRACEPOINTS | |
258 | diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h | |
259 | index e53638c8ed8a..6095a1649865 100644 | |
260 | --- a/arch/arm/include/asm/irq.h | |
261 | +++ b/arch/arm/include/asm/irq.h | |
262 | @@ -22,6 +22,8 @@ | |
263 | #endif | |
264 | ||
265 | #ifndef __ASSEMBLY__ | |
266 | +#include <linux/cpumask.h> | |
267 | + | |
268 | struct irqaction; | |
269 | struct pt_regs; | |
270 | extern void migrate_irqs(void); | |
271 | diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h | |
272 | index 12ebfcc1d539..c962084605bc 100644 | |
273 | --- a/arch/arm/include/asm/switch_to.h | |
274 | +++ b/arch/arm/include/asm/switch_to.h | |
275 | @@ -3,6 +3,13 @@ | |
276 | ||
277 | #include <linux/thread_info.h> | |
278 | ||
279 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
280 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
281 | +#else | |
282 | +static inline void | |
283 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
284 | +#endif | |
285 | + | |
286 | /* | |
287 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
288 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
289 | @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info | |
290 | #define switch_to(prev,next,last) \ | |
291 | do { \ | |
292 | __complete_pending_tlbi(); \ | |
293 | + switch_kmaps(prev, next); \ | |
294 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
295 | } while (0) | |
296 | ||
297 | diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h | |
298 | index 776757d1604a..1f36a4eccc72 100644 | |
299 | --- a/arch/arm/include/asm/thread_info.h | |
300 | +++ b/arch/arm/include/asm/thread_info.h | |
301 | @@ -49,6 +49,7 @@ struct cpu_context_save { | |
302 | struct thread_info { | |
303 | unsigned long flags; /* low level flags */ | |
304 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
305 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
306 | mm_segment_t addr_limit; /* address limit */ | |
307 | struct task_struct *task; /* main task structure */ | |
308 | __u32 cpu; /* cpu */ | |
309 | @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
310 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ | |
311 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
312 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
313 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
314 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
315 | +#define TIF_NEED_RESCHED_LAZY 7 | |
316 | ||
317 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
318 | #define TIF_USING_IWMMXT 17 | |
319 | @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
320 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | |
321 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
322 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
323 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
324 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
325 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
326 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
327 | @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
328 | * Change these and you break ASM code in entry-common.S | |
329 | */ | |
330 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
331 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
332 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
333 | + _TIF_NEED_RESCHED_LAZY) | |
334 | ||
335 | #endif /* __KERNEL__ */ | |
336 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
337 | diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c | |
338 | index 608008229c7d..3866da3f7bb7 100644 | |
339 | --- a/arch/arm/kernel/asm-offsets.c | |
340 | +++ b/arch/arm/kernel/asm-offsets.c | |
341 | @@ -65,6 +65,7 @@ int main(void) | |
342 | BLANK(); | |
343 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
344 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
345 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
346 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
347 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
348 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
349 | diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S | |
350 | index 9f157e7c51e7..468e224d76aa 100644 | |
351 | --- a/arch/arm/kernel/entry-armv.S | |
352 | +++ b/arch/arm/kernel/entry-armv.S | |
353 | @@ -220,11 +220,18 @@ ENDPROC(__dabt_svc) | |
354 | ||
355 | #ifdef CONFIG_PREEMPT | |
356 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
357 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
358 | teq r8, #0 @ if preempt count != 0 | |
359 | + bne 1f @ return from exeption | |
360 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
361 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
362 | + blne svc_preempt @ preempt! | |
363 | + | |
364 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
365 | + teq r8, #0 @ if preempt lazy count != 0 | |
366 | movne r0, #0 @ force flags to 0 | |
367 | - tst r0, #_TIF_NEED_RESCHED | |
368 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
369 | blne svc_preempt | |
370 | +1: | |
371 | #endif | |
372 | ||
373 | svc_exit r5, irq = 1 @ return from exception | |
374 | @@ -239,8 +246,14 @@ ENDPROC(__irq_svc) | |
375 | 1: bl preempt_schedule_irq @ irq en/disable is done inside | |
376 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
377 | tst r0, #_TIF_NEED_RESCHED | |
378 | + bne 1b | |
379 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
380 | reteq r8 @ go again | |
381 | - b 1b | |
382 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
383 | + teq r0, #0 @ if preempt lazy count != 0 | |
384 | + beq 1b | |
385 | + ret r8 @ go again | |
386 | + | |
387 | #endif | |
388 | ||
389 | __und_fault: | |
390 | diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S | |
391 | index 10c3283d6c19..8872937862cc 100644 | |
392 | --- a/arch/arm/kernel/entry-common.S | |
393 | +++ b/arch/arm/kernel/entry-common.S | |
394 | @@ -36,7 +36,9 @@ | |
395 | UNWIND(.cantunwind ) | |
396 | disable_irq_notrace @ disable interrupts | |
397 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
398 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
399 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
400 | + bne fast_work_pending | |
401 | + tst r1, #_TIF_SECCOMP | |
402 | bne fast_work_pending | |
403 | ||
404 | /* perform architecture specific actions before user return */ | |
405 | @@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall) | |
406 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | |
407 | disable_irq_notrace @ disable interrupts | |
408 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
409 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
410 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
411 | + bne do_slower_path | |
412 | + tst r1, #_TIF_SECCOMP | |
413 | beq no_work_pending | |
414 | +do_slower_path: | |
415 | UNWIND(.fnend ) | |
416 | ENDPROC(ret_fast_syscall) | |
417 | ||
418 | diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c | |
419 | index 69bda1a5707e..1f665acaa6a9 100644 | |
420 | --- a/arch/arm/kernel/patch.c | |
421 | +++ b/arch/arm/kernel/patch.c | |
422 | @@ -15,7 +15,7 @@ struct patch { | |
423 | unsigned int insn; | |
424 | }; | |
425 | ||
426 | -static DEFINE_SPINLOCK(patch_lock); | |
427 | +static DEFINE_RAW_SPINLOCK(patch_lock); | |
428 | ||
429 | static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
430 | __acquires(&patch_lock) | |
431 | @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
432 | return addr; | |
433 | ||
434 | if (flags) | |
435 | - spin_lock_irqsave(&patch_lock, *flags); | |
436 | + raw_spin_lock_irqsave(&patch_lock, *flags); | |
437 | else | |
438 | __acquire(&patch_lock); | |
439 | ||
440 | @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) | |
441 | clear_fixmap(fixmap); | |
442 | ||
443 | if (flags) | |
444 | - spin_unlock_irqrestore(&patch_lock, *flags); | |
445 | + raw_spin_unlock_irqrestore(&patch_lock, *flags); | |
446 | else | |
447 | __release(&patch_lock); | |
448 | } | |
449 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c | |
450 | index 91d2d5b01414..750550098b59 100644 | |
451 | --- a/arch/arm/kernel/process.c | |
452 | +++ b/arch/arm/kernel/process.c | |
453 | @@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |
454 | } | |
455 | ||
456 | #ifdef CONFIG_MMU | |
457 | +/* | |
458 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
459 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
460 | + * fail. | |
461 | + */ | |
462 | +static int __init vectors_user_mapping_init_page(void) | |
463 | +{ | |
464 | + struct page *page; | |
465 | + unsigned long addr = 0xffff0000; | |
466 | + pgd_t *pgd; | |
467 | + pud_t *pud; | |
468 | + pmd_t *pmd; | |
469 | + | |
470 | + pgd = pgd_offset_k(addr); | |
471 | + pud = pud_offset(pgd, addr); | |
472 | + pmd = pmd_offset(pud, addr); | |
473 | + page = pmd_page(*(pmd)); | |
474 | + | |
475 | + pgtable_page_ctor(page); | |
476 | + | |
477 | + return 0; | |
478 | +} | |
479 | +late_initcall(vectors_user_mapping_init_page); | |
480 | + | |
481 | #ifdef CONFIG_KUSER_HELPERS | |
482 | /* | |
483 | * The vectors page is always readable from user space for the | |
484 | diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c | |
485 | index 7b8f2141427b..96541e00b74a 100644 | |
486 | --- a/arch/arm/kernel/signal.c | |
487 | +++ b/arch/arm/kernel/signal.c | |
488 | @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | |
489 | */ | |
490 | trace_hardirqs_off(); | |
491 | do { | |
492 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
493 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
494 | + _TIF_NEED_RESCHED_LAZY))) { | |
495 | schedule(); | |
496 | } else { | |
497 | if (unlikely(!user_mode(regs))) | |
498 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
499 | index 7dd14e8395e6..4cd7e3d98035 100644 | |
500 | --- a/arch/arm/kernel/smp.c | |
501 | +++ b/arch/arm/kernel/smp.c | |
502 | @@ -234,8 +234,6 @@ int __cpu_disable(void) | |
503 | flush_cache_louis(); | |
504 | local_flush_tlb_all(); | |
505 | ||
506 | - clear_tasks_mm_cpumask(cpu); | |
507 | - | |
508 | return 0; | |
509 | } | |
510 | ||
511 | @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu) | |
512 | pr_err("CPU%u: cpu didn't die\n", cpu); | |
513 | return; | |
514 | } | |
515 | + | |
516 | + clear_tasks_mm_cpumask(cpu); | |
517 | + | |
518 | pr_notice("CPU%u: shutdown\n", cpu); | |
519 | ||
520 | /* | |
521 | diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c | |
522 | index 0bee233fef9a..314cfb232a63 100644 | |
523 | --- a/arch/arm/kernel/unwind.c | |
524 | +++ b/arch/arm/kernel/unwind.c | |
525 | @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; | |
526 | static const struct unwind_idx *__origin_unwind_idx; | |
527 | extern const struct unwind_idx __stop_unwind_idx[]; | |
528 | ||
529 | -static DEFINE_SPINLOCK(unwind_lock); | |
530 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
531 | static LIST_HEAD(unwind_tables); | |
532 | ||
533 | /* Convert a prel31 symbol to an absolute address */ | |
534 | @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
535 | /* module unwind tables */ | |
536 | struct unwind_table *table; | |
537 | ||
538 | - spin_lock_irqsave(&unwind_lock, flags); | |
539 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
540 | list_for_each_entry(table, &unwind_tables, list) { | |
541 | if (addr >= table->begin_addr && | |
542 | addr < table->end_addr) { | |
543 | @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
544 | break; | |
545 | } | |
546 | } | |
547 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
548 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
549 | } | |
550 | ||
551 | pr_debug("%s: idx = %p\n", __func__, idx); | |
552 | @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, | |
553 | tab->begin_addr = text_addr; | |
554 | tab->end_addr = text_addr + text_size; | |
555 | ||
556 | - spin_lock_irqsave(&unwind_lock, flags); | |
557 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
558 | list_add_tail(&tab->list, &unwind_tables); | |
559 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
560 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
561 | ||
562 | return tab; | |
563 | } | |
564 | @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) | |
565 | if (!tab) | |
566 | return; | |
567 | ||
568 | - spin_lock_irqsave(&unwind_lock, flags); | |
569 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
570 | list_del(&tab->list); | |
571 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
572 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
573 | ||
574 | kfree(tab); | |
575 | } | |
576 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | |
577 | index 19b5f5c1c0ff..82aa639e6737 100644 | |
578 | --- a/arch/arm/kvm/arm.c | |
579 | +++ b/arch/arm/kvm/arm.c | |
580 | @@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
581 | * involves poking the GIC, which must be done in a | |
582 | * non-preemptible context. | |
583 | */ | |
584 | - preempt_disable(); | |
585 | + migrate_disable(); | |
586 | kvm_pmu_flush_hwstate(vcpu); | |
587 | kvm_timer_flush_hwstate(vcpu); | |
588 | kvm_vgic_flush_hwstate(vcpu); | |
589 | @@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
590 | kvm_pmu_sync_hwstate(vcpu); | |
591 | kvm_timer_sync_hwstate(vcpu); | |
592 | kvm_vgic_sync_hwstate(vcpu); | |
593 | - preempt_enable(); | |
594 | + migrate_enable(); | |
595 | continue; | |
596 | } | |
597 | ||
598 | @@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
599 | ||
600 | kvm_vgic_sync_hwstate(vcpu); | |
601 | ||
602 | - preempt_enable(); | |
603 | + migrate_enable(); | |
604 | ||
605 | ret = handle_exit(vcpu, run, ret); | |
606 | } | |
607 | diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c | |
608 | index 98ffe1e62ad5..df9769ddece5 100644 | |
609 | --- a/arch/arm/mach-exynos/platsmp.c | |
610 | +++ b/arch/arm/mach-exynos/platsmp.c | |
611 | @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) | |
612 | return (void __iomem *)(S5P_VA_SCU); | |
613 | } | |
614 | ||
615 | -static DEFINE_SPINLOCK(boot_lock); | |
616 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
617 | ||
618 | static void exynos_secondary_init(unsigned int cpu) | |
619 | { | |
620 | @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu) | |
621 | /* | |
622 | * Synchronise with the boot thread. | |
623 | */ | |
624 | - spin_lock(&boot_lock); | |
625 | - spin_unlock(&boot_lock); | |
626 | + raw_spin_lock(&boot_lock); | |
627 | + raw_spin_unlock(&boot_lock); | |
628 | } | |
629 | ||
630 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
631 | @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
632 | * Set synchronisation state between this boot processor | |
633 | * and the secondary one | |
634 | */ | |
635 | - spin_lock(&boot_lock); | |
636 | + raw_spin_lock(&boot_lock); | |
637 | ||
638 | /* | |
639 | * The secondary processor is waiting to be released from | |
640 | @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
641 | ||
642 | if (timeout == 0) { | |
643 | printk(KERN_ERR "cpu1 power enable failed"); | |
644 | - spin_unlock(&boot_lock); | |
645 | + raw_spin_unlock(&boot_lock); | |
646 | return -ETIMEDOUT; | |
647 | } | |
648 | } | |
649 | @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
650 | * calibrations, then wait for it to finish | |
651 | */ | |
652 | fail: | |
653 | - spin_unlock(&boot_lock); | |
654 | + raw_spin_unlock(&boot_lock); | |
655 | ||
656 | return pen_release != -1 ? ret : 0; | |
657 | } | |
658 | diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c | |
659 | index 4b653a8cb75c..b03d5a922cb1 100644 | |
660 | --- a/arch/arm/mach-hisi/platmcpm.c | |
661 | +++ b/arch/arm/mach-hisi/platmcpm.c | |
662 | @@ -61,7 +61,7 @@ | |
663 | ||
664 | static void __iomem *sysctrl, *fabric; | |
665 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
666 | -static DEFINE_SPINLOCK(boot_lock); | |
667 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
668 | static u32 fabric_phys_addr; | |
669 | /* | |
670 | * [0]: bootwrapper physical address | |
671 | @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
672 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) | |
673 | return -EINVAL; | |
674 | ||
675 | - spin_lock_irq(&boot_lock); | |
676 | + raw_spin_lock_irq(&boot_lock); | |
677 | ||
678 | if (hip04_cpu_table[cluster][cpu]) | |
679 | goto out; | |
680 | @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
681 | ||
682 | out: | |
683 | hip04_cpu_table[cluster][cpu]++; | |
684 | - spin_unlock_irq(&boot_lock); | |
685 | + raw_spin_unlock_irq(&boot_lock); | |
686 | ||
687 | return 0; | |
688 | } | |
689 | @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
690 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); | |
691 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
692 | ||
693 | - spin_lock(&boot_lock); | |
694 | + raw_spin_lock(&boot_lock); | |
695 | hip04_cpu_table[cluster][cpu]--; | |
696 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
697 | /* A power_up request went ahead of us. */ | |
698 | - spin_unlock(&boot_lock); | |
699 | + raw_spin_unlock(&boot_lock); | |
700 | return; | |
701 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
702 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
703 | @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
704 | } | |
705 | ||
706 | last_man = hip04_cluster_is_down(cluster); | |
707 | - spin_unlock(&boot_lock); | |
708 | + raw_spin_unlock(&boot_lock); | |
709 | if (last_man) { | |
710 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
711 | asm volatile( | |
712 | @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
713 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); | |
714 | ||
715 | count = TIMEOUT_MSEC / POLL_MSEC; | |
716 | - spin_lock_irq(&boot_lock); | |
717 | + raw_spin_lock_irq(&boot_lock); | |
718 | for (tries = 0; tries < count; tries++) { | |
719 | if (hip04_cpu_table[cluster][cpu]) | |
720 | goto err; | |
721 | @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
722 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); | |
723 | if (data & CORE_WFI_STATUS(cpu)) | |
724 | break; | |
725 | - spin_unlock_irq(&boot_lock); | |
726 | + raw_spin_unlock_irq(&boot_lock); | |
727 | /* Wait for clean L2 when the whole cluster is down. */ | |
728 | msleep(POLL_MSEC); | |
729 | - spin_lock_irq(&boot_lock); | |
730 | + raw_spin_lock_irq(&boot_lock); | |
731 | } | |
732 | if (tries >= count) | |
733 | goto err; | |
734 | @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
735 | goto err; | |
736 | if (hip04_cluster_is_down(cluster)) | |
737 | hip04_set_snoop_filter(cluster, 0); | |
738 | - spin_unlock_irq(&boot_lock); | |
739 | + raw_spin_unlock_irq(&boot_lock); | |
740 | return 1; | |
741 | err: | |
742 | - spin_unlock_irq(&boot_lock); | |
743 | + raw_spin_unlock_irq(&boot_lock); | |
744 | return 0; | |
745 | } | |
746 | #endif | |
747 | diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c | |
748 | index b4de3da6dffa..b52893319d75 100644 | |
749 | --- a/arch/arm/mach-omap2/omap-smp.c | |
750 | +++ b/arch/arm/mach-omap2/omap-smp.c | |
751 | @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { | |
752 | .startup_addr = omap5_secondary_startup, | |
753 | }; | |
754 | ||
755 | -static DEFINE_SPINLOCK(boot_lock); | |
756 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
757 | ||
758 | void __iomem *omap4_get_scu_base(void) | |
759 | { | |
760 | @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu) | |
761 | /* | |
762 | * Synchronise with the boot thread. | |
763 | */ | |
764 | - spin_lock(&boot_lock); | |
765 | - spin_unlock(&boot_lock); | |
766 | + raw_spin_lock(&boot_lock); | |
767 | + raw_spin_unlock(&boot_lock); | |
768 | } | |
769 | ||
770 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
771 | @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
772 | * Set synchronisation state between this boot processor | |
773 | * and the secondary one | |
774 | */ | |
775 | - spin_lock(&boot_lock); | |
776 | + raw_spin_lock(&boot_lock); | |
777 | ||
778 | /* | |
779 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
780 | @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
781 | * Now the secondary core is starting up let it run its | |
782 | * calibrations, then wait for it to finish | |
783 | */ | |
784 | - spin_unlock(&boot_lock); | |
785 | + raw_spin_unlock(&boot_lock); | |
786 | ||
787 | return 0; | |
788 | } | |
789 | diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c | |
790 | index 0875b99add18..18b6d98d2581 100644 | |
791 | --- a/arch/arm/mach-prima2/platsmp.c | |
792 | +++ b/arch/arm/mach-prima2/platsmp.c | |
793 | @@ -22,7 +22,7 @@ | |
794 | ||
795 | static void __iomem *clk_base; | |
796 | ||
797 | -static DEFINE_SPINLOCK(boot_lock); | |
798 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
799 | ||
800 | static void sirfsoc_secondary_init(unsigned int cpu) | |
801 | { | |
802 | @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) | |
803 | /* | |
804 | * Synchronise with the boot thread. | |
805 | */ | |
806 | - spin_lock(&boot_lock); | |
807 | - spin_unlock(&boot_lock); | |
808 | + raw_spin_lock(&boot_lock); | |
809 | + raw_spin_unlock(&boot_lock); | |
810 | } | |
811 | ||
812 | static const struct of_device_id clk_ids[] = { | |
813 | @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
814 | /* make sure write buffer is drained */ | |
815 | mb(); | |
816 | ||
817 | - spin_lock(&boot_lock); | |
818 | + raw_spin_lock(&boot_lock); | |
819 | ||
820 | /* | |
821 | * The secondary processor is waiting to be released from | |
822 | @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
823 | * now the secondary core is starting up let it run its | |
824 | * calibrations, then wait for it to finish | |
825 | */ | |
826 | - spin_unlock(&boot_lock); | |
827 | + raw_spin_unlock(&boot_lock); | |
828 | ||
829 | return pen_release != -1 ? -ENOSYS : 0; | |
830 | } | |
831 | diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c | |
832 | index 5494c9e0c909..e8ce157d3548 100644 | |
833 | --- a/arch/arm/mach-qcom/platsmp.c | |
834 | +++ b/arch/arm/mach-qcom/platsmp.c | |
835 | @@ -46,7 +46,7 @@ | |
836 | ||
837 | extern void secondary_startup_arm(void); | |
838 | ||
839 | -static DEFINE_SPINLOCK(boot_lock); | |
840 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
841 | ||
842 | #ifdef CONFIG_HOTPLUG_CPU | |
843 | static void qcom_cpu_die(unsigned int cpu) | |
844 | @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) | |
845 | /* | |
846 | * Synchronise with the boot thread. | |
847 | */ | |
848 | - spin_lock(&boot_lock); | |
849 | - spin_unlock(&boot_lock); | |
850 | + raw_spin_lock(&boot_lock); | |
851 | + raw_spin_unlock(&boot_lock); | |
852 | } | |
853 | ||
854 | static int scss_release_secondary(unsigned int cpu) | |
855 | @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
856 | * set synchronisation state between this boot processor | |
857 | * and the secondary one | |
858 | */ | |
859 | - spin_lock(&boot_lock); | |
860 | + raw_spin_lock(&boot_lock); | |
861 | ||
862 | /* | |
863 | * Send the secondary CPU a soft interrupt, thereby causing | |
864 | @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
865 | * now the secondary core is starting up let it run its | |
866 | * calibrations, then wait for it to finish | |
867 | */ | |
868 | - spin_unlock(&boot_lock); | |
869 | + raw_spin_unlock(&boot_lock); | |
870 | ||
871 | return ret; | |
872 | } | |
873 | diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c | |
874 | index 8d1e2d551786..7fa56cc78118 100644 | |
875 | --- a/arch/arm/mach-spear/platsmp.c | |
876 | +++ b/arch/arm/mach-spear/platsmp.c | |
877 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
878 | sync_cache_w(&pen_release); | |
879 | } | |
880 | ||
881 | -static DEFINE_SPINLOCK(boot_lock); | |
882 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
883 | ||
884 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
885 | ||
886 | @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) | |
887 | /* | |
888 | * Synchronise with the boot thread. | |
889 | */ | |
890 | - spin_lock(&boot_lock); | |
891 | - spin_unlock(&boot_lock); | |
892 | + raw_spin_lock(&boot_lock); | |
893 | + raw_spin_unlock(&boot_lock); | |
894 | } | |
895 | ||
896 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
897 | @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
898 | * set synchronisation state between this boot processor | |
899 | * and the secondary one | |
900 | */ | |
901 | - spin_lock(&boot_lock); | |
902 | + raw_spin_lock(&boot_lock); | |
903 | ||
904 | /* | |
905 | * The secondary processor is waiting to be released from | |
906 | @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
907 | * now the secondary core is starting up let it run its | |
908 | * calibrations, then wait for it to finish | |
909 | */ | |
910 | - spin_unlock(&boot_lock); | |
911 | + raw_spin_unlock(&boot_lock); | |
912 | ||
913 | return pen_release != -1 ? -ENOSYS : 0; | |
914 | } | |
915 | diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c | |
916 | index ea5a2277ee46..b988e081ac79 100644 | |
917 | --- a/arch/arm/mach-sti/platsmp.c | |
918 | +++ b/arch/arm/mach-sti/platsmp.c | |
919 | @@ -35,7 +35,7 @@ static void write_pen_release(int val) | |
920 | sync_cache_w(&pen_release); | |
921 | } | |
922 | ||
923 | -static DEFINE_SPINLOCK(boot_lock); | |
924 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
925 | ||
926 | static void sti_secondary_init(unsigned int cpu) | |
927 | { | |
928 | @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) | |
929 | /* | |
930 | * Synchronise with the boot thread. | |
931 | */ | |
932 | - spin_lock(&boot_lock); | |
933 | - spin_unlock(&boot_lock); | |
934 | + raw_spin_lock(&boot_lock); | |
935 | + raw_spin_unlock(&boot_lock); | |
936 | } | |
937 | ||
938 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
939 | @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
940 | * set synchronisation state between this boot processor | |
941 | * and the secondary one | |
942 | */ | |
943 | - spin_lock(&boot_lock); | |
944 | + raw_spin_lock(&boot_lock); | |
945 | ||
946 | /* | |
947 | * The secondary processor is waiting to be released from | |
948 | @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
949 | * now the secondary core is starting up let it run its | |
950 | * calibrations, then wait for it to finish | |
951 | */ | |
952 | - spin_unlock(&boot_lock); | |
953 | + raw_spin_unlock(&boot_lock); | |
954 | ||
955 | return pen_release != -1 ? -ENOSYS : 0; | |
956 | } | |
957 | diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c | |
958 | index 3a2e678b8d30..3ed1e9ba6a01 100644 | |
959 | --- a/arch/arm/mm/fault.c | |
960 | +++ b/arch/arm/mm/fault.c | |
961 | @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
962 | if (addr < TASK_SIZE) | |
963 | return do_page_fault(addr, fsr, regs); | |
964 | ||
965 | + if (interrupts_enabled(regs)) | |
966 | + local_irq_enable(); | |
967 | + | |
968 | if (user_mode(regs)) | |
969 | goto bad_area; | |
970 | ||
971 | @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
972 | static int | |
973 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
974 | { | |
975 | + if (interrupts_enabled(regs)) | |
976 | + local_irq_enable(); | |
977 | + | |
978 | do_bad_area(addr, fsr, regs); | |
979 | return 0; | |
980 | } | |
981 | diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c | |
982 | index d02f8187b1cc..542692dbd40a 100644 | |
983 | --- a/arch/arm/mm/highmem.c | |
984 | +++ b/arch/arm/mm/highmem.c | |
985 | @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) | |
986 | return *ptep; | |
987 | } | |
988 | ||
989 | +static unsigned int fixmap_idx(int type) | |
990 | +{ | |
991 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
992 | +} | |
993 | + | |
994 | void *kmap(struct page *page) | |
995 | { | |
996 | might_sleep(); | |
997 | @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); | |
998 | ||
999 | void *kmap_atomic(struct page *page) | |
1000 | { | |
1001 | + pte_t pte = mk_pte(page, kmap_prot); | |
1002 | unsigned int idx; | |
1003 | unsigned long vaddr; | |
1004 | void *kmap; | |
1005 | int type; | |
1006 | ||
1007 | - preempt_disable(); | |
1008 | + preempt_disable_nort(); | |
1009 | pagefault_disable(); | |
1010 | if (!PageHighMem(page)) | |
1011 | return page_address(page); | |
1012 | @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) | |
1013 | ||
1014 | type = kmap_atomic_idx_push(); | |
1015 | ||
1016 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1017 | + idx = fixmap_idx(type); | |
1018 | vaddr = __fix_to_virt(idx); | |
1019 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1020 | /* | |
1021 | @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) | |
1022 | * in place, so the contained TLB flush ensures the TLB is updated | |
1023 | * with the new mapping. | |
1024 | */ | |
1025 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
1026 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1027 | + current->kmap_pte[type] = pte; | |
1028 | +#endif | |
1029 | + set_fixmap_pte(idx, pte); | |
1030 | ||
1031 | return (void *)vaddr; | |
1032 | } | |
1033 | @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) | |
1034 | ||
1035 | if (kvaddr >= (void *)FIXADDR_START) { | |
1036 | type = kmap_atomic_idx(); | |
1037 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1038 | + idx = fixmap_idx(type); | |
1039 | ||
1040 | if (cache_is_vivt()) | |
1041 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
1042 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1043 | + current->kmap_pte[type] = __pte(0); | |
1044 | +#endif | |
1045 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1046 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
1047 | - set_fixmap_pte(idx, __pte(0)); | |
1048 | #else | |
1049 | (void) idx; /* to kill a warning */ | |
1050 | #endif | |
1051 | + set_fixmap_pte(idx, __pte(0)); | |
1052 | kmap_atomic_idx_pop(); | |
1053 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
1054 | /* this address was obtained through kmap_high_get() */ | |
1055 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
1056 | } | |
1057 | pagefault_enable(); | |
1058 | - preempt_enable(); | |
1059 | + preempt_enable_nort(); | |
1060 | } | |
1061 | EXPORT_SYMBOL(__kunmap_atomic); | |
1062 | ||
1063 | void *kmap_atomic_pfn(unsigned long pfn) | |
1064 | { | |
1065 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
1066 | unsigned long vaddr; | |
1067 | int idx, type; | |
1068 | struct page *page = pfn_to_page(pfn); | |
1069 | ||
1070 | - preempt_disable(); | |
1071 | + preempt_disable_nort(); | |
1072 | pagefault_disable(); | |
1073 | if (!PageHighMem(page)) | |
1074 | return page_address(page); | |
1075 | ||
1076 | type = kmap_atomic_idx_push(); | |
1077 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1078 | + idx = fixmap_idx(type); | |
1079 | vaddr = __fix_to_virt(idx); | |
1080 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1081 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
1082 | #endif | |
1083 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
1084 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1085 | + current->kmap_pte[type] = pte; | |
1086 | +#endif | |
1087 | + set_fixmap_pte(idx, pte); | |
1088 | ||
1089 | return (void *)vaddr; | |
1090 | } | |
1091 | +#if defined CONFIG_PREEMPT_RT_FULL | |
1092 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
1093 | +{ | |
1094 | + int i; | |
1095 | + | |
1096 | + /* | |
1097 | + * Clear @prev's kmap_atomic mappings | |
1098 | + */ | |
1099 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
1100 | + int idx = fixmap_idx(i); | |
1101 | + | |
1102 | + set_fixmap_pte(idx, __pte(0)); | |
1103 | + } | |
1104 | + /* | |
1105 | + * Restore @next_p's kmap_atomic mappings | |
1106 | + */ | |
1107 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
1108 | + int idx = fixmap_idx(i); | |
1109 | + | |
1110 | + if (!pte_none(next_p->kmap_pte[i])) | |
1111 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
1112 | + } | |
1113 | +} | |
1114 | +#endif | |
1115 | diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c | |
1116 | index c2366510187a..6b60f582b738 100644 | |
1117 | --- a/arch/arm/plat-versatile/platsmp.c | |
1118 | +++ b/arch/arm/plat-versatile/platsmp.c | |
1119 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
1120 | sync_cache_w(&pen_release); | |
1121 | } | |
1122 | ||
1123 | -static DEFINE_SPINLOCK(boot_lock); | |
1124 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1125 | ||
1126 | void versatile_secondary_init(unsigned int cpu) | |
1127 | { | |
1128 | @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) | |
1129 | /* | |
1130 | * Synchronise with the boot thread. | |
1131 | */ | |
1132 | - spin_lock(&boot_lock); | |
1133 | - spin_unlock(&boot_lock); | |
1134 | + raw_spin_lock(&boot_lock); | |
1135 | + raw_spin_unlock(&boot_lock); | |
1136 | } | |
1137 | ||
1138 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1139 | @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1140 | * Set synchronisation state between this boot processor | |
1141 | * and the secondary one | |
1142 | */ | |
1143 | - spin_lock(&boot_lock); | |
1144 | + raw_spin_lock(&boot_lock); | |
1145 | ||
1146 | /* | |
1147 | * This is really belt and braces; we hold unintended secondary | |
1148 | @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1149 | * now the secondary core is starting up let it run its | |
1150 | * calibrations, then wait for it to finish | |
1151 | */ | |
1152 | - spin_unlock(&boot_lock); | |
1153 | + raw_spin_unlock(&boot_lock); | |
1154 | ||
1155 | return pen_release != -1 ? -ENOSYS : 0; | |
1156 | } | |
1157 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | |
1158 | index 969ef880d234..1182fe883771 100644 | |
1159 | --- a/arch/arm64/Kconfig | |
1160 | +++ b/arch/arm64/Kconfig | |
1161 | @@ -91,6 +91,7 @@ config ARM64 | |
1162 | select HAVE_PERF_EVENTS | |
1163 | select HAVE_PERF_REGS | |
1164 | select HAVE_PERF_USER_STACK_DUMP | |
1165 | + select HAVE_PREEMPT_LAZY | |
1166 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1167 | select HAVE_RCU_TABLE_FREE | |
1168 | select HAVE_SYSCALL_TRACEPOINTS | |
1169 | @@ -694,7 +695,7 @@ config XEN_DOM0 | |
1170 | ||
1171 | config XEN | |
1172 | bool "Xen guest support on ARM64" | |
1173 | - depends on ARM64 && OF | |
1174 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1175 | select SWIOTLB_XEN | |
1176 | select PARAVIRT | |
1177 | help | |
1178 | diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h | |
1179 | index e9ea5a6bd449..6c500ad63c6a 100644 | |
1180 | --- a/arch/arm64/include/asm/thread_info.h | |
1181 | +++ b/arch/arm64/include/asm/thread_info.h | |
1182 | @@ -49,6 +49,7 @@ struct thread_info { | |
1183 | mm_segment_t addr_limit; /* address limit */ | |
1184 | struct task_struct *task; /* main task structure */ | |
1185 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
1186 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1187 | int cpu; /* cpu */ | |
1188 | }; | |
1189 | ||
1190 | @@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void) | |
1191 | #define TIF_NEED_RESCHED 1 | |
1192 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | |
1193 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ | |
1194 | +#define TIF_NEED_RESCHED_LAZY 4 | |
1195 | #define TIF_NOHZ 7 | |
1196 | #define TIF_SYSCALL_TRACE 8 | |
1197 | #define TIF_SYSCALL_AUDIT 9 | |
1198 | @@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void) | |
1199 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
1200 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1201 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1202 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1203 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1204 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1205 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
1206 | @@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void) | |
1207 | #define _TIF_32BIT (1 << TIF_32BIT) | |
1208 | ||
1209 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
1210 | - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) | |
1211 | + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ | |
1212 | + _TIF_NEED_RESCHED_LAZY) | |
1213 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1214 | ||
1215 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1216 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
1217 | diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c | |
1218 | index 4a2f0f0fef32..6bf2bc17c400 100644 | |
1219 | --- a/arch/arm64/kernel/asm-offsets.c | |
1220 | +++ b/arch/arm64/kernel/asm-offsets.c | |
1221 | @@ -38,6 +38,7 @@ int main(void) | |
1222 | BLANK(); | |
1223 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1224 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1225 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1226 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
1227 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1228 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1229 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | |
1230 | index 79b0fe24d5b7..f3c959ade308 100644 | |
1231 | --- a/arch/arm64/kernel/entry.S | |
1232 | +++ b/arch/arm64/kernel/entry.S | |
1233 | @@ -428,11 +428,16 @@ ENDPROC(el1_sync) | |
1234 | ||
1235 | #ifdef CONFIG_PREEMPT | |
1236 | ldr w24, [tsk, #TI_PREEMPT] // get preempt count | |
1237 | - cbnz w24, 1f // preempt count != 0 | |
1238 | + cbnz w24, 2f // preempt count != 0 | |
1239 | ldr x0, [tsk, #TI_FLAGS] // get flags | |
1240 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1241 | - bl el1_preempt | |
1242 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1243 | + | |
1244 | + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count | |
1245 | + cbnz w24, 2f // preempt lazy count != 0 | |
1246 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1247 | 1: | |
1248 | + bl el1_preempt | |
1249 | +2: | |
1250 | #endif | |
1251 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1252 | bl trace_hardirqs_on | |
1253 | @@ -446,6 +451,7 @@ ENDPROC(el1_irq) | |
1254 | 1: bl preempt_schedule_irq // irq en/disable is done inside | |
1255 | ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS | |
1256 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? | |
1257 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1258 | ret x24 | |
1259 | #endif | |
1260 | ||
1261 | diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c | |
1262 | index 404dd67080b9..639dc6d12e72 100644 | |
1263 | --- a/arch/arm64/kernel/signal.c | |
1264 | +++ b/arch/arm64/kernel/signal.c | |
1265 | @@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, | |
1266 | */ | |
1267 | trace_hardirqs_off(); | |
1268 | do { | |
1269 | - if (thread_flags & _TIF_NEED_RESCHED) { | |
1270 | + if (thread_flags & _TIF_NEED_RESCHED_MASK) { | |
1271 | schedule(); | |
1272 | } else { | |
1273 | local_irq_enable(); | |
1274 | diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig | |
1275 | index b3c5bde43d34..8122bf058de0 100644 | |
1276 | --- a/arch/mips/Kconfig | |
1277 | +++ b/arch/mips/Kconfig | |
1278 | @@ -2514,7 +2514,7 @@ config MIPS_ASID_BITS_VARIABLE | |
1279 | # | |
1280 | config HIGHMEM | |
1281 | bool "High Memory Support" | |
1282 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1283 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1284 | ||
1285 | config CPU_SUPPORTS_HIGHMEM | |
1286 | bool | |
1287 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | |
1288 | index 65fba4c34cd7..4b5ba68910e0 100644 | |
1289 | --- a/arch/powerpc/Kconfig | |
1290 | +++ b/arch/powerpc/Kconfig | |
1291 | @@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT | |
1292 | ||
1293 | config RWSEM_GENERIC_SPINLOCK | |
1294 | bool | |
1295 | + default y if PREEMPT_RT_FULL | |
1296 | ||
1297 | config RWSEM_XCHGADD_ALGORITHM | |
1298 | bool | |
1299 | - default y | |
1300 | + default y if !PREEMPT_RT_FULL | |
1301 | ||
1302 | config GENERIC_LOCKBREAK | |
1303 | bool | |
1304 | @@ -134,6 +135,7 @@ config PPC | |
1305 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST | |
1306 | select GENERIC_STRNCPY_FROM_USER | |
1307 | select GENERIC_STRNLEN_USER | |
1308 | + select HAVE_PREEMPT_LAZY | |
1309 | select HAVE_MOD_ARCH_SPECIFIC | |
1310 | select MODULES_USE_ELF_RELA | |
1311 | select CLONE_BACKWARDS | |
1312 | @@ -321,7 +323,7 @@ menu "Kernel options" | |
1313 | ||
1314 | config HIGHMEM | |
1315 | bool "High memory support" | |
1316 | - depends on PPC32 | |
1317 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1318 | ||
1319 | source kernel/Kconfig.hz | |
1320 | source kernel/Kconfig.preempt | |
1321 | diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h | |
1322 | index 87e4b2d8dcd4..981e501a4359 100644 | |
1323 | --- a/arch/powerpc/include/asm/thread_info.h | |
1324 | +++ b/arch/powerpc/include/asm/thread_info.h | |
1325 | @@ -43,6 +43,8 @@ struct thread_info { | |
1326 | int cpu; /* cpu we're on */ | |
1327 | int preempt_count; /* 0 => preemptable, | |
1328 | <0 => BUG */ | |
1329 | + int preempt_lazy_count; /* 0 => preemptable, | |
1330 | + <0 => BUG */ | |
1331 | unsigned long local_flags; /* private flags for thread */ | |
1332 | #ifdef CONFIG_LIVEPATCH | |
1333 | unsigned long *livepatch_sp; | |
1334 | @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void) | |
1335 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ | |
1336 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1337 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1338 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1339 | - TIF_NEED_RESCHED */ | |
1340 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1341 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1342 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
1343 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
1344 | @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) | |
1345 | #if defined(CONFIG_PPC64) | |
1346 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1347 | #endif | |
1348 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1349 | + TIF_NEED_RESCHED */ | |
1350 | ||
1351 | /* as above, but as bit values */ | |
1352 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
1353 | @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void) | |
1354 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) | |
1355 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1356 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1357 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1358 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1359 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1360 | _TIF_NOHZ) | |
1361 | ||
1362 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1363 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1364 | - _TIF_RESTORE_TM) | |
1365 | + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) | |
1366 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
1367 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1368 | ||
1369 | /* Bits in local_flags */ | |
1370 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
1371 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | |
1372 | index c833d88c423d..96e9fbc3f684 100644 | |
1373 | --- a/arch/powerpc/kernel/asm-offsets.c | |
1374 | +++ b/arch/powerpc/kernel/asm-offsets.c | |
1375 | @@ -156,6 +156,7 @@ int main(void) | |
1376 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1377 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | |
1378 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1379 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1380 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1381 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1382 | ||
1383 | diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S | |
1384 | index 3841d749a430..6dbaeff192b9 100644 | |
1385 | --- a/arch/powerpc/kernel/entry_32.S | |
1386 | +++ b/arch/powerpc/kernel/entry_32.S | |
1387 | @@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1388 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1389 | bne restore | |
1390 | andi. r8,r8,_TIF_NEED_RESCHED | |
1391 | + bne+ 1f | |
1392 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1393 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1394 | + bne restore | |
1395 | + lwz r0,TI_FLAGS(r9) | |
1396 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1397 | beq+ restore | |
1398 | +1: | |
1399 | lwz r3,_MSR(r1) | |
1400 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1401 | beq restore /* don't schedule if so */ | |
1402 | @@ -846,11 +853,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1403 | */ | |
1404 | bl trace_hardirqs_off | |
1405 | #endif | |
1406 | -1: bl preempt_schedule_irq | |
1407 | +2: bl preempt_schedule_irq | |
1408 | CURRENT_THREAD_INFO(r9, r1) | |
1409 | lwz r3,TI_FLAGS(r9) | |
1410 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1411 | - bne- 1b | |
1412 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1413 | + bne- 2b | |
1414 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1415 | /* And now, to properly rebalance the above, we tell lockdep they | |
1416 | * are being turned back on, which will happen when we return | |
1417 | @@ -1171,7 +1178,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) | |
1418 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ | |
1419 | ||
1420 | do_work: /* r10 contains MSR_KERNEL here */ | |
1421 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1422 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1423 | beq do_user_signal | |
1424 | ||
1425 | do_resched: /* r10 contains MSR_KERNEL here */ | |
1426 | @@ -1192,7 +1199,7 @@ do_resched: /* r10 contains MSR_KERNEL here */ | |
1427 | MTMSRD(r10) /* disable interrupts */ | |
1428 | CURRENT_THREAD_INFO(r9, r1) | |
1429 | lwz r9,TI_FLAGS(r9) | |
1430 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1431 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1432 | bne- do_resched | |
1433 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1434 | beq restore_user | |
1435 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | |
1436 | index 6432d4bf08c8..5509a26f1070 100644 | |
1437 | --- a/arch/powerpc/kernel/entry_64.S | |
1438 | +++ b/arch/powerpc/kernel/entry_64.S | |
1439 | @@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite) | |
1440 | bl restore_math | |
1441 | b restore | |
1442 | #endif | |
1443 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1444 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1445 | beq 2f | |
1446 | bl restore_interrupts | |
1447 | SCHEDULE_USER | |
1448 | @@ -718,10 +718,18 @@ _GLOBAL(ret_from_except_lite) | |
1449 | ||
1450 | #ifdef CONFIG_PREEMPT | |
1451 | /* Check if we need to preempt */ | |
1452 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1453 | - beq+ restore | |
1454 | - /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1455 | lwz r8,TI_PREEMPT(r9) | |
1456 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ | |
1457 | + bne restore | |
1458 | + andi. r0,r4,_TIF_NEED_RESCHED | |
1459 | + bne+ check_count | |
1460 | + | |
1461 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
1462 | + beq+ restore | |
1463 | + lwz r8,TI_PREEMPT_LAZY(r9) | |
1464 | + | |
1465 | + /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1466 | +check_count: | |
1467 | cmpwi cr1,r8,0 | |
1468 | ld r0,SOFTE(r1) | |
1469 | cmpdi r0,0 | |
1470 | @@ -738,7 +746,7 @@ _GLOBAL(ret_from_except_lite) | |
1471 | /* Re-test flags and eventually loop */ | |
1472 | CURRENT_THREAD_INFO(r9, r1) | |
1473 | ld r4,TI_FLAGS(r9) | |
1474 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1475 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1476 | bne 1b | |
1477 | ||
1478 | /* | |
1479 | diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c | |
1480 | index 3c05c311e35e..f83f6ac1274d 100644 | |
1481 | --- a/arch/powerpc/kernel/irq.c | |
1482 | +++ b/arch/powerpc/kernel/irq.c | |
1483 | @@ -638,6 +638,7 @@ void irq_ctx_init(void) | |
1484 | } | |
1485 | } | |
1486 | ||
1487 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1488 | void do_softirq_own_stack(void) | |
1489 | { | |
1490 | struct thread_info *curtp, *irqtp; | |
1491 | @@ -655,6 +656,7 @@ void do_softirq_own_stack(void) | |
1492 | if (irqtp->flags) | |
1493 | set_bits(irqtp->flags, &curtp->flags); | |
1494 | } | |
1495 | +#endif | |
1496 | ||
1497 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1498 | { | |
1499 | diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S | |
1500 | index 030d72df5dd5..b471a709e100 100644 | |
1501 | --- a/arch/powerpc/kernel/misc_32.S | |
1502 | +++ b/arch/powerpc/kernel/misc_32.S | |
1503 | @@ -41,6 +41,7 @@ | |
1504 | * We store the saved ksp_limit in the unused part | |
1505 | * of the STACK_FRAME_OVERHEAD | |
1506 | */ | |
1507 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1508 | _GLOBAL(call_do_softirq) | |
1509 | mflr r0 | |
1510 | stw r0,4(r1) | |
1511 | @@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq) | |
1512 | stw r10,THREAD+KSP_LIMIT(r2) | |
1513 | mtlr r0 | |
1514 | blr | |
1515 | +#endif | |
1516 | ||
1517 | /* | |
1518 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
1519 | diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S | |
1520 | index 4f178671f230..39e7d84a3492 100644 | |
1521 | --- a/arch/powerpc/kernel/misc_64.S | |
1522 | +++ b/arch/powerpc/kernel/misc_64.S | |
1523 | @@ -31,6 +31,7 @@ | |
1524 | ||
1525 | .text | |
1526 | ||
1527 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1528 | _GLOBAL(call_do_softirq) | |
1529 | mflr r0 | |
1530 | std r0,16(r1) | |
1531 | @@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq) | |
1532 | ld r0,16(r1) | |
1533 | mtlr r0 | |
1534 | blr | |
1535 | +#endif | |
1536 | ||
1537 | _GLOBAL(call_do_irq) | |
1538 | mflr r0 | |
1539 | diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig | |
1540 | index 029be26b5a17..9528089ea142 100644 | |
1541 | --- a/arch/powerpc/kvm/Kconfig | |
1542 | +++ b/arch/powerpc/kvm/Kconfig | |
1543 | @@ -175,6 +175,7 @@ config KVM_E500MC | |
1544 | config KVM_MPIC | |
1545 | bool "KVM in-kernel MPIC emulation" | |
1546 | depends on KVM && E500 | |
1547 | + depends on !PREEMPT_RT_FULL | |
1548 | select HAVE_KVM_IRQCHIP | |
1549 | select HAVE_KVM_IRQFD | |
1550 | select HAVE_KVM_IRQ_ROUTING | |
1551 | diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c | |
1552 | index e48462447ff0..2670cee66064 100644 | |
1553 | --- a/arch/powerpc/platforms/ps3/device-init.c | |
1554 | +++ b/arch/powerpc/platforms/ps3/device-init.c | |
1555 | @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, | |
1556 | } | |
1557 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
1558 | ||
1559 | - res = wait_event_interruptible(dev->done.wait, | |
1560 | + res = swait_event_interruptible(dev->done.wait, | |
1561 | dev->done.done || kthread_should_stop()); | |
1562 | if (kthread_should_stop()) | |
1563 | res = -EINTR; | |
1564 | diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c | |
1565 | index 6c0378c0b8b5..abd58b4dff97 100644 | |
1566 | --- a/arch/sh/kernel/irq.c | |
1567 | +++ b/arch/sh/kernel/irq.c | |
1568 | @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) | |
1569 | hardirq_ctx[cpu] = NULL; | |
1570 | } | |
1571 | ||
1572 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1573 | void do_softirq_own_stack(void) | |
1574 | { | |
1575 | struct thread_info *curctx; | |
1576 | @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) | |
1577 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" | |
1578 | ); | |
1579 | } | |
1580 | +#endif | |
1581 | #else | |
1582 | static inline void handle_one_irq(unsigned int irq) | |
1583 | { | |
1584 | diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig | |
1585 | index 165ecdd24d22..b68a464a22be 100644 | |
1586 | --- a/arch/sparc/Kconfig | |
1587 | +++ b/arch/sparc/Kconfig | |
1588 | @@ -194,12 +194,10 @@ config NR_CPUS | |
1589 | source kernel/Kconfig.hz | |
1590 | ||
1591 | config RWSEM_GENERIC_SPINLOCK | |
1592 | - bool | |
1593 | - default y if SPARC32 | |
1594 | + def_bool PREEMPT_RT_FULL | |
1595 | ||
1596 | config RWSEM_XCHGADD_ALGORITHM | |
1597 | - bool | |
1598 | - default y if SPARC64 | |
1599 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1600 | ||
1601 | config GENERIC_HWEIGHT | |
1602 | bool | |
1603 | diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c | |
1604 | index 34a7930b76ef..773740521008 100644 | |
1605 | --- a/arch/sparc/kernel/irq_64.c | |
1606 | +++ b/arch/sparc/kernel/irq_64.c | |
1607 | @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) | |
1608 | set_irq_regs(old_regs); | |
1609 | } | |
1610 | ||
1611 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1612 | void do_softirq_own_stack(void) | |
1613 | { | |
1614 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
1615 | @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) | |
1616 | __asm__ __volatile__("mov %0, %%sp" | |
1617 | : : "r" (orig_sp)); | |
1618 | } | |
1619 | +#endif | |
1620 | ||
1621 | #ifdef CONFIG_HOTPLUG_CPU | |
1622 | void fixup_irqs(void) | |
1623 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | |
1624 | index bada636d1065..f8a995c90c01 100644 | |
1625 | --- a/arch/x86/Kconfig | |
1626 | +++ b/arch/x86/Kconfig | |
1627 | @@ -17,6 +17,7 @@ config X86_64 | |
1628 | ### Arch settings | |
1629 | config X86 | |
1630 | def_bool y | |
1631 | + select HAVE_PREEMPT_LAZY | |
1632 | select ACPI_LEGACY_TABLES_LOOKUP if ACPI | |
1633 | select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI | |
1634 | select ANON_INODES | |
1635 | @@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC | |
1636 | def_bool y | |
1637 | depends on ISA_DMA_API | |
1638 | ||
1639 | +config RWSEM_GENERIC_SPINLOCK | |
1640 | + def_bool PREEMPT_RT_FULL | |
1641 | + | |
1642 | config RWSEM_XCHGADD_ALGORITHM | |
1643 | - def_bool y | |
1644 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1645 | ||
1646 | config GENERIC_CALIBRATE_DELAY | |
1647 | def_bool y | |
1648 | @@ -897,7 +901,7 @@ config IOMMU_HELPER | |
1649 | config MAXSMP | |
1650 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
1651 | depends on X86_64 && SMP && DEBUG_KERNEL | |
1652 | - select CPUMASK_OFFSTACK | |
1653 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
1654 | ---help--- | |
1655 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
1656 | If unsure, say N. | |
1657 | diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c | |
1658 | index aa8b0672f87a..2429414bfc71 100644 | |
1659 | --- a/arch/x86/crypto/aesni-intel_glue.c | |
1660 | +++ b/arch/x86/crypto/aesni-intel_glue.c | |
1661 | @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |
1662 | err = blkcipher_walk_virt(desc, &walk); | |
1663 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1664 | ||
1665 | - kernel_fpu_begin(); | |
1666 | while ((nbytes = walk.nbytes)) { | |
1667 | + kernel_fpu_begin(); | |
1668 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1669 | - nbytes & AES_BLOCK_MASK); | |
1670 | + nbytes & AES_BLOCK_MASK); | |
1671 | + kernel_fpu_end(); | |
1672 | nbytes &= AES_BLOCK_SIZE - 1; | |
1673 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1674 | } | |
1675 | - kernel_fpu_end(); | |
1676 | ||
1677 | return err; | |
1678 | } | |
1679 | @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |
1680 | err = blkcipher_walk_virt(desc, &walk); | |
1681 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1682 | ||
1683 | - kernel_fpu_begin(); | |
1684 | while ((nbytes = walk.nbytes)) { | |
1685 | + kernel_fpu_begin(); | |
1686 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1687 | nbytes & AES_BLOCK_MASK); | |
1688 | + kernel_fpu_end(); | |
1689 | nbytes &= AES_BLOCK_SIZE - 1; | |
1690 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1691 | } | |
1692 | - kernel_fpu_end(); | |
1693 | ||
1694 | return err; | |
1695 | } | |
1696 | @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |
1697 | err = blkcipher_walk_virt(desc, &walk); | |
1698 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1699 | ||
1700 | - kernel_fpu_begin(); | |
1701 | while ((nbytes = walk.nbytes)) { | |
1702 | + kernel_fpu_begin(); | |
1703 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1704 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1705 | + kernel_fpu_end(); | |
1706 | nbytes &= AES_BLOCK_SIZE - 1; | |
1707 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1708 | } | |
1709 | - kernel_fpu_end(); | |
1710 | ||
1711 | return err; | |
1712 | } | |
1713 | @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |
1714 | err = blkcipher_walk_virt(desc, &walk); | |
1715 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1716 | ||
1717 | - kernel_fpu_begin(); | |
1718 | while ((nbytes = walk.nbytes)) { | |
1719 | + kernel_fpu_begin(); | |
1720 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1721 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1722 | + kernel_fpu_end(); | |
1723 | nbytes &= AES_BLOCK_SIZE - 1; | |
1724 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1725 | } | |
1726 | - kernel_fpu_end(); | |
1727 | ||
1728 | return err; | |
1729 | } | |
1730 | @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |
1731 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | |
1732 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1733 | ||
1734 | - kernel_fpu_begin(); | |
1735 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
1736 | + kernel_fpu_begin(); | |
1737 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1738 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1739 | + kernel_fpu_end(); | |
1740 | nbytes &= AES_BLOCK_SIZE - 1; | |
1741 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1742 | } | |
1743 | if (walk.nbytes) { | |
1744 | + kernel_fpu_begin(); | |
1745 | ctr_crypt_final(ctx, &walk); | |
1746 | + kernel_fpu_end(); | |
1747 | err = blkcipher_walk_done(desc, &walk, 0); | |
1748 | } | |
1749 | - kernel_fpu_end(); | |
1750 | ||
1751 | return err; | |
1752 | } | |
1753 | diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c | |
1754 | index 8648158f3916..d7699130ee36 100644 | |
1755 | --- a/arch/x86/crypto/cast5_avx_glue.c | |
1756 | +++ b/arch/x86/crypto/cast5_avx_glue.c | |
1757 | @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) | |
1758 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1759 | bool enc) | |
1760 | { | |
1761 | - bool fpu_enabled = false; | |
1762 | + bool fpu_enabled; | |
1763 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1764 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
1765 | unsigned int nbytes; | |
1766 | @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1767 | u8 *wsrc = walk->src.virt.addr; | |
1768 | u8 *wdst = walk->dst.virt.addr; | |
1769 | ||
1770 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1771 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1772 | ||
1773 | /* Process multi-block batch */ | |
1774 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
1775 | @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1776 | } while (nbytes >= bsize); | |
1777 | ||
1778 | done: | |
1779 | + cast5_fpu_end(fpu_enabled); | |
1780 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1781 | } | |
1782 | - | |
1783 | - cast5_fpu_end(fpu_enabled); | |
1784 | return err; | |
1785 | } | |
1786 | ||
1787 | @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |
1788 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1789 | struct scatterlist *src, unsigned int nbytes) | |
1790 | { | |
1791 | - bool fpu_enabled = false; | |
1792 | + bool fpu_enabled; | |
1793 | struct blkcipher_walk walk; | |
1794 | int err; | |
1795 | ||
1796 | @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1797 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1798 | ||
1799 | while ((nbytes = walk.nbytes)) { | |
1800 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1801 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1802 | nbytes = __cbc_decrypt(desc, &walk); | |
1803 | + cast5_fpu_end(fpu_enabled); | |
1804 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1805 | } | |
1806 | - | |
1807 | - cast5_fpu_end(fpu_enabled); | |
1808 | return err; | |
1809 | } | |
1810 | ||
1811 | @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | |
1812 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1813 | struct scatterlist *src, unsigned int nbytes) | |
1814 | { | |
1815 | - bool fpu_enabled = false; | |
1816 | + bool fpu_enabled; | |
1817 | struct blkcipher_walk walk; | |
1818 | int err; | |
1819 | ||
1820 | @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1821 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1822 | ||
1823 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
1824 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1825 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1826 | nbytes = __ctr_crypt(desc, &walk); | |
1827 | + cast5_fpu_end(fpu_enabled); | |
1828 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1829 | } | |
1830 | ||
1831 | - cast5_fpu_end(fpu_enabled); | |
1832 | - | |
1833 | if (walk.nbytes) { | |
1834 | ctr_crypt_final(desc, &walk); | |
1835 | err = blkcipher_walk_done(desc, &walk, 0); | |
1836 | diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c | |
1837 | index 6a85598931b5..3a506ce7ed93 100644 | |
1838 | --- a/arch/x86/crypto/glue_helper.c | |
1839 | +++ b/arch/x86/crypto/glue_helper.c | |
1840 | @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1841 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1842 | const unsigned int bsize = 128 / 8; | |
1843 | unsigned int nbytes, i, func_bytes; | |
1844 | - bool fpu_enabled = false; | |
1845 | + bool fpu_enabled; | |
1846 | int err; | |
1847 | ||
1848 | err = blkcipher_walk_virt(desc, walk); | |
1849 | @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1850 | u8 *wdst = walk->dst.virt.addr; | |
1851 | ||
1852 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1853 | - desc, fpu_enabled, nbytes); | |
1854 | + desc, false, nbytes); | |
1855 | ||
1856 | for (i = 0; i < gctx->num_funcs; i++) { | |
1857 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
1858 | @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1859 | } | |
1860 | ||
1861 | done: | |
1862 | + glue_fpu_end(fpu_enabled); | |
1863 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1864 | } | |
1865 | ||
1866 | - glue_fpu_end(fpu_enabled); | |
1867 | return err; | |
1868 | } | |
1869 | ||
1870 | @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1871 | struct scatterlist *src, unsigned int nbytes) | |
1872 | { | |
1873 | const unsigned int bsize = 128 / 8; | |
1874 | - bool fpu_enabled = false; | |
1875 | + bool fpu_enabled; | |
1876 | struct blkcipher_walk walk; | |
1877 | int err; | |
1878 | ||
1879 | @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1880 | ||
1881 | while ((nbytes = walk.nbytes)) { | |
1882 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1883 | - desc, fpu_enabled, nbytes); | |
1884 | + desc, false, nbytes); | |
1885 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
1886 | + glue_fpu_end(fpu_enabled); | |
1887 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1888 | } | |
1889 | ||
1890 | - glue_fpu_end(fpu_enabled); | |
1891 | return err; | |
1892 | } | |
1893 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
1894 | @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1895 | struct scatterlist *src, unsigned int nbytes) | |
1896 | { | |
1897 | const unsigned int bsize = 128 / 8; | |
1898 | - bool fpu_enabled = false; | |
1899 | + bool fpu_enabled; | |
1900 | struct blkcipher_walk walk; | |
1901 | int err; | |
1902 | ||
1903 | @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1904 | ||
1905 | while ((nbytes = walk.nbytes) >= bsize) { | |
1906 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1907 | - desc, fpu_enabled, nbytes); | |
1908 | + desc, false, nbytes); | |
1909 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
1910 | + glue_fpu_end(fpu_enabled); | |
1911 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1912 | } | |
1913 | ||
1914 | - glue_fpu_end(fpu_enabled); | |
1915 | - | |
1916 | if (walk.nbytes) { | |
1917 | glue_ctr_crypt_final_128bit( | |
1918 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
1919 | @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1920 | void *tweak_ctx, void *crypt_ctx) | |
1921 | { | |
1922 | const unsigned int bsize = 128 / 8; | |
1923 | - bool fpu_enabled = false; | |
1924 | + bool fpu_enabled; | |
1925 | struct blkcipher_walk walk; | |
1926 | int err; | |
1927 | ||
1928 | @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1929 | ||
1930 | /* set minimum length to bsize, for tweak_fn */ | |
1931 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1932 | - desc, fpu_enabled, | |
1933 | + desc, false, | |
1934 | nbytes < bsize ? bsize : nbytes); | |
1935 | - | |
1936 | /* calculate first value of T */ | |
1937 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
1938 | + glue_fpu_end(fpu_enabled); | |
1939 | ||
1940 | while (nbytes) { | |
1941 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1942 | + desc, false, nbytes); | |
1943 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
1944 | ||
1945 | + glue_fpu_end(fpu_enabled); | |
1946 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1947 | nbytes = walk.nbytes; | |
1948 | } | |
1949 | - | |
1950 | - glue_fpu_end(fpu_enabled); | |
1951 | - | |
1952 | return err; | |
1953 | } | |
1954 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
1955 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | |
1956 | index bdd9cc59d20f..56d01a339ba4 100644 | |
1957 | --- a/arch/x86/entry/common.c | |
1958 | +++ b/arch/x86/entry/common.c | |
1959 | @@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs) | |
1960 | ||
1961 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
1962 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1963 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) | |
1964 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) | |
1965 | ||
1966 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
1967 | { | |
1968 | @@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
1969 | /* We have work to do. */ | |
1970 | local_irq_enable(); | |
1971 | ||
1972 | - if (cached_flags & _TIF_NEED_RESCHED) | |
1973 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
1974 | schedule(); | |
1975 | ||
1976 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
1977 | + if (unlikely(current->forced_info.si_signo)) { | |
1978 | + struct task_struct *t = current; | |
1979 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
1980 | + t->forced_info.si_signo = 0; | |
1981 | + } | |
1982 | +#endif | |
1983 | if (cached_flags & _TIF_UPROBE) | |
1984 | uprobe_notify_resume(regs); | |
1985 | ||
1986 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
1987 | index edba8606b99a..4a3389535fc6 100644 | |
1988 | --- a/arch/x86/entry/entry_32.S | |
1989 | +++ b/arch/x86/entry/entry_32.S | |
1990 | @@ -308,8 +308,25 @@ END(ret_from_exception) | |
1991 | ENTRY(resume_kernel) | |
1992 | DISABLE_INTERRUPTS(CLBR_ANY) | |
1993 | need_resched: | |
1994 | + # preempt count == 0 + NEED_RS set? | |
1995 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
1996 | +#ifndef CONFIG_PREEMPT_LAZY | |
1997 | jnz restore_all | |
1998 | +#else | |
1999 | + jz test_int_off | |
2000 | + | |
2001 | + # atleast preempt count == 0 ? | |
2002 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2003 | + jne restore_all | |
2004 | + | |
2005 | + movl PER_CPU_VAR(current_task), %ebp | |
2006 | + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
2007 | + jnz restore_all | |
2008 | + | |
2009 | + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) | |
2010 | + jz restore_all | |
2011 | +test_int_off: | |
2012 | +#endif | |
2013 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2014 | jz restore_all | |
2015 | call preempt_schedule_irq | |
2016 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
2017 | index ef766a358b37..28401f826ab1 100644 | |
2018 | --- a/arch/x86/entry/entry_64.S | |
2019 | +++ b/arch/x86/entry/entry_64.S | |
2020 | @@ -546,7 +546,23 @@ GLOBAL(retint_user) | |
2021 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ | |
2022 | jnc 1f | |
2023 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2024 | +#ifndef CONFIG_PREEMPT_LAZY | |
2025 | jnz 1f | |
2026 | +#else | |
2027 | + jz do_preempt_schedule_irq | |
2028 | + | |
2029 | + # atleast preempt count == 0 ? | |
2030 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2031 | + jnz 1f | |
2032 | + | |
2033 | + movq PER_CPU_VAR(current_task), %rcx | |
2034 | + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) | |
2035 | + jnz 1f | |
2036 | + | |
2037 | + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) | |
2038 | + jnc 1f | |
2039 | +do_preempt_schedule_irq: | |
2040 | +#endif | |
2041 | call preempt_schedule_irq | |
2042 | jmp 0b | |
2043 | 1: | |
2044 | @@ -894,6 +910,7 @@ EXPORT_SYMBOL(native_load_gs_index) | |
2045 | jmp 2b | |
2046 | .previous | |
2047 | ||
2048 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2049 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2050 | ENTRY(do_softirq_own_stack) | |
2051 | pushq %rbp | |
2052 | @@ -906,6 +923,7 @@ ENTRY(do_softirq_own_stack) | |
2053 | decl PER_CPU_VAR(irq_count) | |
2054 | ret | |
2055 | END(do_softirq_own_stack) | |
2056 | +#endif | |
2057 | ||
2058 | #ifdef CONFIG_XEN | |
2059 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | |
2060 | diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h | |
2061 | index 17f218645701..11bd1b7ee6eb 100644 | |
2062 | --- a/arch/x86/include/asm/preempt.h | |
2063 | +++ b/arch/x86/include/asm/preempt.h | |
2064 | @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) | |
2065 | * a decrement which hits zero means we have no preempt_count and should | |
2066 | * reschedule. | |
2067 | */ | |
2068 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2069 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2070 | { | |
2071 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); | |
2072 | } | |
2073 | ||
2074 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2075 | +{ | |
2076 | + if (____preempt_count_dec_and_test()) | |
2077 | + return true; | |
2078 | +#ifdef CONFIG_PREEMPT_LAZY | |
2079 | + if (current_thread_info()->preempt_lazy_count) | |
2080 | + return false; | |
2081 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2082 | +#else | |
2083 | + return false; | |
2084 | +#endif | |
2085 | +} | |
2086 | + | |
2087 | /* | |
2088 | * Returns true when we need to resched and can (barring IRQ state). | |
2089 | */ | |
2090 | static __always_inline bool should_resched(int preempt_offset) | |
2091 | { | |
2092 | +#ifdef CONFIG_PREEMPT_LAZY | |
2093 | + u32 tmp; | |
2094 | + | |
2095 | + tmp = raw_cpu_read_4(__preempt_count); | |
2096 | + if (tmp == preempt_offset) | |
2097 | + return true; | |
2098 | + | |
2099 | + /* preempt count == 0 ? */ | |
2100 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2101 | + if (tmp) | |
2102 | + return false; | |
2103 | + if (current_thread_info()->preempt_lazy_count) | |
2104 | + return false; | |
2105 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2106 | +#else | |
2107 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2108 | +#endif | |
2109 | } | |
2110 | ||
2111 | #ifdef CONFIG_PREEMPT | |
2112 | diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h | |
2113 | index 8af22be0fe61..d1328789b759 100644 | |
2114 | --- a/arch/x86/include/asm/signal.h | |
2115 | +++ b/arch/x86/include/asm/signal.h | |
2116 | @@ -27,6 +27,19 @@ typedef struct { | |
2117 | #define SA_IA32_ABI 0x02000000u | |
2118 | #define SA_X32_ABI 0x01000000u | |
2119 | ||
2120 | +/* | |
2121 | + * Because some traps use the IST stack, we must keep preemption | |
2122 | + * disabled while calling do_trap(), but do_trap() may call | |
2123 | + * force_sig_info() which will grab the signal spin_locks for the | |
2124 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2125 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2126 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2127 | + * trap. | |
2128 | + */ | |
2129 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2130 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2131 | +#endif | |
2132 | + | |
2133 | #ifndef CONFIG_COMPAT | |
2134 | typedef sigset_t compat_sigset_t; | |
2135 | #endif | |
2136 | diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h | |
2137 | index 58505f01962f..02fa39652cd6 100644 | |
2138 | --- a/arch/x86/include/asm/stackprotector.h | |
2139 | +++ b/arch/x86/include/asm/stackprotector.h | |
2140 | @@ -59,7 +59,7 @@ | |
2141 | */ | |
2142 | static __always_inline void boot_init_stack_canary(void) | |
2143 | { | |
2144 | - u64 canary; | |
2145 | + u64 uninitialized_var(canary); | |
2146 | u64 tsc; | |
2147 | ||
2148 | #ifdef CONFIG_X86_64 | |
2149 | @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) | |
2150 | * of randomness. The TSC only matters for very early init, | |
2151 | * there it already has some randomness on most systems. Later | |
2152 | * on during the bootup the random pool has true entropy too. | |
2153 | + * | |
2154 | + * For preempt-rt we need to weaken the randomness a bit, as | |
2155 | + * we can't call into the random generator from atomic context | |
2156 | + * due to locking constraints. We just leave canary | |
2157 | + * uninitialized and use the TSC based randomness on top of it. | |
2158 | */ | |
2159 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2160 | get_random_bytes(&canary, sizeof(canary)); | |
2161 | +#endif | |
2162 | tsc = rdtsc(); | |
2163 | canary += tsc + (tsc << 32UL); | |
2164 | ||
2165 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | |
2166 | index ad6f5eb07a95..5ceb3a1c2b1a 100644 | |
2167 | --- a/arch/x86/include/asm/thread_info.h | |
2168 | +++ b/arch/x86/include/asm/thread_info.h | |
2169 | @@ -54,11 +54,14 @@ struct task_struct; | |
2170 | ||
2171 | struct thread_info { | |
2172 | unsigned long flags; /* low level flags */ | |
2173 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
2174 | + <0 => BUG */ | |
2175 | }; | |
2176 | ||
2177 | #define INIT_THREAD_INFO(tsk) \ | |
2178 | { \ | |
2179 | .flags = 0, \ | |
2180 | + .preempt_lazy_count = 0, \ | |
2181 | } | |
2182 | ||
2183 | #define init_stack (init_thread_union.stack) | |
2184 | @@ -67,6 +70,10 @@ struct thread_info { | |
2185 | ||
2186 | #include <asm/asm-offsets.h> | |
2187 | ||
2188 | +#define GET_THREAD_INFO(reg) \ | |
2189 | + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ | |
2190 | + _ASM_SUB $(THREAD_SIZE),reg ; | |
2191 | + | |
2192 | #endif | |
2193 | ||
2194 | /* | |
2195 | @@ -85,6 +92,7 @@ struct thread_info { | |
2196 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | |
2197 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2198 | #define TIF_SECCOMP 8 /* secure computing */ | |
2199 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2200 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2201 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
2202 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
2203 | @@ -108,6 +116,7 @@ struct thread_info { | |
2204 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) | |
2205 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2206 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2207 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2208 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2209 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
2210 | #define _TIF_NOTSC (1 << TIF_NOTSC) | |
2211 | @@ -143,6 +152,8 @@ struct thread_info { | |
2212 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | |
2213 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2214 | ||
2215 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2216 | + | |
2217 | #define STACK_WARN (THREAD_SIZE/8) | |
2218 | ||
2219 | /* | |
2220 | diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h | |
2221 | index 57ab86d94d64..35d25e27180f 100644 | |
2222 | --- a/arch/x86/include/asm/uv/uv_bau.h | |
2223 | +++ b/arch/x86/include/asm/uv/uv_bau.h | |
2224 | @@ -624,9 +624,9 @@ struct bau_control { | |
2225 | cycles_t send_message; | |
2226 | cycles_t period_end; | |
2227 | cycles_t period_time; | |
2228 | - spinlock_t uvhub_lock; | |
2229 | - spinlock_t queue_lock; | |
2230 | - spinlock_t disable_lock; | |
2231 | + raw_spinlock_t uvhub_lock; | |
2232 | + raw_spinlock_t queue_lock; | |
2233 | + raw_spinlock_t disable_lock; | |
2234 | /* tunables */ | |
2235 | int max_concurr; | |
2236 | int max_concurr_const; | |
2237 | @@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v) | |
2238 | * to be lowered below the current 'v'. atomic_add_unless can only stop | |
2239 | * on equal. | |
2240 | */ | |
2241 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2242 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2243 | { | |
2244 | - spin_lock(lock); | |
2245 | + raw_spin_lock(lock); | |
2246 | if (atomic_read(v) >= u) { | |
2247 | - spin_unlock(lock); | |
2248 | + raw_spin_unlock(lock); | |
2249 | return 0; | |
2250 | } | |
2251 | atomic_inc(v); | |
2252 | - spin_unlock(lock); | |
2253 | + raw_spin_unlock(lock); | |
2254 | return 1; | |
2255 | } | |
2256 | ||
2257 | diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c | |
2258 | index 931ced8ca345..167975ac8af7 100644 | |
2259 | --- a/arch/x86/kernel/acpi/boot.c | |
2260 | +++ b/arch/x86/kernel/acpi/boot.c | |
2261 | @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |
2262 | * ->ioapic_mutex | |
2263 | * ->ioapic_lock | |
2264 | */ | |
2265 | +#ifdef CONFIG_X86_IO_APIC | |
2266 | static DEFINE_MUTEX(acpi_ioapic_lock); | |
2267 | +#endif | |
2268 | ||
2269 | /* -------------------------------------------------------------------------- | |
2270 | Boot-time Configuration | |
2271 | diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c | |
2272 | index 3d8ff40ecc6f..2e96d4e0295b 100644 | |
2273 | --- a/arch/x86/kernel/apic/io_apic.c | |
2274 | +++ b/arch/x86/kernel/apic/io_apic.c | |
2275 | @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) | |
2276 | static inline bool ioapic_irqd_mask(struct irq_data *data) | |
2277 | { | |
2278 | /* If we are moving the irq we need to mask it */ | |
2279 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2280 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2281 | + !irqd_irq_inprogress(data))) { | |
2282 | mask_ioapic_irq(data); | |
2283 | return true; | |
2284 | } | |
2285 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
2286 | index c62e015b126c..0cc71257fca6 100644 | |
2287 | --- a/arch/x86/kernel/asm-offsets.c | |
2288 | +++ b/arch/x86/kernel/asm-offsets.c | |
2289 | @@ -36,6 +36,7 @@ void common(void) { | |
2290 | ||
2291 | BLANK(); | |
2292 | OFFSET(TASK_TI_flags, task_struct, thread_info.flags); | |
2293 | + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); | |
2294 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); | |
2295 | ||
2296 | BLANK(); | |
2297 | @@ -91,4 +92,5 @@ void common(void) { | |
2298 | ||
2299 | BLANK(); | |
2300 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2301 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
2302 | } | |
2303 | diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c | |
2304 | index a7fdf453d895..e3a0e969a66e 100644 | |
2305 | --- a/arch/x86/kernel/cpu/mcheck/mce.c | |
2306 | +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |
2307 | @@ -41,6 +41,8 @@ | |
2308 | #include <linux/debugfs.h> | |
2309 | #include <linux/irq_work.h> | |
2310 | #include <linux/export.h> | |
2311 | +#include <linux/jiffies.h> | |
2312 | +#include <linux/swork.h> | |
2313 | #include <linux/jump_label.h> | |
2314 | ||
2315 | #include <asm/processor.h> | |
2316 | @@ -1317,7 +1319,7 @@ void mce_log_therm_throt_event(__u64 status) | |
2317 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; | |
2318 | ||
2319 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2320 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2321 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2322 | ||
2323 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2324 | { | |
2325 | @@ -1326,32 +1328,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2326 | ||
2327 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2328 | ||
2329 | -static void __restart_timer(struct timer_list *t, unsigned long interval) | |
2330 | +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) | |
2331 | { | |
2332 | - unsigned long when = jiffies + interval; | |
2333 | - unsigned long flags; | |
2334 | - | |
2335 | - local_irq_save(flags); | |
2336 | - | |
2337 | - if (timer_pending(t)) { | |
2338 | - if (time_before(when, t->expires)) | |
2339 | - mod_timer(t, when); | |
2340 | - } else { | |
2341 | - t->expires = round_jiffies(when); | |
2342 | - add_timer_on(t, smp_processor_id()); | |
2343 | - } | |
2344 | - | |
2345 | - local_irq_restore(flags); | |
2346 | + if (!interval) | |
2347 | + return HRTIMER_NORESTART; | |
2348 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); | |
2349 | + return HRTIMER_RESTART; | |
2350 | } | |
2351 | ||
2352 | -static void mce_timer_fn(unsigned long data) | |
2353 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2354 | { | |
2355 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2356 | - int cpu = smp_processor_id(); | |
2357 | unsigned long iv; | |
2358 | ||
2359 | - WARN_ON(cpu != data); | |
2360 | - | |
2361 | iv = __this_cpu_read(mce_next_interval); | |
2362 | ||
2363 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
2364 | @@ -1374,7 +1362,7 @@ static void mce_timer_fn(unsigned long data) | |
2365 | ||
2366 | done: | |
2367 | __this_cpu_write(mce_next_interval, iv); | |
2368 | - __restart_timer(t, iv); | |
2369 | + return __restart_timer(timer, iv); | |
2370 | } | |
2371 | ||
2372 | /* | |
2373 | @@ -1382,7 +1370,7 @@ static void mce_timer_fn(unsigned long data) | |
2374 | */ | |
2375 | void mce_timer_kick(unsigned long interval) | |
2376 | { | |
2377 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2378 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2379 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
2380 | ||
2381 | __restart_timer(t, interval); | |
2382 | @@ -1397,7 +1385,7 @@ static void mce_timer_delete_all(void) | |
2383 | int cpu; | |
2384 | ||
2385 | for_each_online_cpu(cpu) | |
2386 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2387 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2388 | } | |
2389 | ||
2390 | static void mce_do_trigger(struct work_struct *work) | |
2391 | @@ -1407,6 +1395,56 @@ static void mce_do_trigger(struct work_struct *work) | |
2392 | ||
2393 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2394 | ||
2395 | +static void __mce_notify_work(struct swork_event *event) | |
2396 | +{ | |
2397 | + /* Not more than two messages every minute */ | |
2398 | + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2399 | + | |
2400 | + /* wake processes polling /dev/mcelog */ | |
2401 | + wake_up_interruptible(&mce_chrdev_wait); | |
2402 | + | |
2403 | + /* | |
2404 | + * There is no risk of missing notifications because | |
2405 | + * work_pending is always cleared before the function is | |
2406 | + * executed. | |
2407 | + */ | |
2408 | + if (mce_helper[0] && !work_pending(&mce_trigger_work)) | |
2409 | + schedule_work(&mce_trigger_work); | |
2410 | + | |
2411 | + if (__ratelimit(&ratelimit)) | |
2412 | + pr_info(HW_ERR "Machine check events logged\n"); | |
2413 | +} | |
2414 | + | |
2415 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2416 | +static bool notify_work_ready __read_mostly; | |
2417 | +static struct swork_event notify_work; | |
2418 | + | |
2419 | +static int mce_notify_work_init(void) | |
2420 | +{ | |
2421 | + int err; | |
2422 | + | |
2423 | + err = swork_get(); | |
2424 | + if (err) | |
2425 | + return err; | |
2426 | + | |
2427 | + INIT_SWORK(¬ify_work, __mce_notify_work); | |
2428 | + notify_work_ready = true; | |
2429 | + return 0; | |
2430 | +} | |
2431 | + | |
2432 | +static void mce_notify_work(void) | |
2433 | +{ | |
2434 | + if (notify_work_ready) | |
2435 | + swork_queue(¬ify_work); | |
2436 | +} | |
2437 | +#else | |
2438 | +static void mce_notify_work(void) | |
2439 | +{ | |
2440 | + __mce_notify_work(NULL); | |
2441 | +} | |
2442 | +static inline int mce_notify_work_init(void) { return 0; } | |
2443 | +#endif | |
2444 | + | |
2445 | /* | |
2446 | * Notify the user(s) about new machine check events. | |
2447 | * Can be called from interrupt context, but not from machine check/NMI | |
2448 | @@ -1414,19 +1452,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2449 | */ | |
2450 | int mce_notify_irq(void) | |
2451 | { | |
2452 | - /* Not more than two messages every minute */ | |
2453 | - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2454 | - | |
2455 | if (test_and_clear_bit(0, &mce_need_notify)) { | |
2456 | - /* wake processes polling /dev/mcelog */ | |
2457 | - wake_up_interruptible(&mce_chrdev_wait); | |
2458 | - | |
2459 | - if (mce_helper[0]) | |
2460 | - schedule_work(&mce_trigger_work); | |
2461 | - | |
2462 | - if (__ratelimit(&ratelimit)) | |
2463 | - pr_info(HW_ERR "Machine check events logged\n"); | |
2464 | - | |
2465 | + mce_notify_work(); | |
2466 | return 1; | |
2467 | } | |
2468 | return 0; | |
2469 | @@ -1732,7 +1759,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) | |
2470 | } | |
2471 | } | |
2472 | ||
2473 | -static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2474 | +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) | |
2475 | { | |
2476 | unsigned long iv = check_interval * HZ; | |
2477 | ||
2478 | @@ -1741,16 +1768,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2479 | ||
2480 | per_cpu(mce_next_interval, cpu) = iv; | |
2481 | ||
2482 | - t->expires = round_jiffies(jiffies + iv); | |
2483 | - add_timer_on(t, cpu); | |
2484 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
2485 | + 0, HRTIMER_MODE_REL_PINNED); | |
2486 | } | |
2487 | ||
2488 | static void __mcheck_cpu_init_timer(void) | |
2489 | { | |
2490 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2491 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2492 | unsigned int cpu = smp_processor_id(); | |
2493 | ||
2494 | - setup_pinned_timer(t, mce_timer_fn, cpu); | |
2495 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2496 | + t->function = mce_timer_fn; | |
2497 | mce_start_timer(cpu, t); | |
2498 | } | |
2499 | ||
2500 | @@ -2475,6 +2503,8 @@ static void mce_disable_cpu(void *h) | |
2501 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | |
2502 | return; | |
2503 | ||
2504 | + hrtimer_cancel(this_cpu_ptr(&mce_timer)); | |
2505 | + | |
2506 | if (!(action & CPU_TASKS_FROZEN)) | |
2507 | cmci_clear(); | |
2508 | ||
2509 | @@ -2497,6 +2527,7 @@ static void mce_reenable_cpu(void *h) | |
2510 | if (b->init) | |
2511 | wrmsrl(msr_ops.ctl(i), b->ctl); | |
2512 | } | |
2513 | + __mcheck_cpu_init_timer(); | |
2514 | } | |
2515 | ||
2516 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
2517 | @@ -2504,7 +2535,6 @@ static int | |
2518 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
2519 | { | |
2520 | unsigned int cpu = (unsigned long)hcpu; | |
2521 | - struct timer_list *t = &per_cpu(mce_timer, cpu); | |
2522 | ||
2523 | switch (action & ~CPU_TASKS_FROZEN) { | |
2524 | case CPU_ONLINE: | |
2525 | @@ -2524,11 +2554,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
2526 | break; | |
2527 | case CPU_DOWN_PREPARE: | |
2528 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | |
2529 | - del_timer_sync(t); | |
2530 | break; | |
2531 | case CPU_DOWN_FAILED: | |
2532 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | |
2533 | - mce_start_timer(cpu, t); | |
2534 | break; | |
2535 | } | |
2536 | ||
2537 | @@ -2567,6 +2595,10 @@ static __init int mcheck_init_device(void) | |
2538 | goto err_out; | |
2539 | } | |
2540 | ||
2541 | + err = mce_notify_work_init(); | |
2542 | + if (err) | |
2543 | + goto err_out; | |
2544 | + | |
2545 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { | |
2546 | err = -ENOMEM; | |
2547 | goto err_out; | |
2548 | diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c | |
2549 | index 1f38d9a4d9de..053bf3b2ef39 100644 | |
2550 | --- a/arch/x86/kernel/irq_32.c | |
2551 | +++ b/arch/x86/kernel/irq_32.c | |
2552 | @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) | |
2553 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | |
2554 | } | |
2555 | ||
2556 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2557 | void do_softirq_own_stack(void) | |
2558 | { | |
2559 | struct irq_stack *irqstk; | |
2560 | @@ -143,6 +144,7 @@ void do_softirq_own_stack(void) | |
2561 | ||
2562 | call_on_stack(__do_softirq, isp); | |
2563 | } | |
2564 | +#endif | |
2565 | ||
2566 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
2567 | { | |
2568 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | |
2569 | index bd7be8efdc4c..b3b0a7f7b1ca 100644 | |
2570 | --- a/arch/x86/kernel/process_32.c | |
2571 | +++ b/arch/x86/kernel/process_32.c | |
2572 | @@ -35,6 +35,7 @@ | |
2573 | #include <linux/uaccess.h> | |
2574 | #include <linux/io.h> | |
2575 | #include <linux/kdebug.h> | |
2576 | +#include <linux/highmem.h> | |
2577 | ||
2578 | #include <asm/pgtable.h> | |
2579 | #include <asm/ldt.h> | |
2580 | @@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |
2581 | } | |
2582 | EXPORT_SYMBOL_GPL(start_thread); | |
2583 | ||
2584 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2585 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
2586 | +{ | |
2587 | + int i; | |
2588 | + | |
2589 | + /* | |
2590 | + * Clear @prev's kmap_atomic mappings | |
2591 | + */ | |
2592 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
2593 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2594 | + pte_t *ptep = kmap_pte - idx; | |
2595 | + | |
2596 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
2597 | + } | |
2598 | + /* | |
2599 | + * Restore @next_p's kmap_atomic mappings | |
2600 | + */ | |
2601 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
2602 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2603 | + | |
2604 | + if (!pte_none(next_p->kmap_pte[i])) | |
2605 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
2606 | + } | |
2607 | +} | |
2608 | +#else | |
2609 | +static inline void | |
2610 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
2611 | +#endif | |
2612 | + | |
2613 | ||
2614 | /* | |
2615 | * switch_to(x,y) should switch tasks from x to y. | |
2616 | @@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
2617 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | |
2618 | __switch_to_xtra(prev_p, next_p, tss); | |
2619 | ||
2620 | + switch_kmaps(prev_p, next_p); | |
2621 | + | |
2622 | /* | |
2623 | * Leave lazy mode, flushing any hypercalls made here. | |
2624 | * This must be done before restoring TLS segments so | |
2625 | diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c | |
2626 | index 3f05c044720b..fe68afd37162 100644 | |
2627 | --- a/arch/x86/kvm/lapic.c | |
2628 | +++ b/arch/x86/kvm/lapic.c | |
2629 | @@ -1939,6 +1939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |
2630 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | |
2631 | HRTIMER_MODE_ABS_PINNED); | |
2632 | apic->lapic_timer.timer.function = apic_timer_fn; | |
2633 | + apic->lapic_timer.timer.irqsafe = 1; | |
2634 | ||
2635 | /* | |
2636 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | |
2637 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | |
2638 | index 487b957e7802..a144b8cb358b 100644 | |
2639 | --- a/arch/x86/kvm/x86.c | |
2640 | +++ b/arch/x86/kvm/x86.c | |
2641 | @@ -5932,6 +5932,13 @@ int kvm_arch_init(void *opaque) | |
2642 | goto out; | |
2643 | } | |
2644 | ||
2645 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2646 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
2647 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
2648 | + return -EOPNOTSUPP; | |
2649 | + } | |
2650 | +#endif | |
2651 | + | |
2652 | r = kvm_mmu_module_init(); | |
2653 | if (r) | |
2654 | goto out_free_percpu; | |
2655 | diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c | |
2656 | index 6d18b70ed5a9..f752724c22e8 100644 | |
2657 | --- a/arch/x86/mm/highmem_32.c | |
2658 | +++ b/arch/x86/mm/highmem_32.c | |
2659 | @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); | |
2660 | */ | |
2661 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2662 | { | |
2663 | + pte_t pte = mk_pte(page, prot); | |
2664 | unsigned long vaddr; | |
2665 | int idx, type; | |
2666 | ||
2667 | - preempt_disable(); | |
2668 | + preempt_disable_nort(); | |
2669 | pagefault_disable(); | |
2670 | ||
2671 | if (!PageHighMem(page)) | |
2672 | @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2673 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
2674 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2675 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
2676 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
2677 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2678 | + current->kmap_pte[type] = pte; | |
2679 | +#endif | |
2680 | + set_pte(kmap_pte-idx, pte); | |
2681 | arch_flush_lazy_mmu_mode(); | |
2682 | ||
2683 | return (void *)vaddr; | |
2684 | @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) | |
2685 | * is a bad idea also, in case the page changes cacheability | |
2686 | * attributes or becomes a protected page in a hypervisor. | |
2687 | */ | |
2688 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2689 | + current->kmap_pte[type] = __pte(0); | |
2690 | +#endif | |
2691 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2692 | kmap_atomic_idx_pop(); | |
2693 | arch_flush_lazy_mmu_mode(); | |
2694 | @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) | |
2695 | #endif | |
2696 | ||
2697 | pagefault_enable(); | |
2698 | - preempt_enable(); | |
2699 | + preempt_enable_nort(); | |
2700 | } | |
2701 | EXPORT_SYMBOL(__kunmap_atomic); | |
2702 | ||
2703 | diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c | |
2704 | index ada98b39b8ad..585f6829653b 100644 | |
2705 | --- a/arch/x86/mm/iomap_32.c | |
2706 | +++ b/arch/x86/mm/iomap_32.c | |
2707 | @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); | |
2708 | ||
2709 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2710 | { | |
2711 | + pte_t pte = pfn_pte(pfn, prot); | |
2712 | unsigned long vaddr; | |
2713 | int idx, type; | |
2714 | ||
2715 | @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2716 | type = kmap_atomic_idx_push(); | |
2717 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
2718 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2719 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
2720 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
2721 | + | |
2722 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2723 | + current->kmap_pte[type] = pte; | |
2724 | +#endif | |
2725 | + set_pte(kmap_pte - idx, pte); | |
2726 | arch_flush_lazy_mmu_mode(); | |
2727 | ||
2728 | return (void *)vaddr; | |
2729 | @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) | |
2730 | * is a bad idea also, in case the page changes cacheability | |
2731 | * attributes or becomes a protected page in a hypervisor. | |
2732 | */ | |
2733 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2734 | + current->kmap_pte[type] = __pte(0); | |
2735 | +#endif | |
2736 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2737 | kmap_atomic_idx_pop(); | |
2738 | } | |
2739 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c | |
2740 | index e3353c97d086..01664968555c 100644 | |
2741 | --- a/arch/x86/mm/pageattr.c | |
2742 | +++ b/arch/x86/mm/pageattr.c | |
2743 | @@ -214,7 +214,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, | |
2744 | int in_flags, struct page **pages) | |
2745 | { | |
2746 | unsigned int i, level; | |
2747 | +#ifdef CONFIG_PREEMPT | |
2748 | + /* | |
2749 | + * Avoid wbinvd() because it causes latencies on all CPUs, | |
2750 | + * regardless of any CPU isolation that may be in effect. | |
2751 | + */ | |
2752 | + unsigned long do_wbinvd = 0; | |
2753 | +#else | |
2754 | unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ | |
2755 | +#endif | |
2756 | ||
2757 | BUG_ON(irqs_disabled()); | |
2758 | ||
2759 | diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c | |
2760 | index 9e42842e924a..5398f97172f9 100644 | |
2761 | --- a/arch/x86/platform/uv/tlb_uv.c | |
2762 | +++ b/arch/x86/platform/uv/tlb_uv.c | |
2763 | @@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc, | |
2764 | ||
2765 | quiesce_local_uvhub(hmaster); | |
2766 | ||
2767 | - spin_lock(&hmaster->queue_lock); | |
2768 | + raw_spin_lock(&hmaster->queue_lock); | |
2769 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2770 | - spin_unlock(&hmaster->queue_lock); | |
2771 | + raw_spin_unlock(&hmaster->queue_lock); | |
2772 | ||
2773 | end_uvhub_quiesce(hmaster); | |
2774 | ||
2775 | @@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc, | |
2776 | ||
2777 | quiesce_local_uvhub(hmaster); | |
2778 | ||
2779 | - spin_lock(&hmaster->queue_lock); | |
2780 | + raw_spin_lock(&hmaster->queue_lock); | |
2781 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2782 | - spin_unlock(&hmaster->queue_lock); | |
2783 | + raw_spin_unlock(&hmaster->queue_lock); | |
2784 | ||
2785 | end_uvhub_quiesce(hmaster); | |
2786 | ||
2787 | @@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
2788 | cycles_t tm1; | |
2789 | ||
2790 | hmaster = bcp->uvhub_master; | |
2791 | - spin_lock(&hmaster->disable_lock); | |
2792 | + raw_spin_lock(&hmaster->disable_lock); | |
2793 | if (!bcp->baudisabled) { | |
2794 | stat->s_bau_disabled++; | |
2795 | tm1 = get_cycles(); | |
2796 | @@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
2797 | } | |
2798 | } | |
2799 | } | |
2800 | - spin_unlock(&hmaster->disable_lock); | |
2801 | + raw_spin_unlock(&hmaster->disable_lock); | |
2802 | } | |
2803 | ||
2804 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
2805 | @@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, | |
2806 | */ | |
2807 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
2808 | { | |
2809 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
2810 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
2811 | atomic_t *v; | |
2812 | ||
2813 | v = &hmaster->active_descriptor_count; | |
2814 | @@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
2815 | struct bau_control *hmaster; | |
2816 | ||
2817 | hmaster = bcp->uvhub_master; | |
2818 | - spin_lock(&hmaster->disable_lock); | |
2819 | + raw_spin_lock(&hmaster->disable_lock); | |
2820 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
2821 | stat->s_bau_reenabled++; | |
2822 | for_each_present_cpu(tcpu) { | |
2823 | @@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
2824 | tbcp->period_giveups = 0; | |
2825 | } | |
2826 | } | |
2827 | - spin_unlock(&hmaster->disable_lock); | |
2828 | + raw_spin_unlock(&hmaster->disable_lock); | |
2829 | return 0; | |
2830 | } | |
2831 | - spin_unlock(&hmaster->disable_lock); | |
2832 | + raw_spin_unlock(&hmaster->disable_lock); | |
2833 | return -1; | |
2834 | } | |
2835 | ||
2836 | @@ -1940,9 +1940,9 @@ static void __init init_per_cpu_tunables(void) | |
2837 | bcp->cong_reps = congested_reps; | |
2838 | bcp->disabled_period = sec_2_cycles(disabled_period); | |
2839 | bcp->giveup_limit = giveup_limit; | |
2840 | - spin_lock_init(&bcp->queue_lock); | |
2841 | - spin_lock_init(&bcp->uvhub_lock); | |
2842 | - spin_lock_init(&bcp->disable_lock); | |
2843 | + raw_spin_lock_init(&bcp->queue_lock); | |
2844 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
2845 | + raw_spin_lock_init(&bcp->disable_lock); | |
2846 | } | |
2847 | } | |
2848 | ||
2849 | diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c | |
2850 | index b333fc45f9ec..8b85916e6986 100644 | |
2851 | --- a/arch/x86/platform/uv/uv_time.c | |
2852 | +++ b/arch/x86/platform/uv/uv_time.c | |
2853 | @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | |
2854 | ||
2855 | /* There is one of these allocated per node */ | |
2856 | struct uv_rtc_timer_head { | |
2857 | - spinlock_t lock; | |
2858 | + raw_spinlock_t lock; | |
2859 | /* next cpu waiting for timer, local node relative: */ | |
2860 | int next_cpu; | |
2861 | /* number of cpus on this node: */ | |
2862 | @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) | |
2863 | uv_rtc_deallocate_timers(); | |
2864 | return -ENOMEM; | |
2865 | } | |
2866 | - spin_lock_init(&head->lock); | |
2867 | + raw_spin_lock_init(&head->lock); | |
2868 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
2869 | head->next_cpu = -1; | |
2870 | blade_info[bid] = head; | |
2871 | @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2872 | unsigned long flags; | |
2873 | int next_cpu; | |
2874 | ||
2875 | - spin_lock_irqsave(&head->lock, flags); | |
2876 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2877 | ||
2878 | next_cpu = head->next_cpu; | |
2879 | *t = expires; | |
2880 | @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2881 | if (uv_setup_intr(cpu, expires)) { | |
2882 | *t = ULLONG_MAX; | |
2883 | uv_rtc_find_next_timer(head, pnode); | |
2884 | - spin_unlock_irqrestore(&head->lock, flags); | |
2885 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2886 | return -ETIME; | |
2887 | } | |
2888 | } | |
2889 | ||
2890 | - spin_unlock_irqrestore(&head->lock, flags); | |
2891 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2892 | return 0; | |
2893 | } | |
2894 | ||
2895 | @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2896 | unsigned long flags; | |
2897 | int rc = 0; | |
2898 | ||
2899 | - spin_lock_irqsave(&head->lock, flags); | |
2900 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2901 | ||
2902 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
2903 | rc = 1; | |
2904 | @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2905 | uv_rtc_find_next_timer(head, pnode); | |
2906 | } | |
2907 | ||
2908 | - spin_unlock_irqrestore(&head->lock, flags); | |
2909 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2910 | ||
2911 | return rc; | |
2912 | } | |
2913 | @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2914 | static cycle_t uv_read_rtc(struct clocksource *cs) | |
2915 | { | |
2916 | unsigned long offset; | |
2917 | + cycle_t cycles; | |
2918 | ||
2919 | + preempt_disable(); | |
2920 | if (uv_get_min_hub_revision_id() == 1) | |
2921 | offset = 0; | |
2922 | else | |
2923 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
2924 | ||
2925 | - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2926 | + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2927 | + preempt_enable(); | |
2928 | + | |
2929 | + return cycles; | |
2930 | } | |
2931 | ||
2932 | /* | |
2933 | diff --git a/block/blk-core.c b/block/blk-core.c | |
2934 | index 14d7c0740dc0..dfd905bea77c 100644 | |
2935 | --- a/block/blk-core.c | |
2936 | +++ b/block/blk-core.c | |
2937 | @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |
2938 | ||
2939 | INIT_LIST_HEAD(&rq->queuelist); | |
2940 | INIT_LIST_HEAD(&rq->timeout_list); | |
2941 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2942 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
2943 | +#endif | |
2944 | rq->cpu = -1; | |
2945 | rq->q = q; | |
2946 | rq->__sector = (sector_t) -1; | |
2947 | @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async); | |
2948 | **/ | |
2949 | void blk_start_queue(struct request_queue *q) | |
2950 | { | |
2951 | - WARN_ON(!irqs_disabled()); | |
2952 | + WARN_ON_NONRT(!irqs_disabled()); | |
2953 | ||
2954 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
2955 | __blk_run_queue(q); | |
2956 | @@ -659,7 +662,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait) | |
2957 | if (nowait) | |
2958 | return -EBUSY; | |
2959 | ||
2960 | - ret = wait_event_interruptible(q->mq_freeze_wq, | |
2961 | + ret = swait_event_interruptible(q->mq_freeze_wq, | |
2962 | !atomic_read(&q->mq_freeze_depth) || | |
2963 | blk_queue_dying(q)); | |
2964 | if (blk_queue_dying(q)) | |
2965 | @@ -679,7 +682,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
2966 | struct request_queue *q = | |
2967 | container_of(ref, struct request_queue, q_usage_counter); | |
2968 | ||
2969 | - wake_up_all(&q->mq_freeze_wq); | |
2970 | + swake_up_all(&q->mq_freeze_wq); | |
2971 | } | |
2972 | ||
2973 | static void blk_rq_timed_out_timer(unsigned long data) | |
2974 | @@ -748,7 +751,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
2975 | q->bypass_depth = 1; | |
2976 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | |
2977 | ||
2978 | - init_waitqueue_head(&q->mq_freeze_wq); | |
2979 | + init_swait_queue_head(&q->mq_freeze_wq); | |
2980 | ||
2981 | /* | |
2982 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
2983 | @@ -3177,7 +3180,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, | |
2984 | blk_run_queue_async(q); | |
2985 | else | |
2986 | __blk_run_queue(q); | |
2987 | - spin_unlock(q->queue_lock); | |
2988 | + spin_unlock_irq(q->queue_lock); | |
2989 | } | |
2990 | ||
2991 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
2992 | @@ -3225,7 +3228,6 @@ EXPORT_SYMBOL(blk_check_plugged); | |
2993 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
2994 | { | |
2995 | struct request_queue *q; | |
2996 | - unsigned long flags; | |
2997 | struct request *rq; | |
2998 | LIST_HEAD(list); | |
2999 | unsigned int depth; | |
3000 | @@ -3245,11 +3247,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3001 | q = NULL; | |
3002 | depth = 0; | |
3003 | ||
3004 | - /* | |
3005 | - * Save and disable interrupts here, to avoid doing it for every | |
3006 | - * queue lock we have to take. | |
3007 | - */ | |
3008 | - local_irq_save(flags); | |
3009 | while (!list_empty(&list)) { | |
3010 | rq = list_entry_rq(list.next); | |
3011 | list_del_init(&rq->queuelist); | |
3012 | @@ -3262,7 +3259,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3013 | queue_unplugged(q, depth, from_schedule); | |
3014 | q = rq->q; | |
3015 | depth = 0; | |
3016 | - spin_lock(q->queue_lock); | |
3017 | + spin_lock_irq(q->queue_lock); | |
3018 | } | |
3019 | ||
3020 | /* | |
3021 | @@ -3289,8 +3286,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3022 | */ | |
3023 | if (q) | |
3024 | queue_unplugged(q, depth, from_schedule); | |
3025 | - | |
3026 | - local_irq_restore(flags); | |
3027 | } | |
3028 | ||
3029 | void blk_finish_plug(struct blk_plug *plug) | |
3030 | diff --git a/block/blk-ioc.c b/block/blk-ioc.c | |
3031 | index 381cb50a673c..dc8785233d94 100644 | |
3032 | --- a/block/blk-ioc.c | |
3033 | +++ b/block/blk-ioc.c | |
3034 | @@ -7,6 +7,7 @@ | |
3035 | #include <linux/bio.h> | |
3036 | #include <linux/blkdev.h> | |
3037 | #include <linux/slab.h> | |
3038 | +#include <linux/delay.h> | |
3039 | ||
3040 | #include "blk.h" | |
3041 | ||
3042 | @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work) | |
3043 | spin_unlock(q->queue_lock); | |
3044 | } else { | |
3045 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3046 | - cpu_relax(); | |
3047 | + cpu_chill(); | |
3048 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3049 | } | |
3050 | } | |
3051 | @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc) | |
3052 | spin_unlock(icq->q->queue_lock); | |
3053 | } else { | |
3054 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3055 | - cpu_relax(); | |
3056 | + cpu_chill(); | |
3057 | goto retry; | |
3058 | } | |
3059 | } | |
3060 | diff --git a/block/blk-mq.c b/block/blk-mq.c | |
3061 | index 81caceb96c3c..b12b0ab005a9 100644 | |
3062 | --- a/block/blk-mq.c | |
3063 | +++ b/block/blk-mq.c | |
3064 | @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); | |
3065 | ||
3066 | static void blk_mq_freeze_queue_wait(struct request_queue *q) | |
3067 | { | |
3068 | - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3069 | + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3070 | } | |
3071 | ||
3072 | /* | |
3073 | @@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) | |
3074 | WARN_ON_ONCE(freeze_depth < 0); | |
3075 | if (!freeze_depth) { | |
3076 | percpu_ref_reinit(&q->q_usage_counter); | |
3077 | - wake_up_all(&q->mq_freeze_wq); | |
3078 | + swake_up_all(&q->mq_freeze_wq); | |
3079 | } | |
3080 | } | |
3081 | EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); | |
3082 | @@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q) | |
3083 | * dying, we need to ensure that processes currently waiting on | |
3084 | * the queue are notified as well. | |
3085 | */ | |
3086 | - wake_up_all(&q->mq_freeze_wq); | |
3087 | + swake_up_all(&q->mq_freeze_wq); | |
3088 | } | |
3089 | ||
3090 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |
3091 | @@ -177,6 +177,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |
3092 | rq->resid_len = 0; | |
3093 | rq->sense = NULL; | |
3094 | ||
3095 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3096 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3097 | +#endif | |
3098 | INIT_LIST_HEAD(&rq->timeout_list); | |
3099 | rq->timeout = 0; | |
3100 | ||
3101 | @@ -345,6 +348,17 @@ void blk_mq_end_request(struct request *rq, int error) | |
3102 | } | |
3103 | EXPORT_SYMBOL(blk_mq_end_request); | |
3104 | ||
3105 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3106 | + | |
3107 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
3108 | +{ | |
3109 | + struct request *rq = container_of(work, struct request, work); | |
3110 | + | |
3111 | + rq->q->softirq_done_fn(rq); | |
3112 | +} | |
3113 | + | |
3114 | +#else | |
3115 | + | |
3116 | static void __blk_mq_complete_request_remote(void *data) | |
3117 | { | |
3118 | struct request *rq = data; | |
3119 | @@ -352,6 +366,8 @@ static void __blk_mq_complete_request_remote(void *data) | |
3120 | rq->q->softirq_done_fn(rq); | |
3121 | } | |
3122 | ||
3123 | +#endif | |
3124 | + | |
3125 | static void blk_mq_ipi_complete_request(struct request *rq) | |
3126 | { | |
3127 | struct blk_mq_ctx *ctx = rq->mq_ctx; | |
3128 | @@ -363,19 +379,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) | |
3129 | return; | |
3130 | } | |
3131 | ||
3132 | - cpu = get_cpu(); | |
3133 | + cpu = get_cpu_light(); | |
3134 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
3135 | shared = cpus_share_cache(cpu, ctx->cpu); | |
3136 | ||
3137 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
3138 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3139 | + schedule_work_on(ctx->cpu, &rq->work); | |
3140 | +#else | |
3141 | rq->csd.func = __blk_mq_complete_request_remote; | |
3142 | rq->csd.info = rq; | |
3143 | rq->csd.flags = 0; | |
3144 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
3145 | +#endif | |
3146 | } else { | |
3147 | rq->q->softirq_done_fn(rq); | |
3148 | } | |
3149 | - put_cpu(); | |
3150 | + put_cpu_light(); | |
3151 | } | |
3152 | ||
3153 | static void __blk_mq_complete_request(struct request *rq) | |
3154 | @@ -915,14 +935,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | |
3155 | return; | |
3156 | ||
3157 | if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { | |
3158 | - int cpu = get_cpu(); | |
3159 | + int cpu = get_cpu_light(); | |
3160 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
3161 | __blk_mq_run_hw_queue(hctx); | |
3162 | - put_cpu(); | |
3163 | + put_cpu_light(); | |
3164 | return; | |
3165 | } | |
3166 | ||
3167 | - put_cpu(); | |
3168 | + put_cpu_light(); | |
3169 | } | |
3170 | ||
3171 | kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); | |
3172 | diff --git a/block/blk-mq.h b/block/blk-mq.h | |
3173 | index e5d25249028c..1e846b842eab 100644 | |
3174 | --- a/block/blk-mq.h | |
3175 | +++ b/block/blk-mq.h | |
3176 | @@ -72,12 +72,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, | |
3177 | */ | |
3178 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
3179 | { | |
3180 | - return __blk_mq_get_ctx(q, get_cpu()); | |
3181 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
3182 | } | |
3183 | ||
3184 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
3185 | { | |
3186 | - put_cpu(); | |
3187 | + put_cpu_light(); | |
3188 | } | |
3189 | ||
3190 | struct blk_mq_alloc_data { | |
3191 | diff --git a/block/blk-softirq.c b/block/blk-softirq.c | |
3192 | index 06cf9807f49a..c40342643ca0 100644 | |
3193 | --- a/block/blk-softirq.c | |
3194 | +++ b/block/blk-softirq.c | |
3195 | @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) | |
3196 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3197 | ||
3198 | local_irq_restore(flags); | |
3199 | + preempt_check_resched_rt(); | |
3200 | } | |
3201 | ||
3202 | /* | |
3203 | @@ -89,6 +90,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) | |
3204 | this_cpu_ptr(&blk_cpu_done)); | |
3205 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3206 | local_irq_enable(); | |
3207 | + preempt_check_resched_rt(); | |
3208 | ||
3209 | return 0; | |
3210 | } | |
3211 | @@ -141,6 +143,7 @@ void __blk_complete_request(struct request *req) | |
3212 | goto do_local; | |
3213 | ||
3214 | local_irq_restore(flags); | |
3215 | + preempt_check_resched_rt(); | |
3216 | } | |
3217 | ||
3218 | /** | |
3219 | diff --git a/block/bounce.c b/block/bounce.c | |
3220 | index 1cb5dd3a5da1..2f1ec8a67cbe 100644 | |
3221 | --- a/block/bounce.c | |
3222 | +++ b/block/bounce.c | |
3223 | @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | |
3224 | unsigned long flags; | |
3225 | unsigned char *vto; | |
3226 | ||
3227 | - local_irq_save(flags); | |
3228 | + local_irq_save_nort(flags); | |
3229 | vto = kmap_atomic(to->bv_page); | |
3230 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
3231 | kunmap_atomic(vto); | |
3232 | - local_irq_restore(flags); | |
3233 | + local_irq_restore_nort(flags); | |
3234 | } | |
3235 | ||
3236 | #else /* CONFIG_HIGHMEM */ | |
3237 | diff --git a/crypto/algapi.c b/crypto/algapi.c | |
3238 | index df939b54b09f..efe5e06adcf7 100644 | |
3239 | --- a/crypto/algapi.c | |
3240 | +++ b/crypto/algapi.c | |
3241 | @@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); | |
3242 | ||
3243 | int crypto_register_notifier(struct notifier_block *nb) | |
3244 | { | |
3245 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
3246 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
3247 | } | |
3248 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
3249 | ||
3250 | int crypto_unregister_notifier(struct notifier_block *nb) | |
3251 | { | |
3252 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
3253 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
3254 | } | |
3255 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
3256 | ||
3257 | diff --git a/crypto/api.c b/crypto/api.c | |
3258 | index bbc147cb5dec..bc1a848f02ec 100644 | |
3259 | --- a/crypto/api.c | |
3260 | +++ b/crypto/api.c | |
3261 | @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); | |
3262 | DECLARE_RWSEM(crypto_alg_sem); | |
3263 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
3264 | ||
3265 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
3266 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
3267 | EXPORT_SYMBOL_GPL(crypto_chain); | |
3268 | ||
3269 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
3270 | @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v) | |
3271 | { | |
3272 | int ok; | |
3273 | ||
3274 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3275 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3276 | if (ok == NOTIFY_DONE) { | |
3277 | request_module("cryptomgr"); | |
3278 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3279 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3280 | } | |
3281 | ||
3282 | return ok; | |
3283 | diff --git a/crypto/internal.h b/crypto/internal.h | |
3284 | index 7eefcdb00227..0ecc7f5a2f40 100644 | |
3285 | --- a/crypto/internal.h | |
3286 | +++ b/crypto/internal.h | |
3287 | @@ -47,7 +47,7 @@ struct crypto_larval { | |
3288 | ||
3289 | extern struct list_head crypto_alg_list; | |
3290 | extern struct rw_semaphore crypto_alg_sem; | |
3291 | -extern struct blocking_notifier_head crypto_chain; | |
3292 | +extern struct srcu_notifier_head crypto_chain; | |
3293 | ||
3294 | #ifdef CONFIG_PROC_FS | |
3295 | void __init crypto_init_proc(void); | |
3296 | @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg) | |
3297 | ||
3298 | static inline void crypto_notify(unsigned long val, void *v) | |
3299 | { | |
3300 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
3301 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
3302 | } | |
3303 | ||
3304 | #endif /* _CRYPTO_INTERNAL_H */ | |
3305 | diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h | |
3306 | index 750fa824d42c..441edf51484a 100644 | |
3307 | --- a/drivers/acpi/acpica/acglobal.h | |
3308 | +++ b/drivers/acpi/acpica/acglobal.h | |
3309 | @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); | |
3310 | * interrupt level | |
3311 | */ | |
3312 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
3313 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3314 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3315 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
3316 | ||
3317 | /* Mutex for _OSI support */ | |
3318 | diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c | |
3319 | index 3b7fb99362b6..696bf8e62afb 100644 | |
3320 | --- a/drivers/acpi/acpica/hwregs.c | |
3321 | +++ b/drivers/acpi/acpica/hwregs.c | |
3322 | @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void) | |
3323 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
3324 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
3325 | ||
3326 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3327 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3328 | ||
3329 | /* Clear the fixed events in PM1 A/B */ | |
3330 | ||
3331 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, | |
3332 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
3333 | ||
3334 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3335 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3336 | ||
3337 | if (ACPI_FAILURE(status)) { | |
3338 | goto exit; | |
3339 | diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c | |
3340 | index 98c26ff39409..6e236f2ea791 100644 | |
3341 | --- a/drivers/acpi/acpica/hwxface.c | |
3342 | +++ b/drivers/acpi/acpica/hwxface.c | |
3343 | @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3344 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
3345 | } | |
3346 | ||
3347 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3348 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3349 | ||
3350 | /* | |
3351 | * At this point, we know that the parent register is one of the | |
3352 | @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3353 | ||
3354 | unlock_and_exit: | |
3355 | ||
3356 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3357 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3358 | return_ACPI_STATUS(status); | |
3359 | } | |
3360 | ||
3361 | diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c | |
3362 | index 15073375bd00..357e7ca5a587 100644 | |
3363 | --- a/drivers/acpi/acpica/utmutex.c | |
3364 | +++ b/drivers/acpi/acpica/utmutex.c | |
3365 | @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) | |
3366 | return_ACPI_STATUS (status); | |
3367 | } | |
3368 | ||
3369 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
3370 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
3371 | if (ACPI_FAILURE (status)) { | |
3372 | return_ACPI_STATUS (status); | |
3373 | } | |
3374 | @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) | |
3375 | /* Delete the spinlocks */ | |
3376 | ||
3377 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
3378 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
3379 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
3380 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
3381 | ||
3382 | /* Delete the reader/writer lock */ | |
3383 | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c | |
3384 | index 051b6158d1b7..7ad293bef6ed 100644 | |
3385 | --- a/drivers/ata/libata-sff.c | |
3386 | +++ b/drivers/ata/libata-sff.c | |
3387 | @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, | |
3388 | unsigned long flags; | |
3389 | unsigned int consumed; | |
3390 | ||
3391 | - local_irq_save(flags); | |
3392 | + local_irq_save_nort(flags); | |
3393 | consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); | |
3394 | - local_irq_restore(flags); | |
3395 | + local_irq_restore_nort(flags); | |
3396 | ||
3397 | return consumed; | |
3398 | } | |
3399 | @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3400 | unsigned long flags; | |
3401 | ||
3402 | /* FIXME: use a bounce buffer */ | |
3403 | - local_irq_save(flags); | |
3404 | + local_irq_save_nort(flags); | |
3405 | buf = kmap_atomic(page); | |
3406 | ||
3407 | /* do the actual data transfer */ | |
3408 | @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3409 | do_write); | |
3410 | ||
3411 | kunmap_atomic(buf); | |
3412 | - local_irq_restore(flags); | |
3413 | + local_irq_restore_nort(flags); | |
3414 | } else { | |
3415 | buf = page_address(page); | |
3416 | ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, | |
3417 | @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3418 | unsigned long flags; | |
3419 | ||
3420 | /* FIXME: use bounce buffer */ | |
3421 | - local_irq_save(flags); | |
3422 | + local_irq_save_nort(flags); | |
3423 | buf = kmap_atomic(page); | |
3424 | ||
3425 | /* do the actual data transfer */ | |
3426 | @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3427 | count, rw); | |
3428 | ||
3429 | kunmap_atomic(buf); | |
3430 | - local_irq_restore(flags); | |
3431 | + local_irq_restore_nort(flags); | |
3432 | } else { | |
3433 | buf = page_address(page); | |
3434 | consumed = ap->ops->sff_data_xfer(dev, buf + offset, | |
3435 | diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c | |
3436 | index 4b5cd3a7b2b6..fa8329ad79fd 100644 | |
3437 | --- a/drivers/block/zram/zcomp.c | |
3438 | +++ b/drivers/block/zram/zcomp.c | |
3439 | @@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf) | |
3440 | ||
3441 | struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) | |
3442 | { | |
3443 | - return *get_cpu_ptr(comp->stream); | |
3444 | + struct zcomp_strm *zstrm; | |
3445 | + | |
3446 | + zstrm = *this_cpu_ptr(comp->stream); | |
3447 | + spin_lock(&zstrm->zcomp_lock); | |
3448 | + return zstrm; | |
3449 | } | |
3450 | ||
3451 | void zcomp_stream_put(struct zcomp *comp) | |
3452 | { | |
3453 | - put_cpu_ptr(comp->stream); | |
3454 | + struct zcomp_strm *zstrm; | |
3455 | + | |
3456 | + zstrm = *this_cpu_ptr(comp->stream); | |
3457 | + spin_unlock(&zstrm->zcomp_lock); | |
3458 | } | |
3459 | ||
3460 | int zcomp_compress(struct zcomp_strm *zstrm, | |
3461 | @@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, | |
3462 | pr_err("Can't allocate a compression stream\n"); | |
3463 | return NOTIFY_BAD; | |
3464 | } | |
3465 | + spin_lock_init(&zstrm->zcomp_lock); | |
3466 | *per_cpu_ptr(comp->stream, cpu) = zstrm; | |
3467 | break; | |
3468 | case CPU_DEAD: | |
3469 | diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h | |
3470 | index 478cac2ed465..f7a6efdc3285 100644 | |
3471 | --- a/drivers/block/zram/zcomp.h | |
3472 | +++ b/drivers/block/zram/zcomp.h | |
3473 | @@ -14,6 +14,7 @@ struct zcomp_strm { | |
3474 | /* compression/decompression buffer */ | |
3475 | void *buffer; | |
3476 | struct crypto_comp *tfm; | |
3477 | + spinlock_t zcomp_lock; | |
3478 | }; | |
3479 | ||
3480 | /* dynamic per-device compression frontend */ | |
3481 | diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c | |
3482 | index d2ef51ca9cf4..05e749736560 100644 | |
3483 | --- a/drivers/block/zram/zram_drv.c | |
3484 | +++ b/drivers/block/zram/zram_drv.c | |
3485 | @@ -528,6 +528,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) | |
3486 | goto out_error; | |
3487 | } | |
3488 | ||
3489 | + zram_meta_init_table_locks(meta, disksize); | |
3490 | + | |
3491 | return meta; | |
3492 | ||
3493 | out_error: | |
3494 | @@ -575,28 +577,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) | |
3495 | struct zram_meta *meta = zram->meta; | |
3496 | unsigned long handle; | |
3497 | unsigned int size; | |
3498 | + struct zcomp_strm *zstrm; | |
3499 | ||
3500 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3501 | + zram_lock_table(&meta->table[index]); | |
3502 | handle = meta->table[index].handle; | |
3503 | size = zram_get_obj_size(meta, index); | |
3504 | ||
3505 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3506 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3507 | + zram_unlock_table(&meta->table[index]); | |
3508 | clear_page(mem); | |
3509 | return 0; | |
3510 | } | |
3511 | ||
3512 | + zstrm = zcomp_stream_get(zram->comp); | |
3513 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); | |
3514 | if (size == PAGE_SIZE) { | |
3515 | copy_page(mem, cmem); | |
3516 | } else { | |
3517 | - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
3518 | - | |
3519 | ret = zcomp_decompress(zstrm, cmem, size, mem); | |
3520 | - zcomp_stream_put(zram->comp); | |
3521 | } | |
3522 | zs_unmap_object(meta->mem_pool, handle); | |
3523 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3524 | + zcomp_stream_put(zram->comp); | |
3525 | + zram_unlock_table(&meta->table[index]); | |
3526 | ||
3527 | /* Should NEVER happen. Return bio error if it does. */ | |
3528 | if (unlikely(ret)) { | |
3529 | @@ -616,14 +618,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, | |
3530 | struct zram_meta *meta = zram->meta; | |
3531 | page = bvec->bv_page; | |
3532 | ||
3533 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3534 | + zram_lock_table(&meta->table[index]); | |
3535 | if (unlikely(!meta->table[index].handle) || | |
3536 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3537 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3538 | + zram_unlock_table(&meta->table[index]); | |
3539 | handle_zero_page(bvec); | |
3540 | return 0; | |
3541 | } | |
3542 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3543 | + zram_unlock_table(&meta->table[index]); | |
3544 | ||
3545 | if (is_partial_io(bvec)) | |
3546 | /* Use a temporary buffer to decompress the page */ | |
3547 | @@ -700,10 +702,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3548 | if (user_mem) | |
3549 | kunmap_atomic(user_mem); | |
3550 | /* Free memory associated with this sector now. */ | |
3551 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3552 | + zram_lock_table(&meta->table[index]); | |
3553 | zram_free_page(zram, index); | |
3554 | zram_set_flag(meta, index, ZRAM_ZERO); | |
3555 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3556 | + zram_unlock_table(&meta->table[index]); | |
3557 | ||
3558 | atomic64_inc(&zram->stats.zero_pages); | |
3559 | ret = 0; | |
3560 | @@ -794,12 +796,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3561 | * Free memory associated with this sector | |
3562 | * before overwriting unused sectors. | |
3563 | */ | |
3564 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3565 | + zram_lock_table(&meta->table[index]); | |
3566 | zram_free_page(zram, index); | |
3567 | ||
3568 | meta->table[index].handle = handle; | |
3569 | zram_set_obj_size(meta, index, clen); | |
3570 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3571 | + zram_unlock_table(&meta->table[index]); | |
3572 | ||
3573 | /* Update stats */ | |
3574 | atomic64_add(clen, &zram->stats.compr_data_size); | |
3575 | @@ -842,9 +844,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, | |
3576 | } | |
3577 | ||
3578 | while (n >= PAGE_SIZE) { | |
3579 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3580 | + zram_lock_table(&meta->table[index]); | |
3581 | zram_free_page(zram, index); | |
3582 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3583 | + zram_unlock_table(&meta->table[index]); | |
3584 | atomic64_inc(&zram->stats.notify_free); | |
3585 | index++; | |
3586 | n -= PAGE_SIZE; | |
3587 | @@ -973,9 +975,9 @@ static void zram_slot_free_notify(struct block_device *bdev, | |
3588 | zram = bdev->bd_disk->private_data; | |
3589 | meta = zram->meta; | |
3590 | ||
3591 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3592 | + zram_lock_table(&meta->table[index]); | |
3593 | zram_free_page(zram, index); | |
3594 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3595 | + zram_unlock_table(&meta->table[index]); | |
3596 | atomic64_inc(&zram->stats.notify_free); | |
3597 | } | |
3598 | ||
3599 | diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h | |
3600 | index 74fcf10da374..fd4020c99b9e 100644 | |
3601 | --- a/drivers/block/zram/zram_drv.h | |
3602 | +++ b/drivers/block/zram/zram_drv.h | |
3603 | @@ -73,6 +73,9 @@ enum zram_pageflags { | |
3604 | struct zram_table_entry { | |
3605 | unsigned long handle; | |
3606 | unsigned long value; | |
3607 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
3608 | + spinlock_t lock; | |
3609 | +#endif | |
3610 | }; | |
3611 | ||
3612 | struct zram_stats { | |
3613 | @@ -120,4 +123,42 @@ struct zram { | |
3614 | */ | |
3615 | bool claim; /* Protected by bdev->bd_mutex */ | |
3616 | }; | |
3617 | + | |
3618 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
3619 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3620 | +{ | |
3621 | + bit_spin_lock(ZRAM_ACCESS, &table->value); | |
3622 | +} | |
3623 | + | |
3624 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3625 | +{ | |
3626 | + bit_spin_unlock(ZRAM_ACCESS, &table->value); | |
3627 | +} | |
3628 | + | |
3629 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } | |
3630 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
3631 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3632 | +{ | |
3633 | + spin_lock(&table->lock); | |
3634 | + __set_bit(ZRAM_ACCESS, &table->value); | |
3635 | +} | |
3636 | + | |
3637 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3638 | +{ | |
3639 | + __clear_bit(ZRAM_ACCESS, &table->value); | |
3640 | + spin_unlock(&table->lock); | |
3641 | +} | |
3642 | + | |
3643 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) | |
3644 | +{ | |
3645 | + size_t num_pages = disksize >> PAGE_SHIFT; | |
3646 | + size_t index; | |
3647 | + | |
3648 | + for (index = 0; index < num_pages; index++) { | |
3649 | + spinlock_t *lock = &meta->table[index].lock; | |
3650 | + spin_lock_init(lock); | |
3651 | + } | |
3652 | +} | |
3653 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
3654 | + | |
3655 | #endif | |
3656 | diff --git a/drivers/char/random.c b/drivers/char/random.c | |
3657 | index d6876d506220..0c60b1e54579 100644 | |
3658 | --- a/drivers/char/random.c | |
3659 | +++ b/drivers/char/random.c | |
3660 | @@ -1028,8 +1028,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3661 | } sample; | |
3662 | long delta, delta2, delta3; | |
3663 | ||
3664 | - preempt_disable(); | |
3665 | - | |
3666 | sample.jiffies = jiffies; | |
3667 | sample.cycles = random_get_entropy(); | |
3668 | sample.num = num; | |
3669 | @@ -1070,7 +1068,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3670 | */ | |
3671 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
3672 | } | |
3673 | - preempt_enable(); | |
3674 | } | |
3675 | ||
3676 | void add_input_randomness(unsigned int type, unsigned int code, | |
3677 | @@ -1123,28 +1120,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) | |
3678 | return *(ptr + f->reg_idx++); | |
3679 | } | |
3680 | ||
3681 | -void add_interrupt_randomness(int irq, int irq_flags) | |
3682 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
3683 | { | |
3684 | struct entropy_store *r; | |
3685 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
3686 | - struct pt_regs *regs = get_irq_regs(); | |
3687 | unsigned long now = jiffies; | |
3688 | cycles_t cycles = random_get_entropy(); | |
3689 | __u32 c_high, j_high; | |
3690 | - __u64 ip; | |
3691 | unsigned long seed; | |
3692 | int credit = 0; | |
3693 | ||
3694 | if (cycles == 0) | |
3695 | - cycles = get_reg(fast_pool, regs); | |
3696 | + cycles = get_reg(fast_pool, NULL); | |
3697 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
3698 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
3699 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
3700 | fast_pool->pool[1] ^= now ^ c_high; | |
3701 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
3702 | + if (!ip) | |
3703 | + ip = _RET_IP_; | |
3704 | fast_pool->pool[2] ^= ip; | |
3705 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
3706 | - get_reg(fast_pool, regs); | |
3707 | + get_reg(fast_pool, NULL); | |
3708 | ||
3709 | fast_mix(fast_pool); | |
3710 | add_interrupt_bench(cycles); | |
3711 | diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c | |
3712 | index 4da2af9694a2..5b6f57f500b8 100644 | |
3713 | --- a/drivers/clocksource/tcb_clksrc.c | |
3714 | +++ b/drivers/clocksource/tcb_clksrc.c | |
3715 | @@ -23,8 +23,7 @@ | |
3716 | * this 32 bit free-running counter. the second channel is not used. | |
3717 | * | |
3718 | * - The third channel may be used to provide a 16-bit clockevent | |
3719 | - * source, used in either periodic or oneshot mode. This runs | |
3720 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
3721 | + * source, used in either periodic or oneshot mode. | |
3722 | * | |
3723 | * A boot clocksource and clockevent source are also currently needed, | |
3724 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
3725 | @@ -74,6 +73,8 @@ static struct clocksource clksrc = { | |
3726 | struct tc_clkevt_device { | |
3727 | struct clock_event_device clkevt; | |
3728 | struct clk *clk; | |
3729 | + bool clk_enabled; | |
3730 | + u32 freq; | |
3731 | void __iomem *regs; | |
3732 | }; | |
3733 | ||
3734 | @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) | |
3735 | return container_of(clkevt, struct tc_clkevt_device, clkevt); | |
3736 | } | |
3737 | ||
3738 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
3739 | - * because using one of the divided clocks would usually mean the | |
3740 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
3741 | - * | |
3742 | - * A divided clock could be good for high resolution timers, since | |
3743 | - * 30.5 usec resolution can seem "low". | |
3744 | - */ | |
3745 | static u32 timer_clock; | |
3746 | ||
3747 | +static void tc_clk_disable(struct clock_event_device *d) | |
3748 | +{ | |
3749 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3750 | + | |
3751 | + clk_disable(tcd->clk); | |
3752 | + tcd->clk_enabled = false; | |
3753 | +} | |
3754 | + | |
3755 | +static void tc_clk_enable(struct clock_event_device *d) | |
3756 | +{ | |
3757 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3758 | + | |
3759 | + if (tcd->clk_enabled) | |
3760 | + return; | |
3761 | + clk_enable(tcd->clk); | |
3762 | + tcd->clk_enabled = true; | |
3763 | +} | |
3764 | + | |
3765 | static int tc_shutdown(struct clock_event_device *d) | |
3766 | { | |
3767 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3768 | @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) | |
3769 | ||
3770 | __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); | |
3771 | __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
3772 | + return 0; | |
3773 | +} | |
3774 | + | |
3775 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
3776 | +{ | |
3777 | + tc_shutdown(d); | |
3778 | if (!clockevent_state_detached(d)) | |
3779 | - clk_disable(tcd->clk); | |
3780 | + tc_clk_disable(d); | |
3781 | ||
3782 | return 0; | |
3783 | } | |
3784 | @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) | |
3785 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) | |
3786 | tc_shutdown(d); | |
3787 | ||
3788 | - clk_enable(tcd->clk); | |
3789 | + tc_clk_enable(d); | |
3790 | ||
3791 | - /* slow clock, count up to RC, then irq and stop */ | |
3792 | + /* count up to RC, then irq and stop */ | |
3793 | __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | | |
3794 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); | |
3795 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3796 | @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) | |
3797 | /* By not making the gentime core emulate periodic mode on top | |
3798 | * of oneshot, we get lower overhead and improved accuracy. | |
3799 | */ | |
3800 | - clk_enable(tcd->clk); | |
3801 | + tc_clk_enable(d); | |
3802 | ||
3803 | - /* slow clock, count up to RC, then irq and restart */ | |
3804 | + /* count up to RC, then irq and restart */ | |
3805 | __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, | |
3806 | regs + ATMEL_TC_REG(2, CMR)); | |
3807 | - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3808 | + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3809 | ||
3810 | /* Enable clock and interrupts on RC compare */ | |
3811 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3812 | @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { | |
3813 | .features = CLOCK_EVT_FEAT_PERIODIC | | |
3814 | CLOCK_EVT_FEAT_ONESHOT, | |
3815 | /* Should be lower than at91rm9200's system timer */ | |
3816 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3817 | .rating = 125, | |
3818 | +#else | |
3819 | + .rating = 200, | |
3820 | +#endif | |
3821 | .set_next_event = tc_next_event, | |
3822 | - .set_state_shutdown = tc_shutdown, | |
3823 | + .set_state_shutdown = tc_shutdown_clk_off, | |
3824 | .set_state_periodic = tc_set_periodic, | |
3825 | .set_state_oneshot = tc_set_oneshot, | |
3826 | }, | |
3827 | @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) | |
3828 | return IRQ_NONE; | |
3829 | } | |
3830 | ||
3831 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3832 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
3833 | { | |
3834 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
3835 | int ret; | |
3836 | struct clk *t2_clk = tc->clk[2]; | |
3837 | int irq = tc->irq[2]; | |
3838 | @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3839 | clkevt.regs = tc->regs; | |
3840 | clkevt.clk = t2_clk; | |
3841 | ||
3842 | - timer_clock = clk32k_divisor_idx; | |
3843 | + timer_clock = divisor_idx; | |
3844 | + if (!divisor) | |
3845 | + clkevt.freq = 32768; | |
3846 | + else | |
3847 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
3848 | ||
3849 | clkevt.clkevt.cpumask = cpumask_of(0); | |
3850 | ||
3851 | @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3852 | return ret; | |
3853 | } | |
3854 | ||
3855 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
3856 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
3857 | ||
3858 | return ret; | |
3859 | } | |
3860 | @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) | |
3861 | goto err_disable_t1; | |
3862 | ||
3863 | /* channel 2: periodic and oneshot timer support */ | |
3864 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3865 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
3866 | +#else | |
3867 | + ret = setup_clkevents(tc, best_divisor_idx); | |
3868 | +#endif | |
3869 | if (ret) | |
3870 | goto err_unregister_clksrc; | |
3871 | ||
3872 | diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c | |
3873 | index 6555821bbdae..93288849b2bd 100644 | |
3874 | --- a/drivers/clocksource/timer-atmel-pit.c | |
3875 | +++ b/drivers/clocksource/timer-atmel-pit.c | |
3876 | @@ -46,6 +46,7 @@ struct pit_data { | |
3877 | u32 cycle; | |
3878 | u32 cnt; | |
3879 | unsigned int irq; | |
3880 | + bool irq_requested; | |
3881 | struct clk *mck; | |
3882 | }; | |
3883 | ||
3884 | @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) | |
3885 | ||
3886 | /* disable irq, leaving the clocksource active */ | |
3887 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
3888 | + if (data->irq_requested) { | |
3889 | + free_irq(data->irq, data); | |
3890 | + data->irq_requested = false; | |
3891 | + } | |
3892 | return 0; | |
3893 | } | |
3894 | ||
3895 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
3896 | /* | |
3897 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
3898 | */ | |
3899 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
3900 | { | |
3901 | struct pit_data *data = clkevt_to_pit_data(dev); | |
3902 | + int ret; | |
3903 | + | |
3904 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
3905 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3906 | + "at91_tick", data); | |
3907 | + if (ret) | |
3908 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3909 | + | |
3910 | + data->irq_requested = true; | |
3911 | ||
3912 | /* update clocksource counter */ | |
3913 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
3914 | @@ -230,15 +245,6 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) | |
3915 | return ret; | |
3916 | } | |
3917 | ||
3918 | - /* Set up irq handler */ | |
3919 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
3920 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3921 | - "at91_tick", data); | |
3922 | - if (ret) { | |
3923 | - pr_err("Unable to setup IRQ\n"); | |
3924 | - return ret; | |
3925 | - } | |
3926 | - | |
3927 | /* Set up and register clockevents */ | |
3928 | data->clkevt.name = "pit"; | |
3929 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
3930 | diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c | |
3931 | index e90ab5b63a90..9e124087c55f 100644 | |
3932 | --- a/drivers/clocksource/timer-atmel-st.c | |
3933 | +++ b/drivers/clocksource/timer-atmel-st.c | |
3934 | @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) | |
3935 | last_crtr = read_CRTR(); | |
3936 | } | |
3937 | ||
3938 | +static int atmel_st_irq; | |
3939 | + | |
3940 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
3941 | { | |
3942 | clkdev32k_disable_and_flush_irq(); | |
3943 | irqmask = 0; | |
3944 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
3945 | + free_irq(atmel_st_irq, regmap_st); | |
3946 | return 0; | |
3947 | } | |
3948 | ||
3949 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
3950 | { | |
3951 | + int ret; | |
3952 | + | |
3953 | clkdev32k_disable_and_flush_irq(); | |
3954 | ||
3955 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
3956 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3957 | + "at91_tick", regmap_st); | |
3958 | + if (ret) | |
3959 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3960 | + | |
3961 | /* | |
3962 | * ALM for oneshot irqs, set by next_event() | |
3963 | * before 32 seconds have passed. | |
3964 | @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
3965 | ||
3966 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
3967 | { | |
3968 | + int ret; | |
3969 | + | |
3970 | clkdev32k_disable_and_flush_irq(); | |
3971 | ||
3972 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
3973 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3974 | + "at91_tick", regmap_st); | |
3975 | + if (ret) | |
3976 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3977 | + | |
3978 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
3979 | irqmask = AT91_ST_PITS; | |
3980 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
3981 | @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
3982 | { | |
3983 | struct clk *sclk; | |
3984 | unsigned int sclk_rate, val; | |
3985 | - int irq, ret; | |
3986 | + int ret; | |
3987 | ||
3988 | regmap_st = syscon_node_to_regmap(node); | |
3989 | if (IS_ERR(regmap_st)) { | |
3990 | @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
3991 | regmap_read(regmap_st, AT91_ST_SR, &val); | |
3992 | ||
3993 | /* Get the interrupts property */ | |
3994 | - irq = irq_of_parse_and_map(node, 0); | |
3995 | - if (!irq) { | |
3996 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
3997 | + if (!atmel_st_irq) { | |
3998 | pr_err("Unable to get IRQ from DT\n"); | |
3999 | return -EINVAL; | |
4000 | } | |
4001 | ||
4002 | - /* Make IRQs happen for the system timer */ | |
4003 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
4004 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4005 | - "at91_tick", regmap_st); | |
4006 | - if (ret) { | |
4007 | - pr_err("Unable to setup IRQ\n"); | |
4008 | - return ret; | |
4009 | - } | |
4010 | - | |
4011 | sclk = of_clk_get(node, 0); | |
4012 | if (IS_ERR(sclk)) { | |
4013 | pr_err("Unable to get slow clock\n"); | |
4014 | diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c | |
4015 | index a782ce87715c..19d265948526 100644 | |
4016 | --- a/drivers/connector/cn_proc.c | |
4017 | +++ b/drivers/connector/cn_proc.c | |
4018 | @@ -32,6 +32,7 @@ | |
4019 | #include <linux/pid_namespace.h> | |
4020 | ||
4021 | #include <linux/cn_proc.h> | |
4022 | +#include <linux/locallock.h> | |
4023 | ||
4024 | /* | |
4025 | * Size of a cn_msg followed by a proc_event structure. Since the | |
4026 | @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; | |
4027 | ||
4028 | /* proc_event_counts is used as the sequence number of the netlink message */ | |
4029 | static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; | |
4030 | +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); | |
4031 | ||
4032 | static inline void send_msg(struct cn_msg *msg) | |
4033 | { | |
4034 | - preempt_disable(); | |
4035 | + local_lock(send_msg_lock); | |
4036 | ||
4037 | msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; | |
4038 | ((struct proc_event *)msg->data)->cpu = smp_processor_id(); | |
4039 | @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg) | |
4040 | */ | |
4041 | cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); | |
4042 | ||
4043 | - preempt_enable(); | |
4044 | + local_unlock(send_msg_lock); | |
4045 | } | |
4046 | ||
4047 | void proc_fork_connector(struct task_struct *task) | |
4048 | diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 | |
4049 | index adbd1de1cea5..1fac5074f2cf 100644 | |
4050 | --- a/drivers/cpufreq/Kconfig.x86 | |
4051 | +++ b/drivers/cpufreq/Kconfig.x86 | |
4052 | @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI | |
4053 | ||
4054 | config X86_POWERNOW_K8 | |
4055 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
4056 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
4057 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
4058 | help | |
4059 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
4060 | Support for K10 and newer processors is now in acpi-cpufreq. | |
4061 | diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4062 | index a218c2e395e7..5273d8f1d5dd 100644 | |
4063 | --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4064 | +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4065 | @@ -1537,7 +1537,9 @@ execbuf_submit(struct i915_execbuffer_params *params, | |
4066 | if (ret) | |
4067 | return ret; | |
4068 | ||
4069 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
4070 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); | |
4071 | +#endif | |
4072 | ||
4073 | i915_gem_execbuffer_move_to_active(vmas, params->request); | |
4074 | ||
4075 | diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4076 | index 1c237d02f30b..9e9b4404c0d7 100644 | |
4077 | --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4078 | +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4079 | @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4080 | if (!mutex_is_locked(mutex)) | |
4081 | return false; | |
4082 | ||
4083 | -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) | |
4084 | +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4085 | return mutex->owner == task; | |
4086 | #else | |
4087 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4088 | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c | |
4089 | index 3fc286cd1157..252a1117b103 100644 | |
4090 | --- a/drivers/gpu/drm/i915/i915_irq.c | |
4091 | +++ b/drivers/gpu/drm/i915/i915_irq.c | |
4092 | @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4093 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); | |
4094 | ||
4095 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4096 | + preempt_disable_rt(); | |
4097 | ||
4098 | /* Get optional system timestamp before query. */ | |
4099 | if (stime) | |
4100 | @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4101 | *etime = ktime_get(); | |
4102 | ||
4103 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4104 | + preempt_enable_rt(); | |
4105 | ||
4106 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
4107 | ||
4108 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c | |
4109 | index 869b29fe9ec4..c8b8788d9d36 100644 | |
4110 | --- a/drivers/gpu/drm/i915/intel_display.c | |
4111 | +++ b/drivers/gpu/drm/i915/intel_display.c | |
4112 | @@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) | |
4113 | struct intel_crtc *intel_crtc = to_intel_crtc(crtc); | |
4114 | struct intel_flip_work *work; | |
4115 | ||
4116 | - WARN_ON(!in_interrupt()); | |
4117 | + WARN_ON_NONRT(!in_interrupt()); | |
4118 | ||
4119 | if (crtc == NULL) | |
4120 | return; | |
4121 | diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c | |
4122 | index dbed12c484c9..5c540b78e8b5 100644 | |
4123 | --- a/drivers/gpu/drm/i915/intel_sprite.c | |
4124 | +++ b/drivers/gpu/drm/i915/intel_sprite.c | |
4125 | @@ -35,6 +35,7 @@ | |
4126 | #include <drm/drm_rect.h> | |
4127 | #include <drm/drm_atomic.h> | |
4128 | #include <drm/drm_plane_helper.h> | |
4129 | +#include <linux/locallock.h> | |
4130 | #include "intel_drv.h" | |
4131 | #include "intel_frontbuffer.h" | |
4132 | #include <drm/i915_drm.h> | |
4133 | @@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, | |
4134 | 1000 * adjusted_mode->crtc_htotal); | |
4135 | } | |
4136 | ||
4137 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); | |
4138 | + | |
4139 | /** | |
4140 | * intel_pipe_update_start() - start update of a set of display registers | |
4141 | * @crtc: the crtc of which the registers are going to be updated | |
4142 | @@ -95,7 +98,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
4143 | min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); | |
4144 | max = vblank_start - 1; | |
4145 | ||
4146 | - local_irq_disable(); | |
4147 | + local_lock_irq(pipe_update_lock); | |
4148 | ||
4149 | if (min <= 0 || max <= 0) | |
4150 | return; | |
4151 | @@ -125,11 +128,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
4152 | break; | |
4153 | } | |
4154 | ||
4155 | - local_irq_enable(); | |
4156 | + local_unlock_irq(pipe_update_lock); | |
4157 | ||
4158 | timeout = schedule_timeout(timeout); | |
4159 | ||
4160 | - local_irq_disable(); | |
4161 | + local_lock_irq(pipe_update_lock); | |
4162 | } | |
4163 | ||
4164 | finish_wait(wq, &wait); | |
4165 | @@ -181,7 +184,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work | |
4166 | crtc->base.state->event = NULL; | |
4167 | } | |
4168 | ||
4169 | - local_irq_enable(); | |
4170 | + local_unlock_irq(pipe_update_lock); | |
4171 | ||
4172 | if (crtc->debug.start_vbl_count && | |
4173 | crtc->debug.start_vbl_count != end_vbl_count) { | |
4174 | diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4175 | index 192b2d3a79cb..d5372a207326 100644 | |
4176 | --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4177 | +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4178 | @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4179 | if (!mutex_is_locked(mutex)) | |
4180 | return false; | |
4181 | ||
4182 | -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) | |
4183 | +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4184 | return mutex->owner == task; | |
4185 | #else | |
4186 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4187 | diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c | |
4188 | index cdb8cb568c15..b6d7fd964cbc 100644 | |
4189 | --- a/drivers/gpu/drm/radeon/radeon_display.c | |
4190 | +++ b/drivers/gpu/drm/radeon/radeon_display.c | |
4191 | @@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4192 | struct radeon_device *rdev = dev->dev_private; | |
4193 | ||
4194 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4195 | + preempt_disable_rt(); | |
4196 | ||
4197 | /* Get optional system timestamp before query. */ | |
4198 | if (stime) | |
4199 | @@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4200 | *etime = ktime_get(); | |
4201 | ||
4202 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4203 | + preempt_enable_rt(); | |
4204 | ||
4205 | /* Decode into vertical and horizontal scanout position. */ | |
4206 | *vpos = position & 0x1fff; | |
4207 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | |
4208 | index 0276d2ef06ee..8868045eabde 100644 | |
4209 | --- a/drivers/hv/vmbus_drv.c | |
4210 | +++ b/drivers/hv/vmbus_drv.c | |
4211 | @@ -761,6 +761,8 @@ static void vmbus_isr(void) | |
4212 | void *page_addr; | |
4213 | struct hv_message *msg; | |
4214 | union hv_synic_event_flags *event; | |
4215 | + struct pt_regs *regs = get_irq_regs(); | |
4216 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
4217 | bool handled = false; | |
4218 | ||
4219 | page_addr = hv_context.synic_event_page[cpu]; | |
4220 | @@ -808,7 +810,7 @@ static void vmbus_isr(void) | |
4221 | tasklet_schedule(hv_context.msg_dpc[cpu]); | |
4222 | } | |
4223 | ||
4224 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
4225 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); | |
4226 | } | |
4227 | ||
4228 | ||
4229 | diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c | |
4230 | index 36f76e28a0bf..394f142f90c7 100644 | |
4231 | --- a/drivers/ide/alim15x3.c | |
4232 | +++ b/drivers/ide/alim15x3.c | |
4233 | @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4234 | ||
4235 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
4236 | ||
4237 | - local_irq_save(flags); | |
4238 | + local_irq_save_nort(flags); | |
4239 | ||
4240 | if (m5229_revision < 0xC2) { | |
4241 | /* | |
4242 | @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4243 | } | |
4244 | pci_dev_put(north); | |
4245 | pci_dev_put(isa_dev); | |
4246 | - local_irq_restore(flags); | |
4247 | + local_irq_restore_nort(flags); | |
4248 | return 0; | |
4249 | } | |
4250 | ||
4251 | diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c | |
4252 | index 0ceae5cbd89a..c212e85d7f3e 100644 | |
4253 | --- a/drivers/ide/hpt366.c | |
4254 | +++ b/drivers/ide/hpt366.c | |
4255 | @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4256 | ||
4257 | dma_old = inb(base + 2); | |
4258 | ||
4259 | - local_irq_save(flags); | |
4260 | + local_irq_save_nort(flags); | |
4261 | ||
4262 | dma_new = dma_old; | |
4263 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
4264 | @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4265 | if (dma_new != dma_old) | |
4266 | outb(dma_new, base + 2); | |
4267 | ||
4268 | - local_irq_restore(flags); | |
4269 | + local_irq_restore_nort(flags); | |
4270 | ||
4271 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
4272 | hwif->name, base, base + 7); | |
4273 | diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c | |
4274 | index 19763977568c..4169433faab5 100644 | |
4275 | --- a/drivers/ide/ide-io-std.c | |
4276 | +++ b/drivers/ide/ide-io-std.c | |
4277 | @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4278 | unsigned long uninitialized_var(flags); | |
4279 | ||
4280 | if ((io_32bit & 2) && !mmio) { | |
4281 | - local_irq_save(flags); | |
4282 | + local_irq_save_nort(flags); | |
4283 | ata_vlb_sync(io_ports->nsect_addr); | |
4284 | } | |
4285 | ||
4286 | @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4287 | insl(data_addr, buf, words); | |
4288 | ||
4289 | if ((io_32bit & 2) && !mmio) | |
4290 | - local_irq_restore(flags); | |
4291 | + local_irq_restore_nort(flags); | |
4292 | ||
4293 | if (((len + 1) & 3) < 2) | |
4294 | return; | |
4295 | @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4296 | unsigned long uninitialized_var(flags); | |
4297 | ||
4298 | if ((io_32bit & 2) && !mmio) { | |
4299 | - local_irq_save(flags); | |
4300 | + local_irq_save_nort(flags); | |
4301 | ata_vlb_sync(io_ports->nsect_addr); | |
4302 | } | |
4303 | ||
4304 | @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4305 | outsl(data_addr, buf, words); | |
4306 | ||
4307 | if ((io_32bit & 2) && !mmio) | |
4308 | - local_irq_restore(flags); | |
4309 | + local_irq_restore_nort(flags); | |
4310 | ||
4311 | if (((len + 1) & 3) < 2) | |
4312 | return; | |
4313 | diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c | |
4314 | index 669ea1e45795..e12e43e62245 100644 | |
4315 | --- a/drivers/ide/ide-io.c | |
4316 | +++ b/drivers/ide/ide-io.c | |
4317 | @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) | |
4318 | /* disable_irq_nosync ?? */ | |
4319 | disable_irq(hwif->irq); | |
4320 | /* local CPU only, as if we were handling an interrupt */ | |
4321 | - local_irq_disable(); | |
4322 | + local_irq_disable_nort(); | |
4323 | if (hwif->polling) { | |
4324 | startstop = handler(drive); | |
4325 | } else if (drive_is_ready(drive)) { | |
4326 | diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c | |
4327 | index 376f2dc410c5..f014dd1b73dc 100644 | |
4328 | --- a/drivers/ide/ide-iops.c | |
4329 | +++ b/drivers/ide/ide-iops.c | |
4330 | @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, | |
4331 | if ((stat & ATA_BUSY) == 0) | |
4332 | break; | |
4333 | ||
4334 | - local_irq_restore(flags); | |
4335 | + local_irq_restore_nort(flags); | |
4336 | *rstat = stat; | |
4337 | return -EBUSY; | |
4338 | } | |
4339 | } | |
4340 | - local_irq_restore(flags); | |
4341 | + local_irq_restore_nort(flags); | |
4342 | } | |
4343 | /* | |
4344 | * Allow status to settle, then read it again. | |
4345 | diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c | |
4346 | index 0b63facd1d87..4ceba37afc0c 100644 | |
4347 | --- a/drivers/ide/ide-probe.c | |
4348 | +++ b/drivers/ide/ide-probe.c | |
4349 | @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) | |
4350 | int bswap = 1; | |
4351 | ||
4352 | /* local CPU only; some systems need this */ | |
4353 | - local_irq_save(flags); | |
4354 | + local_irq_save_nort(flags); | |
4355 | /* read 512 bytes of id info */ | |
4356 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
4357 | - local_irq_restore(flags); | |
4358 | + local_irq_restore_nort(flags); | |
4359 | ||
4360 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
4361 | #ifdef DEBUG | |
4362 | diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c | |
4363 | index a716693417a3..be0568c722d6 100644 | |
4364 | --- a/drivers/ide/ide-taskfile.c | |
4365 | +++ b/drivers/ide/ide-taskfile.c | |
4366 | @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4367 | ||
4368 | page_is_high = PageHighMem(page); | |
4369 | if (page_is_high) | |
4370 | - local_irq_save(flags); | |
4371 | + local_irq_save_nort(flags); | |
4372 | ||
4373 | buf = kmap_atomic(page) + offset; | |
4374 | ||
4375 | @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4376 | kunmap_atomic(buf); | |
4377 | ||
4378 | if (page_is_high) | |
4379 | - local_irq_restore(flags); | |
4380 | + local_irq_restore_nort(flags); | |
4381 | ||
4382 | len -= nr_bytes; | |
4383 | } | |
4384 | @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, | |
4385 | } | |
4386 | ||
4387 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
4388 | - local_irq_disable(); | |
4389 | + local_irq_disable_nort(); | |
4390 | ||
4391 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
4392 | ||
4393 | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4394 | index fddff403d5d2..cca1bb4fbfe3 100644 | |
4395 | --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4396 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4397 | @@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
4398 | ||
4399 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
4400 | ||
4401 | - local_irq_save(flags); | |
4402 | + local_irq_save_nort(flags); | |
4403 | netif_addr_lock(dev); | |
4404 | spin_lock(&priv->lock); | |
4405 | ||
4406 | @@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
4407 | ||
4408 | spin_unlock(&priv->lock); | |
4409 | netif_addr_unlock(dev); | |
4410 | - local_irq_restore(flags); | |
4411 | + local_irq_restore_nort(flags); | |
4412 | ||
4413 | /* | |
4414 | * make sure the in-flight joins have finished before we attempt | |
4415 | diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c | |
4416 | index 4a2a9e370be7..e970d9afd179 100644 | |
4417 | --- a/drivers/input/gameport/gameport.c | |
4418 | +++ b/drivers/input/gameport/gameport.c | |
4419 | @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) | |
4420 | tx = ~0; | |
4421 | ||
4422 | for (i = 0; i < 50; i++) { | |
4423 | - local_irq_save(flags); | |
4424 | + local_irq_save_nort(flags); | |
4425 | t1 = ktime_get_ns(); | |
4426 | for (t = 0; t < 50; t++) | |
4427 | gameport_read(gameport); | |
4428 | t2 = ktime_get_ns(); | |
4429 | t3 = ktime_get_ns(); | |
4430 | - local_irq_restore(flags); | |
4431 | + local_irq_restore_nort(flags); | |
4432 | udelay(i * 10); | |
4433 | t = (t2 - t1) - (t3 - t2); | |
4434 | if (t < tx) | |
4435 | @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4436 | tx = 1 << 30; | |
4437 | ||
4438 | for(i = 0; i < 50; i++) { | |
4439 | - local_irq_save(flags); | |
4440 | + local_irq_save_nort(flags); | |
4441 | GET_TIME(t1); | |
4442 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4443 | GET_TIME(t2); | |
4444 | GET_TIME(t3); | |
4445 | - local_irq_restore(flags); | |
4446 | + local_irq_restore_nort(flags); | |
4447 | udelay(i * 10); | |
4448 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
4449 | } | |
4450 | @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4451 | tx = 1 << 30; | |
4452 | ||
4453 | for(i = 0; i < 50; i++) { | |
4454 | - local_irq_save(flags); | |
4455 | + local_irq_save_nort(flags); | |
4456 | t1 = rdtsc(); | |
4457 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4458 | t2 = rdtsc(); | |
4459 | - local_irq_restore(flags); | |
4460 | + local_irq_restore_nort(flags); | |
4461 | udelay(i * 10); | |
4462 | if (t2 - t1 < tx) tx = t2 - t1; | |
4463 | } | |
4464 | diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c | |
4465 | index 11a13b5be73a..baaed0ac274b 100644 | |
4466 | --- a/drivers/iommu/amd_iommu.c | |
4467 | +++ b/drivers/iommu/amd_iommu.c | |
4468 | @@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, | |
4469 | int ret; | |
4470 | ||
4471 | /* | |
4472 | - * Must be called with IRQs disabled. Warn here to detect early | |
4473 | - * when its not. | |
4474 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4475 | + * detect early when its not. | |
4476 | */ | |
4477 | - WARN_ON(!irqs_disabled()); | |
4478 | + WARN_ON_NONRT(!irqs_disabled()); | |
4479 | ||
4480 | /* lock domain */ | |
4481 | spin_lock(&domain->lock); | |
4482 | @@ -2094,10 +2094,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) | |
4483 | struct protection_domain *domain; | |
4484 | ||
4485 | /* | |
4486 | - * Must be called with IRQs disabled. Warn here to detect early | |
4487 | - * when its not. | |
4488 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4489 | + * detect early when its not. | |
4490 | */ | |
4491 | - WARN_ON(!irqs_disabled()); | |
4492 | + WARN_ON_NONRT(!irqs_disabled()); | |
4493 | ||
4494 | if (WARN_ON(!dev_data->domain)) | |
4495 | return; | |
4496 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c | |
4497 | index d82637ab09fd..ebe41d30c093 100644 | |
4498 | --- a/drivers/iommu/intel-iommu.c | |
4499 | +++ b/drivers/iommu/intel-iommu.c | |
4500 | @@ -479,7 +479,7 @@ struct deferred_flush_data { | |
4501 | struct deferred_flush_table *tables; | |
4502 | }; | |
4503 | ||
4504 | -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4505 | +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4506 | ||
4507 | /* bitmap for indexing intel_iommus */ | |
4508 | static int g_num_of_iommus; | |
4509 | @@ -3715,10 +3715,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, | |
4510 | struct intel_iommu *iommu; | |
4511 | struct deferred_flush_entry *entry; | |
4512 | struct deferred_flush_data *flush_data; | |
4513 | - unsigned int cpuid; | |
4514 | ||
4515 | - cpuid = get_cpu(); | |
4516 | - flush_data = per_cpu_ptr(&deferred_flush, cpuid); | |
4517 | + flush_data = raw_cpu_ptr(&deferred_flush); | |
4518 | ||
4519 | /* Flush all CPUs' entries to avoid deferring too much. If | |
4520 | * this becomes a bottleneck, can just flush us, and rely on | |
4521 | @@ -3751,8 +3749,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, | |
4522 | } | |
4523 | flush_data->size++; | |
4524 | spin_unlock_irqrestore(&flush_data->lock, flags); | |
4525 | - | |
4526 | - put_cpu(); | |
4527 | } | |
4528 | ||
4529 | static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) | |
4530 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c | |
4531 | index e23001bfcfee..359d5d169ec0 100644 | |
4532 | --- a/drivers/iommu/iova.c | |
4533 | +++ b/drivers/iommu/iova.c | |
4534 | @@ -22,6 +22,7 @@ | |
4535 | #include <linux/slab.h> | |
4536 | #include <linux/smp.h> | |
4537 | #include <linux/bitops.h> | |
4538 | +#include <linux/cpu.h> | |
4539 | ||
4540 | static bool iova_rcache_insert(struct iova_domain *iovad, | |
4541 | unsigned long pfn, | |
4542 | @@ -420,10 +421,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, | |
4543 | ||
4544 | /* Try replenishing IOVAs by flushing rcache. */ | |
4545 | flushed_rcache = true; | |
4546 | - preempt_disable(); | |
4547 | for_each_online_cpu(cpu) | |
4548 | free_cpu_cached_iovas(cpu, iovad); | |
4549 | - preempt_enable(); | |
4550 | goto retry; | |
4551 | } | |
4552 | ||
4553 | @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4554 | bool can_insert = false; | |
4555 | unsigned long flags; | |
4556 | ||
4557 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4558 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4559 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4560 | ||
4561 | if (!iova_magazine_full(cpu_rcache->loaded)) { | |
4562 | @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4563 | iova_magazine_push(cpu_rcache->loaded, iova_pfn); | |
4564 | ||
4565 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4566 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4567 | ||
4568 | if (mag_to_free) { | |
4569 | iova_magazine_free_pfns(mag_to_free, iovad); | |
4570 | @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4571 | bool has_pfn = false; | |
4572 | unsigned long flags; | |
4573 | ||
4574 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4575 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4576 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4577 | ||
4578 | if (!iova_magazine_empty(cpu_rcache->loaded)) { | |
4579 | @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4580 | iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); | |
4581 | ||
4582 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4583 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4584 | ||
4585 | return iova_pfn; | |
4586 | } | |
4587 | diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig | |
4588 | index 3f9ddb9fafa7..09da5b6b44a1 100644 | |
4589 | --- a/drivers/leds/trigger/Kconfig | |
4590 | +++ b/drivers/leds/trigger/Kconfig | |
4591 | @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT | |
4592 | ||
4593 | config LEDS_TRIGGER_CPU | |
4594 | bool "LED CPU Trigger" | |
4595 | - depends on LEDS_TRIGGERS | |
4596 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
4597 | help | |
4598 | This allows LEDs to be controlled by active CPUs. This shows | |
4599 | the active CPUs across an array of LEDs so you can see which | |
4600 | diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig | |
4601 | index 4d200883c505..98b64ed5cb81 100644 | |
4602 | --- a/drivers/md/bcache/Kconfig | |
4603 | +++ b/drivers/md/bcache/Kconfig | |
4604 | @@ -1,6 +1,7 @@ | |
4605 | ||
4606 | config BCACHE | |
4607 | tristate "Block device as cache" | |
4608 | + depends on !PREEMPT_RT_FULL | |
4609 | ---help--- | |
4610 | Allows a block device to be used as cache for other devices; uses | |
4611 | a btree for indexing and the layout is optimized for SSDs. | |
4612 | diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c | |
4613 | index 31a89c8832c0..c3a7e8a9f761 100644 | |
4614 | --- a/drivers/md/dm-rq.c | |
4615 | +++ b/drivers/md/dm-rq.c | |
4616 | @@ -838,7 +838,7 @@ static void dm_old_request_fn(struct request_queue *q) | |
4617 | /* Establish tio->ti before queuing work (map_tio_request) */ | |
4618 | tio->ti = ti; | |
4619 | kthread_queue_work(&md->kworker, &tio->work); | |
4620 | - BUG_ON(!irqs_disabled()); | |
4621 | + BUG_ON_NONRT(!irqs_disabled()); | |
4622 | } | |
4623 | } | |
4624 | ||
4625 | diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c | |
4626 | index cce6057b9aca..fa2c4de32a64 100644 | |
4627 | --- a/drivers/md/raid5.c | |
4628 | +++ b/drivers/md/raid5.c | |
4629 | @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4630 | struct raid5_percpu *percpu; | |
4631 | unsigned long cpu; | |
4632 | ||
4633 | - cpu = get_cpu(); | |
4634 | + cpu = get_cpu_light(); | |
4635 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
4636 | + spin_lock(&percpu->lock); | |
4637 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
4638 | ops_run_biofill(sh); | |
4639 | overlap_clear++; | |
4640 | @@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4641 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
4642 | wake_up(&sh->raid_conf->wait_for_overlap); | |
4643 | } | |
4644 | - put_cpu(); | |
4645 | + spin_unlock(&percpu->lock); | |
4646 | + put_cpu_light(); | |
4647 | } | |
4648 | ||
4649 | static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, | |
4650 | @@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) | |
4651 | __func__, cpu); | |
4652 | return -ENOMEM; | |
4653 | } | |
4654 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); | |
4655 | return 0; | |
4656 | } | |
4657 | ||
4658 | @@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5conf *conf) | |
4659 | conf->percpu = alloc_percpu(struct raid5_percpu); | |
4660 | if (!conf->percpu) | |
4661 | return -ENOMEM; | |
4662 | - | |
4663 | err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); | |
4664 | if (!err) { | |
4665 | conf->scribble_disks = max(conf->raid_disks, | |
4666 | diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h | |
4667 | index 57ec49f0839e..0739604990b7 100644 | |
4668 | --- a/drivers/md/raid5.h | |
4669 | +++ b/drivers/md/raid5.h | |
4670 | @@ -504,6 +504,7 @@ struct r5conf { | |
4671 | int recovery_disabled; | |
4672 | /* per cpu variables */ | |
4673 | struct raid5_percpu { | |
4674 | + spinlock_t lock; /* Protection for -RT */ | |
4675 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
4676 | struct flex_array *scribble; /* space for constructing buffer | |
4677 | * lists and performing address | |
4678 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig | |
4679 | index 64971baf11fa..215e91e36198 100644 | |
4680 | --- a/drivers/misc/Kconfig | |
4681 | +++ b/drivers/misc/Kconfig | |
4682 | @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI | |
4683 | config ATMEL_TCLIB | |
4684 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
4685 | depends on (AVR32 || ARCH_AT91) | |
4686 | + default y if PREEMPT_RT_FULL | |
4687 | help | |
4688 | Select this if you want a library to allocate the Timer/Counter | |
4689 | blocks found on many Atmel processors. This facilitates using | |
4690 | @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC | |
4691 | are combined to make a single 32-bit timer. | |
4692 | ||
4693 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
4694 | - may be used as a clock event device supporting oneshot mode | |
4695 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
4696 | + may be used as a clock event device supporting oneshot mode. | |
4697 | ||
4698 | config ATMEL_TCB_CLKSRC_BLOCK | |
4699 | int | |
4700 | @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK | |
4701 | TC can be used for other purposes, such as PWM generation and | |
4702 | interval timing. | |
4703 | ||
4704 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
4705 | + bool "TC Block use 32 KiHz clock" | |
4706 | + depends on ATMEL_TCB_CLKSRC | |
4707 | + default y if !PREEMPT_RT_FULL | |
4708 | + help | |
4709 | + Select this to use 32 KiHz base clock rate as TC block clock | |
4710 | + source for clock events. | |
4711 | + | |
4712 | + | |
4713 | config DUMMY_IRQ | |
4714 | tristate "Dummy IRQ handler" | |
4715 | default n | |
4716 | diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c | |
4717 | index df990bb8c873..1a162709a85e 100644 | |
4718 | --- a/drivers/mmc/host/mmci.c | |
4719 | +++ b/drivers/mmc/host/mmci.c | |
4720 | @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4721 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
4722 | struct variant_data *variant = host->variant; | |
4723 | void __iomem *base = host->base; | |
4724 | - unsigned long flags; | |
4725 | u32 status; | |
4726 | ||
4727 | status = readl(base + MMCISTATUS); | |
4728 | ||
4729 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
4730 | ||
4731 | - local_irq_save(flags); | |
4732 | - | |
4733 | do { | |
4734 | unsigned int remain, len; | |
4735 | char *buffer; | |
4736 | @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4737 | ||
4738 | sg_miter_stop(sg_miter); | |
4739 | ||
4740 | - local_irq_restore(flags); | |
4741 | - | |
4742 | /* | |
4743 | * If we have less than the fifo 'half-full' threshold to transfer, | |
4744 | * trigger a PIO interrupt as soon as any data is available. | |
4745 | diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c | |
4746 | index 9133e7926da5..63afb921ed40 100644 | |
4747 | --- a/drivers/net/ethernet/3com/3c59x.c | |
4748 | +++ b/drivers/net/ethernet/3com/3c59x.c | |
4749 | @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) | |
4750 | { | |
4751 | struct vortex_private *vp = netdev_priv(dev); | |
4752 | unsigned long flags; | |
4753 | - local_irq_save(flags); | |
4754 | + local_irq_save_nort(flags); | |
4755 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
4756 | - local_irq_restore(flags); | |
4757 | + local_irq_restore_nort(flags); | |
4758 | } | |
4759 | #endif | |
4760 | ||
4761 | @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev) | |
4762 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
4763 | */ | |
4764 | unsigned long flags; | |
4765 | - local_irq_save(flags); | |
4766 | + local_irq_save_nort(flags); | |
4767 | if (vp->full_bus_master_tx) | |
4768 | boomerang_interrupt(dev->irq, dev); | |
4769 | else | |
4770 | vortex_interrupt(dev->irq, dev); | |
4771 | - local_irq_restore(flags); | |
4772 | + local_irq_restore_nort(flags); | |
4773 | } | |
4774 | } | |
4775 | ||
4776 | diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c | |
4777 | index da4c2d8a4173..1420dfb56bac 100644 | |
4778 | --- a/drivers/net/ethernet/realtek/8139too.c | |
4779 | +++ b/drivers/net/ethernet/realtek/8139too.c | |
4780 | @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev) | |
4781 | struct rtl8139_private *tp = netdev_priv(dev); | |
4782 | const int irq = tp->pci_dev->irq; | |
4783 | ||
4784 | - disable_irq(irq); | |
4785 | + disable_irq_nosync(irq); | |
4786 | rtl8139_interrupt(irq, dev); | |
4787 | enable_irq(irq); | |
4788 | } | |
4789 | diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
4790 | index bca6935a94db..d7a35ee34d03 100644 | |
4791 | --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
4792 | +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
4793 | @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, | |
4794 | while (!ctx->done.done && msecs--) | |
4795 | udelay(1000); | |
4796 | } else { | |
4797 | - wait_event_interruptible(ctx->done.wait, | |
4798 | + swait_event_interruptible(ctx->done.wait, | |
4799 | ctx->done.done); | |
4800 | } | |
4801 | break; | |
4802 | diff --git a/drivers/pci/access.c b/drivers/pci/access.c | |
4803 | index d11cdbb8fba3..223bbb9acb03 100644 | |
4804 | --- a/drivers/pci/access.c | |
4805 | +++ b/drivers/pci/access.c | |
4806 | @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) | |
4807 | WARN_ON(!dev->block_cfg_access); | |
4808 | ||
4809 | dev->block_cfg_access = 0; | |
4810 | - wake_up_all(&pci_cfg_wait); | |
4811 | + wake_up_all_locked(&pci_cfg_wait); | |
4812 | raw_spin_unlock_irqrestore(&pci_lock, flags); | |
4813 | } | |
4814 | EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); | |
4815 | diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c | |
4816 | index 775c88303017..f8e9e1c2b2f6 100644 | |
4817 | --- a/drivers/pinctrl/qcom/pinctrl-msm.c | |
4818 | +++ b/drivers/pinctrl/qcom/pinctrl-msm.c | |
4819 | @@ -61,7 +61,7 @@ struct msm_pinctrl { | |
4820 | struct notifier_block restart_nb; | |
4821 | int irq; | |
4822 | ||
4823 | - spinlock_t lock; | |
4824 | + raw_spinlock_t lock; | |
4825 | ||
4826 | DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO); | |
4827 | DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO); | |
4828 | @@ -153,14 +153,14 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, | |
4829 | if (WARN_ON(i == g->nfuncs)) | |
4830 | return -EINVAL; | |
4831 | ||
4832 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4833 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4834 | ||
4835 | val = readl(pctrl->regs + g->ctl_reg); | |
4836 | val &= ~mask; | |
4837 | val |= i << g->mux_bit; | |
4838 | writel(val, pctrl->regs + g->ctl_reg); | |
4839 | ||
4840 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4841 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4842 | ||
4843 | return 0; | |
4844 | } | |
4845 | @@ -323,14 +323,14 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev, | |
4846 | break; | |
4847 | case PIN_CONFIG_OUTPUT: | |
4848 | /* set output value */ | |
4849 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4850 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4851 | val = readl(pctrl->regs + g->io_reg); | |
4852 | if (arg) | |
4853 | val |= BIT(g->out_bit); | |
4854 | else | |
4855 | val &= ~BIT(g->out_bit); | |
4856 | writel(val, pctrl->regs + g->io_reg); | |
4857 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4858 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4859 | ||
4860 | /* enable output */ | |
4861 | arg = 1; | |
4862 | @@ -351,12 +351,12 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev, | |
4863 | return -EINVAL; | |
4864 | } | |
4865 | ||
4866 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4867 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4868 | val = readl(pctrl->regs + g->ctl_reg); | |
4869 | val &= ~(mask << bit); | |
4870 | val |= arg << bit; | |
4871 | writel(val, pctrl->regs + g->ctl_reg); | |
4872 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4873 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4874 | } | |
4875 | ||
4876 | return 0; | |
4877 | @@ -384,13 +384,13 @@ static int msm_gpio_direction_input(struct gpio_chip *chip, unsigned offset) | |
4878 | ||
4879 | g = &pctrl->soc->groups[offset]; | |
4880 | ||
4881 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4882 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4883 | ||
4884 | val = readl(pctrl->regs + g->ctl_reg); | |
4885 | val &= ~BIT(g->oe_bit); | |
4886 | writel(val, pctrl->regs + g->ctl_reg); | |
4887 | ||
4888 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4889 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4890 | ||
4891 | return 0; | |
4892 | } | |
4893 | @@ -404,7 +404,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in | |
4894 | ||
4895 | g = &pctrl->soc->groups[offset]; | |
4896 | ||
4897 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4898 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4899 | ||
4900 | val = readl(pctrl->regs + g->io_reg); | |
4901 | if (value) | |
4902 | @@ -417,7 +417,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in | |
4903 | val |= BIT(g->oe_bit); | |
4904 | writel(val, pctrl->regs + g->ctl_reg); | |
4905 | ||
4906 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4907 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4908 | ||
4909 | return 0; | |
4910 | } | |
4911 | @@ -443,7 +443,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value) | |
4912 | ||
4913 | g = &pctrl->soc->groups[offset]; | |
4914 | ||
4915 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4916 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4917 | ||
4918 | val = readl(pctrl->regs + g->io_reg); | |
4919 | if (value) | |
4920 | @@ -452,7 +452,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value) | |
4921 | val &= ~BIT(g->out_bit); | |
4922 | writel(val, pctrl->regs + g->io_reg); | |
4923 | ||
4924 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4925 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4926 | } | |
4927 | ||
4928 | #ifdef CONFIG_DEBUG_FS | |
4929 | @@ -571,7 +571,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) | |
4930 | ||
4931 | g = &pctrl->soc->groups[d->hwirq]; | |
4932 | ||
4933 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4934 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4935 | ||
4936 | val = readl(pctrl->regs + g->intr_cfg_reg); | |
4937 | val &= ~BIT(g->intr_enable_bit); | |
4938 | @@ -579,7 +579,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) | |
4939 | ||
4940 | clear_bit(d->hwirq, pctrl->enabled_irqs); | |
4941 | ||
4942 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4943 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4944 | } | |
4945 | ||
4946 | static void msm_gpio_irq_unmask(struct irq_data *d) | |
4947 | @@ -592,7 +592,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d) | |
4948 | ||
4949 | g = &pctrl->soc->groups[d->hwirq]; | |
4950 | ||
4951 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4952 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4953 | ||
4954 | val = readl(pctrl->regs + g->intr_status_reg); | |
4955 | val &= ~BIT(g->intr_status_bit); | |
4956 | @@ -604,7 +604,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d) | |
4957 | ||
4958 | set_bit(d->hwirq, pctrl->enabled_irqs); | |
4959 | ||
4960 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4961 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4962 | } | |
4963 | ||
4964 | static void msm_gpio_irq_ack(struct irq_data *d) | |
4965 | @@ -617,7 +617,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) | |
4966 | ||
4967 | g = &pctrl->soc->groups[d->hwirq]; | |
4968 | ||
4969 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4970 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4971 | ||
4972 | val = readl(pctrl->regs + g->intr_status_reg); | |
4973 | if (g->intr_ack_high) | |
4974 | @@ -629,7 +629,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) | |
4975 | if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) | |
4976 | msm_gpio_update_dual_edge_pos(pctrl, g, d); | |
4977 | ||
4978 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4979 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4980 | } | |
4981 | ||
4982 | static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) | |
4983 | @@ -642,7 +642,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) | |
4984 | ||
4985 | g = &pctrl->soc->groups[d->hwirq]; | |
4986 | ||
4987 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4988 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4989 | ||
4990 | /* | |
4991 | * For hw without possibility of detecting both edges | |
4992 | @@ -716,7 +716,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) | |
4993 | if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) | |
4994 | msm_gpio_update_dual_edge_pos(pctrl, g, d); | |
4995 | ||
4996 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4997 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4998 | ||
4999 | if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) | |
5000 | irq_set_handler_locked(d, handle_level_irq); | |
5001 | @@ -732,11 +732,11 @@ static int msm_gpio_irq_set_wake(struct irq_data *d, unsigned int on) | |
5002 | struct msm_pinctrl *pctrl = gpiochip_get_data(gc); | |
5003 | unsigned long flags; | |
5004 | ||
5005 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5006 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5007 | ||
5008 | irq_set_irq_wake(pctrl->irq, on); | |
5009 | ||
5010 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5011 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5012 | ||
5013 | return 0; | |
5014 | } | |
5015 | @@ -882,7 +882,7 @@ int msm_pinctrl_probe(struct platform_device *pdev, | |
5016 | pctrl->soc = soc_data; | |
5017 | pctrl->chip = msm_gpio_template; | |
5018 | ||
5019 | - spin_lock_init(&pctrl->lock); | |
5020 | + raw_spin_lock_init(&pctrl->lock); | |
5021 | ||
5022 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | |
5023 | pctrl->regs = devm_ioremap_resource(&pdev->dev, res); | |
5024 | diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c | |
5025 | index 9bd41a35a78a..8e2d436c2e3f 100644 | |
5026 | --- a/drivers/scsi/fcoe/fcoe.c | |
5027 | +++ b/drivers/scsi/fcoe/fcoe.c | |
5028 | @@ -1455,11 +1455,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, | |
5029 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
5030 | { | |
5031 | struct fcoe_percpu_s *fps; | |
5032 | - int rc; | |
5033 | + int rc, cpu = get_cpu_light(); | |
5034 | ||
5035 | - fps = &get_cpu_var(fcoe_percpu); | |
5036 | + fps = &per_cpu(fcoe_percpu, cpu); | |
5037 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
5038 | - put_cpu_var(fcoe_percpu); | |
5039 | + put_cpu_light(); | |
5040 | ||
5041 | return rc; | |
5042 | } | |
5043 | @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, | |
5044 | return 0; | |
5045 | } | |
5046 | ||
5047 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
5048 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
5049 | stats->InvalidCRCCount++; | |
5050 | if (stats->InvalidCRCCount < 5) | |
5051 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
5052 | - put_cpu(); | |
5053 | + put_cpu_light(); | |
5054 | return -EINVAL; | |
5055 | } | |
5056 | ||
5057 | @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
5058 | */ | |
5059 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
5060 | ||
5061 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
5062 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
5063 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
5064 | if (stats->ErrorFrames < 5) | |
5065 | printk(KERN_WARNING "fcoe: FCoE version " | |
5066 | @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
5067 | goto drop; | |
5068 | ||
5069 | if (!fcoe_filter_frames(lport, fp)) { | |
5070 | - put_cpu(); | |
5071 | + put_cpu_light(); | |
5072 | fc_exch_recv(lport, fp); | |
5073 | return; | |
5074 | } | |
5075 | drop: | |
5076 | stats->ErrorFrames++; | |
5077 | - put_cpu(); | |
5078 | + put_cpu_light(); | |
5079 | kfree_skb(skb); | |
5080 | } | |
5081 | ||
5082 | diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c | |
5083 | index dcf36537a767..1a1f2e46452c 100644 | |
5084 | --- a/drivers/scsi/fcoe/fcoe_ctlr.c | |
5085 | +++ b/drivers/scsi/fcoe/fcoe_ctlr.c | |
5086 | @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
5087 | ||
5088 | INIT_LIST_HEAD(&del_list); | |
5089 | ||
5090 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
5091 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
5092 | ||
5093 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
5094 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
5095 | @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
5096 | sel_time = fcf->time; | |
5097 | } | |
5098 | } | |
5099 | - put_cpu(); | |
5100 | + put_cpu_light(); | |
5101 | ||
5102 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
5103 | /* Removes fcf from current list */ | |
5104 | diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c | |
5105 | index 16ca31ad5ec0..c3987347e762 100644 | |
5106 | --- a/drivers/scsi/libfc/fc_exch.c | |
5107 | +++ b/drivers/scsi/libfc/fc_exch.c | |
5108 | @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, | |
5109 | } | |
5110 | memset(ep, 0, sizeof(*ep)); | |
5111 | ||
5112 | - cpu = get_cpu(); | |
5113 | + cpu = get_cpu_light(); | |
5114 | pool = per_cpu_ptr(mp->pool, cpu); | |
5115 | spin_lock_bh(&pool->lock); | |
5116 | - put_cpu(); | |
5117 | + put_cpu_light(); | |
5118 | ||
5119 | /* peek cache of free slot */ | |
5120 | if (pool->left != FC_XID_UNKNOWN) { | |
5121 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | |
5122 | index 763f012fdeca..d0f61b595470 100644 | |
5123 | --- a/drivers/scsi/libsas/sas_ata.c | |
5124 | +++ b/drivers/scsi/libsas/sas_ata.c | |
5125 | @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
5126 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
5127 | * unconditionally re-enable interrupts | |
5128 | */ | |
5129 | - local_irq_save(flags); | |
5130 | + local_irq_save_nort(flags); | |
5131 | spin_unlock(ap->lock); | |
5132 | ||
5133 | /* If the device fell off, no sense in issuing commands */ | |
5134 | @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
5135 | ||
5136 | out: | |
5137 | spin_lock(ap->lock); | |
5138 | - local_irq_restore(flags); | |
5139 | + local_irq_restore_nort(flags); | |
5140 | return ret; | |
5141 | } | |
5142 | ||
5143 | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h | |
5144 | index edc48f3b8230..ee5c6f9dfb6f 100644 | |
5145 | --- a/drivers/scsi/qla2xxx/qla_inline.h | |
5146 | +++ b/drivers/scsi/qla2xxx/qla_inline.h | |
5147 | @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) | |
5148 | { | |
5149 | unsigned long flags; | |
5150 | struct qla_hw_data *ha = rsp->hw; | |
5151 | - local_irq_save(flags); | |
5152 | + local_irq_save_nort(flags); | |
5153 | if (IS_P3P_TYPE(ha)) | |
5154 | qla82xx_poll(0, rsp); | |
5155 | else | |
5156 | ha->isp_ops->intr_handler(0, rsp); | |
5157 | - local_irq_restore(flags); | |
5158 | + local_irq_restore_nort(flags); | |
5159 | } | |
5160 | ||
5161 | static inline uint8_t * | |
5162 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c | |
5163 | index 068c4e47fac9..a2090f640397 100644 | |
5164 | --- a/drivers/scsi/qla2xxx/qla_isr.c | |
5165 | +++ b/drivers/scsi/qla2xxx/qla_isr.c | |
5166 | @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) | |
5167 | * kref_put(). | |
5168 | */ | |
5169 | kref_get(&qentry->irq_notify.kref); | |
5170 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5171 | + swork_queue(&qentry->irq_notify.swork); | |
5172 | +#else | |
5173 | schedule_work(&qentry->irq_notify.work); | |
5174 | +#endif | |
5175 | } | |
5176 | ||
5177 | /* | |
5178 | diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c | |
5179 | index 95f4c1bcdb4c..0be934799bff 100644 | |
5180 | --- a/drivers/thermal/x86_pkg_temp_thermal.c | |
5181 | +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |
5182 | @@ -29,6 +29,7 @@ | |
5183 | #include <linux/pm.h> | |
5184 | #include <linux/thermal.h> | |
5185 | #include <linux/debugfs.h> | |
5186 | +#include <linux/swork.h> | |
5187 | #include <asm/cpu_device_id.h> | |
5188 | #include <asm/mce.h> | |
5189 | ||
5190 | @@ -353,7 +354,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) | |
5191 | } | |
5192 | } | |
5193 | ||
5194 | -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5195 | +static void platform_thermal_notify_work(struct swork_event *event) | |
5196 | { | |
5197 | unsigned long flags; | |
5198 | int cpu = smp_processor_id(); | |
5199 | @@ -370,7 +371,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5200 | pkg_work_scheduled[phy_id]) { | |
5201 | disable_pkg_thres_interrupt(); | |
5202 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
5203 | - return -EINVAL; | |
5204 | + return; | |
5205 | } | |
5206 | pkg_work_scheduled[phy_id] = 1; | |
5207 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
5208 | @@ -379,9 +380,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5209 | schedule_delayed_work_on(cpu, | |
5210 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | |
5211 | msecs_to_jiffies(notify_delay_ms)); | |
5212 | +} | |
5213 | + | |
5214 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5215 | +static struct swork_event notify_work; | |
5216 | + | |
5217 | +static int thermal_notify_work_init(void) | |
5218 | +{ | |
5219 | + int err; | |
5220 | + | |
5221 | + err = swork_get(); | |
5222 | + if (err) | |
5223 | + return err; | |
5224 | + | |
5225 | + INIT_SWORK(¬ify_work, platform_thermal_notify_work); | |
5226 | return 0; | |
5227 | } | |
5228 | ||
5229 | +static void thermal_notify_work_cleanup(void) | |
5230 | +{ | |
5231 | + swork_put(); | |
5232 | +} | |
5233 | + | |
5234 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5235 | +{ | |
5236 | + swork_queue(¬ify_work); | |
5237 | + return 0; | |
5238 | +} | |
5239 | + | |
5240 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
5241 | + | |
5242 | +static int thermal_notify_work_init(void) { return 0; } | |
5243 | + | |
5244 | +static void thermal_notify_work_cleanup(void) { } | |
5245 | + | |
5246 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5247 | +{ | |
5248 | + platform_thermal_notify_work(NULL); | |
5249 | + | |
5250 | + return 0; | |
5251 | +} | |
5252 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
5253 | + | |
5254 | static int find_siblings_cpu(int cpu) | |
5255 | { | |
5256 | int i; | |
5257 | @@ -585,6 +625,9 @@ static int __init pkg_temp_thermal_init(void) | |
5258 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | |
5259 | return -ENODEV; | |
5260 | ||
5261 | + if (!thermal_notify_work_init()) | |
5262 | + return -ENODEV; | |
5263 | + | |
5264 | spin_lock_init(&pkg_work_lock); | |
5265 | platform_thermal_package_notify = | |
5266 | pkg_temp_thermal_platform_thermal_notify; | |
5267 | @@ -609,7 +652,7 @@ static int __init pkg_temp_thermal_init(void) | |
5268 | kfree(pkg_work_scheduled); | |
5269 | platform_thermal_package_notify = NULL; | |
5270 | platform_thermal_package_rate_control = NULL; | |
5271 | - | |
5272 | + thermal_notify_work_cleanup(); | |
5273 | return -ENODEV; | |
5274 | } | |
5275 | ||
5276 | @@ -634,6 +677,7 @@ static void __exit pkg_temp_thermal_exit(void) | |
5277 | mutex_unlock(&phy_dev_list_mutex); | |
5278 | platform_thermal_package_notify = NULL; | |
5279 | platform_thermal_package_rate_control = NULL; | |
5280 | + thermal_notify_work_cleanup(); | |
5281 | for_each_online_cpu(i) | |
5282 | cancel_delayed_work_sync( | |
5283 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | |
5284 | diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c | |
5285 | index e8819aa20415..dd7f9bf45d6c 100644 | |
5286 | --- a/drivers/tty/serial/8250/8250_core.c | |
5287 | +++ b/drivers/tty/serial/8250/8250_core.c | |
5288 | @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; | |
5289 | ||
5290 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
5291 | ||
5292 | -#define PASS_LIMIT 512 | |
5293 | +/* | |
5294 | + * On -rt we can have more delays, and legitimately | |
5295 | + * so - so don't drop work spuriously and spam the | |
5296 | + * syslog: | |
5297 | + */ | |
5298 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5299 | +# define PASS_LIMIT 1000000 | |
5300 | +#else | |
5301 | +# define PASS_LIMIT 512 | |
5302 | +#endif | |
5303 | ||
5304 | #include <asm/serial.h> | |
5305 | /* | |
5306 | diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c | |
5307 | index 080d5a59d0a7..eecc4f111473 100644 | |
5308 | --- a/drivers/tty/serial/8250/8250_port.c | |
5309 | +++ b/drivers/tty/serial/8250/8250_port.c | |
5310 | @@ -35,6 +35,7 @@ | |
5311 | #include <linux/nmi.h> | |
5312 | #include <linux/mutex.h> | |
5313 | #include <linux/slab.h> | |
5314 | +#include <linux/kdb.h> | |
5315 | #include <linux/uaccess.h> | |
5316 | #include <linux/pm_runtime.h> | |
5317 | #include <linux/timer.h> | |
5318 | @@ -3144,9 +3145,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, | |
5319 | ||
5320 | serial8250_rpm_get(up); | |
5321 | ||
5322 | - if (port->sysrq) | |
5323 | + if (port->sysrq || oops_in_progress) | |
5324 | locked = 0; | |
5325 | - else if (oops_in_progress) | |
5326 | + else if (in_kdb_printk()) | |
5327 | locked = spin_trylock_irqsave(&port->lock, flags); | |
5328 | else | |
5329 | spin_lock_irqsave(&port->lock, flags); | |
5330 | diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c | |
5331 | index e2c33b9528d8..53af53c43e8c 100644 | |
5332 | --- a/drivers/tty/serial/amba-pl011.c | |
5333 | +++ b/drivers/tty/serial/amba-pl011.c | |
5334 | @@ -2194,13 +2194,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
5335 | ||
5336 | clk_enable(uap->clk); | |
5337 | ||
5338 | - local_irq_save(flags); | |
5339 | + /* | |
5340 | + * local_irq_save(flags); | |
5341 | + * | |
5342 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
5343 | + * handling then interrupts are already disabled. Aside of | |
5344 | + * that the port.sysrq check is racy on SMP regardless. | |
5345 | + */ | |
5346 | if (uap->port.sysrq) | |
5347 | locked = 0; | |
5348 | else if (oops_in_progress) | |
5349 | - locked = spin_trylock(&uap->port.lock); | |
5350 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
5351 | else | |
5352 | - spin_lock(&uap->port.lock); | |
5353 | + spin_lock_irqsave(&uap->port.lock, flags); | |
5354 | ||
5355 | /* | |
5356 | * First save the CR then disable the interrupts | |
5357 | @@ -2224,8 +2230,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
5358 | pl011_write(old_cr, uap, REG_CR); | |
5359 | ||
5360 | if (locked) | |
5361 | - spin_unlock(&uap->port.lock); | |
5362 | - local_irq_restore(flags); | |
5363 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
5364 | ||
5365 | clk_disable(uap->clk); | |
5366 | } | |
5367 | diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c | |
5368 | index a2a529994ba5..0ee7c4c518df 100644 | |
5369 | --- a/drivers/tty/serial/omap-serial.c | |
5370 | +++ b/drivers/tty/serial/omap-serial.c | |
5371 | @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, | |
5372 | ||
5373 | pm_runtime_get_sync(up->dev); | |
5374 | ||
5375 | - local_irq_save(flags); | |
5376 | - if (up->port.sysrq) | |
5377 | - locked = 0; | |
5378 | - else if (oops_in_progress) | |
5379 | - locked = spin_trylock(&up->port.lock); | |
5380 | + if (up->port.sysrq || oops_in_progress) | |
5381 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
5382 | else | |
5383 | - spin_lock(&up->port.lock); | |
5384 | + spin_lock_irqsave(&up->port.lock, flags); | |
5385 | ||
5386 | /* | |
5387 | * First save the IER then disable the interrupts | |
5388 | @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, | |
5389 | pm_runtime_mark_last_busy(up->dev); | |
5390 | pm_runtime_put_autosuspend(up->dev); | |
5391 | if (locked) | |
5392 | - spin_unlock(&up->port.lock); | |
5393 | - local_irq_restore(flags); | |
5394 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
5395 | } | |
5396 | ||
5397 | static int __init | |
5398 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c | |
5399 | index 479e223f9cff..3418a54b4131 100644 | |
5400 | --- a/drivers/usb/core/hcd.c | |
5401 | +++ b/drivers/usb/core/hcd.c | |
5402 | @@ -1761,9 +1761,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) | |
5403 | * and no one may trigger the above deadlock situation when | |
5404 | * running complete() in tasklet. | |
5405 | */ | |
5406 | - local_irq_save(flags); | |
5407 | + local_irq_save_nort(flags); | |
5408 | urb->complete(urb); | |
5409 | - local_irq_restore(flags); | |
5410 | + local_irq_restore_nort(flags); | |
5411 | ||
5412 | usb_anchor_resume_wakeups(anchor); | |
5413 | atomic_dec(&urb->use_count); | |
5414 | diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c | |
5415 | index 17989b72cdae..88c6574b5992 100644 | |
5416 | --- a/drivers/usb/gadget/function/f_fs.c | |
5417 | +++ b/drivers/usb/gadget/function/f_fs.c | |
5418 | @@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs) | |
5419 | pr_info("%s(): freeing\n", __func__); | |
5420 | ffs_data_clear(ffs); | |
5421 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
5422 | - waitqueue_active(&ffs->ep0req_completion.wait)); | |
5423 | + swait_active(&ffs->ep0req_completion.wait)); | |
5424 | kfree(ffs->dev_name); | |
5425 | kfree(ffs); | |
5426 | } | |
5427 | diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c | |
5428 | index 1468d8f085a3..6aae3ae25c18 100644 | |
5429 | --- a/drivers/usb/gadget/legacy/inode.c | |
5430 | +++ b/drivers/usb/gadget/legacy/inode.c | |
5431 | @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5432 | spin_unlock_irq (&epdata->dev->lock); | |
5433 | ||
5434 | if (likely (value == 0)) { | |
5435 | - value = wait_event_interruptible (done.wait, done.done); | |
5436 | + value = swait_event_interruptible (done.wait, done.done); | |
5437 | if (value != 0) { | |
5438 | spin_lock_irq (&epdata->dev->lock); | |
5439 | if (likely (epdata->ep != NULL)) { | |
5440 | @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5441 | usb_ep_dequeue (epdata->ep, epdata->req); | |
5442 | spin_unlock_irq (&epdata->dev->lock); | |
5443 | ||
5444 | - wait_event (done.wait, done.done); | |
5445 | + swait_event (done.wait, done.done); | |
5446 | if (epdata->status == -ECONNRESET) | |
5447 | epdata->status = -EINTR; | |
5448 | } else { | |
5449 | diff --git a/fs/aio.c b/fs/aio.c | |
5450 | index 428484f2f841..2b02e2eb2158 100644 | |
5451 | --- a/fs/aio.c | |
5452 | +++ b/fs/aio.c | |
5453 | @@ -40,6 +40,7 @@ | |
5454 | #include <linux/ramfs.h> | |
5455 | #include <linux/percpu-refcount.h> | |
5456 | #include <linux/mount.h> | |
5457 | +#include <linux/swork.h> | |
5458 | ||
5459 | #include <asm/kmap_types.h> | |
5460 | #include <asm/uaccess.h> | |
5461 | @@ -115,7 +116,7 @@ struct kioctx { | |
5462 | struct page **ring_pages; | |
5463 | long nr_pages; | |
5464 | ||
5465 | - struct work_struct free_work; | |
5466 | + struct swork_event free_work; | |
5467 | ||
5468 | /* | |
5469 | * signals when all in-flight requests are done | |
5470 | @@ -258,6 +259,7 @@ static int __init aio_setup(void) | |
5471 | .mount = aio_mount, | |
5472 | .kill_sb = kill_anon_super, | |
5473 | }; | |
5474 | + BUG_ON(swork_get()); | |
5475 | aio_mnt = kern_mount(&aio_fs); | |
5476 | if (IS_ERR(aio_mnt)) | |
5477 | panic("Failed to create aio fs mount."); | |
5478 | @@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) | |
5479 | return cancel(&kiocb->common); | |
5480 | } | |
5481 | ||
5482 | -static void free_ioctx(struct work_struct *work) | |
5483 | +static void free_ioctx(struct swork_event *sev) | |
5484 | { | |
5485 | - struct kioctx *ctx = container_of(work, struct kioctx, free_work); | |
5486 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5487 | ||
5488 | pr_debug("freeing %p\n", ctx); | |
5489 | ||
5490 | @@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
5491 | if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) | |
5492 | complete(&ctx->rq_wait->comp); | |
5493 | ||
5494 | - INIT_WORK(&ctx->free_work, free_ioctx); | |
5495 | - schedule_work(&ctx->free_work); | |
5496 | + INIT_SWORK(&ctx->free_work, free_ioctx); | |
5497 | + swork_queue(&ctx->free_work); | |
5498 | } | |
5499 | ||
5500 | /* | |
5501 | @@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
5502 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - | |
5503 | * now it's safe to cancel any that need to be. | |
5504 | */ | |
5505 | -static void free_ioctx_users(struct percpu_ref *ref) | |
5506 | +static void free_ioctx_users_work(struct swork_event *sev) | |
5507 | { | |
5508 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5509 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5510 | struct aio_kiocb *req; | |
5511 | ||
5512 | spin_lock_irq(&ctx->ctx_lock); | |
5513 | @@ -632,6 +634,14 @@ static void free_ioctx_users(struct percpu_ref *ref) | |
5514 | percpu_ref_put(&ctx->reqs); | |
5515 | } | |
5516 | ||
5517 | +static void free_ioctx_users(struct percpu_ref *ref) | |
5518 | +{ | |
5519 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5520 | + | |
5521 | + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); | |
5522 | + swork_queue(&ctx->free_work); | |
5523 | +} | |
5524 | + | |
5525 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
5526 | { | |
5527 | unsigned i, new_nr; | |
5528 | diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h | |
5529 | index a1fba4285277..3796769b4cd1 100644 | |
5530 | --- a/fs/autofs4/autofs_i.h | |
5531 | +++ b/fs/autofs4/autofs_i.h | |
5532 | @@ -31,6 +31,7 @@ | |
5533 | #include <linux/sched.h> | |
5534 | #include <linux/mount.h> | |
5535 | #include <linux/namei.h> | |
5536 | +#include <linux/delay.h> | |
5537 | #include <asm/current.h> | |
5538 | #include <linux/uaccess.h> | |
5539 | ||
5540 | diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c | |
5541 | index d8e6d421c27f..2e689ab1306b 100644 | |
5542 | --- a/fs/autofs4/expire.c | |
5543 | +++ b/fs/autofs4/expire.c | |
5544 | @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, | |
5545 | parent = p->d_parent; | |
5546 | if (!spin_trylock(&parent->d_lock)) { | |
5547 | spin_unlock(&p->d_lock); | |
5548 | - cpu_relax(); | |
5549 | + cpu_chill(); | |
5550 | goto relock; | |
5551 | } | |
5552 | spin_unlock(&p->d_lock); | |
5553 | diff --git a/fs/buffer.c b/fs/buffer.c | |
5554 | index b205a629001d..5646afc022ba 100644 | |
5555 | --- a/fs/buffer.c | |
5556 | +++ b/fs/buffer.c | |
5557 | @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5558 | * decide that the page is now completely done. | |
5559 | */ | |
5560 | first = page_buffers(page); | |
5561 | - local_irq_save(flags); | |
5562 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5563 | + flags = bh_uptodate_lock_irqsave(first); | |
5564 | clear_buffer_async_read(bh); | |
5565 | unlock_buffer(bh); | |
5566 | tmp = bh; | |
5567 | @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5568 | } | |
5569 | tmp = tmp->b_this_page; | |
5570 | } while (tmp != bh); | |
5571 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5572 | - local_irq_restore(flags); | |
5573 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5574 | ||
5575 | /* | |
5576 | * If none of the buffers had errors and they are all | |
5577 | @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5578 | return; | |
5579 | ||
5580 | still_busy: | |
5581 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5582 | - local_irq_restore(flags); | |
5583 | - return; | |
5584 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5585 | } | |
5586 | ||
5587 | /* | |
5588 | @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5589 | } | |
5590 | ||
5591 | first = page_buffers(page); | |
5592 | - local_irq_save(flags); | |
5593 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5594 | + flags = bh_uptodate_lock_irqsave(first); | |
5595 | ||
5596 | clear_buffer_async_write(bh); | |
5597 | unlock_buffer(bh); | |
5598 | @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5599 | } | |
5600 | tmp = tmp->b_this_page; | |
5601 | } | |
5602 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5603 | - local_irq_restore(flags); | |
5604 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5605 | end_page_writeback(page); | |
5606 | return; | |
5607 | ||
5608 | still_busy: | |
5609 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5610 | - local_irq_restore(flags); | |
5611 | - return; | |
5612 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5613 | } | |
5614 | EXPORT_SYMBOL(end_buffer_async_write); | |
5615 | ||
5616 | @@ -3383,6 +3375,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) | |
5617 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); | |
5618 | if (ret) { | |
5619 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
5620 | + buffer_head_init_locks(ret); | |
5621 | preempt_disable(); | |
5622 | __this_cpu_inc(bh_accounting.nr); | |
5623 | recalc_bh_state(); | |
5624 | diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c | |
5625 | index 8f6a2a5863b9..4217828d0b68 100644 | |
5626 | --- a/fs/cifs/readdir.c | |
5627 | +++ b/fs/cifs/readdir.c | |
5628 | @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, | |
5629 | struct inode *inode; | |
5630 | struct super_block *sb = parent->d_sb; | |
5631 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | |
5632 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5633 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5634 | ||
5635 | cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); | |
5636 | ||
5637 | diff --git a/fs/dcache.c b/fs/dcache.c | |
5638 | index 4485a48f4091..691039a6a872 100644 | |
5639 | --- a/fs/dcache.c | |
5640 | +++ b/fs/dcache.c | |
5641 | @@ -19,6 +19,7 @@ | |
5642 | #include <linux/mm.h> | |
5643 | #include <linux/fs.h> | |
5644 | #include <linux/fsnotify.h> | |
5645 | +#include <linux/delay.h> | |
5646 | #include <linux/slab.h> | |
5647 | #include <linux/init.h> | |
5648 | #include <linux/hash.h> | |
5649 | @@ -750,6 +751,8 @@ static inline bool fast_dput(struct dentry *dentry) | |
5650 | */ | |
5651 | void dput(struct dentry *dentry) | |
5652 | { | |
5653 | + struct dentry *parent; | |
5654 | + | |
5655 | if (unlikely(!dentry)) | |
5656 | return; | |
5657 | ||
5658 | @@ -788,9 +791,18 @@ void dput(struct dentry *dentry) | |
5659 | return; | |
5660 | ||
5661 | kill_it: | |
5662 | - dentry = dentry_kill(dentry); | |
5663 | - if (dentry) { | |
5664 | - cond_resched(); | |
5665 | + parent = dentry_kill(dentry); | |
5666 | + if (parent) { | |
5667 | + int r; | |
5668 | + | |
5669 | + if (parent == dentry) { | |
5670 | + /* the task with the highest priority won't schedule */ | |
5671 | + r = cond_resched(); | |
5672 | + if (!r) | |
5673 | + cpu_chill(); | |
5674 | + } else { | |
5675 | + dentry = parent; | |
5676 | + } | |
5677 | goto repeat; | |
5678 | } | |
5679 | } | |
5680 | @@ -2324,7 +2336,7 @@ void d_delete(struct dentry * dentry) | |
5681 | if (dentry->d_lockref.count == 1) { | |
5682 | if (!spin_trylock(&inode->i_lock)) { | |
5683 | spin_unlock(&dentry->d_lock); | |
5684 | - cpu_relax(); | |
5685 | + cpu_chill(); | |
5686 | goto again; | |
5687 | } | |
5688 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
5689 | @@ -2384,21 +2396,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) | |
5690 | ||
5691 | static void d_wait_lookup(struct dentry *dentry) | |
5692 | { | |
5693 | - if (d_in_lookup(dentry)) { | |
5694 | - DECLARE_WAITQUEUE(wait, current); | |
5695 | - add_wait_queue(dentry->d_wait, &wait); | |
5696 | - do { | |
5697 | - set_current_state(TASK_UNINTERRUPTIBLE); | |
5698 | - spin_unlock(&dentry->d_lock); | |
5699 | - schedule(); | |
5700 | - spin_lock(&dentry->d_lock); | |
5701 | - } while (d_in_lookup(dentry)); | |
5702 | - } | |
5703 | + struct swait_queue __wait; | |
5704 | + | |
5705 | + if (!d_in_lookup(dentry)) | |
5706 | + return; | |
5707 | + | |
5708 | + INIT_LIST_HEAD(&__wait.task_list); | |
5709 | + do { | |
5710 | + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); | |
5711 | + spin_unlock(&dentry->d_lock); | |
5712 | + schedule(); | |
5713 | + spin_lock(&dentry->d_lock); | |
5714 | + } while (d_in_lookup(dentry)); | |
5715 | + finish_swait(dentry->d_wait, &__wait); | |
5716 | } | |
5717 | ||
5718 | struct dentry *d_alloc_parallel(struct dentry *parent, | |
5719 | const struct qstr *name, | |
5720 | - wait_queue_head_t *wq) | |
5721 | + struct swait_queue_head *wq) | |
5722 | { | |
5723 | unsigned int hash = name->hash; | |
5724 | struct hlist_bl_head *b = in_lookup_hash(parent, hash); | |
5725 | @@ -2507,7 +2522,7 @@ void __d_lookup_done(struct dentry *dentry) | |
5726 | hlist_bl_lock(b); | |
5727 | dentry->d_flags &= ~DCACHE_PAR_LOOKUP; | |
5728 | __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); | |
5729 | - wake_up_all(dentry->d_wait); | |
5730 | + swake_up_all(dentry->d_wait); | |
5731 | dentry->d_wait = NULL; | |
5732 | hlist_bl_unlock(b); | |
5733 | INIT_HLIST_NODE(&dentry->d_u.d_alias); | |
5734 | @@ -3604,6 +3619,11 @@ EXPORT_SYMBOL(d_genocide); | |
5735 | ||
5736 | void __init vfs_caches_init_early(void) | |
5737 | { | |
5738 | + int i; | |
5739 | + | |
5740 | + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) | |
5741 | + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); | |
5742 | + | |
5743 | dcache_init_early(); | |
5744 | inode_init_early(); | |
5745 | } | |
5746 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
5747 | index 10db91218933..42af0a06f657 100644 | |
5748 | --- a/fs/eventpoll.c | |
5749 | +++ b/fs/eventpoll.c | |
5750 | @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | |
5751 | */ | |
5752 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
5753 | { | |
5754 | - int this_cpu = get_cpu(); | |
5755 | + int this_cpu = get_cpu_light(); | |
5756 | ||
5757 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
5758 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
5759 | ||
5760 | - put_cpu(); | |
5761 | + put_cpu_light(); | |
5762 | } | |
5763 | ||
5764 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
5765 | diff --git a/fs/exec.c b/fs/exec.c | |
5766 | index 67e86571685a..fe14cdd84016 100644 | |
5767 | --- a/fs/exec.c | |
5768 | +++ b/fs/exec.c | |
5769 | @@ -1017,12 +1017,14 @@ static int exec_mmap(struct mm_struct *mm) | |
5770 | } | |
5771 | } | |
5772 | task_lock(tsk); | |
5773 | + preempt_disable_rt(); | |
5774 | active_mm = tsk->active_mm; | |
5775 | tsk->mm = mm; | |
5776 | tsk->active_mm = mm; | |
5777 | activate_mm(active_mm, mm); | |
5778 | tsk->mm->vmacache_seqnum = 0; | |
5779 | vmacache_flush(tsk); | |
5780 | + preempt_enable_rt(); | |
5781 | task_unlock(tsk); | |
5782 | if (old_mm) { | |
5783 | up_read(&old_mm->mmap_sem); | |
5784 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c | |
5785 | index 642c57b8de7b..8494b9308333 100644 | |
5786 | --- a/fs/fuse/dir.c | |
5787 | +++ b/fs/fuse/dir.c | |
5788 | @@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file, | |
5789 | struct inode *dir = d_inode(parent); | |
5790 | struct fuse_conn *fc; | |
5791 | struct inode *inode; | |
5792 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5793 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5794 | ||
5795 | if (!o->nodeid) { | |
5796 | /* | |
5797 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c | |
5798 | index 684996c8a3a4..6e18a06aaabe 100644 | |
5799 | --- a/fs/jbd2/checkpoint.c | |
5800 | +++ b/fs/jbd2/checkpoint.c | |
5801 | @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |
5802 | nblocks = jbd2_space_needed(journal); | |
5803 | while (jbd2_log_space_left(journal) < nblocks) { | |
5804 | write_unlock(&journal->j_state_lock); | |
5805 | + if (current->plug) | |
5806 | + io_schedule(); | |
5807 | mutex_lock(&journal->j_checkpoint_mutex); | |
5808 | ||
5809 | /* | |
5810 | diff --git a/fs/locks.c b/fs/locks.c | |
5811 | index 22c5b4aa4961..269c6a44449a 100644 | |
5812 | --- a/fs/locks.c | |
5813 | +++ b/fs/locks.c | |
5814 | @@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5815 | return -ENOMEM; | |
5816 | } | |
5817 | ||
5818 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5819 | + percpu_down_read(&file_rwsem); | |
5820 | spin_lock(&ctx->flc_lock); | |
5821 | if (request->fl_flags & FL_ACCESS) | |
5822 | goto find_conflict; | |
5823 | @@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5824 | ||
5825 | out: | |
5826 | spin_unlock(&ctx->flc_lock); | |
5827 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5828 | + percpu_up_read(&file_rwsem); | |
5829 | if (new_fl) | |
5830 | locks_free_lock(new_fl); | |
5831 | locks_dispose_list(&dispose); | |
5832 | @@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5833 | new_fl2 = locks_alloc_lock(); | |
5834 | } | |
5835 | ||
5836 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5837 | + percpu_down_read(&file_rwsem); | |
5838 | spin_lock(&ctx->flc_lock); | |
5839 | /* | |
5840 | * New lock request. Walk all POSIX locks and look for conflicts. If | |
5841 | @@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5842 | } | |
5843 | out: | |
5844 | spin_unlock(&ctx->flc_lock); | |
5845 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5846 | + percpu_up_read(&file_rwsem); | |
5847 | /* | |
5848 | * Free any unused locks. | |
5849 | */ | |
5850 | @@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5851 | return error; | |
5852 | } | |
5853 | ||
5854 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5855 | + percpu_down_read(&file_rwsem); | |
5856 | spin_lock(&ctx->flc_lock); | |
5857 | ||
5858 | time_out_leases(inode, &dispose); | |
5859 | @@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5860 | locks_insert_block(fl, new_fl); | |
5861 | trace_break_lease_block(inode, new_fl); | |
5862 | spin_unlock(&ctx->flc_lock); | |
5863 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5864 | + percpu_up_read(&file_rwsem); | |
5865 | ||
5866 | locks_dispose_list(&dispose); | |
5867 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | |
5868 | !new_fl->fl_next, break_time); | |
5869 | ||
5870 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5871 | + percpu_down_read(&file_rwsem); | |
5872 | spin_lock(&ctx->flc_lock); | |
5873 | trace_break_lease_unblock(inode, new_fl); | |
5874 | locks_delete_block(new_fl); | |
5875 | @@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5876 | } | |
5877 | out: | |
5878 | spin_unlock(&ctx->flc_lock); | |
5879 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5880 | + percpu_up_read(&file_rwsem); | |
5881 | locks_dispose_list(&dispose); | |
5882 | locks_free_lock(new_fl); | |
5883 | return error; | |
5884 | @@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp) | |
5885 | ||
5886 | ctx = smp_load_acquire(&inode->i_flctx); | |
5887 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { | |
5888 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5889 | + percpu_down_read(&file_rwsem); | |
5890 | spin_lock(&ctx->flc_lock); | |
5891 | time_out_leases(inode, &dispose); | |
5892 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
5893 | @@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) | |
5894 | break; | |
5895 | } | |
5896 | spin_unlock(&ctx->flc_lock); | |
5897 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5898 | + percpu_up_read(&file_rwsem); | |
5899 | ||
5900 | locks_dispose_list(&dispose); | |
5901 | } | |
5902 | @@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5903 | return -EINVAL; | |
5904 | } | |
5905 | ||
5906 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5907 | + percpu_down_read(&file_rwsem); | |
5908 | spin_lock(&ctx->flc_lock); | |
5909 | time_out_leases(inode, &dispose); | |
5910 | error = check_conflicting_open(dentry, arg, lease->fl_flags); | |
5911 | @@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5912 | lease->fl_lmops->lm_setup(lease, priv); | |
5913 | out: | |
5914 | spin_unlock(&ctx->flc_lock); | |
5915 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5916 | + percpu_up_read(&file_rwsem); | |
5917 | locks_dispose_list(&dispose); | |
5918 | if (is_deleg) | |
5919 | inode_unlock(inode); | |
5920 | @@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
5921 | return error; | |
5922 | } | |
5923 | ||
5924 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5925 | + percpu_down_read(&file_rwsem); | |
5926 | spin_lock(&ctx->flc_lock); | |
5927 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
5928 | if (fl->fl_file == filp && | |
5929 | @@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
5930 | if (victim) | |
5931 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); | |
5932 | spin_unlock(&ctx->flc_lock); | |
5933 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5934 | + percpu_up_read(&file_rwsem); | |
5935 | locks_dispose_list(&dispose); | |
5936 | return error; | |
5937 | } | |
5938 | @@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx) | |
5939 | if (list_empty(&ctx->flc_lease)) | |
5940 | return; | |
5941 | ||
5942 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5943 | + percpu_down_read(&file_rwsem); | |
5944 | spin_lock(&ctx->flc_lock); | |
5945 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) | |
5946 | if (filp == fl->fl_file) | |
5947 | lease_modify(fl, F_UNLCK, &dispose); | |
5948 | spin_unlock(&ctx->flc_lock); | |
5949 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5950 | + percpu_up_read(&file_rwsem); | |
5951 | ||
5952 | locks_dispose_list(&dispose); | |
5953 | } | |
5954 | diff --git a/fs/namei.c b/fs/namei.c | |
5955 | index 5b4eed221530..9c8dd3c83a80 100644 | |
5956 | --- a/fs/namei.c | |
5957 | +++ b/fs/namei.c | |
5958 | @@ -1629,7 +1629,7 @@ static struct dentry *lookup_slow(const struct qstr *name, | |
5959 | { | |
5960 | struct dentry *dentry = ERR_PTR(-ENOENT), *old; | |
5961 | struct inode *inode = dir->d_inode; | |
5962 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5963 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5964 | ||
5965 | inode_lock_shared(inode); | |
5966 | /* Don't go there if it's already dead */ | |
5967 | @@ -3086,7 +3086,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |
5968 | struct dentry *dentry; | |
5969 | int error, create_error = 0; | |
5970 | umode_t mode = op->mode; | |
5971 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5972 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5973 | ||
5974 | if (unlikely(IS_DEADDIR(dir_inode))) | |
5975 | return -ENOENT; | |
5976 | diff --git a/fs/namespace.c b/fs/namespace.c | |
5977 | index 7cea503ae06d..cb15f5397991 100644 | |
5978 | --- a/fs/namespace.c | |
5979 | +++ b/fs/namespace.c | |
5980 | @@ -14,6 +14,7 @@ | |
5981 | #include <linux/mnt_namespace.h> | |
5982 | #include <linux/user_namespace.h> | |
5983 | #include <linux/namei.h> | |
5984 | +#include <linux/delay.h> | |
5985 | #include <linux/security.h> | |
5986 | #include <linux/idr.h> | |
5987 | #include <linux/init.h> /* init_rootfs */ | |
5988 | @@ -356,8 +357,11 @@ int __mnt_want_write(struct vfsmount *m) | |
5989 | * incremented count after it has set MNT_WRITE_HOLD. | |
5990 | */ | |
5991 | smp_mb(); | |
5992 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
5993 | - cpu_relax(); | |
5994 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
5995 | + preempt_enable(); | |
5996 | + cpu_chill(); | |
5997 | + preempt_disable(); | |
5998 | + } | |
5999 | /* | |
6000 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
6001 | * be set to match its requirements. So we must not load that until | |
6002 | diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c | |
6003 | index dff600ae0d74..d726d2e09353 100644 | |
6004 | --- a/fs/nfs/delegation.c | |
6005 | +++ b/fs/nfs/delegation.c | |
6006 | @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode, | |
6007 | sp = state->owner; | |
6008 | /* Block nfs4_proc_unlck */ | |
6009 | mutex_lock(&sp->so_delegreturn_mutex); | |
6010 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
6011 | + seq = read_seqbegin(&sp->so_reclaim_seqlock); | |
6012 | err = nfs4_open_delegation_recall(ctx, state, stateid, type); | |
6013 | if (!err) | |
6014 | err = nfs_delegation_claim_locks(ctx, state, stateid); | |
6015 | - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
6016 | + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
6017 | err = -EAGAIN; | |
6018 | mutex_unlock(&sp->so_delegreturn_mutex); | |
6019 | put_nfs_open_context(ctx); | |
6020 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | |
6021 | index 53e02b8bd9bd..a66e7d77cfbb 100644 | |
6022 | --- a/fs/nfs/dir.c | |
6023 | +++ b/fs/nfs/dir.c | |
6024 | @@ -485,7 +485,7 @@ static | |
6025 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |
6026 | { | |
6027 | struct qstr filename = QSTR_INIT(entry->name, entry->len); | |
6028 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6029 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6030 | struct dentry *dentry; | |
6031 | struct dentry *alias; | |
6032 | struct inode *dir = d_inode(parent); | |
6033 | @@ -1487,7 +1487,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |
6034 | struct file *file, unsigned open_flags, | |
6035 | umode_t mode, int *opened) | |
6036 | { | |
6037 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6038 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6039 | struct nfs_open_context *ctx; | |
6040 | struct dentry *res; | |
6041 | struct iattr attr = { .ia_valid = ATTR_OPEN }; | |
6042 | @@ -1802,7 +1802,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |
6043 | ||
6044 | trace_nfs_rmdir_enter(dir, dentry); | |
6045 | if (d_really_is_positive(dentry)) { | |
6046 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6047 | + down(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6048 | +#else | |
6049 | down_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6050 | +#endif | |
6051 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
6052 | /* Ensure the VFS deletes this inode */ | |
6053 | switch (error) { | |
6054 | @@ -1812,7 +1816,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |
6055 | case -ENOENT: | |
6056 | nfs_dentry_handle_enoent(dentry); | |
6057 | } | |
6058 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6059 | + up(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6060 | +#else | |
6061 | up_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6062 | +#endif | |
6063 | } else | |
6064 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
6065 | trace_nfs_rmdir_exit(dir, dentry, error); | |
6066 | diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c | |
6067 | index bf4ec5ecc97e..36cd5fc9192c 100644 | |
6068 | --- a/fs/nfs/inode.c | |
6069 | +++ b/fs/nfs/inode.c | |
6070 | @@ -1957,7 +1957,11 @@ static void init_once(void *foo) | |
6071 | nfsi->nrequests = 0; | |
6072 | nfsi->commit_info.ncommit = 0; | |
6073 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | |
6074 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6075 | + sema_init(&nfsi->rmdir_sem, 1); | |
6076 | +#else | |
6077 | init_rwsem(&nfsi->rmdir_sem); | |
6078 | +#endif | |
6079 | nfs4_init_once(nfsi); | |
6080 | } | |
6081 | ||
6082 | diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h | |
6083 | index 1452177c822d..f43b01d54c59 100644 | |
6084 | --- a/fs/nfs/nfs4_fs.h | |
6085 | +++ b/fs/nfs/nfs4_fs.h | |
6086 | @@ -111,7 +111,7 @@ struct nfs4_state_owner { | |
6087 | unsigned long so_flags; | |
6088 | struct list_head so_states; | |
6089 | struct nfs_seqid_counter so_seqid; | |
6090 | - seqcount_t so_reclaim_seqcount; | |
6091 | + seqlock_t so_reclaim_seqlock; | |
6092 | struct mutex so_delegreturn_mutex; | |
6093 | }; | |
6094 | ||
6095 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | |
6096 | index 241da19b7da4..8f9636cc298f 100644 | |
6097 | --- a/fs/nfs/nfs4proc.c | |
6098 | +++ b/fs/nfs/nfs4proc.c | |
6099 | @@ -2697,7 +2697,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |
6100 | unsigned int seq; | |
6101 | int ret; | |
6102 | ||
6103 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
6104 | + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
6105 | ||
6106 | ret = _nfs4_proc_open(opendata); | |
6107 | if (ret != 0) | |
6108 | @@ -2735,7 +2735,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |
6109 | ctx->state = state; | |
6110 | if (d_inode(dentry) == state->inode) { | |
6111 | nfs_inode_attach_open_context(ctx); | |
6112 | - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
6113 | + if (read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
6114 | nfs4_schedule_stateid_recovery(server, state); | |
6115 | } | |
6116 | out: | |
6117 | diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c | |
6118 | index 0959c9661662..dabd834d7686 100644 | |
6119 | --- a/fs/nfs/nfs4state.c | |
6120 | +++ b/fs/nfs/nfs4state.c | |
6121 | @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, | |
6122 | nfs4_init_seqid_counter(&sp->so_seqid); | |
6123 | atomic_set(&sp->so_count, 1); | |
6124 | INIT_LIST_HEAD(&sp->so_lru); | |
6125 | - seqcount_init(&sp->so_reclaim_seqcount); | |
6126 | + seqlock_init(&sp->so_reclaim_seqlock); | |
6127 | mutex_init(&sp->so_delegreturn_mutex); | |
6128 | return sp; | |
6129 | } | |
6130 | @@ -1497,8 +1497,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |
6131 | * recovering after a network partition or a reboot from a | |
6132 | * server that doesn't support a grace period. | |
6133 | */ | |
6134 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6135 | + write_seqlock(&sp->so_reclaim_seqlock); | |
6136 | +#else | |
6137 | + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
6138 | +#endif | |
6139 | spin_lock(&sp->so_lock); | |
6140 | - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); | |
6141 | restart: | |
6142 | list_for_each_entry(state, &sp->so_states, open_states) { | |
6143 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | |
6144 | @@ -1567,14 +1571,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |
6145 | spin_lock(&sp->so_lock); | |
6146 | goto restart; | |
6147 | } | |
6148 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
6149 | spin_unlock(&sp->so_lock); | |
6150 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6151 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
6152 | +#else | |
6153 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
6154 | +#endif | |
6155 | return 0; | |
6156 | out_err: | |
6157 | nfs4_put_open_state(state); | |
6158 | - spin_lock(&sp->so_lock); | |
6159 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
6160 | - spin_unlock(&sp->so_lock); | |
6161 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6162 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
6163 | +#else | |
6164 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
6165 | +#endif | |
6166 | return status; | |
6167 | } | |
6168 | ||
6169 | diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c | |
6170 | index 191aa577dd1f..58990c8f52e0 100644 | |
6171 | --- a/fs/nfs/unlink.c | |
6172 | +++ b/fs/nfs/unlink.c | |
6173 | @@ -12,7 +12,7 @@ | |
6174 | #include <linux/sunrpc/clnt.h> | |
6175 | #include <linux/nfs_fs.h> | |
6176 | #include <linux/sched.h> | |
6177 | -#include <linux/wait.h> | |
6178 | +#include <linux/swait.h> | |
6179 | #include <linux/namei.h> | |
6180 | #include <linux/fsnotify.h> | |
6181 | ||
6182 | @@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |
6183 | rpc_restart_call_prepare(task); | |
6184 | } | |
6185 | ||
6186 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6187 | +static void nfs_down_anon(struct semaphore *sema) | |
6188 | +{ | |
6189 | + down(sema); | |
6190 | +} | |
6191 | + | |
6192 | +static void nfs_up_anon(struct semaphore *sema) | |
6193 | +{ | |
6194 | + up(sema); | |
6195 | +} | |
6196 | + | |
6197 | +#else | |
6198 | +static void nfs_down_anon(struct rw_semaphore *rwsem) | |
6199 | +{ | |
6200 | + down_read_non_owner(rwsem); | |
6201 | +} | |
6202 | + | |
6203 | +static void nfs_up_anon(struct rw_semaphore *rwsem) | |
6204 | +{ | |
6205 | + up_read_non_owner(rwsem); | |
6206 | +} | |
6207 | +#endif | |
6208 | + | |
6209 | /** | |
6210 | * nfs_async_unlink_release - Release the sillydelete data. | |
6211 | * @task: rpc_task of the sillydelete | |
6212 | @@ -64,7 +87,7 @@ static void nfs_async_unlink_release(void *calldata) | |
6213 | struct dentry *dentry = data->dentry; | |
6214 | struct super_block *sb = dentry->d_sb; | |
6215 | ||
6216 | - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6217 | + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6218 | d_lookup_done(dentry); | |
6219 | nfs_free_unlinkdata(data); | |
6220 | dput(dentry); | |
6221 | @@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6222 | struct inode *dir = d_inode(dentry->d_parent); | |
6223 | struct dentry *alias; | |
6224 | ||
6225 | - down_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6226 | + nfs_down_anon(&NFS_I(dir)->rmdir_sem); | |
6227 | alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); | |
6228 | if (IS_ERR(alias)) { | |
6229 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6230 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6231 | return 0; | |
6232 | } | |
6233 | if (!d_in_lookup(alias)) { | |
6234 | @@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6235 | ret = 0; | |
6236 | spin_unlock(&alias->d_lock); | |
6237 | dput(alias); | |
6238 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6239 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6240 | /* | |
6241 | * If we'd displaced old cached devname, free it. At that | |
6242 | * point dentry is definitely not a root, so we won't need | |
6243 | @@ -182,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) | |
6244 | goto out_free_name; | |
6245 | } | |
6246 | data->res.dir_attr = &data->dir_attr; | |
6247 | - init_waitqueue_head(&data->wq); | |
6248 | + init_swait_queue_head(&data->wq); | |
6249 | ||
6250 | status = -EBUSY; | |
6251 | spin_lock(&dentry->d_lock); | |
6252 | diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c | |
6253 | index fe251f187ff8..e89da4fb14c2 100644 | |
6254 | --- a/fs/ntfs/aops.c | |
6255 | +++ b/fs/ntfs/aops.c | |
6256 | @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6257 | ofs = 0; | |
6258 | if (file_ofs < init_size) | |
6259 | ofs = init_size - file_ofs; | |
6260 | - local_irq_save(flags); | |
6261 | + local_irq_save_nort(flags); | |
6262 | kaddr = kmap_atomic(page); | |
6263 | memset(kaddr + bh_offset(bh) + ofs, 0, | |
6264 | bh->b_size - ofs); | |
6265 | flush_dcache_page(page); | |
6266 | kunmap_atomic(kaddr); | |
6267 | - local_irq_restore(flags); | |
6268 | + local_irq_restore_nort(flags); | |
6269 | } | |
6270 | } else { | |
6271 | clear_buffer_uptodate(bh); | |
6272 | @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6273 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
6274 | } | |
6275 | first = page_buffers(page); | |
6276 | - local_irq_save(flags); | |
6277 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
6278 | + flags = bh_uptodate_lock_irqsave(first); | |
6279 | clear_buffer_async_read(bh); | |
6280 | unlock_buffer(bh); | |
6281 | tmp = bh; | |
6282 | @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6283 | } | |
6284 | tmp = tmp->b_this_page; | |
6285 | } while (tmp != bh); | |
6286 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6287 | - local_irq_restore(flags); | |
6288 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6289 | /* | |
6290 | * If none of the buffers had errors then we can set the page uptodate, | |
6291 | * but we first have to perform the post read mst fixups, if the | |
6292 | @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6293 | recs = PAGE_SIZE / rec_size; | |
6294 | /* Should have been verified before we got here... */ | |
6295 | BUG_ON(!recs); | |
6296 | - local_irq_save(flags); | |
6297 | + local_irq_save_nort(flags); | |
6298 | kaddr = kmap_atomic(page); | |
6299 | for (i = 0; i < recs; i++) | |
6300 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
6301 | i * rec_size), rec_size); | |
6302 | kunmap_atomic(kaddr); | |
6303 | - local_irq_restore(flags); | |
6304 | + local_irq_restore_nort(flags); | |
6305 | flush_dcache_page(page); | |
6306 | if (likely(page_uptodate && !PageError(page))) | |
6307 | SetPageUptodate(page); | |
6308 | @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6309 | unlock_page(page); | |
6310 | return; | |
6311 | still_busy: | |
6312 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6313 | - local_irq_restore(flags); | |
6314 | - return; | |
6315 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6316 | } | |
6317 | ||
6318 | /** | |
6319 | diff --git a/fs/proc/base.c b/fs/proc/base.c | |
6320 | index ca651ac00660..41d9dc789285 100644 | |
6321 | --- a/fs/proc/base.c | |
6322 | +++ b/fs/proc/base.c | |
6323 | @@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, | |
6324 | ||
6325 | child = d_hash_and_lookup(dir, &qname); | |
6326 | if (!child) { | |
6327 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6328 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6329 | child = d_alloc_parallel(dir, &qname, &wq); | |
6330 | if (IS_ERR(child)) | |
6331 | goto end_instantiate; | |
6332 | diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c | |
6333 | index d4e37acd4821..000cea46434a 100644 | |
6334 | --- a/fs/proc/proc_sysctl.c | |
6335 | +++ b/fs/proc/proc_sysctl.c | |
6336 | @@ -632,7 +632,7 @@ static bool proc_sys_fill_cache(struct file *file, | |
6337 | ||
6338 | child = d_lookup(dir, &qname); | |
6339 | if (!child) { | |
6340 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6341 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6342 | child = d_alloc_parallel(dir, &qname, &wq); | |
6343 | if (IS_ERR(child)) | |
6344 | return false; | |
6345 | diff --git a/fs/timerfd.c b/fs/timerfd.c | |
6346 | index 9ae4abb4110b..8644b67c48fd 100644 | |
6347 | --- a/fs/timerfd.c | |
6348 | +++ b/fs/timerfd.c | |
6349 | @@ -460,7 +460,10 @@ static int do_timerfd_settime(int ufd, int flags, | |
6350 | break; | |
6351 | } | |
6352 | spin_unlock_irq(&ctx->wqh.lock); | |
6353 | - cpu_relax(); | |
6354 | + if (isalarm(ctx)) | |
6355 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
6356 | + else | |
6357 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
6358 | } | |
6359 | ||
6360 | /* | |
6361 | diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h | |
6362 | index e861a24f06f2..b5c97d3059c7 100644 | |
6363 | --- a/include/acpi/platform/aclinux.h | |
6364 | +++ b/include/acpi/platform/aclinux.h | |
6365 | @@ -133,6 +133,7 @@ | |
6366 | ||
6367 | #define acpi_cache_t struct kmem_cache | |
6368 | #define acpi_spinlock spinlock_t * | |
6369 | +#define acpi_raw_spinlock raw_spinlock_t * | |
6370 | #define acpi_cpu_flags unsigned long | |
6371 | ||
6372 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
6373 | @@ -151,6 +152,20 @@ | |
6374 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id | |
6375 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
6376 | ||
6377 | +#define acpi_os_create_raw_lock(__handle) \ | |
6378 | +({ \ | |
6379 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
6380 | + \ | |
6381 | + if (lock) { \ | |
6382 | + *(__handle) = lock; \ | |
6383 | + raw_spin_lock_init(*(__handle)); \ | |
6384 | + } \ | |
6385 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
6386 | + }) | |
6387 | + | |
6388 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
6389 | + | |
6390 | + | |
6391 | /* | |
6392 | * OSL interfaces used by debugger/disassembler | |
6393 | */ | |
6394 | diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h | |
6395 | index 6f96247226a4..fa53a21263c2 100644 | |
6396 | --- a/include/asm-generic/bug.h | |
6397 | +++ b/include/asm-generic/bug.h | |
6398 | @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint, | |
6399 | # define WARN_ON_SMP(x) ({0;}) | |
6400 | #endif | |
6401 | ||
6402 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6403 | +# define BUG_ON_RT(c) BUG_ON(c) | |
6404 | +# define BUG_ON_NONRT(c) do { } while (0) | |
6405 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
6406 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
6407 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
6408 | +#else | |
6409 | +# define BUG_ON_RT(c) do { } while (0) | |
6410 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
6411 | +# define WARN_ON_RT(condition) do { } while (0) | |
6412 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
6413 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
6414 | +#endif | |
6415 | + | |
6416 | #endif /* __ASSEMBLY__ */ | |
6417 | ||
6418 | #endif | |
6419 | diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h | |
6420 | index 535ab2e13d2e..cfc246899473 100644 | |
6421 | --- a/include/linux/blk-mq.h | |
6422 | +++ b/include/linux/blk-mq.h | |
6423 | @@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) | |
6424 | return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; | |
6425 | } | |
6426 | ||
6427 | - | |
6428 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); | |
6429 | int blk_mq_request_started(struct request *rq); | |
6430 | void blk_mq_start_request(struct request *rq); | |
6431 | void blk_mq_end_request(struct request *rq, int error); | |
6432 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h | |
6433 | index f6a816129856..ec7a4676f8a8 100644 | |
6434 | --- a/include/linux/blkdev.h | |
6435 | +++ b/include/linux/blkdev.h | |
6436 | @@ -89,6 +89,7 @@ struct request { | |
6437 | struct list_head queuelist; | |
6438 | union { | |
6439 | struct call_single_data csd; | |
6440 | + struct work_struct work; | |
6441 | u64 fifo_time; | |
6442 | }; | |
6443 | ||
6444 | @@ -467,7 +468,7 @@ struct request_queue { | |
6445 | struct throtl_data *td; | |
6446 | #endif | |
6447 | struct rcu_head rcu_head; | |
6448 | - wait_queue_head_t mq_freeze_wq; | |
6449 | + struct swait_queue_head mq_freeze_wq; | |
6450 | struct percpu_ref q_usage_counter; | |
6451 | struct list_head all_q_node; | |
6452 | ||
6453 | diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h | |
6454 | index 8fdcb783197d..d07dbeec7bc1 100644 | |
6455 | --- a/include/linux/bottom_half.h | |
6456 | +++ b/include/linux/bottom_half.h | |
6457 | @@ -3,6 +3,39 @@ | |
6458 | ||
6459 | #include <linux/preempt.h> | |
6460 | ||
6461 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6462 | + | |
6463 | +extern void __local_bh_disable(void); | |
6464 | +extern void _local_bh_enable(void); | |
6465 | +extern void __local_bh_enable(void); | |
6466 | + | |
6467 | +static inline void local_bh_disable(void) | |
6468 | +{ | |
6469 | + __local_bh_disable(); | |
6470 | +} | |
6471 | + | |
6472 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
6473 | +{ | |
6474 | + __local_bh_disable(); | |
6475 | +} | |
6476 | + | |
6477 | +static inline void local_bh_enable(void) | |
6478 | +{ | |
6479 | + __local_bh_enable(); | |
6480 | +} | |
6481 | + | |
6482 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
6483 | +{ | |
6484 | + __local_bh_enable(); | |
6485 | +} | |
6486 | + | |
6487 | +static inline void local_bh_enable_ip(unsigned long ip) | |
6488 | +{ | |
6489 | + __local_bh_enable(); | |
6490 | +} | |
6491 | + | |
6492 | +#else | |
6493 | + | |
6494 | #ifdef CONFIG_TRACE_IRQFLAGS | |
6495 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
6496 | #else | |
6497 | @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) | |
6498 | { | |
6499 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
6500 | } | |
6501 | +#endif | |
6502 | ||
6503 | #endif /* _LINUX_BH_H */ | |
6504 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
6505 | index ebbacd14d450..be5e87f6360a 100644 | |
6506 | --- a/include/linux/buffer_head.h | |
6507 | +++ b/include/linux/buffer_head.h | |
6508 | @@ -75,8 +75,50 @@ struct buffer_head { | |
6509 | struct address_space *b_assoc_map; /* mapping this buffer is | |
6510 | associated with */ | |
6511 | atomic_t b_count; /* users using this buffer_head */ | |
6512 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6513 | + spinlock_t b_uptodate_lock; | |
6514 | +#if IS_ENABLED(CONFIG_JBD2) | |
6515 | + spinlock_t b_state_lock; | |
6516 | + spinlock_t b_journal_head_lock; | |
6517 | +#endif | |
6518 | +#endif | |
6519 | }; | |
6520 | ||
6521 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
6522 | +{ | |
6523 | + unsigned long flags; | |
6524 | + | |
6525 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6526 | + local_irq_save(flags); | |
6527 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
6528 | +#else | |
6529 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
6530 | +#endif | |
6531 | + return flags; | |
6532 | +} | |
6533 | + | |
6534 | +static inline void | |
6535 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
6536 | +{ | |
6537 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6538 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
6539 | + local_irq_restore(flags); | |
6540 | +#else | |
6541 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
6542 | +#endif | |
6543 | +} | |
6544 | + | |
6545 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
6546 | +{ | |
6547 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6548 | + spin_lock_init(&bh->b_uptodate_lock); | |
6549 | +#if IS_ENABLED(CONFIG_JBD2) | |
6550 | + spin_lock_init(&bh->b_state_lock); | |
6551 | + spin_lock_init(&bh->b_journal_head_lock); | |
6552 | +#endif | |
6553 | +#endif | |
6554 | +} | |
6555 | + | |
6556 | /* | |
6557 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
6558 | * and buffer_foo() functions. | |
6559 | diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h | |
6560 | index 5b17de62c962..56027cc01a56 100644 | |
6561 | --- a/include/linux/cgroup-defs.h | |
6562 | +++ b/include/linux/cgroup-defs.h | |
6563 | @@ -16,6 +16,7 @@ | |
6564 | #include <linux/percpu-refcount.h> | |
6565 | #include <linux/percpu-rwsem.h> | |
6566 | #include <linux/workqueue.h> | |
6567 | +#include <linux/swork.h> | |
6568 | ||
6569 | #ifdef CONFIG_CGROUPS | |
6570 | ||
6571 | @@ -137,6 +138,7 @@ struct cgroup_subsys_state { | |
6572 | /* percpu_ref killing and RCU release */ | |
6573 | struct rcu_head rcu_head; | |
6574 | struct work_struct destroy_work; | |
6575 | + struct swork_event destroy_swork; | |
6576 | }; | |
6577 | ||
6578 | /* | |
6579 | diff --git a/include/linux/completion.h b/include/linux/completion.h | |
6580 | index 5d5aaae3af43..3bca1590e29f 100644 | |
6581 | --- a/include/linux/completion.h | |
6582 | +++ b/include/linux/completion.h | |
6583 | @@ -7,8 +7,7 @@ | |
6584 | * Atomic wait-for-completion handler data structures. | |
6585 | * See kernel/sched/completion.c for details. | |
6586 | */ | |
6587 | - | |
6588 | -#include <linux/wait.h> | |
6589 | +#include <linux/swait.h> | |
6590 | ||
6591 | /* | |
6592 | * struct completion - structure used to maintain state for a "completion" | |
6593 | @@ -24,11 +23,11 @@ | |
6594 | */ | |
6595 | struct completion { | |
6596 | unsigned int done; | |
6597 | - wait_queue_head_t wait; | |
6598 | + struct swait_queue_head wait; | |
6599 | }; | |
6600 | ||
6601 | #define COMPLETION_INITIALIZER(work) \ | |
6602 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6603 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6604 | ||
6605 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
6606 | ({ init_completion(&work); work; }) | |
6607 | @@ -73,7 +72,7 @@ struct completion { | |
6608 | static inline void init_completion(struct completion *x) | |
6609 | { | |
6610 | x->done = 0; | |
6611 | - init_waitqueue_head(&x->wait); | |
6612 | + init_swait_queue_head(&x->wait); | |
6613 | } | |
6614 | ||
6615 | /** | |
6616 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | |
6617 | index e571128ad99a..5e52d28c20c1 100644 | |
6618 | --- a/include/linux/cpu.h | |
6619 | +++ b/include/linux/cpu.h | |
6620 | @@ -182,6 +182,8 @@ extern void get_online_cpus(void); | |
6621 | extern void put_online_cpus(void); | |
6622 | extern void cpu_hotplug_disable(void); | |
6623 | extern void cpu_hotplug_enable(void); | |
6624 | +extern void pin_current_cpu(void); | |
6625 | +extern void unpin_current_cpu(void); | |
6626 | #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) | |
6627 | #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) | |
6628 | #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) | |
6629 | @@ -199,6 +201,8 @@ static inline void cpu_hotplug_done(void) {} | |
6630 | #define put_online_cpus() do { } while (0) | |
6631 | #define cpu_hotplug_disable() do { } while (0) | |
6632 | #define cpu_hotplug_enable() do { } while (0) | |
6633 | +static inline void pin_current_cpu(void) { } | |
6634 | +static inline void unpin_current_cpu(void) { } | |
6635 | #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6636 | #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6637 | /* These aren't inline functions due to a GCC bug. */ | |
6638 | diff --git a/include/linux/dcache.h b/include/linux/dcache.h | |
6639 | index 5beed7b30561..61cab7ef458e 100644 | |
6640 | --- a/include/linux/dcache.h | |
6641 | +++ b/include/linux/dcache.h | |
6642 | @@ -11,6 +11,7 @@ | |
6643 | #include <linux/rcupdate.h> | |
6644 | #include <linux/lockref.h> | |
6645 | #include <linux/stringhash.h> | |
6646 | +#include <linux/wait.h> | |
6647 | ||
6648 | struct path; | |
6649 | struct vfsmount; | |
6650 | @@ -100,7 +101,7 @@ struct dentry { | |
6651 | ||
6652 | union { | |
6653 | struct list_head d_lru; /* LRU list */ | |
6654 | - wait_queue_head_t *d_wait; /* in-lookup ones only */ | |
6655 | + struct swait_queue_head *d_wait; /* in-lookup ones only */ | |
6656 | }; | |
6657 | struct list_head d_child; /* child of parent list */ | |
6658 | struct list_head d_subdirs; /* our children */ | |
6659 | @@ -230,7 +231,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op | |
6660 | extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
6661 | extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
6662 | extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, | |
6663 | - wait_queue_head_t *); | |
6664 | + struct swait_queue_head *); | |
6665 | extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
6666 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
6667 | extern struct dentry * d_exact_alias(struct dentry *, struct inode *); | |
6668 | diff --git a/include/linux/delay.h b/include/linux/delay.h | |
6669 | index a6ecb34cf547..37caab306336 100644 | |
6670 | --- a/include/linux/delay.h | |
6671 | +++ b/include/linux/delay.h | |
6672 | @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) | |
6673 | msleep(seconds * 1000); | |
6674 | } | |
6675 | ||
6676 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6677 | +extern void cpu_chill(void); | |
6678 | +#else | |
6679 | +# define cpu_chill() cpu_relax() | |
6680 | +#endif | |
6681 | + | |
6682 | #endif /* defined(_LINUX_DELAY_H) */ | |
6683 | diff --git a/include/linux/highmem.h b/include/linux/highmem.h | |
6684 | index bb3f3297062a..a117a33ef72c 100644 | |
6685 | --- a/include/linux/highmem.h | |
6686 | +++ b/include/linux/highmem.h | |
6687 | @@ -7,6 +7,7 @@ | |
6688 | #include <linux/mm.h> | |
6689 | #include <linux/uaccess.h> | |
6690 | #include <linux/hardirq.h> | |
6691 | +#include <linux/sched.h> | |
6692 | ||
6693 | #include <asm/cacheflush.h> | |
6694 | ||
6695 | @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) | |
6696 | ||
6697 | static inline void *kmap_atomic(struct page *page) | |
6698 | { | |
6699 | - preempt_disable(); | |
6700 | + preempt_disable_nort(); | |
6701 | pagefault_disable(); | |
6702 | return page_address(page); | |
6703 | } | |
6704 | @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |
6705 | static inline void __kunmap_atomic(void *addr) | |
6706 | { | |
6707 | pagefault_enable(); | |
6708 | - preempt_enable(); | |
6709 | + preempt_enable_nort(); | |
6710 | } | |
6711 | ||
6712 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
6713 | @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) | |
6714 | ||
6715 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
6716 | ||
6717 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6718 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
6719 | +#endif | |
6720 | ||
6721 | static inline int kmap_atomic_idx_push(void) | |
6722 | { | |
6723 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6724 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
6725 | ||
6726 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6727 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6728 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
6729 | BUG_ON(idx >= KM_TYPE_NR); | |
6730 | -#endif | |
6731 | +# endif | |
6732 | return idx; | |
6733 | +#else | |
6734 | + current->kmap_idx++; | |
6735 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
6736 | + return current->kmap_idx - 1; | |
6737 | +#endif | |
6738 | } | |
6739 | ||
6740 | static inline int kmap_atomic_idx(void) | |
6741 | { | |
6742 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6743 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
6744 | +#else | |
6745 | + return current->kmap_idx - 1; | |
6746 | +#endif | |
6747 | } | |
6748 | ||
6749 | static inline void kmap_atomic_idx_pop(void) | |
6750 | { | |
6751 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6752 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6753 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6754 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
6755 | ||
6756 | BUG_ON(idx < 0); | |
6757 | -#else | |
6758 | +# else | |
6759 | __this_cpu_dec(__kmap_atomic_idx); | |
6760 | +# endif | |
6761 | +#else | |
6762 | + current->kmap_idx--; | |
6763 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6764 | + BUG_ON(current->kmap_idx < 0); | |
6765 | +# endif | |
6766 | #endif | |
6767 | } | |
6768 | ||
6769 | diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h | |
6770 | index 5e00f80b1535..65d0671f20b4 100644 | |
6771 | --- a/include/linux/hrtimer.h | |
6772 | +++ b/include/linux/hrtimer.h | |
6773 | @@ -87,6 +87,9 @@ enum hrtimer_restart { | |
6774 | * @function: timer expiry callback function | |
6775 | * @base: pointer to the timer base (per cpu and per clock) | |
6776 | * @state: state information (See bit values above) | |
6777 | + * @cb_entry: list entry to defer timers from hardirq context | |
6778 | + * @irqsafe: timer can run in hardirq context | |
6779 | + * @praecox: timer expiry time if expired at the time of programming | |
6780 | * @is_rel: Set if the timer was armed relative | |
6781 | * @start_pid: timer statistics field to store the pid of the task which | |
6782 | * started the timer | |
6783 | @@ -103,6 +106,11 @@ struct hrtimer { | |
6784 | enum hrtimer_restart (*function)(struct hrtimer *); | |
6785 | struct hrtimer_clock_base *base; | |
6786 | u8 state; | |
6787 | + struct list_head cb_entry; | |
6788 | + int irqsafe; | |
6789 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
6790 | + ktime_t praecox; | |
6791 | +#endif | |
6792 | u8 is_rel; | |
6793 | #ifdef CONFIG_TIMER_STATS | |
6794 | int start_pid; | |
6795 | @@ -123,11 +131,7 @@ struct hrtimer_sleeper { | |
6796 | struct task_struct *task; | |
6797 | }; | |
6798 | ||
6799 | -#ifdef CONFIG_64BIT | |
6800 | # define HRTIMER_CLOCK_BASE_ALIGN 64 | |
6801 | -#else | |
6802 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
6803 | -#endif | |
6804 | ||
6805 | /** | |
6806 | * struct hrtimer_clock_base - the timer base for a specific clock | |
6807 | @@ -136,6 +140,7 @@ struct hrtimer_sleeper { | |
6808 | * timer to a base on another cpu. | |
6809 | * @clockid: clock id for per_cpu support | |
6810 | * @active: red black tree root node for the active timers | |
6811 | + * @expired: list head for deferred timers. | |
6812 | * @get_time: function to retrieve the current time of the clock | |
6813 | * @offset: offset of this clock to the monotonic base | |
6814 | */ | |
6815 | @@ -144,6 +149,7 @@ struct hrtimer_clock_base { | |
6816 | int index; | |
6817 | clockid_t clockid; | |
6818 | struct timerqueue_head active; | |
6819 | + struct list_head expired; | |
6820 | ktime_t (*get_time)(void); | |
6821 | ktime_t offset; | |
6822 | } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
6823 | @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { | |
6824 | raw_spinlock_t lock; | |
6825 | seqcount_t seq; | |
6826 | struct hrtimer *running; | |
6827 | + struct hrtimer *running_soft; | |
6828 | unsigned int cpu; | |
6829 | unsigned int active_bases; | |
6830 | unsigned int clock_was_set_seq; | |
6831 | @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { | |
6832 | unsigned int nr_hangs; | |
6833 | unsigned int max_hang_time; | |
6834 | #endif | |
6835 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6836 | + wait_queue_head_t wait; | |
6837 | +#endif | |
6838 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
6839 | } ____cacheline_aligned; | |
6840 | ||
6841 | @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) | |
6842 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
6843 | } | |
6844 | ||
6845 | +/* Softirq preemption could deadlock timer removal */ | |
6846 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6847 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
6848 | +#else | |
6849 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
6850 | +#endif | |
6851 | + | |
6852 | /* Query timers: */ | |
6853 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
6854 | ||
6855 | @@ -436,7 +453,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) | |
6856 | * Helper function to check, whether the timer is running the callback | |
6857 | * function | |
6858 | */ | |
6859 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
6860 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
6861 | { | |
6862 | return timer->base->cpu_base->running == timer; | |
6863 | } | |
6864 | diff --git a/include/linux/idr.h b/include/linux/idr.h | |
6865 | index 083d61e92706..5899796f50cb 100644 | |
6866 | --- a/include/linux/idr.h | |
6867 | +++ b/include/linux/idr.h | |
6868 | @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); | |
6869 | * Each idr_preload() should be matched with an invocation of this | |
6870 | * function. See idr_preload() for details. | |
6871 | */ | |
6872 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6873 | +void idr_preload_end(void); | |
6874 | +#else | |
6875 | static inline void idr_preload_end(void) | |
6876 | { | |
6877 | preempt_enable(); | |
6878 | } | |
6879 | +#endif | |
6880 | ||
6881 | /** | |
6882 | * idr_find - return pointer for given id | |
6883 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | |
6884 | index 325f649d77ff..8af70bcc799b 100644 | |
6885 | --- a/include/linux/init_task.h | |
6886 | +++ b/include/linux/init_task.h | |
6887 | @@ -150,6 +150,12 @@ extern struct task_group root_task_group; | |
6888 | # define INIT_PERF_EVENTS(tsk) | |
6889 | #endif | |
6890 | ||
6891 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6892 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
6893 | +#else | |
6894 | +# define INIT_TIMER_LIST | |
6895 | +#endif | |
6896 | + | |
6897 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
6898 | # define INIT_VTIME(tsk) \ | |
6899 | .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ | |
6900 | @@ -250,6 +256,7 @@ extern struct task_group root_task_group; | |
6901 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | |
6902 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
6903 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
6904 | + INIT_TIMER_LIST \ | |
6905 | .pids = { \ | |
6906 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
6907 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
6908 | diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h | |
6909 | index 72f0721f75e7..480972ae47d3 100644 | |
6910 | --- a/include/linux/interrupt.h | |
6911 | +++ b/include/linux/interrupt.h | |
6912 | @@ -14,6 +14,7 @@ | |
6913 | #include <linux/hrtimer.h> | |
6914 | #include <linux/kref.h> | |
6915 | #include <linux/workqueue.h> | |
6916 | +#include <linux/swork.h> | |
6917 | ||
6918 | #include <linux/atomic.h> | |
6919 | #include <asm/ptrace.h> | |
6920 | @@ -61,6 +62,7 @@ | |
6921 | * interrupt handler after suspending interrupts. For system | |
6922 | * wakeup devices users need to implement wakeup detection in | |
6923 | * their interrupt handlers. | |
6924 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
6925 | */ | |
6926 | #define IRQF_SHARED 0x00000080 | |
6927 | #define IRQF_PROBE_SHARED 0x00000100 | |
6928 | @@ -74,6 +76,7 @@ | |
6929 | #define IRQF_NO_THREAD 0x00010000 | |
6930 | #define IRQF_EARLY_RESUME 0x00020000 | |
6931 | #define IRQF_COND_SUSPEND 0x00040000 | |
6932 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
6933 | ||
6934 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
6935 | ||
6936 | @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); | |
6937 | #ifdef CONFIG_LOCKDEP | |
6938 | # define local_irq_enable_in_hardirq() do { } while (0) | |
6939 | #else | |
6940 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
6941 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
6942 | #endif | |
6943 | ||
6944 | extern void disable_irq_nosync(unsigned int irq); | |
6945 | @@ -216,6 +219,7 @@ extern void resume_device_irqs(void); | |
6946 | * struct irq_affinity_notify - context for notification of IRQ affinity changes | |
6947 | * @irq: Interrupt to which notification applies | |
6948 | * @kref: Reference count, for internal use | |
6949 | + * @swork: Swork item, for internal use | |
6950 | * @work: Work item, for internal use | |
6951 | * @notify: Function to be called on change. This will be | |
6952 | * called in process context. | |
6953 | @@ -227,7 +231,11 @@ extern void resume_device_irqs(void); | |
6954 | struct irq_affinity_notify { | |
6955 | unsigned int irq; | |
6956 | struct kref kref; | |
6957 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6958 | + struct swork_event swork; | |
6959 | +#else | |
6960 | struct work_struct work; | |
6961 | +#endif | |
6962 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
6963 | void (*release)(struct kref *ref); | |
6964 | }; | |
6965 | @@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
6966 | bool state); | |
6967 | ||
6968 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
6969 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
6970 | extern bool force_irqthreads; | |
6971 | +# else | |
6972 | +# define force_irqthreads (true) | |
6973 | +# endif | |
6974 | #else | |
6975 | -#define force_irqthreads (0) | |
6976 | +#define force_irqthreads (false) | |
6977 | #endif | |
6978 | ||
6979 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
6980 | @@ -465,9 +477,10 @@ struct softirq_action | |
6981 | void (*action)(struct softirq_action *); | |
6982 | }; | |
6983 | ||
6984 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6985 | asmlinkage void do_softirq(void); | |
6986 | asmlinkage void __do_softirq(void); | |
6987 | - | |
6988 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
6989 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
6990 | void do_softirq_own_stack(void); | |
6991 | #else | |
6992 | @@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void) | |
6993 | __do_softirq(); | |
6994 | } | |
6995 | #endif | |
6996 | +#else | |
6997 | +extern void thread_do_softirq(void); | |
6998 | +#endif | |
6999 | ||
7000 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
7001 | extern void softirq_init(void); | |
7002 | extern void __raise_softirq_irqoff(unsigned int nr); | |
7003 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7004 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
7005 | +#else | |
7006 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
7007 | +{ | |
7008 | + __raise_softirq_irqoff(nr); | |
7009 | +} | |
7010 | +#endif | |
7011 | ||
7012 | extern void raise_softirq_irqoff(unsigned int nr); | |
7013 | extern void raise_softirq(unsigned int nr); | |
7014 | +extern void softirq_check_pending_idle(void); | |
7015 | ||
7016 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
7017 | ||
7018 | @@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) | |
7019 | to be executed on some cpu at least once after this. | |
7020 | * If the tasklet is already scheduled, but its execution is still not | |
7021 | started, it will be executed only once. | |
7022 | - * If this tasklet is already running on another CPU (or schedule is called | |
7023 | - from tasklet itself), it is rescheduled for later. | |
7024 | + * If this tasklet is already running on another CPU, it is rescheduled | |
7025 | + for later. | |
7026 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
7027 | * Tasklet is strictly serialized wrt itself, but not | |
7028 | wrt another tasklets. If client needs some intertask synchronization, | |
7029 | he makes it with spinlocks. | |
7030 | @@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } | |
7031 | enum | |
7032 | { | |
7033 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
7034 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
7035 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
7036 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
7037 | }; | |
7038 | ||
7039 | -#ifdef CONFIG_SMP | |
7040 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
7041 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
7042 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
7043 | + | |
7044 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
7045 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
7046 | { | |
7047 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
7048 | } | |
7049 | ||
7050 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
7051 | +{ | |
7052 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
7053 | +} | |
7054 | + | |
7055 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
7056 | { | |
7057 | smp_mb__before_atomic(); | |
7058 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
7059 | } | |
7060 | ||
7061 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
7062 | -{ | |
7063 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
7064 | -} | |
7065 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
7066 | + | |
7067 | #else | |
7068 | #define tasklet_trylock(t) 1 | |
7069 | +#define tasklet_tryunlock(t) 1 | |
7070 | #define tasklet_unlock_wait(t) do { } while (0) | |
7071 | #define tasklet_unlock(t) do { } while (0) | |
7072 | #endif | |
7073 | @@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |
7074 | smp_mb(); | |
7075 | } | |
7076 | ||
7077 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
7078 | -{ | |
7079 | - smp_mb__before_atomic(); | |
7080 | - atomic_dec(&t->count); | |
7081 | -} | |
7082 | - | |
7083 | +extern void tasklet_enable(struct tasklet_struct *t); | |
7084 | extern void tasklet_kill(struct tasklet_struct *t); | |
7085 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
7086 | extern void tasklet_init(struct tasklet_struct *t, | |
7087 | @@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |
7088 | tasklet_kill(&ttimer->tasklet); | |
7089 | } | |
7090 | ||
7091 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7092 | +extern void softirq_early_init(void); | |
7093 | +#else | |
7094 | +static inline void softirq_early_init(void) { } | |
7095 | +#endif | |
7096 | + | |
7097 | /* | |
7098 | * Autoprobing for irqs: | |
7099 | * | |
7100 | diff --git a/include/linux/irq.h b/include/linux/irq.h | |
7101 | index e79875574b39..177cee0c3305 100644 | |
7102 | --- a/include/linux/irq.h | |
7103 | +++ b/include/linux/irq.h | |
7104 | @@ -72,6 +72,7 @@ enum irqchip_irq_state; | |
7105 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
7106 | * it from the spurious interrupt detection | |
7107 | * mechanism and from core side polling. | |
7108 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
7109 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
7110 | */ | |
7111 | enum { | |
7112 | @@ -99,13 +100,14 @@ enum { | |
7113 | IRQ_PER_CPU_DEVID = (1 << 17), | |
7114 | IRQ_IS_POLLED = (1 << 18), | |
7115 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
7116 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
7117 | }; | |
7118 | ||
7119 | #define IRQF_MODIFY_MASK \ | |
7120 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
7121 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
7122 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
7123 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
7124 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
7125 | ||
7126 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
7127 | ||
7128 | diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h | |
7129 | index 47b9ebd4a74f..2543aab05daa 100644 | |
7130 | --- a/include/linux/irq_work.h | |
7131 | +++ b/include/linux/irq_work.h | |
7132 | @@ -16,6 +16,7 @@ | |
7133 | #define IRQ_WORK_BUSY 2UL | |
7134 | #define IRQ_WORK_FLAGS 3UL | |
7135 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
7136 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
7137 | ||
7138 | struct irq_work { | |
7139 | unsigned long flags; | |
7140 | @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } | |
7141 | static inline void irq_work_run(void) { } | |
7142 | #endif | |
7143 | ||
7144 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
7145 | +void irq_work_tick_soft(void); | |
7146 | +#else | |
7147 | +static inline void irq_work_tick_soft(void) { } | |
7148 | +#endif | |
7149 | + | |
7150 | #endif /* _LINUX_IRQ_WORK_H */ | |
7151 | diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h | |
7152 | index c9be57931b58..eeeb540971ae 100644 | |
7153 | --- a/include/linux/irqdesc.h | |
7154 | +++ b/include/linux/irqdesc.h | |
7155 | @@ -66,6 +66,7 @@ struct irq_desc { | |
7156 | unsigned int irqs_unhandled; | |
7157 | atomic_t threads_handled; | |
7158 | int threads_handled_last; | |
7159 | + u64 random_ip; | |
7160 | raw_spinlock_t lock; | |
7161 | struct cpumask *percpu_enabled; | |
7162 | const struct cpumask *percpu_affinity; | |
7163 | diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h | |
7164 | index 5dd1272d1ab2..9b77034f7c5e 100644 | |
7165 | --- a/include/linux/irqflags.h | |
7166 | +++ b/include/linux/irqflags.h | |
7167 | @@ -25,8 +25,6 @@ | |
7168 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | |
7169 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | |
7170 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | |
7171 | -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
7172 | -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
7173 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
7174 | #else | |
7175 | # define trace_hardirqs_on() do { } while (0) | |
7176 | @@ -39,9 +37,15 @@ | |
7177 | # define trace_softirqs_enabled(p) 0 | |
7178 | # define trace_hardirq_enter() do { } while (0) | |
7179 | # define trace_hardirq_exit() do { } while (0) | |
7180 | +# define INIT_TRACE_IRQFLAGS | |
7181 | +#endif | |
7182 | + | |
7183 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
7184 | +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
7185 | +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
7186 | +#else | |
7187 | # define lockdep_softirq_enter() do { } while (0) | |
7188 | # define lockdep_softirq_exit() do { } while (0) | |
7189 | -# define INIT_TRACE_IRQFLAGS | |
7190 | #endif | |
7191 | ||
7192 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
7193 | @@ -148,4 +152,23 @@ | |
7194 | ||
7195 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
7196 | ||
7197 | +/* | |
7198 | + * local_irq* variants depending on RT/!RT | |
7199 | + */ | |
7200 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7201 | +# define local_irq_disable_nort() do { } while (0) | |
7202 | +# define local_irq_enable_nort() do { } while (0) | |
7203 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
7204 | +# define local_irq_restore_nort(flags) (void)(flags) | |
7205 | +# define local_irq_disable_rt() local_irq_disable() | |
7206 | +# define local_irq_enable_rt() local_irq_enable() | |
7207 | +#else | |
7208 | +# define local_irq_disable_nort() local_irq_disable() | |
7209 | +# define local_irq_enable_nort() local_irq_enable() | |
7210 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
7211 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
7212 | +# define local_irq_disable_rt() do { } while (0) | |
7213 | +# define local_irq_enable_rt() do { } while (0) | |
7214 | +#endif | |
7215 | + | |
7216 | #endif | |
7217 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | |
7218 | index dfaa1f4dcb0c..d57dd06544a1 100644 | |
7219 | --- a/include/linux/jbd2.h | |
7220 | +++ b/include/linux/jbd2.h | |
7221 | @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) | |
7222 | ||
7223 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
7224 | { | |
7225 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7226 | bit_spin_lock(BH_State, &bh->b_state); | |
7227 | +#else | |
7228 | + spin_lock(&bh->b_state_lock); | |
7229 | +#endif | |
7230 | } | |
7231 | ||
7232 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
7233 | { | |
7234 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7235 | return bit_spin_trylock(BH_State, &bh->b_state); | |
7236 | +#else | |
7237 | + return spin_trylock(&bh->b_state_lock); | |
7238 | +#endif | |
7239 | } | |
7240 | ||
7241 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
7242 | { | |
7243 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7244 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
7245 | +#else | |
7246 | + return spin_is_locked(&bh->b_state_lock); | |
7247 | +#endif | |
7248 | } | |
7249 | ||
7250 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
7251 | { | |
7252 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7253 | bit_spin_unlock(BH_State, &bh->b_state); | |
7254 | +#else | |
7255 | + spin_unlock(&bh->b_state_lock); | |
7256 | +#endif | |
7257 | } | |
7258 | ||
7259 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
7260 | { | |
7261 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7262 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
7263 | +#else | |
7264 | + spin_lock(&bh->b_journal_head_lock); | |
7265 | +#endif | |
7266 | } | |
7267 | ||
7268 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
7269 | { | |
7270 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7271 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
7272 | +#else | |
7273 | + spin_unlock(&bh->b_journal_head_lock); | |
7274 | +#endif | |
7275 | } | |
7276 | ||
7277 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
7278 | diff --git a/include/linux/kdb.h b/include/linux/kdb.h | |
7279 | index 410decacff8f..0861bebfc188 100644 | |
7280 | --- a/include/linux/kdb.h | |
7281 | +++ b/include/linux/kdb.h | |
7282 | @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, | |
7283 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
7284 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
7285 | ||
7286 | +#define in_kdb_printk() (kdb_trap_printk) | |
7287 | extern void kdb_init(int level); | |
7288 | ||
7289 | /* Access to kdb specific polling devices */ | |
7290 | @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, | |
7291 | extern int kdb_unregister(char *); | |
7292 | #else /* ! CONFIG_KGDB_KDB */ | |
7293 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
7294 | +#define in_kdb_printk() (0) | |
7295 | static inline void kdb_init(int level) {} | |
7296 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
7297 | char *help, short minlen) { return 0; } | |
7298 | diff --git a/include/linux/kernel.h b/include/linux/kernel.h | |
7299 | index bc6ed52a39b9..7894d55e4998 100644 | |
7300 | --- a/include/linux/kernel.h | |
7301 | +++ b/include/linux/kernel.h | |
7302 | @@ -194,6 +194,9 @@ extern int _cond_resched(void); | |
7303 | */ | |
7304 | # define might_sleep() \ | |
7305 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7306 | + | |
7307 | +# define might_sleep_no_state_check() \ | |
7308 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7309 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
7310 | #else | |
7311 | static inline void ___might_sleep(const char *file, int line, | |
7312 | @@ -201,6 +204,7 @@ extern int _cond_resched(void); | |
7313 | static inline void __might_sleep(const char *file, int line, | |
7314 | int preempt_offset) { } | |
7315 | # define might_sleep() do { might_resched(); } while (0) | |
7316 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
7317 | # define sched_annotate_sleep() do { } while (0) | |
7318 | #endif | |
7319 | ||
7320 | @@ -488,6 +492,7 @@ extern enum system_states { | |
7321 | SYSTEM_HALT, | |
7322 | SYSTEM_POWER_OFF, | |
7323 | SYSTEM_RESTART, | |
7324 | + SYSTEM_SUSPEND, | |
7325 | } system_state; | |
7326 | ||
7327 | #define TAINT_PROPRIETARY_MODULE 0 | |
7328 | diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h | |
7329 | index cb483305e1f5..4e5062316bb6 100644 | |
7330 | --- a/include/linux/list_bl.h | |
7331 | +++ b/include/linux/list_bl.h | |
7332 | @@ -2,6 +2,7 @@ | |
7333 | #define _LINUX_LIST_BL_H | |
7334 | ||
7335 | #include <linux/list.h> | |
7336 | +#include <linux/spinlock.h> | |
7337 | #include <linux/bit_spinlock.h> | |
7338 | ||
7339 | /* | |
7340 | @@ -32,13 +33,24 @@ | |
7341 | ||
7342 | struct hlist_bl_head { | |
7343 | struct hlist_bl_node *first; | |
7344 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7345 | + raw_spinlock_t lock; | |
7346 | +#endif | |
7347 | }; | |
7348 | ||
7349 | struct hlist_bl_node { | |
7350 | struct hlist_bl_node *next, **pprev; | |
7351 | }; | |
7352 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
7353 | - ((ptr)->first = NULL) | |
7354 | + | |
7355 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7356 | +#define INIT_HLIST_BL_HEAD(h) \ | |
7357 | +do { \ | |
7358 | + (h)->first = NULL; \ | |
7359 | + raw_spin_lock_init(&(h)->lock); \ | |
7360 | +} while (0) | |
7361 | +#else | |
7362 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
7363 | +#endif | |
7364 | ||
7365 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
7366 | { | |
7367 | @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) | |
7368 | ||
7369 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
7370 | { | |
7371 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7372 | bit_spin_lock(0, (unsigned long *)b); | |
7373 | +#else | |
7374 | + raw_spin_lock(&b->lock); | |
7375 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7376 | + __set_bit(0, (unsigned long *)b); | |
7377 | +#endif | |
7378 | +#endif | |
7379 | } | |
7380 | ||
7381 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
7382 | { | |
7383 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7384 | __bit_spin_unlock(0, (unsigned long *)b); | |
7385 | +#else | |
7386 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7387 | + __clear_bit(0, (unsigned long *)b); | |
7388 | +#endif | |
7389 | + raw_spin_unlock(&b->lock); | |
7390 | +#endif | |
7391 | } | |
7392 | ||
7393 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
7394 | diff --git a/include/linux/locallock.h b/include/linux/locallock.h | |
7395 | new file mode 100644 | |
7396 | index 000000000000..845c77f1a5ca | |
7397 | --- /dev/null | |
7398 | +++ b/include/linux/locallock.h | |
7399 | @@ -0,0 +1,278 @@ | |
7400 | +#ifndef _LINUX_LOCALLOCK_H | |
7401 | +#define _LINUX_LOCALLOCK_H | |
7402 | + | |
7403 | +#include <linux/percpu.h> | |
7404 | +#include <linux/spinlock.h> | |
7405 | + | |
7406 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7407 | + | |
7408 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
7409 | +# define LL_WARN(cond) WARN_ON(cond) | |
7410 | +#else | |
7411 | +# define LL_WARN(cond) do { } while (0) | |
7412 | +#endif | |
7413 | + | |
7414 | +/* | |
7415 | + * per cpu lock based substitute for local_irq_*() | |
7416 | + */ | |
7417 | +struct local_irq_lock { | |
7418 | + spinlock_t lock; | |
7419 | + struct task_struct *owner; | |
7420 | + int nestcnt; | |
7421 | + unsigned long flags; | |
7422 | +}; | |
7423 | + | |
7424 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
7425 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
7426 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
7427 | + | |
7428 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
7429 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
7430 | + | |
7431 | +#define local_irq_lock_init(lvar) \ | |
7432 | + do { \ | |
7433 | + int __cpu; \ | |
7434 | + for_each_possible_cpu(__cpu) \ | |
7435 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
7436 | + } while (0) | |
7437 | + | |
7438 | +/* | |
7439 | + * spin_lock|trylock|unlock_local flavour that does not migrate disable | |
7440 | + * used for __local_lock|trylock|unlock where get_local_var/put_local_var | |
7441 | + * already takes care of the migrate_disable/enable | |
7442 | + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. | |
7443 | + */ | |
7444 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7445 | +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) | |
7446 | +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) | |
7447 | +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) | |
7448 | +#else | |
7449 | +# define spin_lock_local(lock) spin_lock(lock) | |
7450 | +# define spin_trylock_local(lock) spin_trylock(lock) | |
7451 | +# define spin_unlock_local(lock) spin_unlock(lock) | |
7452 | +#endif | |
7453 | + | |
7454 | +static inline void __local_lock(struct local_irq_lock *lv) | |
7455 | +{ | |
7456 | + if (lv->owner != current) { | |
7457 | + spin_lock_local(&lv->lock); | |
7458 | + LL_WARN(lv->owner); | |
7459 | + LL_WARN(lv->nestcnt); | |
7460 | + lv->owner = current; | |
7461 | + } | |
7462 | + lv->nestcnt++; | |
7463 | +} | |
7464 | + | |
7465 | +#define local_lock(lvar) \ | |
7466 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
7467 | + | |
7468 | +#define local_lock_on(lvar, cpu) \ | |
7469 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
7470 | + | |
7471 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
7472 | +{ | |
7473 | + if (lv->owner != current && spin_trylock_local(&lv->lock)) { | |
7474 | + LL_WARN(lv->owner); | |
7475 | + LL_WARN(lv->nestcnt); | |
7476 | + lv->owner = current; | |
7477 | + lv->nestcnt = 1; | |
7478 | + return 1; | |
7479 | + } | |
7480 | + return 0; | |
7481 | +} | |
7482 | + | |
7483 | +#define local_trylock(lvar) \ | |
7484 | + ({ \ | |
7485 | + int __locked; \ | |
7486 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
7487 | + if (!__locked) \ | |
7488 | + put_local_var(lvar); \ | |
7489 | + __locked; \ | |
7490 | + }) | |
7491 | + | |
7492 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
7493 | +{ | |
7494 | + LL_WARN(lv->nestcnt == 0); | |
7495 | + LL_WARN(lv->owner != current); | |
7496 | + if (--lv->nestcnt) | |
7497 | + return; | |
7498 | + | |
7499 | + lv->owner = NULL; | |
7500 | + spin_unlock_local(&lv->lock); | |
7501 | +} | |
7502 | + | |
7503 | +#define local_unlock(lvar) \ | |
7504 | + do { \ | |
7505 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
7506 | + put_local_var(lvar); \ | |
7507 | + } while (0) | |
7508 | + | |
7509 | +#define local_unlock_on(lvar, cpu) \ | |
7510 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
7511 | + | |
7512 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
7513 | +{ | |
7514 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
7515 | + LL_WARN(lv->owner); | |
7516 | + LL_WARN(lv->nestcnt); | |
7517 | + lv->owner = current; | |
7518 | + lv->nestcnt = 1; | |
7519 | +} | |
7520 | + | |
7521 | +#define local_lock_irq(lvar) \ | |
7522 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
7523 | + | |
7524 | +#define local_lock_irq_on(lvar, cpu) \ | |
7525 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
7526 | + | |
7527 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
7528 | +{ | |
7529 | + LL_WARN(!lv->nestcnt); | |
7530 | + LL_WARN(lv->owner != current); | |
7531 | + lv->owner = NULL; | |
7532 | + lv->nestcnt = 0; | |
7533 | + spin_unlock_irq(&lv->lock); | |
7534 | +} | |
7535 | + | |
7536 | +#define local_unlock_irq(lvar) \ | |
7537 | + do { \ | |
7538 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
7539 | + put_local_var(lvar); \ | |
7540 | + } while (0) | |
7541 | + | |
7542 | +#define local_unlock_irq_on(lvar, cpu) \ | |
7543 | + do { \ | |
7544 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
7545 | + } while (0) | |
7546 | + | |
7547 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
7548 | +{ | |
7549 | + if (lv->owner != current) { | |
7550 | + __local_lock_irq(lv); | |
7551 | + return 0; | |
7552 | + } else { | |
7553 | + lv->nestcnt++; | |
7554 | + return 1; | |
7555 | + } | |
7556 | +} | |
7557 | + | |
7558 | +#define local_lock_irqsave(lvar, _flags) \ | |
7559 | + do { \ | |
7560 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
7561 | + put_local_var(lvar); \ | |
7562 | + _flags = __this_cpu_read(lvar.flags); \ | |
7563 | + } while (0) | |
7564 | + | |
7565 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
7566 | + do { \ | |
7567 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
7568 | + _flags = per_cpu(lvar, cpu).flags; \ | |
7569 | + } while (0) | |
7570 | + | |
7571 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
7572 | + unsigned long flags) | |
7573 | +{ | |
7574 | + LL_WARN(!lv->nestcnt); | |
7575 | + LL_WARN(lv->owner != current); | |
7576 | + if (--lv->nestcnt) | |
7577 | + return 0; | |
7578 | + | |
7579 | + lv->owner = NULL; | |
7580 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
7581 | + return 1; | |
7582 | +} | |
7583 | + | |
7584 | +#define local_unlock_irqrestore(lvar, flags) \ | |
7585 | + do { \ | |
7586 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
7587 | + put_local_var(lvar); \ | |
7588 | + } while (0) | |
7589 | + | |
7590 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
7591 | + do { \ | |
7592 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
7593 | + } while (0) | |
7594 | + | |
7595 | +#define local_spin_trylock_irq(lvar, lock) \ | |
7596 | + ({ \ | |
7597 | + int __locked; \ | |
7598 | + local_lock_irq(lvar); \ | |
7599 | + __locked = spin_trylock(lock); \ | |
7600 | + if (!__locked) \ | |
7601 | + local_unlock_irq(lvar); \ | |
7602 | + __locked; \ | |
7603 | + }) | |
7604 | + | |
7605 | +#define local_spin_lock_irq(lvar, lock) \ | |
7606 | + do { \ | |
7607 | + local_lock_irq(lvar); \ | |
7608 | + spin_lock(lock); \ | |
7609 | + } while (0) | |
7610 | + | |
7611 | +#define local_spin_unlock_irq(lvar, lock) \ | |
7612 | + do { \ | |
7613 | + spin_unlock(lock); \ | |
7614 | + local_unlock_irq(lvar); \ | |
7615 | + } while (0) | |
7616 | + | |
7617 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7618 | + do { \ | |
7619 | + local_lock_irqsave(lvar, flags); \ | |
7620 | + spin_lock(lock); \ | |
7621 | + } while (0) | |
7622 | + | |
7623 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7624 | + do { \ | |
7625 | + spin_unlock(lock); \ | |
7626 | + local_unlock_irqrestore(lvar, flags); \ | |
7627 | + } while (0) | |
7628 | + | |
7629 | +#define get_locked_var(lvar, var) \ | |
7630 | + (*({ \ | |
7631 | + local_lock(lvar); \ | |
7632 | + this_cpu_ptr(&var); \ | |
7633 | + })) | |
7634 | + | |
7635 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
7636 | + | |
7637 | +#define local_lock_cpu(lvar) \ | |
7638 | + ({ \ | |
7639 | + local_lock(lvar); \ | |
7640 | + smp_processor_id(); \ | |
7641 | + }) | |
7642 | + | |
7643 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
7644 | + | |
7645 | +#else /* PREEMPT_RT_BASE */ | |
7646 | + | |
7647 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
7648 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
7649 | + | |
7650 | +static inline void local_irq_lock_init(int lvar) { } | |
7651 | + | |
7652 | +#define local_lock(lvar) preempt_disable() | |
7653 | +#define local_unlock(lvar) preempt_enable() | |
7654 | +#define local_lock_irq(lvar) local_irq_disable() | |
7655 | +#define local_lock_irq_on(lvar, cpu) local_irq_disable() | |
7656 | +#define local_unlock_irq(lvar) local_irq_enable() | |
7657 | +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() | |
7658 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
7659 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
7660 | + | |
7661 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
7662 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
7663 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
7664 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7665 | + spin_lock_irqsave(lock, flags) | |
7666 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7667 | + spin_unlock_irqrestore(lock, flags) | |
7668 | + | |
7669 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
7670 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
7671 | + | |
7672 | +#define local_lock_cpu(lvar) get_cpu() | |
7673 | +#define local_unlock_cpu(lvar) put_cpu() | |
7674 | + | |
7675 | +#endif | |
7676 | + | |
7677 | +#endif | |
7678 | diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h | |
7679 | index 08d947fc4c59..705fb564a605 100644 | |
7680 | --- a/include/linux/mm_types.h | |
7681 | +++ b/include/linux/mm_types.h | |
7682 | @@ -11,6 +11,7 @@ | |
7683 | #include <linux/completion.h> | |
7684 | #include <linux/cpumask.h> | |
7685 | #include <linux/uprobes.h> | |
7686 | +#include <linux/rcupdate.h> | |
7687 | #include <linux/page-flags-layout.h> | |
7688 | #include <linux/workqueue.h> | |
7689 | #include <asm/page.h> | |
7690 | @@ -509,6 +510,9 @@ struct mm_struct { | |
7691 | bool tlb_flush_pending; | |
7692 | #endif | |
7693 | struct uprobes_state uprobes_state; | |
7694 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7695 | + struct rcu_head delayed_drop; | |
7696 | +#endif | |
7697 | #ifdef CONFIG_X86_INTEL_MPX | |
7698 | /* address of the bounds directory */ | |
7699 | void __user *bd_addr; | |
7700 | diff --git a/include/linux/mutex.h b/include/linux/mutex.h | |
7701 | index 2cb7531e7d7a..b3fdfc820216 100644 | |
7702 | --- a/include/linux/mutex.h | |
7703 | +++ b/include/linux/mutex.h | |
7704 | @@ -19,6 +19,17 @@ | |
7705 | #include <asm/processor.h> | |
7706 | #include <linux/osq_lock.h> | |
7707 | ||
7708 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7709 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7710 | + , .dep_map = { .name = #lockname } | |
7711 | +#else | |
7712 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7713 | +#endif | |
7714 | + | |
7715 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7716 | +# include <linux/mutex_rt.h> | |
7717 | +#else | |
7718 | + | |
7719 | /* | |
7720 | * Simple, straightforward mutexes with strict semantics: | |
7721 | * | |
7722 | @@ -99,13 +110,6 @@ do { \ | |
7723 | static inline void mutex_destroy(struct mutex *lock) {} | |
7724 | #endif | |
7725 | ||
7726 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7727 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7728 | - , .dep_map = { .name = #lockname } | |
7729 | -#else | |
7730 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7731 | -#endif | |
7732 | - | |
7733 | #define __MUTEX_INITIALIZER(lockname) \ | |
7734 | { .count = ATOMIC_INIT(1) \ | |
7735 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
7736 | @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |
7737 | extern int mutex_trylock(struct mutex *lock); | |
7738 | extern void mutex_unlock(struct mutex *lock); | |
7739 | ||
7740 | +#endif /* !PREEMPT_RT_FULL */ | |
7741 | + | |
7742 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
7743 | ||
7744 | #endif /* __LINUX_MUTEX_H */ | |
7745 | diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h | |
7746 | new file mode 100644 | |
7747 | index 000000000000..c38a44b14da5 | |
7748 | --- /dev/null | |
7749 | +++ b/include/linux/mutex_rt.h | |
7750 | @@ -0,0 +1,84 @@ | |
7751 | +#ifndef __LINUX_MUTEX_RT_H | |
7752 | +#define __LINUX_MUTEX_RT_H | |
7753 | + | |
7754 | +#ifndef __LINUX_MUTEX_H | |
7755 | +#error "Please include mutex.h" | |
7756 | +#endif | |
7757 | + | |
7758 | +#include <linux/rtmutex.h> | |
7759 | + | |
7760 | +/* FIXME: Just for __lockfunc */ | |
7761 | +#include <linux/spinlock.h> | |
7762 | + | |
7763 | +struct mutex { | |
7764 | + struct rt_mutex lock; | |
7765 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7766 | + struct lockdep_map dep_map; | |
7767 | +#endif | |
7768 | +}; | |
7769 | + | |
7770 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
7771 | + { \ | |
7772 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
7773 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
7774 | + } | |
7775 | + | |
7776 | +#define DEFINE_MUTEX(mutexname) \ | |
7777 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
7778 | + | |
7779 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
7780 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
7781 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
7782 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
7783 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
7784 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
7785 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
7786 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
7787 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
7788 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
7789 | + | |
7790 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
7791 | +#define mutex_lock(l) _mutex_lock(l) | |
7792 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
7793 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
7794 | +#define mutex_trylock(l) _mutex_trylock(l) | |
7795 | +#define mutex_unlock(l) _mutex_unlock(l) | |
7796 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) | |
7797 | + | |
7798 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7799 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
7800 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7801 | + _mutex_lock_interruptible_nested(l, s) | |
7802 | +# define mutex_lock_killable_nested(l, s) \ | |
7803 | + _mutex_lock_killable_nested(l, s) | |
7804 | + | |
7805 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
7806 | +do { \ | |
7807 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
7808 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
7809 | +} while (0) | |
7810 | + | |
7811 | +#else | |
7812 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
7813 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7814 | + _mutex_lock_interruptible(l) | |
7815 | +# define mutex_lock_killable_nested(l, s) \ | |
7816 | + _mutex_lock_killable(l) | |
7817 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
7818 | +#endif | |
7819 | + | |
7820 | +# define mutex_init(mutex) \ | |
7821 | +do { \ | |
7822 | + static struct lock_class_key __key; \ | |
7823 | + \ | |
7824 | + rt_mutex_init(&(mutex)->lock); \ | |
7825 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
7826 | +} while (0) | |
7827 | + | |
7828 | +# define __mutex_init(mutex, name, key) \ | |
7829 | +do { \ | |
7830 | + rt_mutex_init(&(mutex)->lock); \ | |
7831 | + __mutex_do_init((mutex), name, key); \ | |
7832 | +} while (0) | |
7833 | + | |
7834 | +#endif | |
7835 | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h | |
7836 | index d83590ef74a1..0ae3b6cf430c 100644 | |
7837 | --- a/include/linux/netdevice.h | |
7838 | +++ b/include/linux/netdevice.h | |
7839 | @@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t; | |
7840 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); | |
7841 | ||
7842 | void __napi_schedule(struct napi_struct *n); | |
7843 | + | |
7844 | +/* | |
7845 | + * When PREEMPT_RT_FULL is defined, all device interrupt handlers | |
7846 | + * run as threads, and they can also be preempted (without PREEMPT_RT | |
7847 | + * interrupt threads can not be preempted). Which means that calling | |
7848 | + * __napi_schedule_irqoff() from an interrupt handler can be preempted | |
7849 | + * and can corrupt the napi->poll_list. | |
7850 | + */ | |
7851 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7852 | +#define __napi_schedule_irqoff(n) __napi_schedule(n) | |
7853 | +#else | |
7854 | void __napi_schedule_irqoff(struct napi_struct *n); | |
7855 | +#endif | |
7856 | ||
7857 | static inline bool napi_disable_pending(struct napi_struct *n) | |
7858 | { | |
7859 | @@ -2461,14 +2473,53 @@ void netdev_freemem(struct net_device *dev); | |
7860 | void synchronize_net(void); | |
7861 | int init_dummy_netdev(struct net_device *dev); | |
7862 | ||
7863 | -DECLARE_PER_CPU(int, xmit_recursion); | |
7864 | #define XMIT_RECURSION_LIMIT 10 | |
7865 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7866 | +static inline int dev_recursion_level(void) | |
7867 | +{ | |
7868 | + return current->xmit_recursion; | |
7869 | +} | |
7870 | + | |
7871 | +static inline int xmit_rec_read(void) | |
7872 | +{ | |
7873 | + return current->xmit_recursion; | |
7874 | +} | |
7875 | + | |
7876 | +static inline void xmit_rec_inc(void) | |
7877 | +{ | |
7878 | + current->xmit_recursion++; | |
7879 | +} | |
7880 | + | |
7881 | +static inline void xmit_rec_dec(void) | |
7882 | +{ | |
7883 | + current->xmit_recursion--; | |
7884 | +} | |
7885 | + | |
7886 | +#else | |
7887 | + | |
7888 | +DECLARE_PER_CPU(int, xmit_recursion); | |
7889 | ||
7890 | static inline int dev_recursion_level(void) | |
7891 | { | |
7892 | return this_cpu_read(xmit_recursion); | |
7893 | } | |
7894 | ||
7895 | +static inline int xmit_rec_read(void) | |
7896 | +{ | |
7897 | + return __this_cpu_read(xmit_recursion); | |
7898 | +} | |
7899 | + | |
7900 | +static inline void xmit_rec_inc(void) | |
7901 | +{ | |
7902 | + __this_cpu_inc(xmit_recursion); | |
7903 | +} | |
7904 | + | |
7905 | +static inline void xmit_rec_dec(void) | |
7906 | +{ | |
7907 | + __this_cpu_dec(xmit_recursion); | |
7908 | +} | |
7909 | +#endif | |
7910 | + | |
7911 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
7912 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
7913 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | |
7914 | @@ -2851,6 +2902,7 @@ struct softnet_data { | |
7915 | unsigned int dropped; | |
7916 | struct sk_buff_head input_pkt_queue; | |
7917 | struct napi_struct backlog; | |
7918 | + struct sk_buff_head tofree_queue; | |
7919 | ||
7920 | }; | |
7921 | ||
7922 | diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h | |
7923 | index 2ad1a2b289b5..b4d10155af54 100644 | |
7924 | --- a/include/linux/netfilter/x_tables.h | |
7925 | +++ b/include/linux/netfilter/x_tables.h | |
7926 | @@ -4,6 +4,7 @@ | |
7927 | ||
7928 | #include <linux/netdevice.h> | |
7929 | #include <linux/static_key.h> | |
7930 | +#include <linux/locallock.h> | |
7931 | #include <uapi/linux/netfilter/x_tables.h> | |
7932 | ||
7933 | /* Test a struct->invflags and a boolean for inequality */ | |
7934 | @@ -300,6 +301,8 @@ void xt_free_table_info(struct xt_table_info *info); | |
7935 | */ | |
7936 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
7937 | ||
7938 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); | |
7939 | + | |
7940 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
7941 | * | |
7942 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
7943 | @@ -320,6 +323,9 @@ static inline unsigned int xt_write_recseq_begin(void) | |
7944 | { | |
7945 | unsigned int addend; | |
7946 | ||
7947 | + /* RT protection */ | |
7948 | + local_lock(xt_write_lock); | |
7949 | + | |
7950 | /* | |
7951 | * Low order bit of sequence is set if we already | |
7952 | * called xt_write_recseq_begin(). | |
7953 | @@ -350,6 +356,7 @@ static inline void xt_write_recseq_end(unsigned int addend) | |
7954 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
7955 | smp_wmb(); | |
7956 | __this_cpu_add(xt_recseq.sequence, addend); | |
7957 | + local_unlock(xt_write_lock); | |
7958 | } | |
7959 | ||
7960 | /* | |
7961 | diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h | |
7962 | index 810124b33327..d54ca43d571f 100644 | |
7963 | --- a/include/linux/nfs_fs.h | |
7964 | +++ b/include/linux/nfs_fs.h | |
7965 | @@ -165,7 +165,11 @@ struct nfs_inode { | |
7966 | ||
7967 | /* Readers: in-flight sillydelete RPC calls */ | |
7968 | /* Writers: rmdir */ | |
7969 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7970 | + struct semaphore rmdir_sem; | |
7971 | +#else | |
7972 | struct rw_semaphore rmdir_sem; | |
7973 | +#endif | |
7974 | ||
7975 | #if IS_ENABLED(CONFIG_NFS_V4) | |
7976 | struct nfs4_cached_acl *nfs4_acl; | |
7977 | diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h | |
7978 | index beb1e10f446e..ebaf2e7bfe29 100644 | |
7979 | --- a/include/linux/nfs_xdr.h | |
7980 | +++ b/include/linux/nfs_xdr.h | |
7981 | @@ -1490,7 +1490,7 @@ struct nfs_unlinkdata { | |
7982 | struct nfs_removeargs args; | |
7983 | struct nfs_removeres res; | |
7984 | struct dentry *dentry; | |
7985 | - wait_queue_head_t wq; | |
7986 | + struct swait_queue_head wq; | |
7987 | struct rpc_cred *cred; | |
7988 | struct nfs_fattr dir_attr; | |
7989 | long timeout; | |
7990 | diff --git a/include/linux/notifier.h b/include/linux/notifier.h | |
7991 | index 4149868de4e6..babe5b9bcb91 100644 | |
7992 | --- a/include/linux/notifier.h | |
7993 | +++ b/include/linux/notifier.h | |
7994 | @@ -6,7 +6,7 @@ | |
7995 | * | |
7996 | * Alan Cox <Alan.Cox@linux.org> | |
7997 | */ | |
7998 | - | |
7999 | + | |
8000 | #ifndef _LINUX_NOTIFIER_H | |
8001 | #define _LINUX_NOTIFIER_H | |
8002 | #include <linux/errno.h> | |
8003 | @@ -42,9 +42,7 @@ | |
8004 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
8005 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
8006 | * SRCU notifier chains should be used when the chain will be called very | |
8007 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
8008 | - * chains are slightly more difficult to use because they require special | |
8009 | - * runtime initialization. | |
8010 | + * often but notifier_blocks will seldom be removed. | |
8011 | */ | |
8012 | ||
8013 | struct notifier_block; | |
8014 | @@ -90,7 +88,7 @@ struct srcu_notifier_head { | |
8015 | (name)->head = NULL; \ | |
8016 | } while (0) | |
8017 | ||
8018 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ | |
8019 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
8020 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8021 | #define srcu_cleanup_notifier_head(name) \ | |
8022 | cleanup_srcu_struct(&(name)->srcu); | |
8023 | @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8024 | .head = NULL } | |
8025 | #define RAW_NOTIFIER_INIT(name) { \ | |
8026 | .head = NULL } | |
8027 | -/* srcu_notifier_heads cannot be initialized statically */ | |
8028 | + | |
8029 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
8030 | + { \ | |
8031 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
8032 | + .head = NULL, \ | |
8033 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
8034 | + } | |
8035 | ||
8036 | #define ATOMIC_NOTIFIER_HEAD(name) \ | |
8037 | struct atomic_notifier_head name = \ | |
8038 | @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8039 | struct raw_notifier_head name = \ | |
8040 | RAW_NOTIFIER_INIT(name) | |
8041 | ||
8042 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
8043 | + static DEFINE_PER_CPU(struct srcu_struct_array, \ | |
8044 | + name##_head_srcu_array); \ | |
8045 | + mod struct srcu_notifier_head name = \ | |
8046 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) | |
8047 | + | |
8048 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
8049 | + _SRCU_NOTIFIER_HEAD(name, ) | |
8050 | + | |
8051 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
8052 | + _SRCU_NOTIFIER_HEAD(name, static) | |
8053 | + | |
8054 | #ifdef __KERNEL__ | |
8055 | ||
8056 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | |
8057 | @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret) | |
8058 | ||
8059 | /* | |
8060 | * Declared notifiers so far. I can imagine quite a few more chains | |
8061 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
8062 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
8063 | * device units up), device [un]mount chain, module load/unload chain, | |
8064 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
8065 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
8066 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
8067 | */ | |
8068 | - | |
8069 | + | |
8070 | /* CPU notfiers are defined in include/linux/cpu.h. */ | |
8071 | ||
8072 | /* netdevice notifiers are defined in include/linux/netdevice.h */ | |
8073 | diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h | |
8074 | index 5b2e6159b744..ea940f451606 100644 | |
8075 | --- a/include/linux/percpu-rwsem.h | |
8076 | +++ b/include/linux/percpu-rwsem.h | |
8077 | @@ -4,7 +4,7 @@ | |
8078 | #include <linux/atomic.h> | |
8079 | #include <linux/rwsem.h> | |
8080 | #include <linux/percpu.h> | |
8081 | -#include <linux/wait.h> | |
8082 | +#include <linux/swait.h> | |
8083 | #include <linux/rcu_sync.h> | |
8084 | #include <linux/lockdep.h> | |
8085 | ||
8086 | @@ -12,7 +12,7 @@ struct percpu_rw_semaphore { | |
8087 | struct rcu_sync rss; | |
8088 | unsigned int __percpu *read_count; | |
8089 | struct rw_semaphore rw_sem; | |
8090 | - wait_queue_head_t writer; | |
8091 | + struct swait_queue_head writer; | |
8092 | int readers_block; | |
8093 | }; | |
8094 | ||
8095 | @@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \ | |
8096 | .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ | |
8097 | .read_count = &__percpu_rwsem_rc_##name, \ | |
8098 | .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ | |
8099 | - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
8100 | + .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
8101 | } | |
8102 | ||
8103 | extern int __percpu_down_read(struct percpu_rw_semaphore *, int); | |
8104 | extern void __percpu_up_read(struct percpu_rw_semaphore *); | |
8105 | ||
8106 | -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) | |
8107 | +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
8108 | { | |
8109 | might_sleep(); | |
8110 | ||
8111 | @@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * | |
8112 | __this_cpu_inc(*sem->read_count); | |
8113 | if (unlikely(!rcu_sync_is_idle(&sem->rss))) | |
8114 | __percpu_down_read(sem, false); /* Unconditional memory barrier */ | |
8115 | - barrier(); | |
8116 | /* | |
8117 | - * The barrier() prevents the compiler from | |
8118 | + * The preempt_enable() prevents the compiler from | |
8119 | * bleeding the critical section out. | |
8120 | */ | |
8121 | -} | |
8122 | - | |
8123 | -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
8124 | -{ | |
8125 | - percpu_down_read_preempt_disable(sem); | |
8126 | preempt_enable(); | |
8127 | } | |
8128 | ||
8129 | @@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) | |
8130 | return ret; | |
8131 | } | |
8132 | ||
8133 | -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) | |
8134 | +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
8135 | { | |
8136 | - /* | |
8137 | - * The barrier() prevents the compiler from | |
8138 | - * bleeding the critical section out. | |
8139 | - */ | |
8140 | - barrier(); | |
8141 | + preempt_disable(); | |
8142 | /* | |
8143 | * Same as in percpu_down_read(). | |
8144 | */ | |
8145 | @@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem | |
8146 | rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); | |
8147 | } | |
8148 | ||
8149 | -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
8150 | -{ | |
8151 | - preempt_disable(); | |
8152 | - percpu_up_read_preempt_enable(sem); | |
8153 | -} | |
8154 | - | |
8155 | extern void percpu_down_write(struct percpu_rw_semaphore *); | |
8156 | extern void percpu_up_write(struct percpu_rw_semaphore *); | |
8157 | ||
8158 | diff --git a/include/linux/percpu.h b/include/linux/percpu.h | |
8159 | index 56939d3f6e53..1c7e33fc83e4 100644 | |
8160 | --- a/include/linux/percpu.h | |
8161 | +++ b/include/linux/percpu.h | |
8162 | @@ -18,6 +18,35 @@ | |
8163 | #define PERCPU_MODULE_RESERVE 0 | |
8164 | #endif | |
8165 | ||
8166 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8167 | + | |
8168 | +#define get_local_var(var) (*({ \ | |
8169 | + migrate_disable(); \ | |
8170 | + this_cpu_ptr(&var); })) | |
8171 | + | |
8172 | +#define put_local_var(var) do { \ | |
8173 | + (void)&(var); \ | |
8174 | + migrate_enable(); \ | |
8175 | +} while (0) | |
8176 | + | |
8177 | +# define get_local_ptr(var) ({ \ | |
8178 | + migrate_disable(); \ | |
8179 | + this_cpu_ptr(var); }) | |
8180 | + | |
8181 | +# define put_local_ptr(var) do { \ | |
8182 | + (void)(var); \ | |
8183 | + migrate_enable(); \ | |
8184 | +} while (0) | |
8185 | + | |
8186 | +#else | |
8187 | + | |
8188 | +#define get_local_var(var) get_cpu_var(var) | |
8189 | +#define put_local_var(var) put_cpu_var(var) | |
8190 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
8191 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
8192 | + | |
8193 | +#endif | |
8194 | + | |
8195 | /* minimum unit size, also is the maximum supported allocation size */ | |
8196 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
8197 | ||
8198 | diff --git a/include/linux/pid.h b/include/linux/pid.h | |
8199 | index 23705a53abba..2cc64b779f03 100644 | |
8200 | --- a/include/linux/pid.h | |
8201 | +++ b/include/linux/pid.h | |
8202 | @@ -2,6 +2,7 @@ | |
8203 | #define _LINUX_PID_H | |
8204 | ||
8205 | #include <linux/rcupdate.h> | |
8206 | +#include <linux/atomic.h> | |
8207 | ||
8208 | enum pid_type | |
8209 | { | |
8210 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h | |
8211 | index 75e4e30677f1..1cfb1cb72354 100644 | |
8212 | --- a/include/linux/preempt.h | |
8213 | +++ b/include/linux/preempt.h | |
8214 | @@ -50,7 +50,11 @@ | |
8215 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
8216 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
8217 | ||
8218 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8219 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8220 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8221 | +#else | |
8222 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
8223 | +#endif | |
8224 | ||
8225 | /* We use the MSB mostly because its available */ | |
8226 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
8227 | @@ -59,9 +63,15 @@ | |
8228 | #include <asm/preempt.h> | |
8229 | ||
8230 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | |
8231 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8232 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
8233 | | NMI_MASK)) | |
8234 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8235 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8236 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8237 | +#else | |
8238 | +# define softirq_count() (0UL) | |
8239 | +extern int in_serving_softirq(void); | |
8240 | +#endif | |
8241 | ||
8242 | /* | |
8243 | * Are we doing bottom half or hardware interrupt processing? | |
8244 | @@ -72,7 +82,6 @@ | |
8245 | #define in_irq() (hardirq_count()) | |
8246 | #define in_softirq() (softirq_count()) | |
8247 | #define in_interrupt() (irq_count()) | |
8248 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8249 | ||
8250 | /* | |
8251 | * Are we in NMI context? | |
8252 | @@ -91,7 +100,11 @@ | |
8253 | /* | |
8254 | * The preempt_count offset after spin_lock() | |
8255 | */ | |
8256 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
8257 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
8258 | +#else | |
8259 | +#define PREEMPT_LOCK_OFFSET 0 | |
8260 | +#endif | |
8261 | ||
8262 | /* | |
8263 | * The preempt_count offset needed for things like: | |
8264 | @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val); | |
8265 | #define preempt_count_inc() preempt_count_add(1) | |
8266 | #define preempt_count_dec() preempt_count_sub(1) | |
8267 | ||
8268 | +#ifdef CONFIG_PREEMPT_LAZY | |
8269 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
8270 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
8271 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
8272 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
8273 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
8274 | +#else | |
8275 | +#define add_preempt_lazy_count(val) do { } while (0) | |
8276 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
8277 | +#define inc_preempt_lazy_count() do { } while (0) | |
8278 | +#define dec_preempt_lazy_count() do { } while (0) | |
8279 | +#define preempt_lazy_count() (0) | |
8280 | +#endif | |
8281 | + | |
8282 | #ifdef CONFIG_PREEMPT_COUNT | |
8283 | ||
8284 | #define preempt_disable() \ | |
8285 | @@ -148,13 +175,25 @@ do { \ | |
8286 | barrier(); \ | |
8287 | } while (0) | |
8288 | ||
8289 | +#define preempt_lazy_disable() \ | |
8290 | +do { \ | |
8291 | + inc_preempt_lazy_count(); \ | |
8292 | + barrier(); \ | |
8293 | +} while (0) | |
8294 | + | |
8295 | #define sched_preempt_enable_no_resched() \ | |
8296 | do { \ | |
8297 | barrier(); \ | |
8298 | preempt_count_dec(); \ | |
8299 | } while (0) | |
8300 | ||
8301 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8302 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8303 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8304 | +# define preempt_check_resched_rt() preempt_check_resched() | |
8305 | +#else | |
8306 | +# define preempt_enable_no_resched() preempt_enable() | |
8307 | +# define preempt_check_resched_rt() barrier(); | |
8308 | +#endif | |
8309 | ||
8310 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | |
8311 | ||
8312 | @@ -179,6 +218,13 @@ do { \ | |
8313 | __preempt_schedule(); \ | |
8314 | } while (0) | |
8315 | ||
8316 | +#define preempt_lazy_enable() \ | |
8317 | +do { \ | |
8318 | + dec_preempt_lazy_count(); \ | |
8319 | + barrier(); \ | |
8320 | + preempt_check_resched(); \ | |
8321 | +} while (0) | |
8322 | + | |
8323 | #else /* !CONFIG_PREEMPT */ | |
8324 | #define preempt_enable() \ | |
8325 | do { \ | |
8326 | @@ -224,6 +270,7 @@ do { \ | |
8327 | #define preempt_disable_notrace() barrier() | |
8328 | #define preempt_enable_no_resched_notrace() barrier() | |
8329 | #define preempt_enable_notrace() barrier() | |
8330 | +#define preempt_check_resched_rt() barrier() | |
8331 | #define preemptible() 0 | |
8332 | ||
8333 | #endif /* CONFIG_PREEMPT_COUNT */ | |
8334 | @@ -244,10 +291,31 @@ do { \ | |
8335 | } while (0) | |
8336 | #define preempt_fold_need_resched() \ | |
8337 | do { \ | |
8338 | - if (tif_need_resched()) \ | |
8339 | + if (tif_need_resched_now()) \ | |
8340 | set_preempt_need_resched(); \ | |
8341 | } while (0) | |
8342 | ||
8343 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8344 | +# define preempt_disable_rt() preempt_disable() | |
8345 | +# define preempt_enable_rt() preempt_enable() | |
8346 | +# define preempt_disable_nort() barrier() | |
8347 | +# define preempt_enable_nort() barrier() | |
8348 | +# ifdef CONFIG_SMP | |
8349 | + extern void migrate_disable(void); | |
8350 | + extern void migrate_enable(void); | |
8351 | +# else /* CONFIG_SMP */ | |
8352 | +# define migrate_disable() barrier() | |
8353 | +# define migrate_enable() barrier() | |
8354 | +# endif /* CONFIG_SMP */ | |
8355 | +#else | |
8356 | +# define preempt_disable_rt() barrier() | |
8357 | +# define preempt_enable_rt() barrier() | |
8358 | +# define preempt_disable_nort() preempt_disable() | |
8359 | +# define preempt_enable_nort() preempt_enable() | |
8360 | +# define migrate_disable() preempt_disable() | |
8361 | +# define migrate_enable() preempt_enable() | |
8362 | +#endif | |
8363 | + | |
8364 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
8365 | ||
8366 | struct preempt_notifier; | |
8367 | diff --git a/include/linux/printk.h b/include/linux/printk.h | |
8368 | index eac1af8502bb..37e647af0b0b 100644 | |
8369 | --- a/include/linux/printk.h | |
8370 | +++ b/include/linux/printk.h | |
8371 | @@ -126,9 +126,11 @@ struct va_format { | |
8372 | #ifdef CONFIG_EARLY_PRINTK | |
8373 | extern asmlinkage __printf(1, 2) | |
8374 | void early_printk(const char *fmt, ...); | |
8375 | +extern void printk_kill(void); | |
8376 | #else | |
8377 | static inline __printf(1, 2) __cold | |
8378 | void early_printk(const char *s, ...) { } | |
8379 | +static inline void printk_kill(void) { } | |
8380 | #endif | |
8381 | ||
8382 | #ifdef CONFIG_PRINTK_NMI | |
8383 | diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h | |
8384 | index af3581b8a451..277295039c8f 100644 | |
8385 | --- a/include/linux/radix-tree.h | |
8386 | +++ b/include/linux/radix-tree.h | |
8387 | @@ -292,6 +292,8 @@ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, | |
8388 | int radix_tree_preload(gfp_t gfp_mask); | |
8389 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
8390 | int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); | |
8391 | +void radix_tree_preload_end(void); | |
8392 | + | |
8393 | void radix_tree_init(void); | |
8394 | void *radix_tree_tag_set(struct radix_tree_root *root, | |
8395 | unsigned long index, unsigned int tag); | |
8396 | @@ -314,11 +316,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | |
8397 | int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); | |
8398 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); | |
8399 | ||
8400 | -static inline void radix_tree_preload_end(void) | |
8401 | -{ | |
8402 | - preempt_enable(); | |
8403 | -} | |
8404 | - | |
8405 | /** | |
8406 | * struct radix_tree_iter - radix tree iterator state | |
8407 | * | |
8408 | diff --git a/include/linux/random.h b/include/linux/random.h | |
8409 | index 7bd2403e4fef..b2df7148a42b 100644 | |
8410 | --- a/include/linux/random.h | |
8411 | +++ b/include/linux/random.h | |
8412 | @@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {} | |
8413 | ||
8414 | extern void add_input_randomness(unsigned int type, unsigned int code, | |
8415 | unsigned int value) __latent_entropy; | |
8416 | -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; | |
8417 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; | |
8418 | ||
8419 | extern void get_random_bytes(void *buf, int nbytes); | |
8420 | extern int add_random_ready_callback(struct random_ready_callback *rdy); | |
8421 | diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h | |
8422 | index e585018498d5..25c64474fc27 100644 | |
8423 | --- a/include/linux/rbtree.h | |
8424 | +++ b/include/linux/rbtree.h | |
8425 | @@ -31,7 +31,7 @@ | |
8426 | ||
8427 | #include <linux/kernel.h> | |
8428 | #include <linux/stddef.h> | |
8429 | -#include <linux/rcupdate.h> | |
8430 | +#include <linux/rcu_assign_pointer.h> | |
8431 | ||
8432 | struct rb_node { | |
8433 | unsigned long __rb_parent_color; | |
8434 | diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h | |
8435 | index d076183e49be..36bfb4dd57ae 100644 | |
8436 | --- a/include/linux/rbtree_augmented.h | |
8437 | +++ b/include/linux/rbtree_augmented.h | |
8438 | @@ -26,6 +26,7 @@ | |
8439 | ||
8440 | #include <linux/compiler.h> | |
8441 | #include <linux/rbtree.h> | |
8442 | +#include <linux/rcupdate.h> | |
8443 | ||
8444 | /* | |
8445 | * Please note - only struct rb_augment_callbacks and the prototypes for | |
8446 | diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h | |
8447 | new file mode 100644 | |
8448 | index 000000000000..7066962a4379 | |
8449 | --- /dev/null | |
8450 | +++ b/include/linux/rcu_assign_pointer.h | |
8451 | @@ -0,0 +1,54 @@ | |
8452 | +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ | |
8453 | +#define __LINUX_RCU_ASSIGN_POINTER_H__ | |
8454 | +#include <linux/compiler.h> | |
8455 | +#include <asm/barrier.h> | |
8456 | + | |
8457 | +/** | |
8458 | + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8459 | + * @v: The value to statically initialize with. | |
8460 | + */ | |
8461 | +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8462 | + | |
8463 | +/** | |
8464 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
8465 | + * @p: pointer to assign to | |
8466 | + * @v: value to assign (publish) | |
8467 | + * | |
8468 | + * Assigns the specified value to the specified RCU-protected | |
8469 | + * pointer, ensuring that any concurrent RCU readers will see | |
8470 | + * any prior initialization. | |
8471 | + * | |
8472 | + * Inserts memory barriers on architectures that require them | |
8473 | + * (which is most of them), and also prevents the compiler from | |
8474 | + * reordering the code that initializes the structure after the pointer | |
8475 | + * assignment. More importantly, this call documents which pointers | |
8476 | + * will be dereferenced by RCU read-side code. | |
8477 | + * | |
8478 | + * In some special cases, you may use RCU_INIT_POINTER() instead | |
8479 | + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8480 | + * to the fact that it does not constrain either the CPU or the compiler. | |
8481 | + * That said, using RCU_INIT_POINTER() when you should have used | |
8482 | + * rcu_assign_pointer() is a very bad thing that results in | |
8483 | + * impossible-to-diagnose memory corruption. So please be careful. | |
8484 | + * See the RCU_INIT_POINTER() comment header for details. | |
8485 | + * | |
8486 | + * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8487 | + * once, appearances notwithstanding. One of the "extra" evaluations | |
8488 | + * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8489 | + * neither of which actually execute the argument. As with most cpp | |
8490 | + * macros, this execute-arguments-only-once property is important, so | |
8491 | + * please be careful when making changes to rcu_assign_pointer() and the | |
8492 | + * other macros that it invokes. | |
8493 | + */ | |
8494 | +#define rcu_assign_pointer(p, v) \ | |
8495 | +({ \ | |
8496 | + uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8497 | + \ | |
8498 | + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8499 | + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8500 | + else \ | |
8501 | + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8502 | + _r_a_p__v; \ | |
8503 | +}) | |
8504 | + | |
8505 | +#endif | |
8506 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | |
8507 | index 01f71e1d2e94..30cc001d0d5a 100644 | |
8508 | --- a/include/linux/rcupdate.h | |
8509 | +++ b/include/linux/rcupdate.h | |
8510 | @@ -46,6 +46,7 @@ | |
8511 | #include <linux/compiler.h> | |
8512 | #include <linux/ktime.h> | |
8513 | #include <linux/irqflags.h> | |
8514 | +#include <linux/rcu_assign_pointer.h> | |
8515 | ||
8516 | #include <asm/barrier.h> | |
8517 | ||
8518 | @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head, | |
8519 | ||
8520 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8521 | ||
8522 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8523 | +#define call_rcu_bh call_rcu | |
8524 | +#else | |
8525 | /** | |
8526 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
8527 | * @head: structure to be used for queueing the RCU updates. | |
8528 | @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head, | |
8529 | */ | |
8530 | void call_rcu_bh(struct rcu_head *head, | |
8531 | rcu_callback_t func); | |
8532 | +#endif | |
8533 | ||
8534 | /** | |
8535 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | |
8536 | @@ -301,6 +306,11 @@ void synchronize_rcu(void); | |
8537 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
8538 | */ | |
8539 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
8540 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8541 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8542 | +#else | |
8543 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
8544 | +#endif | |
8545 | ||
8546 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | |
8547 | ||
8548 | @@ -326,6 +336,8 @@ static inline int rcu_preempt_depth(void) | |
8549 | return 0; | |
8550 | } | |
8551 | ||
8552 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8553 | + | |
8554 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8555 | ||
8556 | /* Internal to kernel */ | |
8557 | @@ -505,7 +517,14 @@ extern struct lockdep_map rcu_callback_map; | |
8558 | int debug_lockdep_rcu_enabled(void); | |
8559 | ||
8560 | int rcu_read_lock_held(void); | |
8561 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8562 | +static inline int rcu_read_lock_bh_held(void) | |
8563 | +{ | |
8564 | + return rcu_read_lock_held(); | |
8565 | +} | |
8566 | +#else | |
8567 | int rcu_read_lock_bh_held(void); | |
8568 | +#endif | |
8569 | ||
8570 | /** | |
8571 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
8572 | @@ -626,54 +645,6 @@ static inline void rcu_preempt_sleep_check(void) | |
8573 | }) | |
8574 | ||
8575 | /** | |
8576 | - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8577 | - * @v: The value to statically initialize with. | |
8578 | - */ | |
8579 | -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8580 | - | |
8581 | -/** | |
8582 | - * rcu_assign_pointer() - assign to RCU-protected pointer | |
8583 | - * @p: pointer to assign to | |
8584 | - * @v: value to assign (publish) | |
8585 | - * | |
8586 | - * Assigns the specified value to the specified RCU-protected | |
8587 | - * pointer, ensuring that any concurrent RCU readers will see | |
8588 | - * any prior initialization. | |
8589 | - * | |
8590 | - * Inserts memory barriers on architectures that require them | |
8591 | - * (which is most of them), and also prevents the compiler from | |
8592 | - * reordering the code that initializes the structure after the pointer | |
8593 | - * assignment. More importantly, this call documents which pointers | |
8594 | - * will be dereferenced by RCU read-side code. | |
8595 | - * | |
8596 | - * In some special cases, you may use RCU_INIT_POINTER() instead | |
8597 | - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8598 | - * to the fact that it does not constrain either the CPU or the compiler. | |
8599 | - * That said, using RCU_INIT_POINTER() when you should have used | |
8600 | - * rcu_assign_pointer() is a very bad thing that results in | |
8601 | - * impossible-to-diagnose memory corruption. So please be careful. | |
8602 | - * See the RCU_INIT_POINTER() comment header for details. | |
8603 | - * | |
8604 | - * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8605 | - * once, appearances notwithstanding. One of the "extra" evaluations | |
8606 | - * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8607 | - * neither of which actually execute the argument. As with most cpp | |
8608 | - * macros, this execute-arguments-only-once property is important, so | |
8609 | - * please be careful when making changes to rcu_assign_pointer() and the | |
8610 | - * other macros that it invokes. | |
8611 | - */ | |
8612 | -#define rcu_assign_pointer(p, v) \ | |
8613 | -({ \ | |
8614 | - uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8615 | - \ | |
8616 | - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8617 | - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8618 | - else \ | |
8619 | - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8620 | - _r_a_p__v; \ | |
8621 | -}) | |
8622 | - | |
8623 | -/** | |
8624 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | |
8625 | * @p: The pointer to read | |
8626 | * | |
8627 | @@ -951,10 +922,14 @@ static inline void rcu_read_unlock(void) | |
8628 | static inline void rcu_read_lock_bh(void) | |
8629 | { | |
8630 | local_bh_disable(); | |
8631 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8632 | + rcu_read_lock(); | |
8633 | +#else | |
8634 | __acquire(RCU_BH); | |
8635 | rcu_lock_acquire(&rcu_bh_lock_map); | |
8636 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8637 | "rcu_read_lock_bh() used illegally while idle"); | |
8638 | +#endif | |
8639 | } | |
8640 | ||
8641 | /* | |
8642 | @@ -964,10 +939,14 @@ static inline void rcu_read_lock_bh(void) | |
8643 | */ | |
8644 | static inline void rcu_read_unlock_bh(void) | |
8645 | { | |
8646 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8647 | + rcu_read_unlock(); | |
8648 | +#else | |
8649 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8650 | "rcu_read_unlock_bh() used illegally while idle"); | |
8651 | rcu_lock_release(&rcu_bh_lock_map); | |
8652 | __release(RCU_BH); | |
8653 | +#endif | |
8654 | local_bh_enable(); | |
8655 | } | |
8656 | ||
8657 | diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h | |
8658 | index 63a4e4cf40a5..08ab12df2863 100644 | |
8659 | --- a/include/linux/rcutree.h | |
8660 | +++ b/include/linux/rcutree.h | |
8661 | @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) | |
8662 | rcu_note_context_switch(); | |
8663 | } | |
8664 | ||
8665 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8666 | +# define synchronize_rcu_bh synchronize_rcu | |
8667 | +#else | |
8668 | void synchronize_rcu_bh(void); | |
8669 | +#endif | |
8670 | void synchronize_sched_expedited(void); | |
8671 | void synchronize_rcu_expedited(void); | |
8672 | ||
8673 | @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |
8674 | } | |
8675 | ||
8676 | void rcu_barrier(void); | |
8677 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8678 | +# define rcu_barrier_bh rcu_barrier | |
8679 | +#else | |
8680 | void rcu_barrier_bh(void); | |
8681 | +#endif | |
8682 | void rcu_barrier_sched(void); | |
8683 | unsigned long get_state_synchronize_rcu(void); | |
8684 | void cond_synchronize_rcu(unsigned long oldstate); | |
8685 | @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate); | |
8686 | extern unsigned long rcutorture_testseq; | |
8687 | extern unsigned long rcutorture_vernum; | |
8688 | unsigned long rcu_batches_started(void); | |
8689 | -unsigned long rcu_batches_started_bh(void); | |
8690 | unsigned long rcu_batches_started_sched(void); | |
8691 | unsigned long rcu_batches_completed(void); | |
8692 | -unsigned long rcu_batches_completed_bh(void); | |
8693 | unsigned long rcu_batches_completed_sched(void); | |
8694 | unsigned long rcu_exp_batches_completed(void); | |
8695 | unsigned long rcu_exp_batches_completed_sched(void); | |
8696 | void show_rcu_gp_kthreads(void); | |
8697 | ||
8698 | void rcu_force_quiescent_state(void); | |
8699 | -void rcu_bh_force_quiescent_state(void); | |
8700 | void rcu_sched_force_quiescent_state(void); | |
8701 | ||
8702 | void rcu_idle_enter(void); | |
8703 | @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly; | |
8704 | ||
8705 | bool rcu_is_watching(void); | |
8706 | ||
8707 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8708 | +void rcu_bh_force_quiescent_state(void); | |
8709 | +unsigned long rcu_batches_started_bh(void); | |
8710 | +unsigned long rcu_batches_completed_bh(void); | |
8711 | +#else | |
8712 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state | |
8713 | +# define rcu_batches_completed_bh rcu_batches_completed | |
8714 | +# define rcu_batches_started_bh rcu_batches_completed | |
8715 | +#endif | |
8716 | + | |
8717 | void rcu_all_qs(void); | |
8718 | ||
8719 | /* RCUtree hotplug events */ | |
8720 | diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h | |
8721 | index 1abba5ce2a2f..30211c627511 100644 | |
8722 | --- a/include/linux/rtmutex.h | |
8723 | +++ b/include/linux/rtmutex.h | |
8724 | @@ -13,11 +13,15 @@ | |
8725 | #define __LINUX_RT_MUTEX_H | |
8726 | ||
8727 | #include <linux/linkage.h> | |
8728 | +#include <linux/spinlock_types_raw.h> | |
8729 | #include <linux/rbtree.h> | |
8730 | -#include <linux/spinlock_types.h> | |
8731 | ||
8732 | extern int max_lock_depth; /* for sysctl */ | |
8733 | ||
8734 | +#ifdef CONFIG_DEBUG_MUTEXES | |
8735 | +#include <linux/debug_locks.h> | |
8736 | +#endif | |
8737 | + | |
8738 | /** | |
8739 | * The rt_mutex structure | |
8740 | * | |
8741 | @@ -31,8 +35,8 @@ struct rt_mutex { | |
8742 | struct rb_root waiters; | |
8743 | struct rb_node *waiters_leftmost; | |
8744 | struct task_struct *owner; | |
8745 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8746 | int save_state; | |
8747 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8748 | const char *name, *file; | |
8749 | int line; | |
8750 | void *magic; | |
8751 | @@ -55,22 +59,33 @@ struct hrtimer_sleeper; | |
8752 | # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) | |
8753 | #endif | |
8754 | ||
8755 | +# define rt_mutex_init(mutex) \ | |
8756 | + do { \ | |
8757 | + raw_spin_lock_init(&(mutex)->wait_lock); \ | |
8758 | + __rt_mutex_init(mutex, #mutex); \ | |
8759 | + } while (0) | |
8760 | + | |
8761 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
8762 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
8763 | , .name = #mutexname, .file = __FILE__, .line = __LINE__ | |
8764 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) | |
8765 | extern void rt_mutex_debug_task_free(struct task_struct *tsk); | |
8766 | #else | |
8767 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8768 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) | |
8769 | # define rt_mutex_debug_task_free(t) do { } while (0) | |
8770 | #endif | |
8771 | ||
8772 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8773 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8774 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8775 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8776 | , .waiters = RB_ROOT \ | |
8777 | , .owner = NULL \ | |
8778 | - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} | |
8779 | + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8780 | + | |
8781 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8782 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
8783 | + | |
8784 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ | |
8785 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8786 | + , .save_state = 1 } | |
8787 | ||
8788 | #define DEFINE_RT_MUTEX(mutexname) \ | |
8789 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
8790 | @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); | |
8791 | ||
8792 | extern void rt_mutex_lock(struct rt_mutex *lock); | |
8793 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); | |
8794 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
8795 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
8796 | struct hrtimer_sleeper *timeout); | |
8797 | ||
8798 | diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h | |
8799 | new file mode 100644 | |
8800 | index 000000000000..49ed2d45d3be | |
8801 | --- /dev/null | |
8802 | +++ b/include/linux/rwlock_rt.h | |
8803 | @@ -0,0 +1,99 @@ | |
8804 | +#ifndef __LINUX_RWLOCK_RT_H | |
8805 | +#define __LINUX_RWLOCK_RT_H | |
8806 | + | |
8807 | +#ifndef __LINUX_SPINLOCK_H | |
8808 | +#error Do not include directly. Use spinlock.h | |
8809 | +#endif | |
8810 | + | |
8811 | +#define rwlock_init(rwl) \ | |
8812 | +do { \ | |
8813 | + static struct lock_class_key __key; \ | |
8814 | + \ | |
8815 | + rt_mutex_init(&(rwl)->lock); \ | |
8816 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
8817 | +} while (0) | |
8818 | + | |
8819 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
8820 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
8821 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
8822 | +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); | |
8823 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
8824 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
8825 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
8826 | +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); | |
8827 | +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); | |
8828 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
8829 | + | |
8830 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
8831 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
8832 | + | |
8833 | +#define write_trylock_irqsave(lock, flags) \ | |
8834 | + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) | |
8835 | + | |
8836 | +#define read_lock_irqsave(lock, flags) \ | |
8837 | + do { \ | |
8838 | + typecheck(unsigned long, flags); \ | |
8839 | + flags = rt_read_lock_irqsave(lock); \ | |
8840 | + } while (0) | |
8841 | + | |
8842 | +#define write_lock_irqsave(lock, flags) \ | |
8843 | + do { \ | |
8844 | + typecheck(unsigned long, flags); \ | |
8845 | + flags = rt_write_lock_irqsave(lock); \ | |
8846 | + } while (0) | |
8847 | + | |
8848 | +#define read_lock(lock) rt_read_lock(lock) | |
8849 | + | |
8850 | +#define read_lock_bh(lock) \ | |
8851 | + do { \ | |
8852 | + local_bh_disable(); \ | |
8853 | + rt_read_lock(lock); \ | |
8854 | + } while (0) | |
8855 | + | |
8856 | +#define read_lock_irq(lock) read_lock(lock) | |
8857 | + | |
8858 | +#define write_lock(lock) rt_write_lock(lock) | |
8859 | + | |
8860 | +#define write_lock_bh(lock) \ | |
8861 | + do { \ | |
8862 | + local_bh_disable(); \ | |
8863 | + rt_write_lock(lock); \ | |
8864 | + } while (0) | |
8865 | + | |
8866 | +#define write_lock_irq(lock) write_lock(lock) | |
8867 | + | |
8868 | +#define read_unlock(lock) rt_read_unlock(lock) | |
8869 | + | |
8870 | +#define read_unlock_bh(lock) \ | |
8871 | + do { \ | |
8872 | + rt_read_unlock(lock); \ | |
8873 | + local_bh_enable(); \ | |
8874 | + } while (0) | |
8875 | + | |
8876 | +#define read_unlock_irq(lock) read_unlock(lock) | |
8877 | + | |
8878 | +#define write_unlock(lock) rt_write_unlock(lock) | |
8879 | + | |
8880 | +#define write_unlock_bh(lock) \ | |
8881 | + do { \ | |
8882 | + rt_write_unlock(lock); \ | |
8883 | + local_bh_enable(); \ | |
8884 | + } while (0) | |
8885 | + | |
8886 | +#define write_unlock_irq(lock) write_unlock(lock) | |
8887 | + | |
8888 | +#define read_unlock_irqrestore(lock, flags) \ | |
8889 | + do { \ | |
8890 | + typecheck(unsigned long, flags); \ | |
8891 | + (void) flags; \ | |
8892 | + rt_read_unlock(lock); \ | |
8893 | + } while (0) | |
8894 | + | |
8895 | +#define write_unlock_irqrestore(lock, flags) \ | |
8896 | + do { \ | |
8897 | + typecheck(unsigned long, flags); \ | |
8898 | + (void) flags; \ | |
8899 | + rt_write_unlock(lock); \ | |
8900 | + } while (0) | |
8901 | + | |
8902 | +#endif | |
8903 | diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h | |
8904 | index cc0072e93e36..5317cd957292 100644 | |
8905 | --- a/include/linux/rwlock_types.h | |
8906 | +++ b/include/linux/rwlock_types.h | |
8907 | @@ -1,6 +1,10 @@ | |
8908 | #ifndef __LINUX_RWLOCK_TYPES_H | |
8909 | #define __LINUX_RWLOCK_TYPES_H | |
8910 | ||
8911 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) | |
8912 | +# error "Do not include directly, include spinlock_types.h" | |
8913 | +#endif | |
8914 | + | |
8915 | /* | |
8916 | * include/linux/rwlock_types.h - generic rwlock type definitions | |
8917 | * and initializers | |
8918 | diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h | |
8919 | new file mode 100644 | |
8920 | index 000000000000..51b28d775fe1 | |
8921 | --- /dev/null | |
8922 | +++ b/include/linux/rwlock_types_rt.h | |
8923 | @@ -0,0 +1,33 @@ | |
8924 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
8925 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
8926 | + | |
8927 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
8928 | +#error "Do not include directly. Include spinlock_types.h instead" | |
8929 | +#endif | |
8930 | + | |
8931 | +/* | |
8932 | + * rwlocks - rtmutex which allows single reader recursion | |
8933 | + */ | |
8934 | +typedef struct { | |
8935 | + struct rt_mutex lock; | |
8936 | + int read_depth; | |
8937 | + unsigned int break_lock; | |
8938 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8939 | + struct lockdep_map dep_map; | |
8940 | +#endif | |
8941 | +} rwlock_t; | |
8942 | + | |
8943 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8944 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
8945 | +#else | |
8946 | +# define RW_DEP_MAP_INIT(lockname) | |
8947 | +#endif | |
8948 | + | |
8949 | +#define __RW_LOCK_UNLOCKED(name) \ | |
8950 | + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ | |
8951 | + RW_DEP_MAP_INIT(name) } | |
8952 | + | |
8953 | +#define DEFINE_RWLOCK(name) \ | |
8954 | + rwlock_t name = __RW_LOCK_UNLOCKED(name) | |
8955 | + | |
8956 | +#endif | |
8957 | diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h | |
8958 | index dd1d14250340..8e1f44ff1f2f 100644 | |
8959 | --- a/include/linux/rwsem.h | |
8960 | +++ b/include/linux/rwsem.h | |
8961 | @@ -19,6 +19,10 @@ | |
8962 | #include <linux/osq_lock.h> | |
8963 | #endif | |
8964 | ||
8965 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8966 | +#include <linux/rwsem_rt.h> | |
8967 | +#else /* PREEMPT_RT_FULL */ | |
8968 | + | |
8969 | struct rw_semaphore; | |
8970 | ||
8971 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | |
8972 | @@ -184,4 +188,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem); | |
8973 | # define up_read_non_owner(sem) up_read(sem) | |
8974 | #endif | |
8975 | ||
8976 | +#endif /* !PREEMPT_RT_FULL */ | |
8977 | + | |
8978 | #endif /* _LINUX_RWSEM_H */ | |
8979 | diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h | |
8980 | new file mode 100644 | |
8981 | index 000000000000..e26bd95a57c3 | |
8982 | --- /dev/null | |
8983 | +++ b/include/linux/rwsem_rt.h | |
8984 | @@ -0,0 +1,167 @@ | |
8985 | +#ifndef _LINUX_RWSEM_RT_H | |
8986 | +#define _LINUX_RWSEM_RT_H | |
8987 | + | |
8988 | +#ifndef _LINUX_RWSEM_H | |
8989 | +#error "Include rwsem.h" | |
8990 | +#endif | |
8991 | + | |
8992 | +/* | |
8993 | + * RW-semaphores are a spinlock plus a reader-depth count. | |
8994 | + * | |
8995 | + * Note that the semantics are different from the usual | |
8996 | + * Linux rw-sems, in PREEMPT_RT mode we do not allow | |
8997 | + * multiple readers to hold the lock at once, we only allow | |
8998 | + * a read-lock owner to read-lock recursively. This is | |
8999 | + * better for latency, makes the implementation inherently | |
9000 | + * fair and makes it simpler as well. | |
9001 | + */ | |
9002 | + | |
9003 | +#include <linux/rtmutex.h> | |
9004 | + | |
9005 | +struct rw_semaphore { | |
9006 | + struct rt_mutex lock; | |
9007 | + int read_depth; | |
9008 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9009 | + struct lockdep_map dep_map; | |
9010 | +#endif | |
9011 | +}; | |
9012 | + | |
9013 | +#define __RWSEM_INITIALIZER(name) \ | |
9014 | + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ | |
9015 | + RW_DEP_MAP_INIT(name) } | |
9016 | + | |
9017 | +#define DECLARE_RWSEM(lockname) \ | |
9018 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
9019 | + | |
9020 | +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
9021 | + struct lock_class_key *key); | |
9022 | + | |
9023 | +#define __rt_init_rwsem(sem, name, key) \ | |
9024 | + do { \ | |
9025 | + rt_mutex_init(&(sem)->lock); \ | |
9026 | + __rt_rwsem_init((sem), (name), (key));\ | |
9027 | + } while (0) | |
9028 | + | |
9029 | +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) | |
9030 | + | |
9031 | +# define rt_init_rwsem(sem) \ | |
9032 | +do { \ | |
9033 | + static struct lock_class_key __key; \ | |
9034 | + \ | |
9035 | + __rt_init_rwsem((sem), #sem, &__key); \ | |
9036 | +} while (0) | |
9037 | + | |
9038 | +extern void rt_down_write(struct rw_semaphore *rwsem); | |
9039 | +extern int rt_down_write_killable(struct rw_semaphore *rwsem); | |
9040 | +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); | |
9041 | +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); | |
9042 | +extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem, | |
9043 | + int subclass); | |
9044 | +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
9045 | + struct lockdep_map *nest); | |
9046 | +extern void rt__down_read(struct rw_semaphore *rwsem); | |
9047 | +extern void rt_down_read(struct rw_semaphore *rwsem); | |
9048 | +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); | |
9049 | +extern int rt__down_read_trylock(struct rw_semaphore *rwsem); | |
9050 | +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); | |
9051 | +extern void __rt_up_read(struct rw_semaphore *rwsem); | |
9052 | +extern void rt_up_read(struct rw_semaphore *rwsem); | |
9053 | +extern void rt_up_write(struct rw_semaphore *rwsem); | |
9054 | +extern void rt_downgrade_write(struct rw_semaphore *rwsem); | |
9055 | + | |
9056 | +#define init_rwsem(sem) rt_init_rwsem(sem) | |
9057 | +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) | |
9058 | + | |
9059 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) | |
9060 | +{ | |
9061 | + /* rt_mutex_has_waiters() */ | |
9062 | + return !RB_EMPTY_ROOT(&sem->lock.waiters); | |
9063 | +} | |
9064 | + | |
9065 | +static inline void __down_read(struct rw_semaphore *sem) | |
9066 | +{ | |
9067 | + rt__down_read(sem); | |
9068 | +} | |
9069 | + | |
9070 | +static inline void down_read(struct rw_semaphore *sem) | |
9071 | +{ | |
9072 | + rt_down_read(sem); | |
9073 | +} | |
9074 | + | |
9075 | +static inline int __down_read_trylock(struct rw_semaphore *sem) | |
9076 | +{ | |
9077 | + return rt__down_read_trylock(sem); | |
9078 | +} | |
9079 | + | |
9080 | +static inline int down_read_trylock(struct rw_semaphore *sem) | |
9081 | +{ | |
9082 | + return rt_down_read_trylock(sem); | |
9083 | +} | |
9084 | + | |
9085 | +static inline void down_write(struct rw_semaphore *sem) | |
9086 | +{ | |
9087 | + rt_down_write(sem); | |
9088 | +} | |
9089 | + | |
9090 | +static inline int down_write_killable(struct rw_semaphore *sem) | |
9091 | +{ | |
9092 | + return rt_down_write_killable(sem); | |
9093 | +} | |
9094 | + | |
9095 | +static inline int down_write_trylock(struct rw_semaphore *sem) | |
9096 | +{ | |
9097 | + return rt_down_write_trylock(sem); | |
9098 | +} | |
9099 | + | |
9100 | +static inline void __up_read(struct rw_semaphore *sem) | |
9101 | +{ | |
9102 | + __rt_up_read(sem); | |
9103 | +} | |
9104 | + | |
9105 | +static inline void up_read(struct rw_semaphore *sem) | |
9106 | +{ | |
9107 | + rt_up_read(sem); | |
9108 | +} | |
9109 | + | |
9110 | +static inline void up_write(struct rw_semaphore *sem) | |
9111 | +{ | |
9112 | + rt_up_write(sem); | |
9113 | +} | |
9114 | + | |
9115 | +static inline void downgrade_write(struct rw_semaphore *sem) | |
9116 | +{ | |
9117 | + rt_downgrade_write(sem); | |
9118 | +} | |
9119 | + | |
9120 | +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) | |
9121 | +{ | |
9122 | + return rt_down_read_nested(sem, subclass); | |
9123 | +} | |
9124 | + | |
9125 | +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) | |
9126 | +{ | |
9127 | + rt_down_write_nested(sem, subclass); | |
9128 | +} | |
9129 | + | |
9130 | +static inline int down_write_killable_nested(struct rw_semaphore *sem, | |
9131 | + int subclass) | |
9132 | +{ | |
9133 | + return rt_down_write_killable_nested(sem, subclass); | |
9134 | +} | |
9135 | + | |
9136 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9137 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
9138 | + struct rw_semaphore *nest_lock) | |
9139 | +{ | |
9140 | + rt_down_write_nested_lock(sem, &nest_lock->dep_map); | |
9141 | +} | |
9142 | + | |
9143 | +#else | |
9144 | + | |
9145 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
9146 | + struct rw_semaphore *nest_lock) | |
9147 | +{ | |
9148 | + rt_down_write_nested_lock(sem, NULL); | |
9149 | +} | |
9150 | +#endif | |
9151 | +#endif | |
9152 | diff --git a/include/linux/sched.h b/include/linux/sched.h | |
9153 | index 75d9a57e212e..8cb7df0f56e3 100644 | |
9154 | --- a/include/linux/sched.h | |
9155 | +++ b/include/linux/sched.h | |
9156 | @@ -26,6 +26,7 @@ struct sched_param { | |
9157 | #include <linux/nodemask.h> | |
9158 | #include <linux/mm_types.h> | |
9159 | #include <linux/preempt.h> | |
9160 | +#include <asm/kmap_types.h> | |
9161 | ||
9162 | #include <asm/page.h> | |
9163 | #include <asm/ptrace.h> | |
9164 | @@ -243,10 +244,7 @@ extern char ___assert_task_state[1 - 2*!!( | |
9165 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | |
9166 | __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) | |
9167 | ||
9168 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | |
9169 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
9170 | -#define task_is_stopped_or_traced(task) \ | |
9171 | - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | |
9172 | #define task_contributes_to_load(task) \ | |
9173 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
9174 | (task->flags & PF_FROZEN) == 0 && \ | |
9175 | @@ -312,6 +310,11 @@ extern char ___assert_task_state[1 - 2*!!( | |
9176 | ||
9177 | #endif | |
9178 | ||
9179 | +#define __set_current_state_no_track(state_value) \ | |
9180 | + do { current->state = (state_value); } while (0) | |
9181 | +#define set_current_state_no_track(state_value) \ | |
9182 | + set_mb(current->state, (state_value)) | |
9183 | + | |
9184 | /* Task command name length */ | |
9185 | #define TASK_COMM_LEN 16 | |
9186 | ||
9187 | @@ -1013,8 +1016,18 @@ struct wake_q_head { | |
9188 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } | |
9189 | ||
9190 | extern void wake_q_add(struct wake_q_head *head, | |
9191 | - struct task_struct *task); | |
9192 | -extern void wake_up_q(struct wake_q_head *head); | |
9193 | + struct task_struct *task); | |
9194 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); | |
9195 | + | |
9196 | +static inline void wake_up_q(struct wake_q_head *head) | |
9197 | +{ | |
9198 | + __wake_up_q(head, false); | |
9199 | +} | |
9200 | + | |
9201 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) | |
9202 | +{ | |
9203 | + __wake_up_q(head, true); | |
9204 | +} | |
9205 | ||
9206 | /* | |
9207 | * sched-domains (multiprocessor balancing) declarations: | |
9208 | @@ -1481,6 +1494,7 @@ struct task_struct { | |
9209 | struct thread_info thread_info; | |
9210 | #endif | |
9211 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | |
9212 | + volatile long saved_state; /* saved state for "spinlock sleepers" */ | |
9213 | void *stack; | |
9214 | atomic_t usage; | |
9215 | unsigned int flags; /* per process flags, defined below */ | |
9216 | @@ -1520,6 +1534,12 @@ struct task_struct { | |
9217 | #endif | |
9218 | ||
9219 | unsigned int policy; | |
9220 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9221 | + int migrate_disable; | |
9222 | +# ifdef CONFIG_SCHED_DEBUG | |
9223 | + int migrate_disable_atomic; | |
9224 | +# endif | |
9225 | +#endif | |
9226 | int nr_cpus_allowed; | |
9227 | cpumask_t cpus_allowed; | |
9228 | ||
9229 | @@ -1654,6 +1674,9 @@ struct task_struct { | |
9230 | ||
9231 | struct task_cputime cputime_expires; | |
9232 | struct list_head cpu_timers[3]; | |
9233 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9234 | + struct task_struct *posix_timer_list; | |
9235 | +#endif | |
9236 | ||
9237 | /* process credentials */ | |
9238 | const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ | |
9239 | @@ -1685,10 +1708,15 @@ struct task_struct { | |
9240 | /* signal handlers */ | |
9241 | struct signal_struct *signal; | |
9242 | struct sighand_struct *sighand; | |
9243 | + struct sigqueue *sigqueue_cache; | |
9244 | ||
9245 | sigset_t blocked, real_blocked; | |
9246 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | |
9247 | struct sigpending pending; | |
9248 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9249 | + /* TODO: move me into ->restart_block ? */ | |
9250 | + struct siginfo forced_info; | |
9251 | +#endif | |
9252 | ||
9253 | unsigned long sas_ss_sp; | |
9254 | size_t sas_ss_size; | |
9255 | @@ -1917,6 +1945,12 @@ struct task_struct { | |
9256 | /* bitmask and counter of trace recursion */ | |
9257 | unsigned long trace_recursion; | |
9258 | #endif /* CONFIG_TRACING */ | |
9259 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
9260 | + u64 preempt_timestamp_hist; | |
9261 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
9262 | + long timer_offset; | |
9263 | +#endif | |
9264 | +#endif | |
9265 | #ifdef CONFIG_KCOV | |
9266 | /* Coverage collection mode enabled for this task (0 if disabled). */ | |
9267 | enum kcov_mode kcov_mode; | |
9268 | @@ -1942,9 +1976,23 @@ struct task_struct { | |
9269 | unsigned int sequential_io; | |
9270 | unsigned int sequential_io_avg; | |
9271 | #endif | |
9272 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9273 | + struct rcu_head put_rcu; | |
9274 | + int softirq_nestcnt; | |
9275 | + unsigned int softirqs_raised; | |
9276 | +#endif | |
9277 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9278 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
9279 | + int kmap_idx; | |
9280 | + pte_t kmap_pte[KM_TYPE_NR]; | |
9281 | +# endif | |
9282 | +#endif | |
9283 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
9284 | unsigned long task_state_change; | |
9285 | #endif | |
9286 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9287 | + int xmit_recursion; | |
9288 | +#endif | |
9289 | int pagefault_disabled; | |
9290 | #ifdef CONFIG_MMU | |
9291 | struct task_struct *oom_reaper_list; | |
9292 | @@ -1984,14 +2032,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) | |
9293 | } | |
9294 | #endif | |
9295 | ||
9296 | -/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9297 | -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | |
9298 | - | |
9299 | -static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9300 | -{ | |
9301 | - return p->nr_cpus_allowed; | |
9302 | -} | |
9303 | - | |
9304 | #define TNF_MIGRATED 0x01 | |
9305 | #define TNF_NO_GROUP 0x02 | |
9306 | #define TNF_SHARED 0x04 | |
9307 | @@ -2207,6 +2247,15 @@ extern struct pid *cad_pid; | |
9308 | extern void free_task(struct task_struct *tsk); | |
9309 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | |
9310 | ||
9311 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9312 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
9313 | + | |
9314 | +static inline void put_task_struct(struct task_struct *t) | |
9315 | +{ | |
9316 | + if (atomic_dec_and_test(&t->usage)) | |
9317 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
9318 | +} | |
9319 | +#else | |
9320 | extern void __put_task_struct(struct task_struct *t); | |
9321 | ||
9322 | static inline void put_task_struct(struct task_struct *t) | |
9323 | @@ -2214,6 +2263,7 @@ static inline void put_task_struct(struct task_struct *t) | |
9324 | if (atomic_dec_and_test(&t->usage)) | |
9325 | __put_task_struct(t); | |
9326 | } | |
9327 | +#endif | |
9328 | ||
9329 | struct task_struct *task_rcu_dereference(struct task_struct **ptask); | |
9330 | struct task_struct *try_get_task_struct(struct task_struct **ptask); | |
9331 | @@ -2255,6 +2305,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, | |
9332 | /* | |
9333 | * Per process flags | |
9334 | */ | |
9335 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
9336 | #define PF_EXITING 0x00000004 /* getting shut down */ | |
9337 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | |
9338 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | |
9339 | @@ -2423,6 +2474,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, | |
9340 | ||
9341 | extern int set_cpus_allowed_ptr(struct task_struct *p, | |
9342 | const struct cpumask *new_mask); | |
9343 | +int migrate_me(void); | |
9344 | +void tell_sched_cpu_down_begin(int cpu); | |
9345 | +void tell_sched_cpu_down_done(int cpu); | |
9346 | + | |
9347 | #else | |
9348 | static inline void do_set_cpus_allowed(struct task_struct *p, | |
9349 | const struct cpumask *new_mask) | |
9350 | @@ -2435,6 +2490,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, | |
9351 | return -EINVAL; | |
9352 | return 0; | |
9353 | } | |
9354 | +static inline int migrate_me(void) { return 0; } | |
9355 | +static inline void tell_sched_cpu_down_begin(int cpu) { } | |
9356 | +static inline void tell_sched_cpu_down_done(int cpu) { } | |
9357 | #endif | |
9358 | ||
9359 | #ifdef CONFIG_NO_HZ_COMMON | |
9360 | @@ -2673,6 +2731,7 @@ extern void xtime_update(unsigned long ticks); | |
9361 | ||
9362 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
9363 | extern int wake_up_process(struct task_struct *tsk); | |
9364 | +extern int wake_up_lock_sleeper(struct task_struct * tsk); | |
9365 | extern void wake_up_new_task(struct task_struct *tsk); | |
9366 | #ifdef CONFIG_SMP | |
9367 | extern void kick_process(struct task_struct *tsk); | |
9368 | @@ -2881,6 +2940,17 @@ static inline void mmdrop(struct mm_struct *mm) | |
9369 | __mmdrop(mm); | |
9370 | } | |
9371 | ||
9372 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9373 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
9374 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
9375 | +{ | |
9376 | + if (atomic_dec_and_test(&mm->mm_count)) | |
9377 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
9378 | +} | |
9379 | +#else | |
9380 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
9381 | +#endif | |
9382 | + | |
9383 | static inline void mmdrop_async_fn(struct work_struct *work) | |
9384 | { | |
9385 | struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); | |
9386 | @@ -3273,6 +3343,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) | |
9387 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); | |
9388 | } | |
9389 | ||
9390 | +#ifdef CONFIG_PREEMPT_LAZY | |
9391 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
9392 | +{ | |
9393 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9394 | +} | |
9395 | + | |
9396 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
9397 | +{ | |
9398 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9399 | +} | |
9400 | + | |
9401 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
9402 | +{ | |
9403 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
9404 | +} | |
9405 | + | |
9406 | +static inline int need_resched_lazy(void) | |
9407 | +{ | |
9408 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
9409 | +} | |
9410 | + | |
9411 | +static inline int need_resched_now(void) | |
9412 | +{ | |
9413 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9414 | +} | |
9415 | + | |
9416 | +#else | |
9417 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
9418 | +static inline int need_resched_lazy(void) { return 0; } | |
9419 | + | |
9420 | +static inline int need_resched_now(void) | |
9421 | +{ | |
9422 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9423 | +} | |
9424 | + | |
9425 | +#endif | |
9426 | + | |
9427 | static inline int restart_syscall(void) | |
9428 | { | |
9429 | set_tsk_thread_flag(current, TIF_SIGPENDING); | |
9430 | @@ -3304,6 +3411,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) | |
9431 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); | |
9432 | } | |
9433 | ||
9434 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
9435 | +{ | |
9436 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
9437 | + return true; | |
9438 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9439 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
9440 | + return true; | |
9441 | +#endif | |
9442 | + return false; | |
9443 | +} | |
9444 | + | |
9445 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
9446 | +{ | |
9447 | + bool traced_stopped; | |
9448 | + | |
9449 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9450 | + unsigned long flags; | |
9451 | + | |
9452 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
9453 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9454 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
9455 | +#else | |
9456 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9457 | +#endif | |
9458 | + return traced_stopped; | |
9459 | +} | |
9460 | + | |
9461 | +static inline bool task_is_traced(struct task_struct *task) | |
9462 | +{ | |
9463 | + bool traced = false; | |
9464 | + | |
9465 | + if (task->state & __TASK_TRACED) | |
9466 | + return true; | |
9467 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9468 | + /* in case the task is sleeping on tasklist_lock */ | |
9469 | + raw_spin_lock_irq(&task->pi_lock); | |
9470 | + if (task->state & __TASK_TRACED) | |
9471 | + traced = true; | |
9472 | + else if (task->saved_state & __TASK_TRACED) | |
9473 | + traced = true; | |
9474 | + raw_spin_unlock_irq(&task->pi_lock); | |
9475 | +#endif | |
9476 | + return traced; | |
9477 | +} | |
9478 | + | |
9479 | /* | |
9480 | * cond_resched() and cond_resched_lock(): latency reduction via | |
9481 | * explicit rescheduling in places that are safe. The return | |
9482 | @@ -3329,12 +3481,16 @@ extern int __cond_resched_lock(spinlock_t *lock); | |
9483 | __cond_resched_lock(lock); \ | |
9484 | }) | |
9485 | ||
9486 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9487 | extern int __cond_resched_softirq(void); | |
9488 | ||
9489 | #define cond_resched_softirq() ({ \ | |
9490 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
9491 | __cond_resched_softirq(); \ | |
9492 | }) | |
9493 | +#else | |
9494 | +# define cond_resched_softirq() cond_resched() | |
9495 | +#endif | |
9496 | ||
9497 | static inline void cond_resched_rcu(void) | |
9498 | { | |
9499 | @@ -3509,6 +3665,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) | |
9500 | ||
9501 | #endif /* CONFIG_SMP */ | |
9502 | ||
9503 | +static inline int __migrate_disabled(struct task_struct *p) | |
9504 | +{ | |
9505 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9506 | + return p->migrate_disable; | |
9507 | +#else | |
9508 | + return 0; | |
9509 | +#endif | |
9510 | +} | |
9511 | + | |
9512 | +/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9513 | +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) | |
9514 | +{ | |
9515 | + if (__migrate_disabled(p)) | |
9516 | + return cpumask_of(task_cpu(p)); | |
9517 | + | |
9518 | + return &p->cpus_allowed; | |
9519 | +} | |
9520 | + | |
9521 | +static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9522 | +{ | |
9523 | + if (__migrate_disabled(p)) | |
9524 | + return 1; | |
9525 | + return p->nr_cpus_allowed; | |
9526 | +} | |
9527 | + | |
9528 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | |
9529 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |
9530 | ||
9531 | diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h | |
9532 | index ead97654c4e9..3d7223ffdd3b 100644 | |
9533 | --- a/include/linux/seqlock.h | |
9534 | +++ b/include/linux/seqlock.h | |
9535 | @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | |
9536 | return __read_seqcount_retry(s, start); | |
9537 | } | |
9538 | ||
9539 | - | |
9540 | - | |
9541 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9542 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
9543 | { | |
9544 | s->sequence++; | |
9545 | smp_wmb(); | |
9546 | } | |
9547 | ||
9548 | -static inline void raw_write_seqcount_end(seqcount_t *s) | |
9549 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9550 | +{ | |
9551 | + preempt_disable_rt(); | |
9552 | + __raw_write_seqcount_begin(s); | |
9553 | +} | |
9554 | + | |
9555 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
9556 | { | |
9557 | smp_wmb(); | |
9558 | s->sequence++; | |
9559 | } | |
9560 | ||
9561 | +static inline void raw_write_seqcount_end(seqcount_t *s) | |
9562 | +{ | |
9563 | + __raw_write_seqcount_end(s); | |
9564 | + preempt_enable_rt(); | |
9565 | +} | |
9566 | + | |
9567 | /** | |
9568 | * raw_write_seqcount_barrier - do a seq write barrier | |
9569 | * @s: pointer to seqcount_t | |
9570 | @@ -428,10 +438,32 @@ typedef struct { | |
9571 | /* | |
9572 | * Read side functions for starting and finalizing a read side section. | |
9573 | */ | |
9574 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9575 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
9576 | { | |
9577 | return read_seqcount_begin(&sl->seqcount); | |
9578 | } | |
9579 | +#else | |
9580 | +/* | |
9581 | + * Starvation safe read side for RT | |
9582 | + */ | |
9583 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
9584 | +{ | |
9585 | + unsigned ret; | |
9586 | + | |
9587 | +repeat: | |
9588 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
9589 | + if (unlikely(ret & 1)) { | |
9590 | + /* | |
9591 | + * Take the lock and let the writer proceed (i.e. evtl | |
9592 | + * boost it), otherwise we could loop here forever. | |
9593 | + */ | |
9594 | + spin_unlock_wait(&sl->lock); | |
9595 | + goto repeat; | |
9596 | + } | |
9597 | + return ret; | |
9598 | +} | |
9599 | +#endif | |
9600 | ||
9601 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9602 | { | |
9603 | @@ -446,36 +478,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9604 | static inline void write_seqlock(seqlock_t *sl) | |
9605 | { | |
9606 | spin_lock(&sl->lock); | |
9607 | - write_seqcount_begin(&sl->seqcount); | |
9608 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9609 | +} | |
9610 | + | |
9611 | +static inline int try_write_seqlock(seqlock_t *sl) | |
9612 | +{ | |
9613 | + if (spin_trylock(&sl->lock)) { | |
9614 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9615 | + return 1; | |
9616 | + } | |
9617 | + return 0; | |
9618 | } | |
9619 | ||
9620 | static inline void write_sequnlock(seqlock_t *sl) | |
9621 | { | |
9622 | - write_seqcount_end(&sl->seqcount); | |
9623 | + __raw_write_seqcount_end(&sl->seqcount); | |
9624 | spin_unlock(&sl->lock); | |
9625 | } | |
9626 | ||
9627 | static inline void write_seqlock_bh(seqlock_t *sl) | |
9628 | { | |
9629 | spin_lock_bh(&sl->lock); | |
9630 | - write_seqcount_begin(&sl->seqcount); | |
9631 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9632 | } | |
9633 | ||
9634 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
9635 | { | |
9636 | - write_seqcount_end(&sl->seqcount); | |
9637 | + __raw_write_seqcount_end(&sl->seqcount); | |
9638 | spin_unlock_bh(&sl->lock); | |
9639 | } | |
9640 | ||
9641 | static inline void write_seqlock_irq(seqlock_t *sl) | |
9642 | { | |
9643 | spin_lock_irq(&sl->lock); | |
9644 | - write_seqcount_begin(&sl->seqcount); | |
9645 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9646 | } | |
9647 | ||
9648 | static inline void write_sequnlock_irq(seqlock_t *sl) | |
9649 | { | |
9650 | - write_seqcount_end(&sl->seqcount); | |
9651 | + __raw_write_seqcount_end(&sl->seqcount); | |
9652 | spin_unlock_irq(&sl->lock); | |
9653 | } | |
9654 | ||
9655 | @@ -484,7 +525,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9656 | unsigned long flags; | |
9657 | ||
9658 | spin_lock_irqsave(&sl->lock, flags); | |
9659 | - write_seqcount_begin(&sl->seqcount); | |
9660 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9661 | return flags; | |
9662 | } | |
9663 | ||
9664 | @@ -494,7 +535,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9665 | static inline void | |
9666 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
9667 | { | |
9668 | - write_seqcount_end(&sl->seqcount); | |
9669 | + __raw_write_seqcount_end(&sl->seqcount); | |
9670 | spin_unlock_irqrestore(&sl->lock, flags); | |
9671 | } | |
9672 | ||
9673 | diff --git a/include/linux/signal.h b/include/linux/signal.h | |
9674 | index b63f63eaa39c..295540fdfc72 100644 | |
9675 | --- a/include/linux/signal.h | |
9676 | +++ b/include/linux/signal.h | |
9677 | @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig) | |
9678 | } | |
9679 | ||
9680 | extern void flush_sigqueue(struct sigpending *queue); | |
9681 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
9682 | ||
9683 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
9684 | static inline int valid_signal(unsigned long sig) | |
9685 | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h | |
9686 | index 32810f279f8e..0db6e31161f6 100644 | |
9687 | --- a/include/linux/skbuff.h | |
9688 | +++ b/include/linux/skbuff.h | |
9689 | @@ -284,6 +284,7 @@ struct sk_buff_head { | |
9690 | ||
9691 | __u32 qlen; | |
9692 | spinlock_t lock; | |
9693 | + raw_spinlock_t raw_lock; | |
9694 | }; | |
9695 | ||
9696 | struct sk_buff; | |
9697 | @@ -1573,6 +1574,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) | |
9698 | __skb_queue_head_init(list); | |
9699 | } | |
9700 | ||
9701 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) | |
9702 | +{ | |
9703 | + raw_spin_lock_init(&list->raw_lock); | |
9704 | + __skb_queue_head_init(list); | |
9705 | +} | |
9706 | + | |
9707 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
9708 | struct lock_class_key *class) | |
9709 | { | |
9710 | diff --git a/include/linux/smp.h b/include/linux/smp.h | |
9711 | index 8e0cb7a0f836..b16ca967ad80 100644 | |
9712 | --- a/include/linux/smp.h | |
9713 | +++ b/include/linux/smp.h | |
9714 | @@ -185,6 +185,9 @@ static inline void smp_init(void) { } | |
9715 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | |
9716 | #define put_cpu() preempt_enable() | |
9717 | ||
9718 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) | |
9719 | +#define put_cpu_light() migrate_enable() | |
9720 | + | |
9721 | /* | |
9722 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
9723 | * boot command line: | |
9724 | diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h | |
9725 | index 47dd0cebd204..02928fa5499d 100644 | |
9726 | --- a/include/linux/spinlock.h | |
9727 | +++ b/include/linux/spinlock.h | |
9728 | @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9729 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
9730 | ||
9731 | /* Include rwlock functions */ | |
9732 | -#include <linux/rwlock.h> | |
9733 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9734 | +# include <linux/rwlock_rt.h> | |
9735 | +#else | |
9736 | +# include <linux/rwlock.h> | |
9737 | +#endif | |
9738 | ||
9739 | /* | |
9740 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
9741 | @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9742 | # include <linux/spinlock_api_up.h> | |
9743 | #endif | |
9744 | ||
9745 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9746 | +# include <linux/spinlock_rt.h> | |
9747 | +#else /* PREEMPT_RT_FULL */ | |
9748 | + | |
9749 | /* | |
9750 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
9751 | */ | |
9752 | @@ -347,6 +355,12 @@ static __always_inline void spin_unlock(spinlock_t *lock) | |
9753 | raw_spin_unlock(&lock->rlock); | |
9754 | } | |
9755 | ||
9756 | +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock) | |
9757 | +{ | |
9758 | + raw_spin_unlock(&lock->rlock); | |
9759 | + return 0; | |
9760 | +} | |
9761 | + | |
9762 | static __always_inline void spin_unlock_bh(spinlock_t *lock) | |
9763 | { | |
9764 | raw_spin_unlock_bh(&lock->rlock); | |
9765 | @@ -416,4 +430,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); | |
9766 | #define atomic_dec_and_lock(atomic, lock) \ | |
9767 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
9768 | ||
9769 | +#endif /* !PREEMPT_RT_FULL */ | |
9770 | + | |
9771 | #endif /* __LINUX_SPINLOCK_H */ | |
9772 | diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h | |
9773 | index 5344268e6e62..043263f30e81 100644 | |
9774 | --- a/include/linux/spinlock_api_smp.h | |
9775 | +++ b/include/linux/spinlock_api_smp.h | |
9776 | @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) | |
9777 | return 0; | |
9778 | } | |
9779 | ||
9780 | -#include <linux/rwlock_api_smp.h> | |
9781 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9782 | +# include <linux/rwlock_api_smp.h> | |
9783 | +#endif | |
9784 | ||
9785 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
9786 | diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h | |
9787 | new file mode 100644 | |
9788 | index 000000000000..3534cff3dd08 | |
9789 | --- /dev/null | |
9790 | +++ b/include/linux/spinlock_rt.h | |
9791 | @@ -0,0 +1,164 @@ | |
9792 | +#ifndef __LINUX_SPINLOCK_RT_H | |
9793 | +#define __LINUX_SPINLOCK_RT_H | |
9794 | + | |
9795 | +#ifndef __LINUX_SPINLOCK_H | |
9796 | +#error Do not include directly. Use spinlock.h | |
9797 | +#endif | |
9798 | + | |
9799 | +#include <linux/bug.h> | |
9800 | + | |
9801 | +extern void | |
9802 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); | |
9803 | + | |
9804 | +#define spin_lock_init(slock) \ | |
9805 | +do { \ | |
9806 | + static struct lock_class_key __key; \ | |
9807 | + \ | |
9808 | + rt_mutex_init(&(slock)->lock); \ | |
9809 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
9810 | +} while (0) | |
9811 | + | |
9812 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); | |
9813 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); | |
9814 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); | |
9815 | + | |
9816 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
9817 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
9818 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
9819 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
9820 | +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock); | |
9821 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); | |
9822 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
9823 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
9824 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
9825 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
9826 | + | |
9827 | +/* | |
9828 | + * lockdep-less calls, for derived types like rwlock: | |
9829 | + * (for trylock they can use rt_mutex_trylock() directly. | |
9830 | + */ | |
9831 | +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); | |
9832 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
9833 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
9834 | + | |
9835 | +#define spin_lock(lock) rt_spin_lock(lock) | |
9836 | + | |
9837 | +#define spin_lock_bh(lock) \ | |
9838 | + do { \ | |
9839 | + local_bh_disable(); \ | |
9840 | + rt_spin_lock(lock); \ | |
9841 | + } while (0) | |
9842 | + | |
9843 | +#define spin_lock_irq(lock) spin_lock(lock) | |
9844 | + | |
9845 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
9846 | + | |
9847 | +#define spin_trylock(lock) \ | |
9848 | +({ \ | |
9849 | + int __locked; \ | |
9850 | + __locked = spin_do_trylock(lock); \ | |
9851 | + __locked; \ | |
9852 | +}) | |
9853 | + | |
9854 | +#ifdef CONFIG_LOCKDEP | |
9855 | +# define spin_lock_nested(lock, subclass) \ | |
9856 | + do { \ | |
9857 | + rt_spin_lock_nested(lock, subclass); \ | |
9858 | + } while (0) | |
9859 | + | |
9860 | +#define spin_lock_bh_nested(lock, subclass) \ | |
9861 | + do { \ | |
9862 | + local_bh_disable(); \ | |
9863 | + rt_spin_lock_nested(lock, subclass); \ | |
9864 | + } while (0) | |
9865 | + | |
9866 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9867 | + do { \ | |
9868 | + typecheck(unsigned long, flags); \ | |
9869 | + flags = 0; \ | |
9870 | + rt_spin_lock_nested(lock, subclass); \ | |
9871 | + } while (0) | |
9872 | +#else | |
9873 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
9874 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
9875 | + | |
9876 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9877 | + do { \ | |
9878 | + typecheck(unsigned long, flags); \ | |
9879 | + flags = 0; \ | |
9880 | + spin_lock(lock); \ | |
9881 | + } while (0) | |
9882 | +#endif | |
9883 | + | |
9884 | +#define spin_lock_irqsave(lock, flags) \ | |
9885 | + do { \ | |
9886 | + typecheck(unsigned long, flags); \ | |
9887 | + flags = 0; \ | |
9888 | + spin_lock(lock); \ | |
9889 | + } while (0) | |
9890 | + | |
9891 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
9892 | +{ | |
9893 | + unsigned long flags = 0; | |
9894 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
9895 | + flags = rt_spin_lock_trace_flags(lock); | |
9896 | +#else | |
9897 | + spin_lock(lock); /* lock_local */ | |
9898 | +#endif | |
9899 | + return flags; | |
9900 | +} | |
9901 | + | |
9902 | +/* FIXME: we need rt_spin_lock_nest_lock */ | |
9903 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
9904 | + | |
9905 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
9906 | +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock) | |
9907 | + | |
9908 | +#define spin_unlock_bh(lock) \ | |
9909 | + do { \ | |
9910 | + rt_spin_unlock(lock); \ | |
9911 | + local_bh_enable(); \ | |
9912 | + } while (0) | |
9913 | + | |
9914 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
9915 | + | |
9916 | +#define spin_unlock_irqrestore(lock, flags) \ | |
9917 | + do { \ | |
9918 | + typecheck(unsigned long, flags); \ | |
9919 | + (void) flags; \ | |
9920 | + spin_unlock(lock); \ | |
9921 | + } while (0) | |
9922 | + | |
9923 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
9924 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
9925 | + | |
9926 | +#define spin_trylock_irqsave(lock, flags) \ | |
9927 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
9928 | + | |
9929 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
9930 | + | |
9931 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
9932 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
9933 | +#else | |
9934 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
9935 | +#endif | |
9936 | + | |
9937 | +static inline int spin_can_lock(spinlock_t *lock) | |
9938 | +{ | |
9939 | + return !rt_mutex_is_locked(&lock->lock); | |
9940 | +} | |
9941 | + | |
9942 | +static inline int spin_is_locked(spinlock_t *lock) | |
9943 | +{ | |
9944 | + return rt_mutex_is_locked(&lock->lock); | |
9945 | +} | |
9946 | + | |
9947 | +static inline void assert_spin_locked(spinlock_t *lock) | |
9948 | +{ | |
9949 | + BUG_ON(!spin_is_locked(lock)); | |
9950 | +} | |
9951 | + | |
9952 | +#define atomic_dec_and_lock(atomic, lock) \ | |
9953 | + atomic_dec_and_spin_lock(atomic, lock) | |
9954 | + | |
9955 | +#endif | |
9956 | diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h | |
9957 | index 73548eb13a5d..10bac715ea96 100644 | |
9958 | --- a/include/linux/spinlock_types.h | |
9959 | +++ b/include/linux/spinlock_types.h | |
9960 | @@ -9,80 +9,15 @@ | |
9961 | * Released under the General Public License (GPL). | |
9962 | */ | |
9963 | ||
9964 | -#if defined(CONFIG_SMP) | |
9965 | -# include <asm/spinlock_types.h> | |
9966 | +#include <linux/spinlock_types_raw.h> | |
9967 | + | |
9968 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9969 | +# include <linux/spinlock_types_nort.h> | |
9970 | +# include <linux/rwlock_types.h> | |
9971 | #else | |
9972 | -# include <linux/spinlock_types_up.h> | |
9973 | +# include <linux/rtmutex.h> | |
9974 | +# include <linux/spinlock_types_rt.h> | |
9975 | +# include <linux/rwlock_types_rt.h> | |
9976 | #endif | |
9977 | ||
9978 | -#include <linux/lockdep.h> | |
9979 | - | |
9980 | -typedef struct raw_spinlock { | |
9981 | - arch_spinlock_t raw_lock; | |
9982 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
9983 | - unsigned int break_lock; | |
9984 | -#endif | |
9985 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
9986 | - unsigned int magic, owner_cpu; | |
9987 | - void *owner; | |
9988 | -#endif | |
9989 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9990 | - struct lockdep_map dep_map; | |
9991 | -#endif | |
9992 | -} raw_spinlock_t; | |
9993 | - | |
9994 | -#define SPINLOCK_MAGIC 0xdead4ead | |
9995 | - | |
9996 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
9997 | - | |
9998 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9999 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
10000 | -#else | |
10001 | -# define SPIN_DEP_MAP_INIT(lockname) | |
10002 | -#endif | |
10003 | - | |
10004 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
10005 | -# define SPIN_DEBUG_INIT(lockname) \ | |
10006 | - .magic = SPINLOCK_MAGIC, \ | |
10007 | - .owner_cpu = -1, \ | |
10008 | - .owner = SPINLOCK_OWNER_INIT, | |
10009 | -#else | |
10010 | -# define SPIN_DEBUG_INIT(lockname) | |
10011 | -#endif | |
10012 | - | |
10013 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
10014 | - { \ | |
10015 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
10016 | - SPIN_DEBUG_INIT(lockname) \ | |
10017 | - SPIN_DEP_MAP_INIT(lockname) } | |
10018 | - | |
10019 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
10020 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
10021 | - | |
10022 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
10023 | - | |
10024 | -typedef struct spinlock { | |
10025 | - union { | |
10026 | - struct raw_spinlock rlock; | |
10027 | - | |
10028 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10029 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
10030 | - struct { | |
10031 | - u8 __padding[LOCK_PADSIZE]; | |
10032 | - struct lockdep_map dep_map; | |
10033 | - }; | |
10034 | -#endif | |
10035 | - }; | |
10036 | -} spinlock_t; | |
10037 | - | |
10038 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
10039 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
10040 | - | |
10041 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
10042 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
10043 | - | |
10044 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
10045 | - | |
10046 | -#include <linux/rwlock_types.h> | |
10047 | - | |
10048 | #endif /* __LINUX_SPINLOCK_TYPES_H */ | |
10049 | diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h | |
10050 | new file mode 100644 | |
10051 | index 000000000000..f1dac1fb1d6a | |
10052 | --- /dev/null | |
10053 | +++ b/include/linux/spinlock_types_nort.h | |
10054 | @@ -0,0 +1,33 @@ | |
10055 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
10056 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
10057 | + | |
10058 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
10059 | +#error "Do not include directly. Include spinlock_types.h instead" | |
10060 | +#endif | |
10061 | + | |
10062 | +/* | |
10063 | + * The non RT version maps spinlocks to raw_spinlocks | |
10064 | + */ | |
10065 | +typedef struct spinlock { | |
10066 | + union { | |
10067 | + struct raw_spinlock rlock; | |
10068 | + | |
10069 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10070 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
10071 | + struct { | |
10072 | + u8 __padding[LOCK_PADSIZE]; | |
10073 | + struct lockdep_map dep_map; | |
10074 | + }; | |
10075 | +#endif | |
10076 | + }; | |
10077 | +} spinlock_t; | |
10078 | + | |
10079 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
10080 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
10081 | + | |
10082 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
10083 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
10084 | + | |
10085 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
10086 | + | |
10087 | +#endif | |
10088 | diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h | |
10089 | new file mode 100644 | |
10090 | index 000000000000..edffc4d53fc9 | |
10091 | --- /dev/null | |
10092 | +++ b/include/linux/spinlock_types_raw.h | |
10093 | @@ -0,0 +1,56 @@ | |
10094 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
10095 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
10096 | + | |
10097 | +#if defined(CONFIG_SMP) | |
10098 | +# include <asm/spinlock_types.h> | |
10099 | +#else | |
10100 | +# include <linux/spinlock_types_up.h> | |
10101 | +#endif | |
10102 | + | |
10103 | +#include <linux/lockdep.h> | |
10104 | + | |
10105 | +typedef struct raw_spinlock { | |
10106 | + arch_spinlock_t raw_lock; | |
10107 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
10108 | + unsigned int break_lock; | |
10109 | +#endif | |
10110 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
10111 | + unsigned int magic, owner_cpu; | |
10112 | + void *owner; | |
10113 | +#endif | |
10114 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10115 | + struct lockdep_map dep_map; | |
10116 | +#endif | |
10117 | +} raw_spinlock_t; | |
10118 | + | |
10119 | +#define SPINLOCK_MAGIC 0xdead4ead | |
10120 | + | |
10121 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
10122 | + | |
10123 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10124 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
10125 | +#else | |
10126 | +# define SPIN_DEP_MAP_INIT(lockname) | |
10127 | +#endif | |
10128 | + | |
10129 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
10130 | +# define SPIN_DEBUG_INIT(lockname) \ | |
10131 | + .magic = SPINLOCK_MAGIC, \ | |
10132 | + .owner_cpu = -1, \ | |
10133 | + .owner = SPINLOCK_OWNER_INIT, | |
10134 | +#else | |
10135 | +# define SPIN_DEBUG_INIT(lockname) | |
10136 | +#endif | |
10137 | + | |
10138 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
10139 | + { \ | |
10140 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
10141 | + SPIN_DEBUG_INIT(lockname) \ | |
10142 | + SPIN_DEP_MAP_INIT(lockname) } | |
10143 | + | |
10144 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
10145 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
10146 | + | |
10147 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
10148 | + | |
10149 | +#endif | |
10150 | diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h | |
10151 | new file mode 100644 | |
10152 | index 000000000000..3e3d8c5f7a9a | |
10153 | --- /dev/null | |
10154 | +++ b/include/linux/spinlock_types_rt.h | |
10155 | @@ -0,0 +1,48 @@ | |
10156 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
10157 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
10158 | + | |
10159 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
10160 | +#error "Do not include directly. Include spinlock_types.h instead" | |
10161 | +#endif | |
10162 | + | |
10163 | +#include <linux/cache.h> | |
10164 | + | |
10165 | +/* | |
10166 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
10167 | + */ | |
10168 | +typedef struct spinlock { | |
10169 | + struct rt_mutex lock; | |
10170 | + unsigned int break_lock; | |
10171 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10172 | + struct lockdep_map dep_map; | |
10173 | +#endif | |
10174 | +} spinlock_t; | |
10175 | + | |
10176 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
10177 | +# define __RT_SPIN_INITIALIZER(name) \ | |
10178 | + { \ | |
10179 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
10180 | + .save_state = 1, \ | |
10181 | + .file = __FILE__, \ | |
10182 | + .line = __LINE__ , \ | |
10183 | + } | |
10184 | +#else | |
10185 | +# define __RT_SPIN_INITIALIZER(name) \ | |
10186 | + { \ | |
10187 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
10188 | + .save_state = 1, \ | |
10189 | + } | |
10190 | +#endif | |
10191 | + | |
10192 | +/* | |
10193 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
10194 | +*/ | |
10195 | + | |
10196 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
10197 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
10198 | + SPIN_DEP_MAP_INIT(name) } | |
10199 | + | |
10200 | +#define DEFINE_SPINLOCK(name) \ | |
10201 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
10202 | + | |
10203 | +#endif | |
10204 | diff --git a/include/linux/srcu.h b/include/linux/srcu.h | |
10205 | index dc8eb63c6568..e793d3a257da 100644 | |
10206 | --- a/include/linux/srcu.h | |
10207 | +++ b/include/linux/srcu.h | |
10208 | @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); | |
10209 | ||
10210 | void process_srcu(struct work_struct *work); | |
10211 | ||
10212 | -#define __SRCU_STRUCT_INIT(name) \ | |
10213 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
10214 | { \ | |
10215 | .completed = -300, \ | |
10216 | - .per_cpu_ref = &name##_srcu_array, \ | |
10217 | + .per_cpu_ref = &pcpu_name, \ | |
10218 | .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ | |
10219 | .running = false, \ | |
10220 | .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ | |
10221 | @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work); | |
10222 | */ | |
10223 | #define __DEFINE_SRCU(name, is_static) \ | |
10224 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | |
10225 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
10226 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) | |
10227 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
10228 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
10229 | ||
10230 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
10231 | index d9718378a8be..e81e6dc7dcb1 100644 | |
10232 | --- a/include/linux/suspend.h | |
10233 | +++ b/include/linux/suspend.h | |
10234 | @@ -193,6 +193,12 @@ struct platform_freeze_ops { | |
10235 | void (*end)(void); | |
10236 | }; | |
10237 | ||
10238 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | |
10239 | +extern bool pm_in_action; | |
10240 | +#else | |
10241 | +# define pm_in_action false | |
10242 | +#endif | |
10243 | + | |
10244 | #ifdef CONFIG_SUSPEND | |
10245 | /** | |
10246 | * suspend_set_ops - set platform dependent suspend operations | |
10247 | diff --git a/include/linux/swait.h b/include/linux/swait.h | |
10248 | index c1f9c62a8a50..83f004a72320 100644 | |
10249 | --- a/include/linux/swait.h | |
10250 | +++ b/include/linux/swait.h | |
10251 | @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q) | |
10252 | extern void swake_up(struct swait_queue_head *q); | |
10253 | extern void swake_up_all(struct swait_queue_head *q); | |
10254 | extern void swake_up_locked(struct swait_queue_head *q); | |
10255 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
10256 | ||
10257 | extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
10258 | extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
10259 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
10260 | index 55ff5593c193..52bf5477dc92 100644 | |
10261 | --- a/include/linux/swap.h | |
10262 | +++ b/include/linux/swap.h | |
10263 | @@ -11,6 +11,7 @@ | |
10264 | #include <linux/fs.h> | |
10265 | #include <linux/atomic.h> | |
10266 | #include <linux/page-flags.h> | |
10267 | +#include <linux/locallock.h> | |
10268 | #include <asm/page.h> | |
10269 | ||
10270 | struct notifier_block; | |
10271 | @@ -247,7 +248,8 @@ struct swap_info_struct { | |
10272 | void *workingset_eviction(struct address_space *mapping, struct page *page); | |
10273 | bool workingset_refault(void *shadow); | |
10274 | void workingset_activation(struct page *page); | |
10275 | -extern struct list_lru workingset_shadow_nodes; | |
10276 | +extern struct list_lru __workingset_shadow_nodes; | |
10277 | +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
10278 | ||
10279 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | |
10280 | { | |
10281 | @@ -292,6 +294,7 @@ extern unsigned long nr_free_pagecache_pages(void); | |
10282 | ||
10283 | ||
10284 | /* linux/mm/swap.c */ | |
10285 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
10286 | extern void lru_cache_add(struct page *); | |
10287 | extern void lru_cache_add_anon(struct page *page); | |
10288 | extern void lru_cache_add_file(struct page *page); | |
10289 | diff --git a/include/linux/swork.h b/include/linux/swork.h | |
10290 | new file mode 100644 | |
10291 | index 000000000000..f175fa9a6016 | |
10292 | --- /dev/null | |
10293 | +++ b/include/linux/swork.h | |
10294 | @@ -0,0 +1,24 @@ | |
10295 | +#ifndef _LINUX_SWORK_H | |
10296 | +#define _LINUX_SWORK_H | |
10297 | + | |
10298 | +#include <linux/list.h> | |
10299 | + | |
10300 | +struct swork_event { | |
10301 | + struct list_head item; | |
10302 | + unsigned long flags; | |
10303 | + void (*func)(struct swork_event *); | |
10304 | +}; | |
10305 | + | |
10306 | +static inline void INIT_SWORK(struct swork_event *event, | |
10307 | + void (*func)(struct swork_event *)) | |
10308 | +{ | |
10309 | + event->flags = 0; | |
10310 | + event->func = func; | |
10311 | +} | |
10312 | + | |
10313 | +bool swork_queue(struct swork_event *sev); | |
10314 | + | |
10315 | +int swork_get(void); | |
10316 | +void swork_put(void); | |
10317 | + | |
10318 | +#endif /* _LINUX_SWORK_H */ | |
10319 | diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h | |
10320 | index 2873baf5372a..eb1a108f17ca 100644 | |
10321 | --- a/include/linux/thread_info.h | |
10322 | +++ b/include/linux/thread_info.h | |
10323 | @@ -107,7 +107,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) | |
10324 | #define test_thread_flag(flag) \ | |
10325 | test_ti_thread_flag(current_thread_info(), flag) | |
10326 | ||
10327 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10328 | +#ifdef CONFIG_PREEMPT_LAZY | |
10329 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
10330 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10331 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
10332 | +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10333 | + | |
10334 | +#else | |
10335 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10336 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
10337 | +#define tif_need_resched_lazy() 0 | |
10338 | +#endif | |
10339 | ||
10340 | #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES | |
10341 | static inline int arch_within_stack_frames(const void * const stack, | |
10342 | diff --git a/include/linux/timer.h b/include/linux/timer.h | |
10343 | index 51d601f192d4..83cea629efe1 100644 | |
10344 | --- a/include/linux/timer.h | |
10345 | +++ b/include/linux/timer.h | |
10346 | @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); | |
10347 | ||
10348 | extern int try_to_del_timer_sync(struct timer_list *timer); | |
10349 | ||
10350 | -#ifdef CONFIG_SMP | |
10351 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
10352 | extern int del_timer_sync(struct timer_list *timer); | |
10353 | #else | |
10354 | # define del_timer_sync(t) del_timer(t) | |
10355 | diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h | |
10356 | index be007610ceb0..15154b13a53b 100644 | |
10357 | --- a/include/linux/trace_events.h | |
10358 | +++ b/include/linux/trace_events.h | |
10359 | @@ -56,6 +56,9 @@ struct trace_entry { | |
10360 | unsigned char flags; | |
10361 | unsigned char preempt_count; | |
10362 | int pid; | |
10363 | + unsigned short migrate_disable; | |
10364 | + unsigned short padding; | |
10365 | + unsigned char preempt_lazy_count; | |
10366 | }; | |
10367 | ||
10368 | #define TRACE_EVENT_TYPE_MAX \ | |
10369 | diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h | |
10370 | index f30c187ed785..83bf0f798426 100644 | |
10371 | --- a/include/linux/uaccess.h | |
10372 | +++ b/include/linux/uaccess.h | |
10373 | @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void) | |
10374 | */ | |
10375 | static inline void pagefault_disable(void) | |
10376 | { | |
10377 | + migrate_disable(); | |
10378 | pagefault_disabled_inc(); | |
10379 | /* | |
10380 | * make sure to have issued the store before a pagefault | |
10381 | @@ -40,6 +41,7 @@ static inline void pagefault_enable(void) | |
10382 | */ | |
10383 | barrier(); | |
10384 | pagefault_disabled_dec(); | |
10385 | + migrate_enable(); | |
10386 | } | |
10387 | ||
10388 | /* | |
10389 | diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h | |
10390 | index 4a29c75b146e..0a294e950df8 100644 | |
10391 | --- a/include/linux/uprobes.h | |
10392 | +++ b/include/linux/uprobes.h | |
10393 | @@ -27,6 +27,7 @@ | |
10394 | #include <linux/errno.h> | |
10395 | #include <linux/rbtree.h> | |
10396 | #include <linux/types.h> | |
10397 | +#include <linux/wait.h> | |
10398 | ||
10399 | struct vm_area_struct; | |
10400 | struct mm_struct; | |
10401 | diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h | |
10402 | index 613771909b6e..e28c5a43229d 100644 | |
10403 | --- a/include/linux/vmstat.h | |
10404 | +++ b/include/linux/vmstat.h | |
10405 | @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); | |
10406 | */ | |
10407 | static inline void __count_vm_event(enum vm_event_item item) | |
10408 | { | |
10409 | + preempt_disable_rt(); | |
10410 | raw_cpu_inc(vm_event_states.event[item]); | |
10411 | + preempt_enable_rt(); | |
10412 | } | |
10413 | ||
10414 | static inline void count_vm_event(enum vm_event_item item) | |
10415 | @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) | |
10416 | ||
10417 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
10418 | { | |
10419 | + preempt_disable_rt(); | |
10420 | raw_cpu_add(vm_event_states.event[item], delta); | |
10421 | + preempt_enable_rt(); | |
10422 | } | |
10423 | ||
10424 | static inline void count_vm_events(enum vm_event_item item, long delta) | |
10425 | diff --git a/include/linux/wait.h b/include/linux/wait.h | |
10426 | index 2408e8d5c05c..db50d6609195 100644 | |
10427 | --- a/include/linux/wait.h | |
10428 | +++ b/include/linux/wait.h | |
10429 | @@ -8,6 +8,7 @@ | |
10430 | #include <linux/spinlock.h> | |
10431 | #include <asm/current.h> | |
10432 | #include <uapi/linux/wait.h> | |
10433 | +#include <linux/atomic.h> | |
10434 | ||
10435 | typedef struct __wait_queue wait_queue_t; | |
10436 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); | |
10437 | diff --git a/include/net/dst.h b/include/net/dst.h | |
10438 | index 6835d224d47b..55a5a9698f14 100644 | |
10439 | --- a/include/net/dst.h | |
10440 | +++ b/include/net/dst.h | |
10441 | @@ -446,7 +446,7 @@ static inline void dst_confirm(struct dst_entry *dst) | |
10442 | static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, | |
10443 | struct sk_buff *skb) | |
10444 | { | |
10445 | - const struct hh_cache *hh; | |
10446 | + struct hh_cache *hh; | |
10447 | ||
10448 | if (dst->pending_confirm) { | |
10449 | unsigned long now = jiffies; | |
10450 | diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h | |
10451 | index 231e121cc7d9..d125222b979d 100644 | |
10452 | --- a/include/net/gen_stats.h | |
10453 | +++ b/include/net/gen_stats.h | |
10454 | @@ -5,6 +5,7 @@ | |
10455 | #include <linux/socket.h> | |
10456 | #include <linux/rtnetlink.h> | |
10457 | #include <linux/pkt_sched.h> | |
10458 | +#include <net/net_seq_lock.h> | |
10459 | ||
10460 | struct gnet_stats_basic_cpu { | |
10461 | struct gnet_stats_basic_packed bstats; | |
10462 | @@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, | |
10463 | spinlock_t *lock, struct gnet_dump *d, | |
10464 | int padattr); | |
10465 | ||
10466 | -int gnet_stats_copy_basic(const seqcount_t *running, | |
10467 | +int gnet_stats_copy_basic(net_seqlock_t *running, | |
10468 | struct gnet_dump *d, | |
10469 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10470 | struct gnet_stats_basic_packed *b); | |
10471 | -void __gnet_stats_copy_basic(const seqcount_t *running, | |
10472 | +void __gnet_stats_copy_basic(net_seqlock_t *running, | |
10473 | struct gnet_stats_basic_packed *bstats, | |
10474 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10475 | struct gnet_stats_basic_packed *b); | |
10476 | @@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
10477 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10478 | struct gnet_stats_rate_est64 *rate_est, | |
10479 | spinlock_t *stats_lock, | |
10480 | - seqcount_t *running, struct nlattr *opt); | |
10481 | + net_seqlock_t *running, struct nlattr *opt); | |
10482 | void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, | |
10483 | struct gnet_stats_rate_est64 *rate_est); | |
10484 | int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
10485 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10486 | struct gnet_stats_rate_est64 *rate_est, | |
10487 | spinlock_t *stats_lock, | |
10488 | - seqcount_t *running, struct nlattr *opt); | |
10489 | + net_seqlock_t *running, struct nlattr *opt); | |
10490 | bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, | |
10491 | const struct gnet_stats_rate_est64 *rate_est); | |
10492 | #endif | |
10493 | diff --git a/include/net/neighbour.h b/include/net/neighbour.h | |
10494 | index 8b683841e574..bf656008f6e7 100644 | |
10495 | --- a/include/net/neighbour.h | |
10496 | +++ b/include/net/neighbour.h | |
10497 | @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) | |
10498 | } | |
10499 | #endif | |
10500 | ||
10501 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) | |
10502 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
10503 | { | |
10504 | unsigned int seq; | |
10505 | int hh_len; | |
10506 | @@ -501,7 +501,7 @@ struct neighbour_cb { | |
10507 | ||
10508 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | |
10509 | ||
10510 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | |
10511 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
10512 | const struct net_device *dev) | |
10513 | { | |
10514 | unsigned int seq; | |
10515 | diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h | |
10516 | new file mode 100644 | |
10517 | index 000000000000..a7034298a82a | |
10518 | --- /dev/null | |
10519 | +++ b/include/net/net_seq_lock.h | |
10520 | @@ -0,0 +1,15 @@ | |
10521 | +#ifndef __NET_NET_SEQ_LOCK_H__ | |
10522 | +#define __NET_NET_SEQ_LOCK_H__ | |
10523 | + | |
10524 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10525 | +# define net_seqlock_t seqlock_t | |
10526 | +# define net_seq_begin(__r) read_seqbegin(__r) | |
10527 | +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) | |
10528 | + | |
10529 | +#else | |
10530 | +# define net_seqlock_t seqcount_t | |
10531 | +# define net_seq_begin(__r) read_seqcount_begin(__r) | |
10532 | +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) | |
10533 | +#endif | |
10534 | + | |
10535 | +#endif | |
10536 | diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
10537 | index 7adf4386ac8f..d3fd5c357268 100644 | |
10538 | --- a/include/net/netns/ipv4.h | |
10539 | +++ b/include/net/netns/ipv4.h | |
10540 | @@ -69,6 +69,7 @@ struct netns_ipv4 { | |
10541 | ||
10542 | int sysctl_icmp_echo_ignore_all; | |
10543 | int sysctl_icmp_echo_ignore_broadcasts; | |
10544 | + int sysctl_icmp_echo_sysrq; | |
10545 | int sysctl_icmp_ignore_bogus_error_responses; | |
10546 | int sysctl_icmp_ratelimit; | |
10547 | int sysctl_icmp_ratemask; | |
10548 | diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h | |
10549 | index e6aa0a249672..b57736f2a8a3 100644 | |
10550 | --- a/include/net/sch_generic.h | |
10551 | +++ b/include/net/sch_generic.h | |
10552 | @@ -10,6 +10,7 @@ | |
10553 | #include <linux/dynamic_queue_limits.h> | |
10554 | #include <net/gen_stats.h> | |
10555 | #include <net/rtnetlink.h> | |
10556 | +#include <net/net_seq_lock.h> | |
10557 | ||
10558 | struct Qdisc_ops; | |
10559 | struct qdisc_walker; | |
10560 | @@ -86,7 +87,7 @@ struct Qdisc { | |
10561 | struct sk_buff *gso_skb ____cacheline_aligned_in_smp; | |
10562 | struct qdisc_skb_head q; | |
10563 | struct gnet_stats_basic_packed bstats; | |
10564 | - seqcount_t running; | |
10565 | + net_seqlock_t running; | |
10566 | struct gnet_stats_queue qstats; | |
10567 | unsigned long state; | |
10568 | struct Qdisc *next_sched; | |
10569 | @@ -98,13 +99,22 @@ struct Qdisc { | |
10570 | spinlock_t busylock ____cacheline_aligned_in_smp; | |
10571 | }; | |
10572 | ||
10573 | -static inline bool qdisc_is_running(const struct Qdisc *qdisc) | |
10574 | +static inline bool qdisc_is_running(struct Qdisc *qdisc) | |
10575 | { | |
10576 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10577 | + return spin_is_locked(&qdisc->running.lock) ? true : false; | |
10578 | +#else | |
10579 | return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; | |
10580 | +#endif | |
10581 | } | |
10582 | ||
10583 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
10584 | { | |
10585 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10586 | + if (try_write_seqlock(&qdisc->running)) | |
10587 | + return true; | |
10588 | + return false; | |
10589 | +#else | |
10590 | if (qdisc_is_running(qdisc)) | |
10591 | return false; | |
10592 | /* Variant of write_seqcount_begin() telling lockdep a trylock | |
10593 | @@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
10594 | raw_write_seqcount_begin(&qdisc->running); | |
10595 | seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); | |
10596 | return true; | |
10597 | +#endif | |
10598 | } | |
10599 | ||
10600 | static inline void qdisc_run_end(struct Qdisc *qdisc) | |
10601 | { | |
10602 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10603 | + write_sequnlock(&qdisc->running); | |
10604 | +#else | |
10605 | write_seqcount_end(&qdisc->running); | |
10606 | +#endif | |
10607 | } | |
10608 | ||
10609 | static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) | |
10610 | @@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) | |
10611 | return qdisc_lock(root); | |
10612 | } | |
10613 | ||
10614 | -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10615 | +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10616 | { | |
10617 | struct Qdisc *root = qdisc_root_sleeping(qdisc); | |
10618 | ||
10619 | diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h | |
10620 | new file mode 100644 | |
10621 | index 000000000000..f7710de1b1f3 | |
10622 | --- /dev/null | |
10623 | +++ b/include/trace/events/hist.h | |
10624 | @@ -0,0 +1,73 @@ | |
10625 | +#undef TRACE_SYSTEM | |
10626 | +#define TRACE_SYSTEM hist | |
10627 | + | |
10628 | +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) | |
10629 | +#define _TRACE_HIST_H | |
10630 | + | |
10631 | +#include "latency_hist.h" | |
10632 | +#include <linux/tracepoint.h> | |
10633 | + | |
10634 | +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) | |
10635 | +#define trace_preemptirqsoff_hist(a, b) | |
10636 | +#define trace_preemptirqsoff_hist_rcuidle(a, b) | |
10637 | +#else | |
10638 | +TRACE_EVENT(preemptirqsoff_hist, | |
10639 | + | |
10640 | + TP_PROTO(int reason, int starthist), | |
10641 | + | |
10642 | + TP_ARGS(reason, starthist), | |
10643 | + | |
10644 | + TP_STRUCT__entry( | |
10645 | + __field(int, reason) | |
10646 | + __field(int, starthist) | |
10647 | + ), | |
10648 | + | |
10649 | + TP_fast_assign( | |
10650 | + __entry->reason = reason; | |
10651 | + __entry->starthist = starthist; | |
10652 | + ), | |
10653 | + | |
10654 | + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), | |
10655 | + __entry->starthist ? "start" : "stop") | |
10656 | +); | |
10657 | +#endif | |
10658 | + | |
10659 | +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
10660 | +#define trace_hrtimer_interrupt(a, b, c, d) | |
10661 | +#else | |
10662 | +TRACE_EVENT(hrtimer_interrupt, | |
10663 | + | |
10664 | + TP_PROTO(int cpu, long long offset, struct task_struct *curr, | |
10665 | + struct task_struct *task), | |
10666 | + | |
10667 | + TP_ARGS(cpu, offset, curr, task), | |
10668 | + | |
10669 | + TP_STRUCT__entry( | |
10670 | + __field(int, cpu) | |
10671 | + __field(long long, offset) | |
10672 | + __array(char, ccomm, TASK_COMM_LEN) | |
10673 | + __field(int, cprio) | |
10674 | + __array(char, tcomm, TASK_COMM_LEN) | |
10675 | + __field(int, tprio) | |
10676 | + ), | |
10677 | + | |
10678 | + TP_fast_assign( | |
10679 | + __entry->cpu = cpu; | |
10680 | + __entry->offset = offset; | |
10681 | + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); | |
10682 | + __entry->cprio = curr->prio; | |
10683 | + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", | |
10684 | + task != NULL ? TASK_COMM_LEN : 7); | |
10685 | + __entry->tprio = task != NULL ? task->prio : -1; | |
10686 | + ), | |
10687 | + | |
10688 | + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", | |
10689 | + __entry->cpu, __entry->offset, __entry->ccomm, | |
10690 | + __entry->cprio, __entry->tcomm, __entry->tprio) | |
10691 | +); | |
10692 | +#endif | |
10693 | + | |
10694 | +#endif /* _TRACE_HIST_H */ | |
10695 | + | |
10696 | +/* This part must be outside protection */ | |
10697 | +#include <trace/define_trace.h> | |
10698 | diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h | |
10699 | new file mode 100644 | |
10700 | index 000000000000..d3f2fbd560b1 | |
10701 | --- /dev/null | |
10702 | +++ b/include/trace/events/latency_hist.h | |
10703 | @@ -0,0 +1,29 @@ | |
10704 | +#ifndef _LATENCY_HIST_H | |
10705 | +#define _LATENCY_HIST_H | |
10706 | + | |
10707 | +enum hist_action { | |
10708 | + IRQS_ON, | |
10709 | + PREEMPT_ON, | |
10710 | + TRACE_STOP, | |
10711 | + IRQS_OFF, | |
10712 | + PREEMPT_OFF, | |
10713 | + TRACE_START, | |
10714 | +}; | |
10715 | + | |
10716 | +static char *actions[] = { | |
10717 | + "IRQS_ON", | |
10718 | + "PREEMPT_ON", | |
10719 | + "TRACE_STOP", | |
10720 | + "IRQS_OFF", | |
10721 | + "PREEMPT_OFF", | |
10722 | + "TRACE_START", | |
10723 | +}; | |
10724 | + | |
10725 | +static inline char *getaction(int action) | |
10726 | +{ | |
10727 | + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0])) | |
10728 | + return actions[action]; | |
10729 | + return "unknown"; | |
10730 | +} | |
10731 | + | |
10732 | +#endif /* _LATENCY_HIST_H */ | |
10733 | diff --git a/init/Kconfig b/init/Kconfig | |
10734 | index 34407f15e6d3..2ce33a32e65d 100644 | |
10735 | --- a/init/Kconfig | |
10736 | +++ b/init/Kconfig | |
10737 | @@ -506,7 +506,7 @@ config TINY_RCU | |
10738 | ||
10739 | config RCU_EXPERT | |
10740 | bool "Make expert-level adjustments to RCU configuration" | |
10741 | - default n | |
10742 | + default y if PREEMPT_RT_FULL | |
10743 | help | |
10744 | This option needs to be enabled if you wish to make | |
10745 | expert-level adjustments to RCU configuration. By default, | |
10746 | @@ -623,7 +623,7 @@ config RCU_FANOUT_LEAF | |
10747 | ||
10748 | config RCU_FAST_NO_HZ | |
10749 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
10750 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
10751 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
10752 | default n | |
10753 | help | |
10754 | This option permits CPUs to enter dynticks-idle state even if | |
10755 | @@ -650,7 +650,7 @@ config TREE_RCU_TRACE | |
10756 | config RCU_BOOST | |
10757 | bool "Enable RCU priority boosting" | |
10758 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
10759 | - default n | |
10760 | + default y if PREEMPT_RT_FULL | |
10761 | help | |
10762 | This option boosts the priority of preempted RCU readers that | |
10763 | block the current preemptible RCU grace period for too long. | |
10764 | @@ -781,19 +781,6 @@ config RCU_NOCB_CPU_ALL | |
10765 | ||
10766 | endchoice | |
10767 | ||
10768 | -config RCU_EXPEDITE_BOOT | |
10769 | - bool | |
10770 | - default n | |
10771 | - help | |
10772 | - This option enables expedited grace periods at boot time, | |
10773 | - as if rcu_expedite_gp() had been invoked early in boot. | |
10774 | - The corresponding rcu_unexpedite_gp() is invoked from | |
10775 | - rcu_end_inkernel_boot(), which is intended to be invoked | |
10776 | - at the end of the kernel-only boot sequence, just before | |
10777 | - init is exec'ed. | |
10778 | - | |
10779 | - Accept the default if unsure. | |
10780 | - | |
10781 | endmenu # "RCU Subsystem" | |
10782 | ||
10783 | config BUILD_BIN2C | |
10784 | @@ -1064,6 +1051,7 @@ config CFS_BANDWIDTH | |
10785 | config RT_GROUP_SCHED | |
10786 | bool "Group scheduling for SCHED_RR/FIFO" | |
10787 | depends on CGROUP_SCHED | |
10788 | + depends on !PREEMPT_RT_FULL | |
10789 | default n | |
10790 | help | |
10791 | This feature lets you explicitly allocate real CPU bandwidth | |
10792 | @@ -1772,6 +1760,7 @@ choice | |
10793 | ||
10794 | config SLAB | |
10795 | bool "SLAB" | |
10796 | + depends on !PREEMPT_RT_FULL | |
10797 | select HAVE_HARDENED_USERCOPY_ALLOCATOR | |
10798 | help | |
10799 | The regular slab allocator that is established and known to work | |
10800 | @@ -1792,6 +1781,7 @@ config SLUB | |
10801 | config SLOB | |
10802 | depends on EXPERT | |
10803 | bool "SLOB (Simple Allocator)" | |
10804 | + depends on !PREEMPT_RT_FULL | |
10805 | help | |
10806 | SLOB replaces the stock allocator with a drastically simpler | |
10807 | allocator. SLOB is generally more space efficient but | |
10808 | @@ -1810,7 +1800,7 @@ config SLAB_FREELIST_RANDOM | |
10809 | ||
10810 | config SLUB_CPU_PARTIAL | |
10811 | default y | |
10812 | - depends on SLUB && SMP | |
10813 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
10814 | bool "SLUB per cpu partial cache" | |
10815 | help | |
10816 | Per cpu partial caches accellerate objects allocation and freeing | |
10817 | diff --git a/init/Makefile b/init/Makefile | |
10818 | index c4fb45525d08..821190dfaa75 100644 | |
10819 | --- a/init/Makefile | |
10820 | +++ b/init/Makefile | |
10821 | @@ -35,4 +35,4 @@ $(obj)/version.o: include/generated/compile.h | |
10822 | include/generated/compile.h: FORCE | |
10823 | @$($(quiet)chk_compile.h) | |
10824 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
10825 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
10826 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
10827 | diff --git a/init/main.c b/init/main.c | |
10828 | index 2858be732f6d..3c97c3c91d88 100644 | |
10829 | --- a/init/main.c | |
10830 | +++ b/init/main.c | |
10831 | @@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) | |
10832 | setup_command_line(command_line); | |
10833 | setup_nr_cpu_ids(); | |
10834 | setup_per_cpu_areas(); | |
10835 | + softirq_early_init(); | |
10836 | boot_cpu_state_init(); | |
10837 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
10838 | ||
10839 | diff --git a/ipc/sem.c b/ipc/sem.c | |
10840 | index 10b94bc59d4a..b8360eaacc7a 100644 | |
10841 | --- a/ipc/sem.c | |
10842 | +++ b/ipc/sem.c | |
10843 | @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) | |
10844 | static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10845 | struct sem_queue *q, int error) | |
10846 | { | |
10847 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10848 | + struct task_struct *p = q->sleeper; | |
10849 | + get_task_struct(p); | |
10850 | + q->status = error; | |
10851 | + wake_up_process(p); | |
10852 | + put_task_struct(p); | |
10853 | +#else | |
10854 | if (list_empty(pt)) { | |
10855 | /* | |
10856 | * Hold preempt off so that we don't get preempted and have the | |
10857 | @@ -723,6 +730,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10858 | q->pid = error; | |
10859 | ||
10860 | list_add_tail(&q->list, pt); | |
10861 | +#endif | |
10862 | } | |
10863 | ||
10864 | /** | |
10865 | @@ -736,6 +744,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
10866 | */ | |
10867 | static void wake_up_sem_queue_do(struct list_head *pt) | |
10868 | { | |
10869 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
10870 | struct sem_queue *q, *t; | |
10871 | int did_something; | |
10872 | ||
10873 | @@ -748,6 +757,7 @@ static void wake_up_sem_queue_do(struct list_head *pt) | |
10874 | } | |
10875 | if (did_something) | |
10876 | preempt_enable(); | |
10877 | +#endif | |
10878 | } | |
10879 | ||
10880 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |
10881 | diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks | |
10882 | index ebdb0043203a..b9e6aa7e5aa6 100644 | |
10883 | --- a/kernel/Kconfig.locks | |
10884 | +++ b/kernel/Kconfig.locks | |
10885 | @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW | |
10886 | ||
10887 | config MUTEX_SPIN_ON_OWNER | |
10888 | def_bool y | |
10889 | - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW | |
10890 | + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
10891 | ||
10892 | config RWSEM_SPIN_ON_OWNER | |
10893 | def_bool y | |
10894 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
10895 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
10896 | ||
10897 | config LOCK_SPIN_ON_OWNER | |
10898 | def_bool y | |
10899 | diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt | |
10900 | index 3f9c97419f02..11dbe26a8279 100644 | |
10901 | --- a/kernel/Kconfig.preempt | |
10902 | +++ b/kernel/Kconfig.preempt | |
10903 | @@ -1,3 +1,16 @@ | |
10904 | +config PREEMPT | |
10905 | + bool | |
10906 | + select PREEMPT_COUNT | |
10907 | + | |
10908 | +config PREEMPT_RT_BASE | |
10909 | + bool | |
10910 | + select PREEMPT | |
10911 | + | |
10912 | +config HAVE_PREEMPT_LAZY | |
10913 | + bool | |
10914 | + | |
10915 | +config PREEMPT_LAZY | |
10916 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
10917 | ||
10918 | choice | |
10919 | prompt "Preemption Model" | |
10920 | @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY | |
10921 | ||
10922 | Select this if you are building a kernel for a desktop system. | |
10923 | ||
10924 | -config PREEMPT | |
10925 | +config PREEMPT__LL | |
10926 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
10927 | - select PREEMPT_COUNT | |
10928 | + select PREEMPT | |
10929 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
10930 | help | |
10931 | This option reduces the latency of the kernel by making | |
10932 | @@ -52,6 +65,22 @@ config PREEMPT | |
10933 | embedded system with latency requirements in the milliseconds | |
10934 | range. | |
10935 | ||
10936 | +config PREEMPT_RTB | |
10937 | + bool "Preemptible Kernel (Basic RT)" | |
10938 | + select PREEMPT_RT_BASE | |
10939 | + help | |
10940 | + This option is basically the same as (Low-Latency Desktop) but | |
10941 | + enables changes which are preliminary for the full preemptible | |
10942 | + RT kernel. | |
10943 | + | |
10944 | +config PREEMPT_RT_FULL | |
10945 | + bool "Fully Preemptible Kernel (RT)" | |
10946 | + depends on IRQ_FORCED_THREADING | |
10947 | + select PREEMPT_RT_BASE | |
10948 | + select PREEMPT_RCU | |
10949 | + help | |
10950 | + All and everything | |
10951 | + | |
10952 | endchoice | |
10953 | ||
10954 | config PREEMPT_COUNT | |
10955 | diff --git a/kernel/cgroup.c b/kernel/cgroup.c | |
10956 | index 85bc9beb046d..3b8da75ba2e0 100644 | |
10957 | --- a/kernel/cgroup.c | |
10958 | +++ b/kernel/cgroup.c | |
10959 | @@ -5040,10 +5040,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) | |
10960 | queue_work(cgroup_destroy_wq, &css->destroy_work); | |
10961 | } | |
10962 | ||
10963 | -static void css_release_work_fn(struct work_struct *work) | |
10964 | +static void css_release_work_fn(struct swork_event *sev) | |
10965 | { | |
10966 | struct cgroup_subsys_state *css = | |
10967 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
10968 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
10969 | struct cgroup_subsys *ss = css->ss; | |
10970 | struct cgroup *cgrp = css->cgroup; | |
10971 | ||
10972 | @@ -5086,8 +5086,8 @@ static void css_release(struct percpu_ref *ref) | |
10973 | struct cgroup_subsys_state *css = | |
10974 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
10975 | ||
10976 | - INIT_WORK(&css->destroy_work, css_release_work_fn); | |
10977 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
10978 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
10979 | + swork_queue(&css->destroy_swork); | |
10980 | } | |
10981 | ||
10982 | static void init_and_link_css(struct cgroup_subsys_state *css, | |
10983 | @@ -5742,6 +5742,7 @@ static int __init cgroup_wq_init(void) | |
10984 | */ | |
10985 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
10986 | BUG_ON(!cgroup_destroy_wq); | |
10987 | + BUG_ON(swork_get()); | |
10988 | ||
10989 | /* | |
10990 | * Used to destroy pidlists and separate to serve as flush domain. | |
10991 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
10992 | index 217fd2e7f435..69444f1bc924 100644 | |
10993 | --- a/kernel/cpu.c | |
10994 | +++ b/kernel/cpu.c | |
10995 | @@ -239,6 +239,289 @@ static struct { | |
10996 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) | |
10997 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | |
10998 | ||
10999 | +/** | |
11000 | + * hotplug_pcp - per cpu hotplug descriptor | |
11001 | + * @unplug: set when pin_current_cpu() needs to sync tasks | |
11002 | + * @sync_tsk: the task that waits for tasks to finish pinned sections | |
11003 | + * @refcount: counter of tasks in pinned sections | |
11004 | + * @grab_lock: set when the tasks entering pinned sections should wait | |
11005 | + * @synced: notifier for @sync_tsk to tell cpu_down it's finished | |
11006 | + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) | |
11007 | + * @mutex_init: zero if the mutex hasn't been initialized yet. | |
11008 | + * | |
11009 | + * Although @unplug and @sync_tsk may point to the same task, the @unplug | |
11010 | + * is used as a flag and still exists after @sync_tsk has exited and | |
11011 | + * @sync_tsk set to NULL. | |
11012 | + */ | |
11013 | +struct hotplug_pcp { | |
11014 | + struct task_struct *unplug; | |
11015 | + struct task_struct *sync_tsk; | |
11016 | + int refcount; | |
11017 | + int grab_lock; | |
11018 | + struct completion synced; | |
11019 | + struct completion unplug_wait; | |
11020 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11021 | + /* | |
11022 | + * Note, on PREEMPT_RT, the hotplug lock must save the state of | |
11023 | + * the task, otherwise the mutex will cause the task to fail | |
11024 | + * to sleep when required. (Because it's called from migrate_disable()) | |
11025 | + * | |
11026 | + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's | |
11027 | + * state. | |
11028 | + */ | |
11029 | + spinlock_t lock; | |
11030 | +#else | |
11031 | + struct mutex mutex; | |
11032 | +#endif | |
11033 | + int mutex_init; | |
11034 | +}; | |
11035 | + | |
11036 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11037 | +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) | |
11038 | +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) | |
11039 | +#else | |
11040 | +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) | |
11041 | +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) | |
11042 | +#endif | |
11043 | + | |
11044 | +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); | |
11045 | + | |
11046 | +/** | |
11047 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
11048 | + * | |
11049 | + * Lightweight version of get_online_cpus() to prevent cpu from being | |
11050 | + * unplugged when code runs in a migration disabled region. | |
11051 | + * | |
11052 | + * Must be called with preemption disabled (preempt_count = 1)! | |
11053 | + */ | |
11054 | +void pin_current_cpu(void) | |
11055 | +{ | |
11056 | + struct hotplug_pcp *hp; | |
11057 | + int force = 0; | |
11058 | + | |
11059 | +retry: | |
11060 | + hp = this_cpu_ptr(&hotplug_pcp); | |
11061 | + | |
11062 | + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || | |
11063 | + hp->unplug == current) { | |
11064 | + hp->refcount++; | |
11065 | + return; | |
11066 | + } | |
11067 | + if (hp->grab_lock) { | |
11068 | + preempt_enable(); | |
11069 | + hotplug_lock(hp); | |
11070 | + hotplug_unlock(hp); | |
11071 | + } else { | |
11072 | + preempt_enable(); | |
11073 | + /* | |
11074 | + * Try to push this task off of this CPU. | |
11075 | + */ | |
11076 | + if (!migrate_me()) { | |
11077 | + preempt_disable(); | |
11078 | + hp = this_cpu_ptr(&hotplug_pcp); | |
11079 | + if (!hp->grab_lock) { | |
11080 | + /* | |
11081 | + * Just let it continue it's already pinned | |
11082 | + * or about to sleep. | |
11083 | + */ | |
11084 | + force = 1; | |
11085 | + goto retry; | |
11086 | + } | |
11087 | + preempt_enable(); | |
11088 | + } | |
11089 | + } | |
11090 | + preempt_disable(); | |
11091 | + goto retry; | |
11092 | +} | |
11093 | + | |
11094 | +/** | |
11095 | + * unpin_current_cpu - Allow unplug of current cpu | |
11096 | + * | |
11097 | + * Must be called with preemption or interrupts disabled! | |
11098 | + */ | |
11099 | +void unpin_current_cpu(void) | |
11100 | +{ | |
11101 | + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); | |
11102 | + | |
11103 | + WARN_ON(hp->refcount <= 0); | |
11104 | + | |
11105 | + /* This is safe. sync_unplug_thread is pinned to this cpu */ | |
11106 | + if (!--hp->refcount && hp->unplug && hp->unplug != current) | |
11107 | + wake_up_process(hp->unplug); | |
11108 | +} | |
11109 | + | |
11110 | +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | |
11111 | +{ | |
11112 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11113 | + while (hp->refcount) { | |
11114 | + schedule_preempt_disabled(); | |
11115 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11116 | + } | |
11117 | +} | |
11118 | + | |
11119 | +static int sync_unplug_thread(void *data) | |
11120 | +{ | |
11121 | + struct hotplug_pcp *hp = data; | |
11122 | + | |
11123 | + wait_for_completion(&hp->unplug_wait); | |
11124 | + preempt_disable(); | |
11125 | + hp->unplug = current; | |
11126 | + wait_for_pinned_cpus(hp); | |
11127 | + | |
11128 | + /* | |
11129 | + * This thread will synchronize the cpu_down() with threads | |
11130 | + * that have pinned the CPU. When the pinned CPU count reaches | |
11131 | + * zero, we inform the cpu_down code to continue to the next step. | |
11132 | + */ | |
11133 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11134 | + preempt_enable(); | |
11135 | + complete(&hp->synced); | |
11136 | + | |
11137 | + /* | |
11138 | + * If all succeeds, the next step will need tasks to wait till | |
11139 | + * the CPU is offline before continuing. To do this, the grab_lock | |
11140 | + * is set and tasks going into pin_current_cpu() will block on the | |
11141 | + * mutex. But we still need to wait for those that are already in | |
11142 | + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() | |
11143 | + * will kick this thread out. | |
11144 | + */ | |
11145 | + while (!hp->grab_lock && !kthread_should_stop()) { | |
11146 | + schedule(); | |
11147 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11148 | + } | |
11149 | + | |
11150 | + /* Make sure grab_lock is seen before we see a stale completion */ | |
11151 | + smp_mb(); | |
11152 | + | |
11153 | + /* | |
11154 | + * Now just before cpu_down() enters stop machine, we need to make | |
11155 | + * sure all tasks that are in pinned CPU sections are out, and new | |
11156 | + * tasks will now grab the lock, keeping them from entering pinned | |
11157 | + * CPU sections. | |
11158 | + */ | |
11159 | + if (!kthread_should_stop()) { | |
11160 | + preempt_disable(); | |
11161 | + wait_for_pinned_cpus(hp); | |
11162 | + preempt_enable(); | |
11163 | + complete(&hp->synced); | |
11164 | + } | |
11165 | + | |
11166 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11167 | + while (!kthread_should_stop()) { | |
11168 | + schedule(); | |
11169 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11170 | + } | |
11171 | + set_current_state(TASK_RUNNING); | |
11172 | + | |
11173 | + /* | |
11174 | + * Force this thread off this CPU as it's going down and | |
11175 | + * we don't want any more work on this CPU. | |
11176 | + */ | |
11177 | + current->flags &= ~PF_NO_SETAFFINITY; | |
11178 | + set_cpus_allowed_ptr(current, cpu_present_mask); | |
11179 | + migrate_me(); | |
11180 | + return 0; | |
11181 | +} | |
11182 | + | |
11183 | +static void __cpu_unplug_sync(struct hotplug_pcp *hp) | |
11184 | +{ | |
11185 | + wake_up_process(hp->sync_tsk); | |
11186 | + wait_for_completion(&hp->synced); | |
11187 | +} | |
11188 | + | |
11189 | +static void __cpu_unplug_wait(unsigned int cpu) | |
11190 | +{ | |
11191 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11192 | + | |
11193 | + complete(&hp->unplug_wait); | |
11194 | + wait_for_completion(&hp->synced); | |
11195 | +} | |
11196 | + | |
11197 | +/* | |
11198 | + * Start the sync_unplug_thread on the target cpu and wait for it to | |
11199 | + * complete. | |
11200 | + */ | |
11201 | +static int cpu_unplug_begin(unsigned int cpu) | |
11202 | +{ | |
11203 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11204 | + int err; | |
11205 | + | |
11206 | + /* Protected by cpu_hotplug.lock */ | |
11207 | + if (!hp->mutex_init) { | |
11208 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11209 | + spin_lock_init(&hp->lock); | |
11210 | +#else | |
11211 | + mutex_init(&hp->mutex); | |
11212 | +#endif | |
11213 | + hp->mutex_init = 1; | |
11214 | + } | |
11215 | + | |
11216 | + /* Inform the scheduler to migrate tasks off this CPU */ | |
11217 | + tell_sched_cpu_down_begin(cpu); | |
11218 | + | |
11219 | + init_completion(&hp->synced); | |
11220 | + init_completion(&hp->unplug_wait); | |
11221 | + | |
11222 | + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); | |
11223 | + if (IS_ERR(hp->sync_tsk)) { | |
11224 | + err = PTR_ERR(hp->sync_tsk); | |
11225 | + hp->sync_tsk = NULL; | |
11226 | + return err; | |
11227 | + } | |
11228 | + kthread_bind(hp->sync_tsk, cpu); | |
11229 | + | |
11230 | + /* | |
11231 | + * Wait for tasks to get out of the pinned sections, | |
11232 | + * it's still OK if new tasks enter. Some CPU notifiers will | |
11233 | + * wait for tasks that are going to enter these sections and | |
11234 | + * we must not have them block. | |
11235 | + */ | |
11236 | + wake_up_process(hp->sync_tsk); | |
11237 | + return 0; | |
11238 | +} | |
11239 | + | |
11240 | +static void cpu_unplug_sync(unsigned int cpu) | |
11241 | +{ | |
11242 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11243 | + | |
11244 | + init_completion(&hp->synced); | |
11245 | + /* The completion needs to be initialzied before setting grab_lock */ | |
11246 | + smp_wmb(); | |
11247 | + | |
11248 | + /* Grab the mutex before setting grab_lock */ | |
11249 | + hotplug_lock(hp); | |
11250 | + hp->grab_lock = 1; | |
11251 | + | |
11252 | + /* | |
11253 | + * The CPU notifiers have been completed. | |
11254 | + * Wait for tasks to get out of pinned CPU sections and have new | |
11255 | + * tasks block until the CPU is completely down. | |
11256 | + */ | |
11257 | + __cpu_unplug_sync(hp); | |
11258 | + | |
11259 | + /* All done with the sync thread */ | |
11260 | + kthread_stop(hp->sync_tsk); | |
11261 | + hp->sync_tsk = NULL; | |
11262 | +} | |
11263 | + | |
11264 | +static void cpu_unplug_done(unsigned int cpu) | |
11265 | +{ | |
11266 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11267 | + | |
11268 | + hp->unplug = NULL; | |
11269 | + /* Let all tasks know cpu unplug is finished before cleaning up */ | |
11270 | + smp_wmb(); | |
11271 | + | |
11272 | + if (hp->sync_tsk) | |
11273 | + kthread_stop(hp->sync_tsk); | |
11274 | + | |
11275 | + if (hp->grab_lock) { | |
11276 | + hotplug_unlock(hp); | |
11277 | + /* protected by cpu_hotplug.lock */ | |
11278 | + hp->grab_lock = 0; | |
11279 | + } | |
11280 | + tell_sched_cpu_down_done(cpu); | |
11281 | +} | |
11282 | ||
11283 | void get_online_cpus(void) | |
11284 | { | |
11285 | @@ -789,10 +1072,14 @@ static int takedown_cpu(unsigned int cpu) | |
11286 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | |
11287 | int err; | |
11288 | ||
11289 | + __cpu_unplug_wait(cpu); | |
11290 | /* Park the smpboot threads */ | |
11291 | kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); | |
11292 | smpboot_park_threads(cpu); | |
11293 | ||
11294 | + /* Notifiers are done. Don't let any more tasks pin this CPU. */ | |
11295 | + cpu_unplug_sync(cpu); | |
11296 | + | |
11297 | /* | |
11298 | * Prevent irq alloc/free while the dying cpu reorganizes the | |
11299 | * interrupt affinities. | |
11300 | @@ -877,6 +1164,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
11301 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | |
11302 | int prev_state, ret = 0; | |
11303 | bool hasdied = false; | |
11304 | + int mycpu; | |
11305 | + cpumask_var_t cpumask; | |
11306 | + cpumask_var_t cpumask_org; | |
11307 | ||
11308 | if (num_online_cpus() == 1) | |
11309 | return -EBUSY; | |
11310 | @@ -884,7 +1174,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
11311 | if (!cpu_present(cpu)) | |
11312 | return -EINVAL; | |
11313 | ||
11314 | + /* Move the downtaker off the unplug cpu */ | |
11315 | + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) | |
11316 | + return -ENOMEM; | |
11317 | + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { | |
11318 | + free_cpumask_var(cpumask); | |
11319 | + return -ENOMEM; | |
11320 | + } | |
11321 | + | |
11322 | + cpumask_copy(cpumask_org, tsk_cpus_allowed(current)); | |
11323 | + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); | |
11324 | + set_cpus_allowed_ptr(current, cpumask); | |
11325 | + free_cpumask_var(cpumask); | |
11326 | + migrate_disable(); | |
11327 | + mycpu = smp_processor_id(); | |
11328 | + if (mycpu == cpu) { | |
11329 | + printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); | |
11330 | + migrate_enable(); | |
11331 | + ret = -EBUSY; | |
11332 | + goto restore_cpus; | |
11333 | + } | |
11334 | + | |
11335 | + migrate_enable(); | |
11336 | cpu_hotplug_begin(); | |
11337 | + ret = cpu_unplug_begin(cpu); | |
11338 | + if (ret) { | |
11339 | + printk("cpu_unplug_begin(%d) failed\n", cpu); | |
11340 | + goto out_cancel; | |
11341 | + } | |
11342 | ||
11343 | cpuhp_tasks_frozen = tasks_frozen; | |
11344 | ||
11345 | @@ -923,10 +1240,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
11346 | ||
11347 | hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; | |
11348 | out: | |
11349 | + cpu_unplug_done(cpu); | |
11350 | +out_cancel: | |
11351 | cpu_hotplug_done(); | |
11352 | /* This post dead nonsense must die */ | |
11353 | if (!ret && hasdied) | |
11354 | cpu_notify_nofail(CPU_POST_DEAD, cpu); | |
11355 | +restore_cpus: | |
11356 | + set_cpus_allowed_ptr(current, cpumask_org); | |
11357 | + free_cpumask_var(cpumask_org); | |
11358 | return ret; | |
11359 | } | |
11360 | ||
11361 | diff --git a/kernel/cpuset.c b/kernel/cpuset.c | |
11362 | index 29f815d2ef7e..341b17f24f95 100644 | |
11363 | --- a/kernel/cpuset.c | |
11364 | +++ b/kernel/cpuset.c | |
11365 | @@ -284,7 +284,7 @@ static struct cpuset top_cpuset = { | |
11366 | */ | |
11367 | ||
11368 | static DEFINE_MUTEX(cpuset_mutex); | |
11369 | -static DEFINE_SPINLOCK(callback_lock); | |
11370 | +static DEFINE_RAW_SPINLOCK(callback_lock); | |
11371 | ||
11372 | static struct workqueue_struct *cpuset_migrate_mm_wq; | |
11373 | ||
11374 | @@ -907,9 +907,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) | |
11375 | continue; | |
11376 | rcu_read_unlock(); | |
11377 | ||
11378 | - spin_lock_irq(&callback_lock); | |
11379 | + raw_spin_lock_irq(&callback_lock); | |
11380 | cpumask_copy(cp->effective_cpus, new_cpus); | |
11381 | - spin_unlock_irq(&callback_lock); | |
11382 | + raw_spin_unlock_irq(&callback_lock); | |
11383 | ||
11384 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | |
11385 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); | |
11386 | @@ -974,9 +974,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |
11387 | if (retval < 0) | |
11388 | return retval; | |
11389 | ||
11390 | - spin_lock_irq(&callback_lock); | |
11391 | + raw_spin_lock_irq(&callback_lock); | |
11392 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); | |
11393 | - spin_unlock_irq(&callback_lock); | |
11394 | + raw_spin_unlock_irq(&callback_lock); | |
11395 | ||
11396 | /* use trialcs->cpus_allowed as a temp variable */ | |
11397 | update_cpumasks_hier(cs, trialcs->cpus_allowed); | |
11398 | @@ -1176,9 +1176,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) | |
11399 | continue; | |
11400 | rcu_read_unlock(); | |
11401 | ||
11402 | - spin_lock_irq(&callback_lock); | |
11403 | + raw_spin_lock_irq(&callback_lock); | |
11404 | cp->effective_mems = *new_mems; | |
11405 | - spin_unlock_irq(&callback_lock); | |
11406 | + raw_spin_unlock_irq(&callback_lock); | |
11407 | ||
11408 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | |
11409 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); | |
11410 | @@ -1246,9 +1246,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |
11411 | if (retval < 0) | |
11412 | goto done; | |
11413 | ||
11414 | - spin_lock_irq(&callback_lock); | |
11415 | + raw_spin_lock_irq(&callback_lock); | |
11416 | cs->mems_allowed = trialcs->mems_allowed; | |
11417 | - spin_unlock_irq(&callback_lock); | |
11418 | + raw_spin_unlock_irq(&callback_lock); | |
11419 | ||
11420 | /* use trialcs->mems_allowed as a temp variable */ | |
11421 | update_nodemasks_hier(cs, &trialcs->mems_allowed); | |
11422 | @@ -1339,9 +1339,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |
11423 | spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) | |
11424 | || (is_spread_page(cs) != is_spread_page(trialcs))); | |
11425 | ||
11426 | - spin_lock_irq(&callback_lock); | |
11427 | + raw_spin_lock_irq(&callback_lock); | |
11428 | cs->flags = trialcs->flags; | |
11429 | - spin_unlock_irq(&callback_lock); | |
11430 | + raw_spin_unlock_irq(&callback_lock); | |
11431 | ||
11432 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) | |
11433 | rebuild_sched_domains_locked(); | |
11434 | @@ -1756,7 +1756,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
11435 | cpuset_filetype_t type = seq_cft(sf)->private; | |
11436 | int ret = 0; | |
11437 | ||
11438 | - spin_lock_irq(&callback_lock); | |
11439 | + raw_spin_lock_irq(&callback_lock); | |
11440 | ||
11441 | switch (type) { | |
11442 | case FILE_CPULIST: | |
11443 | @@ -1775,7 +1775,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
11444 | ret = -EINVAL; | |
11445 | } | |
11446 | ||
11447 | - spin_unlock_irq(&callback_lock); | |
11448 | + raw_spin_unlock_irq(&callback_lock); | |
11449 | return ret; | |
11450 | } | |
11451 | ||
11452 | @@ -1989,12 +1989,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |
11453 | ||
11454 | cpuset_inc(); | |
11455 | ||
11456 | - spin_lock_irq(&callback_lock); | |
11457 | + raw_spin_lock_irq(&callback_lock); | |
11458 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | |
11459 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); | |
11460 | cs->effective_mems = parent->effective_mems; | |
11461 | } | |
11462 | - spin_unlock_irq(&callback_lock); | |
11463 | + raw_spin_unlock_irq(&callback_lock); | |
11464 | ||
11465 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) | |
11466 | goto out_unlock; | |
11467 | @@ -2021,12 +2021,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |
11468 | } | |
11469 | rcu_read_unlock(); | |
11470 | ||
11471 | - spin_lock_irq(&callback_lock); | |
11472 | + raw_spin_lock_irq(&callback_lock); | |
11473 | cs->mems_allowed = parent->mems_allowed; | |
11474 | cs->effective_mems = parent->mems_allowed; | |
11475 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); | |
11476 | cpumask_copy(cs->effective_cpus, parent->cpus_allowed); | |
11477 | - spin_unlock_irq(&callback_lock); | |
11478 | + raw_spin_unlock_irq(&callback_lock); | |
11479 | out_unlock: | |
11480 | mutex_unlock(&cpuset_mutex); | |
11481 | return 0; | |
11482 | @@ -2065,7 +2065,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) | |
11483 | static void cpuset_bind(struct cgroup_subsys_state *root_css) | |
11484 | { | |
11485 | mutex_lock(&cpuset_mutex); | |
11486 | - spin_lock_irq(&callback_lock); | |
11487 | + raw_spin_lock_irq(&callback_lock); | |
11488 | ||
11489 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | |
11490 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); | |
11491 | @@ -2076,7 +2076,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) | |
11492 | top_cpuset.mems_allowed = top_cpuset.effective_mems; | |
11493 | } | |
11494 | ||
11495 | - spin_unlock_irq(&callback_lock); | |
11496 | + raw_spin_unlock_irq(&callback_lock); | |
11497 | mutex_unlock(&cpuset_mutex); | |
11498 | } | |
11499 | ||
11500 | @@ -2177,12 +2177,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, | |
11501 | { | |
11502 | bool is_empty; | |
11503 | ||
11504 | - spin_lock_irq(&callback_lock); | |
11505 | + raw_spin_lock_irq(&callback_lock); | |
11506 | cpumask_copy(cs->cpus_allowed, new_cpus); | |
11507 | cpumask_copy(cs->effective_cpus, new_cpus); | |
11508 | cs->mems_allowed = *new_mems; | |
11509 | cs->effective_mems = *new_mems; | |
11510 | - spin_unlock_irq(&callback_lock); | |
11511 | + raw_spin_unlock_irq(&callback_lock); | |
11512 | ||
11513 | /* | |
11514 | * Don't call update_tasks_cpumask() if the cpuset becomes empty, | |
11515 | @@ -2219,10 +2219,10 @@ hotplug_update_tasks(struct cpuset *cs, | |
11516 | if (nodes_empty(*new_mems)) | |
11517 | *new_mems = parent_cs(cs)->effective_mems; | |
11518 | ||
11519 | - spin_lock_irq(&callback_lock); | |
11520 | + raw_spin_lock_irq(&callback_lock); | |
11521 | cpumask_copy(cs->effective_cpus, new_cpus); | |
11522 | cs->effective_mems = *new_mems; | |
11523 | - spin_unlock_irq(&callback_lock); | |
11524 | + raw_spin_unlock_irq(&callback_lock); | |
11525 | ||
11526 | if (cpus_updated) | |
11527 | update_tasks_cpumask(cs); | |
11528 | @@ -2308,21 +2308,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |
11529 | ||
11530 | /* synchronize cpus_allowed to cpu_active_mask */ | |
11531 | if (cpus_updated) { | |
11532 | - spin_lock_irq(&callback_lock); | |
11533 | + raw_spin_lock_irq(&callback_lock); | |
11534 | if (!on_dfl) | |
11535 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); | |
11536 | cpumask_copy(top_cpuset.effective_cpus, &new_cpus); | |
11537 | - spin_unlock_irq(&callback_lock); | |
11538 | + raw_spin_unlock_irq(&callback_lock); | |
11539 | /* we don't mess with cpumasks of tasks in top_cpuset */ | |
11540 | } | |
11541 | ||
11542 | /* synchronize mems_allowed to N_MEMORY */ | |
11543 | if (mems_updated) { | |
11544 | - spin_lock_irq(&callback_lock); | |
11545 | + raw_spin_lock_irq(&callback_lock); | |
11546 | if (!on_dfl) | |
11547 | top_cpuset.mems_allowed = new_mems; | |
11548 | top_cpuset.effective_mems = new_mems; | |
11549 | - spin_unlock_irq(&callback_lock); | |
11550 | + raw_spin_unlock_irq(&callback_lock); | |
11551 | update_tasks_nodemask(&top_cpuset); | |
11552 | } | |
11553 | ||
11554 | @@ -2420,11 +2420,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | |
11555 | { | |
11556 | unsigned long flags; | |
11557 | ||
11558 | - spin_lock_irqsave(&callback_lock, flags); | |
11559 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11560 | rcu_read_lock(); | |
11561 | guarantee_online_cpus(task_cs(tsk), pmask); | |
11562 | rcu_read_unlock(); | |
11563 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11564 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11565 | } | |
11566 | ||
11567 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |
11568 | @@ -2472,11 +2472,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) | |
11569 | nodemask_t mask; | |
11570 | unsigned long flags; | |
11571 | ||
11572 | - spin_lock_irqsave(&callback_lock, flags); | |
11573 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11574 | rcu_read_lock(); | |
11575 | guarantee_online_mems(task_cs(tsk), &mask); | |
11576 | rcu_read_unlock(); | |
11577 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11578 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11579 | ||
11580 | return mask; | |
11581 | } | |
11582 | @@ -2568,14 +2568,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) | |
11583 | return true; | |
11584 | ||
11585 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | |
11586 | - spin_lock_irqsave(&callback_lock, flags); | |
11587 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11588 | ||
11589 | rcu_read_lock(); | |
11590 | cs = nearest_hardwall_ancestor(task_cs(current)); | |
11591 | allowed = node_isset(node, cs->mems_allowed); | |
11592 | rcu_read_unlock(); | |
11593 | ||
11594 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11595 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11596 | return allowed; | |
11597 | } | |
11598 | ||
11599 | diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c | |
11600 | index fc1ef736253c..83c666537a7a 100644 | |
11601 | --- a/kernel/debug/kdb/kdb_io.c | |
11602 | +++ b/kernel/debug/kdb/kdb_io.c | |
11603 | @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11604 | int linecount; | |
11605 | int colcount; | |
11606 | int logging, saved_loglevel = 0; | |
11607 | - int saved_trap_printk; | |
11608 | int got_printf_lock = 0; | |
11609 | int retlen = 0; | |
11610 | int fnd, len; | |
11611 | @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11612 | unsigned long uninitialized_var(flags); | |
11613 | ||
11614 | preempt_disable(); | |
11615 | - saved_trap_printk = kdb_trap_printk; | |
11616 | - kdb_trap_printk = 0; | |
11617 | ||
11618 | /* Serialize kdb_printf if multiple cpus try to write at once. | |
11619 | * But if any cpu goes recursive in kdb, just print the output, | |
11620 | @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11621 | } else { | |
11622 | __release(kdb_printf_lock); | |
11623 | } | |
11624 | - kdb_trap_printk = saved_trap_printk; | |
11625 | preempt_enable(); | |
11626 | return retlen; | |
11627 | } | |
11628 | @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...) | |
11629 | va_list ap; | |
11630 | int r; | |
11631 | ||
11632 | + kdb_trap_printk++; | |
11633 | va_start(ap, fmt); | |
11634 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
11635 | va_end(ap); | |
11636 | + kdb_trap_printk--; | |
11637 | ||
11638 | return r; | |
11639 | } | |
11640 | diff --git a/kernel/events/core.c b/kernel/events/core.c | |
11641 | index 02c8421f8c01..3748cb7b2d6e 100644 | |
11642 | --- a/kernel/events/core.c | |
11643 | +++ b/kernel/events/core.c | |
11644 | @@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) | |
11645 | raw_spin_lock_init(&cpuctx->hrtimer_lock); | |
11646 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
11647 | timer->function = perf_mux_hrtimer_handler; | |
11648 | + timer->irqsafe = 1; | |
11649 | } | |
11650 | ||
11651 | static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |
11652 | @@ -8335,6 +8336,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) | |
11653 | ||
11654 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
11655 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
11656 | + hwc->hrtimer.irqsafe = 1; | |
11657 | ||
11658 | /* | |
11659 | * Since hrtimers have a fixed rate, we can do a static freq->period | |
11660 | diff --git a/kernel/exit.c b/kernel/exit.c | |
11661 | index 3076f3089919..fb2ebcf3ca7c 100644 | |
11662 | --- a/kernel/exit.c | |
11663 | +++ b/kernel/exit.c | |
11664 | @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) | |
11665 | * Do this under ->siglock, we can race with another thread | |
11666 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
11667 | */ | |
11668 | - flush_sigqueue(&tsk->pending); | |
11669 | + flush_task_sigqueue(tsk); | |
11670 | tsk->sighand = NULL; | |
11671 | spin_unlock(&sighand->siglock); | |
11672 | ||
11673 | diff --git a/kernel/fork.c b/kernel/fork.c | |
11674 | index ba8a01564985..47784f8aed37 100644 | |
11675 | --- a/kernel/fork.c | |
11676 | +++ b/kernel/fork.c | |
11677 | @@ -76,6 +76,7 @@ | |
11678 | #include <linux/compiler.h> | |
11679 | #include <linux/sysctl.h> | |
11680 | #include <linux/kcov.h> | |
11681 | +#include <linux/kprobes.h> | |
11682 | ||
11683 | #include <asm/pgtable.h> | |
11684 | #include <asm/pgalloc.h> | |
11685 | @@ -376,13 +377,24 @@ static inline void put_signal_struct(struct signal_struct *sig) | |
11686 | if (atomic_dec_and_test(&sig->sigcnt)) | |
11687 | free_signal_struct(sig); | |
11688 | } | |
11689 | - | |
11690 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11691 | +static | |
11692 | +#endif | |
11693 | void __put_task_struct(struct task_struct *tsk) | |
11694 | { | |
11695 | WARN_ON(!tsk->exit_state); | |
11696 | WARN_ON(atomic_read(&tsk->usage)); | |
11697 | WARN_ON(tsk == current); | |
11698 | ||
11699 | + /* | |
11700 | + * Remove function-return probe instances associated with this | |
11701 | + * task and put them back on the free list. | |
11702 | + */ | |
11703 | + kprobe_flush_task(tsk); | |
11704 | + | |
11705 | + /* Task is done with its stack. */ | |
11706 | + put_task_stack(tsk); | |
11707 | + | |
11708 | cgroup_free(tsk); | |
11709 | task_numa_free(tsk); | |
11710 | security_task_free(tsk); | |
11711 | @@ -393,7 +405,18 @@ void __put_task_struct(struct task_struct *tsk) | |
11712 | if (!profile_handoff_task(tsk)) | |
11713 | free_task(tsk); | |
11714 | } | |
11715 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11716 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
11717 | +#else | |
11718 | +void __put_task_struct_cb(struct rcu_head *rhp) | |
11719 | +{ | |
11720 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
11721 | + | |
11722 | + __put_task_struct(tsk); | |
11723 | + | |
11724 | +} | |
11725 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
11726 | +#endif | |
11727 | ||
11728 | void __init __weak arch_task_cache_init(void) { } | |
11729 | ||
11730 | @@ -852,6 +875,19 @@ void __mmdrop(struct mm_struct *mm) | |
11731 | } | |
11732 | EXPORT_SYMBOL_GPL(__mmdrop); | |
11733 | ||
11734 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11735 | +/* | |
11736 | + * RCU callback for delayed mm drop. Not strictly rcu, but we don't | |
11737 | + * want another facility to make this work. | |
11738 | + */ | |
11739 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
11740 | +{ | |
11741 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
11742 | + | |
11743 | + __mmdrop(mm); | |
11744 | +} | |
11745 | +#endif | |
11746 | + | |
11747 | static inline void __mmput(struct mm_struct *mm) | |
11748 | { | |
11749 | VM_BUG_ON(atomic_read(&mm->mm_users)); | |
11750 | @@ -1426,6 +1462,9 @@ static void rt_mutex_init_task(struct task_struct *p) | |
11751 | */ | |
11752 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
11753 | { | |
11754 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11755 | + tsk->posix_timer_list = NULL; | |
11756 | +#endif | |
11757 | tsk->cputime_expires.prof_exp = 0; | |
11758 | tsk->cputime_expires.virt_exp = 0; | |
11759 | tsk->cputime_expires.sched_exp = 0; | |
11760 | @@ -1552,6 +1591,7 @@ static __latent_entropy struct task_struct *copy_process( | |
11761 | spin_lock_init(&p->alloc_lock); | |
11762 | ||
11763 | init_sigpending(&p->pending); | |
11764 | + p->sigqueue_cache = NULL; | |
11765 | ||
11766 | p->utime = p->stime = p->gtime = 0; | |
11767 | p->utimescaled = p->stimescaled = 0; | |
11768 | diff --git a/kernel/futex.c b/kernel/futex.c | |
11769 | index 2c4be467fecd..064917c2d9a5 100644 | |
11770 | --- a/kernel/futex.c | |
11771 | +++ b/kernel/futex.c | |
11772 | @@ -904,7 +904,9 @@ void exit_pi_state_list(struct task_struct *curr) | |
11773 | * task still owns the PI-state: | |
11774 | */ | |
11775 | if (head->next != next) { | |
11776 | + raw_spin_unlock_irq(&curr->pi_lock); | |
11777 | spin_unlock(&hb->lock); | |
11778 | + raw_spin_lock_irq(&curr->pi_lock); | |
11779 | continue; | |
11780 | } | |
11781 | ||
11782 | @@ -1299,6 +1301,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
11783 | struct futex_pi_state *pi_state = this->pi_state; | |
11784 | u32 uninitialized_var(curval), newval; | |
11785 | WAKE_Q(wake_q); | |
11786 | + WAKE_Q(wake_sleeper_q); | |
11787 | bool deboost; | |
11788 | int ret = 0; | |
11789 | ||
11790 | @@ -1365,7 +1368,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
11791 | ||
11792 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
11793 | ||
11794 | - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
11795 | + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, | |
11796 | + &wake_sleeper_q); | |
11797 | ||
11798 | /* | |
11799 | * First unlock HB so the waiter does not spin on it once he got woken | |
11800 | @@ -1373,8 +1377,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
11801 | * deboost first (and lose our higher priority), then the task might get | |
11802 | * scheduled away before the wake up can take place. | |
11803 | */ | |
11804 | - spin_unlock(&hb->lock); | |
11805 | + deboost |= spin_unlock_no_deboost(&hb->lock); | |
11806 | wake_up_q(&wake_q); | |
11807 | + wake_up_q_sleeper(&wake_sleeper_q); | |
11808 | if (deboost) | |
11809 | rt_mutex_adjust_prio(current); | |
11810 | ||
11811 | @@ -1924,6 +1929,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |
11812 | requeue_pi_wake_futex(this, &key2, hb2); | |
11813 | drop_count++; | |
11814 | continue; | |
11815 | + } else if (ret == -EAGAIN) { | |
11816 | + /* | |
11817 | + * Waiter was woken by timeout or | |
11818 | + * signal and has set pi_blocked_on to | |
11819 | + * PI_WAKEUP_INPROGRESS before we | |
11820 | + * tried to enqueue it on the rtmutex. | |
11821 | + */ | |
11822 | + this->pi_state = NULL; | |
11823 | + put_pi_state(pi_state); | |
11824 | + continue; | |
11825 | } else if (ret) { | |
11826 | /* | |
11827 | * rt_mutex_start_proxy_lock() detected a | |
11828 | @@ -2814,7 +2829,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
11829 | struct hrtimer_sleeper timeout, *to = NULL; | |
11830 | struct rt_mutex_waiter rt_waiter; | |
11831 | struct rt_mutex *pi_mutex = NULL; | |
11832 | - struct futex_hash_bucket *hb; | |
11833 | + struct futex_hash_bucket *hb, *hb2; | |
11834 | union futex_key key2 = FUTEX_KEY_INIT; | |
11835 | struct futex_q q = futex_q_init; | |
11836 | int res, ret; | |
11837 | @@ -2839,10 +2854,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
11838 | * The waiter is allocated on our stack, manipulated by the requeue | |
11839 | * code while we sleep on uaddr. | |
11840 | */ | |
11841 | - debug_rt_mutex_init_waiter(&rt_waiter); | |
11842 | - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); | |
11843 | - RB_CLEAR_NODE(&rt_waiter.tree_entry); | |
11844 | - rt_waiter.task = NULL; | |
11845 | + rt_mutex_init_waiter(&rt_waiter, false); | |
11846 | ||
11847 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); | |
11848 | if (unlikely(ret != 0)) | |
11849 | @@ -2873,20 +2885,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
11850 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | |
11851 | futex_wait_queue_me(hb, &q, to); | |
11852 | ||
11853 | - spin_lock(&hb->lock); | |
11854 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
11855 | - spin_unlock(&hb->lock); | |
11856 | - if (ret) | |
11857 | - goto out_put_keys; | |
11858 | + /* | |
11859 | + * On RT we must avoid races with requeue and trying to block | |
11860 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
11861 | + * serializing access to pi_blocked_on with pi_lock. | |
11862 | + */ | |
11863 | + raw_spin_lock_irq(¤t->pi_lock); | |
11864 | + if (current->pi_blocked_on) { | |
11865 | + /* | |
11866 | + * We have been requeued or are in the process of | |
11867 | + * being requeued. | |
11868 | + */ | |
11869 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11870 | + } else { | |
11871 | + /* | |
11872 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
11873 | + * prevents a concurrent requeue from moving us to the | |
11874 | + * uaddr2 rtmutex. After that we can safely acquire | |
11875 | + * (and possibly block on) hb->lock. | |
11876 | + */ | |
11877 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
11878 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11879 | + | |
11880 | + spin_lock(&hb->lock); | |
11881 | + | |
11882 | + /* | |
11883 | + * Clean up pi_blocked_on. We might leak it otherwise | |
11884 | + * when we succeeded with the hb->lock in the fast | |
11885 | + * path. | |
11886 | + */ | |
11887 | + raw_spin_lock_irq(¤t->pi_lock); | |
11888 | + current->pi_blocked_on = NULL; | |
11889 | + raw_spin_unlock_irq(¤t->pi_lock); | |
11890 | + | |
11891 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
11892 | + spin_unlock(&hb->lock); | |
11893 | + if (ret) | |
11894 | + goto out_put_keys; | |
11895 | + } | |
11896 | ||
11897 | /* | |
11898 | - * In order for us to be here, we know our q.key == key2, and since | |
11899 | - * we took the hb->lock above, we also know that futex_requeue() has | |
11900 | - * completed and we no longer have to concern ourselves with a wakeup | |
11901 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
11902 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
11903 | - * reference count. | |
11904 | + * In order to be here, we have either been requeued, are in | |
11905 | + * the process of being requeued, or requeue successfully | |
11906 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
11907 | + * non-null above, we may be racing with a requeue. Do not | |
11908 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
11909 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
11910 | + * reference and incremented our key2 reference count. | |
11911 | */ | |
11912 | + hb2 = hash_futex(&key2); | |
11913 | ||
11914 | /* Check if the requeue code acquired the second futex for us. */ | |
11915 | if (!q.rt_waiter) { | |
11916 | @@ -2895,14 +2942,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
11917 | * did a lock-steal - fix up the PI-state in that case. | |
11918 | */ | |
11919 | if (q.pi_state && (q.pi_state->owner != current)) { | |
11920 | - spin_lock(q.lock_ptr); | |
11921 | + spin_lock(&hb2->lock); | |
11922 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
11923 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
11924 | /* | |
11925 | * Drop the reference to the pi state which | |
11926 | * the requeue_pi() code acquired for us. | |
11927 | */ | |
11928 | put_pi_state(q.pi_state); | |
11929 | - spin_unlock(q.lock_ptr); | |
11930 | + spin_unlock(&hb2->lock); | |
11931 | } | |
11932 | } else { | |
11933 | /* | |
11934 | @@ -2915,7 +2963,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
11935 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); | |
11936 | debug_rt_mutex_free_waiter(&rt_waiter); | |
11937 | ||
11938 | - spin_lock(q.lock_ptr); | |
11939 | + spin_lock(&hb2->lock); | |
11940 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
11941 | /* | |
11942 | * Fixup the pi_state owner and possibly acquire the lock if we | |
11943 | * haven't already. | |
11944 | diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c | |
11945 | index d3f24905852c..f87aa8fdcc51 100644 | |
11946 | --- a/kernel/irq/handle.c | |
11947 | +++ b/kernel/irq/handle.c | |
11948 | @@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
11949 | { | |
11950 | irqreturn_t retval; | |
11951 | unsigned int flags = 0; | |
11952 | + struct pt_regs *regs = get_irq_regs(); | |
11953 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
11954 | ||
11955 | retval = __handle_irq_event_percpu(desc, &flags); | |
11956 | ||
11957 | - add_interrupt_randomness(desc->irq_data.irq, flags); | |
11958 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11959 | + desc->random_ip = ip; | |
11960 | +#else | |
11961 | + add_interrupt_randomness(desc->irq_data.irq, flags, ip); | |
11962 | +#endif | |
11963 | ||
11964 | if (!noirqdebug) | |
11965 | note_interrupt(desc, retval); | |
11966 | diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c | |
11967 | index 6b669593e7eb..e357bf6c59d5 100644 | |
11968 | --- a/kernel/irq/manage.c | |
11969 | +++ b/kernel/irq/manage.c | |
11970 | @@ -22,6 +22,7 @@ | |
11971 | #include "internals.h" | |
11972 | ||
11973 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
11974 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
11975 | __read_mostly bool force_irqthreads; | |
11976 | ||
11977 | static int __init setup_forced_irqthreads(char *arg) | |
11978 | @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg) | |
11979 | return 0; | |
11980 | } | |
11981 | early_param("threadirqs", setup_forced_irqthreads); | |
11982 | +# endif | |
11983 | #endif | |
11984 | ||
11985 | static void __synchronize_hardirq(struct irq_desc *desc) | |
11986 | @@ -233,7 +235,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, | |
11987 | ||
11988 | if (desc->affinity_notify) { | |
11989 | kref_get(&desc->affinity_notify->kref); | |
11990 | + | |
11991 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11992 | + swork_queue(&desc->affinity_notify->swork); | |
11993 | +#else | |
11994 | schedule_work(&desc->affinity_notify->work); | |
11995 | +#endif | |
11996 | } | |
11997 | irqd_set(data, IRQD_AFFINITY_SET); | |
11998 | ||
11999 | @@ -271,10 +278,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |
12000 | } | |
12001 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
12002 | ||
12003 | -static void irq_affinity_notify(struct work_struct *work) | |
12004 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
12005 | { | |
12006 | - struct irq_affinity_notify *notify = | |
12007 | - container_of(work, struct irq_affinity_notify, work); | |
12008 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
12009 | cpumask_var_t cpumask; | |
12010 | unsigned long flags; | |
12011 | @@ -296,6 +301,35 @@ static void irq_affinity_notify(struct work_struct *work) | |
12012 | kref_put(¬ify->kref, notify->release); | |
12013 | } | |
12014 | ||
12015 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12016 | +static void init_helper_thread(void) | |
12017 | +{ | |
12018 | + static int init_sworker_once; | |
12019 | + | |
12020 | + if (init_sworker_once) | |
12021 | + return; | |
12022 | + if (WARN_ON(swork_get())) | |
12023 | + return; | |
12024 | + init_sworker_once = 1; | |
12025 | +} | |
12026 | + | |
12027 | +static void irq_affinity_notify(struct swork_event *swork) | |
12028 | +{ | |
12029 | + struct irq_affinity_notify *notify = | |
12030 | + container_of(swork, struct irq_affinity_notify, swork); | |
12031 | + _irq_affinity_notify(notify); | |
12032 | +} | |
12033 | + | |
12034 | +#else | |
12035 | + | |
12036 | +static void irq_affinity_notify(struct work_struct *work) | |
12037 | +{ | |
12038 | + struct irq_affinity_notify *notify = | |
12039 | + container_of(work, struct irq_affinity_notify, work); | |
12040 | + _irq_affinity_notify(notify); | |
12041 | +} | |
12042 | +#endif | |
12043 | + | |
12044 | /** | |
12045 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
12046 | * @irq: Interrupt for which to enable/disable notification | |
12047 | @@ -324,7 +358,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) | |
12048 | if (notify) { | |
12049 | notify->irq = irq; | |
12050 | kref_init(¬ify->kref); | |
12051 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12052 | + INIT_SWORK(¬ify->swork, irq_affinity_notify); | |
12053 | + init_helper_thread(); | |
12054 | +#else | |
12055 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
12056 | +#endif | |
12057 | } | |
12058 | ||
12059 | raw_spin_lock_irqsave(&desc->lock, flags); | |
12060 | @@ -879,7 +918,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |
12061 | local_bh_disable(); | |
12062 | ret = action->thread_fn(action->irq, action->dev_id); | |
12063 | irq_finalize_oneshot(desc, action); | |
12064 | - local_bh_enable(); | |
12065 | + /* | |
12066 | + * Interrupts which have real time requirements can be set up | |
12067 | + * to avoid softirq processing in the thread handler. This is | |
12068 | + * safe as these interrupts do not raise soft interrupts. | |
12069 | + */ | |
12070 | + if (irq_settings_no_softirq_call(desc)) | |
12071 | + _local_bh_enable(); | |
12072 | + else | |
12073 | + local_bh_enable(); | |
12074 | return ret; | |
12075 | } | |
12076 | ||
12077 | @@ -976,6 +1023,12 @@ static int irq_thread(void *data) | |
12078 | if (action_ret == IRQ_WAKE_THREAD) | |
12079 | irq_wake_secondary(desc, action); | |
12080 | ||
12081 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12082 | + migrate_disable(); | |
12083 | + add_interrupt_randomness(action->irq, 0, | |
12084 | + desc->random_ip ^ (unsigned long) action); | |
12085 | + migrate_enable(); | |
12086 | +#endif | |
12087 | wake_threads_waitq(desc); | |
12088 | } | |
12089 | ||
12090 | @@ -1336,6 +1389,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |
12091 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
12092 | } | |
12093 | ||
12094 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) | |
12095 | + irq_settings_set_no_softirq_call(desc); | |
12096 | + | |
12097 | /* Set default affinity mask once everything is setup */ | |
12098 | setup_affinity(desc, mask); | |
12099 | ||
12100 | @@ -2061,7 +2117,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); | |
12101 | * This call sets the internal irqchip state of an interrupt, | |
12102 | * depending on the value of @which. | |
12103 | * | |
12104 | - * This function should be called with preemption disabled if the | |
12105 | + * This function should be called with migration disabled if the | |
12106 | * interrupt controller has per-cpu registers. | |
12107 | */ | |
12108 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
12109 | diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h | |
12110 | index 320579d89091..2df2d4445b1e 100644 | |
12111 | --- a/kernel/irq/settings.h | |
12112 | +++ b/kernel/irq/settings.h | |
12113 | @@ -16,6 +16,7 @@ enum { | |
12114 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
12115 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
12116 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
12117 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
12118 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
12119 | }; | |
12120 | ||
12121 | @@ -30,6 +31,7 @@ enum { | |
12122 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
12123 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
12124 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
12125 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
12126 | #undef IRQF_MODIFY_MASK | |
12127 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
12128 | ||
12129 | @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | |
12130 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
12131 | } | |
12132 | ||
12133 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) | |
12134 | +{ | |
12135 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
12136 | +} | |
12137 | + | |
12138 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) | |
12139 | +{ | |
12140 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
12141 | +} | |
12142 | + | |
12143 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |
12144 | { | |
12145 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
12146 | diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c | |
12147 | index 5707f97a3e6a..73f38dc7a7fb 100644 | |
12148 | --- a/kernel/irq/spurious.c | |
12149 | +++ b/kernel/irq/spurious.c | |
12150 | @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); | |
12151 | ||
12152 | static int __init irqfixup_setup(char *str) | |
12153 | { | |
12154 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12155 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
12156 | + return 1; | |
12157 | +#endif | |
12158 | irqfixup = 1; | |
12159 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
12160 | printk(KERN_WARNING "This may impact system performance.\n"); | |
12161 | @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644); | |
12162 | ||
12163 | static int __init irqpoll_setup(char *str) | |
12164 | { | |
12165 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12166 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
12167 | + return 1; | |
12168 | +#endif | |
12169 | irqfixup = 2; | |
12170 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
12171 | "enabled\n"); | |
12172 | diff --git a/kernel/irq_work.c b/kernel/irq_work.c | |
12173 | index bcf107ce0854..2899ba0d23d1 100644 | |
12174 | --- a/kernel/irq_work.c | |
12175 | +++ b/kernel/irq_work.c | |
12176 | @@ -17,6 +17,7 @@ | |
12177 | #include <linux/cpu.h> | |
12178 | #include <linux/notifier.h> | |
12179 | #include <linux/smp.h> | |
12180 | +#include <linux/interrupt.h> | |
12181 | #include <asm/processor.h> | |
12182 | ||
12183 | ||
12184 | @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) | |
12185 | */ | |
12186 | bool irq_work_queue_on(struct irq_work *work, int cpu) | |
12187 | { | |
12188 | + struct llist_head *list; | |
12189 | + | |
12190 | /* All work should have been flushed before going offline */ | |
12191 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
12192 | ||
12193 | @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) | |
12194 | if (!irq_work_claim(work)) | |
12195 | return false; | |
12196 | ||
12197 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
12198 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
12199 | + list = &per_cpu(lazy_list, cpu); | |
12200 | + else | |
12201 | + list = &per_cpu(raised_list, cpu); | |
12202 | + | |
12203 | + if (llist_add(&work->llnode, list)) | |
12204 | arch_send_call_function_single_ipi(cpu); | |
12205 | ||
12206 | return true; | |
12207 | @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); | |
12208 | /* Enqueue the irq work @work on the current CPU */ | |
12209 | bool irq_work_queue(struct irq_work *work) | |
12210 | { | |
12211 | + struct llist_head *list; | |
12212 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
12213 | + | |
12214 | /* Only queue if not already pending */ | |
12215 | if (!irq_work_claim(work)) | |
12216 | return false; | |
12217 | @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work) | |
12218 | /* Queue the entry and raise the IPI if needed. */ | |
12219 | preempt_disable(); | |
12220 | ||
12221 | - /* If the work is "lazy", handle it from next tick if any */ | |
12222 | - if (work->flags & IRQ_WORK_LAZY) { | |
12223 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
12224 | - tick_nohz_tick_stopped()) | |
12225 | - arch_irq_work_raise(); | |
12226 | - } else { | |
12227 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
12228 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
12229 | + | |
12230 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
12231 | + list = this_cpu_ptr(&lazy_list); | |
12232 | + else | |
12233 | + list = this_cpu_ptr(&raised_list); | |
12234 | + | |
12235 | + if (llist_add(&work->llnode, list)) { | |
12236 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
12237 | arch_irq_work_raise(); | |
12238 | } | |
12239 | ||
12240 | @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) | |
12241 | raised = this_cpu_ptr(&raised_list); | |
12242 | lazy = this_cpu_ptr(&lazy_list); | |
12243 | ||
12244 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) | |
12245 | - if (llist_empty(lazy)) | |
12246 | - return false; | |
12247 | + if (llist_empty(raised) && llist_empty(lazy)) | |
12248 | + return false; | |
12249 | ||
12250 | /* All work should have been flushed before going offline */ | |
12251 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
12252 | @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list) | |
12253 | struct irq_work *work; | |
12254 | struct llist_node *llnode; | |
12255 | ||
12256 | - BUG_ON(!irqs_disabled()); | |
12257 | + BUG_ON_NONRT(!irqs_disabled()); | |
12258 | ||
12259 | if (llist_empty(list)) | |
12260 | return; | |
12261 | @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list) | |
12262 | void irq_work_run(void) | |
12263 | { | |
12264 | irq_work_run_list(this_cpu_ptr(&raised_list)); | |
12265 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
12266 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
12267 | + /* | |
12268 | + * NOTE: we raise softirq via IPI for safety, | |
12269 | + * and execute in irq_work_tick() to move the | |
12270 | + * overhead from hard to soft irq context. | |
12271 | + */ | |
12272 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
12273 | + raise_softirq(TIMER_SOFTIRQ); | |
12274 | + } else | |
12275 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
12276 | } | |
12277 | EXPORT_SYMBOL_GPL(irq_work_run); | |
12278 | ||
12279 | @@ -179,8 +200,17 @@ void irq_work_tick(void) | |
12280 | ||
12281 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) | |
12282 | irq_work_run_list(raised); | |
12283 | + | |
12284 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
12285 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
12286 | +} | |
12287 | + | |
12288 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
12289 | +void irq_work_tick_soft(void) | |
12290 | +{ | |
12291 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
12292 | } | |
12293 | +#endif | |
12294 | ||
12295 | /* | |
12296 | * Synchronize against the irq_work @entry, ensures the entry is not | |
12297 | diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c | |
12298 | index ee1bc1bb8feb..ddef07958840 100644 | |
12299 | --- a/kernel/ksysfs.c | |
12300 | +++ b/kernel/ksysfs.c | |
12301 | @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); | |
12302 | ||
12303 | #endif /* CONFIG_KEXEC_CORE */ | |
12304 | ||
12305 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
12306 | +static ssize_t realtime_show(struct kobject *kobj, | |
12307 | + struct kobj_attribute *attr, char *buf) | |
12308 | +{ | |
12309 | + return sprintf(buf, "%d\n", 1); | |
12310 | +} | |
12311 | +KERNEL_ATTR_RO(realtime); | |
12312 | +#endif | |
12313 | + | |
12314 | /* whether file capabilities are enabled */ | |
12315 | static ssize_t fscaps_show(struct kobject *kobj, | |
12316 | struct kobj_attribute *attr, char *buf) | |
12317 | @@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] = { | |
12318 | &rcu_expedited_attr.attr, | |
12319 | &rcu_normal_attr.attr, | |
12320 | #endif | |
12321 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12322 | + &realtime_attr.attr, | |
12323 | +#endif | |
12324 | NULL | |
12325 | }; | |
12326 | ||
12327 | diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile | |
12328 | index 6f88e352cd4f..5e27fb1079e7 100644 | |
12329 | --- a/kernel/locking/Makefile | |
12330 | +++ b/kernel/locking/Makefile | |
12331 | @@ -2,7 +2,7 @@ | |
12332 | # and is generally not a function of system call inputs. | |
12333 | KCOV_INSTRUMENT := n | |
12334 | ||
12335 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | |
12336 | +obj-y += semaphore.o percpu-rwsem.o | |
12337 | ||
12338 | ifdef CONFIG_FUNCTION_TRACER | |
12339 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
12340 | @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) | |
12341 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
12342 | endif | |
12343 | ||
12344 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
12345 | +obj-y += mutex.o | |
12346 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
12347 | +obj-y += rwsem.o | |
12348 | +endif | |
12349 | obj-$(CONFIG_LOCKDEP) += lockdep.o | |
12350 | ifeq ($(CONFIG_PROC_FS),y) | |
12351 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
12352 | @@ -24,7 +28,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |
12353 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | |
12354 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
12355 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
12356 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
12357 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
12358 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
12359 | +endif | |
12360 | +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o | |
12361 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | |
12362 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
12363 | diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c | |
12364 | index 4d7ffc0a0d00..9e52009c192e 100644 | |
12365 | --- a/kernel/locking/lockdep.c | |
12366 | +++ b/kernel/locking/lockdep.c | |
12367 | @@ -3689,6 +3689,7 @@ static void check_flags(unsigned long flags) | |
12368 | } | |
12369 | } | |
12370 | ||
12371 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12372 | /* | |
12373 | * We dont accurately track softirq state in e.g. | |
12374 | * hardirq contexts (such as on 4KSTACKS), so only | |
12375 | @@ -3703,6 +3704,7 @@ static void check_flags(unsigned long flags) | |
12376 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | |
12377 | } | |
12378 | } | |
12379 | +#endif | |
12380 | ||
12381 | if (!debug_locks) | |
12382 | print_irqtrace_events(current); | |
12383 | diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c | |
12384 | index f8c5af52a131..788068773e61 100644 | |
12385 | --- a/kernel/locking/locktorture.c | |
12386 | +++ b/kernel/locking/locktorture.c | |
12387 | @@ -26,7 +26,6 @@ | |
12388 | #include <linux/kthread.h> | |
12389 | #include <linux/sched/rt.h> | |
12390 | #include <linux/spinlock.h> | |
12391 | -#include <linux/rwlock.h> | |
12392 | #include <linux/mutex.h> | |
12393 | #include <linux/rwsem.h> | |
12394 | #include <linux/smp.h> | |
12395 | diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c | |
12396 | index ce182599cf2e..2ad3a1e8344c 100644 | |
12397 | --- a/kernel/locking/percpu-rwsem.c | |
12398 | +++ b/kernel/locking/percpu-rwsem.c | |
12399 | @@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, | |
12400 | /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ | |
12401 | rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); | |
12402 | __init_rwsem(&sem->rw_sem, name, rwsem_key); | |
12403 | - init_waitqueue_head(&sem->writer); | |
12404 | + init_swait_queue_head(&sem->writer); | |
12405 | sem->readers_block = 0; | |
12406 | return 0; | |
12407 | } | |
12408 | @@ -103,7 +103,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) | |
12409 | __this_cpu_dec(*sem->read_count); | |
12410 | ||
12411 | /* Prod writer to recheck readers_active */ | |
12412 | - wake_up(&sem->writer); | |
12413 | + swake_up(&sem->writer); | |
12414 | } | |
12415 | EXPORT_SYMBOL_GPL(__percpu_up_read); | |
12416 | ||
12417 | @@ -160,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) | |
12418 | */ | |
12419 | ||
12420 | /* Wait for all now active readers to complete. */ | |
12421 | - wait_event(sem->writer, readers_active_check(sem)); | |
12422 | + swait_event(sem->writer, readers_active_check(sem)); | |
12423 | } | |
12424 | EXPORT_SYMBOL_GPL(percpu_down_write); | |
12425 | ||
12426 | diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c | |
12427 | new file mode 100644 | |
12428 | index 000000000000..665754c00e1e | |
12429 | --- /dev/null | |
12430 | +++ b/kernel/locking/rt.c | |
12431 | @@ -0,0 +1,498 @@ | |
12432 | +/* | |
12433 | + * kernel/rt.c | |
12434 | + * | |
12435 | + * Real-Time Preemption Support | |
12436 | + * | |
12437 | + * started by Ingo Molnar: | |
12438 | + * | |
12439 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
12440 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12441 | + * | |
12442 | + * historic credit for proving that Linux spinlocks can be implemented via | |
12443 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
12444 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
12445 | + * research and analysis; TimeSys, for proving that you can implement a | |
12446 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
12447 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
12448 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
12449 | + * | |
12450 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
12451 | + * but the idea of converting spinlocks to mutexes is used here too. | |
12452 | + * | |
12453 | + * lock debugging, locking tree, deadlock detection: | |
12454 | + * | |
12455 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
12456 | + * Released under the General Public License (GPL). | |
12457 | + * | |
12458 | + * Includes portions of the generic R/W semaphore implementation from: | |
12459 | + * | |
12460 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
12461 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
12462 | + * - Derived also from comments by Linus | |
12463 | + * | |
12464 | + * Pending ownership of locks and ownership stealing: | |
12465 | + * | |
12466 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
12467 | + * | |
12468 | + * (also by Steven Rostedt) | |
12469 | + * - Converted single pi_lock to individual task locks. | |
12470 | + * | |
12471 | + * By Esben Nielsen: | |
12472 | + * Doing priority inheritance with help of the scheduler. | |
12473 | + * | |
12474 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12475 | + * - major rework based on Esben Nielsens initial patch | |
12476 | + * - replaced thread_info references by task_struct refs | |
12477 | + * - removed task->pending_owner dependency | |
12478 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
12479 | + * in the scheduler return path as discussed with Steven Rostedt | |
12480 | + * | |
12481 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
12482 | + * Steven Rostedt <rostedt@goodmis.org> | |
12483 | + * - debugged and patched Thomas Gleixner's rework. | |
12484 | + * - added back the cmpxchg to the rework. | |
12485 | + * - turned atomic require back on for SMP. | |
12486 | + */ | |
12487 | + | |
12488 | +#include <linux/spinlock.h> | |
12489 | +#include <linux/rtmutex.h> | |
12490 | +#include <linux/sched.h> | |
12491 | +#include <linux/delay.h> | |
12492 | +#include <linux/module.h> | |
12493 | +#include <linux/kallsyms.h> | |
12494 | +#include <linux/syscalls.h> | |
12495 | +#include <linux/interrupt.h> | |
12496 | +#include <linux/plist.h> | |
12497 | +#include <linux/fs.h> | |
12498 | +#include <linux/futex.h> | |
12499 | +#include <linux/hrtimer.h> | |
12500 | + | |
12501 | +#include "rtmutex_common.h" | |
12502 | + | |
12503 | +/* | |
12504 | + * struct mutex functions | |
12505 | + */ | |
12506 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
12507 | + struct lock_class_key *key) | |
12508 | +{ | |
12509 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12510 | + /* | |
12511 | + * Make sure we are not reinitializing a held lock: | |
12512 | + */ | |
12513 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
12514 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
12515 | +#endif | |
12516 | + mutex->lock.save_state = 0; | |
12517 | +} | |
12518 | +EXPORT_SYMBOL(__mutex_do_init); | |
12519 | + | |
12520 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
12521 | +{ | |
12522 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12523 | + rt_mutex_lock(&lock->lock); | |
12524 | +} | |
12525 | +EXPORT_SYMBOL(_mutex_lock); | |
12526 | + | |
12527 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
12528 | +{ | |
12529 | + int ret; | |
12530 | + | |
12531 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12532 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
12533 | + if (ret) | |
12534 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12535 | + return ret; | |
12536 | +} | |
12537 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
12538 | + | |
12539 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
12540 | +{ | |
12541 | + int ret; | |
12542 | + | |
12543 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
12544 | + ret = rt_mutex_lock_killable(&lock->lock); | |
12545 | + if (ret) | |
12546 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12547 | + return ret; | |
12548 | +} | |
12549 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
12550 | + | |
12551 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12552 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
12553 | +{ | |
12554 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
12555 | + rt_mutex_lock(&lock->lock); | |
12556 | +} | |
12557 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
12558 | + | |
12559 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
12560 | +{ | |
12561 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
12562 | + rt_mutex_lock(&lock->lock); | |
12563 | +} | |
12564 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
12565 | + | |
12566 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) | |
12567 | +{ | |
12568 | + int ret; | |
12569 | + | |
12570 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
12571 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
12572 | + if (ret) | |
12573 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12574 | + return ret; | |
12575 | +} | |
12576 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); | |
12577 | + | |
12578 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
12579 | +{ | |
12580 | + int ret; | |
12581 | + | |
12582 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
12583 | + ret = rt_mutex_lock_killable(&lock->lock); | |
12584 | + if (ret) | |
12585 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12586 | + return ret; | |
12587 | +} | |
12588 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
12589 | +#endif | |
12590 | + | |
12591 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
12592 | +{ | |
12593 | + int ret = rt_mutex_trylock(&lock->lock); | |
12594 | + | |
12595 | + if (ret) | |
12596 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
12597 | + | |
12598 | + return ret; | |
12599 | +} | |
12600 | +EXPORT_SYMBOL(_mutex_trylock); | |
12601 | + | |
12602 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
12603 | +{ | |
12604 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
12605 | + rt_mutex_unlock(&lock->lock); | |
12606 | +} | |
12607 | +EXPORT_SYMBOL(_mutex_unlock); | |
12608 | + | |
12609 | +/* | |
12610 | + * rwlock_t functions | |
12611 | + */ | |
12612 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) | |
12613 | +{ | |
12614 | + int ret; | |
12615 | + | |
12616 | + migrate_disable(); | |
12617 | + ret = rt_mutex_trylock(&rwlock->lock); | |
12618 | + if (ret) | |
12619 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
12620 | + else | |
12621 | + migrate_enable(); | |
12622 | + | |
12623 | + return ret; | |
12624 | +} | |
12625 | +EXPORT_SYMBOL(rt_write_trylock); | |
12626 | + | |
12627 | +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) | |
12628 | +{ | |
12629 | + int ret; | |
12630 | + | |
12631 | + *flags = 0; | |
12632 | + ret = rt_write_trylock(rwlock); | |
12633 | + return ret; | |
12634 | +} | |
12635 | +EXPORT_SYMBOL(rt_write_trylock_irqsave); | |
12636 | + | |
12637 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) | |
12638 | +{ | |
12639 | + struct rt_mutex *lock = &rwlock->lock; | |
12640 | + int ret = 1; | |
12641 | + | |
12642 | + /* | |
12643 | + * recursive read locks succeed when current owns the lock, | |
12644 | + * but not when read_depth == 0 which means that the lock is | |
12645 | + * write locked. | |
12646 | + */ | |
12647 | + if (rt_mutex_owner(lock) != current) { | |
12648 | + migrate_disable(); | |
12649 | + ret = rt_mutex_trylock(lock); | |
12650 | + if (ret) | |
12651 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
12652 | + else | |
12653 | + migrate_enable(); | |
12654 | + | |
12655 | + } else if (!rwlock->read_depth) { | |
12656 | + ret = 0; | |
12657 | + } | |
12658 | + | |
12659 | + if (ret) | |
12660 | + rwlock->read_depth++; | |
12661 | + | |
12662 | + return ret; | |
12663 | +} | |
12664 | +EXPORT_SYMBOL(rt_read_trylock); | |
12665 | + | |
12666 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) | |
12667 | +{ | |
12668 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
12669 | + __rt_spin_lock(&rwlock->lock); | |
12670 | +} | |
12671 | +EXPORT_SYMBOL(rt_write_lock); | |
12672 | + | |
12673 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) | |
12674 | +{ | |
12675 | + struct rt_mutex *lock = &rwlock->lock; | |
12676 | + | |
12677 | + | |
12678 | + /* | |
12679 | + * recursive read locks succeed when current owns the lock | |
12680 | + */ | |
12681 | + if (rt_mutex_owner(lock) != current) { | |
12682 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
12683 | + __rt_spin_lock(lock); | |
12684 | + } | |
12685 | + rwlock->read_depth++; | |
12686 | +} | |
12687 | + | |
12688 | +EXPORT_SYMBOL(rt_read_lock); | |
12689 | + | |
12690 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) | |
12691 | +{ | |
12692 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
12693 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
12694 | + __rt_spin_unlock(&rwlock->lock); | |
12695 | + migrate_enable(); | |
12696 | +} | |
12697 | +EXPORT_SYMBOL(rt_write_unlock); | |
12698 | + | |
12699 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) | |
12700 | +{ | |
12701 | + /* Release the lock only when read_depth is down to 0 */ | |
12702 | + if (--rwlock->read_depth == 0) { | |
12703 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
12704 | + __rt_spin_unlock(&rwlock->lock); | |
12705 | + migrate_enable(); | |
12706 | + } | |
12707 | +} | |
12708 | +EXPORT_SYMBOL(rt_read_unlock); | |
12709 | + | |
12710 | +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) | |
12711 | +{ | |
12712 | + rt_write_lock(rwlock); | |
12713 | + | |
12714 | + return 0; | |
12715 | +} | |
12716 | +EXPORT_SYMBOL(rt_write_lock_irqsave); | |
12717 | + | |
12718 | +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) | |
12719 | +{ | |
12720 | + rt_read_lock(rwlock); | |
12721 | + | |
12722 | + return 0; | |
12723 | +} | |
12724 | +EXPORT_SYMBOL(rt_read_lock_irqsave); | |
12725 | + | |
12726 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) | |
12727 | +{ | |
12728 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12729 | + /* | |
12730 | + * Make sure we are not reinitializing a held lock: | |
12731 | + */ | |
12732 | + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); | |
12733 | + lockdep_init_map(&rwlock->dep_map, name, key, 0); | |
12734 | +#endif | |
12735 | + rwlock->lock.save_state = 1; | |
12736 | + rwlock->read_depth = 0; | |
12737 | +} | |
12738 | +EXPORT_SYMBOL(__rt_rwlock_init); | |
12739 | + | |
12740 | +/* | |
12741 | + * rw_semaphores | |
12742 | + */ | |
12743 | + | |
12744 | +void rt_up_write(struct rw_semaphore *rwsem) | |
12745 | +{ | |
12746 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12747 | + rt_mutex_unlock(&rwsem->lock); | |
12748 | +} | |
12749 | +EXPORT_SYMBOL(rt_up_write); | |
12750 | + | |
12751 | +void __rt_up_read(struct rw_semaphore *rwsem) | |
12752 | +{ | |
12753 | + if (--rwsem->read_depth == 0) | |
12754 | + rt_mutex_unlock(&rwsem->lock); | |
12755 | +} | |
12756 | + | |
12757 | +void rt_up_read(struct rw_semaphore *rwsem) | |
12758 | +{ | |
12759 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12760 | + __rt_up_read(rwsem); | |
12761 | +} | |
12762 | +EXPORT_SYMBOL(rt_up_read); | |
12763 | + | |
12764 | +/* | |
12765 | + * downgrade a write lock into a read lock | |
12766 | + * - just wake up any readers at the front of the queue | |
12767 | + */ | |
12768 | +void rt_downgrade_write(struct rw_semaphore *rwsem) | |
12769 | +{ | |
12770 | + BUG_ON(rt_mutex_owner(&rwsem->lock) != current); | |
12771 | + rwsem->read_depth = 1; | |
12772 | +} | |
12773 | +EXPORT_SYMBOL(rt_downgrade_write); | |
12774 | + | |
12775 | +int rt_down_write_trylock(struct rw_semaphore *rwsem) | |
12776 | +{ | |
12777 | + int ret = rt_mutex_trylock(&rwsem->lock); | |
12778 | + | |
12779 | + if (ret) | |
12780 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
12781 | + return ret; | |
12782 | +} | |
12783 | +EXPORT_SYMBOL(rt_down_write_trylock); | |
12784 | + | |
12785 | +void rt_down_write(struct rw_semaphore *rwsem) | |
12786 | +{ | |
12787 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
12788 | + rt_mutex_lock(&rwsem->lock); | |
12789 | +} | |
12790 | +EXPORT_SYMBOL(rt_down_write); | |
12791 | + | |
12792 | +int rt_down_write_killable(struct rw_semaphore *rwsem) | |
12793 | +{ | |
12794 | + int ret; | |
12795 | + | |
12796 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
12797 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
12798 | + if (ret) | |
12799 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12800 | + return ret; | |
12801 | +} | |
12802 | +EXPORT_SYMBOL(rt_down_write_killable); | |
12803 | + | |
12804 | +int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass) | |
12805 | +{ | |
12806 | + int ret; | |
12807 | + | |
12808 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12809 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
12810 | + if (ret) | |
12811 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
12812 | + return ret; | |
12813 | +} | |
12814 | +EXPORT_SYMBOL(rt_down_write_killable_nested); | |
12815 | + | |
12816 | +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) | |
12817 | +{ | |
12818 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12819 | + rt_mutex_lock(&rwsem->lock); | |
12820 | +} | |
12821 | +EXPORT_SYMBOL(rt_down_write_nested); | |
12822 | + | |
12823 | +void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
12824 | + struct lockdep_map *nest) | |
12825 | +{ | |
12826 | + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); | |
12827 | + rt_mutex_lock(&rwsem->lock); | |
12828 | +} | |
12829 | +EXPORT_SYMBOL(rt_down_write_nested_lock); | |
12830 | + | |
12831 | +int rt__down_read_trylock(struct rw_semaphore *rwsem) | |
12832 | +{ | |
12833 | + struct rt_mutex *lock = &rwsem->lock; | |
12834 | + int ret = 1; | |
12835 | + | |
12836 | + /* | |
12837 | + * recursive read locks succeed when current owns the rwsem, | |
12838 | + * but not when read_depth == 0 which means that the rwsem is | |
12839 | + * write locked. | |
12840 | + */ | |
12841 | + if (rt_mutex_owner(lock) != current) | |
12842 | + ret = rt_mutex_trylock(&rwsem->lock); | |
12843 | + else if (!rwsem->read_depth) | |
12844 | + ret = 0; | |
12845 | + | |
12846 | + if (ret) | |
12847 | + rwsem->read_depth++; | |
12848 | + return ret; | |
12849 | + | |
12850 | +} | |
12851 | + | |
12852 | +int rt_down_read_trylock(struct rw_semaphore *rwsem) | |
12853 | +{ | |
12854 | + int ret; | |
12855 | + | |
12856 | + ret = rt__down_read_trylock(rwsem); | |
12857 | + if (ret) | |
12858 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
12859 | + | |
12860 | + return ret; | |
12861 | +} | |
12862 | +EXPORT_SYMBOL(rt_down_read_trylock); | |
12863 | + | |
12864 | +void rt__down_read(struct rw_semaphore *rwsem) | |
12865 | +{ | |
12866 | + struct rt_mutex *lock = &rwsem->lock; | |
12867 | + | |
12868 | + if (rt_mutex_owner(lock) != current) | |
12869 | + rt_mutex_lock(&rwsem->lock); | |
12870 | + rwsem->read_depth++; | |
12871 | +} | |
12872 | +EXPORT_SYMBOL(rt__down_read); | |
12873 | + | |
12874 | +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) | |
12875 | +{ | |
12876 | + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
12877 | + rt__down_read(rwsem); | |
12878 | +} | |
12879 | + | |
12880 | +void rt_down_read(struct rw_semaphore *rwsem) | |
12881 | +{ | |
12882 | + __rt_down_read(rwsem, 0); | |
12883 | +} | |
12884 | +EXPORT_SYMBOL(rt_down_read); | |
12885 | + | |
12886 | +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) | |
12887 | +{ | |
12888 | + __rt_down_read(rwsem, subclass); | |
12889 | +} | |
12890 | +EXPORT_SYMBOL(rt_down_read_nested); | |
12891 | + | |
12892 | +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
12893 | + struct lock_class_key *key) | |
12894 | +{ | |
12895 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12896 | + /* | |
12897 | + * Make sure we are not reinitializing a held lock: | |
12898 | + */ | |
12899 | + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); | |
12900 | + lockdep_init_map(&rwsem->dep_map, name, key, 0); | |
12901 | +#endif | |
12902 | + rwsem->read_depth = 0; | |
12903 | + rwsem->lock.save_state = 0; | |
12904 | +} | |
12905 | +EXPORT_SYMBOL(__rt_rwsem_init); | |
12906 | + | |
12907 | +/** | |
12908 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
12909 | + * @cnt: the atomic which we are to dec | |
12910 | + * @lock: the mutex to return holding if we dec to 0 | |
12911 | + * | |
12912 | + * return true and hold lock if we dec to 0, return false otherwise | |
12913 | + */ | |
12914 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
12915 | +{ | |
12916 | + /* dec if we can't possibly hit 0 */ | |
12917 | + if (atomic_add_unless(cnt, -1, 1)) | |
12918 | + return 0; | |
12919 | + /* we might hit 0, so take the lock */ | |
12920 | + mutex_lock(lock); | |
12921 | + if (!atomic_dec_and_test(cnt)) { | |
12922 | + /* when we actually did the dec, we didn't hit 0 */ | |
12923 | + mutex_unlock(lock); | |
12924 | + return 0; | |
12925 | + } | |
12926 | + /* we hit 0, and we hold the lock */ | |
12927 | + return 1; | |
12928 | +} | |
12929 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
12930 | diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c | |
12931 | index 2c49d76f96c3..4f1a7663c34d 100644 | |
12932 | --- a/kernel/locking/rtmutex.c | |
12933 | +++ b/kernel/locking/rtmutex.c | |
12934 | @@ -7,6 +7,11 @@ | |
12935 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
12936 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
12937 | * Copyright (C) 2006 Esben Nielsen | |
12938 | + * Adaptive Spinlocks: | |
12939 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
12940 | + * and Peter Morreale, | |
12941 | + * Adaptive Spinlocks simplification: | |
12942 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
12943 | * | |
12944 | * See Documentation/locking/rt-mutex-design.txt for details. | |
12945 | */ | |
12946 | @@ -16,6 +21,7 @@ | |
12947 | #include <linux/sched/rt.h> | |
12948 | #include <linux/sched/deadline.h> | |
12949 | #include <linux/timer.h> | |
12950 | +#include <linux/ww_mutex.h> | |
12951 | ||
12952 | #include "rtmutex_common.h" | |
12953 | ||
12954 | @@ -133,6 +139,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) | |
12955 | WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); | |
12956 | } | |
12957 | ||
12958 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) | |
12959 | +{ | |
12960 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
12961 | + waiter != PI_REQUEUE_INPROGRESS; | |
12962 | +} | |
12963 | + | |
12964 | /* | |
12965 | * We can speed up the acquire/release, if there's no debugging state to be | |
12966 | * set up. | |
12967 | @@ -414,6 +426,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, | |
12968 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); | |
12969 | } | |
12970 | ||
12971 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) | |
12972 | +{ | |
12973 | + if (waiter->savestate) | |
12974 | + wake_up_lock_sleeper(waiter->task); | |
12975 | + else | |
12976 | + wake_up_process(waiter->task); | |
12977 | +} | |
12978 | + | |
12979 | /* | |
12980 | * Max number of times we'll walk the boosting chain: | |
12981 | */ | |
12982 | @@ -421,7 +441,8 @@ int max_lock_depth = 1024; | |
12983 | ||
12984 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | |
12985 | { | |
12986 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
12987 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
12988 | + p->pi_blocked_on->lock : NULL; | |
12989 | } | |
12990 | ||
12991 | /* | |
12992 | @@ -557,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
12993 | * reached or the state of the chain has changed while we | |
12994 | * dropped the locks. | |
12995 | */ | |
12996 | - if (!waiter) | |
12997 | + if (!rt_mutex_real_waiter(waiter)) | |
12998 | goto out_unlock_pi; | |
12999 | ||
13000 | /* | |
13001 | @@ -719,13 +740,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
13002 | * follow here. This is the end of the chain we are walking. | |
13003 | */ | |
13004 | if (!rt_mutex_owner(lock)) { | |
13005 | + struct rt_mutex_waiter *lock_top_waiter; | |
13006 | + | |
13007 | /* | |
13008 | * If the requeue [7] above changed the top waiter, | |
13009 | * then we need to wake the new top waiter up to try | |
13010 | * to get the lock. | |
13011 | */ | |
13012 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
13013 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
13014 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
13015 | + if (prerequeue_top_waiter != lock_top_waiter) | |
13016 | + rt_mutex_wake_waiter(lock_top_waiter); | |
13017 | raw_spin_unlock_irq(&lock->wait_lock); | |
13018 | return 0; | |
13019 | } | |
13020 | @@ -818,6 +842,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
13021 | return ret; | |
13022 | } | |
13023 | ||
13024 | + | |
13025 | +#define STEAL_NORMAL 0 | |
13026 | +#define STEAL_LATERAL 1 | |
13027 | + | |
13028 | +/* | |
13029 | + * Note that RT tasks are excluded from lateral-steals to prevent the | |
13030 | + * introduction of an unbounded latency | |
13031 | + */ | |
13032 | +static inline int lock_is_stealable(struct task_struct *task, | |
13033 | + struct task_struct *pendowner, int mode) | |
13034 | +{ | |
13035 | + if (mode == STEAL_NORMAL || rt_task(task)) { | |
13036 | + if (task->prio >= pendowner->prio) | |
13037 | + return 0; | |
13038 | + } else if (task->prio > pendowner->prio) | |
13039 | + return 0; | |
13040 | + return 1; | |
13041 | +} | |
13042 | + | |
13043 | /* | |
13044 | * Try to take an rt-mutex | |
13045 | * | |
13046 | @@ -828,8 +871,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
13047 | * @waiter: The waiter that is queued to the lock's wait tree if the | |
13048 | * callsite called task_blocked_on_lock(), otherwise NULL | |
13049 | */ | |
13050 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13051 | - struct rt_mutex_waiter *waiter) | |
13052 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
13053 | + struct task_struct *task, | |
13054 | + struct rt_mutex_waiter *waiter, int mode) | |
13055 | { | |
13056 | /* | |
13057 | * Before testing whether we can acquire @lock, we set the | |
13058 | @@ -866,8 +910,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13059 | * If waiter is not the highest priority waiter of | |
13060 | * @lock, give up. | |
13061 | */ | |
13062 | - if (waiter != rt_mutex_top_waiter(lock)) | |
13063 | + if (waiter != rt_mutex_top_waiter(lock)) { | |
13064 | + /* XXX lock_is_stealable() ? */ | |
13065 | return 0; | |
13066 | + } | |
13067 | ||
13068 | /* | |
13069 | * We can acquire the lock. Remove the waiter from the | |
13070 | @@ -885,14 +931,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13071 | * not need to be dequeued. | |
13072 | */ | |
13073 | if (rt_mutex_has_waiters(lock)) { | |
13074 | - /* | |
13075 | - * If @task->prio is greater than or equal to | |
13076 | - * the top waiter priority (kernel view), | |
13077 | - * @task lost. | |
13078 | - */ | |
13079 | - if (task->prio >= rt_mutex_top_waiter(lock)->prio) | |
13080 | - return 0; | |
13081 | + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; | |
13082 | ||
13083 | + if (task != pown && !lock_is_stealable(task, pown, mode)) | |
13084 | + return 0; | |
13085 | /* | |
13086 | * The current top waiter stays enqueued. We | |
13087 | * don't have to change anything in the lock | |
13088 | @@ -941,6 +983,433 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13089 | return 1; | |
13090 | } | |
13091 | ||
13092 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13093 | +/* | |
13094 | + * preemptible spin_lock functions: | |
13095 | + */ | |
13096 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
13097 | + void (*slowfn)(struct rt_mutex *lock, | |
13098 | + bool mg_off), | |
13099 | + bool do_mig_dis) | |
13100 | +{ | |
13101 | + might_sleep_no_state_check(); | |
13102 | + | |
13103 | + if (do_mig_dis) | |
13104 | + migrate_disable(); | |
13105 | + | |
13106 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
13107 | + rt_mutex_deadlock_account_lock(lock, current); | |
13108 | + else | |
13109 | + slowfn(lock, do_mig_dis); | |
13110 | +} | |
13111 | + | |
13112 | +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock, | |
13113 | + int (*slowfn)(struct rt_mutex *lock)) | |
13114 | +{ | |
13115 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
13116 | + rt_mutex_deadlock_account_unlock(current); | |
13117 | + return 0; | |
13118 | + } | |
13119 | + return slowfn(lock); | |
13120 | +} | |
13121 | +#ifdef CONFIG_SMP | |
13122 | +/* | |
13123 | + * Note that owner is a speculative pointer and dereferencing relies | |
13124 | + * on rcu_read_lock() and the check against the lock owner. | |
13125 | + */ | |
13126 | +static int adaptive_wait(struct rt_mutex *lock, | |
13127 | + struct task_struct *owner) | |
13128 | +{ | |
13129 | + int res = 0; | |
13130 | + | |
13131 | + rcu_read_lock(); | |
13132 | + for (;;) { | |
13133 | + if (owner != rt_mutex_owner(lock)) | |
13134 | + break; | |
13135 | + /* | |
13136 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
13137 | + * checking the above to be valid. | |
13138 | + */ | |
13139 | + barrier(); | |
13140 | + if (!owner->on_cpu) { | |
13141 | + res = 1; | |
13142 | + break; | |
13143 | + } | |
13144 | + cpu_relax(); | |
13145 | + } | |
13146 | + rcu_read_unlock(); | |
13147 | + return res; | |
13148 | +} | |
13149 | +#else | |
13150 | +static int adaptive_wait(struct rt_mutex *lock, | |
13151 | + struct task_struct *orig_owner) | |
13152 | +{ | |
13153 | + return 1; | |
13154 | +} | |
13155 | +#endif | |
13156 | + | |
13157 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
13158 | + struct rt_mutex_waiter *waiter, | |
13159 | + struct task_struct *task, | |
13160 | + enum rtmutex_chainwalk chwalk); | |
13161 | +/* | |
13162 | + * Slow path lock function spin_lock style: this variant is very | |
13163 | + * careful not to miss any non-lock wakeups. | |
13164 | + * | |
13165 | + * We store the current state under p->pi_lock in p->saved_state and | |
13166 | + * the try_to_wake_up() code handles this accordingly. | |
13167 | + */ | |
13168 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, | |
13169 | + bool mg_off) | |
13170 | +{ | |
13171 | + struct task_struct *lock_owner, *self = current; | |
13172 | + struct rt_mutex_waiter waiter, *top_waiter; | |
13173 | + unsigned long flags; | |
13174 | + int ret; | |
13175 | + | |
13176 | + rt_mutex_init_waiter(&waiter, true); | |
13177 | + | |
13178 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
13179 | + | |
13180 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { | |
13181 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13182 | + return; | |
13183 | + } | |
13184 | + | |
13185 | + BUG_ON(rt_mutex_owner(lock) == self); | |
13186 | + | |
13187 | + /* | |
13188 | + * We save whatever state the task is in and we'll restore it | |
13189 | + * after acquiring the lock taking real wakeups into account | |
13190 | + * as well. We are serialized via pi_lock against wakeups. See | |
13191 | + * try_to_wake_up(). | |
13192 | + */ | |
13193 | + raw_spin_lock(&self->pi_lock); | |
13194 | + self->saved_state = self->state; | |
13195 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
13196 | + raw_spin_unlock(&self->pi_lock); | |
13197 | + | |
13198 | + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); | |
13199 | + BUG_ON(ret); | |
13200 | + | |
13201 | + for (;;) { | |
13202 | + /* Try to acquire the lock again. */ | |
13203 | + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) | |
13204 | + break; | |
13205 | + | |
13206 | + top_waiter = rt_mutex_top_waiter(lock); | |
13207 | + lock_owner = rt_mutex_owner(lock); | |
13208 | + | |
13209 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13210 | + | |
13211 | + debug_rt_mutex_print_deadlock(&waiter); | |
13212 | + | |
13213 | + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { | |
13214 | + if (mg_off) | |
13215 | + migrate_enable(); | |
13216 | + schedule(); | |
13217 | + if (mg_off) | |
13218 | + migrate_disable(); | |
13219 | + } | |
13220 | + | |
13221 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
13222 | + | |
13223 | + raw_spin_lock(&self->pi_lock); | |
13224 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
13225 | + raw_spin_unlock(&self->pi_lock); | |
13226 | + } | |
13227 | + | |
13228 | + /* | |
13229 | + * Restore the task state to current->saved_state. We set it | |
13230 | + * to the original state above and the try_to_wake_up() code | |
13231 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
13232 | + * happened while we were blocked. Clear saved_state so | |
13233 | + * try_to_wakeup() does not get confused. | |
13234 | + */ | |
13235 | + raw_spin_lock(&self->pi_lock); | |
13236 | + __set_current_state_no_track(self->saved_state); | |
13237 | + self->saved_state = TASK_RUNNING; | |
13238 | + raw_spin_unlock(&self->pi_lock); | |
13239 | + | |
13240 | + /* | |
13241 | + * try_to_take_rt_mutex() sets the waiter bit | |
13242 | + * unconditionally. We might have to fix that up: | |
13243 | + */ | |
13244 | + fixup_rt_mutex_waiters(lock); | |
13245 | + | |
13246 | + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); | |
13247 | + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); | |
13248 | + | |
13249 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13250 | + | |
13251 | + debug_rt_mutex_free_waiter(&waiter); | |
13252 | +} | |
13253 | + | |
13254 | +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
13255 | + struct wake_q_head *wake_sleeper_q, | |
13256 | + struct rt_mutex *lock); | |
13257 | +/* | |
13258 | + * Slow path to release a rt_mutex spin_lock style | |
13259 | + */ | |
13260 | +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) | |
13261 | +{ | |
13262 | + unsigned long flags; | |
13263 | + WAKE_Q(wake_q); | |
13264 | + WAKE_Q(wake_sleeper_q); | |
13265 | + | |
13266 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
13267 | + | |
13268 | + debug_rt_mutex_unlock(lock); | |
13269 | + | |
13270 | + rt_mutex_deadlock_account_unlock(current); | |
13271 | + | |
13272 | + if (!rt_mutex_has_waiters(lock)) { | |
13273 | + lock->owner = NULL; | |
13274 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13275 | + return 0; | |
13276 | + } | |
13277 | + | |
13278 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
13279 | + | |
13280 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13281 | + wake_up_q(&wake_q); | |
13282 | + wake_up_q_sleeper(&wake_sleeper_q); | |
13283 | + | |
13284 | + /* Undo pi boosting.when necessary */ | |
13285 | + rt_mutex_adjust_prio(current); | |
13286 | + return 0; | |
13287 | +} | |
13288 | + | |
13289 | +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock) | |
13290 | +{ | |
13291 | + unsigned long flags; | |
13292 | + WAKE_Q(wake_q); | |
13293 | + WAKE_Q(wake_sleeper_q); | |
13294 | + | |
13295 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
13296 | + | |
13297 | + debug_rt_mutex_unlock(lock); | |
13298 | + | |
13299 | + rt_mutex_deadlock_account_unlock(current); | |
13300 | + | |
13301 | + if (!rt_mutex_has_waiters(lock)) { | |
13302 | + lock->owner = NULL; | |
13303 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13304 | + return 0; | |
13305 | + } | |
13306 | + | |
13307 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
13308 | + | |
13309 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13310 | + wake_up_q(&wake_q); | |
13311 | + wake_up_q_sleeper(&wake_sleeper_q); | |
13312 | + return 1; | |
13313 | +} | |
13314 | + | |
13315 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) | |
13316 | +{ | |
13317 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); | |
13318 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13319 | +} | |
13320 | +EXPORT_SYMBOL(rt_spin_lock__no_mg); | |
13321 | + | |
13322 | +void __lockfunc rt_spin_lock(spinlock_t *lock) | |
13323 | +{ | |
13324 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
13325 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13326 | +} | |
13327 | +EXPORT_SYMBOL(rt_spin_lock); | |
13328 | + | |
13329 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) | |
13330 | +{ | |
13331 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); | |
13332 | +} | |
13333 | +EXPORT_SYMBOL(__rt_spin_lock); | |
13334 | + | |
13335 | +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) | |
13336 | +{ | |
13337 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); | |
13338 | +} | |
13339 | +EXPORT_SYMBOL(__rt_spin_lock__no_mg); | |
13340 | + | |
13341 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13342 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
13343 | +{ | |
13344 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
13345 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
13346 | +} | |
13347 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
13348 | +#endif | |
13349 | + | |
13350 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) | |
13351 | +{ | |
13352 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
13353 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
13354 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
13355 | +} | |
13356 | +EXPORT_SYMBOL(rt_spin_unlock__no_mg); | |
13357 | + | |
13358 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) | |
13359 | +{ | |
13360 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
13361 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
13362 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
13363 | + migrate_enable(); | |
13364 | +} | |
13365 | +EXPORT_SYMBOL(rt_spin_unlock); | |
13366 | + | |
13367 | +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock) | |
13368 | +{ | |
13369 | + int ret; | |
13370 | + | |
13371 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
13372 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
13373 | + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost); | |
13374 | + migrate_enable(); | |
13375 | + return ret; | |
13376 | +} | |
13377 | + | |
13378 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) | |
13379 | +{ | |
13380 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
13381 | +} | |
13382 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
13383 | + | |
13384 | +/* | |
13385 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
13386 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
13387 | + * schedule if there's contention: | |
13388 | + */ | |
13389 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
13390 | +{ | |
13391 | + spin_lock(lock); | |
13392 | + spin_unlock(lock); | |
13393 | +} | |
13394 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
13395 | + | |
13396 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) | |
13397 | +{ | |
13398 | + int ret; | |
13399 | + | |
13400 | + ret = rt_mutex_trylock(&lock->lock); | |
13401 | + if (ret) | |
13402 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13403 | + return ret; | |
13404 | +} | |
13405 | +EXPORT_SYMBOL(rt_spin_trylock__no_mg); | |
13406 | + | |
13407 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
13408 | +{ | |
13409 | + int ret; | |
13410 | + | |
13411 | + migrate_disable(); | |
13412 | + ret = rt_mutex_trylock(&lock->lock); | |
13413 | + if (ret) | |
13414 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13415 | + else | |
13416 | + migrate_enable(); | |
13417 | + return ret; | |
13418 | +} | |
13419 | +EXPORT_SYMBOL(rt_spin_trylock); | |
13420 | + | |
13421 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
13422 | +{ | |
13423 | + int ret; | |
13424 | + | |
13425 | + local_bh_disable(); | |
13426 | + ret = rt_mutex_trylock(&lock->lock); | |
13427 | + if (ret) { | |
13428 | + migrate_disable(); | |
13429 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13430 | + } else | |
13431 | + local_bh_enable(); | |
13432 | + return ret; | |
13433 | +} | |
13434 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
13435 | + | |
13436 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
13437 | +{ | |
13438 | + int ret; | |
13439 | + | |
13440 | + *flags = 0; | |
13441 | + ret = rt_mutex_trylock(&lock->lock); | |
13442 | + if (ret) { | |
13443 | + migrate_disable(); | |
13444 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13445 | + } | |
13446 | + return ret; | |
13447 | +} | |
13448 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
13449 | + | |
13450 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) | |
13451 | +{ | |
13452 | + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ | |
13453 | + if (atomic_add_unless(atomic, -1, 1)) | |
13454 | + return 0; | |
13455 | + rt_spin_lock(lock); | |
13456 | + if (atomic_dec_and_test(atomic)) | |
13457 | + return 1; | |
13458 | + rt_spin_unlock(lock); | |
13459 | + return 0; | |
13460 | +} | |
13461 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
13462 | + | |
13463 | + void | |
13464 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) | |
13465 | +{ | |
13466 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13467 | + /* | |
13468 | + * Make sure we are not reinitializing a held lock: | |
13469 | + */ | |
13470 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
13471 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
13472 | +#endif | |
13473 | +} | |
13474 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
13475 | + | |
13476 | +#endif /* PREEMPT_RT_FULL */ | |
13477 | + | |
13478 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13479 | + static inline int __sched | |
13480 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
13481 | +{ | |
13482 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
13483 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
13484 | + | |
13485 | + if (!hold_ctx) | |
13486 | + return 0; | |
13487 | + | |
13488 | + if (unlikely(ctx == hold_ctx)) | |
13489 | + return -EALREADY; | |
13490 | + | |
13491 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | |
13492 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
13493 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13494 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
13495 | + ctx->contending_lock = ww; | |
13496 | +#endif | |
13497 | + return -EDEADLK; | |
13498 | + } | |
13499 | + | |
13500 | + return 0; | |
13501 | +} | |
13502 | +#else | |
13503 | + static inline int __sched | |
13504 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
13505 | +{ | |
13506 | + BUG(); | |
13507 | + return 0; | |
13508 | +} | |
13509 | + | |
13510 | +#endif | |
13511 | + | |
13512 | +static inline int | |
13513 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
13514 | + struct rt_mutex_waiter *waiter) | |
13515 | +{ | |
13516 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
13517 | +} | |
13518 | + | |
13519 | /* | |
13520 | * Task blocks on lock. | |
13521 | * | |
13522 | @@ -971,6 +1440,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
13523 | return -EDEADLK; | |
13524 | ||
13525 | raw_spin_lock(&task->pi_lock); | |
13526 | + | |
13527 | + /* | |
13528 | + * In the case of futex requeue PI, this will be a proxy | |
13529 | + * lock. The task will wake unaware that it is enqueued on | |
13530 | + * this lock. Avoid blocking on two locks and corrupting | |
13531 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
13532 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
13533 | + * before requeue (due to a signal or timeout). Do not enqueue | |
13534 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
13535 | + */ | |
13536 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
13537 | + raw_spin_unlock(&task->pi_lock); | |
13538 | + return -EAGAIN; | |
13539 | + } | |
13540 | + | |
13541 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); | |
13542 | + | |
13543 | __rt_mutex_adjust_prio(task); | |
13544 | waiter->task = task; | |
13545 | waiter->lock = lock; | |
13546 | @@ -994,7 +1480,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
13547 | rt_mutex_enqueue_pi(owner, waiter); | |
13548 | ||
13549 | __rt_mutex_adjust_prio(owner); | |
13550 | - if (owner->pi_blocked_on) | |
13551 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
13552 | chain_walk = 1; | |
13553 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
13554 | chain_walk = 1; | |
13555 | @@ -1036,6 +1522,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
13556 | * Called with lock->wait_lock held and interrupts disabled. | |
13557 | */ | |
13558 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
13559 | + struct wake_q_head *wake_sleeper_q, | |
13560 | struct rt_mutex *lock) | |
13561 | { | |
13562 | struct rt_mutex_waiter *waiter; | |
13563 | @@ -1064,7 +1551,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
13564 | ||
13565 | raw_spin_unlock(¤t->pi_lock); | |
13566 | ||
13567 | - wake_q_add(wake_q, waiter->task); | |
13568 | + if (waiter->savestate) | |
13569 | + wake_q_add(wake_sleeper_q, waiter->task); | |
13570 | + else | |
13571 | + wake_q_add(wake_q, waiter->task); | |
13572 | } | |
13573 | ||
13574 | /* | |
13575 | @@ -1078,7 +1568,7 @@ static void remove_waiter(struct rt_mutex *lock, | |
13576 | { | |
13577 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
13578 | struct task_struct *owner = rt_mutex_owner(lock); | |
13579 | - struct rt_mutex *next_lock; | |
13580 | + struct rt_mutex *next_lock = NULL; | |
13581 | ||
13582 | raw_spin_lock(¤t->pi_lock); | |
13583 | rt_mutex_dequeue(lock, waiter); | |
13584 | @@ -1102,7 +1592,8 @@ static void remove_waiter(struct rt_mutex *lock, | |
13585 | __rt_mutex_adjust_prio(owner); | |
13586 | ||
13587 | /* Store the lock on which owner is blocked or NULL */ | |
13588 | - next_lock = task_blocked_on_lock(owner); | |
13589 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
13590 | + next_lock = task_blocked_on_lock(owner); | |
13591 | ||
13592 | raw_spin_unlock(&owner->pi_lock); | |
13593 | ||
13594 | @@ -1138,17 +1629,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |
13595 | raw_spin_lock_irqsave(&task->pi_lock, flags); | |
13596 | ||
13597 | waiter = task->pi_blocked_on; | |
13598 | - if (!waiter || (waiter->prio == task->prio && | |
13599 | + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && | |
13600 | !dl_prio(task->prio))) { | |
13601 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13602 | return; | |
13603 | } | |
13604 | next_lock = waiter->lock; | |
13605 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13606 | ||
13607 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
13608 | get_task_struct(task); | |
13609 | ||
13610 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
13611 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
13612 | next_lock, NULL, task); | |
13613 | } | |
13614 | @@ -1166,7 +1657,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |
13615 | static int __sched | |
13616 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13617 | struct hrtimer_sleeper *timeout, | |
13618 | - struct rt_mutex_waiter *waiter) | |
13619 | + struct rt_mutex_waiter *waiter, | |
13620 | + struct ww_acquire_ctx *ww_ctx) | |
13621 | { | |
13622 | int ret = 0; | |
13623 | ||
13624 | @@ -1189,6 +1681,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13625 | break; | |
13626 | } | |
13627 | ||
13628 | + if (ww_ctx && ww_ctx->acquired > 0) { | |
13629 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
13630 | + if (ret) | |
13631 | + break; | |
13632 | + } | |
13633 | + | |
13634 | raw_spin_unlock_irq(&lock->wait_lock); | |
13635 | ||
13636 | debug_rt_mutex_print_deadlock(waiter); | |
13637 | @@ -1223,21 +1721,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, | |
13638 | } | |
13639 | } | |
13640 | ||
13641 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
13642 | + struct ww_acquire_ctx *ww_ctx) | |
13643 | +{ | |
13644 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13645 | + /* | |
13646 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
13647 | + * but released with a normal mutex_unlock in this call. | |
13648 | + * | |
13649 | + * This should never happen, always use ww_mutex_unlock. | |
13650 | + */ | |
13651 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
13652 | + | |
13653 | + /* | |
13654 | + * Not quite done after calling ww_acquire_done() ? | |
13655 | + */ | |
13656 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
13657 | + | |
13658 | + if (ww_ctx->contending_lock) { | |
13659 | + /* | |
13660 | + * After -EDEADLK you tried to | |
13661 | + * acquire a different ww_mutex? Bad! | |
13662 | + */ | |
13663 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
13664 | + | |
13665 | + /* | |
13666 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
13667 | + * but 'forgot' to unlock everything else first? | |
13668 | + */ | |
13669 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
13670 | + ww_ctx->contending_lock = NULL; | |
13671 | + } | |
13672 | + | |
13673 | + /* | |
13674 | + * Naughty, using a different class will lead to undefined behavior! | |
13675 | + */ | |
13676 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
13677 | +#endif | |
13678 | + ww_ctx->acquired++; | |
13679 | +} | |
13680 | + | |
13681 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13682 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
13683 | + struct ww_acquire_ctx *ww_ctx) | |
13684 | +{ | |
13685 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
13686 | + struct rt_mutex_waiter *waiter, *n; | |
13687 | + | |
13688 | + /* | |
13689 | + * This branch gets optimized out for the common case, | |
13690 | + * and is only important for ww_mutex_lock. | |
13691 | + */ | |
13692 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
13693 | + ww->ctx = ww_ctx; | |
13694 | + | |
13695 | + /* | |
13696 | + * Give any possible sleeping processes the chance to wake up, | |
13697 | + * so they can recheck if they have to back off. | |
13698 | + */ | |
13699 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, | |
13700 | + tree_entry) { | |
13701 | + /* XXX debug rt mutex waiter wakeup */ | |
13702 | + | |
13703 | + BUG_ON(waiter->lock != lock); | |
13704 | + rt_mutex_wake_waiter(waiter); | |
13705 | + } | |
13706 | +} | |
13707 | + | |
13708 | +#else | |
13709 | + | |
13710 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
13711 | + struct ww_acquire_ctx *ww_ctx) | |
13712 | +{ | |
13713 | + BUG(); | |
13714 | +} | |
13715 | +#endif | |
13716 | + | |
13717 | /* | |
13718 | * Slow path lock function: | |
13719 | */ | |
13720 | static int __sched | |
13721 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13722 | struct hrtimer_sleeper *timeout, | |
13723 | - enum rtmutex_chainwalk chwalk) | |
13724 | + enum rtmutex_chainwalk chwalk, | |
13725 | + struct ww_acquire_ctx *ww_ctx) | |
13726 | { | |
13727 | struct rt_mutex_waiter waiter; | |
13728 | unsigned long flags; | |
13729 | int ret = 0; | |
13730 | ||
13731 | - debug_rt_mutex_init_waiter(&waiter); | |
13732 | - RB_CLEAR_NODE(&waiter.pi_tree_entry); | |
13733 | - RB_CLEAR_NODE(&waiter.tree_entry); | |
13734 | + rt_mutex_init_waiter(&waiter, false); | |
13735 | ||
13736 | /* | |
13737 | * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
13738 | @@ -1251,6 +1824,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13739 | ||
13740 | /* Try to acquire the lock again: */ | |
13741 | if (try_to_take_rt_mutex(lock, current, NULL)) { | |
13742 | + if (ww_ctx) | |
13743 | + ww_mutex_account_lock(lock, ww_ctx); | |
13744 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13745 | return 0; | |
13746 | } | |
13747 | @@ -1265,13 +1840,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
13748 | ||
13749 | if (likely(!ret)) | |
13750 | /* sleep on the mutex */ | |
13751 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | |
13752 | + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, | |
13753 | + ww_ctx); | |
13754 | + else if (ww_ctx) { | |
13755 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
13756 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
13757 | + BUG_ON(!ret); | |
13758 | + } | |
13759 | ||
13760 | if (unlikely(ret)) { | |
13761 | __set_current_state(TASK_RUNNING); | |
13762 | if (rt_mutex_has_waiters(lock)) | |
13763 | remove_waiter(lock, &waiter); | |
13764 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
13765 | + /* ww_mutex want to report EDEADLK/EALREADY, let them */ | |
13766 | + if (!ww_ctx) | |
13767 | + rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
13768 | + } else if (ww_ctx) { | |
13769 | + ww_mutex_account_lock(lock, ww_ctx); | |
13770 | } | |
13771 | ||
13772 | /* | |
13773 | @@ -1331,7 +1916,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) | |
13774 | * Return whether the current task needs to undo a potential priority boosting. | |
13775 | */ | |
13776 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
13777 | - struct wake_q_head *wake_q) | |
13778 | + struct wake_q_head *wake_q, | |
13779 | + struct wake_q_head *wake_sleeper_q) | |
13780 | { | |
13781 | unsigned long flags; | |
13782 | ||
13783 | @@ -1387,7 +1973,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
13784 | * | |
13785 | * Queue the next waiter for wakeup once we release the wait_lock. | |
13786 | */ | |
13787 | - mark_wakeup_next_waiter(wake_q, lock); | |
13788 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); | |
13789 | ||
13790 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
13791 | ||
13792 | @@ -1403,31 +1989,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
13793 | */ | |
13794 | static inline int | |
13795 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
13796 | + struct ww_acquire_ctx *ww_ctx, | |
13797 | int (*slowfn)(struct rt_mutex *lock, int state, | |
13798 | struct hrtimer_sleeper *timeout, | |
13799 | - enum rtmutex_chainwalk chwalk)) | |
13800 | + enum rtmutex_chainwalk chwalk, | |
13801 | + struct ww_acquire_ctx *ww_ctx)) | |
13802 | { | |
13803 | if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
13804 | rt_mutex_deadlock_account_lock(lock, current); | |
13805 | return 0; | |
13806 | } else | |
13807 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); | |
13808 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, | |
13809 | + ww_ctx); | |
13810 | } | |
13811 | ||
13812 | static inline int | |
13813 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
13814 | struct hrtimer_sleeper *timeout, | |
13815 | enum rtmutex_chainwalk chwalk, | |
13816 | + struct ww_acquire_ctx *ww_ctx, | |
13817 | int (*slowfn)(struct rt_mutex *lock, int state, | |
13818 | struct hrtimer_sleeper *timeout, | |
13819 | - enum rtmutex_chainwalk chwalk)) | |
13820 | + enum rtmutex_chainwalk chwalk, | |
13821 | + struct ww_acquire_ctx *ww_ctx)) | |
13822 | { | |
13823 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
13824 | likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
13825 | rt_mutex_deadlock_account_lock(lock, current); | |
13826 | return 0; | |
13827 | } else | |
13828 | - return slowfn(lock, state, timeout, chwalk); | |
13829 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); | |
13830 | } | |
13831 | ||
13832 | static inline int | |
13833 | @@ -1444,17 +2035,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, | |
13834 | static inline void | |
13835 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
13836 | bool (*slowfn)(struct rt_mutex *lock, | |
13837 | - struct wake_q_head *wqh)) | |
13838 | + struct wake_q_head *wqh, | |
13839 | + struct wake_q_head *wq_sleeper)) | |
13840 | { | |
13841 | WAKE_Q(wake_q); | |
13842 | + WAKE_Q(wake_sleeper_q); | |
13843 | ||
13844 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
13845 | rt_mutex_deadlock_account_unlock(current); | |
13846 | ||
13847 | } else { | |
13848 | - bool deboost = slowfn(lock, &wake_q); | |
13849 | + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q); | |
13850 | ||
13851 | wake_up_q(&wake_q); | |
13852 | + wake_up_q_sleeper(&wake_sleeper_q); | |
13853 | ||
13854 | /* Undo pi boosting if necessary: */ | |
13855 | if (deboost) | |
13856 | @@ -1471,7 +2065,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) | |
13857 | { | |
13858 | might_sleep(); | |
13859 | ||
13860 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); | |
13861 | + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
13862 | } | |
13863 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
13864 | ||
13865 | @@ -1488,7 +2082,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) | |
13866 | { | |
13867 | might_sleep(); | |
13868 | ||
13869 | - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); | |
13870 | + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
13871 | } | |
13872 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
13873 | ||
13874 | @@ -1501,11 +2095,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, | |
13875 | might_sleep(); | |
13876 | ||
13877 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
13878 | - RT_MUTEX_FULL_CHAINWALK, | |
13879 | + RT_MUTEX_FULL_CHAINWALK, NULL, | |
13880 | rt_mutex_slowlock); | |
13881 | } | |
13882 | ||
13883 | /** | |
13884 | + * rt_mutex_lock_killable - lock a rt_mutex killable | |
13885 | + * | |
13886 | + * @lock: the rt_mutex to be locked | |
13887 | + * @detect_deadlock: deadlock detection on/off | |
13888 | + * | |
13889 | + * Returns: | |
13890 | + * 0 on success | |
13891 | + * -EINTR when interrupted by a signal | |
13892 | + * -EDEADLK when the lock would deadlock (when deadlock detection is on) | |
13893 | + */ | |
13894 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) | |
13895 | +{ | |
13896 | + might_sleep(); | |
13897 | + | |
13898 | + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); | |
13899 | +} | |
13900 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); | |
13901 | + | |
13902 | +/** | |
13903 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | |
13904 | * the timeout structure is provided | |
13905 | * by the caller | |
13906 | @@ -1525,6 +2138,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) | |
13907 | ||
13908 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
13909 | RT_MUTEX_MIN_CHAINWALK, | |
13910 | + NULL, | |
13911 | rt_mutex_slowlock); | |
13912 | } | |
13913 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
13914 | @@ -1542,7 +2156,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
13915 | */ | |
13916 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | |
13917 | { | |
13918 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13919 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
13920 | +#else | |
13921 | if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) | |
13922 | +#endif | |
13923 | return 0; | |
13924 | ||
13925 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
13926 | @@ -1568,13 +2186,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); | |
13927 | * required or not. | |
13928 | */ | |
13929 | bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, | |
13930 | - struct wake_q_head *wqh) | |
13931 | + struct wake_q_head *wqh, | |
13932 | + struct wake_q_head *wq_sleeper) | |
13933 | { | |
13934 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
13935 | rt_mutex_deadlock_account_unlock(current); | |
13936 | return false; | |
13937 | } | |
13938 | - return rt_mutex_slowunlock(lock, wqh); | |
13939 | + return rt_mutex_slowunlock(lock, wqh, wq_sleeper); | |
13940 | } | |
13941 | ||
13942 | /** | |
13943 | @@ -1607,13 +2226,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); | |
13944 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) | |
13945 | { | |
13946 | lock->owner = NULL; | |
13947 | - raw_spin_lock_init(&lock->wait_lock); | |
13948 | lock->waiters = RB_ROOT; | |
13949 | lock->waiters_leftmost = NULL; | |
13950 | ||
13951 | debug_rt_mutex_init(lock, name); | |
13952 | } | |
13953 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
13954 | +EXPORT_SYMBOL(__rt_mutex_init); | |
13955 | ||
13956 | /** | |
13957 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
13958 | @@ -1628,7 +2246,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
13959 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
13960 | struct task_struct *proxy_owner) | |
13961 | { | |
13962 | - __rt_mutex_init(lock, NULL); | |
13963 | + rt_mutex_init(lock); | |
13964 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | |
13965 | rt_mutex_set_owner(lock, proxy_owner); | |
13966 | rt_mutex_deadlock_account_lock(lock, proxy_owner); | |
13967 | @@ -1676,6 +2294,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
13968 | return 1; | |
13969 | } | |
13970 | ||
13971 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13972 | + /* | |
13973 | + * In PREEMPT_RT there's an added race. | |
13974 | + * If the task, that we are about to requeue, times out, | |
13975 | + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue | |
13976 | + * to skip this task. But right after the task sets | |
13977 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
13978 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
13979 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
13980 | + * lock that it blocks on. We *must not* place this task | |
13981 | + * on this proxy lock in that case. | |
13982 | + * | |
13983 | + * To prevent this race, we first take the task's pi_lock | |
13984 | + * and check if it has updated its pi_blocked_on. If it has, | |
13985 | + * we assume that it woke up and we return -EAGAIN. | |
13986 | + * Otherwise, we set the task's pi_blocked_on to | |
13987 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
13988 | + * it will know that we are in the process of requeuing it. | |
13989 | + */ | |
13990 | + raw_spin_lock(&task->pi_lock); | |
13991 | + if (task->pi_blocked_on) { | |
13992 | + raw_spin_unlock(&task->pi_lock); | |
13993 | + raw_spin_unlock_irq(&lock->wait_lock); | |
13994 | + return -EAGAIN; | |
13995 | + } | |
13996 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
13997 | + raw_spin_unlock(&task->pi_lock); | |
13998 | +#endif | |
13999 | + | |
14000 | /* We enforce deadlock detection for futexes */ | |
14001 | ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
14002 | RT_MUTEX_FULL_CHAINWALK); | |
14003 | @@ -1690,7 +2337,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
14004 | ret = 0; | |
14005 | } | |
14006 | ||
14007 | - if (unlikely(ret)) | |
14008 | + if (ret && rt_mutex_has_waiters(lock)) | |
14009 | remove_waiter(lock, waiter); | |
14010 | ||
14011 | raw_spin_unlock_irq(&lock->wait_lock); | |
14012 | @@ -1746,7 +2393,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
14013 | set_current_state(TASK_INTERRUPTIBLE); | |
14014 | ||
14015 | /* sleep on the mutex */ | |
14016 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
14017 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
14018 | ||
14019 | if (unlikely(ret)) | |
14020 | remove_waiter(lock, waiter); | |
14021 | @@ -1761,3 +2408,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
14022 | ||
14023 | return ret; | |
14024 | } | |
14025 | + | |
14026 | +static inline int | |
14027 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
14028 | +{ | |
14029 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
14030 | + unsigned tmp; | |
14031 | + | |
14032 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
14033 | + tmp = ctx->deadlock_inject_interval; | |
14034 | + if (tmp > UINT_MAX/4) | |
14035 | + tmp = UINT_MAX; | |
14036 | + else | |
14037 | + tmp = tmp*2 + tmp + tmp/2; | |
14038 | + | |
14039 | + ctx->deadlock_inject_interval = tmp; | |
14040 | + ctx->deadlock_inject_countdown = tmp; | |
14041 | + ctx->contending_lock = lock; | |
14042 | + | |
14043 | + ww_mutex_unlock(lock); | |
14044 | + | |
14045 | + return -EDEADLK; | |
14046 | + } | |
14047 | +#endif | |
14048 | + | |
14049 | + return 0; | |
14050 | +} | |
14051 | + | |
14052 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14053 | +int __sched | |
14054 | +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
14055 | +{ | |
14056 | + int ret; | |
14057 | + | |
14058 | + might_sleep(); | |
14059 | + | |
14060 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
14061 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); | |
14062 | + if (ret) | |
14063 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
14064 | + else if (!ret && ww_ctx->acquired > 1) | |
14065 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
14066 | + | |
14067 | + return ret; | |
14068 | +} | |
14069 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); | |
14070 | + | |
14071 | +int __sched | |
14072 | +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
14073 | +{ | |
14074 | + int ret; | |
14075 | + | |
14076 | + might_sleep(); | |
14077 | + | |
14078 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
14079 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); | |
14080 | + if (ret) | |
14081 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
14082 | + else if (!ret && ww_ctx->acquired > 1) | |
14083 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
14084 | + | |
14085 | + return ret; | |
14086 | +} | |
14087 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock); | |
14088 | + | |
14089 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
14090 | +{ | |
14091 | + int nest = !!lock->ctx; | |
14092 | + | |
14093 | + /* | |
14094 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
14095 | + * into 'unlocked' state: | |
14096 | + */ | |
14097 | + if (nest) { | |
14098 | +#ifdef CONFIG_DEBUG_MUTEXES | |
14099 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
14100 | +#endif | |
14101 | + if (lock->ctx->acquired > 0) | |
14102 | + lock->ctx->acquired--; | |
14103 | + lock->ctx = NULL; | |
14104 | + } | |
14105 | + | |
14106 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
14107 | + rt_mutex_unlock(&lock->base.lock); | |
14108 | +} | |
14109 | +EXPORT_SYMBOL(ww_mutex_unlock); | |
14110 | +#endif | |
14111 | diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h | |
14112 | index e317e1cbb3eb..f457c7574920 100644 | |
14113 | --- a/kernel/locking/rtmutex_common.h | |
14114 | +++ b/kernel/locking/rtmutex_common.h | |
14115 | @@ -27,6 +27,7 @@ struct rt_mutex_waiter { | |
14116 | struct rb_node pi_tree_entry; | |
14117 | struct task_struct *task; | |
14118 | struct rt_mutex *lock; | |
14119 | + bool savestate; | |
14120 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
14121 | unsigned long ip; | |
14122 | struct pid *deadlock_task_pid; | |
14123 | @@ -98,6 +99,9 @@ enum rtmutex_chainwalk { | |
14124 | /* | |
14125 | * PI-futex support (proxy locking functions, etc.): | |
14126 | */ | |
14127 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
14128 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
14129 | + | |
14130 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
14131 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
14132 | struct task_struct *proxy_owner); | |
14133 | @@ -111,7 +115,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
14134 | struct rt_mutex_waiter *waiter); | |
14135 | extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); | |
14136 | extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, | |
14137 | - struct wake_q_head *wqh); | |
14138 | + struct wake_q_head *wqh, | |
14139 | + struct wake_q_head *wq_sleeper); | |
14140 | extern void rt_mutex_adjust_prio(struct task_struct *task); | |
14141 | ||
14142 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
14143 | @@ -120,4 +125,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); | |
14144 | # include "rtmutex.h" | |
14145 | #endif | |
14146 | ||
14147 | +static inline void | |
14148 | +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
14149 | +{ | |
14150 | + debug_rt_mutex_init_waiter(waiter); | |
14151 | + waiter->task = NULL; | |
14152 | + waiter->savestate = savestate; | |
14153 | + RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
14154 | + RB_CLEAR_NODE(&waiter->tree_entry); | |
14155 | +} | |
14156 | + | |
14157 | #endif | |
14158 | diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c | |
14159 | index db3ccb1dd614..909779647bd1 100644 | |
14160 | --- a/kernel/locking/spinlock.c | |
14161 | +++ b/kernel/locking/spinlock.c | |
14162 | @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | |
14163 | * __[spin|read|write]_lock_bh() | |
14164 | */ | |
14165 | BUILD_LOCK_OPS(spin, raw_spinlock); | |
14166 | + | |
14167 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14168 | BUILD_LOCK_OPS(read, rwlock); | |
14169 | BUILD_LOCK_OPS(write, rwlock); | |
14170 | +#endif | |
14171 | ||
14172 | #endif | |
14173 | ||
14174 | @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | |
14175 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
14176 | #endif | |
14177 | ||
14178 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14179 | + | |
14180 | #ifndef CONFIG_INLINE_READ_TRYLOCK | |
14181 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
14182 | { | |
14183 | @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | |
14184 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
14185 | #endif | |
14186 | ||
14187 | +#endif /* !PREEMPT_RT_FULL */ | |
14188 | + | |
14189 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14190 | ||
14191 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
14192 | diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c | |
14193 | index 0374a596cffa..94970338d518 100644 | |
14194 | --- a/kernel/locking/spinlock_debug.c | |
14195 | +++ b/kernel/locking/spinlock_debug.c | |
14196 | @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | |
14197 | ||
14198 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
14199 | ||
14200 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14201 | void __rwlock_init(rwlock_t *lock, const char *name, | |
14202 | struct lock_class_key *key) | |
14203 | { | |
14204 | @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, | |
14205 | } | |
14206 | ||
14207 | EXPORT_SYMBOL(__rwlock_init); | |
14208 | +#endif | |
14209 | ||
14210 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
14211 | { | |
14212 | @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) | |
14213 | arch_spin_unlock(&lock->raw_lock); | |
14214 | } | |
14215 | ||
14216 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14217 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
14218 | { | |
14219 | if (!debug_locks_off()) | |
14220 | @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock) | |
14221 | debug_write_unlock(lock); | |
14222 | arch_write_unlock(&lock->raw_lock); | |
14223 | } | |
14224 | + | |
14225 | +#endif | |
14226 | diff --git a/kernel/panic.c b/kernel/panic.c | |
14227 | index e6480e20379e..7e9c1918a94e 100644 | |
14228 | --- a/kernel/panic.c | |
14229 | +++ b/kernel/panic.c | |
14230 | @@ -482,9 +482,11 @@ static u64 oops_id; | |
14231 | ||
14232 | static int init_oops_id(void) | |
14233 | { | |
14234 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14235 | if (!oops_id) | |
14236 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
14237 | else | |
14238 | +#endif | |
14239 | oops_id++; | |
14240 | ||
14241 | return 0; | |
14242 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c | |
14243 | index b26dbc48c75b..968255f27a33 100644 | |
14244 | --- a/kernel/power/hibernate.c | |
14245 | +++ b/kernel/power/hibernate.c | |
14246 | @@ -286,6 +286,8 @@ static int create_image(int platform_mode) | |
14247 | ||
14248 | local_irq_disable(); | |
14249 | ||
14250 | + system_state = SYSTEM_SUSPEND; | |
14251 | + | |
14252 | error = syscore_suspend(); | |
14253 | if (error) { | |
14254 | printk(KERN_ERR "PM: Some system devices failed to power down, " | |
14255 | @@ -317,6 +319,7 @@ static int create_image(int platform_mode) | |
14256 | syscore_resume(); | |
14257 | ||
14258 | Enable_irqs: | |
14259 | + system_state = SYSTEM_RUNNING; | |
14260 | local_irq_enable(); | |
14261 | ||
14262 | Enable_cpus: | |
14263 | @@ -446,6 +449,7 @@ static int resume_target_kernel(bool platform_mode) | |
14264 | goto Enable_cpus; | |
14265 | ||
14266 | local_irq_disable(); | |
14267 | + system_state = SYSTEM_SUSPEND; | |
14268 | ||
14269 | error = syscore_suspend(); | |
14270 | if (error) | |
14271 | @@ -479,6 +483,7 @@ static int resume_target_kernel(bool platform_mode) | |
14272 | syscore_resume(); | |
14273 | ||
14274 | Enable_irqs: | |
14275 | + system_state = SYSTEM_RUNNING; | |
14276 | local_irq_enable(); | |
14277 | ||
14278 | Enable_cpus: | |
14279 | @@ -564,6 +569,7 @@ int hibernation_platform_enter(void) | |
14280 | goto Enable_cpus; | |
14281 | ||
14282 | local_irq_disable(); | |
14283 | + system_state = SYSTEM_SUSPEND; | |
14284 | syscore_suspend(); | |
14285 | if (pm_wakeup_pending()) { | |
14286 | error = -EAGAIN; | |
14287 | @@ -576,6 +582,7 @@ int hibernation_platform_enter(void) | |
14288 | ||
14289 | Power_up: | |
14290 | syscore_resume(); | |
14291 | + system_state = SYSTEM_RUNNING; | |
14292 | local_irq_enable(); | |
14293 | ||
14294 | Enable_cpus: | |
14295 | @@ -676,6 +683,10 @@ static int load_image_and_restore(void) | |
14296 | return error; | |
14297 | } | |
14298 | ||
14299 | +#ifndef CONFIG_SUSPEND | |
14300 | +bool pm_in_action; | |
14301 | +#endif | |
14302 | + | |
14303 | /** | |
14304 | * hibernate - Carry out system hibernation, including saving the image. | |
14305 | */ | |
14306 | @@ -689,6 +700,8 @@ int hibernate(void) | |
14307 | return -EPERM; | |
14308 | } | |
14309 | ||
14310 | + pm_in_action = true; | |
14311 | + | |
14312 | lock_system_sleep(); | |
14313 | /* The snapshot device should not be opened while we're running */ | |
14314 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
14315 | @@ -766,6 +779,7 @@ int hibernate(void) | |
14316 | atomic_inc(&snapshot_device_available); | |
14317 | Unlock: | |
14318 | unlock_system_sleep(); | |
14319 | + pm_in_action = false; | |
14320 | return error; | |
14321 | } | |
14322 | ||
14323 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | |
14324 | index 6ccb08f57fcb..c8cbb5ed2fe3 100644 | |
14325 | --- a/kernel/power/suspend.c | |
14326 | +++ b/kernel/power/suspend.c | |
14327 | @@ -369,6 +369,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
14328 | arch_suspend_disable_irqs(); | |
14329 | BUG_ON(!irqs_disabled()); | |
14330 | ||
14331 | + system_state = SYSTEM_SUSPEND; | |
14332 | + | |
14333 | error = syscore_suspend(); | |
14334 | if (!error) { | |
14335 | *wakeup = pm_wakeup_pending(); | |
14336 | @@ -385,6 +387,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
14337 | syscore_resume(); | |
14338 | } | |
14339 | ||
14340 | + system_state = SYSTEM_RUNNING; | |
14341 | + | |
14342 | arch_suspend_enable_irqs(); | |
14343 | BUG_ON(irqs_disabled()); | |
14344 | ||
14345 | @@ -527,6 +531,8 @@ static int enter_state(suspend_state_t state) | |
14346 | return error; | |
14347 | } | |
14348 | ||
14349 | +bool pm_in_action; | |
14350 | + | |
14351 | /** | |
14352 | * pm_suspend - Externally visible function for suspending the system. | |
14353 | * @state: System sleep state to enter. | |
14354 | @@ -541,6 +547,8 @@ int pm_suspend(suspend_state_t state) | |
14355 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) | |
14356 | return -EINVAL; | |
14357 | ||
14358 | + pm_in_action = true; | |
14359 | + | |
14360 | error = enter_state(state); | |
14361 | if (error) { | |
14362 | suspend_stats.fail++; | |
14363 | @@ -548,6 +556,7 @@ int pm_suspend(suspend_state_t state) | |
14364 | } else { | |
14365 | suspend_stats.success++; | |
14366 | } | |
14367 | + pm_in_action = false; | |
14368 | return error; | |
14369 | } | |
14370 | EXPORT_SYMBOL(pm_suspend); | |
14371 | diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c | |
14372 | index f7a55e9ff2f7..9277ee033271 100644 | |
14373 | --- a/kernel/printk/printk.c | |
14374 | +++ b/kernel/printk/printk.c | |
14375 | @@ -351,6 +351,65 @@ __packed __aligned(4) | |
14376 | */ | |
14377 | DEFINE_RAW_SPINLOCK(logbuf_lock); | |
14378 | ||
14379 | +#ifdef CONFIG_EARLY_PRINTK | |
14380 | +struct console *early_console; | |
14381 | + | |
14382 | +static void early_vprintk(const char *fmt, va_list ap) | |
14383 | +{ | |
14384 | + if (early_console) { | |
14385 | + char buf[512]; | |
14386 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
14387 | + | |
14388 | + early_console->write(early_console, buf, n); | |
14389 | + } | |
14390 | +} | |
14391 | + | |
14392 | +asmlinkage void early_printk(const char *fmt, ...) | |
14393 | +{ | |
14394 | + va_list ap; | |
14395 | + | |
14396 | + va_start(ap, fmt); | |
14397 | + early_vprintk(fmt, ap); | |
14398 | + va_end(ap); | |
14399 | +} | |
14400 | + | |
14401 | +/* | |
14402 | + * This is independent of any log levels - a global | |
14403 | + * kill switch that turns off all of printk. | |
14404 | + * | |
14405 | + * Used by the NMI watchdog if early-printk is enabled. | |
14406 | + */ | |
14407 | +static bool __read_mostly printk_killswitch; | |
14408 | + | |
14409 | +static int __init force_early_printk_setup(char *str) | |
14410 | +{ | |
14411 | + printk_killswitch = true; | |
14412 | + return 0; | |
14413 | +} | |
14414 | +early_param("force_early_printk", force_early_printk_setup); | |
14415 | + | |
14416 | +void printk_kill(void) | |
14417 | +{ | |
14418 | + printk_killswitch = true; | |
14419 | +} | |
14420 | + | |
14421 | +#ifdef CONFIG_PRINTK | |
14422 | +static int forced_early_printk(const char *fmt, va_list ap) | |
14423 | +{ | |
14424 | + if (!printk_killswitch) | |
14425 | + return 0; | |
14426 | + early_vprintk(fmt, ap); | |
14427 | + return 1; | |
14428 | +} | |
14429 | +#endif | |
14430 | + | |
14431 | +#else | |
14432 | +static inline int forced_early_printk(const char *fmt, va_list ap) | |
14433 | +{ | |
14434 | + return 0; | |
14435 | +} | |
14436 | +#endif | |
14437 | + | |
14438 | #ifdef CONFIG_PRINTK | |
14439 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
14440 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
14441 | @@ -1337,6 +1396,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
14442 | { | |
14443 | char *text; | |
14444 | int len = 0; | |
14445 | + int attempts = 0; | |
14446 | ||
14447 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
14448 | if (!text) | |
14449 | @@ -1348,6 +1408,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
14450 | u64 seq; | |
14451 | u32 idx; | |
14452 | enum log_flags prev; | |
14453 | + int num_msg; | |
14454 | +try_again: | |
14455 | + attempts++; | |
14456 | + if (attempts > 10) { | |
14457 | + len = -EBUSY; | |
14458 | + goto out; | |
14459 | + } | |
14460 | + num_msg = 0; | |
14461 | ||
14462 | /* | |
14463 | * Find first record that fits, including all following records, | |
14464 | @@ -1363,6 +1431,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
14465 | prev = msg->flags; | |
14466 | idx = log_next(idx); | |
14467 | seq++; | |
14468 | + num_msg++; | |
14469 | + if (num_msg > 5) { | |
14470 | + num_msg = 0; | |
14471 | + raw_spin_unlock_irq(&logbuf_lock); | |
14472 | + raw_spin_lock_irq(&logbuf_lock); | |
14473 | + if (clear_seq < log_first_seq) | |
14474 | + goto try_again; | |
14475 | + } | |
14476 | } | |
14477 | ||
14478 | /* move first record forward until length fits into the buffer */ | |
14479 | @@ -1376,6 +1452,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
14480 | prev = msg->flags; | |
14481 | idx = log_next(idx); | |
14482 | seq++; | |
14483 | + num_msg++; | |
14484 | + if (num_msg > 5) { | |
14485 | + num_msg = 0; | |
14486 | + raw_spin_unlock_irq(&logbuf_lock); | |
14487 | + raw_spin_lock_irq(&logbuf_lock); | |
14488 | + if (clear_seq < log_first_seq) | |
14489 | + goto try_again; | |
14490 | + } | |
14491 | } | |
14492 | ||
14493 | /* last message fitting into this dump */ | |
14494 | @@ -1416,6 +1500,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
14495 | clear_seq = log_next_seq; | |
14496 | clear_idx = log_next_idx; | |
14497 | } | |
14498 | +out: | |
14499 | raw_spin_unlock_irq(&logbuf_lock); | |
14500 | ||
14501 | kfree(text); | |
14502 | @@ -1569,6 +1654,12 @@ static void call_console_drivers(int level, | |
14503 | if (!console_drivers) | |
14504 | return; | |
14505 | ||
14506 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
14507 | + if (in_irq() || in_nmi()) | |
14508 | + return; | |
14509 | + } | |
14510 | + | |
14511 | + migrate_disable(); | |
14512 | for_each_console(con) { | |
14513 | if (exclusive_console && con != exclusive_console) | |
14514 | continue; | |
14515 | @@ -1584,6 +1675,7 @@ static void call_console_drivers(int level, | |
14516 | else | |
14517 | con->write(con, text, len); | |
14518 | } | |
14519 | + migrate_enable(); | |
14520 | } | |
14521 | ||
14522 | /* | |
14523 | @@ -1781,6 +1873,13 @@ asmlinkage int vprintk_emit(int facility, int level, | |
14524 | /* cpu currently holding logbuf_lock in this function */ | |
14525 | static unsigned int logbuf_cpu = UINT_MAX; | |
14526 | ||
14527 | + /* | |
14528 | + * Fall back to early_printk if a debugging subsystem has | |
14529 | + * killed printk output | |
14530 | + */ | |
14531 | + if (unlikely(forced_early_printk(fmt, args))) | |
14532 | + return 1; | |
14533 | + | |
14534 | if (level == LOGLEVEL_SCHED) { | |
14535 | level = LOGLEVEL_DEFAULT; | |
14536 | in_sched = true; | |
14537 | @@ -1885,13 +1984,23 @@ asmlinkage int vprintk_emit(int facility, int level, | |
14538 | ||
14539 | /* If called from the scheduler, we can not call up(). */ | |
14540 | if (!in_sched) { | |
14541 | + int may_trylock = 1; | |
14542 | + | |
14543 | lockdep_off(); | |
14544 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14545 | + /* | |
14546 | + * we can't take a sleeping lock with IRQs or preemption disabled | |
14547 | + * so we can't print in these contexts | |
14548 | + */ | |
14549 | + if (!(preempt_count() == 0 && !irqs_disabled())) | |
14550 | + may_trylock = 0; | |
14551 | +#endif | |
14552 | /* | |
14553 | * Try to acquire and then immediately release the console | |
14554 | * semaphore. The release will print out buffers and wake up | |
14555 | * /dev/kmsg and syslog() users. | |
14556 | */ | |
14557 | - if (console_trylock()) | |
14558 | + if (may_trylock && console_trylock()) | |
14559 | console_unlock(); | |
14560 | lockdep_on(); | |
14561 | } | |
14562 | @@ -2014,26 +2123,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); | |
14563 | ||
14564 | #endif /* CONFIG_PRINTK */ | |
14565 | ||
14566 | -#ifdef CONFIG_EARLY_PRINTK | |
14567 | -struct console *early_console; | |
14568 | - | |
14569 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
14570 | -{ | |
14571 | - va_list ap; | |
14572 | - char buf[512]; | |
14573 | - int n; | |
14574 | - | |
14575 | - if (!early_console) | |
14576 | - return; | |
14577 | - | |
14578 | - va_start(ap, fmt); | |
14579 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
14580 | - va_end(ap); | |
14581 | - | |
14582 | - early_console->write(early_console, buf, n); | |
14583 | -} | |
14584 | -#endif | |
14585 | - | |
14586 | static int __add_preferred_console(char *name, int idx, char *options, | |
14587 | char *brl_options) | |
14588 | { | |
14589 | @@ -2303,11 +2392,16 @@ static void console_cont_flush(char *text, size_t size) | |
14590 | goto out; | |
14591 | ||
14592 | len = cont_print_text(text, size); | |
14593 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14594 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
14595 | + call_console_drivers(cont.level, NULL, 0, text, len); | |
14596 | +#else | |
14597 | raw_spin_unlock(&logbuf_lock); | |
14598 | stop_critical_timings(); | |
14599 | call_console_drivers(cont.level, NULL, 0, text, len); | |
14600 | start_critical_timings(); | |
14601 | local_irq_restore(flags); | |
14602 | +#endif | |
14603 | return; | |
14604 | out: | |
14605 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
14606 | @@ -2431,13 +2525,17 @@ void console_unlock(void) | |
14607 | console_idx = log_next(console_idx); | |
14608 | console_seq++; | |
14609 | console_prev = msg->flags; | |
14610 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14611 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
14612 | + call_console_drivers(level, ext_text, ext_len, text, len); | |
14613 | +#else | |
14614 | raw_spin_unlock(&logbuf_lock); | |
14615 | ||
14616 | stop_critical_timings(); /* don't trace print latency */ | |
14617 | call_console_drivers(level, ext_text, ext_len, text, len); | |
14618 | start_critical_timings(); | |
14619 | local_irq_restore(flags); | |
14620 | - | |
14621 | +#endif | |
14622 | if (do_cond_resched) | |
14623 | cond_resched(); | |
14624 | } | |
14625 | @@ -2489,6 +2587,11 @@ void console_unblank(void) | |
14626 | { | |
14627 | struct console *c; | |
14628 | ||
14629 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
14630 | + if (in_irq() || in_nmi()) | |
14631 | + return; | |
14632 | + } | |
14633 | + | |
14634 | /* | |
14635 | * console_unblank can no longer be called in interrupt context unless | |
14636 | * oops_in_progress is set to 1.. | |
14637 | diff --git a/kernel/ptrace.c b/kernel/ptrace.c | |
14638 | index 49ba7c1ade9d..44f44b47ec07 100644 | |
14639 | --- a/kernel/ptrace.c | |
14640 | +++ b/kernel/ptrace.c | |
14641 | @@ -166,7 +166,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) | |
14642 | ||
14643 | spin_lock_irq(&task->sighand->siglock); | |
14644 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
14645 | - task->state = __TASK_TRACED; | |
14646 | + unsigned long flags; | |
14647 | + | |
14648 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14649 | + if (task->state & __TASK_TRACED) | |
14650 | + task->state = __TASK_TRACED; | |
14651 | + else | |
14652 | + task->saved_state = __TASK_TRACED; | |
14653 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14654 | ret = true; | |
14655 | } | |
14656 | spin_unlock_irq(&task->sighand->siglock); | |
14657 | diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c | |
14658 | index bf08fee53dc7..eeb8ce4ad7b6 100644 | |
14659 | --- a/kernel/rcu/rcutorture.c | |
14660 | +++ b/kernel/rcu/rcutorture.c | |
14661 | @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = { | |
14662 | .name = "rcu" | |
14663 | }; | |
14664 | ||
14665 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14666 | /* | |
14667 | * Definitions for rcu_bh torture testing. | |
14668 | */ | |
14669 | @@ -443,6 +444,12 @@ static struct rcu_torture_ops rcu_bh_ops = { | |
14670 | .name = "rcu_bh" | |
14671 | }; | |
14672 | ||
14673 | +#else | |
14674 | +static struct rcu_torture_ops rcu_bh_ops = { | |
14675 | + .ttype = INVALID_RCU_FLAVOR, | |
14676 | +}; | |
14677 | +#endif | |
14678 | + | |
14679 | /* | |
14680 | * Don't even think about trying any of these in real life!!! | |
14681 | * The names includes "busted", and they really means it! | |
14682 | diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c | |
14683 | index 10f62c6f48e7..dbee19478f09 100644 | |
14684 | --- a/kernel/rcu/tree.c | |
14685 | +++ b/kernel/rcu/tree.c | |
14686 | @@ -55,6 +55,11 @@ | |
14687 | #include <linux/random.h> | |
14688 | #include <linux/trace_events.h> | |
14689 | #include <linux/suspend.h> | |
14690 | +#include <linux/delay.h> | |
14691 | +#include <linux/gfp.h> | |
14692 | +#include <linux/oom.h> | |
14693 | +#include <linux/smpboot.h> | |
14694 | +#include "../time/tick-internal.h" | |
14695 | ||
14696 | #include "tree.h" | |
14697 | #include "rcu.h" | |
14698 | @@ -260,6 +265,19 @@ void rcu_sched_qs(void) | |
14699 | this_cpu_ptr(&rcu_sched_data), true); | |
14700 | } | |
14701 | ||
14702 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14703 | +static void rcu_preempt_qs(void); | |
14704 | + | |
14705 | +void rcu_bh_qs(void) | |
14706 | +{ | |
14707 | + unsigned long flags; | |
14708 | + | |
14709 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ | |
14710 | + local_irq_save(flags); | |
14711 | + rcu_preempt_qs(); | |
14712 | + local_irq_restore(flags); | |
14713 | +} | |
14714 | +#else | |
14715 | void rcu_bh_qs(void) | |
14716 | { | |
14717 | if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { | |
14718 | @@ -269,6 +287,7 @@ void rcu_bh_qs(void) | |
14719 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); | |
14720 | } | |
14721 | } | |
14722 | +#endif | |
14723 | ||
14724 | static DEFINE_PER_CPU(int, rcu_sched_qs_mask); | |
14725 | ||
14726 | @@ -449,11 +468,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); | |
14727 | /* | |
14728 | * Return the number of RCU BH batches started thus far for debug & stats. | |
14729 | */ | |
14730 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14731 | unsigned long rcu_batches_started_bh(void) | |
14732 | { | |
14733 | return rcu_bh_state.gpnum; | |
14734 | } | |
14735 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
14736 | +#endif | |
14737 | ||
14738 | /* | |
14739 | * Return the number of RCU batches completed thus far for debug & stats. | |
14740 | @@ -473,6 +494,7 @@ unsigned long rcu_batches_completed_sched(void) | |
14741 | } | |
14742 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
14743 | ||
14744 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14745 | /* | |
14746 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
14747 | */ | |
14748 | @@ -481,6 +503,7 @@ unsigned long rcu_batches_completed_bh(void) | |
14749 | return rcu_bh_state.completed; | |
14750 | } | |
14751 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
14752 | +#endif | |
14753 | ||
14754 | /* | |
14755 | * Return the number of RCU expedited batches completed thus far for | |
14756 | @@ -504,6 +527,7 @@ unsigned long rcu_exp_batches_completed_sched(void) | |
14757 | } | |
14758 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); | |
14759 | ||
14760 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14761 | /* | |
14762 | * Force a quiescent state. | |
14763 | */ | |
14764 | @@ -522,6 +546,13 @@ void rcu_bh_force_quiescent_state(void) | |
14765 | } | |
14766 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | |
14767 | ||
14768 | +#else | |
14769 | +void rcu_force_quiescent_state(void) | |
14770 | +{ | |
14771 | +} | |
14772 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
14773 | +#endif | |
14774 | + | |
14775 | /* | |
14776 | * Force a quiescent state for RCU-sched. | |
14777 | */ | |
14778 | @@ -572,9 +603,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | |
14779 | case RCU_FLAVOR: | |
14780 | rsp = rcu_state_p; | |
14781 | break; | |
14782 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14783 | case RCU_BH_FLAVOR: | |
14784 | rsp = &rcu_bh_state; | |
14785 | break; | |
14786 | +#endif | |
14787 | case RCU_SCHED_FLAVOR: | |
14788 | rsp = &rcu_sched_state; | |
14789 | break; | |
14790 | @@ -3016,18 +3049,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |
14791 | /* | |
14792 | * Do RCU core processing for the current CPU. | |
14793 | */ | |
14794 | -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) | |
14795 | +static __latent_entropy void rcu_process_callbacks(void) | |
14796 | { | |
14797 | struct rcu_state *rsp; | |
14798 | ||
14799 | if (cpu_is_offline(smp_processor_id())) | |
14800 | return; | |
14801 | - trace_rcu_utilization(TPS("Start RCU core")); | |
14802 | for_each_rcu_flavor(rsp) | |
14803 | __rcu_process_callbacks(rsp); | |
14804 | - trace_rcu_utilization(TPS("End RCU core")); | |
14805 | } | |
14806 | ||
14807 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
14808 | /* | |
14809 | * Schedule RCU callback invocation. If the specified type of RCU | |
14810 | * does not support RCU priority boosting, just do a direct call, | |
14811 | @@ -3039,19 +3071,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |
14812 | { | |
14813 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) | |
14814 | return; | |
14815 | - if (likely(!rsp->boost)) { | |
14816 | - rcu_do_batch(rsp, rdp); | |
14817 | - return; | |
14818 | - } | |
14819 | - invoke_rcu_callbacks_kthread(); | |
14820 | + rcu_do_batch(rsp, rdp); | |
14821 | } | |
14822 | ||
14823 | +static void rcu_wake_cond(struct task_struct *t, int status) | |
14824 | +{ | |
14825 | + /* | |
14826 | + * If the thread is yielding, only wake it when this | |
14827 | + * is invoked from idle | |
14828 | + */ | |
14829 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
14830 | + wake_up_process(t); | |
14831 | +} | |
14832 | + | |
14833 | +/* | |
14834 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
14835 | + */ | |
14836 | static void invoke_rcu_core(void) | |
14837 | { | |
14838 | - if (cpu_online(smp_processor_id())) | |
14839 | - raise_softirq(RCU_SOFTIRQ); | |
14840 | + unsigned long flags; | |
14841 | + struct task_struct *t; | |
14842 | + | |
14843 | + if (!cpu_online(smp_processor_id())) | |
14844 | + return; | |
14845 | + local_irq_save(flags); | |
14846 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
14847 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
14848 | + if (t != NULL && current != t) | |
14849 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
14850 | + local_irq_restore(flags); | |
14851 | } | |
14852 | ||
14853 | +static void rcu_cpu_kthread_park(unsigned int cpu) | |
14854 | +{ | |
14855 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
14856 | +} | |
14857 | + | |
14858 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
14859 | +{ | |
14860 | + return __this_cpu_read(rcu_cpu_has_work); | |
14861 | +} | |
14862 | + | |
14863 | +/* | |
14864 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
14865 | + * RCU softirq used in flavors and configurations of RCU that do not | |
14866 | + * support RCU priority boosting. | |
14867 | + */ | |
14868 | +static void rcu_cpu_kthread(unsigned int cpu) | |
14869 | +{ | |
14870 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
14871 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
14872 | + int spincnt; | |
14873 | + | |
14874 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
14875 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
14876 | + local_bh_disable(); | |
14877 | + *statusp = RCU_KTHREAD_RUNNING; | |
14878 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
14879 | + local_irq_disable(); | |
14880 | + work = *workp; | |
14881 | + *workp = 0; | |
14882 | + local_irq_enable(); | |
14883 | + if (work) | |
14884 | + rcu_process_callbacks(); | |
14885 | + local_bh_enable(); | |
14886 | + if (*workp == 0) { | |
14887 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
14888 | + *statusp = RCU_KTHREAD_WAITING; | |
14889 | + return; | |
14890 | + } | |
14891 | + } | |
14892 | + *statusp = RCU_KTHREAD_YIELDING; | |
14893 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
14894 | + schedule_timeout_interruptible(2); | |
14895 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
14896 | + *statusp = RCU_KTHREAD_WAITING; | |
14897 | +} | |
14898 | + | |
14899 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
14900 | + .store = &rcu_cpu_kthread_task, | |
14901 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
14902 | + .thread_fn = rcu_cpu_kthread, | |
14903 | + .thread_comm = "rcuc/%u", | |
14904 | + .setup = rcu_cpu_kthread_setup, | |
14905 | + .park = rcu_cpu_kthread_park, | |
14906 | +}; | |
14907 | + | |
14908 | +/* | |
14909 | + * Spawn per-CPU RCU core processing kthreads. | |
14910 | + */ | |
14911 | +static int __init rcu_spawn_core_kthreads(void) | |
14912 | +{ | |
14913 | + int cpu; | |
14914 | + | |
14915 | + for_each_possible_cpu(cpu) | |
14916 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
14917 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
14918 | + return 0; | |
14919 | +} | |
14920 | +early_initcall(rcu_spawn_core_kthreads); | |
14921 | + | |
14922 | /* | |
14923 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
14924 | */ | |
14925 | @@ -3195,6 +3314,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) | |
14926 | } | |
14927 | EXPORT_SYMBOL_GPL(call_rcu_sched); | |
14928 | ||
14929 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14930 | /* | |
14931 | * Queue an RCU callback for invocation after a quicker grace period. | |
14932 | */ | |
14933 | @@ -3203,6 +3323,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) | |
14934 | __call_rcu(head, func, &rcu_bh_state, -1, 0); | |
14935 | } | |
14936 | EXPORT_SYMBOL_GPL(call_rcu_bh); | |
14937 | +#endif | |
14938 | ||
14939 | /* | |
14940 | * Queue an RCU callback for lazy invocation after a grace period. | |
14941 | @@ -3294,6 +3415,7 @@ void synchronize_sched(void) | |
14942 | } | |
14943 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
14944 | ||
14945 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14946 | /** | |
14947 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
14948 | * | |
14949 | @@ -3320,6 +3442,7 @@ void synchronize_rcu_bh(void) | |
14950 | wait_rcu_gp(call_rcu_bh); | |
14951 | } | |
14952 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
14953 | +#endif | |
14954 | ||
14955 | /** | |
14956 | * get_state_synchronize_rcu - Snapshot current RCU state | |
14957 | @@ -3698,6 +3821,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |
14958 | mutex_unlock(&rsp->barrier_mutex); | |
14959 | } | |
14960 | ||
14961 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14962 | /** | |
14963 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
14964 | */ | |
14965 | @@ -3706,6 +3830,7 @@ void rcu_barrier_bh(void) | |
14966 | _rcu_barrier(&rcu_bh_state); | |
14967 | } | |
14968 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
14969 | +#endif | |
14970 | ||
14971 | /** | |
14972 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
14973 | @@ -4227,12 +4352,13 @@ void __init rcu_init(void) | |
14974 | ||
14975 | rcu_bootup_announce(); | |
14976 | rcu_init_geometry(); | |
14977 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14978 | rcu_init_one(&rcu_bh_state); | |
14979 | +#endif | |
14980 | rcu_init_one(&rcu_sched_state); | |
14981 | if (dump_tree) | |
14982 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
14983 | __rcu_init_preempt(); | |
14984 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
14985 | ||
14986 | /* | |
14987 | * We don't need protection against CPU-hotplug here because | |
14988 | diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h | |
14989 | index e99a5234d9ed..958ac107062c 100644 | |
14990 | --- a/kernel/rcu/tree.h | |
14991 | +++ b/kernel/rcu/tree.h | |
14992 | @@ -588,18 +588,18 @@ extern struct list_head rcu_struct_flavors; | |
14993 | */ | |
14994 | extern struct rcu_state rcu_sched_state; | |
14995 | ||
14996 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14997 | extern struct rcu_state rcu_bh_state; | |
14998 | +#endif | |
14999 | ||
15000 | #ifdef CONFIG_PREEMPT_RCU | |
15001 | extern struct rcu_state rcu_preempt_state; | |
15002 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
15003 | ||
15004 | -#ifdef CONFIG_RCU_BOOST | |
15005 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
15006 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
15007 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
15008 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
15009 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
15010 | ||
15011 | #ifndef RCU_TREE_NONCORE | |
15012 | ||
15013 | @@ -619,10 +619,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); | |
15014 | static void __init __rcu_init_preempt(void); | |
15015 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
15016 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
15017 | -static void invoke_rcu_callbacks_kthread(void); | |
15018 | static bool rcu_is_callbacks_kthread(void); | |
15019 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
15020 | #ifdef CONFIG_RCU_BOOST | |
15021 | -static void rcu_preempt_do_callbacks(void); | |
15022 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
15023 | struct rcu_node *rnp); | |
15024 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
15025 | diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h | |
15026 | index 56583e764ebf..7c656f8e192f 100644 | |
15027 | --- a/kernel/rcu/tree_plugin.h | |
15028 | +++ b/kernel/rcu/tree_plugin.h | |
15029 | @@ -24,25 +24,10 @@ | |
15030 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
15031 | */ | |
15032 | ||
15033 | -#include <linux/delay.h> | |
15034 | -#include <linux/gfp.h> | |
15035 | -#include <linux/oom.h> | |
15036 | -#include <linux/smpboot.h> | |
15037 | -#include "../time/tick-internal.h" | |
15038 | - | |
15039 | #ifdef CONFIG_RCU_BOOST | |
15040 | ||
15041 | #include "../locking/rtmutex_common.h" | |
15042 | ||
15043 | -/* | |
15044 | - * Control variables for per-CPU and per-rcu_node kthreads. These | |
15045 | - * handle all flavors of RCU. | |
15046 | - */ | |
15047 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
15048 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
15049 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
15050 | -DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
15051 | - | |
15052 | #else /* #ifdef CONFIG_RCU_BOOST */ | |
15053 | ||
15054 | /* | |
15055 | @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
15056 | ||
15057 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
15058 | ||
15059 | +/* | |
15060 | + * Control variables for per-CPU and per-rcu_node kthreads. These | |
15061 | + * handle all flavors of RCU. | |
15062 | + */ | |
15063 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
15064 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
15065 | +DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
15066 | + | |
15067 | #ifdef CONFIG_RCU_NOCB_CPU | |
15068 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
15069 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
15070 | @@ -426,7 +419,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |
15071 | } | |
15072 | ||
15073 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
15074 | - if (in_irq() || in_serving_softirq()) { | |
15075 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
15076 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
15077 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
15078 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
15079 | @@ -632,15 +625,6 @@ static void rcu_preempt_check_callbacks(void) | |
15080 | t->rcu_read_unlock_special.b.need_qs = true; | |
15081 | } | |
15082 | ||
15083 | -#ifdef CONFIG_RCU_BOOST | |
15084 | - | |
15085 | -static void rcu_preempt_do_callbacks(void) | |
15086 | -{ | |
15087 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
15088 | -} | |
15089 | - | |
15090 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
15091 | - | |
15092 | /* | |
15093 | * Queue a preemptible-RCU callback for invocation after a grace period. | |
15094 | */ | |
15095 | @@ -829,6 +813,19 @@ void exit_rcu(void) | |
15096 | ||
15097 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
15098 | ||
15099 | +/* | |
15100 | + * If boosting, set rcuc kthreads to realtime priority. | |
15101 | + */ | |
15102 | +static void rcu_cpu_kthread_setup(unsigned int cpu) | |
15103 | +{ | |
15104 | +#ifdef CONFIG_RCU_BOOST | |
15105 | + struct sched_param sp; | |
15106 | + | |
15107 | + sp.sched_priority = kthread_prio; | |
15108 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
15109 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
15110 | +} | |
15111 | + | |
15112 | #ifdef CONFIG_RCU_BOOST | |
15113 | ||
15114 | #include "../locking/rtmutex_common.h" | |
15115 | @@ -860,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |
15116 | ||
15117 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | |
15118 | ||
15119 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
15120 | -{ | |
15121 | - /* | |
15122 | - * If the thread is yielding, only wake it when this | |
15123 | - * is invoked from idle | |
15124 | - */ | |
15125 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
15126 | - wake_up_process(t); | |
15127 | -} | |
15128 | - | |
15129 | /* | |
15130 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
15131 | * or ->boost_tasks, advancing the pointer to the next task in the | |
15132 | @@ -1013,23 +1000,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
15133 | } | |
15134 | ||
15135 | /* | |
15136 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
15137 | - */ | |
15138 | -static void invoke_rcu_callbacks_kthread(void) | |
15139 | -{ | |
15140 | - unsigned long flags; | |
15141 | - | |
15142 | - local_irq_save(flags); | |
15143 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
15144 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
15145 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
15146 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
15147 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
15148 | - } | |
15149 | - local_irq_restore(flags); | |
15150 | -} | |
15151 | - | |
15152 | -/* | |
15153 | * Is the current CPU running the RCU-callbacks kthread? | |
15154 | * Caller must have preemption disabled. | |
15155 | */ | |
15156 | @@ -1083,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
15157 | return 0; | |
15158 | } | |
15159 | ||
15160 | -static void rcu_kthread_do_work(void) | |
15161 | -{ | |
15162 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
15163 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
15164 | - rcu_preempt_do_callbacks(); | |
15165 | -} | |
15166 | - | |
15167 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
15168 | -{ | |
15169 | - struct sched_param sp; | |
15170 | - | |
15171 | - sp.sched_priority = kthread_prio; | |
15172 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
15173 | -} | |
15174 | - | |
15175 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
15176 | -{ | |
15177 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
15178 | -} | |
15179 | - | |
15180 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
15181 | -{ | |
15182 | - return __this_cpu_read(rcu_cpu_has_work); | |
15183 | -} | |
15184 | - | |
15185 | -/* | |
15186 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
15187 | - * RCU softirq used in flavors and configurations of RCU that do not | |
15188 | - * support RCU priority boosting. | |
15189 | - */ | |
15190 | -static void rcu_cpu_kthread(unsigned int cpu) | |
15191 | -{ | |
15192 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
15193 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
15194 | - int spincnt; | |
15195 | - | |
15196 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
15197 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
15198 | - local_bh_disable(); | |
15199 | - *statusp = RCU_KTHREAD_RUNNING; | |
15200 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
15201 | - local_irq_disable(); | |
15202 | - work = *workp; | |
15203 | - *workp = 0; | |
15204 | - local_irq_enable(); | |
15205 | - if (work) | |
15206 | - rcu_kthread_do_work(); | |
15207 | - local_bh_enable(); | |
15208 | - if (*workp == 0) { | |
15209 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
15210 | - *statusp = RCU_KTHREAD_WAITING; | |
15211 | - return; | |
15212 | - } | |
15213 | - } | |
15214 | - *statusp = RCU_KTHREAD_YIELDING; | |
15215 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
15216 | - schedule_timeout_interruptible(2); | |
15217 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
15218 | - *statusp = RCU_KTHREAD_WAITING; | |
15219 | -} | |
15220 | - | |
15221 | /* | |
15222 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
15223 | * served by the rcu_node in question. The CPU hotplug lock is still | |
15224 | @@ -1174,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |
15225 | free_cpumask_var(cm); | |
15226 | } | |
15227 | ||
15228 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
15229 | - .store = &rcu_cpu_kthread_task, | |
15230 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
15231 | - .thread_fn = rcu_cpu_kthread, | |
15232 | - .thread_comm = "rcuc/%u", | |
15233 | - .setup = rcu_cpu_kthread_setup, | |
15234 | - .park = rcu_cpu_kthread_park, | |
15235 | -}; | |
15236 | - | |
15237 | /* | |
15238 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
15239 | */ | |
15240 | static void __init rcu_spawn_boost_kthreads(void) | |
15241 | { | |
15242 | struct rcu_node *rnp; | |
15243 | - int cpu; | |
15244 | - | |
15245 | - for_each_possible_cpu(cpu) | |
15246 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
15247 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
15248 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
15249 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
15250 | } | |
15251 | @@ -1216,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
15252 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | |
15253 | } | |
15254 | ||
15255 | -static void invoke_rcu_callbacks_kthread(void) | |
15256 | -{ | |
15257 | - WARN_ON_ONCE(1); | |
15258 | -} | |
15259 | - | |
15260 | static bool rcu_is_callbacks_kthread(void) | |
15261 | { | |
15262 | return false; | |
15263 | @@ -1244,7 +1134,7 @@ static void rcu_prepare_kthreads(int cpu) | |
15264 | ||
15265 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
15266 | ||
15267 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
15268 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
15269 | ||
15270 | /* | |
15271 | * Check to see if any future RCU-related work will need to be done | |
15272 | @@ -1261,7 +1151,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
15273 | return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) | |
15274 | ? 0 : rcu_cpu_has_callbacks(NULL); | |
15275 | } | |
15276 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ | |
15277 | ||
15278 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
15279 | /* | |
15280 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
15281 | * after it. | |
15282 | @@ -1357,6 +1249,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |
15283 | return cbs_ready; | |
15284 | } | |
15285 | ||
15286 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15287 | + | |
15288 | /* | |
15289 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
15290 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
15291 | @@ -1402,6 +1296,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
15292 | *nextevt = basemono + dj * TICK_NSEC; | |
15293 | return 0; | |
15294 | } | |
15295 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
15296 | ||
15297 | /* | |
15298 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
15299 | diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c | |
15300 | index 4f6db7e6a117..ee02e1e1b3e5 100644 | |
15301 | --- a/kernel/rcu/update.c | |
15302 | +++ b/kernel/rcu/update.c | |
15303 | @@ -62,7 +62,7 @@ | |
15304 | #ifndef CONFIG_TINY_RCU | |
15305 | module_param(rcu_expedited, int, 0); | |
15306 | module_param(rcu_normal, int, 0); | |
15307 | -static int rcu_normal_after_boot; | |
15308 | +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
15309 | module_param(rcu_normal_after_boot, int, 0); | |
15310 | #endif /* #ifndef CONFIG_TINY_RCU */ | |
15311 | ||
15312 | @@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void) | |
15313 | } | |
15314 | EXPORT_SYMBOL_GPL(rcu_gp_is_normal); | |
15315 | ||
15316 | -static atomic_t rcu_expedited_nesting = | |
15317 | - ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | |
15318 | +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); | |
15319 | ||
15320 | /* | |
15321 | * Should normal grace-period primitives be expedited? Intended for | |
15322 | @@ -182,8 +181,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); | |
15323 | */ | |
15324 | void rcu_end_inkernel_boot(void) | |
15325 | { | |
15326 | - if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) | |
15327 | - rcu_unexpedite_gp(); | |
15328 | + rcu_unexpedite_gp(); | |
15329 | if (rcu_normal_after_boot) | |
15330 | WRITE_ONCE(rcu_normal, 1); | |
15331 | } | |
15332 | @@ -298,6 +296,7 @@ int rcu_read_lock_held(void) | |
15333 | } | |
15334 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
15335 | ||
15336 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15337 | /** | |
15338 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
15339 | * | |
15340 | @@ -324,6 +323,7 @@ int rcu_read_lock_bh_held(void) | |
15341 | return in_softirq() || irqs_disabled(); | |
15342 | } | |
15343 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
15344 | +#endif | |
15345 | ||
15346 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
15347 | ||
15348 | diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile | |
15349 | index 5e59b832ae2b..7337a7f60e3f 100644 | |
15350 | --- a/kernel/sched/Makefile | |
15351 | +++ b/kernel/sched/Makefile | |
15352 | @@ -17,7 +17,7 @@ endif | |
15353 | ||
15354 | obj-y += core.o loadavg.o clock.o cputime.o | |
15355 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | |
15356 | -obj-y += wait.o swait.o completion.o idle.o | |
15357 | +obj-y += wait.o swait.o swork.o completion.o idle.o | |
15358 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | |
15359 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |
15360 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
15361 | diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c | |
15362 | index 8d0f35debf35..b62cf6400fe0 100644 | |
15363 | --- a/kernel/sched/completion.c | |
15364 | +++ b/kernel/sched/completion.c | |
15365 | @@ -30,10 +30,10 @@ void complete(struct completion *x) | |
15366 | { | |
15367 | unsigned long flags; | |
15368 | ||
15369 | - spin_lock_irqsave(&x->wait.lock, flags); | |
15370 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
15371 | x->done++; | |
15372 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
15373 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
15374 | + swake_up_locked(&x->wait); | |
15375 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
15376 | } | |
15377 | EXPORT_SYMBOL(complete); | |
15378 | ||
15379 | @@ -50,10 +50,10 @@ void complete_all(struct completion *x) | |
15380 | { | |
15381 | unsigned long flags; | |
15382 | ||
15383 | - spin_lock_irqsave(&x->wait.lock, flags); | |
15384 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
15385 | x->done += UINT_MAX/2; | |
15386 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
15387 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
15388 | + swake_up_all_locked(&x->wait); | |
15389 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
15390 | } | |
15391 | EXPORT_SYMBOL(complete_all); | |
15392 | ||
15393 | @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x, | |
15394 | long (*action)(long), long timeout, int state) | |
15395 | { | |
15396 | if (!x->done) { | |
15397 | - DECLARE_WAITQUEUE(wait, current); | |
15398 | + DECLARE_SWAITQUEUE(wait); | |
15399 | ||
15400 | - __add_wait_queue_tail_exclusive(&x->wait, &wait); | |
15401 | + __prepare_to_swait(&x->wait, &wait); | |
15402 | do { | |
15403 | if (signal_pending_state(state, current)) { | |
15404 | timeout = -ERESTARTSYS; | |
15405 | break; | |
15406 | } | |
15407 | __set_current_state(state); | |
15408 | - spin_unlock_irq(&x->wait.lock); | |
15409 | + raw_spin_unlock_irq(&x->wait.lock); | |
15410 | timeout = action(timeout); | |
15411 | - spin_lock_irq(&x->wait.lock); | |
15412 | + raw_spin_lock_irq(&x->wait.lock); | |
15413 | } while (!x->done && timeout); | |
15414 | - __remove_wait_queue(&x->wait, &wait); | |
15415 | + __finish_swait(&x->wait, &wait); | |
15416 | if (!x->done) | |
15417 | return timeout; | |
15418 | } | |
15419 | @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x, | |
15420 | { | |
15421 | might_sleep(); | |
15422 | ||
15423 | - spin_lock_irq(&x->wait.lock); | |
15424 | + raw_spin_lock_irq(&x->wait.lock); | |
15425 | timeout = do_wait_for_common(x, action, timeout, state); | |
15426 | - spin_unlock_irq(&x->wait.lock); | |
15427 | + raw_spin_unlock_irq(&x->wait.lock); | |
15428 | return timeout; | |
15429 | } | |
15430 | ||
15431 | @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x) | |
15432 | if (!READ_ONCE(x->done)) | |
15433 | return 0; | |
15434 | ||
15435 | - spin_lock_irqsave(&x->wait.lock, flags); | |
15436 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
15437 | if (!x->done) | |
15438 | ret = 0; | |
15439 | else | |
15440 | x->done--; | |
15441 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
15442 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
15443 | return ret; | |
15444 | } | |
15445 | EXPORT_SYMBOL(try_wait_for_completion); | |
15446 | @@ -311,7 +311,7 @@ bool completion_done(struct completion *x) | |
15447 | * after it's acquired the lock. | |
15448 | */ | |
15449 | smp_rmb(); | |
15450 | - spin_unlock_wait(&x->wait.lock); | |
15451 | + raw_spin_unlock_wait(&x->wait.lock); | |
15452 | return true; | |
15453 | } | |
15454 | EXPORT_SYMBOL(completion_done); | |
15455 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
15456 | index 154fd689fe02..a6aa5801b21e 100644 | |
15457 | --- a/kernel/sched/core.c | |
15458 | +++ b/kernel/sched/core.c | |
15459 | @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features = | |
15460 | * Number of tasks to iterate in a single balance run. | |
15461 | * Limited because this is done with IRQs disabled. | |
15462 | */ | |
15463 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15464 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
15465 | +#else | |
15466 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
15467 | +#endif | |
15468 | ||
15469 | /* | |
15470 | * period over which we average the RT time consumption, measured | |
15471 | @@ -345,6 +349,7 @@ static void init_rq_hrtick(struct rq *rq) | |
15472 | ||
15473 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
15474 | rq->hrtick_timer.function = hrtick; | |
15475 | + rq->hrtick_timer.irqsafe = 1; | |
15476 | } | |
15477 | #else /* CONFIG_SCHED_HRTICK */ | |
15478 | static inline void hrtick_clear(struct rq *rq) | |
15479 | @@ -449,7 +454,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |
15480 | head->lastp = &node->next; | |
15481 | } | |
15482 | ||
15483 | -void wake_up_q(struct wake_q_head *head) | |
15484 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
15485 | { | |
15486 | struct wake_q_node *node = head->first; | |
15487 | ||
15488 | @@ -466,7 +471,10 @@ void wake_up_q(struct wake_q_head *head) | |
15489 | * wake_up_process() implies a wmb() to pair with the queueing | |
15490 | * in wake_q_add() so as not to miss wakeups. | |
15491 | */ | |
15492 | - wake_up_process(task); | |
15493 | + if (sleeper) | |
15494 | + wake_up_lock_sleeper(task); | |
15495 | + else | |
15496 | + wake_up_process(task); | |
15497 | put_task_struct(task); | |
15498 | } | |
15499 | } | |
15500 | @@ -502,6 +510,38 @@ void resched_curr(struct rq *rq) | |
15501 | trace_sched_wake_idle_without_ipi(cpu); | |
15502 | } | |
15503 | ||
15504 | +#ifdef CONFIG_PREEMPT_LAZY | |
15505 | +void resched_curr_lazy(struct rq *rq) | |
15506 | +{ | |
15507 | + struct task_struct *curr = rq->curr; | |
15508 | + int cpu; | |
15509 | + | |
15510 | + if (!sched_feat(PREEMPT_LAZY)) { | |
15511 | + resched_curr(rq); | |
15512 | + return; | |
15513 | + } | |
15514 | + | |
15515 | + lockdep_assert_held(&rq->lock); | |
15516 | + | |
15517 | + if (test_tsk_need_resched(curr)) | |
15518 | + return; | |
15519 | + | |
15520 | + if (test_tsk_need_resched_lazy(curr)) | |
15521 | + return; | |
15522 | + | |
15523 | + set_tsk_need_resched_lazy(curr); | |
15524 | + | |
15525 | + cpu = cpu_of(rq); | |
15526 | + if (cpu == smp_processor_id()) | |
15527 | + return; | |
15528 | + | |
15529 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
15530 | + smp_mb(); | |
15531 | + if (!tsk_is_polling(curr)) | |
15532 | + smp_send_reschedule(cpu); | |
15533 | +} | |
15534 | +#endif | |
15535 | + | |
15536 | void resched_cpu(int cpu) | |
15537 | { | |
15538 | struct rq *rq = cpu_rq(cpu); | |
15539 | @@ -525,11 +565,14 @@ void resched_cpu(int cpu) | |
15540 | */ | |
15541 | int get_nohz_timer_target(void) | |
15542 | { | |
15543 | - int i, cpu = smp_processor_id(); | |
15544 | + int i, cpu; | |
15545 | struct sched_domain *sd; | |
15546 | ||
15547 | + preempt_disable_rt(); | |
15548 | + cpu = smp_processor_id(); | |
15549 | + | |
15550 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
15551 | - return cpu; | |
15552 | + goto preempt_en_rt; | |
15553 | ||
15554 | rcu_read_lock(); | |
15555 | for_each_domain(cpu, sd) { | |
15556 | @@ -548,6 +591,8 @@ int get_nohz_timer_target(void) | |
15557 | cpu = housekeeping_any_cpu(); | |
15558 | unlock: | |
15559 | rcu_read_unlock(); | |
15560 | +preempt_en_rt: | |
15561 | + preempt_enable_rt(); | |
15562 | return cpu; | |
15563 | } | |
15564 | /* | |
15565 | @@ -1100,6 +1145,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
15566 | ||
15567 | lockdep_assert_held(&p->pi_lock); | |
15568 | ||
15569 | + if (__migrate_disabled(p)) { | |
15570 | + cpumask_copy(&p->cpus_allowed, new_mask); | |
15571 | + return; | |
15572 | + } | |
15573 | + | |
15574 | queued = task_on_rq_queued(p); | |
15575 | running = task_current(rq, p); | |
15576 | ||
15577 | @@ -1122,6 +1172,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
15578 | set_curr_task(rq, p); | |
15579 | } | |
15580 | ||
15581 | +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); | |
15582 | +static DEFINE_MUTEX(sched_down_mutex); | |
15583 | +static cpumask_t sched_down_cpumask; | |
15584 | + | |
15585 | +void tell_sched_cpu_down_begin(int cpu) | |
15586 | +{ | |
15587 | + mutex_lock(&sched_down_mutex); | |
15588 | + cpumask_set_cpu(cpu, &sched_down_cpumask); | |
15589 | + mutex_unlock(&sched_down_mutex); | |
15590 | +} | |
15591 | + | |
15592 | +void tell_sched_cpu_down_done(int cpu) | |
15593 | +{ | |
15594 | + mutex_lock(&sched_down_mutex); | |
15595 | + cpumask_clear_cpu(cpu, &sched_down_cpumask); | |
15596 | + mutex_unlock(&sched_down_mutex); | |
15597 | +} | |
15598 | + | |
15599 | +/** | |
15600 | + * migrate_me - try to move the current task off this cpu | |
15601 | + * | |
15602 | + * Used by the pin_current_cpu() code to try to get tasks | |
15603 | + * to move off the current CPU as it is going down. | |
15604 | + * It will only move the task if the task isn't pinned to | |
15605 | + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) | |
15606 | + * and the task has to be in a RUNNING state. Otherwise the | |
15607 | + * movement of the task will wake it up (change its state | |
15608 | + * to running) when the task did not expect it. | |
15609 | + * | |
15610 | + * Returns 1 if it succeeded in moving the current task | |
15611 | + * 0 otherwise. | |
15612 | + */ | |
15613 | +int migrate_me(void) | |
15614 | +{ | |
15615 | + struct task_struct *p = current; | |
15616 | + struct migration_arg arg; | |
15617 | + struct cpumask *cpumask; | |
15618 | + struct cpumask *mask; | |
15619 | + unsigned int dest_cpu; | |
15620 | + struct rq_flags rf; | |
15621 | + struct rq *rq; | |
15622 | + | |
15623 | + /* | |
15624 | + * We can not migrate tasks bounded to a CPU or tasks not | |
15625 | + * running. The movement of the task will wake it up. | |
15626 | + */ | |
15627 | + if (p->flags & PF_NO_SETAFFINITY || p->state) | |
15628 | + return 0; | |
15629 | + | |
15630 | + mutex_lock(&sched_down_mutex); | |
15631 | + rq = task_rq_lock(p, &rf); | |
15632 | + | |
15633 | + cpumask = this_cpu_ptr(&sched_cpumasks); | |
15634 | + mask = &p->cpus_allowed; | |
15635 | + | |
15636 | + cpumask_andnot(cpumask, mask, &sched_down_cpumask); | |
15637 | + | |
15638 | + if (!cpumask_weight(cpumask)) { | |
15639 | + /* It's only on this CPU? */ | |
15640 | + task_rq_unlock(rq, p, &rf); | |
15641 | + mutex_unlock(&sched_down_mutex); | |
15642 | + return 0; | |
15643 | + } | |
15644 | + | |
15645 | + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); | |
15646 | + | |
15647 | + arg.task = p; | |
15648 | + arg.dest_cpu = dest_cpu; | |
15649 | + | |
15650 | + task_rq_unlock(rq, p, &rf); | |
15651 | + | |
15652 | + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | |
15653 | + tlb_migrate_finish(p->mm); | |
15654 | + mutex_unlock(&sched_down_mutex); | |
15655 | + | |
15656 | + return 1; | |
15657 | +} | |
15658 | + | |
15659 | /* | |
15660 | * Change a given task's CPU affinity. Migrate the thread to a | |
15661 | * proper CPU and schedule it away if the CPU it's executing on | |
15662 | @@ -1179,7 +1307,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, | |
15663 | } | |
15664 | ||
15665 | /* Can the task run on the task's current CPU? If so, we're done */ | |
15666 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
15667 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
15668 | goto out; | |
15669 | ||
15670 | dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); | |
15671 | @@ -1366,6 +1494,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | |
15672 | return ret; | |
15673 | } | |
15674 | ||
15675 | +static bool check_task_state(struct task_struct *p, long match_state) | |
15676 | +{ | |
15677 | + bool match = false; | |
15678 | + | |
15679 | + raw_spin_lock_irq(&p->pi_lock); | |
15680 | + if (p->state == match_state || p->saved_state == match_state) | |
15681 | + match = true; | |
15682 | + raw_spin_unlock_irq(&p->pi_lock); | |
15683 | + | |
15684 | + return match; | |
15685 | +} | |
15686 | + | |
15687 | /* | |
15688 | * wait_task_inactive - wait for a thread to unschedule. | |
15689 | * | |
15690 | @@ -1410,7 +1550,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
15691 | * is actually now running somewhere else! | |
15692 | */ | |
15693 | while (task_running(rq, p)) { | |
15694 | - if (match_state && unlikely(p->state != match_state)) | |
15695 | + if (match_state && !check_task_state(p, match_state)) | |
15696 | return 0; | |
15697 | cpu_relax(); | |
15698 | } | |
15699 | @@ -1425,7 +1565,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
15700 | running = task_running(rq, p); | |
15701 | queued = task_on_rq_queued(p); | |
15702 | ncsw = 0; | |
15703 | - if (!match_state || p->state == match_state) | |
15704 | + if (!match_state || p->state == match_state || | |
15705 | + p->saved_state == match_state) | |
15706 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
15707 | task_rq_unlock(rq, p, &rf); | |
15708 | ||
15709 | @@ -1680,10 +1821,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl | |
15710 | { | |
15711 | activate_task(rq, p, en_flags); | |
15712 | p->on_rq = TASK_ON_RQ_QUEUED; | |
15713 | - | |
15714 | - /* if a worker is waking up, notify workqueue */ | |
15715 | - if (p->flags & PF_WQ_WORKER) | |
15716 | - wq_worker_waking_up(p, cpu_of(rq)); | |
15717 | } | |
15718 | ||
15719 | /* | |
15720 | @@ -2018,8 +2155,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |
15721 | */ | |
15722 | smp_mb__before_spinlock(); | |
15723 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
15724 | - if (!(p->state & state)) | |
15725 | + if (!(p->state & state)) { | |
15726 | + /* | |
15727 | + * The task might be running due to a spinlock sleeper | |
15728 | + * wakeup. Check the saved state and set it to running | |
15729 | + * if the wakeup condition is true. | |
15730 | + */ | |
15731 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
15732 | + if (p->saved_state & state) { | |
15733 | + p->saved_state = TASK_RUNNING; | |
15734 | + success = 1; | |
15735 | + } | |
15736 | + } | |
15737 | goto out; | |
15738 | + } | |
15739 | + | |
15740 | + /* | |
15741 | + * If this is a regular wakeup, then we can unconditionally | |
15742 | + * clear the saved state of a "lock sleeper". | |
15743 | + */ | |
15744 | + if (!(wake_flags & WF_LOCK_SLEEPER)) | |
15745 | + p->saved_state = TASK_RUNNING; | |
15746 | ||
15747 | trace_sched_waking(p); | |
15748 | ||
15749 | @@ -2102,53 +2258,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |
15750 | } | |
15751 | ||
15752 | /** | |
15753 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
15754 | - * @p: the thread to be awakened | |
15755 | - * @cookie: context's cookie for pinning | |
15756 | - * | |
15757 | - * Put @p on the run-queue if it's not already there. The caller must | |
15758 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
15759 | - * the current task. | |
15760 | - */ | |
15761 | -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie) | |
15762 | -{ | |
15763 | - struct rq *rq = task_rq(p); | |
15764 | - | |
15765 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
15766 | - WARN_ON_ONCE(p == current)) | |
15767 | - return; | |
15768 | - | |
15769 | - lockdep_assert_held(&rq->lock); | |
15770 | - | |
15771 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
15772 | - /* | |
15773 | - * This is OK, because current is on_cpu, which avoids it being | |
15774 | - * picked for load-balance and preemption/IRQs are still | |
15775 | - * disabled avoiding further scheduler activity on it and we've | |
15776 | - * not yet picked a replacement task. | |
15777 | - */ | |
15778 | - lockdep_unpin_lock(&rq->lock, cookie); | |
15779 | - raw_spin_unlock(&rq->lock); | |
15780 | - raw_spin_lock(&p->pi_lock); | |
15781 | - raw_spin_lock(&rq->lock); | |
15782 | - lockdep_repin_lock(&rq->lock, cookie); | |
15783 | - } | |
15784 | - | |
15785 | - if (!(p->state & TASK_NORMAL)) | |
15786 | - goto out; | |
15787 | - | |
15788 | - trace_sched_waking(p); | |
15789 | - | |
15790 | - if (!task_on_rq_queued(p)) | |
15791 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP); | |
15792 | - | |
15793 | - ttwu_do_wakeup(rq, p, 0, cookie); | |
15794 | - ttwu_stat(p, smp_processor_id(), 0); | |
15795 | -out: | |
15796 | - raw_spin_unlock(&p->pi_lock); | |
15797 | -} | |
15798 | - | |
15799 | -/** | |
15800 | * wake_up_process - Wake up a specific process | |
15801 | * @p: The process to be woken up. | |
15802 | * | |
15803 | @@ -2166,6 +2275,18 @@ int wake_up_process(struct task_struct *p) | |
15804 | } | |
15805 | EXPORT_SYMBOL(wake_up_process); | |
15806 | ||
15807 | +/** | |
15808 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
15809 | + * @p: The process to be woken up. | |
15810 | + * | |
15811 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate | |
15812 | + * the nature of the wakeup. | |
15813 | + */ | |
15814 | +int wake_up_lock_sleeper(struct task_struct *p) | |
15815 | +{ | |
15816 | + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); | |
15817 | +} | |
15818 | + | |
15819 | int wake_up_state(struct task_struct *p, unsigned int state) | |
15820 | { | |
15821 | return try_to_wake_up(p, state, 0); | |
15822 | @@ -2442,6 +2563,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) | |
15823 | p->on_cpu = 0; | |
15824 | #endif | |
15825 | init_task_preempt_count(p); | |
15826 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
15827 | + task_thread_info(p)->preempt_lazy_count = 0; | |
15828 | +#endif | |
15829 | #ifdef CONFIG_SMP | |
15830 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
15831 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
15832 | @@ -2770,21 +2894,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |
15833 | finish_arch_post_lock_switch(); | |
15834 | ||
15835 | fire_sched_in_preempt_notifiers(current); | |
15836 | + /* | |
15837 | + * We use mmdrop_delayed() here so we don't have to do the | |
15838 | + * full __mmdrop() when we are the last user. | |
15839 | + */ | |
15840 | if (mm) | |
15841 | - mmdrop(mm); | |
15842 | + mmdrop_delayed(mm); | |
15843 | if (unlikely(prev_state == TASK_DEAD)) { | |
15844 | if (prev->sched_class->task_dead) | |
15845 | prev->sched_class->task_dead(prev); | |
15846 | ||
15847 | - /* | |
15848 | - * Remove function-return probe instances associated with this | |
15849 | - * task and put them back on the free list. | |
15850 | - */ | |
15851 | - kprobe_flush_task(prev); | |
15852 | - | |
15853 | - /* Task is done with its stack. */ | |
15854 | - put_task_stack(prev); | |
15855 | - | |
15856 | put_task_struct(prev); | |
15857 | } | |
15858 | ||
15859 | @@ -3252,6 +3371,77 @@ static inline void schedule_debug(struct task_struct *prev) | |
15860 | schedstat_inc(this_rq()->sched_count); | |
15861 | } | |
15862 | ||
15863 | +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) | |
15864 | + | |
15865 | +void migrate_disable(void) | |
15866 | +{ | |
15867 | + struct task_struct *p = current; | |
15868 | + | |
15869 | + if (in_atomic() || irqs_disabled()) { | |
15870 | +#ifdef CONFIG_SCHED_DEBUG | |
15871 | + p->migrate_disable_atomic++; | |
15872 | +#endif | |
15873 | + return; | |
15874 | + } | |
15875 | + | |
15876 | +#ifdef CONFIG_SCHED_DEBUG | |
15877 | + if (unlikely(p->migrate_disable_atomic)) { | |
15878 | + tracing_off(); | |
15879 | + WARN_ON_ONCE(1); | |
15880 | + } | |
15881 | +#endif | |
15882 | + | |
15883 | + if (p->migrate_disable) { | |
15884 | + p->migrate_disable++; | |
15885 | + return; | |
15886 | + } | |
15887 | + | |
15888 | + preempt_disable(); | |
15889 | + preempt_lazy_disable(); | |
15890 | + pin_current_cpu(); | |
15891 | + p->migrate_disable = 1; | |
15892 | + preempt_enable(); | |
15893 | +} | |
15894 | +EXPORT_SYMBOL(migrate_disable); | |
15895 | + | |
15896 | +void migrate_enable(void) | |
15897 | +{ | |
15898 | + struct task_struct *p = current; | |
15899 | + | |
15900 | + if (in_atomic() || irqs_disabled()) { | |
15901 | +#ifdef CONFIG_SCHED_DEBUG | |
15902 | + p->migrate_disable_atomic--; | |
15903 | +#endif | |
15904 | + return; | |
15905 | + } | |
15906 | + | |
15907 | +#ifdef CONFIG_SCHED_DEBUG | |
15908 | + if (unlikely(p->migrate_disable_atomic)) { | |
15909 | + tracing_off(); | |
15910 | + WARN_ON_ONCE(1); | |
15911 | + } | |
15912 | +#endif | |
15913 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
15914 | + | |
15915 | + if (p->migrate_disable > 1) { | |
15916 | + p->migrate_disable--; | |
15917 | + return; | |
15918 | + } | |
15919 | + | |
15920 | + preempt_disable(); | |
15921 | + /* | |
15922 | + * Clearing migrate_disable causes tsk_cpus_allowed to | |
15923 | + * show the tasks original cpu affinity. | |
15924 | + */ | |
15925 | + p->migrate_disable = 0; | |
15926 | + | |
15927 | + unpin_current_cpu(); | |
15928 | + preempt_enable(); | |
15929 | + preempt_lazy_enable(); | |
15930 | +} | |
15931 | +EXPORT_SYMBOL(migrate_enable); | |
15932 | +#endif | |
15933 | + | |
15934 | /* | |
15935 | * Pick up the highest-prio task: | |
15936 | */ | |
15937 | @@ -3368,19 +3558,6 @@ static void __sched notrace __schedule(bool preempt) | |
15938 | } else { | |
15939 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | |
15940 | prev->on_rq = 0; | |
15941 | - | |
15942 | - /* | |
15943 | - * If a worker went to sleep, notify and ask workqueue | |
15944 | - * whether it wants to wake up a task to maintain | |
15945 | - * concurrency. | |
15946 | - */ | |
15947 | - if (prev->flags & PF_WQ_WORKER) { | |
15948 | - struct task_struct *to_wakeup; | |
15949 | - | |
15950 | - to_wakeup = wq_worker_sleeping(prev); | |
15951 | - if (to_wakeup) | |
15952 | - try_to_wake_up_local(to_wakeup, cookie); | |
15953 | - } | |
15954 | } | |
15955 | switch_count = &prev->nvcsw; | |
15956 | } | |
15957 | @@ -3390,6 +3567,7 @@ static void __sched notrace __schedule(bool preempt) | |
15958 | ||
15959 | next = pick_next_task(rq, prev, cookie); | |
15960 | clear_tsk_need_resched(prev); | |
15961 | + clear_tsk_need_resched_lazy(prev); | |
15962 | clear_preempt_need_resched(); | |
15963 | rq->clock_skip_update = 0; | |
15964 | ||
15965 | @@ -3437,9 +3615,20 @@ void __noreturn do_task_dead(void) | |
15966 | ||
15967 | static inline void sched_submit_work(struct task_struct *tsk) | |
15968 | { | |
15969 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
15970 | + if (!tsk->state) | |
15971 | return; | |
15972 | /* | |
15973 | + * If a worker went to sleep, notify and ask workqueue whether | |
15974 | + * it wants to wake up a task to maintain concurrency. | |
15975 | + */ | |
15976 | + if (tsk->flags & PF_WQ_WORKER) | |
15977 | + wq_worker_sleeping(tsk); | |
15978 | + | |
15979 | + | |
15980 | + if (tsk_is_pi_blocked(tsk)) | |
15981 | + return; | |
15982 | + | |
15983 | + /* | |
15984 | * If we are going to sleep and we have plugged IO queued, | |
15985 | * make sure to submit it to avoid deadlocks. | |
15986 | */ | |
15987 | @@ -3447,6 +3636,12 @@ static inline void sched_submit_work(struct task_struct *tsk) | |
15988 | blk_schedule_flush_plug(tsk); | |
15989 | } | |
15990 | ||
15991 | +static void sched_update_worker(struct task_struct *tsk) | |
15992 | +{ | |
15993 | + if (tsk->flags & PF_WQ_WORKER) | |
15994 | + wq_worker_running(tsk); | |
15995 | +} | |
15996 | + | |
15997 | asmlinkage __visible void __sched schedule(void) | |
15998 | { | |
15999 | struct task_struct *tsk = current; | |
16000 | @@ -3457,6 +3652,7 @@ asmlinkage __visible void __sched schedule(void) | |
16001 | __schedule(false); | |
16002 | sched_preempt_enable_no_resched(); | |
16003 | } while (need_resched()); | |
16004 | + sched_update_worker(tsk); | |
16005 | } | |
16006 | EXPORT_SYMBOL(schedule); | |
16007 | ||
16008 | @@ -3520,6 +3716,30 @@ static void __sched notrace preempt_schedule_common(void) | |
16009 | } while (need_resched()); | |
16010 | } | |
16011 | ||
16012 | +#ifdef CONFIG_PREEMPT_LAZY | |
16013 | +/* | |
16014 | + * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is | |
16015 | + * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as | |
16016 | + * preempt_lazy_count counter >0. | |
16017 | + */ | |
16018 | +static __always_inline int preemptible_lazy(void) | |
16019 | +{ | |
16020 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
16021 | + return 1; | |
16022 | + if (current_thread_info()->preempt_lazy_count) | |
16023 | + return 0; | |
16024 | + return 1; | |
16025 | +} | |
16026 | + | |
16027 | +#else | |
16028 | + | |
16029 | +static inline int preemptible_lazy(void) | |
16030 | +{ | |
16031 | + return 1; | |
16032 | +} | |
16033 | + | |
16034 | +#endif | |
16035 | + | |
16036 | #ifdef CONFIG_PREEMPT | |
16037 | /* | |
16038 | * this is the entry point to schedule() from in-kernel preemption | |
16039 | @@ -3534,7 +3754,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |
16040 | */ | |
16041 | if (likely(!preemptible())) | |
16042 | return; | |
16043 | - | |
16044 | + if (!preemptible_lazy()) | |
16045 | + return; | |
16046 | preempt_schedule_common(); | |
16047 | } | |
16048 | NOKPROBE_SYMBOL(preempt_schedule); | |
16049 | @@ -3561,6 +3782,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
16050 | if (likely(!preemptible())) | |
16051 | return; | |
16052 | ||
16053 | + if (!preemptible_lazy()) | |
16054 | + return; | |
16055 | + | |
16056 | do { | |
16057 | /* | |
16058 | * Because the function tracer can trace preempt_count_sub() | |
16059 | @@ -3583,7 +3807,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
16060 | * an infinite recursion. | |
16061 | */ | |
16062 | prev_ctx = exception_enter(); | |
16063 | + /* | |
16064 | + * The add/subtract must not be traced by the function | |
16065 | + * tracer. But we still want to account for the | |
16066 | + * preempt off latency tracer. Since the _notrace versions | |
16067 | + * of add/subtract skip the accounting for latency tracer | |
16068 | + * we must force it manually. | |
16069 | + */ | |
16070 | + start_critical_timings(); | |
16071 | __schedule(true); | |
16072 | + stop_critical_timings(); | |
16073 | exception_exit(prev_ctx); | |
16074 | ||
16075 | preempt_latency_stop(1); | |
16076 | @@ -4939,6 +5172,7 @@ int __cond_resched_lock(spinlock_t *lock) | |
16077 | } | |
16078 | EXPORT_SYMBOL(__cond_resched_lock); | |
16079 | ||
16080 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16081 | int __sched __cond_resched_softirq(void) | |
16082 | { | |
16083 | BUG_ON(!in_softirq()); | |
16084 | @@ -4952,6 +5186,7 @@ int __sched __cond_resched_softirq(void) | |
16085 | return 0; | |
16086 | } | |
16087 | EXPORT_SYMBOL(__cond_resched_softirq); | |
16088 | +#endif | |
16089 | ||
16090 | /** | |
16091 | * yield - yield the current processor to other threads. | |
16092 | @@ -5315,7 +5550,9 @@ void init_idle(struct task_struct *idle, int cpu) | |
16093 | ||
16094 | /* Set the preempt count _outside_ the spinlocks! */ | |
16095 | init_idle_preempt_count(idle, cpu); | |
16096 | - | |
16097 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
16098 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
16099 | +#endif | |
16100 | /* | |
16101 | * The idle tasks have their own, simple scheduling class: | |
16102 | */ | |
16103 | @@ -5458,6 +5695,8 @@ void sched_setnuma(struct task_struct *p, int nid) | |
16104 | #endif /* CONFIG_NUMA_BALANCING */ | |
16105 | ||
16106 | #ifdef CONFIG_HOTPLUG_CPU | |
16107 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
16108 | + | |
16109 | /* | |
16110 | * Ensures that the idle task is using init_mm right before its cpu goes | |
16111 | * offline. | |
16112 | @@ -5472,7 +5711,12 @@ void idle_task_exit(void) | |
16113 | switch_mm_irqs_off(mm, &init_mm, current); | |
16114 | finish_arch_post_lock_switch(); | |
16115 | } | |
16116 | - mmdrop(mm); | |
16117 | + /* | |
16118 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
16119 | + * call mmdrop() nor mmdrop_delayed() from here. | |
16120 | + */ | |
16121 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
16122 | + | |
16123 | } | |
16124 | ||
16125 | /* | |
16126 | @@ -7418,6 +7662,10 @@ int sched_cpu_dying(unsigned int cpu) | |
16127 | update_max_interval(); | |
16128 | nohz_balance_exit_idle(cpu); | |
16129 | hrtick_clear(rq); | |
16130 | + if (per_cpu(idle_last_mm, cpu)) { | |
16131 | + mmdrop_delayed(per_cpu(idle_last_mm, cpu)); | |
16132 | + per_cpu(idle_last_mm, cpu) = NULL; | |
16133 | + } | |
16134 | return 0; | |
16135 | } | |
16136 | #endif | |
16137 | @@ -7698,7 +7946,7 @@ void __init sched_init(void) | |
16138 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
16139 | static inline int preempt_count_equals(int preempt_offset) | |
16140 | { | |
16141 | - int nested = preempt_count() + rcu_preempt_depth(); | |
16142 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
16143 | ||
16144 | return (nested == preempt_offset); | |
16145 | } | |
16146 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c | |
16147 | index 37e2449186c4..26dcaabde8b3 100644 | |
16148 | --- a/kernel/sched/deadline.c | |
16149 | +++ b/kernel/sched/deadline.c | |
16150 | @@ -687,6 +687,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |
16151 | ||
16152 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
16153 | timer->function = dl_task_timer; | |
16154 | + timer->irqsafe = 1; | |
16155 | } | |
16156 | ||
16157 | static | |
16158 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c | |
16159 | index fa178b62ea79..935224123441 100644 | |
16160 | --- a/kernel/sched/debug.c | |
16161 | +++ b/kernel/sched/debug.c | |
16162 | @@ -558,6 +558,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |
16163 | P(rt_throttled); | |
16164 | PN(rt_time); | |
16165 | PN(rt_runtime); | |
16166 | +#ifdef CONFIG_SMP | |
16167 | + P(rt_nr_migratory); | |
16168 | +#endif | |
16169 | ||
16170 | #undef PN | |
16171 | #undef P | |
16172 | @@ -953,6 +956,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |
16173 | #endif | |
16174 | P(policy); | |
16175 | P(prio); | |
16176 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16177 | + P(migrate_disable); | |
16178 | +#endif | |
16179 | + P(nr_cpus_allowed); | |
16180 | #undef PN_SCHEDSTAT | |
16181 | #undef PN | |
16182 | #undef __PN | |
16183 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | |
16184 | index c242944f5cbd..4aeb2e2e41bc 100644 | |
16185 | --- a/kernel/sched/fair.c | |
16186 | +++ b/kernel/sched/fair.c | |
16187 | @@ -3518,7 +3518,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
16188 | ideal_runtime = sched_slice(cfs_rq, curr); | |
16189 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
16190 | if (delta_exec > ideal_runtime) { | |
16191 | - resched_curr(rq_of(cfs_rq)); | |
16192 | + resched_curr_lazy(rq_of(cfs_rq)); | |
16193 | /* | |
16194 | * The current task ran long enough, ensure it doesn't get | |
16195 | * re-elected due to buddy favours. | |
16196 | @@ -3542,7 +3542,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
16197 | return; | |
16198 | ||
16199 | if (delta > ideal_runtime) | |
16200 | - resched_curr(rq_of(cfs_rq)); | |
16201 | + resched_curr_lazy(rq_of(cfs_rq)); | |
16202 | } | |
16203 | ||
16204 | static void | |
16205 | @@ -3684,7 +3684,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |
16206 | * validating it and just reschedule. | |
16207 | */ | |
16208 | if (queued) { | |
16209 | - resched_curr(rq_of(cfs_rq)); | |
16210 | + resched_curr_lazy(rq_of(cfs_rq)); | |
16211 | return; | |
16212 | } | |
16213 | /* | |
16214 | @@ -3866,7 +3866,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) | |
16215 | * hierarchy can be throttled | |
16216 | */ | |
16217 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
16218 | - resched_curr(rq_of(cfs_rq)); | |
16219 | + resched_curr_lazy(rq_of(cfs_rq)); | |
16220 | } | |
16221 | ||
16222 | static __always_inline | |
16223 | @@ -4494,7 +4494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |
16224 | ||
16225 | if (delta < 0) { | |
16226 | if (rq->curr == p) | |
16227 | - resched_curr(rq); | |
16228 | + resched_curr_lazy(rq); | |
16229 | return; | |
16230 | } | |
16231 | hrtick_start(rq, delta); | |
16232 | @@ -5905,7 +5905,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |
16233 | return; | |
16234 | ||
16235 | preempt: | |
16236 | - resched_curr(rq); | |
16237 | + resched_curr_lazy(rq); | |
16238 | /* | |
16239 | * Only set the backward buddy when the current task is still | |
16240 | * on the rq. This can happen when a wakeup gets interleaved | |
16241 | @@ -8631,7 +8631,7 @@ static void task_fork_fair(struct task_struct *p) | |
16242 | * 'current' within the tree based on its new key value. | |
16243 | */ | |
16244 | swap(curr->vruntime, se->vruntime); | |
16245 | - resched_curr(rq); | |
16246 | + resched_curr_lazy(rq); | |
16247 | } | |
16248 | ||
16249 | se->vruntime -= cfs_rq->min_vruntime; | |
16250 | @@ -8655,7 +8655,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) | |
16251 | */ | |
16252 | if (rq->curr == p) { | |
16253 | if (p->prio > oldprio) | |
16254 | - resched_curr(rq); | |
16255 | + resched_curr_lazy(rq); | |
16256 | } else | |
16257 | check_preempt_curr(rq, p, 0); | |
16258 | } | |
16259 | diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
16260 | index 69631fa46c2f..6d28fcd08872 100644 | |
16261 | --- a/kernel/sched/features.h | |
16262 | +++ b/kernel/sched/features.h | |
16263 | @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true) | |
16264 | */ | |
16265 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
16266 | ||
16267 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16268 | +SCHED_FEAT(TTWU_QUEUE, false) | |
16269 | +# ifdef CONFIG_PREEMPT_LAZY | |
16270 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
16271 | +# endif | |
16272 | +#else | |
16273 | + | |
16274 | /* | |
16275 | * Queue remote wakeups on the target CPU and process them | |
16276 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
16277 | */ | |
16278 | SCHED_FEAT(TTWU_QUEUE, true) | |
16279 | +#endif | |
16280 | ||
16281 | #ifdef HAVE_RT_PUSH_IPI | |
16282 | /* | |
16283 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c | |
16284 | index 2516b8df6dbb..2556baa0a97e 100644 | |
16285 | --- a/kernel/sched/rt.c | |
16286 | +++ b/kernel/sched/rt.c | |
16287 | @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |
16288 | ||
16289 | hrtimer_init(&rt_b->rt_period_timer, | |
16290 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
16291 | + rt_b->rt_period_timer.irqsafe = 1; | |
16292 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
16293 | } | |
16294 | ||
16295 | @@ -101,6 +102,7 @@ void init_rt_rq(struct rt_rq *rt_rq) | |
16296 | rt_rq->push_cpu = nr_cpu_ids; | |
16297 | raw_spin_lock_init(&rt_rq->push_lock); | |
16298 | init_irq_work(&rt_rq->push_work, push_irq_work_func); | |
16299 | + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ; | |
16300 | #endif | |
16301 | #endif /* CONFIG_SMP */ | |
16302 | /* We start is dequeued state, because no RT tasks are queued */ | |
16303 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
16304 | index 055f935d4421..19324ac27026 100644 | |
16305 | --- a/kernel/sched/sched.h | |
16306 | +++ b/kernel/sched/sched.h | |
16307 | @@ -1163,6 +1163,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |
16308 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | |
16309 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
16310 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
16311 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
16312 | ||
16313 | /* | |
16314 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
16315 | @@ -1346,6 +1347,15 @@ extern void init_sched_fair_class(void); | |
16316 | extern void resched_curr(struct rq *rq); | |
16317 | extern void resched_cpu(int cpu); | |
16318 | ||
16319 | +#ifdef CONFIG_PREEMPT_LAZY | |
16320 | +extern void resched_curr_lazy(struct rq *rq); | |
16321 | +#else | |
16322 | +static inline void resched_curr_lazy(struct rq *rq) | |
16323 | +{ | |
16324 | + resched_curr(rq); | |
16325 | +} | |
16326 | +#endif | |
16327 | + | |
16328 | extern struct rt_bandwidth def_rt_bandwidth; | |
16329 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
16330 | ||
16331 | diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c | |
16332 | index 82f0dff90030..ef027ff3250a 100644 | |
16333 | --- a/kernel/sched/swait.c | |
16334 | +++ b/kernel/sched/swait.c | |
16335 | @@ -1,5 +1,6 @@ | |
16336 | #include <linux/sched.h> | |
16337 | #include <linux/swait.h> | |
16338 | +#include <linux/suspend.h> | |
16339 | ||
16340 | void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
16341 | struct lock_class_key *key) | |
16342 | @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q) | |
16343 | } | |
16344 | EXPORT_SYMBOL(swake_up_locked); | |
16345 | ||
16346 | +void swake_up_all_locked(struct swait_queue_head *q) | |
16347 | +{ | |
16348 | + struct swait_queue *curr; | |
16349 | + int wakes = 0; | |
16350 | + | |
16351 | + while (!list_empty(&q->task_list)) { | |
16352 | + | |
16353 | + curr = list_first_entry(&q->task_list, typeof(*curr), | |
16354 | + task_list); | |
16355 | + wake_up_process(curr->task); | |
16356 | + list_del_init(&curr->task_list); | |
16357 | + wakes++; | |
16358 | + } | |
16359 | + if (pm_in_action) | |
16360 | + return; | |
16361 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
16362 | +} | |
16363 | +EXPORT_SYMBOL(swake_up_all_locked); | |
16364 | + | |
16365 | void swake_up(struct swait_queue_head *q) | |
16366 | { | |
16367 | unsigned long flags; | |
16368 | @@ -54,6 +74,7 @@ void swake_up_all(struct swait_queue_head *q) | |
16369 | if (!swait_active(q)) | |
16370 | return; | |
16371 | ||
16372 | + WARN_ON(irqs_disabled()); | |
16373 | raw_spin_lock_irq(&q->lock); | |
16374 | list_splice_init(&q->task_list, &tmp); | |
16375 | while (!list_empty(&tmp)) { | |
16376 | diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c | |
16377 | new file mode 100644 | |
16378 | index 000000000000..1950f40ca725 | |
16379 | --- /dev/null | |
16380 | +++ b/kernel/sched/swork.c | |
16381 | @@ -0,0 +1,173 @@ | |
16382 | +/* | |
16383 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de | |
16384 | + * | |
16385 | + * Provides a framework for enqueuing callbacks from irq context | |
16386 | + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. | |
16387 | + */ | |
16388 | + | |
16389 | +#include <linux/swait.h> | |
16390 | +#include <linux/swork.h> | |
16391 | +#include <linux/kthread.h> | |
16392 | +#include <linux/slab.h> | |
16393 | +#include <linux/spinlock.h> | |
16394 | +#include <linux/export.h> | |
16395 | + | |
16396 | +#define SWORK_EVENT_PENDING (1 << 0) | |
16397 | + | |
16398 | +static DEFINE_MUTEX(worker_mutex); | |
16399 | +static struct sworker *glob_worker; | |
16400 | + | |
16401 | +struct sworker { | |
16402 | + struct list_head events; | |
16403 | + struct swait_queue_head wq; | |
16404 | + | |
16405 | + raw_spinlock_t lock; | |
16406 | + | |
16407 | + struct task_struct *task; | |
16408 | + int refs; | |
16409 | +}; | |
16410 | + | |
16411 | +static bool swork_readable(struct sworker *worker) | |
16412 | +{ | |
16413 | + bool r; | |
16414 | + | |
16415 | + if (kthread_should_stop()) | |
16416 | + return true; | |
16417 | + | |
16418 | + raw_spin_lock_irq(&worker->lock); | |
16419 | + r = !list_empty(&worker->events); | |
16420 | + raw_spin_unlock_irq(&worker->lock); | |
16421 | + | |
16422 | + return r; | |
16423 | +} | |
16424 | + | |
16425 | +static int swork_kthread(void *arg) | |
16426 | +{ | |
16427 | + struct sworker *worker = arg; | |
16428 | + | |
16429 | + for (;;) { | |
16430 | + swait_event_interruptible(worker->wq, | |
16431 | + swork_readable(worker)); | |
16432 | + if (kthread_should_stop()) | |
16433 | + break; | |
16434 | + | |
16435 | + raw_spin_lock_irq(&worker->lock); | |
16436 | + while (!list_empty(&worker->events)) { | |
16437 | + struct swork_event *sev; | |
16438 | + | |
16439 | + sev = list_first_entry(&worker->events, | |
16440 | + struct swork_event, item); | |
16441 | + list_del(&sev->item); | |
16442 | + raw_spin_unlock_irq(&worker->lock); | |
16443 | + | |
16444 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, | |
16445 | + &sev->flags)); | |
16446 | + sev->func(sev); | |
16447 | + raw_spin_lock_irq(&worker->lock); | |
16448 | + } | |
16449 | + raw_spin_unlock_irq(&worker->lock); | |
16450 | + } | |
16451 | + return 0; | |
16452 | +} | |
16453 | + | |
16454 | +static struct sworker *swork_create(void) | |
16455 | +{ | |
16456 | + struct sworker *worker; | |
16457 | + | |
16458 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); | |
16459 | + if (!worker) | |
16460 | + return ERR_PTR(-ENOMEM); | |
16461 | + | |
16462 | + INIT_LIST_HEAD(&worker->events); | |
16463 | + raw_spin_lock_init(&worker->lock); | |
16464 | + init_swait_queue_head(&worker->wq); | |
16465 | + | |
16466 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); | |
16467 | + if (IS_ERR(worker->task)) { | |
16468 | + kfree(worker); | |
16469 | + return ERR_PTR(-ENOMEM); | |
16470 | + } | |
16471 | + | |
16472 | + return worker; | |
16473 | +} | |
16474 | + | |
16475 | +static void swork_destroy(struct sworker *worker) | |
16476 | +{ | |
16477 | + kthread_stop(worker->task); | |
16478 | + | |
16479 | + WARN_ON(!list_empty(&worker->events)); | |
16480 | + kfree(worker); | |
16481 | +} | |
16482 | + | |
16483 | +/** | |
16484 | + * swork_queue - queue swork | |
16485 | + * | |
16486 | + * Returns %false if @work was already on a queue, %true otherwise. | |
16487 | + * | |
16488 | + * The work is queued and processed on a random CPU | |
16489 | + */ | |
16490 | +bool swork_queue(struct swork_event *sev) | |
16491 | +{ | |
16492 | + unsigned long flags; | |
16493 | + | |
16494 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) | |
16495 | + return false; | |
16496 | + | |
16497 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); | |
16498 | + list_add_tail(&sev->item, &glob_worker->events); | |
16499 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
16500 | + | |
16501 | + swake_up(&glob_worker->wq); | |
16502 | + return true; | |
16503 | +} | |
16504 | +EXPORT_SYMBOL_GPL(swork_queue); | |
16505 | + | |
16506 | +/** | |
16507 | + * swork_get - get an instance of the sworker | |
16508 | + * | |
16509 | + * Returns an negative error code if the initialization if the worker did not | |
16510 | + * work, %0 otherwise. | |
16511 | + * | |
16512 | + */ | |
16513 | +int swork_get(void) | |
16514 | +{ | |
16515 | + struct sworker *worker; | |
16516 | + | |
16517 | + mutex_lock(&worker_mutex); | |
16518 | + if (!glob_worker) { | |
16519 | + worker = swork_create(); | |
16520 | + if (IS_ERR(worker)) { | |
16521 | + mutex_unlock(&worker_mutex); | |
16522 | + return -ENOMEM; | |
16523 | + } | |
16524 | + | |
16525 | + glob_worker = worker; | |
16526 | + } | |
16527 | + | |
16528 | + glob_worker->refs++; | |
16529 | + mutex_unlock(&worker_mutex); | |
16530 | + | |
16531 | + return 0; | |
16532 | +} | |
16533 | +EXPORT_SYMBOL_GPL(swork_get); | |
16534 | + | |
16535 | +/** | |
16536 | + * swork_put - puts an instance of the sworker | |
16537 | + * | |
16538 | + * Will destroy the sworker thread. This function must not be called until all | |
16539 | + * queued events have been completed. | |
16540 | + */ | |
16541 | +void swork_put(void) | |
16542 | +{ | |
16543 | + mutex_lock(&worker_mutex); | |
16544 | + | |
16545 | + glob_worker->refs--; | |
16546 | + if (glob_worker->refs > 0) | |
16547 | + goto out; | |
16548 | + | |
16549 | + swork_destroy(glob_worker); | |
16550 | + glob_worker = NULL; | |
16551 | +out: | |
16552 | + mutex_unlock(&worker_mutex); | |
16553 | +} | |
16554 | +EXPORT_SYMBOL_GPL(swork_put); | |
16555 | diff --git a/kernel/signal.c b/kernel/signal.c | |
16556 | index 75761acc77cf..ae0773c76bb0 100644 | |
16557 | --- a/kernel/signal.c | |
16558 | +++ b/kernel/signal.c | |
16559 | @@ -14,6 +14,7 @@ | |
16560 | #include <linux/export.h> | |
16561 | #include <linux/init.h> | |
16562 | #include <linux/sched.h> | |
16563 | +#include <linux/sched/rt.h> | |
16564 | #include <linux/fs.h> | |
16565 | #include <linux/tty.h> | |
16566 | #include <linux/binfmts.h> | |
16567 | @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task) | |
16568 | return false; | |
16569 | } | |
16570 | ||
16571 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
16572 | +{ | |
16573 | + struct sigqueue *q = t->sigqueue_cache; | |
16574 | + | |
16575 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) | |
16576 | + return NULL; | |
16577 | + return q; | |
16578 | +} | |
16579 | + | |
16580 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) | |
16581 | +{ | |
16582 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) | |
16583 | + return 0; | |
16584 | + return 1; | |
16585 | +} | |
16586 | + | |
16587 | /* | |
16588 | * allocate a new signal queue record | |
16589 | * - this may be called without locks if and only if t == current, otherwise an | |
16590 | * appropriate lock must be held to stop the target task from exiting | |
16591 | */ | |
16592 | static struct sigqueue * | |
16593 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
16594 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
16595 | + int override_rlimit, int fromslab) | |
16596 | { | |
16597 | struct sigqueue *q = NULL; | |
16598 | struct user_struct *user; | |
16599 | @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
16600 | if (override_rlimit || | |
16601 | atomic_read(&user->sigpending) <= | |
16602 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
16603 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
16604 | + if (!fromslab) | |
16605 | + q = get_task_cache(t); | |
16606 | + if (!q) | |
16607 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
16608 | } else { | |
16609 | print_dropped_signal(sig); | |
16610 | } | |
16611 | @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
16612 | return q; | |
16613 | } | |
16614 | ||
16615 | +static struct sigqueue * | |
16616 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
16617 | + int override_rlimit) | |
16618 | +{ | |
16619 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); | |
16620 | +} | |
16621 | + | |
16622 | static void __sigqueue_free(struct sigqueue *q) | |
16623 | { | |
16624 | if (q->flags & SIGQUEUE_PREALLOC) | |
16625 | @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q) | |
16626 | kmem_cache_free(sigqueue_cachep, q); | |
16627 | } | |
16628 | ||
16629 | +static void sigqueue_free_current(struct sigqueue *q) | |
16630 | +{ | |
16631 | + struct user_struct *up; | |
16632 | + | |
16633 | + if (q->flags & SIGQUEUE_PREALLOC) | |
16634 | + return; | |
16635 | + | |
16636 | + up = q->user; | |
16637 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
16638 | + atomic_dec(&up->sigpending); | |
16639 | + free_uid(up); | |
16640 | + } else | |
16641 | + __sigqueue_free(q); | |
16642 | +} | |
16643 | + | |
16644 | void flush_sigqueue(struct sigpending *queue) | |
16645 | { | |
16646 | struct sigqueue *q; | |
16647 | @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue) | |
16648 | } | |
16649 | ||
16650 | /* | |
16651 | + * Called from __exit_signal. Flush tsk->pending and | |
16652 | + * tsk->sigqueue_cache | |
16653 | + */ | |
16654 | +void flush_task_sigqueue(struct task_struct *tsk) | |
16655 | +{ | |
16656 | + struct sigqueue *q; | |
16657 | + | |
16658 | + flush_sigqueue(&tsk->pending); | |
16659 | + | |
16660 | + q = get_task_cache(tsk); | |
16661 | + if (q) | |
16662 | + kmem_cache_free(sigqueue_cachep, q); | |
16663 | +} | |
16664 | + | |
16665 | +/* | |
16666 | * Flush all pending signals for this kthread. | |
16667 | */ | |
16668 | void flush_signals(struct task_struct *t) | |
16669 | @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | |
16670 | still_pending: | |
16671 | list_del_init(&first->list); | |
16672 | copy_siginfo(info, &first->info); | |
16673 | - __sigqueue_free(first); | |
16674 | + sigqueue_free_current(first); | |
16675 | } else { | |
16676 | /* | |
16677 | * Ok, it wasn't in the queue. This must be | |
16678 | @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |
16679 | { | |
16680 | int signr; | |
16681 | ||
16682 | + WARN_ON_ONCE(tsk != current); | |
16683 | + | |
16684 | /* We only dequeue private signals from ourselves, we don't let | |
16685 | * signalfd steal them | |
16686 | */ | |
16687 | @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | |
16688 | * We don't want to have recursive SIGSEGV's etc, for example, | |
16689 | * that is why we also clear SIGNAL_UNKILLABLE. | |
16690 | */ | |
16691 | -int | |
16692 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16693 | +static int | |
16694 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16695 | { | |
16696 | unsigned long int flags; | |
16697 | int ret, blocked, ignored; | |
16698 | @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16699 | return ret; | |
16700 | } | |
16701 | ||
16702 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
16703 | +{ | |
16704 | +/* | |
16705 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
16706 | + * since it can not enable preemption, and the signal code's spin_locks | |
16707 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
16708 | + * send the signal on exit of the trap. | |
16709 | + */ | |
16710 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
16711 | + if (in_atomic()) { | |
16712 | + if (WARN_ON_ONCE(t != current)) | |
16713 | + return 0; | |
16714 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
16715 | + return 0; | |
16716 | + | |
16717 | + if (is_si_special(info)) { | |
16718 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
16719 | + t->forced_info.si_signo = sig; | |
16720 | + t->forced_info.si_errno = 0; | |
16721 | + t->forced_info.si_code = SI_KERNEL; | |
16722 | + t->forced_info.si_pid = 0; | |
16723 | + t->forced_info.si_uid = 0; | |
16724 | + } else { | |
16725 | + t->forced_info = *info; | |
16726 | + } | |
16727 | + | |
16728 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | |
16729 | + return 0; | |
16730 | + } | |
16731 | +#endif | |
16732 | + return do_force_sig_info(sig, info, t); | |
16733 | +} | |
16734 | + | |
16735 | /* | |
16736 | * Nuke all other threads in the group. | |
16737 | */ | |
16738 | @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
16739 | * Disable interrupts early to avoid deadlocks. | |
16740 | * See rcu_read_unlock() comment header for details. | |
16741 | */ | |
16742 | - local_irq_save(*flags); | |
16743 | + local_irq_save_nort(*flags); | |
16744 | rcu_read_lock(); | |
16745 | sighand = rcu_dereference(tsk->sighand); | |
16746 | if (unlikely(sighand == NULL)) { | |
16747 | rcu_read_unlock(); | |
16748 | - local_irq_restore(*flags); | |
16749 | + local_irq_restore_nort(*flags); | |
16750 | break; | |
16751 | } | |
16752 | /* | |
16753 | @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
16754 | } | |
16755 | spin_unlock(&sighand->siglock); | |
16756 | rcu_read_unlock(); | |
16757 | - local_irq_restore(*flags); | |
16758 | + local_irq_restore_nort(*flags); | |
16759 | } | |
16760 | ||
16761 | return sighand; | |
16762 | @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid); | |
16763 | */ | |
16764 | struct sigqueue *sigqueue_alloc(void) | |
16765 | { | |
16766 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
16767 | + /* Preallocated sigqueue objects always from the slabcache ! */ | |
16768 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
16769 | ||
16770 | if (q) | |
16771 | q->flags |= SIGQUEUE_PREALLOC; | |
16772 | @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |
16773 | if (gstop_done && ptrace_reparented(current)) | |
16774 | do_notify_parent_cldstop(current, false, why); | |
16775 | ||
16776 | - /* | |
16777 | - * Don't want to allow preemption here, because | |
16778 | - * sys_ptrace() needs this task to be inactive. | |
16779 | - * | |
16780 | - * XXX: implement read_unlock_no_resched(). | |
16781 | - */ | |
16782 | - preempt_disable(); | |
16783 | read_unlock(&tasklist_lock); | |
16784 | - preempt_enable_no_resched(); | |
16785 | freezable_schedule(); | |
16786 | } else { | |
16787 | /* | |
16788 | diff --git a/kernel/softirq.c b/kernel/softirq.c | |
16789 | index 744fa611cae0..819bd7cf5ad0 100644 | |
16790 | --- a/kernel/softirq.c | |
16791 | +++ b/kernel/softirq.c | |
16792 | @@ -21,10 +21,12 @@ | |
16793 | #include <linux/freezer.h> | |
16794 | #include <linux/kthread.h> | |
16795 | #include <linux/rcupdate.h> | |
16796 | +#include <linux/delay.h> | |
16797 | #include <linux/ftrace.h> | |
16798 | #include <linux/smp.h> | |
16799 | #include <linux/smpboot.h> | |
16800 | #include <linux/tick.h> | |
16801 | +#include <linux/locallock.h> | |
16802 | #include <linux/irq.h> | |
16803 | ||
16804 | #define CREATE_TRACE_POINTS | |
16805 | @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat); | |
16806 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
16807 | ||
16808 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
16809 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16810 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) | |
16811 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); | |
16812 | +#endif | |
16813 | ||
16814 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
16815 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", | |
16816 | "TASKLET", "SCHED", "HRTIMER", "RCU" | |
16817 | }; | |
16818 | ||
16819 | +#ifdef CONFIG_NO_HZ_COMMON | |
16820 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
16821 | + | |
16822 | +struct softirq_runner { | |
16823 | + struct task_struct *runner[NR_SOFTIRQS]; | |
16824 | +}; | |
16825 | + | |
16826 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); | |
16827 | + | |
16828 | +static inline void softirq_set_runner(unsigned int sirq) | |
16829 | +{ | |
16830 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16831 | + | |
16832 | + sr->runner[sirq] = current; | |
16833 | +} | |
16834 | + | |
16835 | +static inline void softirq_clr_runner(unsigned int sirq) | |
16836 | +{ | |
16837 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16838 | + | |
16839 | + sr->runner[sirq] = NULL; | |
16840 | +} | |
16841 | + | |
16842 | +/* | |
16843 | + * On preempt-rt a softirq running context might be blocked on a | |
16844 | + * lock. There might be no other runnable task on this CPU because the | |
16845 | + * lock owner runs on some other CPU. So we have to go into idle with | |
16846 | + * the pending bit set. Therefor we need to check this otherwise we | |
16847 | + * warn about false positives which confuses users and defeats the | |
16848 | + * whole purpose of this test. | |
16849 | + * | |
16850 | + * This code is called with interrupts disabled. | |
16851 | + */ | |
16852 | +void softirq_check_pending_idle(void) | |
16853 | +{ | |
16854 | + static int rate_limit; | |
16855 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
16856 | + u32 warnpending; | |
16857 | + int i; | |
16858 | + | |
16859 | + if (rate_limit >= 10) | |
16860 | + return; | |
16861 | + | |
16862 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
16863 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
16864 | + struct task_struct *tsk = sr->runner[i]; | |
16865 | + | |
16866 | + /* | |
16867 | + * The wakeup code in rtmutex.c wakes up the task | |
16868 | + * _before_ it sets pi_blocked_on to NULL under | |
16869 | + * tsk->pi_lock. So we need to check for both: state | |
16870 | + * and pi_blocked_on. | |
16871 | + */ | |
16872 | + if (tsk) { | |
16873 | + raw_spin_lock(&tsk->pi_lock); | |
16874 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
16875 | + /* Clear all bits pending in that task */ | |
16876 | + warnpending &= ~(tsk->softirqs_raised); | |
16877 | + warnpending &= ~(1 << i); | |
16878 | + } | |
16879 | + raw_spin_unlock(&tsk->pi_lock); | |
16880 | + } | |
16881 | + } | |
16882 | + | |
16883 | + if (warnpending) { | |
16884 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
16885 | + warnpending); | |
16886 | + rate_limit++; | |
16887 | + } | |
16888 | +} | |
16889 | +# else | |
16890 | +/* | |
16891 | + * On !PREEMPT_RT we just printk rate limited: | |
16892 | + */ | |
16893 | +void softirq_check_pending_idle(void) | |
16894 | +{ | |
16895 | + static int rate_limit; | |
16896 | + | |
16897 | + if (rate_limit < 10 && | |
16898 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
16899 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
16900 | + local_softirq_pending()); | |
16901 | + rate_limit++; | |
16902 | + } | |
16903 | +} | |
16904 | +# endif | |
16905 | + | |
16906 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
16907 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
16908 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
16909 | +#endif | |
16910 | + | |
16911 | /* | |
16912 | * we cannot loop indefinitely here to avoid userspace starvation, | |
16913 | * but we also don't want to introduce a worst case 1/HZ latency | |
16914 | @@ -77,6 +175,38 @@ static void wakeup_softirqd(void) | |
16915 | wake_up_process(tsk); | |
16916 | } | |
16917 | ||
16918 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16919 | +static void wakeup_timer_softirqd(void) | |
16920 | +{ | |
16921 | + /* Interrupts are disabled: no need to stop preemption */ | |
16922 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
16923 | + | |
16924 | + if (tsk && tsk->state != TASK_RUNNING) | |
16925 | + wake_up_process(tsk); | |
16926 | +} | |
16927 | +#endif | |
16928 | + | |
16929 | +static void handle_softirq(unsigned int vec_nr) | |
16930 | +{ | |
16931 | + struct softirq_action *h = softirq_vec + vec_nr; | |
16932 | + int prev_count; | |
16933 | + | |
16934 | + prev_count = preempt_count(); | |
16935 | + | |
16936 | + kstat_incr_softirqs_this_cpu(vec_nr); | |
16937 | + | |
16938 | + trace_softirq_entry(vec_nr); | |
16939 | + h->action(h); | |
16940 | + trace_softirq_exit(vec_nr); | |
16941 | + if (unlikely(prev_count != preempt_count())) { | |
16942 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
16943 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
16944 | + prev_count, preempt_count()); | |
16945 | + preempt_count_set(prev_count); | |
16946 | + } | |
16947 | +} | |
16948 | + | |
16949 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16950 | /* | |
16951 | * If ksoftirqd is scheduled, we do not want to process pending softirqs | |
16952 | * right now. Let ksoftirqd handle this at its own rate, to get fairness. | |
16953 | @@ -88,6 +218,47 @@ static bool ksoftirqd_running(void) | |
16954 | return tsk && (tsk->state == TASK_RUNNING); | |
16955 | } | |
16956 | ||
16957 | +static inline int ksoftirqd_softirq_pending(void) | |
16958 | +{ | |
16959 | + return local_softirq_pending(); | |
16960 | +} | |
16961 | + | |
16962 | +static void handle_pending_softirqs(u32 pending) | |
16963 | +{ | |
16964 | + struct softirq_action *h = softirq_vec; | |
16965 | + int softirq_bit; | |
16966 | + | |
16967 | + local_irq_enable(); | |
16968 | + | |
16969 | + h = softirq_vec; | |
16970 | + | |
16971 | + while ((softirq_bit = ffs(pending))) { | |
16972 | + unsigned int vec_nr; | |
16973 | + | |
16974 | + h += softirq_bit - 1; | |
16975 | + vec_nr = h - softirq_vec; | |
16976 | + handle_softirq(vec_nr); | |
16977 | + | |
16978 | + h++; | |
16979 | + pending >>= softirq_bit; | |
16980 | + } | |
16981 | + | |
16982 | + rcu_bh_qs(); | |
16983 | + local_irq_disable(); | |
16984 | +} | |
16985 | + | |
16986 | +static void run_ksoftirqd(unsigned int cpu) | |
16987 | +{ | |
16988 | + local_irq_disable(); | |
16989 | + if (ksoftirqd_softirq_pending()) { | |
16990 | + __do_softirq(); | |
16991 | + local_irq_enable(); | |
16992 | + cond_resched_rcu_qs(); | |
16993 | + return; | |
16994 | + } | |
16995 | + local_irq_enable(); | |
16996 | +} | |
16997 | + | |
16998 | /* | |
16999 | * preempt_count and SOFTIRQ_OFFSET usage: | |
17000 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
17001 | @@ -243,10 +414,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) | |
17002 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | |
17003 | unsigned long old_flags = current->flags; | |
17004 | int max_restart = MAX_SOFTIRQ_RESTART; | |
17005 | - struct softirq_action *h; | |
17006 | bool in_hardirq; | |
17007 | __u32 pending; | |
17008 | - int softirq_bit; | |
17009 | ||
17010 | /* | |
17011 | * Mask out PF_MEMALLOC s current task context is borrowed for the | |
17012 | @@ -265,36 +434,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) | |
17013 | /* Reset the pending bitmask before enabling irqs */ | |
17014 | set_softirq_pending(0); | |
17015 | ||
17016 | - local_irq_enable(); | |
17017 | - | |
17018 | - h = softirq_vec; | |
17019 | - | |
17020 | - while ((softirq_bit = ffs(pending))) { | |
17021 | - unsigned int vec_nr; | |
17022 | - int prev_count; | |
17023 | - | |
17024 | - h += softirq_bit - 1; | |
17025 | - | |
17026 | - vec_nr = h - softirq_vec; | |
17027 | - prev_count = preempt_count(); | |
17028 | - | |
17029 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
17030 | - | |
17031 | - trace_softirq_entry(vec_nr); | |
17032 | - h->action(h); | |
17033 | - trace_softirq_exit(vec_nr); | |
17034 | - if (unlikely(prev_count != preempt_count())) { | |
17035 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
17036 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
17037 | - prev_count, preempt_count()); | |
17038 | - preempt_count_set(prev_count); | |
17039 | - } | |
17040 | - h++; | |
17041 | - pending >>= softirq_bit; | |
17042 | - } | |
17043 | - | |
17044 | - rcu_bh_qs(); | |
17045 | - local_irq_disable(); | |
17046 | + handle_pending_softirqs(pending); | |
17047 | ||
17048 | pending = local_softirq_pending(); | |
17049 | if (pending) { | |
17050 | @@ -331,6 +471,309 @@ asmlinkage __visible void do_softirq(void) | |
17051 | } | |
17052 | ||
17053 | /* | |
17054 | + * This function must run with irqs disabled! | |
17055 | + */ | |
17056 | +void raise_softirq_irqoff(unsigned int nr) | |
17057 | +{ | |
17058 | + __raise_softirq_irqoff(nr); | |
17059 | + | |
17060 | + /* | |
17061 | + * If we're in an interrupt or softirq, we're done | |
17062 | + * (this also catches softirq-disabled code). We will | |
17063 | + * actually run the softirq once we return from | |
17064 | + * the irq or softirq. | |
17065 | + * | |
17066 | + * Otherwise we wake up ksoftirqd to make sure we | |
17067 | + * schedule the softirq soon. | |
17068 | + */ | |
17069 | + if (!in_interrupt()) | |
17070 | + wakeup_softirqd(); | |
17071 | +} | |
17072 | + | |
17073 | +void __raise_softirq_irqoff(unsigned int nr) | |
17074 | +{ | |
17075 | + trace_softirq_raise(nr); | |
17076 | + or_softirq_pending(1UL << nr); | |
17077 | +} | |
17078 | + | |
17079 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } | |
17080 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
17081 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
17082 | + | |
17083 | +#else /* !PREEMPT_RT_FULL */ | |
17084 | + | |
17085 | +/* | |
17086 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
17087 | + */ | |
17088 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
17089 | + | |
17090 | +void __init softirq_early_init(void) | |
17091 | +{ | |
17092 | + int i; | |
17093 | + | |
17094 | + for (i = 0; i < NR_SOFTIRQS; i++) | |
17095 | + local_irq_lock_init(local_softirq_locks[i]); | |
17096 | +} | |
17097 | + | |
17098 | +static void lock_softirq(int which) | |
17099 | +{ | |
17100 | + local_lock(local_softirq_locks[which]); | |
17101 | +} | |
17102 | + | |
17103 | +static void unlock_softirq(int which) | |
17104 | +{ | |
17105 | + local_unlock(local_softirq_locks[which]); | |
17106 | +} | |
17107 | + | |
17108 | +static void do_single_softirq(int which) | |
17109 | +{ | |
17110 | + unsigned long old_flags = current->flags; | |
17111 | + | |
17112 | + current->flags &= ~PF_MEMALLOC; | |
17113 | + vtime_account_irq_enter(current); | |
17114 | + current->flags |= PF_IN_SOFTIRQ; | |
17115 | + lockdep_softirq_enter(); | |
17116 | + local_irq_enable(); | |
17117 | + handle_softirq(which); | |
17118 | + local_irq_disable(); | |
17119 | + lockdep_softirq_exit(); | |
17120 | + current->flags &= ~PF_IN_SOFTIRQ; | |
17121 | + vtime_account_irq_enter(current); | |
17122 | + tsk_restore_flags(current, old_flags, PF_MEMALLOC); | |
17123 | +} | |
17124 | + | |
17125 | +/* | |
17126 | + * Called with interrupts disabled. Process softirqs which were raised | |
17127 | + * in current context (or on behalf of ksoftirqd). | |
17128 | + */ | |
17129 | +static void do_current_softirqs(void) | |
17130 | +{ | |
17131 | + while (current->softirqs_raised) { | |
17132 | + int i = __ffs(current->softirqs_raised); | |
17133 | + unsigned int pending, mask = (1U << i); | |
17134 | + | |
17135 | + current->softirqs_raised &= ~mask; | |
17136 | + local_irq_enable(); | |
17137 | + | |
17138 | + /* | |
17139 | + * If the lock is contended, we boost the owner to | |
17140 | + * process the softirq or leave the critical section | |
17141 | + * now. | |
17142 | + */ | |
17143 | + lock_softirq(i); | |
17144 | + local_irq_disable(); | |
17145 | + softirq_set_runner(i); | |
17146 | + /* | |
17147 | + * Check with the local_softirq_pending() bits, | |
17148 | + * whether we need to process this still or if someone | |
17149 | + * else took care of it. | |
17150 | + */ | |
17151 | + pending = local_softirq_pending(); | |
17152 | + if (pending & mask) { | |
17153 | + set_softirq_pending(pending & ~mask); | |
17154 | + do_single_softirq(i); | |
17155 | + } | |
17156 | + softirq_clr_runner(i); | |
17157 | + WARN_ON(current->softirq_nestcnt != 1); | |
17158 | + local_irq_enable(); | |
17159 | + unlock_softirq(i); | |
17160 | + local_irq_disable(); | |
17161 | + } | |
17162 | +} | |
17163 | + | |
17164 | +void __local_bh_disable(void) | |
17165 | +{ | |
17166 | + if (++current->softirq_nestcnt == 1) | |
17167 | + migrate_disable(); | |
17168 | +} | |
17169 | +EXPORT_SYMBOL(__local_bh_disable); | |
17170 | + | |
17171 | +void __local_bh_enable(void) | |
17172 | +{ | |
17173 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
17174 | + return; | |
17175 | + | |
17176 | + local_irq_disable(); | |
17177 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
17178 | + do_current_softirqs(); | |
17179 | + local_irq_enable(); | |
17180 | + | |
17181 | + if (--current->softirq_nestcnt == 0) | |
17182 | + migrate_enable(); | |
17183 | +} | |
17184 | +EXPORT_SYMBOL(__local_bh_enable); | |
17185 | + | |
17186 | +void _local_bh_enable(void) | |
17187 | +{ | |
17188 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
17189 | + return; | |
17190 | + if (--current->softirq_nestcnt == 0) | |
17191 | + migrate_enable(); | |
17192 | +} | |
17193 | +EXPORT_SYMBOL(_local_bh_enable); | |
17194 | + | |
17195 | +int in_serving_softirq(void) | |
17196 | +{ | |
17197 | + return current->flags & PF_IN_SOFTIRQ; | |
17198 | +} | |
17199 | +EXPORT_SYMBOL(in_serving_softirq); | |
17200 | + | |
17201 | +/* Called with preemption disabled */ | |
17202 | +static void run_ksoftirqd(unsigned int cpu) | |
17203 | +{ | |
17204 | + local_irq_disable(); | |
17205 | + current->softirq_nestcnt++; | |
17206 | + | |
17207 | + do_current_softirqs(); | |
17208 | + current->softirq_nestcnt--; | |
17209 | + local_irq_enable(); | |
17210 | + cond_resched_rcu_qs(); | |
17211 | +} | |
17212 | + | |
17213 | +/* | |
17214 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
17215 | + * disabled. So the cpu can't go away under us. | |
17216 | + */ | |
17217 | +void thread_do_softirq(void) | |
17218 | +{ | |
17219 | + if (!in_serving_softirq() && current->softirqs_raised) { | |
17220 | + current->softirq_nestcnt++; | |
17221 | + do_current_softirqs(); | |
17222 | + current->softirq_nestcnt--; | |
17223 | + } | |
17224 | +} | |
17225 | + | |
17226 | +static void do_raise_softirq_irqoff(unsigned int nr) | |
17227 | +{ | |
17228 | + unsigned int mask; | |
17229 | + | |
17230 | + mask = 1UL << nr; | |
17231 | + | |
17232 | + trace_softirq_raise(nr); | |
17233 | + or_softirq_pending(mask); | |
17234 | + | |
17235 | + /* | |
17236 | + * If we are not in a hard interrupt and inside a bh disabled | |
17237 | + * region, we simply raise the flag on current. local_bh_enable() | |
17238 | + * will make sure that the softirq is executed. Otherwise we | |
17239 | + * delegate it to ksoftirqd. | |
17240 | + */ | |
17241 | + if (!in_irq() && current->softirq_nestcnt) | |
17242 | + current->softirqs_raised |= mask; | |
17243 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
17244 | + return; | |
17245 | + | |
17246 | + if (mask & TIMER_SOFTIRQS) | |
17247 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
17248 | + else | |
17249 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
17250 | +} | |
17251 | + | |
17252 | +static void wakeup_proper_softirq(unsigned int nr) | |
17253 | +{ | |
17254 | + if ((1UL << nr) & TIMER_SOFTIRQS) | |
17255 | + wakeup_timer_softirqd(); | |
17256 | + else | |
17257 | + wakeup_softirqd(); | |
17258 | +} | |
17259 | + | |
17260 | +void __raise_softirq_irqoff(unsigned int nr) | |
17261 | +{ | |
17262 | + do_raise_softirq_irqoff(nr); | |
17263 | + if (!in_irq() && !current->softirq_nestcnt) | |
17264 | + wakeup_proper_softirq(nr); | |
17265 | +} | |
17266 | + | |
17267 | +/* | |
17268 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
17269 | + */ | |
17270 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
17271 | +{ | |
17272 | + unsigned int mask; | |
17273 | + | |
17274 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || | |
17275 | + !__this_cpu_read(ktimer_softirqd))) | |
17276 | + return; | |
17277 | + mask = 1UL << nr; | |
17278 | + | |
17279 | + trace_softirq_raise(nr); | |
17280 | + or_softirq_pending(mask); | |
17281 | + if (mask & TIMER_SOFTIRQS) | |
17282 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
17283 | + else | |
17284 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
17285 | + wakeup_proper_softirq(nr); | |
17286 | +} | |
17287 | + | |
17288 | +/* | |
17289 | + * This function must run with irqs disabled! | |
17290 | + */ | |
17291 | +void raise_softirq_irqoff(unsigned int nr) | |
17292 | +{ | |
17293 | + do_raise_softirq_irqoff(nr); | |
17294 | + | |
17295 | + /* | |
17296 | + * If we're in an hard interrupt we let irq return code deal | |
17297 | + * with the wakeup of ksoftirqd. | |
17298 | + */ | |
17299 | + if (in_irq()) | |
17300 | + return; | |
17301 | + /* | |
17302 | + * If we are in thread context but outside of a bh disabled | |
17303 | + * region, we need to wake ksoftirqd as well. | |
17304 | + * | |
17305 | + * CHECKME: Some of the places which do that could be wrapped | |
17306 | + * into local_bh_disable/enable pairs. Though it's unclear | |
17307 | + * whether this is worth the effort. To find those places just | |
17308 | + * raise a WARN() if the condition is met. | |
17309 | + */ | |
17310 | + if (!current->softirq_nestcnt) | |
17311 | + wakeup_proper_softirq(nr); | |
17312 | +} | |
17313 | + | |
17314 | +static inline int ksoftirqd_softirq_pending(void) | |
17315 | +{ | |
17316 | + return current->softirqs_raised; | |
17317 | +} | |
17318 | + | |
17319 | +static inline void local_bh_disable_nort(void) { } | |
17320 | +static inline void _local_bh_enable_nort(void) { } | |
17321 | + | |
17322 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
17323 | +{ | |
17324 | + /* Take over all but timer pending softirqs when starting */ | |
17325 | + local_irq_disable(); | |
17326 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
17327 | + local_irq_enable(); | |
17328 | +} | |
17329 | + | |
17330 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) | |
17331 | +{ | |
17332 | + struct sched_param param = { .sched_priority = 1 }; | |
17333 | + | |
17334 | + sched_setscheduler(current, SCHED_FIFO, ¶m); | |
17335 | + | |
17336 | + /* Take over timer pending softirqs when starting */ | |
17337 | + local_irq_disable(); | |
17338 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
17339 | + local_irq_enable(); | |
17340 | +} | |
17341 | + | |
17342 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, | |
17343 | + bool online) | |
17344 | +{ | |
17345 | + struct sched_param param = { .sched_priority = 0 }; | |
17346 | + | |
17347 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); | |
17348 | +} | |
17349 | + | |
17350 | +static int ktimer_softirqd_should_run(unsigned int cpu) | |
17351 | +{ | |
17352 | + return current->softirqs_raised; | |
17353 | +} | |
17354 | + | |
17355 | +#endif /* PREEMPT_RT_FULL */ | |
17356 | +/* | |
17357 | * Enter an interrupt context. | |
17358 | */ | |
17359 | void irq_enter(void) | |
17360 | @@ -341,9 +784,9 @@ void irq_enter(void) | |
17361 | * Prevent raise_softirq from needlessly waking up ksoftirqd | |
17362 | * here, as softirq will be serviced on return from interrupt. | |
17363 | */ | |
17364 | - local_bh_disable(); | |
17365 | + local_bh_disable_nort(); | |
17366 | tick_irq_enter(); | |
17367 | - _local_bh_enable(); | |
17368 | + _local_bh_enable_nort(); | |
17369 | } | |
17370 | ||
17371 | __irq_enter(); | |
17372 | @@ -351,6 +794,7 @@ void irq_enter(void) | |
17373 | ||
17374 | static inline void invoke_softirq(void) | |
17375 | { | |
17376 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17377 | if (ksoftirqd_running()) | |
17378 | return; | |
17379 | ||
17380 | @@ -373,6 +817,18 @@ static inline void invoke_softirq(void) | |
17381 | } else { | |
17382 | wakeup_softirqd(); | |
17383 | } | |
17384 | +#else /* PREEMPT_RT_FULL */ | |
17385 | + unsigned long flags; | |
17386 | + | |
17387 | + local_irq_save(flags); | |
17388 | + if (__this_cpu_read(ksoftirqd) && | |
17389 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
17390 | + wakeup_softirqd(); | |
17391 | + if (__this_cpu_read(ktimer_softirqd) && | |
17392 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
17393 | + wakeup_timer_softirqd(); | |
17394 | + local_irq_restore(flags); | |
17395 | +#endif | |
17396 | } | |
17397 | ||
17398 | static inline void tick_irq_exit(void) | |
17399 | @@ -409,26 +865,6 @@ void irq_exit(void) | |
17400 | trace_hardirq_exit(); /* must be last! */ | |
17401 | } | |
17402 | ||
17403 | -/* | |
17404 | - * This function must run with irqs disabled! | |
17405 | - */ | |
17406 | -inline void raise_softirq_irqoff(unsigned int nr) | |
17407 | -{ | |
17408 | - __raise_softirq_irqoff(nr); | |
17409 | - | |
17410 | - /* | |
17411 | - * If we're in an interrupt or softirq, we're done | |
17412 | - * (this also catches softirq-disabled code). We will | |
17413 | - * actually run the softirq once we return from | |
17414 | - * the irq or softirq. | |
17415 | - * | |
17416 | - * Otherwise we wake up ksoftirqd to make sure we | |
17417 | - * schedule the softirq soon. | |
17418 | - */ | |
17419 | - if (!in_interrupt()) | |
17420 | - wakeup_softirqd(); | |
17421 | -} | |
17422 | - | |
17423 | void raise_softirq(unsigned int nr) | |
17424 | { | |
17425 | unsigned long flags; | |
17426 | @@ -438,12 +874,6 @@ void raise_softirq(unsigned int nr) | |
17427 | local_irq_restore(flags); | |
17428 | } | |
17429 | ||
17430 | -void __raise_softirq_irqoff(unsigned int nr) | |
17431 | -{ | |
17432 | - trace_softirq_raise(nr); | |
17433 | - or_softirq_pending(1UL << nr); | |
17434 | -} | |
17435 | - | |
17436 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
17437 | { | |
17438 | softirq_vec[nr].action = action; | |
17439 | @@ -460,15 +890,45 @@ struct tasklet_head { | |
17440 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | |
17441 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
17442 | ||
17443 | +static void inline | |
17444 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
17445 | +{ | |
17446 | + if (tasklet_trylock(t)) { | |
17447 | +again: | |
17448 | + /* We may have been preempted before tasklet_trylock | |
17449 | + * and __tasklet_action may have already run. | |
17450 | + * So double check the sched bit while the takslet | |
17451 | + * is locked before adding it to the list. | |
17452 | + */ | |
17453 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
17454 | + t->next = NULL; | |
17455 | + *head->tail = t; | |
17456 | + head->tail = &(t->next); | |
17457 | + raise_softirq_irqoff(nr); | |
17458 | + tasklet_unlock(t); | |
17459 | + } else { | |
17460 | + /* This is subtle. If we hit the corner case above | |
17461 | + * It is possible that we get preempted right here, | |
17462 | + * and another task has successfully called | |
17463 | + * tasklet_schedule(), then this function, and | |
17464 | + * failed on the trylock. Thus we must be sure | |
17465 | + * before releasing the tasklet lock, that the | |
17466 | + * SCHED_BIT is clear. Otherwise the tasklet | |
17467 | + * may get its SCHED_BIT set, but not added to the | |
17468 | + * list | |
17469 | + */ | |
17470 | + if (!tasklet_tryunlock(t)) | |
17471 | + goto again; | |
17472 | + } | |
17473 | + } | |
17474 | +} | |
17475 | + | |
17476 | void __tasklet_schedule(struct tasklet_struct *t) | |
17477 | { | |
17478 | unsigned long flags; | |
17479 | ||
17480 | local_irq_save(flags); | |
17481 | - t->next = NULL; | |
17482 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
17483 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
17484 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
17485 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
17486 | local_irq_restore(flags); | |
17487 | } | |
17488 | EXPORT_SYMBOL(__tasklet_schedule); | |
17489 | @@ -478,10 +938,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |
17490 | unsigned long flags; | |
17491 | ||
17492 | local_irq_save(flags); | |
17493 | - t->next = NULL; | |
17494 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
17495 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
17496 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
17497 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
17498 | local_irq_restore(flags); | |
17499 | } | |
17500 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
17501 | @@ -490,82 +947,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |
17502 | { | |
17503 | BUG_ON(!irqs_disabled()); | |
17504 | ||
17505 | - t->next = __this_cpu_read(tasklet_hi_vec.head); | |
17506 | - __this_cpu_write(tasklet_hi_vec.head, t); | |
17507 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
17508 | + __tasklet_hi_schedule(t); | |
17509 | } | |
17510 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | |
17511 | ||
17512 | -static __latent_entropy void tasklet_action(struct softirq_action *a) | |
17513 | +void tasklet_enable(struct tasklet_struct *t) | |
17514 | { | |
17515 | - struct tasklet_struct *list; | |
17516 | + if (!atomic_dec_and_test(&t->count)) | |
17517 | + return; | |
17518 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
17519 | + tasklet_schedule(t); | |
17520 | +} | |
17521 | +EXPORT_SYMBOL(tasklet_enable); | |
17522 | ||
17523 | - local_irq_disable(); | |
17524 | - list = __this_cpu_read(tasklet_vec.head); | |
17525 | - __this_cpu_write(tasklet_vec.head, NULL); | |
17526 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
17527 | - local_irq_enable(); | |
17528 | +static void __tasklet_action(struct softirq_action *a, | |
17529 | + struct tasklet_struct *list) | |
17530 | +{ | |
17531 | + int loops = 1000000; | |
17532 | ||
17533 | while (list) { | |
17534 | struct tasklet_struct *t = list; | |
17535 | ||
17536 | list = list->next; | |
17537 | ||
17538 | - if (tasklet_trylock(t)) { | |
17539 | - if (!atomic_read(&t->count)) { | |
17540 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
17541 | - &t->state)) | |
17542 | - BUG(); | |
17543 | - t->func(t->data); | |
17544 | - tasklet_unlock(t); | |
17545 | - continue; | |
17546 | - } | |
17547 | - tasklet_unlock(t); | |
17548 | + /* | |
17549 | + * Should always succeed - after a tasklist got on the | |
17550 | + * list (after getting the SCHED bit set from 0 to 1), | |
17551 | + * nothing but the tasklet softirq it got queued to can | |
17552 | + * lock it: | |
17553 | + */ | |
17554 | + if (!tasklet_trylock(t)) { | |
17555 | + WARN_ON(1); | |
17556 | + continue; | |
17557 | } | |
17558 | ||
17559 | - local_irq_disable(); | |
17560 | t->next = NULL; | |
17561 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
17562 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
17563 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
17564 | - local_irq_enable(); | |
17565 | + | |
17566 | + /* | |
17567 | + * If we cannot handle the tasklet because it's disabled, | |
17568 | + * mark it as pending. tasklet_enable() will later | |
17569 | + * re-schedule the tasklet. | |
17570 | + */ | |
17571 | + if (unlikely(atomic_read(&t->count))) { | |
17572 | +out_disabled: | |
17573 | + /* implicit unlock: */ | |
17574 | + wmb(); | |
17575 | + t->state = TASKLET_STATEF_PENDING; | |
17576 | + continue; | |
17577 | + } | |
17578 | + | |
17579 | + /* | |
17580 | + * After this point on the tasklet might be rescheduled | |
17581 | + * on another CPU, but it can only be added to another | |
17582 | + * CPU's tasklet list if we unlock the tasklet (which we | |
17583 | + * dont do yet). | |
17584 | + */ | |
17585 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
17586 | + WARN_ON(1); | |
17587 | + | |
17588 | +again: | |
17589 | + t->func(t->data); | |
17590 | + | |
17591 | + /* | |
17592 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
17593 | + * another CPU might have scheduled or disabled the tasklet. | |
17594 | + * We only allow the STATE_RUN -> 0 transition here. | |
17595 | + */ | |
17596 | + while (!tasklet_tryunlock(t)) { | |
17597 | + /* | |
17598 | + * If it got disabled meanwhile, bail out: | |
17599 | + */ | |
17600 | + if (atomic_read(&t->count)) | |
17601 | + goto out_disabled; | |
17602 | + /* | |
17603 | + * If it got scheduled meanwhile, re-execute | |
17604 | + * the tasklet function: | |
17605 | + */ | |
17606 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
17607 | + goto again; | |
17608 | + if (!--loops) { | |
17609 | + printk("hm, tasklet state: %08lx\n", t->state); | |
17610 | + WARN_ON(1); | |
17611 | + tasklet_unlock(t); | |
17612 | + break; | |
17613 | + } | |
17614 | + } | |
17615 | } | |
17616 | } | |
17617 | ||
17618 | +static void tasklet_action(struct softirq_action *a) | |
17619 | +{ | |
17620 | + struct tasklet_struct *list; | |
17621 | + | |
17622 | + local_irq_disable(); | |
17623 | + | |
17624 | + list = __this_cpu_read(tasklet_vec.head); | |
17625 | + __this_cpu_write(tasklet_vec.head, NULL); | |
17626 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
17627 | + | |
17628 | + local_irq_enable(); | |
17629 | + | |
17630 | + __tasklet_action(a, list); | |
17631 | +} | |
17632 | + | |
17633 | static __latent_entropy void tasklet_hi_action(struct softirq_action *a) | |
17634 | { | |
17635 | struct tasklet_struct *list; | |
17636 | ||
17637 | local_irq_disable(); | |
17638 | + | |
17639 | list = __this_cpu_read(tasklet_hi_vec.head); | |
17640 | __this_cpu_write(tasklet_hi_vec.head, NULL); | |
17641 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
17642 | + | |
17643 | local_irq_enable(); | |
17644 | ||
17645 | - while (list) { | |
17646 | - struct tasklet_struct *t = list; | |
17647 | - | |
17648 | - list = list->next; | |
17649 | - | |
17650 | - if (tasklet_trylock(t)) { | |
17651 | - if (!atomic_read(&t->count)) { | |
17652 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
17653 | - &t->state)) | |
17654 | - BUG(); | |
17655 | - t->func(t->data); | |
17656 | - tasklet_unlock(t); | |
17657 | - continue; | |
17658 | - } | |
17659 | - tasklet_unlock(t); | |
17660 | - } | |
17661 | - | |
17662 | - local_irq_disable(); | |
17663 | - t->next = NULL; | |
17664 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
17665 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
17666 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
17667 | - local_irq_enable(); | |
17668 | - } | |
17669 | + __tasklet_action(a, list); | |
17670 | } | |
17671 | ||
17672 | void tasklet_init(struct tasklet_struct *t, | |
17673 | @@ -586,7 +1083,7 @@ void tasklet_kill(struct tasklet_struct *t) | |
17674 | ||
17675 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
17676 | do { | |
17677 | - yield(); | |
17678 | + msleep(1); | |
17679 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
17680 | } | |
17681 | tasklet_unlock_wait(t); | |
17682 | @@ -660,25 +1157,26 @@ void __init softirq_init(void) | |
17683 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); | |
17684 | } | |
17685 | ||
17686 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
17687 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
17688 | +{ | |
17689 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
17690 | + /* | |
17691 | + * Hack for now to avoid this busy-loop: | |
17692 | + */ | |
17693 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17694 | + msleep(1); | |
17695 | +#else | |
17696 | + barrier(); | |
17697 | +#endif | |
17698 | + } | |
17699 | +} | |
17700 | +EXPORT_SYMBOL(tasklet_unlock_wait); | |
17701 | +#endif | |
17702 | + | |
17703 | static int ksoftirqd_should_run(unsigned int cpu) | |
17704 | { | |
17705 | - return local_softirq_pending(); | |
17706 | -} | |
17707 | - | |
17708 | -static void run_ksoftirqd(unsigned int cpu) | |
17709 | -{ | |
17710 | - local_irq_disable(); | |
17711 | - if (local_softirq_pending()) { | |
17712 | - /* | |
17713 | - * We can safely run softirq on inline stack, as we are not deep | |
17714 | - * in the task stack here. | |
17715 | - */ | |
17716 | - __do_softirq(); | |
17717 | - local_irq_enable(); | |
17718 | - cond_resched_rcu_qs(); | |
17719 | - return; | |
17720 | - } | |
17721 | - local_irq_enable(); | |
17722 | + return ksoftirqd_softirq_pending(); | |
17723 | } | |
17724 | ||
17725 | #ifdef CONFIG_HOTPLUG_CPU | |
17726 | @@ -745,17 +1243,31 @@ static int takeover_tasklets(unsigned int cpu) | |
17727 | ||
17728 | static struct smp_hotplug_thread softirq_threads = { | |
17729 | .store = &ksoftirqd, | |
17730 | + .setup = ksoftirqd_set_sched_params, | |
17731 | .thread_should_run = ksoftirqd_should_run, | |
17732 | .thread_fn = run_ksoftirqd, | |
17733 | .thread_comm = "ksoftirqd/%u", | |
17734 | }; | |
17735 | ||
17736 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17737 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
17738 | + .store = &ktimer_softirqd, | |
17739 | + .setup = ktimer_softirqd_set_sched_params, | |
17740 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
17741 | + .thread_should_run = ktimer_softirqd_should_run, | |
17742 | + .thread_fn = run_ksoftirqd, | |
17743 | + .thread_comm = "ktimersoftd/%u", | |
17744 | +}; | |
17745 | +#endif | |
17746 | + | |
17747 | static __init int spawn_ksoftirqd(void) | |
17748 | { | |
17749 | cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, | |
17750 | takeover_tasklets); | |
17751 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); | |
17752 | - | |
17753 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17754 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
17755 | +#endif | |
17756 | return 0; | |
17757 | } | |
17758 | early_initcall(spawn_ksoftirqd); | |
17759 | diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c | |
17760 | index ec9ab2f01489..8b89dbedeaff 100644 | |
17761 | --- a/kernel/stop_machine.c | |
17762 | +++ b/kernel/stop_machine.c | |
17763 | @@ -36,7 +36,7 @@ struct cpu_stop_done { | |
17764 | struct cpu_stopper { | |
17765 | struct task_struct *thread; | |
17766 | ||
17767 | - spinlock_t lock; | |
17768 | + raw_spinlock_t lock; | |
17769 | bool enabled; /* is this stopper enabled? */ | |
17770 | struct list_head works; /* list of pending works */ | |
17771 | ||
17772 | @@ -78,14 +78,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) | |
17773 | unsigned long flags; | |
17774 | bool enabled; | |
17775 | ||
17776 | - spin_lock_irqsave(&stopper->lock, flags); | |
17777 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
17778 | enabled = stopper->enabled; | |
17779 | if (enabled) | |
17780 | __cpu_stop_queue_work(stopper, work); | |
17781 | else if (work->done) | |
17782 | cpu_stop_signal_done(work->done); | |
17783 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
17784 | ||
17785 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
17786 | return enabled; | |
17787 | } | |
17788 | ||
17789 | @@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
17790 | struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); | |
17791 | int err; | |
17792 | retry: | |
17793 | - spin_lock_irq(&stopper1->lock); | |
17794 | - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
17795 | + raw_spin_lock_irq(&stopper1->lock); | |
17796 | + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
17797 | ||
17798 | err = -ENOENT; | |
17799 | if (!stopper1->enabled || !stopper2->enabled) | |
17800 | @@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
17801 | __cpu_stop_queue_work(stopper1, work1); | |
17802 | __cpu_stop_queue_work(stopper2, work2); | |
17803 | unlock: | |
17804 | - spin_unlock(&stopper2->lock); | |
17805 | - spin_unlock_irq(&stopper1->lock); | |
17806 | + raw_spin_unlock(&stopper2->lock); | |
17807 | + raw_spin_unlock_irq(&stopper1->lock); | |
17808 | ||
17809 | if (unlikely(err == -EDEADLK)) { | |
17810 | while (stop_cpus_in_progress) | |
17811 | @@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |
17812 | unsigned long flags; | |
17813 | int run; | |
17814 | ||
17815 | - spin_lock_irqsave(&stopper->lock, flags); | |
17816 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
17817 | run = !list_empty(&stopper->works); | |
17818 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
17819 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
17820 | return run; | |
17821 | } | |
17822 | ||
17823 | @@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu) | |
17824 | ||
17825 | repeat: | |
17826 | work = NULL; | |
17827 | - spin_lock_irq(&stopper->lock); | |
17828 | + raw_spin_lock_irq(&stopper->lock); | |
17829 | if (!list_empty(&stopper->works)) { | |
17830 | work = list_first_entry(&stopper->works, | |
17831 | struct cpu_stop_work, list); | |
17832 | list_del_init(&work->list); | |
17833 | } | |
17834 | - spin_unlock_irq(&stopper->lock); | |
17835 | + raw_spin_unlock_irq(&stopper->lock); | |
17836 | ||
17837 | if (work) { | |
17838 | cpu_stop_fn_t fn = work->fn; | |
17839 | @@ -475,6 +475,8 @@ static void cpu_stopper_thread(unsigned int cpu) | |
17840 | struct cpu_stop_done *done = work->done; | |
17841 | int ret; | |
17842 | ||
17843 | + /* XXX */ | |
17844 | + | |
17845 | /* cpu stop callbacks must not sleep, make in_atomic() == T */ | |
17846 | preempt_count_inc(); | |
17847 | ret = fn(arg); | |
17848 | @@ -541,7 +543,7 @@ static int __init cpu_stop_init(void) | |
17849 | for_each_possible_cpu(cpu) { | |
17850 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
17851 | ||
17852 | - spin_lock_init(&stopper->lock); | |
17853 | + raw_spin_lock_init(&stopper->lock); | |
17854 | INIT_LIST_HEAD(&stopper->works); | |
17855 | } | |
17856 | ||
17857 | diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c | |
17858 | index bb5ec425dfe0..8338b14ed3a3 100644 | |
17859 | --- a/kernel/time/hrtimer.c | |
17860 | +++ b/kernel/time/hrtimer.c | |
17861 | @@ -53,6 +53,7 @@ | |
17862 | #include <asm/uaccess.h> | |
17863 | ||
17864 | #include <trace/events/timer.h> | |
17865 | +#include <trace/events/hist.h> | |
17866 | ||
17867 | #include "tick-internal.h" | |
17868 | ||
17869 | @@ -695,6 +696,29 @@ static void hrtimer_switch_to_hres(void) | |
17870 | retrigger_next_event(NULL); | |
17871 | } | |
17872 | ||
17873 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17874 | + | |
17875 | +static struct swork_event clock_set_delay_work; | |
17876 | + | |
17877 | +static void run_clock_set_delay(struct swork_event *event) | |
17878 | +{ | |
17879 | + clock_was_set(); | |
17880 | +} | |
17881 | + | |
17882 | +void clock_was_set_delayed(void) | |
17883 | +{ | |
17884 | + swork_queue(&clock_set_delay_work); | |
17885 | +} | |
17886 | + | |
17887 | +static __init int create_clock_set_delay_thread(void) | |
17888 | +{ | |
17889 | + WARN_ON(swork_get()); | |
17890 | + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); | |
17891 | + return 0; | |
17892 | +} | |
17893 | +early_initcall(create_clock_set_delay_thread); | |
17894 | +#else /* PREEMPT_RT_FULL */ | |
17895 | + | |
17896 | static void clock_was_set_work(struct work_struct *work) | |
17897 | { | |
17898 | clock_was_set(); | |
17899 | @@ -710,6 +734,7 @@ void clock_was_set_delayed(void) | |
17900 | { | |
17901 | schedule_work(&hrtimer_work); | |
17902 | } | |
17903 | +#endif | |
17904 | ||
17905 | #else | |
17906 | ||
17907 | @@ -719,11 +744,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
17908 | static inline void hrtimer_switch_to_hres(void) { } | |
17909 | static inline void | |
17910 | hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
17911 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
17912 | - struct hrtimer_clock_base *base) | |
17913 | -{ | |
17914 | - return 0; | |
17915 | -} | |
17916 | +static inline void hrtimer_reprogram(struct hrtimer *timer, | |
17917 | + struct hrtimer_clock_base *base) { } | |
17918 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
17919 | static inline void retrigger_next_event(void *arg) { } | |
17920 | ||
17921 | @@ -855,6 +877,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |
17922 | } | |
17923 | EXPORT_SYMBOL_GPL(hrtimer_forward); | |
17924 | ||
17925 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17926 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
17927 | + | |
17928 | +/** | |
17929 | + * hrtimer_wait_for_timer - Wait for a running timer | |
17930 | + * | |
17931 | + * @timer: timer to wait for | |
17932 | + * | |
17933 | + * The function waits in case the timers callback function is | |
17934 | + * currently executed on the waitqueue of the timer base. The | |
17935 | + * waitqueue is woken up after the timer callback function has | |
17936 | + * finished execution. | |
17937 | + */ | |
17938 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) | |
17939 | +{ | |
17940 | + struct hrtimer_clock_base *base = timer->base; | |
17941 | + | |
17942 | + if (base && base->cpu_base && !timer->irqsafe) | |
17943 | + wait_event(base->cpu_base->wait, | |
17944 | + !(hrtimer_callback_running(timer))); | |
17945 | +} | |
17946 | + | |
17947 | +#else | |
17948 | +# define wake_up_timer_waiters(b) do { } while (0) | |
17949 | +#endif | |
17950 | + | |
17951 | /* | |
17952 | * enqueue_hrtimer - internal function to (re)start a timer | |
17953 | * | |
17954 | @@ -896,6 +944,11 @@ static void __remove_hrtimer(struct hrtimer *timer, | |
17955 | if (!(state & HRTIMER_STATE_ENQUEUED)) | |
17956 | return; | |
17957 | ||
17958 | + if (unlikely(!list_empty(&timer->cb_entry))) { | |
17959 | + list_del_init(&timer->cb_entry); | |
17960 | + return; | |
17961 | + } | |
17962 | + | |
17963 | if (!timerqueue_del(&base->active, &timer->node)) | |
17964 | cpu_base->active_bases &= ~(1 << base->index); | |
17965 | ||
17966 | @@ -991,7 +1044,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
17967 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
17968 | ||
17969 | timer_stats_hrtimer_set_start_info(timer); | |
17970 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
17971 | + { | |
17972 | + ktime_t now = new_base->get_time(); | |
17973 | ||
17974 | + if (ktime_to_ns(tim) < ktime_to_ns(now)) | |
17975 | + timer->praecox = now; | |
17976 | + else | |
17977 | + timer->praecox = ktime_set(0, 0); | |
17978 | + } | |
17979 | +#endif | |
17980 | leftmost = enqueue_hrtimer(timer, new_base); | |
17981 | if (!leftmost) | |
17982 | goto unlock; | |
17983 | @@ -1063,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer) | |
17984 | ||
17985 | if (ret >= 0) | |
17986 | return ret; | |
17987 | - cpu_relax(); | |
17988 | + hrtimer_wait_for_timer(timer); | |
17989 | } | |
17990 | } | |
17991 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
17992 | @@ -1127,6 +1189,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
17993 | ||
17994 | base = hrtimer_clockid_to_base(clock_id); | |
17995 | timer->base = &cpu_base->clock_base[base]; | |
17996 | + INIT_LIST_HEAD(&timer->cb_entry); | |
17997 | timerqueue_init(&timer->node); | |
17998 | ||
17999 | #ifdef CONFIG_TIMER_STATS | |
18000 | @@ -1167,6 +1230,7 @@ bool hrtimer_active(const struct hrtimer *timer) | |
18001 | seq = raw_read_seqcount_begin(&cpu_base->seq); | |
18002 | ||
18003 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
18004 | + cpu_base->running_soft == timer || | |
18005 | cpu_base->running == timer) | |
18006 | return true; | |
18007 | ||
18008 | @@ -1265,10 +1329,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
18009 | cpu_base->running = NULL; | |
18010 | } | |
18011 | ||
18012 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
18013 | +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, | |
18014 | + struct hrtimer_clock_base *base) | |
18015 | +{ | |
18016 | + int leftmost; | |
18017 | + | |
18018 | + if (restart != HRTIMER_NORESTART && | |
18019 | + !(timer->state & HRTIMER_STATE_ENQUEUED)) { | |
18020 | + | |
18021 | + leftmost = enqueue_hrtimer(timer, base); | |
18022 | + if (!leftmost) | |
18023 | + return; | |
18024 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
18025 | + if (!hrtimer_is_hres_active(timer)) { | |
18026 | + /* | |
18027 | + * Kick to reschedule the next tick to handle the new timer | |
18028 | + * on dynticks target. | |
18029 | + */ | |
18030 | + if (base->cpu_base->nohz_active) | |
18031 | + wake_up_nohz_cpu(base->cpu_base->cpu); | |
18032 | + } else { | |
18033 | + | |
18034 | + hrtimer_reprogram(timer, base); | |
18035 | + } | |
18036 | +#endif | |
18037 | + } | |
18038 | +} | |
18039 | + | |
18040 | +/* | |
18041 | + * The changes in mainline which removed the callback modes from | |
18042 | + * hrtimer are not yet working with -rt. The non wakeup_process() | |
18043 | + * based callbacks which involve sleeping locks need to be treated | |
18044 | + * seperately. | |
18045 | + */ | |
18046 | +static void hrtimer_rt_run_pending(void) | |
18047 | +{ | |
18048 | + enum hrtimer_restart (*fn)(struct hrtimer *); | |
18049 | + struct hrtimer_cpu_base *cpu_base; | |
18050 | + struct hrtimer_clock_base *base; | |
18051 | + struct hrtimer *timer; | |
18052 | + int index, restart; | |
18053 | + | |
18054 | + local_irq_disable(); | |
18055 | + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); | |
18056 | + | |
18057 | + raw_spin_lock(&cpu_base->lock); | |
18058 | + | |
18059 | + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | |
18060 | + base = &cpu_base->clock_base[index]; | |
18061 | + | |
18062 | + while (!list_empty(&base->expired)) { | |
18063 | + timer = list_first_entry(&base->expired, | |
18064 | + struct hrtimer, cb_entry); | |
18065 | + | |
18066 | + /* | |
18067 | + * Same as the above __run_hrtimer function | |
18068 | + * just we run with interrupts enabled. | |
18069 | + */ | |
18070 | + debug_deactivate(timer); | |
18071 | + cpu_base->running_soft = timer; | |
18072 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
18073 | + | |
18074 | + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
18075 | + timer_stats_account_hrtimer(timer); | |
18076 | + fn = timer->function; | |
18077 | + | |
18078 | + raw_spin_unlock_irq(&cpu_base->lock); | |
18079 | + restart = fn(timer); | |
18080 | + raw_spin_lock_irq(&cpu_base->lock); | |
18081 | + | |
18082 | + hrtimer_rt_reprogram(restart, timer, base); | |
18083 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
18084 | + | |
18085 | + WARN_ON_ONCE(cpu_base->running_soft != timer); | |
18086 | + cpu_base->running_soft = NULL; | |
18087 | + } | |
18088 | + } | |
18089 | + | |
18090 | + raw_spin_unlock_irq(&cpu_base->lock); | |
18091 | + | |
18092 | + wake_up_timer_waiters(cpu_base); | |
18093 | +} | |
18094 | + | |
18095 | +static int hrtimer_rt_defer(struct hrtimer *timer) | |
18096 | +{ | |
18097 | + if (timer->irqsafe) | |
18098 | + return 0; | |
18099 | + | |
18100 | + __remove_hrtimer(timer, timer->base, timer->state, 0); | |
18101 | + list_add_tail(&timer->cb_entry, &timer->base->expired); | |
18102 | + return 1; | |
18103 | +} | |
18104 | + | |
18105 | +#else | |
18106 | + | |
18107 | +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } | |
18108 | + | |
18109 | +#endif | |
18110 | + | |
18111 | +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); | |
18112 | + | |
18113 | static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
18114 | { | |
18115 | struct hrtimer_clock_base *base = cpu_base->clock_base; | |
18116 | unsigned int active = cpu_base->active_bases; | |
18117 | + int raise = 0; | |
18118 | ||
18119 | for (; active; base++, active >>= 1) { | |
18120 | struct timerqueue_node *node; | |
18121 | @@ -1284,6 +1450,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
18122 | ||
18123 | timer = container_of(node, struct hrtimer, node); | |
18124 | ||
18125 | + trace_hrtimer_interrupt(raw_smp_processor_id(), | |
18126 | + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? | |
18127 | + timer->praecox : hrtimer_get_expires(timer), | |
18128 | + basenow)), | |
18129 | + current, | |
18130 | + timer->function == hrtimer_wakeup ? | |
18131 | + container_of(timer, struct hrtimer_sleeper, | |
18132 | + timer)->task : NULL); | |
18133 | + | |
18134 | /* | |
18135 | * The immediate goal for using the softexpires is | |
18136 | * minimizing wakeups, not running timers at the | |
18137 | @@ -1299,9 +1474,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
18138 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
18139 | break; | |
18140 | ||
18141 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
18142 | + if (!hrtimer_rt_defer(timer)) | |
18143 | + __run_hrtimer(cpu_base, base, timer, &basenow); | |
18144 | + else | |
18145 | + raise = 1; | |
18146 | } | |
18147 | } | |
18148 | + if (raise) | |
18149 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
18150 | } | |
18151 | ||
18152 | #ifdef CONFIG_HIGH_RES_TIMERS | |
18153 | @@ -1464,16 +1644,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |
18154 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |
18155 | { | |
18156 | sl->timer.function = hrtimer_wakeup; | |
18157 | + sl->timer.irqsafe = 1; | |
18158 | sl->task = task; | |
18159 | } | |
18160 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
18161 | ||
18162 | -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | |
18163 | +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, | |
18164 | + unsigned long state) | |
18165 | { | |
18166 | hrtimer_init_sleeper(t, current); | |
18167 | ||
18168 | do { | |
18169 | - set_current_state(TASK_INTERRUPTIBLE); | |
18170 | + set_current_state(state); | |
18171 | hrtimer_start_expires(&t->timer, mode); | |
18172 | ||
18173 | if (likely(t->task)) | |
18174 | @@ -1515,7 +1697,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
18175 | HRTIMER_MODE_ABS); | |
18176 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
18177 | ||
18178 | - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | |
18179 | + /* cpu_chill() does not care about restart state. */ | |
18180 | + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) | |
18181 | goto out; | |
18182 | ||
18183 | rmtp = restart->nanosleep.rmtp; | |
18184 | @@ -1532,8 +1715,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
18185 | return ret; | |
18186 | } | |
18187 | ||
18188 | -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
18189 | - const enum hrtimer_mode mode, const clockid_t clockid) | |
18190 | +static long | |
18191 | +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
18192 | + const enum hrtimer_mode mode, const clockid_t clockid, | |
18193 | + unsigned long state) | |
18194 | { | |
18195 | struct restart_block *restart; | |
18196 | struct hrtimer_sleeper t; | |
18197 | @@ -1546,7 +1731,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
18198 | ||
18199 | hrtimer_init_on_stack(&t.timer, clockid, mode); | |
18200 | hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); | |
18201 | - if (do_nanosleep(&t, mode)) | |
18202 | + if (do_nanosleep(&t, mode, state)) | |
18203 | goto out; | |
18204 | ||
18205 | /* Absolute timers do not update the rmtp value and restart: */ | |
18206 | @@ -1573,6 +1758,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
18207 | return ret; | |
18208 | } | |
18209 | ||
18210 | +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
18211 | + const enum hrtimer_mode mode, const clockid_t clockid) | |
18212 | +{ | |
18213 | + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); | |
18214 | +} | |
18215 | + | |
18216 | SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
18217 | struct timespec __user *, rmtp) | |
18218 | { | |
18219 | @@ -1587,6 +1778,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
18220 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | |
18221 | } | |
18222 | ||
18223 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18224 | +/* | |
18225 | + * Sleep for 1 ms in hope whoever holds what we want will let it go. | |
18226 | + */ | |
18227 | +void cpu_chill(void) | |
18228 | +{ | |
18229 | + struct timespec tu = { | |
18230 | + .tv_nsec = NSEC_PER_MSEC, | |
18231 | + }; | |
18232 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
18233 | + | |
18234 | + current->flags |= PF_NOFREEZE; | |
18235 | + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, | |
18236 | + TASK_UNINTERRUPTIBLE); | |
18237 | + if (!freeze_flag) | |
18238 | + current->flags &= ~PF_NOFREEZE; | |
18239 | +} | |
18240 | +EXPORT_SYMBOL(cpu_chill); | |
18241 | +#endif | |
18242 | + | |
18243 | /* | |
18244 | * Functions related to boot-time initialization: | |
18245 | */ | |
18246 | @@ -1598,10 +1809,14 @@ int hrtimers_prepare_cpu(unsigned int cpu) | |
18247 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | |
18248 | cpu_base->clock_base[i].cpu_base = cpu_base; | |
18249 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
18250 | + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); | |
18251 | } | |
18252 | ||
18253 | cpu_base->cpu = cpu; | |
18254 | hrtimer_init_hres(cpu_base); | |
18255 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
18256 | + init_waitqueue_head(&cpu_base->wait); | |
18257 | +#endif | |
18258 | return 0; | |
18259 | } | |
18260 | ||
18261 | @@ -1671,9 +1886,26 @@ int hrtimers_dead_cpu(unsigned int scpu) | |
18262 | ||
18263 | #endif /* CONFIG_HOTPLUG_CPU */ | |
18264 | ||
18265 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
18266 | + | |
18267 | +static void run_hrtimer_softirq(struct softirq_action *h) | |
18268 | +{ | |
18269 | + hrtimer_rt_run_pending(); | |
18270 | +} | |
18271 | + | |
18272 | +static void hrtimers_open_softirq(void) | |
18273 | +{ | |
18274 | + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | |
18275 | +} | |
18276 | + | |
18277 | +#else | |
18278 | +static void hrtimers_open_softirq(void) { } | |
18279 | +#endif | |
18280 | + | |
18281 | void __init hrtimers_init(void) | |
18282 | { | |
18283 | hrtimers_prepare_cpu(smp_processor_id()); | |
18284 | + hrtimers_open_softirq(); | |
18285 | } | |
18286 | ||
18287 | /** | |
18288 | diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c | |
18289 | index 1d5c7204ddc9..184de6751180 100644 | |
18290 | --- a/kernel/time/itimer.c | |
18291 | +++ b/kernel/time/itimer.c | |
18292 | @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) | |
18293 | /* We are sharing ->siglock with it_real_fn() */ | |
18294 | if (hrtimer_try_to_cancel(timer) < 0) { | |
18295 | spin_unlock_irq(&tsk->sighand->siglock); | |
18296 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
18297 | goto again; | |
18298 | } | |
18299 | expires = timeval_to_ktime(value->it_value); | |
18300 | diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c | |
18301 | index 555e21f7b966..a5d6435fabbb 100644 | |
18302 | --- a/kernel/time/jiffies.c | |
18303 | +++ b/kernel/time/jiffies.c | |
18304 | @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = { | |
18305 | .max_cycles = 10, | |
18306 | }; | |
18307 | ||
18308 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
18309 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
18310 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
18311 | ||
18312 | #if (BITS_PER_LONG < 64) | |
18313 | u64 get_jiffies_64(void) | |
18314 | @@ -83,9 +84,9 @@ u64 get_jiffies_64(void) | |
18315 | u64 ret; | |
18316 | ||
18317 | do { | |
18318 | - seq = read_seqbegin(&jiffies_lock); | |
18319 | + seq = read_seqcount_begin(&jiffies_seq); | |
18320 | ret = jiffies_64; | |
18321 | - } while (read_seqretry(&jiffies_lock, seq)); | |
18322 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
18323 | return ret; | |
18324 | } | |
18325 | EXPORT_SYMBOL(get_jiffies_64); | |
18326 | diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c | |
18327 | index 6df8927c58a5..05b7391bf9bd 100644 | |
18328 | --- a/kernel/time/ntp.c | |
18329 | +++ b/kernel/time/ntp.c | |
18330 | @@ -17,6 +17,7 @@ | |
18331 | #include <linux/module.h> | |
18332 | #include <linux/rtc.h> | |
18333 | #include <linux/math64.h> | |
18334 | +#include <linux/swork.h> | |
18335 | ||
18336 | #include "ntp_internal.h" | |
18337 | #include "timekeeping_internal.h" | |
18338 | @@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_struct *work) | |
18339 | &sync_cmos_work, timespec64_to_jiffies(&next)); | |
18340 | } | |
18341 | ||
18342 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18343 | + | |
18344 | +static void run_clock_set_delay(struct swork_event *event) | |
18345 | +{ | |
18346 | + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
18347 | +} | |
18348 | + | |
18349 | +static struct swork_event ntp_cmos_swork; | |
18350 | + | |
18351 | +void ntp_notify_cmos_timer(void) | |
18352 | +{ | |
18353 | + swork_queue(&ntp_cmos_swork); | |
18354 | +} | |
18355 | + | |
18356 | +static __init int create_cmos_delay_thread(void) | |
18357 | +{ | |
18358 | + WARN_ON(swork_get()); | |
18359 | + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay); | |
18360 | + return 0; | |
18361 | +} | |
18362 | +early_initcall(create_cmos_delay_thread); | |
18363 | + | |
18364 | +#else | |
18365 | + | |
18366 | void ntp_notify_cmos_timer(void) | |
18367 | { | |
18368 | queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
18369 | } | |
18370 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
18371 | ||
18372 | #else | |
18373 | void ntp_notify_cmos_timer(void) { } | |
18374 | diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c | |
18375 | index 39008d78927a..633f4eaca9e7 100644 | |
18376 | --- a/kernel/time/posix-cpu-timers.c | |
18377 | +++ b/kernel/time/posix-cpu-timers.c | |
18378 | @@ -3,6 +3,7 @@ | |
18379 | */ | |
18380 | ||
18381 | #include <linux/sched.h> | |
18382 | +#include <linux/sched/rt.h> | |
18383 | #include <linux/posix-timers.h> | |
18384 | #include <linux/errno.h> | |
18385 | #include <linux/math64.h> | |
18386 | @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |
18387 | /* | |
18388 | * Disarm any old timer after extracting its expiry time. | |
18389 | */ | |
18390 | - WARN_ON_ONCE(!irqs_disabled()); | |
18391 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
18392 | ||
18393 | ret = 0; | |
18394 | old_incr = timer->it.cpu.incr; | |
18395 | @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |
18396 | /* | |
18397 | * Now re-arm for the new expiry time. | |
18398 | */ | |
18399 | - WARN_ON_ONCE(!irqs_disabled()); | |
18400 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
18401 | arm_timer(timer); | |
18402 | unlock_task_sighand(p, &flags); | |
18403 | ||
18404 | @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |
18405 | * already updated our counts. We need to check if any timers fire now. | |
18406 | * Interrupts are disabled. | |
18407 | */ | |
18408 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
18409 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
18410 | { | |
18411 | LIST_HEAD(firing); | |
18412 | struct k_itimer *timer, *next; | |
18413 | unsigned long flags; | |
18414 | ||
18415 | - WARN_ON_ONCE(!irqs_disabled()); | |
18416 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
18417 | ||
18418 | /* | |
18419 | * The fast path checks that there are no expired thread or thread | |
18420 | @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |
18421 | } | |
18422 | } | |
18423 | ||
18424 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
18425 | +#include <linux/kthread.h> | |
18426 | +#include <linux/cpu.h> | |
18427 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
18428 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
18429 | + | |
18430 | +static int posix_cpu_timers_thread(void *data) | |
18431 | +{ | |
18432 | + int cpu = (long)data; | |
18433 | + | |
18434 | + BUG_ON(per_cpu(posix_timer_task,cpu) != current); | |
18435 | + | |
18436 | + while (!kthread_should_stop()) { | |
18437 | + struct task_struct *tsk = NULL; | |
18438 | + struct task_struct *next = NULL; | |
18439 | + | |
18440 | + if (cpu_is_offline(cpu)) | |
18441 | + goto wait_to_die; | |
18442 | + | |
18443 | + /* grab task list */ | |
18444 | + raw_local_irq_disable(); | |
18445 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
18446 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
18447 | + raw_local_irq_enable(); | |
18448 | + | |
18449 | + /* its possible the list is empty, just return */ | |
18450 | + if (!tsk) { | |
18451 | + set_current_state(TASK_INTERRUPTIBLE); | |
18452 | + schedule(); | |
18453 | + __set_current_state(TASK_RUNNING); | |
18454 | + continue; | |
18455 | + } | |
18456 | + | |
18457 | + /* Process task list */ | |
18458 | + while (1) { | |
18459 | + /* save next */ | |
18460 | + next = tsk->posix_timer_list; | |
18461 | + | |
18462 | + /* run the task timers, clear its ptr and | |
18463 | + * unreference it | |
18464 | + */ | |
18465 | + __run_posix_cpu_timers(tsk); | |
18466 | + tsk->posix_timer_list = NULL; | |
18467 | + put_task_struct(tsk); | |
18468 | + | |
18469 | + /* check if this is the last on the list */ | |
18470 | + if (next == tsk) | |
18471 | + break; | |
18472 | + tsk = next; | |
18473 | + } | |
18474 | + } | |
18475 | + return 0; | |
18476 | + | |
18477 | +wait_to_die: | |
18478 | + /* Wait for kthread_stop */ | |
18479 | + set_current_state(TASK_INTERRUPTIBLE); | |
18480 | + while (!kthread_should_stop()) { | |
18481 | + schedule(); | |
18482 | + set_current_state(TASK_INTERRUPTIBLE); | |
18483 | + } | |
18484 | + __set_current_state(TASK_RUNNING); | |
18485 | + return 0; | |
18486 | +} | |
18487 | + | |
18488 | +static inline int __fastpath_timer_check(struct task_struct *tsk) | |
18489 | +{ | |
18490 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ | |
18491 | + if (unlikely(tsk->exit_state)) | |
18492 | + return 0; | |
18493 | + | |
18494 | + if (!task_cputime_zero(&tsk->cputime_expires)) | |
18495 | + return 1; | |
18496 | + | |
18497 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
18498 | + return 1; | |
18499 | + | |
18500 | + return 0; | |
18501 | +} | |
18502 | + | |
18503 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
18504 | +{ | |
18505 | + unsigned long cpu = smp_processor_id(); | |
18506 | + struct task_struct *tasklist; | |
18507 | + | |
18508 | + BUG_ON(!irqs_disabled()); | |
18509 | + if(!per_cpu(posix_timer_task, cpu)) | |
18510 | + return; | |
18511 | + /* get per-cpu references */ | |
18512 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
18513 | + | |
18514 | + /* check to see if we're already queued */ | |
18515 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
18516 | + get_task_struct(tsk); | |
18517 | + if (tasklist) { | |
18518 | + tsk->posix_timer_list = tasklist; | |
18519 | + } else { | |
18520 | + /* | |
18521 | + * The list is terminated by a self-pointing | |
18522 | + * task_struct | |
18523 | + */ | |
18524 | + tsk->posix_timer_list = tsk; | |
18525 | + } | |
18526 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
18527 | + | |
18528 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
18529 | + } | |
18530 | +} | |
18531 | + | |
18532 | +/* | |
18533 | + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. | |
18534 | + * Here we can start up the necessary migration thread for the new CPU. | |
18535 | + */ | |
18536 | +static int posix_cpu_thread_call(struct notifier_block *nfb, | |
18537 | + unsigned long action, void *hcpu) | |
18538 | +{ | |
18539 | + int cpu = (long)hcpu; | |
18540 | + struct task_struct *p; | |
18541 | + struct sched_param param; | |
18542 | + | |
18543 | + switch (action) { | |
18544 | + case CPU_UP_PREPARE: | |
18545 | + p = kthread_create(posix_cpu_timers_thread, hcpu, | |
18546 | + "posixcputmr/%d",cpu); | |
18547 | + if (IS_ERR(p)) | |
18548 | + return NOTIFY_BAD; | |
18549 | + p->flags |= PF_NOFREEZE; | |
18550 | + kthread_bind(p, cpu); | |
18551 | + /* Must be high prio to avoid getting starved */ | |
18552 | + param.sched_priority = MAX_RT_PRIO-1; | |
18553 | + sched_setscheduler(p, SCHED_FIFO, ¶m); | |
18554 | + per_cpu(posix_timer_task,cpu) = p; | |
18555 | + break; | |
18556 | + case CPU_ONLINE: | |
18557 | + /* Strictly unneccessary, as first user will wake it. */ | |
18558 | + wake_up_process(per_cpu(posix_timer_task,cpu)); | |
18559 | + break; | |
18560 | +#ifdef CONFIG_HOTPLUG_CPU | |
18561 | + case CPU_UP_CANCELED: | |
18562 | + /* Unbind it from offline cpu so it can run. Fall thru. */ | |
18563 | + kthread_bind(per_cpu(posix_timer_task, cpu), | |
18564 | + cpumask_any(cpu_online_mask)); | |
18565 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
18566 | + per_cpu(posix_timer_task,cpu) = NULL; | |
18567 | + break; | |
18568 | + case CPU_DEAD: | |
18569 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
18570 | + per_cpu(posix_timer_task,cpu) = NULL; | |
18571 | + break; | |
18572 | +#endif | |
18573 | + } | |
18574 | + return NOTIFY_OK; | |
18575 | +} | |
18576 | + | |
18577 | +/* Register at highest priority so that task migration (migrate_all_tasks) | |
18578 | + * happens before everything else. | |
18579 | + */ | |
18580 | +static struct notifier_block posix_cpu_thread_notifier = { | |
18581 | + .notifier_call = posix_cpu_thread_call, | |
18582 | + .priority = 10 | |
18583 | +}; | |
18584 | + | |
18585 | +static int __init posix_cpu_thread_init(void) | |
18586 | +{ | |
18587 | + void *hcpu = (void *)(long)smp_processor_id(); | |
18588 | + /* Start one for boot CPU. */ | |
18589 | + unsigned long cpu; | |
18590 | + | |
18591 | + /* init the per-cpu posix_timer_tasklets */ | |
18592 | + for_each_possible_cpu(cpu) | |
18593 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
18594 | + | |
18595 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); | |
18596 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); | |
18597 | + register_cpu_notifier(&posix_cpu_thread_notifier); | |
18598 | + return 0; | |
18599 | +} | |
18600 | +early_initcall(posix_cpu_thread_init); | |
18601 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
18602 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
18603 | +{ | |
18604 | + __run_posix_cpu_timers(tsk); | |
18605 | +} | |
18606 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
18607 | + | |
18608 | /* | |
18609 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
18610 | * The tsk->sighand->siglock must be held by the caller. | |
18611 | diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c | |
18612 | index f2826c35e918..464a98155a0e 100644 | |
18613 | --- a/kernel/time/posix-timers.c | |
18614 | +++ b/kernel/time/posix-timers.c | |
18615 | @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |
18616 | static struct pid *good_sigevent(sigevent_t * event) | |
18617 | { | |
18618 | struct task_struct *rtn = current->group_leader; | |
18619 | + int sig = event->sigev_signo; | |
18620 | ||
18621 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | |
18622 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | |
18623 | @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event) | |
18624 | return NULL; | |
18625 | ||
18626 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | |
18627 | - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | |
18628 | + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || | |
18629 | + sig_kernel_coredump(sig))) | |
18630 | return NULL; | |
18631 | ||
18632 | return task_pid(rtn); | |
18633 | @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) | |
18634 | return overrun; | |
18635 | } | |
18636 | ||
18637 | +/* | |
18638 | + * Protected by RCU! | |
18639 | + */ | |
18640 | +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) | |
18641 | +{ | |
18642 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18643 | + if (kc->timer_set == common_timer_set) | |
18644 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
18645 | + else | |
18646 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
18647 | + schedule_timeout(1); | |
18648 | +#endif | |
18649 | +} | |
18650 | + | |
18651 | /* Set a POSIX.1b interval timer. */ | |
18652 | /* timr->it_lock is taken. */ | |
18653 | static int | |
18654 | @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
18655 | if (!timr) | |
18656 | return -EINVAL; | |
18657 | ||
18658 | + rcu_read_lock(); | |
18659 | kc = clockid_to_kclock(timr->it_clock); | |
18660 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
18661 | error = -EINVAL; | |
18662 | @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
18663 | ||
18664 | unlock_timer(timr, flag); | |
18665 | if (error == TIMER_RETRY) { | |
18666 | + timer_wait_for_callback(kc, timr); | |
18667 | rtn = NULL; // We already got the old time... | |
18668 | + rcu_read_unlock(); | |
18669 | goto retry; | |
18670 | } | |
18671 | + rcu_read_unlock(); | |
18672 | ||
18673 | if (old_setting && !error && | |
18674 | copy_to_user(old_setting, &old_spec, sizeof (old_spec))) | |
18675 | @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) | |
18676 | if (!timer) | |
18677 | return -EINVAL; | |
18678 | ||
18679 | + rcu_read_lock(); | |
18680 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18681 | unlock_timer(timer, flags); | |
18682 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
18683 | + timer); | |
18684 | + rcu_read_unlock(); | |
18685 | goto retry_delete; | |
18686 | } | |
18687 | + rcu_read_unlock(); | |
18688 | ||
18689 | spin_lock(¤t->sighand->siglock); | |
18690 | list_del(&timer->list); | |
18691 | @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer) | |
18692 | retry_delete: | |
18693 | spin_lock_irqsave(&timer->it_lock, flags); | |
18694 | ||
18695 | - if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18696 | + /* On RT we can race with a deletion */ | |
18697 | + if (!timer->it_signal) { | |
18698 | unlock_timer(timer, flags); | |
18699 | + return; | |
18700 | + } | |
18701 | + | |
18702 | + if (timer_delete_hook(timer) == TIMER_RETRY) { | |
18703 | + rcu_read_lock(); | |
18704 | + unlock_timer(timer, flags); | |
18705 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
18706 | + timer); | |
18707 | + rcu_read_unlock(); | |
18708 | goto retry_delete; | |
18709 | } | |
18710 | list_del(&timer->list); | |
18711 | diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c | |
18712 | index 690b797f522e..fe8ba1619879 100644 | |
18713 | --- a/kernel/time/tick-broadcast-hrtimer.c | |
18714 | +++ b/kernel/time/tick-broadcast-hrtimer.c | |
18715 | @@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void) | |
18716 | { | |
18717 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
18718 | bctimer.function = bc_handler; | |
18719 | + bctimer.irqsafe = true; | |
18720 | clockevents_register_device(&ce_broadcast_hrtimer); | |
18721 | } | |
18722 | diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c | |
18723 | index 4fcd99e12aa0..5a47f2e98faf 100644 | |
18724 | --- a/kernel/time/tick-common.c | |
18725 | +++ b/kernel/time/tick-common.c | |
18726 | @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void) | |
18727 | static void tick_periodic(int cpu) | |
18728 | { | |
18729 | if (tick_do_timer_cpu == cpu) { | |
18730 | - write_seqlock(&jiffies_lock); | |
18731 | + raw_spin_lock(&jiffies_lock); | |
18732 | + write_seqcount_begin(&jiffies_seq); | |
18733 | ||
18734 | /* Keep track of the next tick event */ | |
18735 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
18736 | ||
18737 | do_timer(1); | |
18738 | - write_sequnlock(&jiffies_lock); | |
18739 | + write_seqcount_end(&jiffies_seq); | |
18740 | + raw_spin_unlock(&jiffies_lock); | |
18741 | update_wall_time(); | |
18742 | } | |
18743 | ||
18744 | @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |
18745 | ktime_t next; | |
18746 | ||
18747 | do { | |
18748 | - seq = read_seqbegin(&jiffies_lock); | |
18749 | + seq = read_seqcount_begin(&jiffies_seq); | |
18750 | next = tick_next_period; | |
18751 | - } while (read_seqretry(&jiffies_lock, seq)); | |
18752 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
18753 | ||
18754 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); | |
18755 | ||
18756 | diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c | |
18757 | index 3bcb61b52f6c..66d85482a96e 100644 | |
18758 | --- a/kernel/time/tick-sched.c | |
18759 | +++ b/kernel/time/tick-sched.c | |
18760 | @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) | |
18761 | return; | |
18762 | ||
18763 | /* Reevaluate with jiffies_lock held */ | |
18764 | - write_seqlock(&jiffies_lock); | |
18765 | + raw_spin_lock(&jiffies_lock); | |
18766 | + write_seqcount_begin(&jiffies_seq); | |
18767 | ||
18768 | delta = ktime_sub(now, last_jiffies_update); | |
18769 | if (delta.tv64 >= tick_period.tv64) { | |
18770 | @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now) | |
18771 | /* Keep the tick_next_period variable up to date */ | |
18772 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
18773 | } else { | |
18774 | - write_sequnlock(&jiffies_lock); | |
18775 | + write_seqcount_end(&jiffies_seq); | |
18776 | + raw_spin_unlock(&jiffies_lock); | |
18777 | return; | |
18778 | } | |
18779 | - write_sequnlock(&jiffies_lock); | |
18780 | + write_seqcount_end(&jiffies_seq); | |
18781 | + raw_spin_unlock(&jiffies_lock); | |
18782 | update_wall_time(); | |
18783 | } | |
18784 | ||
18785 | @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void) | |
18786 | { | |
18787 | ktime_t period; | |
18788 | ||
18789 | - write_seqlock(&jiffies_lock); | |
18790 | + raw_spin_lock(&jiffies_lock); | |
18791 | + write_seqcount_begin(&jiffies_seq); | |
18792 | /* Did we start the jiffies update yet ? */ | |
18793 | if (last_jiffies_update.tv64 == 0) | |
18794 | last_jiffies_update = tick_next_period; | |
18795 | period = last_jiffies_update; | |
18796 | - write_sequnlock(&jiffies_lock); | |
18797 | + write_seqcount_end(&jiffies_seq); | |
18798 | + raw_spin_unlock(&jiffies_lock); | |
18799 | return period; | |
18800 | } | |
18801 | ||
18802 | @@ -215,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work) | |
18803 | ||
18804 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
18805 | .func = nohz_full_kick_func, | |
18806 | + .flags = IRQ_WORK_HARD_IRQ, | |
18807 | }; | |
18808 | ||
18809 | /* | |
18810 | @@ -673,10 +679,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |
18811 | ||
18812 | /* Read jiffies and the time when jiffies were updated last */ | |
18813 | do { | |
18814 | - seq = read_seqbegin(&jiffies_lock); | |
18815 | + seq = read_seqcount_begin(&jiffies_seq); | |
18816 | basemono = last_jiffies_update.tv64; | |
18817 | basejiff = jiffies; | |
18818 | - } while (read_seqretry(&jiffies_lock, seq)); | |
18819 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
18820 | ts->last_jiffies = basejiff; | |
18821 | ||
18822 | if (rcu_needs_cpu(basemono, &next_rcu) || | |
18823 | @@ -877,14 +883,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |
18824 | return false; | |
18825 | ||
18826 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
18827 | - static int ratelimit; | |
18828 | - | |
18829 | - if (ratelimit < 10 && | |
18830 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
18831 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
18832 | - (unsigned int) local_softirq_pending()); | |
18833 | - ratelimit++; | |
18834 | - } | |
18835 | + softirq_check_pending_idle(); | |
18836 | return false; | |
18837 | } | |
18838 | ||
18839 | @@ -1193,6 +1192,7 @@ void tick_setup_sched_timer(void) | |
18840 | * Emulate tick processing via per-CPU hrtimers: | |
18841 | */ | |
18842 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
18843 | + ts->sched_timer.irqsafe = 1; | |
18844 | ts->sched_timer.function = tick_sched_timer; | |
18845 | ||
18846 | /* Get the next period (per-CPU) */ | |
18847 | diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c | |
18848 | index 46e312e9be38..fa75cf5d9253 100644 | |
18849 | --- a/kernel/time/timekeeping.c | |
18850 | +++ b/kernel/time/timekeeping.c | |
18851 | @@ -2328,8 +2328,10 @@ EXPORT_SYMBOL(hardpps); | |
18852 | */ | |
18853 | void xtime_update(unsigned long ticks) | |
18854 | { | |
18855 | - write_seqlock(&jiffies_lock); | |
18856 | + raw_spin_lock(&jiffies_lock); | |
18857 | + write_seqcount_begin(&jiffies_seq); | |
18858 | do_timer(ticks); | |
18859 | - write_sequnlock(&jiffies_lock); | |
18860 | + write_seqcount_end(&jiffies_seq); | |
18861 | + raw_spin_unlock(&jiffies_lock); | |
18862 | update_wall_time(); | |
18863 | } | |
18864 | diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h | |
18865 | index 704f595ce83f..763a3e5121ff 100644 | |
18866 | --- a/kernel/time/timekeeping.h | |
18867 | +++ b/kernel/time/timekeeping.h | |
18868 | @@ -19,7 +19,8 @@ extern void timekeeping_resume(void); | |
18869 | extern void do_timer(unsigned long ticks); | |
18870 | extern void update_wall_time(void); | |
18871 | ||
18872 | -extern seqlock_t jiffies_lock; | |
18873 | +extern raw_spinlock_t jiffies_lock; | |
18874 | +extern seqcount_t jiffies_seq; | |
18875 | ||
18876 | #define CS_NAME_LEN 32 | |
18877 | ||
18878 | diff --git a/kernel/time/timer.c b/kernel/time/timer.c | |
18879 | index c611c47de884..08a5ab762495 100644 | |
18880 | --- a/kernel/time/timer.c | |
18881 | +++ b/kernel/time/timer.c | |
18882 | @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64); | |
18883 | #endif | |
18884 | ||
18885 | struct timer_base { | |
18886 | - spinlock_t lock; | |
18887 | + raw_spinlock_t lock; | |
18888 | struct timer_list *running_timer; | |
18889 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18890 | + struct swait_queue_head wait_for_running_timer; | |
18891 | +#endif | |
18892 | unsigned long clk; | |
18893 | unsigned long next_expiry; | |
18894 | unsigned int cpu; | |
18895 | @@ -948,10 +951,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, | |
18896 | ||
18897 | if (!(tf & TIMER_MIGRATING)) { | |
18898 | base = get_timer_base(tf); | |
18899 | - spin_lock_irqsave(&base->lock, *flags); | |
18900 | + raw_spin_lock_irqsave(&base->lock, *flags); | |
18901 | if (timer->flags == tf) | |
18902 | return base; | |
18903 | - spin_unlock_irqrestore(&base->lock, *flags); | |
18904 | + raw_spin_unlock_irqrestore(&base->lock, *flags); | |
18905 | } | |
18906 | cpu_relax(); | |
18907 | } | |
18908 | @@ -1023,9 +1026,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |
18909 | /* See the comment in lock_timer_base() */ | |
18910 | timer->flags |= TIMER_MIGRATING; | |
18911 | ||
18912 | - spin_unlock(&base->lock); | |
18913 | + raw_spin_unlock(&base->lock); | |
18914 | base = new_base; | |
18915 | - spin_lock(&base->lock); | |
18916 | + raw_spin_lock(&base->lock); | |
18917 | WRITE_ONCE(timer->flags, | |
18918 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); | |
18919 | } | |
18920 | @@ -1050,7 +1053,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |
18921 | } | |
18922 | ||
18923 | out_unlock: | |
18924 | - spin_unlock_irqrestore(&base->lock, flags); | |
18925 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18926 | ||
18927 | return ret; | |
18928 | } | |
18929 | @@ -1144,19 +1147,46 @@ void add_timer_on(struct timer_list *timer, int cpu) | |
18930 | if (base != new_base) { | |
18931 | timer->flags |= TIMER_MIGRATING; | |
18932 | ||
18933 | - spin_unlock(&base->lock); | |
18934 | + raw_spin_unlock(&base->lock); | |
18935 | base = new_base; | |
18936 | - spin_lock(&base->lock); | |
18937 | + raw_spin_lock(&base->lock); | |
18938 | WRITE_ONCE(timer->flags, | |
18939 | (timer->flags & ~TIMER_BASEMASK) | cpu); | |
18940 | } | |
18941 | ||
18942 | debug_activate(timer, timer->expires); | |
18943 | internal_add_timer(base, timer); | |
18944 | - spin_unlock_irqrestore(&base->lock, flags); | |
18945 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18946 | } | |
18947 | EXPORT_SYMBOL_GPL(add_timer_on); | |
18948 | ||
18949 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18950 | +/* | |
18951 | + * Wait for a running timer | |
18952 | + */ | |
18953 | +static void wait_for_running_timer(struct timer_list *timer) | |
18954 | +{ | |
18955 | + struct timer_base *base; | |
18956 | + u32 tf = timer->flags; | |
18957 | + | |
18958 | + if (tf & TIMER_MIGRATING) | |
18959 | + return; | |
18960 | + | |
18961 | + base = get_timer_base(tf); | |
18962 | + swait_event(base->wait_for_running_timer, | |
18963 | + base->running_timer != timer); | |
18964 | +} | |
18965 | + | |
18966 | +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) | |
18967 | +#else | |
18968 | +static inline void wait_for_running_timer(struct timer_list *timer) | |
18969 | +{ | |
18970 | + cpu_relax(); | |
18971 | +} | |
18972 | + | |
18973 | +# define wakeup_timer_waiters(b) do { } while (0) | |
18974 | +#endif | |
18975 | + | |
18976 | /** | |
18977 | * del_timer - deactive a timer. | |
18978 | * @timer: the timer to be deactivated | |
18979 | @@ -1180,7 +1210,7 @@ int del_timer(struct timer_list *timer) | |
18980 | if (timer_pending(timer)) { | |
18981 | base = lock_timer_base(timer, &flags); | |
18982 | ret = detach_if_pending(timer, base, true); | |
18983 | - spin_unlock_irqrestore(&base->lock, flags); | |
18984 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18985 | } | |
18986 | ||
18987 | return ret; | |
18988 | @@ -1208,13 +1238,13 @@ int try_to_del_timer_sync(struct timer_list *timer) | |
18989 | timer_stats_timer_clear_start_info(timer); | |
18990 | ret = detach_if_pending(timer, base, true); | |
18991 | } | |
18992 | - spin_unlock_irqrestore(&base->lock, flags); | |
18993 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
18994 | ||
18995 | return ret; | |
18996 | } | |
18997 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
18998 | ||
18999 | -#ifdef CONFIG_SMP | |
19000 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
19001 | /** | |
19002 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
19003 | * @timer: the timer to be deactivated | |
19004 | @@ -1274,7 +1304,7 @@ int del_timer_sync(struct timer_list *timer) | |
19005 | int ret = try_to_del_timer_sync(timer); | |
19006 | if (ret >= 0) | |
19007 | return ret; | |
19008 | - cpu_relax(); | |
19009 | + wait_for_running_timer(timer); | |
19010 | } | |
19011 | } | |
19012 | EXPORT_SYMBOL(del_timer_sync); | |
19013 | @@ -1339,14 +1369,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) | |
19014 | fn = timer->function; | |
19015 | data = timer->data; | |
19016 | ||
19017 | - if (timer->flags & TIMER_IRQSAFE) { | |
19018 | - spin_unlock(&base->lock); | |
19019 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && | |
19020 | + timer->flags & TIMER_IRQSAFE) { | |
19021 | + raw_spin_unlock(&base->lock); | |
19022 | call_timer_fn(timer, fn, data); | |
19023 | - spin_lock(&base->lock); | |
19024 | + base->running_timer = NULL; | |
19025 | + raw_spin_lock(&base->lock); | |
19026 | } else { | |
19027 | - spin_unlock_irq(&base->lock); | |
19028 | + raw_spin_unlock_irq(&base->lock); | |
19029 | call_timer_fn(timer, fn, data); | |
19030 | - spin_lock_irq(&base->lock); | |
19031 | + base->running_timer = NULL; | |
19032 | + raw_spin_lock_irq(&base->lock); | |
19033 | } | |
19034 | } | |
19035 | } | |
19036 | @@ -1515,7 +1548,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
19037 | if (cpu_is_offline(smp_processor_id())) | |
19038 | return expires; | |
19039 | ||
19040 | - spin_lock(&base->lock); | |
19041 | + raw_spin_lock(&base->lock); | |
19042 | nextevt = __next_timer_interrupt(base); | |
19043 | is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); | |
19044 | base->next_expiry = nextevt; | |
19045 | @@ -1543,7 +1576,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
19046 | if ((expires - basem) > TICK_NSEC) | |
19047 | base->is_idle = true; | |
19048 | } | |
19049 | - spin_unlock(&base->lock); | |
19050 | + raw_spin_unlock(&base->lock); | |
19051 | ||
19052 | return cmp_next_hrtimer_event(basem, expires); | |
19053 | } | |
19054 | @@ -1608,13 +1641,13 @@ void update_process_times(int user_tick) | |
19055 | ||
19056 | /* Note: this timer irq context must be accounted for as well. */ | |
19057 | account_process_tick(p, user_tick); | |
19058 | + scheduler_tick(); | |
19059 | run_local_timers(); | |
19060 | rcu_check_callbacks(user_tick); | |
19061 | -#ifdef CONFIG_IRQ_WORK | |
19062 | +#if defined(CONFIG_IRQ_WORK) | |
19063 | if (in_irq()) | |
19064 | irq_work_tick(); | |
19065 | #endif | |
19066 | - scheduler_tick(); | |
19067 | run_posix_cpu_timers(p); | |
19068 | } | |
19069 | ||
19070 | @@ -1630,7 +1663,7 @@ static inline void __run_timers(struct timer_base *base) | |
19071 | if (!time_after_eq(jiffies, base->clk)) | |
19072 | return; | |
19073 | ||
19074 | - spin_lock_irq(&base->lock); | |
19075 | + raw_spin_lock_irq(&base->lock); | |
19076 | ||
19077 | while (time_after_eq(jiffies, base->clk)) { | |
19078 | ||
19079 | @@ -1640,8 +1673,8 @@ static inline void __run_timers(struct timer_base *base) | |
19080 | while (levels--) | |
19081 | expire_timers(base, heads + levels); | |
19082 | } | |
19083 | - base->running_timer = NULL; | |
19084 | - spin_unlock_irq(&base->lock); | |
19085 | + raw_spin_unlock_irq(&base->lock); | |
19086 | + wakeup_timer_waiters(base); | |
19087 | } | |
19088 | ||
19089 | /* | |
19090 | @@ -1651,6 +1684,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) | |
19091 | { | |
19092 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | |
19093 | ||
19094 | + irq_work_tick_soft(); | |
19095 | + | |
19096 | __run_timers(base); | |
19097 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | |
19098 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | |
19099 | @@ -1836,16 +1871,16 @@ int timers_dead_cpu(unsigned int cpu) | |
19100 | * The caller is globally serialized and nobody else | |
19101 | * takes two locks at once, deadlock is not possible. | |
19102 | */ | |
19103 | - spin_lock_irq(&new_base->lock); | |
19104 | - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
19105 | + raw_spin_lock_irq(&new_base->lock); | |
19106 | + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
19107 | ||
19108 | BUG_ON(old_base->running_timer); | |
19109 | ||
19110 | for (i = 0; i < WHEEL_SIZE; i++) | |
19111 | migrate_timer_list(new_base, old_base->vectors + i); | |
19112 | ||
19113 | - spin_unlock(&old_base->lock); | |
19114 | - spin_unlock_irq(&new_base->lock); | |
19115 | + raw_spin_unlock(&old_base->lock); | |
19116 | + raw_spin_unlock_irq(&new_base->lock); | |
19117 | put_cpu_ptr(&timer_bases); | |
19118 | } | |
19119 | return 0; | |
19120 | @@ -1861,8 +1896,11 @@ static void __init init_timer_cpu(int cpu) | |
19121 | for (i = 0; i < NR_BASES; i++) { | |
19122 | base = per_cpu_ptr(&timer_bases[i], cpu); | |
19123 | base->cpu = cpu; | |
19124 | - spin_lock_init(&base->lock); | |
19125 | + raw_spin_lock_init(&base->lock); | |
19126 | base->clk = jiffies; | |
19127 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19128 | + init_swait_queue_head(&base->wait_for_running_timer); | |
19129 | +#endif | |
19130 | } | |
19131 | } | |
19132 | ||
19133 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig | |
19134 | index 2a96b063d659..812e37237eb8 100644 | |
19135 | --- a/kernel/trace/Kconfig | |
19136 | +++ b/kernel/trace/Kconfig | |
19137 | @@ -182,6 +182,24 @@ config IRQSOFF_TRACER | |
19138 | enabled. This option and the preempt-off timing option can be | |
19139 | used together or separately.) | |
19140 | ||
19141 | +config INTERRUPT_OFF_HIST | |
19142 | + bool "Interrupts-off Latency Histogram" | |
19143 | + depends on IRQSOFF_TRACER | |
19144 | + help | |
19145 | + This option generates continuously updated histograms (one per cpu) | |
19146 | + of the duration of time periods with interrupts disabled. The | |
19147 | + histograms are disabled by default. To enable them, write a non-zero | |
19148 | + number to | |
19149 | + | |
19150 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
19151 | + | |
19152 | + If PREEMPT_OFF_HIST is also selected, additional histograms (one | |
19153 | + per cpu) are generated that accumulate the duration of time periods | |
19154 | + when both interrupts and preemption are disabled. The histogram data | |
19155 | + will be located in the debug file system at | |
19156 | + | |
19157 | + /sys/kernel/debug/tracing/latency_hist/irqsoff | |
19158 | + | |
19159 | config PREEMPT_TRACER | |
19160 | bool "Preemption-off Latency Tracer" | |
19161 | default n | |
19162 | @@ -206,6 +224,24 @@ config PREEMPT_TRACER | |
19163 | enabled. This option and the irqs-off timing option can be | |
19164 | used together or separately.) | |
19165 | ||
19166 | +config PREEMPT_OFF_HIST | |
19167 | + bool "Preemption-off Latency Histogram" | |
19168 | + depends on PREEMPT_TRACER | |
19169 | + help | |
19170 | + This option generates continuously updated histograms (one per cpu) | |
19171 | + of the duration of time periods with preemption disabled. The | |
19172 | + histograms are disabled by default. To enable them, write a non-zero | |
19173 | + number to | |
19174 | + | |
19175 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
19176 | + | |
19177 | + If INTERRUPT_OFF_HIST is also selected, additional histograms (one | |
19178 | + per cpu) are generated that accumulate the duration of time periods | |
19179 | + when both interrupts and preemption are disabled. The histogram data | |
19180 | + will be located in the debug file system at | |
19181 | + | |
19182 | + /sys/kernel/debug/tracing/latency_hist/preemptoff | |
19183 | + | |
19184 | config SCHED_TRACER | |
19185 | bool "Scheduling Latency Tracer" | |
19186 | select GENERIC_TRACER | |
19187 | @@ -251,6 +287,74 @@ config HWLAT_TRACER | |
19188 | file. Every time a latency is greater than tracing_thresh, it will | |
19189 | be recorded into the ring buffer. | |
19190 | ||
19191 | +config WAKEUP_LATENCY_HIST | |
19192 | + bool "Scheduling Latency Histogram" | |
19193 | + depends on SCHED_TRACER | |
19194 | + help | |
19195 | + This option generates continuously updated histograms (one per cpu) | |
19196 | + of the scheduling latency of the highest priority task. | |
19197 | + The histograms are disabled by default. To enable them, write a | |
19198 | + non-zero number to | |
19199 | + | |
19200 | + /sys/kernel/debug/tracing/latency_hist/enable/wakeup | |
19201 | + | |
19202 | + Two different algorithms are used, one to determine the latency of | |
19203 | + processes that exclusively use the highest priority of the system and | |
19204 | + another one to determine the latency of processes that share the | |
19205 | + highest system priority with other processes. The former is used to | |
19206 | + improve hardware and system software, the latter to optimize the | |
19207 | + priority design of a given system. The histogram data will be | |
19208 | + located in the debug file system at | |
19209 | + | |
19210 | + /sys/kernel/debug/tracing/latency_hist/wakeup | |
19211 | + | |
19212 | + and | |
19213 | + | |
19214 | + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio | |
19215 | + | |
19216 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
19217 | + Histogram are selected, additional histogram data will be collected | |
19218 | + that contain, in addition to the wakeup latency, the timer latency, in | |
19219 | + case the wakeup was triggered by an expired timer. These histograms | |
19220 | + are available in the | |
19221 | + | |
19222 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
19223 | + | |
19224 | + directory. They reflect the apparent interrupt and scheduling latency | |
19225 | + and are best suitable to determine the worst-case latency of a given | |
19226 | + system. To enable these histograms, write a non-zero number to | |
19227 | + | |
19228 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
19229 | + | |
19230 | +config MISSED_TIMER_OFFSETS_HIST | |
19231 | + depends on HIGH_RES_TIMERS | |
19232 | + select GENERIC_TRACER | |
19233 | + bool "Missed Timer Offsets Histogram" | |
19234 | + help | |
19235 | + Generate a histogram of missed timer offsets in microseconds. The | |
19236 | + histograms are disabled by default. To enable them, write a non-zero | |
19237 | + number to | |
19238 | + | |
19239 | + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets | |
19240 | + | |
19241 | + The histogram data will be located in the debug file system at | |
19242 | + | |
19243 | + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets | |
19244 | + | |
19245 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
19246 | + Histogram are selected, additional histogram data will be collected | |
19247 | + that contain, in addition to the wakeup latency, the timer latency, in | |
19248 | + case the wakeup was triggered by an expired timer. These histograms | |
19249 | + are available in the | |
19250 | + | |
19251 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
19252 | + | |
19253 | + directory. They reflect the apparent interrupt and scheduling latency | |
19254 | + and are best suitable to determine the worst-case latency of a given | |
19255 | + system. To enable these histograms, write a non-zero number to | |
19256 | + | |
19257 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
19258 | + | |
19259 | config ENABLE_DEFAULT_TRACERS | |
19260 | bool "Trace process context switches and events" | |
19261 | depends on !GENERIC_TRACER | |
19262 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile | |
19263 | index e57980845549..83af000b783c 100644 | |
19264 | --- a/kernel/trace/Makefile | |
19265 | +++ b/kernel/trace/Makefile | |
19266 | @@ -38,6 +38,10 @@ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | |
19267 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o | |
19268 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |
19269 | obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o | |
19270 | +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o | |
19271 | +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o | |
19272 | +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o | |
19273 | +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o | |
19274 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | |
19275 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |
19276 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | |
19277 | diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c | |
19278 | new file mode 100644 | |
19279 | index 000000000000..7f6ee70dea41 | |
19280 | --- /dev/null | |
19281 | +++ b/kernel/trace/latency_hist.c | |
19282 | @@ -0,0 +1,1178 @@ | |
19283 | +/* | |
19284 | + * kernel/trace/latency_hist.c | |
19285 | + * | |
19286 | + * Add support for histograms of preemption-off latency and | |
19287 | + * interrupt-off latency and wakeup latency, it depends on | |
19288 | + * Real-Time Preemption Support. | |
19289 | + * | |
19290 | + * Copyright (C) 2005 MontaVista Software, Inc. | |
19291 | + * Yi Yang <yyang@ch.mvista.com> | |
19292 | + * | |
19293 | + * Converted to work with the new latency tracer. | |
19294 | + * Copyright (C) 2008 Red Hat, Inc. | |
19295 | + * Steven Rostedt <srostedt@redhat.com> | |
19296 | + * | |
19297 | + */ | |
19298 | +#include <linux/module.h> | |
19299 | +#include <linux/debugfs.h> | |
19300 | +#include <linux/seq_file.h> | |
19301 | +#include <linux/percpu.h> | |
19302 | +#include <linux/kallsyms.h> | |
19303 | +#include <linux/uaccess.h> | |
19304 | +#include <linux/sched.h> | |
19305 | +#include <linux/sched/rt.h> | |
19306 | +#include <linux/slab.h> | |
19307 | +#include <linux/atomic.h> | |
19308 | +#include <asm/div64.h> | |
19309 | + | |
19310 | +#include "trace.h" | |
19311 | +#include <trace/events/sched.h> | |
19312 | + | |
19313 | +#define NSECS_PER_USECS 1000L | |
19314 | + | |
19315 | +#define CREATE_TRACE_POINTS | |
19316 | +#include <trace/events/hist.h> | |
19317 | + | |
19318 | +enum { | |
19319 | + IRQSOFF_LATENCY = 0, | |
19320 | + PREEMPTOFF_LATENCY, | |
19321 | + PREEMPTIRQSOFF_LATENCY, | |
19322 | + WAKEUP_LATENCY, | |
19323 | + WAKEUP_LATENCY_SHAREDPRIO, | |
19324 | + MISSED_TIMER_OFFSETS, | |
19325 | + TIMERANDWAKEUP_LATENCY, | |
19326 | + MAX_LATENCY_TYPE, | |
19327 | +}; | |
19328 | + | |
19329 | +#define MAX_ENTRY_NUM 10240 | |
19330 | + | |
19331 | +struct hist_data { | |
19332 | + atomic_t hist_mode; /* 0 log, 1 don't log */ | |
19333 | + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */ | |
19334 | + long min_lat; | |
19335 | + long max_lat; | |
19336 | + unsigned long long below_hist_bound_samples; | |
19337 | + unsigned long long above_hist_bound_samples; | |
19338 | + long long accumulate_lat; | |
19339 | + unsigned long long total_samples; | |
19340 | + unsigned long long hist_array[MAX_ENTRY_NUM]; | |
19341 | +}; | |
19342 | + | |
19343 | +struct enable_data { | |
19344 | + int latency_type; | |
19345 | + int enabled; | |
19346 | +}; | |
19347 | + | |
19348 | +static char *latency_hist_dir_root = "latency_hist"; | |
19349 | + | |
19350 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19351 | +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist); | |
19352 | +static char *irqsoff_hist_dir = "irqsoff"; | |
19353 | +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); | |
19354 | +static DEFINE_PER_CPU(int, hist_irqsoff_counting); | |
19355 | +#endif | |
19356 | + | |
19357 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19358 | +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist); | |
19359 | +static char *preemptoff_hist_dir = "preemptoff"; | |
19360 | +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); | |
19361 | +static DEFINE_PER_CPU(int, hist_preemptoff_counting); | |
19362 | +#endif | |
19363 | + | |
19364 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
19365 | +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist); | |
19366 | +static char *preemptirqsoff_hist_dir = "preemptirqsoff"; | |
19367 | +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); | |
19368 | +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting); | |
19369 | +#endif | |
19370 | + | |
19371 | +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST) | |
19372 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start); | |
19373 | +static struct enable_data preemptirqsoff_enabled_data = { | |
19374 | + .latency_type = PREEMPTIRQSOFF_LATENCY, | |
19375 | + .enabled = 0, | |
19376 | +}; | |
19377 | +#endif | |
19378 | + | |
19379 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19380 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19381 | +struct maxlatproc_data { | |
19382 | + char comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
19383 | + char current_comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
19384 | + int pid; | |
19385 | + int current_pid; | |
19386 | + int prio; | |
19387 | + int current_prio; | |
19388 | + long latency; | |
19389 | + long timeroffset; | |
19390 | + cycle_t timestamp; | |
19391 | +}; | |
19392 | +#endif | |
19393 | + | |
19394 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19395 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); | |
19396 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); | |
19397 | +static char *wakeup_latency_hist_dir = "wakeup"; | |
19398 | +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; | |
19399 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
19400 | + struct task_struct *p); | |
19401 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
19402 | + bool preempt, struct task_struct *prev, struct task_struct *next); | |
19403 | +static notrace void probe_sched_migrate_task(void *, | |
19404 | + struct task_struct *task, int cpu); | |
19405 | +static struct enable_data wakeup_latency_enabled_data = { | |
19406 | + .latency_type = WAKEUP_LATENCY, | |
19407 | + .enabled = 0, | |
19408 | +}; | |
19409 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc); | |
19410 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio); | |
19411 | +static DEFINE_PER_CPU(struct task_struct *, wakeup_task); | |
19412 | +static DEFINE_PER_CPU(int, wakeup_sharedprio); | |
19413 | +static unsigned long wakeup_pid; | |
19414 | +#endif | |
19415 | + | |
19416 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19417 | +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets); | |
19418 | +static char *missed_timer_offsets_dir = "missed_timer_offsets"; | |
19419 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
19420 | + long long offset, struct task_struct *curr, struct task_struct *task); | |
19421 | +static struct enable_data missed_timer_offsets_enabled_data = { | |
19422 | + .latency_type = MISSED_TIMER_OFFSETS, | |
19423 | + .enabled = 0, | |
19424 | +}; | |
19425 | +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); | |
19426 | +static unsigned long missed_timer_offsets_pid; | |
19427 | +#endif | |
19428 | + | |
19429 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19430 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19431 | +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); | |
19432 | +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; | |
19433 | +static struct enable_data timerandwakeup_enabled_data = { | |
19434 | + .latency_type = TIMERANDWAKEUP_LATENCY, | |
19435 | + .enabled = 0, | |
19436 | +}; | |
19437 | +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); | |
19438 | +#endif | |
19439 | + | |
19440 | +void notrace latency_hist(int latency_type, int cpu, long latency, | |
19441 | + long timeroffset, cycle_t stop, | |
19442 | + struct task_struct *p) | |
19443 | +{ | |
19444 | + struct hist_data *my_hist; | |
19445 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19446 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19447 | + struct maxlatproc_data *mp = NULL; | |
19448 | +#endif | |
19449 | + | |
19450 | + if (!cpu_possible(cpu) || latency_type < 0 || | |
19451 | + latency_type >= MAX_LATENCY_TYPE) | |
19452 | + return; | |
19453 | + | |
19454 | + switch (latency_type) { | |
19455 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19456 | + case IRQSOFF_LATENCY: | |
19457 | + my_hist = &per_cpu(irqsoff_hist, cpu); | |
19458 | + break; | |
19459 | +#endif | |
19460 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19461 | + case PREEMPTOFF_LATENCY: | |
19462 | + my_hist = &per_cpu(preemptoff_hist, cpu); | |
19463 | + break; | |
19464 | +#endif | |
19465 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
19466 | + case PREEMPTIRQSOFF_LATENCY: | |
19467 | + my_hist = &per_cpu(preemptirqsoff_hist, cpu); | |
19468 | + break; | |
19469 | +#endif | |
19470 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19471 | + case WAKEUP_LATENCY: | |
19472 | + my_hist = &per_cpu(wakeup_latency_hist, cpu); | |
19473 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
19474 | + break; | |
19475 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
19476 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
19477 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
19478 | + break; | |
19479 | +#endif | |
19480 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19481 | + case MISSED_TIMER_OFFSETS: | |
19482 | + my_hist = &per_cpu(missed_timer_offsets, cpu); | |
19483 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
19484 | + break; | |
19485 | +#endif | |
19486 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19487 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19488 | + case TIMERANDWAKEUP_LATENCY: | |
19489 | + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
19490 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
19491 | + break; | |
19492 | +#endif | |
19493 | + | |
19494 | + default: | |
19495 | + return; | |
19496 | + } | |
19497 | + | |
19498 | + latency += my_hist->offset; | |
19499 | + | |
19500 | + if (atomic_read(&my_hist->hist_mode) == 0) | |
19501 | + return; | |
19502 | + | |
19503 | + if (latency < 0 || latency >= MAX_ENTRY_NUM) { | |
19504 | + if (latency < 0) | |
19505 | + my_hist->below_hist_bound_samples++; | |
19506 | + else | |
19507 | + my_hist->above_hist_bound_samples++; | |
19508 | + } else | |
19509 | + my_hist->hist_array[latency]++; | |
19510 | + | |
19511 | + if (unlikely(latency > my_hist->max_lat || | |
19512 | + my_hist->min_lat == LONG_MAX)) { | |
19513 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19514 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19515 | + if (latency_type == WAKEUP_LATENCY || | |
19516 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
19517 | + latency_type == MISSED_TIMER_OFFSETS || | |
19518 | + latency_type == TIMERANDWAKEUP_LATENCY) { | |
19519 | + strncpy(mp->comm, p->comm, sizeof(mp->comm)); | |
19520 | + strncpy(mp->current_comm, current->comm, | |
19521 | + sizeof(mp->current_comm)); | |
19522 | + mp->pid = task_pid_nr(p); | |
19523 | + mp->current_pid = task_pid_nr(current); | |
19524 | + mp->prio = p->prio; | |
19525 | + mp->current_prio = current->prio; | |
19526 | + mp->latency = latency; | |
19527 | + mp->timeroffset = timeroffset; | |
19528 | + mp->timestamp = stop; | |
19529 | + } | |
19530 | +#endif | |
19531 | + my_hist->max_lat = latency; | |
19532 | + } | |
19533 | + if (unlikely(latency < my_hist->min_lat)) | |
19534 | + my_hist->min_lat = latency; | |
19535 | + my_hist->total_samples++; | |
19536 | + my_hist->accumulate_lat += latency; | |
19537 | +} | |
19538 | + | |
19539 | +static void *l_start(struct seq_file *m, loff_t *pos) | |
19540 | +{ | |
19541 | + loff_t *index_ptr = NULL; | |
19542 | + loff_t index = *pos; | |
19543 | + struct hist_data *my_hist = m->private; | |
19544 | + | |
19545 | + if (index == 0) { | |
19546 | + char minstr[32], avgstr[32], maxstr[32]; | |
19547 | + | |
19548 | + atomic_dec(&my_hist->hist_mode); | |
19549 | + | |
19550 | + if (likely(my_hist->total_samples)) { | |
19551 | + long avg = (long) div64_s64(my_hist->accumulate_lat, | |
19552 | + my_hist->total_samples); | |
19553 | + snprintf(minstr, sizeof(minstr), "%ld", | |
19554 | + my_hist->min_lat - my_hist->offset); | |
19555 | + snprintf(avgstr, sizeof(avgstr), "%ld", | |
19556 | + avg - my_hist->offset); | |
19557 | + snprintf(maxstr, sizeof(maxstr), "%ld", | |
19558 | + my_hist->max_lat - my_hist->offset); | |
19559 | + } else { | |
19560 | + strcpy(minstr, "<undef>"); | |
19561 | + strcpy(avgstr, minstr); | |
19562 | + strcpy(maxstr, minstr); | |
19563 | + } | |
19564 | + | |
19565 | + seq_printf(m, "#Minimum latency: %s microseconds\n" | |
19566 | + "#Average latency: %s microseconds\n" | |
19567 | + "#Maximum latency: %s microseconds\n" | |
19568 | + "#Total samples: %llu\n" | |
19569 | + "#There are %llu samples lower than %ld" | |
19570 | + " microseconds.\n" | |
19571 | + "#There are %llu samples greater or equal" | |
19572 | + " than %ld microseconds.\n" | |
19573 | + "#usecs\t%16s\n", | |
19574 | + minstr, avgstr, maxstr, | |
19575 | + my_hist->total_samples, | |
19576 | + my_hist->below_hist_bound_samples, | |
19577 | + -my_hist->offset, | |
19578 | + my_hist->above_hist_bound_samples, | |
19579 | + MAX_ENTRY_NUM - my_hist->offset, | |
19580 | + "samples"); | |
19581 | + } | |
19582 | + if (index < MAX_ENTRY_NUM) { | |
19583 | + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL); | |
19584 | + if (index_ptr) | |
19585 | + *index_ptr = index; | |
19586 | + } | |
19587 | + | |
19588 | + return index_ptr; | |
19589 | +} | |
19590 | + | |
19591 | +static void *l_next(struct seq_file *m, void *p, loff_t *pos) | |
19592 | +{ | |
19593 | + loff_t *index_ptr = p; | |
19594 | + struct hist_data *my_hist = m->private; | |
19595 | + | |
19596 | + if (++*pos >= MAX_ENTRY_NUM) { | |
19597 | + atomic_inc(&my_hist->hist_mode); | |
19598 | + return NULL; | |
19599 | + } | |
19600 | + *index_ptr = *pos; | |
19601 | + return index_ptr; | |
19602 | +} | |
19603 | + | |
19604 | +static void l_stop(struct seq_file *m, void *p) | |
19605 | +{ | |
19606 | + kfree(p); | |
19607 | +} | |
19608 | + | |
19609 | +static int l_show(struct seq_file *m, void *p) | |
19610 | +{ | |
19611 | + int index = *(loff_t *) p; | |
19612 | + struct hist_data *my_hist = m->private; | |
19613 | + | |
19614 | + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset, | |
19615 | + my_hist->hist_array[index]); | |
19616 | + return 0; | |
19617 | +} | |
19618 | + | |
19619 | +static const struct seq_operations latency_hist_seq_op = { | |
19620 | + .start = l_start, | |
19621 | + .next = l_next, | |
19622 | + .stop = l_stop, | |
19623 | + .show = l_show | |
19624 | +}; | |
19625 | + | |
19626 | +static int latency_hist_open(struct inode *inode, struct file *file) | |
19627 | +{ | |
19628 | + int ret; | |
19629 | + | |
19630 | + ret = seq_open(file, &latency_hist_seq_op); | |
19631 | + if (!ret) { | |
19632 | + struct seq_file *seq = file->private_data; | |
19633 | + seq->private = inode->i_private; | |
19634 | + } | |
19635 | + return ret; | |
19636 | +} | |
19637 | + | |
19638 | +static const struct file_operations latency_hist_fops = { | |
19639 | + .open = latency_hist_open, | |
19640 | + .read = seq_read, | |
19641 | + .llseek = seq_lseek, | |
19642 | + .release = seq_release, | |
19643 | +}; | |
19644 | + | |
19645 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19646 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19647 | +static void clear_maxlatprocdata(struct maxlatproc_data *mp) | |
19648 | +{ | |
19649 | + mp->comm[0] = mp->current_comm[0] = '\0'; | |
19650 | + mp->prio = mp->current_prio = mp->pid = mp->current_pid = | |
19651 | + mp->latency = mp->timeroffset = -1; | |
19652 | + mp->timestamp = 0; | |
19653 | +} | |
19654 | +#endif | |
19655 | + | |
19656 | +static void hist_reset(struct hist_data *hist) | |
19657 | +{ | |
19658 | + atomic_dec(&hist->hist_mode); | |
19659 | + | |
19660 | + memset(hist->hist_array, 0, sizeof(hist->hist_array)); | |
19661 | + hist->below_hist_bound_samples = 0ULL; | |
19662 | + hist->above_hist_bound_samples = 0ULL; | |
19663 | + hist->min_lat = LONG_MAX; | |
19664 | + hist->max_lat = LONG_MIN; | |
19665 | + hist->total_samples = 0ULL; | |
19666 | + hist->accumulate_lat = 0LL; | |
19667 | + | |
19668 | + atomic_inc(&hist->hist_mode); | |
19669 | +} | |
19670 | + | |
19671 | +static ssize_t | |
19672 | +latency_hist_reset(struct file *file, const char __user *a, | |
19673 | + size_t size, loff_t *off) | |
19674 | +{ | |
19675 | + int cpu; | |
19676 | + struct hist_data *hist = NULL; | |
19677 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19678 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19679 | + struct maxlatproc_data *mp = NULL; | |
19680 | +#endif | |
19681 | + off_t latency_type = (off_t) file->private_data; | |
19682 | + | |
19683 | + for_each_online_cpu(cpu) { | |
19684 | + | |
19685 | + switch (latency_type) { | |
19686 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19687 | + case PREEMPTOFF_LATENCY: | |
19688 | + hist = &per_cpu(preemptoff_hist, cpu); | |
19689 | + break; | |
19690 | +#endif | |
19691 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19692 | + case IRQSOFF_LATENCY: | |
19693 | + hist = &per_cpu(irqsoff_hist, cpu); | |
19694 | + break; | |
19695 | +#endif | |
19696 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19697 | + case PREEMPTIRQSOFF_LATENCY: | |
19698 | + hist = &per_cpu(preemptirqsoff_hist, cpu); | |
19699 | + break; | |
19700 | +#endif | |
19701 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19702 | + case WAKEUP_LATENCY: | |
19703 | + hist = &per_cpu(wakeup_latency_hist, cpu); | |
19704 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
19705 | + break; | |
19706 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
19707 | + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
19708 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
19709 | + break; | |
19710 | +#endif | |
19711 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19712 | + case MISSED_TIMER_OFFSETS: | |
19713 | + hist = &per_cpu(missed_timer_offsets, cpu); | |
19714 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
19715 | + break; | |
19716 | +#endif | |
19717 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19718 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19719 | + case TIMERANDWAKEUP_LATENCY: | |
19720 | + hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
19721 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
19722 | + break; | |
19723 | +#endif | |
19724 | + } | |
19725 | + | |
19726 | + hist_reset(hist); | |
19727 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19728 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19729 | + if (latency_type == WAKEUP_LATENCY || | |
19730 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
19731 | + latency_type == MISSED_TIMER_OFFSETS || | |
19732 | + latency_type == TIMERANDWAKEUP_LATENCY) | |
19733 | + clear_maxlatprocdata(mp); | |
19734 | +#endif | |
19735 | + } | |
19736 | + | |
19737 | + return size; | |
19738 | +} | |
19739 | + | |
19740 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19741 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19742 | +static ssize_t | |
19743 | +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19744 | +{ | |
19745 | + char buf[64]; | |
19746 | + int r; | |
19747 | + unsigned long *this_pid = file->private_data; | |
19748 | + | |
19749 | + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid); | |
19750 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19751 | +} | |
19752 | + | |
19753 | +static ssize_t do_pid(struct file *file, const char __user *ubuf, | |
19754 | + size_t cnt, loff_t *ppos) | |
19755 | +{ | |
19756 | + char buf[64]; | |
19757 | + unsigned long pid; | |
19758 | + unsigned long *this_pid = file->private_data; | |
19759 | + | |
19760 | + if (cnt >= sizeof(buf)) | |
19761 | + return -EINVAL; | |
19762 | + | |
19763 | + if (copy_from_user(&buf, ubuf, cnt)) | |
19764 | + return -EFAULT; | |
19765 | + | |
19766 | + buf[cnt] = '\0'; | |
19767 | + | |
19768 | + if (kstrtoul(buf, 10, &pid)) | |
19769 | + return -EINVAL; | |
19770 | + | |
19771 | + *this_pid = pid; | |
19772 | + | |
19773 | + return cnt; | |
19774 | +} | |
19775 | +#endif | |
19776 | + | |
19777 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
19778 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19779 | +static ssize_t | |
19780 | +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19781 | +{ | |
19782 | + int r; | |
19783 | + struct maxlatproc_data *mp = file->private_data; | |
19784 | + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8); | |
19785 | + unsigned long long t; | |
19786 | + unsigned long usecs, secs; | |
19787 | + char *buf; | |
19788 | + | |
19789 | + if (mp->pid == -1 || mp->current_pid == -1) { | |
19790 | + buf = "(none)\n"; | |
19791 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, | |
19792 | + strlen(buf)); | |
19793 | + } | |
19794 | + | |
19795 | + buf = kmalloc(strmaxlen, GFP_KERNEL); | |
19796 | + if (buf == NULL) | |
19797 | + return -ENOMEM; | |
19798 | + | |
19799 | + t = ns2usecs(mp->timestamp); | |
19800 | + usecs = do_div(t, USEC_PER_SEC); | |
19801 | + secs = (unsigned long) t; | |
19802 | + r = snprintf(buf, strmaxlen, | |
19803 | + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid, | |
19804 | + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm, | |
19805 | + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm, | |
19806 | + secs, usecs); | |
19807 | + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19808 | + kfree(buf); | |
19809 | + return r; | |
19810 | +} | |
19811 | +#endif | |
19812 | + | |
19813 | +static ssize_t | |
19814 | +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
19815 | +{ | |
19816 | + char buf[64]; | |
19817 | + struct enable_data *ed = file->private_data; | |
19818 | + int r; | |
19819 | + | |
19820 | + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled); | |
19821 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
19822 | +} | |
19823 | + | |
19824 | +static ssize_t | |
19825 | +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) | |
19826 | +{ | |
19827 | + char buf[64]; | |
19828 | + long enable; | |
19829 | + struct enable_data *ed = file->private_data; | |
19830 | + | |
19831 | + if (cnt >= sizeof(buf)) | |
19832 | + return -EINVAL; | |
19833 | + | |
19834 | + if (copy_from_user(&buf, ubuf, cnt)) | |
19835 | + return -EFAULT; | |
19836 | + | |
19837 | + buf[cnt] = 0; | |
19838 | + | |
19839 | + if (kstrtoul(buf, 10, &enable)) | |
19840 | + return -EINVAL; | |
19841 | + | |
19842 | + if ((enable && ed->enabled) || (!enable && !ed->enabled)) | |
19843 | + return cnt; | |
19844 | + | |
19845 | + if (enable) { | |
19846 | + int ret; | |
19847 | + | |
19848 | + switch (ed->latency_type) { | |
19849 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19850 | + case PREEMPTIRQSOFF_LATENCY: | |
19851 | + ret = register_trace_preemptirqsoff_hist( | |
19852 | + probe_preemptirqsoff_hist, NULL); | |
19853 | + if (ret) { | |
19854 | + pr_info("wakeup trace: Couldn't assign " | |
19855 | + "probe_preemptirqsoff_hist " | |
19856 | + "to trace_preemptirqsoff_hist\n"); | |
19857 | + return ret; | |
19858 | + } | |
19859 | + break; | |
19860 | +#endif | |
19861 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19862 | + case WAKEUP_LATENCY: | |
19863 | + ret = register_trace_sched_wakeup( | |
19864 | + probe_wakeup_latency_hist_start, NULL); | |
19865 | + if (ret) { | |
19866 | + pr_info("wakeup trace: Couldn't assign " | |
19867 | + "probe_wakeup_latency_hist_start " | |
19868 | + "to trace_sched_wakeup\n"); | |
19869 | + return ret; | |
19870 | + } | |
19871 | + ret = register_trace_sched_wakeup_new( | |
19872 | + probe_wakeup_latency_hist_start, NULL); | |
19873 | + if (ret) { | |
19874 | + pr_info("wakeup trace: Couldn't assign " | |
19875 | + "probe_wakeup_latency_hist_start " | |
19876 | + "to trace_sched_wakeup_new\n"); | |
19877 | + unregister_trace_sched_wakeup( | |
19878 | + probe_wakeup_latency_hist_start, NULL); | |
19879 | + return ret; | |
19880 | + } | |
19881 | + ret = register_trace_sched_switch( | |
19882 | + probe_wakeup_latency_hist_stop, NULL); | |
19883 | + if (ret) { | |
19884 | + pr_info("wakeup trace: Couldn't assign " | |
19885 | + "probe_wakeup_latency_hist_stop " | |
19886 | + "to trace_sched_switch\n"); | |
19887 | + unregister_trace_sched_wakeup( | |
19888 | + probe_wakeup_latency_hist_start, NULL); | |
19889 | + unregister_trace_sched_wakeup_new( | |
19890 | + probe_wakeup_latency_hist_start, NULL); | |
19891 | + return ret; | |
19892 | + } | |
19893 | + ret = register_trace_sched_migrate_task( | |
19894 | + probe_sched_migrate_task, NULL); | |
19895 | + if (ret) { | |
19896 | + pr_info("wakeup trace: Couldn't assign " | |
19897 | + "probe_sched_migrate_task " | |
19898 | + "to trace_sched_migrate_task\n"); | |
19899 | + unregister_trace_sched_wakeup( | |
19900 | + probe_wakeup_latency_hist_start, NULL); | |
19901 | + unregister_trace_sched_wakeup_new( | |
19902 | + probe_wakeup_latency_hist_start, NULL); | |
19903 | + unregister_trace_sched_switch( | |
19904 | + probe_wakeup_latency_hist_stop, NULL); | |
19905 | + return ret; | |
19906 | + } | |
19907 | + break; | |
19908 | +#endif | |
19909 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19910 | + case MISSED_TIMER_OFFSETS: | |
19911 | + ret = register_trace_hrtimer_interrupt( | |
19912 | + probe_hrtimer_interrupt, NULL); | |
19913 | + if (ret) { | |
19914 | + pr_info("wakeup trace: Couldn't assign " | |
19915 | + "probe_hrtimer_interrupt " | |
19916 | + "to trace_hrtimer_interrupt\n"); | |
19917 | + return ret; | |
19918 | + } | |
19919 | + break; | |
19920 | +#endif | |
19921 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
19922 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
19923 | + case TIMERANDWAKEUP_LATENCY: | |
19924 | + if (!wakeup_latency_enabled_data.enabled || | |
19925 | + !missed_timer_offsets_enabled_data.enabled) | |
19926 | + return -EINVAL; | |
19927 | + break; | |
19928 | +#endif | |
19929 | + default: | |
19930 | + break; | |
19931 | + } | |
19932 | + } else { | |
19933 | + switch (ed->latency_type) { | |
19934 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
19935 | + case PREEMPTIRQSOFF_LATENCY: | |
19936 | + { | |
19937 | + int cpu; | |
19938 | + | |
19939 | + unregister_trace_preemptirqsoff_hist( | |
19940 | + probe_preemptirqsoff_hist, NULL); | |
19941 | + for_each_online_cpu(cpu) { | |
19942 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
19943 | + per_cpu(hist_irqsoff_counting, | |
19944 | + cpu) = 0; | |
19945 | +#endif | |
19946 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
19947 | + per_cpu(hist_preemptoff_counting, | |
19948 | + cpu) = 0; | |
19949 | +#endif | |
19950 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
19951 | + per_cpu(hist_preemptirqsoff_counting, | |
19952 | + cpu) = 0; | |
19953 | +#endif | |
19954 | + } | |
19955 | + } | |
19956 | + break; | |
19957 | +#endif | |
19958 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19959 | + case WAKEUP_LATENCY: | |
19960 | + { | |
19961 | + int cpu; | |
19962 | + | |
19963 | + unregister_trace_sched_wakeup( | |
19964 | + probe_wakeup_latency_hist_start, NULL); | |
19965 | + unregister_trace_sched_wakeup_new( | |
19966 | + probe_wakeup_latency_hist_start, NULL); | |
19967 | + unregister_trace_sched_switch( | |
19968 | + probe_wakeup_latency_hist_stop, NULL); | |
19969 | + unregister_trace_sched_migrate_task( | |
19970 | + probe_sched_migrate_task, NULL); | |
19971 | + | |
19972 | + for_each_online_cpu(cpu) { | |
19973 | + per_cpu(wakeup_task, cpu) = NULL; | |
19974 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
19975 | + } | |
19976 | + } | |
19977 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19978 | + timerandwakeup_enabled_data.enabled = 0; | |
19979 | +#endif | |
19980 | + break; | |
19981 | +#endif | |
19982 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19983 | + case MISSED_TIMER_OFFSETS: | |
19984 | + unregister_trace_hrtimer_interrupt( | |
19985 | + probe_hrtimer_interrupt, NULL); | |
19986 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
19987 | + timerandwakeup_enabled_data.enabled = 0; | |
19988 | +#endif | |
19989 | + break; | |
19990 | +#endif | |
19991 | + default: | |
19992 | + break; | |
19993 | + } | |
19994 | + } | |
19995 | + ed->enabled = enable; | |
19996 | + return cnt; | |
19997 | +} | |
19998 | + | |
19999 | +static const struct file_operations latency_hist_reset_fops = { | |
20000 | + .open = tracing_open_generic, | |
20001 | + .write = latency_hist_reset, | |
20002 | +}; | |
20003 | + | |
20004 | +static const struct file_operations enable_fops = { | |
20005 | + .open = tracing_open_generic, | |
20006 | + .read = show_enable, | |
20007 | + .write = do_enable, | |
20008 | +}; | |
20009 | + | |
20010 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20011 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20012 | +static const struct file_operations pid_fops = { | |
20013 | + .open = tracing_open_generic, | |
20014 | + .read = show_pid, | |
20015 | + .write = do_pid, | |
20016 | +}; | |
20017 | + | |
20018 | +static const struct file_operations maxlatproc_fops = { | |
20019 | + .open = tracing_open_generic, | |
20020 | + .read = show_maxlatproc, | |
20021 | +}; | |
20022 | +#endif | |
20023 | + | |
20024 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
20025 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, | |
20026 | + int starthist) | |
20027 | +{ | |
20028 | + int cpu = raw_smp_processor_id(); | |
20029 | + int time_set = 0; | |
20030 | + | |
20031 | + if (starthist) { | |
20032 | + cycle_t uninitialized_var(start); | |
20033 | + | |
20034 | + if (!preempt_count() && !irqs_disabled()) | |
20035 | + return; | |
20036 | + | |
20037 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20038 | + if ((reason == IRQS_OFF || reason == TRACE_START) && | |
20039 | + !per_cpu(hist_irqsoff_counting, cpu)) { | |
20040 | + per_cpu(hist_irqsoff_counting, cpu) = 1; | |
20041 | + start = ftrace_now(cpu); | |
20042 | + time_set++; | |
20043 | + per_cpu(hist_irqsoff_start, cpu) = start; | |
20044 | + } | |
20045 | +#endif | |
20046 | + | |
20047 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20048 | + if ((reason == PREEMPT_OFF || reason == TRACE_START) && | |
20049 | + !per_cpu(hist_preemptoff_counting, cpu)) { | |
20050 | + per_cpu(hist_preemptoff_counting, cpu) = 1; | |
20051 | + if (!(time_set++)) | |
20052 | + start = ftrace_now(cpu); | |
20053 | + per_cpu(hist_preemptoff_start, cpu) = start; | |
20054 | + } | |
20055 | +#endif | |
20056 | + | |
20057 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
20058 | + if (per_cpu(hist_irqsoff_counting, cpu) && | |
20059 | + per_cpu(hist_preemptoff_counting, cpu) && | |
20060 | + !per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
20061 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 1; | |
20062 | + if (!time_set) | |
20063 | + start = ftrace_now(cpu); | |
20064 | + per_cpu(hist_preemptirqsoff_start, cpu) = start; | |
20065 | + } | |
20066 | +#endif | |
20067 | + } else { | |
20068 | + cycle_t uninitialized_var(stop); | |
20069 | + | |
20070 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20071 | + if ((reason == IRQS_ON || reason == TRACE_STOP) && | |
20072 | + per_cpu(hist_irqsoff_counting, cpu)) { | |
20073 | + cycle_t start = per_cpu(hist_irqsoff_start, cpu); | |
20074 | + | |
20075 | + stop = ftrace_now(cpu); | |
20076 | + time_set++; | |
20077 | + if (start) { | |
20078 | + long latency = ((long) (stop - start)) / | |
20079 | + NSECS_PER_USECS; | |
20080 | + | |
20081 | + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0, | |
20082 | + stop, NULL); | |
20083 | + } | |
20084 | + per_cpu(hist_irqsoff_counting, cpu) = 0; | |
20085 | + } | |
20086 | +#endif | |
20087 | + | |
20088 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20089 | + if ((reason == PREEMPT_ON || reason == TRACE_STOP) && | |
20090 | + per_cpu(hist_preemptoff_counting, cpu)) { | |
20091 | + cycle_t start = per_cpu(hist_preemptoff_start, cpu); | |
20092 | + | |
20093 | + if (!(time_set++)) | |
20094 | + stop = ftrace_now(cpu); | |
20095 | + if (start) { | |
20096 | + long latency = ((long) (stop - start)) / | |
20097 | + NSECS_PER_USECS; | |
20098 | + | |
20099 | + latency_hist(PREEMPTOFF_LATENCY, cpu, latency, | |
20100 | + 0, stop, NULL); | |
20101 | + } | |
20102 | + per_cpu(hist_preemptoff_counting, cpu) = 0; | |
20103 | + } | |
20104 | +#endif | |
20105 | + | |
20106 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
20107 | + if ((!per_cpu(hist_irqsoff_counting, cpu) || | |
20108 | + !per_cpu(hist_preemptoff_counting, cpu)) && | |
20109 | + per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
20110 | + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu); | |
20111 | + | |
20112 | + if (!time_set) | |
20113 | + stop = ftrace_now(cpu); | |
20114 | + if (start) { | |
20115 | + long latency = ((long) (stop - start)) / | |
20116 | + NSECS_PER_USECS; | |
20117 | + | |
20118 | + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu, | |
20119 | + latency, 0, stop, NULL); | |
20120 | + } | |
20121 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 0; | |
20122 | + } | |
20123 | +#endif | |
20124 | + } | |
20125 | +} | |
20126 | +#endif | |
20127 | + | |
20128 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20129 | +static DEFINE_RAW_SPINLOCK(wakeup_lock); | |
20130 | +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task, | |
20131 | + int cpu) | |
20132 | +{ | |
20133 | + int old_cpu = task_cpu(task); | |
20134 | + | |
20135 | + if (cpu != old_cpu) { | |
20136 | + unsigned long flags; | |
20137 | + struct task_struct *cpu_wakeup_task; | |
20138 | + | |
20139 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
20140 | + | |
20141 | + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu); | |
20142 | + if (task == cpu_wakeup_task) { | |
20143 | + put_task_struct(cpu_wakeup_task); | |
20144 | + per_cpu(wakeup_task, old_cpu) = NULL; | |
20145 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task; | |
20146 | + get_task_struct(cpu_wakeup_task); | |
20147 | + } | |
20148 | + | |
20149 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
20150 | + } | |
20151 | +} | |
20152 | + | |
20153 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
20154 | + struct task_struct *p) | |
20155 | +{ | |
20156 | + unsigned long flags; | |
20157 | + struct task_struct *curr = current; | |
20158 | + int cpu = task_cpu(p); | |
20159 | + struct task_struct *cpu_wakeup_task; | |
20160 | + | |
20161 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
20162 | + | |
20163 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
20164 | + | |
20165 | + if (wakeup_pid) { | |
20166 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
20167 | + p->prio == curr->prio) | |
20168 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
20169 | + if (likely(wakeup_pid != task_pid_nr(p))) | |
20170 | + goto out; | |
20171 | + } else { | |
20172 | + if (likely(!rt_task(p)) || | |
20173 | + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) || | |
20174 | + p->prio > curr->prio) | |
20175 | + goto out; | |
20176 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
20177 | + p->prio == curr->prio) | |
20178 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
20179 | + } | |
20180 | + | |
20181 | + if (cpu_wakeup_task) | |
20182 | + put_task_struct(cpu_wakeup_task); | |
20183 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p; | |
20184 | + get_task_struct(cpu_wakeup_task); | |
20185 | + cpu_wakeup_task->preempt_timestamp_hist = | |
20186 | + ftrace_now(raw_smp_processor_id()); | |
20187 | +out: | |
20188 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
20189 | +} | |
20190 | + | |
20191 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
20192 | + bool preempt, struct task_struct *prev, struct task_struct *next) | |
20193 | +{ | |
20194 | + unsigned long flags; | |
20195 | + int cpu = task_cpu(next); | |
20196 | + long latency; | |
20197 | + cycle_t stop; | |
20198 | + struct task_struct *cpu_wakeup_task; | |
20199 | + | |
20200 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
20201 | + | |
20202 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
20203 | + | |
20204 | + if (cpu_wakeup_task == NULL) | |
20205 | + goto out; | |
20206 | + | |
20207 | + /* Already running? */ | |
20208 | + if (unlikely(current == cpu_wakeup_task)) | |
20209 | + goto out_reset; | |
20210 | + | |
20211 | + if (next != cpu_wakeup_task) { | |
20212 | + if (next->prio < cpu_wakeup_task->prio) | |
20213 | + goto out_reset; | |
20214 | + | |
20215 | + if (next->prio == cpu_wakeup_task->prio) | |
20216 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
20217 | + | |
20218 | + goto out; | |
20219 | + } | |
20220 | + | |
20221 | + if (current->prio == cpu_wakeup_task->prio) | |
20222 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
20223 | + | |
20224 | + /* | |
20225 | + * The task we are waiting for is about to be switched to. | |
20226 | + * Calculate latency and store it in histogram. | |
20227 | + */ | |
20228 | + stop = ftrace_now(raw_smp_processor_id()); | |
20229 | + | |
20230 | + latency = ((long) (stop - next->preempt_timestamp_hist)) / | |
20231 | + NSECS_PER_USECS; | |
20232 | + | |
20233 | + if (per_cpu(wakeup_sharedprio, cpu)) { | |
20234 | + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop, | |
20235 | + next); | |
20236 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
20237 | + } else { | |
20238 | + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next); | |
20239 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20240 | + if (timerandwakeup_enabled_data.enabled) { | |
20241 | + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, | |
20242 | + next->timer_offset + latency, next->timer_offset, | |
20243 | + stop, next); | |
20244 | + } | |
20245 | +#endif | |
20246 | + } | |
20247 | + | |
20248 | +out_reset: | |
20249 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20250 | + next->timer_offset = 0; | |
20251 | +#endif | |
20252 | + put_task_struct(cpu_wakeup_task); | |
20253 | + per_cpu(wakeup_task, cpu) = NULL; | |
20254 | +out: | |
20255 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
20256 | +} | |
20257 | +#endif | |
20258 | + | |
20259 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20260 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
20261 | + long long latency_ns, struct task_struct *curr, | |
20262 | + struct task_struct *task) | |
20263 | +{ | |
20264 | + if (latency_ns <= 0 && task != NULL && rt_task(task) && | |
20265 | + (task->prio < curr->prio || | |
20266 | + (task->prio == curr->prio && | |
20267 | + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) { | |
20268 | + long latency; | |
20269 | + cycle_t now; | |
20270 | + | |
20271 | + if (missed_timer_offsets_pid) { | |
20272 | + if (likely(missed_timer_offsets_pid != | |
20273 | + task_pid_nr(task))) | |
20274 | + return; | |
20275 | + } | |
20276 | + | |
20277 | + now = ftrace_now(cpu); | |
20278 | + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS); | |
20279 | + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now, | |
20280 | + task); | |
20281 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20282 | + task->timer_offset = latency; | |
20283 | +#endif | |
20284 | + } | |
20285 | +} | |
20286 | +#endif | |
20287 | + | |
20288 | +static __init int latency_hist_init(void) | |
20289 | +{ | |
20290 | + struct dentry *latency_hist_root = NULL; | |
20291 | + struct dentry *dentry; | |
20292 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20293 | + struct dentry *dentry_sharedprio; | |
20294 | +#endif | |
20295 | + struct dentry *entry; | |
20296 | + struct dentry *enable_root; | |
20297 | + int i = 0; | |
20298 | + struct hist_data *my_hist; | |
20299 | + char name[64]; | |
20300 | + char *cpufmt = "CPU%d"; | |
20301 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20302 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20303 | + char *cpufmt_maxlatproc = "max_latency-CPU%d"; | |
20304 | + struct maxlatproc_data *mp = NULL; | |
20305 | +#endif | |
20306 | + | |
20307 | + dentry = tracing_init_dentry(); | |
20308 | + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry); | |
20309 | + enable_root = debugfs_create_dir("enable", latency_hist_root); | |
20310 | + | |
20311 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20312 | + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root); | |
20313 | + for_each_possible_cpu(i) { | |
20314 | + sprintf(name, cpufmt, i); | |
20315 | + entry = debugfs_create_file(name, 0444, dentry, | |
20316 | + &per_cpu(irqsoff_hist, i), &latency_hist_fops); | |
20317 | + my_hist = &per_cpu(irqsoff_hist, i); | |
20318 | + atomic_set(&my_hist->hist_mode, 1); | |
20319 | + my_hist->min_lat = LONG_MAX; | |
20320 | + } | |
20321 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20322 | + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops); | |
20323 | +#endif | |
20324 | + | |
20325 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20326 | + dentry = debugfs_create_dir(preemptoff_hist_dir, | |
20327 | + latency_hist_root); | |
20328 | + for_each_possible_cpu(i) { | |
20329 | + sprintf(name, cpufmt, i); | |
20330 | + entry = debugfs_create_file(name, 0444, dentry, | |
20331 | + &per_cpu(preemptoff_hist, i), &latency_hist_fops); | |
20332 | + my_hist = &per_cpu(preemptoff_hist, i); | |
20333 | + atomic_set(&my_hist->hist_mode, 1); | |
20334 | + my_hist->min_lat = LONG_MAX; | |
20335 | + } | |
20336 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20337 | + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops); | |
20338 | +#endif | |
20339 | + | |
20340 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
20341 | + dentry = debugfs_create_dir(preemptirqsoff_hist_dir, | |
20342 | + latency_hist_root); | |
20343 | + for_each_possible_cpu(i) { | |
20344 | + sprintf(name, cpufmt, i); | |
20345 | + entry = debugfs_create_file(name, 0444, dentry, | |
20346 | + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops); | |
20347 | + my_hist = &per_cpu(preemptirqsoff_hist, i); | |
20348 | + atomic_set(&my_hist->hist_mode, 1); | |
20349 | + my_hist->min_lat = LONG_MAX; | |
20350 | + } | |
20351 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20352 | + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops); | |
20353 | +#endif | |
20354 | + | |
20355 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
20356 | + entry = debugfs_create_file("preemptirqsoff", 0644, | |
20357 | + enable_root, (void *)&preemptirqsoff_enabled_data, | |
20358 | + &enable_fops); | |
20359 | +#endif | |
20360 | + | |
20361 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20362 | + dentry = debugfs_create_dir(wakeup_latency_hist_dir, | |
20363 | + latency_hist_root); | |
20364 | + dentry_sharedprio = debugfs_create_dir( | |
20365 | + wakeup_latency_hist_dir_sharedprio, dentry); | |
20366 | + for_each_possible_cpu(i) { | |
20367 | + sprintf(name, cpufmt, i); | |
20368 | + | |
20369 | + entry = debugfs_create_file(name, 0444, dentry, | |
20370 | + &per_cpu(wakeup_latency_hist, i), | |
20371 | + &latency_hist_fops); | |
20372 | + my_hist = &per_cpu(wakeup_latency_hist, i); | |
20373 | + atomic_set(&my_hist->hist_mode, 1); | |
20374 | + my_hist->min_lat = LONG_MAX; | |
20375 | + | |
20376 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, | |
20377 | + &per_cpu(wakeup_latency_hist_sharedprio, i), | |
20378 | + &latency_hist_fops); | |
20379 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i); | |
20380 | + atomic_set(&my_hist->hist_mode, 1); | |
20381 | + my_hist->min_lat = LONG_MAX; | |
20382 | + | |
20383 | + sprintf(name, cpufmt_maxlatproc, i); | |
20384 | + | |
20385 | + mp = &per_cpu(wakeup_maxlatproc, i); | |
20386 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
20387 | + &maxlatproc_fops); | |
20388 | + clear_maxlatprocdata(mp); | |
20389 | + | |
20390 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i); | |
20391 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp, | |
20392 | + &maxlatproc_fops); | |
20393 | + clear_maxlatprocdata(mp); | |
20394 | + } | |
20395 | + entry = debugfs_create_file("pid", 0644, dentry, | |
20396 | + (void *)&wakeup_pid, &pid_fops); | |
20397 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20398 | + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops); | |
20399 | + entry = debugfs_create_file("reset", 0644, dentry_sharedprio, | |
20400 | + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops); | |
20401 | + entry = debugfs_create_file("wakeup", 0644, | |
20402 | + enable_root, (void *)&wakeup_latency_enabled_data, | |
20403 | + &enable_fops); | |
20404 | +#endif | |
20405 | + | |
20406 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20407 | + dentry = debugfs_create_dir(missed_timer_offsets_dir, | |
20408 | + latency_hist_root); | |
20409 | + for_each_possible_cpu(i) { | |
20410 | + sprintf(name, cpufmt, i); | |
20411 | + entry = debugfs_create_file(name, 0444, dentry, | |
20412 | + &per_cpu(missed_timer_offsets, i), &latency_hist_fops); | |
20413 | + my_hist = &per_cpu(missed_timer_offsets, i); | |
20414 | + atomic_set(&my_hist->hist_mode, 1); | |
20415 | + my_hist->min_lat = LONG_MAX; | |
20416 | + | |
20417 | + sprintf(name, cpufmt_maxlatproc, i); | |
20418 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, i); | |
20419 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
20420 | + &maxlatproc_fops); | |
20421 | + clear_maxlatprocdata(mp); | |
20422 | + } | |
20423 | + entry = debugfs_create_file("pid", 0644, dentry, | |
20424 | + (void *)&missed_timer_offsets_pid, &pid_fops); | |
20425 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20426 | + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops); | |
20427 | + entry = debugfs_create_file("missed_timer_offsets", 0644, | |
20428 | + enable_root, (void *)&missed_timer_offsets_enabled_data, | |
20429 | + &enable_fops); | |
20430 | +#endif | |
20431 | + | |
20432 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
20433 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20434 | + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, | |
20435 | + latency_hist_root); | |
20436 | + for_each_possible_cpu(i) { | |
20437 | + sprintf(name, cpufmt, i); | |
20438 | + entry = debugfs_create_file(name, 0444, dentry, | |
20439 | + &per_cpu(timerandwakeup_latency_hist, i), | |
20440 | + &latency_hist_fops); | |
20441 | + my_hist = &per_cpu(timerandwakeup_latency_hist, i); | |
20442 | + atomic_set(&my_hist->hist_mode, 1); | |
20443 | + my_hist->min_lat = LONG_MAX; | |
20444 | + | |
20445 | + sprintf(name, cpufmt_maxlatproc, i); | |
20446 | + mp = &per_cpu(timerandwakeup_maxlatproc, i); | |
20447 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
20448 | + &maxlatproc_fops); | |
20449 | + clear_maxlatprocdata(mp); | |
20450 | + } | |
20451 | + entry = debugfs_create_file("reset", 0644, dentry, | |
20452 | + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); | |
20453 | + entry = debugfs_create_file("timerandwakeup", 0644, | |
20454 | + enable_root, (void *)&timerandwakeup_enabled_data, | |
20455 | + &enable_fops); | |
20456 | +#endif | |
20457 | + return 0; | |
20458 | +} | |
20459 | + | |
20460 | +device_initcall(latency_hist_init); | |
20461 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | |
20462 | index 8696ce6bf2f6..277f048a4695 100644 | |
20463 | --- a/kernel/trace/trace.c | |
20464 | +++ b/kernel/trace/trace.c | |
20465 | @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
20466 | struct task_struct *tsk = current; | |
20467 | ||
20468 | entry->preempt_count = pc & 0xff; | |
20469 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
20470 | entry->pid = (tsk) ? tsk->pid : 0; | |
20471 | entry->flags = | |
20472 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
20473 | @@ -1907,8 +1908,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
20474 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | | |
20475 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
20476 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | |
20477 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
20478 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
20479 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
20480 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
20481 | + | |
20482 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
20483 | } | |
20484 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | |
20485 | ||
20486 | @@ -2892,14 +2896,17 @@ get_total_entries(struct trace_buffer *buf, | |
20487 | ||
20488 | static void print_lat_help_header(struct seq_file *m) | |
20489 | { | |
20490 | - seq_puts(m, "# _------=> CPU# \n" | |
20491 | - "# / _-----=> irqs-off \n" | |
20492 | - "# | / _----=> need-resched \n" | |
20493 | - "# || / _---=> hardirq/softirq \n" | |
20494 | - "# ||| / _--=> preempt-depth \n" | |
20495 | - "# |||| / delay \n" | |
20496 | - "# cmd pid ||||| time | caller \n" | |
20497 | - "# \\ / ||||| \\ | / \n"); | |
20498 | + seq_puts(m, "# _--------=> CPU# \n" | |
20499 | + "# / _-------=> irqs-off \n" | |
20500 | + "# | / _------=> need-resched \n" | |
20501 | + "# || / _-----=> need-resched_lazy \n" | |
20502 | + "# ||| / _----=> hardirq/softirq \n" | |
20503 | + "# |||| / _---=> preempt-depth \n" | |
20504 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
20505 | + "# |||||| / _-=> migrate-disable \n" | |
20506 | + "# ||||||| / delay \n" | |
20507 | + "# cmd pid |||||||| time | caller \n" | |
20508 | + "# \\ / |||||||| \\ | / \n"); | |
20509 | } | |
20510 | ||
20511 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |
20512 | @@ -2925,11 +2932,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |
20513 | print_event_info(buf, m); | |
20514 | seq_puts(m, "# _-----=> irqs-off\n" | |
20515 | "# / _----=> need-resched\n" | |
20516 | - "# | / _---=> hardirq/softirq\n" | |
20517 | - "# || / _--=> preempt-depth\n" | |
20518 | - "# ||| / delay\n" | |
20519 | - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" | |
20520 | - "# | | | |||| | |\n"); | |
20521 | + "# |/ _-----=> need-resched_lazy\n" | |
20522 | + "# || / _---=> hardirq/softirq\n" | |
20523 | + "# ||| / _--=> preempt-depth\n" | |
20524 | + "# |||| / _-=> preempt-lazy-depth\n" | |
20525 | + "# ||||| / _-=> migrate-disable \n" | |
20526 | + "# |||||| / delay\n" | |
20527 | + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" | |
20528 | + "# | | | ||||||| | |\n"); | |
20529 | } | |
20530 | ||
20531 | void | |
20532 | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h | |
20533 | index fd24b1f9ac43..852b2c81be25 100644 | |
20534 | --- a/kernel/trace/trace.h | |
20535 | +++ b/kernel/trace/trace.h | |
20536 | @@ -124,6 +124,7 @@ struct kretprobe_trace_entry_head { | |
20537 | * NEED_RESCHED - reschedule is requested | |
20538 | * HARDIRQ - inside an interrupt handler | |
20539 | * SOFTIRQ - inside a softirq handler | |
20540 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
20541 | */ | |
20542 | enum trace_flag_type { | |
20543 | TRACE_FLAG_IRQS_OFF = 0x01, | |
20544 | @@ -133,6 +134,7 @@ enum trace_flag_type { | |
20545 | TRACE_FLAG_SOFTIRQ = 0x10, | |
20546 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
20547 | TRACE_FLAG_NMI = 0x40, | |
20548 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, | |
20549 | }; | |
20550 | ||
20551 | #define TRACE_BUF_SIZE 1024 | |
20552 | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c | |
20553 | index 03c0a48c3ac4..0b85d516b491 100644 | |
20554 | --- a/kernel/trace/trace_events.c | |
20555 | +++ b/kernel/trace/trace_events.c | |
20556 | @@ -187,6 +187,8 @@ static int trace_define_common_fields(void) | |
20557 | __common_field(unsigned char, flags); | |
20558 | __common_field(unsigned char, preempt_count); | |
20559 | __common_field(int, pid); | |
20560 | + __common_field(unsigned short, migrate_disable); | |
20561 | + __common_field(unsigned short, padding); | |
20562 | ||
20563 | return ret; | |
20564 | } | |
20565 | diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c | |
20566 | index 03cdff84d026..940bd10b4406 100644 | |
20567 | --- a/kernel/trace/trace_irqsoff.c | |
20568 | +++ b/kernel/trace/trace_irqsoff.c | |
20569 | @@ -13,6 +13,7 @@ | |
20570 | #include <linux/uaccess.h> | |
20571 | #include <linux/module.h> | |
20572 | #include <linux/ftrace.h> | |
20573 | +#include <trace/events/hist.h> | |
20574 | ||
20575 | #include "trace.h" | |
20576 | ||
20577 | @@ -424,11 +425,13 @@ void start_critical_timings(void) | |
20578 | { | |
20579 | if (preempt_trace() || irq_trace()) | |
20580 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20581 | + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); | |
20582 | } | |
20583 | EXPORT_SYMBOL_GPL(start_critical_timings); | |
20584 | ||
20585 | void stop_critical_timings(void) | |
20586 | { | |
20587 | + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); | |
20588 | if (preempt_trace() || irq_trace()) | |
20589 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20590 | } | |
20591 | @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); | |
20592 | #ifdef CONFIG_PROVE_LOCKING | |
20593 | void time_hardirqs_on(unsigned long a0, unsigned long a1) | |
20594 | { | |
20595 | + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); | |
20596 | if (!preempt_trace() && irq_trace()) | |
20597 | stop_critical_timing(a0, a1); | |
20598 | } | |
20599 | @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |
20600 | { | |
20601 | if (!preempt_trace() && irq_trace()) | |
20602 | start_critical_timing(a0, a1); | |
20603 | + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); | |
20604 | } | |
20605 | ||
20606 | #else /* !CONFIG_PROVE_LOCKING */ | |
20607 | @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr) | |
20608 | */ | |
20609 | void trace_hardirqs_on(void) | |
20610 | { | |
20611 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
20612 | if (!preempt_trace() && irq_trace()) | |
20613 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20614 | } | |
20615 | @@ -480,11 +486,13 @@ void trace_hardirqs_off(void) | |
20616 | { | |
20617 | if (!preempt_trace() && irq_trace()) | |
20618 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
20619 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
20620 | } | |
20621 | EXPORT_SYMBOL(trace_hardirqs_off); | |
20622 | ||
20623 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) | |
20624 | { | |
20625 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
20626 | if (!preempt_trace() && irq_trace()) | |
20627 | stop_critical_timing(CALLER_ADDR0, caller_addr); | |
20628 | } | |
20629 | @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) | |
20630 | { | |
20631 | if (!preempt_trace() && irq_trace()) | |
20632 | start_critical_timing(CALLER_ADDR0, caller_addr); | |
20633 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
20634 | } | |
20635 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
20636 | ||
20637 | @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
20638 | #ifdef CONFIG_PREEMPT_TRACER | |
20639 | void trace_preempt_on(unsigned long a0, unsigned long a1) | |
20640 | { | |
20641 | + trace_preemptirqsoff_hist(PREEMPT_ON, 0); | |
20642 | if (preempt_trace() && !irq_trace()) | |
20643 | stop_critical_timing(a0, a1); | |
20644 | } | |
20645 | ||
20646 | void trace_preempt_off(unsigned long a0, unsigned long a1) | |
20647 | { | |
20648 | + trace_preemptirqsoff_hist(PREEMPT_ON, 1); | |
20649 | if (preempt_trace() && !irq_trace()) | |
20650 | start_critical_timing(a0, a1); | |
20651 | } | |
20652 | diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c | |
20653 | index 3fc20422c166..65a6dde71a7d 100644 | |
20654 | --- a/kernel/trace/trace_output.c | |
20655 | +++ b/kernel/trace/trace_output.c | |
20656 | @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20657 | { | |
20658 | char hardsoft_irq; | |
20659 | char need_resched; | |
20660 | + char need_resched_lazy; | |
20661 | char irqs_off; | |
20662 | int hardirq; | |
20663 | int softirq; | |
20664 | @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20665 | break; | |
20666 | } | |
20667 | ||
20668 | + need_resched_lazy = | |
20669 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
20670 | + | |
20671 | hardsoft_irq = | |
20672 | (nmi && hardirq) ? 'Z' : | |
20673 | nmi ? 'z' : | |
20674 | @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
20675 | softirq ? 's' : | |
20676 | '.' ; | |
20677 | ||
20678 | - trace_seq_printf(s, "%c%c%c", | |
20679 | - irqs_off, need_resched, hardsoft_irq); | |
20680 | + trace_seq_printf(s, "%c%c%c%c", | |
20681 | + irqs_off, need_resched, need_resched_lazy, | |
20682 | + hardsoft_irq); | |
20683 | ||
20684 | if (entry->preempt_count) | |
20685 | trace_seq_printf(s, "%x", entry->preempt_count); | |
20686 | else | |
20687 | trace_seq_putc(s, '.'); | |
20688 | ||
20689 | + if (entry->preempt_lazy_count) | |
20690 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
20691 | + else | |
20692 | + trace_seq_putc(s, '.'); | |
20693 | + | |
20694 | + if (entry->migrate_disable) | |
20695 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
20696 | + else | |
20697 | + trace_seq_putc(s, '.'); | |
20698 | + | |
20699 | return !trace_seq_has_overflowed(s); | |
20700 | } | |
20701 | ||
20702 | diff --git a/kernel/user.c b/kernel/user.c | |
20703 | index b069ccbfb0b0..1a2e88e98b5e 100644 | |
20704 | --- a/kernel/user.c | |
20705 | +++ b/kernel/user.c | |
20706 | @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up) | |
20707 | if (!up) | |
20708 | return; | |
20709 | ||
20710 | - local_irq_save(flags); | |
20711 | + local_irq_save_nort(flags); | |
20712 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
20713 | free_user(up, flags); | |
20714 | else | |
20715 | - local_irq_restore(flags); | |
20716 | + local_irq_restore_nort(flags); | |
20717 | } | |
20718 | ||
20719 | struct user_struct *alloc_uid(kuid_t uid) | |
20720 | diff --git a/kernel/watchdog.c b/kernel/watchdog.c | |
20721 | index 6d1020c03d41..70c6a2f79f7e 100644 | |
20722 | --- a/kernel/watchdog.c | |
20723 | +++ b/kernel/watchdog.c | |
20724 | @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts) | |
20725 | ||
20726 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | |
20727 | ||
20728 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); | |
20729 | + | |
20730 | static struct perf_event_attr wd_hw_attr = { | |
20731 | .type = PERF_TYPE_HARDWARE, | |
20732 | .config = PERF_COUNT_HW_CPU_CYCLES, | |
20733 | @@ -348,6 +350,13 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
20734 | /* only print hardlockups once */ | |
20735 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
20736 | return; | |
20737 | + /* | |
20738 | + * If early-printk is enabled then make sure we do not | |
20739 | + * lock up in printk() and kill console logging: | |
20740 | + */ | |
20741 | + printk_kill(); | |
20742 | + | |
20743 | + raw_spin_lock(&watchdog_output_lock); | |
20744 | ||
20745 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
20746 | print_modules(); | |
20747 | @@ -365,6 +374,7 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
20748 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) | |
20749 | trigger_allbutself_cpu_backtrace(); | |
20750 | ||
20751 | + raw_spin_unlock(&watchdog_output_lock); | |
20752 | if (hardlockup_panic) | |
20753 | nmi_panic(regs, "Hard LOCKUP"); | |
20754 | ||
20755 | @@ -512,6 +522,7 @@ static void watchdog_enable(unsigned int cpu) | |
20756 | /* kick off the timer for the hardlockup detector */ | |
20757 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
20758 | hrtimer->function = watchdog_timer_fn; | |
20759 | + hrtimer->irqsafe = 1; | |
20760 | ||
20761 | /* Enable the perf event */ | |
20762 | watchdog_nmi_enable(cpu); | |
20763 | diff --git a/kernel/workqueue.c b/kernel/workqueue.c | |
20764 | index 479d840db286..24eba6620a45 100644 | |
20765 | --- a/kernel/workqueue.c | |
20766 | +++ b/kernel/workqueue.c | |
20767 | @@ -48,6 +48,8 @@ | |
20768 | #include <linux/nodemask.h> | |
20769 | #include <linux/moduleparam.h> | |
20770 | #include <linux/uaccess.h> | |
20771 | +#include <linux/locallock.h> | |
20772 | +#include <linux/delay.h> | |
20773 | ||
20774 | #include "workqueue_internal.h" | |
20775 | ||
20776 | @@ -121,11 +123,16 @@ enum { | |
20777 | * cpu or grabbing pool->lock is enough for read access. If | |
20778 | * POOL_DISASSOCIATED is set, it's identical to L. | |
20779 | * | |
20780 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
20781 | + * the list manipulations against read access from | |
20782 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
20783 | + * pool->lock. | |
20784 | + * | |
20785 | * A: pool->attach_mutex protected. | |
20786 | * | |
20787 | * PL: wq_pool_mutex protected. | |
20788 | * | |
20789 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
20790 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
20791 | * | |
20792 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
20793 | * | |
20794 | @@ -134,7 +141,7 @@ enum { | |
20795 | * | |
20796 | * WQ: wq->mutex protected. | |
20797 | * | |
20798 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
20799 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
20800 | * | |
20801 | * MD: wq_mayday_lock protected. | |
20802 | */ | |
20803 | @@ -185,7 +192,7 @@ struct worker_pool { | |
20804 | atomic_t nr_running ____cacheline_aligned_in_smp; | |
20805 | ||
20806 | /* | |
20807 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
20808 | + * Destruction of pool is RCU protected to allow dereferences | |
20809 | * from get_work_pool(). | |
20810 | */ | |
20811 | struct rcu_head rcu; | |
20812 | @@ -214,7 +221,7 @@ struct pool_workqueue { | |
20813 | /* | |
20814 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
20815 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
20816 | - * itself is also sched-RCU protected so that the first pwq can be | |
20817 | + * itself is also RCU protected so that the first pwq can be | |
20818 | * determined without grabbing wq->mutex. | |
20819 | */ | |
20820 | struct work_struct unbound_release_work; | |
20821 | @@ -348,6 +355,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); | |
20822 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; | |
20823 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
20824 | ||
20825 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
20826 | + | |
20827 | static int worker_thread(void *__worker); | |
20828 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20829 | ||
20830 | @@ -355,20 +364,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20831 | #include <trace/events/workqueue.h> | |
20832 | ||
20833 | #define assert_rcu_or_pool_mutex() \ | |
20834 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20835 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20836 | !lockdep_is_held(&wq_pool_mutex), \ | |
20837 | - "sched RCU or wq_pool_mutex should be held") | |
20838 | + "RCU or wq_pool_mutex should be held") | |
20839 | ||
20840 | #define assert_rcu_or_wq_mutex(wq) \ | |
20841 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20842 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20843 | !lockdep_is_held(&wq->mutex), \ | |
20844 | - "sched RCU or wq->mutex should be held") | |
20845 | + "RCU or wq->mutex should be held") | |
20846 | ||
20847 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
20848 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
20849 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
20850 | !lockdep_is_held(&wq->mutex) && \ | |
20851 | !lockdep_is_held(&wq_pool_mutex), \ | |
20852 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
20853 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
20854 | ||
20855 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
20856 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
20857 | @@ -380,7 +389,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20858 | * @pool: iteration cursor | |
20859 | * @pi: integer used for iteration | |
20860 | * | |
20861 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
20862 | + * This must be called either with wq_pool_mutex held or RCU read | |
20863 | * locked. If the pool needs to be used beyond the locking in effect, the | |
20864 | * caller is responsible for guaranteeing that the pool stays online. | |
20865 | * | |
20866 | @@ -412,7 +421,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20867 | * @pwq: iteration cursor | |
20868 | * @wq: the target workqueue | |
20869 | * | |
20870 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
20871 | + * This must be called either with wq->mutex held or RCU read locked. | |
20872 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
20873 | * responsible for guaranteeing that the pwq stays online. | |
20874 | * | |
20875 | @@ -424,6 +433,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
20876 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ | |
20877 | else | |
20878 | ||
20879 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20880 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
20881 | +{ | |
20882 | + preempt_disable(); | |
20883 | +} | |
20884 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
20885 | +{ | |
20886 | + preempt_enable(); | |
20887 | +} | |
20888 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
20889 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
20890 | +#else | |
20891 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
20892 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
20893 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
20894 | +{ | |
20895 | + spin_lock_irq(&pool->lock); | |
20896 | +} | |
20897 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
20898 | +{ | |
20899 | + spin_unlock_irq(&pool->lock); | |
20900 | +} | |
20901 | +#endif | |
20902 | + | |
20903 | + | |
20904 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
20905 | ||
20906 | static struct debug_obj_descr work_debug_descr; | |
20907 | @@ -548,7 +582,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |
20908 | * @wq: the target workqueue | |
20909 | * @node: the node ID | |
20910 | * | |
20911 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
20912 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
20913 | * read locked. | |
20914 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
20915 | * responsible for guaranteeing that the pwq stays online. | |
20916 | @@ -692,8 +726,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) | |
20917 | * @work: the work item of interest | |
20918 | * | |
20919 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
20920 | - * access under sched-RCU read lock. As such, this function should be | |
20921 | - * called under wq_pool_mutex or with preemption disabled. | |
20922 | + * access under RCU read lock. As such, this function should be | |
20923 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
20924 | * | |
20925 | * All fields of the returned pool are accessible as long as the above | |
20926 | * mentioned locking is in effect. If the returned pool needs to be used | |
20927 | @@ -830,50 +864,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool) | |
20928 | */ | |
20929 | static void wake_up_worker(struct worker_pool *pool) | |
20930 | { | |
20931 | - struct worker *worker = first_idle_worker(pool); | |
20932 | + struct worker *worker; | |
20933 | + | |
20934 | + rt_lock_idle_list(pool); | |
20935 | + | |
20936 | + worker = first_idle_worker(pool); | |
20937 | ||
20938 | if (likely(worker)) | |
20939 | wake_up_process(worker->task); | |
20940 | + | |
20941 | + rt_unlock_idle_list(pool); | |
20942 | } | |
20943 | ||
20944 | /** | |
20945 | - * wq_worker_waking_up - a worker is waking up | |
20946 | + * wq_worker_running - a worker is running again | |
20947 | * @task: task waking up | |
20948 | - * @cpu: CPU @task is waking up to | |
20949 | * | |
20950 | - * This function is called during try_to_wake_up() when a worker is | |
20951 | - * being awoken. | |
20952 | - * | |
20953 | - * CONTEXT: | |
20954 | - * spin_lock_irq(rq->lock) | |
20955 | + * This function is called when a worker returns from schedule() | |
20956 | */ | |
20957 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
20958 | +void wq_worker_running(struct task_struct *task) | |
20959 | { | |
20960 | struct worker *worker = kthread_data(task); | |
20961 | ||
20962 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
20963 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
20964 | + if (!worker->sleeping) | |
20965 | + return; | |
20966 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
20967 | atomic_inc(&worker->pool->nr_running); | |
20968 | - } | |
20969 | + worker->sleeping = 0; | |
20970 | } | |
20971 | ||
20972 | /** | |
20973 | * wq_worker_sleeping - a worker is going to sleep | |
20974 | * @task: task going to sleep | |
20975 | * | |
20976 | - * This function is called during schedule() when a busy worker is | |
20977 | - * going to sleep. Worker on the same cpu can be woken up by | |
20978 | - * returning pointer to its task. | |
20979 | - * | |
20980 | - * CONTEXT: | |
20981 | - * spin_lock_irq(rq->lock) | |
20982 | - * | |
20983 | - * Return: | |
20984 | - * Worker task on @cpu to wake up, %NULL if none. | |
20985 | + * This function is called from schedule() when a busy worker is | |
20986 | + * going to sleep. | |
20987 | */ | |
20988 | -struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
20989 | +void wq_worker_sleeping(struct task_struct *task) | |
20990 | { | |
20991 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
20992 | + struct worker *worker = kthread_data(task); | |
20993 | struct worker_pool *pool; | |
20994 | ||
20995 | /* | |
20996 | @@ -882,29 +911,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
20997 | * checking NOT_RUNNING. | |
20998 | */ | |
20999 | if (worker->flags & WORKER_NOT_RUNNING) | |
21000 | - return NULL; | |
21001 | + return; | |
21002 | ||
21003 | pool = worker->pool; | |
21004 | ||
21005 | - /* this can only happen on the local cpu */ | |
21006 | - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) | |
21007 | - return NULL; | |
21008 | + if (WARN_ON_ONCE(worker->sleeping)) | |
21009 | + return; | |
21010 | + | |
21011 | + worker->sleeping = 1; | |
21012 | ||
21013 | /* | |
21014 | * The counterpart of the following dec_and_test, implied mb, | |
21015 | * worklist not empty test sequence is in insert_work(). | |
21016 | * Please read comment there. | |
21017 | - * | |
21018 | - * NOT_RUNNING is clear. This means that we're bound to and | |
21019 | - * running on the local cpu w/ rq lock held and preemption | |
21020 | - * disabled, which in turn means that none else could be | |
21021 | - * manipulating idle_list, so dereferencing idle_list without pool | |
21022 | - * lock is safe. | |
21023 | */ | |
21024 | if (atomic_dec_and_test(&pool->nr_running) && | |
21025 | - !list_empty(&pool->worklist)) | |
21026 | - to_wakeup = first_idle_worker(pool); | |
21027 | - return to_wakeup ? to_wakeup->task : NULL; | |
21028 | + !list_empty(&pool->worklist)) { | |
21029 | + sched_lock_idle_list(pool); | |
21030 | + wake_up_worker(pool); | |
21031 | + sched_unlock_idle_list(pool); | |
21032 | + } | |
21033 | } | |
21034 | ||
21035 | /** | |
21036 | @@ -1098,12 +1124,14 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) | |
21037 | { | |
21038 | if (pwq) { | |
21039 | /* | |
21040 | - * As both pwqs and pools are sched-RCU protected, the | |
21041 | + * As both pwqs and pools are RCU protected, the | |
21042 | * following lock operations are safe. | |
21043 | */ | |
21044 | - spin_lock_irq(&pwq->pool->lock); | |
21045 | + rcu_read_lock(); | |
21046 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); | |
21047 | put_pwq(pwq); | |
21048 | - spin_unlock_irq(&pwq->pool->lock); | |
21049 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
21050 | + rcu_read_unlock(); | |
21051 | } | |
21052 | } | |
21053 | ||
21054 | @@ -1207,7 +1235,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
21055 | struct worker_pool *pool; | |
21056 | struct pool_workqueue *pwq; | |
21057 | ||
21058 | - local_irq_save(*flags); | |
21059 | + local_lock_irqsave(pendingb_lock, *flags); | |
21060 | ||
21061 | /* try to steal the timer if it exists */ | |
21062 | if (is_dwork) { | |
21063 | @@ -1226,6 +1254,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
21064 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) | |
21065 | return 0; | |
21066 | ||
21067 | + rcu_read_lock(); | |
21068 | /* | |
21069 | * The queueing is in progress, or it is already queued. Try to | |
21070 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
21071 | @@ -1264,14 +1293,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
21072 | set_work_pool_and_keep_pending(work, pool->id); | |
21073 | ||
21074 | spin_unlock(&pool->lock); | |
21075 | + rcu_read_unlock(); | |
21076 | return 1; | |
21077 | } | |
21078 | spin_unlock(&pool->lock); | |
21079 | fail: | |
21080 | - local_irq_restore(*flags); | |
21081 | + rcu_read_unlock(); | |
21082 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
21083 | if (work_is_canceling(work)) | |
21084 | return -ENOENT; | |
21085 | - cpu_relax(); | |
21086 | + cpu_chill(); | |
21087 | return -EAGAIN; | |
21088 | } | |
21089 | ||
21090 | @@ -1373,7 +1404,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
21091 | * queued or lose PENDING. Grabbing PENDING and queueing should | |
21092 | * happen with IRQ disabled. | |
21093 | */ | |
21094 | - WARN_ON_ONCE(!irqs_disabled()); | |
21095 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
21096 | ||
21097 | debug_work_activate(work); | |
21098 | ||
21099 | @@ -1381,6 +1412,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
21100 | if (unlikely(wq->flags & __WQ_DRAINING) && | |
21101 | WARN_ON_ONCE(!is_chained_work(wq))) | |
21102 | return; | |
21103 | + rcu_read_lock(); | |
21104 | retry: | |
21105 | if (req_cpu == WORK_CPU_UNBOUND) | |
21106 | cpu = wq_select_unbound_cpu(raw_smp_processor_id()); | |
21107 | @@ -1437,10 +1469,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
21108 | /* pwq determined, queue */ | |
21109 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
21110 | ||
21111 | - if (WARN_ON(!list_empty(&work->entry))) { | |
21112 | - spin_unlock(&pwq->pool->lock); | |
21113 | - return; | |
21114 | - } | |
21115 | + if (WARN_ON(!list_empty(&work->entry))) | |
21116 | + goto out; | |
21117 | ||
21118 | pwq->nr_in_flight[pwq->work_color]++; | |
21119 | work_flags = work_color_to_flags(pwq->work_color); | |
21120 | @@ -1458,7 +1488,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
21121 | ||
21122 | insert_work(pwq, work, worklist, work_flags); | |
21123 | ||
21124 | +out: | |
21125 | spin_unlock(&pwq->pool->lock); | |
21126 | + rcu_read_unlock(); | |
21127 | } | |
21128 | ||
21129 | /** | |
21130 | @@ -1478,14 +1510,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, | |
21131 | bool ret = false; | |
21132 | unsigned long flags; | |
21133 | ||
21134 | - local_irq_save(flags); | |
21135 | + local_lock_irqsave(pendingb_lock,flags); | |
21136 | ||
21137 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
21138 | __queue_work(cpu, wq, work); | |
21139 | ret = true; | |
21140 | } | |
21141 | ||
21142 | - local_irq_restore(flags); | |
21143 | + local_unlock_irqrestore(pendingb_lock, flags); | |
21144 | return ret; | |
21145 | } | |
21146 | EXPORT_SYMBOL(queue_work_on); | |
21147 | @@ -1552,14 +1584,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
21148 | unsigned long flags; | |
21149 | ||
21150 | /* read the comment in __queue_work() */ | |
21151 | - local_irq_save(flags); | |
21152 | + local_lock_irqsave(pendingb_lock, flags); | |
21153 | ||
21154 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
21155 | __queue_delayed_work(cpu, wq, dwork, delay); | |
21156 | ret = true; | |
21157 | } | |
21158 | ||
21159 | - local_irq_restore(flags); | |
21160 | + local_unlock_irqrestore(pendingb_lock, flags); | |
21161 | return ret; | |
21162 | } | |
21163 | EXPORT_SYMBOL(queue_delayed_work_on); | |
21164 | @@ -1594,7 +1626,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
21165 | ||
21166 | if (likely(ret >= 0)) { | |
21167 | __queue_delayed_work(cpu, wq, dwork, delay); | |
21168 | - local_irq_restore(flags); | |
21169 | + local_unlock_irqrestore(pendingb_lock, flags); | |
21170 | } | |
21171 | ||
21172 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
21173 | @@ -1627,7 +1659,9 @@ static void worker_enter_idle(struct worker *worker) | |
21174 | worker->last_active = jiffies; | |
21175 | ||
21176 | /* idle_list is LIFO */ | |
21177 | + rt_lock_idle_list(pool); | |
21178 | list_add(&worker->entry, &pool->idle_list); | |
21179 | + rt_unlock_idle_list(pool); | |
21180 | ||
21181 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
21182 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
21183 | @@ -1660,7 +1694,9 @@ static void worker_leave_idle(struct worker *worker) | |
21184 | return; | |
21185 | worker_clr_flags(worker, WORKER_IDLE); | |
21186 | pool->nr_idle--; | |
21187 | + rt_lock_idle_list(pool); | |
21188 | list_del_init(&worker->entry); | |
21189 | + rt_unlock_idle_list(pool); | |
21190 | } | |
21191 | ||
21192 | static struct worker *alloc_worker(int node) | |
21193 | @@ -1826,7 +1862,9 @@ static void destroy_worker(struct worker *worker) | |
21194 | pool->nr_workers--; | |
21195 | pool->nr_idle--; | |
21196 | ||
21197 | + rt_lock_idle_list(pool); | |
21198 | list_del_init(&worker->entry); | |
21199 | + rt_unlock_idle_list(pool); | |
21200 | worker->flags |= WORKER_DIE; | |
21201 | wake_up_process(worker->task); | |
21202 | } | |
21203 | @@ -2785,14 +2823,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
21204 | ||
21205 | might_sleep(); | |
21206 | ||
21207 | - local_irq_disable(); | |
21208 | + rcu_read_lock(); | |
21209 | pool = get_work_pool(work); | |
21210 | if (!pool) { | |
21211 | - local_irq_enable(); | |
21212 | + rcu_read_unlock(); | |
21213 | return false; | |
21214 | } | |
21215 | ||
21216 | - spin_lock(&pool->lock); | |
21217 | + spin_lock_irq(&pool->lock); | |
21218 | /* see the comment in try_to_grab_pending() with the same code */ | |
21219 | pwq = get_work_pwq(work); | |
21220 | if (pwq) { | |
21221 | @@ -2821,10 +2859,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
21222 | else | |
21223 | lock_map_acquire_read(&pwq->wq->lockdep_map); | |
21224 | lock_map_release(&pwq->wq->lockdep_map); | |
21225 | - | |
21226 | + rcu_read_unlock(); | |
21227 | return true; | |
21228 | already_gone: | |
21229 | spin_unlock_irq(&pool->lock); | |
21230 | + rcu_read_unlock(); | |
21231 | return false; | |
21232 | } | |
21233 | ||
21234 | @@ -2911,7 +2950,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) | |
21235 | ||
21236 | /* tell other tasks trying to grab @work to back off */ | |
21237 | mark_work_canceling(work); | |
21238 | - local_irq_restore(flags); | |
21239 | + local_unlock_irqrestore(pendingb_lock, flags); | |
21240 | ||
21241 | flush_work(work); | |
21242 | clear_work_data(work); | |
21243 | @@ -2966,10 +3005,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); | |
21244 | */ | |
21245 | bool flush_delayed_work(struct delayed_work *dwork) | |
21246 | { | |
21247 | - local_irq_disable(); | |
21248 | + local_lock_irq(pendingb_lock); | |
21249 | if (del_timer_sync(&dwork->timer)) | |
21250 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
21251 | - local_irq_enable(); | |
21252 | + local_unlock_irq(pendingb_lock); | |
21253 | return flush_work(&dwork->work); | |
21254 | } | |
21255 | EXPORT_SYMBOL(flush_delayed_work); | |
21256 | @@ -2987,7 +3026,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) | |
21257 | return false; | |
21258 | ||
21259 | set_work_pool_and_clear_pending(work, get_work_pool_id(work)); | |
21260 | - local_irq_restore(flags); | |
21261 | + local_unlock_irqrestore(pendingb_lock, flags); | |
21262 | return ret; | |
21263 | } | |
21264 | ||
21265 | @@ -3245,7 +3284,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |
21266 | * put_unbound_pool - put a worker_pool | |
21267 | * @pool: worker_pool to put | |
21268 | * | |
21269 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
21270 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
21271 | * safe manner. get_unbound_pool() calls this function on its failure path | |
21272 | * and this function should be able to release pools which went through, | |
21273 | * successfully or not, init_worker_pool(). | |
21274 | @@ -3299,8 +3338,8 @@ static void put_unbound_pool(struct worker_pool *pool) | |
21275 | del_timer_sync(&pool->idle_timer); | |
21276 | del_timer_sync(&pool->mayday_timer); | |
21277 | ||
21278 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
21279 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
21280 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
21281 | + call_rcu(&pool->rcu, rcu_free_pool); | |
21282 | } | |
21283 | ||
21284 | /** | |
21285 | @@ -3407,14 +3446,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) | |
21286 | put_unbound_pool(pool); | |
21287 | mutex_unlock(&wq_pool_mutex); | |
21288 | ||
21289 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
21290 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
21291 | ||
21292 | /* | |
21293 | * If we're the last pwq going away, @wq is already dead and no one | |
21294 | * is gonna access it anymore. Schedule RCU free. | |
21295 | */ | |
21296 | if (is_last) | |
21297 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
21298 | + call_rcu(&wq->rcu, rcu_free_wq); | |
21299 | } | |
21300 | ||
21301 | /** | |
21302 | @@ -4064,7 +4103,7 @@ void destroy_workqueue(struct workqueue_struct *wq) | |
21303 | * The base ref is never dropped on per-cpu pwqs. Directly | |
21304 | * schedule RCU free. | |
21305 | */ | |
21306 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
21307 | + call_rcu(&wq->rcu, rcu_free_wq); | |
21308 | } else { | |
21309 | /* | |
21310 | * We're the sole accessor of @wq at this point. Directly | |
21311 | @@ -4157,7 +4196,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
21312 | struct pool_workqueue *pwq; | |
21313 | bool ret; | |
21314 | ||
21315 | - rcu_read_lock_sched(); | |
21316 | + rcu_read_lock(); | |
21317 | + preempt_disable(); | |
21318 | ||
21319 | if (cpu == WORK_CPU_UNBOUND) | |
21320 | cpu = smp_processor_id(); | |
21321 | @@ -4168,7 +4208,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
21322 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); | |
21323 | ||
21324 | ret = !list_empty(&pwq->delayed_works); | |
21325 | - rcu_read_unlock_sched(); | |
21326 | + preempt_enable(); | |
21327 | + rcu_read_unlock(); | |
21328 | ||
21329 | return ret; | |
21330 | } | |
21331 | @@ -4194,15 +4235,15 @@ unsigned int work_busy(struct work_struct *work) | |
21332 | if (work_pending(work)) | |
21333 | ret |= WORK_BUSY_PENDING; | |
21334 | ||
21335 | - local_irq_save(flags); | |
21336 | + rcu_read_lock(); | |
21337 | pool = get_work_pool(work); | |
21338 | if (pool) { | |
21339 | - spin_lock(&pool->lock); | |
21340 | + spin_lock_irqsave(&pool->lock, flags); | |
21341 | if (find_worker_executing_work(pool, work)) | |
21342 | ret |= WORK_BUSY_RUNNING; | |
21343 | - spin_unlock(&pool->lock); | |
21344 | + spin_unlock_irqrestore(&pool->lock, flags); | |
21345 | } | |
21346 | - local_irq_restore(flags); | |
21347 | + rcu_read_unlock(); | |
21348 | ||
21349 | return ret; | |
21350 | } | |
21351 | @@ -4391,7 +4432,7 @@ void show_workqueue_state(void) | |
21352 | unsigned long flags; | |
21353 | int pi; | |
21354 | ||
21355 | - rcu_read_lock_sched(); | |
21356 | + rcu_read_lock(); | |
21357 | ||
21358 | pr_info("Showing busy workqueues and worker pools:\n"); | |
21359 | ||
21360 | @@ -4444,7 +4485,7 @@ void show_workqueue_state(void) | |
21361 | spin_unlock_irqrestore(&pool->lock, flags); | |
21362 | } | |
21363 | ||
21364 | - rcu_read_unlock_sched(); | |
21365 | + rcu_read_unlock(); | |
21366 | } | |
21367 | ||
21368 | /* | |
21369 | @@ -4782,16 +4823,16 @@ bool freeze_workqueues_busy(void) | |
21370 | * nr_active is monotonically decreasing. It's safe | |
21371 | * to peek without lock. | |
21372 | */ | |
21373 | - rcu_read_lock_sched(); | |
21374 | + rcu_read_lock(); | |
21375 | for_each_pwq(pwq, wq) { | |
21376 | WARN_ON_ONCE(pwq->nr_active < 0); | |
21377 | if (pwq->nr_active) { | |
21378 | busy = true; | |
21379 | - rcu_read_unlock_sched(); | |
21380 | + rcu_read_unlock(); | |
21381 | goto out_unlock; | |
21382 | } | |
21383 | } | |
21384 | - rcu_read_unlock_sched(); | |
21385 | + rcu_read_unlock(); | |
21386 | } | |
21387 | out_unlock: | |
21388 | mutex_unlock(&wq_pool_mutex); | |
21389 | @@ -4981,7 +5022,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
21390 | const char *delim = ""; | |
21391 | int node, written = 0; | |
21392 | ||
21393 | - rcu_read_lock_sched(); | |
21394 | + get_online_cpus(); | |
21395 | + rcu_read_lock(); | |
21396 | for_each_node(node) { | |
21397 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
21398 | "%s%d:%d", delim, node, | |
21399 | @@ -4989,7 +5031,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
21400 | delim = " "; | |
21401 | } | |
21402 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
21403 | - rcu_read_unlock_sched(); | |
21404 | + rcu_read_unlock(); | |
21405 | + put_online_cpus(); | |
21406 | ||
21407 | return written; | |
21408 | } | |
21409 | diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h | |
21410 | index 8635417c587b..f000c4d6917e 100644 | |
21411 | --- a/kernel/workqueue_internal.h | |
21412 | +++ b/kernel/workqueue_internal.h | |
21413 | @@ -43,6 +43,7 @@ struct worker { | |
21414 | unsigned long last_active; /* L: last active timestamp */ | |
21415 | unsigned int flags; /* X: flags */ | |
21416 | int id; /* I: worker id */ | |
21417 | + int sleeping; /* None */ | |
21418 | ||
21419 | /* | |
21420 | * Opaque string set with work_set_desc(). Printed out with task | |
21421 | @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void) | |
21422 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | |
21423 | * sched/core.c and workqueue.c. | |
21424 | */ | |
21425 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
21426 | -struct task_struct *wq_worker_sleeping(struct task_struct *task); | |
21427 | +void wq_worker_running(struct task_struct *task); | |
21428 | +void wq_worker_sleeping(struct task_struct *task); | |
21429 | ||
21430 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
21431 | diff --git a/lib/Kconfig b/lib/Kconfig | |
21432 | index 260a80e313b9..b06becb3f477 100644 | |
21433 | --- a/lib/Kconfig | |
21434 | +++ b/lib/Kconfig | |
21435 | @@ -400,6 +400,7 @@ config CHECK_SIGNATURE | |
21436 | ||
21437 | config CPUMASK_OFFSTACK | |
21438 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
21439 | + depends on !PREEMPT_RT_FULL | |
21440 | help | |
21441 | Use dynamic allocation for cpumask_var_t, instead of putting | |
21442 | them on the stack. This is a bit more expensive, but avoids | |
21443 | diff --git a/lib/debugobjects.c b/lib/debugobjects.c | |
21444 | index 056052dc8e91..d8494e126de8 100644 | |
21445 | --- a/lib/debugobjects.c | |
21446 | +++ b/lib/debugobjects.c | |
21447 | @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) | |
21448 | struct debug_obj *obj; | |
21449 | unsigned long flags; | |
21450 | ||
21451 | - fill_pool(); | |
21452 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21453 | + if (preempt_count() == 0 && !irqs_disabled()) | |
21454 | +#endif | |
21455 | + fill_pool(); | |
21456 | ||
21457 | db = get_bucket((unsigned long) addr); | |
21458 | ||
21459 | diff --git a/lib/idr.c b/lib/idr.c | |
21460 | index 6098336df267..9decbe914595 100644 | |
21461 | --- a/lib/idr.c | |
21462 | +++ b/lib/idr.c | |
21463 | @@ -30,6 +30,7 @@ | |
21464 | #include <linux/idr.h> | |
21465 | #include <linux/spinlock.h> | |
21466 | #include <linux/percpu.h> | |
21467 | +#include <linux/locallock.h> | |
21468 | ||
21469 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | |
21470 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | |
21471 | @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | |
21472 | static DEFINE_PER_CPU(int, idr_preload_cnt); | |
21473 | static DEFINE_SPINLOCK(simple_ida_lock); | |
21474 | ||
21475 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21476 | +static DEFINE_LOCAL_IRQ_LOCK(idr_lock); | |
21477 | + | |
21478 | +static inline void idr_preload_lock(void) | |
21479 | +{ | |
21480 | + local_lock(idr_lock); | |
21481 | +} | |
21482 | + | |
21483 | +static inline void idr_preload_unlock(void) | |
21484 | +{ | |
21485 | + local_unlock(idr_lock); | |
21486 | +} | |
21487 | + | |
21488 | +void idr_preload_end(void) | |
21489 | +{ | |
21490 | + idr_preload_unlock(); | |
21491 | +} | |
21492 | +EXPORT_SYMBOL(idr_preload_end); | |
21493 | +#else | |
21494 | +static inline void idr_preload_lock(void) | |
21495 | +{ | |
21496 | + preempt_disable(); | |
21497 | +} | |
21498 | + | |
21499 | +static inline void idr_preload_unlock(void) | |
21500 | +{ | |
21501 | + preempt_enable(); | |
21502 | +} | |
21503 | +#endif | |
21504 | + | |
21505 | + | |
21506 | /* the maximum ID which can be allocated given idr->layers */ | |
21507 | static int idr_max(int layers) | |
21508 | { | |
21509 | @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | |
21510 | * context. See idr_preload() for details. | |
21511 | */ | |
21512 | if (!in_interrupt()) { | |
21513 | - preempt_disable(); | |
21514 | + idr_preload_lock(); | |
21515 | new = __this_cpu_read(idr_preload_head); | |
21516 | if (new) { | |
21517 | __this_cpu_write(idr_preload_head, new->ary[0]); | |
21518 | __this_cpu_dec(idr_preload_cnt); | |
21519 | new->ary[0] = NULL; | |
21520 | } | |
21521 | - preempt_enable(); | |
21522 | + idr_preload_unlock(); | |
21523 | if (new) | |
21524 | return new; | |
21525 | } | |
21526 | @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, | |
21527 | idr_mark_full(pa, id); | |
21528 | } | |
21529 | ||
21530 | - | |
21531 | /** | |
21532 | * idr_preload - preload for idr_alloc() | |
21533 | * @gfp_mask: allocation mask to use for preloading | |
21534 | @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask) | |
21535 | WARN_ON_ONCE(in_interrupt()); | |
21536 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | |
21537 | ||
21538 | - preempt_disable(); | |
21539 | + idr_preload_lock(); | |
21540 | ||
21541 | /* | |
21542 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | |
21543 | @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask) | |
21544 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | |
21545 | struct idr_layer *new; | |
21546 | ||
21547 | - preempt_enable(); | |
21548 | + idr_preload_unlock(); | |
21549 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | |
21550 | - preempt_disable(); | |
21551 | + idr_preload_lock(); | |
21552 | if (!new) | |
21553 | break; | |
21554 | ||
21555 | diff --git a/lib/irq_poll.c b/lib/irq_poll.c | |
21556 | index 1d6565e81030..b23a79761df7 100644 | |
21557 | --- a/lib/irq_poll.c | |
21558 | +++ b/lib/irq_poll.c | |
21559 | @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop) | |
21560 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); | |
21561 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
21562 | local_irq_restore(flags); | |
21563 | + preempt_check_resched_rt(); | |
21564 | } | |
21565 | EXPORT_SYMBOL(irq_poll_sched); | |
21566 | ||
21567 | @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop) | |
21568 | local_irq_save(flags); | |
21569 | __irq_poll_complete(iop); | |
21570 | local_irq_restore(flags); | |
21571 | + preempt_check_resched_rt(); | |
21572 | } | |
21573 | EXPORT_SYMBOL(irq_poll_complete); | |
21574 | ||
21575 | @@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) | |
21576 | } | |
21577 | ||
21578 | local_irq_enable(); | |
21579 | + preempt_check_resched_rt(); | |
21580 | ||
21581 | /* Even though interrupts have been re-enabled, this | |
21582 | * access is safe because interrupts can only add new | |
21583 | @@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) | |
21584 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
21585 | ||
21586 | local_irq_enable(); | |
21587 | + preempt_check_resched_rt(); | |
21588 | } | |
21589 | ||
21590 | /** | |
21591 | @@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned int cpu) | |
21592 | this_cpu_ptr(&blk_cpu_iopoll)); | |
21593 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
21594 | local_irq_enable(); | |
21595 | + preempt_check_resched_rt(); | |
21596 | ||
21597 | return 0; | |
21598 | } | |
21599 | diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c | |
21600 | index f3a217ea0388..4611b156ef79 100644 | |
21601 | --- a/lib/locking-selftest.c | |
21602 | +++ b/lib/locking-selftest.c | |
21603 | @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) | |
21604 | #include "locking-selftest-spin-hardirq.h" | |
21605 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
21606 | ||
21607 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21608 | + | |
21609 | #include "locking-selftest-rlock-hardirq.h" | |
21610 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
21611 | ||
21612 | @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) | |
21613 | #include "locking-selftest-wlock-softirq.h" | |
21614 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
21615 | ||
21616 | +#endif | |
21617 | + | |
21618 | #undef E1 | |
21619 | #undef E2 | |
21620 | ||
21621 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21622 | /* | |
21623 | * Enabling hardirqs with a softirq-safe lock held: | |
21624 | */ | |
21625 | @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
21626 | #undef E1 | |
21627 | #undef E2 | |
21628 | ||
21629 | +#endif | |
21630 | + | |
21631 | /* | |
21632 | * Enabling irqs with an irq-safe lock held: | |
21633 | */ | |
21634 | @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
21635 | #include "locking-selftest-spin-hardirq.h" | |
21636 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
21637 | ||
21638 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21639 | + | |
21640 | #include "locking-selftest-rlock-hardirq.h" | |
21641 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
21642 | ||
21643 | @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) | |
21644 | #include "locking-selftest-wlock-softirq.h" | |
21645 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
21646 | ||
21647 | +#endif | |
21648 | + | |
21649 | #undef E1 | |
21650 | #undef E2 | |
21651 | ||
21652 | @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
21653 | #include "locking-selftest-spin-hardirq.h" | |
21654 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
21655 | ||
21656 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21657 | + | |
21658 | #include "locking-selftest-rlock-hardirq.h" | |
21659 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
21660 | ||
21661 | @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) | |
21662 | #include "locking-selftest-wlock-softirq.h" | |
21663 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
21664 | ||
21665 | +#endif | |
21666 | + | |
21667 | #undef E1 | |
21668 | #undef E2 | |
21669 | #undef E3 | |
21670 | @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
21671 | #include "locking-selftest-spin-hardirq.h" | |
21672 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
21673 | ||
21674 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21675 | + | |
21676 | #include "locking-selftest-rlock-hardirq.h" | |
21677 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
21678 | ||
21679 | @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) | |
21680 | #include "locking-selftest-wlock-softirq.h" | |
21681 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
21682 | ||
21683 | +#endif | |
21684 | + | |
21685 | #undef E1 | |
21686 | #undef E2 | |
21687 | #undef E3 | |
21688 | ||
21689 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21690 | + | |
21691 | /* | |
21692 | * read-lock / write-lock irq inversion. | |
21693 | * | |
21694 | @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) | |
21695 | #undef E2 | |
21696 | #undef E3 | |
21697 | ||
21698 | +#endif | |
21699 | + | |
21700 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21701 | + | |
21702 | /* | |
21703 | * read-lock / write-lock recursion that is actually safe. | |
21704 | */ | |
21705 | @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |
21706 | #undef E2 | |
21707 | #undef E3 | |
21708 | ||
21709 | +#endif | |
21710 | + | |
21711 | /* | |
21712 | * read-lock / write-lock recursion that is unsafe. | |
21713 | */ | |
21714 | @@ -1858,6 +1885,7 @@ void locking_selftest(void) | |
21715 | ||
21716 | printk(" --------------------------------------------------------------------------\n"); | |
21717 | ||
21718 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21719 | /* | |
21720 | * irq-context testcases: | |
21721 | */ | |
21722 | @@ -1870,6 +1898,28 @@ void locking_selftest(void) | |
21723 | ||
21724 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
21725 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
21726 | +#else | |
21727 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
21728 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
21729 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
21730 | + | |
21731 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
21732 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
21733 | + | |
21734 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
21735 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
21736 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
21737 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
21738 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
21739 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
21740 | + | |
21741 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
21742 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
21743 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
21744 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
21745 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
21746 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
21747 | +#endif | |
21748 | ||
21749 | ww_tests(); | |
21750 | ||
21751 | diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c | |
21752 | index 6d40944960de..822a2c027e72 100644 | |
21753 | --- a/lib/percpu_ida.c | |
21754 | +++ b/lib/percpu_ida.c | |
21755 | @@ -26,6 +26,9 @@ | |
21756 | #include <linux/string.h> | |
21757 | #include <linux/spinlock.h> | |
21758 | #include <linux/percpu_ida.h> | |
21759 | +#include <linux/locallock.h> | |
21760 | + | |
21761 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
21762 | ||
21763 | struct percpu_ida_cpu { | |
21764 | /* | |
21765 | @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21766 | unsigned long flags; | |
21767 | int tag; | |
21768 | ||
21769 | - local_irq_save(flags); | |
21770 | + local_lock_irqsave(irq_off_lock, flags); | |
21771 | tags = this_cpu_ptr(pool->tag_cpu); | |
21772 | ||
21773 | /* Fastpath */ | |
21774 | tag = alloc_local_tag(tags); | |
21775 | if (likely(tag >= 0)) { | |
21776 | - local_irq_restore(flags); | |
21777 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21778 | return tag; | |
21779 | } | |
21780 | ||
21781 | @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21782 | ||
21783 | if (!tags->nr_free) | |
21784 | alloc_global_tags(pool, tags); | |
21785 | + | |
21786 | if (!tags->nr_free) | |
21787 | steal_tags(pool, tags); | |
21788 | ||
21789 | @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21790 | } | |
21791 | ||
21792 | spin_unlock(&pool->lock); | |
21793 | - local_irq_restore(flags); | |
21794 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21795 | ||
21796 | if (tag >= 0 || state == TASK_RUNNING) | |
21797 | break; | |
21798 | @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
21799 | ||
21800 | schedule(); | |
21801 | ||
21802 | - local_irq_save(flags); | |
21803 | + local_lock_irqsave(irq_off_lock, flags); | |
21804 | tags = this_cpu_ptr(pool->tag_cpu); | |
21805 | } | |
21806 | if (state != TASK_RUNNING) | |
21807 | @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
21808 | ||
21809 | BUG_ON(tag >= pool->nr_tags); | |
21810 | ||
21811 | - local_irq_save(flags); | |
21812 | + local_lock_irqsave(irq_off_lock, flags); | |
21813 | tags = this_cpu_ptr(pool->tag_cpu); | |
21814 | ||
21815 | spin_lock(&tags->lock); | |
21816 | @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
21817 | spin_unlock(&pool->lock); | |
21818 | } | |
21819 | ||
21820 | - local_irq_restore(flags); | |
21821 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21822 | } | |
21823 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
21824 | ||
21825 | @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
21826 | struct percpu_ida_cpu *remote; | |
21827 | unsigned cpu, i, err = 0; | |
21828 | ||
21829 | - local_irq_save(flags); | |
21830 | + local_lock_irqsave(irq_off_lock, flags); | |
21831 | for_each_possible_cpu(cpu) { | |
21832 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
21833 | spin_lock(&remote->lock); | |
21834 | @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
21835 | } | |
21836 | spin_unlock(&pool->lock); | |
21837 | out: | |
21838 | - local_irq_restore(flags); | |
21839 | + local_unlock_irqrestore(irq_off_lock, flags); | |
21840 | return err; | |
21841 | } | |
21842 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
21843 | diff --git a/lib/radix-tree.c b/lib/radix-tree.c | |
21844 | index 8e6d552c40dd..741da5a77fd5 100644 | |
21845 | --- a/lib/radix-tree.c | |
21846 | +++ b/lib/radix-tree.c | |
21847 | @@ -36,7 +36,7 @@ | |
21848 | #include <linux/bitops.h> | |
21849 | #include <linux/rcupdate.h> | |
21850 | #include <linux/preempt.h> /* in_interrupt() */ | |
21851 | - | |
21852 | +#include <linux/locallock.h> | |
21853 | ||
21854 | /* Number of nodes in fully populated tree of given height */ | |
21855 | static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; | |
21856 | @@ -68,6 +68,7 @@ struct radix_tree_preload { | |
21857 | struct radix_tree_node *nodes; | |
21858 | }; | |
21859 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | |
21860 | +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock); | |
21861 | ||
21862 | static inline void *node_to_entry(void *ptr) | |
21863 | { | |
21864 | @@ -290,13 +291,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |
21865 | * succeed in getting a node here (and never reach | |
21866 | * kmem_cache_alloc) | |
21867 | */ | |
21868 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
21869 | + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads); | |
21870 | if (rtp->nr) { | |
21871 | ret = rtp->nodes; | |
21872 | rtp->nodes = ret->private_data; | |
21873 | ret->private_data = NULL; | |
21874 | rtp->nr--; | |
21875 | } | |
21876 | + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads); | |
21877 | /* | |
21878 | * Update the allocation stack trace as this is more useful | |
21879 | * for debugging. | |
21880 | @@ -357,14 +359,14 @@ static int __radix_tree_preload(gfp_t gfp_mask, int nr) | |
21881 | */ | |
21882 | gfp_mask &= ~__GFP_ACCOUNT; | |
21883 | ||
21884 | - preempt_disable(); | |
21885 | + local_lock(radix_tree_preloads_lock); | |
21886 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
21887 | while (rtp->nr < nr) { | |
21888 | - preempt_enable(); | |
21889 | + local_unlock(radix_tree_preloads_lock); | |
21890 | node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); | |
21891 | if (node == NULL) | |
21892 | goto out; | |
21893 | - preempt_disable(); | |
21894 | + local_lock(radix_tree_preloads_lock); | |
21895 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
21896 | if (rtp->nr < nr) { | |
21897 | node->private_data = rtp->nodes; | |
21898 | @@ -406,7 +408,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) | |
21899 | if (gfpflags_allow_blocking(gfp_mask)) | |
21900 | return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); | |
21901 | /* Preloading doesn't help anything with this gfp mask, skip it */ | |
21902 | - preempt_disable(); | |
21903 | + local_lock(radix_tree_preloads_lock); | |
21904 | return 0; | |
21905 | } | |
21906 | EXPORT_SYMBOL(radix_tree_maybe_preload); | |
21907 | @@ -422,7 +424,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
21908 | ||
21909 | /* Preloading doesn't help anything with this gfp mask, skip it */ | |
21910 | if (!gfpflags_allow_blocking(gfp_mask)) { | |
21911 | - preempt_disable(); | |
21912 | + local_lock(radix_tree_preloads_lock); | |
21913 | return 0; | |
21914 | } | |
21915 | ||
21916 | @@ -456,6 +458,12 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
21917 | return __radix_tree_preload(gfp_mask, nr_nodes); | |
21918 | } | |
21919 | ||
21920 | +void radix_tree_preload_end(void) | |
21921 | +{ | |
21922 | + local_unlock(radix_tree_preloads_lock); | |
21923 | +} | |
21924 | +EXPORT_SYMBOL(radix_tree_preload_end); | |
21925 | + | |
21926 | /* | |
21927 | * The maximum index which can be stored in a radix tree | |
21928 | */ | |
21929 | diff --git a/lib/scatterlist.c b/lib/scatterlist.c | |
21930 | index 004fc70fc56a..ccc46992a517 100644 | |
21931 | --- a/lib/scatterlist.c | |
21932 | +++ b/lib/scatterlist.c | |
21933 | @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) | |
21934 | flush_kernel_dcache_page(miter->page); | |
21935 | ||
21936 | if (miter->__flags & SG_MITER_ATOMIC) { | |
21937 | - WARN_ON_ONCE(preemptible()); | |
21938 | + WARN_ON_ONCE(!pagefault_disabled()); | |
21939 | kunmap_atomic(miter->addr); | |
21940 | } else | |
21941 | kunmap(miter->page); | |
21942 | @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
21943 | if (!sg_miter_skip(&miter, skip)) | |
21944 | return false; | |
21945 | ||
21946 | - local_irq_save(flags); | |
21947 | + local_irq_save_nort(flags); | |
21948 | ||
21949 | while (sg_miter_next(&miter) && offset < buflen) { | |
21950 | unsigned int len; | |
21951 | @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
21952 | ||
21953 | sg_miter_stop(&miter); | |
21954 | ||
21955 | - local_irq_restore(flags); | |
21956 | + local_irq_restore_nort(flags); | |
21957 | return offset; | |
21958 | } | |
21959 | EXPORT_SYMBOL(sg_copy_buffer); | |
21960 | diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c | |
21961 | index 1afec32de6f2..11fa431046a8 100644 | |
21962 | --- a/lib/smp_processor_id.c | |
21963 | +++ b/lib/smp_processor_id.c | |
21964 | @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, | |
21965 | if (!printk_ratelimit()) | |
21966 | goto out_enable; | |
21967 | ||
21968 | - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", | |
21969 | - what1, what2, preempt_count() - 1, current->comm, current->pid); | |
21970 | + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n", | |
21971 | + what1, what2, preempt_count() - 1, __migrate_disabled(current), | |
21972 | + current->comm, current->pid); | |
21973 | ||
21974 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | |
21975 | dump_stack(); | |
21976 | diff --git a/localversion-rt b/localversion-rt | |
21977 | new file mode 100644 | |
21978 | index 000000000000..ad3da1bcab7e | |
21979 | --- /dev/null | |
21980 | +++ b/localversion-rt | |
21981 | @@ -0,0 +1 @@ | |
21982 | +-rt4 | |
21983 | diff --git a/mm/Kconfig b/mm/Kconfig | |
21984 | index 86e3e0e74d20..77e5862a1ed2 100644 | |
21985 | --- a/mm/Kconfig | |
21986 | +++ b/mm/Kconfig | |
21987 | @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS | |
21988 | ||
21989 | config TRANSPARENT_HUGEPAGE | |
21990 | bool "Transparent Hugepage Support" | |
21991 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
21992 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
21993 | select COMPACTION | |
21994 | select RADIX_TREE_MULTIORDER | |
21995 | help | |
21996 | diff --git a/mm/backing-dev.c b/mm/backing-dev.c | |
21997 | index 8fde443f36d7..d7a863b0ec20 100644 | |
21998 | --- a/mm/backing-dev.c | |
21999 | +++ b/mm/backing-dev.c | |
22000 | @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) | |
22001 | { | |
22002 | unsigned long flags; | |
22003 | ||
22004 | - local_irq_save(flags); | |
22005 | + local_irq_save_nort(flags); | |
22006 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
22007 | - local_irq_restore(flags); | |
22008 | + local_irq_restore_nort(flags); | |
22009 | return; | |
22010 | } | |
22011 | ||
22012 | diff --git a/mm/compaction.c b/mm/compaction.c | |
22013 | index 70e6bec46dc2..6678ed58b7c6 100644 | |
22014 | --- a/mm/compaction.c | |
22015 | +++ b/mm/compaction.c | |
22016 | @@ -1593,10 +1593,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro | |
22017 | block_start_pfn(cc->migrate_pfn, cc->order); | |
22018 | ||
22019 | if (cc->last_migrated_pfn < current_block_start) { | |
22020 | - cpu = get_cpu(); | |
22021 | + cpu = get_cpu_light(); | |
22022 | + local_lock_irq(swapvec_lock); | |
22023 | lru_add_drain_cpu(cpu); | |
22024 | + local_unlock_irq(swapvec_lock); | |
22025 | drain_local_pages(zone); | |
22026 | - put_cpu(); | |
22027 | + put_cpu_light(); | |
22028 | /* No more flushing until we migrate again */ | |
22029 | cc->last_migrated_pfn = 0; | |
22030 | } | |
22031 | diff --git a/mm/filemap.c b/mm/filemap.c | |
22032 | index 779801092ef1..554e1b4d0fc5 100644 | |
22033 | --- a/mm/filemap.c | |
22034 | +++ b/mm/filemap.c | |
22035 | @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping, | |
22036 | * node->private_list is protected by | |
22037 | * mapping->tree_lock. | |
22038 | */ | |
22039 | - if (!list_empty(&node->private_list)) | |
22040 | - list_lru_del(&workingset_shadow_nodes, | |
22041 | + if (!list_empty(&node->private_list)) { | |
22042 | + local_lock(workingset_shadow_lock); | |
22043 | + list_lru_del(&__workingset_shadow_nodes, | |
22044 | &node->private_list); | |
22045 | + local_unlock(workingset_shadow_lock); | |
22046 | + } | |
22047 | } | |
22048 | return 0; | |
22049 | } | |
22050 | @@ -217,8 +220,10 @@ static void page_cache_tree_delete(struct address_space *mapping, | |
22051 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | |
22052 | list_empty(&node->private_list)) { | |
22053 | node->private_data = mapping; | |
22054 | - list_lru_add(&workingset_shadow_nodes, | |
22055 | - &node->private_list); | |
22056 | + local_lock(workingset_shadow_lock); | |
22057 | + list_lru_add(&__workingset_shadow_nodes, | |
22058 | + &node->private_list); | |
22059 | + local_unlock(workingset_shadow_lock); | |
22060 | } | |
22061 | } | |
22062 | ||
22063 | diff --git a/mm/highmem.c b/mm/highmem.c | |
22064 | index 50b4ca6787f0..77518a3b35a1 100644 | |
22065 | --- a/mm/highmem.c | |
22066 | +++ b/mm/highmem.c | |
22067 | @@ -29,10 +29,11 @@ | |
22068 | #include <linux/kgdb.h> | |
22069 | #include <asm/tlbflush.h> | |
22070 | ||
22071 | - | |
22072 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22073 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
22074 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
22075 | #endif | |
22076 | +#endif | |
22077 | ||
22078 | /* | |
22079 | * Virtual_count is not a pure "count". | |
22080 | @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) | |
22081 | unsigned long totalhigh_pages __read_mostly; | |
22082 | EXPORT_SYMBOL(totalhigh_pages); | |
22083 | ||
22084 | - | |
22085 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22086 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
22087 | +#endif | |
22088 | ||
22089 | unsigned int nr_free_highpages (void) | |
22090 | { | |
22091 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c | |
22092 | index d536a9daa511..70ac8827ee8c 100644 | |
22093 | --- a/mm/memcontrol.c | |
22094 | +++ b/mm/memcontrol.c | |
22095 | @@ -67,6 +67,7 @@ | |
22096 | #include <net/sock.h> | |
22097 | #include <net/ip.h> | |
22098 | #include "slab.h" | |
22099 | +#include <linux/locallock.h> | |
22100 | ||
22101 | #include <asm/uaccess.h> | |
22102 | ||
22103 | @@ -92,6 +93,8 @@ int do_swap_account __read_mostly; | |
22104 | #define do_swap_account 0 | |
22105 | #endif | |
22106 | ||
22107 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
22108 | + | |
22109 | /* Whether legacy memory+swap accounting is active */ | |
22110 | static bool do_memsw_account(void) | |
22111 | { | |
22112 | @@ -1692,6 +1695,7 @@ struct memcg_stock_pcp { | |
22113 | #define FLUSHING_CACHED_CHARGE 0 | |
22114 | }; | |
22115 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | |
22116 | +static DEFINE_LOCAL_IRQ_LOCK(memcg_stock_ll); | |
22117 | static DEFINE_MUTEX(percpu_charge_mutex); | |
22118 | ||
22119 | /** | |
22120 | @@ -1714,7 +1718,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
22121 | if (nr_pages > CHARGE_BATCH) | |
22122 | return ret; | |
22123 | ||
22124 | - local_irq_save(flags); | |
22125 | + local_lock_irqsave(memcg_stock_ll, flags); | |
22126 | ||
22127 | stock = this_cpu_ptr(&memcg_stock); | |
22128 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { | |
22129 | @@ -1722,7 +1726,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
22130 | ret = true; | |
22131 | } | |
22132 | ||
22133 | - local_irq_restore(flags); | |
22134 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
22135 | ||
22136 | return ret; | |
22137 | } | |
22138 | @@ -1749,13 +1753,13 @@ static void drain_local_stock(struct work_struct *dummy) | |
22139 | struct memcg_stock_pcp *stock; | |
22140 | unsigned long flags; | |
22141 | ||
22142 | - local_irq_save(flags); | |
22143 | + local_lock_irqsave(memcg_stock_ll, flags); | |
22144 | ||
22145 | stock = this_cpu_ptr(&memcg_stock); | |
22146 | drain_stock(stock); | |
22147 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | |
22148 | ||
22149 | - local_irq_restore(flags); | |
22150 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
22151 | } | |
22152 | ||
22153 | /* | |
22154 | @@ -1767,7 +1771,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
22155 | struct memcg_stock_pcp *stock; | |
22156 | unsigned long flags; | |
22157 | ||
22158 | - local_irq_save(flags); | |
22159 | + local_lock_irqsave(memcg_stock_ll, flags); | |
22160 | ||
22161 | stock = this_cpu_ptr(&memcg_stock); | |
22162 | if (stock->cached != memcg) { /* reset if necessary */ | |
22163 | @@ -1776,7 +1780,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
22164 | } | |
22165 | stock->nr_pages += nr_pages; | |
22166 | ||
22167 | - local_irq_restore(flags); | |
22168 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
22169 | } | |
22170 | ||
22171 | /* | |
22172 | @@ -1792,7 +1796,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
22173 | return; | |
22174 | /* Notify other cpus that system-wide "drain" is running */ | |
22175 | get_online_cpus(); | |
22176 | - curcpu = get_cpu(); | |
22177 | + curcpu = get_cpu_light(); | |
22178 | for_each_online_cpu(cpu) { | |
22179 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
22180 | struct mem_cgroup *memcg; | |
22181 | @@ -1809,7 +1813,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
22182 | schedule_work_on(cpu, &stock->work); | |
22183 | } | |
22184 | } | |
22185 | - put_cpu(); | |
22186 | + put_cpu_light(); | |
22187 | put_online_cpus(); | |
22188 | mutex_unlock(&percpu_charge_mutex); | |
22189 | } | |
22190 | @@ -4548,12 +4552,12 @@ static int mem_cgroup_move_account(struct page *page, | |
22191 | ||
22192 | ret = 0; | |
22193 | ||
22194 | - local_irq_disable(); | |
22195 | + local_lock_irq(event_lock); | |
22196 | mem_cgroup_charge_statistics(to, page, compound, nr_pages); | |
22197 | memcg_check_events(to, page); | |
22198 | mem_cgroup_charge_statistics(from, page, compound, -nr_pages); | |
22199 | memcg_check_events(from, page); | |
22200 | - local_irq_enable(); | |
22201 | + local_unlock_irq(event_lock); | |
22202 | out_unlock: | |
22203 | unlock_page(page); | |
22204 | out: | |
22205 | @@ -5428,10 +5432,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, | |
22206 | ||
22207 | commit_charge(page, memcg, lrucare); | |
22208 | ||
22209 | - local_irq_disable(); | |
22210 | + local_lock_irq(event_lock); | |
22211 | mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); | |
22212 | memcg_check_events(memcg, page); | |
22213 | - local_irq_enable(); | |
22214 | + local_unlock_irq(event_lock); | |
22215 | ||
22216 | if (do_memsw_account() && PageSwapCache(page)) { | |
22217 | swp_entry_t entry = { .val = page_private(page) }; | |
22218 | @@ -5487,14 +5491,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, | |
22219 | memcg_oom_recover(memcg); | |
22220 | } | |
22221 | ||
22222 | - local_irq_save(flags); | |
22223 | + local_lock_irqsave(event_lock, flags); | |
22224 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | |
22225 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); | |
22226 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); | |
22227 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); | |
22228 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); | |
22229 | memcg_check_events(memcg, dummy_page); | |
22230 | - local_irq_restore(flags); | |
22231 | + local_unlock_irqrestore(event_lock, flags); | |
22232 | ||
22233 | if (!mem_cgroup_is_root(memcg)) | |
22234 | css_put_many(&memcg->css, nr_pages); | |
22235 | @@ -5649,10 +5653,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) | |
22236 | ||
22237 | commit_charge(newpage, memcg, false); | |
22238 | ||
22239 | - local_irq_save(flags); | |
22240 | + local_lock_irqsave(event_lock, flags); | |
22241 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | |
22242 | memcg_check_events(memcg, newpage); | |
22243 | - local_irq_restore(flags); | |
22244 | + local_unlock_irqrestore(event_lock, flags); | |
22245 | } | |
22246 | ||
22247 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | |
22248 | @@ -5832,6 +5836,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
22249 | { | |
22250 | struct mem_cgroup *memcg, *swap_memcg; | |
22251 | unsigned short oldid; | |
22252 | + unsigned long flags; | |
22253 | ||
22254 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
22255 | VM_BUG_ON_PAGE(page_count(page), page); | |
22256 | @@ -5872,12 +5877,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
22257 | * important here to have the interrupts disabled because it is the | |
22258 | * only synchronisation we have for udpating the per-CPU variables. | |
22259 | */ | |
22260 | + local_lock_irqsave(event_lock, flags); | |
22261 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
22262 | VM_BUG_ON(!irqs_disabled()); | |
22263 | +#endif | |
22264 | mem_cgroup_charge_statistics(memcg, page, false, -1); | |
22265 | memcg_check_events(memcg, page); | |
22266 | ||
22267 | if (!mem_cgroup_is_root(memcg)) | |
22268 | css_put(&memcg->css); | |
22269 | + local_unlock_irqrestore(event_lock, flags); | |
22270 | } | |
22271 | ||
22272 | /* | |
22273 | diff --git a/mm/mmu_context.c b/mm/mmu_context.c | |
22274 | index 6f4d27c5bb32..5cd25c745a8f 100644 | |
22275 | --- a/mm/mmu_context.c | |
22276 | +++ b/mm/mmu_context.c | |
22277 | @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm) | |
22278 | struct task_struct *tsk = current; | |
22279 | ||
22280 | task_lock(tsk); | |
22281 | + preempt_disable_rt(); | |
22282 | active_mm = tsk->active_mm; | |
22283 | if (active_mm != mm) { | |
22284 | atomic_inc(&mm->mm_count); | |
22285 | @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm) | |
22286 | } | |
22287 | tsk->mm = mm; | |
22288 | switch_mm(active_mm, mm, tsk); | |
22289 | + preempt_enable_rt(); | |
22290 | task_unlock(tsk); | |
22291 | #ifdef finish_arch_post_lock_switch | |
22292 | finish_arch_post_lock_switch(); | |
22293 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
22294 | index 34ada718ef47..21f0dc3fe2aa 100644 | |
22295 | --- a/mm/page_alloc.c | |
22296 | +++ b/mm/page_alloc.c | |
22297 | @@ -61,6 +61,7 @@ | |
22298 | #include <linux/page_ext.h> | |
22299 | #include <linux/hugetlb.h> | |
22300 | #include <linux/sched/rt.h> | |
22301 | +#include <linux/locallock.h> | |
22302 | #include <linux/page_owner.h> | |
22303 | #include <linux/kthread.h> | |
22304 | #include <linux/memcontrol.h> | |
22305 | @@ -281,6 +282,18 @@ EXPORT_SYMBOL(nr_node_ids); | |
22306 | EXPORT_SYMBOL(nr_online_nodes); | |
22307 | #endif | |
22308 | ||
22309 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
22310 | + | |
22311 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
22312 | +# define cpu_lock_irqsave(cpu, flags) \ | |
22313 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
22314 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
22315 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
22316 | +#else | |
22317 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
22318 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
22319 | +#endif | |
22320 | + | |
22321 | int page_group_by_mobility_disabled __read_mostly; | |
22322 | ||
22323 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
22324 | @@ -1072,7 +1085,7 @@ static bool bulkfree_pcp_prepare(struct page *page) | |
22325 | #endif /* CONFIG_DEBUG_VM */ | |
22326 | ||
22327 | /* | |
22328 | - * Frees a number of pages from the PCP lists | |
22329 | + * Frees a number of pages which have been collected from the pcp lists. | |
22330 | * Assumes all pages on list are in same zone, and of same order. | |
22331 | * count is the number of pages to free. | |
22332 | * | |
22333 | @@ -1083,19 +1096,58 @@ static bool bulkfree_pcp_prepare(struct page *page) | |
22334 | * pinned" detection logic. | |
22335 | */ | |
22336 | static void free_pcppages_bulk(struct zone *zone, int count, | |
22337 | - struct per_cpu_pages *pcp) | |
22338 | + struct list_head *list) | |
22339 | { | |
22340 | - int migratetype = 0; | |
22341 | - int batch_free = 0; | |
22342 | unsigned long nr_scanned; | |
22343 | bool isolated_pageblocks; | |
22344 | + unsigned long flags; | |
22345 | + | |
22346 | + spin_lock_irqsave(&zone->lock, flags); | |
22347 | ||
22348 | - spin_lock(&zone->lock); | |
22349 | isolated_pageblocks = has_isolate_pageblock(zone); | |
22350 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
22351 | if (nr_scanned) | |
22352 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
22353 | ||
22354 | + while (!list_empty(list)) { | |
22355 | + struct page *page; | |
22356 | + int mt; /* migratetype of the to-be-freed page */ | |
22357 | + | |
22358 | + page = list_first_entry(list, struct page, lru); | |
22359 | + /* must delete as __free_one_page list manipulates */ | |
22360 | + list_del(&page->lru); | |
22361 | + | |
22362 | + mt = get_pcppage_migratetype(page); | |
22363 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
22364 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
22365 | + /* Pageblock could have been isolated meanwhile */ | |
22366 | + if (unlikely(isolated_pageblocks)) | |
22367 | + mt = get_pageblock_migratetype(page); | |
22368 | + | |
22369 | + if (bulkfree_pcp_prepare(page)) | |
22370 | + continue; | |
22371 | + | |
22372 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
22373 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
22374 | + count--; | |
22375 | + } | |
22376 | + WARN_ON(count != 0); | |
22377 | + spin_unlock_irqrestore(&zone->lock, flags); | |
22378 | +} | |
22379 | + | |
22380 | +/* | |
22381 | + * Moves a number of pages from the PCP lists to free list which | |
22382 | + * is freed outside of the locked region. | |
22383 | + * | |
22384 | + * Assumes all pages on list are in same zone, and of same order. | |
22385 | + * count is the number of pages to free. | |
22386 | + */ | |
22387 | +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, | |
22388 | + struct list_head *dst) | |
22389 | +{ | |
22390 | + int migratetype = 0; | |
22391 | + int batch_free = 0; | |
22392 | + | |
22393 | while (count) { | |
22394 | struct page *page; | |
22395 | struct list_head *list; | |
22396 | @@ -1111,7 +1163,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
22397 | batch_free++; | |
22398 | if (++migratetype == MIGRATE_PCPTYPES) | |
22399 | migratetype = 0; | |
22400 | - list = &pcp->lists[migratetype]; | |
22401 | + list = &src->lists[migratetype]; | |
22402 | } while (list_empty(list)); | |
22403 | ||
22404 | /* This is the only non-empty list. Free them all. */ | |
22405 | @@ -1119,27 +1171,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
22406 | batch_free = count; | |
22407 | ||
22408 | do { | |
22409 | - int mt; /* migratetype of the to-be-freed page */ | |
22410 | - | |
22411 | page = list_last_entry(list, struct page, lru); | |
22412 | - /* must delete as __free_one_page list manipulates */ | |
22413 | list_del(&page->lru); | |
22414 | ||
22415 | - mt = get_pcppage_migratetype(page); | |
22416 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
22417 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
22418 | - /* Pageblock could have been isolated meanwhile */ | |
22419 | - if (unlikely(isolated_pageblocks)) | |
22420 | - mt = get_pageblock_migratetype(page); | |
22421 | - | |
22422 | - if (bulkfree_pcp_prepare(page)) | |
22423 | - continue; | |
22424 | - | |
22425 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
22426 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
22427 | + list_add(&page->lru, dst); | |
22428 | } while (--count && --batch_free && !list_empty(list)); | |
22429 | } | |
22430 | - spin_unlock(&zone->lock); | |
22431 | } | |
22432 | ||
22433 | static void free_one_page(struct zone *zone, | |
22434 | @@ -1148,7 +1185,9 @@ static void free_one_page(struct zone *zone, | |
22435 | int migratetype) | |
22436 | { | |
22437 | unsigned long nr_scanned; | |
22438 | - spin_lock(&zone->lock); | |
22439 | + unsigned long flags; | |
22440 | + | |
22441 | + spin_lock_irqsave(&zone->lock, flags); | |
22442 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
22443 | if (nr_scanned) | |
22444 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
22445 | @@ -1158,7 +1197,7 @@ static void free_one_page(struct zone *zone, | |
22446 | migratetype = get_pfnblock_migratetype(page, pfn); | |
22447 | } | |
22448 | __free_one_page(page, pfn, zone, order, migratetype); | |
22449 | - spin_unlock(&zone->lock); | |
22450 | + spin_unlock_irqrestore(&zone->lock, flags); | |
22451 | } | |
22452 | ||
22453 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |
22454 | @@ -1244,10 +1283,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |
22455 | return; | |
22456 | ||
22457 | migratetype = get_pfnblock_migratetype(page, pfn); | |
22458 | - local_irq_save(flags); | |
22459 | + local_lock_irqsave(pa_lock, flags); | |
22460 | __count_vm_events(PGFREE, 1 << order); | |
22461 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
22462 | - local_irq_restore(flags); | |
22463 | + local_unlock_irqrestore(pa_lock, flags); | |
22464 | } | |
22465 | ||
22466 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |
22467 | @@ -2246,16 +2285,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |
22468 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |
22469 | { | |
22470 | unsigned long flags; | |
22471 | + LIST_HEAD(dst); | |
22472 | int to_drain, batch; | |
22473 | ||
22474 | - local_irq_save(flags); | |
22475 | + local_lock_irqsave(pa_lock, flags); | |
22476 | batch = READ_ONCE(pcp->batch); | |
22477 | to_drain = min(pcp->count, batch); | |
22478 | if (to_drain > 0) { | |
22479 | - free_pcppages_bulk(zone, to_drain, pcp); | |
22480 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
22481 | pcp->count -= to_drain; | |
22482 | } | |
22483 | - local_irq_restore(flags); | |
22484 | + local_unlock_irqrestore(pa_lock, flags); | |
22485 | + free_pcppages_bulk(zone, to_drain, &dst); | |
22486 | } | |
22487 | #endif | |
22488 | ||
22489 | @@ -2271,16 +2312,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) | |
22490 | unsigned long flags; | |
22491 | struct per_cpu_pageset *pset; | |
22492 | struct per_cpu_pages *pcp; | |
22493 | + LIST_HEAD(dst); | |
22494 | + int count; | |
22495 | ||
22496 | - local_irq_save(flags); | |
22497 | + cpu_lock_irqsave(cpu, flags); | |
22498 | pset = per_cpu_ptr(zone->pageset, cpu); | |
22499 | ||
22500 | pcp = &pset->pcp; | |
22501 | - if (pcp->count) { | |
22502 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
22503 | + count = pcp->count; | |
22504 | + if (count) { | |
22505 | + isolate_pcp_pages(count, pcp, &dst); | |
22506 | pcp->count = 0; | |
22507 | } | |
22508 | - local_irq_restore(flags); | |
22509 | + cpu_unlock_irqrestore(cpu, flags); | |
22510 | + if (count) | |
22511 | + free_pcppages_bulk(zone, count, &dst); | |
22512 | } | |
22513 | ||
22514 | /* | |
22515 | @@ -2366,8 +2412,17 @@ void drain_all_pages(struct zone *zone) | |
22516 | else | |
22517 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
22518 | } | |
22519 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
22520 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | |
22521 | zone, 1); | |
22522 | +#else | |
22523 | + for_each_cpu(cpu, &cpus_with_pcps) { | |
22524 | + if (zone) | |
22525 | + drain_pages_zone(cpu, zone); | |
22526 | + else | |
22527 | + drain_pages(cpu); | |
22528 | + } | |
22529 | +#endif | |
22530 | } | |
22531 | ||
22532 | #ifdef CONFIG_HIBERNATION | |
22533 | @@ -2427,7 +2482,7 @@ void free_hot_cold_page(struct page *page, bool cold) | |
22534 | ||
22535 | migratetype = get_pfnblock_migratetype(page, pfn); | |
22536 | set_pcppage_migratetype(page, migratetype); | |
22537 | - local_irq_save(flags); | |
22538 | + local_lock_irqsave(pa_lock, flags); | |
22539 | __count_vm_event(PGFREE); | |
22540 | ||
22541 | /* | |
22542 | @@ -2453,12 +2508,17 @@ void free_hot_cold_page(struct page *page, bool cold) | |
22543 | pcp->count++; | |
22544 | if (pcp->count >= pcp->high) { | |
22545 | unsigned long batch = READ_ONCE(pcp->batch); | |
22546 | - free_pcppages_bulk(zone, batch, pcp); | |
22547 | + LIST_HEAD(dst); | |
22548 | + | |
22549 | + isolate_pcp_pages(batch, pcp, &dst); | |
22550 | pcp->count -= batch; | |
22551 | + local_unlock_irqrestore(pa_lock, flags); | |
22552 | + free_pcppages_bulk(zone, batch, &dst); | |
22553 | + return; | |
22554 | } | |
22555 | ||
22556 | out: | |
22557 | - local_irq_restore(flags); | |
22558 | + local_unlock_irqrestore(pa_lock, flags); | |
22559 | } | |
22560 | ||
22561 | /* | |
22562 | @@ -2600,7 +2660,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
22563 | struct per_cpu_pages *pcp; | |
22564 | struct list_head *list; | |
22565 | ||
22566 | - local_irq_save(flags); | |
22567 | + local_lock_irqsave(pa_lock, flags); | |
22568 | do { | |
22569 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
22570 | list = &pcp->lists[migratetype]; | |
22571 | @@ -2627,7 +2687,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
22572 | * allocate greater than order-1 page units with __GFP_NOFAIL. | |
22573 | */ | |
22574 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | |
22575 | - spin_lock_irqsave(&zone->lock, flags); | |
22576 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
22577 | ||
22578 | do { | |
22579 | page = NULL; | |
22580 | @@ -2639,22 +2699,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
22581 | if (!page) | |
22582 | page = __rmqueue(zone, order, migratetype); | |
22583 | } while (page && check_new_pages(page, order)); | |
22584 | - spin_unlock(&zone->lock); | |
22585 | - if (!page) | |
22586 | + if (!page) { | |
22587 | + spin_unlock(&zone->lock); | |
22588 | goto failed; | |
22589 | + } | |
22590 | __mod_zone_freepage_state(zone, -(1 << order), | |
22591 | get_pcppage_migratetype(page)); | |
22592 | + spin_unlock(&zone->lock); | |
22593 | } | |
22594 | ||
22595 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
22596 | zone_statistics(preferred_zone, zone, gfp_flags); | |
22597 | - local_irq_restore(flags); | |
22598 | + local_unlock_irqrestore(pa_lock, flags); | |
22599 | ||
22600 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | |
22601 | return page; | |
22602 | ||
22603 | failed: | |
22604 | - local_irq_restore(flags); | |
22605 | + local_unlock_irqrestore(pa_lock, flags); | |
22606 | return NULL; | |
22607 | } | |
22608 | ||
22609 | @@ -6505,7 +6567,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |
22610 | int cpu = (unsigned long)hcpu; | |
22611 | ||
22612 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
22613 | + local_lock_irq_on(swapvec_lock, cpu); | |
22614 | lru_add_drain_cpu(cpu); | |
22615 | + local_unlock_irq_on(swapvec_lock, cpu); | |
22616 | drain_pages(cpu); | |
22617 | ||
22618 | /* | |
22619 | @@ -6531,6 +6595,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |
22620 | void __init page_alloc_init(void) | |
22621 | { | |
22622 | hotcpu_notifier(page_alloc_cpu_notify, 0); | |
22623 | + local_irq_lock_init(pa_lock); | |
22624 | } | |
22625 | ||
22626 | /* | |
22627 | @@ -7359,7 +7424,7 @@ void zone_pcp_reset(struct zone *zone) | |
22628 | struct per_cpu_pageset *pset; | |
22629 | ||
22630 | /* avoid races with drain_pages() */ | |
22631 | - local_irq_save(flags); | |
22632 | + local_lock_irqsave(pa_lock, flags); | |
22633 | if (zone->pageset != &boot_pageset) { | |
22634 | for_each_online_cpu(cpu) { | |
22635 | pset = per_cpu_ptr(zone->pageset, cpu); | |
22636 | @@ -7368,7 +7433,7 @@ void zone_pcp_reset(struct zone *zone) | |
22637 | free_percpu(zone->pageset); | |
22638 | zone->pageset = &boot_pageset; | |
22639 | } | |
22640 | - local_irq_restore(flags); | |
22641 | + local_unlock_irqrestore(pa_lock, flags); | |
22642 | } | |
22643 | ||
22644 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
22645 | diff --git a/mm/slab.h b/mm/slab.h | |
22646 | index bc05fdc3edce..610cf61634f0 100644 | |
22647 | --- a/mm/slab.h | |
22648 | +++ b/mm/slab.h | |
22649 | @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |
22650 | * The slab lists for all objects. | |
22651 | */ | |
22652 | struct kmem_cache_node { | |
22653 | +#ifdef CONFIG_SLUB | |
22654 | + raw_spinlock_t list_lock; | |
22655 | +#else | |
22656 | spinlock_t list_lock; | |
22657 | +#endif | |
22658 | ||
22659 | #ifdef CONFIG_SLAB | |
22660 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
22661 | diff --git a/mm/slub.c b/mm/slub.c | |
22662 | index 2b3e740609e9..1732f9c5d31f 100644 | |
22663 | --- a/mm/slub.c | |
22664 | +++ b/mm/slub.c | |
22665 | @@ -1141,7 +1141,7 @@ static noinline int free_debug_processing( | |
22666 | unsigned long uninitialized_var(flags); | |
22667 | int ret = 0; | |
22668 | ||
22669 | - spin_lock_irqsave(&n->list_lock, flags); | |
22670 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22671 | slab_lock(page); | |
22672 | ||
22673 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | |
22674 | @@ -1176,7 +1176,7 @@ static noinline int free_debug_processing( | |
22675 | bulk_cnt, cnt); | |
22676 | ||
22677 | slab_unlock(page); | |
22678 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22679 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22680 | if (!ret) | |
22681 | slab_fix(s, "Object at 0x%p not freed", object); | |
22682 | return ret; | |
22683 | @@ -1304,6 +1304,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, | |
22684 | ||
22685 | #endif /* CONFIG_SLUB_DEBUG */ | |
22686 | ||
22687 | +struct slub_free_list { | |
22688 | + raw_spinlock_t lock; | |
22689 | + struct list_head list; | |
22690 | +}; | |
22691 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
22692 | + | |
22693 | /* | |
22694 | * Hooks for other subsystems that check memory allocations. In a typical | |
22695 | * production configuration these hooks all should produce no code at all. | |
22696 | @@ -1523,10 +1529,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
22697 | void *start, *p; | |
22698 | int idx, order; | |
22699 | bool shuffle; | |
22700 | + bool enableirqs = false; | |
22701 | ||
22702 | flags &= gfp_allowed_mask; | |
22703 | ||
22704 | if (gfpflags_allow_blocking(flags)) | |
22705 | + enableirqs = true; | |
22706 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22707 | + if (system_state == SYSTEM_RUNNING) | |
22708 | + enableirqs = true; | |
22709 | +#endif | |
22710 | + if (enableirqs) | |
22711 | local_irq_enable(); | |
22712 | ||
22713 | flags |= s->allocflags; | |
22714 | @@ -1601,7 +1614,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
22715 | page->frozen = 1; | |
22716 | ||
22717 | out: | |
22718 | - if (gfpflags_allow_blocking(flags)) | |
22719 | + if (enableirqs) | |
22720 | local_irq_disable(); | |
22721 | if (!page) | |
22722 | return NULL; | |
22723 | @@ -1660,6 +1673,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |
22724 | __free_pages(page, order); | |
22725 | } | |
22726 | ||
22727 | +static void free_delayed(struct list_head *h) | |
22728 | +{ | |
22729 | + while(!list_empty(h)) { | |
22730 | + struct page *page = list_first_entry(h, struct page, lru); | |
22731 | + | |
22732 | + list_del(&page->lru); | |
22733 | + __free_slab(page->slab_cache, page); | |
22734 | + } | |
22735 | +} | |
22736 | + | |
22737 | #define need_reserve_slab_rcu \ | |
22738 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
22739 | ||
22740 | @@ -1691,6 +1714,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) | |
22741 | } | |
22742 | ||
22743 | call_rcu(head, rcu_free_slab); | |
22744 | + } else if (irqs_disabled()) { | |
22745 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
22746 | + | |
22747 | + raw_spin_lock(&f->lock); | |
22748 | + list_add(&page->lru, &f->list); | |
22749 | + raw_spin_unlock(&f->lock); | |
22750 | } else | |
22751 | __free_slab(s, page); | |
22752 | } | |
22753 | @@ -1798,7 +1827,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
22754 | if (!n || !n->nr_partial) | |
22755 | return NULL; | |
22756 | ||
22757 | - spin_lock(&n->list_lock); | |
22758 | + raw_spin_lock(&n->list_lock); | |
22759 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
22760 | void *t; | |
22761 | ||
22762 | @@ -1823,7 +1852,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
22763 | break; | |
22764 | ||
22765 | } | |
22766 | - spin_unlock(&n->list_lock); | |
22767 | + raw_spin_unlock(&n->list_lock); | |
22768 | return object; | |
22769 | } | |
22770 | ||
22771 | @@ -2069,7 +2098,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
22772 | * that acquire_slab() will see a slab page that | |
22773 | * is frozen | |
22774 | */ | |
22775 | - spin_lock(&n->list_lock); | |
22776 | + raw_spin_lock(&n->list_lock); | |
22777 | } | |
22778 | } else { | |
22779 | m = M_FULL; | |
22780 | @@ -2080,7 +2109,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
22781 | * slabs from diagnostic functions will not see | |
22782 | * any frozen slabs. | |
22783 | */ | |
22784 | - spin_lock(&n->list_lock); | |
22785 | + raw_spin_lock(&n->list_lock); | |
22786 | } | |
22787 | } | |
22788 | ||
22789 | @@ -2115,7 +2144,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
22790 | goto redo; | |
22791 | ||
22792 | if (lock) | |
22793 | - spin_unlock(&n->list_lock); | |
22794 | + raw_spin_unlock(&n->list_lock); | |
22795 | ||
22796 | if (m == M_FREE) { | |
22797 | stat(s, DEACTIVATE_EMPTY); | |
22798 | @@ -2147,10 +2176,10 @@ static void unfreeze_partials(struct kmem_cache *s, | |
22799 | n2 = get_node(s, page_to_nid(page)); | |
22800 | if (n != n2) { | |
22801 | if (n) | |
22802 | - spin_unlock(&n->list_lock); | |
22803 | + raw_spin_unlock(&n->list_lock); | |
22804 | ||
22805 | n = n2; | |
22806 | - spin_lock(&n->list_lock); | |
22807 | + raw_spin_lock(&n->list_lock); | |
22808 | } | |
22809 | ||
22810 | do { | |
22811 | @@ -2179,7 +2208,7 @@ static void unfreeze_partials(struct kmem_cache *s, | |
22812 | } | |
22813 | ||
22814 | if (n) | |
22815 | - spin_unlock(&n->list_lock); | |
22816 | + raw_spin_unlock(&n->list_lock); | |
22817 | ||
22818 | while (discard_page) { | |
22819 | page = discard_page; | |
22820 | @@ -2218,14 +2247,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |
22821 | pobjects = oldpage->pobjects; | |
22822 | pages = oldpage->pages; | |
22823 | if (drain && pobjects > s->cpu_partial) { | |
22824 | + struct slub_free_list *f; | |
22825 | unsigned long flags; | |
22826 | + LIST_HEAD(tofree); | |
22827 | /* | |
22828 | * partial array is full. Move the existing | |
22829 | * set to the per node partial list. | |
22830 | */ | |
22831 | local_irq_save(flags); | |
22832 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
22833 | + f = this_cpu_ptr(&slub_free_list); | |
22834 | + raw_spin_lock(&f->lock); | |
22835 | + list_splice_init(&f->list, &tofree); | |
22836 | + raw_spin_unlock(&f->lock); | |
22837 | local_irq_restore(flags); | |
22838 | + free_delayed(&tofree); | |
22839 | oldpage = NULL; | |
22840 | pobjects = 0; | |
22841 | pages = 0; | |
22842 | @@ -2297,7 +2333,22 @@ static bool has_cpu_slab(int cpu, void *info) | |
22843 | ||
22844 | static void flush_all(struct kmem_cache *s) | |
22845 | { | |
22846 | + LIST_HEAD(tofree); | |
22847 | + int cpu; | |
22848 | + | |
22849 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
22850 | + for_each_online_cpu(cpu) { | |
22851 | + struct slub_free_list *f; | |
22852 | + | |
22853 | + if (!has_cpu_slab(cpu, s)) | |
22854 | + continue; | |
22855 | + | |
22856 | + f = &per_cpu(slub_free_list, cpu); | |
22857 | + raw_spin_lock_irq(&f->lock); | |
22858 | + list_splice_init(&f->list, &tofree); | |
22859 | + raw_spin_unlock_irq(&f->lock); | |
22860 | + free_delayed(&tofree); | |
22861 | + } | |
22862 | } | |
22863 | ||
22864 | /* | |
22865 | @@ -2352,10 +2403,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, | |
22866 | unsigned long x = 0; | |
22867 | struct page *page; | |
22868 | ||
22869 | - spin_lock_irqsave(&n->list_lock, flags); | |
22870 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22871 | list_for_each_entry(page, &n->partial, lru) | |
22872 | x += get_count(page); | |
22873 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22874 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22875 | return x; | |
22876 | } | |
22877 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
22878 | @@ -2493,8 +2544,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |
22879 | * already disabled (which is the case for bulk allocation). | |
22880 | */ | |
22881 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22882 | - unsigned long addr, struct kmem_cache_cpu *c) | |
22883 | + unsigned long addr, struct kmem_cache_cpu *c, | |
22884 | + struct list_head *to_free) | |
22885 | { | |
22886 | + struct slub_free_list *f; | |
22887 | void *freelist; | |
22888 | struct page *page; | |
22889 | ||
22890 | @@ -2554,6 +2607,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22891 | VM_BUG_ON(!c->page->frozen); | |
22892 | c->freelist = get_freepointer(s, freelist); | |
22893 | c->tid = next_tid(c->tid); | |
22894 | + | |
22895 | +out: | |
22896 | + f = this_cpu_ptr(&slub_free_list); | |
22897 | + raw_spin_lock(&f->lock); | |
22898 | + list_splice_init(&f->list, to_free); | |
22899 | + raw_spin_unlock(&f->lock); | |
22900 | + | |
22901 | return freelist; | |
22902 | ||
22903 | new_slab: | |
22904 | @@ -2585,7 +2645,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22905 | deactivate_slab(s, page, get_freepointer(s, freelist)); | |
22906 | c->page = NULL; | |
22907 | c->freelist = NULL; | |
22908 | - return freelist; | |
22909 | + goto out; | |
22910 | } | |
22911 | ||
22912 | /* | |
22913 | @@ -2597,6 +2657,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22914 | { | |
22915 | void *p; | |
22916 | unsigned long flags; | |
22917 | + LIST_HEAD(tofree); | |
22918 | ||
22919 | local_irq_save(flags); | |
22920 | #ifdef CONFIG_PREEMPT | |
22921 | @@ -2608,8 +2669,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
22922 | c = this_cpu_ptr(s->cpu_slab); | |
22923 | #endif | |
22924 | ||
22925 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
22926 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
22927 | local_irq_restore(flags); | |
22928 | + free_delayed(&tofree); | |
22929 | return p; | |
22930 | } | |
22931 | ||
22932 | @@ -2795,7 +2857,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
22933 | ||
22934 | do { | |
22935 | if (unlikely(n)) { | |
22936 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22937 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22938 | n = NULL; | |
22939 | } | |
22940 | prior = page->freelist; | |
22941 | @@ -2827,7 +2889,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
22942 | * Otherwise the list_lock will synchronize with | |
22943 | * other processors updating the list of slabs. | |
22944 | */ | |
22945 | - spin_lock_irqsave(&n->list_lock, flags); | |
22946 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
22947 | ||
22948 | } | |
22949 | } | |
22950 | @@ -2869,7 +2931,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
22951 | add_partial(n, page, DEACTIVATE_TO_TAIL); | |
22952 | stat(s, FREE_ADD_PARTIAL); | |
22953 | } | |
22954 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22955 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22956 | return; | |
22957 | ||
22958 | slab_empty: | |
22959 | @@ -2884,7 +2946,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
22960 | remove_full(s, n, page); | |
22961 | } | |
22962 | ||
22963 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
22964 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
22965 | stat(s, FREE_SLAB); | |
22966 | discard_slab(s, page); | |
22967 | } | |
22968 | @@ -3089,6 +3151,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
22969 | void **p) | |
22970 | { | |
22971 | struct kmem_cache_cpu *c; | |
22972 | + LIST_HEAD(to_free); | |
22973 | int i; | |
22974 | ||
22975 | /* memcg and kmem_cache debug support */ | |
22976 | @@ -3112,7 +3175,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
22977 | * of re-populating per CPU c->freelist | |
22978 | */ | |
22979 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
22980 | - _RET_IP_, c); | |
22981 | + _RET_IP_, c, &to_free); | |
22982 | if (unlikely(!p[i])) | |
22983 | goto error; | |
22984 | ||
22985 | @@ -3124,6 +3187,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
22986 | } | |
22987 | c->tid = next_tid(c->tid); | |
22988 | local_irq_enable(); | |
22989 | + free_delayed(&to_free); | |
22990 | ||
22991 | /* Clear memory outside IRQ disabled fastpath loop */ | |
22992 | if (unlikely(flags & __GFP_ZERO)) { | |
22993 | @@ -3271,7 +3335,7 @@ static void | |
22994 | init_kmem_cache_node(struct kmem_cache_node *n) | |
22995 | { | |
22996 | n->nr_partial = 0; | |
22997 | - spin_lock_init(&n->list_lock); | |
22998 | + raw_spin_lock_init(&n->list_lock); | |
22999 | INIT_LIST_HEAD(&n->partial); | |
23000 | #ifdef CONFIG_SLUB_DEBUG | |
23001 | atomic_long_set(&n->nr_slabs, 0); | |
23002 | @@ -3615,6 +3679,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |
23003 | const char *text) | |
23004 | { | |
23005 | #ifdef CONFIG_SLUB_DEBUG | |
23006 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23007 | + /* XXX move out of irq-off section */ | |
23008 | + slab_err(s, page, text, s->name); | |
23009 | +#else | |
23010 | void *addr = page_address(page); | |
23011 | void *p; | |
23012 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * | |
23013 | @@ -3635,6 +3703,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |
23014 | slab_unlock(page); | |
23015 | kfree(map); | |
23016 | #endif | |
23017 | +#endif | |
23018 | } | |
23019 | ||
23020 | /* | |
23021 | @@ -3648,7 +3717,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |
23022 | struct page *page, *h; | |
23023 | ||
23024 | BUG_ON(irqs_disabled()); | |
23025 | - spin_lock_irq(&n->list_lock); | |
23026 | + raw_spin_lock_irq(&n->list_lock); | |
23027 | list_for_each_entry_safe(page, h, &n->partial, lru) { | |
23028 | if (!page->inuse) { | |
23029 | remove_partial(n, page); | |
23030 | @@ -3658,7 +3727,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |
23031 | "Objects remaining in %s on __kmem_cache_shutdown()"); | |
23032 | } | |
23033 | } | |
23034 | - spin_unlock_irq(&n->list_lock); | |
23035 | + raw_spin_unlock_irq(&n->list_lock); | |
23036 | ||
23037 | list_for_each_entry_safe(page, h, &discard, lru) | |
23038 | discard_slab(s, page); | |
23039 | @@ -3916,7 +3985,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
23040 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) | |
23041 | INIT_LIST_HEAD(promote + i); | |
23042 | ||
23043 | - spin_lock_irqsave(&n->list_lock, flags); | |
23044 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
23045 | ||
23046 | /* | |
23047 | * Build lists of slabs to discard or promote. | |
23048 | @@ -3947,7 +4016,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
23049 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) | |
23050 | list_splice(promote + i, &n->partial); | |
23051 | ||
23052 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
23053 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
23054 | ||
23055 | /* Release empty slabs */ | |
23056 | list_for_each_entry_safe(page, t, &discard, lru) | |
23057 | @@ -4123,6 +4192,12 @@ void __init kmem_cache_init(void) | |
23058 | { | |
23059 | static __initdata struct kmem_cache boot_kmem_cache, | |
23060 | boot_kmem_cache_node; | |
23061 | + int cpu; | |
23062 | + | |
23063 | + for_each_possible_cpu(cpu) { | |
23064 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
23065 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
23066 | + } | |
23067 | ||
23068 | if (debug_guardpage_minorder()) | |
23069 | slub_max_order = 0; | |
23070 | @@ -4331,7 +4406,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
23071 | struct page *page; | |
23072 | unsigned long flags; | |
23073 | ||
23074 | - spin_lock_irqsave(&n->list_lock, flags); | |
23075 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
23076 | ||
23077 | list_for_each_entry(page, &n->partial, lru) { | |
23078 | validate_slab_slab(s, page, map); | |
23079 | @@ -4353,7 +4428,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
23080 | s->name, count, atomic_long_read(&n->nr_slabs)); | |
23081 | ||
23082 | out: | |
23083 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
23084 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
23085 | return count; | |
23086 | } | |
23087 | ||
23088 | @@ -4541,12 +4616,12 @@ static int list_locations(struct kmem_cache *s, char *buf, | |
23089 | if (!atomic_long_read(&n->nr_slabs)) | |
23090 | continue; | |
23091 | ||
23092 | - spin_lock_irqsave(&n->list_lock, flags); | |
23093 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
23094 | list_for_each_entry(page, &n->partial, lru) | |
23095 | process_slab(&t, s, page, alloc, map); | |
23096 | list_for_each_entry(page, &n->full, lru) | |
23097 | process_slab(&t, s, page, alloc, map); | |
23098 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
23099 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
23100 | } | |
23101 | ||
23102 | for (i = 0; i < t.count; i++) { | |
23103 | diff --git a/mm/swap.c b/mm/swap.c | |
23104 | index 4dcf852e1e6d..69c3a5b24060 100644 | |
23105 | --- a/mm/swap.c | |
23106 | +++ b/mm/swap.c | |
23107 | @@ -32,6 +32,7 @@ | |
23108 | #include <linux/memcontrol.h> | |
23109 | #include <linux/gfp.h> | |
23110 | #include <linux/uio.h> | |
23111 | +#include <linux/locallock.h> | |
23112 | #include <linux/hugetlb.h> | |
23113 | #include <linux/page_idle.h> | |
23114 | ||
23115 | @@ -50,6 +51,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); | |
23116 | #ifdef CONFIG_SMP | |
23117 | static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
23118 | #endif | |
23119 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
23120 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
23121 | ||
23122 | /* | |
23123 | * This path almost never happens for VM activity - pages are normally | |
23124 | @@ -240,11 +243,11 @@ void rotate_reclaimable_page(struct page *page) | |
23125 | unsigned long flags; | |
23126 | ||
23127 | get_page(page); | |
23128 | - local_irq_save(flags); | |
23129 | + local_lock_irqsave(rotate_lock, flags); | |
23130 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
23131 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
23132 | pagevec_move_tail(pvec); | |
23133 | - local_irq_restore(flags); | |
23134 | + local_unlock_irqrestore(rotate_lock, flags); | |
23135 | } | |
23136 | } | |
23137 | ||
23138 | @@ -294,12 +297,13 @@ void activate_page(struct page *page) | |
23139 | { | |
23140 | page = compound_head(page); | |
23141 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
23142 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
23143 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
23144 | + activate_page_pvecs); | |
23145 | ||
23146 | get_page(page); | |
23147 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
23148 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
23149 | - put_cpu_var(activate_page_pvecs); | |
23150 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
23151 | } | |
23152 | } | |
23153 | ||
23154 | @@ -326,7 +330,7 @@ void activate_page(struct page *page) | |
23155 | ||
23156 | static void __lru_cache_activate_page(struct page *page) | |
23157 | { | |
23158 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
23159 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
23160 | int i; | |
23161 | ||
23162 | /* | |
23163 | @@ -348,7 +352,7 @@ static void __lru_cache_activate_page(struct page *page) | |
23164 | } | |
23165 | } | |
23166 | ||
23167 | - put_cpu_var(lru_add_pvec); | |
23168 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
23169 | } | |
23170 | ||
23171 | /* | |
23172 | @@ -390,12 +394,12 @@ EXPORT_SYMBOL(mark_page_accessed); | |
23173 | ||
23174 | static void __lru_cache_add(struct page *page) | |
23175 | { | |
23176 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
23177 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
23178 | ||
23179 | get_page(page); | |
23180 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
23181 | __pagevec_lru_add(pvec); | |
23182 | - put_cpu_var(lru_add_pvec); | |
23183 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
23184 | } | |
23185 | ||
23186 | /** | |
23187 | @@ -593,9 +597,15 @@ void lru_add_drain_cpu(int cpu) | |
23188 | unsigned long flags; | |
23189 | ||
23190 | /* No harm done if a racing interrupt already did this */ | |
23191 | - local_irq_save(flags); | |
23192 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23193 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
23194 | pagevec_move_tail(pvec); | |
23195 | - local_irq_restore(flags); | |
23196 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
23197 | +#else | |
23198 | + local_lock_irqsave(rotate_lock, flags); | |
23199 | + pagevec_move_tail(pvec); | |
23200 | + local_unlock_irqrestore(rotate_lock, flags); | |
23201 | +#endif | |
23202 | } | |
23203 | ||
23204 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
23205 | @@ -627,11 +637,12 @@ void deactivate_file_page(struct page *page) | |
23206 | return; | |
23207 | ||
23208 | if (likely(get_page_unless_zero(page))) { | |
23209 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
23210 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
23211 | + lru_deactivate_file_pvecs); | |
23212 | ||
23213 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
23214 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
23215 | - put_cpu_var(lru_deactivate_file_pvecs); | |
23216 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
23217 | } | |
23218 | } | |
23219 | ||
23220 | @@ -646,27 +657,31 @@ void deactivate_file_page(struct page *page) | |
23221 | void deactivate_page(struct page *page) | |
23222 | { | |
23223 | if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { | |
23224 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); | |
23225 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
23226 | + lru_deactivate_pvecs); | |
23227 | ||
23228 | get_page(page); | |
23229 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
23230 | pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); | |
23231 | - put_cpu_var(lru_deactivate_pvecs); | |
23232 | + put_locked_var(swapvec_lock, lru_deactivate_pvecs); | |
23233 | } | |
23234 | } | |
23235 | ||
23236 | void lru_add_drain(void) | |
23237 | { | |
23238 | - lru_add_drain_cpu(get_cpu()); | |
23239 | - put_cpu(); | |
23240 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
23241 | + local_unlock_cpu(swapvec_lock); | |
23242 | } | |
23243 | ||
23244 | -static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
23245 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23246 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
23247 | { | |
23248 | - lru_add_drain(); | |
23249 | + local_lock_on(swapvec_lock, cpu); | |
23250 | + lru_add_drain_cpu(cpu); | |
23251 | + local_unlock_on(swapvec_lock, cpu); | |
23252 | } | |
23253 | ||
23254 | -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
23255 | +#else | |
23256 | ||
23257 | /* | |
23258 | * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM | |
23259 | @@ -686,6 +701,22 @@ static int __init lru_init(void) | |
23260 | } | |
23261 | early_initcall(lru_init); | |
23262 | ||
23263 | +static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
23264 | +{ | |
23265 | + lru_add_drain(); | |
23266 | +} | |
23267 | + | |
23268 | +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
23269 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
23270 | +{ | |
23271 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
23272 | + | |
23273 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
23274 | + queue_work_on(cpu, lru_add_drain_wq, work); | |
23275 | + cpumask_set_cpu(cpu, has_work); | |
23276 | +} | |
23277 | +#endif | |
23278 | + | |
23279 | void lru_add_drain_all(void) | |
23280 | { | |
23281 | static DEFINE_MUTEX(lock); | |
23282 | @@ -697,21 +728,18 @@ void lru_add_drain_all(void) | |
23283 | cpumask_clear(&has_work); | |
23284 | ||
23285 | for_each_online_cpu(cpu) { | |
23286 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
23287 | - | |
23288 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || | |
23289 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
23290 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
23291 | pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || | |
23292 | - need_activate_page_drain(cpu)) { | |
23293 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
23294 | - queue_work_on(cpu, lru_add_drain_wq, work); | |
23295 | - cpumask_set_cpu(cpu, &has_work); | |
23296 | - } | |
23297 | + need_activate_page_drain(cpu)) | |
23298 | + remote_lru_add_drain(cpu, &has_work); | |
23299 | } | |
23300 | ||
23301 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
23302 | for_each_cpu(cpu, &has_work) | |
23303 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
23304 | +#endif | |
23305 | ||
23306 | put_online_cpus(); | |
23307 | mutex_unlock(&lock); | |
23308 | diff --git a/mm/truncate.c b/mm/truncate.c | |
23309 | index 8d8c62d89e6d..5bf1bd25d077 100644 | |
23310 | --- a/mm/truncate.c | |
23311 | +++ b/mm/truncate.c | |
23312 | @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping, | |
23313 | * protected by mapping->tree_lock. | |
23314 | */ | |
23315 | if (!workingset_node_shadows(node) && | |
23316 | - !list_empty(&node->private_list)) | |
23317 | - list_lru_del(&workingset_shadow_nodes, | |
23318 | + !list_empty(&node->private_list)) { | |
23319 | + local_lock(workingset_shadow_lock); | |
23320 | + list_lru_del(&__workingset_shadow_nodes, | |
23321 | &node->private_list); | |
23322 | + local_unlock(workingset_shadow_lock); | |
23323 | + } | |
23324 | __radix_tree_delete_node(&mapping->page_tree, node); | |
23325 | unlock: | |
23326 | spin_unlock_irq(&mapping->tree_lock); | |
23327 | diff --git a/mm/vmalloc.c b/mm/vmalloc.c | |
23328 | index f2481cb4e6b2..db4de08fa97c 100644 | |
23329 | --- a/mm/vmalloc.c | |
23330 | +++ b/mm/vmalloc.c | |
23331 | @@ -845,7 +845,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
23332 | struct vmap_block *vb; | |
23333 | struct vmap_area *va; | |
23334 | unsigned long vb_idx; | |
23335 | - int node, err; | |
23336 | + int node, err, cpu; | |
23337 | void *vaddr; | |
23338 | ||
23339 | node = numa_node_id(); | |
23340 | @@ -888,11 +888,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
23341 | BUG_ON(err); | |
23342 | radix_tree_preload_end(); | |
23343 | ||
23344 | - vbq = &get_cpu_var(vmap_block_queue); | |
23345 | + cpu = get_cpu_light(); | |
23346 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
23347 | spin_lock(&vbq->lock); | |
23348 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
23349 | spin_unlock(&vbq->lock); | |
23350 | - put_cpu_var(vmap_block_queue); | |
23351 | + put_cpu_light(); | |
23352 | ||
23353 | return vaddr; | |
23354 | } | |
23355 | @@ -961,6 +962,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
23356 | struct vmap_block *vb; | |
23357 | void *vaddr = NULL; | |
23358 | unsigned int order; | |
23359 | + int cpu; | |
23360 | ||
23361 | BUG_ON(offset_in_page(size)); | |
23362 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
23363 | @@ -975,7 +977,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
23364 | order = get_order(size); | |
23365 | ||
23366 | rcu_read_lock(); | |
23367 | - vbq = &get_cpu_var(vmap_block_queue); | |
23368 | + cpu = get_cpu_light(); | |
23369 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
23370 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
23371 | unsigned long pages_off; | |
23372 | ||
23373 | @@ -998,7 +1001,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
23374 | break; | |
23375 | } | |
23376 | ||
23377 | - put_cpu_var(vmap_block_queue); | |
23378 | + put_cpu_light(); | |
23379 | rcu_read_unlock(); | |
23380 | ||
23381 | /* Allocate new block if nothing was found */ | |
23382 | diff --git a/mm/vmstat.c b/mm/vmstat.c | |
23383 | index 604f26a4f696..312006d2db50 100644 | |
23384 | --- a/mm/vmstat.c | |
23385 | +++ b/mm/vmstat.c | |
23386 | @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
23387 | long x; | |
23388 | long t; | |
23389 | ||
23390 | + preempt_disable_rt(); | |
23391 | x = delta + __this_cpu_read(*p); | |
23392 | ||
23393 | t = __this_cpu_read(pcp->stat_threshold); | |
23394 | @@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
23395 | x = 0; | |
23396 | } | |
23397 | __this_cpu_write(*p, x); | |
23398 | + preempt_enable_rt(); | |
23399 | } | |
23400 | EXPORT_SYMBOL(__mod_zone_page_state); | |
23401 | ||
23402 | @@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
23403 | long x; | |
23404 | long t; | |
23405 | ||
23406 | + preempt_disable_rt(); | |
23407 | x = delta + __this_cpu_read(*p); | |
23408 | ||
23409 | t = __this_cpu_read(pcp->stat_threshold); | |
23410 | @@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
23411 | x = 0; | |
23412 | } | |
23413 | __this_cpu_write(*p, x); | |
23414 | + preempt_enable_rt(); | |
23415 | } | |
23416 | EXPORT_SYMBOL(__mod_node_page_state); | |
23417 | ||
23418 | @@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
23419 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
23420 | s8 v, t; | |
23421 | ||
23422 | + preempt_disable_rt(); | |
23423 | v = __this_cpu_inc_return(*p); | |
23424 | t = __this_cpu_read(pcp->stat_threshold); | |
23425 | if (unlikely(v > t)) { | |
23426 | @@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
23427 | zone_page_state_add(v + overstep, zone, item); | |
23428 | __this_cpu_write(*p, -overstep); | |
23429 | } | |
23430 | + preempt_enable_rt(); | |
23431 | } | |
23432 | ||
23433 | void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23434 | @@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23435 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
23436 | s8 v, t; | |
23437 | ||
23438 | + preempt_disable_rt(); | |
23439 | v = __this_cpu_inc_return(*p); | |
23440 | t = __this_cpu_read(pcp->stat_threshold); | |
23441 | if (unlikely(v > t)) { | |
23442 | @@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23443 | node_page_state_add(v + overstep, pgdat, item); | |
23444 | __this_cpu_write(*p, -overstep); | |
23445 | } | |
23446 | + preempt_enable_rt(); | |
23447 | } | |
23448 | ||
23449 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
23450 | @@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
23451 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
23452 | s8 v, t; | |
23453 | ||
23454 | + preempt_disable_rt(); | |
23455 | v = __this_cpu_dec_return(*p); | |
23456 | t = __this_cpu_read(pcp->stat_threshold); | |
23457 | if (unlikely(v < - t)) { | |
23458 | @@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
23459 | zone_page_state_add(v - overstep, zone, item); | |
23460 | __this_cpu_write(*p, overstep); | |
23461 | } | |
23462 | + preempt_enable_rt(); | |
23463 | } | |
23464 | ||
23465 | void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23466 | @@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23467 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
23468 | s8 v, t; | |
23469 | ||
23470 | + preempt_disable_rt(); | |
23471 | v = __this_cpu_dec_return(*p); | |
23472 | t = __this_cpu_read(pcp->stat_threshold); | |
23473 | if (unlikely(v < - t)) { | |
23474 | @@ -374,6 +385,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
23475 | node_page_state_add(v - overstep, pgdat, item); | |
23476 | __this_cpu_write(*p, overstep); | |
23477 | } | |
23478 | + preempt_enable_rt(); | |
23479 | } | |
23480 | ||
23481 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
23482 | diff --git a/mm/workingset.c b/mm/workingset.c | |
23483 | index fb1f9183d89a..7e6ef1a48cd3 100644 | |
23484 | --- a/mm/workingset.c | |
23485 | +++ b/mm/workingset.c | |
23486 | @@ -334,7 +334,8 @@ void workingset_activation(struct page *page) | |
23487 | * point where they would still be useful. | |
23488 | */ | |
23489 | ||
23490 | -struct list_lru workingset_shadow_nodes; | |
23491 | +struct list_lru __workingset_shadow_nodes; | |
23492 | +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
23493 | ||
23494 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
23495 | struct shrink_control *sc) | |
23496 | @@ -344,9 +345,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
23497 | unsigned long pages; | |
23498 | ||
23499 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
23500 | - local_irq_disable(); | |
23501 | - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | |
23502 | - local_irq_enable(); | |
23503 | + local_lock_irq(workingset_shadow_lock); | |
23504 | + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); | |
23505 | + local_unlock_irq(workingset_shadow_lock); | |
23506 | ||
23507 | if (sc->memcg) { | |
23508 | pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, | |
23509 | @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |
23510 | spin_unlock(&mapping->tree_lock); | |
23511 | ret = LRU_REMOVED_RETRY; | |
23512 | out: | |
23513 | - local_irq_enable(); | |
23514 | + local_unlock_irq(workingset_shadow_lock); | |
23515 | cond_resched(); | |
23516 | - local_irq_disable(); | |
23517 | + local_lock_irq(workingset_shadow_lock); | |
23518 | spin_lock(lru_lock); | |
23519 | return ret; | |
23520 | } | |
23521 | @@ -451,10 +452,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |
23522 | unsigned long ret; | |
23523 | ||
23524 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
23525 | - local_irq_disable(); | |
23526 | - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | |
23527 | + local_lock_irq(workingset_shadow_lock); | |
23528 | + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc, | |
23529 | shadow_lru_isolate, NULL); | |
23530 | - local_irq_enable(); | |
23531 | + local_unlock_irq(workingset_shadow_lock); | |
23532 | return ret; | |
23533 | } | |
23534 | ||
23535 | @@ -492,7 +493,7 @@ static int __init workingset_init(void) | |
23536 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | |
23537 | timestamp_bits, max_order, bucket_order); | |
23538 | ||
23539 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | |
23540 | + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key); | |
23541 | if (ret) | |
23542 | goto err; | |
23543 | ret = register_shrinker(&workingset_shadow_shrinker); | |
23544 | @@ -500,7 +501,7 @@ static int __init workingset_init(void) | |
23545 | goto err_list_lru; | |
23546 | return 0; | |
23547 | err_list_lru: | |
23548 | - list_lru_destroy(&workingset_shadow_nodes); | |
23549 | + list_lru_destroy(&__workingset_shadow_nodes); | |
23550 | err: | |
23551 | return ret; | |
23552 | } | |
23553 | diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c | |
23554 | index b0bc023d25c5..5af6426fbcbe 100644 | |
23555 | --- a/mm/zsmalloc.c | |
23556 | +++ b/mm/zsmalloc.c | |
23557 | @@ -53,6 +53,7 @@ | |
23558 | #include <linux/mount.h> | |
23559 | #include <linux/migrate.h> | |
23560 | #include <linux/pagemap.h> | |
23561 | +#include <linux/locallock.h> | |
23562 | ||
23563 | #define ZSPAGE_MAGIC 0x58 | |
23564 | ||
23565 | @@ -70,9 +71,22 @@ | |
23566 | */ | |
23567 | #define ZS_MAX_ZSPAGE_ORDER 2 | |
23568 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | |
23569 | - | |
23570 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | |
23571 | ||
23572 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23573 | + | |
23574 | +struct zsmalloc_handle { | |
23575 | + unsigned long addr; | |
23576 | + struct mutex lock; | |
23577 | +}; | |
23578 | + | |
23579 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) | |
23580 | + | |
23581 | +#else | |
23582 | + | |
23583 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) | |
23584 | +#endif | |
23585 | + | |
23586 | /* | |
23587 | * Object location (<PFN>, <obj_idx>) is encoded as | |
23588 | * as single (unsigned long) handle value. | |
23589 | @@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} | |
23590 | ||
23591 | static int create_cache(struct zs_pool *pool) | |
23592 | { | |
23593 | - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | |
23594 | + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, | |
23595 | 0, 0, NULL); | |
23596 | if (!pool->handle_cachep) | |
23597 | return 1; | |
23598 | @@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool *pool) | |
23599 | ||
23600 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) | |
23601 | { | |
23602 | - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | |
23603 | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
23604 | + void *p; | |
23605 | + | |
23606 | + p = kmem_cache_alloc(pool->handle_cachep, | |
23607 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
23608 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23609 | + if (p) { | |
23610 | + struct zsmalloc_handle *zh = p; | |
23611 | + | |
23612 | + mutex_init(&zh->lock); | |
23613 | + } | |
23614 | +#endif | |
23615 | + return (unsigned long)p; | |
23616 | } | |
23617 | ||
23618 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23619 | +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) | |
23620 | +{ | |
23621 | + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); | |
23622 | +} | |
23623 | +#endif | |
23624 | + | |
23625 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) | |
23626 | { | |
23627 | kmem_cache_free(pool->handle_cachep, (void *)handle); | |
23628 | @@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) | |
23629 | ||
23630 | static void record_obj(unsigned long handle, unsigned long obj) | |
23631 | { | |
23632 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23633 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23634 | + | |
23635 | + WRITE_ONCE(zh->addr, obj); | |
23636 | +#else | |
23637 | /* | |
23638 | * lsb of @obj represents handle lock while other bits | |
23639 | * represent object value the handle is pointing so | |
23640 | * updating shouldn't do store tearing. | |
23641 | */ | |
23642 | WRITE_ONCE(*(unsigned long *)handle, obj); | |
23643 | +#endif | |
23644 | } | |
23645 | ||
23646 | /* zpool driver */ | |
23647 | @@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc"); | |
23648 | ||
23649 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
23650 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
23651 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
23652 | ||
23653 | static bool is_zspage_isolated(struct zspage *zspage) | |
23654 | { | |
23655 | @@ -902,7 +940,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) | |
23656 | ||
23657 | static unsigned long handle_to_obj(unsigned long handle) | |
23658 | { | |
23659 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23660 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23661 | + | |
23662 | + return zh->addr; | |
23663 | +#else | |
23664 | return *(unsigned long *)handle; | |
23665 | +#endif | |
23666 | } | |
23667 | ||
23668 | static unsigned long obj_to_head(struct page *page, void *obj) | |
23669 | @@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) | |
23670 | ||
23671 | static inline int testpin_tag(unsigned long handle) | |
23672 | { | |
23673 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23674 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23675 | + | |
23676 | + return mutex_is_locked(&zh->lock); | |
23677 | +#else | |
23678 | return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23679 | +#endif | |
23680 | } | |
23681 | ||
23682 | static inline int trypin_tag(unsigned long handle) | |
23683 | { | |
23684 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23685 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23686 | + | |
23687 | + return mutex_trylock(&zh->lock); | |
23688 | +#else | |
23689 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23690 | +#endif | |
23691 | } | |
23692 | ||
23693 | static void pin_tag(unsigned long handle) | |
23694 | { | |
23695 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23696 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23697 | + | |
23698 | + return mutex_lock(&zh->lock); | |
23699 | +#else | |
23700 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23701 | +#endif | |
23702 | } | |
23703 | ||
23704 | static void unpin_tag(unsigned long handle) | |
23705 | { | |
23706 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23707 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
23708 | + | |
23709 | + return mutex_unlock(&zh->lock); | |
23710 | +#else | |
23711 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
23712 | +#endif | |
23713 | } | |
23714 | ||
23715 | static void reset_page(struct page *page) | |
23716 | @@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |
23717 | class = pool->size_class[class_idx]; | |
23718 | off = (class->size * obj_idx) & ~PAGE_MASK; | |
23719 | ||
23720 | - area = &get_cpu_var(zs_map_area); | |
23721 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); | |
23722 | area->vm_mm = mm; | |
23723 | if (off + class->size <= PAGE_SIZE) { | |
23724 | /* this object is contained entirely within a page */ | |
23725 | @@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |
23726 | ||
23727 | __zs_unmap_object(area, pages, off, class->size); | |
23728 | } | |
23729 | - put_cpu_var(zs_map_area); | |
23730 | + put_locked_var(zs_map_area_lock, zs_map_area); | |
23731 | ||
23732 | migrate_read_unlock(zspage); | |
23733 | unpin_tag(handle); | |
23734 | diff --git a/net/core/dev.c b/net/core/dev.c | |
23735 | index e1d731fdc72c..6ab4b7863755 100644 | |
23736 | --- a/net/core/dev.c | |
23737 | +++ b/net/core/dev.c | |
23738 | @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS; | |
23739 | static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); | |
23740 | ||
23741 | static seqcount_t devnet_rename_seq; | |
23742 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
23743 | ||
23744 | static inline void dev_base_seq_inc(struct net *net) | |
23745 | { | |
23746 | @@ -211,14 +212,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |
23747 | static inline void rps_lock(struct softnet_data *sd) | |
23748 | { | |
23749 | #ifdef CONFIG_RPS | |
23750 | - spin_lock(&sd->input_pkt_queue.lock); | |
23751 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
23752 | #endif | |
23753 | } | |
23754 | ||
23755 | static inline void rps_unlock(struct softnet_data *sd) | |
23756 | { | |
23757 | #ifdef CONFIG_RPS | |
23758 | - spin_unlock(&sd->input_pkt_queue.lock); | |
23759 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
23760 | #endif | |
23761 | } | |
23762 | ||
23763 | @@ -888,7 +889,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex) | |
23764 | strcpy(name, dev->name); | |
23765 | rcu_read_unlock(); | |
23766 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
23767 | - cond_resched(); | |
23768 | + mutex_lock(&devnet_rename_mutex); | |
23769 | + mutex_unlock(&devnet_rename_mutex); | |
23770 | goto retry; | |
23771 | } | |
23772 | ||
23773 | @@ -1157,20 +1159,17 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23774 | if (dev->flags & IFF_UP) | |
23775 | return -EBUSY; | |
23776 | ||
23777 | - write_seqcount_begin(&devnet_rename_seq); | |
23778 | + mutex_lock(&devnet_rename_mutex); | |
23779 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
23780 | ||
23781 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
23782 | - write_seqcount_end(&devnet_rename_seq); | |
23783 | - return 0; | |
23784 | - } | |
23785 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
23786 | + goto outunlock; | |
23787 | ||
23788 | memcpy(oldname, dev->name, IFNAMSIZ); | |
23789 | ||
23790 | err = dev_get_valid_name(net, dev, newname); | |
23791 | - if (err < 0) { | |
23792 | - write_seqcount_end(&devnet_rename_seq); | |
23793 | - return err; | |
23794 | - } | |
23795 | + if (err < 0) | |
23796 | + goto outunlock; | |
23797 | ||
23798 | if (oldname[0] && !strchr(oldname, '%')) | |
23799 | netdev_info(dev, "renamed from %s\n", oldname); | |
23800 | @@ -1183,11 +1182,12 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23801 | if (ret) { | |
23802 | memcpy(dev->name, oldname, IFNAMSIZ); | |
23803 | dev->name_assign_type = old_assign_type; | |
23804 | - write_seqcount_end(&devnet_rename_seq); | |
23805 | - return ret; | |
23806 | + err = ret; | |
23807 | + goto outunlock; | |
23808 | } | |
23809 | ||
23810 | - write_seqcount_end(&devnet_rename_seq); | |
23811 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
23812 | + mutex_unlock(&devnet_rename_mutex); | |
23813 | ||
23814 | netdev_adjacent_rename_links(dev, oldname); | |
23815 | ||
23816 | @@ -1208,7 +1208,8 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23817 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | |
23818 | if (err >= 0) { | |
23819 | err = ret; | |
23820 | - write_seqcount_begin(&devnet_rename_seq); | |
23821 | + mutex_lock(&devnet_rename_mutex); | |
23822 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
23823 | memcpy(dev->name, oldname, IFNAMSIZ); | |
23824 | memcpy(oldname, newname, IFNAMSIZ); | |
23825 | dev->name_assign_type = old_assign_type; | |
23826 | @@ -1221,6 +1222,11 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
23827 | } | |
23828 | ||
23829 | return err; | |
23830 | + | |
23831 | +outunlock: | |
23832 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
23833 | + mutex_unlock(&devnet_rename_mutex); | |
23834 | + return err; | |
23835 | } | |
23836 | ||
23837 | /** | |
23838 | @@ -2263,6 +2269,7 @@ static void __netif_reschedule(struct Qdisc *q) | |
23839 | sd->output_queue_tailp = &q->next_sched; | |
23840 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
23841 | local_irq_restore(flags); | |
23842 | + preempt_check_resched_rt(); | |
23843 | } | |
23844 | ||
23845 | void __netif_schedule(struct Qdisc *q) | |
23846 | @@ -2344,6 +2351,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) | |
23847 | __this_cpu_write(softnet_data.completion_queue, skb); | |
23848 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
23849 | local_irq_restore(flags); | |
23850 | + preempt_check_resched_rt(); | |
23851 | } | |
23852 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
23853 | ||
23854 | @@ -3078,7 +3086,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |
23855 | * This permits qdisc->running owner to get the lock more | |
23856 | * often and dequeue packets faster. | |
23857 | */ | |
23858 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23859 | + contended = true; | |
23860 | +#else | |
23861 | contended = qdisc_is_running(q); | |
23862 | +#endif | |
23863 | if (unlikely(contended)) | |
23864 | spin_lock(&q->busylock); | |
23865 | ||
23866 | @@ -3141,8 +3153,10 @@ static void skb_update_prio(struct sk_buff *skb) | |
23867 | #define skb_update_prio(skb) | |
23868 | #endif | |
23869 | ||
23870 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23871 | DEFINE_PER_CPU(int, xmit_recursion); | |
23872 | EXPORT_SYMBOL(xmit_recursion); | |
23873 | +#endif | |
23874 | ||
23875 | /** | |
23876 | * dev_loopback_xmit - loop back @skb | |
23877 | @@ -3376,8 +3390,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
23878 | int cpu = smp_processor_id(); /* ok because BHs are off */ | |
23879 | ||
23880 | if (txq->xmit_lock_owner != cpu) { | |
23881 | - if (unlikely(__this_cpu_read(xmit_recursion) > | |
23882 | - XMIT_RECURSION_LIMIT)) | |
23883 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) | |
23884 | goto recursion_alert; | |
23885 | ||
23886 | skb = validate_xmit_skb(skb, dev); | |
23887 | @@ -3387,9 +3400,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
23888 | HARD_TX_LOCK(dev, txq, cpu); | |
23889 | ||
23890 | if (!netif_xmit_stopped(txq)) { | |
23891 | - __this_cpu_inc(xmit_recursion); | |
23892 | + xmit_rec_inc(); | |
23893 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
23894 | - __this_cpu_dec(xmit_recursion); | |
23895 | + xmit_rec_dec(); | |
23896 | if (dev_xmit_complete(rc)) { | |
23897 | HARD_TX_UNLOCK(dev, txq); | |
23898 | goto out; | |
23899 | @@ -3763,6 +3776,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, | |
23900 | rps_unlock(sd); | |
23901 | ||
23902 | local_irq_restore(flags); | |
23903 | + preempt_check_resched_rt(); | |
23904 | ||
23905 | atomic_long_inc(&skb->dev->rx_dropped); | |
23906 | kfree_skb(skb); | |
23907 | @@ -3781,7 +3795,7 @@ static int netif_rx_internal(struct sk_buff *skb) | |
23908 | struct rps_dev_flow voidflow, *rflow = &voidflow; | |
23909 | int cpu; | |
23910 | ||
23911 | - preempt_disable(); | |
23912 | + migrate_disable(); | |
23913 | rcu_read_lock(); | |
23914 | ||
23915 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
23916 | @@ -3791,13 +3805,13 @@ static int netif_rx_internal(struct sk_buff *skb) | |
23917 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | |
23918 | ||
23919 | rcu_read_unlock(); | |
23920 | - preempt_enable(); | |
23921 | + migrate_enable(); | |
23922 | } else | |
23923 | #endif | |
23924 | { | |
23925 | unsigned int qtail; | |
23926 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | |
23927 | - put_cpu(); | |
23928 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
23929 | + put_cpu_light(); | |
23930 | } | |
23931 | return ret; | |
23932 | } | |
23933 | @@ -3831,11 +3845,9 @@ int netif_rx_ni(struct sk_buff *skb) | |
23934 | ||
23935 | trace_netif_rx_ni_entry(skb); | |
23936 | ||
23937 | - preempt_disable(); | |
23938 | + local_bh_disable(); | |
23939 | err = netif_rx_internal(skb); | |
23940 | - if (local_softirq_pending()) | |
23941 | - do_softirq(); | |
23942 | - preempt_enable(); | |
23943 | + local_bh_enable(); | |
23944 | ||
23945 | return err; | |
23946 | } | |
23947 | @@ -4314,7 +4326,7 @@ static void flush_backlog(struct work_struct *work) | |
23948 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | |
23949 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { | |
23950 | __skb_unlink(skb, &sd->input_pkt_queue); | |
23951 | - kfree_skb(skb); | |
23952 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
23953 | input_queue_head_incr(sd); | |
23954 | } | |
23955 | } | |
23956 | @@ -4324,11 +4336,14 @@ static void flush_backlog(struct work_struct *work) | |
23957 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | |
23958 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { | |
23959 | __skb_unlink(skb, &sd->process_queue); | |
23960 | - kfree_skb(skb); | |
23961 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
23962 | input_queue_head_incr(sd); | |
23963 | } | |
23964 | } | |
23965 | + if (!skb_queue_empty(&sd->tofree_queue)) | |
23966 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
23967 | local_bh_enable(); | |
23968 | + | |
23969 | } | |
23970 | ||
23971 | static void flush_all_backlogs(void) | |
23972 | @@ -4809,6 +4824,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
23973 | sd->rps_ipi_list = NULL; | |
23974 | ||
23975 | local_irq_enable(); | |
23976 | + preempt_check_resched_rt(); | |
23977 | ||
23978 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
23979 | while (remsd) { | |
23980 | @@ -4822,6 +4838,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
23981 | } else | |
23982 | #endif | |
23983 | local_irq_enable(); | |
23984 | + preempt_check_resched_rt(); | |
23985 | } | |
23986 | ||
23987 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
23988 | @@ -4851,7 +4868,9 @@ static int process_backlog(struct napi_struct *napi, int quota) | |
23989 | while (again) { | |
23990 | struct sk_buff *skb; | |
23991 | ||
23992 | + local_irq_disable(); | |
23993 | while ((skb = __skb_dequeue(&sd->process_queue))) { | |
23994 | + local_irq_enable(); | |
23995 | rcu_read_lock(); | |
23996 | __netif_receive_skb(skb); | |
23997 | rcu_read_unlock(); | |
23998 | @@ -4859,9 +4878,9 @@ static int process_backlog(struct napi_struct *napi, int quota) | |
23999 | if (++work >= quota) | |
24000 | return work; | |
24001 | ||
24002 | + local_irq_disable(); | |
24003 | } | |
24004 | ||
24005 | - local_irq_disable(); | |
24006 | rps_lock(sd); | |
24007 | if (skb_queue_empty(&sd->input_pkt_queue)) { | |
24008 | /* | |
24009 | @@ -4899,9 +4918,11 @@ void __napi_schedule(struct napi_struct *n) | |
24010 | local_irq_save(flags); | |
24011 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
24012 | local_irq_restore(flags); | |
24013 | + preempt_check_resched_rt(); | |
24014 | } | |
24015 | EXPORT_SYMBOL(__napi_schedule); | |
24016 | ||
24017 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24018 | /** | |
24019 | * __napi_schedule_irqoff - schedule for receive | |
24020 | * @n: entry to schedule | |
24021 | @@ -4913,6 +4934,7 @@ void __napi_schedule_irqoff(struct napi_struct *n) | |
24022 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
24023 | } | |
24024 | EXPORT_SYMBOL(__napi_schedule_irqoff); | |
24025 | +#endif | |
24026 | ||
24027 | void __napi_complete(struct napi_struct *n) | |
24028 | { | |
24029 | @@ -5202,13 +5224,21 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) | |
24030 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); | |
24031 | unsigned long time_limit = jiffies + 2; | |
24032 | int budget = netdev_budget; | |
24033 | + struct sk_buff_head tofree_q; | |
24034 | + struct sk_buff *skb; | |
24035 | LIST_HEAD(list); | |
24036 | LIST_HEAD(repoll); | |
24037 | ||
24038 | + __skb_queue_head_init(&tofree_q); | |
24039 | + | |
24040 | local_irq_disable(); | |
24041 | + skb_queue_splice_init(&sd->tofree_queue, &tofree_q); | |
24042 | list_splice_init(&sd->poll_list, &list); | |
24043 | local_irq_enable(); | |
24044 | ||
24045 | + while ((skb = __skb_dequeue(&tofree_q))) | |
24046 | + kfree_skb(skb); | |
24047 | + | |
24048 | for (;;) { | |
24049 | struct napi_struct *n; | |
24050 | ||
24051 | @@ -5239,7 +5269,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) | |
24052 | list_splice_tail(&repoll, &list); | |
24053 | list_splice(&list, &sd->poll_list); | |
24054 | if (!list_empty(&sd->poll_list)) | |
24055 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
24056 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
24057 | ||
24058 | net_rps_action_and_irq_enable(sd); | |
24059 | } | |
24060 | @@ -8000,16 +8030,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |
24061 | ||
24062 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
24063 | local_irq_enable(); | |
24064 | + preempt_check_resched_rt(); | |
24065 | ||
24066 | /* Process offline CPU's input_pkt_queue */ | |
24067 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | |
24068 | netif_rx_ni(skb); | |
24069 | input_queue_head_incr(oldsd); | |
24070 | } | |
24071 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
24072 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
24073 | netif_rx_ni(skb); | |
24074 | input_queue_head_incr(oldsd); | |
24075 | } | |
24076 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
24077 | + kfree_skb(skb); | |
24078 | + } | |
24079 | ||
24080 | return NOTIFY_OK; | |
24081 | } | |
24082 | @@ -8314,8 +8348,9 @@ static int __init net_dev_init(void) | |
24083 | ||
24084 | INIT_WORK(flush, flush_backlog); | |
24085 | ||
24086 | - skb_queue_head_init(&sd->input_pkt_queue); | |
24087 | - skb_queue_head_init(&sd->process_queue); | |
24088 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
24089 | + skb_queue_head_init_raw(&sd->process_queue); | |
24090 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
24091 | INIT_LIST_HEAD(&sd->poll_list); | |
24092 | sd->output_queue_tailp = &sd->output_queue; | |
24093 | #ifdef CONFIG_RPS | |
24094 | diff --git a/net/core/filter.c b/net/core/filter.c | |
24095 | index b391209838ef..b86e9681a88e 100644 | |
24096 | --- a/net/core/filter.c | |
24097 | +++ b/net/core/filter.c | |
24098 | @@ -1645,7 +1645,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) | |
24099 | { | |
24100 | int ret; | |
24101 | ||
24102 | - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { | |
24103 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) { | |
24104 | net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); | |
24105 | kfree_skb(skb); | |
24106 | return -ENETDOWN; | |
24107 | @@ -1653,9 +1653,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) | |
24108 | ||
24109 | skb->dev = dev; | |
24110 | ||
24111 | - __this_cpu_inc(xmit_recursion); | |
24112 | + xmit_rec_inc(); | |
24113 | ret = dev_queue_xmit(skb); | |
24114 | - __this_cpu_dec(xmit_recursion); | |
24115 | + xmit_rec_dec(); | |
24116 | ||
24117 | return ret; | |
24118 | } | |
24119 | diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c | |
24120 | index cad8e791f28e..2a9364fe62a5 100644 | |
24121 | --- a/net/core/gen_estimator.c | |
24122 | +++ b/net/core/gen_estimator.c | |
24123 | @@ -84,7 +84,7 @@ struct gen_estimator | |
24124 | struct gnet_stats_basic_packed *bstats; | |
24125 | struct gnet_stats_rate_est64 *rate_est; | |
24126 | spinlock_t *stats_lock; | |
24127 | - seqcount_t *running; | |
24128 | + net_seqlock_t *running; | |
24129 | int ewma_log; | |
24130 | u32 last_packets; | |
24131 | unsigned long avpps; | |
24132 | @@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
24133 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
24134 | struct gnet_stats_rate_est64 *rate_est, | |
24135 | spinlock_t *stats_lock, | |
24136 | - seqcount_t *running, | |
24137 | + net_seqlock_t *running, | |
24138 | struct nlattr *opt) | |
24139 | { | |
24140 | struct gen_estimator *est; | |
24141 | @@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
24142 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
24143 | struct gnet_stats_rate_est64 *rate_est, | |
24144 | spinlock_t *stats_lock, | |
24145 | - seqcount_t *running, struct nlattr *opt) | |
24146 | + net_seqlock_t *running, struct nlattr *opt) | |
24147 | { | |
24148 | gen_kill_estimator(bstats, rate_est); | |
24149 | return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); | |
24150 | diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c | |
24151 | index 508e051304fb..bc3b17b78c94 100644 | |
24152 | --- a/net/core/gen_stats.c | |
24153 | +++ b/net/core/gen_stats.c | |
24154 | @@ -130,7 +130,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, | |
24155 | } | |
24156 | ||
24157 | void | |
24158 | -__gnet_stats_copy_basic(const seqcount_t *running, | |
24159 | +__gnet_stats_copy_basic(net_seqlock_t *running, | |
24160 | struct gnet_stats_basic_packed *bstats, | |
24161 | struct gnet_stats_basic_cpu __percpu *cpu, | |
24162 | struct gnet_stats_basic_packed *b) | |
24163 | @@ -143,10 +143,10 @@ __gnet_stats_copy_basic(const seqcount_t *running, | |
24164 | } | |
24165 | do { | |
24166 | if (running) | |
24167 | - seq = read_seqcount_begin(running); | |
24168 | + seq = net_seq_begin(running); | |
24169 | bstats->bytes = b->bytes; | |
24170 | bstats->packets = b->packets; | |
24171 | - } while (running && read_seqcount_retry(running, seq)); | |
24172 | + } while (running && net_seq_retry(running, seq)); | |
24173 | } | |
24174 | EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
24175 | ||
24176 | @@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
24177 | * if the room in the socket buffer was not sufficient. | |
24178 | */ | |
24179 | int | |
24180 | -gnet_stats_copy_basic(const seqcount_t *running, | |
24181 | +gnet_stats_copy_basic(net_seqlock_t *running, | |
24182 | struct gnet_dump *d, | |
24183 | struct gnet_stats_basic_cpu __percpu *cpu, | |
24184 | struct gnet_stats_basic_packed *b) | |
24185 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | |
24186 | index 1e3e0087245b..1077b39db717 100644 | |
24187 | --- a/net/core/skbuff.c | |
24188 | +++ b/net/core/skbuff.c | |
24189 | @@ -64,6 +64,7 @@ | |
24190 | #include <linux/errqueue.h> | |
24191 | #include <linux/prefetch.h> | |
24192 | #include <linux/if_vlan.h> | |
24193 | +#include <linux/locallock.h> | |
24194 | ||
24195 | #include <net/protocol.h> | |
24196 | #include <net/dst.h> | |
24197 | @@ -360,6 +361,8 @@ struct napi_alloc_cache { | |
24198 | ||
24199 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
24200 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); | |
24201 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
24202 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
24203 | ||
24204 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
24205 | { | |
24206 | @@ -367,10 +370,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
24207 | unsigned long flags; | |
24208 | void *data; | |
24209 | ||
24210 | - local_irq_save(flags); | |
24211 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
24212 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
24213 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
24214 | - local_irq_restore(flags); | |
24215 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
24216 | return data; | |
24217 | } | |
24218 | ||
24219 | @@ -389,9 +392,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |
24220 | ||
24221 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
24222 | { | |
24223 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
24224 | + struct napi_alloc_cache *nc; | |
24225 | + void *data; | |
24226 | ||
24227 | - return __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
24228 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24229 | + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
24230 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24231 | + return data; | |
24232 | } | |
24233 | ||
24234 | void *napi_alloc_frag(unsigned int fragsz) | |
24235 | @@ -438,13 +445,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
24236 | if (sk_memalloc_socks()) | |
24237 | gfp_mask |= __GFP_MEMALLOC; | |
24238 | ||
24239 | - local_irq_save(flags); | |
24240 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
24241 | ||
24242 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
24243 | data = __alloc_page_frag(nc, len, gfp_mask); | |
24244 | pfmemalloc = nc->pfmemalloc; | |
24245 | ||
24246 | - local_irq_restore(flags); | |
24247 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
24248 | ||
24249 | if (unlikely(!data)) | |
24250 | return NULL; | |
24251 | @@ -485,9 +492,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |
24252 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
24253 | gfp_t gfp_mask) | |
24254 | { | |
24255 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
24256 | + struct napi_alloc_cache *nc; | |
24257 | struct sk_buff *skb; | |
24258 | void *data; | |
24259 | + bool pfmemalloc; | |
24260 | ||
24261 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
24262 | ||
24263 | @@ -505,7 +513,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
24264 | if (sk_memalloc_socks()) | |
24265 | gfp_mask |= __GFP_MEMALLOC; | |
24266 | ||
24267 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24268 | data = __alloc_page_frag(&nc->page, len, gfp_mask); | |
24269 | + pfmemalloc = nc->page.pfmemalloc; | |
24270 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24271 | if (unlikely(!data)) | |
24272 | return NULL; | |
24273 | ||
24274 | @@ -516,7 +527,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
24275 | } | |
24276 | ||
24277 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
24278 | - if (nc->page.pfmemalloc) | |
24279 | + if (pfmemalloc) | |
24280 | skb->pfmemalloc = 1; | |
24281 | skb->head_frag = 1; | |
24282 | ||
24283 | @@ -760,23 +771,26 @@ EXPORT_SYMBOL(consume_skb); | |
24284 | ||
24285 | void __kfree_skb_flush(void) | |
24286 | { | |
24287 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
24288 | + struct napi_alloc_cache *nc; | |
24289 | ||
24290 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24291 | /* flush skb_cache if containing objects */ | |
24292 | if (nc->skb_count) { | |
24293 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | |
24294 | nc->skb_cache); | |
24295 | nc->skb_count = 0; | |
24296 | } | |
24297 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24298 | } | |
24299 | ||
24300 | static inline void _kfree_skb_defer(struct sk_buff *skb) | |
24301 | { | |
24302 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
24303 | + struct napi_alloc_cache *nc; | |
24304 | ||
24305 | /* drop skb->head and call any destructors for packet */ | |
24306 | skb_release_all(skb); | |
24307 | ||
24308 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24309 | /* record skb to CPU local list */ | |
24310 | nc->skb_cache[nc->skb_count++] = skb; | |
24311 | ||
24312 | @@ -791,6 +805,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb) | |
24313 | nc->skb_cache); | |
24314 | nc->skb_count = 0; | |
24315 | } | |
24316 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
24317 | } | |
24318 | void __kfree_skb_defer(struct sk_buff *skb) | |
24319 | { | |
24320 | diff --git a/net/core/sock.c b/net/core/sock.c | |
24321 | index bc6543f7de36..2c32ee79620f 100644 | |
24322 | --- a/net/core/sock.c | |
24323 | +++ b/net/core/sock.c | |
24324 | @@ -2488,12 +2488,11 @@ void lock_sock_nested(struct sock *sk, int subclass) | |
24325 | if (sk->sk_lock.owned) | |
24326 | __lock_sock(sk); | |
24327 | sk->sk_lock.owned = 1; | |
24328 | - spin_unlock(&sk->sk_lock.slock); | |
24329 | + spin_unlock_bh(&sk->sk_lock.slock); | |
24330 | /* | |
24331 | * The sk_lock has mutex_lock() semantics here: | |
24332 | */ | |
24333 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
24334 | - local_bh_enable(); | |
24335 | } | |
24336 | EXPORT_SYMBOL(lock_sock_nested); | |
24337 | ||
24338 | diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c | |
24339 | index 48734ee6293f..e6864ff11352 100644 | |
24340 | --- a/net/ipv4/icmp.c | |
24341 | +++ b/net/ipv4/icmp.c | |
24342 | @@ -69,6 +69,7 @@ | |
24343 | #include <linux/jiffies.h> | |
24344 | #include <linux/kernel.h> | |
24345 | #include <linux/fcntl.h> | |
24346 | +#include <linux/sysrq.h> | |
24347 | #include <linux/socket.h> | |
24348 | #include <linux/in.h> | |
24349 | #include <linux/inet.h> | |
24350 | @@ -77,6 +78,7 @@ | |
24351 | #include <linux/string.h> | |
24352 | #include <linux/netfilter_ipv4.h> | |
24353 | #include <linux/slab.h> | |
24354 | +#include <linux/locallock.h> | |
24355 | #include <net/snmp.h> | |
24356 | #include <net/ip.h> | |
24357 | #include <net/route.h> | |
24358 | @@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |
24359 | * | |
24360 | * On SMP we have one ICMP socket per-cpu. | |
24361 | */ | |
24362 | +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); | |
24363 | + | |
24364 | static struct sock *icmp_sk(struct net *net) | |
24365 | { | |
24366 | return *this_cpu_ptr(net->ipv4.icmp_sk); | |
24367 | @@ -215,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
24368 | ||
24369 | local_bh_disable(); | |
24370 | ||
24371 | + local_lock(icmp_sk_lock); | |
24372 | sk = icmp_sk(net); | |
24373 | ||
24374 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { | |
24375 | /* This can happen if the output path signals a | |
24376 | * dst_link_failure() for an outgoing ICMP packet. | |
24377 | */ | |
24378 | + local_unlock(icmp_sk_lock); | |
24379 | local_bh_enable(); | |
24380 | return NULL; | |
24381 | } | |
24382 | @@ -230,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
24383 | static inline void icmp_xmit_unlock(struct sock *sk) | |
24384 | { | |
24385 | spin_unlock_bh(&sk->sk_lock.slock); | |
24386 | + local_unlock(icmp_sk_lock); | |
24387 | } | |
24388 | ||
24389 | int sysctl_icmp_msgs_per_sec __read_mostly = 1000; | |
24390 | @@ -358,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
24391 | struct sock *sk; | |
24392 | struct sk_buff *skb; | |
24393 | ||
24394 | + local_lock(icmp_sk_lock); | |
24395 | sk = icmp_sk(dev_net((*rt)->dst.dev)); | |
24396 | if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, | |
24397 | icmp_param->data_len+icmp_param->head_len, | |
24398 | @@ -380,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
24399 | skb->ip_summed = CHECKSUM_NONE; | |
24400 | ip_push_pending_frames(sk, fl4); | |
24401 | } | |
24402 | + local_unlock(icmp_sk_lock); | |
24403 | } | |
24404 | ||
24405 | /* | |
24406 | @@ -891,6 +900,30 @@ static bool icmp_redirect(struct sk_buff *skb) | |
24407 | } | |
24408 | ||
24409 | /* | |
24410 | + * 32bit and 64bit have different timestamp length, so we check for | |
24411 | + * the cookie at offset 20 and verify it is repeated at offset 50 | |
24412 | + */ | |
24413 | +#define CO_POS0 20 | |
24414 | +#define CO_POS1 50 | |
24415 | +#define CO_SIZE sizeof(int) | |
24416 | +#define ICMP_SYSRQ_SIZE 57 | |
24417 | + | |
24418 | +/* | |
24419 | + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie | |
24420 | + * pattern and if it matches send the next byte as a trigger to sysrq. | |
24421 | + */ | |
24422 | +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb) | |
24423 | +{ | |
24424 | + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq); | |
24425 | + char *p = skb->data; | |
24426 | + | |
24427 | + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) && | |
24428 | + !memcmp(&cookie, p + CO_POS1, CO_SIZE) && | |
24429 | + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE]) | |
24430 | + handle_sysrq(p[CO_POS0 + CO_SIZE]); | |
24431 | +} | |
24432 | + | |
24433 | +/* | |
24434 | * Handle ICMP_ECHO ("ping") requests. | |
24435 | * | |
24436 | * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo | |
24437 | @@ -917,6 +950,11 @@ static bool icmp_echo(struct sk_buff *skb) | |
24438 | icmp_param.data_len = skb->len; | |
24439 | icmp_param.head_len = sizeof(struct icmphdr); | |
24440 | icmp_reply(&icmp_param, skb); | |
24441 | + | |
24442 | + if (skb->len == ICMP_SYSRQ_SIZE && | |
24443 | + net->ipv4.sysctl_icmp_echo_sysrq) { | |
24444 | + icmp_check_sysrq(net, skb); | |
24445 | + } | |
24446 | } | |
24447 | /* should there be an ICMP stat for ignored echos? */ | |
24448 | return true; | |
24449 | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | |
24450 | index 80bc36b25de2..215b90adfb05 100644 | |
24451 | --- a/net/ipv4/sysctl_net_ipv4.c | |
24452 | +++ b/net/ipv4/sysctl_net_ipv4.c | |
24453 | @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = { | |
24454 | .proc_handler = proc_dointvec | |
24455 | }, | |
24456 | { | |
24457 | + .procname = "icmp_echo_sysrq", | |
24458 | + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq, | |
24459 | + .maxlen = sizeof(int), | |
24460 | + .mode = 0644, | |
24461 | + .proc_handler = proc_dointvec | |
24462 | + }, | |
24463 | + { | |
24464 | .procname = "icmp_ignore_bogus_error_responses", | |
24465 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | |
24466 | .maxlen = sizeof(int), | |
24467 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | |
24468 | index 2259114c7242..829e60985a81 100644 | |
24469 | --- a/net/ipv4/tcp_ipv4.c | |
24470 | +++ b/net/ipv4/tcp_ipv4.c | |
24471 | @@ -62,6 +62,7 @@ | |
24472 | #include <linux/init.h> | |
24473 | #include <linux/times.h> | |
24474 | #include <linux/slab.h> | |
24475 | +#include <linux/locallock.h> | |
24476 | ||
24477 | #include <net/net_namespace.h> | |
24478 | #include <net/icmp.h> | |
24479 | @@ -564,6 +565,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) | |
24480 | } | |
24481 | EXPORT_SYMBOL(tcp_v4_send_check); | |
24482 | ||
24483 | +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); | |
24484 | /* | |
24485 | * This routine will send an RST to the other tcp. | |
24486 | * | |
24487 | @@ -691,6 +693,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) | |
24488 | offsetof(struct inet_timewait_sock, tw_bound_dev_if)); | |
24489 | ||
24490 | arg.tos = ip_hdr(skb)->tos; | |
24491 | + | |
24492 | + local_lock(tcp_sk_lock); | |
24493 | local_bh_disable(); | |
24494 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
24495 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
24496 | @@ -700,6 +704,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) | |
24497 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
24498 | __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); | |
24499 | local_bh_enable(); | |
24500 | + local_unlock(tcp_sk_lock); | |
24501 | ||
24502 | #ifdef CONFIG_TCP_MD5SIG | |
24503 | out: | |
24504 | @@ -775,6 +780,7 @@ static void tcp_v4_send_ack(struct net *net, | |
24505 | if (oif) | |
24506 | arg.bound_dev_if = oif; | |
24507 | arg.tos = tos; | |
24508 | + local_lock(tcp_sk_lock); | |
24509 | local_bh_disable(); | |
24510 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
24511 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
24512 | @@ -783,6 +789,7 @@ static void tcp_v4_send_ack(struct net *net, | |
24513 | ||
24514 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
24515 | local_bh_enable(); | |
24516 | + local_unlock(tcp_sk_lock); | |
24517 | } | |
24518 | ||
24519 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |
24520 | diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c | |
24521 | index 2384b4aae064..bf7ab51d7035 100644 | |
24522 | --- a/net/mac80211/rx.c | |
24523 | +++ b/net/mac80211/rx.c | |
24524 | @@ -4166,7 +4166,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, | |
24525 | struct ieee80211_supported_band *sband; | |
24526 | struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); | |
24527 | ||
24528 | - WARN_ON_ONCE(softirq_count() == 0); | |
24529 | + WARN_ON_ONCE_NONRT(softirq_count() == 0); | |
24530 | ||
24531 | if (WARN_ON(status->band >= NUM_NL80211_BANDS)) | |
24532 | goto drop; | |
24533 | diff --git a/net/netfilter/core.c b/net/netfilter/core.c | |
24534 | index 004af030ef1a..b64f751bda45 100644 | |
24535 | --- a/net/netfilter/core.c | |
24536 | +++ b/net/netfilter/core.c | |
24537 | @@ -22,12 +22,18 @@ | |
24538 | #include <linux/proc_fs.h> | |
24539 | #include <linux/mutex.h> | |
24540 | #include <linux/slab.h> | |
24541 | +#include <linux/locallock.h> | |
24542 | #include <linux/rcupdate.h> | |
24543 | #include <net/net_namespace.h> | |
24544 | #include <net/sock.h> | |
24545 | ||
24546 | #include "nf_internals.h" | |
24547 | ||
24548 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24549 | +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock); | |
24550 | +EXPORT_PER_CPU_SYMBOL(xt_write_lock); | |
24551 | +#endif | |
24552 | + | |
24553 | static DEFINE_MUTEX(afinfo_mutex); | |
24554 | ||
24555 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
24556 | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c | |
24557 | index dd2332390c45..f6a703b25b6c 100644 | |
24558 | --- a/net/packet/af_packet.c | |
24559 | +++ b/net/packet/af_packet.c | |
24560 | @@ -63,6 +63,7 @@ | |
24561 | #include <linux/if_packet.h> | |
24562 | #include <linux/wireless.h> | |
24563 | #include <linux/kernel.h> | |
24564 | +#include <linux/delay.h> | |
24565 | #include <linux/kmod.h> | |
24566 | #include <linux/slab.h> | |
24567 | #include <linux/vmalloc.h> | |
24568 | @@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) | |
24569 | if (BLOCK_NUM_PKTS(pbd)) { | |
24570 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
24571 | /* Waiting for skb_copy_bits to finish... */ | |
24572 | - cpu_relax(); | |
24573 | + cpu_chill(); | |
24574 | } | |
24575 | } | |
24576 | ||
24577 | @@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, | |
24578 | if (!(status & TP_STATUS_BLK_TMO)) { | |
24579 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
24580 | /* Waiting for skb_copy_bits to finish... */ | |
24581 | - cpu_relax(); | |
24582 | + cpu_chill(); | |
24583 | } | |
24584 | } | |
24585 | prb_close_block(pkc, pbd, po, status); | |
24586 | diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c | |
24587 | index 977f69886c00..f3e7a36b0396 100644 | |
24588 | --- a/net/rds/ib_rdma.c | |
24589 | +++ b/net/rds/ib_rdma.c | |
24590 | @@ -34,6 +34,7 @@ | |
24591 | #include <linux/slab.h> | |
24592 | #include <linux/rculist.h> | |
24593 | #include <linux/llist.h> | |
24594 | +#include <linux/delay.h> | |
24595 | ||
24596 | #include "rds_single_path.h" | |
24597 | #include "ib_mr.h" | |
24598 | @@ -210,7 +211,7 @@ static inline void wait_clean_list_grace(void) | |
24599 | for_each_online_cpu(cpu) { | |
24600 | flag = &per_cpu(clean_list_grace, cpu); | |
24601 | while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) | |
24602 | - cpu_relax(); | |
24603 | + cpu_chill(); | |
24604 | } | |
24605 | } | |
24606 | ||
24607 | diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c | |
24608 | index 7d921e56e715..13df56a738e5 100644 | |
24609 | --- a/net/rxrpc/security.c | |
24610 | +++ b/net/rxrpc/security.c | |
24611 | @@ -19,9 +19,6 @@ | |
24612 | #include <keys/rxrpc-type.h> | |
24613 | #include "ar-internal.h" | |
24614 | ||
24615 | -static LIST_HEAD(rxrpc_security_methods); | |
24616 | -static DECLARE_RWSEM(rxrpc_security_sem); | |
24617 | - | |
24618 | static const struct rxrpc_security *rxrpc_security_types[] = { | |
24619 | [RXRPC_SECURITY_NONE] = &rxrpc_no_security, | |
24620 | #ifdef CONFIG_RXKAD | |
24621 | diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c | |
24622 | index 206dc24add3a..00ea9bde5bb3 100644 | |
24623 | --- a/net/sched/sch_api.c | |
24624 | +++ b/net/sched/sch_api.c | |
24625 | @@ -981,7 +981,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, | |
24626 | rcu_assign_pointer(sch->stab, stab); | |
24627 | } | |
24628 | if (tca[TCA_RATE]) { | |
24629 | - seqcount_t *running; | |
24630 | + net_seqlock_t *running; | |
24631 | ||
24632 | err = -EOPNOTSUPP; | |
24633 | if (sch->flags & TCQ_F_MQROOT) | |
24634 | diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c | |
24635 | index 6cfb6e9038c2..20727e1347de 100644 | |
24636 | --- a/net/sched/sch_generic.c | |
24637 | +++ b/net/sched/sch_generic.c | |
24638 | @@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = { | |
24639 | .ops = &noop_qdisc_ops, | |
24640 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), | |
24641 | .dev_queue = &noop_netdev_queue, | |
24642 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24643 | + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), | |
24644 | +#else | |
24645 | .running = SEQCNT_ZERO(noop_qdisc.running), | |
24646 | +#endif | |
24647 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | |
24648 | }; | |
24649 | EXPORT_SYMBOL(noop_qdisc); | |
24650 | @@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |
24651 | lockdep_set_class(&sch->busylock, | |
24652 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | |
24653 | ||
24654 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24655 | + seqlock_init(&sch->running); | |
24656 | + lockdep_set_class(&sch->running.seqcount, | |
24657 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
24658 | + lockdep_set_class(&sch->running.lock, | |
24659 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
24660 | +#else | |
24661 | seqcount_init(&sch->running); | |
24662 | lockdep_set_class(&sch->running, | |
24663 | dev->qdisc_running_key ?: &qdisc_running_key); | |
24664 | +#endif | |
24665 | ||
24666 | sch->ops = ops; | |
24667 | sch->enqueue = ops->enqueue; | |
24668 | @@ -925,7 +937,7 @@ void dev_deactivate_many(struct list_head *head) | |
24669 | /* Wait for outstanding qdisc_run calls. */ | |
24670 | list_for_each_entry(dev, head, close_list) | |
24671 | while (some_qdisc_is_busy(dev)) | |
24672 | - yield(); | |
24673 | + msleep(1); | |
24674 | } | |
24675 | ||
24676 | void dev_deactivate(struct net_device *dev) | |
24677 | diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c | |
24678 | index 9c9db55a0c1e..e6583b018a72 100644 | |
24679 | --- a/net/sunrpc/svc_xprt.c | |
24680 | +++ b/net/sunrpc/svc_xprt.c | |
24681 | @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24682 | goto out; | |
24683 | } | |
24684 | ||
24685 | - cpu = get_cpu(); | |
24686 | + cpu = get_cpu_light(); | |
24687 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
24688 | ||
24689 | atomic_long_inc(&pool->sp_stats.packets); | |
24690 | @@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24691 | ||
24692 | atomic_long_inc(&pool->sp_stats.threads_woken); | |
24693 | wake_up_process(rqstp->rq_task); | |
24694 | - put_cpu(); | |
24695 | + put_cpu_light(); | |
24696 | goto out; | |
24697 | } | |
24698 | rcu_read_unlock(); | |
24699 | @@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
24700 | goto redo_search; | |
24701 | } | |
24702 | rqstp = NULL; | |
24703 | - put_cpu(); | |
24704 | + put_cpu_light(); | |
24705 | out: | |
24706 | trace_svc_xprt_do_enqueue(xprt, rqstp); | |
24707 | } | |
24708 | diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h | |
24709 | index 6fdc97ef6023..523e0420d7f0 100755 | |
24710 | --- a/scripts/mkcompile_h | |
24711 | +++ b/scripts/mkcompile_h | |
24712 | @@ -4,7 +4,8 @@ TARGET=$1 | |
24713 | ARCH=$2 | |
24714 | SMP=$3 | |
24715 | PREEMPT=$4 | |
24716 | -CC=$5 | |
24717 | +RT=$5 | |
24718 | +CC=$6 | |
24719 | ||
24720 | vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } | |
24721 | ||
24722 | @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION" | |
24723 | CONFIG_FLAGS="" | |
24724 | if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi | |
24725 | if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi | |
24726 | +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi | |
24727 | UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" | |
24728 | ||
24729 | # Truncate to maximum length | |
24730 | diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c | |
24731 | index 9d33c1e85c79..3d307bda86f9 100644 | |
24732 | --- a/sound/core/pcm_native.c | |
24733 | +++ b/sound/core/pcm_native.c | |
24734 | @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); | |
24735 | void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) | |
24736 | { | |
24737 | if (!substream->pcm->nonatomic) | |
24738 | - local_irq_disable(); | |
24739 | + local_irq_disable_nort(); | |
24740 | snd_pcm_stream_lock(substream); | |
24741 | } | |
24742 | EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); | |
24743 | @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) | |
24744 | { | |
24745 | snd_pcm_stream_unlock(substream); | |
24746 | if (!substream->pcm->nonatomic) | |
24747 | - local_irq_enable(); | |
24748 | + local_irq_enable_nort(); | |
24749 | } | |
24750 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); | |
24751 | ||
24752 | @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) | |
24753 | { | |
24754 | unsigned long flags = 0; | |
24755 | if (!substream->pcm->nonatomic) | |
24756 | - local_irq_save(flags); | |
24757 | + local_irq_save_nort(flags); | |
24758 | snd_pcm_stream_lock(substream); | |
24759 | return flags; | |
24760 | } | |
24761 | @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, | |
24762 | { | |
24763 | snd_pcm_stream_unlock(substream); | |
24764 | if (!substream->pcm->nonatomic) | |
24765 | - local_irq_restore(flags); | |
24766 | + local_irq_restore_nort(flags); | |
24767 | } | |
24768 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); | |
24769 |