]>
Commit | Line | Data |
---|---|---|
1a6e0f06 JK |
1 | diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt |
2 | index 3a3b30ac2a75..9e0745cafbd8 100644 | |
3 | --- a/Documentation/sysrq.txt | |
4 | +++ b/Documentation/sysrq.txt | |
5 | @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>, | |
6 | On other - If you know of the key combos for other architectures, please | |
7 | let me know so I can add them to this section. | |
8 | ||
9 | -On all - write a character to /proc/sysrq-trigger. e.g.: | |
10 | - | |
11 | +On all - write a character to /proc/sysrq-trigger, e.g.: | |
12 | echo t > /proc/sysrq-trigger | |
13 | ||
14 | +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g. | |
15 | + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq | |
16 | + Send an ICMP echo request with this pattern plus the particular | |
17 | + SysRq command key. Example: | |
18 | + # ping -c1 -s57 -p0102030468 | |
19 | + will trigger the SysRq-H (help) command. | |
20 | + | |
21 | + | |
22 | * What are the 'command' keys? | |
23 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
24 | 'b' - Will immediately reboot the system without syncing or unmounting | |
25 | diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt | |
26 | new file mode 100644 | |
27 | index 000000000000..6f2aeabf7faa | |
28 | --- /dev/null | |
29 | +++ b/Documentation/trace/histograms.txt | |
30 | @@ -0,0 +1,186 @@ | |
31 | + Using the Linux Kernel Latency Histograms | |
32 | + | |
33 | + | |
34 | +This document gives a short explanation how to enable, configure and use | |
35 | +latency histograms. Latency histograms are primarily relevant in the | |
36 | +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) | |
37 | +and are used in the quality management of the Linux real-time | |
38 | +capabilities. | |
39 | + | |
40 | + | |
41 | +* Purpose of latency histograms | |
42 | + | |
43 | +A latency histogram continuously accumulates the frequencies of latency | |
44 | +data. There are two types of histograms | |
45 | +- potential sources of latencies | |
46 | +- effective latencies | |
47 | + | |
48 | + | |
49 | +* Potential sources of latencies | |
50 | + | |
51 | +Potential sources of latencies are code segments where interrupts, | |
52 | +preemption or both are disabled (aka critical sections). To create | |
53 | +histograms of potential sources of latency, the kernel stores the time | |
54 | +stamp at the start of a critical section, determines the time elapsed | |
55 | +when the end of the section is reached, and increments the frequency | |
56 | +counter of that latency value - irrespective of whether any concurrently | |
57 | +running process is affected by latency or not. | |
58 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
59 | + CONFIG_INTERRUPT_OFF_LATENCY | |
60 | + CONFIG_PREEMPT_OFF_LATENCY | |
61 | + | |
62 | + | |
63 | +* Effective latencies | |
64 | + | |
65 | +Effective latencies are actually occurring during wakeup of a process. To | |
66 | +determine effective latencies, the kernel stores the time stamp when a | |
67 | +process is scheduled to be woken up, and determines the duration of the | |
68 | +wakeup time shortly before control is passed over to this process. Note | |
69 | +that the apparent latency in user space may be somewhat longer, since the | |
70 | +process may be interrupted after control is passed over to it but before | |
71 | +the execution in user space takes place. Simply measuring the interval | |
72 | +between enqueuing and wakeup may also not be appropriate in cases when a | |
73 | +process is scheduled as a result of a timer expiration. The timer may have | |
74 | +missed its deadline, e.g. due to disabled interrupts, but this latency | |
75 | +would not be registered. Therefore, the offsets of missed timers are | |
76 | +recorded in a separate histogram. If both wakeup latency and missed timer | |
77 | +offsets are configured and enabled, a third histogram may be enabled that | |
78 | +records the overall latency as a sum of the timer latency, if any, and the | |
79 | +wakeup latency. This histogram is called "timerandwakeup". | |
80 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
81 | + CONFIG_WAKEUP_LATENCY | |
82 | + CONFIG_MISSED_TIMER_OFFSETS | |
83 | + | |
84 | + | |
85 | +* Usage | |
86 | + | |
87 | +The interface to the administration of the latency histograms is located | |
88 | +in the debugfs file system. To mount it, either enter | |
89 | + | |
90 | +mount -t sysfs nodev /sys | |
91 | +mount -t debugfs nodev /sys/kernel/debug | |
92 | + | |
93 | +from shell command line level, or add | |
94 | + | |
95 | +nodev /sys sysfs defaults 0 0 | |
96 | +nodev /sys/kernel/debug debugfs defaults 0 0 | |
97 | + | |
98 | +to the file /etc/fstab. All latency histogram related files are then | |
99 | +available in the directory /sys/kernel/debug/tracing/latency_hist. A | |
100 | +particular histogram type is enabled by writing non-zero to the related | |
101 | +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. | |
102 | +Select "preemptirqsoff" for the histograms of potential sources of | |
103 | +latencies and "wakeup" for histograms of effective latencies etc. The | |
104 | +histogram data - one per CPU - are available in the files | |
105 | + | |
106 | +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx | |
107 | +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx | |
108 | +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx | |
109 | +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx | |
110 | +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx | |
111 | +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx | |
112 | +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx | |
113 | + | |
114 | +The histograms are reset by writing non-zero to the file "reset" in a | |
115 | +particular latency directory. To reset all latency data, use | |
116 | + | |
117 | +#!/bin/sh | |
118 | + | |
119 | +TRACINGDIR=/sys/kernel/debug/tracing | |
120 | +HISTDIR=$TRACINGDIR/latency_hist | |
121 | + | |
122 | +if test -d $HISTDIR | |
123 | +then | |
124 | + cd $HISTDIR | |
125 | + for i in `find . | grep /reset$` | |
126 | + do | |
127 | + echo 1 >$i | |
128 | + done | |
129 | +fi | |
130 | + | |
131 | + | |
132 | +* Data format | |
133 | + | |
134 | +Latency data are stored with a resolution of one microsecond. The | |
135 | +maximum latency is 10,240 microseconds. The data are only valid, if the | |
136 | +overflow register is empty. Every output line contains the latency in | |
137 | +microseconds in the first row and the number of samples in the second | |
138 | +row. To display only lines with a positive latency count, use, for | |
139 | +example, | |
140 | + | |
141 | +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 | |
142 | + | |
143 | +#Minimum latency: 0 microseconds. | |
144 | +#Average latency: 0 microseconds. | |
145 | +#Maximum latency: 25 microseconds. | |
146 | +#Total samples: 3104770694 | |
147 | +#There are 0 samples greater or equal than 10240 microseconds | |
148 | +#usecs samples | |
149 | + 0 2984486876 | |
150 | + 1 49843506 | |
151 | + 2 58219047 | |
152 | + 3 5348126 | |
153 | + 4 2187960 | |
154 | + 5 3388262 | |
155 | + 6 959289 | |
156 | + 7 208294 | |
157 | + 8 40420 | |
158 | + 9 4485 | |
159 | + 10 14918 | |
160 | + 11 18340 | |
161 | + 12 25052 | |
162 | + 13 19455 | |
163 | + 14 5602 | |
164 | + 15 969 | |
165 | + 16 47 | |
166 | + 17 18 | |
167 | + 18 14 | |
168 | + 19 1 | |
169 | + 20 3 | |
170 | + 21 2 | |
171 | + 22 5 | |
172 | + 23 2 | |
173 | + 25 1 | |
174 | + | |
175 | + | |
176 | +* Wakeup latency of a selected process | |
177 | + | |
178 | +To only collect wakeup latency data of a particular process, write the | |
179 | +PID of the requested process to | |
180 | + | |
181 | +/sys/kernel/debug/tracing/latency_hist/wakeup/pid | |
182 | + | |
183 | +PIDs are not considered, if this variable is set to 0. | |
184 | + | |
185 | + | |
186 | +* Details of the process with the highest wakeup latency so far | |
187 | + | |
188 | +Selected data of the process that suffered from the highest wakeup | |
189 | +latency that occurred in a particular CPU are available in the file | |
190 | + | |
191 | +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx. | |
192 | + | |
193 | +In addition, other relevant system data at the time when the | |
194 | +latency occurred are given. | |
195 | + | |
196 | +The format of the data is (all in one line): | |
197 | +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \ | |
198 | +<- <PID> <Priority> <Command> <Timestamp> | |
199 | + | |
200 | +The value of <Timeroffset> is only relevant in the combined timer | |
201 | +and wakeup latency recording. In the wakeup recording, it is | |
202 | +always 0, in the missed_timer_offsets recording, it is the same | |
203 | +as <Latency>. | |
204 | + | |
205 | +When retrospectively searching for the origin of a latency and | |
206 | +tracing was not enabled, it may be helpful to know the name and | |
207 | +some basic data of the task that (finally) was switching to the | |
208 | +late real-time task. In addition to the victim's data, also the | |
209 | +data of the possible culprit are therefore displayed after the | |
210 | +"<-" symbol. | |
211 | + | |
212 | +Finally, the timestamp of the time when the latency occurred | |
213 | +in <seconds>.<microseconds> after the most recent system boot | |
214 | +is provided. | |
215 | + | |
216 | +These data are also reset when the wakeup histogram is reset. | |
7c18450a JK |
217 | diff --git a/MAINTAINERS b/MAINTAINERS |
218 | index 63cefa62324c..be0ea1e5c4cc 100644 | |
219 | --- a/MAINTAINERS | |
220 | +++ b/MAINTAINERS | |
221 | @@ -5196,6 +5196,23 @@ F: fs/fuse/ | |
222 | F: include/uapi/linux/fuse.h | |
223 | F: Documentation/filesystems/fuse.txt | |
224 | ||
225 | +FUTEX SUBSYSTEM | |
226 | +M: Thomas Gleixner <tglx@linutronix.de> | |
227 | +M: Ingo Molnar <mingo@redhat.com> | |
228 | +R: Peter Zijlstra <peterz@infradead.org> | |
229 | +R: Darren Hart <dvhart@infradead.org> | |
230 | +L: linux-kernel@vger.kernel.org | |
231 | +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core | |
232 | +S: Maintained | |
233 | +F: kernel/futex.c | |
234 | +F: kernel/futex_compat.c | |
235 | +F: include/asm-generic/futex.h | |
236 | +F: include/linux/futex.h | |
237 | +F: include/uapi/linux/futex.h | |
238 | +F: tools/testing/selftests/futex/ | |
239 | +F: tools/perf/bench/futex* | |
240 | +F: Documentation/*futex* | |
241 | + | |
242 | FUTURE DOMAIN TMC-16x0 SCSI DRIVER (16-bit) | |
243 | M: Rik Faith <faith@cs.unc.edu> | |
244 | L: linux-scsi@vger.kernel.org | |
1a6e0f06 | 245 | diff --git a/arch/Kconfig b/arch/Kconfig |
c7c16703 | 246 | index 659bdd079277..099fc0f5155e 100644 |
1a6e0f06 JK |
247 | --- a/arch/Kconfig |
248 | +++ b/arch/Kconfig | |
249 | @@ -9,6 +9,7 @@ config OPROFILE | |
250 | tristate "OProfile system profiling" | |
251 | depends on PROFILING | |
252 | depends on HAVE_OPROFILE | |
253 | + depends on !PREEMPT_RT_FULL | |
254 | select RING_BUFFER | |
255 | select RING_BUFFER_ALLOW_SWAP | |
256 | help | |
257 | @@ -52,6 +53,7 @@ config KPROBES | |
258 | config JUMP_LABEL | |
259 | bool "Optimize very unlikely/likely branches" | |
260 | depends on HAVE_ARCH_JUMP_LABEL | |
261 | + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) | |
262 | help | |
263 | This option enables a transparent branch optimization that | |
264 | makes certain almost-always-true or almost-always-false branch | |
265 | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig | |
c7c16703 | 266 | index b5d529fdffab..5715844e83e3 100644 |
1a6e0f06 JK |
267 | --- a/arch/arm/Kconfig |
268 | +++ b/arch/arm/Kconfig | |
269 | @@ -36,7 +36,7 @@ config ARM | |
270 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
271 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
272 | select HAVE_ARCH_HARDENED_USERCOPY | |
273 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU | |
274 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE | |
275 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU | |
276 | select HAVE_ARCH_MMAP_RND_BITS if MMU | |
277 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
278 | @@ -75,6 +75,7 @@ config ARM | |
279 | select HAVE_PERF_EVENTS | |
280 | select HAVE_PERF_REGS | |
281 | select HAVE_PERF_USER_STACK_DUMP | |
282 | + select HAVE_PREEMPT_LAZY | |
283 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
284 | select HAVE_REGS_AND_STACK_ACCESS_API | |
285 | select HAVE_SYSCALL_TRACEPOINTS | |
c7c16703 JK |
286 | diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h |
287 | index e53638c8ed8a..6095a1649865 100644 | |
288 | --- a/arch/arm/include/asm/irq.h | |
289 | +++ b/arch/arm/include/asm/irq.h | |
290 | @@ -22,6 +22,8 @@ | |
291 | #endif | |
292 | ||
293 | #ifndef __ASSEMBLY__ | |
294 | +#include <linux/cpumask.h> | |
295 | + | |
296 | struct irqaction; | |
297 | struct pt_regs; | |
298 | extern void migrate_irqs(void); | |
1a6e0f06 JK |
299 | diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h |
300 | index 12ebfcc1d539..c962084605bc 100644 | |
301 | --- a/arch/arm/include/asm/switch_to.h | |
302 | +++ b/arch/arm/include/asm/switch_to.h | |
303 | @@ -3,6 +3,13 @@ | |
304 | ||
305 | #include <linux/thread_info.h> | |
306 | ||
307 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
308 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
309 | +#else | |
310 | +static inline void | |
311 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
312 | +#endif | |
313 | + | |
314 | /* | |
315 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
316 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
317 | @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info | |
318 | #define switch_to(prev,next,last) \ | |
319 | do { \ | |
320 | __complete_pending_tlbi(); \ | |
321 | + switch_kmaps(prev, next); \ | |
322 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
323 | } while (0) | |
324 | ||
325 | diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h | |
326 | index 776757d1604a..1f36a4eccc72 100644 | |
327 | --- a/arch/arm/include/asm/thread_info.h | |
328 | +++ b/arch/arm/include/asm/thread_info.h | |
329 | @@ -49,6 +49,7 @@ struct cpu_context_save { | |
330 | struct thread_info { | |
331 | unsigned long flags; /* low level flags */ | |
332 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
333 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
334 | mm_segment_t addr_limit; /* address limit */ | |
335 | struct task_struct *task; /* main task structure */ | |
336 | __u32 cpu; /* cpu */ | |
337 | @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
338 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ | |
339 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
340 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
341 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
342 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
343 | +#define TIF_NEED_RESCHED_LAZY 7 | |
344 | ||
345 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
346 | #define TIF_USING_IWMMXT 17 | |
347 | @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
348 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | |
349 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
350 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
351 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
352 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
353 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
354 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
355 | @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
356 | * Change these and you break ASM code in entry-common.S | |
357 | */ | |
358 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
359 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
360 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
361 | + _TIF_NEED_RESCHED_LAZY) | |
362 | ||
363 | #endif /* __KERNEL__ */ | |
364 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
365 | diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c | |
366 | index 608008229c7d..3866da3f7bb7 100644 | |
367 | --- a/arch/arm/kernel/asm-offsets.c | |
368 | +++ b/arch/arm/kernel/asm-offsets.c | |
369 | @@ -65,6 +65,7 @@ int main(void) | |
370 | BLANK(); | |
371 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
372 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
373 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
374 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
375 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
376 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
377 | diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S | |
378 | index 9f157e7c51e7..468e224d76aa 100644 | |
379 | --- a/arch/arm/kernel/entry-armv.S | |
380 | +++ b/arch/arm/kernel/entry-armv.S | |
381 | @@ -220,11 +220,18 @@ ENDPROC(__dabt_svc) | |
382 | ||
383 | #ifdef CONFIG_PREEMPT | |
384 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
385 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
386 | teq r8, #0 @ if preempt count != 0 | |
387 | + bne 1f @ return from exeption | |
388 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
389 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
390 | + blne svc_preempt @ preempt! | |
391 | + | |
392 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
393 | + teq r8, #0 @ if preempt lazy count != 0 | |
394 | movne r0, #0 @ force flags to 0 | |
395 | - tst r0, #_TIF_NEED_RESCHED | |
396 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
397 | blne svc_preempt | |
398 | +1: | |
399 | #endif | |
400 | ||
401 | svc_exit r5, irq = 1 @ return from exception | |
402 | @@ -239,8 +246,14 @@ ENDPROC(__irq_svc) | |
403 | 1: bl preempt_schedule_irq @ irq en/disable is done inside | |
404 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
405 | tst r0, #_TIF_NEED_RESCHED | |
406 | + bne 1b | |
407 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
408 | reteq r8 @ go again | |
409 | - b 1b | |
410 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
411 | + teq r0, #0 @ if preempt lazy count != 0 | |
412 | + beq 1b | |
413 | + ret r8 @ go again | |
414 | + | |
415 | #endif | |
416 | ||
417 | __und_fault: | |
418 | diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S | |
419 | index 10c3283d6c19..8872937862cc 100644 | |
420 | --- a/arch/arm/kernel/entry-common.S | |
421 | +++ b/arch/arm/kernel/entry-common.S | |
422 | @@ -36,7 +36,9 @@ | |
423 | UNWIND(.cantunwind ) | |
424 | disable_irq_notrace @ disable interrupts | |
425 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
426 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
427 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
428 | + bne fast_work_pending | |
429 | + tst r1, #_TIF_SECCOMP | |
430 | bne fast_work_pending | |
431 | ||
432 | /* perform architecture specific actions before user return */ | |
433 | @@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall) | |
434 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | |
435 | disable_irq_notrace @ disable interrupts | |
436 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
437 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
438 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
439 | + bne do_slower_path | |
440 | + tst r1, #_TIF_SECCOMP | |
441 | beq no_work_pending | |
442 | +do_slower_path: | |
443 | UNWIND(.fnend ) | |
444 | ENDPROC(ret_fast_syscall) | |
445 | ||
c7c16703 JK |
446 | diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c |
447 | index 69bda1a5707e..1f665acaa6a9 100644 | |
448 | --- a/arch/arm/kernel/patch.c | |
449 | +++ b/arch/arm/kernel/patch.c | |
450 | @@ -15,7 +15,7 @@ struct patch { | |
451 | unsigned int insn; | |
452 | }; | |
453 | ||
454 | -static DEFINE_SPINLOCK(patch_lock); | |
455 | +static DEFINE_RAW_SPINLOCK(patch_lock); | |
456 | ||
457 | static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
458 | __acquires(&patch_lock) | |
459 | @@ -32,7 +32,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) | |
460 | return addr; | |
461 | ||
462 | if (flags) | |
463 | - spin_lock_irqsave(&patch_lock, *flags); | |
464 | + raw_spin_lock_irqsave(&patch_lock, *flags); | |
465 | else | |
466 | __acquire(&patch_lock); | |
467 | ||
468 | @@ -47,7 +47,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) | |
469 | clear_fixmap(fixmap); | |
470 | ||
471 | if (flags) | |
472 | - spin_unlock_irqrestore(&patch_lock, *flags); | |
473 | + raw_spin_unlock_irqrestore(&patch_lock, *flags); | |
474 | else | |
475 | __release(&patch_lock); | |
476 | } | |
1a6e0f06 | 477 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c |
c7c16703 | 478 | index 91d2d5b01414..750550098b59 100644 |
1a6e0f06 JK |
479 | --- a/arch/arm/kernel/process.c |
480 | +++ b/arch/arm/kernel/process.c | |
c7c16703 | 481 | @@ -322,6 +322,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) |
1a6e0f06 JK |
482 | } |
483 | ||
484 | #ifdef CONFIG_MMU | |
485 | +/* | |
486 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
487 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
488 | + * fail. | |
489 | + */ | |
490 | +static int __init vectors_user_mapping_init_page(void) | |
491 | +{ | |
492 | + struct page *page; | |
493 | + unsigned long addr = 0xffff0000; | |
494 | + pgd_t *pgd; | |
495 | + pud_t *pud; | |
496 | + pmd_t *pmd; | |
497 | + | |
498 | + pgd = pgd_offset_k(addr); | |
499 | + pud = pud_offset(pgd, addr); | |
500 | + pmd = pmd_offset(pud, addr); | |
501 | + page = pmd_page(*(pmd)); | |
502 | + | |
503 | + pgtable_page_ctor(page); | |
504 | + | |
505 | + return 0; | |
506 | +} | |
507 | +late_initcall(vectors_user_mapping_init_page); | |
508 | + | |
509 | #ifdef CONFIG_KUSER_HELPERS | |
510 | /* | |
511 | * The vectors page is always readable from user space for the | |
512 | diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c | |
513 | index 7b8f2141427b..96541e00b74a 100644 | |
514 | --- a/arch/arm/kernel/signal.c | |
515 | +++ b/arch/arm/kernel/signal.c | |
516 | @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | |
517 | */ | |
518 | trace_hardirqs_off(); | |
519 | do { | |
520 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
521 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
522 | + _TIF_NEED_RESCHED_LAZY))) { | |
523 | schedule(); | |
524 | } else { | |
525 | if (unlikely(!user_mode(regs))) | |
526 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
c7c16703 | 527 | index 7dd14e8395e6..4cd7e3d98035 100644 |
1a6e0f06 JK |
528 | --- a/arch/arm/kernel/smp.c |
529 | +++ b/arch/arm/kernel/smp.c | |
530 | @@ -234,8 +234,6 @@ int __cpu_disable(void) | |
531 | flush_cache_louis(); | |
532 | local_flush_tlb_all(); | |
533 | ||
534 | - clear_tasks_mm_cpumask(cpu); | |
535 | - | |
536 | return 0; | |
537 | } | |
538 | ||
539 | @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu) | |
540 | pr_err("CPU%u: cpu didn't die\n", cpu); | |
541 | return; | |
542 | } | |
543 | + | |
544 | + clear_tasks_mm_cpumask(cpu); | |
545 | + | |
546 | pr_notice("CPU%u: shutdown\n", cpu); | |
547 | ||
548 | /* | |
549 | diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c | |
550 | index 0bee233fef9a..314cfb232a63 100644 | |
551 | --- a/arch/arm/kernel/unwind.c | |
552 | +++ b/arch/arm/kernel/unwind.c | |
553 | @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; | |
554 | static const struct unwind_idx *__origin_unwind_idx; | |
555 | extern const struct unwind_idx __stop_unwind_idx[]; | |
556 | ||
557 | -static DEFINE_SPINLOCK(unwind_lock); | |
558 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
559 | static LIST_HEAD(unwind_tables); | |
560 | ||
561 | /* Convert a prel31 symbol to an absolute address */ | |
562 | @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
563 | /* module unwind tables */ | |
564 | struct unwind_table *table; | |
565 | ||
566 | - spin_lock_irqsave(&unwind_lock, flags); | |
567 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
568 | list_for_each_entry(table, &unwind_tables, list) { | |
569 | if (addr >= table->begin_addr && | |
570 | addr < table->end_addr) { | |
571 | @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
572 | break; | |
573 | } | |
574 | } | |
575 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
576 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
577 | } | |
578 | ||
579 | pr_debug("%s: idx = %p\n", __func__, idx); | |
580 | @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, | |
581 | tab->begin_addr = text_addr; | |
582 | tab->end_addr = text_addr + text_size; | |
583 | ||
584 | - spin_lock_irqsave(&unwind_lock, flags); | |
585 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
586 | list_add_tail(&tab->list, &unwind_tables); | |
587 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
588 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
589 | ||
590 | return tab; | |
591 | } | |
592 | @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) | |
593 | if (!tab) | |
594 | return; | |
595 | ||
596 | - spin_lock_irqsave(&unwind_lock, flags); | |
597 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
598 | list_del(&tab->list); | |
599 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
600 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
601 | ||
602 | kfree(tab); | |
603 | } | |
604 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | |
c7c16703 | 605 | index 19b5f5c1c0ff..82aa639e6737 100644 |
1a6e0f06 JK |
606 | --- a/arch/arm/kvm/arm.c |
607 | +++ b/arch/arm/kvm/arm.c | |
c7c16703 | 608 | @@ -619,7 +619,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
609 | * involves poking the GIC, which must be done in a |
610 | * non-preemptible context. | |
611 | */ | |
612 | - preempt_disable(); | |
613 | + migrate_disable(); | |
614 | kvm_pmu_flush_hwstate(vcpu); | |
615 | kvm_timer_flush_hwstate(vcpu); | |
616 | kvm_vgic_flush_hwstate(vcpu); | |
c7c16703 | 617 | @@ -640,7 +640,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
618 | kvm_pmu_sync_hwstate(vcpu); |
619 | kvm_timer_sync_hwstate(vcpu); | |
620 | kvm_vgic_sync_hwstate(vcpu); | |
621 | - preempt_enable(); | |
622 | + migrate_enable(); | |
623 | continue; | |
624 | } | |
625 | ||
c7c16703 | 626 | @@ -696,7 +696,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) |
1a6e0f06 JK |
627 | |
628 | kvm_vgic_sync_hwstate(vcpu); | |
629 | ||
630 | - preempt_enable(); | |
631 | + migrate_enable(); | |
632 | ||
633 | ret = handle_exit(vcpu, run, ret); | |
634 | } | |
635 | diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c | |
636 | index 98ffe1e62ad5..df9769ddece5 100644 | |
637 | --- a/arch/arm/mach-exynos/platsmp.c | |
638 | +++ b/arch/arm/mach-exynos/platsmp.c | |
639 | @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) | |
640 | return (void __iomem *)(S5P_VA_SCU); | |
641 | } | |
642 | ||
643 | -static DEFINE_SPINLOCK(boot_lock); | |
644 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
645 | ||
646 | static void exynos_secondary_init(unsigned int cpu) | |
647 | { | |
648 | @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu) | |
649 | /* | |
650 | * Synchronise with the boot thread. | |
651 | */ | |
652 | - spin_lock(&boot_lock); | |
653 | - spin_unlock(&boot_lock); | |
654 | + raw_spin_lock(&boot_lock); | |
655 | + raw_spin_unlock(&boot_lock); | |
656 | } | |
657 | ||
658 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
659 | @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
660 | * Set synchronisation state between this boot processor | |
661 | * and the secondary one | |
662 | */ | |
663 | - spin_lock(&boot_lock); | |
664 | + raw_spin_lock(&boot_lock); | |
665 | ||
666 | /* | |
667 | * The secondary processor is waiting to be released from | |
668 | @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
669 | ||
670 | if (timeout == 0) { | |
671 | printk(KERN_ERR "cpu1 power enable failed"); | |
672 | - spin_unlock(&boot_lock); | |
673 | + raw_spin_unlock(&boot_lock); | |
674 | return -ETIMEDOUT; | |
675 | } | |
676 | } | |
677 | @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
678 | * calibrations, then wait for it to finish | |
679 | */ | |
680 | fail: | |
681 | - spin_unlock(&boot_lock); | |
682 | + raw_spin_unlock(&boot_lock); | |
683 | ||
684 | return pen_release != -1 ? ret : 0; | |
685 | } | |
686 | diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c | |
687 | index 4b653a8cb75c..b03d5a922cb1 100644 | |
688 | --- a/arch/arm/mach-hisi/platmcpm.c | |
689 | +++ b/arch/arm/mach-hisi/platmcpm.c | |
690 | @@ -61,7 +61,7 @@ | |
691 | ||
692 | static void __iomem *sysctrl, *fabric; | |
693 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
694 | -static DEFINE_SPINLOCK(boot_lock); | |
695 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
696 | static u32 fabric_phys_addr; | |
697 | /* | |
698 | * [0]: bootwrapper physical address | |
699 | @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
700 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) | |
701 | return -EINVAL; | |
702 | ||
703 | - spin_lock_irq(&boot_lock); | |
704 | + raw_spin_lock_irq(&boot_lock); | |
705 | ||
706 | if (hip04_cpu_table[cluster][cpu]) | |
707 | goto out; | |
708 | @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
709 | ||
710 | out: | |
711 | hip04_cpu_table[cluster][cpu]++; | |
712 | - spin_unlock_irq(&boot_lock); | |
713 | + raw_spin_unlock_irq(&boot_lock); | |
714 | ||
715 | return 0; | |
716 | } | |
717 | @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
718 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); | |
719 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
720 | ||
721 | - spin_lock(&boot_lock); | |
722 | + raw_spin_lock(&boot_lock); | |
723 | hip04_cpu_table[cluster][cpu]--; | |
724 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
725 | /* A power_up request went ahead of us. */ | |
726 | - spin_unlock(&boot_lock); | |
727 | + raw_spin_unlock(&boot_lock); | |
728 | return; | |
729 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
730 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
731 | @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
732 | } | |
733 | ||
734 | last_man = hip04_cluster_is_down(cluster); | |
735 | - spin_unlock(&boot_lock); | |
736 | + raw_spin_unlock(&boot_lock); | |
737 | if (last_man) { | |
738 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
739 | asm volatile( | |
740 | @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
741 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); | |
742 | ||
743 | count = TIMEOUT_MSEC / POLL_MSEC; | |
744 | - spin_lock_irq(&boot_lock); | |
745 | + raw_spin_lock_irq(&boot_lock); | |
746 | for (tries = 0; tries < count; tries++) { | |
747 | if (hip04_cpu_table[cluster][cpu]) | |
748 | goto err; | |
749 | @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
750 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); | |
751 | if (data & CORE_WFI_STATUS(cpu)) | |
752 | break; | |
753 | - spin_unlock_irq(&boot_lock); | |
754 | + raw_spin_unlock_irq(&boot_lock); | |
755 | /* Wait for clean L2 when the whole cluster is down. */ | |
756 | msleep(POLL_MSEC); | |
757 | - spin_lock_irq(&boot_lock); | |
758 | + raw_spin_lock_irq(&boot_lock); | |
759 | } | |
760 | if (tries >= count) | |
761 | goto err; | |
762 | @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
763 | goto err; | |
764 | if (hip04_cluster_is_down(cluster)) | |
765 | hip04_set_snoop_filter(cluster, 0); | |
766 | - spin_unlock_irq(&boot_lock); | |
767 | + raw_spin_unlock_irq(&boot_lock); | |
768 | return 1; | |
769 | err: | |
770 | - spin_unlock_irq(&boot_lock); | |
771 | + raw_spin_unlock_irq(&boot_lock); | |
772 | return 0; | |
773 | } | |
774 | #endif | |
775 | diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c | |
776 | index b4de3da6dffa..b52893319d75 100644 | |
777 | --- a/arch/arm/mach-omap2/omap-smp.c | |
778 | +++ b/arch/arm/mach-omap2/omap-smp.c | |
779 | @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { | |
780 | .startup_addr = omap5_secondary_startup, | |
781 | }; | |
782 | ||
783 | -static DEFINE_SPINLOCK(boot_lock); | |
784 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
785 | ||
786 | void __iomem *omap4_get_scu_base(void) | |
787 | { | |
788 | @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu) | |
789 | /* | |
790 | * Synchronise with the boot thread. | |
791 | */ | |
792 | - spin_lock(&boot_lock); | |
793 | - spin_unlock(&boot_lock); | |
794 | + raw_spin_lock(&boot_lock); | |
795 | + raw_spin_unlock(&boot_lock); | |
796 | } | |
797 | ||
798 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
799 | @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
800 | * Set synchronisation state between this boot processor | |
801 | * and the secondary one | |
802 | */ | |
803 | - spin_lock(&boot_lock); | |
804 | + raw_spin_lock(&boot_lock); | |
805 | ||
806 | /* | |
807 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
808 | @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
809 | * Now the secondary core is starting up let it run its | |
810 | * calibrations, then wait for it to finish | |
811 | */ | |
812 | - spin_unlock(&boot_lock); | |
813 | + raw_spin_unlock(&boot_lock); | |
814 | ||
815 | return 0; | |
816 | } | |
817 | diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c | |
818 | index 0875b99add18..18b6d98d2581 100644 | |
819 | --- a/arch/arm/mach-prima2/platsmp.c | |
820 | +++ b/arch/arm/mach-prima2/platsmp.c | |
821 | @@ -22,7 +22,7 @@ | |
822 | ||
823 | static void __iomem *clk_base; | |
824 | ||
825 | -static DEFINE_SPINLOCK(boot_lock); | |
826 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
827 | ||
828 | static void sirfsoc_secondary_init(unsigned int cpu) | |
829 | { | |
830 | @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) | |
831 | /* | |
832 | * Synchronise with the boot thread. | |
833 | */ | |
834 | - spin_lock(&boot_lock); | |
835 | - spin_unlock(&boot_lock); | |
836 | + raw_spin_lock(&boot_lock); | |
837 | + raw_spin_unlock(&boot_lock); | |
838 | } | |
839 | ||
840 | static const struct of_device_id clk_ids[] = { | |
841 | @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
842 | /* make sure write buffer is drained */ | |
843 | mb(); | |
844 | ||
845 | - spin_lock(&boot_lock); | |
846 | + raw_spin_lock(&boot_lock); | |
847 | ||
848 | /* | |
849 | * The secondary processor is waiting to be released from | |
850 | @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
851 | * now the secondary core is starting up let it run its | |
852 | * calibrations, then wait for it to finish | |
853 | */ | |
854 | - spin_unlock(&boot_lock); | |
855 | + raw_spin_unlock(&boot_lock); | |
856 | ||
857 | return pen_release != -1 ? -ENOSYS : 0; | |
858 | } | |
859 | diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c | |
860 | index 5494c9e0c909..e8ce157d3548 100644 | |
861 | --- a/arch/arm/mach-qcom/platsmp.c | |
862 | +++ b/arch/arm/mach-qcom/platsmp.c | |
863 | @@ -46,7 +46,7 @@ | |
864 | ||
865 | extern void secondary_startup_arm(void); | |
866 | ||
867 | -static DEFINE_SPINLOCK(boot_lock); | |
868 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
869 | ||
870 | #ifdef CONFIG_HOTPLUG_CPU | |
871 | static void qcom_cpu_die(unsigned int cpu) | |
872 | @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) | |
873 | /* | |
874 | * Synchronise with the boot thread. | |
875 | */ | |
876 | - spin_lock(&boot_lock); | |
877 | - spin_unlock(&boot_lock); | |
878 | + raw_spin_lock(&boot_lock); | |
879 | + raw_spin_unlock(&boot_lock); | |
880 | } | |
881 | ||
882 | static int scss_release_secondary(unsigned int cpu) | |
883 | @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
884 | * set synchronisation state between this boot processor | |
885 | * and the secondary one | |
886 | */ | |
887 | - spin_lock(&boot_lock); | |
888 | + raw_spin_lock(&boot_lock); | |
889 | ||
890 | /* | |
891 | * Send the secondary CPU a soft interrupt, thereby causing | |
892 | @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
893 | * now the secondary core is starting up let it run its | |
894 | * calibrations, then wait for it to finish | |
895 | */ | |
896 | - spin_unlock(&boot_lock); | |
897 | + raw_spin_unlock(&boot_lock); | |
898 | ||
899 | return ret; | |
900 | } | |
901 | diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c | |
902 | index 8d1e2d551786..7fa56cc78118 100644 | |
903 | --- a/arch/arm/mach-spear/platsmp.c | |
904 | +++ b/arch/arm/mach-spear/platsmp.c | |
905 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
906 | sync_cache_w(&pen_release); | |
907 | } | |
908 | ||
909 | -static DEFINE_SPINLOCK(boot_lock); | |
910 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
911 | ||
912 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
913 | ||
914 | @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) | |
915 | /* | |
916 | * Synchronise with the boot thread. | |
917 | */ | |
918 | - spin_lock(&boot_lock); | |
919 | - spin_unlock(&boot_lock); | |
920 | + raw_spin_lock(&boot_lock); | |
921 | + raw_spin_unlock(&boot_lock); | |
922 | } | |
923 | ||
924 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
925 | @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
926 | * set synchronisation state between this boot processor | |
927 | * and the secondary one | |
928 | */ | |
929 | - spin_lock(&boot_lock); | |
930 | + raw_spin_lock(&boot_lock); | |
931 | ||
932 | /* | |
933 | * The secondary processor is waiting to be released from | |
934 | @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
935 | * now the secondary core is starting up let it run its | |
936 | * calibrations, then wait for it to finish | |
937 | */ | |
938 | - spin_unlock(&boot_lock); | |
939 | + raw_spin_unlock(&boot_lock); | |
940 | ||
941 | return pen_release != -1 ? -ENOSYS : 0; | |
942 | } | |
943 | diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c | |
944 | index ea5a2277ee46..b988e081ac79 100644 | |
945 | --- a/arch/arm/mach-sti/platsmp.c | |
946 | +++ b/arch/arm/mach-sti/platsmp.c | |
947 | @@ -35,7 +35,7 @@ static void write_pen_release(int val) | |
948 | sync_cache_w(&pen_release); | |
949 | } | |
950 | ||
951 | -static DEFINE_SPINLOCK(boot_lock); | |
952 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
953 | ||
954 | static void sti_secondary_init(unsigned int cpu) | |
955 | { | |
956 | @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) | |
957 | /* | |
958 | * Synchronise with the boot thread. | |
959 | */ | |
960 | - spin_lock(&boot_lock); | |
961 | - spin_unlock(&boot_lock); | |
962 | + raw_spin_lock(&boot_lock); | |
963 | + raw_spin_unlock(&boot_lock); | |
964 | } | |
965 | ||
966 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
967 | @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
968 | * set synchronisation state between this boot processor | |
969 | * and the secondary one | |
970 | */ | |
971 | - spin_lock(&boot_lock); | |
972 | + raw_spin_lock(&boot_lock); | |
973 | ||
974 | /* | |
975 | * The secondary processor is waiting to be released from | |
976 | @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
977 | * now the secondary core is starting up let it run its | |
978 | * calibrations, then wait for it to finish | |
979 | */ | |
980 | - spin_unlock(&boot_lock); | |
981 | + raw_spin_unlock(&boot_lock); | |
982 | ||
983 | return pen_release != -1 ? -ENOSYS : 0; | |
984 | } | |
985 | diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c | |
5c015b7c | 986 | index 0122ad1a6027..926b1be48043 100644 |
1a6e0f06 JK |
987 | --- a/arch/arm/mm/fault.c |
988 | +++ b/arch/arm/mm/fault.c | |
989 | @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
990 | if (addr < TASK_SIZE) | |
991 | return do_page_fault(addr, fsr, regs); | |
992 | ||
993 | + if (interrupts_enabled(regs)) | |
994 | + local_irq_enable(); | |
995 | + | |
996 | if (user_mode(regs)) | |
997 | goto bad_area; | |
998 | ||
999 | @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
1000 | static int | |
1001 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
1002 | { | |
1003 | + if (interrupts_enabled(regs)) | |
1004 | + local_irq_enable(); | |
1005 | + | |
1006 | do_bad_area(addr, fsr, regs); | |
1007 | return 0; | |
1008 | } | |
1009 | diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c | |
1010 | index d02f8187b1cc..542692dbd40a 100644 | |
1011 | --- a/arch/arm/mm/highmem.c | |
1012 | +++ b/arch/arm/mm/highmem.c | |
1013 | @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) | |
1014 | return *ptep; | |
1015 | } | |
1016 | ||
1017 | +static unsigned int fixmap_idx(int type) | |
1018 | +{ | |
1019 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1020 | +} | |
1021 | + | |
1022 | void *kmap(struct page *page) | |
1023 | { | |
1024 | might_sleep(); | |
1025 | @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); | |
1026 | ||
1027 | void *kmap_atomic(struct page *page) | |
1028 | { | |
1029 | + pte_t pte = mk_pte(page, kmap_prot); | |
1030 | unsigned int idx; | |
1031 | unsigned long vaddr; | |
1032 | void *kmap; | |
1033 | int type; | |
1034 | ||
1035 | - preempt_disable(); | |
1036 | + preempt_disable_nort(); | |
1037 | pagefault_disable(); | |
1038 | if (!PageHighMem(page)) | |
1039 | return page_address(page); | |
1040 | @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) | |
1041 | ||
1042 | type = kmap_atomic_idx_push(); | |
1043 | ||
1044 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1045 | + idx = fixmap_idx(type); | |
1046 | vaddr = __fix_to_virt(idx); | |
1047 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1048 | /* | |
1049 | @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) | |
1050 | * in place, so the contained TLB flush ensures the TLB is updated | |
1051 | * with the new mapping. | |
1052 | */ | |
1053 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
1054 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1055 | + current->kmap_pte[type] = pte; | |
1056 | +#endif | |
1057 | + set_fixmap_pte(idx, pte); | |
1058 | ||
1059 | return (void *)vaddr; | |
1060 | } | |
1061 | @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) | |
1062 | ||
1063 | if (kvaddr >= (void *)FIXADDR_START) { | |
1064 | type = kmap_atomic_idx(); | |
1065 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1066 | + idx = fixmap_idx(type); | |
1067 | ||
1068 | if (cache_is_vivt()) | |
1069 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
1070 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1071 | + current->kmap_pte[type] = __pte(0); | |
1072 | +#endif | |
1073 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1074 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
1075 | - set_fixmap_pte(idx, __pte(0)); | |
1076 | #else | |
1077 | (void) idx; /* to kill a warning */ | |
1078 | #endif | |
1079 | + set_fixmap_pte(idx, __pte(0)); | |
1080 | kmap_atomic_idx_pop(); | |
1081 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
1082 | /* this address was obtained through kmap_high_get() */ | |
1083 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
1084 | } | |
1085 | pagefault_enable(); | |
1086 | - preempt_enable(); | |
1087 | + preempt_enable_nort(); | |
1088 | } | |
1089 | EXPORT_SYMBOL(__kunmap_atomic); | |
1090 | ||
1091 | void *kmap_atomic_pfn(unsigned long pfn) | |
1092 | { | |
1093 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
1094 | unsigned long vaddr; | |
1095 | int idx, type; | |
1096 | struct page *page = pfn_to_page(pfn); | |
1097 | ||
1098 | - preempt_disable(); | |
1099 | + preempt_disable_nort(); | |
1100 | pagefault_disable(); | |
1101 | if (!PageHighMem(page)) | |
1102 | return page_address(page); | |
1103 | ||
1104 | type = kmap_atomic_idx_push(); | |
1105 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1106 | + idx = fixmap_idx(type); | |
1107 | vaddr = __fix_to_virt(idx); | |
1108 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1109 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
1110 | #endif | |
1111 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
1112 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1113 | + current->kmap_pte[type] = pte; | |
1114 | +#endif | |
1115 | + set_fixmap_pte(idx, pte); | |
1116 | ||
1117 | return (void *)vaddr; | |
1118 | } | |
1119 | +#if defined CONFIG_PREEMPT_RT_FULL | |
1120 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
1121 | +{ | |
1122 | + int i; | |
1123 | + | |
1124 | + /* | |
1125 | + * Clear @prev's kmap_atomic mappings | |
1126 | + */ | |
1127 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
1128 | + int idx = fixmap_idx(i); | |
1129 | + | |
1130 | + set_fixmap_pte(idx, __pte(0)); | |
1131 | + } | |
1132 | + /* | |
1133 | + * Restore @next_p's kmap_atomic mappings | |
1134 | + */ | |
1135 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
1136 | + int idx = fixmap_idx(i); | |
1137 | + | |
1138 | + if (!pte_none(next_p->kmap_pte[i])) | |
1139 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
1140 | + } | |
1141 | +} | |
1142 | +#endif | |
1143 | diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c | |
1144 | index c2366510187a..6b60f582b738 100644 | |
1145 | --- a/arch/arm/plat-versatile/platsmp.c | |
1146 | +++ b/arch/arm/plat-versatile/platsmp.c | |
1147 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
1148 | sync_cache_w(&pen_release); | |
1149 | } | |
1150 | ||
1151 | -static DEFINE_SPINLOCK(boot_lock); | |
1152 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1153 | ||
1154 | void versatile_secondary_init(unsigned int cpu) | |
1155 | { | |
1156 | @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) | |
1157 | /* | |
1158 | * Synchronise with the boot thread. | |
1159 | */ | |
1160 | - spin_lock(&boot_lock); | |
1161 | - spin_unlock(&boot_lock); | |
1162 | + raw_spin_lock(&boot_lock); | |
1163 | + raw_spin_unlock(&boot_lock); | |
1164 | } | |
1165 | ||
1166 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1167 | @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1168 | * Set synchronisation state between this boot processor | |
1169 | * and the secondary one | |
1170 | */ | |
1171 | - spin_lock(&boot_lock); | |
1172 | + raw_spin_lock(&boot_lock); | |
1173 | ||
1174 | /* | |
1175 | * This is really belt and braces; we hold unintended secondary | |
1176 | @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1177 | * now the secondary core is starting up let it run its | |
1178 | * calibrations, then wait for it to finish | |
1179 | */ | |
1180 | - spin_unlock(&boot_lock); | |
1181 | + raw_spin_unlock(&boot_lock); | |
1182 | ||
1183 | return pen_release != -1 ? -ENOSYS : 0; | |
1184 | } | |
1185 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | |
33c7bf0f | 1186 | index cf57a7799a0f..78d1b49fbed5 100644 |
1a6e0f06 JK |
1187 | --- a/arch/arm64/Kconfig |
1188 | +++ b/arch/arm64/Kconfig | |
c7c16703 | 1189 | @@ -91,6 +91,7 @@ config ARM64 |
1a6e0f06 JK |
1190 | select HAVE_PERF_EVENTS |
1191 | select HAVE_PERF_REGS | |
1192 | select HAVE_PERF_USER_STACK_DUMP | |
1193 | + select HAVE_PREEMPT_LAZY | |
1194 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1195 | select HAVE_RCU_TABLE_FREE | |
1196 | select HAVE_SYSCALL_TRACEPOINTS | |
33c7bf0f | 1197 | @@ -704,7 +705,7 @@ config XEN_DOM0 |
1a6e0f06 JK |
1198 | |
1199 | config XEN | |
1200 | bool "Xen guest support on ARM64" | |
1201 | - depends on ARM64 && OF | |
1202 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1203 | select SWIOTLB_XEN | |
1204 | select PARAVIRT | |
1205 | help | |
1206 | diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h | |
c7c16703 | 1207 | index e9ea5a6bd449..6c500ad63c6a 100644 |
1a6e0f06 JK |
1208 | --- a/arch/arm64/include/asm/thread_info.h |
1209 | +++ b/arch/arm64/include/asm/thread_info.h | |
1210 | @@ -49,6 +49,7 @@ struct thread_info { | |
1211 | mm_segment_t addr_limit; /* address limit */ | |
1212 | struct task_struct *task; /* main task structure */ | |
1213 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
1214 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1215 | int cpu; /* cpu */ | |
1216 | }; | |
1217 | ||
c7c16703 | 1218 | @@ -112,6 +113,7 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1219 | #define TIF_NEED_RESCHED 1 |
1220 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | |
1221 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ | |
1222 | +#define TIF_NEED_RESCHED_LAZY 4 | |
1223 | #define TIF_NOHZ 7 | |
1224 | #define TIF_SYSCALL_TRACE 8 | |
1225 | #define TIF_SYSCALL_AUDIT 9 | |
c7c16703 | 1226 | @@ -127,6 +129,7 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1227 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) |
1228 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1229 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1230 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1231 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1232 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1233 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
c7c16703 | 1234 | @@ -135,7 +138,9 @@ static inline struct thread_info *current_thread_info(void) |
1a6e0f06 JK |
1235 | #define _TIF_32BIT (1 << TIF_32BIT) |
1236 | ||
1237 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
1238 | - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) | |
1239 | + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ | |
1240 | + _TIF_NEED_RESCHED_LAZY) | |
c7c16703 | 1241 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) |
1a6e0f06 JK |
1242 | |
1243 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1244 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
1245 | diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c | |
7c18450a | 1246 | index c58ddf8c4062..a8f2f7c1fe12 100644 |
1a6e0f06 JK |
1247 | --- a/arch/arm64/kernel/asm-offsets.c |
1248 | +++ b/arch/arm64/kernel/asm-offsets.c | |
c7c16703 | 1249 | @@ -38,6 +38,7 @@ int main(void) |
1a6e0f06 JK |
1250 | BLANK(); |
1251 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1252 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1253 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1254 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
1255 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1256 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1257 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | |
1f39f580 | 1258 | index 79b0fe24d5b7..f3c959ade308 100644 |
1a6e0f06 JK |
1259 | --- a/arch/arm64/kernel/entry.S |
1260 | +++ b/arch/arm64/kernel/entry.S | |
c7c16703 | 1261 | @@ -428,11 +428,16 @@ ENDPROC(el1_sync) |
1a6e0f06 JK |
1262 | |
1263 | #ifdef CONFIG_PREEMPT | |
1264 | ldr w24, [tsk, #TI_PREEMPT] // get preempt count | |
1265 | - cbnz w24, 1f // preempt count != 0 | |
1266 | + cbnz w24, 2f // preempt count != 0 | |
1267 | ldr x0, [tsk, #TI_FLAGS] // get flags | |
1268 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1269 | - bl el1_preempt | |
1270 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1271 | + | |
1272 | + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count | |
1273 | + cbnz w24, 2f // preempt lazy count != 0 | |
1274 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1275 | 1: | |
1276 | + bl el1_preempt | |
1277 | +2: | |
1278 | #endif | |
1279 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1280 | bl trace_hardirqs_on | |
c7c16703 | 1281 | @@ -446,6 +451,7 @@ ENDPROC(el1_irq) |
1a6e0f06 JK |
1282 | 1: bl preempt_schedule_irq // irq en/disable is done inside |
1283 | ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS | |
1284 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? | |
1285 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1286 | ret x24 | |
1287 | #endif | |
1288 | ||
c7c16703 JK |
1289 | diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c |
1290 | index 404dd67080b9..639dc6d12e72 100644 | |
1291 | --- a/arch/arm64/kernel/signal.c | |
1292 | +++ b/arch/arm64/kernel/signal.c | |
1293 | @@ -409,7 +409,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, | |
1294 | */ | |
1295 | trace_hardirqs_off(); | |
1296 | do { | |
1297 | - if (thread_flags & _TIF_NEED_RESCHED) { | |
1298 | + if (thread_flags & _TIF_NEED_RESCHED_MASK) { | |
1299 | schedule(); | |
1300 | } else { | |
1301 | local_irq_enable(); | |
1a6e0f06 | 1302 | diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig |
7c18450a | 1303 | index 5a4f2eb9d0d5..867eca2e7210 100644 |
1a6e0f06 JK |
1304 | --- a/arch/mips/Kconfig |
1305 | +++ b/arch/mips/Kconfig | |
7c18450a | 1306 | @@ -2515,7 +2515,7 @@ config MIPS_ASID_BITS_VARIABLE |
1a6e0f06 JK |
1307 | # |
1308 | config HIGHMEM | |
1309 | bool "High Memory Support" | |
1310 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1311 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1312 | ||
1313 | config CPU_SUPPORTS_HIGHMEM | |
1314 | bool | |
1315 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | |
c7c16703 | 1316 | index 65fba4c34cd7..4b5ba68910e0 100644 |
1a6e0f06 JK |
1317 | --- a/arch/powerpc/Kconfig |
1318 | +++ b/arch/powerpc/Kconfig | |
c7c16703 | 1319 | @@ -52,10 +52,11 @@ config LOCKDEP_SUPPORT |
1a6e0f06 JK |
1320 | |
1321 | config RWSEM_GENERIC_SPINLOCK | |
1322 | bool | |
1323 | + default y if PREEMPT_RT_FULL | |
1324 | ||
1325 | config RWSEM_XCHGADD_ALGORITHM | |
1326 | bool | |
1327 | - default y | |
1328 | + default y if !PREEMPT_RT_FULL | |
1329 | ||
1330 | config GENERIC_LOCKBREAK | |
1331 | bool | |
c7c16703 | 1332 | @@ -134,6 +135,7 @@ config PPC |
1a6e0f06 JK |
1333 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST |
1334 | select GENERIC_STRNCPY_FROM_USER | |
1335 | select GENERIC_STRNLEN_USER | |
1336 | + select HAVE_PREEMPT_LAZY | |
1337 | select HAVE_MOD_ARCH_SPECIFIC | |
1338 | select MODULES_USE_ELF_RELA | |
1339 | select CLONE_BACKWARDS | |
c7c16703 | 1340 | @@ -321,7 +323,7 @@ menu "Kernel options" |
1a6e0f06 JK |
1341 | |
1342 | config HIGHMEM | |
1343 | bool "High memory support" | |
1344 | - depends on PPC32 | |
1345 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1346 | ||
1347 | source kernel/Kconfig.hz | |
1348 | source kernel/Kconfig.preempt | |
1349 | diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h | |
1350 | index 87e4b2d8dcd4..981e501a4359 100644 | |
1351 | --- a/arch/powerpc/include/asm/thread_info.h | |
1352 | +++ b/arch/powerpc/include/asm/thread_info.h | |
1353 | @@ -43,6 +43,8 @@ struct thread_info { | |
1354 | int cpu; /* cpu we're on */ | |
1355 | int preempt_count; /* 0 => preemptable, | |
1356 | <0 => BUG */ | |
1357 | + int preempt_lazy_count; /* 0 => preemptable, | |
1358 | + <0 => BUG */ | |
1359 | unsigned long local_flags; /* private flags for thread */ | |
1360 | #ifdef CONFIG_LIVEPATCH | |
1361 | unsigned long *livepatch_sp; | |
1362 | @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void) | |
1363 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ | |
1364 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1365 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1366 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1367 | - TIF_NEED_RESCHED */ | |
1368 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1369 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1370 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
1371 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
1372 | @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) | |
1373 | #if defined(CONFIG_PPC64) | |
1374 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1375 | #endif | |
1376 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1377 | + TIF_NEED_RESCHED */ | |
1378 | ||
1379 | /* as above, but as bit values */ | |
1380 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
1381 | @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void) | |
1382 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) | |
1383 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1384 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1385 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1386 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1387 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1388 | _TIF_NOHZ) | |
1389 | ||
1390 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1391 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1392 | - _TIF_RESTORE_TM) | |
1393 | + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) | |
1394 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
1395 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1396 | ||
1397 | /* Bits in local_flags */ | |
1398 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
1399 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | |
c7c16703 | 1400 | index c833d88c423d..96e9fbc3f684 100644 |
1a6e0f06 JK |
1401 | --- a/arch/powerpc/kernel/asm-offsets.c |
1402 | +++ b/arch/powerpc/kernel/asm-offsets.c | |
1403 | @@ -156,6 +156,7 @@ int main(void) | |
1404 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1405 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | |
1406 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1407 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1408 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1409 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1410 | ||
1411 | diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S | |
c7c16703 | 1412 | index 3841d749a430..6dbaeff192b9 100644 |
1a6e0f06 JK |
1413 | --- a/arch/powerpc/kernel/entry_32.S |
1414 | +++ b/arch/powerpc/kernel/entry_32.S | |
1415 | @@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1416 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1417 | bne restore | |
1418 | andi. r8,r8,_TIF_NEED_RESCHED | |
1419 | + bne+ 1f | |
1420 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1421 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1422 | + bne restore | |
1423 | + lwz r0,TI_FLAGS(r9) | |
1424 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1425 | beq+ restore | |
1426 | +1: | |
1427 | lwz r3,_MSR(r1) | |
1428 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1429 | beq restore /* don't schedule if so */ | |
1430 | @@ -846,11 +853,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1431 | */ | |
1432 | bl trace_hardirqs_off | |
1433 | #endif | |
1434 | -1: bl preempt_schedule_irq | |
1435 | +2: bl preempt_schedule_irq | |
1436 | CURRENT_THREAD_INFO(r9, r1) | |
1437 | lwz r3,TI_FLAGS(r9) | |
1438 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1439 | - bne- 1b | |
1440 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1441 | + bne- 2b | |
1442 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1443 | /* And now, to properly rebalance the above, we tell lockdep they | |
1444 | * are being turned back on, which will happen when we return | |
1445 | @@ -1171,7 +1178,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) | |
1446 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ | |
1447 | ||
1448 | do_work: /* r10 contains MSR_KERNEL here */ | |
1449 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1450 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1451 | beq do_user_signal | |
1452 | ||
1453 | do_resched: /* r10 contains MSR_KERNEL here */ | |
1454 | @@ -1192,7 +1199,7 @@ do_resched: /* r10 contains MSR_KERNEL here */ | |
1455 | MTMSRD(r10) /* disable interrupts */ | |
1456 | CURRENT_THREAD_INFO(r9, r1) | |
1457 | lwz r9,TI_FLAGS(r9) | |
1458 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1459 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1460 | bne- do_resched | |
1461 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1462 | beq restore_user | |
1463 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | |
7c18450a | 1464 | index 767ef6d68c9e..2cb4d5552319 100644 |
1a6e0f06 JK |
1465 | --- a/arch/powerpc/kernel/entry_64.S |
1466 | +++ b/arch/powerpc/kernel/entry_64.S | |
c7c16703 | 1467 | @@ -656,7 +656,7 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1468 | bl restore_math |
1469 | b restore | |
1470 | #endif | |
1471 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1472 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1473 | beq 2f | |
1474 | bl restore_interrupts | |
1475 | SCHEDULE_USER | |
c7c16703 | 1476 | @@ -718,10 +718,18 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1477 | |
1478 | #ifdef CONFIG_PREEMPT | |
1479 | /* Check if we need to preempt */ | |
1480 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1481 | - beq+ restore | |
1482 | - /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1483 | lwz r8,TI_PREEMPT(r9) | |
1484 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ | |
1485 | + bne restore | |
1486 | + andi. r0,r4,_TIF_NEED_RESCHED | |
1487 | + bne+ check_count | |
1488 | + | |
1489 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
1490 | + beq+ restore | |
1491 | + lwz r8,TI_PREEMPT_LAZY(r9) | |
1492 | + | |
1493 | + /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1494 | +check_count: | |
1495 | cmpwi cr1,r8,0 | |
1496 | ld r0,SOFTE(r1) | |
1497 | cmpdi r0,0 | |
c7c16703 | 1498 | @@ -738,7 +746,7 @@ _GLOBAL(ret_from_except_lite) |
1a6e0f06 JK |
1499 | /* Re-test flags and eventually loop */ |
1500 | CURRENT_THREAD_INFO(r9, r1) | |
1501 | ld r4,TI_FLAGS(r9) | |
1502 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1503 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1504 | bne 1b | |
1505 | ||
1506 | /* | |
1507 | diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c | |
c7c16703 | 1508 | index 3c05c311e35e..f83f6ac1274d 100644 |
1a6e0f06 JK |
1509 | --- a/arch/powerpc/kernel/irq.c |
1510 | +++ b/arch/powerpc/kernel/irq.c | |
c7c16703 | 1511 | @@ -638,6 +638,7 @@ void irq_ctx_init(void) |
1a6e0f06 JK |
1512 | } |
1513 | } | |
1514 | ||
1515 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1516 | void do_softirq_own_stack(void) | |
1517 | { | |
1518 | struct thread_info *curtp, *irqtp; | |
c7c16703 | 1519 | @@ -655,6 +656,7 @@ void do_softirq_own_stack(void) |
1a6e0f06 JK |
1520 | if (irqtp->flags) |
1521 | set_bits(irqtp->flags, &curtp->flags); | |
1522 | } | |
1523 | +#endif | |
1524 | ||
1525 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1526 | { | |
1527 | diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S | |
c7c16703 | 1528 | index 030d72df5dd5..b471a709e100 100644 |
1a6e0f06 JK |
1529 | --- a/arch/powerpc/kernel/misc_32.S |
1530 | +++ b/arch/powerpc/kernel/misc_32.S | |
c7c16703 | 1531 | @@ -41,6 +41,7 @@ |
1a6e0f06 JK |
1532 | * We store the saved ksp_limit in the unused part |
1533 | * of the STACK_FRAME_OVERHEAD | |
1534 | */ | |
1535 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1536 | _GLOBAL(call_do_softirq) | |
1537 | mflr r0 | |
1538 | stw r0,4(r1) | |
c7c16703 | 1539 | @@ -57,6 +58,7 @@ _GLOBAL(call_do_softirq) |
1a6e0f06 JK |
1540 | stw r10,THREAD+KSP_LIMIT(r2) |
1541 | mtlr r0 | |
1542 | blr | |
1543 | +#endif | |
1544 | ||
1545 | /* | |
1546 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
1547 | diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S | |
7c18450a | 1548 | index 4cefe6888b18..cb2ee4be999a 100644 |
1a6e0f06 JK |
1549 | --- a/arch/powerpc/kernel/misc_64.S |
1550 | +++ b/arch/powerpc/kernel/misc_64.S | |
c7c16703 | 1551 | @@ -31,6 +31,7 @@ |
1a6e0f06 JK |
1552 | |
1553 | .text | |
1554 | ||
1555 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1556 | _GLOBAL(call_do_softirq) | |
1557 | mflr r0 | |
1558 | std r0,16(r1) | |
c7c16703 | 1559 | @@ -41,6 +42,7 @@ _GLOBAL(call_do_softirq) |
1a6e0f06 JK |
1560 | ld r0,16(r1) |
1561 | mtlr r0 | |
1562 | blr | |
1563 | +#endif | |
1564 | ||
1565 | _GLOBAL(call_do_irq) | |
1566 | mflr r0 | |
1567 | diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig | |
c7c16703 | 1568 | index 029be26b5a17..9528089ea142 100644 |
1a6e0f06 JK |
1569 | --- a/arch/powerpc/kvm/Kconfig |
1570 | +++ b/arch/powerpc/kvm/Kconfig | |
c7c16703 | 1571 | @@ -175,6 +175,7 @@ config KVM_E500MC |
1a6e0f06 JK |
1572 | config KVM_MPIC |
1573 | bool "KVM in-kernel MPIC emulation" | |
1574 | depends on KVM && E500 | |
1575 | + depends on !PREEMPT_RT_FULL | |
1576 | select HAVE_KVM_IRQCHIP | |
1577 | select HAVE_KVM_IRQFD | |
1578 | select HAVE_KVM_IRQ_ROUTING | |
1579 | diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c | |
c7c16703 | 1580 | index e48462447ff0..2670cee66064 100644 |
1a6e0f06 JK |
1581 | --- a/arch/powerpc/platforms/ps3/device-init.c |
1582 | +++ b/arch/powerpc/platforms/ps3/device-init.c | |
1583 | @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, | |
1584 | } | |
1585 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
1586 | ||
1587 | - res = wait_event_interruptible(dev->done.wait, | |
1588 | + res = swait_event_interruptible(dev->done.wait, | |
1589 | dev->done.done || kthread_should_stop()); | |
1590 | if (kthread_should_stop()) | |
1591 | res = -EINTR; | |
1592 | diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c | |
1593 | index 6c0378c0b8b5..abd58b4dff97 100644 | |
1594 | --- a/arch/sh/kernel/irq.c | |
1595 | +++ b/arch/sh/kernel/irq.c | |
1596 | @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) | |
1597 | hardirq_ctx[cpu] = NULL; | |
1598 | } | |
1599 | ||
1600 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1601 | void do_softirq_own_stack(void) | |
1602 | { | |
1603 | struct thread_info *curctx; | |
1604 | @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) | |
1605 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" | |
1606 | ); | |
1607 | } | |
1608 | +#endif | |
1609 | #else | |
1610 | static inline void handle_one_irq(unsigned int irq) | |
1611 | { | |
1612 | diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig | |
c7c16703 | 1613 | index 165ecdd24d22..b68a464a22be 100644 |
1a6e0f06 JK |
1614 | --- a/arch/sparc/Kconfig |
1615 | +++ b/arch/sparc/Kconfig | |
c7c16703 | 1616 | @@ -194,12 +194,10 @@ config NR_CPUS |
1a6e0f06 JK |
1617 | source kernel/Kconfig.hz |
1618 | ||
1619 | config RWSEM_GENERIC_SPINLOCK | |
1620 | - bool | |
1621 | - default y if SPARC32 | |
1622 | + def_bool PREEMPT_RT_FULL | |
1623 | ||
1624 | config RWSEM_XCHGADD_ALGORITHM | |
1625 | - bool | |
1626 | - default y if SPARC64 | |
1627 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1628 | ||
1629 | config GENERIC_HWEIGHT | |
1630 | bool | |
1631 | diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c | |
1632 | index 34a7930b76ef..773740521008 100644 | |
1633 | --- a/arch/sparc/kernel/irq_64.c | |
1634 | +++ b/arch/sparc/kernel/irq_64.c | |
1635 | @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) | |
1636 | set_irq_regs(old_regs); | |
1637 | } | |
1638 | ||
1639 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1640 | void do_softirq_own_stack(void) | |
1641 | { | |
1642 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
1643 | @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) | |
1644 | __asm__ __volatile__("mov %0, %%sp" | |
1645 | : : "r" (orig_sp)); | |
1646 | } | |
1647 | +#endif | |
1648 | ||
1649 | #ifdef CONFIG_HOTPLUG_CPU | |
1650 | void fixup_irqs(void) | |
1651 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | |
c7c16703 | 1652 | index bada636d1065..f8a995c90c01 100644 |
1a6e0f06 JK |
1653 | --- a/arch/x86/Kconfig |
1654 | +++ b/arch/x86/Kconfig | |
1655 | @@ -17,6 +17,7 @@ config X86_64 | |
1656 | ### Arch settings | |
1657 | config X86 | |
1658 | def_bool y | |
1659 | + select HAVE_PREEMPT_LAZY | |
1660 | select ACPI_LEGACY_TABLES_LOOKUP if ACPI | |
1661 | select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI | |
1662 | select ANON_INODES | |
c7c16703 | 1663 | @@ -232,8 +233,11 @@ config ARCH_MAY_HAVE_PC_FDC |
1a6e0f06 JK |
1664 | def_bool y |
1665 | depends on ISA_DMA_API | |
1666 | ||
1667 | +config RWSEM_GENERIC_SPINLOCK | |
1668 | + def_bool PREEMPT_RT_FULL | |
1669 | + | |
1670 | config RWSEM_XCHGADD_ALGORITHM | |
1671 | - def_bool y | |
1672 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1673 | ||
1674 | config GENERIC_CALIBRATE_DELAY | |
1675 | def_bool y | |
c7c16703 | 1676 | @@ -897,7 +901,7 @@ config IOMMU_HELPER |
1a6e0f06 JK |
1677 | config MAXSMP |
1678 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
1679 | depends on X86_64 && SMP && DEBUG_KERNEL | |
1680 | - select CPUMASK_OFFSTACK | |
1681 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
1682 | ---help--- | |
1683 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
1684 | If unsure, say N. | |
1685 | diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c | |
c7c16703 | 1686 | index aa8b0672f87a..2429414bfc71 100644 |
1a6e0f06 JK |
1687 | --- a/arch/x86/crypto/aesni-intel_glue.c |
1688 | +++ b/arch/x86/crypto/aesni-intel_glue.c | |
1689 | @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |
1690 | err = blkcipher_walk_virt(desc, &walk); | |
1691 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1692 | ||
1693 | - kernel_fpu_begin(); | |
1694 | while ((nbytes = walk.nbytes)) { | |
1695 | + kernel_fpu_begin(); | |
1696 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1697 | - nbytes & AES_BLOCK_MASK); | |
1698 | + nbytes & AES_BLOCK_MASK); | |
1699 | + kernel_fpu_end(); | |
1700 | nbytes &= AES_BLOCK_SIZE - 1; | |
1701 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1702 | } | |
1703 | - kernel_fpu_end(); | |
1704 | ||
1705 | return err; | |
1706 | } | |
1707 | @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |
1708 | err = blkcipher_walk_virt(desc, &walk); | |
1709 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1710 | ||
1711 | - kernel_fpu_begin(); | |
1712 | while ((nbytes = walk.nbytes)) { | |
1713 | + kernel_fpu_begin(); | |
1714 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1715 | nbytes & AES_BLOCK_MASK); | |
1716 | + kernel_fpu_end(); | |
1717 | nbytes &= AES_BLOCK_SIZE - 1; | |
1718 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1719 | } | |
1720 | - kernel_fpu_end(); | |
1721 | ||
1722 | return err; | |
1723 | } | |
1724 | @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |
1725 | err = blkcipher_walk_virt(desc, &walk); | |
1726 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1727 | ||
1728 | - kernel_fpu_begin(); | |
1729 | while ((nbytes = walk.nbytes)) { | |
1730 | + kernel_fpu_begin(); | |
1731 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1732 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1733 | + kernel_fpu_end(); | |
1734 | nbytes &= AES_BLOCK_SIZE - 1; | |
1735 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1736 | } | |
1737 | - kernel_fpu_end(); | |
1738 | ||
1739 | return err; | |
1740 | } | |
1741 | @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |
1742 | err = blkcipher_walk_virt(desc, &walk); | |
1743 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1744 | ||
1745 | - kernel_fpu_begin(); | |
1746 | while ((nbytes = walk.nbytes)) { | |
1747 | + kernel_fpu_begin(); | |
1748 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1749 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1750 | + kernel_fpu_end(); | |
1751 | nbytes &= AES_BLOCK_SIZE - 1; | |
1752 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1753 | } | |
1754 | - kernel_fpu_end(); | |
1755 | ||
1756 | return err; | |
1757 | } | |
1758 | @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |
1759 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | |
1760 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1761 | ||
1762 | - kernel_fpu_begin(); | |
1763 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
1764 | + kernel_fpu_begin(); | |
1765 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1766 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1767 | + kernel_fpu_end(); | |
1768 | nbytes &= AES_BLOCK_SIZE - 1; | |
1769 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1770 | } | |
1771 | if (walk.nbytes) { | |
1772 | + kernel_fpu_begin(); | |
1773 | ctr_crypt_final(ctx, &walk); | |
1774 | + kernel_fpu_end(); | |
1775 | err = blkcipher_walk_done(desc, &walk, 0); | |
1776 | } | |
1777 | - kernel_fpu_end(); | |
1778 | ||
1779 | return err; | |
1780 | } | |
1781 | diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c | |
1782 | index 8648158f3916..d7699130ee36 100644 | |
1783 | --- a/arch/x86/crypto/cast5_avx_glue.c | |
1784 | +++ b/arch/x86/crypto/cast5_avx_glue.c | |
1785 | @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) | |
1786 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1787 | bool enc) | |
1788 | { | |
1789 | - bool fpu_enabled = false; | |
1790 | + bool fpu_enabled; | |
1791 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1792 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
1793 | unsigned int nbytes; | |
1794 | @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1795 | u8 *wsrc = walk->src.virt.addr; | |
1796 | u8 *wdst = walk->dst.virt.addr; | |
1797 | ||
1798 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1799 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1800 | ||
1801 | /* Process multi-block batch */ | |
1802 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
1803 | @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1804 | } while (nbytes >= bsize); | |
1805 | ||
1806 | done: | |
1807 | + cast5_fpu_end(fpu_enabled); | |
1808 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1809 | } | |
1810 | - | |
1811 | - cast5_fpu_end(fpu_enabled); | |
1812 | return err; | |
1813 | } | |
1814 | ||
1815 | @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |
1816 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1817 | struct scatterlist *src, unsigned int nbytes) | |
1818 | { | |
1819 | - bool fpu_enabled = false; | |
1820 | + bool fpu_enabled; | |
1821 | struct blkcipher_walk walk; | |
1822 | int err; | |
1823 | ||
1824 | @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1825 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1826 | ||
1827 | while ((nbytes = walk.nbytes)) { | |
1828 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1829 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1830 | nbytes = __cbc_decrypt(desc, &walk); | |
1831 | + cast5_fpu_end(fpu_enabled); | |
1832 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1833 | } | |
1834 | - | |
1835 | - cast5_fpu_end(fpu_enabled); | |
1836 | return err; | |
1837 | } | |
1838 | ||
1839 | @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | |
1840 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1841 | struct scatterlist *src, unsigned int nbytes) | |
1842 | { | |
1843 | - bool fpu_enabled = false; | |
1844 | + bool fpu_enabled; | |
1845 | struct blkcipher_walk walk; | |
1846 | int err; | |
1847 | ||
1848 | @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1849 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1850 | ||
1851 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
1852 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1853 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1854 | nbytes = __ctr_crypt(desc, &walk); | |
1855 | + cast5_fpu_end(fpu_enabled); | |
1856 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1857 | } | |
1858 | ||
1859 | - cast5_fpu_end(fpu_enabled); | |
1860 | - | |
1861 | if (walk.nbytes) { | |
1862 | ctr_crypt_final(desc, &walk); | |
1863 | err = blkcipher_walk_done(desc, &walk, 0); | |
1864 | diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c | |
1865 | index 6a85598931b5..3a506ce7ed93 100644 | |
1866 | --- a/arch/x86/crypto/glue_helper.c | |
1867 | +++ b/arch/x86/crypto/glue_helper.c | |
1868 | @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1869 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1870 | const unsigned int bsize = 128 / 8; | |
1871 | unsigned int nbytes, i, func_bytes; | |
1872 | - bool fpu_enabled = false; | |
1873 | + bool fpu_enabled; | |
1874 | int err; | |
1875 | ||
1876 | err = blkcipher_walk_virt(desc, walk); | |
1877 | @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1878 | u8 *wdst = walk->dst.virt.addr; | |
1879 | ||
1880 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1881 | - desc, fpu_enabled, nbytes); | |
1882 | + desc, false, nbytes); | |
1883 | ||
1884 | for (i = 0; i < gctx->num_funcs; i++) { | |
1885 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
1886 | @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1887 | } | |
1888 | ||
1889 | done: | |
1890 | + glue_fpu_end(fpu_enabled); | |
1891 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1892 | } | |
1893 | ||
1894 | - glue_fpu_end(fpu_enabled); | |
1895 | return err; | |
1896 | } | |
1897 | ||
1898 | @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1899 | struct scatterlist *src, unsigned int nbytes) | |
1900 | { | |
1901 | const unsigned int bsize = 128 / 8; | |
1902 | - bool fpu_enabled = false; | |
1903 | + bool fpu_enabled; | |
1904 | struct blkcipher_walk walk; | |
1905 | int err; | |
1906 | ||
1907 | @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1908 | ||
1909 | while ((nbytes = walk.nbytes)) { | |
1910 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1911 | - desc, fpu_enabled, nbytes); | |
1912 | + desc, false, nbytes); | |
1913 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
1914 | + glue_fpu_end(fpu_enabled); | |
1915 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1916 | } | |
1917 | ||
1918 | - glue_fpu_end(fpu_enabled); | |
1919 | return err; | |
1920 | } | |
1921 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
1922 | @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1923 | struct scatterlist *src, unsigned int nbytes) | |
1924 | { | |
1925 | const unsigned int bsize = 128 / 8; | |
1926 | - bool fpu_enabled = false; | |
1927 | + bool fpu_enabled; | |
1928 | struct blkcipher_walk walk; | |
1929 | int err; | |
1930 | ||
1931 | @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1932 | ||
1933 | while ((nbytes = walk.nbytes) >= bsize) { | |
1934 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1935 | - desc, fpu_enabled, nbytes); | |
1936 | + desc, false, nbytes); | |
1937 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
1938 | + glue_fpu_end(fpu_enabled); | |
1939 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1940 | } | |
1941 | ||
1942 | - glue_fpu_end(fpu_enabled); | |
1943 | - | |
1944 | if (walk.nbytes) { | |
1945 | glue_ctr_crypt_final_128bit( | |
1946 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
1947 | @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1948 | void *tweak_ctx, void *crypt_ctx) | |
1949 | { | |
1950 | const unsigned int bsize = 128 / 8; | |
1951 | - bool fpu_enabled = false; | |
1952 | + bool fpu_enabled; | |
1953 | struct blkcipher_walk walk; | |
1954 | int err; | |
1955 | ||
1956 | @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1957 | ||
1958 | /* set minimum length to bsize, for tweak_fn */ | |
1959 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1960 | - desc, fpu_enabled, | |
1961 | + desc, false, | |
1962 | nbytes < bsize ? bsize : nbytes); | |
1963 | - | |
1964 | /* calculate first value of T */ | |
1965 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
1966 | + glue_fpu_end(fpu_enabled); | |
1967 | ||
1968 | while (nbytes) { | |
1969 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1970 | + desc, false, nbytes); | |
1971 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
1972 | ||
1973 | + glue_fpu_end(fpu_enabled); | |
1974 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1975 | nbytes = walk.nbytes; | |
1976 | } | |
1977 | - | |
1978 | - glue_fpu_end(fpu_enabled); | |
1979 | - | |
1980 | return err; | |
1981 | } | |
1982 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
1983 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | |
c7c16703 | 1984 | index bdd9cc59d20f..56d01a339ba4 100644 |
1a6e0f06 JK |
1985 | --- a/arch/x86/entry/common.c |
1986 | +++ b/arch/x86/entry/common.c | |
c7c16703 | 1987 | @@ -129,7 +129,7 @@ static long syscall_trace_enter(struct pt_regs *regs) |
1a6e0f06 JK |
1988 | |
1989 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
1990 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1991 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) | |
1992 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) | |
1993 | ||
1994 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
1995 | { | |
c7c16703 | 1996 | @@ -145,9 +145,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) |
1a6e0f06 JK |
1997 | /* We have work to do. */ |
1998 | local_irq_enable(); | |
1999 | ||
2000 | - if (cached_flags & _TIF_NEED_RESCHED) | |
2001 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
2002 | schedule(); | |
2003 | ||
2004 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
2005 | + if (unlikely(current->forced_info.si_signo)) { | |
2006 | + struct task_struct *t = current; | |
2007 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
2008 | + t->forced_info.si_signo = 0; | |
2009 | + } | |
2010 | +#endif | |
2011 | if (cached_flags & _TIF_UPROBE) | |
2012 | uprobe_notify_resume(regs); | |
2013 | ||
2014 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
c7c16703 | 2015 | index edba8606b99a..4a3389535fc6 100644 |
1a6e0f06 JK |
2016 | --- a/arch/x86/entry/entry_32.S |
2017 | +++ b/arch/x86/entry/entry_32.S | |
c7c16703 | 2018 | @@ -308,8 +308,25 @@ END(ret_from_exception) |
1a6e0f06 JK |
2019 | ENTRY(resume_kernel) |
2020 | DISABLE_INTERRUPTS(CLBR_ANY) | |
2021 | need_resched: | |
2022 | + # preempt count == 0 + NEED_RS set? | |
2023 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
2024 | +#ifndef CONFIG_PREEMPT_LAZY | |
2025 | jnz restore_all | |
2026 | +#else | |
2027 | + jz test_int_off | |
2028 | + | |
2029 | + # atleast preempt count == 0 ? | |
2030 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2031 | + jne restore_all | |
2032 | + | |
c7c16703 JK |
2033 | + movl PER_CPU_VAR(current_task), %ebp |
2034 | + cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
1a6e0f06 JK |
2035 | + jnz restore_all |
2036 | + | |
c7c16703 | 2037 | + testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp) |
1a6e0f06 JK |
2038 | + jz restore_all |
2039 | +test_int_off: | |
2040 | +#endif | |
2041 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2042 | jz restore_all | |
2043 | call preempt_schedule_irq | |
2044 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
c7c16703 | 2045 | index ef766a358b37..28401f826ab1 100644 |
1a6e0f06 JK |
2046 | --- a/arch/x86/entry/entry_64.S |
2047 | +++ b/arch/x86/entry/entry_64.S | |
c7c16703 | 2048 | @@ -546,7 +546,23 @@ GLOBAL(retint_user) |
1a6e0f06 JK |
2049 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ |
2050 | jnc 1f | |
2051 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2052 | +#ifndef CONFIG_PREEMPT_LAZY | |
2053 | jnz 1f | |
2054 | +#else | |
2055 | + jz do_preempt_schedule_irq | |
2056 | + | |
2057 | + # atleast preempt count == 0 ? | |
2058 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2059 | + jnz 1f | |
2060 | + | |
c7c16703 JK |
2061 | + movq PER_CPU_VAR(current_task), %rcx |
2062 | + cmpl $0, TASK_TI_preempt_lazy_count(%rcx) | |
1a6e0f06 JK |
2063 | + jnz 1f |
2064 | + | |
c7c16703 | 2065 | + bt $TIF_NEED_RESCHED_LAZY,TASK_TI_flags(%rcx) |
1a6e0f06 JK |
2066 | + jnc 1f |
2067 | +do_preempt_schedule_irq: | |
2068 | +#endif | |
2069 | call preempt_schedule_irq | |
2070 | jmp 0b | |
2071 | 1: | |
c7c16703 | 2072 | @@ -894,6 +910,7 @@ EXPORT_SYMBOL(native_load_gs_index) |
1a6e0f06 JK |
2073 | jmp 2b |
2074 | .previous | |
2075 | ||
2076 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2077 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2078 | ENTRY(do_softirq_own_stack) | |
2079 | pushq %rbp | |
c7c16703 | 2080 | @@ -906,6 +923,7 @@ ENTRY(do_softirq_own_stack) |
1a6e0f06 JK |
2081 | decl PER_CPU_VAR(irq_count) |
2082 | ret | |
2083 | END(do_softirq_own_stack) | |
2084 | +#endif | |
2085 | ||
2086 | #ifdef CONFIG_XEN | |
2087 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | |
2088 | diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h | |
2089 | index 17f218645701..11bd1b7ee6eb 100644 | |
2090 | --- a/arch/x86/include/asm/preempt.h | |
2091 | +++ b/arch/x86/include/asm/preempt.h | |
2092 | @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) | |
2093 | * a decrement which hits zero means we have no preempt_count and should | |
2094 | * reschedule. | |
2095 | */ | |
2096 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2097 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2098 | { | |
2099 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); | |
2100 | } | |
2101 | ||
2102 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2103 | +{ | |
2104 | + if (____preempt_count_dec_and_test()) | |
2105 | + return true; | |
2106 | +#ifdef CONFIG_PREEMPT_LAZY | |
2107 | + if (current_thread_info()->preempt_lazy_count) | |
2108 | + return false; | |
2109 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2110 | +#else | |
2111 | + return false; | |
2112 | +#endif | |
2113 | +} | |
2114 | + | |
2115 | /* | |
2116 | * Returns true when we need to resched and can (barring IRQ state). | |
2117 | */ | |
2118 | static __always_inline bool should_resched(int preempt_offset) | |
2119 | { | |
2120 | +#ifdef CONFIG_PREEMPT_LAZY | |
2121 | + u32 tmp; | |
2122 | + | |
2123 | + tmp = raw_cpu_read_4(__preempt_count); | |
2124 | + if (tmp == preempt_offset) | |
2125 | + return true; | |
2126 | + | |
2127 | + /* preempt count == 0 ? */ | |
2128 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2129 | + if (tmp) | |
2130 | + return false; | |
2131 | + if (current_thread_info()->preempt_lazy_count) | |
2132 | + return false; | |
2133 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2134 | +#else | |
2135 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2136 | +#endif | |
2137 | } | |
2138 | ||
2139 | #ifdef CONFIG_PREEMPT | |
2140 | diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h | |
c7c16703 | 2141 | index 8af22be0fe61..d1328789b759 100644 |
1a6e0f06 JK |
2142 | --- a/arch/x86/include/asm/signal.h |
2143 | +++ b/arch/x86/include/asm/signal.h | |
c7c16703 JK |
2144 | @@ -27,6 +27,19 @@ typedef struct { |
2145 | #define SA_IA32_ABI 0x02000000u | |
2146 | #define SA_X32_ABI 0x01000000u | |
1a6e0f06 JK |
2147 | |
2148 | +/* | |
2149 | + * Because some traps use the IST stack, we must keep preemption | |
2150 | + * disabled while calling do_trap(), but do_trap() may call | |
2151 | + * force_sig_info() which will grab the signal spin_locks for the | |
2152 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2153 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2154 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2155 | + * trap. | |
2156 | + */ | |
2157 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2158 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2159 | +#endif | |
2160 | + | |
2161 | #ifndef CONFIG_COMPAT | |
2162 | typedef sigset_t compat_sigset_t; | |
2163 | #endif | |
2164 | diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h | |
2165 | index 58505f01962f..02fa39652cd6 100644 | |
2166 | --- a/arch/x86/include/asm/stackprotector.h | |
2167 | +++ b/arch/x86/include/asm/stackprotector.h | |
2168 | @@ -59,7 +59,7 @@ | |
2169 | */ | |
2170 | static __always_inline void boot_init_stack_canary(void) | |
2171 | { | |
2172 | - u64 canary; | |
2173 | + u64 uninitialized_var(canary); | |
2174 | u64 tsc; | |
2175 | ||
2176 | #ifdef CONFIG_X86_64 | |
2177 | @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) | |
2178 | * of randomness. The TSC only matters for very early init, | |
2179 | * there it already has some randomness on most systems. Later | |
2180 | * on during the bootup the random pool has true entropy too. | |
2181 | + * | |
2182 | + * For preempt-rt we need to weaken the randomness a bit, as | |
2183 | + * we can't call into the random generator from atomic context | |
2184 | + * due to locking constraints. We just leave canary | |
2185 | + * uninitialized and use the TSC based randomness on top of it. | |
2186 | */ | |
2187 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2188 | get_random_bytes(&canary, sizeof(canary)); | |
2189 | +#endif | |
2190 | tsc = rdtsc(); | |
2191 | canary += tsc + (tsc << 32UL); | |
2192 | ||
2193 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | |
c7c16703 | 2194 | index ad6f5eb07a95..5ceb3a1c2b1a 100644 |
1a6e0f06 JK |
2195 | --- a/arch/x86/include/asm/thread_info.h |
2196 | +++ b/arch/x86/include/asm/thread_info.h | |
c7c16703 JK |
2197 | @@ -54,11 +54,14 @@ struct task_struct; |
2198 | ||
2199 | struct thread_info { | |
2200 | unsigned long flags; /* low level flags */ | |
2201 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
1a6e0f06 JK |
2202 | + <0 => BUG */ |
2203 | }; | |
2204 | ||
2205 | #define INIT_THREAD_INFO(tsk) \ | |
c7c16703 JK |
2206 | { \ |
2207 | .flags = 0, \ | |
2208 | + .preempt_lazy_count = 0, \ | |
2209 | } | |
2210 | ||
2211 | #define init_stack (init_thread_union.stack) | |
2212 | @@ -67,6 +70,10 @@ struct thread_info { | |
1a6e0f06 JK |
2213 | |
2214 | #include <asm/asm-offsets.h> | |
2215 | ||
2216 | +#define GET_THREAD_INFO(reg) \ | |
2217 | + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ | |
2218 | + _ASM_SUB $(THREAD_SIZE),reg ; | |
2219 | + | |
2220 | #endif | |
2221 | ||
2222 | /* | |
c7c16703 | 2223 | @@ -85,6 +92,7 @@ struct thread_info { |
1a6e0f06 JK |
2224 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ |
2225 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2226 | #define TIF_SECCOMP 8 /* secure computing */ | |
2227 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2228 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2229 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
2230 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
c7c16703 | 2231 | @@ -108,6 +116,7 @@ struct thread_info { |
1a6e0f06 JK |
2232 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) |
2233 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2234 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2235 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2236 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2237 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
2238 | #define _TIF_NOTSC (1 << TIF_NOTSC) | |
c7c16703 | 2239 | @@ -143,6 +152,8 @@ struct thread_info { |
1a6e0f06 JK |
2240 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
2241 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2242 | ||
2243 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2244 | + | |
2245 | #define STACK_WARN (THREAD_SIZE/8) | |
2246 | ||
2247 | /* | |
2248 | diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h | |
c7c16703 | 2249 | index 57ab86d94d64..35d25e27180f 100644 |
1a6e0f06 JK |
2250 | --- a/arch/x86/include/asm/uv/uv_bau.h |
2251 | +++ b/arch/x86/include/asm/uv/uv_bau.h | |
c7c16703 | 2252 | @@ -624,9 +624,9 @@ struct bau_control { |
1a6e0f06 JK |
2253 | cycles_t send_message; |
2254 | cycles_t period_end; | |
2255 | cycles_t period_time; | |
2256 | - spinlock_t uvhub_lock; | |
2257 | - spinlock_t queue_lock; | |
2258 | - spinlock_t disable_lock; | |
2259 | + raw_spinlock_t uvhub_lock; | |
2260 | + raw_spinlock_t queue_lock; | |
2261 | + raw_spinlock_t disable_lock; | |
2262 | /* tunables */ | |
2263 | int max_concurr; | |
2264 | int max_concurr_const; | |
c7c16703 | 2265 | @@ -815,15 +815,15 @@ static inline int atom_asr(short i, struct atomic_short *v) |
1a6e0f06 JK |
2266 | * to be lowered below the current 'v'. atomic_add_unless can only stop |
2267 | * on equal. | |
2268 | */ | |
2269 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2270 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2271 | { | |
2272 | - spin_lock(lock); | |
2273 | + raw_spin_lock(lock); | |
2274 | if (atomic_read(v) >= u) { | |
2275 | - spin_unlock(lock); | |
2276 | + raw_spin_unlock(lock); | |
2277 | return 0; | |
2278 | } | |
2279 | atomic_inc(v); | |
2280 | - spin_unlock(lock); | |
2281 | + raw_spin_unlock(lock); | |
2282 | return 1; | |
2283 | } | |
2284 | ||
2285 | diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c | |
c7c16703 | 2286 | index 931ced8ca345..167975ac8af7 100644 |
1a6e0f06 JK |
2287 | --- a/arch/x86/kernel/acpi/boot.c |
2288 | +++ b/arch/x86/kernel/acpi/boot.c | |
2289 | @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |
2290 | * ->ioapic_mutex | |
2291 | * ->ioapic_lock | |
2292 | */ | |
2293 | +#ifdef CONFIG_X86_IO_APIC | |
2294 | static DEFINE_MUTEX(acpi_ioapic_lock); | |
2295 | +#endif | |
2296 | ||
2297 | /* -------------------------------------------------------------------------- | |
2298 | Boot-time Configuration | |
2299 | diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c | |
5c015b7c | 2300 | index d1e25564b3c1..67e585fa801f 100644 |
1a6e0f06 JK |
2301 | --- a/arch/x86/kernel/apic/io_apic.c |
2302 | +++ b/arch/x86/kernel/apic/io_apic.c | |
2303 | @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) | |
2304 | static inline bool ioapic_irqd_mask(struct irq_data *data) | |
2305 | { | |
2306 | /* If we are moving the irq we need to mask it */ | |
2307 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2308 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2309 | + !irqd_irq_inprogress(data))) { | |
2310 | mask_ioapic_irq(data); | |
2311 | return true; | |
2312 | } | |
2313 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
c7c16703 | 2314 | index c62e015b126c..0cc71257fca6 100644 |
1a6e0f06 JK |
2315 | --- a/arch/x86/kernel/asm-offsets.c |
2316 | +++ b/arch/x86/kernel/asm-offsets.c | |
c7c16703 | 2317 | @@ -36,6 +36,7 @@ void common(void) { |
1a6e0f06 JK |
2318 | |
2319 | BLANK(); | |
c7c16703 JK |
2320 | OFFSET(TASK_TI_flags, task_struct, thread_info.flags); |
2321 | + OFFSET(TASK_TI_preempt_lazy_count, task_struct, thread_info.preempt_lazy_count); | |
1a6e0f06 | 2322 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); |
c7c16703 JK |
2323 | |
2324 | BLANK(); | |
2325 | @@ -91,4 +92,5 @@ void common(void) { | |
1a6e0f06 JK |
2326 | |
2327 | BLANK(); | |
2328 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2329 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
2330 | } | |
2331 | diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c | |
7c18450a | 2332 | index 22cda29d654e..57c85e3af092 100644 |
1a6e0f06 JK |
2333 | --- a/arch/x86/kernel/cpu/mcheck/mce.c |
2334 | +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |
2335 | @@ -41,6 +41,8 @@ | |
2336 | #include <linux/debugfs.h> | |
2337 | #include <linux/irq_work.h> | |
2338 | #include <linux/export.h> | |
2339 | +#include <linux/jiffies.h> | |
2340 | +#include <linux/swork.h> | |
c7c16703 | 2341 | #include <linux/jump_label.h> |
1a6e0f06 JK |
2342 | |
2343 | #include <asm/processor.h> | |
7c18450a | 2344 | @@ -1307,7 +1309,7 @@ void mce_log_therm_throt_event(__u64 status) |
1a6e0f06 JK |
2345 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; |
2346 | ||
2347 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2348 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2349 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2350 | ||
2351 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2352 | { | |
7c18450a | 2353 | @@ -1316,32 +1318,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) |
1a6e0f06 JK |
2354 | |
2355 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2356 | ||
2357 | -static void __restart_timer(struct timer_list *t, unsigned long interval) | |
2358 | +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) | |
2359 | { | |
2360 | - unsigned long when = jiffies + interval; | |
2361 | - unsigned long flags; | |
2362 | - | |
2363 | - local_irq_save(flags); | |
2364 | - | |
2365 | - if (timer_pending(t)) { | |
2366 | - if (time_before(when, t->expires)) | |
2367 | - mod_timer(t, when); | |
2368 | - } else { | |
2369 | - t->expires = round_jiffies(when); | |
2370 | - add_timer_on(t, smp_processor_id()); | |
2371 | - } | |
2372 | - | |
2373 | - local_irq_restore(flags); | |
2374 | + if (!interval) | |
2375 | + return HRTIMER_NORESTART; | |
2376 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); | |
2377 | + return HRTIMER_RESTART; | |
2378 | } | |
2379 | ||
2380 | -static void mce_timer_fn(unsigned long data) | |
2381 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2382 | { | |
2383 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2384 | - int cpu = smp_processor_id(); | |
2385 | unsigned long iv; | |
2386 | ||
2387 | - WARN_ON(cpu != data); | |
2388 | - | |
2389 | iv = __this_cpu_read(mce_next_interval); | |
2390 | ||
2391 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
7c18450a | 2392 | @@ -1364,7 +1352,7 @@ static void mce_timer_fn(unsigned long data) |
1a6e0f06 JK |
2393 | |
2394 | done: | |
2395 | __this_cpu_write(mce_next_interval, iv); | |
2396 | - __restart_timer(t, iv); | |
2397 | + return __restart_timer(timer, iv); | |
2398 | } | |
2399 | ||
2400 | /* | |
7c18450a | 2401 | @@ -1372,7 +1360,7 @@ static void mce_timer_fn(unsigned long data) |
1a6e0f06 JK |
2402 | */ |
2403 | void mce_timer_kick(unsigned long interval) | |
2404 | { | |
2405 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2406 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2407 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
2408 | ||
2409 | __restart_timer(t, interval); | |
7c18450a | 2410 | @@ -1387,7 +1375,7 @@ static void mce_timer_delete_all(void) |
1a6e0f06 JK |
2411 | int cpu; |
2412 | ||
2413 | for_each_online_cpu(cpu) | |
2414 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2415 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2416 | } | |
2417 | ||
2418 | static void mce_do_trigger(struct work_struct *work) | |
7c18450a | 2419 | @@ -1397,6 +1385,56 @@ static void mce_do_trigger(struct work_struct *work) |
1a6e0f06 JK |
2420 | |
2421 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2422 | ||
2423 | +static void __mce_notify_work(struct swork_event *event) | |
2424 | +{ | |
2425 | + /* Not more than two messages every minute */ | |
2426 | + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2427 | + | |
2428 | + /* wake processes polling /dev/mcelog */ | |
2429 | + wake_up_interruptible(&mce_chrdev_wait); | |
2430 | + | |
2431 | + /* | |
2432 | + * There is no risk of missing notifications because | |
2433 | + * work_pending is always cleared before the function is | |
2434 | + * executed. | |
2435 | + */ | |
2436 | + if (mce_helper[0] && !work_pending(&mce_trigger_work)) | |
2437 | + schedule_work(&mce_trigger_work); | |
2438 | + | |
2439 | + if (__ratelimit(&ratelimit)) | |
2440 | + pr_info(HW_ERR "Machine check events logged\n"); | |
2441 | +} | |
2442 | + | |
2443 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2444 | +static bool notify_work_ready __read_mostly; | |
2445 | +static struct swork_event notify_work; | |
2446 | + | |
2447 | +static int mce_notify_work_init(void) | |
2448 | +{ | |
2449 | + int err; | |
2450 | + | |
2451 | + err = swork_get(); | |
2452 | + if (err) | |
2453 | + return err; | |
2454 | + | |
2455 | + INIT_SWORK(¬ify_work, __mce_notify_work); | |
2456 | + notify_work_ready = true; | |
2457 | + return 0; | |
2458 | +} | |
2459 | + | |
2460 | +static void mce_notify_work(void) | |
2461 | +{ | |
2462 | + if (notify_work_ready) | |
2463 | + swork_queue(¬ify_work); | |
2464 | +} | |
2465 | +#else | |
2466 | +static void mce_notify_work(void) | |
2467 | +{ | |
2468 | + __mce_notify_work(NULL); | |
2469 | +} | |
2470 | +static inline int mce_notify_work_init(void) { return 0; } | |
2471 | +#endif | |
2472 | + | |
2473 | /* | |
2474 | * Notify the user(s) about new machine check events. | |
2475 | * Can be called from interrupt context, but not from machine check/NMI | |
7c18450a | 2476 | @@ -1404,19 +1442,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
1a6e0f06 JK |
2477 | */ |
2478 | int mce_notify_irq(void) | |
2479 | { | |
2480 | - /* Not more than two messages every minute */ | |
2481 | - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2482 | - | |
2483 | if (test_and_clear_bit(0, &mce_need_notify)) { | |
2484 | - /* wake processes polling /dev/mcelog */ | |
2485 | - wake_up_interruptible(&mce_chrdev_wait); | |
2486 | - | |
2487 | - if (mce_helper[0]) | |
2488 | - schedule_work(&mce_trigger_work); | |
2489 | - | |
2490 | - if (__ratelimit(&ratelimit)) | |
2491 | - pr_info(HW_ERR "Machine check events logged\n"); | |
2492 | - | |
2493 | + mce_notify_work(); | |
2494 | return 1; | |
2495 | } | |
2496 | return 0; | |
7c18450a | 2497 | @@ -1722,7 +1749,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) |
1a6e0f06 JK |
2498 | } |
2499 | } | |
2500 | ||
2501 | -static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2502 | +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) | |
2503 | { | |
2504 | unsigned long iv = check_interval * HZ; | |
2505 | ||
7c18450a | 2506 | @@ -1731,16 +1758,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1a6e0f06 JK |
2507 | |
2508 | per_cpu(mce_next_interval, cpu) = iv; | |
2509 | ||
2510 | - t->expires = round_jiffies(jiffies + iv); | |
2511 | - add_timer_on(t, cpu); | |
2512 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
2513 | + 0, HRTIMER_MODE_REL_PINNED); | |
2514 | } | |
2515 | ||
2516 | static void __mcheck_cpu_init_timer(void) | |
2517 | { | |
2518 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2519 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2520 | unsigned int cpu = smp_processor_id(); | |
2521 | ||
2522 | - setup_pinned_timer(t, mce_timer_fn, cpu); | |
2523 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2524 | + t->function = mce_timer_fn; | |
2525 | mce_start_timer(cpu, t); | |
2526 | } | |
2527 | ||
7c18450a | 2528 | @@ -2465,6 +2493,8 @@ static void mce_disable_cpu(void *h) |
1a6e0f06 JK |
2529 | if (!mce_available(raw_cpu_ptr(&cpu_info))) |
2530 | return; | |
2531 | ||
2532 | + hrtimer_cancel(this_cpu_ptr(&mce_timer)); | |
2533 | + | |
2534 | if (!(action & CPU_TASKS_FROZEN)) | |
2535 | cmci_clear(); | |
2536 | ||
7c18450a | 2537 | @@ -2487,6 +2517,7 @@ static void mce_reenable_cpu(void *h) |
1a6e0f06 JK |
2538 | if (b->init) |
2539 | wrmsrl(msr_ops.ctl(i), b->ctl); | |
2540 | } | |
2541 | + __mcheck_cpu_init_timer(); | |
2542 | } | |
2543 | ||
2544 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
7c18450a | 2545 | @@ -2494,7 +2525,6 @@ static int |
1a6e0f06 JK |
2546 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
2547 | { | |
2548 | unsigned int cpu = (unsigned long)hcpu; | |
2549 | - struct timer_list *t = &per_cpu(mce_timer, cpu); | |
2550 | ||
2551 | switch (action & ~CPU_TASKS_FROZEN) { | |
2552 | case CPU_ONLINE: | |
7c18450a | 2553 | @@ -2514,11 +2544,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
1a6e0f06 JK |
2554 | break; |
2555 | case CPU_DOWN_PREPARE: | |
2556 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | |
2557 | - del_timer_sync(t); | |
2558 | break; | |
2559 | case CPU_DOWN_FAILED: | |
2560 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | |
2561 | - mce_start_timer(cpu, t); | |
2562 | break; | |
2563 | } | |
2564 | ||
7c18450a | 2565 | @@ -2557,6 +2585,10 @@ static __init int mcheck_init_device(void) |
1a6e0f06 JK |
2566 | goto err_out; |
2567 | } | |
2568 | ||
2569 | + err = mce_notify_work_init(); | |
2570 | + if (err) | |
2571 | + goto err_out; | |
2572 | + | |
2573 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { | |
2574 | err = -ENOMEM; | |
2575 | goto err_out; | |
1a6e0f06 JK |
2576 | diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c |
2577 | index 1f38d9a4d9de..053bf3b2ef39 100644 | |
2578 | --- a/arch/x86/kernel/irq_32.c | |
2579 | +++ b/arch/x86/kernel/irq_32.c | |
2580 | @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) | |
2581 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | |
2582 | } | |
2583 | ||
2584 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2585 | void do_softirq_own_stack(void) | |
2586 | { | |
2587 | struct irq_stack *irqstk; | |
2588 | @@ -143,6 +144,7 @@ void do_softirq_own_stack(void) | |
2589 | ||
2590 | call_on_stack(__do_softirq, isp); | |
2591 | } | |
2592 | +#endif | |
2593 | ||
2594 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
2595 | { | |
2596 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | |
c7c16703 | 2597 | index bd7be8efdc4c..b3b0a7f7b1ca 100644 |
1a6e0f06 JK |
2598 | --- a/arch/x86/kernel/process_32.c |
2599 | +++ b/arch/x86/kernel/process_32.c | |
2600 | @@ -35,6 +35,7 @@ | |
2601 | #include <linux/uaccess.h> | |
2602 | #include <linux/io.h> | |
2603 | #include <linux/kdebug.h> | |
2604 | +#include <linux/highmem.h> | |
2605 | ||
2606 | #include <asm/pgtable.h> | |
2607 | #include <asm/ldt.h> | |
c7c16703 | 2608 | @@ -195,6 +196,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) |
1a6e0f06 JK |
2609 | } |
2610 | EXPORT_SYMBOL_GPL(start_thread); | |
2611 | ||
2612 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2613 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
2614 | +{ | |
2615 | + int i; | |
2616 | + | |
2617 | + /* | |
2618 | + * Clear @prev's kmap_atomic mappings | |
2619 | + */ | |
2620 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
2621 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2622 | + pte_t *ptep = kmap_pte - idx; | |
2623 | + | |
2624 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
2625 | + } | |
2626 | + /* | |
2627 | + * Restore @next_p's kmap_atomic mappings | |
2628 | + */ | |
2629 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
2630 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2631 | + | |
2632 | + if (!pte_none(next_p->kmap_pte[i])) | |
2633 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
2634 | + } | |
2635 | +} | |
2636 | +#else | |
2637 | +static inline void | |
2638 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
2639 | +#endif | |
2640 | + | |
2641 | ||
2642 | /* | |
2643 | * switch_to(x,y) should switch tasks from x to y. | |
c7c16703 | 2644 | @@ -271,6 +301,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
1a6e0f06 JK |
2645 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
2646 | __switch_to_xtra(prev_p, next_p, tss); | |
2647 | ||
2648 | + switch_kmaps(prev_p, next_p); | |
2649 | + | |
2650 | /* | |
2651 | * Leave lazy mode, flushing any hypercalls made here. | |
2652 | * This must be done before restoring TLS segments so | |
2653 | diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c | |
1f39f580 | 2654 | index 3f05c044720b..fe68afd37162 100644 |
1a6e0f06 JK |
2655 | --- a/arch/x86/kvm/lapic.c |
2656 | +++ b/arch/x86/kvm/lapic.c | |
c7c16703 | 2657 | @@ -1939,6 +1939,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) |
1a6e0f06 JK |
2658 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
2659 | HRTIMER_MODE_ABS_PINNED); | |
2660 | apic->lapic_timer.timer.function = apic_timer_fn; | |
2661 | + apic->lapic_timer.timer.irqsafe = 1; | |
2662 | ||
2663 | /* | |
2664 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | |
2665 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | |
33c7bf0f | 2666 | index e5bc139d1ba7..fa0aa5931a4b 100644 |
1a6e0f06 JK |
2667 | --- a/arch/x86/kvm/x86.c |
2668 | +++ b/arch/x86/kvm/x86.c | |
5c015b7c | 2669 | @@ -5933,6 +5933,13 @@ int kvm_arch_init(void *opaque) |
1a6e0f06 JK |
2670 | goto out; |
2671 | } | |
2672 | ||
2673 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2674 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
2675 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
2676 | + return -EOPNOTSUPP; | |
2677 | + } | |
2678 | +#endif | |
2679 | + | |
2680 | r = kvm_mmu_module_init(); | |
2681 | if (r) | |
2682 | goto out_free_percpu; | |
2683 | diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c | |
2684 | index 6d18b70ed5a9..f752724c22e8 100644 | |
2685 | --- a/arch/x86/mm/highmem_32.c | |
2686 | +++ b/arch/x86/mm/highmem_32.c | |
2687 | @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); | |
2688 | */ | |
2689 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2690 | { | |
2691 | + pte_t pte = mk_pte(page, prot); | |
2692 | unsigned long vaddr; | |
2693 | int idx, type; | |
2694 | ||
2695 | - preempt_disable(); | |
2696 | + preempt_disable_nort(); | |
2697 | pagefault_disable(); | |
2698 | ||
2699 | if (!PageHighMem(page)) | |
2700 | @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2701 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
2702 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2703 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
2704 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
2705 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2706 | + current->kmap_pte[type] = pte; | |
2707 | +#endif | |
2708 | + set_pte(kmap_pte-idx, pte); | |
2709 | arch_flush_lazy_mmu_mode(); | |
2710 | ||
2711 | return (void *)vaddr; | |
2712 | @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) | |
2713 | * is a bad idea also, in case the page changes cacheability | |
2714 | * attributes or becomes a protected page in a hypervisor. | |
2715 | */ | |
2716 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2717 | + current->kmap_pte[type] = __pte(0); | |
2718 | +#endif | |
2719 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2720 | kmap_atomic_idx_pop(); | |
2721 | arch_flush_lazy_mmu_mode(); | |
2722 | @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) | |
2723 | #endif | |
2724 | ||
2725 | pagefault_enable(); | |
2726 | - preempt_enable(); | |
2727 | + preempt_enable_nort(); | |
2728 | } | |
2729 | EXPORT_SYMBOL(__kunmap_atomic); | |
2730 | ||
2731 | diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c | |
2732 | index ada98b39b8ad..585f6829653b 100644 | |
2733 | --- a/arch/x86/mm/iomap_32.c | |
2734 | +++ b/arch/x86/mm/iomap_32.c | |
2735 | @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); | |
2736 | ||
2737 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2738 | { | |
2739 | + pte_t pte = pfn_pte(pfn, prot); | |
2740 | unsigned long vaddr; | |
2741 | int idx, type; | |
2742 | ||
2743 | @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2744 | type = kmap_atomic_idx_push(); | |
2745 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
2746 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2747 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
2748 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
2749 | + | |
2750 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2751 | + current->kmap_pte[type] = pte; | |
2752 | +#endif | |
2753 | + set_pte(kmap_pte - idx, pte); | |
2754 | arch_flush_lazy_mmu_mode(); | |
2755 | ||
2756 | return (void *)vaddr; | |
2757 | @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) | |
2758 | * is a bad idea also, in case the page changes cacheability | |
2759 | * attributes or becomes a protected page in a hypervisor. | |
2760 | */ | |
2761 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2762 | + current->kmap_pte[type] = __pte(0); | |
2763 | +#endif | |
2764 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2765 | kmap_atomic_idx_pop(); | |
2766 | } | |
1f39f580 JK |
2767 | diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c |
2768 | index e3353c97d086..01664968555c 100644 | |
2769 | --- a/arch/x86/mm/pageattr.c | |
2770 | +++ b/arch/x86/mm/pageattr.c | |
2771 | @@ -214,7 +214,15 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, | |
2772 | int in_flags, struct page **pages) | |
2773 | { | |
2774 | unsigned int i, level; | |
2775 | +#ifdef CONFIG_PREEMPT | |
2776 | + /* | |
2777 | + * Avoid wbinvd() because it causes latencies on all CPUs, | |
2778 | + * regardless of any CPU isolation that may be in effect. | |
2779 | + */ | |
2780 | + unsigned long do_wbinvd = 0; | |
2781 | +#else | |
2782 | unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ | |
2783 | +#endif | |
2784 | ||
2785 | BUG_ON(irqs_disabled()); | |
2786 | ||
1a6e0f06 | 2787 | diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c |
c7c16703 | 2788 | index 9e42842e924a..5398f97172f9 100644 |
1a6e0f06 JK |
2789 | --- a/arch/x86/platform/uv/tlb_uv.c |
2790 | +++ b/arch/x86/platform/uv/tlb_uv.c | |
c7c16703 | 2791 | @@ -748,9 +748,9 @@ static void destination_plugged(struct bau_desc *bau_desc, |
1a6e0f06 JK |
2792 | |
2793 | quiesce_local_uvhub(hmaster); | |
2794 | ||
2795 | - spin_lock(&hmaster->queue_lock); | |
2796 | + raw_spin_lock(&hmaster->queue_lock); | |
2797 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2798 | - spin_unlock(&hmaster->queue_lock); | |
2799 | + raw_spin_unlock(&hmaster->queue_lock); | |
2800 | ||
2801 | end_uvhub_quiesce(hmaster); | |
2802 | ||
c7c16703 | 2803 | @@ -770,9 +770,9 @@ static void destination_timeout(struct bau_desc *bau_desc, |
1a6e0f06 JK |
2804 | |
2805 | quiesce_local_uvhub(hmaster); | |
2806 | ||
2807 | - spin_lock(&hmaster->queue_lock); | |
2808 | + raw_spin_lock(&hmaster->queue_lock); | |
2809 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2810 | - spin_unlock(&hmaster->queue_lock); | |
2811 | + raw_spin_unlock(&hmaster->queue_lock); | |
2812 | ||
2813 | end_uvhub_quiesce(hmaster); | |
2814 | ||
c7c16703 | 2815 | @@ -793,7 +793,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2816 | cycles_t tm1; |
2817 | ||
2818 | hmaster = bcp->uvhub_master; | |
2819 | - spin_lock(&hmaster->disable_lock); | |
2820 | + raw_spin_lock(&hmaster->disable_lock); | |
2821 | if (!bcp->baudisabled) { | |
2822 | stat->s_bau_disabled++; | |
2823 | tm1 = get_cycles(); | |
c7c16703 | 2824 | @@ -806,7 +806,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2825 | } |
2826 | } | |
2827 | } | |
2828 | - spin_unlock(&hmaster->disable_lock); | |
2829 | + raw_spin_unlock(&hmaster->disable_lock); | |
2830 | } | |
2831 | ||
2832 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
c7c16703 | 2833 | @@ -869,7 +869,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, |
1a6e0f06 JK |
2834 | */ |
2835 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
2836 | { | |
2837 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
2838 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
2839 | atomic_t *v; | |
2840 | ||
2841 | v = &hmaster->active_descriptor_count; | |
c7c16703 | 2842 | @@ -1002,7 +1002,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2843 | struct bau_control *hmaster; |
2844 | ||
2845 | hmaster = bcp->uvhub_master; | |
2846 | - spin_lock(&hmaster->disable_lock); | |
2847 | + raw_spin_lock(&hmaster->disable_lock); | |
2848 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
2849 | stat->s_bau_reenabled++; | |
2850 | for_each_present_cpu(tcpu) { | |
c7c16703 | 2851 | @@ -1014,10 +1014,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) |
1a6e0f06 JK |
2852 | tbcp->period_giveups = 0; |
2853 | } | |
2854 | } | |
2855 | - spin_unlock(&hmaster->disable_lock); | |
2856 | + raw_spin_unlock(&hmaster->disable_lock); | |
2857 | return 0; | |
2858 | } | |
2859 | - spin_unlock(&hmaster->disable_lock); | |
2860 | + raw_spin_unlock(&hmaster->disable_lock); | |
2861 | return -1; | |
2862 | } | |
2863 | ||
c7c16703 | 2864 | @@ -1940,9 +1940,9 @@ static void __init init_per_cpu_tunables(void) |
1a6e0f06 | 2865 | bcp->cong_reps = congested_reps; |
c7c16703 JK |
2866 | bcp->disabled_period = sec_2_cycles(disabled_period); |
2867 | bcp->giveup_limit = giveup_limit; | |
1a6e0f06 JK |
2868 | - spin_lock_init(&bcp->queue_lock); |
2869 | - spin_lock_init(&bcp->uvhub_lock); | |
2870 | - spin_lock_init(&bcp->disable_lock); | |
2871 | + raw_spin_lock_init(&bcp->queue_lock); | |
2872 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
2873 | + raw_spin_lock_init(&bcp->disable_lock); | |
2874 | } | |
2875 | } | |
2876 | ||
2877 | diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c | |
2878 | index b333fc45f9ec..8b85916e6986 100644 | |
2879 | --- a/arch/x86/platform/uv/uv_time.c | |
2880 | +++ b/arch/x86/platform/uv/uv_time.c | |
2881 | @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | |
2882 | ||
2883 | /* There is one of these allocated per node */ | |
2884 | struct uv_rtc_timer_head { | |
2885 | - spinlock_t lock; | |
2886 | + raw_spinlock_t lock; | |
2887 | /* next cpu waiting for timer, local node relative: */ | |
2888 | int next_cpu; | |
2889 | /* number of cpus on this node: */ | |
2890 | @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) | |
2891 | uv_rtc_deallocate_timers(); | |
2892 | return -ENOMEM; | |
2893 | } | |
2894 | - spin_lock_init(&head->lock); | |
2895 | + raw_spin_lock_init(&head->lock); | |
2896 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
2897 | head->next_cpu = -1; | |
2898 | blade_info[bid] = head; | |
2899 | @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2900 | unsigned long flags; | |
2901 | int next_cpu; | |
2902 | ||
2903 | - spin_lock_irqsave(&head->lock, flags); | |
2904 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2905 | ||
2906 | next_cpu = head->next_cpu; | |
2907 | *t = expires; | |
2908 | @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2909 | if (uv_setup_intr(cpu, expires)) { | |
2910 | *t = ULLONG_MAX; | |
2911 | uv_rtc_find_next_timer(head, pnode); | |
2912 | - spin_unlock_irqrestore(&head->lock, flags); | |
2913 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2914 | return -ETIME; | |
2915 | } | |
2916 | } | |
2917 | ||
2918 | - spin_unlock_irqrestore(&head->lock, flags); | |
2919 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2920 | return 0; | |
2921 | } | |
2922 | ||
2923 | @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2924 | unsigned long flags; | |
2925 | int rc = 0; | |
2926 | ||
2927 | - spin_lock_irqsave(&head->lock, flags); | |
2928 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2929 | ||
2930 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
2931 | rc = 1; | |
2932 | @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2933 | uv_rtc_find_next_timer(head, pnode); | |
2934 | } | |
2935 | ||
2936 | - spin_unlock_irqrestore(&head->lock, flags); | |
2937 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2938 | ||
2939 | return rc; | |
2940 | } | |
2941 | @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2942 | static cycle_t uv_read_rtc(struct clocksource *cs) | |
2943 | { | |
2944 | unsigned long offset; | |
2945 | + cycle_t cycles; | |
2946 | ||
2947 | + preempt_disable(); | |
2948 | if (uv_get_min_hub_revision_id() == 1) | |
2949 | offset = 0; | |
2950 | else | |
2951 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
2952 | ||
2953 | - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2954 | + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
2955 | + preempt_enable(); | |
2956 | + | |
2957 | + return cycles; | |
2958 | } | |
2959 | ||
2960 | /* | |
2961 | diff --git a/block/blk-core.c b/block/blk-core.c | |
7c18450a | 2962 | index d1f2801ce836..6f945bb0fa1a 100644 |
1a6e0f06 JK |
2963 | --- a/block/blk-core.c |
2964 | +++ b/block/blk-core.c | |
2965 | @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |
2966 | ||
2967 | INIT_LIST_HEAD(&rq->queuelist); | |
2968 | INIT_LIST_HEAD(&rq->timeout_list); | |
2969 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2970 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
2971 | +#endif | |
2972 | rq->cpu = -1; | |
2973 | rq->q = q; | |
2974 | rq->__sector = (sector_t) -1; | |
2975 | @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async); | |
2976 | **/ | |
2977 | void blk_start_queue(struct request_queue *q) | |
2978 | { | |
2979 | - WARN_ON(!irqs_disabled()); | |
2980 | + WARN_ON_NONRT(!irqs_disabled()); | |
2981 | ||
2982 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
2983 | __blk_run_queue(q); | |
2984 | @@ -659,7 +662,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait) | |
2985 | if (nowait) | |
2986 | return -EBUSY; | |
2987 | ||
2988 | - ret = wait_event_interruptible(q->mq_freeze_wq, | |
2989 | + ret = swait_event_interruptible(q->mq_freeze_wq, | |
2990 | !atomic_read(&q->mq_freeze_depth) || | |
2991 | blk_queue_dying(q)); | |
2992 | if (blk_queue_dying(q)) | |
2993 | @@ -679,7 +682,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
2994 | struct request_queue *q = | |
2995 | container_of(ref, struct request_queue, q_usage_counter); | |
2996 | ||
2997 | - wake_up_all(&q->mq_freeze_wq); | |
2998 | + swake_up_all(&q->mq_freeze_wq); | |
2999 | } | |
3000 | ||
3001 | static void blk_rq_timed_out_timer(unsigned long data) | |
3002 | @@ -748,7 +751,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
3003 | q->bypass_depth = 1; | |
3004 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | |
3005 | ||
3006 | - init_waitqueue_head(&q->mq_freeze_wq); | |
3007 | + init_swait_queue_head(&q->mq_freeze_wq); | |
3008 | ||
3009 | /* | |
3010 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
7c18450a | 3011 | @@ -3200,7 +3203,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, |
1a6e0f06 JK |
3012 | blk_run_queue_async(q); |
3013 | else | |
3014 | __blk_run_queue(q); | |
3015 | - spin_unlock(q->queue_lock); | |
3016 | + spin_unlock_irq(q->queue_lock); | |
3017 | } | |
3018 | ||
3019 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
7c18450a | 3020 | @@ -3248,7 +3251,6 @@ EXPORT_SYMBOL(blk_check_plugged); |
1a6e0f06 JK |
3021 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
3022 | { | |
3023 | struct request_queue *q; | |
3024 | - unsigned long flags; | |
3025 | struct request *rq; | |
3026 | LIST_HEAD(list); | |
3027 | unsigned int depth; | |
7c18450a | 3028 | @@ -3268,11 +3270,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
3029 | q = NULL; |
3030 | depth = 0; | |
3031 | ||
3032 | - /* | |
3033 | - * Save and disable interrupts here, to avoid doing it for every | |
3034 | - * queue lock we have to take. | |
3035 | - */ | |
3036 | - local_irq_save(flags); | |
3037 | while (!list_empty(&list)) { | |
3038 | rq = list_entry_rq(list.next); | |
3039 | list_del_init(&rq->queuelist); | |
7c18450a | 3040 | @@ -3285,7 +3282,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
3041 | queue_unplugged(q, depth, from_schedule); |
3042 | q = rq->q; | |
3043 | depth = 0; | |
3044 | - spin_lock(q->queue_lock); | |
3045 | + spin_lock_irq(q->queue_lock); | |
3046 | } | |
3047 | ||
3048 | /* | |
7c18450a | 3049 | @@ -3312,8 +3309,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) |
1a6e0f06 JK |
3050 | */ |
3051 | if (q) | |
3052 | queue_unplugged(q, depth, from_schedule); | |
3053 | - | |
3054 | - local_irq_restore(flags); | |
3055 | } | |
3056 | ||
3057 | void blk_finish_plug(struct blk_plug *plug) | |
3058 | diff --git a/block/blk-ioc.c b/block/blk-ioc.c | |
3059 | index 381cb50a673c..dc8785233d94 100644 | |
3060 | --- a/block/blk-ioc.c | |
3061 | +++ b/block/blk-ioc.c | |
3062 | @@ -7,6 +7,7 @@ | |
3063 | #include <linux/bio.h> | |
3064 | #include <linux/blkdev.h> | |
3065 | #include <linux/slab.h> | |
3066 | +#include <linux/delay.h> | |
3067 | ||
3068 | #include "blk.h" | |
3069 | ||
3070 | @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work) | |
3071 | spin_unlock(q->queue_lock); | |
3072 | } else { | |
3073 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3074 | - cpu_relax(); | |
3075 | + cpu_chill(); | |
3076 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3077 | } | |
3078 | } | |
3079 | @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc) | |
3080 | spin_unlock(icq->q->queue_lock); | |
3081 | } else { | |
3082 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3083 | - cpu_relax(); | |
3084 | + cpu_chill(); | |
3085 | goto retry; | |
3086 | } | |
3087 | } | |
1a6e0f06 | 3088 | diff --git a/block/blk-mq.c b/block/blk-mq.c |
7c18450a | 3089 | index 7b597ec4e9c5..48c9652a701c 100644 |
1a6e0f06 JK |
3090 | --- a/block/blk-mq.c |
3091 | +++ b/block/blk-mq.c | |
c7c16703 | 3092 | @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); |
1a6e0f06 JK |
3093 | |
3094 | static void blk_mq_freeze_queue_wait(struct request_queue *q) | |
3095 | { | |
3096 | - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3097 | + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3098 | } | |
3099 | ||
3100 | /* | |
c7c16703 | 3101 | @@ -110,7 +110,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) |
1a6e0f06 JK |
3102 | WARN_ON_ONCE(freeze_depth < 0); |
3103 | if (!freeze_depth) { | |
3104 | percpu_ref_reinit(&q->q_usage_counter); | |
3105 | - wake_up_all(&q->mq_freeze_wq); | |
3106 | + swake_up_all(&q->mq_freeze_wq); | |
3107 | } | |
3108 | } | |
3109 | EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); | |
c7c16703 | 3110 | @@ -129,7 +129,7 @@ void blk_mq_wake_waiters(struct request_queue *q) |
1a6e0f06 JK |
3111 | * dying, we need to ensure that processes currently waiting on |
3112 | * the queue are notified as well. | |
3113 | */ | |
3114 | - wake_up_all(&q->mq_freeze_wq); | |
3115 | + swake_up_all(&q->mq_freeze_wq); | |
3116 | } | |
3117 | ||
3118 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |
c7c16703 | 3119 | @@ -177,6 +177,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, |
1a6e0f06 JK |
3120 | rq->resid_len = 0; |
3121 | rq->sense = NULL; | |
3122 | ||
3123 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3124 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3125 | +#endif | |
3126 | INIT_LIST_HEAD(&rq->timeout_list); | |
3127 | rq->timeout = 0; | |
3128 | ||
c7c16703 | 3129 | @@ -345,6 +348,17 @@ void blk_mq_end_request(struct request *rq, int error) |
1a6e0f06 JK |
3130 | } |
3131 | EXPORT_SYMBOL(blk_mq_end_request); | |
3132 | ||
3133 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3134 | + | |
3135 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
3136 | +{ | |
3137 | + struct request *rq = container_of(work, struct request, work); | |
3138 | + | |
3139 | + rq->q->softirq_done_fn(rq); | |
3140 | +} | |
3141 | + | |
3142 | +#else | |
3143 | + | |
3144 | static void __blk_mq_complete_request_remote(void *data) | |
3145 | { | |
3146 | struct request *rq = data; | |
c7c16703 | 3147 | @@ -352,6 +366,8 @@ static void __blk_mq_complete_request_remote(void *data) |
1a6e0f06 JK |
3148 | rq->q->softirq_done_fn(rq); |
3149 | } | |
3150 | ||
3151 | +#endif | |
3152 | + | |
3153 | static void blk_mq_ipi_complete_request(struct request *rq) | |
3154 | { | |
3155 | struct blk_mq_ctx *ctx = rq->mq_ctx; | |
c7c16703 | 3156 | @@ -363,19 +379,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) |
1a6e0f06 JK |
3157 | return; |
3158 | } | |
3159 | ||
3160 | - cpu = get_cpu(); | |
3161 | + cpu = get_cpu_light(); | |
3162 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
3163 | shared = cpus_share_cache(cpu, ctx->cpu); | |
3164 | ||
3165 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
3166 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3167 | + schedule_work_on(ctx->cpu, &rq->work); | |
3168 | +#else | |
3169 | rq->csd.func = __blk_mq_complete_request_remote; | |
3170 | rq->csd.info = rq; | |
3171 | rq->csd.flags = 0; | |
3172 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
3173 | +#endif | |
3174 | } else { | |
3175 | rq->q->softirq_done_fn(rq); | |
3176 | } | |
3177 | - put_cpu(); | |
3178 | + put_cpu_light(); | |
3179 | } | |
3180 | ||
3181 | static void __blk_mq_complete_request(struct request *rq) | |
33c7bf0f | 3182 | @@ -906,14 +926,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) |
1a6e0f06 JK |
3183 | return; |
3184 | ||
c7c16703 | 3185 | if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { |
1a6e0f06 JK |
3186 | - int cpu = get_cpu(); |
3187 | + int cpu = get_cpu_light(); | |
3188 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
3189 | __blk_mq_run_hw_queue(hctx); | |
3190 | - put_cpu(); | |
3191 | + put_cpu_light(); | |
3192 | return; | |
3193 | } | |
3194 | ||
3195 | - put_cpu(); | |
3196 | + put_cpu_light(); | |
3197 | } | |
3198 | ||
c7c16703 | 3199 | kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work); |
1a6e0f06 | 3200 | diff --git a/block/blk-mq.h b/block/blk-mq.h |
c7c16703 | 3201 | index e5d25249028c..1e846b842eab 100644 |
1a6e0f06 JK |
3202 | --- a/block/blk-mq.h |
3203 | +++ b/block/blk-mq.h | |
c7c16703 | 3204 | @@ -72,12 +72,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, |
1a6e0f06 JK |
3205 | */ |
3206 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
3207 | { | |
3208 | - return __blk_mq_get_ctx(q, get_cpu()); | |
3209 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
3210 | } | |
3211 | ||
3212 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
3213 | { | |
3214 | - put_cpu(); | |
3215 | + put_cpu_light(); | |
3216 | } | |
3217 | ||
3218 | struct blk_mq_alloc_data { | |
3219 | diff --git a/block/blk-softirq.c b/block/blk-softirq.c | |
c7c16703 | 3220 | index 06cf9807f49a..c40342643ca0 100644 |
1a6e0f06 JK |
3221 | --- a/block/blk-softirq.c |
3222 | +++ b/block/blk-softirq.c | |
3223 | @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) | |
3224 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3225 | ||
3226 | local_irq_restore(flags); | |
3227 | + preempt_check_resched_rt(); | |
3228 | } | |
3229 | ||
3230 | /* | |
c7c16703 JK |
3231 | @@ -89,6 +90,7 @@ static int blk_softirq_cpu_dead(unsigned int cpu) |
3232 | this_cpu_ptr(&blk_cpu_done)); | |
3233 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3234 | local_irq_enable(); | |
3235 | + preempt_check_resched_rt(); | |
1a6e0f06 | 3236 | |
c7c16703 JK |
3237 | return 0; |
3238 | } | |
3239 | @@ -141,6 +143,7 @@ void __blk_complete_request(struct request *req) | |
1a6e0f06 JK |
3240 | goto do_local; |
3241 | ||
3242 | local_irq_restore(flags); | |
3243 | + preempt_check_resched_rt(); | |
3244 | } | |
3245 | ||
3246 | /** | |
3247 | diff --git a/block/bounce.c b/block/bounce.c | |
3248 | index 1cb5dd3a5da1..2f1ec8a67cbe 100644 | |
3249 | --- a/block/bounce.c | |
3250 | +++ b/block/bounce.c | |
3251 | @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | |
3252 | unsigned long flags; | |
3253 | unsigned char *vto; | |
3254 | ||
3255 | - local_irq_save(flags); | |
3256 | + local_irq_save_nort(flags); | |
3257 | vto = kmap_atomic(to->bv_page); | |
3258 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
3259 | kunmap_atomic(vto); | |
3260 | - local_irq_restore(flags); | |
3261 | + local_irq_restore_nort(flags); | |
3262 | } | |
3263 | ||
3264 | #else /* CONFIG_HIGHMEM */ | |
3265 | diff --git a/crypto/algapi.c b/crypto/algapi.c | |
5c015b7c | 3266 | index 1fad2a6b3bbb..ecb7315426a9 100644 |
1a6e0f06 JK |
3267 | --- a/crypto/algapi.c |
3268 | +++ b/crypto/algapi.c | |
5c015b7c | 3269 | @@ -719,13 +719,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); |
1a6e0f06 JK |
3270 | |
3271 | int crypto_register_notifier(struct notifier_block *nb) | |
3272 | { | |
3273 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
3274 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
3275 | } | |
3276 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
3277 | ||
3278 | int crypto_unregister_notifier(struct notifier_block *nb) | |
3279 | { | |
3280 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
3281 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
3282 | } | |
3283 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
3284 | ||
3285 | diff --git a/crypto/api.c b/crypto/api.c | |
3286 | index bbc147cb5dec..bc1a848f02ec 100644 | |
3287 | --- a/crypto/api.c | |
3288 | +++ b/crypto/api.c | |
3289 | @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); | |
3290 | DECLARE_RWSEM(crypto_alg_sem); | |
3291 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
3292 | ||
3293 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
3294 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
3295 | EXPORT_SYMBOL_GPL(crypto_chain); | |
3296 | ||
3297 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
3298 | @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v) | |
3299 | { | |
3300 | int ok; | |
3301 | ||
3302 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3303 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3304 | if (ok == NOTIFY_DONE) { | |
3305 | request_module("cryptomgr"); | |
3306 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3307 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3308 | } | |
3309 | ||
3310 | return ok; | |
3311 | diff --git a/crypto/internal.h b/crypto/internal.h | |
3312 | index 7eefcdb00227..0ecc7f5a2f40 100644 | |
3313 | --- a/crypto/internal.h | |
3314 | +++ b/crypto/internal.h | |
3315 | @@ -47,7 +47,7 @@ struct crypto_larval { | |
3316 | ||
3317 | extern struct list_head crypto_alg_list; | |
3318 | extern struct rw_semaphore crypto_alg_sem; | |
3319 | -extern struct blocking_notifier_head crypto_chain; | |
3320 | +extern struct srcu_notifier_head crypto_chain; | |
3321 | ||
3322 | #ifdef CONFIG_PROC_FS | |
3323 | void __init crypto_init_proc(void); | |
3324 | @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg) | |
3325 | ||
3326 | static inline void crypto_notify(unsigned long val, void *v) | |
3327 | { | |
3328 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
3329 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
3330 | } | |
3331 | ||
3332 | #endif /* _CRYPTO_INTERNAL_H */ | |
3333 | diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h | |
c7c16703 | 3334 | index 750fa824d42c..441edf51484a 100644 |
1a6e0f06 JK |
3335 | --- a/drivers/acpi/acpica/acglobal.h |
3336 | +++ b/drivers/acpi/acpica/acglobal.h | |
3337 | @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); | |
3338 | * interrupt level | |
3339 | */ | |
3340 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
3341 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3342 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3343 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
3344 | ||
3345 | /* Mutex for _OSI support */ | |
3346 | diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c | |
3347 | index 3b7fb99362b6..696bf8e62afb 100644 | |
3348 | --- a/drivers/acpi/acpica/hwregs.c | |
3349 | +++ b/drivers/acpi/acpica/hwregs.c | |
3350 | @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void) | |
3351 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
3352 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
3353 | ||
3354 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3355 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3356 | ||
3357 | /* Clear the fixed events in PM1 A/B */ | |
3358 | ||
3359 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, | |
3360 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
3361 | ||
3362 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3363 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3364 | ||
3365 | if (ACPI_FAILURE(status)) { | |
3366 | goto exit; | |
3367 | diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c | |
3368 | index 98c26ff39409..6e236f2ea791 100644 | |
3369 | --- a/drivers/acpi/acpica/hwxface.c | |
3370 | +++ b/drivers/acpi/acpica/hwxface.c | |
3371 | @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3372 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
3373 | } | |
3374 | ||
3375 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3376 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3377 | ||
3378 | /* | |
3379 | * At this point, we know that the parent register is one of the | |
3380 | @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3381 | ||
3382 | unlock_and_exit: | |
3383 | ||
3384 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3385 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3386 | return_ACPI_STATUS(status); | |
3387 | } | |
3388 | ||
3389 | diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c | |
3390 | index 15073375bd00..357e7ca5a587 100644 | |
3391 | --- a/drivers/acpi/acpica/utmutex.c | |
3392 | +++ b/drivers/acpi/acpica/utmutex.c | |
3393 | @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) | |
3394 | return_ACPI_STATUS (status); | |
3395 | } | |
3396 | ||
3397 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
3398 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
3399 | if (ACPI_FAILURE (status)) { | |
3400 | return_ACPI_STATUS (status); | |
3401 | } | |
3402 | @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) | |
3403 | /* Delete the spinlocks */ | |
3404 | ||
3405 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
3406 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
3407 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
3408 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
3409 | ||
3410 | /* Delete the reader/writer lock */ | |
3411 | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c | |
3412 | index 051b6158d1b7..7ad293bef6ed 100644 | |
3413 | --- a/drivers/ata/libata-sff.c | |
3414 | +++ b/drivers/ata/libata-sff.c | |
3415 | @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, | |
3416 | unsigned long flags; | |
3417 | unsigned int consumed; | |
3418 | ||
3419 | - local_irq_save(flags); | |
3420 | + local_irq_save_nort(flags); | |
3421 | consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); | |
3422 | - local_irq_restore(flags); | |
3423 | + local_irq_restore_nort(flags); | |
3424 | ||
3425 | return consumed; | |
3426 | } | |
3427 | @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3428 | unsigned long flags; | |
3429 | ||
3430 | /* FIXME: use a bounce buffer */ | |
3431 | - local_irq_save(flags); | |
3432 | + local_irq_save_nort(flags); | |
3433 | buf = kmap_atomic(page); | |
3434 | ||
3435 | /* do the actual data transfer */ | |
3436 | @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3437 | do_write); | |
3438 | ||
3439 | kunmap_atomic(buf); | |
3440 | - local_irq_restore(flags); | |
3441 | + local_irq_restore_nort(flags); | |
3442 | } else { | |
3443 | buf = page_address(page); | |
3444 | ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, | |
3445 | @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3446 | unsigned long flags; | |
3447 | ||
3448 | /* FIXME: use bounce buffer */ | |
3449 | - local_irq_save(flags); | |
3450 | + local_irq_save_nort(flags); | |
3451 | buf = kmap_atomic(page); | |
3452 | ||
3453 | /* do the actual data transfer */ | |
3454 | @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3455 | count, rw); | |
3456 | ||
3457 | kunmap_atomic(buf); | |
3458 | - local_irq_restore(flags); | |
3459 | + local_irq_restore_nort(flags); | |
3460 | } else { | |
3461 | buf = page_address(page); | |
3462 | consumed = ap->ops->sff_data_xfer(dev, buf + offset, | |
3463 | diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c | |
3464 | index 4b5cd3a7b2b6..fa8329ad79fd 100644 | |
3465 | --- a/drivers/block/zram/zcomp.c | |
3466 | +++ b/drivers/block/zram/zcomp.c | |
3467 | @@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf) | |
3468 | ||
3469 | struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) | |
3470 | { | |
3471 | - return *get_cpu_ptr(comp->stream); | |
3472 | + struct zcomp_strm *zstrm; | |
3473 | + | |
3474 | + zstrm = *this_cpu_ptr(comp->stream); | |
3475 | + spin_lock(&zstrm->zcomp_lock); | |
3476 | + return zstrm; | |
3477 | } | |
3478 | ||
3479 | void zcomp_stream_put(struct zcomp *comp) | |
3480 | { | |
3481 | - put_cpu_ptr(comp->stream); | |
3482 | + struct zcomp_strm *zstrm; | |
3483 | + | |
3484 | + zstrm = *this_cpu_ptr(comp->stream); | |
3485 | + spin_unlock(&zstrm->zcomp_lock); | |
3486 | } | |
3487 | ||
3488 | int zcomp_compress(struct zcomp_strm *zstrm, | |
3489 | @@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, | |
3490 | pr_err("Can't allocate a compression stream\n"); | |
3491 | return NOTIFY_BAD; | |
3492 | } | |
3493 | + spin_lock_init(&zstrm->zcomp_lock); | |
3494 | *per_cpu_ptr(comp->stream, cpu) = zstrm; | |
3495 | break; | |
3496 | case CPU_DEAD: | |
3497 | diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h | |
3498 | index 478cac2ed465..f7a6efdc3285 100644 | |
3499 | --- a/drivers/block/zram/zcomp.h | |
3500 | +++ b/drivers/block/zram/zcomp.h | |
3501 | @@ -14,6 +14,7 @@ struct zcomp_strm { | |
3502 | /* compression/decompression buffer */ | |
3503 | void *buffer; | |
3504 | struct crypto_comp *tfm; | |
3505 | + spinlock_t zcomp_lock; | |
3506 | }; | |
3507 | ||
3508 | /* dynamic per-device compression frontend */ | |
3509 | diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c | |
7c18450a | 3510 | index c9914d653968..2038d138f286 100644 |
1a6e0f06 JK |
3511 | --- a/drivers/block/zram/zram_drv.c |
3512 | +++ b/drivers/block/zram/zram_drv.c | |
1f39f580 | 3513 | @@ -528,6 +528,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) |
1a6e0f06 JK |
3514 | goto out_error; |
3515 | } | |
3516 | ||
3517 | + zram_meta_init_table_locks(meta, disksize); | |
3518 | + | |
3519 | return meta; | |
3520 | ||
3521 | out_error: | |
1f39f580 | 3522 | @@ -575,28 +577,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) |
1a6e0f06 JK |
3523 | struct zram_meta *meta = zram->meta; |
3524 | unsigned long handle; | |
3525 | unsigned int size; | |
3526 | + struct zcomp_strm *zstrm; | |
3527 | ||
3528 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3529 | + zram_lock_table(&meta->table[index]); | |
3530 | handle = meta->table[index].handle; | |
3531 | size = zram_get_obj_size(meta, index); | |
3532 | ||
3533 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3534 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3535 | + zram_unlock_table(&meta->table[index]); | |
7c18450a | 3536 | memset(mem, 0, PAGE_SIZE); |
1a6e0f06 JK |
3537 | return 0; |
3538 | } | |
3539 | ||
3540 | + zstrm = zcomp_stream_get(zram->comp); | |
3541 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); | |
3542 | if (size == PAGE_SIZE) { | |
7c18450a | 3543 | memcpy(mem, cmem, PAGE_SIZE); |
1a6e0f06 JK |
3544 | } else { |
3545 | - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
3546 | - | |
3547 | ret = zcomp_decompress(zstrm, cmem, size, mem); | |
3548 | - zcomp_stream_put(zram->comp); | |
3549 | } | |
3550 | zs_unmap_object(meta->mem_pool, handle); | |
3551 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3552 | + zcomp_stream_put(zram->comp); | |
3553 | + zram_unlock_table(&meta->table[index]); | |
3554 | ||
3555 | /* Should NEVER happen. Return bio error if it does. */ | |
3556 | if (unlikely(ret)) { | |
1f39f580 | 3557 | @@ -616,14 +618,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, |
1a6e0f06 JK |
3558 | struct zram_meta *meta = zram->meta; |
3559 | page = bvec->bv_page; | |
3560 | ||
3561 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3562 | + zram_lock_table(&meta->table[index]); | |
3563 | if (unlikely(!meta->table[index].handle) || | |
3564 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3565 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3566 | + zram_unlock_table(&meta->table[index]); | |
3567 | handle_zero_page(bvec); | |
3568 | return 0; | |
3569 | } | |
3570 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3571 | + zram_unlock_table(&meta->table[index]); | |
3572 | ||
3573 | if (is_partial_io(bvec)) | |
3574 | /* Use a temporary buffer to decompress the page */ | |
1f39f580 | 3575 | @@ -700,10 +702,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, |
1a6e0f06 JK |
3576 | if (user_mem) |
3577 | kunmap_atomic(user_mem); | |
3578 | /* Free memory associated with this sector now. */ | |
3579 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3580 | + zram_lock_table(&meta->table[index]); | |
3581 | zram_free_page(zram, index); | |
3582 | zram_set_flag(meta, index, ZRAM_ZERO); | |
3583 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3584 | + zram_unlock_table(&meta->table[index]); | |
3585 | ||
3586 | atomic64_inc(&zram->stats.zero_pages); | |
3587 | ret = 0; | |
1f39f580 | 3588 | @@ -794,12 +796,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, |
1a6e0f06 JK |
3589 | * Free memory associated with this sector |
3590 | * before overwriting unused sectors. | |
3591 | */ | |
3592 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3593 | + zram_lock_table(&meta->table[index]); | |
3594 | zram_free_page(zram, index); | |
3595 | ||
3596 | meta->table[index].handle = handle; | |
3597 | zram_set_obj_size(meta, index, clen); | |
3598 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3599 | + zram_unlock_table(&meta->table[index]); | |
3600 | ||
3601 | /* Update stats */ | |
3602 | atomic64_add(clen, &zram->stats.compr_data_size); | |
1f39f580 | 3603 | @@ -842,9 +844,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, |
1a6e0f06 JK |
3604 | } |
3605 | ||
3606 | while (n >= PAGE_SIZE) { | |
3607 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3608 | + zram_lock_table(&meta->table[index]); | |
3609 | zram_free_page(zram, index); | |
3610 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3611 | + zram_unlock_table(&meta->table[index]); | |
3612 | atomic64_inc(&zram->stats.notify_free); | |
3613 | index++; | |
3614 | n -= PAGE_SIZE; | |
1f39f580 | 3615 | @@ -973,9 +975,9 @@ static void zram_slot_free_notify(struct block_device *bdev, |
1a6e0f06 JK |
3616 | zram = bdev->bd_disk->private_data; |
3617 | meta = zram->meta; | |
3618 | ||
3619 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3620 | + zram_lock_table(&meta->table[index]); | |
3621 | zram_free_page(zram, index); | |
3622 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3623 | + zram_unlock_table(&meta->table[index]); | |
3624 | atomic64_inc(&zram->stats.notify_free); | |
3625 | } | |
3626 | ||
3627 | diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h | |
3628 | index 74fcf10da374..fd4020c99b9e 100644 | |
3629 | --- a/drivers/block/zram/zram_drv.h | |
3630 | +++ b/drivers/block/zram/zram_drv.h | |
3631 | @@ -73,6 +73,9 @@ enum zram_pageflags { | |
3632 | struct zram_table_entry { | |
3633 | unsigned long handle; | |
3634 | unsigned long value; | |
3635 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
3636 | + spinlock_t lock; | |
3637 | +#endif | |
3638 | }; | |
3639 | ||
3640 | struct zram_stats { | |
3641 | @@ -120,4 +123,42 @@ struct zram { | |
3642 | */ | |
3643 | bool claim; /* Protected by bdev->bd_mutex */ | |
3644 | }; | |
3645 | + | |
3646 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
3647 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3648 | +{ | |
3649 | + bit_spin_lock(ZRAM_ACCESS, &table->value); | |
3650 | +} | |
3651 | + | |
3652 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3653 | +{ | |
3654 | + bit_spin_unlock(ZRAM_ACCESS, &table->value); | |
3655 | +} | |
3656 | + | |
3657 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } | |
3658 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
3659 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3660 | +{ | |
3661 | + spin_lock(&table->lock); | |
3662 | + __set_bit(ZRAM_ACCESS, &table->value); | |
3663 | +} | |
3664 | + | |
3665 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3666 | +{ | |
3667 | + __clear_bit(ZRAM_ACCESS, &table->value); | |
3668 | + spin_unlock(&table->lock); | |
3669 | +} | |
3670 | + | |
3671 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) | |
3672 | +{ | |
3673 | + size_t num_pages = disksize >> PAGE_SHIFT; | |
3674 | + size_t index; | |
3675 | + | |
3676 | + for (index = 0; index < num_pages; index++) { | |
3677 | + spinlock_t *lock = &meta->table[index].lock; | |
3678 | + spin_lock_init(lock); | |
3679 | + } | |
3680 | +} | |
3681 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
3682 | + | |
3683 | #endif | |
3684 | diff --git a/drivers/char/random.c b/drivers/char/random.c | |
7c18450a | 3685 | index 08d1dd58c0d2..25ee319dc8e3 100644 |
1a6e0f06 JK |
3686 | --- a/drivers/char/random.c |
3687 | +++ b/drivers/char/random.c | |
7c18450a JK |
3688 | @@ -262,6 +262,7 @@ |
3689 | #include <linux/syscalls.h> | |
3690 | #include <linux/completion.h> | |
3691 | #include <linux/uuid.h> | |
3692 | +#include <linux/locallock.h> | |
3693 | #include <crypto/chacha20.h> | |
3694 | ||
3695 | #include <asm/processor.h> | |
3696 | @@ -1028,8 +1029,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
1a6e0f06 JK |
3697 | } sample; |
3698 | long delta, delta2, delta3; | |
3699 | ||
3700 | - preempt_disable(); | |
3701 | - | |
3702 | sample.jiffies = jiffies; | |
3703 | sample.cycles = random_get_entropy(); | |
3704 | sample.num = num; | |
7c18450a | 3705 | @@ -1070,7 +1069,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) |
1a6e0f06 JK |
3706 | */ |
3707 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
3708 | } | |
3709 | - preempt_enable(); | |
3710 | } | |
3711 | ||
3712 | void add_input_randomness(unsigned int type, unsigned int code, | |
7c18450a | 3713 | @@ -1123,28 +1121,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) |
1a6e0f06 JK |
3714 | return *(ptr + f->reg_idx++); |
3715 | } | |
3716 | ||
3717 | -void add_interrupt_randomness(int irq, int irq_flags) | |
3718 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
3719 | { | |
3720 | struct entropy_store *r; | |
3721 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
3722 | - struct pt_regs *regs = get_irq_regs(); | |
3723 | unsigned long now = jiffies; | |
3724 | cycles_t cycles = random_get_entropy(); | |
3725 | __u32 c_high, j_high; | |
3726 | - __u64 ip; | |
3727 | unsigned long seed; | |
3728 | int credit = 0; | |
3729 | ||
3730 | if (cycles == 0) | |
3731 | - cycles = get_reg(fast_pool, regs); | |
3732 | + cycles = get_reg(fast_pool, NULL); | |
3733 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
3734 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
3735 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
3736 | fast_pool->pool[1] ^= now ^ c_high; | |
3737 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
3738 | + if (!ip) | |
3739 | + ip = _RET_IP_; | |
3740 | fast_pool->pool[2] ^= ip; | |
3741 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
3742 | - get_reg(fast_pool, regs); | |
3743 | + get_reg(fast_pool, NULL); | |
3744 | ||
3745 | fast_mix(fast_pool); | |
3746 | add_interrupt_bench(cycles); | |
7c18450a JK |
3747 | @@ -2056,6 +2053,7 @@ struct batched_entropy { |
3748 | * goal of being quite fast and not depleting entropy. | |
3749 | */ | |
3750 | static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); | |
3751 | +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_long_lock); | |
3752 | unsigned long get_random_long(void) | |
3753 | { | |
3754 | unsigned long ret; | |
3755 | @@ -2064,13 +2062,13 @@ unsigned long get_random_long(void) | |
3756 | if (arch_get_random_long(&ret)) | |
3757 | return ret; | |
3758 | ||
3759 | - batch = &get_cpu_var(batched_entropy_long); | |
3760 | + batch = &get_locked_var(batched_entropy_long_lock, batched_entropy_long); | |
3761 | if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { | |
3762 | extract_crng((u8 *)batch->entropy_long); | |
3763 | batch->position = 0; | |
3764 | } | |
3765 | ret = batch->entropy_long[batch->position++]; | |
3766 | - put_cpu_var(batched_entropy_long); | |
3767 | + put_locked_var(batched_entropy_long_lock, batched_entropy_long); | |
3768 | return ret; | |
3769 | } | |
3770 | EXPORT_SYMBOL(get_random_long); | |
3771 | @@ -2082,6 +2080,8 @@ unsigned int get_random_int(void) | |
3772 | } | |
3773 | #else | |
3774 | static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); | |
3775 | +static DEFINE_LOCAL_IRQ_LOCK(batched_entropy_int_lock); | |
3776 | + | |
3777 | unsigned int get_random_int(void) | |
3778 | { | |
3779 | unsigned int ret; | |
3780 | @@ -2090,13 +2090,13 @@ unsigned int get_random_int(void) | |
3781 | if (arch_get_random_int(&ret)) | |
3782 | return ret; | |
3783 | ||
3784 | - batch = &get_cpu_var(batched_entropy_int); | |
3785 | + batch = &get_locked_var(batched_entropy_int_lock, batched_entropy_int); | |
3786 | if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { | |
3787 | extract_crng((u8 *)batch->entropy_int); | |
3788 | batch->position = 0; | |
3789 | } | |
3790 | ret = batch->entropy_int[batch->position++]; | |
3791 | - put_cpu_var(batched_entropy_int); | |
3792 | + put_locked_var(batched_entropy_int_lock, batched_entropy_int); | |
3793 | return ret; | |
3794 | } | |
3795 | #endif | |
1a6e0f06 JK |
3796 | diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c |
3797 | index 4da2af9694a2..5b6f57f500b8 100644 | |
3798 | --- a/drivers/clocksource/tcb_clksrc.c | |
3799 | +++ b/drivers/clocksource/tcb_clksrc.c | |
3800 | @@ -23,8 +23,7 @@ | |
3801 | * this 32 bit free-running counter. the second channel is not used. | |
3802 | * | |
3803 | * - The third channel may be used to provide a 16-bit clockevent | |
3804 | - * source, used in either periodic or oneshot mode. This runs | |
3805 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
3806 | + * source, used in either periodic or oneshot mode. | |
3807 | * | |
3808 | * A boot clocksource and clockevent source are also currently needed, | |
3809 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
3810 | @@ -74,6 +73,8 @@ static struct clocksource clksrc = { | |
3811 | struct tc_clkevt_device { | |
3812 | struct clock_event_device clkevt; | |
3813 | struct clk *clk; | |
3814 | + bool clk_enabled; | |
3815 | + u32 freq; | |
3816 | void __iomem *regs; | |
3817 | }; | |
3818 | ||
3819 | @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) | |
3820 | return container_of(clkevt, struct tc_clkevt_device, clkevt); | |
3821 | } | |
3822 | ||
3823 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
3824 | - * because using one of the divided clocks would usually mean the | |
3825 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
3826 | - * | |
3827 | - * A divided clock could be good for high resolution timers, since | |
3828 | - * 30.5 usec resolution can seem "low". | |
3829 | - */ | |
3830 | static u32 timer_clock; | |
3831 | ||
3832 | +static void tc_clk_disable(struct clock_event_device *d) | |
3833 | +{ | |
3834 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3835 | + | |
3836 | + clk_disable(tcd->clk); | |
3837 | + tcd->clk_enabled = false; | |
3838 | +} | |
3839 | + | |
3840 | +static void tc_clk_enable(struct clock_event_device *d) | |
3841 | +{ | |
3842 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3843 | + | |
3844 | + if (tcd->clk_enabled) | |
3845 | + return; | |
3846 | + clk_enable(tcd->clk); | |
3847 | + tcd->clk_enabled = true; | |
3848 | +} | |
3849 | + | |
3850 | static int tc_shutdown(struct clock_event_device *d) | |
3851 | { | |
3852 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3853 | @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) | |
3854 | ||
3855 | __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); | |
3856 | __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
3857 | + return 0; | |
3858 | +} | |
3859 | + | |
3860 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
3861 | +{ | |
3862 | + tc_shutdown(d); | |
3863 | if (!clockevent_state_detached(d)) | |
3864 | - clk_disable(tcd->clk); | |
3865 | + tc_clk_disable(d); | |
3866 | ||
3867 | return 0; | |
3868 | } | |
3869 | @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) | |
3870 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) | |
3871 | tc_shutdown(d); | |
3872 | ||
3873 | - clk_enable(tcd->clk); | |
3874 | + tc_clk_enable(d); | |
3875 | ||
3876 | - /* slow clock, count up to RC, then irq and stop */ | |
3877 | + /* count up to RC, then irq and stop */ | |
3878 | __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | | |
3879 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); | |
3880 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3881 | @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) | |
3882 | /* By not making the gentime core emulate periodic mode on top | |
3883 | * of oneshot, we get lower overhead and improved accuracy. | |
3884 | */ | |
3885 | - clk_enable(tcd->clk); | |
3886 | + tc_clk_enable(d); | |
3887 | ||
3888 | - /* slow clock, count up to RC, then irq and restart */ | |
3889 | + /* count up to RC, then irq and restart */ | |
3890 | __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, | |
3891 | regs + ATMEL_TC_REG(2, CMR)); | |
3892 | - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3893 | + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3894 | ||
3895 | /* Enable clock and interrupts on RC compare */ | |
3896 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3897 | @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { | |
3898 | .features = CLOCK_EVT_FEAT_PERIODIC | | |
3899 | CLOCK_EVT_FEAT_ONESHOT, | |
3900 | /* Should be lower than at91rm9200's system timer */ | |
3901 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3902 | .rating = 125, | |
3903 | +#else | |
3904 | + .rating = 200, | |
3905 | +#endif | |
3906 | .set_next_event = tc_next_event, | |
3907 | - .set_state_shutdown = tc_shutdown, | |
3908 | + .set_state_shutdown = tc_shutdown_clk_off, | |
3909 | .set_state_periodic = tc_set_periodic, | |
3910 | .set_state_oneshot = tc_set_oneshot, | |
3911 | }, | |
3912 | @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) | |
3913 | return IRQ_NONE; | |
3914 | } | |
3915 | ||
3916 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3917 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
3918 | { | |
3919 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
3920 | int ret; | |
3921 | struct clk *t2_clk = tc->clk[2]; | |
3922 | int irq = tc->irq[2]; | |
3923 | @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3924 | clkevt.regs = tc->regs; | |
3925 | clkevt.clk = t2_clk; | |
3926 | ||
3927 | - timer_clock = clk32k_divisor_idx; | |
3928 | + timer_clock = divisor_idx; | |
3929 | + if (!divisor) | |
3930 | + clkevt.freq = 32768; | |
3931 | + else | |
3932 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
3933 | ||
3934 | clkevt.clkevt.cpumask = cpumask_of(0); | |
3935 | ||
3936 | @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3937 | return ret; | |
3938 | } | |
3939 | ||
3940 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
3941 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
3942 | ||
3943 | return ret; | |
3944 | } | |
3945 | @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) | |
3946 | goto err_disable_t1; | |
3947 | ||
3948 | /* channel 2: periodic and oneshot timer support */ | |
3949 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3950 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
3951 | +#else | |
3952 | + ret = setup_clkevents(tc, best_divisor_idx); | |
3953 | +#endif | |
3954 | if (ret) | |
3955 | goto err_unregister_clksrc; | |
3956 | ||
3957 | diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c | |
c7c16703 | 3958 | index 6555821bbdae..93288849b2bd 100644 |
1a6e0f06 JK |
3959 | --- a/drivers/clocksource/timer-atmel-pit.c |
3960 | +++ b/drivers/clocksource/timer-atmel-pit.c | |
3961 | @@ -46,6 +46,7 @@ struct pit_data { | |
3962 | u32 cycle; | |
3963 | u32 cnt; | |
3964 | unsigned int irq; | |
3965 | + bool irq_requested; | |
3966 | struct clk *mck; | |
3967 | }; | |
3968 | ||
3969 | @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) | |
3970 | ||
3971 | /* disable irq, leaving the clocksource active */ | |
3972 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
3973 | + if (data->irq_requested) { | |
3974 | + free_irq(data->irq, data); | |
3975 | + data->irq_requested = false; | |
3976 | + } | |
3977 | return 0; | |
3978 | } | |
3979 | ||
3980 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
3981 | /* | |
3982 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
3983 | */ | |
3984 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
3985 | { | |
3986 | struct pit_data *data = clkevt_to_pit_data(dev); | |
3987 | + int ret; | |
3988 | + | |
3989 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
3990 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
3991 | + "at91_tick", data); | |
3992 | + if (ret) | |
3993 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
3994 | + | |
3995 | + data->irq_requested = true; | |
3996 | ||
3997 | /* update clocksource counter */ | |
3998 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
c7c16703 | 3999 | @@ -230,15 +245,6 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) |
1a6e0f06 JK |
4000 | return ret; |
4001 | } | |
4002 | ||
4003 | - /* Set up irq handler */ | |
4004 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
4005 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4006 | - "at91_tick", data); | |
4007 | - if (ret) { | |
4008 | - pr_err("Unable to setup IRQ\n"); | |
4009 | - return ret; | |
4010 | - } | |
4011 | - | |
4012 | /* Set up and register clockevents */ | |
4013 | data->clkevt.name = "pit"; | |
4014 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
4015 | diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c | |
4016 | index e90ab5b63a90..9e124087c55f 100644 | |
4017 | --- a/drivers/clocksource/timer-atmel-st.c | |
4018 | +++ b/drivers/clocksource/timer-atmel-st.c | |
4019 | @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) | |
4020 | last_crtr = read_CRTR(); | |
4021 | } | |
4022 | ||
4023 | +static int atmel_st_irq; | |
4024 | + | |
4025 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
4026 | { | |
4027 | clkdev32k_disable_and_flush_irq(); | |
4028 | irqmask = 0; | |
4029 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
4030 | + free_irq(atmel_st_irq, regmap_st); | |
4031 | return 0; | |
4032 | } | |
4033 | ||
4034 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
4035 | { | |
4036 | + int ret; | |
4037 | + | |
4038 | clkdev32k_disable_and_flush_irq(); | |
4039 | ||
4040 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
4041 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4042 | + "at91_tick", regmap_st); | |
4043 | + if (ret) | |
4044 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
4045 | + | |
4046 | /* | |
4047 | * ALM for oneshot irqs, set by next_event() | |
4048 | * before 32 seconds have passed. | |
4049 | @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
4050 | ||
4051 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
4052 | { | |
4053 | + int ret; | |
4054 | + | |
4055 | clkdev32k_disable_and_flush_irq(); | |
4056 | ||
4057 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
4058 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4059 | + "at91_tick", regmap_st); | |
4060 | + if (ret) | |
4061 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
4062 | + | |
4063 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
4064 | irqmask = AT91_ST_PITS; | |
4065 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
4066 | @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
4067 | { | |
4068 | struct clk *sclk; | |
4069 | unsigned int sclk_rate, val; | |
4070 | - int irq, ret; | |
4071 | + int ret; | |
4072 | ||
4073 | regmap_st = syscon_node_to_regmap(node); | |
4074 | if (IS_ERR(regmap_st)) { | |
4075 | @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
4076 | regmap_read(regmap_st, AT91_ST_SR, &val); | |
4077 | ||
4078 | /* Get the interrupts property */ | |
4079 | - irq = irq_of_parse_and_map(node, 0); | |
4080 | - if (!irq) { | |
4081 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
4082 | + if (!atmel_st_irq) { | |
4083 | pr_err("Unable to get IRQ from DT\n"); | |
4084 | return -EINVAL; | |
4085 | } | |
4086 | ||
4087 | - /* Make IRQs happen for the system timer */ | |
4088 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
4089 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4090 | - "at91_tick", regmap_st); | |
4091 | - if (ret) { | |
4092 | - pr_err("Unable to setup IRQ\n"); | |
4093 | - return ret; | |
4094 | - } | |
4095 | - | |
4096 | sclk = of_clk_get(node, 0); | |
4097 | if (IS_ERR(sclk)) { | |
4098 | pr_err("Unable to get slow clock\n"); | |
4099 | diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c | |
4100 | index a782ce87715c..19d265948526 100644 | |
4101 | --- a/drivers/connector/cn_proc.c | |
4102 | +++ b/drivers/connector/cn_proc.c | |
4103 | @@ -32,6 +32,7 @@ | |
4104 | #include <linux/pid_namespace.h> | |
4105 | ||
4106 | #include <linux/cn_proc.h> | |
4107 | +#include <linux/locallock.h> | |
4108 | ||
4109 | /* | |
4110 | * Size of a cn_msg followed by a proc_event structure. Since the | |
4111 | @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; | |
4112 | ||
4113 | /* proc_event_counts is used as the sequence number of the netlink message */ | |
4114 | static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; | |
4115 | +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); | |
4116 | ||
4117 | static inline void send_msg(struct cn_msg *msg) | |
4118 | { | |
4119 | - preempt_disable(); | |
4120 | + local_lock(send_msg_lock); | |
4121 | ||
4122 | msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; | |
4123 | ((struct proc_event *)msg->data)->cpu = smp_processor_id(); | |
4124 | @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg) | |
4125 | */ | |
4126 | cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); | |
4127 | ||
4128 | - preempt_enable(); | |
4129 | + local_unlock(send_msg_lock); | |
4130 | } | |
4131 | ||
4132 | void proc_fork_connector(struct task_struct *task) | |
4133 | diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 | |
4134 | index adbd1de1cea5..1fac5074f2cf 100644 | |
4135 | --- a/drivers/cpufreq/Kconfig.x86 | |
4136 | +++ b/drivers/cpufreq/Kconfig.x86 | |
4137 | @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI | |
4138 | ||
4139 | config X86_POWERNOW_K8 | |
4140 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
4141 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
4142 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
4143 | help | |
4144 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
4145 | Support for K10 and newer processors is now in acpi-cpufreq. | |
4146 | diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
7c18450a | 4147 | index 2117f172d7a2..96c15501b0c8 100644 |
1a6e0f06 JK |
4148 | --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c |
4149 | +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
7c18450a | 4150 | @@ -1489,7 +1489,9 @@ execbuf_submit(struct i915_execbuffer_params *params, |
1a6e0f06 JK |
4151 | if (ret) |
4152 | return ret; | |
4153 | ||
4154 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
4155 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); | |
4156 | +#endif | |
4157 | ||
4158 | i915_gem_execbuffer_move_to_active(vmas, params->request); | |
4159 | ||
4160 | diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
7c18450a | 4161 | index 755d78832a66..97fb03dc4971 100644 |
1a6e0f06 JK |
4162 | --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c |
4163 | +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4164 | @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4165 | if (!mutex_is_locked(mutex)) | |
4166 | return false; | |
4167 | ||
4168 | -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) | |
4169 | +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4170 | return mutex->owner == task; | |
4171 | #else | |
4172 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4173 | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c | |
7c18450a | 4174 | index 02908e37c228..05c0480576e1 100644 |
1a6e0f06 JK |
4175 | --- a/drivers/gpu/drm/i915/i915_irq.c |
4176 | +++ b/drivers/gpu/drm/i915/i915_irq.c | |
4177 | @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4178 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); | |
4179 | ||
4180 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4181 | + preempt_disable_rt(); | |
4182 | ||
4183 | /* Get optional system timestamp before query. */ | |
4184 | if (stime) | |
4185 | @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4186 | *etime = ktime_get(); | |
4187 | ||
4188 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4189 | + preempt_enable_rt(); | |
4190 | ||
4191 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
4192 | ||
4193 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c | |
7c18450a | 4194 | index 5dc6082639db..c32458fb3be2 100644 |
1a6e0f06 JK |
4195 | --- a/drivers/gpu/drm/i915/intel_display.c |
4196 | +++ b/drivers/gpu/drm/i915/intel_display.c | |
7c18450a | 4197 | @@ -12131,7 +12131,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) |
1a6e0f06 JK |
4198 | struct intel_crtc *intel_crtc = to_intel_crtc(crtc); |
4199 | struct intel_flip_work *work; | |
4200 | ||
4201 | - WARN_ON(!in_interrupt()); | |
4202 | + WARN_ON_NONRT(!in_interrupt()); | |
4203 | ||
4204 | if (crtc == NULL) | |
4205 | return; | |
4206 | diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c | |
c7c16703 | 4207 | index dbed12c484c9..5c540b78e8b5 100644 |
1a6e0f06 JK |
4208 | --- a/drivers/gpu/drm/i915/intel_sprite.c |
4209 | +++ b/drivers/gpu/drm/i915/intel_sprite.c | |
c7c16703 JK |
4210 | @@ -35,6 +35,7 @@ |
4211 | #include <drm/drm_rect.h> | |
4212 | #include <drm/drm_atomic.h> | |
4213 | #include <drm/drm_plane_helper.h> | |
4214 | +#include <linux/locallock.h> | |
1a6e0f06 | 4215 | #include "intel_drv.h" |
c7c16703 | 4216 | #include "intel_frontbuffer.h" |
1a6e0f06 | 4217 | #include <drm/i915_drm.h> |
c7c16703 | 4218 | @@ -65,6 +66,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, |
1a6e0f06 JK |
4219 | 1000 * adjusted_mode->crtc_htotal); |
4220 | } | |
4221 | ||
4222 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); | |
4223 | + | |
4224 | /** | |
4225 | * intel_pipe_update_start() - start update of a set of display registers | |
4226 | * @crtc: the crtc of which the registers are going to be updated | |
c7c16703 | 4227 | @@ -95,7 +98,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) |
1a6e0f06 JK |
4228 | min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); |
4229 | max = vblank_start - 1; | |
4230 | ||
4231 | - local_irq_disable(); | |
4232 | + local_lock_irq(pipe_update_lock); | |
4233 | ||
4234 | if (min <= 0 || max <= 0) | |
4235 | return; | |
c7c16703 | 4236 | @@ -125,11 +128,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) |
1a6e0f06 JK |
4237 | break; |
4238 | } | |
4239 | ||
4240 | - local_irq_enable(); | |
4241 | + local_unlock_irq(pipe_update_lock); | |
4242 | ||
4243 | timeout = schedule_timeout(timeout); | |
4244 | ||
4245 | - local_irq_disable(); | |
4246 | + local_lock_irq(pipe_update_lock); | |
4247 | } | |
4248 | ||
4249 | finish_wait(wq, &wait); | |
c7c16703 | 4250 | @@ -181,7 +184,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work |
1a6e0f06 JK |
4251 | crtc->base.state->event = NULL; |
4252 | } | |
4253 | ||
4254 | - local_irq_enable(); | |
4255 | + local_unlock_irq(pipe_update_lock); | |
4256 | ||
4257 | if (crtc->debug.start_vbl_count && | |
4258 | crtc->debug.start_vbl_count != end_vbl_count) { | |
4259 | diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
c7c16703 | 4260 | index 192b2d3a79cb..d5372a207326 100644 |
1a6e0f06 JK |
4261 | --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c |
4262 | +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4263 | @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4264 | if (!mutex_is_locked(mutex)) | |
4265 | return false; | |
4266 | ||
4267 | -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) | |
4268 | +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4269 | return mutex->owner == task; | |
4270 | #else | |
4271 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4272 | diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c | |
c7c16703 | 4273 | index cdb8cb568c15..b6d7fd964cbc 100644 |
1a6e0f06 JK |
4274 | --- a/drivers/gpu/drm/radeon/radeon_display.c |
4275 | +++ b/drivers/gpu/drm/radeon/radeon_display.c | |
c7c16703 | 4276 | @@ -1845,6 +1845,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, |
1a6e0f06 JK |
4277 | struct radeon_device *rdev = dev->dev_private; |
4278 | ||
4279 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4280 | + preempt_disable_rt(); | |
4281 | ||
4282 | /* Get optional system timestamp before query. */ | |
4283 | if (stime) | |
c7c16703 | 4284 | @@ -1937,6 +1938,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, |
1a6e0f06 JK |
4285 | *etime = ktime_get(); |
4286 | ||
4287 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4288 | + preempt_enable_rt(); | |
4289 | ||
4290 | /* Decode into vertical and horizontal scanout position. */ | |
4291 | *vpos = position & 0x1fff; | |
4292 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | |
c7c16703 | 4293 | index 0276d2ef06ee..8868045eabde 100644 |
1a6e0f06 JK |
4294 | --- a/drivers/hv/vmbus_drv.c |
4295 | +++ b/drivers/hv/vmbus_drv.c | |
4296 | @@ -761,6 +761,8 @@ static void vmbus_isr(void) | |
4297 | void *page_addr; | |
4298 | struct hv_message *msg; | |
4299 | union hv_synic_event_flags *event; | |
4300 | + struct pt_regs *regs = get_irq_regs(); | |
4301 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
4302 | bool handled = false; | |
4303 | ||
4304 | page_addr = hv_context.synic_event_page[cpu]; | |
4305 | @@ -808,7 +810,7 @@ static void vmbus_isr(void) | |
4306 | tasklet_schedule(hv_context.msg_dpc[cpu]); | |
4307 | } | |
4308 | ||
4309 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
4310 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); | |
4311 | } | |
4312 | ||
4313 | ||
4314 | diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c | |
4315 | index 36f76e28a0bf..394f142f90c7 100644 | |
4316 | --- a/drivers/ide/alim15x3.c | |
4317 | +++ b/drivers/ide/alim15x3.c | |
4318 | @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4319 | ||
4320 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
4321 | ||
4322 | - local_irq_save(flags); | |
4323 | + local_irq_save_nort(flags); | |
4324 | ||
4325 | if (m5229_revision < 0xC2) { | |
4326 | /* | |
4327 | @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4328 | } | |
4329 | pci_dev_put(north); | |
4330 | pci_dev_put(isa_dev); | |
4331 | - local_irq_restore(flags); | |
4332 | + local_irq_restore_nort(flags); | |
4333 | return 0; | |
4334 | } | |
4335 | ||
4336 | diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c | |
4337 | index 0ceae5cbd89a..c212e85d7f3e 100644 | |
4338 | --- a/drivers/ide/hpt366.c | |
4339 | +++ b/drivers/ide/hpt366.c | |
4340 | @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4341 | ||
4342 | dma_old = inb(base + 2); | |
4343 | ||
4344 | - local_irq_save(flags); | |
4345 | + local_irq_save_nort(flags); | |
4346 | ||
4347 | dma_new = dma_old; | |
4348 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
4349 | @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4350 | if (dma_new != dma_old) | |
4351 | outb(dma_new, base + 2); | |
4352 | ||
4353 | - local_irq_restore(flags); | |
4354 | + local_irq_restore_nort(flags); | |
4355 | ||
4356 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
4357 | hwif->name, base, base + 7); | |
4358 | diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c | |
4359 | index 19763977568c..4169433faab5 100644 | |
4360 | --- a/drivers/ide/ide-io-std.c | |
4361 | +++ b/drivers/ide/ide-io-std.c | |
4362 | @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4363 | unsigned long uninitialized_var(flags); | |
4364 | ||
4365 | if ((io_32bit & 2) && !mmio) { | |
4366 | - local_irq_save(flags); | |
4367 | + local_irq_save_nort(flags); | |
4368 | ata_vlb_sync(io_ports->nsect_addr); | |
4369 | } | |
4370 | ||
4371 | @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4372 | insl(data_addr, buf, words); | |
4373 | ||
4374 | if ((io_32bit & 2) && !mmio) | |
4375 | - local_irq_restore(flags); | |
4376 | + local_irq_restore_nort(flags); | |
4377 | ||
4378 | if (((len + 1) & 3) < 2) | |
4379 | return; | |
4380 | @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4381 | unsigned long uninitialized_var(flags); | |
4382 | ||
4383 | if ((io_32bit & 2) && !mmio) { | |
4384 | - local_irq_save(flags); | |
4385 | + local_irq_save_nort(flags); | |
4386 | ata_vlb_sync(io_ports->nsect_addr); | |
4387 | } | |
4388 | ||
4389 | @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4390 | outsl(data_addr, buf, words); | |
4391 | ||
4392 | if ((io_32bit & 2) && !mmio) | |
4393 | - local_irq_restore(flags); | |
4394 | + local_irq_restore_nort(flags); | |
4395 | ||
4396 | if (((len + 1) & 3) < 2) | |
4397 | return; | |
4398 | diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c | |
4399 | index 669ea1e45795..e12e43e62245 100644 | |
4400 | --- a/drivers/ide/ide-io.c | |
4401 | +++ b/drivers/ide/ide-io.c | |
4402 | @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) | |
4403 | /* disable_irq_nosync ?? */ | |
4404 | disable_irq(hwif->irq); | |
4405 | /* local CPU only, as if we were handling an interrupt */ | |
4406 | - local_irq_disable(); | |
4407 | + local_irq_disable_nort(); | |
4408 | if (hwif->polling) { | |
4409 | startstop = handler(drive); | |
4410 | } else if (drive_is_ready(drive)) { | |
4411 | diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c | |
4412 | index 376f2dc410c5..f014dd1b73dc 100644 | |
4413 | --- a/drivers/ide/ide-iops.c | |
4414 | +++ b/drivers/ide/ide-iops.c | |
4415 | @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, | |
4416 | if ((stat & ATA_BUSY) == 0) | |
4417 | break; | |
4418 | ||
4419 | - local_irq_restore(flags); | |
4420 | + local_irq_restore_nort(flags); | |
4421 | *rstat = stat; | |
4422 | return -EBUSY; | |
4423 | } | |
4424 | } | |
4425 | - local_irq_restore(flags); | |
4426 | + local_irq_restore_nort(flags); | |
4427 | } | |
4428 | /* | |
4429 | * Allow status to settle, then read it again. | |
4430 | diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c | |
4431 | index 0b63facd1d87..4ceba37afc0c 100644 | |
4432 | --- a/drivers/ide/ide-probe.c | |
4433 | +++ b/drivers/ide/ide-probe.c | |
4434 | @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) | |
4435 | int bswap = 1; | |
4436 | ||
4437 | /* local CPU only; some systems need this */ | |
4438 | - local_irq_save(flags); | |
4439 | + local_irq_save_nort(flags); | |
4440 | /* read 512 bytes of id info */ | |
4441 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
4442 | - local_irq_restore(flags); | |
4443 | + local_irq_restore_nort(flags); | |
4444 | ||
4445 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
4446 | #ifdef DEBUG | |
4447 | diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c | |
4448 | index a716693417a3..be0568c722d6 100644 | |
4449 | --- a/drivers/ide/ide-taskfile.c | |
4450 | +++ b/drivers/ide/ide-taskfile.c | |
4451 | @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4452 | ||
4453 | page_is_high = PageHighMem(page); | |
4454 | if (page_is_high) | |
4455 | - local_irq_save(flags); | |
4456 | + local_irq_save_nort(flags); | |
4457 | ||
4458 | buf = kmap_atomic(page) + offset; | |
4459 | ||
4460 | @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4461 | kunmap_atomic(buf); | |
4462 | ||
4463 | if (page_is_high) | |
4464 | - local_irq_restore(flags); | |
4465 | + local_irq_restore_nort(flags); | |
4466 | ||
4467 | len -= nr_bytes; | |
4468 | } | |
4469 | @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, | |
4470 | } | |
4471 | ||
4472 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
4473 | - local_irq_disable(); | |
4474 | + local_irq_disable_nort(); | |
4475 | ||
4476 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
4477 | ||
4478 | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
c7c16703 | 4479 | index fddff403d5d2..cca1bb4fbfe3 100644 |
1a6e0f06 JK |
4480 | --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c |
4481 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
c7c16703 | 4482 | @@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) |
1a6e0f06 JK |
4483 | |
4484 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
4485 | ||
4486 | - local_irq_save(flags); | |
4487 | + local_irq_save_nort(flags); | |
4488 | netif_addr_lock(dev); | |
4489 | spin_lock(&priv->lock); | |
4490 | ||
c7c16703 | 4491 | @@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) |
1a6e0f06 JK |
4492 | |
4493 | spin_unlock(&priv->lock); | |
4494 | netif_addr_unlock(dev); | |
4495 | - local_irq_restore(flags); | |
4496 | + local_irq_restore_nort(flags); | |
4497 | ||
4498 | /* | |
4499 | * make sure the in-flight joins have finished before we attempt | |
4500 | diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c | |
4501 | index 4a2a9e370be7..e970d9afd179 100644 | |
4502 | --- a/drivers/input/gameport/gameport.c | |
4503 | +++ b/drivers/input/gameport/gameport.c | |
4504 | @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) | |
4505 | tx = ~0; | |
4506 | ||
4507 | for (i = 0; i < 50; i++) { | |
4508 | - local_irq_save(flags); | |
4509 | + local_irq_save_nort(flags); | |
4510 | t1 = ktime_get_ns(); | |
4511 | for (t = 0; t < 50; t++) | |
4512 | gameport_read(gameport); | |
4513 | t2 = ktime_get_ns(); | |
4514 | t3 = ktime_get_ns(); | |
4515 | - local_irq_restore(flags); | |
4516 | + local_irq_restore_nort(flags); | |
4517 | udelay(i * 10); | |
4518 | t = (t2 - t1) - (t3 - t2); | |
4519 | if (t < tx) | |
4520 | @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4521 | tx = 1 << 30; | |
4522 | ||
4523 | for(i = 0; i < 50; i++) { | |
4524 | - local_irq_save(flags); | |
4525 | + local_irq_save_nort(flags); | |
4526 | GET_TIME(t1); | |
4527 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4528 | GET_TIME(t2); | |
4529 | GET_TIME(t3); | |
4530 | - local_irq_restore(flags); | |
4531 | + local_irq_restore_nort(flags); | |
4532 | udelay(i * 10); | |
4533 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
4534 | } | |
4535 | @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4536 | tx = 1 << 30; | |
4537 | ||
4538 | for(i = 0; i < 50; i++) { | |
4539 | - local_irq_save(flags); | |
4540 | + local_irq_save_nort(flags); | |
4541 | t1 = rdtsc(); | |
4542 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4543 | t2 = rdtsc(); | |
4544 | - local_irq_restore(flags); | |
4545 | + local_irq_restore_nort(flags); | |
4546 | udelay(i * 10); | |
4547 | if (t2 - t1 < tx) tx = t2 - t1; | |
4548 | } | |
4549 | diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c | |
c7c16703 | 4550 | index 11a13b5be73a..baaed0ac274b 100644 |
1a6e0f06 JK |
4551 | --- a/drivers/iommu/amd_iommu.c |
4552 | +++ b/drivers/iommu/amd_iommu.c | |
c7c16703 | 4553 | @@ -1923,10 +1923,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, |
1a6e0f06 JK |
4554 | int ret; |
4555 | ||
4556 | /* | |
4557 | - * Must be called with IRQs disabled. Warn here to detect early | |
4558 | - * when its not. | |
4559 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4560 | + * detect early when its not. | |
4561 | */ | |
4562 | - WARN_ON(!irqs_disabled()); | |
4563 | + WARN_ON_NONRT(!irqs_disabled()); | |
4564 | ||
4565 | /* lock domain */ | |
4566 | spin_lock(&domain->lock); | |
c7c16703 | 4567 | @@ -2094,10 +2094,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) |
1a6e0f06 JK |
4568 | struct protection_domain *domain; |
4569 | ||
4570 | /* | |
4571 | - * Must be called with IRQs disabled. Warn here to detect early | |
4572 | - * when its not. | |
4573 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
4574 | + * detect early when its not. | |
4575 | */ | |
4576 | - WARN_ON(!irqs_disabled()); | |
4577 | + WARN_ON_NONRT(!irqs_disabled()); | |
4578 | ||
4579 | if (WARN_ON(!dev_data->domain)) | |
4580 | return; | |
4581 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c | |
33c7bf0f | 4582 | index b9e50c10213b..fd3b4657723f 100644 |
1a6e0f06 JK |
4583 | --- a/drivers/iommu/intel-iommu.c |
4584 | +++ b/drivers/iommu/intel-iommu.c | |
4585 | @@ -479,7 +479,7 @@ struct deferred_flush_data { | |
4586 | struct deferred_flush_table *tables; | |
4587 | }; | |
4588 | ||
4589 | -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4590 | +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4591 | ||
4592 | /* bitmap for indexing intel_iommus */ | |
4593 | static int g_num_of_iommus; | |
33c7bf0f | 4594 | @@ -3716,10 +3716,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, |
1a6e0f06 JK |
4595 | struct intel_iommu *iommu; |
4596 | struct deferred_flush_entry *entry; | |
4597 | struct deferred_flush_data *flush_data; | |
4598 | - unsigned int cpuid; | |
4599 | ||
4600 | - cpuid = get_cpu(); | |
4601 | - flush_data = per_cpu_ptr(&deferred_flush, cpuid); | |
4602 | + flush_data = raw_cpu_ptr(&deferred_flush); | |
4603 | ||
4604 | /* Flush all CPUs' entries to avoid deferring too much. If | |
4605 | * this becomes a bottleneck, can just flush us, and rely on | |
33c7bf0f | 4606 | @@ -3752,8 +3750,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, |
1a6e0f06 JK |
4607 | } |
4608 | flush_data->size++; | |
4609 | spin_unlock_irqrestore(&flush_data->lock, flags); | |
4610 | - | |
4611 | - put_cpu(); | |
4612 | } | |
4613 | ||
4614 | static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) | |
4615 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c | |
4616 | index e23001bfcfee..359d5d169ec0 100644 | |
4617 | --- a/drivers/iommu/iova.c | |
4618 | +++ b/drivers/iommu/iova.c | |
4619 | @@ -22,6 +22,7 @@ | |
4620 | #include <linux/slab.h> | |
4621 | #include <linux/smp.h> | |
4622 | #include <linux/bitops.h> | |
4623 | +#include <linux/cpu.h> | |
4624 | ||
4625 | static bool iova_rcache_insert(struct iova_domain *iovad, | |
4626 | unsigned long pfn, | |
4627 | @@ -420,10 +421,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, | |
4628 | ||
4629 | /* Try replenishing IOVAs by flushing rcache. */ | |
4630 | flushed_rcache = true; | |
4631 | - preempt_disable(); | |
4632 | for_each_online_cpu(cpu) | |
4633 | free_cpu_cached_iovas(cpu, iovad); | |
4634 | - preempt_enable(); | |
4635 | goto retry; | |
4636 | } | |
4637 | ||
4638 | @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4639 | bool can_insert = false; | |
4640 | unsigned long flags; | |
4641 | ||
4642 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4643 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4644 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4645 | ||
4646 | if (!iova_magazine_full(cpu_rcache->loaded)) { | |
4647 | @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4648 | iova_magazine_push(cpu_rcache->loaded, iova_pfn); | |
4649 | ||
4650 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4651 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4652 | ||
4653 | if (mag_to_free) { | |
4654 | iova_magazine_free_pfns(mag_to_free, iovad); | |
4655 | @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4656 | bool has_pfn = false; | |
4657 | unsigned long flags; | |
4658 | ||
4659 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4660 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4661 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4662 | ||
4663 | if (!iova_magazine_empty(cpu_rcache->loaded)) { | |
4664 | @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4665 | iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); | |
4666 | ||
4667 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4668 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4669 | ||
4670 | return iova_pfn; | |
4671 | } | |
4672 | diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig | |
4673 | index 3f9ddb9fafa7..09da5b6b44a1 100644 | |
4674 | --- a/drivers/leds/trigger/Kconfig | |
4675 | +++ b/drivers/leds/trigger/Kconfig | |
4676 | @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT | |
4677 | ||
4678 | config LEDS_TRIGGER_CPU | |
4679 | bool "LED CPU Trigger" | |
4680 | - depends on LEDS_TRIGGERS | |
4681 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
4682 | help | |
4683 | This allows LEDs to be controlled by active CPUs. This shows | |
4684 | the active CPUs across an array of LEDs so you can see which | |
4685 | diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig | |
4686 | index 4d200883c505..98b64ed5cb81 100644 | |
4687 | --- a/drivers/md/bcache/Kconfig | |
4688 | +++ b/drivers/md/bcache/Kconfig | |
4689 | @@ -1,6 +1,7 @@ | |
4690 | ||
4691 | config BCACHE | |
4692 | tristate "Block device as cache" | |
4693 | + depends on !PREEMPT_RT_FULL | |
4694 | ---help--- | |
4695 | Allows a block device to be used as cache for other devices; uses | |
4696 | a btree for indexing and the layout is optimized for SSDs. | |
4697 | diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c | |
5c015b7c | 4698 | index 2c965424d383..2c8877f50626 100644 |
1a6e0f06 JK |
4699 | --- a/drivers/md/dm-rq.c |
4700 | +++ b/drivers/md/dm-rq.c | |
5c015b7c | 4701 | @@ -842,7 +842,7 @@ static void dm_old_request_fn(struct request_queue *q) |
1a6e0f06 JK |
4702 | /* Establish tio->ti before queuing work (map_tio_request) */ |
4703 | tio->ti = ti; | |
c7c16703 | 4704 | kthread_queue_work(&md->kworker, &tio->work); |
1a6e0f06 JK |
4705 | - BUG_ON(!irqs_disabled()); |
4706 | + BUG_ON_NONRT(!irqs_disabled()); | |
4707 | } | |
4708 | } | |
4709 | ||
4710 | diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c | |
c7c16703 | 4711 | index cce6057b9aca..fa2c4de32a64 100644 |
1a6e0f06 JK |
4712 | --- a/drivers/md/raid5.c |
4713 | +++ b/drivers/md/raid5.c | |
4714 | @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4715 | struct raid5_percpu *percpu; | |
4716 | unsigned long cpu; | |
4717 | ||
4718 | - cpu = get_cpu(); | |
4719 | + cpu = get_cpu_light(); | |
4720 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
4721 | + spin_lock(&percpu->lock); | |
4722 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
4723 | ops_run_biofill(sh); | |
4724 | overlap_clear++; | |
4725 | @@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4726 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
4727 | wake_up(&sh->raid_conf->wait_for_overlap); | |
4728 | } | |
4729 | - put_cpu(); | |
4730 | + spin_unlock(&percpu->lock); | |
4731 | + put_cpu_light(); | |
4732 | } | |
4733 | ||
4734 | static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, | |
c7c16703 JK |
4735 | @@ -6391,6 +6393,7 @@ static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) |
4736 | __func__, cpu); | |
4737 | return -ENOMEM; | |
1a6e0f06 | 4738 | } |
c7c16703 JK |
4739 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); |
4740 | return 0; | |
4741 | } | |
1a6e0f06 | 4742 | |
c7c16703 JK |
4743 | @@ -6401,7 +6404,6 @@ static int raid5_alloc_percpu(struct r5conf *conf) |
4744 | conf->percpu = alloc_percpu(struct raid5_percpu); | |
4745 | if (!conf->percpu) | |
4746 | return -ENOMEM; | |
4747 | - | |
4748 | err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); | |
4749 | if (!err) { | |
4750 | conf->scribble_disks = max(conf->raid_disks, | |
1a6e0f06 | 4751 | diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h |
c7c16703 | 4752 | index 57ec49f0839e..0739604990b7 100644 |
1a6e0f06 JK |
4753 | --- a/drivers/md/raid5.h |
4754 | +++ b/drivers/md/raid5.h | |
4755 | @@ -504,6 +504,7 @@ struct r5conf { | |
4756 | int recovery_disabled; | |
4757 | /* per cpu variables */ | |
4758 | struct raid5_percpu { | |
c7c16703 JK |
4759 | + spinlock_t lock; /* Protection for -RT */ |
4760 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
4761 | struct flex_array *scribble; /* space for constructing buffer | |
4762 | * lists and performing address | |
4763 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig | |
4764 | index 64971baf11fa..215e91e36198 100644 | |
4765 | --- a/drivers/misc/Kconfig | |
4766 | +++ b/drivers/misc/Kconfig | |
4767 | @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI | |
4768 | config ATMEL_TCLIB | |
4769 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
4770 | depends on (AVR32 || ARCH_AT91) | |
4771 | + default y if PREEMPT_RT_FULL | |
4772 | help | |
4773 | Select this if you want a library to allocate the Timer/Counter | |
4774 | blocks found on many Atmel processors. This facilitates using | |
4775 | @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC | |
4776 | are combined to make a single 32-bit timer. | |
4777 | ||
4778 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
4779 | - may be used as a clock event device supporting oneshot mode | |
4780 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
4781 | + may be used as a clock event device supporting oneshot mode. | |
4782 | ||
4783 | config ATMEL_TCB_CLKSRC_BLOCK | |
4784 | int | |
4785 | @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK | |
4786 | TC can be used for other purposes, such as PWM generation and | |
4787 | interval timing. | |
4788 | ||
4789 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
4790 | + bool "TC Block use 32 KiHz clock" | |
4791 | + depends on ATMEL_TCB_CLKSRC | |
4792 | + default y if !PREEMPT_RT_FULL | |
4793 | + help | |
4794 | + Select this to use 32 KiHz base clock rate as TC block clock | |
4795 | + source for clock events. | |
1a6e0f06 | 4796 | + |
1a6e0f06 | 4797 | + |
c7c16703 JK |
4798 | config DUMMY_IRQ |
4799 | tristate "Dummy IRQ handler" | |
4800 | default n | |
1a6e0f06 JK |
4801 | diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c |
4802 | index df990bb8c873..1a162709a85e 100644 | |
4803 | --- a/drivers/mmc/host/mmci.c | |
4804 | +++ b/drivers/mmc/host/mmci.c | |
4805 | @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4806 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
4807 | struct variant_data *variant = host->variant; | |
4808 | void __iomem *base = host->base; | |
4809 | - unsigned long flags; | |
4810 | u32 status; | |
4811 | ||
4812 | status = readl(base + MMCISTATUS); | |
4813 | ||
4814 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
4815 | ||
4816 | - local_irq_save(flags); | |
4817 | - | |
4818 | do { | |
4819 | unsigned int remain, len; | |
4820 | char *buffer; | |
4821 | @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
4822 | ||
4823 | sg_miter_stop(sg_miter); | |
4824 | ||
4825 | - local_irq_restore(flags); | |
4826 | - | |
4827 | /* | |
4828 | * If we have less than the fifo 'half-full' threshold to transfer, | |
4829 | * trigger a PIO interrupt as soon as any data is available. | |
4830 | diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c | |
c7c16703 | 4831 | index 9133e7926da5..63afb921ed40 100644 |
1a6e0f06 JK |
4832 | --- a/drivers/net/ethernet/3com/3c59x.c |
4833 | +++ b/drivers/net/ethernet/3com/3c59x.c | |
4834 | @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) | |
4835 | { | |
4836 | struct vortex_private *vp = netdev_priv(dev); | |
4837 | unsigned long flags; | |
4838 | - local_irq_save(flags); | |
4839 | + local_irq_save_nort(flags); | |
4840 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
4841 | - local_irq_restore(flags); | |
4842 | + local_irq_restore_nort(flags); | |
4843 | } | |
4844 | #endif | |
4845 | ||
4846 | @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev) | |
4847 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
4848 | */ | |
4849 | unsigned long flags; | |
4850 | - local_irq_save(flags); | |
4851 | + local_irq_save_nort(flags); | |
4852 | if (vp->full_bus_master_tx) | |
4853 | boomerang_interrupt(dev->irq, dev); | |
4854 | else | |
4855 | vortex_interrupt(dev->irq, dev); | |
4856 | - local_irq_restore(flags); | |
4857 | + local_irq_restore_nort(flags); | |
4858 | } | |
4859 | } | |
4860 | ||
4861 | diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c | |
4862 | index da4c2d8a4173..1420dfb56bac 100644 | |
4863 | --- a/drivers/net/ethernet/realtek/8139too.c | |
4864 | +++ b/drivers/net/ethernet/realtek/8139too.c | |
4865 | @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev) | |
4866 | struct rtl8139_private *tp = netdev_priv(dev); | |
4867 | const int irq = tp->pci_dev->irq; | |
4868 | ||
4869 | - disable_irq(irq); | |
4870 | + disable_irq_nosync(irq); | |
4871 | rtl8139_interrupt(irq, dev); | |
4872 | enable_irq(irq); | |
4873 | } | |
4874 | diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
c7c16703 | 4875 | index bca6935a94db..d7a35ee34d03 100644 |
1a6e0f06 JK |
4876 | --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c |
4877 | +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
4878 | @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, | |
4879 | while (!ctx->done.done && msecs--) | |
4880 | udelay(1000); | |
4881 | } else { | |
4882 | - wait_event_interruptible(ctx->done.wait, | |
4883 | + swait_event_interruptible(ctx->done.wait, | |
4884 | ctx->done.done); | |
4885 | } | |
4886 | break; | |
4887 | diff --git a/drivers/pci/access.c b/drivers/pci/access.c | |
4888 | index d11cdbb8fba3..223bbb9acb03 100644 | |
4889 | --- a/drivers/pci/access.c | |
4890 | +++ b/drivers/pci/access.c | |
4891 | @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) | |
4892 | WARN_ON(!dev->block_cfg_access); | |
4893 | ||
4894 | dev->block_cfg_access = 0; | |
4895 | - wake_up_all(&pci_cfg_wait); | |
4896 | + wake_up_all_locked(&pci_cfg_wait); | |
4897 | raw_spin_unlock_irqrestore(&pci_lock, flags); | |
4898 | } | |
4899 | EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); | |
1f39f580 | 4900 | diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c |
33c7bf0f | 4901 | index bedce3453dd3..faf038978650 100644 |
1f39f580 JK |
4902 | --- a/drivers/pinctrl/qcom/pinctrl-msm.c |
4903 | +++ b/drivers/pinctrl/qcom/pinctrl-msm.c | |
4904 | @@ -61,7 +61,7 @@ struct msm_pinctrl { | |
4905 | struct notifier_block restart_nb; | |
4906 | int irq; | |
4907 | ||
4908 | - spinlock_t lock; | |
4909 | + raw_spinlock_t lock; | |
4910 | ||
4911 | DECLARE_BITMAP(dual_edge_irqs, MAX_NR_GPIO); | |
4912 | DECLARE_BITMAP(enabled_irqs, MAX_NR_GPIO); | |
4913 | @@ -153,14 +153,14 @@ static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, | |
4914 | if (WARN_ON(i == g->nfuncs)) | |
4915 | return -EINVAL; | |
4916 | ||
4917 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4918 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4919 | ||
4920 | val = readl(pctrl->regs + g->ctl_reg); | |
4921 | val &= ~mask; | |
4922 | val |= i << g->mux_bit; | |
4923 | writel(val, pctrl->regs + g->ctl_reg); | |
4924 | ||
4925 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4926 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4927 | ||
4928 | return 0; | |
4929 | } | |
4930 | @@ -323,14 +323,14 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev, | |
4931 | break; | |
4932 | case PIN_CONFIG_OUTPUT: | |
4933 | /* set output value */ | |
4934 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4935 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4936 | val = readl(pctrl->regs + g->io_reg); | |
4937 | if (arg) | |
4938 | val |= BIT(g->out_bit); | |
4939 | else | |
4940 | val &= ~BIT(g->out_bit); | |
4941 | writel(val, pctrl->regs + g->io_reg); | |
4942 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4943 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4944 | ||
4945 | /* enable output */ | |
4946 | arg = 1; | |
4947 | @@ -351,12 +351,12 @@ static int msm_config_group_set(struct pinctrl_dev *pctldev, | |
4948 | return -EINVAL; | |
4949 | } | |
4950 | ||
4951 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4952 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4953 | val = readl(pctrl->regs + g->ctl_reg); | |
4954 | val &= ~(mask << bit); | |
4955 | val |= arg << bit; | |
4956 | writel(val, pctrl->regs + g->ctl_reg); | |
4957 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4958 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4959 | } | |
4960 | ||
4961 | return 0; | |
4962 | @@ -384,13 +384,13 @@ static int msm_gpio_direction_input(struct gpio_chip *chip, unsigned offset) | |
4963 | ||
4964 | g = &pctrl->soc->groups[offset]; | |
4965 | ||
4966 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4967 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4968 | ||
4969 | val = readl(pctrl->regs + g->ctl_reg); | |
4970 | val &= ~BIT(g->oe_bit); | |
4971 | writel(val, pctrl->regs + g->ctl_reg); | |
4972 | ||
4973 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4974 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4975 | ||
4976 | return 0; | |
4977 | } | |
4978 | @@ -404,7 +404,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in | |
4979 | ||
4980 | g = &pctrl->soc->groups[offset]; | |
4981 | ||
4982 | - spin_lock_irqsave(&pctrl->lock, flags); | |
4983 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
4984 | ||
4985 | val = readl(pctrl->regs + g->io_reg); | |
4986 | if (value) | |
4987 | @@ -417,7 +417,7 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in | |
4988 | val |= BIT(g->oe_bit); | |
4989 | writel(val, pctrl->regs + g->ctl_reg); | |
4990 | ||
4991 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
4992 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
4993 | ||
4994 | return 0; | |
4995 | } | |
4996 | @@ -443,7 +443,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value) | |
4997 | ||
4998 | g = &pctrl->soc->groups[offset]; | |
4999 | ||
5000 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5001 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5002 | ||
5003 | val = readl(pctrl->regs + g->io_reg); | |
5004 | if (value) | |
5005 | @@ -452,7 +452,7 @@ static void msm_gpio_set(struct gpio_chip *chip, unsigned offset, int value) | |
5006 | val &= ~BIT(g->out_bit); | |
5007 | writel(val, pctrl->regs + g->io_reg); | |
5008 | ||
5009 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5010 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5011 | } | |
5012 | ||
5013 | #ifdef CONFIG_DEBUG_FS | |
5014 | @@ -571,7 +571,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) | |
5015 | ||
5016 | g = &pctrl->soc->groups[d->hwirq]; | |
5017 | ||
5018 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5019 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5020 | ||
5021 | val = readl(pctrl->regs + g->intr_cfg_reg); | |
5022 | val &= ~BIT(g->intr_enable_bit); | |
5023 | @@ -579,7 +579,7 @@ static void msm_gpio_irq_mask(struct irq_data *d) | |
5024 | ||
5025 | clear_bit(d->hwirq, pctrl->enabled_irqs); | |
5026 | ||
5027 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5028 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5029 | } | |
5030 | ||
5031 | static void msm_gpio_irq_unmask(struct irq_data *d) | |
5032 | @@ -592,7 +592,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d) | |
5033 | ||
5034 | g = &pctrl->soc->groups[d->hwirq]; | |
5035 | ||
5036 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5037 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5038 | ||
33c7bf0f JK |
5039 | val = readl(pctrl->regs + g->intr_cfg_reg); |
5040 | val |= BIT(g->intr_enable_bit); | |
5041 | @@ -600,7 +600,7 @@ static void msm_gpio_irq_unmask(struct irq_data *d) | |
1f39f580 JK |
5042 | |
5043 | set_bit(d->hwirq, pctrl->enabled_irqs); | |
5044 | ||
5045 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5046 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5047 | } | |
5048 | ||
5049 | static void msm_gpio_irq_ack(struct irq_data *d) | |
33c7bf0f | 5050 | @@ -613,7 +613,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) |
1f39f580 JK |
5051 | |
5052 | g = &pctrl->soc->groups[d->hwirq]; | |
5053 | ||
5054 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5055 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5056 | ||
5057 | val = readl(pctrl->regs + g->intr_status_reg); | |
5058 | if (g->intr_ack_high) | |
33c7bf0f | 5059 | @@ -625,7 +625,7 @@ static void msm_gpio_irq_ack(struct irq_data *d) |
1f39f580 JK |
5060 | if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) |
5061 | msm_gpio_update_dual_edge_pos(pctrl, g, d); | |
5062 | ||
5063 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5064 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5065 | } | |
5066 | ||
5067 | static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) | |
33c7bf0f | 5068 | @@ -638,7 +638,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) |
1f39f580 JK |
5069 | |
5070 | g = &pctrl->soc->groups[d->hwirq]; | |
5071 | ||
5072 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5073 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5074 | ||
5075 | /* | |
5076 | * For hw without possibility of detecting both edges | |
33c7bf0f | 5077 | @@ -712,7 +712,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type) |
1f39f580 JK |
5078 | if (test_bit(d->hwirq, pctrl->dual_edge_irqs)) |
5079 | msm_gpio_update_dual_edge_pos(pctrl, g, d); | |
5080 | ||
5081 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5082 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5083 | ||
5084 | if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) | |
5085 | irq_set_handler_locked(d, handle_level_irq); | |
33c7bf0f | 5086 | @@ -728,11 +728,11 @@ static int msm_gpio_irq_set_wake(struct irq_data *d, unsigned int on) |
1f39f580 JK |
5087 | struct msm_pinctrl *pctrl = gpiochip_get_data(gc); |
5088 | unsigned long flags; | |
5089 | ||
5090 | - spin_lock_irqsave(&pctrl->lock, flags); | |
5091 | + raw_spin_lock_irqsave(&pctrl->lock, flags); | |
5092 | ||
5093 | irq_set_irq_wake(pctrl->irq, on); | |
5094 | ||
5095 | - spin_unlock_irqrestore(&pctrl->lock, flags); | |
5096 | + raw_spin_unlock_irqrestore(&pctrl->lock, flags); | |
5097 | ||
5098 | return 0; | |
5099 | } | |
33c7bf0f | 5100 | @@ -878,7 +878,7 @@ int msm_pinctrl_probe(struct platform_device *pdev, |
1f39f580 JK |
5101 | pctrl->soc = soc_data; |
5102 | pctrl->chip = msm_gpio_template; | |
5103 | ||
5104 | - spin_lock_init(&pctrl->lock); | |
5105 | + raw_spin_lock_init(&pctrl->lock); | |
5106 | ||
5107 | res = platform_get_resource(pdev, IORESOURCE_MEM, 0); | |
5108 | pctrl->regs = devm_ioremap_resource(&pdev->dev, res); | |
1a6e0f06 JK |
5109 | diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c |
5110 | index 9bd41a35a78a..8e2d436c2e3f 100644 | |
5111 | --- a/drivers/scsi/fcoe/fcoe.c | |
5112 | +++ b/drivers/scsi/fcoe/fcoe.c | |
5113 | @@ -1455,11 +1455,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, | |
5114 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
5115 | { | |
5116 | struct fcoe_percpu_s *fps; | |
5117 | - int rc; | |
5118 | + int rc, cpu = get_cpu_light(); | |
5119 | ||
5120 | - fps = &get_cpu_var(fcoe_percpu); | |
5121 | + fps = &per_cpu(fcoe_percpu, cpu); | |
5122 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
5123 | - put_cpu_var(fcoe_percpu); | |
5124 | + put_cpu_light(); | |
5125 | ||
5126 | return rc; | |
5127 | } | |
5128 | @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, | |
5129 | return 0; | |
5130 | } | |
5131 | ||
5132 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
5133 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
5134 | stats->InvalidCRCCount++; | |
5135 | if (stats->InvalidCRCCount < 5) | |
5136 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
5137 | - put_cpu(); | |
5138 | + put_cpu_light(); | |
5139 | return -EINVAL; | |
5140 | } | |
5141 | ||
5142 | @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
5143 | */ | |
5144 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
5145 | ||
5146 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
5147 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
5148 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
5149 | if (stats->ErrorFrames < 5) | |
5150 | printk(KERN_WARNING "fcoe: FCoE version " | |
5151 | @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
5152 | goto drop; | |
5153 | ||
5154 | if (!fcoe_filter_frames(lport, fp)) { | |
5155 | - put_cpu(); | |
5156 | + put_cpu_light(); | |
5157 | fc_exch_recv(lport, fp); | |
5158 | return; | |
5159 | } | |
5160 | drop: | |
5161 | stats->ErrorFrames++; | |
5162 | - put_cpu(); | |
5163 | + put_cpu_light(); | |
5164 | kfree_skb(skb); | |
5165 | } | |
5166 | ||
5167 | diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c | |
5168 | index dcf36537a767..1a1f2e46452c 100644 | |
5169 | --- a/drivers/scsi/fcoe/fcoe_ctlr.c | |
5170 | +++ b/drivers/scsi/fcoe/fcoe_ctlr.c | |
5171 | @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
5172 | ||
5173 | INIT_LIST_HEAD(&del_list); | |
5174 | ||
5175 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
5176 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
5177 | ||
5178 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
5179 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
5180 | @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
5181 | sel_time = fcf->time; | |
5182 | } | |
5183 | } | |
5184 | - put_cpu(); | |
5185 | + put_cpu_light(); | |
5186 | ||
5187 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
5188 | /* Removes fcf from current list */ | |
5189 | diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c | |
c7c16703 | 5190 | index 16ca31ad5ec0..c3987347e762 100644 |
1a6e0f06 JK |
5191 | --- a/drivers/scsi/libfc/fc_exch.c |
5192 | +++ b/drivers/scsi/libfc/fc_exch.c | |
5193 | @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, | |
5194 | } | |
5195 | memset(ep, 0, sizeof(*ep)); | |
5196 | ||
5197 | - cpu = get_cpu(); | |
5198 | + cpu = get_cpu_light(); | |
5199 | pool = per_cpu_ptr(mp->pool, cpu); | |
5200 | spin_lock_bh(&pool->lock); | |
5201 | - put_cpu(); | |
5202 | + put_cpu_light(); | |
5203 | ||
5204 | /* peek cache of free slot */ | |
5205 | if (pool->left != FC_XID_UNKNOWN) { | |
5206 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | |
7c18450a | 5207 | index 87f5e694dbed..23c0a50fb6aa 100644 |
1a6e0f06 JK |
5208 | --- a/drivers/scsi/libsas/sas_ata.c |
5209 | +++ b/drivers/scsi/libsas/sas_ata.c | |
5210 | @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
5211 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
5212 | * unconditionally re-enable interrupts | |
5213 | */ | |
5214 | - local_irq_save(flags); | |
5215 | + local_irq_save_nort(flags); | |
5216 | spin_unlock(ap->lock); | |
5217 | ||
5218 | /* If the device fell off, no sense in issuing commands */ | |
5219 | @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
5220 | ||
5221 | out: | |
5222 | spin_lock(ap->lock); | |
5223 | - local_irq_restore(flags); | |
5224 | + local_irq_restore_nort(flags); | |
5225 | return ret; | |
5226 | } | |
5227 | ||
5228 | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h | |
5229 | index edc48f3b8230..ee5c6f9dfb6f 100644 | |
5230 | --- a/drivers/scsi/qla2xxx/qla_inline.h | |
5231 | +++ b/drivers/scsi/qla2xxx/qla_inline.h | |
5232 | @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) | |
5233 | { | |
5234 | unsigned long flags; | |
5235 | struct qla_hw_data *ha = rsp->hw; | |
5236 | - local_irq_save(flags); | |
5237 | + local_irq_save_nort(flags); | |
5238 | if (IS_P3P_TYPE(ha)) | |
5239 | qla82xx_poll(0, rsp); | |
5240 | else | |
5241 | ha->isp_ops->intr_handler(0, rsp); | |
5242 | - local_irq_restore(flags); | |
5243 | + local_irq_restore_nort(flags); | |
5244 | } | |
5245 | ||
5246 | static inline uint8_t * | |
5247 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c | |
c7c16703 | 5248 | index 068c4e47fac9..a2090f640397 100644 |
1a6e0f06 JK |
5249 | --- a/drivers/scsi/qla2xxx/qla_isr.c |
5250 | +++ b/drivers/scsi/qla2xxx/qla_isr.c | |
5251 | @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) | |
5252 | * kref_put(). | |
5253 | */ | |
5254 | kref_get(&qentry->irq_notify.kref); | |
5255 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
5256 | + swork_queue(&qentry->irq_notify.swork); | |
5257 | +#else | |
5258 | schedule_work(&qentry->irq_notify.work); | |
5259 | +#endif | |
5260 | } | |
5261 | ||
5262 | /* | |
5263 | diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c | |
c7c16703 | 5264 | index 95f4c1bcdb4c..0be934799bff 100644 |
1a6e0f06 JK |
5265 | --- a/drivers/thermal/x86_pkg_temp_thermal.c |
5266 | +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |
5267 | @@ -29,6 +29,7 @@ | |
5268 | #include <linux/pm.h> | |
5269 | #include <linux/thermal.h> | |
5270 | #include <linux/debugfs.h> | |
5271 | +#include <linux/swork.h> | |
5272 | #include <asm/cpu_device_id.h> | |
5273 | #include <asm/mce.h> | |
5274 | ||
c7c16703 | 5275 | @@ -353,7 +354,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) |
1a6e0f06 JK |
5276 | } |
5277 | } | |
5278 | ||
5279 | -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5280 | +static void platform_thermal_notify_work(struct swork_event *event) | |
5281 | { | |
5282 | unsigned long flags; | |
5283 | int cpu = smp_processor_id(); | |
c7c16703 | 5284 | @@ -370,7 +371,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) |
1a6e0f06 JK |
5285 | pkg_work_scheduled[phy_id]) { |
5286 | disable_pkg_thres_interrupt(); | |
5287 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
5288 | - return -EINVAL; | |
5289 | + return; | |
5290 | } | |
5291 | pkg_work_scheduled[phy_id] = 1; | |
5292 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
c7c16703 | 5293 | @@ -379,9 +380,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) |
1a6e0f06 JK |
5294 | schedule_delayed_work_on(cpu, |
5295 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | |
5296 | msecs_to_jiffies(notify_delay_ms)); | |
5297 | +} | |
5298 | + | |
5299 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5300 | +static struct swork_event notify_work; | |
5301 | + | |
5302 | +static int thermal_notify_work_init(void) | |
5303 | +{ | |
5304 | + int err; | |
5305 | + | |
5306 | + err = swork_get(); | |
5307 | + if (err) | |
5308 | + return err; | |
5309 | + | |
5310 | + INIT_SWORK(¬ify_work, platform_thermal_notify_work); | |
5311 | return 0; | |
5312 | } | |
5313 | ||
5314 | +static void thermal_notify_work_cleanup(void) | |
5315 | +{ | |
5316 | + swork_put(); | |
5317 | +} | |
5318 | + | |
5319 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5320 | +{ | |
5321 | + swork_queue(¬ify_work); | |
5322 | + return 0; | |
5323 | +} | |
5324 | + | |
5325 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
5326 | + | |
5327 | +static int thermal_notify_work_init(void) { return 0; } | |
5328 | + | |
5329 | +static void thermal_notify_work_cleanup(void) { } | |
5330 | + | |
5331 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
5332 | +{ | |
5333 | + platform_thermal_notify_work(NULL); | |
5334 | + | |
5335 | + return 0; | |
5336 | +} | |
5337 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
5338 | + | |
5339 | static int find_siblings_cpu(int cpu) | |
5340 | { | |
5341 | int i; | |
c7c16703 | 5342 | @@ -585,6 +625,9 @@ static int __init pkg_temp_thermal_init(void) |
1a6e0f06 JK |
5343 | if (!x86_match_cpu(pkg_temp_thermal_ids)) |
5344 | return -ENODEV; | |
5345 | ||
5346 | + if (!thermal_notify_work_init()) | |
5347 | + return -ENODEV; | |
5348 | + | |
5349 | spin_lock_init(&pkg_work_lock); | |
5350 | platform_thermal_package_notify = | |
5351 | pkg_temp_thermal_platform_thermal_notify; | |
c7c16703 | 5352 | @@ -609,7 +652,7 @@ static int __init pkg_temp_thermal_init(void) |
1a6e0f06 JK |
5353 | kfree(pkg_work_scheduled); |
5354 | platform_thermal_package_notify = NULL; | |
5355 | platform_thermal_package_rate_control = NULL; | |
5356 | - | |
5357 | + thermal_notify_work_cleanup(); | |
5358 | return -ENODEV; | |
5359 | } | |
5360 | ||
c7c16703 | 5361 | @@ -634,6 +677,7 @@ static void __exit pkg_temp_thermal_exit(void) |
1a6e0f06 JK |
5362 | mutex_unlock(&phy_dev_list_mutex); |
5363 | platform_thermal_package_notify = NULL; | |
5364 | platform_thermal_package_rate_control = NULL; | |
5365 | + thermal_notify_work_cleanup(); | |
5366 | for_each_online_cpu(i) | |
5367 | cancel_delayed_work_sync( | |
5368 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | |
5369 | diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c | |
1f39f580 | 5370 | index e8819aa20415..dd7f9bf45d6c 100644 |
1a6e0f06 JK |
5371 | --- a/drivers/tty/serial/8250/8250_core.c |
5372 | +++ b/drivers/tty/serial/8250/8250_core.c | |
5373 | @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; | |
5374 | ||
5375 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
5376 | ||
5377 | -#define PASS_LIMIT 512 | |
5378 | +/* | |
5379 | + * On -rt we can have a more delays, and legitimately | |
5380 | + * so - so don't drop work spuriously and spam the | |
5381 | + * syslog: | |
5382 | + */ | |
5383 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
5384 | +# define PASS_LIMIT 1000000 | |
5385 | +#else | |
5386 | +# define PASS_LIMIT 512 | |
5387 | +#endif | |
5388 | ||
5389 | #include <asm/serial.h> | |
5390 | /* | |
5391 | diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c | |
1f39f580 | 5392 | index 080d5a59d0a7..eecc4f111473 100644 |
1a6e0f06 JK |
5393 | --- a/drivers/tty/serial/8250/8250_port.c |
5394 | +++ b/drivers/tty/serial/8250/8250_port.c | |
5395 | @@ -35,6 +35,7 @@ | |
5396 | #include <linux/nmi.h> | |
5397 | #include <linux/mutex.h> | |
5398 | #include <linux/slab.h> | |
5399 | +#include <linux/kdb.h> | |
5400 | #include <linux/uaccess.h> | |
5401 | #include <linux/pm_runtime.h> | |
5402 | #include <linux/timer.h> | |
c7c16703 | 5403 | @@ -3144,9 +3145,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, |
1a6e0f06 JK |
5404 | |
5405 | serial8250_rpm_get(up); | |
5406 | ||
5407 | - if (port->sysrq) | |
5408 | + if (port->sysrq || oops_in_progress) | |
5409 | locked = 0; | |
5410 | - else if (oops_in_progress) | |
5411 | + else if (in_kdb_printk()) | |
5412 | locked = spin_trylock_irqsave(&port->lock, flags); | |
5413 | else | |
5414 | spin_lock_irqsave(&port->lock, flags); | |
5415 | diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c | |
c7c16703 | 5416 | index e2c33b9528d8..53af53c43e8c 100644 |
1a6e0f06 JK |
5417 | --- a/drivers/tty/serial/amba-pl011.c |
5418 | +++ b/drivers/tty/serial/amba-pl011.c | |
c7c16703 | 5419 | @@ -2194,13 +2194,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
1a6e0f06 JK |
5420 | |
5421 | clk_enable(uap->clk); | |
5422 | ||
5423 | - local_irq_save(flags); | |
5424 | + /* | |
5425 | + * local_irq_save(flags); | |
5426 | + * | |
5427 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
5428 | + * handling then interrupts are already disabled. Aside of | |
5429 | + * that the port.sysrq check is racy on SMP regardless. | |
5430 | + */ | |
5431 | if (uap->port.sysrq) | |
5432 | locked = 0; | |
5433 | else if (oops_in_progress) | |
5434 | - locked = spin_trylock(&uap->port.lock); | |
5435 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
5436 | else | |
5437 | - spin_lock(&uap->port.lock); | |
5438 | + spin_lock_irqsave(&uap->port.lock, flags); | |
5439 | ||
5440 | /* | |
5441 | * First save the CR then disable the interrupts | |
c7c16703 | 5442 | @@ -2224,8 +2230,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) |
1a6e0f06 JK |
5443 | pl011_write(old_cr, uap, REG_CR); |
5444 | ||
5445 | if (locked) | |
5446 | - spin_unlock(&uap->port.lock); | |
5447 | - local_irq_restore(flags); | |
5448 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
5449 | ||
5450 | clk_disable(uap->clk); | |
5451 | } | |
5452 | diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c | |
5453 | index a2a529994ba5..0ee7c4c518df 100644 | |
5454 | --- a/drivers/tty/serial/omap-serial.c | |
5455 | +++ b/drivers/tty/serial/omap-serial.c | |
5456 | @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, | |
5457 | ||
5458 | pm_runtime_get_sync(up->dev); | |
5459 | ||
5460 | - local_irq_save(flags); | |
5461 | - if (up->port.sysrq) | |
5462 | - locked = 0; | |
5463 | - else if (oops_in_progress) | |
5464 | - locked = spin_trylock(&up->port.lock); | |
5465 | + if (up->port.sysrq || oops_in_progress) | |
5466 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
5467 | else | |
5468 | - spin_lock(&up->port.lock); | |
5469 | + spin_lock_irqsave(&up->port.lock, flags); | |
5470 | ||
5471 | /* | |
5472 | * First save the IER then disable the interrupts | |
5473 | @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, | |
5474 | pm_runtime_mark_last_busy(up->dev); | |
5475 | pm_runtime_put_autosuspend(up->dev); | |
5476 | if (locked) | |
5477 | - spin_unlock(&up->port.lock); | |
5478 | - local_irq_restore(flags); | |
5479 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
5480 | } | |
5481 | ||
5482 | static int __init | |
1a6e0f06 | 5483 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c |
7c18450a | 5484 | index f029aad67183..87c026876640 100644 |
1a6e0f06 JK |
5485 | --- a/drivers/usb/core/hcd.c |
5486 | +++ b/drivers/usb/core/hcd.c | |
7c18450a | 5487 | @@ -1764,9 +1764,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) |
1a6e0f06 JK |
5488 | * and no one may trigger the above deadlock situation when |
5489 | * running complete() in tasklet. | |
5490 | */ | |
5491 | - local_irq_save(flags); | |
5492 | + local_irq_save_nort(flags); | |
5493 | urb->complete(urb); | |
5494 | - local_irq_restore(flags); | |
5495 | + local_irq_restore_nort(flags); | |
5496 | ||
5497 | usb_anchor_resume_wakeups(anchor); | |
5498 | atomic_dec(&urb->use_count); | |
5499 | diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c | |
33c7bf0f | 5500 | index 89081b834615..90b231b7ad0a 100644 |
1a6e0f06 JK |
5501 | --- a/drivers/usb/gadget/function/f_fs.c |
5502 | +++ b/drivers/usb/gadget/function/f_fs.c | |
c7c16703 | 5503 | @@ -1593,7 +1593,7 @@ static void ffs_data_put(struct ffs_data *ffs) |
1a6e0f06 JK |
5504 | pr_info("%s(): freeing\n", __func__); |
5505 | ffs_data_clear(ffs); | |
5506 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
5507 | - waitqueue_active(&ffs->ep0req_completion.wait)); | |
5508 | + swait_active(&ffs->ep0req_completion.wait)); | |
5509 | kfree(ffs->dev_name); | |
5510 | kfree(ffs); | |
5511 | } | |
5512 | diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c | |
c7c16703 | 5513 | index 1468d8f085a3..6aae3ae25c18 100644 |
1a6e0f06 JK |
5514 | --- a/drivers/usb/gadget/legacy/inode.c |
5515 | +++ b/drivers/usb/gadget/legacy/inode.c | |
5516 | @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5517 | spin_unlock_irq (&epdata->dev->lock); | |
5518 | ||
5519 | if (likely (value == 0)) { | |
5520 | - value = wait_event_interruptible (done.wait, done.done); | |
5521 | + value = swait_event_interruptible (done.wait, done.done); | |
5522 | if (value != 0) { | |
5523 | spin_lock_irq (&epdata->dev->lock); | |
5524 | if (likely (epdata->ep != NULL)) { | |
5525 | @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
5526 | usb_ep_dequeue (epdata->ep, epdata->req); | |
5527 | spin_unlock_irq (&epdata->dev->lock); | |
5528 | ||
5529 | - wait_event (done.wait, done.done); | |
5530 | + swait_event (done.wait, done.done); | |
5531 | if (epdata->status == -ECONNRESET) | |
5532 | epdata->status = -EINTR; | |
5533 | } else { | |
5534 | diff --git a/fs/aio.c b/fs/aio.c | |
c7c16703 | 5535 | index 428484f2f841..2b02e2eb2158 100644 |
1a6e0f06 JK |
5536 | --- a/fs/aio.c |
5537 | +++ b/fs/aio.c | |
5538 | @@ -40,6 +40,7 @@ | |
5539 | #include <linux/ramfs.h> | |
5540 | #include <linux/percpu-refcount.h> | |
5541 | #include <linux/mount.h> | |
5542 | +#include <linux/swork.h> | |
5543 | ||
5544 | #include <asm/kmap_types.h> | |
5545 | #include <asm/uaccess.h> | |
5546 | @@ -115,7 +116,7 @@ struct kioctx { | |
5547 | struct page **ring_pages; | |
5548 | long nr_pages; | |
5549 | ||
5550 | - struct work_struct free_work; | |
5551 | + struct swork_event free_work; | |
5552 | ||
5553 | /* | |
5554 | * signals when all in-flight requests are done | |
5555 | @@ -258,6 +259,7 @@ static int __init aio_setup(void) | |
5556 | .mount = aio_mount, | |
5557 | .kill_sb = kill_anon_super, | |
5558 | }; | |
5559 | + BUG_ON(swork_get()); | |
5560 | aio_mnt = kern_mount(&aio_fs); | |
5561 | if (IS_ERR(aio_mnt)) | |
5562 | panic("Failed to create aio fs mount."); | |
c7c16703 | 5563 | @@ -581,9 +583,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) |
1a6e0f06 JK |
5564 | return cancel(&kiocb->common); |
5565 | } | |
5566 | ||
5567 | -static void free_ioctx(struct work_struct *work) | |
5568 | +static void free_ioctx(struct swork_event *sev) | |
5569 | { | |
5570 | - struct kioctx *ctx = container_of(work, struct kioctx, free_work); | |
5571 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5572 | ||
5573 | pr_debug("freeing %p\n", ctx); | |
5574 | ||
c7c16703 | 5575 | @@ -602,8 +604,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) |
1a6e0f06 JK |
5576 | if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) |
5577 | complete(&ctx->rq_wait->comp); | |
5578 | ||
5579 | - INIT_WORK(&ctx->free_work, free_ioctx); | |
5580 | - schedule_work(&ctx->free_work); | |
5581 | + INIT_SWORK(&ctx->free_work, free_ioctx); | |
5582 | + swork_queue(&ctx->free_work); | |
5583 | } | |
5584 | ||
5585 | /* | |
c7c16703 | 5586 | @@ -611,9 +613,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) |
1a6e0f06 JK |
5587 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - |
5588 | * now it's safe to cancel any that need to be. | |
5589 | */ | |
5590 | -static void free_ioctx_users(struct percpu_ref *ref) | |
5591 | +static void free_ioctx_users_work(struct swork_event *sev) | |
5592 | { | |
5593 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5594 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
5595 | struct aio_kiocb *req; | |
5596 | ||
5597 | spin_lock_irq(&ctx->ctx_lock); | |
c7c16703 | 5598 | @@ -632,6 +634,14 @@ static void free_ioctx_users(struct percpu_ref *ref) |
1a6e0f06 JK |
5599 | percpu_ref_put(&ctx->reqs); |
5600 | } | |
5601 | ||
5602 | +static void free_ioctx_users(struct percpu_ref *ref) | |
5603 | +{ | |
5604 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
5605 | + | |
5606 | + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); | |
5607 | + swork_queue(&ctx->free_work); | |
5608 | +} | |
5609 | + | |
5610 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
5611 | { | |
5612 | unsigned i, new_nr; | |
5613 | diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h | |
c7c16703 | 5614 | index a1fba4285277..3796769b4cd1 100644 |
1a6e0f06 JK |
5615 | --- a/fs/autofs4/autofs_i.h |
5616 | +++ b/fs/autofs4/autofs_i.h | |
c7c16703 | 5617 | @@ -31,6 +31,7 @@ |
1a6e0f06 JK |
5618 | #include <linux/sched.h> |
5619 | #include <linux/mount.h> | |
5620 | #include <linux/namei.h> | |
5621 | +#include <linux/delay.h> | |
5622 | #include <asm/current.h> | |
5623 | #include <linux/uaccess.h> | |
5624 | ||
5625 | diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c | |
5626 | index d8e6d421c27f..2e689ab1306b 100644 | |
5627 | --- a/fs/autofs4/expire.c | |
5628 | +++ b/fs/autofs4/expire.c | |
5629 | @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, | |
5630 | parent = p->d_parent; | |
5631 | if (!spin_trylock(&parent->d_lock)) { | |
5632 | spin_unlock(&p->d_lock); | |
5633 | - cpu_relax(); | |
5634 | + cpu_chill(); | |
5635 | goto relock; | |
5636 | } | |
5637 | spin_unlock(&p->d_lock); | |
5638 | diff --git a/fs/buffer.c b/fs/buffer.c | |
c7c16703 | 5639 | index b205a629001d..5646afc022ba 100644 |
1a6e0f06 JK |
5640 | --- a/fs/buffer.c |
5641 | +++ b/fs/buffer.c | |
5642 | @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5643 | * decide that the page is now completely done. | |
5644 | */ | |
5645 | first = page_buffers(page); | |
5646 | - local_irq_save(flags); | |
5647 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5648 | + flags = bh_uptodate_lock_irqsave(first); | |
5649 | clear_buffer_async_read(bh); | |
5650 | unlock_buffer(bh); | |
5651 | tmp = bh; | |
5652 | @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5653 | } | |
5654 | tmp = tmp->b_this_page; | |
5655 | } while (tmp != bh); | |
5656 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5657 | - local_irq_restore(flags); | |
5658 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5659 | ||
5660 | /* | |
5661 | * If none of the buffers had errors and they are all | |
5662 | @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
5663 | return; | |
5664 | ||
5665 | still_busy: | |
5666 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5667 | - local_irq_restore(flags); | |
5668 | - return; | |
5669 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5670 | } | |
5671 | ||
5672 | /* | |
5673 | @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5674 | } | |
5675 | ||
5676 | first = page_buffers(page); | |
5677 | - local_irq_save(flags); | |
5678 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
5679 | + flags = bh_uptodate_lock_irqsave(first); | |
5680 | ||
5681 | clear_buffer_async_write(bh); | |
5682 | unlock_buffer(bh); | |
5683 | @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
5684 | } | |
5685 | tmp = tmp->b_this_page; | |
5686 | } | |
5687 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5688 | - local_irq_restore(flags); | |
5689 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5690 | end_page_writeback(page); | |
5691 | return; | |
5692 | ||
5693 | still_busy: | |
5694 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
5695 | - local_irq_restore(flags); | |
5696 | - return; | |
5697 | + bh_uptodate_unlock_irqrestore(first, flags); | |
5698 | } | |
5699 | EXPORT_SYMBOL(end_buffer_async_write); | |
5700 | ||
c7c16703 | 5701 | @@ -3383,6 +3375,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) |
1a6e0f06 JK |
5702 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); |
5703 | if (ret) { | |
5704 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
5705 | + buffer_head_init_locks(ret); | |
5706 | preempt_disable(); | |
5707 | __this_cpu_inc(bh_accounting.nr); | |
5708 | recalc_bh_state(); | |
5709 | diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c | |
5c015b7c | 5710 | index a27fc8791551..791aecb7c1ac 100644 |
1a6e0f06 JK |
5711 | --- a/fs/cifs/readdir.c |
5712 | +++ b/fs/cifs/readdir.c | |
5713 | @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, | |
5714 | struct inode *inode; | |
5715 | struct super_block *sb = parent->d_sb; | |
5716 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | |
5717 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5718 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5719 | ||
5720 | cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); | |
5721 | ||
5722 | diff --git a/fs/dcache.c b/fs/dcache.c | |
1f39f580 | 5723 | index 4485a48f4091..691039a6a872 100644 |
1a6e0f06 JK |
5724 | --- a/fs/dcache.c |
5725 | +++ b/fs/dcache.c | |
5726 | @@ -19,6 +19,7 @@ | |
5727 | #include <linux/mm.h> | |
5728 | #include <linux/fs.h> | |
5729 | #include <linux/fsnotify.h> | |
5730 | +#include <linux/delay.h> | |
5731 | #include <linux/slab.h> | |
5732 | #include <linux/init.h> | |
5733 | #include <linux/hash.h> | |
5734 | @@ -750,6 +751,8 @@ static inline bool fast_dput(struct dentry *dentry) | |
5735 | */ | |
5736 | void dput(struct dentry *dentry) | |
5737 | { | |
5738 | + struct dentry *parent; | |
5739 | + | |
5740 | if (unlikely(!dentry)) | |
5741 | return; | |
5742 | ||
5743 | @@ -788,9 +791,18 @@ void dput(struct dentry *dentry) | |
5744 | return; | |
5745 | ||
5746 | kill_it: | |
5747 | - dentry = dentry_kill(dentry); | |
5748 | - if (dentry) { | |
5749 | - cond_resched(); | |
5750 | + parent = dentry_kill(dentry); | |
5751 | + if (parent) { | |
5752 | + int r; | |
5753 | + | |
5754 | + if (parent == dentry) { | |
5755 | + /* the task with the highest priority won't schedule */ | |
5756 | + r = cond_resched(); | |
5757 | + if (!r) | |
5758 | + cpu_chill(); | |
5759 | + } else { | |
5760 | + dentry = parent; | |
5761 | + } | |
5762 | goto repeat; | |
5763 | } | |
5764 | } | |
1f39f580 | 5765 | @@ -2324,7 +2336,7 @@ void d_delete(struct dentry * dentry) |
1a6e0f06 JK |
5766 | if (dentry->d_lockref.count == 1) { |
5767 | if (!spin_trylock(&inode->i_lock)) { | |
5768 | spin_unlock(&dentry->d_lock); | |
5769 | - cpu_relax(); | |
5770 | + cpu_chill(); | |
5771 | goto again; | |
5772 | } | |
5773 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
1f39f580 | 5774 | @@ -2384,21 +2396,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) |
1a6e0f06 JK |
5775 | |
5776 | static void d_wait_lookup(struct dentry *dentry) | |
5777 | { | |
5778 | - if (d_in_lookup(dentry)) { | |
5779 | - DECLARE_WAITQUEUE(wait, current); | |
5780 | - add_wait_queue(dentry->d_wait, &wait); | |
5781 | - do { | |
5782 | - set_current_state(TASK_UNINTERRUPTIBLE); | |
5783 | - spin_unlock(&dentry->d_lock); | |
5784 | - schedule(); | |
5785 | - spin_lock(&dentry->d_lock); | |
5786 | - } while (d_in_lookup(dentry)); | |
5787 | - } | |
5788 | + struct swait_queue __wait; | |
5789 | + | |
5790 | + if (!d_in_lookup(dentry)) | |
5791 | + return; | |
5792 | + | |
5793 | + INIT_LIST_HEAD(&__wait.task_list); | |
5794 | + do { | |
5795 | + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); | |
5796 | + spin_unlock(&dentry->d_lock); | |
5797 | + schedule(); | |
5798 | + spin_lock(&dentry->d_lock); | |
5799 | + } while (d_in_lookup(dentry)); | |
5800 | + finish_swait(dentry->d_wait, &__wait); | |
5801 | } | |
5802 | ||
5803 | struct dentry *d_alloc_parallel(struct dentry *parent, | |
5804 | const struct qstr *name, | |
5805 | - wait_queue_head_t *wq) | |
5806 | + struct swait_queue_head *wq) | |
5807 | { | |
5808 | unsigned int hash = name->hash; | |
5809 | struct hlist_bl_head *b = in_lookup_hash(parent, hash); | |
1f39f580 | 5810 | @@ -2507,7 +2522,7 @@ void __d_lookup_done(struct dentry *dentry) |
1a6e0f06 JK |
5811 | hlist_bl_lock(b); |
5812 | dentry->d_flags &= ~DCACHE_PAR_LOOKUP; | |
5813 | __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); | |
5814 | - wake_up_all(dentry->d_wait); | |
5815 | + swake_up_all(dentry->d_wait); | |
5816 | dentry->d_wait = NULL; | |
5817 | hlist_bl_unlock(b); | |
5818 | INIT_HLIST_NODE(&dentry->d_u.d_alias); | |
1f39f580 | 5819 | @@ -3604,6 +3619,11 @@ EXPORT_SYMBOL(d_genocide); |
1a6e0f06 JK |
5820 | |
5821 | void __init vfs_caches_init_early(void) | |
5822 | { | |
5823 | + int i; | |
5824 | + | |
5825 | + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) | |
5826 | + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); | |
5827 | + | |
5828 | dcache_init_early(); | |
5829 | inode_init_early(); | |
5830 | } | |
5831 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
5832 | index 10db91218933..42af0a06f657 100644 | |
5833 | --- a/fs/eventpoll.c | |
5834 | +++ b/fs/eventpoll.c | |
5835 | @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | |
5836 | */ | |
5837 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
5838 | { | |
5839 | - int this_cpu = get_cpu(); | |
5840 | + int this_cpu = get_cpu_light(); | |
5841 | ||
5842 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
5843 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
5844 | ||
5845 | - put_cpu(); | |
5846 | + put_cpu_light(); | |
5847 | } | |
5848 | ||
5849 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
5850 | diff --git a/fs/exec.c b/fs/exec.c | |
c7c16703 | 5851 | index 67e86571685a..fe14cdd84016 100644 |
1a6e0f06 JK |
5852 | --- a/fs/exec.c |
5853 | +++ b/fs/exec.c | |
c7c16703 | 5854 | @@ -1017,12 +1017,14 @@ static int exec_mmap(struct mm_struct *mm) |
1a6e0f06 JK |
5855 | } |
5856 | } | |
5857 | task_lock(tsk); | |
5858 | + preempt_disable_rt(); | |
5859 | active_mm = tsk->active_mm; | |
5860 | tsk->mm = mm; | |
5861 | tsk->active_mm = mm; | |
5862 | activate_mm(active_mm, mm); | |
5863 | tsk->mm->vmacache_seqnum = 0; | |
5864 | vmacache_flush(tsk); | |
5865 | + preempt_enable_rt(); | |
5866 | task_unlock(tsk); | |
5867 | if (old_mm) { | |
5868 | up_read(&old_mm->mmap_sem); | |
5869 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c | |
1f39f580 | 5870 | index 642c57b8de7b..8494b9308333 100644 |
1a6e0f06 JK |
5871 | --- a/fs/fuse/dir.c |
5872 | +++ b/fs/fuse/dir.c | |
c7c16703 | 5873 | @@ -1191,7 +1191,7 @@ static int fuse_direntplus_link(struct file *file, |
1a6e0f06 JK |
5874 | struct inode *dir = d_inode(parent); |
5875 | struct fuse_conn *fc; | |
5876 | struct inode *inode; | |
5877 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
5878 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
5879 | ||
5880 | if (!o->nodeid) { | |
5881 | /* | |
5882 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c | |
5883 | index 684996c8a3a4..6e18a06aaabe 100644 | |
5884 | --- a/fs/jbd2/checkpoint.c | |
5885 | +++ b/fs/jbd2/checkpoint.c | |
5886 | @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |
5887 | nblocks = jbd2_space_needed(journal); | |
5888 | while (jbd2_log_space_left(journal) < nblocks) { | |
5889 | write_unlock(&journal->j_state_lock); | |
5890 | + if (current->plug) | |
5891 | + io_schedule(); | |
5892 | mutex_lock(&journal->j_checkpoint_mutex); | |
5893 | ||
5894 | /* | |
c7c16703 JK |
5895 | diff --git a/fs/locks.c b/fs/locks.c |
5896 | index 22c5b4aa4961..269c6a44449a 100644 | |
5897 | --- a/fs/locks.c | |
5898 | +++ b/fs/locks.c | |
5899 | @@ -935,7 +935,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5900 | return -ENOMEM; | |
5901 | } | |
5902 | ||
5903 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5904 | + percpu_down_read(&file_rwsem); | |
5905 | spin_lock(&ctx->flc_lock); | |
5906 | if (request->fl_flags & FL_ACCESS) | |
5907 | goto find_conflict; | |
5908 | @@ -976,7 +976,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request) | |
5909 | ||
5910 | out: | |
5911 | spin_unlock(&ctx->flc_lock); | |
5912 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5913 | + percpu_up_read(&file_rwsem); | |
5914 | if (new_fl) | |
5915 | locks_free_lock(new_fl); | |
5916 | locks_dispose_list(&dispose); | |
5917 | @@ -1013,7 +1013,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5918 | new_fl2 = locks_alloc_lock(); | |
5919 | } | |
5920 | ||
5921 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5922 | + percpu_down_read(&file_rwsem); | |
5923 | spin_lock(&ctx->flc_lock); | |
5924 | /* | |
5925 | * New lock request. Walk all POSIX locks and look for conflicts. If | |
5926 | @@ -1185,7 +1185,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, | |
5927 | } | |
5928 | out: | |
5929 | spin_unlock(&ctx->flc_lock); | |
5930 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5931 | + percpu_up_read(&file_rwsem); | |
5932 | /* | |
5933 | * Free any unused locks. | |
5934 | */ | |
5935 | @@ -1460,7 +1460,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5936 | return error; | |
5937 | } | |
5938 | ||
5939 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5940 | + percpu_down_read(&file_rwsem); | |
5941 | spin_lock(&ctx->flc_lock); | |
5942 | ||
5943 | time_out_leases(inode, &dispose); | |
5944 | @@ -1512,13 +1512,13 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5945 | locks_insert_block(fl, new_fl); | |
5946 | trace_break_lease_block(inode, new_fl); | |
5947 | spin_unlock(&ctx->flc_lock); | |
5948 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5949 | + percpu_up_read(&file_rwsem); | |
5950 | ||
5951 | locks_dispose_list(&dispose); | |
5952 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | |
5953 | !new_fl->fl_next, break_time); | |
5954 | ||
5955 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5956 | + percpu_down_read(&file_rwsem); | |
5957 | spin_lock(&ctx->flc_lock); | |
5958 | trace_break_lease_unblock(inode, new_fl); | |
5959 | locks_delete_block(new_fl); | |
5960 | @@ -1535,7 +1535,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) | |
5961 | } | |
5962 | out: | |
5963 | spin_unlock(&ctx->flc_lock); | |
5964 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5965 | + percpu_up_read(&file_rwsem); | |
5966 | locks_dispose_list(&dispose); | |
5967 | locks_free_lock(new_fl); | |
5968 | return error; | |
5969 | @@ -1609,7 +1609,7 @@ int fcntl_getlease(struct file *filp) | |
5970 | ||
5971 | ctx = smp_load_acquire(&inode->i_flctx); | |
5972 | if (ctx && !list_empty_careful(&ctx->flc_lease)) { | |
5973 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5974 | + percpu_down_read(&file_rwsem); | |
5975 | spin_lock(&ctx->flc_lock); | |
5976 | time_out_leases(inode, &dispose); | |
5977 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
5978 | @@ -1619,7 +1619,7 @@ int fcntl_getlease(struct file *filp) | |
5979 | break; | |
5980 | } | |
5981 | spin_unlock(&ctx->flc_lock); | |
5982 | - percpu_up_read_preempt_enable(&file_rwsem); | |
5983 | + percpu_up_read(&file_rwsem); | |
5984 | ||
5985 | locks_dispose_list(&dispose); | |
5986 | } | |
5987 | @@ -1694,7 +1694,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5988 | return -EINVAL; | |
5989 | } | |
5990 | ||
5991 | - percpu_down_read_preempt_disable(&file_rwsem); | |
5992 | + percpu_down_read(&file_rwsem); | |
5993 | spin_lock(&ctx->flc_lock); | |
5994 | time_out_leases(inode, &dispose); | |
5995 | error = check_conflicting_open(dentry, arg, lease->fl_flags); | |
5996 | @@ -1765,7 +1765,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr | |
5997 | lease->fl_lmops->lm_setup(lease, priv); | |
5998 | out: | |
5999 | spin_unlock(&ctx->flc_lock); | |
6000 | - percpu_up_read_preempt_enable(&file_rwsem); | |
6001 | + percpu_up_read(&file_rwsem); | |
6002 | locks_dispose_list(&dispose); | |
6003 | if (is_deleg) | |
6004 | inode_unlock(inode); | |
6005 | @@ -1788,7 +1788,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
6006 | return error; | |
6007 | } | |
6008 | ||
6009 | - percpu_down_read_preempt_disable(&file_rwsem); | |
6010 | + percpu_down_read(&file_rwsem); | |
6011 | spin_lock(&ctx->flc_lock); | |
6012 | list_for_each_entry(fl, &ctx->flc_lease, fl_list) { | |
6013 | if (fl->fl_file == filp && | |
6014 | @@ -1801,7 +1801,7 @@ static int generic_delete_lease(struct file *filp, void *owner) | |
6015 | if (victim) | |
6016 | error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); | |
6017 | spin_unlock(&ctx->flc_lock); | |
6018 | - percpu_up_read_preempt_enable(&file_rwsem); | |
6019 | + percpu_up_read(&file_rwsem); | |
6020 | locks_dispose_list(&dispose); | |
6021 | return error; | |
6022 | } | |
6023 | @@ -2532,13 +2532,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx) | |
6024 | if (list_empty(&ctx->flc_lease)) | |
6025 | return; | |
6026 | ||
6027 | - percpu_down_read_preempt_disable(&file_rwsem); | |
6028 | + percpu_down_read(&file_rwsem); | |
6029 | spin_lock(&ctx->flc_lock); | |
6030 | list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) | |
6031 | if (filp == fl->fl_file) | |
6032 | lease_modify(fl, F_UNLCK, &dispose); | |
6033 | spin_unlock(&ctx->flc_lock); | |
6034 | - percpu_up_read_preempt_enable(&file_rwsem); | |
6035 | + percpu_up_read(&file_rwsem); | |
6036 | ||
6037 | locks_dispose_list(&dispose); | |
6038 | } | |
1a6e0f06 | 6039 | diff --git a/fs/namei.c b/fs/namei.c |
33c7bf0f | 6040 | index d5e5140c1045..150fbdd8e04c 100644 |
1a6e0f06 JK |
6041 | --- a/fs/namei.c |
6042 | +++ b/fs/namei.c | |
33c7bf0f | 6043 | @@ -1626,7 +1626,7 @@ static struct dentry *lookup_slow(const struct qstr *name, |
1a6e0f06 JK |
6044 | { |
6045 | struct dentry *dentry = ERR_PTR(-ENOENT), *old; | |
6046 | struct inode *inode = dir->d_inode; | |
6047 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6048 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6049 | ||
6050 | inode_lock_shared(inode); | |
6051 | /* Don't go there if it's already dead */ | |
33c7bf0f | 6052 | @@ -3083,7 +3083,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, |
1a6e0f06 JK |
6053 | struct dentry *dentry; |
6054 | int error, create_error = 0; | |
6055 | umode_t mode = op->mode; | |
6056 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6057 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6058 | ||
6059 | if (unlikely(IS_DEADDIR(dir_inode))) | |
6060 | return -ENOENT; | |
6061 | diff --git a/fs/namespace.c b/fs/namespace.c | |
33c7bf0f | 6062 | index 5e35057f07ac..843d274ba167 100644 |
1a6e0f06 JK |
6063 | --- a/fs/namespace.c |
6064 | +++ b/fs/namespace.c | |
6065 | @@ -14,6 +14,7 @@ | |
6066 | #include <linux/mnt_namespace.h> | |
6067 | #include <linux/user_namespace.h> | |
6068 | #include <linux/namei.h> | |
6069 | +#include <linux/delay.h> | |
6070 | #include <linux/security.h> | |
6071 | #include <linux/idr.h> | |
6072 | #include <linux/init.h> /* init_rootfs */ | |
c7c16703 | 6073 | @@ -356,8 +357,11 @@ int __mnt_want_write(struct vfsmount *m) |
1a6e0f06 JK |
6074 | * incremented count after it has set MNT_WRITE_HOLD. |
6075 | */ | |
6076 | smp_mb(); | |
6077 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
6078 | - cpu_relax(); | |
6079 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
6080 | + preempt_enable(); | |
6081 | + cpu_chill(); | |
6082 | + preempt_disable(); | |
6083 | + } | |
6084 | /* | |
6085 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
6086 | * be set to match its requirements. So we must not load that until | |
6087 | diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c | |
c7c16703 | 6088 | index dff600ae0d74..d726d2e09353 100644 |
1a6e0f06 JK |
6089 | --- a/fs/nfs/delegation.c |
6090 | +++ b/fs/nfs/delegation.c | |
6091 | @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode, | |
6092 | sp = state->owner; | |
6093 | /* Block nfs4_proc_unlck */ | |
6094 | mutex_lock(&sp->so_delegreturn_mutex); | |
6095 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
6096 | + seq = read_seqbegin(&sp->so_reclaim_seqlock); | |
6097 | err = nfs4_open_delegation_recall(ctx, state, stateid, type); | |
6098 | if (!err) | |
6099 | err = nfs_delegation_claim_locks(ctx, state, stateid); | |
6100 | - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
6101 | + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
6102 | err = -EAGAIN; | |
6103 | mutex_unlock(&sp->so_delegreturn_mutex); | |
6104 | put_nfs_open_context(ctx); | |
6105 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | |
1f39f580 | 6106 | index 53e02b8bd9bd..a66e7d77cfbb 100644 |
1a6e0f06 JK |
6107 | --- a/fs/nfs/dir.c |
6108 | +++ b/fs/nfs/dir.c | |
6109 | @@ -485,7 +485,7 @@ static | |
6110 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |
6111 | { | |
6112 | struct qstr filename = QSTR_INIT(entry->name, entry->len); | |
6113 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6114 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6115 | struct dentry *dentry; | |
6116 | struct dentry *alias; | |
6117 | struct inode *dir = d_inode(parent); | |
1f39f580 | 6118 | @@ -1487,7 +1487,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, |
1a6e0f06 JK |
6119 | struct file *file, unsigned open_flags, |
6120 | umode_t mode, int *opened) | |
6121 | { | |
6122 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6123 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6124 | struct nfs_open_context *ctx; | |
6125 | struct dentry *res; | |
6126 | struct iattr attr = { .ia_valid = ATTR_OPEN }; | |
1f39f580 | 6127 | @@ -1802,7 +1802,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) |
1a6e0f06 JK |
6128 | |
6129 | trace_nfs_rmdir_enter(dir, dentry); | |
6130 | if (d_really_is_positive(dentry)) { | |
6131 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6132 | + down(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6133 | +#else | |
6134 | down_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6135 | +#endif | |
6136 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
6137 | /* Ensure the VFS deletes this inode */ | |
6138 | switch (error) { | |
1f39f580 | 6139 | @@ -1812,7 +1816,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) |
1a6e0f06 JK |
6140 | case -ENOENT: |
6141 | nfs_dentry_handle_enoent(dentry); | |
6142 | } | |
6143 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6144 | + up(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6145 | +#else | |
6146 | up_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
6147 | +#endif | |
6148 | } else | |
6149 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
6150 | trace_nfs_rmdir_exit(dir, dentry, error); | |
6151 | diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c | |
6152 | index bf4ec5ecc97e..36cd5fc9192c 100644 | |
6153 | --- a/fs/nfs/inode.c | |
6154 | +++ b/fs/nfs/inode.c | |
6155 | @@ -1957,7 +1957,11 @@ static void init_once(void *foo) | |
6156 | nfsi->nrequests = 0; | |
6157 | nfsi->commit_info.ncommit = 0; | |
6158 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | |
6159 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6160 | + sema_init(&nfsi->rmdir_sem, 1); | |
6161 | +#else | |
6162 | init_rwsem(&nfsi->rmdir_sem); | |
6163 | +#endif | |
6164 | nfs4_init_once(nfsi); | |
6165 | } | |
6166 | ||
6167 | diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h | |
c7c16703 | 6168 | index 1452177c822d..f43b01d54c59 100644 |
1a6e0f06 JK |
6169 | --- a/fs/nfs/nfs4_fs.h |
6170 | +++ b/fs/nfs/nfs4_fs.h | |
c7c16703 | 6171 | @@ -111,7 +111,7 @@ struct nfs4_state_owner { |
1a6e0f06 JK |
6172 | unsigned long so_flags; |
6173 | struct list_head so_states; | |
6174 | struct nfs_seqid_counter so_seqid; | |
6175 | - seqcount_t so_reclaim_seqcount; | |
6176 | + seqlock_t so_reclaim_seqlock; | |
6177 | struct mutex so_delegreturn_mutex; | |
6178 | }; | |
6179 | ||
6180 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | |
7c18450a | 6181 | index 4e894d301c88..3300a4b5c87c 100644 |
1a6e0f06 JK |
6182 | --- a/fs/nfs/nfs4proc.c |
6183 | +++ b/fs/nfs/nfs4proc.c | |
7c18450a | 6184 | @@ -2695,7 +2695,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
1a6e0f06 JK |
6185 | unsigned int seq; |
6186 | int ret; | |
6187 | ||
6188 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
6189 | + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
6190 | ||
6191 | ret = _nfs4_proc_open(opendata); | |
6192 | if (ret != 0) | |
7c18450a | 6193 | @@ -2733,7 +2733,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, |
33c7bf0f | 6194 | |
1a6e0f06 JK |
6195 | if (d_inode(dentry) == state->inode) { |
6196 | nfs_inode_attach_open_context(ctx); | |
6197 | - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
6198 | + if (read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
6199 | nfs4_schedule_stateid_recovery(server, state); | |
6200 | } | |
6201 | out: | |
6202 | diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c | |
c7c16703 | 6203 | index 0959c9661662..dabd834d7686 100644 |
1a6e0f06 JK |
6204 | --- a/fs/nfs/nfs4state.c |
6205 | +++ b/fs/nfs/nfs4state.c | |
6206 | @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, | |
6207 | nfs4_init_seqid_counter(&sp->so_seqid); | |
6208 | atomic_set(&sp->so_count, 1); | |
6209 | INIT_LIST_HEAD(&sp->so_lru); | |
6210 | - seqcount_init(&sp->so_reclaim_seqcount); | |
6211 | + seqlock_init(&sp->so_reclaim_seqlock); | |
6212 | mutex_init(&sp->so_delegreturn_mutex); | |
6213 | return sp; | |
6214 | } | |
c7c16703 | 6215 | @@ -1497,8 +1497,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs |
1a6e0f06 JK |
6216 | * recovering after a network partition or a reboot from a |
6217 | * server that doesn't support a grace period. | |
6218 | */ | |
6219 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6220 | + write_seqlock(&sp->so_reclaim_seqlock); | |
6221 | +#else | |
6222 | + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
6223 | +#endif | |
6224 | spin_lock(&sp->so_lock); | |
6225 | - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); | |
6226 | restart: | |
6227 | list_for_each_entry(state, &sp->so_states, open_states) { | |
6228 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | |
c7c16703 | 6229 | @@ -1567,14 +1571,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs |
1a6e0f06 JK |
6230 | spin_lock(&sp->so_lock); |
6231 | goto restart; | |
6232 | } | |
6233 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
6234 | spin_unlock(&sp->so_lock); | |
6235 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6236 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
6237 | +#else | |
6238 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
6239 | +#endif | |
6240 | return 0; | |
6241 | out_err: | |
6242 | nfs4_put_open_state(state); | |
6243 | - spin_lock(&sp->so_lock); | |
6244 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
6245 | - spin_unlock(&sp->so_lock); | |
6246 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6247 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
6248 | +#else | |
6249 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
6250 | +#endif | |
6251 | return status; | |
6252 | } | |
6253 | ||
6254 | diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c | |
6255 | index 191aa577dd1f..58990c8f52e0 100644 | |
6256 | --- a/fs/nfs/unlink.c | |
6257 | +++ b/fs/nfs/unlink.c | |
6258 | @@ -12,7 +12,7 @@ | |
6259 | #include <linux/sunrpc/clnt.h> | |
6260 | #include <linux/nfs_fs.h> | |
6261 | #include <linux/sched.h> | |
6262 | -#include <linux/wait.h> | |
6263 | +#include <linux/swait.h> | |
6264 | #include <linux/namei.h> | |
6265 | #include <linux/fsnotify.h> | |
6266 | ||
6267 | @@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |
6268 | rpc_restart_call_prepare(task); | |
6269 | } | |
6270 | ||
6271 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6272 | +static void nfs_down_anon(struct semaphore *sema) | |
6273 | +{ | |
6274 | + down(sema); | |
6275 | +} | |
6276 | + | |
6277 | +static void nfs_up_anon(struct semaphore *sema) | |
6278 | +{ | |
6279 | + up(sema); | |
6280 | +} | |
6281 | + | |
6282 | +#else | |
6283 | +static void nfs_down_anon(struct rw_semaphore *rwsem) | |
6284 | +{ | |
6285 | + down_read_non_owner(rwsem); | |
6286 | +} | |
6287 | + | |
6288 | +static void nfs_up_anon(struct rw_semaphore *rwsem) | |
6289 | +{ | |
6290 | + up_read_non_owner(rwsem); | |
6291 | +} | |
6292 | +#endif | |
6293 | + | |
6294 | /** | |
6295 | * nfs_async_unlink_release - Release the sillydelete data. | |
6296 | * @task: rpc_task of the sillydelete | |
6297 | @@ -64,7 +87,7 @@ static void nfs_async_unlink_release(void *calldata) | |
6298 | struct dentry *dentry = data->dentry; | |
6299 | struct super_block *sb = dentry->d_sb; | |
6300 | ||
6301 | - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6302 | + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
6303 | d_lookup_done(dentry); | |
6304 | nfs_free_unlinkdata(data); | |
6305 | dput(dentry); | |
6306 | @@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6307 | struct inode *dir = d_inode(dentry->d_parent); | |
6308 | struct dentry *alias; | |
6309 | ||
6310 | - down_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6311 | + nfs_down_anon(&NFS_I(dir)->rmdir_sem); | |
6312 | alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); | |
6313 | if (IS_ERR(alias)) { | |
6314 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6315 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6316 | return 0; | |
6317 | } | |
6318 | if (!d_in_lookup(alias)) { | |
6319 | @@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
6320 | ret = 0; | |
6321 | spin_unlock(&alias->d_lock); | |
6322 | dput(alias); | |
6323 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
6324 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
6325 | /* | |
6326 | * If we'd displaced old cached devname, free it. At that | |
6327 | * point dentry is definitely not a root, so we won't need | |
6328 | @@ -182,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) | |
6329 | goto out_free_name; | |
6330 | } | |
6331 | data->res.dir_attr = &data->dir_attr; | |
6332 | - init_waitqueue_head(&data->wq); | |
6333 | + init_swait_queue_head(&data->wq); | |
6334 | ||
6335 | status = -EBUSY; | |
6336 | spin_lock(&dentry->d_lock); | |
6337 | diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c | |
6338 | index fe251f187ff8..e89da4fb14c2 100644 | |
6339 | --- a/fs/ntfs/aops.c | |
6340 | +++ b/fs/ntfs/aops.c | |
6341 | @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6342 | ofs = 0; | |
6343 | if (file_ofs < init_size) | |
6344 | ofs = init_size - file_ofs; | |
6345 | - local_irq_save(flags); | |
6346 | + local_irq_save_nort(flags); | |
6347 | kaddr = kmap_atomic(page); | |
6348 | memset(kaddr + bh_offset(bh) + ofs, 0, | |
6349 | bh->b_size - ofs); | |
6350 | flush_dcache_page(page); | |
6351 | kunmap_atomic(kaddr); | |
6352 | - local_irq_restore(flags); | |
6353 | + local_irq_restore_nort(flags); | |
6354 | } | |
6355 | } else { | |
6356 | clear_buffer_uptodate(bh); | |
6357 | @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6358 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
6359 | } | |
6360 | first = page_buffers(page); | |
6361 | - local_irq_save(flags); | |
6362 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
6363 | + flags = bh_uptodate_lock_irqsave(first); | |
6364 | clear_buffer_async_read(bh); | |
6365 | unlock_buffer(bh); | |
6366 | tmp = bh; | |
6367 | @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6368 | } | |
6369 | tmp = tmp->b_this_page; | |
6370 | } while (tmp != bh); | |
6371 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6372 | - local_irq_restore(flags); | |
6373 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6374 | /* | |
6375 | * If none of the buffers had errors then we can set the page uptodate, | |
6376 | * but we first have to perform the post read mst fixups, if the | |
6377 | @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6378 | recs = PAGE_SIZE / rec_size; | |
6379 | /* Should have been verified before we got here... */ | |
6380 | BUG_ON(!recs); | |
6381 | - local_irq_save(flags); | |
6382 | + local_irq_save_nort(flags); | |
6383 | kaddr = kmap_atomic(page); | |
6384 | for (i = 0; i < recs; i++) | |
6385 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
6386 | i * rec_size), rec_size); | |
6387 | kunmap_atomic(kaddr); | |
6388 | - local_irq_restore(flags); | |
6389 | + local_irq_restore_nort(flags); | |
6390 | flush_dcache_page(page); | |
6391 | if (likely(page_uptodate && !PageError(page))) | |
6392 | SetPageUptodate(page); | |
6393 | @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6394 | unlock_page(page); | |
6395 | return; | |
6396 | still_busy: | |
6397 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6398 | - local_irq_restore(flags); | |
6399 | - return; | |
6400 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6401 | } | |
6402 | ||
6403 | /** | |
6404 | diff --git a/fs/proc/base.c b/fs/proc/base.c | |
c7c16703 | 6405 | index ca651ac00660..41d9dc789285 100644 |
1a6e0f06 JK |
6406 | --- a/fs/proc/base.c |
6407 | +++ b/fs/proc/base.c | |
c7c16703 | 6408 | @@ -1834,7 +1834,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
1a6e0f06 JK |
6409 | |
6410 | child = d_hash_and_lookup(dir, &qname); | |
6411 | if (!child) { | |
6412 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6413 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6414 | child = d_alloc_parallel(dir, &qname, &wq); | |
6415 | if (IS_ERR(child)) | |
6416 | goto end_instantiate; | |
6417 | diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c | |
1f39f580 | 6418 | index d4e37acd4821..000cea46434a 100644 |
1a6e0f06 JK |
6419 | --- a/fs/proc/proc_sysctl.c |
6420 | +++ b/fs/proc/proc_sysctl.c | |
c7c16703 | 6421 | @@ -632,7 +632,7 @@ static bool proc_sys_fill_cache(struct file *file, |
1a6e0f06 JK |
6422 | |
6423 | child = d_lookup(dir, &qname); | |
6424 | if (!child) { | |
6425 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6426 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6427 | child = d_alloc_parallel(dir, &qname, &wq); | |
6428 | if (IS_ERR(child)) | |
6429 | return false; | |
6430 | diff --git a/fs/timerfd.c b/fs/timerfd.c | |
7c18450a | 6431 | index ab8dd1538381..5580853f57dd 100644 |
1a6e0f06 JK |
6432 | --- a/fs/timerfd.c |
6433 | +++ b/fs/timerfd.c | |
7c18450a | 6434 | @@ -471,7 +471,10 @@ static int do_timerfd_settime(int ufd, int flags, |
1a6e0f06 JK |
6435 | break; |
6436 | } | |
6437 | spin_unlock_irq(&ctx->wqh.lock); | |
6438 | - cpu_relax(); | |
6439 | + if (isalarm(ctx)) | |
6440 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
6441 | + else | |
6442 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
6443 | } | |
6444 | ||
6445 | /* | |
6446 | diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h | |
c7c16703 | 6447 | index e861a24f06f2..b5c97d3059c7 100644 |
1a6e0f06 JK |
6448 | --- a/include/acpi/platform/aclinux.h |
6449 | +++ b/include/acpi/platform/aclinux.h | |
c7c16703 | 6450 | @@ -133,6 +133,7 @@ |
1a6e0f06 JK |
6451 | |
6452 | #define acpi_cache_t struct kmem_cache | |
6453 | #define acpi_spinlock spinlock_t * | |
6454 | +#define acpi_raw_spinlock raw_spinlock_t * | |
6455 | #define acpi_cpu_flags unsigned long | |
6456 | ||
6457 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
c7c16703 | 6458 | @@ -151,6 +152,20 @@ |
1a6e0f06 JK |
6459 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id |
6460 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
6461 | ||
6462 | +#define acpi_os_create_raw_lock(__handle) \ | |
6463 | +({ \ | |
6464 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
6465 | + \ | |
6466 | + if (lock) { \ | |
6467 | + *(__handle) = lock; \ | |
6468 | + raw_spin_lock_init(*(__handle)); \ | |
6469 | + } \ | |
6470 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
6471 | + }) | |
6472 | + | |
6473 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
6474 | + | |
6475 | + | |
6476 | /* | |
6477 | * OSL interfaces used by debugger/disassembler | |
6478 | */ | |
6479 | diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h | |
6480 | index 6f96247226a4..fa53a21263c2 100644 | |
6481 | --- a/include/asm-generic/bug.h | |
6482 | +++ b/include/asm-generic/bug.h | |
6483 | @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint, | |
6484 | # define WARN_ON_SMP(x) ({0;}) | |
6485 | #endif | |
6486 | ||
6487 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6488 | +# define BUG_ON_RT(c) BUG_ON(c) | |
6489 | +# define BUG_ON_NONRT(c) do { } while (0) | |
6490 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
6491 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
6492 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
6493 | +#else | |
6494 | +# define BUG_ON_RT(c) do { } while (0) | |
6495 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
6496 | +# define WARN_ON_RT(condition) do { } while (0) | |
6497 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
6498 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
6499 | +#endif | |
6500 | + | |
6501 | #endif /* __ASSEMBLY__ */ | |
6502 | ||
6503 | #endif | |
6504 | diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h | |
c7c16703 | 6505 | index 535ab2e13d2e..cfc246899473 100644 |
1a6e0f06 JK |
6506 | --- a/include/linux/blk-mq.h |
6507 | +++ b/include/linux/blk-mq.h | |
c7c16703 JK |
6508 | @@ -209,7 +209,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) |
6509 | return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; | |
6510 | } | |
1a6e0f06 | 6511 | |
c7c16703 | 6512 | - |
1a6e0f06 | 6513 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); |
1a6e0f06 JK |
6514 | int blk_mq_request_started(struct request *rq); |
6515 | void blk_mq_start_request(struct request *rq); | |
c7c16703 | 6516 | void blk_mq_end_request(struct request *rq, int error); |
1a6e0f06 | 6517 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h |
1f39f580 | 6518 | index f6a816129856..ec7a4676f8a8 100644 |
1a6e0f06 JK |
6519 | --- a/include/linux/blkdev.h |
6520 | +++ b/include/linux/blkdev.h | |
6521 | @@ -89,6 +89,7 @@ struct request { | |
6522 | struct list_head queuelist; | |
6523 | union { | |
6524 | struct call_single_data csd; | |
6525 | + struct work_struct work; | |
6526 | u64 fifo_time; | |
6527 | }; | |
6528 | ||
6529 | @@ -467,7 +468,7 @@ struct request_queue { | |
6530 | struct throtl_data *td; | |
6531 | #endif | |
6532 | struct rcu_head rcu_head; | |
6533 | - wait_queue_head_t mq_freeze_wq; | |
6534 | + struct swait_queue_head mq_freeze_wq; | |
6535 | struct percpu_ref q_usage_counter; | |
6536 | struct list_head all_q_node; | |
6537 | ||
6538 | diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h | |
6539 | index 8fdcb783197d..d07dbeec7bc1 100644 | |
6540 | --- a/include/linux/bottom_half.h | |
6541 | +++ b/include/linux/bottom_half.h | |
6542 | @@ -3,6 +3,39 @@ | |
6543 | ||
6544 | #include <linux/preempt.h> | |
6545 | ||
6546 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6547 | + | |
6548 | +extern void __local_bh_disable(void); | |
6549 | +extern void _local_bh_enable(void); | |
6550 | +extern void __local_bh_enable(void); | |
6551 | + | |
6552 | +static inline void local_bh_disable(void) | |
6553 | +{ | |
6554 | + __local_bh_disable(); | |
6555 | +} | |
6556 | + | |
6557 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
6558 | +{ | |
6559 | + __local_bh_disable(); | |
6560 | +} | |
6561 | + | |
6562 | +static inline void local_bh_enable(void) | |
6563 | +{ | |
6564 | + __local_bh_enable(); | |
6565 | +} | |
6566 | + | |
6567 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
6568 | +{ | |
6569 | + __local_bh_enable(); | |
6570 | +} | |
6571 | + | |
6572 | +static inline void local_bh_enable_ip(unsigned long ip) | |
6573 | +{ | |
6574 | + __local_bh_enable(); | |
6575 | +} | |
6576 | + | |
6577 | +#else | |
6578 | + | |
6579 | #ifdef CONFIG_TRACE_IRQFLAGS | |
6580 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
6581 | #else | |
6582 | @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) | |
6583 | { | |
6584 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
6585 | } | |
6586 | +#endif | |
6587 | ||
6588 | #endif /* _LINUX_BH_H */ | |
6589 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
6590 | index ebbacd14d450..be5e87f6360a 100644 | |
6591 | --- a/include/linux/buffer_head.h | |
6592 | +++ b/include/linux/buffer_head.h | |
6593 | @@ -75,8 +75,50 @@ struct buffer_head { | |
6594 | struct address_space *b_assoc_map; /* mapping this buffer is | |
6595 | associated with */ | |
6596 | atomic_t b_count; /* users using this buffer_head */ | |
6597 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6598 | + spinlock_t b_uptodate_lock; | |
6599 | +#if IS_ENABLED(CONFIG_JBD2) | |
6600 | + spinlock_t b_state_lock; | |
6601 | + spinlock_t b_journal_head_lock; | |
6602 | +#endif | |
6603 | +#endif | |
6604 | }; | |
6605 | ||
6606 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
6607 | +{ | |
6608 | + unsigned long flags; | |
6609 | + | |
6610 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6611 | + local_irq_save(flags); | |
6612 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
6613 | +#else | |
6614 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
6615 | +#endif | |
6616 | + return flags; | |
6617 | +} | |
6618 | + | |
6619 | +static inline void | |
6620 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
6621 | +{ | |
6622 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
6623 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
6624 | + local_irq_restore(flags); | |
6625 | +#else | |
6626 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
6627 | +#endif | |
6628 | +} | |
6629 | + | |
6630 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
6631 | +{ | |
6632 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6633 | + spin_lock_init(&bh->b_uptodate_lock); | |
6634 | +#if IS_ENABLED(CONFIG_JBD2) | |
6635 | + spin_lock_init(&bh->b_state_lock); | |
6636 | + spin_lock_init(&bh->b_journal_head_lock); | |
6637 | +#endif | |
6638 | +#endif | |
6639 | +} | |
6640 | + | |
6641 | /* | |
6642 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
6643 | * and buffer_foo() functions. | |
6644 | diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h | |
6645 | index 5b17de62c962..56027cc01a56 100644 | |
6646 | --- a/include/linux/cgroup-defs.h | |
6647 | +++ b/include/linux/cgroup-defs.h | |
6648 | @@ -16,6 +16,7 @@ | |
6649 | #include <linux/percpu-refcount.h> | |
6650 | #include <linux/percpu-rwsem.h> | |
6651 | #include <linux/workqueue.h> | |
6652 | +#include <linux/swork.h> | |
6653 | ||
6654 | #ifdef CONFIG_CGROUPS | |
6655 | ||
6656 | @@ -137,6 +138,7 @@ struct cgroup_subsys_state { | |
6657 | /* percpu_ref killing and RCU release */ | |
6658 | struct rcu_head rcu_head; | |
6659 | struct work_struct destroy_work; | |
6660 | + struct swork_event destroy_swork; | |
6661 | }; | |
6662 | ||
6663 | /* | |
6664 | diff --git a/include/linux/completion.h b/include/linux/completion.h | |
6665 | index 5d5aaae3af43..3bca1590e29f 100644 | |
6666 | --- a/include/linux/completion.h | |
6667 | +++ b/include/linux/completion.h | |
6668 | @@ -7,8 +7,7 @@ | |
6669 | * Atomic wait-for-completion handler data structures. | |
6670 | * See kernel/sched/completion.c for details. | |
6671 | */ | |
6672 | - | |
6673 | -#include <linux/wait.h> | |
6674 | +#include <linux/swait.h> | |
6675 | ||
6676 | /* | |
6677 | * struct completion - structure used to maintain state for a "completion" | |
6678 | @@ -24,11 +23,11 @@ | |
6679 | */ | |
6680 | struct completion { | |
6681 | unsigned int done; | |
6682 | - wait_queue_head_t wait; | |
6683 | + struct swait_queue_head wait; | |
6684 | }; | |
6685 | ||
6686 | #define COMPLETION_INITIALIZER(work) \ | |
6687 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6688 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
6689 | ||
6690 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
6691 | ({ init_completion(&work); work; }) | |
6692 | @@ -73,7 +72,7 @@ struct completion { | |
6693 | static inline void init_completion(struct completion *x) | |
6694 | { | |
6695 | x->done = 0; | |
6696 | - init_waitqueue_head(&x->wait); | |
6697 | + init_swait_queue_head(&x->wait); | |
6698 | } | |
6699 | ||
6700 | /** | |
6701 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | |
c7c16703 | 6702 | index e571128ad99a..5e52d28c20c1 100644 |
1a6e0f06 JK |
6703 | --- a/include/linux/cpu.h |
6704 | +++ b/include/linux/cpu.h | |
c7c16703 | 6705 | @@ -182,6 +182,8 @@ extern void get_online_cpus(void); |
1a6e0f06 JK |
6706 | extern void put_online_cpus(void); |
6707 | extern void cpu_hotplug_disable(void); | |
6708 | extern void cpu_hotplug_enable(void); | |
6709 | +extern void pin_current_cpu(void); | |
6710 | +extern void unpin_current_cpu(void); | |
6711 | #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) | |
6712 | #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) | |
6713 | #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) | |
c7c16703 | 6714 | @@ -199,6 +201,8 @@ static inline void cpu_hotplug_done(void) {} |
1a6e0f06 JK |
6715 | #define put_online_cpus() do { } while (0) |
6716 | #define cpu_hotplug_disable() do { } while (0) | |
6717 | #define cpu_hotplug_enable() do { } while (0) | |
6718 | +static inline void pin_current_cpu(void) { } | |
6719 | +static inline void unpin_current_cpu(void) { } | |
6720 | #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6721 | #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
6722 | /* These aren't inline functions due to a GCC bug. */ | |
6723 | diff --git a/include/linux/dcache.h b/include/linux/dcache.h | |
c7c16703 | 6724 | index 5beed7b30561..61cab7ef458e 100644 |
1a6e0f06 JK |
6725 | --- a/include/linux/dcache.h |
6726 | +++ b/include/linux/dcache.h | |
6727 | @@ -11,6 +11,7 @@ | |
6728 | #include <linux/rcupdate.h> | |
6729 | #include <linux/lockref.h> | |
6730 | #include <linux/stringhash.h> | |
6731 | +#include <linux/wait.h> | |
6732 | ||
6733 | struct path; | |
6734 | struct vfsmount; | |
6735 | @@ -100,7 +101,7 @@ struct dentry { | |
6736 | ||
6737 | union { | |
6738 | struct list_head d_lru; /* LRU list */ | |
6739 | - wait_queue_head_t *d_wait; /* in-lookup ones only */ | |
6740 | + struct swait_queue_head *d_wait; /* in-lookup ones only */ | |
6741 | }; | |
6742 | struct list_head d_child; /* child of parent list */ | |
6743 | struct list_head d_subdirs; /* our children */ | |
6744 | @@ -230,7 +231,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op | |
6745 | extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
6746 | extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
6747 | extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, | |
6748 | - wait_queue_head_t *); | |
6749 | + struct swait_queue_head *); | |
6750 | extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
6751 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
6752 | extern struct dentry * d_exact_alias(struct dentry *, struct inode *); | |
6753 | diff --git a/include/linux/delay.h b/include/linux/delay.h | |
6754 | index a6ecb34cf547..37caab306336 100644 | |
6755 | --- a/include/linux/delay.h | |
6756 | +++ b/include/linux/delay.h | |
6757 | @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) | |
6758 | msleep(seconds * 1000); | |
6759 | } | |
6760 | ||
6761 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6762 | +extern void cpu_chill(void); | |
6763 | +#else | |
6764 | +# define cpu_chill() cpu_relax() | |
6765 | +#endif | |
6766 | + | |
6767 | #endif /* defined(_LINUX_DELAY_H) */ | |
1a6e0f06 JK |
6768 | diff --git a/include/linux/highmem.h b/include/linux/highmem.h |
6769 | index bb3f3297062a..a117a33ef72c 100644 | |
6770 | --- a/include/linux/highmem.h | |
6771 | +++ b/include/linux/highmem.h | |
6772 | @@ -7,6 +7,7 @@ | |
6773 | #include <linux/mm.h> | |
6774 | #include <linux/uaccess.h> | |
6775 | #include <linux/hardirq.h> | |
6776 | +#include <linux/sched.h> | |
6777 | ||
6778 | #include <asm/cacheflush.h> | |
6779 | ||
6780 | @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) | |
6781 | ||
6782 | static inline void *kmap_atomic(struct page *page) | |
6783 | { | |
6784 | - preempt_disable(); | |
6785 | + preempt_disable_nort(); | |
6786 | pagefault_disable(); | |
6787 | return page_address(page); | |
6788 | } | |
6789 | @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |
6790 | static inline void __kunmap_atomic(void *addr) | |
6791 | { | |
6792 | pagefault_enable(); | |
6793 | - preempt_enable(); | |
6794 | + preempt_enable_nort(); | |
6795 | } | |
6796 | ||
6797 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
6798 | @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) | |
6799 | ||
6800 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
6801 | ||
6802 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6803 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
6804 | +#endif | |
6805 | ||
6806 | static inline int kmap_atomic_idx_push(void) | |
6807 | { | |
6808 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6809 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
6810 | ||
6811 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6812 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6813 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
6814 | BUG_ON(idx >= KM_TYPE_NR); | |
6815 | -#endif | |
6816 | +# endif | |
6817 | return idx; | |
6818 | +#else | |
6819 | + current->kmap_idx++; | |
6820 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
6821 | + return current->kmap_idx - 1; | |
6822 | +#endif | |
6823 | } | |
6824 | ||
6825 | static inline int kmap_atomic_idx(void) | |
6826 | { | |
6827 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6828 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
6829 | +#else | |
6830 | + return current->kmap_idx - 1; | |
6831 | +#endif | |
6832 | } | |
6833 | ||
6834 | static inline void kmap_atomic_idx_pop(void) | |
6835 | { | |
6836 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
6837 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
6838 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6839 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
6840 | ||
6841 | BUG_ON(idx < 0); | |
6842 | -#else | |
6843 | +# else | |
6844 | __this_cpu_dec(__kmap_atomic_idx); | |
6845 | +# endif | |
6846 | +#else | |
6847 | + current->kmap_idx--; | |
6848 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
6849 | + BUG_ON(current->kmap_idx < 0); | |
6850 | +# endif | |
6851 | #endif | |
6852 | } | |
6853 | ||
6854 | diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h | |
5c015b7c | 6855 | index 5e00f80b1535..a34e10b55cde 100644 |
1a6e0f06 JK |
6856 | --- a/include/linux/hrtimer.h |
6857 | +++ b/include/linux/hrtimer.h | |
6858 | @@ -87,6 +87,9 @@ enum hrtimer_restart { | |
6859 | * @function: timer expiry callback function | |
6860 | * @base: pointer to the timer base (per cpu and per clock) | |
6861 | * @state: state information (See bit values above) | |
6862 | + * @cb_entry: list entry to defer timers from hardirq context | |
6863 | + * @irqsafe: timer can run in hardirq context | |
6864 | + * @praecox: timer expiry time if expired at the time of programming | |
6865 | * @is_rel: Set if the timer was armed relative | |
6866 | * @start_pid: timer statistics field to store the pid of the task which | |
6867 | * started the timer | |
6868 | @@ -103,6 +106,11 @@ struct hrtimer { | |
6869 | enum hrtimer_restart (*function)(struct hrtimer *); | |
6870 | struct hrtimer_clock_base *base; | |
6871 | u8 state; | |
6872 | + struct list_head cb_entry; | |
6873 | + int irqsafe; | |
6874 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
6875 | + ktime_t praecox; | |
6876 | +#endif | |
6877 | u8 is_rel; | |
6878 | #ifdef CONFIG_TIMER_STATS | |
6879 | int start_pid; | |
6880 | @@ -123,11 +131,7 @@ struct hrtimer_sleeper { | |
6881 | struct task_struct *task; | |
6882 | }; | |
6883 | ||
6884 | -#ifdef CONFIG_64BIT | |
6885 | # define HRTIMER_CLOCK_BASE_ALIGN 64 | |
6886 | -#else | |
6887 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
6888 | -#endif | |
6889 | ||
6890 | /** | |
6891 | * struct hrtimer_clock_base - the timer base for a specific clock | |
6892 | @@ -136,6 +140,7 @@ struct hrtimer_sleeper { | |
6893 | * timer to a base on another cpu. | |
6894 | * @clockid: clock id for per_cpu support | |
6895 | * @active: red black tree root node for the active timers | |
6896 | + * @expired: list head for deferred timers. | |
6897 | * @get_time: function to retrieve the current time of the clock | |
6898 | * @offset: offset of this clock to the monotonic base | |
6899 | */ | |
6900 | @@ -144,6 +149,7 @@ struct hrtimer_clock_base { | |
6901 | int index; | |
6902 | clockid_t clockid; | |
6903 | struct timerqueue_head active; | |
6904 | + struct list_head expired; | |
6905 | ktime_t (*get_time)(void); | |
6906 | ktime_t offset; | |
6907 | } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
6908 | @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { | |
6909 | raw_spinlock_t lock; | |
6910 | seqcount_t seq; | |
6911 | struct hrtimer *running; | |
6912 | + struct hrtimer *running_soft; | |
6913 | unsigned int cpu; | |
6914 | unsigned int active_bases; | |
6915 | unsigned int clock_was_set_seq; | |
6916 | @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { | |
6917 | unsigned int nr_hangs; | |
6918 | unsigned int max_hang_time; | |
6919 | #endif | |
6920 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6921 | + wait_queue_head_t wait; | |
6922 | +#endif | |
6923 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
6924 | } ____cacheline_aligned; | |
6925 | ||
6926 | @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) | |
6927 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
6928 | } | |
6929 | ||
6930 | +/* Softirq preemption could deadlock timer removal */ | |
6931 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6932 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
6933 | +#else | |
6934 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
6935 | +#endif | |
6936 | + | |
6937 | /* Query timers: */ | |
6938 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
6939 | ||
5c015b7c | 6940 | @@ -436,9 +453,15 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) |
1a6e0f06 JK |
6941 | * Helper function to check, whether the timer is running the callback |
6942 | * function | |
6943 | */ | |
6944 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
6945 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
6946 | { | |
5c015b7c JK |
6947 | - return timer->base->cpu_base->running == timer; |
6948 | + if (timer->base->cpu_base->running == timer) | |
6949 | + return 1; | |
6950 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6951 | + if (timer->base->cpu_base->running_soft == timer) | |
6952 | + return 1; | |
6953 | +#endif | |
6954 | + return 0; | |
1a6e0f06 | 6955 | } |
5c015b7c JK |
6956 | |
6957 | /* Forward a hrtimer so it expires after now: */ | |
1a6e0f06 JK |
6958 | diff --git a/include/linux/idr.h b/include/linux/idr.h |
6959 | index 083d61e92706..5899796f50cb 100644 | |
6960 | --- a/include/linux/idr.h | |
6961 | +++ b/include/linux/idr.h | |
6962 | @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); | |
6963 | * Each idr_preload() should be matched with an invocation of this | |
6964 | * function. See idr_preload() for details. | |
6965 | */ | |
6966 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6967 | +void idr_preload_end(void); | |
6968 | +#else | |
6969 | static inline void idr_preload_end(void) | |
6970 | { | |
6971 | preempt_enable(); | |
6972 | } | |
6973 | +#endif | |
6974 | ||
6975 | /** | |
6976 | * idr_find - return pointer for given id | |
6977 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | |
7c18450a | 6978 | index 325f649d77ff..a56e263f5005 100644 |
1a6e0f06 JK |
6979 | --- a/include/linux/init_task.h |
6980 | +++ b/include/linux/init_task.h | |
c7c16703 | 6981 | @@ -150,6 +150,12 @@ extern struct task_group root_task_group; |
1a6e0f06 JK |
6982 | # define INIT_PERF_EVENTS(tsk) |
6983 | #endif | |
6984 | ||
6985 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6986 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
6987 | +#else | |
6988 | +# define INIT_TIMER_LIST | |
6989 | +#endif | |
6990 | + | |
6991 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
6992 | # define INIT_VTIME(tsk) \ | |
6993 | .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ | |
7c18450a JK |
6994 | @@ -164,6 +170,7 @@ extern struct task_group root_task_group; |
6995 | #ifdef CONFIG_RT_MUTEXES | |
6996 | # define INIT_RT_MUTEXES(tsk) \ | |
6997 | .pi_waiters = RB_ROOT, \ | |
6998 | + .pi_top_task = NULL, \ | |
6999 | .pi_waiters_leftmost = NULL, | |
7000 | #else | |
7001 | # define INIT_RT_MUTEXES(tsk) | |
7002 | @@ -250,6 +257,7 @@ extern struct task_group root_task_group; | |
1a6e0f06 JK |
7003 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ |
7004 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
7005 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
7006 | + INIT_TIMER_LIST \ | |
7007 | .pids = { \ | |
7008 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
7009 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
7010 | diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h | |
c7c16703 | 7011 | index 72f0721f75e7..480972ae47d3 100644 |
1a6e0f06 JK |
7012 | --- a/include/linux/interrupt.h |
7013 | +++ b/include/linux/interrupt.h | |
7014 | @@ -14,6 +14,7 @@ | |
7015 | #include <linux/hrtimer.h> | |
7016 | #include <linux/kref.h> | |
7017 | #include <linux/workqueue.h> | |
7018 | +#include <linux/swork.h> | |
7019 | ||
7020 | #include <linux/atomic.h> | |
7021 | #include <asm/ptrace.h> | |
7022 | @@ -61,6 +62,7 @@ | |
7023 | * interrupt handler after suspending interrupts. For system | |
7024 | * wakeup devices users need to implement wakeup detection in | |
7025 | * their interrupt handlers. | |
7026 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
7027 | */ | |
7028 | #define IRQF_SHARED 0x00000080 | |
7029 | #define IRQF_PROBE_SHARED 0x00000100 | |
7030 | @@ -74,6 +76,7 @@ | |
7031 | #define IRQF_NO_THREAD 0x00010000 | |
7032 | #define IRQF_EARLY_RESUME 0x00020000 | |
7033 | #define IRQF_COND_SUSPEND 0x00040000 | |
7034 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
7035 | ||
7036 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
7037 | ||
7038 | @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); | |
7039 | #ifdef CONFIG_LOCKDEP | |
7040 | # define local_irq_enable_in_hardirq() do { } while (0) | |
7041 | #else | |
7042 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
7043 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
7044 | #endif | |
7045 | ||
7046 | extern void disable_irq_nosync(unsigned int irq); | |
7047 | @@ -216,6 +219,7 @@ extern void resume_device_irqs(void); | |
7048 | * struct irq_affinity_notify - context for notification of IRQ affinity changes | |
7049 | * @irq: Interrupt to which notification applies | |
7050 | * @kref: Reference count, for internal use | |
7051 | + * @swork: Swork item, for internal use | |
7052 | * @work: Work item, for internal use | |
7053 | * @notify: Function to be called on change. This will be | |
7054 | * called in process context. | |
7055 | @@ -227,7 +231,11 @@ extern void resume_device_irqs(void); | |
7056 | struct irq_affinity_notify { | |
7057 | unsigned int irq; | |
7058 | struct kref kref; | |
7059 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7060 | + struct swork_event swork; | |
7061 | +#else | |
7062 | struct work_struct work; | |
7063 | +#endif | |
7064 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
7065 | void (*release)(struct kref *ref); | |
7066 | }; | |
c7c16703 | 7067 | @@ -406,9 +414,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, |
1a6e0f06 JK |
7068 | bool state); |
7069 | ||
7070 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
7071 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
7072 | extern bool force_irqthreads; | |
7073 | +# else | |
7074 | +# define force_irqthreads (true) | |
7075 | +# endif | |
7076 | #else | |
7077 | -#define force_irqthreads (0) | |
7078 | +#define force_irqthreads (false) | |
7079 | #endif | |
7080 | ||
7081 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
c7c16703 | 7082 | @@ -465,9 +477,10 @@ struct softirq_action |
1a6e0f06 JK |
7083 | void (*action)(struct softirq_action *); |
7084 | }; | |
7085 | ||
7086 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
7087 | asmlinkage void do_softirq(void); | |
7088 | asmlinkage void __do_softirq(void); | |
7089 | - | |
7090 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
7091 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
7092 | void do_softirq_own_stack(void); | |
7093 | #else | |
c7c16703 | 7094 | @@ -476,13 +489,25 @@ static inline void do_softirq_own_stack(void) |
1a6e0f06 JK |
7095 | __do_softirq(); |
7096 | } | |
7097 | #endif | |
7098 | +#else | |
7099 | +extern void thread_do_softirq(void); | |
7100 | +#endif | |
7101 | ||
7102 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
7103 | extern void softirq_init(void); | |
7104 | extern void __raise_softirq_irqoff(unsigned int nr); | |
7105 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7106 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
7107 | +#else | |
7108 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
7109 | +{ | |
7110 | + __raise_softirq_irqoff(nr); | |
7111 | +} | |
7112 | +#endif | |
7113 | ||
7114 | extern void raise_softirq_irqoff(unsigned int nr); | |
7115 | extern void raise_softirq(unsigned int nr); | |
7116 | +extern void softirq_check_pending_idle(void); | |
7117 | ||
7118 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
7119 | ||
c7c16703 | 7120 | @@ -504,8 +529,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) |
1a6e0f06 JK |
7121 | to be executed on some cpu at least once after this. |
7122 | * If the tasklet is already scheduled, but its execution is still not | |
7123 | started, it will be executed only once. | |
7124 | - * If this tasklet is already running on another CPU (or schedule is called | |
7125 | - from tasklet itself), it is rescheduled for later. | |
7126 | + * If this tasklet is already running on another CPU, it is rescheduled | |
7127 | + for later. | |
7128 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
7129 | * Tasklet is strictly serialized wrt itself, but not | |
7130 | wrt another tasklets. If client needs some intertask synchronization, | |
7131 | he makes it with spinlocks. | |
c7c16703 | 7132 | @@ -530,27 +556,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } |
1a6e0f06 JK |
7133 | enum |
7134 | { | |
7135 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
7136 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
7137 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
7138 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
7139 | }; | |
7140 | ||
7141 | -#ifdef CONFIG_SMP | |
7142 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
7143 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
7144 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
7145 | + | |
7146 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
7147 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
7148 | { | |
7149 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
7150 | } | |
7151 | ||
7152 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
7153 | +{ | |
7154 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
7155 | +} | |
7156 | + | |
7157 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
7158 | { | |
7159 | smp_mb__before_atomic(); | |
7160 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
7161 | } | |
7162 | ||
7163 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
7164 | -{ | |
7165 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
7166 | -} | |
7167 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
7168 | + | |
7169 | #else | |
7170 | #define tasklet_trylock(t) 1 | |
7171 | +#define tasklet_tryunlock(t) 1 | |
7172 | #define tasklet_unlock_wait(t) do { } while (0) | |
7173 | #define tasklet_unlock(t) do { } while (0) | |
7174 | #endif | |
c7c16703 | 7175 | @@ -599,12 +634,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) |
1a6e0f06 JK |
7176 | smp_mb(); |
7177 | } | |
7178 | ||
7179 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
7180 | -{ | |
7181 | - smp_mb__before_atomic(); | |
7182 | - atomic_dec(&t->count); | |
7183 | -} | |
7184 | - | |
7185 | +extern void tasklet_enable(struct tasklet_struct *t); | |
7186 | extern void tasklet_kill(struct tasklet_struct *t); | |
7187 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
7188 | extern void tasklet_init(struct tasklet_struct *t, | |
c7c16703 | 7189 | @@ -635,6 +665,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) |
1a6e0f06 JK |
7190 | tasklet_kill(&ttimer->tasklet); |
7191 | } | |
7192 | ||
7193 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7194 | +extern void softirq_early_init(void); | |
7195 | +#else | |
7196 | +static inline void softirq_early_init(void) { } | |
7197 | +#endif | |
7198 | + | |
7199 | /* | |
7200 | * Autoprobing for irqs: | |
7201 | * | |
7202 | diff --git a/include/linux/irq.h b/include/linux/irq.h | |
5c015b7c | 7203 | index 39e3254e5769..8ebac94fbb9f 100644 |
1a6e0f06 JK |
7204 | --- a/include/linux/irq.h |
7205 | +++ b/include/linux/irq.h | |
7206 | @@ -72,6 +72,7 @@ enum irqchip_irq_state; | |
7207 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
7208 | * it from the spurious interrupt detection | |
7209 | * mechanism and from core side polling. | |
7210 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
7211 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
7212 | */ | |
7213 | enum { | |
7214 | @@ -99,13 +100,14 @@ enum { | |
7215 | IRQ_PER_CPU_DEVID = (1 << 17), | |
7216 | IRQ_IS_POLLED = (1 << 18), | |
7217 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
7218 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
7219 | }; | |
7220 | ||
7221 | #define IRQF_MODIFY_MASK \ | |
7222 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
7223 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
7224 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
7225 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
7226 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
7227 | ||
7228 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
7229 | ||
7230 | diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h | |
7231 | index 47b9ebd4a74f..2543aab05daa 100644 | |
7232 | --- a/include/linux/irq_work.h | |
7233 | +++ b/include/linux/irq_work.h | |
7234 | @@ -16,6 +16,7 @@ | |
7235 | #define IRQ_WORK_BUSY 2UL | |
7236 | #define IRQ_WORK_FLAGS 3UL | |
7237 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
7238 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
7239 | ||
7240 | struct irq_work { | |
7241 | unsigned long flags; | |
7242 | @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } | |
7243 | static inline void irq_work_run(void) { } | |
7244 | #endif | |
7245 | ||
7246 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
7247 | +void irq_work_tick_soft(void); | |
7248 | +#else | |
7249 | +static inline void irq_work_tick_soft(void) { } | |
7250 | +#endif | |
7251 | + | |
7252 | #endif /* _LINUX_IRQ_WORK_H */ | |
7253 | diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h | |
c7c16703 | 7254 | index c9be57931b58..eeeb540971ae 100644 |
1a6e0f06 JK |
7255 | --- a/include/linux/irqdesc.h |
7256 | +++ b/include/linux/irqdesc.h | |
c7c16703 | 7257 | @@ -66,6 +66,7 @@ struct irq_desc { |
1a6e0f06 JK |
7258 | unsigned int irqs_unhandled; |
7259 | atomic_t threads_handled; | |
7260 | int threads_handled_last; | |
7261 | + u64 random_ip; | |
7262 | raw_spinlock_t lock; | |
7263 | struct cpumask *percpu_enabled; | |
7264 | const struct cpumask *percpu_affinity; | |
7265 | diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h | |
7266 | index 5dd1272d1ab2..9b77034f7c5e 100644 | |
7267 | --- a/include/linux/irqflags.h | |
7268 | +++ b/include/linux/irqflags.h | |
7269 | @@ -25,8 +25,6 @@ | |
7270 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | |
7271 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | |
7272 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | |
7273 | -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
7274 | -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
7275 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
7276 | #else | |
7277 | # define trace_hardirqs_on() do { } while (0) | |
7278 | @@ -39,9 +37,15 @@ | |
7279 | # define trace_softirqs_enabled(p) 0 | |
7280 | # define trace_hardirq_enter() do { } while (0) | |
7281 | # define trace_hardirq_exit() do { } while (0) | |
7282 | +# define INIT_TRACE_IRQFLAGS | |
7283 | +#endif | |
7284 | + | |
7285 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
7286 | +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
7287 | +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
7288 | +#else | |
7289 | # define lockdep_softirq_enter() do { } while (0) | |
7290 | # define lockdep_softirq_exit() do { } while (0) | |
7291 | -# define INIT_TRACE_IRQFLAGS | |
7292 | #endif | |
7293 | ||
7294 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
7295 | @@ -148,4 +152,23 @@ | |
7296 | ||
7297 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
7298 | ||
7299 | +/* | |
7300 | + * local_irq* variants depending on RT/!RT | |
7301 | + */ | |
7302 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7303 | +# define local_irq_disable_nort() do { } while (0) | |
7304 | +# define local_irq_enable_nort() do { } while (0) | |
7305 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
7306 | +# define local_irq_restore_nort(flags) (void)(flags) | |
7307 | +# define local_irq_disable_rt() local_irq_disable() | |
7308 | +# define local_irq_enable_rt() local_irq_enable() | |
7309 | +#else | |
7310 | +# define local_irq_disable_nort() local_irq_disable() | |
7311 | +# define local_irq_enable_nort() local_irq_enable() | |
7312 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
7313 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
7314 | +# define local_irq_disable_rt() do { } while (0) | |
7315 | +# define local_irq_enable_rt() do { } while (0) | |
7316 | +#endif | |
7317 | + | |
7318 | #endif | |
7319 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | |
7320 | index dfaa1f4dcb0c..d57dd06544a1 100644 | |
7321 | --- a/include/linux/jbd2.h | |
7322 | +++ b/include/linux/jbd2.h | |
7323 | @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) | |
7324 | ||
7325 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
7326 | { | |
7327 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7328 | bit_spin_lock(BH_State, &bh->b_state); | |
7329 | +#else | |
7330 | + spin_lock(&bh->b_state_lock); | |
7331 | +#endif | |
7332 | } | |
7333 | ||
7334 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
7335 | { | |
7336 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7337 | return bit_spin_trylock(BH_State, &bh->b_state); | |
7338 | +#else | |
7339 | + return spin_trylock(&bh->b_state_lock); | |
7340 | +#endif | |
7341 | } | |
7342 | ||
7343 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
7344 | { | |
7345 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7346 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
7347 | +#else | |
7348 | + return spin_is_locked(&bh->b_state_lock); | |
7349 | +#endif | |
7350 | } | |
7351 | ||
7352 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
7353 | { | |
7354 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7355 | bit_spin_unlock(BH_State, &bh->b_state); | |
7356 | +#else | |
7357 | + spin_unlock(&bh->b_state_lock); | |
7358 | +#endif | |
7359 | } | |
7360 | ||
7361 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
7362 | { | |
7363 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7364 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
7365 | +#else | |
7366 | + spin_lock(&bh->b_journal_head_lock); | |
7367 | +#endif | |
7368 | } | |
7369 | ||
7370 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
7371 | { | |
7372 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7373 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
7374 | +#else | |
7375 | + spin_unlock(&bh->b_journal_head_lock); | |
7376 | +#endif | |
7377 | } | |
7378 | ||
7379 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
7380 | diff --git a/include/linux/kdb.h b/include/linux/kdb.h | |
7381 | index 410decacff8f..0861bebfc188 100644 | |
7382 | --- a/include/linux/kdb.h | |
7383 | +++ b/include/linux/kdb.h | |
7384 | @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, | |
7385 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
7386 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
7387 | ||
7388 | +#define in_kdb_printk() (kdb_trap_printk) | |
7389 | extern void kdb_init(int level); | |
7390 | ||
7391 | /* Access to kdb specific polling devices */ | |
7392 | @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, | |
7393 | extern int kdb_unregister(char *); | |
7394 | #else /* ! CONFIG_KGDB_KDB */ | |
7395 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
7396 | +#define in_kdb_printk() (0) | |
7397 | static inline void kdb_init(int level) {} | |
7398 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
7399 | char *help, short minlen) { return 0; } | |
7400 | diff --git a/include/linux/kernel.h b/include/linux/kernel.h | |
c7c16703 | 7401 | index bc6ed52a39b9..7894d55e4998 100644 |
1a6e0f06 JK |
7402 | --- a/include/linux/kernel.h |
7403 | +++ b/include/linux/kernel.h | |
7404 | @@ -194,6 +194,9 @@ extern int _cond_resched(void); | |
7405 | */ | |
7406 | # define might_sleep() \ | |
7407 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7408 | + | |
7409 | +# define might_sleep_no_state_check() \ | |
7410 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
7411 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
7412 | #else | |
7413 | static inline void ___might_sleep(const char *file, int line, | |
7414 | @@ -201,6 +204,7 @@ extern int _cond_resched(void); | |
7415 | static inline void __might_sleep(const char *file, int line, | |
7416 | int preempt_offset) { } | |
7417 | # define might_sleep() do { might_resched(); } while (0) | |
7418 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
7419 | # define sched_annotate_sleep() do { } while (0) | |
7420 | #endif | |
7421 | ||
c7c16703 | 7422 | @@ -488,6 +492,7 @@ extern enum system_states { |
1a6e0f06 JK |
7423 | SYSTEM_HALT, |
7424 | SYSTEM_POWER_OFF, | |
7425 | SYSTEM_RESTART, | |
7426 | + SYSTEM_SUSPEND, | |
7427 | } system_state; | |
7428 | ||
7429 | #define TAINT_PROPRIETARY_MODULE 0 | |
1a6e0f06 JK |
7430 | diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h |
7431 | index cb483305e1f5..4e5062316bb6 100644 | |
7432 | --- a/include/linux/list_bl.h | |
7433 | +++ b/include/linux/list_bl.h | |
7434 | @@ -2,6 +2,7 @@ | |
7435 | #define _LINUX_LIST_BL_H | |
7436 | ||
7437 | #include <linux/list.h> | |
7438 | +#include <linux/spinlock.h> | |
7439 | #include <linux/bit_spinlock.h> | |
7440 | ||
7441 | /* | |
7442 | @@ -32,13 +33,24 @@ | |
7443 | ||
7444 | struct hlist_bl_head { | |
7445 | struct hlist_bl_node *first; | |
7446 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7447 | + raw_spinlock_t lock; | |
7448 | +#endif | |
7449 | }; | |
7450 | ||
7451 | struct hlist_bl_node { | |
7452 | struct hlist_bl_node *next, **pprev; | |
7453 | }; | |
7454 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
7455 | - ((ptr)->first = NULL) | |
7456 | + | |
7457 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7458 | +#define INIT_HLIST_BL_HEAD(h) \ | |
7459 | +do { \ | |
7460 | + (h)->first = NULL; \ | |
7461 | + raw_spin_lock_init(&(h)->lock); \ | |
7462 | +} while (0) | |
7463 | +#else | |
7464 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
7465 | +#endif | |
7466 | ||
7467 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
7468 | { | |
7469 | @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) | |
7470 | ||
7471 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
7472 | { | |
7473 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7474 | bit_spin_lock(0, (unsigned long *)b); | |
7475 | +#else | |
7476 | + raw_spin_lock(&b->lock); | |
7477 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7478 | + __set_bit(0, (unsigned long *)b); | |
7479 | +#endif | |
7480 | +#endif | |
7481 | } | |
7482 | ||
7483 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
7484 | { | |
7485 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7486 | __bit_spin_unlock(0, (unsigned long *)b); | |
7487 | +#else | |
7488 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
7489 | + __clear_bit(0, (unsigned long *)b); | |
7490 | +#endif | |
7491 | + raw_spin_unlock(&b->lock); | |
7492 | +#endif | |
7493 | } | |
7494 | ||
7495 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
7496 | diff --git a/include/linux/locallock.h b/include/linux/locallock.h | |
7497 | new file mode 100644 | |
7498 | index 000000000000..845c77f1a5ca | |
7499 | --- /dev/null | |
7500 | +++ b/include/linux/locallock.h | |
7501 | @@ -0,0 +1,278 @@ | |
7502 | +#ifndef _LINUX_LOCALLOCK_H | |
7503 | +#define _LINUX_LOCALLOCK_H | |
7504 | + | |
7505 | +#include <linux/percpu.h> | |
7506 | +#include <linux/spinlock.h> | |
7507 | + | |
7508 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7509 | + | |
7510 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
7511 | +# define LL_WARN(cond) WARN_ON(cond) | |
7512 | +#else | |
7513 | +# define LL_WARN(cond) do { } while (0) | |
7514 | +#endif | |
7515 | + | |
7516 | +/* | |
7517 | + * per cpu lock based substitute for local_irq_*() | |
7518 | + */ | |
7519 | +struct local_irq_lock { | |
7520 | + spinlock_t lock; | |
7521 | + struct task_struct *owner; | |
7522 | + int nestcnt; | |
7523 | + unsigned long flags; | |
7524 | +}; | |
7525 | + | |
7526 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
7527 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
7528 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
7529 | + | |
7530 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
7531 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
7532 | + | |
7533 | +#define local_irq_lock_init(lvar) \ | |
7534 | + do { \ | |
7535 | + int __cpu; \ | |
7536 | + for_each_possible_cpu(__cpu) \ | |
7537 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
7538 | + } while (0) | |
7539 | + | |
7540 | +/* | |
7541 | + * spin_lock|trylock|unlock_local flavour that does not migrate disable | |
7542 | + * used for __local_lock|trylock|unlock where get_local_var/put_local_var | |
7543 | + * already takes care of the migrate_disable/enable | |
7544 | + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. | |
7545 | + */ | |
7546 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7547 | +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) | |
7548 | +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) | |
7549 | +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) | |
7550 | +#else | |
7551 | +# define spin_lock_local(lock) spin_lock(lock) | |
7552 | +# define spin_trylock_local(lock) spin_trylock(lock) | |
7553 | +# define spin_unlock_local(lock) spin_unlock(lock) | |
7554 | +#endif | |
7555 | + | |
7556 | +static inline void __local_lock(struct local_irq_lock *lv) | |
7557 | +{ | |
7558 | + if (lv->owner != current) { | |
7559 | + spin_lock_local(&lv->lock); | |
7560 | + LL_WARN(lv->owner); | |
7561 | + LL_WARN(lv->nestcnt); | |
7562 | + lv->owner = current; | |
7563 | + } | |
7564 | + lv->nestcnt++; | |
7565 | +} | |
7566 | + | |
7567 | +#define local_lock(lvar) \ | |
7568 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
7569 | + | |
7570 | +#define local_lock_on(lvar, cpu) \ | |
7571 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
7572 | + | |
7573 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
7574 | +{ | |
7575 | + if (lv->owner != current && spin_trylock_local(&lv->lock)) { | |
7576 | + LL_WARN(lv->owner); | |
7577 | + LL_WARN(lv->nestcnt); | |
7578 | + lv->owner = current; | |
7579 | + lv->nestcnt = 1; | |
7580 | + return 1; | |
7581 | + } | |
7582 | + return 0; | |
7583 | +} | |
7584 | + | |
7585 | +#define local_trylock(lvar) \ | |
7586 | + ({ \ | |
7587 | + int __locked; \ | |
7588 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
7589 | + if (!__locked) \ | |
7590 | + put_local_var(lvar); \ | |
7591 | + __locked; \ | |
7592 | + }) | |
7593 | + | |
7594 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
7595 | +{ | |
7596 | + LL_WARN(lv->nestcnt == 0); | |
7597 | + LL_WARN(lv->owner != current); | |
7598 | + if (--lv->nestcnt) | |
7599 | + return; | |
7600 | + | |
7601 | + lv->owner = NULL; | |
7602 | + spin_unlock_local(&lv->lock); | |
7603 | +} | |
7604 | + | |
7605 | +#define local_unlock(lvar) \ | |
7606 | + do { \ | |
7607 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
7608 | + put_local_var(lvar); \ | |
7609 | + } while (0) | |
7610 | + | |
7611 | +#define local_unlock_on(lvar, cpu) \ | |
7612 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
7613 | + | |
7614 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
7615 | +{ | |
7616 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
7617 | + LL_WARN(lv->owner); | |
7618 | + LL_WARN(lv->nestcnt); | |
7619 | + lv->owner = current; | |
7620 | + lv->nestcnt = 1; | |
7621 | +} | |
7622 | + | |
7623 | +#define local_lock_irq(lvar) \ | |
7624 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
7625 | + | |
7626 | +#define local_lock_irq_on(lvar, cpu) \ | |
7627 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
7628 | + | |
7629 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
7630 | +{ | |
7631 | + LL_WARN(!lv->nestcnt); | |
7632 | + LL_WARN(lv->owner != current); | |
7633 | + lv->owner = NULL; | |
7634 | + lv->nestcnt = 0; | |
7635 | + spin_unlock_irq(&lv->lock); | |
7636 | +} | |
7637 | + | |
7638 | +#define local_unlock_irq(lvar) \ | |
7639 | + do { \ | |
7640 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
7641 | + put_local_var(lvar); \ | |
7642 | + } while (0) | |
7643 | + | |
7644 | +#define local_unlock_irq_on(lvar, cpu) \ | |
7645 | + do { \ | |
7646 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
7647 | + } while (0) | |
7648 | + | |
7649 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
7650 | +{ | |
7651 | + if (lv->owner != current) { | |
7652 | + __local_lock_irq(lv); | |
7653 | + return 0; | |
7654 | + } else { | |
7655 | + lv->nestcnt++; | |
7656 | + return 1; | |
7657 | + } | |
7658 | +} | |
7659 | + | |
7660 | +#define local_lock_irqsave(lvar, _flags) \ | |
7661 | + do { \ | |
7662 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
7663 | + put_local_var(lvar); \ | |
7664 | + _flags = __this_cpu_read(lvar.flags); \ | |
7665 | + } while (0) | |
7666 | + | |
7667 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
7668 | + do { \ | |
7669 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
7670 | + _flags = per_cpu(lvar, cpu).flags; \ | |
7671 | + } while (0) | |
7672 | + | |
7673 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
7674 | + unsigned long flags) | |
7675 | +{ | |
7676 | + LL_WARN(!lv->nestcnt); | |
7677 | + LL_WARN(lv->owner != current); | |
7678 | + if (--lv->nestcnt) | |
7679 | + return 0; | |
7680 | + | |
7681 | + lv->owner = NULL; | |
7682 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
7683 | + return 1; | |
7684 | +} | |
7685 | + | |
7686 | +#define local_unlock_irqrestore(lvar, flags) \ | |
7687 | + do { \ | |
7688 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
7689 | + put_local_var(lvar); \ | |
7690 | + } while (0) | |
7691 | + | |
7692 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
7693 | + do { \ | |
7694 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
7695 | + } while (0) | |
7696 | + | |
7697 | +#define local_spin_trylock_irq(lvar, lock) \ | |
7698 | + ({ \ | |
7699 | + int __locked; \ | |
7700 | + local_lock_irq(lvar); \ | |
7701 | + __locked = spin_trylock(lock); \ | |
7702 | + if (!__locked) \ | |
7703 | + local_unlock_irq(lvar); \ | |
7704 | + __locked; \ | |
7705 | + }) | |
7706 | + | |
7707 | +#define local_spin_lock_irq(lvar, lock) \ | |
7708 | + do { \ | |
7709 | + local_lock_irq(lvar); \ | |
7710 | + spin_lock(lock); \ | |
7711 | + } while (0) | |
7712 | + | |
7713 | +#define local_spin_unlock_irq(lvar, lock) \ | |
7714 | + do { \ | |
7715 | + spin_unlock(lock); \ | |
7716 | + local_unlock_irq(lvar); \ | |
7717 | + } while (0) | |
7718 | + | |
7719 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7720 | + do { \ | |
7721 | + local_lock_irqsave(lvar, flags); \ | |
7722 | + spin_lock(lock); \ | |
7723 | + } while (0) | |
7724 | + | |
7725 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7726 | + do { \ | |
7727 | + spin_unlock(lock); \ | |
7728 | + local_unlock_irqrestore(lvar, flags); \ | |
7729 | + } while (0) | |
7730 | + | |
7731 | +#define get_locked_var(lvar, var) \ | |
7732 | + (*({ \ | |
7733 | + local_lock(lvar); \ | |
7734 | + this_cpu_ptr(&var); \ | |
7735 | + })) | |
7736 | + | |
7737 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
7738 | + | |
7739 | +#define local_lock_cpu(lvar) \ | |
7740 | + ({ \ | |
7741 | + local_lock(lvar); \ | |
7742 | + smp_processor_id(); \ | |
7743 | + }) | |
7744 | + | |
7745 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
7746 | + | |
7747 | +#else /* PREEMPT_RT_BASE */ | |
7748 | + | |
7749 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
7750 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
7751 | + | |
7752 | +static inline void local_irq_lock_init(int lvar) { } | |
7753 | + | |
7754 | +#define local_lock(lvar) preempt_disable() | |
7755 | +#define local_unlock(lvar) preempt_enable() | |
7756 | +#define local_lock_irq(lvar) local_irq_disable() | |
7757 | +#define local_lock_irq_on(lvar, cpu) local_irq_disable() | |
7758 | +#define local_unlock_irq(lvar) local_irq_enable() | |
7759 | +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() | |
7760 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
7761 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
7762 | + | |
7763 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
7764 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
7765 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
7766 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
7767 | + spin_lock_irqsave(lock, flags) | |
7768 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
7769 | + spin_unlock_irqrestore(lock, flags) | |
7770 | + | |
7771 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
7772 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
7773 | + | |
7774 | +#define local_lock_cpu(lvar) get_cpu() | |
7775 | +#define local_unlock_cpu(lvar) put_cpu() | |
7776 | + | |
7777 | +#endif | |
7778 | + | |
7779 | +#endif | |
7780 | diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h | |
c7c16703 | 7781 | index 08d947fc4c59..705fb564a605 100644 |
1a6e0f06 JK |
7782 | --- a/include/linux/mm_types.h |
7783 | +++ b/include/linux/mm_types.h | |
7784 | @@ -11,6 +11,7 @@ | |
7785 | #include <linux/completion.h> | |
7786 | #include <linux/cpumask.h> | |
7787 | #include <linux/uprobes.h> | |
7788 | +#include <linux/rcupdate.h> | |
7789 | #include <linux/page-flags-layout.h> | |
7790 | #include <linux/workqueue.h> | |
7791 | #include <asm/page.h> | |
c7c16703 | 7792 | @@ -509,6 +510,9 @@ struct mm_struct { |
1a6e0f06 JK |
7793 | bool tlb_flush_pending; |
7794 | #endif | |
7795 | struct uprobes_state uprobes_state; | |
7796 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7797 | + struct rcu_head delayed_drop; | |
7798 | +#endif | |
7799 | #ifdef CONFIG_X86_INTEL_MPX | |
7800 | /* address of the bounds directory */ | |
7801 | void __user *bd_addr; | |
5c015b7c JK |
7802 | diff --git a/include/linux/module.h b/include/linux/module.h |
7803 | index 0c3207d26ac0..5944baaa3f28 100644 | |
7804 | --- a/include/linux/module.h | |
7805 | +++ b/include/linux/module.h | |
7806 | @@ -496,6 +496,7 @@ static inline int module_is_live(struct module *mod) | |
7807 | struct module *__module_text_address(unsigned long addr); | |
7808 | struct module *__module_address(unsigned long addr); | |
7809 | bool is_module_address(unsigned long addr); | |
7810 | +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); | |
7811 | bool is_module_percpu_address(unsigned long addr); | |
7812 | bool is_module_text_address(unsigned long addr); | |
7813 | ||
7814 | @@ -663,6 +664,11 @@ static inline bool is_module_percpu_address(unsigned long addr) | |
7815 | return false; | |
7816 | } | |
7817 | ||
7818 | +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) | |
7819 | +{ | |
7820 | + return false; | |
7821 | +} | |
7822 | + | |
7823 | static inline bool is_module_text_address(unsigned long addr) | |
7824 | { | |
7825 | return false; | |
1a6e0f06 JK |
7826 | diff --git a/include/linux/mutex.h b/include/linux/mutex.h |
7827 | index 2cb7531e7d7a..b3fdfc820216 100644 | |
7828 | --- a/include/linux/mutex.h | |
7829 | +++ b/include/linux/mutex.h | |
7830 | @@ -19,6 +19,17 @@ | |
7831 | #include <asm/processor.h> | |
7832 | #include <linux/osq_lock.h> | |
7833 | ||
7834 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7835 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7836 | + , .dep_map = { .name = #lockname } | |
7837 | +#else | |
7838 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7839 | +#endif | |
7840 | + | |
7841 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7842 | +# include <linux/mutex_rt.h> | |
7843 | +#else | |
7844 | + | |
7845 | /* | |
7846 | * Simple, straightforward mutexes with strict semantics: | |
7847 | * | |
7848 | @@ -99,13 +110,6 @@ do { \ | |
7849 | static inline void mutex_destroy(struct mutex *lock) {} | |
7850 | #endif | |
7851 | ||
7852 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7853 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
7854 | - , .dep_map = { .name = #lockname } | |
7855 | -#else | |
7856 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
7857 | -#endif | |
7858 | - | |
7859 | #define __MUTEX_INITIALIZER(lockname) \ | |
7860 | { .count = ATOMIC_INIT(1) \ | |
7861 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
7862 | @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |
7863 | extern int mutex_trylock(struct mutex *lock); | |
7864 | extern void mutex_unlock(struct mutex *lock); | |
7865 | ||
7866 | +#endif /* !PREEMPT_RT_FULL */ | |
7867 | + | |
7868 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
7869 | ||
7870 | #endif /* __LINUX_MUTEX_H */ | |
7871 | diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h | |
7872 | new file mode 100644 | |
5c015b7c | 7873 | index 000000000000..e0284edec655 |
1a6e0f06 JK |
7874 | --- /dev/null |
7875 | +++ b/include/linux/mutex_rt.h | |
5c015b7c | 7876 | @@ -0,0 +1,89 @@ |
1a6e0f06 JK |
7877 | +#ifndef __LINUX_MUTEX_RT_H |
7878 | +#define __LINUX_MUTEX_RT_H | |
7879 | + | |
7880 | +#ifndef __LINUX_MUTEX_H | |
7881 | +#error "Please include mutex.h" | |
7882 | +#endif | |
7883 | + | |
7884 | +#include <linux/rtmutex.h> | |
7885 | + | |
7886 | +/* FIXME: Just for __lockfunc */ | |
7887 | +#include <linux/spinlock.h> | |
7888 | + | |
7889 | +struct mutex { | |
7890 | + struct rt_mutex lock; | |
7891 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7892 | + struct lockdep_map dep_map; | |
7893 | +#endif | |
7894 | +}; | |
7895 | + | |
7896 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
7897 | + { \ | |
7898 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
7899 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
7900 | + } | |
7901 | + | |
7902 | +#define DEFINE_MUTEX(mutexname) \ | |
7903 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
7904 | + | |
7905 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
7906 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
7907 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
7908 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
7909 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
7910 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
7911 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
7912 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
7913 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
7914 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
7915 | + | |
7916 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
7917 | +#define mutex_lock(l) _mutex_lock(l) | |
7918 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
7919 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
7920 | +#define mutex_trylock(l) _mutex_trylock(l) | |
7921 | +#define mutex_unlock(l) _mutex_unlock(l) | |
5c015b7c JK |
7922 | + |
7923 | +#ifdef CONFIG_DEBUG_MUTEXES | |
1a6e0f06 | 7924 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) |
5c015b7c JK |
7925 | +#else |
7926 | +static inline void mutex_destroy(struct mutex *lock) {} | |
7927 | +#endif | |
1a6e0f06 JK |
7928 | + |
7929 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
7930 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
7931 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7932 | + _mutex_lock_interruptible_nested(l, s) | |
7933 | +# define mutex_lock_killable_nested(l, s) \ | |
7934 | + _mutex_lock_killable_nested(l, s) | |
7935 | + | |
7936 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
7937 | +do { \ | |
7938 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
7939 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
7940 | +} while (0) | |
7941 | + | |
7942 | +#else | |
7943 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
7944 | +# define mutex_lock_interruptible_nested(l, s) \ | |
7945 | + _mutex_lock_interruptible(l) | |
7946 | +# define mutex_lock_killable_nested(l, s) \ | |
7947 | + _mutex_lock_killable(l) | |
7948 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
7949 | +#endif | |
7950 | + | |
7951 | +# define mutex_init(mutex) \ | |
7952 | +do { \ | |
7953 | + static struct lock_class_key __key; \ | |
7954 | + \ | |
7955 | + rt_mutex_init(&(mutex)->lock); \ | |
7956 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
7957 | +} while (0) | |
7958 | + | |
7959 | +# define __mutex_init(mutex, name, key) \ | |
7960 | +do { \ | |
7961 | + rt_mutex_init(&(mutex)->lock); \ | |
7962 | + __mutex_do_init((mutex), name, key); \ | |
7963 | +} while (0) | |
7964 | + | |
7965 | +#endif | |
7966 | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h | |
5c015b7c | 7967 | index bb9b102c15cd..a5b12b8ad196 100644 |
1a6e0f06 JK |
7968 | --- a/include/linux/netdevice.h |
7969 | +++ b/include/linux/netdevice.h | |
c7c16703 JK |
7970 | @@ -396,7 +396,19 @@ typedef enum rx_handler_result rx_handler_result_t; |
7971 | typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); | |
7972 | ||
7973 | void __napi_schedule(struct napi_struct *n); | |
7974 | + | |
7975 | +/* | |
7976 | + * When PREEMPT_RT_FULL is defined, all device interrupt handlers | |
7977 | + * run as threads, and they can also be preempted (without PREEMPT_RT | |
7978 | + * interrupt threads can not be preempted). Which means that calling | |
7979 | + * __napi_schedule_irqoff() from an interrupt handler can be preempted | |
7980 | + * and can corrupt the napi->poll_list. | |
7981 | + */ | |
7982 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7983 | +#define __napi_schedule_irqoff(n) __napi_schedule(n) | |
7984 | +#else | |
7985 | void __napi_schedule_irqoff(struct napi_struct *n); | |
7986 | +#endif | |
7987 | ||
7988 | static inline bool napi_disable_pending(struct napi_struct *n) | |
7989 | { | |
5c015b7c | 7990 | @@ -2463,14 +2475,53 @@ void netdev_freemem(struct net_device *dev); |
1a6e0f06 JK |
7991 | void synchronize_net(void); |
7992 | int init_dummy_netdev(struct net_device *dev); | |
7993 | ||
7994 | -DECLARE_PER_CPU(int, xmit_recursion); | |
7995 | #define XMIT_RECURSION_LIMIT 10 | |
7996 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7997 | +static inline int dev_recursion_level(void) | |
7998 | +{ | |
7999 | + return current->xmit_recursion; | |
8000 | +} | |
8001 | + | |
8002 | +static inline int xmit_rec_read(void) | |
8003 | +{ | |
8004 | + return current->xmit_recursion; | |
8005 | +} | |
8006 | + | |
8007 | +static inline void xmit_rec_inc(void) | |
8008 | +{ | |
8009 | + current->xmit_recursion++; | |
8010 | +} | |
8011 | + | |
8012 | +static inline void xmit_rec_dec(void) | |
8013 | +{ | |
8014 | + current->xmit_recursion--; | |
8015 | +} | |
8016 | + | |
8017 | +#else | |
8018 | + | |
8019 | +DECLARE_PER_CPU(int, xmit_recursion); | |
8020 | ||
8021 | static inline int dev_recursion_level(void) | |
8022 | { | |
8023 | return this_cpu_read(xmit_recursion); | |
8024 | } | |
8025 | ||
8026 | +static inline int xmit_rec_read(void) | |
8027 | +{ | |
8028 | + return __this_cpu_read(xmit_recursion); | |
8029 | +} | |
8030 | + | |
8031 | +static inline void xmit_rec_inc(void) | |
8032 | +{ | |
8033 | + __this_cpu_inc(xmit_recursion); | |
8034 | +} | |
8035 | + | |
8036 | +static inline void xmit_rec_dec(void) | |
8037 | +{ | |
8038 | + __this_cpu_dec(xmit_recursion); | |
8039 | +} | |
8040 | +#endif | |
8041 | + | |
8042 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
8043 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
8044 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | |
5c015b7c | 8045 | @@ -2855,6 +2906,7 @@ struct softnet_data { |
1a6e0f06 JK |
8046 | unsigned int dropped; |
8047 | struct sk_buff_head input_pkt_queue; | |
8048 | struct napi_struct backlog; | |
8049 | + struct sk_buff_head tofree_queue; | |
8050 | ||
8051 | }; | |
8052 | ||
8053 | diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h | |
8054 | index 2ad1a2b289b5..b4d10155af54 100644 | |
8055 | --- a/include/linux/netfilter/x_tables.h | |
8056 | +++ b/include/linux/netfilter/x_tables.h | |
8057 | @@ -4,6 +4,7 @@ | |
8058 | ||
8059 | #include <linux/netdevice.h> | |
8060 | #include <linux/static_key.h> | |
8061 | +#include <linux/locallock.h> | |
8062 | #include <uapi/linux/netfilter/x_tables.h> | |
8063 | ||
8064 | /* Test a struct->invflags and a boolean for inequality */ | |
8065 | @@ -300,6 +301,8 @@ void xt_free_table_info(struct xt_table_info *info); | |
8066 | */ | |
8067 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
8068 | ||
8069 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); | |
8070 | + | |
8071 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
8072 | * | |
8073 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
8074 | @@ -320,6 +323,9 @@ static inline unsigned int xt_write_recseq_begin(void) | |
8075 | { | |
8076 | unsigned int addend; | |
8077 | ||
8078 | + /* RT protection */ | |
8079 | + local_lock(xt_write_lock); | |
8080 | + | |
8081 | /* | |
8082 | * Low order bit of sequence is set if we already | |
8083 | * called xt_write_recseq_begin(). | |
8084 | @@ -350,6 +356,7 @@ static inline void xt_write_recseq_end(unsigned int addend) | |
8085 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
8086 | smp_wmb(); | |
8087 | __this_cpu_add(xt_recseq.sequence, addend); | |
8088 | + local_unlock(xt_write_lock); | |
8089 | } | |
8090 | ||
8091 | /* | |
8092 | diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h | |
8093 | index 810124b33327..d54ca43d571f 100644 | |
8094 | --- a/include/linux/nfs_fs.h | |
8095 | +++ b/include/linux/nfs_fs.h | |
8096 | @@ -165,7 +165,11 @@ struct nfs_inode { | |
8097 | ||
8098 | /* Readers: in-flight sillydelete RPC calls */ | |
8099 | /* Writers: rmdir */ | |
8100 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8101 | + struct semaphore rmdir_sem; | |
8102 | +#else | |
8103 | struct rw_semaphore rmdir_sem; | |
8104 | +#endif | |
8105 | ||
8106 | #if IS_ENABLED(CONFIG_NFS_V4) | |
8107 | struct nfs4_cached_acl *nfs4_acl; | |
8108 | diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h | |
c7c16703 | 8109 | index beb1e10f446e..ebaf2e7bfe29 100644 |
1a6e0f06 JK |
8110 | --- a/include/linux/nfs_xdr.h |
8111 | +++ b/include/linux/nfs_xdr.h | |
c7c16703 | 8112 | @@ -1490,7 +1490,7 @@ struct nfs_unlinkdata { |
1a6e0f06 JK |
8113 | struct nfs_removeargs args; |
8114 | struct nfs_removeres res; | |
8115 | struct dentry *dentry; | |
8116 | - wait_queue_head_t wq; | |
8117 | + struct swait_queue_head wq; | |
8118 | struct rpc_cred *cred; | |
8119 | struct nfs_fattr dir_attr; | |
8120 | long timeout; | |
8121 | diff --git a/include/linux/notifier.h b/include/linux/notifier.h | |
8122 | index 4149868de4e6..babe5b9bcb91 100644 | |
8123 | --- a/include/linux/notifier.h | |
8124 | +++ b/include/linux/notifier.h | |
8125 | @@ -6,7 +6,7 @@ | |
8126 | * | |
8127 | * Alan Cox <Alan.Cox@linux.org> | |
8128 | */ | |
8129 | - | |
8130 | + | |
8131 | #ifndef _LINUX_NOTIFIER_H | |
8132 | #define _LINUX_NOTIFIER_H | |
8133 | #include <linux/errno.h> | |
8134 | @@ -42,9 +42,7 @@ | |
8135 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
8136 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
8137 | * SRCU notifier chains should be used when the chain will be called very | |
8138 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
8139 | - * chains are slightly more difficult to use because they require special | |
8140 | - * runtime initialization. | |
8141 | + * often but notifier_blocks will seldom be removed. | |
8142 | */ | |
8143 | ||
8144 | struct notifier_block; | |
8145 | @@ -90,7 +88,7 @@ struct srcu_notifier_head { | |
8146 | (name)->head = NULL; \ | |
8147 | } while (0) | |
8148 | ||
8149 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ | |
8150 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
8151 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8152 | #define srcu_cleanup_notifier_head(name) \ | |
8153 | cleanup_srcu_struct(&(name)->srcu); | |
8154 | @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8155 | .head = NULL } | |
8156 | #define RAW_NOTIFIER_INIT(name) { \ | |
8157 | .head = NULL } | |
8158 | -/* srcu_notifier_heads cannot be initialized statically */ | |
8159 | + | |
8160 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
8161 | + { \ | |
8162 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
8163 | + .head = NULL, \ | |
8164 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
8165 | + } | |
8166 | ||
8167 | #define ATOMIC_NOTIFIER_HEAD(name) \ | |
8168 | struct atomic_notifier_head name = \ | |
8169 | @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
8170 | struct raw_notifier_head name = \ | |
8171 | RAW_NOTIFIER_INIT(name) | |
8172 | ||
8173 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
8174 | + static DEFINE_PER_CPU(struct srcu_struct_array, \ | |
8175 | + name##_head_srcu_array); \ | |
8176 | + mod struct srcu_notifier_head name = \ | |
8177 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) | |
8178 | + | |
8179 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
8180 | + _SRCU_NOTIFIER_HEAD(name, ) | |
8181 | + | |
8182 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
8183 | + _SRCU_NOTIFIER_HEAD(name, static) | |
8184 | + | |
8185 | #ifdef __KERNEL__ | |
8186 | ||
8187 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | |
8188 | @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret) | |
8189 | ||
8190 | /* | |
8191 | * Declared notifiers so far. I can imagine quite a few more chains | |
8192 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
8193 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
8194 | * device units up), device [un]mount chain, module load/unload chain, | |
8195 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
8196 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
8197 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
8198 | */ | |
8199 | - | |
8200 | + | |
8201 | /* CPU notfiers are defined in include/linux/cpu.h. */ | |
8202 | ||
8203 | /* netdevice notifiers are defined in include/linux/netdevice.h */ | |
c7c16703 JK |
8204 | diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h |
8205 | index 5b2e6159b744..ea940f451606 100644 | |
8206 | --- a/include/linux/percpu-rwsem.h | |
8207 | +++ b/include/linux/percpu-rwsem.h | |
8208 | @@ -4,7 +4,7 @@ | |
8209 | #include <linux/atomic.h> | |
8210 | #include <linux/rwsem.h> | |
8211 | #include <linux/percpu.h> | |
8212 | -#include <linux/wait.h> | |
8213 | +#include <linux/swait.h> | |
8214 | #include <linux/rcu_sync.h> | |
8215 | #include <linux/lockdep.h> | |
8216 | ||
8217 | @@ -12,7 +12,7 @@ struct percpu_rw_semaphore { | |
8218 | struct rcu_sync rss; | |
8219 | unsigned int __percpu *read_count; | |
8220 | struct rw_semaphore rw_sem; | |
8221 | - wait_queue_head_t writer; | |
8222 | + struct swait_queue_head writer; | |
8223 | int readers_block; | |
8224 | }; | |
8225 | ||
8226 | @@ -22,13 +22,13 @@ static struct percpu_rw_semaphore name = { \ | |
8227 | .rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC), \ | |
8228 | .read_count = &__percpu_rwsem_rc_##name, \ | |
8229 | .rw_sem = __RWSEM_INITIALIZER(name.rw_sem), \ | |
8230 | - .writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
8231 | + .writer = __SWAIT_QUEUE_HEAD_INITIALIZER(name.writer), \ | |
8232 | } | |
8233 | ||
8234 | extern int __percpu_down_read(struct percpu_rw_semaphore *, int); | |
8235 | extern void __percpu_up_read(struct percpu_rw_semaphore *); | |
8236 | ||
8237 | -static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem) | |
8238 | +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
8239 | { | |
8240 | might_sleep(); | |
8241 | ||
8242 | @@ -46,16 +46,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * | |
8243 | __this_cpu_inc(*sem->read_count); | |
8244 | if (unlikely(!rcu_sync_is_idle(&sem->rss))) | |
8245 | __percpu_down_read(sem, false); /* Unconditional memory barrier */ | |
8246 | - barrier(); | |
8247 | /* | |
8248 | - * The barrier() prevents the compiler from | |
8249 | + * The preempt_enable() prevents the compiler from | |
8250 | * bleeding the critical section out. | |
8251 | */ | |
8252 | -} | |
8253 | - | |
8254 | -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) | |
8255 | -{ | |
8256 | - percpu_down_read_preempt_disable(sem); | |
8257 | preempt_enable(); | |
8258 | } | |
8259 | ||
8260 | @@ -82,13 +76,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem) | |
8261 | return ret; | |
8262 | } | |
8263 | ||
8264 | -static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem) | |
8265 | +static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
8266 | { | |
8267 | - /* | |
8268 | - * The barrier() prevents the compiler from | |
8269 | - * bleeding the critical section out. | |
8270 | - */ | |
8271 | - barrier(); | |
8272 | + preempt_disable(); | |
8273 | /* | |
8274 | * Same as in percpu_down_read(). | |
8275 | */ | |
8276 | @@ -101,12 +91,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem | |
8277 | rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_); | |
8278 | } | |
8279 | ||
8280 | -static inline void percpu_up_read(struct percpu_rw_semaphore *sem) | |
8281 | -{ | |
8282 | - preempt_disable(); | |
8283 | - percpu_up_read_preempt_enable(sem); | |
8284 | -} | |
8285 | - | |
8286 | extern void percpu_down_write(struct percpu_rw_semaphore *); | |
8287 | extern void percpu_up_write(struct percpu_rw_semaphore *); | |
8288 | ||
1a6e0f06 | 8289 | diff --git a/include/linux/percpu.h b/include/linux/percpu.h |
5c015b7c | 8290 | index 56939d3f6e53..b988bf40ad3e 100644 |
1a6e0f06 JK |
8291 | --- a/include/linux/percpu.h |
8292 | +++ b/include/linux/percpu.h | |
8293 | @@ -18,6 +18,35 @@ | |
8294 | #define PERCPU_MODULE_RESERVE 0 | |
8295 | #endif | |
8296 | ||
8297 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8298 | + | |
8299 | +#define get_local_var(var) (*({ \ | |
8300 | + migrate_disable(); \ | |
8301 | + this_cpu_ptr(&var); })) | |
8302 | + | |
8303 | +#define put_local_var(var) do { \ | |
8304 | + (void)&(var); \ | |
8305 | + migrate_enable(); \ | |
8306 | +} while (0) | |
8307 | + | |
8308 | +# define get_local_ptr(var) ({ \ | |
8309 | + migrate_disable(); \ | |
8310 | + this_cpu_ptr(var); }) | |
8311 | + | |
8312 | +# define put_local_ptr(var) do { \ | |
8313 | + (void)(var); \ | |
8314 | + migrate_enable(); \ | |
8315 | +} while (0) | |
8316 | + | |
8317 | +#else | |
8318 | + | |
8319 | +#define get_local_var(var) get_cpu_var(var) | |
8320 | +#define put_local_var(var) put_cpu_var(var) | |
8321 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
8322 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
8323 | + | |
8324 | +#endif | |
8325 | + | |
8326 | /* minimum unit size, also is the maximum supported allocation size */ | |
8327 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
8328 | ||
5c015b7c JK |
8329 | @@ -110,6 +139,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, |
8330 | #endif | |
8331 | ||
8332 | extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); | |
8333 | +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); | |
8334 | extern bool is_kernel_percpu_address(unsigned long addr); | |
8335 | ||
8336 | #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) | |
1a6e0f06 JK |
8337 | diff --git a/include/linux/pid.h b/include/linux/pid.h |
8338 | index 23705a53abba..2cc64b779f03 100644 | |
8339 | --- a/include/linux/pid.h | |
8340 | +++ b/include/linux/pid.h | |
8341 | @@ -2,6 +2,7 @@ | |
8342 | #define _LINUX_PID_H | |
8343 | ||
8344 | #include <linux/rcupdate.h> | |
8345 | +#include <linux/atomic.h> | |
8346 | ||
8347 | enum pid_type | |
8348 | { | |
8349 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h | |
8350 | index 75e4e30677f1..1cfb1cb72354 100644 | |
8351 | --- a/include/linux/preempt.h | |
8352 | +++ b/include/linux/preempt.h | |
8353 | @@ -50,7 +50,11 @@ | |
8354 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
8355 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
8356 | ||
8357 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8358 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8359 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
8360 | +#else | |
8361 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
8362 | +#endif | |
8363 | ||
8364 | /* We use the MSB mostly because its available */ | |
8365 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
8366 | @@ -59,9 +63,15 @@ | |
8367 | #include <asm/preempt.h> | |
8368 | ||
8369 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | |
8370 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8371 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
8372 | | NMI_MASK)) | |
8373 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8374 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
8375 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8376 | +#else | |
8377 | +# define softirq_count() (0UL) | |
8378 | +extern int in_serving_softirq(void); | |
8379 | +#endif | |
8380 | ||
8381 | /* | |
8382 | * Are we doing bottom half or hardware interrupt processing? | |
8383 | @@ -72,7 +82,6 @@ | |
8384 | #define in_irq() (hardirq_count()) | |
8385 | #define in_softirq() (softirq_count()) | |
8386 | #define in_interrupt() (irq_count()) | |
8387 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
8388 | ||
8389 | /* | |
8390 | * Are we in NMI context? | |
8391 | @@ -91,7 +100,11 @@ | |
8392 | /* | |
8393 | * The preempt_count offset after spin_lock() | |
8394 | */ | |
8395 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
8396 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
8397 | +#else | |
8398 | +#define PREEMPT_LOCK_OFFSET 0 | |
8399 | +#endif | |
8400 | ||
8401 | /* | |
8402 | * The preempt_count offset needed for things like: | |
8403 | @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val); | |
8404 | #define preempt_count_inc() preempt_count_add(1) | |
8405 | #define preempt_count_dec() preempt_count_sub(1) | |
8406 | ||
8407 | +#ifdef CONFIG_PREEMPT_LAZY | |
8408 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
8409 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
8410 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
8411 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
8412 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
8413 | +#else | |
8414 | +#define add_preempt_lazy_count(val) do { } while (0) | |
8415 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
8416 | +#define inc_preempt_lazy_count() do { } while (0) | |
8417 | +#define dec_preempt_lazy_count() do { } while (0) | |
8418 | +#define preempt_lazy_count() (0) | |
8419 | +#endif | |
8420 | + | |
8421 | #ifdef CONFIG_PREEMPT_COUNT | |
8422 | ||
8423 | #define preempt_disable() \ | |
8424 | @@ -148,13 +175,25 @@ do { \ | |
8425 | barrier(); \ | |
8426 | } while (0) | |
8427 | ||
8428 | +#define preempt_lazy_disable() \ | |
8429 | +do { \ | |
8430 | + inc_preempt_lazy_count(); \ | |
8431 | + barrier(); \ | |
8432 | +} while (0) | |
8433 | + | |
8434 | #define sched_preempt_enable_no_resched() \ | |
8435 | do { \ | |
8436 | barrier(); \ | |
8437 | preempt_count_dec(); \ | |
8438 | } while (0) | |
8439 | ||
8440 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8441 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8442 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
8443 | +# define preempt_check_resched_rt() preempt_check_resched() | |
8444 | +#else | |
8445 | +# define preempt_enable_no_resched() preempt_enable() | |
8446 | +# define preempt_check_resched_rt() barrier(); | |
8447 | +#endif | |
8448 | ||
8449 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | |
8450 | ||
8451 | @@ -179,6 +218,13 @@ do { \ | |
8452 | __preempt_schedule(); \ | |
8453 | } while (0) | |
8454 | ||
8455 | +#define preempt_lazy_enable() \ | |
8456 | +do { \ | |
8457 | + dec_preempt_lazy_count(); \ | |
8458 | + barrier(); \ | |
8459 | + preempt_check_resched(); \ | |
8460 | +} while (0) | |
8461 | + | |
8462 | #else /* !CONFIG_PREEMPT */ | |
8463 | #define preempt_enable() \ | |
8464 | do { \ | |
8465 | @@ -224,6 +270,7 @@ do { \ | |
8466 | #define preempt_disable_notrace() barrier() | |
8467 | #define preempt_enable_no_resched_notrace() barrier() | |
8468 | #define preempt_enable_notrace() barrier() | |
8469 | +#define preempt_check_resched_rt() barrier() | |
8470 | #define preemptible() 0 | |
8471 | ||
8472 | #endif /* CONFIG_PREEMPT_COUNT */ | |
8473 | @@ -244,10 +291,31 @@ do { \ | |
8474 | } while (0) | |
8475 | #define preempt_fold_need_resched() \ | |
8476 | do { \ | |
8477 | - if (tif_need_resched()) \ | |
8478 | + if (tif_need_resched_now()) \ | |
8479 | set_preempt_need_resched(); \ | |
8480 | } while (0) | |
8481 | ||
8482 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8483 | +# define preempt_disable_rt() preempt_disable() | |
8484 | +# define preempt_enable_rt() preempt_enable() | |
8485 | +# define preempt_disable_nort() barrier() | |
8486 | +# define preempt_enable_nort() barrier() | |
8487 | +# ifdef CONFIG_SMP | |
8488 | + extern void migrate_disable(void); | |
8489 | + extern void migrate_enable(void); | |
8490 | +# else /* CONFIG_SMP */ | |
8491 | +# define migrate_disable() barrier() | |
8492 | +# define migrate_enable() barrier() | |
8493 | +# endif /* CONFIG_SMP */ | |
8494 | +#else | |
8495 | +# define preempt_disable_rt() barrier() | |
8496 | +# define preempt_enable_rt() barrier() | |
8497 | +# define preempt_disable_nort() preempt_disable() | |
8498 | +# define preempt_enable_nort() preempt_enable() | |
8499 | +# define migrate_disable() preempt_disable() | |
8500 | +# define migrate_enable() preempt_enable() | |
8501 | +#endif | |
8502 | + | |
8503 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
8504 | ||
8505 | struct preempt_notifier; | |
8506 | diff --git a/include/linux/printk.h b/include/linux/printk.h | |
c7c16703 | 8507 | index eac1af8502bb..37e647af0b0b 100644 |
1a6e0f06 JK |
8508 | --- a/include/linux/printk.h |
8509 | +++ b/include/linux/printk.h | |
c7c16703 | 8510 | @@ -126,9 +126,11 @@ struct va_format { |
1a6e0f06 JK |
8511 | #ifdef CONFIG_EARLY_PRINTK |
8512 | extern asmlinkage __printf(1, 2) | |
8513 | void early_printk(const char *fmt, ...); | |
8514 | +extern void printk_kill(void); | |
8515 | #else | |
8516 | static inline __printf(1, 2) __cold | |
8517 | void early_printk(const char *s, ...) { } | |
8518 | +static inline void printk_kill(void) { } | |
8519 | #endif | |
8520 | ||
8521 | #ifdef CONFIG_PRINTK_NMI | |
8522 | diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h | |
1f39f580 | 8523 | index af3581b8a451..277295039c8f 100644 |
1a6e0f06 JK |
8524 | --- a/include/linux/radix-tree.h |
8525 | +++ b/include/linux/radix-tree.h | |
1f39f580 | 8526 | @@ -292,6 +292,8 @@ unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, |
1a6e0f06 JK |
8527 | int radix_tree_preload(gfp_t gfp_mask); |
8528 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
8529 | int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); | |
1f39f580 JK |
8530 | +void radix_tree_preload_end(void); |
8531 | + | |
1a6e0f06 JK |
8532 | void radix_tree_init(void); |
8533 | void *radix_tree_tag_set(struct radix_tree_root *root, | |
8534 | unsigned long index, unsigned int tag); | |
1f39f580 JK |
8535 | @@ -314,11 +316,6 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, |
8536 | int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag); | |
8537 | unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); | |
1a6e0f06 | 8538 | |
1f39f580 JK |
8539 | -static inline void radix_tree_preload_end(void) |
8540 | -{ | |
1a6e0f06 | 8541 | - preempt_enable(); |
1f39f580 JK |
8542 | -} |
8543 | - | |
1a6e0f06 | 8544 | /** |
1f39f580 JK |
8545 | * struct radix_tree_iter - radix tree iterator state |
8546 | * | |
1a6e0f06 | 8547 | diff --git a/include/linux/random.h b/include/linux/random.h |
7c18450a | 8548 | index 16ab429735a7..9d0fecb5b6c2 100644 |
1a6e0f06 JK |
8549 | --- a/include/linux/random.h |
8550 | +++ b/include/linux/random.h | |
c7c16703 JK |
8551 | @@ -31,7 +31,7 @@ static inline void add_latent_entropy(void) {} |
8552 | ||
1a6e0f06 | 8553 | extern void add_input_randomness(unsigned int type, unsigned int code, |
c7c16703 JK |
8554 | unsigned int value) __latent_entropy; |
8555 | -extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy; | |
8556 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) __latent_entropy; | |
1a6e0f06 JK |
8557 | |
8558 | extern void get_random_bytes(void *buf, int nbytes); | |
8559 | extern int add_random_ready_callback(struct random_ready_callback *rdy); | |
8560 | diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h | |
8561 | index e585018498d5..25c64474fc27 100644 | |
8562 | --- a/include/linux/rbtree.h | |
8563 | +++ b/include/linux/rbtree.h | |
8564 | @@ -31,7 +31,7 @@ | |
8565 | ||
8566 | #include <linux/kernel.h> | |
8567 | #include <linux/stddef.h> | |
8568 | -#include <linux/rcupdate.h> | |
8569 | +#include <linux/rcu_assign_pointer.h> | |
8570 | ||
8571 | struct rb_node { | |
8572 | unsigned long __rb_parent_color; | |
8573 | diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h | |
8574 | index d076183e49be..36bfb4dd57ae 100644 | |
8575 | --- a/include/linux/rbtree_augmented.h | |
8576 | +++ b/include/linux/rbtree_augmented.h | |
8577 | @@ -26,6 +26,7 @@ | |
8578 | ||
8579 | #include <linux/compiler.h> | |
8580 | #include <linux/rbtree.h> | |
8581 | +#include <linux/rcupdate.h> | |
8582 | ||
8583 | /* | |
8584 | * Please note - only struct rb_augment_callbacks and the prototypes for | |
8585 | diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h | |
8586 | new file mode 100644 | |
8587 | index 000000000000..7066962a4379 | |
8588 | --- /dev/null | |
8589 | +++ b/include/linux/rcu_assign_pointer.h | |
8590 | @@ -0,0 +1,54 @@ | |
8591 | +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ | |
8592 | +#define __LINUX_RCU_ASSIGN_POINTER_H__ | |
8593 | +#include <linux/compiler.h> | |
8594 | +#include <asm/barrier.h> | |
8595 | + | |
8596 | +/** | |
8597 | + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8598 | + * @v: The value to statically initialize with. | |
8599 | + */ | |
8600 | +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8601 | + | |
8602 | +/** | |
8603 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
8604 | + * @p: pointer to assign to | |
8605 | + * @v: value to assign (publish) | |
8606 | + * | |
8607 | + * Assigns the specified value to the specified RCU-protected | |
8608 | + * pointer, ensuring that any concurrent RCU readers will see | |
8609 | + * any prior initialization. | |
8610 | + * | |
8611 | + * Inserts memory barriers on architectures that require them | |
8612 | + * (which is most of them), and also prevents the compiler from | |
8613 | + * reordering the code that initializes the structure after the pointer | |
8614 | + * assignment. More importantly, this call documents which pointers | |
8615 | + * will be dereferenced by RCU read-side code. | |
8616 | + * | |
8617 | + * In some special cases, you may use RCU_INIT_POINTER() instead | |
8618 | + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8619 | + * to the fact that it does not constrain either the CPU or the compiler. | |
8620 | + * That said, using RCU_INIT_POINTER() when you should have used | |
8621 | + * rcu_assign_pointer() is a very bad thing that results in | |
8622 | + * impossible-to-diagnose memory corruption. So please be careful. | |
8623 | + * See the RCU_INIT_POINTER() comment header for details. | |
8624 | + * | |
8625 | + * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8626 | + * once, appearances notwithstanding. One of the "extra" evaluations | |
8627 | + * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8628 | + * neither of which actually execute the argument. As with most cpp | |
8629 | + * macros, this execute-arguments-only-once property is important, so | |
8630 | + * please be careful when making changes to rcu_assign_pointer() and the | |
8631 | + * other macros that it invokes. | |
8632 | + */ | |
8633 | +#define rcu_assign_pointer(p, v) \ | |
8634 | +({ \ | |
8635 | + uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8636 | + \ | |
8637 | + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8638 | + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8639 | + else \ | |
8640 | + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8641 | + _r_a_p__v; \ | |
8642 | +}) | |
8643 | + | |
8644 | +#endif | |
8645 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | |
1f39f580 | 8646 | index 01f71e1d2e94..30cc001d0d5a 100644 |
1a6e0f06 JK |
8647 | --- a/include/linux/rcupdate.h |
8648 | +++ b/include/linux/rcupdate.h | |
8649 | @@ -46,6 +46,7 @@ | |
8650 | #include <linux/compiler.h> | |
8651 | #include <linux/ktime.h> | |
8652 | #include <linux/irqflags.h> | |
8653 | +#include <linux/rcu_assign_pointer.h> | |
8654 | ||
8655 | #include <asm/barrier.h> | |
8656 | ||
8657 | @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head, | |
8658 | ||
8659 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8660 | ||
8661 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8662 | +#define call_rcu_bh call_rcu | |
8663 | +#else | |
8664 | /** | |
8665 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
8666 | * @head: structure to be used for queueing the RCU updates. | |
8667 | @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head, | |
8668 | */ | |
8669 | void call_rcu_bh(struct rcu_head *head, | |
8670 | rcu_callback_t func); | |
8671 | +#endif | |
8672 | ||
8673 | /** | |
8674 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | |
8675 | @@ -301,6 +306,11 @@ void synchronize_rcu(void); | |
8676 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
8677 | */ | |
8678 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
8679 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8680 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8681 | +#else | |
8682 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
8683 | +#endif | |
8684 | ||
8685 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | |
8686 | ||
8687 | @@ -326,6 +336,8 @@ static inline int rcu_preempt_depth(void) | |
8688 | return 0; | |
8689 | } | |
8690 | ||
8691 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
8692 | + | |
8693 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
8694 | ||
8695 | /* Internal to kernel */ | |
1f39f580 | 8696 | @@ -505,7 +517,14 @@ extern struct lockdep_map rcu_callback_map; |
1a6e0f06 JK |
8697 | int debug_lockdep_rcu_enabled(void); |
8698 | ||
8699 | int rcu_read_lock_held(void); | |
8700 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8701 | +static inline int rcu_read_lock_bh_held(void) | |
8702 | +{ | |
8703 | + return rcu_read_lock_held(); | |
8704 | +} | |
8705 | +#else | |
8706 | int rcu_read_lock_bh_held(void); | |
8707 | +#endif | |
8708 | ||
8709 | /** | |
8710 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
1f39f580 | 8711 | @@ -626,54 +645,6 @@ static inline void rcu_preempt_sleep_check(void) |
1a6e0f06 JK |
8712 | }) |
8713 | ||
8714 | /** | |
8715 | - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
8716 | - * @v: The value to statically initialize with. | |
8717 | - */ | |
8718 | -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
8719 | - | |
8720 | -/** | |
8721 | - * rcu_assign_pointer() - assign to RCU-protected pointer | |
8722 | - * @p: pointer to assign to | |
8723 | - * @v: value to assign (publish) | |
8724 | - * | |
8725 | - * Assigns the specified value to the specified RCU-protected | |
8726 | - * pointer, ensuring that any concurrent RCU readers will see | |
8727 | - * any prior initialization. | |
8728 | - * | |
8729 | - * Inserts memory barriers on architectures that require them | |
8730 | - * (which is most of them), and also prevents the compiler from | |
8731 | - * reordering the code that initializes the structure after the pointer | |
8732 | - * assignment. More importantly, this call documents which pointers | |
8733 | - * will be dereferenced by RCU read-side code. | |
8734 | - * | |
8735 | - * In some special cases, you may use RCU_INIT_POINTER() instead | |
8736 | - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
8737 | - * to the fact that it does not constrain either the CPU or the compiler. | |
8738 | - * That said, using RCU_INIT_POINTER() when you should have used | |
8739 | - * rcu_assign_pointer() is a very bad thing that results in | |
8740 | - * impossible-to-diagnose memory corruption. So please be careful. | |
8741 | - * See the RCU_INIT_POINTER() comment header for details. | |
8742 | - * | |
8743 | - * Note that rcu_assign_pointer() evaluates each of its arguments only | |
8744 | - * once, appearances notwithstanding. One of the "extra" evaluations | |
8745 | - * is in typeof() and the other visible only to sparse (__CHECKER__), | |
8746 | - * neither of which actually execute the argument. As with most cpp | |
8747 | - * macros, this execute-arguments-only-once property is important, so | |
8748 | - * please be careful when making changes to rcu_assign_pointer() and the | |
8749 | - * other macros that it invokes. | |
8750 | - */ | |
8751 | -#define rcu_assign_pointer(p, v) \ | |
8752 | -({ \ | |
8753 | - uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
8754 | - \ | |
8755 | - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
8756 | - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
8757 | - else \ | |
8758 | - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
8759 | - _r_a_p__v; \ | |
8760 | -}) | |
8761 | - | |
8762 | -/** | |
8763 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | |
8764 | * @p: The pointer to read | |
8765 | * | |
1f39f580 | 8766 | @@ -951,10 +922,14 @@ static inline void rcu_read_unlock(void) |
1a6e0f06 JK |
8767 | static inline void rcu_read_lock_bh(void) |
8768 | { | |
8769 | local_bh_disable(); | |
8770 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8771 | + rcu_read_lock(); | |
8772 | +#else | |
8773 | __acquire(RCU_BH); | |
8774 | rcu_lock_acquire(&rcu_bh_lock_map); | |
8775 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8776 | "rcu_read_lock_bh() used illegally while idle"); | |
8777 | +#endif | |
8778 | } | |
8779 | ||
8780 | /* | |
1f39f580 | 8781 | @@ -964,10 +939,14 @@ static inline void rcu_read_lock_bh(void) |
1a6e0f06 JK |
8782 | */ |
8783 | static inline void rcu_read_unlock_bh(void) | |
8784 | { | |
8785 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8786 | + rcu_read_unlock(); | |
8787 | +#else | |
8788 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
8789 | "rcu_read_unlock_bh() used illegally while idle"); | |
8790 | rcu_lock_release(&rcu_bh_lock_map); | |
8791 | __release(RCU_BH); | |
8792 | +#endif | |
8793 | local_bh_enable(); | |
8794 | } | |
8795 | ||
8796 | diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h | |
8797 | index 63a4e4cf40a5..08ab12df2863 100644 | |
8798 | --- a/include/linux/rcutree.h | |
8799 | +++ b/include/linux/rcutree.h | |
8800 | @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) | |
8801 | rcu_note_context_switch(); | |
8802 | } | |
8803 | ||
8804 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8805 | +# define synchronize_rcu_bh synchronize_rcu | |
8806 | +#else | |
8807 | void synchronize_rcu_bh(void); | |
8808 | +#endif | |
8809 | void synchronize_sched_expedited(void); | |
8810 | void synchronize_rcu_expedited(void); | |
8811 | ||
8812 | @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |
8813 | } | |
8814 | ||
8815 | void rcu_barrier(void); | |
8816 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8817 | +# define rcu_barrier_bh rcu_barrier | |
8818 | +#else | |
8819 | void rcu_barrier_bh(void); | |
8820 | +#endif | |
8821 | void rcu_barrier_sched(void); | |
8822 | unsigned long get_state_synchronize_rcu(void); | |
8823 | void cond_synchronize_rcu(unsigned long oldstate); | |
8824 | @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate); | |
8825 | extern unsigned long rcutorture_testseq; | |
8826 | extern unsigned long rcutorture_vernum; | |
8827 | unsigned long rcu_batches_started(void); | |
8828 | -unsigned long rcu_batches_started_bh(void); | |
8829 | unsigned long rcu_batches_started_sched(void); | |
8830 | unsigned long rcu_batches_completed(void); | |
8831 | -unsigned long rcu_batches_completed_bh(void); | |
8832 | unsigned long rcu_batches_completed_sched(void); | |
8833 | unsigned long rcu_exp_batches_completed(void); | |
8834 | unsigned long rcu_exp_batches_completed_sched(void); | |
8835 | void show_rcu_gp_kthreads(void); | |
8836 | ||
8837 | void rcu_force_quiescent_state(void); | |
8838 | -void rcu_bh_force_quiescent_state(void); | |
8839 | void rcu_sched_force_quiescent_state(void); | |
8840 | ||
8841 | void rcu_idle_enter(void); | |
8842 | @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly; | |
8843 | ||
8844 | bool rcu_is_watching(void); | |
8845 | ||
8846 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8847 | +void rcu_bh_force_quiescent_state(void); | |
8848 | +unsigned long rcu_batches_started_bh(void); | |
8849 | +unsigned long rcu_batches_completed_bh(void); | |
8850 | +#else | |
8851 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state | |
8852 | +# define rcu_batches_completed_bh rcu_batches_completed | |
8853 | +# define rcu_batches_started_bh rcu_batches_completed | |
8854 | +#endif | |
8855 | + | |
8856 | void rcu_all_qs(void); | |
8857 | ||
8858 | /* RCUtree hotplug events */ | |
8859 | diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h | |
33c7bf0f | 8860 | index 1abba5ce2a2f..294a8b4875f1 100644 |
1a6e0f06 JK |
8861 | --- a/include/linux/rtmutex.h |
8862 | +++ b/include/linux/rtmutex.h | |
8863 | @@ -13,11 +13,15 @@ | |
8864 | #define __LINUX_RT_MUTEX_H | |
8865 | ||
8866 | #include <linux/linkage.h> | |
8867 | +#include <linux/spinlock_types_raw.h> | |
8868 | #include <linux/rbtree.h> | |
8869 | -#include <linux/spinlock_types.h> | |
8870 | ||
8871 | extern int max_lock_depth; /* for sysctl */ | |
8872 | ||
8873 | +#ifdef CONFIG_DEBUG_MUTEXES | |
8874 | +#include <linux/debug_locks.h> | |
8875 | +#endif | |
8876 | + | |
8877 | /** | |
8878 | * The rt_mutex structure | |
8879 | * | |
8880 | @@ -31,8 +35,8 @@ struct rt_mutex { | |
8881 | struct rb_root waiters; | |
8882 | struct rb_node *waiters_leftmost; | |
8883 | struct task_struct *owner; | |
8884 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8885 | int save_state; | |
8886 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
8887 | const char *name, *file; | |
8888 | int line; | |
8889 | void *magic; | |
8890 | @@ -55,22 +59,33 @@ struct hrtimer_sleeper; | |
8891 | # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) | |
8892 | #endif | |
8893 | ||
8894 | +# define rt_mutex_init(mutex) \ | |
8895 | + do { \ | |
8896 | + raw_spin_lock_init(&(mutex)->wait_lock); \ | |
8897 | + __rt_mutex_init(mutex, #mutex); \ | |
8898 | + } while (0) | |
8899 | + | |
8900 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
8901 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
8902 | , .name = #mutexname, .file = __FILE__, .line = __LINE__ | |
8903 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) | |
8904 | extern void rt_mutex_debug_task_free(struct task_struct *tsk); | |
8905 | #else | |
8906 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8907 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) | |
8908 | # define rt_mutex_debug_task_free(t) do { } while (0) | |
8909 | #endif | |
8910 | ||
8911 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8912 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8913 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8914 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
8915 | , .waiters = RB_ROOT \ | |
8916 | , .owner = NULL \ | |
8917 | - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} | |
8918 | + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
8919 | + | |
8920 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
8921 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
8922 | + | |
8923 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ | |
8924 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
8925 | + , .save_state = 1 } | |
8926 | ||
8927 | #define DEFINE_RT_MUTEX(mutexname) \ | |
8928 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
33c7bf0f JK |
8929 | @@ -90,7 +105,9 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); |
8930 | extern void rt_mutex_destroy(struct rt_mutex *lock); | |
1a6e0f06 JK |
8931 | |
8932 | extern void rt_mutex_lock(struct rt_mutex *lock); | |
33c7bf0f | 8933 | +extern int rt_mutex_lock_state(struct rt_mutex *lock, int state); |
1a6e0f06 JK |
8934 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); |
8935 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
8936 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
8937 | struct hrtimer_sleeper *timeout); | |
8938 | ||
8939 | diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h | |
8940 | new file mode 100644 | |
8941 | index 000000000000..49ed2d45d3be | |
8942 | --- /dev/null | |
8943 | +++ b/include/linux/rwlock_rt.h | |
8944 | @@ -0,0 +1,99 @@ | |
8945 | +#ifndef __LINUX_RWLOCK_RT_H | |
8946 | +#define __LINUX_RWLOCK_RT_H | |
8947 | + | |
8948 | +#ifndef __LINUX_SPINLOCK_H | |
8949 | +#error Do not include directly. Use spinlock.h | |
8950 | +#endif | |
8951 | + | |
8952 | +#define rwlock_init(rwl) \ | |
8953 | +do { \ | |
8954 | + static struct lock_class_key __key; \ | |
8955 | + \ | |
8956 | + rt_mutex_init(&(rwl)->lock); \ | |
8957 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
8958 | +} while (0) | |
8959 | + | |
8960 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
8961 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
8962 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
8963 | +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, unsigned long *flags); | |
8964 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
8965 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
8966 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
8967 | +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); | |
8968 | +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); | |
8969 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
8970 | + | |
8971 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
8972 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
8973 | + | |
8974 | +#define write_trylock_irqsave(lock, flags) \ | |
8975 | + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) | |
8976 | + | |
8977 | +#define read_lock_irqsave(lock, flags) \ | |
8978 | + do { \ | |
8979 | + typecheck(unsigned long, flags); \ | |
8980 | + flags = rt_read_lock_irqsave(lock); \ | |
8981 | + } while (0) | |
8982 | + | |
8983 | +#define write_lock_irqsave(lock, flags) \ | |
8984 | + do { \ | |
8985 | + typecheck(unsigned long, flags); \ | |
8986 | + flags = rt_write_lock_irqsave(lock); \ | |
8987 | + } while (0) | |
8988 | + | |
8989 | +#define read_lock(lock) rt_read_lock(lock) | |
8990 | + | |
8991 | +#define read_lock_bh(lock) \ | |
8992 | + do { \ | |
8993 | + local_bh_disable(); \ | |
8994 | + rt_read_lock(lock); \ | |
8995 | + } while (0) | |
8996 | + | |
8997 | +#define read_lock_irq(lock) read_lock(lock) | |
8998 | + | |
8999 | +#define write_lock(lock) rt_write_lock(lock) | |
9000 | + | |
9001 | +#define write_lock_bh(lock) \ | |
9002 | + do { \ | |
9003 | + local_bh_disable(); \ | |
9004 | + rt_write_lock(lock); \ | |
9005 | + } while (0) | |
9006 | + | |
9007 | +#define write_lock_irq(lock) write_lock(lock) | |
9008 | + | |
9009 | +#define read_unlock(lock) rt_read_unlock(lock) | |
9010 | + | |
9011 | +#define read_unlock_bh(lock) \ | |
9012 | + do { \ | |
9013 | + rt_read_unlock(lock); \ | |
9014 | + local_bh_enable(); \ | |
9015 | + } while (0) | |
9016 | + | |
9017 | +#define read_unlock_irq(lock) read_unlock(lock) | |
9018 | + | |
9019 | +#define write_unlock(lock) rt_write_unlock(lock) | |
9020 | + | |
9021 | +#define write_unlock_bh(lock) \ | |
9022 | + do { \ | |
9023 | + rt_write_unlock(lock); \ | |
9024 | + local_bh_enable(); \ | |
9025 | + } while (0) | |
9026 | + | |
9027 | +#define write_unlock_irq(lock) write_unlock(lock) | |
9028 | + | |
9029 | +#define read_unlock_irqrestore(lock, flags) \ | |
9030 | + do { \ | |
9031 | + typecheck(unsigned long, flags); \ | |
9032 | + (void) flags; \ | |
9033 | + rt_read_unlock(lock); \ | |
9034 | + } while (0) | |
9035 | + | |
9036 | +#define write_unlock_irqrestore(lock, flags) \ | |
9037 | + do { \ | |
9038 | + typecheck(unsigned long, flags); \ | |
9039 | + (void) flags; \ | |
9040 | + rt_write_unlock(lock); \ | |
9041 | + } while (0) | |
9042 | + | |
9043 | +#endif | |
9044 | diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h | |
9045 | index cc0072e93e36..5317cd957292 100644 | |
9046 | --- a/include/linux/rwlock_types.h | |
9047 | +++ b/include/linux/rwlock_types.h | |
9048 | @@ -1,6 +1,10 @@ | |
9049 | #ifndef __LINUX_RWLOCK_TYPES_H | |
9050 | #define __LINUX_RWLOCK_TYPES_H | |
9051 | ||
9052 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) | |
9053 | +# error "Do not include directly, include spinlock_types.h" | |
9054 | +#endif | |
9055 | + | |
9056 | /* | |
9057 | * include/linux/rwlock_types.h - generic rwlock type definitions | |
9058 | * and initializers | |
9059 | diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h | |
9060 | new file mode 100644 | |
9061 | index 000000000000..51b28d775fe1 | |
9062 | --- /dev/null | |
9063 | +++ b/include/linux/rwlock_types_rt.h | |
9064 | @@ -0,0 +1,33 @@ | |
9065 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
9066 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
9067 | + | |
9068 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
9069 | +#error "Do not include directly. Include spinlock_types.h instead" | |
9070 | +#endif | |
9071 | + | |
9072 | +/* | |
9073 | + * rwlocks - rtmutex which allows single reader recursion | |
9074 | + */ | |
9075 | +typedef struct { | |
9076 | + struct rt_mutex lock; | |
9077 | + int read_depth; | |
9078 | + unsigned int break_lock; | |
9079 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9080 | + struct lockdep_map dep_map; | |
9081 | +#endif | |
9082 | +} rwlock_t; | |
9083 | + | |
9084 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
9085 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
9086 | +#else | |
9087 | +# define RW_DEP_MAP_INIT(lockname) | |
9088 | +#endif | |
9089 | + | |
9090 | +#define __RW_LOCK_UNLOCKED(name) \ | |
9091 | + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ | |
9092 | + RW_DEP_MAP_INIT(name) } | |
9093 | + | |
9094 | +#define DEFINE_RWLOCK(name) \ | |
9095 | + rwlock_t name = __RW_LOCK_UNLOCKED(name) | |
9096 | + | |
9097 | +#endif | |
9098 | diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h | |
33c7bf0f | 9099 | index dd1d14250340..aa2ac1f65c2d 100644 |
1a6e0f06 JK |
9100 | --- a/include/linux/rwsem.h |
9101 | +++ b/include/linux/rwsem.h | |
9102 | @@ -19,6 +19,10 @@ | |
9103 | #include <linux/osq_lock.h> | |
9104 | #endif | |
9105 | ||
9106 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9107 | +#include <linux/rwsem_rt.h> | |
9108 | +#else /* PREEMPT_RT_FULL */ | |
9109 | + | |
9110 | struct rw_semaphore; | |
9111 | ||
9112 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | |
33c7bf0f JK |
9113 | @@ -106,6 +110,13 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) |
9114 | return !list_empty(&sem->wait_list); | |
9115 | } | |
1a6e0f06 JK |
9116 | |
9117 | +#endif /* !PREEMPT_RT_FULL */ | |
9118 | + | |
33c7bf0f JK |
9119 | +/* |
9120 | + * The functions below are the same for all rwsem implementations including | |
9121 | + * the RT specific variant. | |
9122 | + */ | |
9123 | + | |
9124 | /* | |
9125 | * lock for reading | |
9126 | */ | |
1a6e0f06 JK |
9127 | diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h |
9128 | new file mode 100644 | |
33c7bf0f | 9129 | index 000000000000..2ffbf093ae92 |
1a6e0f06 JK |
9130 | --- /dev/null |
9131 | +++ b/include/linux/rwsem_rt.h | |
33c7bf0f | 9132 | @@ -0,0 +1,67 @@ |
1a6e0f06 JK |
9133 | +#ifndef _LINUX_RWSEM_RT_H |
9134 | +#define _LINUX_RWSEM_RT_H | |
9135 | + | |
9136 | +#ifndef _LINUX_RWSEM_H | |
9137 | +#error "Include rwsem.h" | |
9138 | +#endif | |
9139 | + | |
1a6e0f06 | 9140 | +#include <linux/rtmutex.h> |
33c7bf0f JK |
9141 | +#include <linux/swait.h> |
9142 | + | |
9143 | +#define READER_BIAS (1U << 31) | |
9144 | +#define WRITER_BIAS (1U << 30) | |
1a6e0f06 JK |
9145 | + |
9146 | +struct rw_semaphore { | |
33c7bf0f JK |
9147 | + atomic_t readers; |
9148 | + struct rt_mutex rtmutex; | |
1a6e0f06 JK |
9149 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
9150 | + struct lockdep_map dep_map; | |
9151 | +#endif | |
9152 | +}; | |
9153 | + | |
33c7bf0f JK |
9154 | +#define __RWSEM_INITIALIZER(name) \ |
9155 | +{ \ | |
9156 | + .readers = ATOMIC_INIT(READER_BIAS), \ | |
9157 | + .rtmutex = __RT_MUTEX_INITIALIZER(name.rtmutex), \ | |
9158 | + RW_DEP_MAP_INIT(name) \ | |
9159 | +} | |
1a6e0f06 JK |
9160 | + |
9161 | +#define DECLARE_RWSEM(lockname) \ | |
9162 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
9163 | + | |
33c7bf0f JK |
9164 | +extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, |
9165 | + struct lock_class_key *key); | |
1a6e0f06 | 9166 | + |
33c7bf0f JK |
9167 | +#define __init_rwsem(sem, name, key) \ |
9168 | +do { \ | |
9169 | + rt_mutex_init(&(sem)->rtmutex); \ | |
9170 | + __rwsem_init((sem), (name), (key)); \ | |
9171 | +} while (0) | |
1a6e0f06 | 9172 | + |
33c7bf0f | 9173 | +#define init_rwsem(sem) \ |
1a6e0f06 JK |
9174 | +do { \ |
9175 | + static struct lock_class_key __key; \ | |
9176 | + \ | |
33c7bf0f | 9177 | + __init_rwsem((sem), #sem, &__key); \ |
1a6e0f06 JK |
9178 | +} while (0) |
9179 | + | |
33c7bf0f | 9180 | +static inline int rwsem_is_locked(struct rw_semaphore *sem) |
1a6e0f06 | 9181 | +{ |
33c7bf0f | 9182 | + return atomic_read(&sem->readers) != READER_BIAS; |
1a6e0f06 JK |
9183 | +} |
9184 | + | |
33c7bf0f | 9185 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) |
1a6e0f06 | 9186 | +{ |
33c7bf0f | 9187 | + return atomic_read(&sem->readers) > 0; |
1a6e0f06 JK |
9188 | +} |
9189 | + | |
33c7bf0f JK |
9190 | +extern void __down_read(struct rw_semaphore *sem); |
9191 | +extern int __down_read_trylock(struct rw_semaphore *sem); | |
9192 | +extern void __down_write(struct rw_semaphore *sem); | |
9193 | +extern int __must_check __down_write_killable(struct rw_semaphore *sem); | |
9194 | +extern int __down_write_trylock(struct rw_semaphore *sem); | |
9195 | +extern void __up_read(struct rw_semaphore *sem); | |
9196 | +extern void __up_write(struct rw_semaphore *sem); | |
9197 | +extern void __downgrade_write(struct rw_semaphore *sem); | |
1a6e0f06 | 9198 | + |
1a6e0f06 JK |
9199 | +#endif |
9200 | diff --git a/include/linux/sched.h b/include/linux/sched.h | |
7c18450a | 9201 | index f425eb3318ab..e010fb4d640d 100644 |
1a6e0f06 JK |
9202 | --- a/include/linux/sched.h |
9203 | +++ b/include/linux/sched.h | |
9204 | @@ -26,6 +26,7 @@ struct sched_param { | |
9205 | #include <linux/nodemask.h> | |
9206 | #include <linux/mm_types.h> | |
9207 | #include <linux/preempt.h> | |
9208 | +#include <asm/kmap_types.h> | |
9209 | ||
9210 | #include <asm/page.h> | |
9211 | #include <asm/ptrace.h> | |
9212 | @@ -243,10 +244,7 @@ extern char ___assert_task_state[1 - 2*!!( | |
9213 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | |
9214 | __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) | |
9215 | ||
9216 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | |
9217 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
9218 | -#define task_is_stopped_or_traced(task) \ | |
9219 | - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | |
9220 | #define task_contributes_to_load(task) \ | |
9221 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
9222 | (task->flags & PF_FROZEN) == 0 && \ | |
9223 | @@ -312,6 +310,11 @@ extern char ___assert_task_state[1 - 2*!!( | |
9224 | ||
9225 | #endif | |
9226 | ||
9227 | +#define __set_current_state_no_track(state_value) \ | |
9228 | + do { current->state = (state_value); } while (0) | |
9229 | +#define set_current_state_no_track(state_value) \ | |
9230 | + set_mb(current->state, (state_value)) | |
9231 | + | |
9232 | /* Task command name length */ | |
9233 | #define TASK_COMM_LEN 16 | |
9234 | ||
c7c16703 | 9235 | @@ -1013,8 +1016,18 @@ struct wake_q_head { |
1a6e0f06 JK |
9236 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } |
9237 | ||
9238 | extern void wake_q_add(struct wake_q_head *head, | |
9239 | - struct task_struct *task); | |
9240 | -extern void wake_up_q(struct wake_q_head *head); | |
9241 | + struct task_struct *task); | |
9242 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); | |
9243 | + | |
9244 | +static inline void wake_up_q(struct wake_q_head *head) | |
9245 | +{ | |
9246 | + __wake_up_q(head, false); | |
9247 | +} | |
9248 | + | |
9249 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) | |
9250 | +{ | |
9251 | + __wake_up_q(head, true); | |
9252 | +} | |
9253 | ||
9254 | /* | |
9255 | * sched-domains (multiprocessor balancing) declarations: | |
c7c16703 JK |
9256 | @@ -1481,6 +1494,7 @@ struct task_struct { |
9257 | struct thread_info thread_info; | |
9258 | #endif | |
1a6e0f06 | 9259 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ |
c7c16703 | 9260 | + volatile long saved_state; /* saved state for "spinlock sleepers" */ |
1a6e0f06 JK |
9261 | void *stack; |
9262 | atomic_t usage; | |
9263 | unsigned int flags; /* per process flags, defined below */ | |
c7c16703 | 9264 | @@ -1520,6 +1534,12 @@ struct task_struct { |
1a6e0f06 JK |
9265 | #endif |
9266 | ||
9267 | unsigned int policy; | |
9268 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9269 | + int migrate_disable; | |
9270 | +# ifdef CONFIG_SCHED_DEBUG | |
9271 | + int migrate_disable_atomic; | |
9272 | +# endif | |
9273 | +#endif | |
9274 | int nr_cpus_allowed; | |
9275 | cpumask_t cpus_allowed; | |
9276 | ||
7c18450a | 9277 | @@ -1658,6 +1678,9 @@ struct task_struct { |
1a6e0f06 JK |
9278 | |
9279 | struct task_cputime cputime_expires; | |
9280 | struct list_head cpu_timers[3]; | |
9281 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9282 | + struct task_struct *posix_timer_list; | |
9283 | +#endif | |
9284 | ||
9285 | /* process credentials */ | |
c7c16703 | 9286 | const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ |
7c18450a | 9287 | @@ -1689,10 +1712,15 @@ struct task_struct { |
1a6e0f06 JK |
9288 | /* signal handlers */ |
9289 | struct signal_struct *signal; | |
9290 | struct sighand_struct *sighand; | |
9291 | + struct sigqueue *sigqueue_cache; | |
9292 | ||
9293 | sigset_t blocked, real_blocked; | |
9294 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | |
9295 | struct sigpending pending; | |
9296 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9297 | + /* TODO: move me into ->restart_block ? */ | |
9298 | + struct siginfo forced_info; | |
9299 | +#endif | |
9300 | ||
9301 | unsigned long sas_ss_sp; | |
9302 | size_t sas_ss_size; | |
7c18450a JK |
9303 | @@ -1723,6 +1751,8 @@ struct task_struct { |
9304 | /* PI waiters blocked on a rt_mutex held by this task */ | |
9305 | struct rb_root pi_waiters; | |
9306 | struct rb_node *pi_waiters_leftmost; | |
9307 | + /* Updated under owner's pi_lock and rq lock */ | |
9308 | + struct task_struct *pi_top_task; | |
9309 | /* Deadlock detection and priority inheritance handling */ | |
9310 | struct rt_mutex_waiter *pi_blocked_on; | |
9311 | #endif | |
9312 | @@ -1921,6 +1951,12 @@ struct task_struct { | |
1a6e0f06 JK |
9313 | /* bitmask and counter of trace recursion */ |
9314 | unsigned long trace_recursion; | |
9315 | #endif /* CONFIG_TRACING */ | |
9316 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
9317 | + u64 preempt_timestamp_hist; | |
9318 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
9319 | + long timer_offset; | |
9320 | +#endif | |
9321 | +#endif | |
9322 | #ifdef CONFIG_KCOV | |
9323 | /* Coverage collection mode enabled for this task (0 if disabled). */ | |
9324 | enum kcov_mode kcov_mode; | |
7c18450a | 9325 | @@ -1946,9 +1982,23 @@ struct task_struct { |
1a6e0f06 JK |
9326 | unsigned int sequential_io; |
9327 | unsigned int sequential_io_avg; | |
9328 | #endif | |
9329 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9330 | + struct rcu_head put_rcu; | |
9331 | + int softirq_nestcnt; | |
9332 | + unsigned int softirqs_raised; | |
9333 | +#endif | |
9334 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9335 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
9336 | + int kmap_idx; | |
9337 | + pte_t kmap_pte[KM_TYPE_NR]; | |
9338 | +# endif | |
9339 | +#endif | |
9340 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
9341 | unsigned long task_state_change; | |
9342 | #endif | |
9343 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9344 | + int xmit_recursion; | |
9345 | +#endif | |
9346 | int pagefault_disabled; | |
9347 | #ifdef CONFIG_MMU | |
9348 | struct task_struct *oom_reaper_list; | |
7c18450a | 9349 | @@ -1988,14 +2038,6 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) |
c7c16703 | 9350 | } |
1a6e0f06 JK |
9351 | #endif |
9352 | ||
9353 | -/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9354 | -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | |
9355 | - | |
9356 | -static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9357 | -{ | |
9358 | - return p->nr_cpus_allowed; | |
9359 | -} | |
9360 | - | |
9361 | #define TNF_MIGRATED 0x01 | |
9362 | #define TNF_NO_GROUP 0x02 | |
9363 | #define TNF_SHARED 0x04 | |
7c18450a | 9364 | @@ -2211,6 +2253,15 @@ extern struct pid *cad_pid; |
1a6e0f06 JK |
9365 | extern void free_task(struct task_struct *tsk); |
9366 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | |
9367 | ||
9368 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9369 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
9370 | + | |
9371 | +static inline void put_task_struct(struct task_struct *t) | |
9372 | +{ | |
9373 | + if (atomic_dec_and_test(&t->usage)) | |
9374 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
9375 | +} | |
9376 | +#else | |
9377 | extern void __put_task_struct(struct task_struct *t); | |
9378 | ||
9379 | static inline void put_task_struct(struct task_struct *t) | |
7c18450a | 9380 | @@ -2218,6 +2269,7 @@ static inline void put_task_struct(struct task_struct *t) |
1a6e0f06 JK |
9381 | if (atomic_dec_and_test(&t->usage)) |
9382 | __put_task_struct(t); | |
9383 | } | |
9384 | +#endif | |
9385 | ||
9386 | struct task_struct *task_rcu_dereference(struct task_struct **ptask); | |
9387 | struct task_struct *try_get_task_struct(struct task_struct **ptask); | |
7c18450a | 9388 | @@ -2259,6 +2311,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, |
1a6e0f06 JK |
9389 | /* |
9390 | * Per process flags | |
9391 | */ | |
9392 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
9393 | #define PF_EXITING 0x00000004 /* getting shut down */ | |
9394 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | |
9395 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | |
7c18450a | 9396 | @@ -2427,6 +2480,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, |
1a6e0f06 JK |
9397 | |
9398 | extern int set_cpus_allowed_ptr(struct task_struct *p, | |
9399 | const struct cpumask *new_mask); | |
9400 | +int migrate_me(void); | |
9401 | +void tell_sched_cpu_down_begin(int cpu); | |
9402 | +void tell_sched_cpu_down_done(int cpu); | |
9403 | + | |
9404 | #else | |
9405 | static inline void do_set_cpus_allowed(struct task_struct *p, | |
9406 | const struct cpumask *new_mask) | |
7c18450a | 9407 | @@ -2439,6 +2496,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, |
1a6e0f06 JK |
9408 | return -EINVAL; |
9409 | return 0; | |
9410 | } | |
9411 | +static inline int migrate_me(void) { return 0; } | |
9412 | +static inline void tell_sched_cpu_down_begin(int cpu) { } | |
9413 | +static inline void tell_sched_cpu_down_done(int cpu) { } | |
9414 | #endif | |
9415 | ||
9416 | #ifdef CONFIG_NO_HZ_COMMON | |
7c18450a | 9417 | @@ -2677,6 +2737,7 @@ extern void xtime_update(unsigned long ticks); |
1a6e0f06 JK |
9418 | |
9419 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
9420 | extern int wake_up_process(struct task_struct *tsk); | |
9421 | +extern int wake_up_lock_sleeper(struct task_struct * tsk); | |
9422 | extern void wake_up_new_task(struct task_struct *tsk); | |
9423 | #ifdef CONFIG_SMP | |
9424 | extern void kick_process(struct task_struct *tsk); | |
7c18450a | 9425 | @@ -2885,6 +2946,17 @@ static inline void mmdrop(struct mm_struct *mm) |
1a6e0f06 JK |
9426 | __mmdrop(mm); |
9427 | } | |
9428 | ||
9429 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9430 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
9431 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
9432 | +{ | |
9433 | + if (atomic_dec_and_test(&mm->mm_count)) | |
9434 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
9435 | +} | |
9436 | +#else | |
9437 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
9438 | +#endif | |
9439 | + | |
c7c16703 | 9440 | static inline void mmdrop_async_fn(struct work_struct *work) |
1a6e0f06 | 9441 | { |
c7c16703 | 9442 | struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); |
7c18450a | 9443 | @@ -3277,6 +3349,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) |
1a6e0f06 JK |
9444 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); |
9445 | } | |
9446 | ||
9447 | +#ifdef CONFIG_PREEMPT_LAZY | |
9448 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
9449 | +{ | |
9450 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9451 | +} | |
9452 | + | |
9453 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
9454 | +{ | |
9455 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
9456 | +} | |
9457 | + | |
9458 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
9459 | +{ | |
9460 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
9461 | +} | |
9462 | + | |
9463 | +static inline int need_resched_lazy(void) | |
9464 | +{ | |
9465 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
9466 | +} | |
9467 | + | |
9468 | +static inline int need_resched_now(void) | |
9469 | +{ | |
9470 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9471 | +} | |
9472 | + | |
9473 | +#else | |
9474 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
9475 | +static inline int need_resched_lazy(void) { return 0; } | |
9476 | + | |
9477 | +static inline int need_resched_now(void) | |
9478 | +{ | |
9479 | + return test_thread_flag(TIF_NEED_RESCHED); | |
9480 | +} | |
9481 | + | |
9482 | +#endif | |
9483 | + | |
9484 | static inline int restart_syscall(void) | |
9485 | { | |
9486 | set_tsk_thread_flag(current, TIF_SIGPENDING); | |
7c18450a | 9487 | @@ -3308,6 +3417,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) |
1a6e0f06 JK |
9488 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); |
9489 | } | |
9490 | ||
9491 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
9492 | +{ | |
9493 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
9494 | + return true; | |
9495 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9496 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
9497 | + return true; | |
9498 | +#endif | |
9499 | + return false; | |
9500 | +} | |
9501 | + | |
9502 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
9503 | +{ | |
9504 | + bool traced_stopped; | |
9505 | + | |
9506 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9507 | + unsigned long flags; | |
9508 | + | |
9509 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
9510 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9511 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
9512 | +#else | |
9513 | + traced_stopped = __task_is_stopped_or_traced(task); | |
9514 | +#endif | |
9515 | + return traced_stopped; | |
9516 | +} | |
9517 | + | |
9518 | +static inline bool task_is_traced(struct task_struct *task) | |
9519 | +{ | |
9520 | + bool traced = false; | |
9521 | + | |
9522 | + if (task->state & __TASK_TRACED) | |
9523 | + return true; | |
9524 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9525 | + /* in case the task is sleeping on tasklist_lock */ | |
9526 | + raw_spin_lock_irq(&task->pi_lock); | |
9527 | + if (task->state & __TASK_TRACED) | |
9528 | + traced = true; | |
9529 | + else if (task->saved_state & __TASK_TRACED) | |
9530 | + traced = true; | |
9531 | + raw_spin_unlock_irq(&task->pi_lock); | |
9532 | +#endif | |
9533 | + return traced; | |
9534 | +} | |
9535 | + | |
9536 | /* | |
9537 | * cond_resched() and cond_resched_lock(): latency reduction via | |
9538 | * explicit rescheduling in places that are safe. The return | |
7c18450a | 9539 | @@ -3333,12 +3487,16 @@ extern int __cond_resched_lock(spinlock_t *lock); |
1a6e0f06 JK |
9540 | __cond_resched_lock(lock); \ |
9541 | }) | |
9542 | ||
9543 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9544 | extern int __cond_resched_softirq(void); | |
9545 | ||
9546 | #define cond_resched_softirq() ({ \ | |
9547 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
9548 | __cond_resched_softirq(); \ | |
9549 | }) | |
9550 | +#else | |
9551 | +# define cond_resched_softirq() cond_resched() | |
9552 | +#endif | |
9553 | ||
9554 | static inline void cond_resched_rcu(void) | |
9555 | { | |
7c18450a | 9556 | @@ -3513,6 +3671,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) |
1a6e0f06 JK |
9557 | |
9558 | #endif /* CONFIG_SMP */ | |
9559 | ||
9560 | +static inline int __migrate_disabled(struct task_struct *p) | |
9561 | +{ | |
9562 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9563 | + return p->migrate_disable; | |
9564 | +#else | |
9565 | + return 0; | |
9566 | +#endif | |
9567 | +} | |
9568 | + | |
9569 | +/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
9570 | +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) | |
9571 | +{ | |
9572 | + if (__migrate_disabled(p)) | |
9573 | + return cpumask_of(task_cpu(p)); | |
9574 | + | |
9575 | + return &p->cpus_allowed; | |
9576 | +} | |
9577 | + | |
9578 | +static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
9579 | +{ | |
9580 | + if (__migrate_disabled(p)) | |
9581 | + return 1; | |
9582 | + return p->nr_cpus_allowed; | |
9583 | +} | |
9584 | + | |
9585 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | |
9586 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |
9587 | ||
7c18450a JK |
9588 | diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h |
9589 | index a30b172df6e1..db3e91f2bc03 100644 | |
9590 | --- a/include/linux/sched/rt.h | |
9591 | +++ b/include/linux/sched/rt.h | |
9592 | @@ -16,27 +16,20 @@ static inline int rt_task(struct task_struct *p) | |
9593 | } | |
9594 | ||
9595 | #ifdef CONFIG_RT_MUTEXES | |
9596 | -extern int rt_mutex_getprio(struct task_struct *p); | |
9597 | -extern void rt_mutex_setprio(struct task_struct *p, int prio); | |
9598 | -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); | |
9599 | -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); | |
9600 | +/* | |
9601 | + * Must hold either p->pi_lock or task_rq(p)->lock. | |
9602 | + */ | |
9603 | +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) | |
9604 | +{ | |
9605 | + return p->pi_top_task; | |
9606 | +} | |
9607 | +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); | |
9608 | extern void rt_mutex_adjust_pi(struct task_struct *p); | |
9609 | static inline bool tsk_is_pi_blocked(struct task_struct *tsk) | |
9610 | { | |
9611 | return tsk->pi_blocked_on != NULL; | |
9612 | } | |
9613 | #else | |
9614 | -static inline int rt_mutex_getprio(struct task_struct *p) | |
9615 | -{ | |
9616 | - return p->normal_prio; | |
9617 | -} | |
9618 | - | |
9619 | -static inline int rt_mutex_get_effective_prio(struct task_struct *task, | |
9620 | - int newprio) | |
9621 | -{ | |
9622 | - return newprio; | |
9623 | -} | |
9624 | - | |
9625 | static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | |
9626 | { | |
9627 | return NULL; | |
1a6e0f06 JK |
9628 | diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h |
9629 | index ead97654c4e9..3d7223ffdd3b 100644 | |
9630 | --- a/include/linux/seqlock.h | |
9631 | +++ b/include/linux/seqlock.h | |
9632 | @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | |
9633 | return __read_seqcount_retry(s, start); | |
9634 | } | |
9635 | ||
9636 | - | |
9637 | - | |
9638 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9639 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
9640 | { | |
9641 | s->sequence++; | |
9642 | smp_wmb(); | |
9643 | } | |
9644 | ||
9645 | -static inline void raw_write_seqcount_end(seqcount_t *s) | |
9646 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
9647 | +{ | |
9648 | + preempt_disable_rt(); | |
9649 | + __raw_write_seqcount_begin(s); | |
9650 | +} | |
9651 | + | |
9652 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
9653 | { | |
9654 | smp_wmb(); | |
9655 | s->sequence++; | |
9656 | } | |
9657 | ||
9658 | +static inline void raw_write_seqcount_end(seqcount_t *s) | |
9659 | +{ | |
9660 | + __raw_write_seqcount_end(s); | |
9661 | + preempt_enable_rt(); | |
9662 | +} | |
9663 | + | |
9664 | /** | |
9665 | * raw_write_seqcount_barrier - do a seq write barrier | |
9666 | * @s: pointer to seqcount_t | |
9667 | @@ -428,10 +438,32 @@ typedef struct { | |
9668 | /* | |
9669 | * Read side functions for starting and finalizing a read side section. | |
9670 | */ | |
9671 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9672 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
9673 | { | |
9674 | return read_seqcount_begin(&sl->seqcount); | |
9675 | } | |
9676 | +#else | |
9677 | +/* | |
9678 | + * Starvation safe read side for RT | |
9679 | + */ | |
9680 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
9681 | +{ | |
9682 | + unsigned ret; | |
9683 | + | |
9684 | +repeat: | |
9685 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
9686 | + if (unlikely(ret & 1)) { | |
9687 | + /* | |
9688 | + * Take the lock and let the writer proceed (i.e. evtl | |
9689 | + * boost it), otherwise we could loop here forever. | |
9690 | + */ | |
9691 | + spin_unlock_wait(&sl->lock); | |
9692 | + goto repeat; | |
9693 | + } | |
9694 | + return ret; | |
9695 | +} | |
9696 | +#endif | |
9697 | ||
9698 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9699 | { | |
9700 | @@ -446,36 +478,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
9701 | static inline void write_seqlock(seqlock_t *sl) | |
9702 | { | |
9703 | spin_lock(&sl->lock); | |
9704 | - write_seqcount_begin(&sl->seqcount); | |
9705 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9706 | +} | |
9707 | + | |
9708 | +static inline int try_write_seqlock(seqlock_t *sl) | |
9709 | +{ | |
9710 | + if (spin_trylock(&sl->lock)) { | |
9711 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9712 | + return 1; | |
9713 | + } | |
9714 | + return 0; | |
9715 | } | |
9716 | ||
9717 | static inline void write_sequnlock(seqlock_t *sl) | |
9718 | { | |
9719 | - write_seqcount_end(&sl->seqcount); | |
9720 | + __raw_write_seqcount_end(&sl->seqcount); | |
9721 | spin_unlock(&sl->lock); | |
9722 | } | |
9723 | ||
9724 | static inline void write_seqlock_bh(seqlock_t *sl) | |
9725 | { | |
9726 | spin_lock_bh(&sl->lock); | |
9727 | - write_seqcount_begin(&sl->seqcount); | |
9728 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9729 | } | |
9730 | ||
9731 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
9732 | { | |
9733 | - write_seqcount_end(&sl->seqcount); | |
9734 | + __raw_write_seqcount_end(&sl->seqcount); | |
9735 | spin_unlock_bh(&sl->lock); | |
9736 | } | |
9737 | ||
9738 | static inline void write_seqlock_irq(seqlock_t *sl) | |
9739 | { | |
9740 | spin_lock_irq(&sl->lock); | |
9741 | - write_seqcount_begin(&sl->seqcount); | |
9742 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9743 | } | |
9744 | ||
9745 | static inline void write_sequnlock_irq(seqlock_t *sl) | |
9746 | { | |
9747 | - write_seqcount_end(&sl->seqcount); | |
9748 | + __raw_write_seqcount_end(&sl->seqcount); | |
9749 | spin_unlock_irq(&sl->lock); | |
9750 | } | |
9751 | ||
9752 | @@ -484,7 +525,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9753 | unsigned long flags; | |
9754 | ||
9755 | spin_lock_irqsave(&sl->lock, flags); | |
9756 | - write_seqcount_begin(&sl->seqcount); | |
9757 | + __raw_write_seqcount_begin(&sl->seqcount); | |
9758 | return flags; | |
9759 | } | |
9760 | ||
9761 | @@ -494,7 +535,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
9762 | static inline void | |
9763 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
9764 | { | |
9765 | - write_seqcount_end(&sl->seqcount); | |
9766 | + __raw_write_seqcount_end(&sl->seqcount); | |
9767 | spin_unlock_irqrestore(&sl->lock, flags); | |
9768 | } | |
9769 | ||
9770 | diff --git a/include/linux/signal.h b/include/linux/signal.h | |
9771 | index b63f63eaa39c..295540fdfc72 100644 | |
9772 | --- a/include/linux/signal.h | |
9773 | +++ b/include/linux/signal.h | |
9774 | @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig) | |
9775 | } | |
9776 | ||
9777 | extern void flush_sigqueue(struct sigpending *queue); | |
9778 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
9779 | ||
9780 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
9781 | static inline int valid_signal(unsigned long sig) | |
9782 | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h | |
c7c16703 | 9783 | index 32810f279f8e..0db6e31161f6 100644 |
1a6e0f06 JK |
9784 | --- a/include/linux/skbuff.h |
9785 | +++ b/include/linux/skbuff.h | |
9786 | @@ -284,6 +284,7 @@ struct sk_buff_head { | |
9787 | ||
9788 | __u32 qlen; | |
9789 | spinlock_t lock; | |
9790 | + raw_spinlock_t raw_lock; | |
9791 | }; | |
9792 | ||
9793 | struct sk_buff; | |
c7c16703 | 9794 | @@ -1573,6 +1574,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) |
1a6e0f06 JK |
9795 | __skb_queue_head_init(list); |
9796 | } | |
9797 | ||
9798 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) | |
9799 | +{ | |
9800 | + raw_spin_lock_init(&list->raw_lock); | |
9801 | + __skb_queue_head_init(list); | |
9802 | +} | |
9803 | + | |
9804 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
9805 | struct lock_class_key *class) | |
9806 | { | |
9807 | diff --git a/include/linux/smp.h b/include/linux/smp.h | |
33c7bf0f | 9808 | index 8e0cb7a0f836..891c533724f5 100644 |
1a6e0f06 JK |
9809 | --- a/include/linux/smp.h |
9810 | +++ b/include/linux/smp.h | |
33c7bf0f JK |
9811 | @@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus; |
9812 | extern void __init setup_nr_cpu_ids(void); | |
9813 | extern void __init smp_init(void); | |
9814 | ||
9815 | +extern int __boot_cpu_id; | |
9816 | + | |
9817 | +static inline int get_boot_cpu_id(void) | |
9818 | +{ | |
9819 | + return __boot_cpu_id; | |
9820 | +} | |
9821 | + | |
9822 | #else /* !SMP */ | |
9823 | ||
9824 | static inline void smp_send_stop(void) { } | |
9825 | @@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); } | |
9826 | static inline void smp_init(void) { } | |
9827 | #endif | |
9828 | ||
9829 | +static inline int get_boot_cpu_id(void) | |
9830 | +{ | |
9831 | + return 0; | |
9832 | +} | |
9833 | + | |
9834 | #endif /* !SMP */ | |
9835 | ||
9836 | /* | |
9837 | @@ -185,6 +197,9 @@ static inline void smp_init(void) { } | |
1a6e0f06 JK |
9838 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) |
9839 | #define put_cpu() preempt_enable() | |
9840 | ||
9841 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) | |
9842 | +#define put_cpu_light() migrate_enable() | |
9843 | + | |
9844 | /* | |
9845 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
9846 | * boot command line: | |
9847 | diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h | |
33c7bf0f | 9848 | index 47dd0cebd204..b241cc044bd3 100644 |
1a6e0f06 JK |
9849 | --- a/include/linux/spinlock.h |
9850 | +++ b/include/linux/spinlock.h | |
9851 | @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9852 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
9853 | ||
9854 | /* Include rwlock functions */ | |
9855 | -#include <linux/rwlock.h> | |
9856 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9857 | +# include <linux/rwlock_rt.h> | |
9858 | +#else | |
9859 | +# include <linux/rwlock.h> | |
9860 | +#endif | |
9861 | ||
9862 | /* | |
9863 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
9864 | @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
9865 | # include <linux/spinlock_api_up.h> | |
9866 | #endif | |
9867 | ||
9868 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9869 | +# include <linux/spinlock_rt.h> | |
9870 | +#else /* PREEMPT_RT_FULL */ | |
9871 | + | |
9872 | /* | |
9873 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
9874 | */ | |
33c7bf0f | 9875 | @@ -416,4 +424,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); |
1a6e0f06 JK |
9876 | #define atomic_dec_and_lock(atomic, lock) \ |
9877 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
9878 | ||
9879 | +#endif /* !PREEMPT_RT_FULL */ | |
9880 | + | |
9881 | #endif /* __LINUX_SPINLOCK_H */ | |
9882 | diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h | |
9883 | index 5344268e6e62..043263f30e81 100644 | |
9884 | --- a/include/linux/spinlock_api_smp.h | |
9885 | +++ b/include/linux/spinlock_api_smp.h | |
9886 | @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) | |
9887 | return 0; | |
9888 | } | |
9889 | ||
9890 | -#include <linux/rwlock_api_smp.h> | |
9891 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9892 | +# include <linux/rwlock_api_smp.h> | |
9893 | +#endif | |
9894 | ||
9895 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
9896 | diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h | |
9897 | new file mode 100644 | |
33c7bf0f | 9898 | index 000000000000..43ca841b913a |
1a6e0f06 JK |
9899 | --- /dev/null |
9900 | +++ b/include/linux/spinlock_rt.h | |
33c7bf0f | 9901 | @@ -0,0 +1,162 @@ |
1a6e0f06 JK |
9902 | +#ifndef __LINUX_SPINLOCK_RT_H |
9903 | +#define __LINUX_SPINLOCK_RT_H | |
9904 | + | |
9905 | +#ifndef __LINUX_SPINLOCK_H | |
9906 | +#error Do not include directly. Use spinlock.h | |
9907 | +#endif | |
9908 | + | |
9909 | +#include <linux/bug.h> | |
9910 | + | |
9911 | +extern void | |
9912 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); | |
9913 | + | |
9914 | +#define spin_lock_init(slock) \ | |
9915 | +do { \ | |
9916 | + static struct lock_class_key __key; \ | |
9917 | + \ | |
9918 | + rt_mutex_init(&(slock)->lock); \ | |
9919 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
9920 | +} while (0) | |
9921 | + | |
9922 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); | |
9923 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); | |
9924 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); | |
9925 | + | |
9926 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
9927 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
9928 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
9929 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
1a6e0f06 JK |
9930 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); |
9931 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
9932 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
9933 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
9934 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
9935 | + | |
9936 | +/* | |
9937 | + * lockdep-less calls, for derived types like rwlock: | |
9938 | + * (for trylock they can use rt_mutex_trylock() directly. | |
9939 | + */ | |
9940 | +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); | |
9941 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
9942 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
1a6e0f06 JK |
9943 | + |
9944 | +#define spin_lock(lock) rt_spin_lock(lock) | |
9945 | + | |
9946 | +#define spin_lock_bh(lock) \ | |
9947 | + do { \ | |
9948 | + local_bh_disable(); \ | |
9949 | + rt_spin_lock(lock); \ | |
9950 | + } while (0) | |
9951 | + | |
9952 | +#define spin_lock_irq(lock) spin_lock(lock) | |
9953 | + | |
9954 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
9955 | + | |
9956 | +#define spin_trylock(lock) \ | |
9957 | +({ \ | |
9958 | + int __locked; \ | |
9959 | + __locked = spin_do_trylock(lock); \ | |
9960 | + __locked; \ | |
9961 | +}) | |
9962 | + | |
9963 | +#ifdef CONFIG_LOCKDEP | |
9964 | +# define spin_lock_nested(lock, subclass) \ | |
9965 | + do { \ | |
9966 | + rt_spin_lock_nested(lock, subclass); \ | |
9967 | + } while (0) | |
9968 | + | |
9969 | +#define spin_lock_bh_nested(lock, subclass) \ | |
9970 | + do { \ | |
9971 | + local_bh_disable(); \ | |
9972 | + rt_spin_lock_nested(lock, subclass); \ | |
9973 | + } while (0) | |
9974 | + | |
9975 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9976 | + do { \ | |
9977 | + typecheck(unsigned long, flags); \ | |
9978 | + flags = 0; \ | |
9979 | + rt_spin_lock_nested(lock, subclass); \ | |
9980 | + } while (0) | |
9981 | +#else | |
9982 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
9983 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
9984 | + | |
9985 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
9986 | + do { \ | |
9987 | + typecheck(unsigned long, flags); \ | |
9988 | + flags = 0; \ | |
9989 | + spin_lock(lock); \ | |
9990 | + } while (0) | |
9991 | +#endif | |
9992 | + | |
9993 | +#define spin_lock_irqsave(lock, flags) \ | |
9994 | + do { \ | |
9995 | + typecheck(unsigned long, flags); \ | |
9996 | + flags = 0; \ | |
9997 | + spin_lock(lock); \ | |
9998 | + } while (0) | |
9999 | + | |
10000 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
10001 | +{ | |
10002 | + unsigned long flags = 0; | |
10003 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
10004 | + flags = rt_spin_lock_trace_flags(lock); | |
10005 | +#else | |
10006 | + spin_lock(lock); /* lock_local */ | |
10007 | +#endif | |
10008 | + return flags; | |
10009 | +} | |
10010 | + | |
10011 | +/* FIXME: we need rt_spin_lock_nest_lock */ | |
10012 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
10013 | + | |
10014 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
1a6e0f06 JK |
10015 | + |
10016 | +#define spin_unlock_bh(lock) \ | |
10017 | + do { \ | |
10018 | + rt_spin_unlock(lock); \ | |
10019 | + local_bh_enable(); \ | |
10020 | + } while (0) | |
10021 | + | |
10022 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
10023 | + | |
10024 | +#define spin_unlock_irqrestore(lock, flags) \ | |
10025 | + do { \ | |
10026 | + typecheck(unsigned long, flags); \ | |
10027 | + (void) flags; \ | |
10028 | + spin_unlock(lock); \ | |
10029 | + } while (0) | |
10030 | + | |
10031 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
10032 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
10033 | + | |
10034 | +#define spin_trylock_irqsave(lock, flags) \ | |
10035 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
10036 | + | |
10037 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
10038 | + | |
10039 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
10040 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
10041 | +#else | |
10042 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
10043 | +#endif | |
10044 | + | |
10045 | +static inline int spin_can_lock(spinlock_t *lock) | |
10046 | +{ | |
10047 | + return !rt_mutex_is_locked(&lock->lock); | |
10048 | +} | |
10049 | + | |
10050 | +static inline int spin_is_locked(spinlock_t *lock) | |
10051 | +{ | |
10052 | + return rt_mutex_is_locked(&lock->lock); | |
10053 | +} | |
10054 | + | |
10055 | +static inline void assert_spin_locked(spinlock_t *lock) | |
10056 | +{ | |
10057 | + BUG_ON(!spin_is_locked(lock)); | |
10058 | +} | |
10059 | + | |
10060 | +#define atomic_dec_and_lock(atomic, lock) \ | |
10061 | + atomic_dec_and_spin_lock(atomic, lock) | |
10062 | + | |
10063 | +#endif | |
10064 | diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h | |
10065 | index 73548eb13a5d..10bac715ea96 100644 | |
10066 | --- a/include/linux/spinlock_types.h | |
10067 | +++ b/include/linux/spinlock_types.h | |
10068 | @@ -9,80 +9,15 @@ | |
10069 | * Released under the General Public License (GPL). | |
10070 | */ | |
10071 | ||
10072 | -#if defined(CONFIG_SMP) | |
10073 | -# include <asm/spinlock_types.h> | |
10074 | +#include <linux/spinlock_types_raw.h> | |
10075 | + | |
10076 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
10077 | +# include <linux/spinlock_types_nort.h> | |
10078 | +# include <linux/rwlock_types.h> | |
10079 | #else | |
10080 | -# include <linux/spinlock_types_up.h> | |
10081 | +# include <linux/rtmutex.h> | |
10082 | +# include <linux/spinlock_types_rt.h> | |
10083 | +# include <linux/rwlock_types_rt.h> | |
10084 | #endif | |
10085 | ||
10086 | -#include <linux/lockdep.h> | |
10087 | - | |
10088 | -typedef struct raw_spinlock { | |
10089 | - arch_spinlock_t raw_lock; | |
10090 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
10091 | - unsigned int break_lock; | |
10092 | -#endif | |
10093 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
10094 | - unsigned int magic, owner_cpu; | |
10095 | - void *owner; | |
10096 | -#endif | |
10097 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10098 | - struct lockdep_map dep_map; | |
10099 | -#endif | |
10100 | -} raw_spinlock_t; | |
10101 | - | |
10102 | -#define SPINLOCK_MAGIC 0xdead4ead | |
10103 | - | |
10104 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
10105 | - | |
10106 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10107 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
10108 | -#else | |
10109 | -# define SPIN_DEP_MAP_INIT(lockname) | |
10110 | -#endif | |
10111 | - | |
10112 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
10113 | -# define SPIN_DEBUG_INIT(lockname) \ | |
10114 | - .magic = SPINLOCK_MAGIC, \ | |
10115 | - .owner_cpu = -1, \ | |
10116 | - .owner = SPINLOCK_OWNER_INIT, | |
10117 | -#else | |
10118 | -# define SPIN_DEBUG_INIT(lockname) | |
10119 | -#endif | |
10120 | - | |
10121 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
10122 | - { \ | |
10123 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
10124 | - SPIN_DEBUG_INIT(lockname) \ | |
10125 | - SPIN_DEP_MAP_INIT(lockname) } | |
10126 | - | |
10127 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
10128 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
10129 | - | |
10130 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
10131 | - | |
10132 | -typedef struct spinlock { | |
10133 | - union { | |
10134 | - struct raw_spinlock rlock; | |
10135 | - | |
10136 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10137 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
10138 | - struct { | |
10139 | - u8 __padding[LOCK_PADSIZE]; | |
10140 | - struct lockdep_map dep_map; | |
10141 | - }; | |
10142 | -#endif | |
10143 | - }; | |
10144 | -} spinlock_t; | |
10145 | - | |
10146 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
10147 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
10148 | - | |
10149 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
10150 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
10151 | - | |
10152 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
10153 | - | |
10154 | -#include <linux/rwlock_types.h> | |
10155 | - | |
10156 | #endif /* __LINUX_SPINLOCK_TYPES_H */ | |
10157 | diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h | |
10158 | new file mode 100644 | |
10159 | index 000000000000..f1dac1fb1d6a | |
10160 | --- /dev/null | |
10161 | +++ b/include/linux/spinlock_types_nort.h | |
10162 | @@ -0,0 +1,33 @@ | |
10163 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
10164 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
10165 | + | |
10166 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
10167 | +#error "Do not include directly. Include spinlock_types.h instead" | |
10168 | +#endif | |
10169 | + | |
10170 | +/* | |
10171 | + * The non RT version maps spinlocks to raw_spinlocks | |
10172 | + */ | |
10173 | +typedef struct spinlock { | |
10174 | + union { | |
10175 | + struct raw_spinlock rlock; | |
10176 | + | |
10177 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10178 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
10179 | + struct { | |
10180 | + u8 __padding[LOCK_PADSIZE]; | |
10181 | + struct lockdep_map dep_map; | |
10182 | + }; | |
10183 | +#endif | |
10184 | + }; | |
10185 | +} spinlock_t; | |
10186 | + | |
10187 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
10188 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
10189 | + | |
10190 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
10191 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
10192 | + | |
10193 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
10194 | + | |
10195 | +#endif | |
10196 | diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h | |
10197 | new file mode 100644 | |
10198 | index 000000000000..edffc4d53fc9 | |
10199 | --- /dev/null | |
10200 | +++ b/include/linux/spinlock_types_raw.h | |
10201 | @@ -0,0 +1,56 @@ | |
10202 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
10203 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
10204 | + | |
10205 | +#if defined(CONFIG_SMP) | |
10206 | +# include <asm/spinlock_types.h> | |
10207 | +#else | |
10208 | +# include <linux/spinlock_types_up.h> | |
10209 | +#endif | |
10210 | + | |
10211 | +#include <linux/lockdep.h> | |
10212 | + | |
10213 | +typedef struct raw_spinlock { | |
10214 | + arch_spinlock_t raw_lock; | |
10215 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
10216 | + unsigned int break_lock; | |
10217 | +#endif | |
10218 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
10219 | + unsigned int magic, owner_cpu; | |
10220 | + void *owner; | |
10221 | +#endif | |
10222 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10223 | + struct lockdep_map dep_map; | |
10224 | +#endif | |
10225 | +} raw_spinlock_t; | |
10226 | + | |
10227 | +#define SPINLOCK_MAGIC 0xdead4ead | |
10228 | + | |
10229 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
10230 | + | |
10231 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10232 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
10233 | +#else | |
10234 | +# define SPIN_DEP_MAP_INIT(lockname) | |
10235 | +#endif | |
10236 | + | |
10237 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
10238 | +# define SPIN_DEBUG_INIT(lockname) \ | |
10239 | + .magic = SPINLOCK_MAGIC, \ | |
10240 | + .owner_cpu = -1, \ | |
10241 | + .owner = SPINLOCK_OWNER_INIT, | |
10242 | +#else | |
10243 | +# define SPIN_DEBUG_INIT(lockname) | |
10244 | +#endif | |
10245 | + | |
10246 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
10247 | + { \ | |
10248 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
10249 | + SPIN_DEBUG_INIT(lockname) \ | |
10250 | + SPIN_DEP_MAP_INIT(lockname) } | |
10251 | + | |
10252 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
10253 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
10254 | + | |
10255 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
10256 | + | |
10257 | +#endif | |
10258 | diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h | |
10259 | new file mode 100644 | |
10260 | index 000000000000..3e3d8c5f7a9a | |
10261 | --- /dev/null | |
10262 | +++ b/include/linux/spinlock_types_rt.h | |
10263 | @@ -0,0 +1,48 @@ | |
10264 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
10265 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
10266 | + | |
10267 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
10268 | +#error "Do not include directly. Include spinlock_types.h instead" | |
10269 | +#endif | |
10270 | + | |
10271 | +#include <linux/cache.h> | |
10272 | + | |
10273 | +/* | |
10274 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
10275 | + */ | |
10276 | +typedef struct spinlock { | |
10277 | + struct rt_mutex lock; | |
10278 | + unsigned int break_lock; | |
10279 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10280 | + struct lockdep_map dep_map; | |
10281 | +#endif | |
10282 | +} spinlock_t; | |
10283 | + | |
10284 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
10285 | +# define __RT_SPIN_INITIALIZER(name) \ | |
10286 | + { \ | |
10287 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
10288 | + .save_state = 1, \ | |
10289 | + .file = __FILE__, \ | |
10290 | + .line = __LINE__ , \ | |
10291 | + } | |
10292 | +#else | |
10293 | +# define __RT_SPIN_INITIALIZER(name) \ | |
10294 | + { \ | |
10295 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
10296 | + .save_state = 1, \ | |
10297 | + } | |
10298 | +#endif | |
10299 | + | |
10300 | +/* | |
10301 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
10302 | +*/ | |
10303 | + | |
10304 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
10305 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
10306 | + SPIN_DEP_MAP_INIT(name) } | |
10307 | + | |
10308 | +#define DEFINE_SPINLOCK(name) \ | |
10309 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
10310 | + | |
10311 | +#endif | |
10312 | diff --git a/include/linux/srcu.h b/include/linux/srcu.h | |
10313 | index dc8eb63c6568..e793d3a257da 100644 | |
10314 | --- a/include/linux/srcu.h | |
10315 | +++ b/include/linux/srcu.h | |
10316 | @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); | |
10317 | ||
10318 | void process_srcu(struct work_struct *work); | |
10319 | ||
10320 | -#define __SRCU_STRUCT_INIT(name) \ | |
10321 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
10322 | { \ | |
10323 | .completed = -300, \ | |
10324 | - .per_cpu_ref = &name##_srcu_array, \ | |
10325 | + .per_cpu_ref = &pcpu_name, \ | |
10326 | .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ | |
10327 | .running = false, \ | |
10328 | .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ | |
10329 | @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work); | |
10330 | */ | |
10331 | #define __DEFINE_SRCU(name, is_static) \ | |
10332 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | |
10333 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
10334 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) | |
10335 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
10336 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
10337 | ||
10338 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
c7c16703 | 10339 | index d9718378a8be..e81e6dc7dcb1 100644 |
1a6e0f06 JK |
10340 | --- a/include/linux/suspend.h |
10341 | +++ b/include/linux/suspend.h | |
10342 | @@ -193,6 +193,12 @@ struct platform_freeze_ops { | |
10343 | void (*end)(void); | |
10344 | }; | |
10345 | ||
10346 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | |
10347 | +extern bool pm_in_action; | |
10348 | +#else | |
10349 | +# define pm_in_action false | |
10350 | +#endif | |
10351 | + | |
10352 | #ifdef CONFIG_SUSPEND | |
10353 | /** | |
10354 | * suspend_set_ops - set platform dependent suspend operations | |
10355 | diff --git a/include/linux/swait.h b/include/linux/swait.h | |
10356 | index c1f9c62a8a50..83f004a72320 100644 | |
10357 | --- a/include/linux/swait.h | |
10358 | +++ b/include/linux/swait.h | |
10359 | @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q) | |
10360 | extern void swake_up(struct swait_queue_head *q); | |
10361 | extern void swake_up_all(struct swait_queue_head *q); | |
10362 | extern void swake_up_locked(struct swait_queue_head *q); | |
10363 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
10364 | ||
10365 | extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
10366 | extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
10367 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
1f39f580 | 10368 | index 55ff5593c193..52bf5477dc92 100644 |
1a6e0f06 JK |
10369 | --- a/include/linux/swap.h |
10370 | +++ b/include/linux/swap.h | |
10371 | @@ -11,6 +11,7 @@ | |
10372 | #include <linux/fs.h> | |
10373 | #include <linux/atomic.h> | |
10374 | #include <linux/page-flags.h> | |
10375 | +#include <linux/locallock.h> | |
10376 | #include <asm/page.h> | |
10377 | ||
10378 | struct notifier_block; | |
1f39f580 | 10379 | @@ -247,7 +248,8 @@ struct swap_info_struct { |
1a6e0f06 JK |
10380 | void *workingset_eviction(struct address_space *mapping, struct page *page); |
10381 | bool workingset_refault(void *shadow); | |
10382 | void workingset_activation(struct page *page); | |
10383 | -extern struct list_lru workingset_shadow_nodes; | |
10384 | +extern struct list_lru __workingset_shadow_nodes; | |
10385 | +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
10386 | ||
10387 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | |
10388 | { | |
1f39f580 | 10389 | @@ -292,6 +294,7 @@ extern unsigned long nr_free_pagecache_pages(void); |
1a6e0f06 JK |
10390 | |
10391 | ||
10392 | /* linux/mm/swap.c */ | |
10393 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
10394 | extern void lru_cache_add(struct page *); | |
10395 | extern void lru_cache_add_anon(struct page *page); | |
10396 | extern void lru_cache_add_file(struct page *page); | |
10397 | diff --git a/include/linux/swork.h b/include/linux/swork.h | |
10398 | new file mode 100644 | |
10399 | index 000000000000..f175fa9a6016 | |
10400 | --- /dev/null | |
10401 | +++ b/include/linux/swork.h | |
10402 | @@ -0,0 +1,24 @@ | |
10403 | +#ifndef _LINUX_SWORK_H | |
10404 | +#define _LINUX_SWORK_H | |
10405 | + | |
10406 | +#include <linux/list.h> | |
10407 | + | |
10408 | +struct swork_event { | |
10409 | + struct list_head item; | |
10410 | + unsigned long flags; | |
10411 | + void (*func)(struct swork_event *); | |
10412 | +}; | |
10413 | + | |
10414 | +static inline void INIT_SWORK(struct swork_event *event, | |
10415 | + void (*func)(struct swork_event *)) | |
10416 | +{ | |
10417 | + event->flags = 0; | |
10418 | + event->func = func; | |
10419 | +} | |
10420 | + | |
10421 | +bool swork_queue(struct swork_event *sev); | |
10422 | + | |
10423 | +int swork_get(void); | |
10424 | +void swork_put(void); | |
10425 | + | |
10426 | +#endif /* _LINUX_SWORK_H */ | |
10427 | diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h | |
c7c16703 | 10428 | index 2873baf5372a..eb1a108f17ca 100644 |
1a6e0f06 JK |
10429 | --- a/include/linux/thread_info.h |
10430 | +++ b/include/linux/thread_info.h | |
c7c16703 | 10431 | @@ -107,7 +107,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) |
1a6e0f06 JK |
10432 | #define test_thread_flag(flag) \ |
10433 | test_ti_thread_flag(current_thread_info(), flag) | |
10434 | ||
10435 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10436 | +#ifdef CONFIG_PREEMPT_LAZY | |
10437 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
10438 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10439 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
10440 | +#define tif_need_resched_lazy() test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
10441 | + | |
10442 | +#else | |
10443 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
10444 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
10445 | +#define tif_need_resched_lazy() 0 | |
10446 | +#endif | |
10447 | ||
10448 | #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES | |
10449 | static inline int arch_within_stack_frames(const void * const stack, | |
10450 | diff --git a/include/linux/timer.h b/include/linux/timer.h | |
10451 | index 51d601f192d4..83cea629efe1 100644 | |
10452 | --- a/include/linux/timer.h | |
10453 | +++ b/include/linux/timer.h | |
10454 | @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); | |
10455 | ||
10456 | extern int try_to_del_timer_sync(struct timer_list *timer); | |
10457 | ||
10458 | -#ifdef CONFIG_SMP | |
10459 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
10460 | extern int del_timer_sync(struct timer_list *timer); | |
10461 | #else | |
10462 | # define del_timer_sync(t) del_timer(t) | |
10463 | diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h | |
10464 | index be007610ceb0..15154b13a53b 100644 | |
10465 | --- a/include/linux/trace_events.h | |
10466 | +++ b/include/linux/trace_events.h | |
10467 | @@ -56,6 +56,9 @@ struct trace_entry { | |
10468 | unsigned char flags; | |
10469 | unsigned char preempt_count; | |
10470 | int pid; | |
10471 | + unsigned short migrate_disable; | |
10472 | + unsigned short padding; | |
10473 | + unsigned char preempt_lazy_count; | |
10474 | }; | |
10475 | ||
10476 | #define TRACE_EVENT_TYPE_MAX \ | |
10477 | diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h | |
10478 | index f30c187ed785..83bf0f798426 100644 | |
10479 | --- a/include/linux/uaccess.h | |
10480 | +++ b/include/linux/uaccess.h | |
10481 | @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void) | |
10482 | */ | |
10483 | static inline void pagefault_disable(void) | |
10484 | { | |
10485 | + migrate_disable(); | |
10486 | pagefault_disabled_inc(); | |
10487 | /* | |
10488 | * make sure to have issued the store before a pagefault | |
10489 | @@ -40,6 +41,7 @@ static inline void pagefault_enable(void) | |
10490 | */ | |
10491 | barrier(); | |
10492 | pagefault_disabled_dec(); | |
10493 | + migrate_enable(); | |
10494 | } | |
10495 | ||
10496 | /* | |
10497 | diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h | |
10498 | index 4a29c75b146e..0a294e950df8 100644 | |
10499 | --- a/include/linux/uprobes.h | |
10500 | +++ b/include/linux/uprobes.h | |
10501 | @@ -27,6 +27,7 @@ | |
10502 | #include <linux/errno.h> | |
10503 | #include <linux/rbtree.h> | |
10504 | #include <linux/types.h> | |
10505 | +#include <linux/wait.h> | |
10506 | ||
10507 | struct vm_area_struct; | |
10508 | struct mm_struct; | |
10509 | diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h | |
10510 | index 613771909b6e..e28c5a43229d 100644 | |
10511 | --- a/include/linux/vmstat.h | |
10512 | +++ b/include/linux/vmstat.h | |
10513 | @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); | |
10514 | */ | |
10515 | static inline void __count_vm_event(enum vm_event_item item) | |
10516 | { | |
10517 | + preempt_disable_rt(); | |
10518 | raw_cpu_inc(vm_event_states.event[item]); | |
10519 | + preempt_enable_rt(); | |
10520 | } | |
10521 | ||
10522 | static inline void count_vm_event(enum vm_event_item item) | |
10523 | @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) | |
10524 | ||
10525 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
10526 | { | |
10527 | + preempt_disable_rt(); | |
10528 | raw_cpu_add(vm_event_states.event[item], delta); | |
10529 | + preempt_enable_rt(); | |
10530 | } | |
10531 | ||
10532 | static inline void count_vm_events(enum vm_event_item item, long delta) | |
10533 | diff --git a/include/linux/wait.h b/include/linux/wait.h | |
c7c16703 | 10534 | index 2408e8d5c05c..db50d6609195 100644 |
1a6e0f06 JK |
10535 | --- a/include/linux/wait.h |
10536 | +++ b/include/linux/wait.h | |
10537 | @@ -8,6 +8,7 @@ | |
10538 | #include <linux/spinlock.h> | |
10539 | #include <asm/current.h> | |
10540 | #include <uapi/linux/wait.h> | |
10541 | +#include <linux/atomic.h> | |
10542 | ||
10543 | typedef struct __wait_queue wait_queue_t; | |
10544 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); | |
10545 | diff --git a/include/net/dst.h b/include/net/dst.h | |
10546 | index 6835d224d47b..55a5a9698f14 100644 | |
10547 | --- a/include/net/dst.h | |
10548 | +++ b/include/net/dst.h | |
10549 | @@ -446,7 +446,7 @@ static inline void dst_confirm(struct dst_entry *dst) | |
10550 | static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, | |
10551 | struct sk_buff *skb) | |
10552 | { | |
10553 | - const struct hh_cache *hh; | |
10554 | + struct hh_cache *hh; | |
10555 | ||
10556 | if (dst->pending_confirm) { | |
10557 | unsigned long now = jiffies; | |
10558 | diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h | |
10559 | index 231e121cc7d9..d125222b979d 100644 | |
10560 | --- a/include/net/gen_stats.h | |
10561 | +++ b/include/net/gen_stats.h | |
10562 | @@ -5,6 +5,7 @@ | |
10563 | #include <linux/socket.h> | |
10564 | #include <linux/rtnetlink.h> | |
10565 | #include <linux/pkt_sched.h> | |
10566 | +#include <net/net_seq_lock.h> | |
10567 | ||
10568 | struct gnet_stats_basic_cpu { | |
10569 | struct gnet_stats_basic_packed bstats; | |
10570 | @@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, | |
10571 | spinlock_t *lock, struct gnet_dump *d, | |
10572 | int padattr); | |
10573 | ||
10574 | -int gnet_stats_copy_basic(const seqcount_t *running, | |
10575 | +int gnet_stats_copy_basic(net_seqlock_t *running, | |
10576 | struct gnet_dump *d, | |
10577 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10578 | struct gnet_stats_basic_packed *b); | |
10579 | -void __gnet_stats_copy_basic(const seqcount_t *running, | |
10580 | +void __gnet_stats_copy_basic(net_seqlock_t *running, | |
10581 | struct gnet_stats_basic_packed *bstats, | |
10582 | struct gnet_stats_basic_cpu __percpu *cpu, | |
10583 | struct gnet_stats_basic_packed *b); | |
10584 | @@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
10585 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10586 | struct gnet_stats_rate_est64 *rate_est, | |
10587 | spinlock_t *stats_lock, | |
10588 | - seqcount_t *running, struct nlattr *opt); | |
10589 | + net_seqlock_t *running, struct nlattr *opt); | |
10590 | void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, | |
10591 | struct gnet_stats_rate_est64 *rate_est); | |
10592 | int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
10593 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
10594 | struct gnet_stats_rate_est64 *rate_est, | |
10595 | spinlock_t *stats_lock, | |
10596 | - seqcount_t *running, struct nlattr *opt); | |
10597 | + net_seqlock_t *running, struct nlattr *opt); | |
10598 | bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, | |
10599 | const struct gnet_stats_rate_est64 *rate_est); | |
10600 | #endif | |
10601 | diff --git a/include/net/neighbour.h b/include/net/neighbour.h | |
10602 | index 8b683841e574..bf656008f6e7 100644 | |
10603 | --- a/include/net/neighbour.h | |
10604 | +++ b/include/net/neighbour.h | |
10605 | @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) | |
10606 | } | |
10607 | #endif | |
10608 | ||
10609 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) | |
10610 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
10611 | { | |
10612 | unsigned int seq; | |
10613 | int hh_len; | |
10614 | @@ -501,7 +501,7 @@ struct neighbour_cb { | |
10615 | ||
10616 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | |
10617 | ||
10618 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | |
10619 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
10620 | const struct net_device *dev) | |
10621 | { | |
10622 | unsigned int seq; | |
10623 | diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h | |
10624 | new file mode 100644 | |
10625 | index 000000000000..a7034298a82a | |
10626 | --- /dev/null | |
10627 | +++ b/include/net/net_seq_lock.h | |
10628 | @@ -0,0 +1,15 @@ | |
10629 | +#ifndef __NET_NET_SEQ_LOCK_H__ | |
10630 | +#define __NET_NET_SEQ_LOCK_H__ | |
10631 | + | |
10632 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10633 | +# define net_seqlock_t seqlock_t | |
10634 | +# define net_seq_begin(__r) read_seqbegin(__r) | |
10635 | +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) | |
10636 | + | |
10637 | +#else | |
10638 | +# define net_seqlock_t seqcount_t | |
10639 | +# define net_seq_begin(__r) read_seqcount_begin(__r) | |
10640 | +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) | |
10641 | +#endif | |
10642 | + | |
10643 | +#endif | |
10644 | diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
c7c16703 | 10645 | index 7adf4386ac8f..d3fd5c357268 100644 |
1a6e0f06 JK |
10646 | --- a/include/net/netns/ipv4.h |
10647 | +++ b/include/net/netns/ipv4.h | |
c7c16703 | 10648 | @@ -69,6 +69,7 @@ struct netns_ipv4 { |
1a6e0f06 JK |
10649 | |
10650 | int sysctl_icmp_echo_ignore_all; | |
10651 | int sysctl_icmp_echo_ignore_broadcasts; | |
10652 | + int sysctl_icmp_echo_sysrq; | |
10653 | int sysctl_icmp_ignore_bogus_error_responses; | |
10654 | int sysctl_icmp_ratelimit; | |
10655 | int sysctl_icmp_ratemask; | |
10656 | diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h | |
c7c16703 | 10657 | index e6aa0a249672..b57736f2a8a3 100644 |
1a6e0f06 JK |
10658 | --- a/include/net/sch_generic.h |
10659 | +++ b/include/net/sch_generic.h | |
10660 | @@ -10,6 +10,7 @@ | |
10661 | #include <linux/dynamic_queue_limits.h> | |
10662 | #include <net/gen_stats.h> | |
10663 | #include <net/rtnetlink.h> | |
10664 | +#include <net/net_seq_lock.h> | |
10665 | ||
10666 | struct Qdisc_ops; | |
10667 | struct qdisc_walker; | |
c7c16703 | 10668 | @@ -86,7 +87,7 @@ struct Qdisc { |
1a6e0f06 | 10669 | struct sk_buff *gso_skb ____cacheline_aligned_in_smp; |
c7c16703 | 10670 | struct qdisc_skb_head q; |
1a6e0f06 JK |
10671 | struct gnet_stats_basic_packed bstats; |
10672 | - seqcount_t running; | |
10673 | + net_seqlock_t running; | |
10674 | struct gnet_stats_queue qstats; | |
10675 | unsigned long state; | |
10676 | struct Qdisc *next_sched; | |
c7c16703 | 10677 | @@ -98,13 +99,22 @@ struct Qdisc { |
1a6e0f06 JK |
10678 | spinlock_t busylock ____cacheline_aligned_in_smp; |
10679 | }; | |
10680 | ||
10681 | -static inline bool qdisc_is_running(const struct Qdisc *qdisc) | |
10682 | +static inline bool qdisc_is_running(struct Qdisc *qdisc) | |
10683 | { | |
10684 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10685 | + return spin_is_locked(&qdisc->running.lock) ? true : false; | |
10686 | +#else | |
10687 | return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; | |
10688 | +#endif | |
10689 | } | |
10690 | ||
10691 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
10692 | { | |
10693 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10694 | + if (try_write_seqlock(&qdisc->running)) | |
10695 | + return true; | |
10696 | + return false; | |
10697 | +#else | |
10698 | if (qdisc_is_running(qdisc)) | |
10699 | return false; | |
10700 | /* Variant of write_seqcount_begin() telling lockdep a trylock | |
c7c16703 | 10701 | @@ -113,11 +123,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) |
1a6e0f06 JK |
10702 | raw_write_seqcount_begin(&qdisc->running); |
10703 | seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); | |
10704 | return true; | |
10705 | +#endif | |
10706 | } | |
10707 | ||
10708 | static inline void qdisc_run_end(struct Qdisc *qdisc) | |
10709 | { | |
10710 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10711 | + write_sequnlock(&qdisc->running); | |
10712 | +#else | |
10713 | write_seqcount_end(&qdisc->running); | |
10714 | +#endif | |
10715 | } | |
10716 | ||
10717 | static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) | |
c7c16703 | 10718 | @@ -308,7 +323,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) |
1a6e0f06 JK |
10719 | return qdisc_lock(root); |
10720 | } | |
10721 | ||
10722 | -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10723 | +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
10724 | { | |
10725 | struct Qdisc *root = qdisc_root_sleeping(qdisc); | |
10726 | ||
10727 | diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h | |
10728 | new file mode 100644 | |
10729 | index 000000000000..f7710de1b1f3 | |
10730 | --- /dev/null | |
10731 | +++ b/include/trace/events/hist.h | |
10732 | @@ -0,0 +1,73 @@ | |
10733 | +#undef TRACE_SYSTEM | |
10734 | +#define TRACE_SYSTEM hist | |
10735 | + | |
10736 | +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) | |
10737 | +#define _TRACE_HIST_H | |
10738 | + | |
10739 | +#include "latency_hist.h" | |
10740 | +#include <linux/tracepoint.h> | |
10741 | + | |
10742 | +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) | |
10743 | +#define trace_preemptirqsoff_hist(a, b) | |
10744 | +#define trace_preemptirqsoff_hist_rcuidle(a, b) | |
10745 | +#else | |
10746 | +TRACE_EVENT(preemptirqsoff_hist, | |
10747 | + | |
10748 | + TP_PROTO(int reason, int starthist), | |
10749 | + | |
10750 | + TP_ARGS(reason, starthist), | |
10751 | + | |
10752 | + TP_STRUCT__entry( | |
10753 | + __field(int, reason) | |
10754 | + __field(int, starthist) | |
10755 | + ), | |
10756 | + | |
10757 | + TP_fast_assign( | |
10758 | + __entry->reason = reason; | |
10759 | + __entry->starthist = starthist; | |
10760 | + ), | |
10761 | + | |
10762 | + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), | |
10763 | + __entry->starthist ? "start" : "stop") | |
10764 | +); | |
10765 | +#endif | |
10766 | + | |
10767 | +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
10768 | +#define trace_hrtimer_interrupt(a, b, c, d) | |
10769 | +#else | |
10770 | +TRACE_EVENT(hrtimer_interrupt, | |
10771 | + | |
10772 | + TP_PROTO(int cpu, long long offset, struct task_struct *curr, | |
10773 | + struct task_struct *task), | |
10774 | + | |
10775 | + TP_ARGS(cpu, offset, curr, task), | |
10776 | + | |
10777 | + TP_STRUCT__entry( | |
10778 | + __field(int, cpu) | |
10779 | + __field(long long, offset) | |
10780 | + __array(char, ccomm, TASK_COMM_LEN) | |
10781 | + __field(int, cprio) | |
10782 | + __array(char, tcomm, TASK_COMM_LEN) | |
10783 | + __field(int, tprio) | |
10784 | + ), | |
10785 | + | |
10786 | + TP_fast_assign( | |
10787 | + __entry->cpu = cpu; | |
10788 | + __entry->offset = offset; | |
10789 | + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); | |
10790 | + __entry->cprio = curr->prio; | |
10791 | + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", | |
10792 | + task != NULL ? TASK_COMM_LEN : 7); | |
10793 | + __entry->tprio = task != NULL ? task->prio : -1; | |
10794 | + ), | |
10795 | + | |
10796 | + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", | |
10797 | + __entry->cpu, __entry->offset, __entry->ccomm, | |
10798 | + __entry->cprio, __entry->tcomm, __entry->tprio) | |
10799 | +); | |
10800 | +#endif | |
10801 | + | |
10802 | +#endif /* _TRACE_HIST_H */ | |
10803 | + | |
10804 | +/* This part must be outside protection */ | |
10805 | +#include <trace/define_trace.h> | |
10806 | diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h | |
10807 | new file mode 100644 | |
10808 | index 000000000000..d3f2fbd560b1 | |
10809 | --- /dev/null | |
10810 | +++ b/include/trace/events/latency_hist.h | |
10811 | @@ -0,0 +1,29 @@ | |
10812 | +#ifndef _LATENCY_HIST_H | |
10813 | +#define _LATENCY_HIST_H | |
10814 | + | |
10815 | +enum hist_action { | |
10816 | + IRQS_ON, | |
10817 | + PREEMPT_ON, | |
10818 | + TRACE_STOP, | |
10819 | + IRQS_OFF, | |
10820 | + PREEMPT_OFF, | |
10821 | + TRACE_START, | |
10822 | +}; | |
10823 | + | |
10824 | +static char *actions[] = { | |
10825 | + "IRQS_ON", | |
10826 | + "PREEMPT_ON", | |
10827 | + "TRACE_STOP", | |
10828 | + "IRQS_OFF", | |
10829 | + "PREEMPT_OFF", | |
10830 | + "TRACE_START", | |
10831 | +}; | |
10832 | + | |
10833 | +static inline char *getaction(int action) | |
10834 | +{ | |
10835 | + if (action >= 0 && action <= sizeof(actions)/sizeof(actions[0])) | |
10836 | + return actions[action]; | |
10837 | + return "unknown"; | |
10838 | +} | |
10839 | + | |
10840 | +#endif /* _LATENCY_HIST_H */ | |
7c18450a JK |
10841 | diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h |
10842 | index 9b90c57517a9..516ae88cddf4 100644 | |
10843 | --- a/include/trace/events/sched.h | |
10844 | +++ b/include/trace/events/sched.h | |
10845 | @@ -70,7 +70,7 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, | |
10846 | TP_fast_assign( | |
10847 | memcpy(__entry->comm, p->comm, TASK_COMM_LEN); | |
10848 | __entry->pid = p->pid; | |
10849 | - __entry->prio = p->prio; | |
10850 | + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ | |
10851 | __entry->success = 1; /* rudiment, kill when possible */ | |
10852 | __entry->target_cpu = task_cpu(p); | |
10853 | ), | |
10854 | @@ -147,6 +147,7 @@ TRACE_EVENT(sched_switch, | |
10855 | memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); | |
10856 | __entry->next_pid = next->pid; | |
10857 | __entry->next_prio = next->prio; | |
10858 | + /* XXX SCHED_DEADLINE */ | |
10859 | ), | |
10860 | ||
10861 | TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", | |
10862 | @@ -181,7 +182,7 @@ TRACE_EVENT(sched_migrate_task, | |
10863 | TP_fast_assign( | |
10864 | memcpy(__entry->comm, p->comm, TASK_COMM_LEN); | |
10865 | __entry->pid = p->pid; | |
10866 | - __entry->prio = p->prio; | |
10867 | + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ | |
10868 | __entry->orig_cpu = task_cpu(p); | |
10869 | __entry->dest_cpu = dest_cpu; | |
10870 | ), | |
10871 | @@ -206,7 +207,7 @@ DECLARE_EVENT_CLASS(sched_process_template, | |
10872 | TP_fast_assign( | |
10873 | memcpy(__entry->comm, p->comm, TASK_COMM_LEN); | |
10874 | __entry->pid = p->pid; | |
10875 | - __entry->prio = p->prio; | |
10876 | + __entry->prio = p->prio; /* XXX SCHED_DEADLINE */ | |
10877 | ), | |
10878 | ||
10879 | TP_printk("comm=%s pid=%d prio=%d", | |
10880 | @@ -253,7 +254,7 @@ TRACE_EVENT(sched_process_wait, | |
10881 | TP_fast_assign( | |
10882 | memcpy(__entry->comm, current->comm, TASK_COMM_LEN); | |
10883 | __entry->pid = pid_nr(pid); | |
10884 | - __entry->prio = current->prio; | |
10885 | + __entry->prio = current->prio; /* XXX SCHED_DEADLINE */ | |
10886 | ), | |
10887 | ||
10888 | TP_printk("comm=%s pid=%d prio=%d", | |
10889 | @@ -413,9 +414,9 @@ DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime, | |
10890 | */ | |
10891 | TRACE_EVENT(sched_pi_setprio, | |
10892 | ||
10893 | - TP_PROTO(struct task_struct *tsk, int newprio), | |
10894 | + TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task), | |
10895 | ||
10896 | - TP_ARGS(tsk, newprio), | |
10897 | + TP_ARGS(tsk, pi_task), | |
10898 | ||
10899 | TP_STRUCT__entry( | |
10900 | __array( char, comm, TASK_COMM_LEN ) | |
10901 | @@ -428,7 +429,8 @@ TRACE_EVENT(sched_pi_setprio, | |
10902 | memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); | |
10903 | __entry->pid = tsk->pid; | |
10904 | __entry->oldprio = tsk->prio; | |
10905 | - __entry->newprio = newprio; | |
10906 | + __entry->newprio = pi_task ? pi_task->prio : tsk->prio; | |
10907 | + /* XXX SCHED_DEADLINE bits missing */ | |
10908 | ), | |
10909 | ||
10910 | TP_printk("comm=%s pid=%d oldprio=%d newprio=%d", | |
1a6e0f06 | 10911 | diff --git a/init/Kconfig b/init/Kconfig |
c7c16703 | 10912 | index 34407f15e6d3..2ce33a32e65d 100644 |
1a6e0f06 JK |
10913 | --- a/init/Kconfig |
10914 | +++ b/init/Kconfig | |
c7c16703 | 10915 | @@ -506,7 +506,7 @@ config TINY_RCU |
1a6e0f06 JK |
10916 | |
10917 | config RCU_EXPERT | |
10918 | bool "Make expert-level adjustments to RCU configuration" | |
10919 | - default n | |
10920 | + default y if PREEMPT_RT_FULL | |
10921 | help | |
10922 | This option needs to be enabled if you wish to make | |
10923 | expert-level adjustments to RCU configuration. By default, | |
c7c16703 | 10924 | @@ -623,7 +623,7 @@ config RCU_FANOUT_LEAF |
1a6e0f06 JK |
10925 | |
10926 | config RCU_FAST_NO_HZ | |
10927 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
10928 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
10929 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
10930 | default n | |
10931 | help | |
10932 | This option permits CPUs to enter dynticks-idle state even if | |
c7c16703 | 10933 | @@ -650,7 +650,7 @@ config TREE_RCU_TRACE |
1a6e0f06 JK |
10934 | config RCU_BOOST |
10935 | bool "Enable RCU priority boosting" | |
10936 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
10937 | - default n | |
10938 | + default y if PREEMPT_RT_FULL | |
10939 | help | |
10940 | This option boosts the priority of preempted RCU readers that | |
10941 | block the current preemptible RCU grace period for too long. | |
c7c16703 JK |
10942 | @@ -781,19 +781,6 @@ config RCU_NOCB_CPU_ALL |
10943 | ||
10944 | endchoice | |
10945 | ||
10946 | -config RCU_EXPEDITE_BOOT | |
10947 | - bool | |
10948 | - default n | |
10949 | - help | |
10950 | - This option enables expedited grace periods at boot time, | |
10951 | - as if rcu_expedite_gp() had been invoked early in boot. | |
10952 | - The corresponding rcu_unexpedite_gp() is invoked from | |
10953 | - rcu_end_inkernel_boot(), which is intended to be invoked | |
10954 | - at the end of the kernel-only boot sequence, just before | |
10955 | - init is exec'ed. | |
10956 | - | |
10957 | - Accept the default if unsure. | |
10958 | - | |
10959 | endmenu # "RCU Subsystem" | |
10960 | ||
10961 | config BUILD_BIN2C | |
10962 | @@ -1064,6 +1051,7 @@ config CFS_BANDWIDTH | |
1a6e0f06 JK |
10963 | config RT_GROUP_SCHED |
10964 | bool "Group scheduling for SCHED_RR/FIFO" | |
10965 | depends on CGROUP_SCHED | |
10966 | + depends on !PREEMPT_RT_FULL | |
10967 | default n | |
10968 | help | |
10969 | This feature lets you explicitly allocate real CPU bandwidth | |
c7c16703 | 10970 | @@ -1772,6 +1760,7 @@ choice |
1a6e0f06 JK |
10971 | |
10972 | config SLAB | |
10973 | bool "SLAB" | |
10974 | + depends on !PREEMPT_RT_FULL | |
10975 | select HAVE_HARDENED_USERCOPY_ALLOCATOR | |
10976 | help | |
10977 | The regular slab allocator that is established and known to work | |
c7c16703 | 10978 | @@ -1792,6 +1781,7 @@ config SLUB |
1a6e0f06 JK |
10979 | config SLOB |
10980 | depends on EXPERT | |
10981 | bool "SLOB (Simple Allocator)" | |
c7c16703 JK |
10982 | + depends on !PREEMPT_RT_FULL |
10983 | help | |
10984 | SLOB replaces the stock allocator with a drastically simpler | |
10985 | allocator. SLOB is generally more space efficient but | |
10986 | @@ -1810,7 +1800,7 @@ config SLAB_FREELIST_RANDOM | |
10987 | ||
10988 | config SLUB_CPU_PARTIAL | |
10989 | default y | |
10990 | - depends on SLUB && SMP | |
10991 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
10992 | bool "SLUB per cpu partial cache" | |
10993 | help | |
10994 | Per cpu partial caches accellerate objects allocation and freeing | |
10995 | diff --git a/init/Makefile b/init/Makefile | |
10996 | index c4fb45525d08..821190dfaa75 100644 | |
10997 | --- a/init/Makefile | |
10998 | +++ b/init/Makefile | |
10999 | @@ -35,4 +35,4 @@ $(obj)/version.o: include/generated/compile.h | |
11000 | include/generated/compile.h: FORCE | |
11001 | @$($(quiet)chk_compile.h) | |
11002 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
11003 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
11004 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
11005 | diff --git a/init/main.c b/init/main.c | |
7c18450a | 11006 | index ae3996ae9bac..6470deef01c9 100644 |
c7c16703 JK |
11007 | --- a/init/main.c |
11008 | +++ b/init/main.c | |
11009 | @@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) | |
11010 | setup_command_line(command_line); | |
11011 | setup_nr_cpu_ids(); | |
11012 | setup_per_cpu_areas(); | |
11013 | + softirq_early_init(); | |
11014 | boot_cpu_state_init(); | |
11015 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
1a6e0f06 | 11016 | |
1a6e0f06 | 11017 | diff --git a/ipc/sem.c b/ipc/sem.c |
c7c16703 | 11018 | index 10b94bc59d4a..b8360eaacc7a 100644 |
1a6e0f06 JK |
11019 | --- a/ipc/sem.c |
11020 | +++ b/ipc/sem.c | |
11021 | @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) | |
11022 | static void wake_up_sem_queue_prepare(struct list_head *pt, | |
11023 | struct sem_queue *q, int error) | |
11024 | { | |
11025 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11026 | + struct task_struct *p = q->sleeper; | |
11027 | + get_task_struct(p); | |
11028 | + q->status = error; | |
11029 | + wake_up_process(p); | |
11030 | + put_task_struct(p); | |
11031 | +#else | |
11032 | if (list_empty(pt)) { | |
11033 | /* | |
11034 | * Hold preempt off so that we don't get preempted and have the | |
11035 | @@ -723,6 +730,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
11036 | q->pid = error; | |
11037 | ||
11038 | list_add_tail(&q->list, pt); | |
11039 | +#endif | |
11040 | } | |
11041 | ||
11042 | /** | |
11043 | @@ -736,6 +744,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
11044 | */ | |
11045 | static void wake_up_sem_queue_do(struct list_head *pt) | |
11046 | { | |
11047 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11048 | struct sem_queue *q, *t; | |
11049 | int did_something; | |
11050 | ||
11051 | @@ -748,6 +757,7 @@ static void wake_up_sem_queue_do(struct list_head *pt) | |
11052 | } | |
11053 | if (did_something) | |
11054 | preempt_enable(); | |
11055 | +#endif | |
11056 | } | |
11057 | ||
11058 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |
11059 | diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks | |
11060 | index ebdb0043203a..b9e6aa7e5aa6 100644 | |
11061 | --- a/kernel/Kconfig.locks | |
11062 | +++ b/kernel/Kconfig.locks | |
11063 | @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW | |
11064 | ||
11065 | config MUTEX_SPIN_ON_OWNER | |
11066 | def_bool y | |
11067 | - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW | |
11068 | + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
11069 | ||
11070 | config RWSEM_SPIN_ON_OWNER | |
11071 | def_bool y | |
11072 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
11073 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
11074 | ||
11075 | config LOCK_SPIN_ON_OWNER | |
11076 | def_bool y | |
11077 | diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt | |
11078 | index 3f9c97419f02..11dbe26a8279 100644 | |
11079 | --- a/kernel/Kconfig.preempt | |
11080 | +++ b/kernel/Kconfig.preempt | |
11081 | @@ -1,3 +1,16 @@ | |
11082 | +config PREEMPT | |
11083 | + bool | |
11084 | + select PREEMPT_COUNT | |
11085 | + | |
11086 | +config PREEMPT_RT_BASE | |
11087 | + bool | |
11088 | + select PREEMPT | |
11089 | + | |
11090 | +config HAVE_PREEMPT_LAZY | |
11091 | + bool | |
11092 | + | |
11093 | +config PREEMPT_LAZY | |
11094 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
11095 | ||
11096 | choice | |
11097 | prompt "Preemption Model" | |
11098 | @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY | |
11099 | ||
11100 | Select this if you are building a kernel for a desktop system. | |
11101 | ||
11102 | -config PREEMPT | |
11103 | +config PREEMPT__LL | |
11104 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
11105 | - select PREEMPT_COUNT | |
11106 | + select PREEMPT | |
11107 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
11108 | help | |
11109 | This option reduces the latency of the kernel by making | |
11110 | @@ -52,6 +65,22 @@ config PREEMPT | |
11111 | embedded system with latency requirements in the milliseconds | |
11112 | range. | |
11113 | ||
11114 | +config PREEMPT_RTB | |
11115 | + bool "Preemptible Kernel (Basic RT)" | |
11116 | + select PREEMPT_RT_BASE | |
11117 | + help | |
11118 | + This option is basically the same as (Low-Latency Desktop) but | |
11119 | + enables changes which are preliminary for the full preemptible | |
11120 | + RT kernel. | |
11121 | + | |
11122 | +config PREEMPT_RT_FULL | |
11123 | + bool "Fully Preemptible Kernel (RT)" | |
11124 | + depends on IRQ_FORCED_THREADING | |
11125 | + select PREEMPT_RT_BASE | |
11126 | + select PREEMPT_RCU | |
11127 | + help | |
11128 | + All and everything | |
11129 | + | |
11130 | endchoice | |
11131 | ||
11132 | config PREEMPT_COUNT | |
1a6e0f06 | 11133 | diff --git a/kernel/cgroup.c b/kernel/cgroup.c |
7c18450a | 11134 | index a3d2aad2443f..bb6b252648ff 100644 |
1a6e0f06 JK |
11135 | --- a/kernel/cgroup.c |
11136 | +++ b/kernel/cgroup.c | |
7c18450a | 11137 | @@ -5041,10 +5041,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) |
1a6e0f06 JK |
11138 | queue_work(cgroup_destroy_wq, &css->destroy_work); |
11139 | } | |
11140 | ||
11141 | -static void css_release_work_fn(struct work_struct *work) | |
11142 | +static void css_release_work_fn(struct swork_event *sev) | |
11143 | { | |
11144 | struct cgroup_subsys_state *css = | |
11145 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
11146 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
11147 | struct cgroup_subsys *ss = css->ss; | |
11148 | struct cgroup *cgrp = css->cgroup; | |
11149 | ||
7c18450a | 11150 | @@ -5087,8 +5087,8 @@ static void css_release(struct percpu_ref *ref) |
1a6e0f06 JK |
11151 | struct cgroup_subsys_state *css = |
11152 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
11153 | ||
11154 | - INIT_WORK(&css->destroy_work, css_release_work_fn); | |
11155 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
11156 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
11157 | + swork_queue(&css->destroy_swork); | |
11158 | } | |
11159 | ||
11160 | static void init_and_link_css(struct cgroup_subsys_state *css, | |
7c18450a | 11161 | @@ -5740,6 +5740,7 @@ static int __init cgroup_wq_init(void) |
1a6e0f06 JK |
11162 | */ |
11163 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
11164 | BUG_ON(!cgroup_destroy_wq); | |
11165 | + BUG_ON(swork_get()); | |
11166 | ||
11167 | /* | |
11168 | * Used to destroy pidlists and separate to serve as flush domain. | |
11169 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
7c18450a | 11170 | index 99c6c568bc55..f1c64e563970 100644 |
1a6e0f06 JK |
11171 | --- a/kernel/cpu.c |
11172 | +++ b/kernel/cpu.c | |
c7c16703 | 11173 | @@ -239,6 +239,289 @@ static struct { |
1a6e0f06 JK |
11174 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) |
11175 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | |
11176 | ||
11177 | +/** | |
11178 | + * hotplug_pcp - per cpu hotplug descriptor | |
11179 | + * @unplug: set when pin_current_cpu() needs to sync tasks | |
11180 | + * @sync_tsk: the task that waits for tasks to finish pinned sections | |
11181 | + * @refcount: counter of tasks in pinned sections | |
11182 | + * @grab_lock: set when the tasks entering pinned sections should wait | |
11183 | + * @synced: notifier for @sync_tsk to tell cpu_down it's finished | |
11184 | + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) | |
11185 | + * @mutex_init: zero if the mutex hasn't been initialized yet. | |
11186 | + * | |
11187 | + * Although @unplug and @sync_tsk may point to the same task, the @unplug | |
11188 | + * is used as a flag and still exists after @sync_tsk has exited and | |
11189 | + * @sync_tsk set to NULL. | |
11190 | + */ | |
11191 | +struct hotplug_pcp { | |
11192 | + struct task_struct *unplug; | |
11193 | + struct task_struct *sync_tsk; | |
11194 | + int refcount; | |
11195 | + int grab_lock; | |
11196 | + struct completion synced; | |
11197 | + struct completion unplug_wait; | |
11198 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11199 | + /* | |
11200 | + * Note, on PREEMPT_RT, the hotplug lock must save the state of | |
11201 | + * the task, otherwise the mutex will cause the task to fail | |
11202 | + * to sleep when required. (Because it's called from migrate_disable()) | |
11203 | + * | |
11204 | + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's | |
11205 | + * state. | |
11206 | + */ | |
11207 | + spinlock_t lock; | |
11208 | +#else | |
11209 | + struct mutex mutex; | |
11210 | +#endif | |
11211 | + int mutex_init; | |
11212 | +}; | |
11213 | + | |
11214 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11215 | +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) | |
11216 | +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) | |
11217 | +#else | |
11218 | +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) | |
11219 | +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) | |
11220 | +#endif | |
11221 | + | |
11222 | +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); | |
11223 | + | |
11224 | +/** | |
11225 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
11226 | + * | |
11227 | + * Lightweight version of get_online_cpus() to prevent cpu from being | |
11228 | + * unplugged when code runs in a migration disabled region. | |
11229 | + * | |
11230 | + * Must be called with preemption disabled (preempt_count = 1)! | |
11231 | + */ | |
11232 | +void pin_current_cpu(void) | |
11233 | +{ | |
11234 | + struct hotplug_pcp *hp; | |
11235 | + int force = 0; | |
11236 | + | |
11237 | +retry: | |
11238 | + hp = this_cpu_ptr(&hotplug_pcp); | |
11239 | + | |
11240 | + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || | |
11241 | + hp->unplug == current) { | |
11242 | + hp->refcount++; | |
11243 | + return; | |
11244 | + } | |
11245 | + if (hp->grab_lock) { | |
11246 | + preempt_enable(); | |
11247 | + hotplug_lock(hp); | |
11248 | + hotplug_unlock(hp); | |
11249 | + } else { | |
11250 | + preempt_enable(); | |
11251 | + /* | |
11252 | + * Try to push this task off of this CPU. | |
11253 | + */ | |
11254 | + if (!migrate_me()) { | |
11255 | + preempt_disable(); | |
11256 | + hp = this_cpu_ptr(&hotplug_pcp); | |
11257 | + if (!hp->grab_lock) { | |
11258 | + /* | |
11259 | + * Just let it continue it's already pinned | |
11260 | + * or about to sleep. | |
11261 | + */ | |
11262 | + force = 1; | |
11263 | + goto retry; | |
11264 | + } | |
11265 | + preempt_enable(); | |
11266 | + } | |
11267 | + } | |
11268 | + preempt_disable(); | |
11269 | + goto retry; | |
11270 | +} | |
11271 | + | |
11272 | +/** | |
11273 | + * unpin_current_cpu - Allow unplug of current cpu | |
11274 | + * | |
11275 | + * Must be called with preemption or interrupts disabled! | |
11276 | + */ | |
11277 | +void unpin_current_cpu(void) | |
11278 | +{ | |
11279 | + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); | |
11280 | + | |
11281 | + WARN_ON(hp->refcount <= 0); | |
11282 | + | |
11283 | + /* This is safe. sync_unplug_thread is pinned to this cpu */ | |
11284 | + if (!--hp->refcount && hp->unplug && hp->unplug != current) | |
11285 | + wake_up_process(hp->unplug); | |
11286 | +} | |
11287 | + | |
11288 | +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | |
11289 | +{ | |
11290 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11291 | + while (hp->refcount) { | |
11292 | + schedule_preempt_disabled(); | |
11293 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11294 | + } | |
11295 | +} | |
11296 | + | |
11297 | +static int sync_unplug_thread(void *data) | |
11298 | +{ | |
11299 | + struct hotplug_pcp *hp = data; | |
11300 | + | |
11301 | + wait_for_completion(&hp->unplug_wait); | |
11302 | + preempt_disable(); | |
11303 | + hp->unplug = current; | |
11304 | + wait_for_pinned_cpus(hp); | |
11305 | + | |
11306 | + /* | |
11307 | + * This thread will synchronize the cpu_down() with threads | |
11308 | + * that have pinned the CPU. When the pinned CPU count reaches | |
11309 | + * zero, we inform the cpu_down code to continue to the next step. | |
11310 | + */ | |
11311 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11312 | + preempt_enable(); | |
11313 | + complete(&hp->synced); | |
11314 | + | |
11315 | + /* | |
11316 | + * If all succeeds, the next step will need tasks to wait till | |
11317 | + * the CPU is offline before continuing. To do this, the grab_lock | |
11318 | + * is set and tasks going into pin_current_cpu() will block on the | |
11319 | + * mutex. But we still need to wait for those that are already in | |
11320 | + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() | |
11321 | + * will kick this thread out. | |
11322 | + */ | |
11323 | + while (!hp->grab_lock && !kthread_should_stop()) { | |
11324 | + schedule(); | |
11325 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11326 | + } | |
11327 | + | |
11328 | + /* Make sure grab_lock is seen before we see a stale completion */ | |
11329 | + smp_mb(); | |
11330 | + | |
11331 | + /* | |
11332 | + * Now just before cpu_down() enters stop machine, we need to make | |
11333 | + * sure all tasks that are in pinned CPU sections are out, and new | |
11334 | + * tasks will now grab the lock, keeping them from entering pinned | |
11335 | + * CPU sections. | |
11336 | + */ | |
11337 | + if (!kthread_should_stop()) { | |
11338 | + preempt_disable(); | |
11339 | + wait_for_pinned_cpus(hp); | |
11340 | + preempt_enable(); | |
11341 | + complete(&hp->synced); | |
11342 | + } | |
11343 | + | |
11344 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11345 | + while (!kthread_should_stop()) { | |
11346 | + schedule(); | |
11347 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
11348 | + } | |
11349 | + set_current_state(TASK_RUNNING); | |
11350 | + | |
11351 | + /* | |
11352 | + * Force this thread off this CPU as it's going down and | |
11353 | + * we don't want any more work on this CPU. | |
11354 | + */ | |
11355 | + current->flags &= ~PF_NO_SETAFFINITY; | |
11356 | + set_cpus_allowed_ptr(current, cpu_present_mask); | |
11357 | + migrate_me(); | |
11358 | + return 0; | |
11359 | +} | |
11360 | + | |
11361 | +static void __cpu_unplug_sync(struct hotplug_pcp *hp) | |
11362 | +{ | |
11363 | + wake_up_process(hp->sync_tsk); | |
11364 | + wait_for_completion(&hp->synced); | |
11365 | +} | |
11366 | + | |
11367 | +static void __cpu_unplug_wait(unsigned int cpu) | |
11368 | +{ | |
11369 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11370 | + | |
11371 | + complete(&hp->unplug_wait); | |
11372 | + wait_for_completion(&hp->synced); | |
11373 | +} | |
11374 | + | |
11375 | +/* | |
11376 | + * Start the sync_unplug_thread on the target cpu and wait for it to | |
11377 | + * complete. | |
11378 | + */ | |
11379 | +static int cpu_unplug_begin(unsigned int cpu) | |
11380 | +{ | |
11381 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11382 | + int err; | |
11383 | + | |
11384 | + /* Protected by cpu_hotplug.lock */ | |
11385 | + if (!hp->mutex_init) { | |
11386 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11387 | + spin_lock_init(&hp->lock); | |
11388 | +#else | |
11389 | + mutex_init(&hp->mutex); | |
11390 | +#endif | |
11391 | + hp->mutex_init = 1; | |
11392 | + } | |
11393 | + | |
11394 | + /* Inform the scheduler to migrate tasks off this CPU */ | |
11395 | + tell_sched_cpu_down_begin(cpu); | |
11396 | + | |
11397 | + init_completion(&hp->synced); | |
11398 | + init_completion(&hp->unplug_wait); | |
11399 | + | |
11400 | + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); | |
11401 | + if (IS_ERR(hp->sync_tsk)) { | |
11402 | + err = PTR_ERR(hp->sync_tsk); | |
11403 | + hp->sync_tsk = NULL; | |
11404 | + return err; | |
11405 | + } | |
11406 | + kthread_bind(hp->sync_tsk, cpu); | |
11407 | + | |
11408 | + /* | |
11409 | + * Wait for tasks to get out of the pinned sections, | |
11410 | + * it's still OK if new tasks enter. Some CPU notifiers will | |
11411 | + * wait for tasks that are going to enter these sections and | |
11412 | + * we must not have them block. | |
11413 | + */ | |
11414 | + wake_up_process(hp->sync_tsk); | |
11415 | + return 0; | |
11416 | +} | |
11417 | + | |
11418 | +static void cpu_unplug_sync(unsigned int cpu) | |
11419 | +{ | |
11420 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11421 | + | |
11422 | + init_completion(&hp->synced); | |
11423 | + /* The completion needs to be initialzied before setting grab_lock */ | |
11424 | + smp_wmb(); | |
11425 | + | |
11426 | + /* Grab the mutex before setting grab_lock */ | |
11427 | + hotplug_lock(hp); | |
11428 | + hp->grab_lock = 1; | |
11429 | + | |
11430 | + /* | |
11431 | + * The CPU notifiers have been completed. | |
11432 | + * Wait for tasks to get out of pinned CPU sections and have new | |
11433 | + * tasks block until the CPU is completely down. | |
11434 | + */ | |
11435 | + __cpu_unplug_sync(hp); | |
11436 | + | |
11437 | + /* All done with the sync thread */ | |
11438 | + kthread_stop(hp->sync_tsk); | |
11439 | + hp->sync_tsk = NULL; | |
11440 | +} | |
11441 | + | |
11442 | +static void cpu_unplug_done(unsigned int cpu) | |
11443 | +{ | |
11444 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
11445 | + | |
11446 | + hp->unplug = NULL; | |
11447 | + /* Let all tasks know cpu unplug is finished before cleaning up */ | |
11448 | + smp_wmb(); | |
11449 | + | |
11450 | + if (hp->sync_tsk) | |
11451 | + kthread_stop(hp->sync_tsk); | |
11452 | + | |
11453 | + if (hp->grab_lock) { | |
11454 | + hotplug_unlock(hp); | |
11455 | + /* protected by cpu_hotplug.lock */ | |
11456 | + hp->grab_lock = 0; | |
11457 | + } | |
11458 | + tell_sched_cpu_down_done(cpu); | |
11459 | +} | |
11460 | ||
11461 | void get_online_cpus(void) | |
11462 | { | |
c7c16703 | 11463 | @@ -789,10 +1072,14 @@ static int takedown_cpu(unsigned int cpu) |
1a6e0f06 JK |
11464 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
11465 | int err; | |
11466 | ||
11467 | + __cpu_unplug_wait(cpu); | |
11468 | /* Park the smpboot threads */ | |
11469 | kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); | |
11470 | smpboot_park_threads(cpu); | |
11471 | ||
11472 | + /* Notifiers are done. Don't let any more tasks pin this CPU. */ | |
11473 | + cpu_unplug_sync(cpu); | |
11474 | + | |
11475 | /* | |
11476 | * Prevent irq alloc/free while the dying cpu reorganizes the | |
11477 | * interrupt affinities. | |
c7c16703 | 11478 | @@ -877,6 +1164,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11479 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); |
11480 | int prev_state, ret = 0; | |
11481 | bool hasdied = false; | |
11482 | + int mycpu; | |
11483 | + cpumask_var_t cpumask; | |
11484 | + cpumask_var_t cpumask_org; | |
11485 | ||
11486 | if (num_online_cpus() == 1) | |
11487 | return -EBUSY; | |
c7c16703 | 11488 | @@ -884,7 +1174,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11489 | if (!cpu_present(cpu)) |
11490 | return -EINVAL; | |
11491 | ||
11492 | + /* Move the downtaker off the unplug cpu */ | |
11493 | + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) | |
11494 | + return -ENOMEM; | |
11495 | + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { | |
11496 | + free_cpumask_var(cpumask); | |
11497 | + return -ENOMEM; | |
11498 | + } | |
11499 | + | |
11500 | + cpumask_copy(cpumask_org, tsk_cpus_allowed(current)); | |
11501 | + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); | |
11502 | + set_cpus_allowed_ptr(current, cpumask); | |
11503 | + free_cpumask_var(cpumask); | |
11504 | + migrate_disable(); | |
11505 | + mycpu = smp_processor_id(); | |
11506 | + if (mycpu == cpu) { | |
11507 | + printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); | |
11508 | + migrate_enable(); | |
11509 | + ret = -EBUSY; | |
11510 | + goto restore_cpus; | |
11511 | + } | |
11512 | + | |
11513 | + migrate_enable(); | |
11514 | cpu_hotplug_begin(); | |
11515 | + ret = cpu_unplug_begin(cpu); | |
11516 | + if (ret) { | |
11517 | + printk("cpu_unplug_begin(%d) failed\n", cpu); | |
11518 | + goto out_cancel; | |
11519 | + } | |
11520 | ||
11521 | cpuhp_tasks_frozen = tasks_frozen; | |
11522 | ||
c7c16703 | 11523 | @@ -923,10 +1240,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, |
1a6e0f06 JK |
11524 | |
11525 | hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; | |
11526 | out: | |
11527 | + cpu_unplug_done(cpu); | |
11528 | +out_cancel: | |
11529 | cpu_hotplug_done(); | |
11530 | /* This post dead nonsense must die */ | |
11531 | if (!ret && hasdied) | |
11532 | cpu_notify_nofail(CPU_POST_DEAD, cpu); | |
11533 | +restore_cpus: | |
11534 | + set_cpus_allowed_ptr(current, cpumask_org); | |
11535 | + free_cpumask_var(cpumask_org); | |
11536 | return ret; | |
11537 | } | |
11538 | ||
33c7bf0f JK |
11539 | @@ -1240,6 +1562,8 @@ core_initcall(cpu_hotplug_pm_sync_init); |
11540 | ||
11541 | #endif /* CONFIG_PM_SLEEP_SMP */ | |
11542 | ||
11543 | +int __boot_cpu_id; | |
11544 | + | |
11545 | #endif /* CONFIG_SMP */ | |
11546 | ||
11547 | /* Boot processor state steps */ | |
7c18450a | 11548 | @@ -1924,6 +2248,10 @@ void __init boot_cpu_init(void) |
33c7bf0f JK |
11549 | set_cpu_active(cpu, true); |
11550 | set_cpu_present(cpu, true); | |
11551 | set_cpu_possible(cpu, true); | |
11552 | + | |
11553 | +#ifdef CONFIG_SMP | |
11554 | + __boot_cpu_id = cpu; | |
11555 | +#endif | |
11556 | } | |
11557 | ||
11558 | /* | |
1f39f580 JK |
11559 | diff --git a/kernel/cpuset.c b/kernel/cpuset.c |
11560 | index 29f815d2ef7e..341b17f24f95 100644 | |
11561 | --- a/kernel/cpuset.c | |
11562 | +++ b/kernel/cpuset.c | |
11563 | @@ -284,7 +284,7 @@ static struct cpuset top_cpuset = { | |
11564 | */ | |
11565 | ||
11566 | static DEFINE_MUTEX(cpuset_mutex); | |
11567 | -static DEFINE_SPINLOCK(callback_lock); | |
11568 | +static DEFINE_RAW_SPINLOCK(callback_lock); | |
11569 | ||
11570 | static struct workqueue_struct *cpuset_migrate_mm_wq; | |
11571 | ||
11572 | @@ -907,9 +907,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus) | |
11573 | continue; | |
11574 | rcu_read_unlock(); | |
11575 | ||
11576 | - spin_lock_irq(&callback_lock); | |
11577 | + raw_spin_lock_irq(&callback_lock); | |
11578 | cpumask_copy(cp->effective_cpus, new_cpus); | |
11579 | - spin_unlock_irq(&callback_lock); | |
11580 | + raw_spin_unlock_irq(&callback_lock); | |
11581 | ||
11582 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | |
11583 | !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); | |
11584 | @@ -974,9 +974,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, | |
11585 | if (retval < 0) | |
11586 | return retval; | |
11587 | ||
11588 | - spin_lock_irq(&callback_lock); | |
11589 | + raw_spin_lock_irq(&callback_lock); | |
11590 | cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); | |
11591 | - spin_unlock_irq(&callback_lock); | |
11592 | + raw_spin_unlock_irq(&callback_lock); | |
11593 | ||
11594 | /* use trialcs->cpus_allowed as a temp variable */ | |
11595 | update_cpumasks_hier(cs, trialcs->cpus_allowed); | |
11596 | @@ -1176,9 +1176,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) | |
11597 | continue; | |
11598 | rcu_read_unlock(); | |
11599 | ||
11600 | - spin_lock_irq(&callback_lock); | |
11601 | + raw_spin_lock_irq(&callback_lock); | |
11602 | cp->effective_mems = *new_mems; | |
11603 | - spin_unlock_irq(&callback_lock); | |
11604 | + raw_spin_unlock_irq(&callback_lock); | |
11605 | ||
11606 | WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) && | |
11607 | !nodes_equal(cp->mems_allowed, cp->effective_mems)); | |
11608 | @@ -1246,9 +1246,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, | |
11609 | if (retval < 0) | |
11610 | goto done; | |
11611 | ||
11612 | - spin_lock_irq(&callback_lock); | |
11613 | + raw_spin_lock_irq(&callback_lock); | |
11614 | cs->mems_allowed = trialcs->mems_allowed; | |
11615 | - spin_unlock_irq(&callback_lock); | |
11616 | + raw_spin_unlock_irq(&callback_lock); | |
11617 | ||
11618 | /* use trialcs->mems_allowed as a temp variable */ | |
11619 | update_nodemasks_hier(cs, &trialcs->mems_allowed); | |
11620 | @@ -1339,9 +1339,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, | |
11621 | spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) | |
11622 | || (is_spread_page(cs) != is_spread_page(trialcs))); | |
11623 | ||
11624 | - spin_lock_irq(&callback_lock); | |
11625 | + raw_spin_lock_irq(&callback_lock); | |
11626 | cs->flags = trialcs->flags; | |
11627 | - spin_unlock_irq(&callback_lock); | |
11628 | + raw_spin_unlock_irq(&callback_lock); | |
11629 | ||
11630 | if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) | |
11631 | rebuild_sched_domains_locked(); | |
11632 | @@ -1756,7 +1756,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
11633 | cpuset_filetype_t type = seq_cft(sf)->private; | |
11634 | int ret = 0; | |
11635 | ||
11636 | - spin_lock_irq(&callback_lock); | |
11637 | + raw_spin_lock_irq(&callback_lock); | |
11638 | ||
11639 | switch (type) { | |
11640 | case FILE_CPULIST: | |
11641 | @@ -1775,7 +1775,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) | |
11642 | ret = -EINVAL; | |
11643 | } | |
11644 | ||
11645 | - spin_unlock_irq(&callback_lock); | |
11646 | + raw_spin_unlock_irq(&callback_lock); | |
11647 | return ret; | |
11648 | } | |
11649 | ||
11650 | @@ -1989,12 +1989,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |
11651 | ||
11652 | cpuset_inc(); | |
11653 | ||
11654 | - spin_lock_irq(&callback_lock); | |
11655 | + raw_spin_lock_irq(&callback_lock); | |
11656 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | |
11657 | cpumask_copy(cs->effective_cpus, parent->effective_cpus); | |
11658 | cs->effective_mems = parent->effective_mems; | |
11659 | } | |
11660 | - spin_unlock_irq(&callback_lock); | |
11661 | + raw_spin_unlock_irq(&callback_lock); | |
11662 | ||
11663 | if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) | |
11664 | goto out_unlock; | |
11665 | @@ -2021,12 +2021,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) | |
11666 | } | |
11667 | rcu_read_unlock(); | |
11668 | ||
11669 | - spin_lock_irq(&callback_lock); | |
11670 | + raw_spin_lock_irq(&callback_lock); | |
11671 | cs->mems_allowed = parent->mems_allowed; | |
11672 | cs->effective_mems = parent->mems_allowed; | |
11673 | cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); | |
11674 | cpumask_copy(cs->effective_cpus, parent->cpus_allowed); | |
11675 | - spin_unlock_irq(&callback_lock); | |
11676 | + raw_spin_unlock_irq(&callback_lock); | |
11677 | out_unlock: | |
11678 | mutex_unlock(&cpuset_mutex); | |
11679 | return 0; | |
11680 | @@ -2065,7 +2065,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) | |
11681 | static void cpuset_bind(struct cgroup_subsys_state *root_css) | |
11682 | { | |
11683 | mutex_lock(&cpuset_mutex); | |
11684 | - spin_lock_irq(&callback_lock); | |
11685 | + raw_spin_lock_irq(&callback_lock); | |
11686 | ||
11687 | if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { | |
11688 | cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); | |
11689 | @@ -2076,7 +2076,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) | |
11690 | top_cpuset.mems_allowed = top_cpuset.effective_mems; | |
11691 | } | |
11692 | ||
11693 | - spin_unlock_irq(&callback_lock); | |
11694 | + raw_spin_unlock_irq(&callback_lock); | |
11695 | mutex_unlock(&cpuset_mutex); | |
11696 | } | |
11697 | ||
11698 | @@ -2177,12 +2177,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, | |
11699 | { | |
11700 | bool is_empty; | |
11701 | ||
11702 | - spin_lock_irq(&callback_lock); | |
11703 | + raw_spin_lock_irq(&callback_lock); | |
11704 | cpumask_copy(cs->cpus_allowed, new_cpus); | |
11705 | cpumask_copy(cs->effective_cpus, new_cpus); | |
11706 | cs->mems_allowed = *new_mems; | |
11707 | cs->effective_mems = *new_mems; | |
11708 | - spin_unlock_irq(&callback_lock); | |
11709 | + raw_spin_unlock_irq(&callback_lock); | |
11710 | ||
11711 | /* | |
11712 | * Don't call update_tasks_cpumask() if the cpuset becomes empty, | |
11713 | @@ -2219,10 +2219,10 @@ hotplug_update_tasks(struct cpuset *cs, | |
11714 | if (nodes_empty(*new_mems)) | |
11715 | *new_mems = parent_cs(cs)->effective_mems; | |
11716 | ||
11717 | - spin_lock_irq(&callback_lock); | |
11718 | + raw_spin_lock_irq(&callback_lock); | |
11719 | cpumask_copy(cs->effective_cpus, new_cpus); | |
11720 | cs->effective_mems = *new_mems; | |
11721 | - spin_unlock_irq(&callback_lock); | |
11722 | + raw_spin_unlock_irq(&callback_lock); | |
11723 | ||
11724 | if (cpus_updated) | |
11725 | update_tasks_cpumask(cs); | |
11726 | @@ -2308,21 +2308,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work) | |
11727 | ||
11728 | /* synchronize cpus_allowed to cpu_active_mask */ | |
11729 | if (cpus_updated) { | |
11730 | - spin_lock_irq(&callback_lock); | |
11731 | + raw_spin_lock_irq(&callback_lock); | |
11732 | if (!on_dfl) | |
11733 | cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); | |
11734 | cpumask_copy(top_cpuset.effective_cpus, &new_cpus); | |
11735 | - spin_unlock_irq(&callback_lock); | |
11736 | + raw_spin_unlock_irq(&callback_lock); | |
11737 | /* we don't mess with cpumasks of tasks in top_cpuset */ | |
11738 | } | |
11739 | ||
11740 | /* synchronize mems_allowed to N_MEMORY */ | |
11741 | if (mems_updated) { | |
11742 | - spin_lock_irq(&callback_lock); | |
11743 | + raw_spin_lock_irq(&callback_lock); | |
11744 | if (!on_dfl) | |
11745 | top_cpuset.mems_allowed = new_mems; | |
11746 | top_cpuset.effective_mems = new_mems; | |
11747 | - spin_unlock_irq(&callback_lock); | |
11748 | + raw_spin_unlock_irq(&callback_lock); | |
11749 | update_tasks_nodemask(&top_cpuset); | |
11750 | } | |
11751 | ||
11752 | @@ -2420,11 +2420,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) | |
11753 | { | |
11754 | unsigned long flags; | |
11755 | ||
11756 | - spin_lock_irqsave(&callback_lock, flags); | |
11757 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11758 | rcu_read_lock(); | |
11759 | guarantee_online_cpus(task_cs(tsk), pmask); | |
11760 | rcu_read_unlock(); | |
11761 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11762 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11763 | } | |
11764 | ||
11765 | void cpuset_cpus_allowed_fallback(struct task_struct *tsk) | |
11766 | @@ -2472,11 +2472,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) | |
11767 | nodemask_t mask; | |
11768 | unsigned long flags; | |
11769 | ||
11770 | - spin_lock_irqsave(&callback_lock, flags); | |
11771 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11772 | rcu_read_lock(); | |
11773 | guarantee_online_mems(task_cs(tsk), &mask); | |
11774 | rcu_read_unlock(); | |
11775 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11776 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11777 | ||
11778 | return mask; | |
11779 | } | |
11780 | @@ -2568,14 +2568,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) | |
11781 | return true; | |
11782 | ||
11783 | /* Not hardwall and node outside mems_allowed: scan up cpusets */ | |
11784 | - spin_lock_irqsave(&callback_lock, flags); | |
11785 | + raw_spin_lock_irqsave(&callback_lock, flags); | |
11786 | ||
11787 | rcu_read_lock(); | |
11788 | cs = nearest_hardwall_ancestor(task_cs(current)); | |
11789 | allowed = node_isset(node, cs->mems_allowed); | |
11790 | rcu_read_unlock(); | |
11791 | ||
11792 | - spin_unlock_irqrestore(&callback_lock, flags); | |
11793 | + raw_spin_unlock_irqrestore(&callback_lock, flags); | |
11794 | return allowed; | |
11795 | } | |
11796 | ||
1a6e0f06 JK |
11797 | diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c |
11798 | index fc1ef736253c..83c666537a7a 100644 | |
11799 | --- a/kernel/debug/kdb/kdb_io.c | |
11800 | +++ b/kernel/debug/kdb/kdb_io.c | |
11801 | @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11802 | int linecount; | |
11803 | int colcount; | |
11804 | int logging, saved_loglevel = 0; | |
11805 | - int saved_trap_printk; | |
11806 | int got_printf_lock = 0; | |
11807 | int retlen = 0; | |
11808 | int fnd, len; | |
11809 | @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11810 | unsigned long uninitialized_var(flags); | |
11811 | ||
11812 | preempt_disable(); | |
11813 | - saved_trap_printk = kdb_trap_printk; | |
11814 | - kdb_trap_printk = 0; | |
11815 | ||
11816 | /* Serialize kdb_printf if multiple cpus try to write at once. | |
11817 | * But if any cpu goes recursive in kdb, just print the output, | |
11818 | @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
11819 | } else { | |
11820 | __release(kdb_printf_lock); | |
11821 | } | |
11822 | - kdb_trap_printk = saved_trap_printk; | |
11823 | preempt_enable(); | |
11824 | return retlen; | |
11825 | } | |
11826 | @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...) | |
11827 | va_list ap; | |
11828 | int r; | |
11829 | ||
11830 | + kdb_trap_printk++; | |
11831 | va_start(ap, fmt); | |
11832 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
11833 | va_end(ap); | |
11834 | + kdb_trap_printk--; | |
11835 | ||
11836 | return r; | |
11837 | } | |
11838 | diff --git a/kernel/events/core.c b/kernel/events/core.c | |
33c7bf0f | 11839 | index 07c0dc806dfc..baf1a2867d74 100644 |
1a6e0f06 JK |
11840 | --- a/kernel/events/core.c |
11841 | +++ b/kernel/events/core.c | |
c7c16703 | 11842 | @@ -1050,6 +1050,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) |
1a6e0f06 JK |
11843 | raw_spin_lock_init(&cpuctx->hrtimer_lock); |
11844 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
11845 | timer->function = perf_mux_hrtimer_handler; | |
11846 | + timer->irqsafe = 1; | |
11847 | } | |
11848 | ||
11849 | static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |
5c015b7c | 11850 | @@ -8363,6 +8364,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) |
1a6e0f06 JK |
11851 | |
11852 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
11853 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
11854 | + hwc->hrtimer.irqsafe = 1; | |
11855 | ||
11856 | /* | |
11857 | * Since hrtimers have a fixed rate, we can do a static freq->period | |
11858 | diff --git a/kernel/exit.c b/kernel/exit.c | |
c7c16703 | 11859 | index 3076f3089919..fb2ebcf3ca7c 100644 |
1a6e0f06 JK |
11860 | --- a/kernel/exit.c |
11861 | +++ b/kernel/exit.c | |
11862 | @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) | |
11863 | * Do this under ->siglock, we can race with another thread | |
11864 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
11865 | */ | |
11866 | - flush_sigqueue(&tsk->pending); | |
11867 | + flush_task_sigqueue(tsk); | |
11868 | tsk->sighand = NULL; | |
11869 | spin_unlock(&sighand->siglock); | |
11870 | ||
11871 | diff --git a/kernel/fork.c b/kernel/fork.c | |
7c18450a | 11872 | index ba8a01564985..416d91e4af97 100644 |
1a6e0f06 JK |
11873 | --- a/kernel/fork.c |
11874 | +++ b/kernel/fork.c | |
c7c16703 JK |
11875 | @@ -76,6 +76,7 @@ |
11876 | #include <linux/compiler.h> | |
11877 | #include <linux/sysctl.h> | |
11878 | #include <linux/kcov.h> | |
11879 | +#include <linux/kprobes.h> | |
11880 | ||
11881 | #include <asm/pgtable.h> | |
11882 | #include <asm/pgalloc.h> | |
11883 | @@ -376,13 +377,24 @@ static inline void put_signal_struct(struct signal_struct *sig) | |
1a6e0f06 JK |
11884 | if (atomic_dec_and_test(&sig->sigcnt)) |
11885 | free_signal_struct(sig); | |
11886 | } | |
11887 | - | |
11888 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11889 | +static | |
11890 | +#endif | |
11891 | void __put_task_struct(struct task_struct *tsk) | |
11892 | { | |
11893 | WARN_ON(!tsk->exit_state); | |
c7c16703 JK |
11894 | WARN_ON(atomic_read(&tsk->usage)); |
11895 | WARN_ON(tsk == current); | |
11896 | ||
11897 | + /* | |
11898 | + * Remove function-return probe instances associated with this | |
11899 | + * task and put them back on the free list. | |
11900 | + */ | |
11901 | + kprobe_flush_task(tsk); | |
11902 | + | |
11903 | + /* Task is done with its stack. */ | |
11904 | + put_task_stack(tsk); | |
11905 | + | |
11906 | cgroup_free(tsk); | |
11907 | task_numa_free(tsk); | |
11908 | security_task_free(tsk); | |
11909 | @@ -393,7 +405,18 @@ void __put_task_struct(struct task_struct *tsk) | |
1a6e0f06 JK |
11910 | if (!profile_handoff_task(tsk)) |
11911 | free_task(tsk); | |
11912 | } | |
11913 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11914 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
11915 | +#else | |
11916 | +void __put_task_struct_cb(struct rcu_head *rhp) | |
11917 | +{ | |
11918 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
11919 | + | |
11920 | + __put_task_struct(tsk); | |
11921 | + | |
11922 | +} | |
11923 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
11924 | +#endif | |
11925 | ||
11926 | void __init __weak arch_task_cache_init(void) { } | |
11927 | ||
c7c16703 | 11928 | @@ -852,6 +875,19 @@ void __mmdrop(struct mm_struct *mm) |
1a6e0f06 JK |
11929 | } |
11930 | EXPORT_SYMBOL_GPL(__mmdrop); | |
11931 | ||
11932 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11933 | +/* | |
11934 | + * RCU callback for delayed mm drop. Not strictly rcu, but we don't | |
11935 | + * want another facility to make this work. | |
11936 | + */ | |
11937 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
11938 | +{ | |
11939 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
11940 | + | |
11941 | + __mmdrop(mm); | |
11942 | +} | |
11943 | +#endif | |
11944 | + | |
11945 | static inline void __mmput(struct mm_struct *mm) | |
11946 | { | |
11947 | VM_BUG_ON(atomic_read(&mm->mm_users)); | |
7c18450a JK |
11948 | @@ -1417,6 +1453,7 @@ static void rt_mutex_init_task(struct task_struct *p) |
11949 | #ifdef CONFIG_RT_MUTEXES | |
11950 | p->pi_waiters = RB_ROOT; | |
11951 | p->pi_waiters_leftmost = NULL; | |
11952 | + p->pi_top_task = NULL; | |
11953 | p->pi_blocked_on = NULL; | |
11954 | #endif | |
11955 | } | |
11956 | @@ -1426,6 +1463,9 @@ static void rt_mutex_init_task(struct task_struct *p) | |
1a6e0f06 JK |
11957 | */ |
11958 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
11959 | { | |
11960 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11961 | + tsk->posix_timer_list = NULL; | |
11962 | +#endif | |
11963 | tsk->cputime_expires.prof_exp = 0; | |
11964 | tsk->cputime_expires.virt_exp = 0; | |
11965 | tsk->cputime_expires.sched_exp = 0; | |
7c18450a | 11966 | @@ -1552,6 +1592,7 @@ static __latent_entropy struct task_struct *copy_process( |
1a6e0f06 JK |
11967 | spin_lock_init(&p->alloc_lock); |
11968 | ||
11969 | init_sigpending(&p->pending); | |
11970 | + p->sigqueue_cache = NULL; | |
11971 | ||
11972 | p->utime = p->stime = p->gtime = 0; | |
11973 | p->utimescaled = p->stimescaled = 0; | |
11974 | diff --git a/kernel/futex.c b/kernel/futex.c | |
7c18450a | 11975 | index 4c6b6e697b73..d9bab63efccb 100644 |
1a6e0f06 JK |
11976 | --- a/kernel/futex.c |
11977 | +++ b/kernel/futex.c | |
33c7bf0f JK |
11978 | @@ -800,7 +800,7 @@ static int refill_pi_state_cache(void) |
11979 | return 0; | |
11980 | } | |
11981 | ||
11982 | -static struct futex_pi_state * alloc_pi_state(void) | |
11983 | +static struct futex_pi_state *alloc_pi_state(void) | |
11984 | { | |
11985 | struct futex_pi_state *pi_state = current->pi_state_cache; | |
11986 | ||
11987 | @@ -810,6 +810,11 @@ static struct futex_pi_state * alloc_pi_state(void) | |
11988 | return pi_state; | |
11989 | } | |
11990 | ||
11991 | +static void get_pi_state(struct futex_pi_state *pi_state) | |
11992 | +{ | |
11993 | + WARN_ON_ONCE(!atomic_inc_not_zero(&pi_state->refcount)); | |
11994 | +} | |
11995 | + | |
11996 | /* | |
11997 | * Drops a reference to the pi_state object and frees or caches it | |
11998 | * when the last reference is gone. | |
11999 | @@ -854,7 +859,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) | |
12000 | * Look up the task based on what TID userspace gave us. | |
12001 | * We dont trust it. | |
12002 | */ | |
12003 | -static struct task_struct * futex_find_get_task(pid_t pid) | |
12004 | +static struct task_struct *futex_find_get_task(pid_t pid) | |
12005 | { | |
12006 | struct task_struct *p; | |
12007 | ||
12008 | @@ -904,7 +909,9 @@ void exit_pi_state_list(struct task_struct *curr) | |
1a6e0f06 JK |
12009 | * task still owns the PI-state: |
12010 | */ | |
12011 | if (head->next != next) { | |
12012 | + raw_spin_unlock_irq(&curr->pi_lock); | |
12013 | spin_unlock(&hb->lock); | |
12014 | + raw_spin_lock_irq(&curr->pi_lock); | |
12015 | continue; | |
12016 | } | |
12017 | ||
33c7bf0f JK |
12018 | @@ -914,10 +921,12 @@ void exit_pi_state_list(struct task_struct *curr) |
12019 | pi_state->owner = NULL; | |
12020 | raw_spin_unlock_irq(&curr->pi_lock); | |
1a6e0f06 | 12021 | |
33c7bf0f JK |
12022 | - rt_mutex_unlock(&pi_state->pi_mutex); |
12023 | - | |
12024 | + get_pi_state(pi_state); | |
12025 | spin_unlock(&hb->lock); | |
1a6e0f06 | 12026 | |
33c7bf0f JK |
12027 | + rt_mutex_futex_unlock(&pi_state->pi_mutex); |
12028 | + put_pi_state(pi_state); | |
12029 | + | |
12030 | raw_spin_lock_irq(&curr->pi_lock); | |
12031 | } | |
12032 | raw_spin_unlock_irq(&curr->pi_lock); | |
12033 | @@ -971,6 +980,39 @@ void exit_pi_state_list(struct task_struct *curr) | |
12034 | * | |
12035 | * [10] There is no transient state which leaves owner and user space | |
12036 | * TID out of sync. | |
12037 | + * | |
12038 | + * | |
12039 | + * Serialization and lifetime rules: | |
12040 | + * | |
12041 | + * hb->lock: | |
12042 | + * | |
12043 | + * hb -> futex_q, relation | |
12044 | + * futex_q -> pi_state, relation | |
12045 | + * | |
12046 | + * (cannot be raw because hb can contain arbitrary amount | |
12047 | + * of futex_q's) | |
12048 | + * | |
12049 | + * pi_mutex->wait_lock: | |
12050 | + * | |
12051 | + * {uval, pi_state} | |
12052 | + * | |
12053 | + * (and pi_mutex 'obviously') | |
12054 | + * | |
12055 | + * p->pi_lock: | |
12056 | + * | |
12057 | + * p->pi_state_list -> pi_state->list, relation | |
12058 | + * | |
12059 | + * pi_state->refcount: | |
12060 | + * | |
12061 | + * pi_state lifetime | |
12062 | + * | |
12063 | + * | |
12064 | + * Lock order: | |
12065 | + * | |
12066 | + * hb->lock | |
12067 | + * pi_mutex->wait_lock | |
12068 | + * p->pi_lock | |
12069 | + * | |
12070 | */ | |
1a6e0f06 | 12071 | |
33c7bf0f | 12072 | /* |
7c18450a | 12073 | @@ -978,10 +1020,13 @@ void exit_pi_state_list(struct task_struct *curr) |
33c7bf0f JK |
12074 | * the pi_state against the user space value. If correct, attach to |
12075 | * it. | |
12076 | */ | |
12077 | -static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, | |
12078 | +static int attach_to_pi_state(u32 __user *uaddr, u32 uval, | |
12079 | + struct futex_pi_state *pi_state, | |
12080 | struct futex_pi_state **ps) | |
12081 | { | |
12082 | pid_t pid = uval & FUTEX_TID_MASK; | |
7c18450a JK |
12083 | + u32 uval2; |
12084 | + int ret; | |
1a6e0f06 JK |
12085 | |
12086 | /* | |
33c7bf0f | 12087 | * Userspace might have messed up non-PI and PI futexes [3] |
7c18450a | 12088 | @@ -989,9 +1034,39 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, |
33c7bf0f JK |
12089 | if (unlikely(!pi_state)) |
12090 | return -EINVAL; | |
1a6e0f06 | 12091 | |
1a6e0f06 | 12092 | + /* |
33c7bf0f JK |
12093 | + * We get here with hb->lock held, and having found a |
12094 | + * futex_top_waiter(). This means that futex_lock_pi() of said futex_q | |
12095 | + * has dropped the hb->lock in between queue_me() and unqueue_me_pi(), | |
12096 | + * which in turn means that futex_lock_pi() still has a reference on | |
12097 | + * our pi_state. | |
12098 | + * | |
12099 | + * The waiter holding a reference on @pi_state also protects against | |
12100 | + * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi() | |
12101 | + * and futex_wait_requeue_pi() as it cannot go to 0 and consequently | |
12102 | + * free pi_state before we can take a reference ourselves. | |
1a6e0f06 | 12103 | + */ |
33c7bf0f JK |
12104 | WARN_ON(!atomic_read(&pi_state->refcount)); |
12105 | ||
12106 | /* | |
12107 | + * Now that we have a pi_state, we can acquire wait_lock | |
12108 | + * and do the state validation. | |
12109 | + */ | |
12110 | + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
1a6e0f06 | 12111 | + |
33c7bf0f JK |
12112 | + /* |
12113 | + * Since {uval, pi_state} is serialized by wait_lock, and our current | |
12114 | + * uval was read without holding it, it can have changed. Verify it | |
12115 | + * still is what we expect it to be, otherwise retry the entire | |
12116 | + * operation. | |
12117 | + */ | |
12118 | + if (get_futex_value_locked(&uval2, uaddr)) | |
12119 | + goto out_efault; | |
1a6e0f06 | 12120 | + |
33c7bf0f JK |
12121 | + if (uval != uval2) |
12122 | + goto out_eagain; | |
1a6e0f06 | 12123 | + |
33c7bf0f JK |
12124 | + /* |
12125 | * Handle the owner died case: | |
12126 | */ | |
12127 | if (uval & FUTEX_OWNER_DIED) { | |
7c18450a | 12128 | @@ -1006,11 +1081,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, |
33c7bf0f JK |
12129 | * is not 0. Inconsistent state. [5] |
12130 | */ | |
12131 | if (pid) | |
12132 | - return -EINVAL; | |
12133 | + goto out_einval; | |
12134 | /* | |
12135 | * Take a ref on the state and return success. [4] | |
12136 | */ | |
12137 | - goto out_state; | |
12138 | + goto out_attach; | |
12139 | } | |
12140 | ||
12141 | /* | |
7c18450a | 12142 | @@ -1022,14 +1097,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, |
33c7bf0f JK |
12143 | * Take a ref on the state and return success. [6] |
12144 | */ | |
12145 | if (!pid) | |
12146 | - goto out_state; | |
12147 | + goto out_attach; | |
12148 | } else { | |
12149 | /* | |
12150 | * If the owner died bit is not set, then the pi_state | |
12151 | * must have an owner. [7] | |
12152 | */ | |
12153 | if (!pi_state->owner) | |
12154 | - return -EINVAL; | |
12155 | + goto out_einval; | |
12156 | } | |
1a6e0f06 JK |
12157 | |
12158 | /* | |
7c18450a | 12159 | @@ -1038,11 +1113,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, |
33c7bf0f JK |
12160 | * user space TID. [9/10] |
12161 | */ | |
12162 | if (pid != task_pid_vnr(pi_state->owner)) | |
12163 | - return -EINVAL; | |
12164 | -out_state: | |
12165 | - atomic_inc(&pi_state->refcount); | |
12166 | + goto out_einval; | |
12167 | + | |
12168 | +out_attach: | |
12169 | + get_pi_state(pi_state); | |
12170 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12171 | *ps = pi_state; | |
12172 | return 0; | |
12173 | + | |
12174 | +out_einval: | |
12175 | + ret = -EINVAL; | |
12176 | + goto out_error; | |
12177 | + | |
12178 | +out_eagain: | |
12179 | + ret = -EAGAIN; | |
12180 | + goto out_error; | |
12181 | + | |
12182 | +out_efault: | |
12183 | + ret = -EFAULT; | |
12184 | + goto out_error; | |
12185 | + | |
12186 | +out_error: | |
12187 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12188 | + return ret; | |
12189 | } | |
12190 | ||
12191 | /* | |
7c18450a | 12192 | @@ -1093,6 +1186,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, |
33c7bf0f JK |
12193 | |
12194 | /* | |
12195 | * No existing pi state. First waiter. [2] | |
12196 | + * | |
12197 | + * This creates pi_state, we have hb->lock held, this means nothing can | |
12198 | + * observe this state, wait_lock is irrelevant. | |
12199 | */ | |
12200 | pi_state = alloc_pi_state(); | |
12201 | ||
7c18450a | 12202 | @@ -1117,17 +1213,18 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key, |
33c7bf0f JK |
12203 | return 0; |
12204 | } | |
12205 | ||
12206 | -static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, | |
12207 | +static int lookup_pi_state(u32 __user *uaddr, u32 uval, | |
12208 | + struct futex_hash_bucket *hb, | |
12209 | union futex_key *key, struct futex_pi_state **ps) | |
12210 | { | |
12211 | - struct futex_q *match = futex_top_waiter(hb, key); | |
12212 | + struct futex_q *top_waiter = futex_top_waiter(hb, key); | |
12213 | ||
12214 | /* | |
12215 | * If there is a waiter on that futex, validate it and | |
12216 | * attach to the pi_state when the validation succeeds. | |
12217 | */ | |
12218 | - if (match) | |
12219 | - return attach_to_pi_state(uval, match->pi_state, ps); | |
12220 | + if (top_waiter) | |
12221 | + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); | |
12222 | ||
12223 | /* | |
12224 | * We are the first waiter - try to look up the owner based on | |
7c18450a | 12225 | @@ -1146,7 +1243,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) |
33c7bf0f JK |
12226 | if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) |
12227 | return -EFAULT; | |
12228 | ||
12229 | - /*If user space value changed, let the caller retry */ | |
12230 | + /* If user space value changed, let the caller retry */ | |
12231 | return curval != uval ? -EAGAIN : 0; | |
12232 | } | |
12233 | ||
7c18450a | 12234 | @@ -1174,7 +1271,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, |
33c7bf0f JK |
12235 | struct task_struct *task, int set_waiters) |
12236 | { | |
12237 | u32 uval, newval, vpid = task_pid_vnr(task); | |
12238 | - struct futex_q *match; | |
12239 | + struct futex_q *top_waiter; | |
12240 | int ret; | |
12241 | ||
12242 | /* | |
7c18450a | 12243 | @@ -1200,9 +1297,9 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, |
33c7bf0f JK |
12244 | * Lookup existing state first. If it exists, try to attach to |
12245 | * its pi_state. | |
12246 | */ | |
12247 | - match = futex_top_waiter(hb, key); | |
12248 | - if (match) | |
12249 | - return attach_to_pi_state(uval, match->pi_state, ps); | |
12250 | + top_waiter = futex_top_waiter(hb, key); | |
12251 | + if (top_waiter) | |
12252 | + return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps); | |
12253 | ||
12254 | /* | |
12255 | * No waiter and user TID is 0. We are here because the | |
7c18450a JK |
12256 | @@ -1283,50 +1380,45 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q) |
12257 | wake_q_add(wake_q, p); | |
12258 | __unqueue_futex(q); | |
12259 | /* | |
12260 | - * The waiting task can free the futex_q as soon as | |
12261 | - * q->lock_ptr = NULL is written, without taking any locks. A | |
12262 | - * memory barrier is required here to prevent the following | |
12263 | - * store to lock_ptr from getting ahead of the plist_del. | |
12264 | + * The waiting task can free the futex_q as soon as q->lock_ptr = NULL | |
12265 | + * is written, without taking any locks. This is possible in the event | |
12266 | + * of a spurious wakeup, for example. A memory barrier is required here | |
12267 | + * to prevent the following store to lock_ptr from getting ahead of the | |
12268 | + * plist_del in __unqueue_futex(). | |
33c7bf0f JK |
12269 | */ |
12270 | - smp_wmb(); | |
12271 | - q->lock_ptr = NULL; | |
12272 | + smp_store_release(&q->lock_ptr, NULL); | |
12273 | } | |
12274 | ||
12275 | -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
12276 | - struct futex_hash_bucket *hb) | |
12277 | +/* | |
12278 | + * Caller must hold a reference on @pi_state. | |
12279 | + */ | |
12280 | +static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state) | |
12281 | { | |
12282 | - struct task_struct *new_owner; | |
12283 | - struct futex_pi_state *pi_state = this->pi_state; | |
12284 | u32 uninitialized_var(curval), newval; | |
12285 | + struct task_struct *new_owner; | |
7c18450a | 12286 | + bool postunlock = false; |
33c7bf0f JK |
12287 | WAKE_Q(wake_q); |
12288 | - bool deboost; | |
12289 | + WAKE_Q(wake_sleeper_q); | |
12290 | int ret = 0; | |
12291 | ||
12292 | - if (!pi_state) | |
12293 | - return -EINVAL; | |
12294 | - | |
12295 | - /* | |
12296 | - * If current does not own the pi_state then the futex is | |
12297 | - * inconsistent and user space fiddled with the futex value. | |
12298 | - */ | |
12299 | - if (pi_state->owner != current) | |
12300 | - return -EINVAL; | |
12301 | - | |
12302 | - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
12303 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | |
12304 | + if (WARN_ON_ONCE(!new_owner)) { | |
12305 | + /* | |
12306 | + * As per the comment in futex_unlock_pi() this should not happen. | |
12307 | + * | |
12308 | + * When this happens, give up our locks and try again, giving | |
12309 | + * the futex_lock_pi() instance time to complete, either by | |
12310 | + * waiting on the rtmutex or removing itself from the futex | |
12311 | + * queue. | |
12312 | + */ | |
12313 | + ret = -EAGAIN; | |
12314 | + goto out_unlock; | |
12315 | + } | |
12316 | ||
12317 | /* | |
12318 | - * It is possible that the next waiter (the one that brought | |
12319 | - * this owner to the kernel) timed out and is no longer | |
12320 | - * waiting on the lock. | |
12321 | - */ | |
12322 | - if (!new_owner) | |
12323 | - new_owner = this->task; | |
12324 | - | |
12325 | - /* | |
12326 | - * We pass it to the next owner. The WAITERS bit is always | |
12327 | - * kept enabled while there is PI state around. We cleanup the | |
12328 | - * owner died bit, because we are the owner. | |
12329 | + * We pass it to the next owner. The WAITERS bit is always kept | |
12330 | + * enabled while there is PI state around. We cleanup the owner | |
12331 | + * died bit, because we are the owner. | |
12332 | */ | |
12333 | newval = FUTEX_WAITERS | task_pid_vnr(new_owner); | |
12334 | ||
7c18450a | 12335 | @@ -1335,6 +1427,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
33c7bf0f JK |
12336 | |
12337 | if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { | |
12338 | ret = -EFAULT; | |
12339 | + | |
12340 | } else if (curval != uval) { | |
12341 | /* | |
12342 | * If a unconditional UNLOCK_PI operation (user space did not | |
7c18450a | 12343 | @@ -1347,10 +1440,14 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
33c7bf0f JK |
12344 | else |
12345 | ret = -EINVAL; | |
12346 | } | |
12347 | - if (ret) { | |
12348 | - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12349 | - return ret; | |
12350 | - } | |
12351 | + | |
12352 | + if (ret) | |
12353 | + goto out_unlock; | |
7c18450a JK |
12354 | + |
12355 | + /* | |
12356 | + * This is a point of no return; once we modify the uval there is no | |
12357 | + * going back and subsequent operations must not fail. | |
12358 | + */ | |
33c7bf0f JK |
12359 | |
12360 | raw_spin_lock(&pi_state->owner->pi_lock); | |
12361 | WARN_ON(list_empty(&pi_state->list)); | |
7c18450a | 12362 | @@ -1363,22 +1460,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
33c7bf0f JK |
12363 | pi_state->owner = new_owner; |
12364 | raw_spin_unlock(&new_owner->pi_lock); | |
12365 | ||
7c18450a JK |
12366 | + postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, |
12367 | + &wake_sleeper_q); | |
33c7bf0f JK |
12368 | +out_unlock: |
12369 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12370 | ||
12371 | - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
7c18450a JK |
12372 | + if (postunlock) |
12373 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
12374 | ||
33c7bf0f JK |
12375 | - /* |
12376 | - * First unlock HB so the waiter does not spin on it once he got woken | |
12377 | - * up. Second wake up the waiter before the priority is adjusted. If we | |
12378 | - * deboost first (and lose our higher priority), then the task might get | |
12379 | - * scheduled away before the wake up can take place. | |
12380 | - */ | |
12381 | - spin_unlock(&hb->lock); | |
12382 | - wake_up_q(&wake_q); | |
12383 | - if (deboost) | |
7c18450a JK |
12384 | - rt_mutex_adjust_prio(current); |
12385 | - | |
33c7bf0f JK |
12386 | - return 0; |
12387 | + return ret; | |
12388 | } | |
12389 | ||
12390 | /* | |
12391 | @@ -1824,7 +1914,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |
12392 | * If that call succeeds then we have pi_state and an | |
12393 | * initial refcount on it. | |
12394 | */ | |
12395 | - ret = lookup_pi_state(ret, hb2, &key2, &pi_state); | |
12396 | + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); | |
12397 | } | |
12398 | ||
12399 | switch (ret) { | |
12400 | @@ -1907,7 +1997,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |
12401 | * refcount on the pi_state and store the pointer in | |
12402 | * the futex_q object of the waiter. | |
12403 | */ | |
12404 | - atomic_inc(&pi_state->refcount); | |
12405 | + get_pi_state(pi_state); | |
12406 | this->pi_state = pi_state; | |
12407 | ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, | |
12408 | this->rt_waiter, | |
12409 | @@ -1924,6 +2014,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |
12410 | requeue_pi_wake_futex(this, &key2, hb2); | |
12411 | drop_count++; | |
12412 | continue; | |
12413 | + } else if (ret == -EAGAIN) { | |
12414 | + /* | |
12415 | + * Waiter was woken by timeout or | |
12416 | + * signal and has set pi_blocked_on to | |
12417 | + * PI_WAKEUP_INPROGRESS before we | |
12418 | + * tried to enqueue it on the rtmutex. | |
12419 | + */ | |
12420 | + this->pi_state = NULL; | |
12421 | + put_pi_state(pi_state); | |
12422 | + continue; | |
12423 | } else if (ret) { | |
12424 | /* | |
12425 | * rt_mutex_start_proxy_lock() detected a | |
12426 | @@ -2007,20 +2107,7 @@ queue_unlock(struct futex_hash_bucket *hb) | |
12427 | hb_waiters_dec(hb); | |
12428 | } | |
12429 | ||
12430 | -/** | |
12431 | - * queue_me() - Enqueue the futex_q on the futex_hash_bucket | |
12432 | - * @q: The futex_q to enqueue | |
12433 | - * @hb: The destination hash bucket | |
12434 | - * | |
12435 | - * The hb->lock must be held by the caller, and is released here. A call to | |
12436 | - * queue_me() is typically paired with exactly one call to unqueue_me(). The | |
12437 | - * exceptions involve the PI related operations, which may use unqueue_me_pi() | |
12438 | - * or nothing if the unqueue is done as part of the wake process and the unqueue | |
12439 | - * state is implicit in the state of woken task (see futex_wait_requeue_pi() for | |
12440 | - * an example). | |
12441 | - */ | |
12442 | -static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |
12443 | - __releases(&hb->lock) | |
12444 | +static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |
12445 | { | |
12446 | int prio; | |
12447 | ||
12448 | @@ -2037,6 +2124,24 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |
12449 | plist_node_init(&q->list, prio); | |
12450 | plist_add(&q->list, &hb->chain); | |
12451 | q->task = current; | |
12452 | +} | |
12453 | + | |
12454 | +/** | |
12455 | + * queue_me() - Enqueue the futex_q on the futex_hash_bucket | |
12456 | + * @q: The futex_q to enqueue | |
12457 | + * @hb: The destination hash bucket | |
12458 | + * | |
12459 | + * The hb->lock must be held by the caller, and is released here. A call to | |
12460 | + * queue_me() is typically paired with exactly one call to unqueue_me(). The | |
12461 | + * exceptions involve the PI related operations, which may use unqueue_me_pi() | |
12462 | + * or nothing if the unqueue is done as part of the wake process and the unqueue | |
12463 | + * state is implicit in the state of woken task (see futex_wait_requeue_pi() for | |
12464 | + * an example). | |
12465 | + */ | |
12466 | +static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb) | |
12467 | + __releases(&hb->lock) | |
12468 | +{ | |
12469 | + __queue_me(q, hb); | |
12470 | spin_unlock(&hb->lock); | |
12471 | } | |
12472 | ||
12473 | @@ -2123,10 +2228,13 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |
12474 | { | |
12475 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | |
12476 | struct futex_pi_state *pi_state = q->pi_state; | |
12477 | - struct task_struct *oldowner = pi_state->owner; | |
12478 | u32 uval, uninitialized_var(curval), newval; | |
12479 | + struct task_struct *oldowner; | |
12480 | int ret; | |
12481 | ||
12482 | + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
12483 | + | |
12484 | + oldowner = pi_state->owner; | |
12485 | /* Owner died? */ | |
12486 | if (!pi_state->owner) | |
12487 | newtid |= FUTEX_OWNER_DIED; | |
12488 | @@ -2134,7 +2242,8 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |
12489 | /* | |
12490 | * We are here either because we stole the rtmutex from the | |
12491 | * previous highest priority waiter or we are the highest priority | |
12492 | - * waiter but failed to get the rtmutex the first time. | |
12493 | + * waiter but have failed to get the rtmutex the first time. | |
12494 | + * | |
12495 | * We have to replace the newowner TID in the user space variable. | |
12496 | * This must be atomic as we have to preserve the owner died bit here. | |
12497 | * | |
12498 | @@ -2142,17 +2251,16 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |
12499 | * because we can fault here. Imagine swapped out pages or a fork | |
12500 | * that marked all the anonymous memory readonly for cow. | |
12501 | * | |
12502 | - * Modifying pi_state _before_ the user space value would | |
12503 | - * leave the pi_state in an inconsistent state when we fault | |
12504 | - * here, because we need to drop the hash bucket lock to | |
12505 | - * handle the fault. This might be observed in the PID check | |
12506 | - * in lookup_pi_state. | |
12507 | + * Modifying pi_state _before_ the user space value would leave the | |
12508 | + * pi_state in an inconsistent state when we fault here, because we | |
12509 | + * need to drop the locks to handle the fault. This might be observed | |
12510 | + * in the PID check in lookup_pi_state. | |
12511 | */ | |
12512 | retry: | |
12513 | if (get_futex_value_locked(&uval, uaddr)) | |
12514 | goto handle_fault; | |
12515 | ||
12516 | - while (1) { | |
12517 | + for (;;) { | |
12518 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | |
12519 | ||
12520 | if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) | |
12521 | @@ -2167,47 +2275,60 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |
12522 | * itself. | |
12523 | */ | |
12524 | if (pi_state->owner != NULL) { | |
12525 | - raw_spin_lock_irq(&pi_state->owner->pi_lock); | |
12526 | + raw_spin_lock(&pi_state->owner->pi_lock); | |
12527 | WARN_ON(list_empty(&pi_state->list)); | |
12528 | list_del_init(&pi_state->list); | |
12529 | - raw_spin_unlock_irq(&pi_state->owner->pi_lock); | |
12530 | + raw_spin_unlock(&pi_state->owner->pi_lock); | |
12531 | } | |
12532 | ||
12533 | pi_state->owner = newowner; | |
12534 | ||
12535 | - raw_spin_lock_irq(&newowner->pi_lock); | |
12536 | + raw_spin_lock(&newowner->pi_lock); | |
12537 | WARN_ON(!list_empty(&pi_state->list)); | |
12538 | list_add(&pi_state->list, &newowner->pi_state_list); | |
12539 | - raw_spin_unlock_irq(&newowner->pi_lock); | |
12540 | + raw_spin_unlock(&newowner->pi_lock); | |
12541 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12542 | + | |
12543 | return 0; | |
12544 | ||
12545 | /* | |
12546 | - * To handle the page fault we need to drop the hash bucket | |
12547 | - * lock here. That gives the other task (either the highest priority | |
12548 | - * waiter itself or the task which stole the rtmutex) the | |
12549 | - * chance to try the fixup of the pi_state. So once we are | |
12550 | - * back from handling the fault we need to check the pi_state | |
12551 | - * after reacquiring the hash bucket lock and before trying to | |
12552 | - * do another fixup. When the fixup has been done already we | |
12553 | - * simply return. | |
12554 | + * To handle the page fault we need to drop the locks here. That gives | |
12555 | + * the other task (either the highest priority waiter itself or the | |
12556 | + * task which stole the rtmutex) the chance to try the fixup of the | |
12557 | + * pi_state. So once we are back from handling the fault we need to | |
12558 | + * check the pi_state after reacquiring the locks and before trying to | |
12559 | + * do another fixup. When the fixup has been done already we simply | |
12560 | + * return. | |
12561 | + * | |
12562 | + * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely | |
12563 | + * drop hb->lock since the caller owns the hb -> futex_q relation. | |
12564 | + * Dropping the pi_mutex->wait_lock requires the state revalidate. | |
12565 | */ | |
12566 | handle_fault: | |
12567 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12568 | spin_unlock(q->lock_ptr); | |
12569 | ||
12570 | ret = fault_in_user_writeable(uaddr); | |
12571 | ||
12572 | spin_lock(q->lock_ptr); | |
12573 | + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
12574 | ||
12575 | /* | |
12576 | * Check if someone else fixed it for us: | |
12577 | */ | |
12578 | - if (pi_state->owner != oldowner) | |
12579 | - return 0; | |
12580 | + if (pi_state->owner != oldowner) { | |
12581 | + ret = 0; | |
12582 | + goto out_unlock; | |
12583 | + } | |
12584 | ||
12585 | if (ret) | |
12586 | - return ret; | |
12587 | + goto out_unlock; | |
12588 | ||
12589 | goto retry; | |
12590 | + | |
12591 | +out_unlock: | |
12592 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12593 | + return ret; | |
12594 | } | |
12595 | ||
12596 | static long futex_wait_restart(struct restart_block *restart); | |
12597 | @@ -2229,13 +2350,16 @@ static long futex_wait_restart(struct restart_block *restart); | |
12598 | */ | |
12599 | static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | |
12600 | { | |
12601 | - struct task_struct *owner; | |
12602 | int ret = 0; | |
12603 | ||
12604 | if (locked) { | |
12605 | /* | |
12606 | * Got the lock. We might not be the anticipated owner if we | |
12607 | * did a lock-steal - fix up the PI-state in that case: | |
12608 | + * | |
12609 | + * We can safely read pi_state->owner without holding wait_lock | |
12610 | + * because we now own the rt_mutex, only the owner will attempt | |
12611 | + * to change it. | |
12612 | */ | |
12613 | if (q->pi_state->owner != current) | |
12614 | ret = fixup_pi_state_owner(uaddr, q, current); | |
12615 | @@ -2243,43 +2367,15 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | |
12616 | } | |
12617 | ||
12618 | /* | |
12619 | - * Catch the rare case, where the lock was released when we were on the | |
12620 | - * way back before we locked the hash bucket. | |
12621 | - */ | |
12622 | - if (q->pi_state->owner == current) { | |
12623 | - /* | |
12624 | - * Try to get the rt_mutex now. This might fail as some other | |
12625 | - * task acquired the rt_mutex after we removed ourself from the | |
12626 | - * rt_mutex waiters list. | |
12627 | - */ | |
12628 | - if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { | |
12629 | - locked = 1; | |
12630 | - goto out; | |
12631 | - } | |
12632 | - | |
12633 | - /* | |
12634 | - * pi_state is incorrect, some other task did a lock steal and | |
12635 | - * we returned due to timeout or signal without taking the | |
12636 | - * rt_mutex. Too late. | |
12637 | - */ | |
12638 | - raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); | |
12639 | - owner = rt_mutex_owner(&q->pi_state->pi_mutex); | |
12640 | - if (!owner) | |
12641 | - owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); | |
12642 | - raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); | |
12643 | - ret = fixup_pi_state_owner(uaddr, q, owner); | |
12644 | - goto out; | |
12645 | - } | |
12646 | - | |
12647 | - /* | |
12648 | * Paranoia check. If we did not take the lock, then we should not be | |
12649 | * the owner of the rt_mutex. | |
12650 | */ | |
12651 | - if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) | |
12652 | + if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) { | |
12653 | printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " | |
12654 | "pi-state %p\n", ret, | |
12655 | q->pi_state->pi_mutex.owner, | |
12656 | q->pi_state->owner); | |
12657 | + } | |
12658 | ||
12659 | out: | |
12660 | return ret ? ret : locked; | |
12661 | @@ -2503,6 +2599,8 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, | |
12662 | ktime_t *time, int trylock) | |
12663 | { | |
12664 | struct hrtimer_sleeper timeout, *to = NULL; | |
12665 | + struct futex_pi_state *pi_state = NULL; | |
12666 | + struct rt_mutex_waiter rt_waiter; | |
12667 | struct futex_hash_bucket *hb; | |
12668 | struct futex_q q = futex_q_init; | |
12669 | int res, ret; | |
12670 | @@ -2555,25 +2653,77 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, | |
12671 | } | |
12672 | } | |
12673 | ||
12674 | + WARN_ON(!q.pi_state); | |
12675 | + | |
12676 | /* | |
12677 | * Only actually queue now that the atomic ops are done: | |
12678 | */ | |
12679 | - queue_me(&q, hb); | |
12680 | + __queue_me(&q, hb); | |
12681 | ||
12682 | - WARN_ON(!q.pi_state); | |
12683 | - /* | |
12684 | - * Block on the PI mutex: | |
12685 | - */ | |
12686 | - if (!trylock) { | |
12687 | - ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); | |
12688 | - } else { | |
12689 | - ret = rt_mutex_trylock(&q.pi_state->pi_mutex); | |
12690 | + if (trylock) { | |
12691 | + ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); | |
12692 | /* Fixup the trylock return value: */ | |
12693 | ret = ret ? 0 : -EWOULDBLOCK; | |
12694 | + goto no_block; | |
12695 | } | |
12696 | ||
12697 | + rt_mutex_init_waiter(&rt_waiter, false); | |
12698 | + | |
12699 | + /* | |
12700 | + * On PREEMPT_RT_FULL, when hb->lock becomes an rt_mutex, we must not | |
12701 | + * hold it while doing rt_mutex_start_proxy(), because then it will | |
12702 | + * include hb->lock in the blocking chain, even through we'll not in | |
12703 | + * fact hold it while blocking. This will lead it to report -EDEADLK | |
12704 | + * and BUG when futex_unlock_pi() interleaves with this. | |
12705 | + * | |
12706 | + * Therefore acquire wait_lock while holding hb->lock, but drop the | |
12707 | + * latter before calling rt_mutex_start_proxy_lock(). This still fully | |
12708 | + * serializes against futex_unlock_pi() as that does the exact same | |
12709 | + * lock handoff sequence. | |
12710 | + */ | |
12711 | + raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock); | |
12712 | + /* | |
12713 | + * the migrate_disable() here disables migration in the in_atomic() fast | |
12714 | + * path which is enabled again in the following spin_unlock(). We have | |
12715 | + * one migrate_disable() pending in the slow-path which is reversed | |
12716 | + * after the raw_spin_unlock_irq() where we leave the atomic context. | |
12717 | + */ | |
12718 | + migrate_disable(); | |
12719 | + | |
12720 | + spin_unlock(q.lock_ptr); | |
12721 | + ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); | |
12722 | + raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); | |
12723 | + migrate_enable(); | |
12724 | + | |
12725 | + if (ret) { | |
12726 | + if (ret == 1) | |
12727 | + ret = 0; | |
12728 | + | |
12729 | + spin_lock(q.lock_ptr); | |
12730 | + goto no_block; | |
12731 | + } | |
12732 | + | |
12733 | + | |
12734 | + if (unlikely(to)) | |
12735 | + hrtimer_start_expires(&to->timer, HRTIMER_MODE_ABS); | |
12736 | + | |
12737 | + ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter); | |
12738 | + | |
12739 | spin_lock(q.lock_ptr); | |
12740 | /* | |
12741 | + * If we failed to acquire the lock (signal/timeout), we must | |
12742 | + * first acquire the hb->lock before removing the lock from the | |
12743 | + * rt_mutex waitqueue, such that we can keep the hb and rt_mutex | |
12744 | + * wait lists consistent. | |
12745 | + * | |
12746 | + * In particular; it is important that futex_unlock_pi() can not | |
12747 | + * observe this inconsistency. | |
12748 | + */ | |
12749 | + if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter)) | |
12750 | + ret = 0; | |
12751 | + | |
12752 | +no_block: | |
12753 | + /* | |
12754 | * Fixup the pi_state owner and possibly acquire the lock if we | |
12755 | * haven't already. | |
12756 | */ | |
12757 | @@ -2589,12 +2739,19 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, | |
12758 | * If fixup_owner() faulted and was unable to handle the fault, unlock | |
12759 | * it and return the fault to userspace. | |
12760 | */ | |
12761 | - if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) | |
12762 | - rt_mutex_unlock(&q.pi_state->pi_mutex); | |
12763 | + if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) { | |
12764 | + pi_state = q.pi_state; | |
12765 | + get_pi_state(pi_state); | |
12766 | + } | |
12767 | ||
12768 | /* Unqueue and drop the lock */ | |
12769 | unqueue_me_pi(&q); | |
12770 | ||
12771 | + if (pi_state) { | |
12772 | + rt_mutex_futex_unlock(&pi_state->pi_mutex); | |
12773 | + put_pi_state(pi_state); | |
12774 | + } | |
12775 | + | |
12776 | goto out_put_key; | |
12777 | ||
12778 | out_unlock_put_key: | |
7c18450a JK |
12779 | @@ -2603,8 +2760,10 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, |
12780 | out_put_key: | |
12781 | put_futex_key(&q.key); | |
12782 | out: | |
12783 | - if (to) | |
12784 | + if (to) { | |
12785 | + hrtimer_cancel(&to->timer); | |
12786 | destroy_hrtimer_on_stack(&to->timer); | |
12787 | + } | |
12788 | return ret != -EINTR ? ret : -ERESTARTNOINTR; | |
12789 | ||
12790 | uaddr_faulted: | |
12791 | @@ -2631,7 +2790,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) | |
33c7bf0f JK |
12792 | u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current); |
12793 | union futex_key key = FUTEX_KEY_INIT; | |
12794 | struct futex_hash_bucket *hb; | |
12795 | - struct futex_q *match; | |
12796 | + struct futex_q *top_waiter; | |
12797 | int ret; | |
12798 | ||
12799 | retry: | |
7c18450a | 12800 | @@ -2655,12 +2814,48 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
33c7bf0f JK |
12801 | * all and we at least want to know if user space fiddled |
12802 | * with the futex value instead of blindly unlocking. | |
12803 | */ | |
12804 | - match = futex_top_waiter(hb, &key); | |
12805 | - if (match) { | |
12806 | - ret = wake_futex_pi(uaddr, uval, match, hb); | |
12807 | + top_waiter = futex_top_waiter(hb, &key); | |
12808 | + if (top_waiter) { | |
12809 | + struct futex_pi_state *pi_state = top_waiter->pi_state; | |
12810 | + | |
12811 | + ret = -EINVAL; | |
12812 | + if (!pi_state) | |
12813 | + goto out_unlock; | |
12814 | + | |
12815 | /* | |
12816 | - * In case of success wake_futex_pi dropped the hash | |
12817 | - * bucket lock. | |
12818 | + * If current does not own the pi_state then the futex is | |
12819 | + * inconsistent and user space fiddled with the futex value. | |
12820 | + */ | |
12821 | + if (pi_state->owner != current) | |
12822 | + goto out_unlock; | |
12823 | + | |
12824 | + get_pi_state(pi_state); | |
12825 | + /* | |
12826 | + * By taking wait_lock while still holding hb->lock, we ensure | |
12827 | + * there is no point where we hold neither; and therefore | |
12828 | + * wake_futex_pi() must observe a state consistent with what we | |
12829 | + * observed. | |
12830 | + */ | |
12831 | + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
12832 | + /* | |
12833 | + * Magic trickery for now to make the RT migrate disable | |
12834 | + * logic happy. The following spin_unlock() happens with | |
12835 | + * interrupts disabled so the internal migrate_enable() | |
12836 | + * won't undo the migrate_disable() which was issued when | |
12837 | + * locking hb->lock. | |
12838 | + */ | |
12839 | + migrate_disable(); | |
12840 | + spin_unlock(&hb->lock); | |
12841 | + | |
12842 | + /* Drops pi_state->pi_mutex.wait_lock */ | |
12843 | + ret = wake_futex_pi(uaddr, uval, pi_state); | |
12844 | + | |
12845 | + migrate_enable(); | |
12846 | + | |
12847 | + put_pi_state(pi_state); | |
12848 | + | |
12849 | + /* | |
12850 | + * Success, we're done! No tricky corner cases. | |
12851 | */ | |
12852 | if (!ret) | |
12853 | goto out_putkey; | |
7c18450a | 12854 | @@ -2675,7 +2870,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
33c7bf0f JK |
12855 | * setting the FUTEX_WAITERS bit. Try again. |
12856 | */ | |
12857 | if (ret == -EAGAIN) { | |
12858 | - spin_unlock(&hb->lock); | |
12859 | put_futex_key(&key); | |
12860 | goto retry; | |
12861 | } | |
7c18450a | 12862 | @@ -2683,7 +2877,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
33c7bf0f JK |
12863 | * wake_futex_pi has detected invalid state. Tell user |
12864 | * space. | |
12865 | */ | |
12866 | - goto out_unlock; | |
12867 | + goto out_putkey; | |
12868 | } | |
12869 | ||
12870 | /* | |
7c18450a | 12871 | @@ -2693,8 +2887,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
33c7bf0f JK |
12872 | * preserve the WAITERS bit not the OWNER_DIED one. We are the |
12873 | * owner. | |
12874 | */ | |
12875 | - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) | |
12876 | + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) { | |
12877 | + spin_unlock(&hb->lock); | |
12878 | goto pi_faulted; | |
12879 | + } | |
12880 | ||
12881 | /* | |
12882 | * If uval has changed, let user space handle it. | |
7c18450a | 12883 | @@ -2708,7 +2904,6 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) |
33c7bf0f JK |
12884 | return ret; |
12885 | ||
12886 | pi_faulted: | |
12887 | - spin_unlock(&hb->lock); | |
12888 | put_futex_key(&key); | |
12889 | ||
12890 | ret = fault_in_user_writeable(uaddr); | |
7c18450a | 12891 | @@ -2812,8 +3007,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
33c7bf0f JK |
12892 | u32 __user *uaddr2) |
12893 | { | |
12894 | struct hrtimer_sleeper timeout, *to = NULL; | |
12895 | + struct futex_pi_state *pi_state = NULL; | |
12896 | struct rt_mutex_waiter rt_waiter; | |
12897 | - struct futex_hash_bucket *hb; | |
12898 | + struct futex_hash_bucket *hb, *hb2; | |
12899 | union futex_key key2 = FUTEX_KEY_INIT; | |
12900 | struct futex_q q = futex_q_init; | |
12901 | int res, ret; | |
7c18450a | 12902 | @@ -2838,10 +3034,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
33c7bf0f JK |
12903 | * The waiter is allocated on our stack, manipulated by the requeue |
12904 | * code while we sleep on uaddr. | |
12905 | */ | |
12906 | - debug_rt_mutex_init_waiter(&rt_waiter); | |
12907 | - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); | |
12908 | - RB_CLEAR_NODE(&rt_waiter.tree_entry); | |
12909 | - rt_waiter.task = NULL; | |
12910 | + rt_mutex_init_waiter(&rt_waiter, false); | |
12911 | ||
12912 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); | |
12913 | if (unlikely(ret != 0)) | |
7c18450a | 12914 | @@ -2872,20 +3065,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
33c7bf0f JK |
12915 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ |
12916 | futex_wait_queue_me(hb, &q, to); | |
12917 | ||
12918 | - spin_lock(&hb->lock); | |
12919 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
12920 | - spin_unlock(&hb->lock); | |
12921 | - if (ret) | |
12922 | - goto out_put_keys; | |
12923 | + /* | |
12924 | + * On RT we must avoid races with requeue and trying to block | |
12925 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
12926 | + * serializing access to pi_blocked_on with pi_lock. | |
12927 | + */ | |
12928 | + raw_spin_lock_irq(¤t->pi_lock); | |
12929 | + if (current->pi_blocked_on) { | |
12930 | + /* | |
12931 | + * We have been requeued or are in the process of | |
12932 | + * being requeued. | |
12933 | + */ | |
12934 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12935 | + } else { | |
12936 | + /* | |
12937 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
12938 | + * prevents a concurrent requeue from moving us to the | |
12939 | + * uaddr2 rtmutex. After that we can safely acquire | |
12940 | + * (and possibly block on) hb->lock. | |
12941 | + */ | |
12942 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
12943 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12944 | + | |
12945 | + spin_lock(&hb->lock); | |
12946 | + | |
12947 | + /* | |
12948 | + * Clean up pi_blocked_on. We might leak it otherwise | |
12949 | + * when we succeeded with the hb->lock in the fast | |
12950 | + * path. | |
12951 | + */ | |
12952 | + raw_spin_lock_irq(¤t->pi_lock); | |
12953 | + current->pi_blocked_on = NULL; | |
12954 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12955 | + | |
12956 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
12957 | + spin_unlock(&hb->lock); | |
12958 | + if (ret) | |
12959 | + goto out_put_keys; | |
12960 | + } | |
12961 | ||
12962 | /* | |
12963 | - * In order for us to be here, we know our q.key == key2, and since | |
12964 | - * we took the hb->lock above, we also know that futex_requeue() has | |
12965 | - * completed and we no longer have to concern ourselves with a wakeup | |
12966 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
12967 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
12968 | - * reference count. | |
1a6e0f06 JK |
12969 | + * In order to be here, we have either been requeued, are in |
12970 | + * the process of being requeued, or requeue successfully | |
12971 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
12972 | + * non-null above, we may be racing with a requeue. Do not | |
12973 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
12974 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
12975 | + * reference and incremented our key2 reference count. | |
12976 | */ | |
12977 | + hb2 = hash_futex(&key2); | |
12978 | ||
12979 | /* Check if the requeue code acquired the second futex for us. */ | |
12980 | if (!q.rt_waiter) { | |
7c18450a | 12981 | @@ -2894,16 +3122,19 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
1a6e0f06 JK |
12982 | * did a lock-steal - fix up the PI-state in that case. |
12983 | */ | |
12984 | if (q.pi_state && (q.pi_state->owner != current)) { | |
12985 | - spin_lock(q.lock_ptr); | |
12986 | + spin_lock(&hb2->lock); | |
12987 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
12988 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
33c7bf0f JK |
12989 | - if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) |
12990 | - rt_mutex_unlock(&q.pi_state->pi_mutex); | |
12991 | + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { | |
12992 | + pi_state = q.pi_state; | |
12993 | + get_pi_state(pi_state); | |
12994 | + } | |
1a6e0f06 JK |
12995 | /* |
12996 | * Drop the reference to the pi state which | |
12997 | * the requeue_pi() code acquired for us. | |
12998 | */ | |
12999 | put_pi_state(q.pi_state); | |
13000 | - spin_unlock(q.lock_ptr); | |
13001 | + spin_unlock(&hb2->lock); | |
13002 | } | |
13003 | } else { | |
33c7bf0f | 13004 | struct rt_mutex *pi_mutex; |
7c18450a | 13005 | @@ -2915,10 +3146,14 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
33c7bf0f JK |
13006 | */ |
13007 | WARN_ON(!q.pi_state); | |
13008 | pi_mutex = &q.pi_state->pi_mutex; | |
13009 | - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); | |
13010 | - debug_rt_mutex_free_waiter(&rt_waiter); | |
13011 | + ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter); | |
1a6e0f06 JK |
13012 | |
13013 | - spin_lock(q.lock_ptr); | |
13014 | + spin_lock(&hb2->lock); | |
13015 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
33c7bf0f JK |
13016 | + if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter)) |
13017 | + ret = 0; | |
13018 | + | |
13019 | + debug_rt_mutex_free_waiter(&rt_waiter); | |
1a6e0f06 JK |
13020 | /* |
13021 | * Fixup the pi_state owner and possibly acquire the lock if we | |
13022 | * haven't already. | |
7c18450a | 13023 | @@ -2936,13 +3171,20 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, |
33c7bf0f JK |
13024 | * the fault, unlock the rt_mutex and return the fault to |
13025 | * userspace. | |
13026 | */ | |
13027 | - if (ret && rt_mutex_owner(pi_mutex) == current) | |
13028 | - rt_mutex_unlock(pi_mutex); | |
13029 | + if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) { | |
13030 | + pi_state = q.pi_state; | |
13031 | + get_pi_state(pi_state); | |
13032 | + } | |
13033 | ||
13034 | /* Unqueue and drop the lock. */ | |
13035 | unqueue_me_pi(&q); | |
13036 | } | |
13037 | ||
13038 | + if (pi_state) { | |
13039 | + rt_mutex_futex_unlock(&pi_state->pi_mutex); | |
13040 | + put_pi_state(pi_state); | |
13041 | + } | |
13042 | + | |
13043 | if (ret == -EINTR) { | |
13044 | /* | |
13045 | * We've already been requeued, but cannot restart by calling | |
1a6e0f06 JK |
13046 | diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c |
13047 | index d3f24905852c..f87aa8fdcc51 100644 | |
13048 | --- a/kernel/irq/handle.c | |
13049 | +++ b/kernel/irq/handle.c | |
13050 | @@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
13051 | { | |
13052 | irqreturn_t retval; | |
13053 | unsigned int flags = 0; | |
13054 | + struct pt_regs *regs = get_irq_regs(); | |
13055 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
13056 | ||
13057 | retval = __handle_irq_event_percpu(desc, &flags); | |
13058 | ||
13059 | - add_interrupt_randomness(desc->irq_data.irq, flags); | |
13060 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13061 | + desc->random_ip = ip; | |
13062 | +#else | |
13063 | + add_interrupt_randomness(desc->irq_data.irq, flags, ip); | |
13064 | +#endif | |
13065 | ||
13066 | if (!noirqdebug) | |
13067 | note_interrupt(desc, retval); | |
13068 | diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c | |
c7c16703 | 13069 | index 6b669593e7eb..e357bf6c59d5 100644 |
1a6e0f06 JK |
13070 | --- a/kernel/irq/manage.c |
13071 | +++ b/kernel/irq/manage.c | |
13072 | @@ -22,6 +22,7 @@ | |
13073 | #include "internals.h" | |
13074 | ||
13075 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
13076 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
13077 | __read_mostly bool force_irqthreads; | |
13078 | ||
13079 | static int __init setup_forced_irqthreads(char *arg) | |
13080 | @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg) | |
13081 | return 0; | |
13082 | } | |
13083 | early_param("threadirqs", setup_forced_irqthreads); | |
13084 | +# endif | |
13085 | #endif | |
13086 | ||
13087 | static void __synchronize_hardirq(struct irq_desc *desc) | |
13088 | @@ -233,7 +235,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, | |
13089 | ||
13090 | if (desc->affinity_notify) { | |
13091 | kref_get(&desc->affinity_notify->kref); | |
13092 | + | |
13093 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13094 | + swork_queue(&desc->affinity_notify->swork); | |
13095 | +#else | |
13096 | schedule_work(&desc->affinity_notify->work); | |
13097 | +#endif | |
13098 | } | |
13099 | irqd_set(data, IRQD_AFFINITY_SET); | |
13100 | ||
13101 | @@ -271,10 +278,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |
13102 | } | |
13103 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
13104 | ||
13105 | -static void irq_affinity_notify(struct work_struct *work) | |
13106 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
13107 | { | |
13108 | - struct irq_affinity_notify *notify = | |
13109 | - container_of(work, struct irq_affinity_notify, work); | |
13110 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
13111 | cpumask_var_t cpumask; | |
13112 | unsigned long flags; | |
13113 | @@ -296,6 +301,35 @@ static void irq_affinity_notify(struct work_struct *work) | |
13114 | kref_put(¬ify->kref, notify->release); | |
13115 | } | |
13116 | ||
13117 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13118 | +static void init_helper_thread(void) | |
13119 | +{ | |
13120 | + static int init_sworker_once; | |
13121 | + | |
13122 | + if (init_sworker_once) | |
13123 | + return; | |
13124 | + if (WARN_ON(swork_get())) | |
13125 | + return; | |
13126 | + init_sworker_once = 1; | |
13127 | +} | |
13128 | + | |
13129 | +static void irq_affinity_notify(struct swork_event *swork) | |
13130 | +{ | |
13131 | + struct irq_affinity_notify *notify = | |
13132 | + container_of(swork, struct irq_affinity_notify, swork); | |
13133 | + _irq_affinity_notify(notify); | |
13134 | +} | |
13135 | + | |
13136 | +#else | |
13137 | + | |
13138 | +static void irq_affinity_notify(struct work_struct *work) | |
13139 | +{ | |
13140 | + struct irq_affinity_notify *notify = | |
13141 | + container_of(work, struct irq_affinity_notify, work); | |
13142 | + _irq_affinity_notify(notify); | |
13143 | +} | |
13144 | +#endif | |
13145 | + | |
13146 | /** | |
13147 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
13148 | * @irq: Interrupt for which to enable/disable notification | |
13149 | @@ -324,7 +358,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) | |
13150 | if (notify) { | |
13151 | notify->irq = irq; | |
13152 | kref_init(¬ify->kref); | |
13153 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13154 | + INIT_SWORK(¬ify->swork, irq_affinity_notify); | |
13155 | + init_helper_thread(); | |
13156 | +#else | |
13157 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
13158 | +#endif | |
13159 | } | |
13160 | ||
13161 | raw_spin_lock_irqsave(&desc->lock, flags); | |
13162 | @@ -879,7 +918,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |
13163 | local_bh_disable(); | |
13164 | ret = action->thread_fn(action->irq, action->dev_id); | |
13165 | irq_finalize_oneshot(desc, action); | |
13166 | - local_bh_enable(); | |
13167 | + /* | |
13168 | + * Interrupts which have real time requirements can be set up | |
13169 | + * to avoid softirq processing in the thread handler. This is | |
13170 | + * safe as these interrupts do not raise soft interrupts. | |
13171 | + */ | |
13172 | + if (irq_settings_no_softirq_call(desc)) | |
13173 | + _local_bh_enable(); | |
13174 | + else | |
13175 | + local_bh_enable(); | |
13176 | return ret; | |
13177 | } | |
13178 | ||
13179 | @@ -976,6 +1023,12 @@ static int irq_thread(void *data) | |
13180 | if (action_ret == IRQ_WAKE_THREAD) | |
13181 | irq_wake_secondary(desc, action); | |
13182 | ||
13183 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13184 | + migrate_disable(); | |
13185 | + add_interrupt_randomness(action->irq, 0, | |
13186 | + desc->random_ip ^ (unsigned long) action); | |
13187 | + migrate_enable(); | |
13188 | +#endif | |
13189 | wake_threads_waitq(desc); | |
13190 | } | |
13191 | ||
13192 | @@ -1336,6 +1389,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |
13193 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
13194 | } | |
13195 | ||
13196 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) | |
13197 | + irq_settings_set_no_softirq_call(desc); | |
13198 | + | |
13199 | /* Set default affinity mask once everything is setup */ | |
13200 | setup_affinity(desc, mask); | |
13201 | ||
13202 | @@ -2061,7 +2117,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); | |
13203 | * This call sets the internal irqchip state of an interrupt, | |
13204 | * depending on the value of @which. | |
13205 | * | |
13206 | - * This function should be called with preemption disabled if the | |
13207 | + * This function should be called with migration disabled if the | |
13208 | * interrupt controller has per-cpu registers. | |
13209 | */ | |
13210 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
13211 | diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h | |
13212 | index 320579d89091..2df2d4445b1e 100644 | |
13213 | --- a/kernel/irq/settings.h | |
13214 | +++ b/kernel/irq/settings.h | |
13215 | @@ -16,6 +16,7 @@ enum { | |
13216 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
13217 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
13218 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
13219 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
13220 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
13221 | }; | |
13222 | ||
13223 | @@ -30,6 +31,7 @@ enum { | |
13224 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
13225 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
13226 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
13227 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
13228 | #undef IRQF_MODIFY_MASK | |
13229 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
13230 | ||
13231 | @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | |
13232 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
13233 | } | |
13234 | ||
13235 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) | |
13236 | +{ | |
13237 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
13238 | +} | |
13239 | + | |
13240 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) | |
13241 | +{ | |
13242 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
13243 | +} | |
13244 | + | |
13245 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |
13246 | { | |
13247 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
13248 | diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c | |
13249 | index 5707f97a3e6a..73f38dc7a7fb 100644 | |
13250 | --- a/kernel/irq/spurious.c | |
13251 | +++ b/kernel/irq/spurious.c | |
13252 | @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); | |
13253 | ||
13254 | static int __init irqfixup_setup(char *str) | |
13255 | { | |
13256 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13257 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
13258 | + return 1; | |
13259 | +#endif | |
13260 | irqfixup = 1; | |
13261 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
13262 | printk(KERN_WARNING "This may impact system performance.\n"); | |
13263 | @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644); | |
13264 | ||
13265 | static int __init irqpoll_setup(char *str) | |
13266 | { | |
13267 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13268 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
13269 | + return 1; | |
13270 | +#endif | |
13271 | irqfixup = 2; | |
13272 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
13273 | "enabled\n"); | |
13274 | diff --git a/kernel/irq_work.c b/kernel/irq_work.c | |
13275 | index bcf107ce0854..2899ba0d23d1 100644 | |
13276 | --- a/kernel/irq_work.c | |
13277 | +++ b/kernel/irq_work.c | |
13278 | @@ -17,6 +17,7 @@ | |
13279 | #include <linux/cpu.h> | |
13280 | #include <linux/notifier.h> | |
13281 | #include <linux/smp.h> | |
13282 | +#include <linux/interrupt.h> | |
13283 | #include <asm/processor.h> | |
13284 | ||
13285 | ||
13286 | @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) | |
13287 | */ | |
13288 | bool irq_work_queue_on(struct irq_work *work, int cpu) | |
13289 | { | |
13290 | + struct llist_head *list; | |
13291 | + | |
13292 | /* All work should have been flushed before going offline */ | |
13293 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
13294 | ||
13295 | @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) | |
13296 | if (!irq_work_claim(work)) | |
13297 | return false; | |
13298 | ||
13299 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
13300 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
13301 | + list = &per_cpu(lazy_list, cpu); | |
13302 | + else | |
13303 | + list = &per_cpu(raised_list, cpu); | |
13304 | + | |
13305 | + if (llist_add(&work->llnode, list)) | |
13306 | arch_send_call_function_single_ipi(cpu); | |
13307 | ||
13308 | return true; | |
13309 | @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); | |
13310 | /* Enqueue the irq work @work on the current CPU */ | |
13311 | bool irq_work_queue(struct irq_work *work) | |
13312 | { | |
13313 | + struct llist_head *list; | |
13314 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
13315 | + | |
13316 | /* Only queue if not already pending */ | |
13317 | if (!irq_work_claim(work)) | |
13318 | return false; | |
13319 | @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work) | |
13320 | /* Queue the entry and raise the IPI if needed. */ | |
13321 | preempt_disable(); | |
13322 | ||
13323 | - /* If the work is "lazy", handle it from next tick if any */ | |
13324 | - if (work->flags & IRQ_WORK_LAZY) { | |
13325 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
13326 | - tick_nohz_tick_stopped()) | |
13327 | - arch_irq_work_raise(); | |
13328 | - } else { | |
13329 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
13330 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
13331 | + | |
13332 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
13333 | + list = this_cpu_ptr(&lazy_list); | |
13334 | + else | |
13335 | + list = this_cpu_ptr(&raised_list); | |
13336 | + | |
13337 | + if (llist_add(&work->llnode, list)) { | |
13338 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
13339 | arch_irq_work_raise(); | |
13340 | } | |
13341 | ||
13342 | @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) | |
13343 | raised = this_cpu_ptr(&raised_list); | |
13344 | lazy = this_cpu_ptr(&lazy_list); | |
13345 | ||
13346 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) | |
13347 | - if (llist_empty(lazy)) | |
13348 | - return false; | |
13349 | + if (llist_empty(raised) && llist_empty(lazy)) | |
13350 | + return false; | |
13351 | ||
13352 | /* All work should have been flushed before going offline */ | |
13353 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
13354 | @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list) | |
13355 | struct irq_work *work; | |
13356 | struct llist_node *llnode; | |
13357 | ||
13358 | - BUG_ON(!irqs_disabled()); | |
13359 | + BUG_ON_NONRT(!irqs_disabled()); | |
13360 | ||
13361 | if (llist_empty(list)) | |
13362 | return; | |
13363 | @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list) | |
13364 | void irq_work_run(void) | |
13365 | { | |
13366 | irq_work_run_list(this_cpu_ptr(&raised_list)); | |
13367 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13368 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
13369 | + /* | |
13370 | + * NOTE: we raise softirq via IPI for safety, | |
13371 | + * and execute in irq_work_tick() to move the | |
13372 | + * overhead from hard to soft irq context. | |
13373 | + */ | |
13374 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
13375 | + raise_softirq(TIMER_SOFTIRQ); | |
13376 | + } else | |
13377 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13378 | } | |
13379 | EXPORT_SYMBOL_GPL(irq_work_run); | |
13380 | ||
13381 | @@ -179,8 +200,17 @@ void irq_work_tick(void) | |
13382 | ||
13383 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) | |
13384 | irq_work_run_list(raised); | |
13385 | + | |
13386 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
13387 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13388 | +} | |
13389 | + | |
13390 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
13391 | +void irq_work_tick_soft(void) | |
13392 | +{ | |
13393 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13394 | } | |
13395 | +#endif | |
13396 | ||
13397 | /* | |
13398 | * Synchronize against the irq_work @entry, ensures the entry is not | |
13399 | diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c | |
13400 | index ee1bc1bb8feb..ddef07958840 100644 | |
13401 | --- a/kernel/ksysfs.c | |
13402 | +++ b/kernel/ksysfs.c | |
13403 | @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); | |
13404 | ||
13405 | #endif /* CONFIG_KEXEC_CORE */ | |
13406 | ||
13407 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
13408 | +static ssize_t realtime_show(struct kobject *kobj, | |
13409 | + struct kobj_attribute *attr, char *buf) | |
13410 | +{ | |
13411 | + return sprintf(buf, "%d\n", 1); | |
13412 | +} | |
13413 | +KERNEL_ATTR_RO(realtime); | |
13414 | +#endif | |
13415 | + | |
13416 | /* whether file capabilities are enabled */ | |
13417 | static ssize_t fscaps_show(struct kobject *kobj, | |
13418 | struct kobj_attribute *attr, char *buf) | |
13419 | @@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] = { | |
13420 | &rcu_expedited_attr.attr, | |
13421 | &rcu_normal_attr.attr, | |
13422 | #endif | |
13423 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13424 | + &realtime_attr.attr, | |
13425 | +#endif | |
13426 | NULL | |
13427 | }; | |
13428 | ||
13429 | diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile | |
33c7bf0f | 13430 | index 6f88e352cd4f..6ff9e8011dd0 100644 |
1a6e0f06 JK |
13431 | --- a/kernel/locking/Makefile |
13432 | +++ b/kernel/locking/Makefile | |
13433 | @@ -2,7 +2,7 @@ | |
13434 | # and is generally not a function of system call inputs. | |
13435 | KCOV_INSTRUMENT := n | |
13436 | ||
13437 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | |
13438 | +obj-y += semaphore.o percpu-rwsem.o | |
13439 | ||
13440 | ifdef CONFIG_FUNCTION_TRACER | |
13441 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
13442 | @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) | |
13443 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
13444 | endif | |
13445 | ||
13446 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
13447 | +obj-y += mutex.o | |
13448 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
1a6e0f06 | 13449 | +endif |
33c7bf0f | 13450 | +obj-y += rwsem.o |
1a6e0f06 JK |
13451 | obj-$(CONFIG_LOCKDEP) += lockdep.o |
13452 | ifeq ($(CONFIG_PROC_FS),y) | |
13453 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
c7c16703 | 13454 | @@ -24,7 +28,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
1a6e0f06 JK |
13455 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
13456 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
13457 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
13458 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
13459 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
13460 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
13461 | +endif | |
33c7bf0f | 13462 | +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o rwsem-rt.o |
1a6e0f06 JK |
13463 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o |
13464 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
1a6e0f06 | 13465 | diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c |
5c015b7c | 13466 | index 4d7ffc0a0d00..3d157b3128eb 100644 |
1a6e0f06 JK |
13467 | --- a/kernel/locking/lockdep.c |
13468 | +++ b/kernel/locking/lockdep.c | |
5c015b7c JK |
13469 | @@ -658,6 +658,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) |
13470 | struct lockdep_subclass_key *key; | |
13471 | struct hlist_head *hash_head; | |
13472 | struct lock_class *class; | |
13473 | + bool is_static = false; | |
13474 | ||
13475 | if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { | |
13476 | debug_locks_off(); | |
13477 | @@ -671,10 +672,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |
13478 | ||
13479 | /* | |
13480 | * Static locks do not have their class-keys yet - for them the key | |
13481 | - * is the lock object itself: | |
13482 | + * is the lock object itself. If the lock is in the per cpu area, | |
13483 | + * the canonical address of the lock (per cpu offset removed) is | |
13484 | + * used. | |
13485 | */ | |
13486 | - if (unlikely(!lock->key)) | |
13487 | - lock->key = (void *)lock; | |
13488 | + if (unlikely(!lock->key)) { | |
13489 | + unsigned long can_addr, addr = (unsigned long)lock; | |
13490 | + | |
13491 | + if (__is_kernel_percpu_address(addr, &can_addr)) | |
13492 | + lock->key = (void *)can_addr; | |
13493 | + else if (__is_module_percpu_address(addr, &can_addr)) | |
13494 | + lock->key = (void *)can_addr; | |
13495 | + else if (static_obj(lock)) | |
13496 | + lock->key = (void *)lock; | |
13497 | + else | |
13498 | + return ERR_PTR(-EINVAL); | |
13499 | + is_static = true; | |
13500 | + } | |
13501 | ||
13502 | /* | |
13503 | * NOTE: the class-key must be unique. For dynamic locks, a static | |
13504 | @@ -706,7 +720,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) | |
13505 | } | |
13506 | } | |
13507 | ||
13508 | - return NULL; | |
13509 | + return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); | |
13510 | } | |
13511 | ||
13512 | /* | |
13513 | @@ -724,19 +738,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) | |
13514 | DEBUG_LOCKS_WARN_ON(!irqs_disabled()); | |
13515 | ||
13516 | class = look_up_lock_class(lock, subclass); | |
13517 | - if (likely(class)) | |
13518 | + if (likely(!IS_ERR_OR_NULL(class))) | |
13519 | goto out_set_class_cache; | |
13520 | ||
13521 | /* | |
13522 | * Debug-check: all keys must be persistent! | |
13523 | - */ | |
13524 | - if (!static_obj(lock->key)) { | |
13525 | + */ | |
13526 | + if (IS_ERR(class)) { | |
13527 | debug_locks_off(); | |
13528 | printk("INFO: trying to register non-static key.\n"); | |
13529 | printk("the code is fine but needs lockdep annotation.\n"); | |
13530 | printk("turning off the locking correctness validator.\n"); | |
13531 | dump_stack(); | |
13532 | - | |
13533 | return NULL; | |
13534 | } | |
13535 | ||
13536 | @@ -3410,7 +3423,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) | |
13537 | * Clearly if the lock hasn't been acquired _ever_, we're not | |
13538 | * holding it either, so report failure. | |
13539 | */ | |
13540 | - if (!class) | |
13541 | + if (IS_ERR_OR_NULL(class)) | |
13542 | return 0; | |
13543 | ||
13544 | /* | |
13545 | @@ -3689,6 +3702,7 @@ static void check_flags(unsigned long flags) | |
1a6e0f06 JK |
13546 | } |
13547 | } | |
13548 | ||
13549 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13550 | /* | |
13551 | * We dont accurately track softirq state in e.g. | |
13552 | * hardirq contexts (such as on 4KSTACKS), so only | |
5c015b7c | 13553 | @@ -3703,6 +3717,7 @@ static void check_flags(unsigned long flags) |
1a6e0f06 JK |
13554 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); |
13555 | } | |
13556 | } | |
13557 | +#endif | |
13558 | ||
13559 | if (!debug_locks) | |
13560 | print_irqtrace_events(current); | |
5c015b7c JK |
13561 | @@ -4159,7 +4174,7 @@ void lockdep_reset_lock(struct lockdep_map *lock) |
13562 | * If the class exists we look it up and zap it: | |
13563 | */ | |
13564 | class = look_up_lock_class(lock, j); | |
13565 | - if (class) | |
13566 | + if (!IS_ERR_OR_NULL(class)) | |
13567 | zap_class(class); | |
13568 | } | |
13569 | /* | |
1a6e0f06 JK |
13570 | diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c |
13571 | index f8c5af52a131..788068773e61 100644 | |
13572 | --- a/kernel/locking/locktorture.c | |
13573 | +++ b/kernel/locking/locktorture.c | |
13574 | @@ -26,7 +26,6 @@ | |
13575 | #include <linux/kthread.h> | |
13576 | #include <linux/sched/rt.h> | |
13577 | #include <linux/spinlock.h> | |
13578 | -#include <linux/rwlock.h> | |
13579 | #include <linux/mutex.h> | |
13580 | #include <linux/rwsem.h> | |
13581 | #include <linux/smp.h> | |
c7c16703 JK |
13582 | diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c |
13583 | index ce182599cf2e..2ad3a1e8344c 100644 | |
13584 | --- a/kernel/locking/percpu-rwsem.c | |
13585 | +++ b/kernel/locking/percpu-rwsem.c | |
13586 | @@ -18,7 +18,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, | |
13587 | /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ | |
13588 | rcu_sync_init(&sem->rss, RCU_SCHED_SYNC); | |
13589 | __init_rwsem(&sem->rw_sem, name, rwsem_key); | |
13590 | - init_waitqueue_head(&sem->writer); | |
13591 | + init_swait_queue_head(&sem->writer); | |
13592 | sem->readers_block = 0; | |
13593 | return 0; | |
13594 | } | |
13595 | @@ -103,7 +103,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem) | |
13596 | __this_cpu_dec(*sem->read_count); | |
13597 | ||
13598 | /* Prod writer to recheck readers_active */ | |
13599 | - wake_up(&sem->writer); | |
13600 | + swake_up(&sem->writer); | |
13601 | } | |
13602 | EXPORT_SYMBOL_GPL(__percpu_up_read); | |
13603 | ||
13604 | @@ -160,7 +160,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem) | |
13605 | */ | |
13606 | ||
13607 | /* Wait for all now active readers to complete. */ | |
13608 | - wait_event(sem->writer, readers_active_check(sem)); | |
13609 | + swait_event(sem->writer, readers_active_check(sem)); | |
13610 | } | |
13611 | EXPORT_SYMBOL_GPL(percpu_down_write); | |
13612 | ||
1a6e0f06 JK |
13613 | diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c |
13614 | new file mode 100644 | |
33c7bf0f | 13615 | index 000000000000..6284e3b15091 |
1a6e0f06 JK |
13616 | --- /dev/null |
13617 | +++ b/kernel/locking/rt.c | |
33c7bf0f | 13618 | @@ -0,0 +1,331 @@ |
1a6e0f06 JK |
13619 | +/* |
13620 | + * kernel/rt.c | |
13621 | + * | |
13622 | + * Real-Time Preemption Support | |
13623 | + * | |
13624 | + * started by Ingo Molnar: | |
13625 | + * | |
13626 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
13627 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
13628 | + * | |
13629 | + * historic credit for proving that Linux spinlocks can be implemented via | |
13630 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
13631 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
13632 | + * research and analysis; TimeSys, for proving that you can implement a | |
13633 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
13634 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
13635 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
13636 | + * | |
13637 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
13638 | + * but the idea of converting spinlocks to mutexes is used here too. | |
13639 | + * | |
13640 | + * lock debugging, locking tree, deadlock detection: | |
13641 | + * | |
13642 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
13643 | + * Released under the General Public License (GPL). | |
13644 | + * | |
13645 | + * Includes portions of the generic R/W semaphore implementation from: | |
13646 | + * | |
13647 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
13648 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
13649 | + * - Derived also from comments by Linus | |
13650 | + * | |
13651 | + * Pending ownership of locks and ownership stealing: | |
13652 | + * | |
13653 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
13654 | + * | |
13655 | + * (also by Steven Rostedt) | |
13656 | + * - Converted single pi_lock to individual task locks. | |
13657 | + * | |
13658 | + * By Esben Nielsen: | |
13659 | + * Doing priority inheritance with help of the scheduler. | |
13660 | + * | |
13661 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
13662 | + * - major rework based on Esben Nielsens initial patch | |
13663 | + * - replaced thread_info references by task_struct refs | |
13664 | + * - removed task->pending_owner dependency | |
13665 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
13666 | + * in the scheduler return path as discussed with Steven Rostedt | |
13667 | + * | |
13668 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
13669 | + * Steven Rostedt <rostedt@goodmis.org> | |
13670 | + * - debugged and patched Thomas Gleixner's rework. | |
13671 | + * - added back the cmpxchg to the rework. | |
13672 | + * - turned atomic require back on for SMP. | |
13673 | + */ | |
13674 | + | |
13675 | +#include <linux/spinlock.h> | |
13676 | +#include <linux/rtmutex.h> | |
13677 | +#include <linux/sched.h> | |
13678 | +#include <linux/delay.h> | |
13679 | +#include <linux/module.h> | |
13680 | +#include <linux/kallsyms.h> | |
13681 | +#include <linux/syscalls.h> | |
13682 | +#include <linux/interrupt.h> | |
13683 | +#include <linux/plist.h> | |
13684 | +#include <linux/fs.h> | |
13685 | +#include <linux/futex.h> | |
13686 | +#include <linux/hrtimer.h> | |
13687 | + | |
13688 | +#include "rtmutex_common.h" | |
13689 | + | |
13690 | +/* | |
13691 | + * struct mutex functions | |
13692 | + */ | |
13693 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
13694 | + struct lock_class_key *key) | |
13695 | +{ | |
13696 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13697 | + /* | |
13698 | + * Make sure we are not reinitializing a held lock: | |
13699 | + */ | |
13700 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
13701 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
13702 | +#endif | |
13703 | + mutex->lock.save_state = 0; | |
13704 | +} | |
13705 | +EXPORT_SYMBOL(__mutex_do_init); | |
13706 | + | |
13707 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
13708 | +{ | |
13709 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13710 | + rt_mutex_lock(&lock->lock); | |
13711 | +} | |
13712 | +EXPORT_SYMBOL(_mutex_lock); | |
13713 | + | |
13714 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
13715 | +{ | |
13716 | + int ret; | |
13717 | + | |
13718 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13719 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
13720 | + if (ret) | |
13721 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13722 | + return ret; | |
13723 | +} | |
13724 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
13725 | + | |
13726 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
13727 | +{ | |
13728 | + int ret; | |
13729 | + | |
13730 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13731 | + ret = rt_mutex_lock_killable(&lock->lock); | |
13732 | + if (ret) | |
13733 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13734 | + return ret; | |
13735 | +} | |
13736 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
13737 | + | |
13738 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13739 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
13740 | +{ | |
13741 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
13742 | + rt_mutex_lock(&lock->lock); | |
13743 | +} | |
13744 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
13745 | + | |
13746 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
13747 | +{ | |
13748 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
13749 | + rt_mutex_lock(&lock->lock); | |
13750 | +} | |
13751 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
13752 | + | |
13753 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) | |
13754 | +{ | |
13755 | + int ret; | |
13756 | + | |
13757 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
13758 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
13759 | + if (ret) | |
13760 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13761 | + return ret; | |
13762 | +} | |
13763 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); | |
13764 | + | |
13765 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
13766 | +{ | |
13767 | + int ret; | |
13768 | + | |
13769 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
13770 | + ret = rt_mutex_lock_killable(&lock->lock); | |
13771 | + if (ret) | |
13772 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13773 | + return ret; | |
13774 | +} | |
13775 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
13776 | +#endif | |
13777 | + | |
13778 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
13779 | +{ | |
13780 | + int ret = rt_mutex_trylock(&lock->lock); | |
13781 | + | |
13782 | + if (ret) | |
13783 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13784 | + | |
13785 | + return ret; | |
13786 | +} | |
13787 | +EXPORT_SYMBOL(_mutex_trylock); | |
13788 | + | |
13789 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
13790 | +{ | |
13791 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13792 | + rt_mutex_unlock(&lock->lock); | |
13793 | +} | |
13794 | +EXPORT_SYMBOL(_mutex_unlock); | |
13795 | + | |
13796 | +/* | |
13797 | + * rwlock_t functions | |
13798 | + */ | |
13799 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) | |
13800 | +{ | |
13801 | + int ret; | |
13802 | + | |
13803 | + migrate_disable(); | |
13804 | + ret = rt_mutex_trylock(&rwlock->lock); | |
13805 | + if (ret) | |
13806 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
13807 | + else | |
13808 | + migrate_enable(); | |
13809 | + | |
13810 | + return ret; | |
13811 | +} | |
13812 | +EXPORT_SYMBOL(rt_write_trylock); | |
13813 | + | |
13814 | +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) | |
13815 | +{ | |
13816 | + int ret; | |
13817 | + | |
13818 | + *flags = 0; | |
13819 | + ret = rt_write_trylock(rwlock); | |
13820 | + return ret; | |
13821 | +} | |
13822 | +EXPORT_SYMBOL(rt_write_trylock_irqsave); | |
13823 | + | |
13824 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) | |
13825 | +{ | |
13826 | + struct rt_mutex *lock = &rwlock->lock; | |
13827 | + int ret = 1; | |
13828 | + | |
13829 | + /* | |
13830 | + * recursive read locks succeed when current owns the lock, | |
13831 | + * but not when read_depth == 0 which means that the lock is | |
13832 | + * write locked. | |
13833 | + */ | |
13834 | + if (rt_mutex_owner(lock) != current) { | |
13835 | + migrate_disable(); | |
13836 | + ret = rt_mutex_trylock(lock); | |
13837 | + if (ret) | |
13838 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
13839 | + else | |
13840 | + migrate_enable(); | |
13841 | + | |
13842 | + } else if (!rwlock->read_depth) { | |
13843 | + ret = 0; | |
13844 | + } | |
13845 | + | |
13846 | + if (ret) | |
13847 | + rwlock->read_depth++; | |
13848 | + | |
13849 | + return ret; | |
13850 | +} | |
13851 | +EXPORT_SYMBOL(rt_read_trylock); | |
13852 | + | |
13853 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) | |
13854 | +{ | |
13855 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
13856 | + __rt_spin_lock(&rwlock->lock); | |
13857 | +} | |
13858 | +EXPORT_SYMBOL(rt_write_lock); | |
13859 | + | |
33c7bf0f | 13860 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) |
1a6e0f06 | 13861 | +{ |
33c7bf0f | 13862 | + struct rt_mutex *lock = &rwlock->lock; |
1a6e0f06 | 13863 | + |
1a6e0f06 | 13864 | + |
33c7bf0f JK |
13865 | + /* |
13866 | + * recursive read locks succeed when current owns the lock | |
13867 | + */ | |
13868 | + if (rt_mutex_owner(lock) != current) { | |
13869 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
13870 | + __rt_spin_lock(lock); | |
13871 | + } | |
13872 | + rwlock->read_depth++; | |
1a6e0f06 | 13873 | +} |
1a6e0f06 | 13874 | + |
33c7bf0f | 13875 | +EXPORT_SYMBOL(rt_read_lock); |
1a6e0f06 | 13876 | + |
33c7bf0f JK |
13877 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) |
13878 | +{ | |
13879 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
13880 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
13881 | + __rt_spin_unlock(&rwlock->lock); | |
13882 | + migrate_enable(); | |
1a6e0f06 | 13883 | +} |
33c7bf0f | 13884 | +EXPORT_SYMBOL(rt_write_unlock); |
1a6e0f06 | 13885 | + |
33c7bf0f | 13886 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) |
1a6e0f06 | 13887 | +{ |
33c7bf0f JK |
13888 | + /* Release the lock only when read_depth is down to 0 */ |
13889 | + if (--rwlock->read_depth == 0) { | |
13890 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
13891 | + __rt_spin_unlock(&rwlock->lock); | |
13892 | + migrate_enable(); | |
13893 | + } | |
1a6e0f06 | 13894 | +} |
33c7bf0f | 13895 | +EXPORT_SYMBOL(rt_read_unlock); |
1a6e0f06 | 13896 | + |
33c7bf0f | 13897 | +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) |
1a6e0f06 | 13898 | +{ |
33c7bf0f JK |
13899 | + rt_write_lock(rwlock); |
13900 | + | |
13901 | + return 0; | |
1a6e0f06 | 13902 | +} |
33c7bf0f | 13903 | +EXPORT_SYMBOL(rt_write_lock_irqsave); |
1a6e0f06 | 13904 | + |
33c7bf0f | 13905 | +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) |
1a6e0f06 | 13906 | +{ |
33c7bf0f JK |
13907 | + rt_read_lock(rwlock); |
13908 | + | |
13909 | + return 0; | |
1a6e0f06 | 13910 | +} |
33c7bf0f | 13911 | +EXPORT_SYMBOL(rt_read_lock_irqsave); |
1a6e0f06 | 13912 | + |
33c7bf0f | 13913 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) |
1a6e0f06 JK |
13914 | +{ |
13915 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13916 | + /* | |
13917 | + * Make sure we are not reinitializing a held lock: | |
13918 | + */ | |
33c7bf0f JK |
13919 | + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); |
13920 | + lockdep_init_map(&rwlock->dep_map, name, key, 0); | |
1a6e0f06 | 13921 | +#endif |
33c7bf0f JK |
13922 | + rwlock->lock.save_state = 1; |
13923 | + rwlock->read_depth = 0; | |
1a6e0f06 | 13924 | +} |
33c7bf0f | 13925 | +EXPORT_SYMBOL(__rt_rwlock_init); |
1a6e0f06 JK |
13926 | + |
13927 | +/** | |
13928 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
13929 | + * @cnt: the atomic which we are to dec | |
13930 | + * @lock: the mutex to return holding if we dec to 0 | |
13931 | + * | |
13932 | + * return true and hold lock if we dec to 0, return false otherwise | |
13933 | + */ | |
13934 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
13935 | +{ | |
13936 | + /* dec if we can't possibly hit 0 */ | |
13937 | + if (atomic_add_unless(cnt, -1, 1)) | |
13938 | + return 0; | |
13939 | + /* we might hit 0, so take the lock */ | |
13940 | + mutex_lock(lock); | |
13941 | + if (!atomic_dec_and_test(cnt)) { | |
13942 | + /* when we actually did the dec, we didn't hit 0 */ | |
13943 | + mutex_unlock(lock); | |
13944 | + return 0; | |
13945 | + } | |
13946 | + /* we hit 0, and we hold the lock */ | |
13947 | + return 1; | |
13948 | +} | |
13949 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
33c7bf0f JK |
13950 | diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c |
13951 | index 62b6cee8ea7f..0613c4b1d059 100644 | |
13952 | --- a/kernel/locking/rtmutex-debug.c | |
13953 | +++ b/kernel/locking/rtmutex-debug.c | |
13954 | @@ -173,12 +173,3 @@ void debug_rt_mutex_init(struct rt_mutex *lock, const char *name) | |
13955 | lock->name = name; | |
13956 | } | |
13957 | ||
13958 | -void | |
13959 | -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) | |
13960 | -{ | |
13961 | -} | |
13962 | - | |
13963 | -void rt_mutex_deadlock_account_unlock(struct task_struct *task) | |
13964 | -{ | |
13965 | -} | |
13966 | - | |
13967 | diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h | |
13968 | index d0519c3432b6..b585af9a1b50 100644 | |
13969 | --- a/kernel/locking/rtmutex-debug.h | |
13970 | +++ b/kernel/locking/rtmutex-debug.h | |
13971 | @@ -9,9 +9,6 @@ | |
13972 | * This file contains macros used solely by rtmutex.c. Debug version. | |
13973 | */ | |
13974 | ||
13975 | -extern void | |
13976 | -rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task); | |
13977 | -extern void rt_mutex_deadlock_account_unlock(struct task_struct *task); | |
13978 | extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter); | |
13979 | extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter); | |
13980 | extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); | |
1a6e0f06 | 13981 | diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c |
7c18450a | 13982 | index 2c49d76f96c3..eec63f064b3f 100644 |
1a6e0f06 JK |
13983 | --- a/kernel/locking/rtmutex.c |
13984 | +++ b/kernel/locking/rtmutex.c | |
13985 | @@ -7,6 +7,11 @@ | |
13986 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
13987 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
13988 | * Copyright (C) 2006 Esben Nielsen | |
13989 | + * Adaptive Spinlocks: | |
13990 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
13991 | + * and Peter Morreale, | |
13992 | + * Adaptive Spinlocks simplification: | |
13993 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
13994 | * | |
13995 | * See Documentation/locking/rt-mutex-design.txt for details. | |
13996 | */ | |
13997 | @@ -16,6 +21,7 @@ | |
13998 | #include <linux/sched/rt.h> | |
13999 | #include <linux/sched/deadline.h> | |
14000 | #include <linux/timer.h> | |
14001 | +#include <linux/ww_mutex.h> | |
14002 | ||
14003 | #include "rtmutex_common.h" | |
14004 | ||
c7c16703 JK |
14005 | @@ -133,6 +139,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) |
14006 | WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS); | |
1a6e0f06 JK |
14007 | } |
14008 | ||
14009 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) | |
14010 | +{ | |
14011 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
14012 | + waiter != PI_REQUEUE_INPROGRESS; | |
14013 | +} | |
14014 | + | |
14015 | /* | |
14016 | * We can speed up the acquire/release, if there's no debugging state to be | |
14017 | * set up. | |
7c18450a JK |
14018 | @@ -222,12 +234,25 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, |
14019 | } | |
14020 | #endif | |
14021 | ||
14022 | +#define STEAL_NORMAL 0 | |
14023 | +#define STEAL_LATERAL 1 | |
14024 | +/* | |
14025 | + * Only use with rt_mutex_waiter_{less,equal}() | |
14026 | + */ | |
14027 | +#define task_to_waiter(p) \ | |
14028 | + &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline } | |
14029 | + | |
14030 | static inline int | |
14031 | rt_mutex_waiter_less(struct rt_mutex_waiter *left, | |
14032 | - struct rt_mutex_waiter *right) | |
14033 | + struct rt_mutex_waiter *right, int mode) | |
14034 | { | |
14035 | - if (left->prio < right->prio) | |
14036 | - return 1; | |
14037 | + if (mode == STEAL_NORMAL) { | |
14038 | + if (left->prio < right->prio) | |
14039 | + return 1; | |
14040 | + } else { | |
14041 | + if (left->prio <= right->prio) | |
14042 | + return 1; | |
14043 | + } | |
14044 | ||
14045 | /* | |
14046 | * If both waiters have dl_prio(), we check the deadlines of the | |
14047 | @@ -236,12 +261,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left, | |
14048 | * then right waiter has a dl_prio() too. | |
14049 | */ | |
14050 | if (dl_prio(left->prio)) | |
14051 | - return dl_time_before(left->task->dl.deadline, | |
14052 | - right->task->dl.deadline); | |
14053 | + return dl_time_before(left->deadline, right->deadline); | |
14054 | ||
14055 | return 0; | |
14056 | } | |
14057 | ||
14058 | +static inline int | |
14059 | +rt_mutex_waiter_equal(struct rt_mutex_waiter *left, | |
14060 | + struct rt_mutex_waiter *right) | |
14061 | +{ | |
14062 | + if (left->prio != right->prio) | |
14063 | + return 0; | |
14064 | + | |
14065 | + /* | |
14066 | + * If both waiters have dl_prio(), we check the deadlines of the | |
14067 | + * associated tasks. | |
14068 | + * If left waiter has a dl_prio(), and we didn't return 0 above, | |
14069 | + * then right waiter has a dl_prio() too. | |
14070 | + */ | |
14071 | + if (dl_prio(left->prio)) | |
14072 | + return left->deadline == right->deadline; | |
14073 | + | |
14074 | + return 1; | |
14075 | +} | |
14076 | + | |
14077 | static void | |
14078 | rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) | |
14079 | { | |
14080 | @@ -253,7 +296,7 @@ rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) | |
14081 | while (*link) { | |
14082 | parent = *link; | |
14083 | entry = rb_entry(parent, struct rt_mutex_waiter, tree_entry); | |
14084 | - if (rt_mutex_waiter_less(waiter, entry)) { | |
14085 | + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { | |
14086 | link = &parent->rb_left; | |
14087 | } else { | |
14088 | link = &parent->rb_right; | |
14089 | @@ -292,7 +335,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) | |
14090 | while (*link) { | |
14091 | parent = *link; | |
14092 | entry = rb_entry(parent, struct rt_mutex_waiter, pi_tree_entry); | |
14093 | - if (rt_mutex_waiter_less(waiter, entry)) { | |
14094 | + if (rt_mutex_waiter_less(waiter, entry, STEAL_NORMAL)) { | |
14095 | link = &parent->rb_left; | |
14096 | } else { | |
14097 | link = &parent->rb_right; | |
14098 | @@ -320,72 +363,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) | |
14099 | RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
14100 | } | |
14101 | ||
14102 | -/* | |
14103 | - * Calculate task priority from the waiter tree priority | |
14104 | - * | |
14105 | - * Return task->normal_prio when the waiter tree is empty or when | |
14106 | - * the waiter is not allowed to do priority boosting | |
14107 | - */ | |
14108 | -int rt_mutex_getprio(struct task_struct *task) | |
14109 | +static void rt_mutex_adjust_prio(struct task_struct *p) | |
14110 | { | |
14111 | - if (likely(!task_has_pi_waiters(task))) | |
14112 | - return task->normal_prio; | |
14113 | + struct task_struct *pi_task = NULL; | |
14114 | ||
14115 | - return min(task_top_pi_waiter(task)->prio, | |
14116 | - task->normal_prio); | |
14117 | -} | |
14118 | + lockdep_assert_held(&p->pi_lock); | |
14119 | ||
14120 | -struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | |
14121 | -{ | |
14122 | - if (likely(!task_has_pi_waiters(task))) | |
14123 | - return NULL; | |
14124 | + if (task_has_pi_waiters(p)) | |
14125 | + pi_task = task_top_pi_waiter(p)->task; | |
14126 | ||
14127 | - return task_top_pi_waiter(task)->task; | |
14128 | -} | |
14129 | - | |
14130 | -/* | |
14131 | - * Called by sched_setscheduler() to get the priority which will be | |
14132 | - * effective after the change. | |
14133 | - */ | |
14134 | -int rt_mutex_get_effective_prio(struct task_struct *task, int newprio) | |
14135 | -{ | |
14136 | - if (!task_has_pi_waiters(task)) | |
14137 | - return newprio; | |
14138 | - | |
14139 | - if (task_top_pi_waiter(task)->task->prio <= newprio) | |
14140 | - return task_top_pi_waiter(task)->task->prio; | |
14141 | - return newprio; | |
14142 | -} | |
14143 | - | |
14144 | -/* | |
14145 | - * Adjust the priority of a task, after its pi_waiters got modified. | |
14146 | - * | |
14147 | - * This can be both boosting and unboosting. task->pi_lock must be held. | |
14148 | - */ | |
14149 | -static void __rt_mutex_adjust_prio(struct task_struct *task) | |
14150 | -{ | |
14151 | - int prio = rt_mutex_getprio(task); | |
14152 | - | |
14153 | - if (task->prio != prio || dl_prio(prio)) | |
14154 | - rt_mutex_setprio(task, prio); | |
14155 | -} | |
14156 | - | |
14157 | -/* | |
14158 | - * Adjust task priority (undo boosting). Called from the exit path of | |
14159 | - * rt_mutex_slowunlock() and rt_mutex_slowlock(). | |
14160 | - * | |
14161 | - * (Note: We do this outside of the protection of lock->wait_lock to | |
14162 | - * allow the lock to be taken while or before we readjust the priority | |
14163 | - * of task. We do not use the spin_xx_mutex() variants here as we are | |
14164 | - * outside of the debug path.) | |
14165 | - */ | |
14166 | -void rt_mutex_adjust_prio(struct task_struct *task) | |
14167 | -{ | |
14168 | - unsigned long flags; | |
14169 | - | |
14170 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14171 | - __rt_mutex_adjust_prio(task); | |
14172 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14173 | + rt_mutex_setprio(p, pi_task); | |
14174 | } | |
14175 | ||
14176 | /* | |
14177 | @@ -414,6 +401,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, | |
1a6e0f06 JK |
14178 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); |
14179 | } | |
14180 | ||
14181 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) | |
14182 | +{ | |
14183 | + if (waiter->savestate) | |
14184 | + wake_up_lock_sleeper(waiter->task); | |
14185 | + else | |
14186 | + wake_up_process(waiter->task); | |
14187 | +} | |
14188 | + | |
14189 | /* | |
14190 | * Max number of times we'll walk the boosting chain: | |
14191 | */ | |
7c18450a | 14192 | @@ -421,7 +416,8 @@ int max_lock_depth = 1024; |
1a6e0f06 JK |
14193 | |
14194 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | |
14195 | { | |
14196 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
14197 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
14198 | + p->pi_blocked_on->lock : NULL; | |
14199 | } | |
14200 | ||
14201 | /* | |
7c18450a | 14202 | @@ -557,7 +553,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
14203 | * reached or the state of the chain has changed while we |
14204 | * dropped the locks. | |
14205 | */ | |
14206 | - if (!waiter) | |
14207 | + if (!rt_mutex_real_waiter(waiter)) | |
14208 | goto out_unlock_pi; | |
14209 | ||
14210 | /* | |
7c18450a JK |
14211 | @@ -608,7 +604,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
14212 | * enabled we continue, but stop the requeueing in the chain | |
14213 | * walk. | |
14214 | */ | |
14215 | - if (waiter->prio == task->prio) { | |
14216 | + if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { | |
14217 | if (!detect_deadlock) | |
14218 | goto out_unlock_pi; | |
14219 | else | |
14220 | @@ -704,7 +700,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14221 | ||
14222 | /* [7] Requeue the waiter in the lock waiter tree. */ | |
14223 | rt_mutex_dequeue(lock, waiter); | |
14224 | + | |
14225 | + /* | |
14226 | + * Update the waiter prio fields now that we're dequeued. | |
14227 | + * | |
14228 | + * These values can have changed through either: | |
14229 | + * | |
14230 | + * sys_sched_set_scheduler() / sys_sched_setattr() | |
14231 | + * | |
14232 | + * or | |
14233 | + * | |
14234 | + * DL CBS enforcement advancing the effective deadline. | |
14235 | + * | |
14236 | + * Even though pi_waiters also uses these fields, and that tree is only | |
14237 | + * updated in [11], we can do this here, since we hold [L], which | |
14238 | + * serializes all pi_waiters access and rb_erase() does not care about | |
14239 | + * the values of the node being removed. | |
14240 | + */ | |
14241 | waiter->prio = task->prio; | |
14242 | + waiter->deadline = task->dl.deadline; | |
14243 | + | |
14244 | rt_mutex_enqueue(lock, waiter); | |
14245 | ||
14246 | /* [8] Release the task */ | |
14247 | @@ -719,13 +734,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
1a6e0f06 JK |
14248 | * follow here. This is the end of the chain we are walking. |
14249 | */ | |
14250 | if (!rt_mutex_owner(lock)) { | |
14251 | + struct rt_mutex_waiter *lock_top_waiter; | |
14252 | + | |
14253 | /* | |
14254 | * If the requeue [7] above changed the top waiter, | |
14255 | * then we need to wake the new top waiter up to try | |
14256 | * to get the lock. | |
14257 | */ | |
14258 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
14259 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
14260 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
14261 | + if (prerequeue_top_waiter != lock_top_waiter) | |
14262 | + rt_mutex_wake_waiter(lock_top_waiter); | |
14263 | raw_spin_unlock_irq(&lock->wait_lock); | |
14264 | return 0; | |
14265 | } | |
7c18450a JK |
14266 | @@ -745,7 +763,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
14267 | */ | |
14268 | rt_mutex_dequeue_pi(task, prerequeue_top_waiter); | |
14269 | rt_mutex_enqueue_pi(task, waiter); | |
14270 | - __rt_mutex_adjust_prio(task); | |
14271 | + rt_mutex_adjust_prio(task); | |
14272 | ||
14273 | } else if (prerequeue_top_waiter == waiter) { | |
14274 | /* | |
14275 | @@ -761,7 +779,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14276 | rt_mutex_dequeue_pi(task, waiter); | |
14277 | waiter = rt_mutex_top_waiter(lock); | |
14278 | rt_mutex_enqueue_pi(task, waiter); | |
14279 | - __rt_mutex_adjust_prio(task); | |
14280 | + rt_mutex_adjust_prio(task); | |
14281 | } else { | |
14282 | /* | |
14283 | * Nothing changed. No need to do any priority | |
14284 | @@ -818,6 +836,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
1a6e0f06 JK |
14285 | return ret; |
14286 | } | |
14287 | ||
1a6e0f06 JK |
14288 | + |
14289 | /* | |
14290 | * Try to take an rt-mutex | |
14291 | * | |
7c18450a | 14292 | @@ -828,9 +847,12 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, |
1a6e0f06 JK |
14293 | * @waiter: The waiter that is queued to the lock's wait tree if the |
14294 | * callsite called task_blocked_on_lock(), otherwise NULL | |
14295 | */ | |
14296 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14297 | - struct rt_mutex_waiter *waiter) | |
14298 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
14299 | + struct task_struct *task, | |
14300 | + struct rt_mutex_waiter *waiter, int mode) | |
14301 | { | |
7c18450a JK |
14302 | + lockdep_assert_held(&lock->wait_lock); |
14303 | + | |
1a6e0f06 JK |
14304 | /* |
14305 | * Before testing whether we can acquire @lock, we set the | |
7c18450a JK |
14306 | * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all |
14307 | @@ -866,8 +888,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
1a6e0f06 JK |
14308 | * If waiter is not the highest priority waiter of |
14309 | * @lock, give up. | |
14310 | */ | |
14311 | - if (waiter != rt_mutex_top_waiter(lock)) | |
14312 | + if (waiter != rt_mutex_top_waiter(lock)) { | |
7c18450a | 14313 | + /* XXX rt_mutex_waiter_less() ? */ |
1a6e0f06 JK |
14314 | return 0; |
14315 | + } | |
14316 | ||
14317 | /* | |
14318 | * We can acquire the lock. Remove the waiter from the | |
7c18450a | 14319 | @@ -885,14 +909,26 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
1a6e0f06 JK |
14320 | * not need to be dequeued. |
14321 | */ | |
14322 | if (rt_mutex_has_waiters(lock)) { | |
1a6e0f06 | 14323 | + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; |
7c18450a JK |
14324 | + |
14325 | + if (task != pown) | |
1a6e0f06 | 14326 | + return 0; |
7c18450a JK |
14327 | + |
14328 | + /* | |
14329 | + * Note that RT tasks are excluded from lateral-steals | |
14330 | + * to prevent the introduction of an unbounded latency. | |
14331 | + */ | |
14332 | + if (rt_task(task)) | |
14333 | + mode = STEAL_NORMAL; | |
14334 | /* | |
14335 | * If @task->prio is greater than or equal to | |
14336 | * the top waiter priority (kernel view), | |
14337 | * @task lost. | |
14338 | */ | |
14339 | - if (task->prio >= rt_mutex_top_waiter(lock)->prio) | |
14340 | + if (!rt_mutex_waiter_less(task_to_waiter(task), | |
14341 | + rt_mutex_top_waiter(lock), | |
14342 | + mode)) | |
14343 | return 0; | |
14344 | - | |
1a6e0f06 JK |
14345 | /* |
14346 | * The current top waiter stays enqueued. We | |
14347 | * don't have to change anything in the lock | |
7c18450a | 14348 | @@ -936,11 +972,384 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, |
33c7bf0f JK |
14349 | */ |
14350 | rt_mutex_set_owner(lock, task); | |
14351 | ||
14352 | - rt_mutex_deadlock_account_lock(lock, task); | |
14353 | - | |
1a6e0f06 JK |
14354 | return 1; |
14355 | } | |
14356 | ||
14357 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14358 | +/* | |
14359 | + * preemptible spin_lock functions: | |
14360 | + */ | |
14361 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
14362 | + void (*slowfn)(struct rt_mutex *lock, | |
14363 | + bool mg_off), | |
14364 | + bool do_mig_dis) | |
14365 | +{ | |
14366 | + might_sleep_no_state_check(); | |
14367 | + | |
14368 | + if (do_mig_dis) | |
14369 | + migrate_disable(); | |
14370 | + | |
14371 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
33c7bf0f | 14372 | + return; |
1a6e0f06 JK |
14373 | + else |
14374 | + slowfn(lock, do_mig_dis); | |
14375 | +} | |
14376 | + | |
33c7bf0f JK |
14377 | +static inline void rt_spin_lock_fastunlock(struct rt_mutex *lock, |
14378 | + void (*slowfn)(struct rt_mutex *lock)) | |
1a6e0f06 | 14379 | +{ |
33c7bf0f JK |
14380 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) |
14381 | + return; | |
14382 | + else | |
14383 | + slowfn(lock); | |
1a6e0f06 JK |
14384 | +} |
14385 | +#ifdef CONFIG_SMP | |
14386 | +/* | |
14387 | + * Note that owner is a speculative pointer and dereferencing relies | |
14388 | + * on rcu_read_lock() and the check against the lock owner. | |
14389 | + */ | |
14390 | +static int adaptive_wait(struct rt_mutex *lock, | |
14391 | + struct task_struct *owner) | |
14392 | +{ | |
14393 | + int res = 0; | |
14394 | + | |
14395 | + rcu_read_lock(); | |
14396 | + for (;;) { | |
14397 | + if (owner != rt_mutex_owner(lock)) | |
14398 | + break; | |
14399 | + /* | |
14400 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
14401 | + * checking the above to be valid. | |
14402 | + */ | |
14403 | + barrier(); | |
14404 | + if (!owner->on_cpu) { | |
14405 | + res = 1; | |
14406 | + break; | |
14407 | + } | |
14408 | + cpu_relax(); | |
14409 | + } | |
14410 | + rcu_read_unlock(); | |
14411 | + return res; | |
14412 | +} | |
14413 | +#else | |
14414 | +static int adaptive_wait(struct rt_mutex *lock, | |
14415 | + struct task_struct *orig_owner) | |
14416 | +{ | |
14417 | + return 1; | |
14418 | +} | |
14419 | +#endif | |
14420 | + | |
14421 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14422 | + struct rt_mutex_waiter *waiter, | |
14423 | + struct task_struct *task, | |
14424 | + enum rtmutex_chainwalk chwalk); | |
14425 | +/* | |
14426 | + * Slow path lock function spin_lock style: this variant is very | |
14427 | + * careful not to miss any non-lock wakeups. | |
14428 | + * | |
14429 | + * We store the current state under p->pi_lock in p->saved_state and | |
14430 | + * the try_to_wake_up() code handles this accordingly. | |
14431 | + */ | |
14432 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, | |
14433 | + bool mg_off) | |
14434 | +{ | |
14435 | + struct task_struct *lock_owner, *self = current; | |
14436 | + struct rt_mutex_waiter waiter, *top_waiter; | |
14437 | + unsigned long flags; | |
14438 | + int ret; | |
14439 | + | |
14440 | + rt_mutex_init_waiter(&waiter, true); | |
14441 | + | |
14442 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14443 | + | |
14444 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { | |
14445 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14446 | + return; | |
14447 | + } | |
14448 | + | |
14449 | + BUG_ON(rt_mutex_owner(lock) == self); | |
14450 | + | |
14451 | + /* | |
14452 | + * We save whatever state the task is in and we'll restore it | |
14453 | + * after acquiring the lock taking real wakeups into account | |
14454 | + * as well. We are serialized via pi_lock against wakeups. See | |
14455 | + * try_to_wake_up(). | |
14456 | + */ | |
14457 | + raw_spin_lock(&self->pi_lock); | |
14458 | + self->saved_state = self->state; | |
14459 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
14460 | + raw_spin_unlock(&self->pi_lock); | |
14461 | + | |
14462 | + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); | |
14463 | + BUG_ON(ret); | |
14464 | + | |
14465 | + for (;;) { | |
14466 | + /* Try to acquire the lock again. */ | |
14467 | + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) | |
14468 | + break; | |
14469 | + | |
14470 | + top_waiter = rt_mutex_top_waiter(lock); | |
14471 | + lock_owner = rt_mutex_owner(lock); | |
14472 | + | |
14473 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14474 | + | |
14475 | + debug_rt_mutex_print_deadlock(&waiter); | |
14476 | + | |
14477 | + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { | |
14478 | + if (mg_off) | |
14479 | + migrate_enable(); | |
14480 | + schedule(); | |
14481 | + if (mg_off) | |
14482 | + migrate_disable(); | |
14483 | + } | |
14484 | + | |
14485 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14486 | + | |
14487 | + raw_spin_lock(&self->pi_lock); | |
14488 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
14489 | + raw_spin_unlock(&self->pi_lock); | |
14490 | + } | |
14491 | + | |
14492 | + /* | |
14493 | + * Restore the task state to current->saved_state. We set it | |
14494 | + * to the original state above and the try_to_wake_up() code | |
14495 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
14496 | + * happened while we were blocked. Clear saved_state so | |
14497 | + * try_to_wakeup() does not get confused. | |
14498 | + */ | |
14499 | + raw_spin_lock(&self->pi_lock); | |
14500 | + __set_current_state_no_track(self->saved_state); | |
14501 | + self->saved_state = TASK_RUNNING; | |
14502 | + raw_spin_unlock(&self->pi_lock); | |
14503 | + | |
14504 | + /* | |
14505 | + * try_to_take_rt_mutex() sets the waiter bit | |
14506 | + * unconditionally. We might have to fix that up: | |
14507 | + */ | |
14508 | + fixup_rt_mutex_waiters(lock); | |
14509 | + | |
14510 | + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); | |
14511 | + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); | |
14512 | + | |
14513 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14514 | + | |
14515 | + debug_rt_mutex_free_waiter(&waiter); | |
14516 | +} | |
14517 | + | |
7c18450a JK |
14518 | +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, |
14519 | + struct wake_q_head *wake_q, | |
14520 | + struct wake_q_head *wq_sleeper); | |
1a6e0f06 JK |
14521 | +/* |
14522 | + * Slow path to release a rt_mutex spin_lock style | |
14523 | + */ | |
33c7bf0f | 14524 | +static void noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) |
1a6e0f06 JK |
14525 | +{ |
14526 | + unsigned long flags; | |
14527 | + WAKE_Q(wake_q); | |
14528 | + WAKE_Q(wake_sleeper_q); | |
7c18450a | 14529 | + bool postunlock; |
1a6e0f06 JK |
14530 | + |
14531 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
7c18450a | 14532 | + postunlock = __rt_mutex_unlock_common(lock, &wake_q, &wake_sleeper_q); |
1a6e0f06 | 14533 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
1a6e0f06 | 14534 | + |
7c18450a JK |
14535 | + if (postunlock) |
14536 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
1a6e0f06 JK |
14537 | +} |
14538 | + | |
14539 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) | |
14540 | +{ | |
14541 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); | |
14542 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
14543 | +} | |
14544 | +EXPORT_SYMBOL(rt_spin_lock__no_mg); | |
14545 | + | |
14546 | +void __lockfunc rt_spin_lock(spinlock_t *lock) | |
14547 | +{ | |
14548 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
14549 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
14550 | +} | |
14551 | +EXPORT_SYMBOL(rt_spin_lock); | |
14552 | + | |
14553 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) | |
14554 | +{ | |
14555 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); | |
14556 | +} | |
14557 | +EXPORT_SYMBOL(__rt_spin_lock); | |
14558 | + | |
14559 | +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) | |
14560 | +{ | |
14561 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); | |
14562 | +} | |
14563 | +EXPORT_SYMBOL(__rt_spin_lock__no_mg); | |
14564 | + | |
14565 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14566 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
14567 | +{ | |
14568 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
14569 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
14570 | +} | |
14571 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
14572 | +#endif | |
14573 | + | |
14574 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) | |
14575 | +{ | |
14576 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
14577 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
14578 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
14579 | +} | |
14580 | +EXPORT_SYMBOL(rt_spin_unlock__no_mg); | |
14581 | + | |
14582 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) | |
14583 | +{ | |
14584 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
14585 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
14586 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
14587 | + migrate_enable(); | |
14588 | +} | |
14589 | +EXPORT_SYMBOL(rt_spin_unlock); | |
14590 | + | |
1a6e0f06 JK |
14591 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) |
14592 | +{ | |
14593 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
14594 | +} | |
14595 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
14596 | + | |
14597 | +/* | |
14598 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
14599 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
14600 | + * schedule if there's contention: | |
14601 | + */ | |
14602 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
14603 | +{ | |
14604 | + spin_lock(lock); | |
14605 | + spin_unlock(lock); | |
14606 | +} | |
14607 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
14608 | + | |
1a6e0f06 JK |
14609 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) |
14610 | +{ | |
14611 | + int ret; | |
14612 | + | |
14613 | + ret = rt_mutex_trylock(&lock->lock); | |
14614 | + if (ret) | |
14615 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14616 | + return ret; | |
14617 | +} | |
14618 | +EXPORT_SYMBOL(rt_spin_trylock__no_mg); | |
14619 | + | |
14620 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
14621 | +{ | |
14622 | + int ret; | |
14623 | + | |
14624 | + migrate_disable(); | |
14625 | + ret = rt_mutex_trylock(&lock->lock); | |
14626 | + if (ret) | |
14627 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14628 | + else | |
14629 | + migrate_enable(); | |
14630 | + return ret; | |
14631 | +} | |
14632 | +EXPORT_SYMBOL(rt_spin_trylock); | |
14633 | + | |
14634 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
14635 | +{ | |
14636 | + int ret; | |
14637 | + | |
14638 | + local_bh_disable(); | |
14639 | + ret = rt_mutex_trylock(&lock->lock); | |
14640 | + if (ret) { | |
14641 | + migrate_disable(); | |
14642 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14643 | + } else | |
14644 | + local_bh_enable(); | |
14645 | + return ret; | |
14646 | +} | |
14647 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
14648 | + | |
14649 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
14650 | +{ | |
14651 | + int ret; | |
14652 | + | |
14653 | + *flags = 0; | |
14654 | + ret = rt_mutex_trylock(&lock->lock); | |
14655 | + if (ret) { | |
14656 | + migrate_disable(); | |
14657 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14658 | + } | |
14659 | + return ret; | |
14660 | +} | |
14661 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
14662 | + | |
14663 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) | |
14664 | +{ | |
14665 | + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ | |
14666 | + if (atomic_add_unless(atomic, -1, 1)) | |
14667 | + return 0; | |
14668 | + rt_spin_lock(lock); | |
14669 | + if (atomic_dec_and_test(atomic)) | |
14670 | + return 1; | |
14671 | + rt_spin_unlock(lock); | |
14672 | + return 0; | |
14673 | +} | |
14674 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
14675 | + | |
14676 | + void | |
14677 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) | |
14678 | +{ | |
14679 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14680 | + /* | |
14681 | + * Make sure we are not reinitializing a held lock: | |
14682 | + */ | |
14683 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
14684 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
14685 | +#endif | |
14686 | +} | |
14687 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
14688 | + | |
14689 | +#endif /* PREEMPT_RT_FULL */ | |
14690 | + | |
14691 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14692 | + static inline int __sched | |
14693 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
14694 | +{ | |
14695 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
14696 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
14697 | + | |
14698 | + if (!hold_ctx) | |
14699 | + return 0; | |
14700 | + | |
14701 | + if (unlikely(ctx == hold_ctx)) | |
14702 | + return -EALREADY; | |
14703 | + | |
14704 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | |
14705 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
14706 | +#ifdef CONFIG_DEBUG_MUTEXES | |
14707 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
14708 | + ctx->contending_lock = ww; | |
14709 | +#endif | |
14710 | + return -EDEADLK; | |
14711 | + } | |
14712 | + | |
14713 | + return 0; | |
14714 | +} | |
14715 | +#else | |
14716 | + static inline int __sched | |
14717 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
14718 | +{ | |
14719 | + BUG(); | |
14720 | + return 0; | |
14721 | +} | |
14722 | + | |
14723 | +#endif | |
14724 | + | |
14725 | +static inline int | |
14726 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14727 | + struct rt_mutex_waiter *waiter) | |
14728 | +{ | |
14729 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
14730 | +} | |
14731 | + | |
14732 | /* | |
14733 | * Task blocks on lock. | |
14734 | * | |
7c18450a JK |
14735 | @@ -958,6 +1367,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
14736 | struct rt_mutex *next_lock; | |
14737 | int chain_walk = 0, res; | |
14738 | ||
14739 | + lockdep_assert_held(&lock->wait_lock); | |
14740 | + | |
14741 | /* | |
14742 | * Early deadlock detection. We really don't want the task to | |
14743 | * enqueue on itself just to untangle the mess later. It's not | |
14744 | @@ -971,10 +1382,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
1a6e0f06 JK |
14745 | return -EDEADLK; |
14746 | ||
14747 | raw_spin_lock(&task->pi_lock); | |
7c18450a | 14748 | - __rt_mutex_adjust_prio(task); |
1a6e0f06 JK |
14749 | + |
14750 | + /* | |
14751 | + * In the case of futex requeue PI, this will be a proxy | |
14752 | + * lock. The task will wake unaware that it is enqueueed on | |
14753 | + * this lock. Avoid blocking on two locks and corrupting | |
14754 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
14755 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
14756 | + * before requeue (due to a signal or timeout). Do not enqueue | |
14757 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
14758 | + */ | |
14759 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
14760 | + raw_spin_unlock(&task->pi_lock); | |
14761 | + return -EAGAIN; | |
14762 | + } | |
14763 | + | |
14764 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); | |
14765 | + | |
7c18450a | 14766 | + rt_mutex_adjust_prio(task); |
1a6e0f06 JK |
14767 | waiter->task = task; |
14768 | waiter->lock = lock; | |
7c18450a JK |
14769 | waiter->prio = task->prio; |
14770 | + waiter->deadline = task->dl.deadline; | |
14771 | ||
14772 | /* Get the top priority waiter on the lock */ | |
14773 | if (rt_mutex_has_waiters(lock)) | |
14774 | @@ -993,8 +1422,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14775 | rt_mutex_dequeue_pi(owner, top_waiter); | |
1a6e0f06 JK |
14776 | rt_mutex_enqueue_pi(owner, waiter); |
14777 | ||
7c18450a | 14778 | - __rt_mutex_adjust_prio(owner); |
1a6e0f06 | 14779 | - if (owner->pi_blocked_on) |
7c18450a | 14780 | + rt_mutex_adjust_prio(owner); |
1a6e0f06 JK |
14781 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) |
14782 | chain_walk = 1; | |
14783 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
14784 | chain_walk = 1; | |
7c18450a | 14785 | @@ -1036,6 +1465,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
1a6e0f06 JK |
14786 | * Called with lock->wait_lock held and interrupts disabled. |
14787 | */ | |
14788 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
14789 | + struct wake_q_head *wake_sleeper_q, | |
14790 | struct rt_mutex *lock) | |
14791 | { | |
14792 | struct rt_mutex_waiter *waiter; | |
7c18450a JK |
14793 | @@ -1045,12 +1475,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, |
14794 | waiter = rt_mutex_top_waiter(lock); | |
1a6e0f06 | 14795 | |
7c18450a JK |
14796 | /* |
14797 | - * Remove it from current->pi_waiters. We do not adjust a | |
14798 | - * possible priority boost right now. We execute wakeup in the | |
14799 | - * boosted mode and go back to normal after releasing | |
14800 | - * lock->wait_lock. | |
14801 | + * Remove it from current->pi_waiters and deboost. | |
14802 | + * | |
14803 | + * We must in fact deboost here in order to ensure we call | |
14804 | + * rt_mutex_setprio() to update p->pi_top_task before the | |
14805 | + * task unblocks. | |
14806 | */ | |
14807 | rt_mutex_dequeue_pi(current, waiter); | |
14808 | + rt_mutex_adjust_prio(current); | |
1a6e0f06 | 14809 | |
7c18450a JK |
14810 | /* |
14811 | * As we are waking up the top waiter, and the waiter stays | |
14812 | @@ -1062,9 +1494,22 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
14813 | */ | |
14814 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; | |
14815 | ||
14816 | + /* | |
14817 | + * We deboosted before waking the top waiter task such that we don't | |
14818 | + * run two tasks with the 'same' priority (and ensure the | |
14819 | + * p->pi_top_task pointer points to a blocked task). This however can | |
14820 | + * lead to priority inversion if we would get preempted after the | |
14821 | + * deboost but before waking our donor task, hence the preempt_disable() | |
14822 | + * before unlock. | |
14823 | + * | |
14824 | + * Pairs with preempt_enable() in rt_mutex_postunlock(); | |
14825 | + */ | |
14826 | + preempt_disable(); | |
1a6e0f06 JK |
14827 | + if (waiter->savestate) |
14828 | + wake_q_add(wake_sleeper_q, waiter->task); | |
14829 | + else | |
14830 | + wake_q_add(wake_q, waiter->task); | |
7c18450a JK |
14831 | raw_spin_unlock(¤t->pi_lock); |
14832 | - | |
14833 | - wake_q_add(wake_q, waiter->task); | |
1a6e0f06 JK |
14834 | } |
14835 | ||
14836 | /* | |
7c18450a | 14837 | @@ -1078,7 +1523,9 @@ static void remove_waiter(struct rt_mutex *lock, |
1a6e0f06 JK |
14838 | { |
14839 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
14840 | struct task_struct *owner = rt_mutex_owner(lock); | |
14841 | - struct rt_mutex *next_lock; | |
14842 | + struct rt_mutex *next_lock = NULL; | |
7c18450a JK |
14843 | + |
14844 | + lockdep_assert_held(&lock->wait_lock); | |
1a6e0f06 JK |
14845 | |
14846 | raw_spin_lock(¤t->pi_lock); | |
14847 | rt_mutex_dequeue(lock, waiter); | |
7c18450a JK |
14848 | @@ -1099,10 +1546,11 @@ static void remove_waiter(struct rt_mutex *lock, |
14849 | if (rt_mutex_has_waiters(lock)) | |
14850 | rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); | |
14851 | ||
14852 | - __rt_mutex_adjust_prio(owner); | |
14853 | + rt_mutex_adjust_prio(owner); | |
1a6e0f06 JK |
14854 | |
14855 | /* Store the lock on which owner is blocked or NULL */ | |
14856 | - next_lock = task_blocked_on_lock(owner); | |
14857 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
14858 | + next_lock = task_blocked_on_lock(owner); | |
14859 | ||
14860 | raw_spin_unlock(&owner->pi_lock); | |
14861 | ||
33c7bf0f | 14862 | @@ -1138,21 +1586,30 @@ void rt_mutex_adjust_pi(struct task_struct *task) |
1a6e0f06 JK |
14863 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
14864 | ||
14865 | waiter = task->pi_blocked_on; | |
14866 | - if (!waiter || (waiter->prio == task->prio && | |
7c18450a JK |
14867 | - !dl_prio(task->prio))) { |
14868 | + if (!rt_mutex_real_waiter(waiter) || | |
14869 | + rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { | |
1a6e0f06 JK |
14870 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); |
14871 | return; | |
14872 | } | |
14873 | next_lock = waiter->lock; | |
14874 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14875 | ||
14876 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
14877 | get_task_struct(task); | |
14878 | ||
14879 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14880 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
14881 | next_lock, NULL, task); | |
14882 | } | |
33c7bf0f JK |
14883 | |
14884 | +void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
14885 | +{ | |
14886 | + debug_rt_mutex_init_waiter(waiter); | |
14887 | + RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
14888 | + RB_CLEAR_NODE(&waiter->tree_entry); | |
14889 | + waiter->task = NULL; | |
14890 | + waiter->savestate = savestate; | |
14891 | +} | |
14892 | + | |
14893 | /** | |
14894 | * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop | |
14895 | * @lock: the rt_mutex to take | |
14896 | @@ -1166,7 +1623,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |
1a6e0f06 JK |
14897 | static int __sched |
14898 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14899 | struct hrtimer_sleeper *timeout, | |
14900 | - struct rt_mutex_waiter *waiter) | |
14901 | + struct rt_mutex_waiter *waiter, | |
14902 | + struct ww_acquire_ctx *ww_ctx) | |
14903 | { | |
14904 | int ret = 0; | |
14905 | ||
33c7bf0f JK |
14906 | @@ -1175,16 +1633,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, |
14907 | if (try_to_take_rt_mutex(lock, current, waiter)) | |
14908 | break; | |
1a6e0f06 | 14909 | |
33c7bf0f JK |
14910 | - /* |
14911 | - * TASK_INTERRUPTIBLE checks for signals and | |
14912 | - * timeout. Ignored otherwise. | |
14913 | - */ | |
14914 | - if (unlikely(state == TASK_INTERRUPTIBLE)) { | |
14915 | - /* Signal pending? */ | |
14916 | - if (signal_pending(current)) | |
14917 | - ret = -EINTR; | |
14918 | - if (timeout && !timeout->task) | |
14919 | - ret = -ETIMEDOUT; | |
14920 | + if (timeout && !timeout->task) { | |
14921 | + ret = -ETIMEDOUT; | |
14922 | + break; | |
14923 | + } | |
14924 | + if (signal_pending_state(state, current)) { | |
14925 | + ret = -EINTR; | |
14926 | + break; | |
1a6e0f06 JK |
14927 | + } |
14928 | + | |
33c7bf0f JK |
14929 | + if (ww_ctx && ww_ctx->acquired > 0) { |
14930 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
14931 | if (ret) | |
14932 | break; | |
14933 | } | |
14934 | @@ -1223,21 +1682,148 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, | |
1a6e0f06 JK |
14935 | } |
14936 | } | |
14937 | ||
14938 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
14939 | + struct ww_acquire_ctx *ww_ctx) | |
14940 | +{ | |
14941 | +#ifdef CONFIG_DEBUG_MUTEXES | |
14942 | + /* | |
14943 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
14944 | + * but released with a normal mutex_unlock in this call. | |
14945 | + * | |
14946 | + * This should never happen, always use ww_mutex_unlock. | |
14947 | + */ | |
14948 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
14949 | + | |
14950 | + /* | |
14951 | + * Not quite done after calling ww_acquire_done() ? | |
14952 | + */ | |
14953 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
14954 | + | |
14955 | + if (ww_ctx->contending_lock) { | |
14956 | + /* | |
14957 | + * After -EDEADLK you tried to | |
14958 | + * acquire a different ww_mutex? Bad! | |
14959 | + */ | |
14960 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
14961 | + | |
14962 | + /* | |
14963 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
14964 | + * but 'forgot' to unlock everything else first? | |
14965 | + */ | |
14966 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
14967 | + ww_ctx->contending_lock = NULL; | |
14968 | + } | |
14969 | + | |
14970 | + /* | |
14971 | + * Naughty, using a different class will lead to undefined behavior! | |
14972 | + */ | |
14973 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
14974 | +#endif | |
14975 | + ww_ctx->acquired++; | |
14976 | +} | |
14977 | + | |
14978 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14979 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
14980 | + struct ww_acquire_ctx *ww_ctx) | |
14981 | +{ | |
14982 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
14983 | + struct rt_mutex_waiter *waiter, *n; | |
14984 | + | |
14985 | + /* | |
14986 | + * This branch gets optimized out for the common case, | |
14987 | + * and is only important for ww_mutex_lock. | |
14988 | + */ | |
14989 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
14990 | + ww->ctx = ww_ctx; | |
14991 | + | |
14992 | + /* | |
14993 | + * Give any possible sleeping processes the chance to wake up, | |
14994 | + * so they can recheck if they have to back off. | |
14995 | + */ | |
14996 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, | |
14997 | + tree_entry) { | |
14998 | + /* XXX debug rt mutex waiter wakeup */ | |
14999 | + | |
15000 | + BUG_ON(waiter->lock != lock); | |
15001 | + rt_mutex_wake_waiter(waiter); | |
15002 | + } | |
15003 | +} | |
15004 | + | |
15005 | +#else | |
15006 | + | |
15007 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
15008 | + struct ww_acquire_ctx *ww_ctx) | |
15009 | +{ | |
15010 | + BUG(); | |
15011 | +} | |
15012 | +#endif | |
33c7bf0f JK |
15013 | + |
15014 | +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, | |
15015 | + struct hrtimer_sleeper *timeout, | |
15016 | + enum rtmutex_chainwalk chwalk, | |
15017 | + struct ww_acquire_ctx *ww_ctx, | |
15018 | + struct rt_mutex_waiter *waiter) | |
15019 | +{ | |
15020 | + int ret; | |
15021 | + | |
15022 | + /* Try to acquire the lock again: */ | |
15023 | + if (try_to_take_rt_mutex(lock, current, NULL)) { | |
15024 | + if (ww_ctx) | |
15025 | + ww_mutex_account_lock(lock, ww_ctx); | |
15026 | + return 0; | |
15027 | + } | |
15028 | + | |
15029 | + set_current_state(state); | |
15030 | + | |
15031 | + /* Setup the timer, when timeout != NULL */ | |
15032 | + if (unlikely(timeout)) | |
15033 | + hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | |
15034 | + | |
15035 | + ret = task_blocks_on_rt_mutex(lock, waiter, current, chwalk); | |
15036 | + | |
15037 | + if (likely(!ret)) { | |
15038 | + /* sleep on the mutex */ | |
15039 | + ret = __rt_mutex_slowlock(lock, state, timeout, waiter, | |
15040 | + ww_ctx); | |
15041 | + } else if (ww_ctx) { | |
15042 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
15043 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
15044 | + BUG_ON(!ret); | |
15045 | + } | |
15046 | + | |
15047 | + if (unlikely(ret)) { | |
15048 | + __set_current_state(TASK_RUNNING); | |
15049 | + if (rt_mutex_has_waiters(lock)) | |
15050 | + remove_waiter(lock, waiter); | |
15051 | + /* ww_mutex want to report EDEADLK/EALREADY, let them */ | |
15052 | + if (!ww_ctx) | |
15053 | + rt_mutex_handle_deadlock(ret, chwalk, waiter); | |
15054 | + } else if (ww_ctx) { | |
15055 | + ww_mutex_account_lock(lock, ww_ctx); | |
15056 | + } | |
15057 | + | |
15058 | + /* | |
15059 | + * try_to_take_rt_mutex() sets the waiter bit | |
15060 | + * unconditionally. We might have to fix that up. | |
15061 | + */ | |
15062 | + fixup_rt_mutex_waiters(lock); | |
15063 | + return ret; | |
15064 | +} | |
1a6e0f06 JK |
15065 | + |
15066 | /* | |
15067 | * Slow path lock function: | |
15068 | */ | |
15069 | static int __sched | |
15070 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
15071 | struct hrtimer_sleeper *timeout, | |
15072 | - enum rtmutex_chainwalk chwalk) | |
15073 | + enum rtmutex_chainwalk chwalk, | |
15074 | + struct ww_acquire_ctx *ww_ctx) | |
15075 | { | |
15076 | struct rt_mutex_waiter waiter; | |
15077 | unsigned long flags; | |
15078 | int ret = 0; | |
15079 | ||
15080 | - debug_rt_mutex_init_waiter(&waiter); | |
15081 | - RB_CLEAR_NODE(&waiter.pi_tree_entry); | |
15082 | - RB_CLEAR_NODE(&waiter.tree_entry); | |
15083 | + rt_mutex_init_waiter(&waiter, false); | |
15084 | ||
15085 | /* | |
15086 | * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
33c7bf0f JK |
15087 | @@ -1249,36 +1835,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
15088 | */ | |
15089 | raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
1a6e0f06 | 15090 | |
33c7bf0f JK |
15091 | - /* Try to acquire the lock again: */ |
15092 | - if (try_to_take_rt_mutex(lock, current, NULL)) { | |
15093 | - raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
15094 | - return 0; | |
15095 | - } | |
15096 | - | |
15097 | - set_current_state(state); | |
15098 | - | |
15099 | - /* Setup the timer, when timeout != NULL */ | |
15100 | - if (unlikely(timeout)) | |
15101 | - hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); | |
15102 | - | |
15103 | - ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); | |
15104 | - | |
15105 | - if (likely(!ret)) | |
15106 | - /* sleep on the mutex */ | |
1a6e0f06 | 15107 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); |
33c7bf0f JK |
15108 | - |
15109 | - if (unlikely(ret)) { | |
15110 | - __set_current_state(TASK_RUNNING); | |
15111 | - if (rt_mutex_has_waiters(lock)) | |
15112 | - remove_waiter(lock, &waiter); | |
1a6e0f06 | 15113 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); |
33c7bf0f JK |
15114 | - } |
15115 | - | |
15116 | - /* | |
15117 | - * try_to_take_rt_mutex() sets the waiter bit | |
15118 | - * unconditionally. We might have to fix that up. | |
15119 | - */ | |
15120 | - fixup_rt_mutex_waiters(lock); | |
15121 | + ret = rt_mutex_slowlock_locked(lock, state, timeout, chwalk, ww_ctx, | |
15122 | + &waiter); | |
1a6e0f06 | 15123 | |
33c7bf0f JK |
15124 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
15125 | ||
7c18450a JK |
15126 | @@ -1328,10 +1886,12 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) |
15127 | ||
15128 | /* | |
15129 | * Slow path to release a rt-mutex. | |
15130 | - * Return whether the current task needs to undo a potential priority boosting. | |
15131 | + * | |
15132 | + * Return whether the current task needs to call rt_mutex_postunlock(). | |
1a6e0f06 JK |
15133 | */ |
15134 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
15135 | - struct wake_q_head *wake_q) | |
15136 | + struct wake_q_head *wake_q, | |
15137 | + struct wake_q_head *wake_sleeper_q) | |
15138 | { | |
15139 | unsigned long flags; | |
15140 | ||
7c18450a | 15141 | @@ -1340,8 +1900,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
33c7bf0f JK |
15142 | |
15143 | debug_rt_mutex_unlock(lock); | |
15144 | ||
15145 | - rt_mutex_deadlock_account_unlock(current); | |
15146 | - | |
15147 | /* | |
15148 | * We must be careful here if the fast path is enabled. If we | |
15149 | * have no waiters queued we cannot set owner to NULL here | |
7c18450a | 15150 | @@ -1387,12 +1945,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
1a6e0f06 JK |
15151 | * |
15152 | * Queue the next waiter for wakeup once we release the wait_lock. | |
15153 | */ | |
15154 | - mark_wakeup_next_waiter(wake_q, lock); | |
7c18450a | 15155 | - |
1a6e0f06 | 15156 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); |
1a6e0f06 JK |
15157 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); |
15158 | ||
7c18450a JK |
15159 | - /* check PI boosting */ |
15160 | - return true; | |
15161 | + return true; /* call rt_mutex_postunlock() */ | |
15162 | } | |
15163 | ||
15164 | /* | |
15165 | @@ -1403,63 +1959,85 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
1a6e0f06 JK |
15166 | */ |
15167 | static inline int | |
15168 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
15169 | + struct ww_acquire_ctx *ww_ctx, | |
15170 | int (*slowfn)(struct rt_mutex *lock, int state, | |
15171 | struct hrtimer_sleeper *timeout, | |
15172 | - enum rtmutex_chainwalk chwalk)) | |
15173 | + enum rtmutex_chainwalk chwalk, | |
15174 | + struct ww_acquire_ctx *ww_ctx)) | |
15175 | { | |
33c7bf0f JK |
15176 | - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { |
15177 | - rt_mutex_deadlock_account_lock(lock, current); | |
15178 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
1a6e0f06 | 15179 | return 0; |
33c7bf0f | 15180 | - } else |
1a6e0f06 | 15181 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); |
33c7bf0f JK |
15182 | + |
15183 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); | |
1a6e0f06 JK |
15184 | } |
15185 | ||
15186 | static inline int | |
15187 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
15188 | struct hrtimer_sleeper *timeout, | |
15189 | enum rtmutex_chainwalk chwalk, | |
15190 | + struct ww_acquire_ctx *ww_ctx, | |
15191 | int (*slowfn)(struct rt_mutex *lock, int state, | |
15192 | struct hrtimer_sleeper *timeout, | |
15193 | - enum rtmutex_chainwalk chwalk)) | |
15194 | + enum rtmutex_chainwalk chwalk, | |
15195 | + struct ww_acquire_ctx *ww_ctx)) | |
15196 | { | |
15197 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
33c7bf0f JK |
15198 | - likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { |
15199 | - rt_mutex_deadlock_account_lock(lock, current); | |
15200 | + likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
1a6e0f06 | 15201 | return 0; |
33c7bf0f | 15202 | - } else |
1a6e0f06 | 15203 | - return slowfn(lock, state, timeout, chwalk); |
33c7bf0f JK |
15204 | + |
15205 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); | |
1a6e0f06 JK |
15206 | } |
15207 | ||
15208 | static inline int | |
33c7bf0f JK |
15209 | rt_mutex_fasttrylock(struct rt_mutex *lock, |
15210 | int (*slowfn)(struct rt_mutex *lock)) | |
15211 | { | |
15212 | - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
15213 | - rt_mutex_deadlock_account_lock(lock, current); | |
15214 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
15215 | return 1; | |
15216 | - } | |
15217 | + | |
15218 | return slowfn(lock); | |
15219 | } | |
15220 | ||
7c18450a JK |
15221 | +/* |
15222 | + * Performs the wakeup of the the top-waiter and re-enables preemption. | |
15223 | + */ | |
15224 | +void rt_mutex_postunlock(struct wake_q_head *wake_q, | |
15225 | + struct wake_q_head *wq_sleeper) | |
15226 | +{ | |
15227 | + wake_up_q(wake_q); | |
15228 | + wake_up_q_sleeper(wq_sleeper); | |
15229 | + | |
15230 | + /* Pairs with preempt_disable() in rt_mutex_slowunlock() */ | |
15231 | + preempt_enable(); | |
15232 | +} | |
15233 | + | |
1a6e0f06 JK |
15234 | static inline void |
15235 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
15236 | bool (*slowfn)(struct rt_mutex *lock, | |
15237 | - struct wake_q_head *wqh)) | |
15238 | + struct wake_q_head *wqh, | |
15239 | + struct wake_q_head *wq_sleeper)) | |
15240 | { | |
15241 | WAKE_Q(wake_q); | |
15242 | + WAKE_Q(wake_sleeper_q); | |
15243 | ||
33c7bf0f JK |
15244 | - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { |
15245 | - rt_mutex_deadlock_account_unlock(current); | |
15246 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) | |
15247 | + return; | |
1a6e0f06 | 15248 | |
33c7bf0f | 15249 | - } else { |
1a6e0f06 | 15250 | - bool deboost = slowfn(lock, &wake_q); |
7c18450a JK |
15251 | + if (slowfn(lock, &wake_q, &wake_sleeper_q)) |
15252 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
15253 | +} | |
1a6e0f06 | 15254 | |
33c7bf0f | 15255 | - wake_up_q(&wake_q); |
33c7bf0f JK |
15256 | +/** |
15257 | + * rt_mutex_lock_state - lock a rt_mutex with a given state | |
15258 | + * | |
15259 | + * @lock: The rt_mutex to be locked | |
15260 | + * @state: The state to set when blocking on the rt_mutex | |
15261 | + */ | |
15262 | +int __sched rt_mutex_lock_state(struct rt_mutex *lock, int state) | |
15263 | +{ | |
15264 | + might_sleep(); | |
7c18450a JK |
15265 | |
15266 | - /* Undo pi boosting if necessary: */ | |
15267 | - if (deboost) | |
15268 | - rt_mutex_adjust_prio(current); | |
15269 | - } | |
33c7bf0f JK |
15270 | + return rt_mutex_fastlock(lock, state, NULL, rt_mutex_slowlock); |
15271 | } | |
1a6e0f06 | 15272 | |
33c7bf0f | 15273 | /** |
7c18450a | 15274 | @@ -1469,15 +2047,13 @@ rt_mutex_fastunlock(struct rt_mutex *lock, |
33c7bf0f JK |
15275 | */ |
15276 | void __sched rt_mutex_lock(struct rt_mutex *lock) | |
15277 | { | |
15278 | - might_sleep(); | |
15279 | - | |
1a6e0f06 | 15280 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); |
33c7bf0f | 15281 | + rt_mutex_lock_state(lock, TASK_UNINTERRUPTIBLE); |
1a6e0f06 JK |
15282 | } |
15283 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
15284 | ||
33c7bf0f JK |
15285 | /** |
15286 | * rt_mutex_lock_interruptible - lock a rt_mutex interruptible | |
15287 | - * | |
15288 | + ** | |
15289 | * @lock: the rt_mutex to be locked | |
15290 | * | |
15291 | * Returns: | |
7c18450a | 15292 | @@ -1486,23 +2062,32 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); |
33c7bf0f JK |
15293 | */ |
15294 | int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) | |
1a6e0f06 | 15295 | { |
33c7bf0f JK |
15296 | - might_sleep(); |
15297 | - | |
1a6e0f06 | 15298 | - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); |
33c7bf0f | 15299 | + return rt_mutex_lock_state(lock, TASK_INTERRUPTIBLE); |
1a6e0f06 JK |
15300 | } |
15301 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
15302 | ||
33c7bf0f JK |
15303 | -/* |
15304 | - * Futex variant with full deadlock detection. | |
15305 | +/** | |
1a6e0f06 JK |
15306 | + * rt_mutex_lock_killable - lock a rt_mutex killable |
15307 | + * | |
15308 | + * @lock: the rt_mutex to be locked | |
15309 | + * @detect_deadlock: deadlock detection on/off | |
15310 | + * | |
15311 | + * Returns: | |
15312 | + * 0 on success | |
15313 | + * -EINTR when interrupted by a signal | |
33c7bf0f JK |
15314 | */ |
15315 | -int rt_mutex_timed_futex_lock(struct rt_mutex *lock, | |
15316 | - struct hrtimer_sleeper *timeout) | |
1a6e0f06 | 15317 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) |
33c7bf0f JK |
15318 | { |
15319 | - might_sleep(); | |
15320 | + return rt_mutex_lock_state(lock, TASK_KILLABLE); | |
1a6e0f06 JK |
15321 | +} |
15322 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); | |
33c7bf0f JK |
15323 | |
15324 | - return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
15325 | - RT_MUTEX_FULL_CHAINWALK, | |
15326 | - rt_mutex_slowlock); | |
15327 | +/* | |
15328 | + * Futex variant, must not use fastpath. | |
15329 | + */ | |
15330 | +int __sched rt_mutex_futex_trylock(struct rt_mutex *lock) | |
15331 | +{ | |
15332 | + return rt_mutex_slowtrylock(lock); | |
15333 | } | |
15334 | ||
15335 | /** | |
7c18450a | 15336 | @@ -1525,6 +2110,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) |
1a6e0f06 JK |
15337 | |
15338 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
15339 | RT_MUTEX_MIN_CHAINWALK, | |
15340 | + NULL, | |
15341 | rt_mutex_slowlock); | |
15342 | } | |
15343 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
7c18450a | 15344 | @@ -1542,7 +2128,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); |
1a6e0f06 JK |
15345 | */ |
15346 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | |
15347 | { | |
15348 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15349 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
15350 | +#else | |
15351 | if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) | |
15352 | +#endif | |
15353 | return 0; | |
15354 | ||
15355 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
7c18450a JK |
15356 | @@ -1560,21 +2150,53 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock) |
15357 | } | |
33c7bf0f JK |
15358 | EXPORT_SYMBOL_GPL(rt_mutex_unlock); |
15359 | ||
7c18450a | 15360 | -/** |
33c7bf0f JK |
15361 | - * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock |
15362 | - * @lock: the rt_mutex to be unlocked | |
15363 | - * | |
15364 | - * Returns: true/false indicating whether priority adjustment is | |
15365 | - * required or not. | |
7c18450a | 15366 | - */ |
33c7bf0f | 15367 | -bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, |
1a6e0f06 | 15368 | - struct wake_q_head *wqh) |
7c18450a JK |
15369 | +static bool __sched __rt_mutex_unlock_common(struct rt_mutex *lock, |
15370 | + struct wake_q_head *wake_q, | |
15371 | + struct wake_q_head *wq_sleeper) | |
1a6e0f06 | 15372 | { |
33c7bf0f JK |
15373 | - if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { |
15374 | - rt_mutex_deadlock_account_unlock(current); | |
15375 | - return false; | |
15376 | + lockdep_assert_held(&lock->wait_lock); | |
15377 | + | |
15378 | + debug_rt_mutex_unlock(lock); | |
15379 | + | |
15380 | + if (!rt_mutex_has_waiters(lock)) { | |
15381 | + lock->owner = NULL; | |
15382 | + return false; /* done */ | |
7c18450a JK |
15383 | } |
15384 | - return rt_mutex_slowunlock(lock, wqh); | |
33c7bf0f | 15385 | + |
7c18450a JK |
15386 | + /* |
15387 | + * We've already deboosted, mark_wakeup_next_waiter() will | |
15388 | + * retain preempt_disabled when we drop the wait_lock, to | |
15389 | + * avoid inversion prior to the wakeup. preempt_disable() | |
15390 | + * therein pairs with rt_mutex_postunlock(). | |
15391 | + */ | |
33c7bf0f | 15392 | + mark_wakeup_next_waiter(wake_q, wq_sleeper, lock); |
7c18450a JK |
15393 | + |
15394 | + return true; /* call postunlock() */ | |
15395 | +} | |
15396 | + | |
15397 | +/** | |
15398 | + * Futex variant, that since futex variants do not use the fast-path, can be | |
15399 | + * simple and will not need to retry. | |
15400 | + */ | |
15401 | +bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock, | |
15402 | + struct wake_q_head *wake_q, | |
15403 | + struct wake_q_head *wq_sleeper) | |
15404 | +{ | |
15405 | + return __rt_mutex_unlock_common(lock, wake_q, wq_sleeper); | |
33c7bf0f JK |
15406 | +} |
15407 | + | |
15408 | +void __sched rt_mutex_futex_unlock(struct rt_mutex *lock) | |
15409 | +{ | |
15410 | + WAKE_Q(wake_q); | |
15411 | + WAKE_Q(wake_sleeper_q); | |
7c18450a | 15412 | + bool postunlock; |
33c7bf0f JK |
15413 | + |
15414 | + raw_spin_lock_irq(&lock->wait_lock); | |
7c18450a | 15415 | + postunlock = __rt_mutex_futex_unlock(lock, &wake_q, &wake_sleeper_q); |
33c7bf0f JK |
15416 | + raw_spin_unlock_irq(&lock->wait_lock); |
15417 | + | |
7c18450a JK |
15418 | + if (postunlock) |
15419 | + rt_mutex_postunlock(&wake_q, &wake_sleeper_q); | |
1a6e0f06 JK |
15420 | } |
15421 | ||
15422 | /** | |
7c18450a | 15423 | @@ -1607,13 +2229,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); |
1a6e0f06 JK |
15424 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) |
15425 | { | |
15426 | lock->owner = NULL; | |
15427 | - raw_spin_lock_init(&lock->wait_lock); | |
15428 | lock->waiters = RB_ROOT; | |
15429 | lock->waiters_leftmost = NULL; | |
15430 | ||
15431 | debug_rt_mutex_init(lock, name); | |
15432 | } | |
15433 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
15434 | +EXPORT_SYMBOL(__rt_mutex_init); | |
15435 | ||
15436 | /** | |
15437 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
7c18450a | 15438 | @@ -1628,10 +2249,9 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); |
1a6e0f06 JK |
15439 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, |
15440 | struct task_struct *proxy_owner) | |
15441 | { | |
15442 | - __rt_mutex_init(lock, NULL); | |
15443 | + rt_mutex_init(lock); | |
15444 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | |
15445 | rt_mutex_set_owner(lock, proxy_owner); | |
33c7bf0f JK |
15446 | - rt_mutex_deadlock_account_lock(lock, proxy_owner); |
15447 | } | |
1a6e0f06 | 15448 | |
33c7bf0f | 15449 | /** |
7c18450a | 15450 | @@ -1647,7 +2267,66 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
33c7bf0f JK |
15451 | { |
15452 | debug_rt_mutex_proxy_unlock(lock); | |
15453 | rt_mutex_set_owner(lock, NULL); | |
15454 | - rt_mutex_deadlock_account_unlock(proxy_owner); | |
15455 | +} | |
15456 | + | |
15457 | +int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
15458 | + struct rt_mutex_waiter *waiter, | |
15459 | + struct task_struct *task) | |
15460 | +{ | |
15461 | + int ret; | |
15462 | + | |
15463 | + if (try_to_take_rt_mutex(lock, task, NULL)) | |
15464 | + return 1; | |
15465 | + | |
1a6e0f06 JK |
15466 | +#ifdef CONFIG_PREEMPT_RT_FULL |
15467 | + /* | |
15468 | + * In PREEMPT_RT there's an added race. | |
15469 | + * If the task, that we are about to requeue, times out, | |
15470 | + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue | |
15471 | + * to skip this task. But right after the task sets | |
15472 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
15473 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
15474 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
15475 | + * lock that it blocks on. We *must not* place this task | |
15476 | + * on this proxy lock in that case. | |
15477 | + * | |
15478 | + * To prevent this race, we first take the task's pi_lock | |
15479 | + * and check if it has updated its pi_blocked_on. If it has, | |
15480 | + * we assume that it woke up and we return -EAGAIN. | |
15481 | + * Otherwise, we set the task's pi_blocked_on to | |
15482 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
15483 | + * it will know that we are in the process of requeuing it. | |
15484 | + */ | |
15485 | + raw_spin_lock(&task->pi_lock); | |
15486 | + if (task->pi_blocked_on) { | |
15487 | + raw_spin_unlock(&task->pi_lock); | |
15488 | + raw_spin_unlock_irq(&lock->wait_lock); | |
15489 | + return -EAGAIN; | |
15490 | + } | |
15491 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
15492 | + raw_spin_unlock(&task->pi_lock); | |
15493 | +#endif | |
15494 | + | |
33c7bf0f JK |
15495 | + /* We enforce deadlock detection for futexes */ |
15496 | + ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
15497 | + RT_MUTEX_FULL_CHAINWALK); | |
15498 | + | |
15499 | + if (ret && !rt_mutex_owner(lock)) { | |
15500 | + /* | |
15501 | + * Reset the return value. We might have | |
15502 | + * returned with -EDEADLK and the owner | |
15503 | + * released the lock while we were walking the | |
15504 | + * pi chain. Let the waiter sort it out. | |
15505 | + */ | |
15506 | + ret = 0; | |
15507 | + } | |
15508 | + | |
1a6e0f06 | 15509 | + if (ret && rt_mutex_has_waiters(lock)) |
33c7bf0f JK |
15510 | + remove_waiter(lock, waiter); |
15511 | + | |
15512 | + debug_rt_mutex_print_deadlock(waiter); | |
15513 | + | |
15514 | + return ret; | |
15515 | } | |
15516 | ||
15517 | /** | |
7c18450a | 15518 | @@ -1670,33 +2349,9 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
33c7bf0f | 15519 | int ret; |
1a6e0f06 | 15520 | |
33c7bf0f JK |
15521 | raw_spin_lock_irq(&lock->wait_lock); |
15522 | - | |
15523 | - if (try_to_take_rt_mutex(lock, task, NULL)) { | |
15524 | - raw_spin_unlock_irq(&lock->wait_lock); | |
15525 | - return 1; | |
15526 | - } | |
15527 | - | |
15528 | - /* We enforce deadlock detection for futexes */ | |
15529 | - ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
15530 | - RT_MUTEX_FULL_CHAINWALK); | |
15531 | - | |
15532 | - if (ret && !rt_mutex_owner(lock)) { | |
15533 | - /* | |
15534 | - * Reset the return value. We might have | |
15535 | - * returned with -EDEADLK and the owner | |
15536 | - * released the lock while we were walking the | |
15537 | - * pi chain. Let the waiter sort it out. | |
15538 | - */ | |
15539 | - ret = 0; | |
15540 | - } | |
15541 | - | |
15542 | - if (unlikely(ret)) | |
15543 | - remove_waiter(lock, waiter); | |
15544 | - | |
15545 | + ret = __rt_mutex_start_proxy_lock(lock, waiter, task); | |
1a6e0f06 | 15546 | raw_spin_unlock_irq(&lock->wait_lock); |
33c7bf0f JK |
15547 | |
15548 | - debug_rt_mutex_print_deadlock(waiter); | |
15549 | - | |
15550 | return ret; | |
15551 | } | |
15552 | ||
7c18450a | 15553 | @@ -1721,24 +2376,27 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) |
33c7bf0f JK |
15554 | } |
15555 | ||
15556 | /** | |
15557 | - * rt_mutex_finish_proxy_lock() - Complete lock acquisition | |
15558 | + * rt_mutex_wait_proxy_lock() - Wait for lock acquisition | |
15559 | * @lock: the rt_mutex we were woken on | |
15560 | * @to: the timeout, null if none. hrtimer should already have | |
15561 | * been started. | |
15562 | * @waiter: the pre-initialized rt_mutex_waiter | |
15563 | * | |
15564 | - * Complete the lock acquisition started our behalf by another thread. | |
15565 | + * Wait for the the lock acquisition started on our behalf by | |
15566 | + * rt_mutex_start_proxy_lock(). Upon failure, the caller must call | |
15567 | + * rt_mutex_cleanup_proxy_lock(). | |
15568 | * | |
15569 | * Returns: | |
15570 | * 0 - success | |
15571 | * <0 - error, one of -EINTR, -ETIMEDOUT | |
15572 | * | |
15573 | - * Special API call for PI-futex requeue support | |
15574 | + * Special API call for PI-futex support | |
15575 | */ | |
15576 | -int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
15577 | +int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, | |
15578 | struct hrtimer_sleeper *to, | |
15579 | struct rt_mutex_waiter *waiter) | |
15580 | { | |
7c18450a JK |
15581 | + struct task_struct *tsk = current; |
15582 | int ret; | |
15583 | ||
15584 | raw_spin_lock_irq(&lock->wait_lock); | |
15585 | @@ -1746,10 +2404,65 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
1a6e0f06 JK |
15586 | set_current_state(TASK_INTERRUPTIBLE); |
15587 | ||
15588 | /* sleep on the mutex */ | |
15589 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
15590 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
15591 | ||
33c7bf0f | 15592 | - if (unlikely(ret)) |
7c18450a JK |
15593 | + /* |
15594 | + * RT has a problem here when the wait got interrupted by a timeout | |
15595 | + * or a signal. task->pi_blocked_on is still set. The task must | |
15596 | + * acquire the hash bucket lock when returning from this function. | |
15597 | + * | |
15598 | + * If the hash bucket lock is contended then the | |
15599 | + * BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)) in | |
15600 | + * task_blocks_on_rt_mutex() will trigger. This can be avoided by | |
15601 | + * clearing task->pi_blocked_on which removes the task from the | |
15602 | + * boosting chain of the rtmutex. That's correct because the task | |
15603 | + * is not longer blocked on it. | |
15604 | + */ | |
15605 | + if (ret) { | |
15606 | + raw_spin_lock(&tsk->pi_lock); | |
15607 | + tsk->pi_blocked_on = NULL; | |
15608 | + raw_spin_unlock(&tsk->pi_lock); | |
15609 | + } | |
15610 | + | |
33c7bf0f JK |
15611 | + raw_spin_unlock_irq(&lock->wait_lock); |
15612 | + | |
15613 | + return ret; | |
15614 | +} | |
15615 | + | |
15616 | +/** | |
15617 | + * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition | |
15618 | + * @lock: the rt_mutex we were woken on | |
15619 | + * @waiter: the pre-initialized rt_mutex_waiter | |
15620 | + * | |
15621 | + * Attempt to clean up after a failed rt_mutex_wait_proxy_lock(). | |
15622 | + * | |
15623 | + * Unless we acquired the lock; we're still enqueued on the wait-list and can | |
15624 | + * in fact still be granted ownership until we're removed. Therefore we can | |
15625 | + * find we are in fact the owner and must disregard the | |
15626 | + * rt_mutex_wait_proxy_lock() failure. | |
15627 | + * | |
15628 | + * Returns: | |
15629 | + * true - did the cleanup, we done. | |
15630 | + * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned, | |
15631 | + * caller should disregards its return value. | |
15632 | + * | |
15633 | + * Special API call for PI-futex support | |
15634 | + */ | |
15635 | +bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, | |
15636 | + struct rt_mutex_waiter *waiter) | |
15637 | +{ | |
15638 | + bool cleanup = false; | |
15639 | + | |
15640 | + raw_spin_lock_irq(&lock->wait_lock); | |
15641 | + /* | |
15642 | + * Unless we're the owner; we're still enqueued on the wait_list. | |
15643 | + * So check if we became owner, if not, take us off the wait_list. | |
15644 | + */ | |
15645 | + if (rt_mutex_owner(lock) != current) { | |
1a6e0f06 | 15646 | remove_waiter(lock, waiter); |
33c7bf0f JK |
15647 | + fixup_rt_mutex_waiters(lock); |
15648 | + cleanup = true; | |
15649 | + } | |
1a6e0f06 | 15650 | |
33c7bf0f JK |
15651 | /* |
15652 | * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might | |
7c18450a | 15653 | @@ -1759,5 +2472,91 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
33c7bf0f JK |
15654 | |
15655 | raw_spin_unlock_irq(&lock->wait_lock); | |
15656 | ||
15657 | + return cleanup; | |
15658 | +} | |
1a6e0f06 JK |
15659 | + |
15660 | +static inline int | |
15661 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
15662 | +{ | |
15663 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
15664 | + unsigned tmp; | |
15665 | + | |
15666 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
15667 | + tmp = ctx->deadlock_inject_interval; | |
15668 | + if (tmp > UINT_MAX/4) | |
15669 | + tmp = UINT_MAX; | |
15670 | + else | |
15671 | + tmp = tmp*2 + tmp + tmp/2; | |
15672 | + | |
15673 | + ctx->deadlock_inject_interval = tmp; | |
15674 | + ctx->deadlock_inject_countdown = tmp; | |
15675 | + ctx->contending_lock = lock; | |
15676 | + | |
15677 | + ww_mutex_unlock(lock); | |
15678 | + | |
15679 | + return -EDEADLK; | |
15680 | + } | |
15681 | +#endif | |
15682 | + | |
15683 | + return 0; | |
15684 | +} | |
15685 | + | |
15686 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15687 | +int __sched | |
15688 | +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
15689 | +{ | |
15690 | + int ret; | |
15691 | + | |
15692 | + might_sleep(); | |
15693 | + | |
15694 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
15695 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); | |
15696 | + if (ret) | |
15697 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
15698 | + else if (!ret && ww_ctx->acquired > 1) | |
15699 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
15700 | + | |
33c7bf0f JK |
15701 | return ret; |
15702 | } | |
1a6e0f06 JK |
15703 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); |
15704 | + | |
15705 | +int __sched | |
15706 | +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
15707 | +{ | |
15708 | + int ret; | |
15709 | + | |
15710 | + might_sleep(); | |
15711 | + | |
15712 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
15713 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); | |
15714 | + if (ret) | |
15715 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
15716 | + else if (!ret && ww_ctx->acquired > 1) | |
15717 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
15718 | + | |
15719 | + return ret; | |
15720 | +} | |
15721 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock); | |
15722 | + | |
15723 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
15724 | +{ | |
15725 | + int nest = !!lock->ctx; | |
15726 | + | |
15727 | + /* | |
15728 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
15729 | + * into 'unlocked' state: | |
15730 | + */ | |
15731 | + if (nest) { | |
15732 | +#ifdef CONFIG_DEBUG_MUTEXES | |
15733 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
15734 | +#endif | |
15735 | + if (lock->ctx->acquired > 0) | |
15736 | + lock->ctx->acquired--; | |
15737 | + lock->ctx = NULL; | |
15738 | + } | |
15739 | + | |
15740 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
15741 | + rt_mutex_unlock(&lock->base.lock); | |
15742 | +} | |
15743 | +EXPORT_SYMBOL(ww_mutex_unlock); | |
15744 | +#endif | |
33c7bf0f JK |
15745 | diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h |
15746 | index c4060584c407..6607802efa8b 100644 | |
15747 | --- a/kernel/locking/rtmutex.h | |
15748 | +++ b/kernel/locking/rtmutex.h | |
15749 | @@ -11,8 +11,6 @@ | |
15750 | */ | |
15751 | ||
15752 | #define rt_mutex_deadlock_check(l) (0) | |
15753 | -#define rt_mutex_deadlock_account_lock(m, t) do { } while (0) | |
15754 | -#define rt_mutex_deadlock_account_unlock(l) do { } while (0) | |
15755 | #define debug_rt_mutex_init_waiter(w) do { } while (0) | |
15756 | #define debug_rt_mutex_free_waiter(w) do { } while (0) | |
15757 | #define debug_rt_mutex_lock(l) do { } while (0) | |
1a6e0f06 | 15758 | diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h |
7c18450a | 15759 | index e317e1cbb3eb..64d89d780059 100644 |
1a6e0f06 JK |
15760 | --- a/kernel/locking/rtmutex_common.h |
15761 | +++ b/kernel/locking/rtmutex_common.h | |
7c18450a | 15762 | @@ -27,12 +27,14 @@ struct rt_mutex_waiter { |
1a6e0f06 JK |
15763 | struct rb_node pi_tree_entry; |
15764 | struct task_struct *task; | |
15765 | struct rt_mutex *lock; | |
15766 | + bool savestate; | |
15767 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
15768 | unsigned long ip; | |
15769 | struct pid *deadlock_task_pid; | |
7c18450a JK |
15770 | struct rt_mutex *deadlock_lock; |
15771 | #endif | |
15772 | int prio; | |
15773 | + u64 deadline; | |
15774 | }; | |
15775 | ||
15776 | /* | |
15777 | @@ -98,21 +100,45 @@ enum rtmutex_chainwalk { | |
1a6e0f06 JK |
15778 | /* |
15779 | * PI-futex support (proxy locking functions, etc.): | |
15780 | */ | |
15781 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
15782 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
15783 | + | |
15784 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
15785 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
15786 | struct task_struct *proxy_owner); | |
33c7bf0f JK |
15787 | extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, |
15788 | struct task_struct *proxy_owner); | |
15789 | +extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savetate); | |
15790 | +extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
15791 | + struct rt_mutex_waiter *waiter, | |
15792 | + struct task_struct *task); | |
15793 | extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
15794 | struct rt_mutex_waiter *waiter, | |
15795 | struct task_struct *task); | |
15796 | -extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
15797 | - struct hrtimer_sleeper *to, | |
15798 | - struct rt_mutex_waiter *waiter); | |
15799 | -extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); | |
15800 | -extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, | |
1a6e0f06 | 15801 | - struct wake_q_head *wqh); |
7c18450a | 15802 | -extern void rt_mutex_adjust_prio(struct task_struct *task); |
33c7bf0f JK |
15803 | +extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock, |
15804 | + struct hrtimer_sleeper *to, | |
15805 | + struct rt_mutex_waiter *waiter); | |
15806 | +extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock, | |
15807 | + struct rt_mutex_waiter *waiter); | |
15808 | + | |
15809 | +extern int rt_mutex_futex_trylock(struct rt_mutex *l); | |
15810 | + | |
15811 | +extern void rt_mutex_futex_unlock(struct rt_mutex *lock); | |
15812 | +extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock, | |
15813 | + struct wake_q_head *wqh, | |
15814 | + struct wake_q_head *wq_sleeper); | |
15815 | + | |
7c18450a JK |
15816 | +extern void rt_mutex_postunlock(struct wake_q_head *wake_q, |
15817 | + struct wake_q_head *wq_sleeper); | |
15818 | + | |
33c7bf0f JK |
15819 | +/* RW semaphore special interface */ |
15820 | +struct ww_acquire_ctx; | |
15821 | + | |
15822 | +int __sched rt_mutex_slowlock_locked(struct rt_mutex *lock, int state, | |
15823 | + struct hrtimer_sleeper *timeout, | |
15824 | + enum rtmutex_chainwalk chwalk, | |
15825 | + struct ww_acquire_ctx *ww_ctx, | |
15826 | + struct rt_mutex_waiter *waiter); | |
7c18450a | 15827 | |
1a6e0f06 | 15828 | #ifdef CONFIG_DEBUG_RT_MUTEXES |
33c7bf0f | 15829 | # include "rtmutex-debug.h" |
33c7bf0f JK |
15830 | diff --git a/kernel/locking/rwsem-rt.c b/kernel/locking/rwsem-rt.c |
15831 | new file mode 100644 | |
15832 | index 000000000000..4a708ffcded6 | |
15833 | --- /dev/null | |
15834 | +++ b/kernel/locking/rwsem-rt.c | |
15835 | @@ -0,0 +1,268 @@ | |
15836 | +/* | |
15837 | + */ | |
15838 | +#include <linux/rwsem.h> | |
15839 | +#include <linux/sched.h> | |
15840 | +#include <linux/export.h> | |
15841 | + | |
15842 | +#include "rtmutex_common.h" | |
15843 | + | |
15844 | +/* | |
15845 | + * RT-specific reader/writer semaphores | |
15846 | + * | |
15847 | + * down_write() | |
15848 | + * 1) Lock sem->rtmutex | |
15849 | + * 2) Remove the reader BIAS to force readers into the slow path | |
15850 | + * 3) Wait until all readers have left the critical region | |
15851 | + * 4) Mark it write locked | |
15852 | + * | |
15853 | + * up_write() | |
15854 | + * 1) Remove the write locked marker | |
15855 | + * 2) Set the reader BIAS so readers can use the fast path again | |
15856 | + * 3) Unlock sem->rtmutex to release blocked readers | |
15857 | + * | |
15858 | + * down_read() | |
15859 | + * 1) Try fast path acquisition (reader BIAS is set) | |
15860 | + * 2) Take sem->rtmutex.wait_lock which protects the writelocked flag | |
15861 | + * 3) If !writelocked, acquire it for read | |
15862 | + * 4) If writelocked, block on sem->rtmutex | |
15863 | + * 5) unlock sem->rtmutex, goto 1) | |
15864 | + * | |
15865 | + * up_read() | |
15866 | + * 1) Try fast path release (reader count != 1) | |
15867 | + * 2) Wake the writer waiting in down_write()#3 | |
15868 | + * | |
15869 | + * down_read()#3 has the consequence, that rw semaphores on RT are not writer | |
15870 | + * fair, but writers, which should be avoided in RT tasks (think mmap_sem), | |
15871 | + * are subject to the rtmutex priority/DL inheritance mechanism. | |
15872 | + * | |
15873 | + * It's possible to make the rw semaphores writer fair by keeping a list of | |
15874 | + * active readers. A blocked writer would force all newly incoming readers to | |
15875 | + * block on the rtmutex, but the rtmutex would have to be proxy locked for one | |
15876 | + * reader after the other. We can't use multi-reader inheritance because there | |
15877 | + * is no way to support that with SCHED_DEADLINE. Implementing the one by one | |
15878 | + * reader boosting/handover mechanism is a major surgery for a very dubious | |
15879 | + * value. | |
15880 | + * | |
15881 | + * The risk of writer starvation is there, but the pathological use cases | |
15882 | + * which trigger it are not necessarily the typical RT workloads. | |
15883 | + */ | |
15884 | + | |
15885 | +void __rwsem_init(struct rw_semaphore *sem, const char *name, | |
15886 | + struct lock_class_key *key) | |
1a6e0f06 | 15887 | +{ |
33c7bf0f JK |
15888 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC |
15889 | + /* | |
15890 | + * Make sure we are not reinitializing a held semaphore: | |
15891 | + */ | |
15892 | + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | |
15893 | + lockdep_init_map(&sem->dep_map, name, key, 0); | |
15894 | +#endif | |
15895 | + atomic_set(&sem->readers, READER_BIAS); | |
1a6e0f06 | 15896 | +} |
33c7bf0f | 15897 | +EXPORT_SYMBOL(__rwsem_init); |
1a6e0f06 | 15898 | + |
33c7bf0f JK |
15899 | +int __down_read_trylock(struct rw_semaphore *sem) |
15900 | +{ | |
15901 | + int r, old; | |
15902 | + | |
15903 | + /* | |
15904 | + * Increment reader count, if sem->readers < 0, i.e. READER_BIAS is | |
15905 | + * set. | |
15906 | + */ | |
15907 | + for (r = atomic_read(&sem->readers); r < 0;) { | |
15908 | + old = atomic_cmpxchg(&sem->readers, r, r + 1); | |
15909 | + if (likely(old == r)) | |
15910 | + return 1; | |
15911 | + r = old; | |
15912 | + } | |
15913 | + return 0; | |
15914 | +} | |
15915 | + | |
15916 | +void __sched __down_read(struct rw_semaphore *sem) | |
15917 | +{ | |
15918 | + struct rt_mutex *m = &sem->rtmutex; | |
15919 | + struct rt_mutex_waiter waiter; | |
15920 | + | |
15921 | + if (__down_read_trylock(sem)) | |
15922 | + return; | |
15923 | + | |
15924 | + might_sleep(); | |
15925 | + raw_spin_lock_irq(&m->wait_lock); | |
15926 | + /* | |
15927 | + * Allow readers as long as the writer has not completely | |
15928 | + * acquired the semaphore for write. | |
15929 | + */ | |
15930 | + if (atomic_read(&sem->readers) != WRITER_BIAS) { | |
15931 | + atomic_inc(&sem->readers); | |
15932 | + raw_spin_unlock_irq(&m->wait_lock); | |
15933 | + return; | |
15934 | + } | |
15935 | + | |
15936 | + /* | |
15937 | + * Call into the slow lock path with the rtmutex->wait_lock | |
15938 | + * held, so this can't result in the following race: | |
15939 | + * | |
15940 | + * Reader1 Reader2 Writer | |
15941 | + * down_read() | |
15942 | + * down_write() | |
15943 | + * rtmutex_lock(m) | |
15944 | + * swait() | |
15945 | + * down_read() | |
15946 | + * unlock(m->wait_lock) | |
15947 | + * up_read() | |
15948 | + * swake() | |
15949 | + * lock(m->wait_lock) | |
15950 | + * sem->writelocked=true | |
15951 | + * unlock(m->wait_lock) | |
15952 | + * | |
15953 | + * up_write() | |
15954 | + * sem->writelocked=false | |
15955 | + * rtmutex_unlock(m) | |
15956 | + * down_read() | |
15957 | + * down_write() | |
15958 | + * rtmutex_lock(m) | |
15959 | + * swait() | |
15960 | + * rtmutex_lock(m) | |
15961 | + * | |
15962 | + * That would put Reader1 behind the writer waiting on | |
15963 | + * Reader2 to call up_read() which might be unbound. | |
15964 | + */ | |
15965 | + rt_mutex_init_waiter(&waiter, false); | |
15966 | + rt_mutex_slowlock_locked(m, TASK_UNINTERRUPTIBLE, NULL, | |
15967 | + RT_MUTEX_MIN_CHAINWALK, NULL, | |
15968 | + &waiter); | |
15969 | + /* | |
15970 | + * The slowlock() above is guaranteed to return with the rtmutex is | |
15971 | + * now held, so there can't be a writer active. Increment the reader | |
15972 | + * count and immediately drop the rtmutex again. | |
15973 | + */ | |
15974 | + atomic_inc(&sem->readers); | |
15975 | + raw_spin_unlock_irq(&m->wait_lock); | |
15976 | + rt_mutex_unlock(m); | |
15977 | + | |
15978 | + debug_rt_mutex_free_waiter(&waiter); | |
15979 | +} | |
15980 | + | |
15981 | +void __up_read(struct rw_semaphore *sem) | |
15982 | +{ | |
15983 | + struct rt_mutex *m = &sem->rtmutex; | |
15984 | + struct task_struct *tsk; | |
15985 | + | |
15986 | + /* | |
15987 | + * sem->readers can only hit 0 when a writer is waiting for the | |
15988 | + * active readers to leave the critical region. | |
15989 | + */ | |
15990 | + if (!atomic_dec_and_test(&sem->readers)) | |
15991 | + return; | |
15992 | + | |
15993 | + might_sleep(); | |
15994 | + raw_spin_lock_irq(&m->wait_lock); | |
15995 | + /* | |
15996 | + * Wake the writer, i.e. the rtmutex owner. It might release the | |
15997 | + * rtmutex concurrently in the fast path (due to a signal), but to | |
15998 | + * clean up the rwsem it needs to acquire m->wait_lock. The worst | |
15999 | + * case which can happen is a spurious wakeup. | |
16000 | + */ | |
16001 | + tsk = rt_mutex_owner(m); | |
16002 | + if (tsk) | |
16003 | + wake_up_process(tsk); | |
16004 | + | |
16005 | + raw_spin_unlock_irq(&m->wait_lock); | |
16006 | +} | |
16007 | + | |
16008 | +static void __up_write_unlock(struct rw_semaphore *sem, int bias, | |
16009 | + unsigned long flags) | |
16010 | +{ | |
16011 | + struct rt_mutex *m = &sem->rtmutex; | |
16012 | + | |
16013 | + atomic_add(READER_BIAS - bias, &sem->readers); | |
16014 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
16015 | + rt_mutex_unlock(m); | |
16016 | +} | |
16017 | + | |
16018 | +static int __sched __down_write_common(struct rw_semaphore *sem, int state) | |
16019 | +{ | |
16020 | + struct rt_mutex *m = &sem->rtmutex; | |
16021 | + unsigned long flags; | |
16022 | + | |
16023 | + /* Take the rtmutex as a first step */ | |
16024 | + if (rt_mutex_lock_state(m, state)) | |
16025 | + return -EINTR; | |
16026 | + | |
16027 | + /* Force readers into slow path */ | |
16028 | + atomic_sub(READER_BIAS, &sem->readers); | |
16029 | + might_sleep(); | |
16030 | + | |
16031 | + set_current_state(state); | |
16032 | + for (;;) { | |
16033 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
16034 | + /* Have all readers left the critical region? */ | |
16035 | + if (!atomic_read(&sem->readers)) { | |
16036 | + atomic_set(&sem->readers, WRITER_BIAS); | |
16037 | + __set_current_state(TASK_RUNNING); | |
16038 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
16039 | + return 0; | |
16040 | + } | |
16041 | + | |
16042 | + if (signal_pending_state(state, current)) { | |
16043 | + __set_current_state(TASK_RUNNING); | |
16044 | + __up_write_unlock(sem, 0, flags); | |
16045 | + return -EINTR; | |
16046 | + } | |
16047 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
16048 | + | |
16049 | + if (atomic_read(&sem->readers) != 0) { | |
16050 | + schedule(); | |
16051 | + set_current_state(state); | |
16052 | + } | |
16053 | + } | |
16054 | +} | |
16055 | + | |
16056 | +void __sched __down_write(struct rw_semaphore *sem) | |
16057 | +{ | |
16058 | + __down_write_common(sem, TASK_UNINTERRUPTIBLE); | |
16059 | +} | |
16060 | + | |
16061 | +int __sched __down_write_killable(struct rw_semaphore *sem) | |
16062 | +{ | |
16063 | + return __down_write_common(sem, TASK_KILLABLE); | |
16064 | +} | |
16065 | + | |
16066 | +int __down_write_trylock(struct rw_semaphore *sem) | |
16067 | +{ | |
16068 | + struct rt_mutex *m = &sem->rtmutex; | |
16069 | + unsigned long flags; | |
16070 | + | |
16071 | + if (!rt_mutex_trylock(m)) | |
16072 | + return 0; | |
16073 | + | |
16074 | + atomic_sub(READER_BIAS, &sem->readers); | |
16075 | + | |
16076 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
16077 | + if (!atomic_read(&sem->readers)) { | |
16078 | + atomic_set(&sem->readers, WRITER_BIAS); | |
16079 | + raw_spin_unlock_irqrestore(&m->wait_lock, flags); | |
16080 | + return 1; | |
16081 | + } | |
16082 | + __up_write_unlock(sem, 0, flags); | |
16083 | + return 0; | |
16084 | +} | |
16085 | + | |
16086 | +void __up_write(struct rw_semaphore *sem) | |
16087 | +{ | |
16088 | + struct rt_mutex *m = &sem->rtmutex; | |
16089 | + unsigned long flags; | |
16090 | + | |
16091 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
16092 | + __up_write_unlock(sem, WRITER_BIAS, flags); | |
16093 | +} | |
16094 | + | |
16095 | +void __downgrade_write(struct rw_semaphore *sem) | |
16096 | +{ | |
16097 | + struct rt_mutex *m = &sem->rtmutex; | |
16098 | + unsigned long flags; | |
16099 | + | |
16100 | + raw_spin_lock_irqsave(&m->wait_lock, flags); | |
16101 | + /* Release it and account current as reader */ | |
16102 | + __up_write_unlock(sem, WRITER_BIAS - 1, flags); | |
16103 | +} | |
1a6e0f06 JK |
16104 | diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c |
16105 | index db3ccb1dd614..909779647bd1 100644 | |
16106 | --- a/kernel/locking/spinlock.c | |
16107 | +++ b/kernel/locking/spinlock.c | |
16108 | @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | |
16109 | * __[spin|read|write]_lock_bh() | |
16110 | */ | |
16111 | BUILD_LOCK_OPS(spin, raw_spinlock); | |
16112 | + | |
16113 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16114 | BUILD_LOCK_OPS(read, rwlock); | |
16115 | BUILD_LOCK_OPS(write, rwlock); | |
16116 | +#endif | |
16117 | ||
16118 | #endif | |
16119 | ||
16120 | @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | |
16121 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
16122 | #endif | |
16123 | ||
16124 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16125 | + | |
16126 | #ifndef CONFIG_INLINE_READ_TRYLOCK | |
16127 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
16128 | { | |
16129 | @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | |
16130 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
16131 | #endif | |
16132 | ||
16133 | +#endif /* !PREEMPT_RT_FULL */ | |
16134 | + | |
16135 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
16136 | ||
16137 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
16138 | diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c | |
16139 | index 0374a596cffa..94970338d518 100644 | |
16140 | --- a/kernel/locking/spinlock_debug.c | |
16141 | +++ b/kernel/locking/spinlock_debug.c | |
16142 | @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | |
16143 | ||
16144 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
16145 | ||
16146 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16147 | void __rwlock_init(rwlock_t *lock, const char *name, | |
16148 | struct lock_class_key *key) | |
16149 | { | |
16150 | @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, | |
16151 | } | |
16152 | ||
16153 | EXPORT_SYMBOL(__rwlock_init); | |
16154 | +#endif | |
16155 | ||
16156 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
16157 | { | |
16158 | @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) | |
16159 | arch_spin_unlock(&lock->raw_lock); | |
16160 | } | |
16161 | ||
16162 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16163 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
16164 | { | |
16165 | if (!debug_locks_off()) | |
16166 | @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock) | |
16167 | debug_write_unlock(lock); | |
16168 | arch_write_unlock(&lock->raw_lock); | |
16169 | } | |
16170 | + | |
16171 | +#endif | |
5c015b7c | 16172 | diff --git a/kernel/module.c b/kernel/module.c |
33c7bf0f | 16173 | index 0e54d5bf0097..f27764fbfa24 100644 |
5c015b7c JK |
16174 | --- a/kernel/module.c |
16175 | +++ b/kernel/module.c | |
16176 | @@ -660,16 +660,7 @@ static void percpu_modcopy(struct module *mod, | |
16177 | memcpy(per_cpu_ptr(mod->percpu, cpu), from, size); | |
16178 | } | |
16179 | ||
16180 | -/** | |
16181 | - * is_module_percpu_address - test whether address is from module static percpu | |
16182 | - * @addr: address to test | |
16183 | - * | |
16184 | - * Test whether @addr belongs to module static percpu area. | |
16185 | - * | |
16186 | - * RETURNS: | |
16187 | - * %true if @addr is from module static percpu area | |
16188 | - */ | |
16189 | -bool is_module_percpu_address(unsigned long addr) | |
16190 | +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) | |
16191 | { | |
16192 | struct module *mod; | |
16193 | unsigned int cpu; | |
33c7bf0f | 16194 | @@ -683,9 +674,15 @@ bool is_module_percpu_address(unsigned long addr) |
5c015b7c JK |
16195 | continue; |
16196 | for_each_possible_cpu(cpu) { | |
16197 | void *start = per_cpu_ptr(mod->percpu, cpu); | |
16198 | + void *va = (void *)addr; | |
16199 | ||
16200 | - if ((void *)addr >= start && | |
16201 | - (void *)addr < start + mod->percpu_size) { | |
16202 | + if (va >= start && va < start + mod->percpu_size) { | |
33c7bf0f | 16203 | + if (can_addr) { |
5c015b7c | 16204 | + *can_addr = (unsigned long) (va - start); |
33c7bf0f JK |
16205 | + *can_addr += (unsigned long) |
16206 | + per_cpu_ptr(mod->percpu, | |
16207 | + get_boot_cpu_id()); | |
16208 | + } | |
5c015b7c JK |
16209 | preempt_enable(); |
16210 | return true; | |
16211 | } | |
33c7bf0f | 16212 | @@ -696,6 +693,20 @@ bool is_module_percpu_address(unsigned long addr) |
5c015b7c JK |
16213 | return false; |
16214 | } | |
16215 | ||
16216 | +/** | |
16217 | + * is_module_percpu_address - test whether address is from module static percpu | |
16218 | + * @addr: address to test | |
16219 | + * | |
16220 | + * Test whether @addr belongs to module static percpu area. | |
16221 | + * | |
16222 | + * RETURNS: | |
16223 | + * %true if @addr is from module static percpu area | |
16224 | + */ | |
16225 | +bool is_module_percpu_address(unsigned long addr) | |
16226 | +{ | |
16227 | + return __is_module_percpu_address(addr, NULL); | |
16228 | +} | |
16229 | + | |
16230 | #else /* ... !CONFIG_SMP */ | |
16231 | ||
16232 | static inline void __percpu *mod_percpu(struct module *mod) | |
33c7bf0f | 16233 | @@ -727,6 +738,11 @@ bool is_module_percpu_address(unsigned long addr) |
5c015b7c JK |
16234 | return false; |
16235 | } | |
16236 | ||
16237 | +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) | |
16238 | +{ | |
16239 | + return false; | |
16240 | +} | |
16241 | + | |
16242 | #endif /* CONFIG_SMP */ | |
16243 | ||
16244 | #define MODINFO_ATTR(field) \ | |
1a6e0f06 | 16245 | diff --git a/kernel/panic.c b/kernel/panic.c |
c7c16703 | 16246 | index e6480e20379e..7e9c1918a94e 100644 |
1a6e0f06 JK |
16247 | --- a/kernel/panic.c |
16248 | +++ b/kernel/panic.c | |
c7c16703 | 16249 | @@ -482,9 +482,11 @@ static u64 oops_id; |
1a6e0f06 JK |
16250 | |
16251 | static int init_oops_id(void) | |
16252 | { | |
16253 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16254 | if (!oops_id) | |
16255 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
16256 | else | |
16257 | +#endif | |
16258 | oops_id++; | |
16259 | ||
16260 | return 0; | |
16261 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c | |
c7c16703 | 16262 | index b26dbc48c75b..968255f27a33 100644 |
1a6e0f06 JK |
16263 | --- a/kernel/power/hibernate.c |
16264 | +++ b/kernel/power/hibernate.c | |
16265 | @@ -286,6 +286,8 @@ static int create_image(int platform_mode) | |
16266 | ||
16267 | local_irq_disable(); | |
16268 | ||
16269 | + system_state = SYSTEM_SUSPEND; | |
16270 | + | |
16271 | error = syscore_suspend(); | |
16272 | if (error) { | |
16273 | printk(KERN_ERR "PM: Some system devices failed to power down, " | |
c7c16703 | 16274 | @@ -317,6 +319,7 @@ static int create_image(int platform_mode) |
1a6e0f06 JK |
16275 | syscore_resume(); |
16276 | ||
16277 | Enable_irqs: | |
16278 | + system_state = SYSTEM_RUNNING; | |
16279 | local_irq_enable(); | |
16280 | ||
16281 | Enable_cpus: | |
c7c16703 | 16282 | @@ -446,6 +449,7 @@ static int resume_target_kernel(bool platform_mode) |
1a6e0f06 JK |
16283 | goto Enable_cpus; |
16284 | ||
16285 | local_irq_disable(); | |
16286 | + system_state = SYSTEM_SUSPEND; | |
16287 | ||
16288 | error = syscore_suspend(); | |
16289 | if (error) | |
c7c16703 | 16290 | @@ -479,6 +483,7 @@ static int resume_target_kernel(bool platform_mode) |
1a6e0f06 JK |
16291 | syscore_resume(); |
16292 | ||
16293 | Enable_irqs: | |
16294 | + system_state = SYSTEM_RUNNING; | |
16295 | local_irq_enable(); | |
16296 | ||
16297 | Enable_cpus: | |
c7c16703 | 16298 | @@ -564,6 +569,7 @@ int hibernation_platform_enter(void) |
1a6e0f06 JK |
16299 | goto Enable_cpus; |
16300 | ||
16301 | local_irq_disable(); | |
16302 | + system_state = SYSTEM_SUSPEND; | |
16303 | syscore_suspend(); | |
16304 | if (pm_wakeup_pending()) { | |
16305 | error = -EAGAIN; | |
c7c16703 | 16306 | @@ -576,6 +582,7 @@ int hibernation_platform_enter(void) |
1a6e0f06 JK |
16307 | |
16308 | Power_up: | |
16309 | syscore_resume(); | |
16310 | + system_state = SYSTEM_RUNNING; | |
16311 | local_irq_enable(); | |
16312 | ||
16313 | Enable_cpus: | |
c7c16703 | 16314 | @@ -676,6 +683,10 @@ static int load_image_and_restore(void) |
1a6e0f06 JK |
16315 | return error; |
16316 | } | |
16317 | ||
16318 | +#ifndef CONFIG_SUSPEND | |
16319 | +bool pm_in_action; | |
16320 | +#endif | |
16321 | + | |
16322 | /** | |
16323 | * hibernate - Carry out system hibernation, including saving the image. | |
16324 | */ | |
c7c16703 | 16325 | @@ -689,6 +700,8 @@ int hibernate(void) |
1a6e0f06 JK |
16326 | return -EPERM; |
16327 | } | |
16328 | ||
16329 | + pm_in_action = true; | |
16330 | + | |
16331 | lock_system_sleep(); | |
16332 | /* The snapshot device should not be opened while we're running */ | |
16333 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
c7c16703 | 16334 | @@ -766,6 +779,7 @@ int hibernate(void) |
1a6e0f06 JK |
16335 | atomic_inc(&snapshot_device_available); |
16336 | Unlock: | |
16337 | unlock_system_sleep(); | |
16338 | + pm_in_action = false; | |
16339 | return error; | |
16340 | } | |
16341 | ||
16342 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | |
c7c16703 | 16343 | index 6ccb08f57fcb..c8cbb5ed2fe3 100644 |
1a6e0f06 JK |
16344 | --- a/kernel/power/suspend.c |
16345 | +++ b/kernel/power/suspend.c | |
c7c16703 | 16346 | @@ -369,6 +369,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) |
1a6e0f06 JK |
16347 | arch_suspend_disable_irqs(); |
16348 | BUG_ON(!irqs_disabled()); | |
16349 | ||
16350 | + system_state = SYSTEM_SUSPEND; | |
16351 | + | |
16352 | error = syscore_suspend(); | |
16353 | if (!error) { | |
16354 | *wakeup = pm_wakeup_pending(); | |
c7c16703 | 16355 | @@ -385,6 +387,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) |
1a6e0f06 JK |
16356 | syscore_resume(); |
16357 | } | |
16358 | ||
16359 | + system_state = SYSTEM_RUNNING; | |
16360 | + | |
16361 | arch_suspend_enable_irqs(); | |
16362 | BUG_ON(irqs_disabled()); | |
16363 | ||
c7c16703 | 16364 | @@ -527,6 +531,8 @@ static int enter_state(suspend_state_t state) |
1a6e0f06 JK |
16365 | return error; |
16366 | } | |
16367 | ||
16368 | +bool pm_in_action; | |
16369 | + | |
16370 | /** | |
16371 | * pm_suspend - Externally visible function for suspending the system. | |
16372 | * @state: System sleep state to enter. | |
c7c16703 | 16373 | @@ -541,6 +547,8 @@ int pm_suspend(suspend_state_t state) |
1a6e0f06 JK |
16374 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) |
16375 | return -EINVAL; | |
16376 | ||
16377 | + pm_in_action = true; | |
16378 | + | |
16379 | error = enter_state(state); | |
16380 | if (error) { | |
16381 | suspend_stats.fail++; | |
c7c16703 | 16382 | @@ -548,6 +556,7 @@ int pm_suspend(suspend_state_t state) |
1a6e0f06 JK |
16383 | } else { |
16384 | suspend_stats.success++; | |
16385 | } | |
16386 | + pm_in_action = false; | |
16387 | return error; | |
16388 | } | |
16389 | EXPORT_SYMBOL(pm_suspend); | |
16390 | diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c | |
5c015b7c | 16391 | index 9c5b231684d0..cf15bdb6855b 100644 |
1a6e0f06 JK |
16392 | --- a/kernel/printk/printk.c |
16393 | +++ b/kernel/printk/printk.c | |
16394 | @@ -351,6 +351,65 @@ __packed __aligned(4) | |
16395 | */ | |
16396 | DEFINE_RAW_SPINLOCK(logbuf_lock); | |
16397 | ||
16398 | +#ifdef CONFIG_EARLY_PRINTK | |
16399 | +struct console *early_console; | |
16400 | + | |
16401 | +static void early_vprintk(const char *fmt, va_list ap) | |
16402 | +{ | |
16403 | + if (early_console) { | |
16404 | + char buf[512]; | |
16405 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
16406 | + | |
16407 | + early_console->write(early_console, buf, n); | |
16408 | + } | |
16409 | +} | |
16410 | + | |
16411 | +asmlinkage void early_printk(const char *fmt, ...) | |
16412 | +{ | |
16413 | + va_list ap; | |
16414 | + | |
16415 | + va_start(ap, fmt); | |
16416 | + early_vprintk(fmt, ap); | |
16417 | + va_end(ap); | |
16418 | +} | |
16419 | + | |
16420 | +/* | |
16421 | + * This is independent of any log levels - a global | |
16422 | + * kill switch that turns off all of printk. | |
16423 | + * | |
16424 | + * Used by the NMI watchdog if early-printk is enabled. | |
16425 | + */ | |
16426 | +static bool __read_mostly printk_killswitch; | |
16427 | + | |
16428 | +static int __init force_early_printk_setup(char *str) | |
16429 | +{ | |
16430 | + printk_killswitch = true; | |
16431 | + return 0; | |
16432 | +} | |
16433 | +early_param("force_early_printk", force_early_printk_setup); | |
16434 | + | |
16435 | +void printk_kill(void) | |
16436 | +{ | |
16437 | + printk_killswitch = true; | |
16438 | +} | |
16439 | + | |
16440 | +#ifdef CONFIG_PRINTK | |
16441 | +static int forced_early_printk(const char *fmt, va_list ap) | |
16442 | +{ | |
16443 | + if (!printk_killswitch) | |
16444 | + return 0; | |
16445 | + early_vprintk(fmt, ap); | |
16446 | + return 1; | |
16447 | +} | |
16448 | +#endif | |
16449 | + | |
16450 | +#else | |
16451 | +static inline int forced_early_printk(const char *fmt, va_list ap) | |
16452 | +{ | |
16453 | + return 0; | |
16454 | +} | |
16455 | +#endif | |
16456 | + | |
16457 | #ifdef CONFIG_PRINTK | |
16458 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
16459 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
c7c16703 | 16460 | @@ -1337,6 +1396,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
16461 | { |
16462 | char *text; | |
16463 | int len = 0; | |
16464 | + int attempts = 0; | |
16465 | ||
16466 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
16467 | if (!text) | |
c7c16703 | 16468 | @@ -1348,6 +1408,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
16469 | u64 seq; |
16470 | u32 idx; | |
16471 | enum log_flags prev; | |
16472 | + int num_msg; | |
16473 | +try_again: | |
16474 | + attempts++; | |
16475 | + if (attempts > 10) { | |
16476 | + len = -EBUSY; | |
16477 | + goto out; | |
16478 | + } | |
16479 | + num_msg = 0; | |
16480 | ||
16481 | /* | |
16482 | * Find first record that fits, including all following records, | |
c7c16703 | 16483 | @@ -1363,6 +1431,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
16484 | prev = msg->flags; |
16485 | idx = log_next(idx); | |
16486 | seq++; | |
16487 | + num_msg++; | |
16488 | + if (num_msg > 5) { | |
16489 | + num_msg = 0; | |
16490 | + raw_spin_unlock_irq(&logbuf_lock); | |
16491 | + raw_spin_lock_irq(&logbuf_lock); | |
16492 | + if (clear_seq < log_first_seq) | |
16493 | + goto try_again; | |
16494 | + } | |
16495 | } | |
16496 | ||
16497 | /* move first record forward until length fits into the buffer */ | |
c7c16703 | 16498 | @@ -1376,6 +1452,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
16499 | prev = msg->flags; |
16500 | idx = log_next(idx); | |
16501 | seq++; | |
16502 | + num_msg++; | |
16503 | + if (num_msg > 5) { | |
16504 | + num_msg = 0; | |
16505 | + raw_spin_unlock_irq(&logbuf_lock); | |
16506 | + raw_spin_lock_irq(&logbuf_lock); | |
16507 | + if (clear_seq < log_first_seq) | |
16508 | + goto try_again; | |
16509 | + } | |
16510 | } | |
16511 | ||
16512 | /* last message fitting into this dump */ | |
c7c16703 | 16513 | @@ -1416,6 +1500,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) |
1a6e0f06 JK |
16514 | clear_seq = log_next_seq; |
16515 | clear_idx = log_next_idx; | |
16516 | } | |
16517 | +out: | |
16518 | raw_spin_unlock_irq(&logbuf_lock); | |
16519 | ||
16520 | kfree(text); | |
c7c16703 | 16521 | @@ -1569,6 +1654,12 @@ static void call_console_drivers(int level, |
1a6e0f06 JK |
16522 | if (!console_drivers) |
16523 | return; | |
16524 | ||
16525 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
16526 | + if (in_irq() || in_nmi()) | |
16527 | + return; | |
16528 | + } | |
16529 | + | |
16530 | + migrate_disable(); | |
16531 | for_each_console(con) { | |
16532 | if (exclusive_console && con != exclusive_console) | |
16533 | continue; | |
c7c16703 | 16534 | @@ -1584,6 +1675,7 @@ static void call_console_drivers(int level, |
1a6e0f06 JK |
16535 | else |
16536 | con->write(con, text, len); | |
16537 | } | |
16538 | + migrate_enable(); | |
16539 | } | |
16540 | ||
16541 | /* | |
c7c16703 | 16542 | @@ -1781,6 +1873,13 @@ asmlinkage int vprintk_emit(int facility, int level, |
1a6e0f06 JK |
16543 | /* cpu currently holding logbuf_lock in this function */ |
16544 | static unsigned int logbuf_cpu = UINT_MAX; | |
16545 | ||
16546 | + /* | |
16547 | + * Fall back to early_printk if a debugging subsystem has | |
16548 | + * killed printk output | |
16549 | + */ | |
16550 | + if (unlikely(forced_early_printk(fmt, args))) | |
16551 | + return 1; | |
16552 | + | |
16553 | if (level == LOGLEVEL_SCHED) { | |
16554 | level = LOGLEVEL_DEFAULT; | |
16555 | in_sched = true; | |
c7c16703 | 16556 | @@ -1885,13 +1984,23 @@ asmlinkage int vprintk_emit(int facility, int level, |
1a6e0f06 JK |
16557 | |
16558 | /* If called from the scheduler, we can not call up(). */ | |
16559 | if (!in_sched) { | |
16560 | + int may_trylock = 1; | |
16561 | + | |
16562 | lockdep_off(); | |
16563 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16564 | + /* | |
16565 | + * we can't take a sleeping lock with IRQs or preeption disabled | |
16566 | + * so we can't print in these contexts | |
16567 | + */ | |
16568 | + if (!(preempt_count() == 0 && !irqs_disabled())) | |
16569 | + may_trylock = 0; | |
16570 | +#endif | |
16571 | /* | |
16572 | * Try to acquire and then immediately release the console | |
16573 | * semaphore. The release will print out buffers and wake up | |
16574 | * /dev/kmsg and syslog() users. | |
16575 | */ | |
16576 | - if (console_trylock()) | |
16577 | + if (may_trylock && console_trylock()) | |
16578 | console_unlock(); | |
16579 | lockdep_on(); | |
16580 | } | |
c7c16703 | 16581 | @@ -2014,26 +2123,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); |
1a6e0f06 JK |
16582 | |
16583 | #endif /* CONFIG_PRINTK */ | |
16584 | ||
16585 | -#ifdef CONFIG_EARLY_PRINTK | |
16586 | -struct console *early_console; | |
16587 | - | |
16588 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
16589 | -{ | |
16590 | - va_list ap; | |
16591 | - char buf[512]; | |
16592 | - int n; | |
16593 | - | |
16594 | - if (!early_console) | |
16595 | - return; | |
16596 | - | |
16597 | - va_start(ap, fmt); | |
16598 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
16599 | - va_end(ap); | |
16600 | - | |
16601 | - early_console->write(early_console, buf, n); | |
16602 | -} | |
16603 | -#endif | |
16604 | - | |
16605 | static int __add_preferred_console(char *name, int idx, char *options, | |
16606 | char *brl_options) | |
16607 | { | |
c7c16703 | 16608 | @@ -2303,11 +2392,16 @@ static void console_cont_flush(char *text, size_t size) |
1a6e0f06 JK |
16609 | goto out; |
16610 | ||
16611 | len = cont_print_text(text, size); | |
16612 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16613 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
16614 | + call_console_drivers(cont.level, NULL, 0, text, len); | |
16615 | +#else | |
16616 | raw_spin_unlock(&logbuf_lock); | |
16617 | stop_critical_timings(); | |
16618 | call_console_drivers(cont.level, NULL, 0, text, len); | |
16619 | start_critical_timings(); | |
16620 | local_irq_restore(flags); | |
16621 | +#endif | |
16622 | return; | |
16623 | out: | |
16624 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
c7c16703 | 16625 | @@ -2431,13 +2525,17 @@ void console_unlock(void) |
1a6e0f06 JK |
16626 | console_idx = log_next(console_idx); |
16627 | console_seq++; | |
16628 | console_prev = msg->flags; | |
16629 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16630 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
16631 | + call_console_drivers(level, ext_text, ext_len, text, len); | |
16632 | +#else | |
16633 | raw_spin_unlock(&logbuf_lock); | |
16634 | ||
16635 | stop_critical_timings(); /* don't trace print latency */ | |
16636 | call_console_drivers(level, ext_text, ext_len, text, len); | |
16637 | start_critical_timings(); | |
16638 | local_irq_restore(flags); | |
16639 | - | |
16640 | +#endif | |
16641 | if (do_cond_resched) | |
16642 | cond_resched(); | |
16643 | } | |
c7c16703 | 16644 | @@ -2489,6 +2587,11 @@ void console_unblank(void) |
1a6e0f06 JK |
16645 | { |
16646 | struct console *c; | |
16647 | ||
16648 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
16649 | + if (in_irq() || in_nmi()) | |
16650 | + return; | |
16651 | + } | |
16652 | + | |
16653 | /* | |
16654 | * console_unblank can no longer be called in interrupt context unless | |
16655 | * oops_in_progress is set to 1.. | |
16656 | diff --git a/kernel/ptrace.c b/kernel/ptrace.c | |
7c18450a | 16657 | index a5caecef88be..61e7c5e2183c 100644 |
1a6e0f06 JK |
16658 | --- a/kernel/ptrace.c |
16659 | +++ b/kernel/ptrace.c | |
c7c16703 | 16660 | @@ -166,7 +166,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) |
1a6e0f06 JK |
16661 | |
16662 | spin_lock_irq(&task->sighand->siglock); | |
16663 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
16664 | - task->state = __TASK_TRACED; | |
16665 | + unsigned long flags; | |
16666 | + | |
16667 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
16668 | + if (task->state & __TASK_TRACED) | |
16669 | + task->state = __TASK_TRACED; | |
16670 | + else | |
16671 | + task->saved_state = __TASK_TRACED; | |
16672 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
16673 | ret = true; | |
16674 | } | |
16675 | spin_unlock_irq(&task->sighand->siglock); | |
16676 | diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c | |
c7c16703 | 16677 | index bf08fee53dc7..eeb8ce4ad7b6 100644 |
1a6e0f06 JK |
16678 | --- a/kernel/rcu/rcutorture.c |
16679 | +++ b/kernel/rcu/rcutorture.c | |
16680 | @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = { | |
16681 | .name = "rcu" | |
16682 | }; | |
16683 | ||
16684 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16685 | /* | |
16686 | * Definitions for rcu_bh torture testing. | |
16687 | */ | |
16688 | @@ -443,6 +444,12 @@ static struct rcu_torture_ops rcu_bh_ops = { | |
16689 | .name = "rcu_bh" | |
16690 | }; | |
16691 | ||
16692 | +#else | |
16693 | +static struct rcu_torture_ops rcu_bh_ops = { | |
16694 | + .ttype = INVALID_RCU_FLAVOR, | |
16695 | +}; | |
16696 | +#endif | |
16697 | + | |
16698 | /* | |
16699 | * Don't even think about trying any of these in real life!!! | |
16700 | * The names includes "busted", and they really means it! | |
16701 | diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c | |
1f39f580 | 16702 | index 10f62c6f48e7..dbee19478f09 100644 |
1a6e0f06 JK |
16703 | --- a/kernel/rcu/tree.c |
16704 | +++ b/kernel/rcu/tree.c | |
c7c16703 | 16705 | @@ -55,6 +55,11 @@ |
1a6e0f06 JK |
16706 | #include <linux/random.h> |
16707 | #include <linux/trace_events.h> | |
16708 | #include <linux/suspend.h> | |
16709 | +#include <linux/delay.h> | |
16710 | +#include <linux/gfp.h> | |
16711 | +#include <linux/oom.h> | |
16712 | +#include <linux/smpboot.h> | |
16713 | +#include "../time/tick-internal.h" | |
16714 | ||
16715 | #include "tree.h" | |
16716 | #include "rcu.h" | |
1f39f580 | 16717 | @@ -260,6 +265,19 @@ void rcu_sched_qs(void) |
1a6e0f06 JK |
16718 | this_cpu_ptr(&rcu_sched_data), true); |
16719 | } | |
16720 | ||
16721 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16722 | +static void rcu_preempt_qs(void); | |
16723 | + | |
16724 | +void rcu_bh_qs(void) | |
16725 | +{ | |
16726 | + unsigned long flags; | |
16727 | + | |
16728 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ | |
16729 | + local_irq_save(flags); | |
16730 | + rcu_preempt_qs(); | |
16731 | + local_irq_restore(flags); | |
16732 | +} | |
16733 | +#else | |
16734 | void rcu_bh_qs(void) | |
16735 | { | |
16736 | if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { | |
1f39f580 | 16737 | @@ -269,6 +287,7 @@ void rcu_bh_qs(void) |
1a6e0f06 JK |
16738 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); |
16739 | } | |
16740 | } | |
16741 | +#endif | |
16742 | ||
16743 | static DEFINE_PER_CPU(int, rcu_sched_qs_mask); | |
16744 | ||
1f39f580 | 16745 | @@ -449,11 +468,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); |
1a6e0f06 JK |
16746 | /* |
16747 | * Return the number of RCU BH batches started thus far for debug & stats. | |
16748 | */ | |
16749 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16750 | unsigned long rcu_batches_started_bh(void) | |
16751 | { | |
16752 | return rcu_bh_state.gpnum; | |
16753 | } | |
16754 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
16755 | +#endif | |
16756 | ||
16757 | /* | |
16758 | * Return the number of RCU batches completed thus far for debug & stats. | |
1f39f580 | 16759 | @@ -473,6 +494,7 @@ unsigned long rcu_batches_completed_sched(void) |
1a6e0f06 JK |
16760 | } |
16761 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
16762 | ||
16763 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16764 | /* | |
16765 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
16766 | */ | |
1f39f580 | 16767 | @@ -481,6 +503,7 @@ unsigned long rcu_batches_completed_bh(void) |
1a6e0f06 JK |
16768 | return rcu_bh_state.completed; |
16769 | } | |
16770 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
16771 | +#endif | |
16772 | ||
16773 | /* | |
16774 | * Return the number of RCU expedited batches completed thus far for | |
1f39f580 | 16775 | @@ -504,6 +527,7 @@ unsigned long rcu_exp_batches_completed_sched(void) |
1a6e0f06 JK |
16776 | } |
16777 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); | |
16778 | ||
16779 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16780 | /* | |
16781 | * Force a quiescent state. | |
16782 | */ | |
1f39f580 | 16783 | @@ -522,6 +546,13 @@ void rcu_bh_force_quiescent_state(void) |
1a6e0f06 JK |
16784 | } |
16785 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | |
16786 | ||
16787 | +#else | |
16788 | +void rcu_force_quiescent_state(void) | |
16789 | +{ | |
16790 | +} | |
16791 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
16792 | +#endif | |
16793 | + | |
16794 | /* | |
16795 | * Force a quiescent state for RCU-sched. | |
16796 | */ | |
1f39f580 | 16797 | @@ -572,9 +603,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, |
1a6e0f06 JK |
16798 | case RCU_FLAVOR: |
16799 | rsp = rcu_state_p; | |
16800 | break; | |
16801 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16802 | case RCU_BH_FLAVOR: | |
16803 | rsp = &rcu_bh_state; | |
16804 | break; | |
16805 | +#endif | |
16806 | case RCU_SCHED_FLAVOR: | |
16807 | rsp = &rcu_sched_state; | |
16808 | break; | |
1f39f580 | 16809 | @@ -3016,18 +3049,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) |
1a6e0f06 JK |
16810 | /* |
16811 | * Do RCU core processing for the current CPU. | |
16812 | */ | |
c7c16703 JK |
16813 | -static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) |
16814 | +static __latent_entropy void rcu_process_callbacks(void) | |
1a6e0f06 JK |
16815 | { |
16816 | struct rcu_state *rsp; | |
16817 | ||
16818 | if (cpu_is_offline(smp_processor_id())) | |
16819 | return; | |
16820 | - trace_rcu_utilization(TPS("Start RCU core")); | |
16821 | for_each_rcu_flavor(rsp) | |
16822 | __rcu_process_callbacks(rsp); | |
16823 | - trace_rcu_utilization(TPS("End RCU core")); | |
16824 | } | |
16825 | ||
16826 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
16827 | /* | |
16828 | * Schedule RCU callback invocation. If the specified type of RCU | |
16829 | * does not support RCU priority boosting, just do a direct call, | |
1f39f580 | 16830 | @@ -3039,19 +3071,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) |
1a6e0f06 JK |
16831 | { |
16832 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) | |
16833 | return; | |
16834 | - if (likely(!rsp->boost)) { | |
16835 | - rcu_do_batch(rsp, rdp); | |
16836 | - return; | |
16837 | - } | |
16838 | - invoke_rcu_callbacks_kthread(); | |
16839 | + rcu_do_batch(rsp, rdp); | |
16840 | } | |
16841 | ||
16842 | +static void rcu_wake_cond(struct task_struct *t, int status) | |
16843 | +{ | |
16844 | + /* | |
16845 | + * If the thread is yielding, only wake it when this | |
16846 | + * is invoked from idle | |
16847 | + */ | |
16848 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
16849 | + wake_up_process(t); | |
16850 | +} | |
16851 | + | |
16852 | +/* | |
16853 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
16854 | + */ | |
16855 | static void invoke_rcu_core(void) | |
16856 | { | |
16857 | - if (cpu_online(smp_processor_id())) | |
16858 | - raise_softirq(RCU_SOFTIRQ); | |
16859 | + unsigned long flags; | |
16860 | + struct task_struct *t; | |
16861 | + | |
16862 | + if (!cpu_online(smp_processor_id())) | |
16863 | + return; | |
16864 | + local_irq_save(flags); | |
16865 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
16866 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
16867 | + if (t != NULL && current != t) | |
16868 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
16869 | + local_irq_restore(flags); | |
16870 | } | |
16871 | ||
16872 | +static void rcu_cpu_kthread_park(unsigned int cpu) | |
16873 | +{ | |
16874 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
16875 | +} | |
16876 | + | |
16877 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
16878 | +{ | |
16879 | + return __this_cpu_read(rcu_cpu_has_work); | |
16880 | +} | |
16881 | + | |
16882 | +/* | |
16883 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
16884 | + * RCU softirq used in flavors and configurations of RCU that do not | |
16885 | + * support RCU priority boosting. | |
16886 | + */ | |
16887 | +static void rcu_cpu_kthread(unsigned int cpu) | |
16888 | +{ | |
16889 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
16890 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
16891 | + int spincnt; | |
16892 | + | |
16893 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
16894 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
16895 | + local_bh_disable(); | |
16896 | + *statusp = RCU_KTHREAD_RUNNING; | |
16897 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
16898 | + local_irq_disable(); | |
16899 | + work = *workp; | |
16900 | + *workp = 0; | |
16901 | + local_irq_enable(); | |
16902 | + if (work) | |
16903 | + rcu_process_callbacks(); | |
16904 | + local_bh_enable(); | |
16905 | + if (*workp == 0) { | |
16906 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
16907 | + *statusp = RCU_KTHREAD_WAITING; | |
16908 | + return; | |
16909 | + } | |
16910 | + } | |
16911 | + *statusp = RCU_KTHREAD_YIELDING; | |
16912 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
16913 | + schedule_timeout_interruptible(2); | |
16914 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
16915 | + *statusp = RCU_KTHREAD_WAITING; | |
16916 | +} | |
16917 | + | |
16918 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
16919 | + .store = &rcu_cpu_kthread_task, | |
16920 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
16921 | + .thread_fn = rcu_cpu_kthread, | |
16922 | + .thread_comm = "rcuc/%u", | |
16923 | + .setup = rcu_cpu_kthread_setup, | |
16924 | + .park = rcu_cpu_kthread_park, | |
16925 | +}; | |
16926 | + | |
16927 | +/* | |
16928 | + * Spawn per-CPU RCU core processing kthreads. | |
16929 | + */ | |
16930 | +static int __init rcu_spawn_core_kthreads(void) | |
16931 | +{ | |
16932 | + int cpu; | |
16933 | + | |
16934 | + for_each_possible_cpu(cpu) | |
16935 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
16936 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
16937 | + return 0; | |
16938 | +} | |
16939 | +early_initcall(rcu_spawn_core_kthreads); | |
16940 | + | |
16941 | /* | |
16942 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
16943 | */ | |
1f39f580 | 16944 | @@ -3195,6 +3314,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) |
1a6e0f06 JK |
16945 | } |
16946 | EXPORT_SYMBOL_GPL(call_rcu_sched); | |
16947 | ||
16948 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16949 | /* | |
16950 | * Queue an RCU callback for invocation after a quicker grace period. | |
16951 | */ | |
1f39f580 | 16952 | @@ -3203,6 +3323,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) |
1a6e0f06 JK |
16953 | __call_rcu(head, func, &rcu_bh_state, -1, 0); |
16954 | } | |
16955 | EXPORT_SYMBOL_GPL(call_rcu_bh); | |
16956 | +#endif | |
16957 | ||
16958 | /* | |
16959 | * Queue an RCU callback for lazy invocation after a grace period. | |
1f39f580 | 16960 | @@ -3294,6 +3415,7 @@ void synchronize_sched(void) |
1a6e0f06 JK |
16961 | } |
16962 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
16963 | ||
16964 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16965 | /** | |
16966 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
16967 | * | |
1f39f580 | 16968 | @@ -3320,6 +3442,7 @@ void synchronize_rcu_bh(void) |
1a6e0f06 JK |
16969 | wait_rcu_gp(call_rcu_bh); |
16970 | } | |
16971 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
16972 | +#endif | |
16973 | ||
16974 | /** | |
16975 | * get_state_synchronize_rcu - Snapshot current RCU state | |
1f39f580 | 16976 | @@ -3698,6 +3821,7 @@ static void _rcu_barrier(struct rcu_state *rsp) |
1a6e0f06 JK |
16977 | mutex_unlock(&rsp->barrier_mutex); |
16978 | } | |
16979 | ||
16980 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16981 | /** | |
16982 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
16983 | */ | |
1f39f580 | 16984 | @@ -3706,6 +3830,7 @@ void rcu_barrier_bh(void) |
1a6e0f06 JK |
16985 | _rcu_barrier(&rcu_bh_state); |
16986 | } | |
16987 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
16988 | +#endif | |
16989 | ||
16990 | /** | |
16991 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
1f39f580 | 16992 | @@ -4227,12 +4352,13 @@ void __init rcu_init(void) |
1a6e0f06 JK |
16993 | |
16994 | rcu_bootup_announce(); | |
16995 | rcu_init_geometry(); | |
16996 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16997 | rcu_init_one(&rcu_bh_state); | |
16998 | +#endif | |
16999 | rcu_init_one(&rcu_sched_state); | |
17000 | if (dump_tree) | |
17001 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
17002 | __rcu_init_preempt(); | |
17003 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
17004 | ||
17005 | /* | |
17006 | * We don't need protection against CPU-hotplug here because | |
17007 | diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h | |
c7c16703 | 17008 | index e99a5234d9ed..958ac107062c 100644 |
1a6e0f06 JK |
17009 | --- a/kernel/rcu/tree.h |
17010 | +++ b/kernel/rcu/tree.h | |
c7c16703 | 17011 | @@ -588,18 +588,18 @@ extern struct list_head rcu_struct_flavors; |
1a6e0f06 JK |
17012 | */ |
17013 | extern struct rcu_state rcu_sched_state; | |
17014 | ||
17015 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17016 | extern struct rcu_state rcu_bh_state; | |
17017 | +#endif | |
17018 | ||
17019 | #ifdef CONFIG_PREEMPT_RCU | |
17020 | extern struct rcu_state rcu_preempt_state; | |
17021 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
17022 | ||
17023 | -#ifdef CONFIG_RCU_BOOST | |
17024 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
17025 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
17026 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
17027 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
17028 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
17029 | ||
17030 | #ifndef RCU_TREE_NONCORE | |
17031 | ||
c7c16703 | 17032 | @@ -619,10 +619,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); |
1a6e0f06 JK |
17033 | static void __init __rcu_init_preempt(void); |
17034 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
17035 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
17036 | -static void invoke_rcu_callbacks_kthread(void); | |
17037 | static bool rcu_is_callbacks_kthread(void); | |
17038 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
17039 | #ifdef CONFIG_RCU_BOOST | |
17040 | -static void rcu_preempt_do_callbacks(void); | |
17041 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
17042 | struct rcu_node *rnp); | |
17043 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
17044 | diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h | |
1f39f580 | 17045 | index 56583e764ebf..7c656f8e192f 100644 |
1a6e0f06 JK |
17046 | --- a/kernel/rcu/tree_plugin.h |
17047 | +++ b/kernel/rcu/tree_plugin.h | |
17048 | @@ -24,25 +24,10 @@ | |
17049 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
17050 | */ | |
17051 | ||
17052 | -#include <linux/delay.h> | |
17053 | -#include <linux/gfp.h> | |
17054 | -#include <linux/oom.h> | |
17055 | -#include <linux/smpboot.h> | |
17056 | -#include "../time/tick-internal.h" | |
17057 | - | |
17058 | #ifdef CONFIG_RCU_BOOST | |
17059 | ||
17060 | #include "../locking/rtmutex_common.h" | |
17061 | ||
17062 | -/* | |
17063 | - * Control variables for per-CPU and per-rcu_node kthreads. These | |
17064 | - * handle all flavors of RCU. | |
17065 | - */ | |
17066 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
17067 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
17068 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
17069 | -DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
17070 | - | |
17071 | #else /* #ifdef CONFIG_RCU_BOOST */ | |
17072 | ||
17073 | /* | |
17074 | @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
17075 | ||
17076 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
17077 | ||
17078 | +/* | |
17079 | + * Control variables for per-CPU and per-rcu_node kthreads. These | |
17080 | + * handle all flavors of RCU. | |
17081 | + */ | |
17082 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
17083 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
17084 | +DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
17085 | + | |
17086 | #ifdef CONFIG_RCU_NOCB_CPU | |
17087 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
17088 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
17089 | @@ -426,7 +419,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |
17090 | } | |
17091 | ||
17092 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
17093 | - if (in_irq() || in_serving_softirq()) { | |
17094 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
17095 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
17096 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
17097 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
17098 | @@ -632,15 +625,6 @@ static void rcu_preempt_check_callbacks(void) | |
17099 | t->rcu_read_unlock_special.b.need_qs = true; | |
17100 | } | |
17101 | ||
17102 | -#ifdef CONFIG_RCU_BOOST | |
17103 | - | |
17104 | -static void rcu_preempt_do_callbacks(void) | |
17105 | -{ | |
17106 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
17107 | -} | |
17108 | - | |
17109 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
17110 | - | |
17111 | /* | |
17112 | * Queue a preemptible-RCU callback for invocation after a grace period. | |
17113 | */ | |
17114 | @@ -829,6 +813,19 @@ void exit_rcu(void) | |
17115 | ||
17116 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
17117 | ||
17118 | +/* | |
17119 | + * If boosting, set rcuc kthreads to realtime priority. | |
17120 | + */ | |
17121 | +static void rcu_cpu_kthread_setup(unsigned int cpu) | |
17122 | +{ | |
17123 | +#ifdef CONFIG_RCU_BOOST | |
17124 | + struct sched_param sp; | |
17125 | + | |
17126 | + sp.sched_priority = kthread_prio; | |
17127 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
17128 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
17129 | +} | |
17130 | + | |
17131 | #ifdef CONFIG_RCU_BOOST | |
17132 | ||
17133 | #include "../locking/rtmutex_common.h" | |
17134 | @@ -860,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |
17135 | ||
17136 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | |
17137 | ||
17138 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
17139 | -{ | |
17140 | - /* | |
17141 | - * If the thread is yielding, only wake it when this | |
17142 | - * is invoked from idle | |
17143 | - */ | |
17144 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
17145 | - wake_up_process(t); | |
17146 | -} | |
17147 | - | |
17148 | /* | |
17149 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
17150 | * or ->boost_tasks, advancing the pointer to the next task in the | |
17151 | @@ -1013,23 +1000,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
17152 | } | |
17153 | ||
17154 | /* | |
17155 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
17156 | - */ | |
17157 | -static void invoke_rcu_callbacks_kthread(void) | |
17158 | -{ | |
17159 | - unsigned long flags; | |
17160 | - | |
17161 | - local_irq_save(flags); | |
17162 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
17163 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
17164 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
17165 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
17166 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
17167 | - } | |
17168 | - local_irq_restore(flags); | |
17169 | -} | |
17170 | - | |
17171 | -/* | |
17172 | * Is the current CPU running the RCU-callbacks kthread? | |
17173 | * Caller must have preemption disabled. | |
17174 | */ | |
17175 | @@ -1083,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
17176 | return 0; | |
17177 | } | |
17178 | ||
17179 | -static void rcu_kthread_do_work(void) | |
17180 | -{ | |
17181 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
17182 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
17183 | - rcu_preempt_do_callbacks(); | |
17184 | -} | |
17185 | - | |
17186 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
17187 | -{ | |
17188 | - struct sched_param sp; | |
17189 | - | |
17190 | - sp.sched_priority = kthread_prio; | |
17191 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
17192 | -} | |
17193 | - | |
17194 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
17195 | -{ | |
17196 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
17197 | -} | |
17198 | - | |
17199 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
17200 | -{ | |
17201 | - return __this_cpu_read(rcu_cpu_has_work); | |
17202 | -} | |
17203 | - | |
17204 | -/* | |
17205 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
17206 | - * RCU softirq used in flavors and configurations of RCU that do not | |
17207 | - * support RCU priority boosting. | |
17208 | - */ | |
17209 | -static void rcu_cpu_kthread(unsigned int cpu) | |
17210 | -{ | |
17211 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
17212 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
17213 | - int spincnt; | |
17214 | - | |
17215 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
17216 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
17217 | - local_bh_disable(); | |
17218 | - *statusp = RCU_KTHREAD_RUNNING; | |
17219 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
17220 | - local_irq_disable(); | |
17221 | - work = *workp; | |
17222 | - *workp = 0; | |
17223 | - local_irq_enable(); | |
17224 | - if (work) | |
17225 | - rcu_kthread_do_work(); | |
17226 | - local_bh_enable(); | |
17227 | - if (*workp == 0) { | |
17228 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
17229 | - *statusp = RCU_KTHREAD_WAITING; | |
17230 | - return; | |
17231 | - } | |
17232 | - } | |
17233 | - *statusp = RCU_KTHREAD_YIELDING; | |
17234 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
17235 | - schedule_timeout_interruptible(2); | |
17236 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
17237 | - *statusp = RCU_KTHREAD_WAITING; | |
17238 | -} | |
17239 | - | |
17240 | /* | |
17241 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
17242 | * served by the rcu_node in question. The CPU hotplug lock is still | |
17243 | @@ -1174,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |
17244 | free_cpumask_var(cm); | |
17245 | } | |
17246 | ||
17247 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
17248 | - .store = &rcu_cpu_kthread_task, | |
17249 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
17250 | - .thread_fn = rcu_cpu_kthread, | |
17251 | - .thread_comm = "rcuc/%u", | |
17252 | - .setup = rcu_cpu_kthread_setup, | |
17253 | - .park = rcu_cpu_kthread_park, | |
17254 | -}; | |
17255 | - | |
17256 | /* | |
17257 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
17258 | */ | |
17259 | static void __init rcu_spawn_boost_kthreads(void) | |
17260 | { | |
17261 | struct rcu_node *rnp; | |
17262 | - int cpu; | |
17263 | - | |
17264 | - for_each_possible_cpu(cpu) | |
17265 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
17266 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
17267 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
17268 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
17269 | } | |
17270 | @@ -1216,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
17271 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | |
17272 | } | |
17273 | ||
17274 | -static void invoke_rcu_callbacks_kthread(void) | |
17275 | -{ | |
17276 | - WARN_ON_ONCE(1); | |
17277 | -} | |
17278 | - | |
17279 | static bool rcu_is_callbacks_kthread(void) | |
17280 | { | |
17281 | return false; | |
17282 | @@ -1244,7 +1134,7 @@ static void rcu_prepare_kthreads(int cpu) | |
17283 | ||
17284 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
17285 | ||
17286 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
17287 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
17288 | ||
17289 | /* | |
17290 | * Check to see if any future RCU-related work will need to be done | |
17291 | @@ -1261,7 +1151,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
17292 | return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) | |
17293 | ? 0 : rcu_cpu_has_callbacks(NULL); | |
17294 | } | |
17295 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ | |
17296 | ||
17297 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
17298 | /* | |
17299 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
17300 | * after it. | |
17301 | @@ -1357,6 +1249,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |
17302 | return cbs_ready; | |
17303 | } | |
17304 | ||
17305 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17306 | + | |
17307 | /* | |
17308 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
17309 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
17310 | @@ -1402,6 +1296,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
17311 | *nextevt = basemono + dj * TICK_NSEC; | |
17312 | return 0; | |
17313 | } | |
17314 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
17315 | ||
17316 | /* | |
17317 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
17318 | diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c | |
1f39f580 | 17319 | index 4f6db7e6a117..ee02e1e1b3e5 100644 |
1a6e0f06 JK |
17320 | --- a/kernel/rcu/update.c |
17321 | +++ b/kernel/rcu/update.c | |
c7c16703 JK |
17322 | @@ -62,7 +62,7 @@ |
17323 | #ifndef CONFIG_TINY_RCU | |
17324 | module_param(rcu_expedited, int, 0); | |
17325 | module_param(rcu_normal, int, 0); | |
17326 | -static int rcu_normal_after_boot; | |
17327 | +static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
17328 | module_param(rcu_normal_after_boot, int, 0); | |
17329 | #endif /* #ifndef CONFIG_TINY_RCU */ | |
17330 | ||
1f39f580 | 17331 | @@ -132,8 +132,7 @@ bool rcu_gp_is_normal(void) |
c7c16703 JK |
17332 | } |
17333 | EXPORT_SYMBOL_GPL(rcu_gp_is_normal); | |
17334 | ||
17335 | -static atomic_t rcu_expedited_nesting = | |
17336 | - ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | |
17337 | +static atomic_t rcu_expedited_nesting = ATOMIC_INIT(1); | |
17338 | ||
17339 | /* | |
17340 | * Should normal grace-period primitives be expedited? Intended for | |
1f39f580 | 17341 | @@ -182,8 +181,7 @@ EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); |
c7c16703 JK |
17342 | */ |
17343 | void rcu_end_inkernel_boot(void) | |
17344 | { | |
17345 | - if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) | |
17346 | - rcu_unexpedite_gp(); | |
17347 | + rcu_unexpedite_gp(); | |
17348 | if (rcu_normal_after_boot) | |
17349 | WRITE_ONCE(rcu_normal, 1); | |
17350 | } | |
1f39f580 | 17351 | @@ -298,6 +296,7 @@ int rcu_read_lock_held(void) |
1a6e0f06 JK |
17352 | } |
17353 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
17354 | ||
17355 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17356 | /** | |
17357 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
17358 | * | |
1f39f580 | 17359 | @@ -324,6 +323,7 @@ int rcu_read_lock_bh_held(void) |
1a6e0f06 JK |
17360 | return in_softirq() || irqs_disabled(); |
17361 | } | |
17362 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
17363 | +#endif | |
17364 | ||
17365 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
17366 | ||
1a6e0f06 JK |
17367 | diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile |
17368 | index 5e59b832ae2b..7337a7f60e3f 100644 | |
17369 | --- a/kernel/sched/Makefile | |
17370 | +++ b/kernel/sched/Makefile | |
17371 | @@ -17,7 +17,7 @@ endif | |
17372 | ||
17373 | obj-y += core.o loadavg.o clock.o cputime.o | |
17374 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | |
17375 | -obj-y += wait.o swait.o completion.o idle.o | |
17376 | +obj-y += wait.o swait.o swork.o completion.o idle.o | |
17377 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | |
17378 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |
17379 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
17380 | diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c | |
17381 | index 8d0f35debf35..b62cf6400fe0 100644 | |
17382 | --- a/kernel/sched/completion.c | |
17383 | +++ b/kernel/sched/completion.c | |
17384 | @@ -30,10 +30,10 @@ void complete(struct completion *x) | |
17385 | { | |
17386 | unsigned long flags; | |
17387 | ||
17388 | - spin_lock_irqsave(&x->wait.lock, flags); | |
17389 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
17390 | x->done++; | |
17391 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
17392 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
17393 | + swake_up_locked(&x->wait); | |
17394 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
17395 | } | |
17396 | EXPORT_SYMBOL(complete); | |
17397 | ||
17398 | @@ -50,10 +50,10 @@ void complete_all(struct completion *x) | |
17399 | { | |
17400 | unsigned long flags; | |
17401 | ||
17402 | - spin_lock_irqsave(&x->wait.lock, flags); | |
17403 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
17404 | x->done += UINT_MAX/2; | |
17405 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
17406 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
17407 | + swake_up_all_locked(&x->wait); | |
17408 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
17409 | } | |
17410 | EXPORT_SYMBOL(complete_all); | |
17411 | ||
17412 | @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x, | |
17413 | long (*action)(long), long timeout, int state) | |
17414 | { | |
17415 | if (!x->done) { | |
17416 | - DECLARE_WAITQUEUE(wait, current); | |
17417 | + DECLARE_SWAITQUEUE(wait); | |
17418 | ||
17419 | - __add_wait_queue_tail_exclusive(&x->wait, &wait); | |
17420 | + __prepare_to_swait(&x->wait, &wait); | |
17421 | do { | |
17422 | if (signal_pending_state(state, current)) { | |
17423 | timeout = -ERESTARTSYS; | |
17424 | break; | |
17425 | } | |
17426 | __set_current_state(state); | |
17427 | - spin_unlock_irq(&x->wait.lock); | |
17428 | + raw_spin_unlock_irq(&x->wait.lock); | |
17429 | timeout = action(timeout); | |
17430 | - spin_lock_irq(&x->wait.lock); | |
17431 | + raw_spin_lock_irq(&x->wait.lock); | |
17432 | } while (!x->done && timeout); | |
17433 | - __remove_wait_queue(&x->wait, &wait); | |
17434 | + __finish_swait(&x->wait, &wait); | |
17435 | if (!x->done) | |
17436 | return timeout; | |
17437 | } | |
17438 | @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x, | |
17439 | { | |
17440 | might_sleep(); | |
17441 | ||
17442 | - spin_lock_irq(&x->wait.lock); | |
17443 | + raw_spin_lock_irq(&x->wait.lock); | |
17444 | timeout = do_wait_for_common(x, action, timeout, state); | |
17445 | - spin_unlock_irq(&x->wait.lock); | |
17446 | + raw_spin_unlock_irq(&x->wait.lock); | |
17447 | return timeout; | |
17448 | } | |
17449 | ||
17450 | @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x) | |
17451 | if (!READ_ONCE(x->done)) | |
17452 | return 0; | |
17453 | ||
17454 | - spin_lock_irqsave(&x->wait.lock, flags); | |
17455 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
17456 | if (!x->done) | |
17457 | ret = 0; | |
17458 | else | |
17459 | x->done--; | |
17460 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
17461 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
17462 | return ret; | |
17463 | } | |
17464 | EXPORT_SYMBOL(try_wait_for_completion); | |
17465 | @@ -311,7 +311,7 @@ bool completion_done(struct completion *x) | |
17466 | * after it's acquired the lock. | |
17467 | */ | |
17468 | smp_rmb(); | |
17469 | - spin_unlock_wait(&x->wait.lock); | |
17470 | + raw_spin_unlock_wait(&x->wait.lock); | |
17471 | return true; | |
17472 | } | |
17473 | EXPORT_SYMBOL(completion_done); | |
17474 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
7c18450a | 17475 | index 154fd689fe02..30b24f774198 100644 |
1a6e0f06 JK |
17476 | --- a/kernel/sched/core.c |
17477 | +++ b/kernel/sched/core.c | |
17478 | @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features = | |
17479 | * Number of tasks to iterate in a single balance run. | |
17480 | * Limited because this is done with IRQs disabled. | |
17481 | */ | |
17482 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17483 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
17484 | +#else | |
17485 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
17486 | +#endif | |
17487 | ||
17488 | /* | |
17489 | * period over which we average the RT time consumption, measured | |
17490 | @@ -345,6 +349,7 @@ static void init_rq_hrtick(struct rq *rq) | |
17491 | ||
17492 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
17493 | rq->hrtick_timer.function = hrtick; | |
17494 | + rq->hrtick_timer.irqsafe = 1; | |
17495 | } | |
17496 | #else /* CONFIG_SCHED_HRTICK */ | |
17497 | static inline void hrtick_clear(struct rq *rq) | |
17498 | @@ -449,7 +454,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |
17499 | head->lastp = &node->next; | |
17500 | } | |
17501 | ||
17502 | -void wake_up_q(struct wake_q_head *head) | |
17503 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
17504 | { | |
17505 | struct wake_q_node *node = head->first; | |
17506 | ||
17507 | @@ -466,7 +471,10 @@ void wake_up_q(struct wake_q_head *head) | |
17508 | * wake_up_process() implies a wmb() to pair with the queueing | |
17509 | * in wake_q_add() so as not to miss wakeups. | |
17510 | */ | |
17511 | - wake_up_process(task); | |
17512 | + if (sleeper) | |
17513 | + wake_up_lock_sleeper(task); | |
17514 | + else | |
17515 | + wake_up_process(task); | |
17516 | put_task_struct(task); | |
17517 | } | |
17518 | } | |
17519 | @@ -502,6 +510,38 @@ void resched_curr(struct rq *rq) | |
17520 | trace_sched_wake_idle_without_ipi(cpu); | |
17521 | } | |
17522 | ||
17523 | +#ifdef CONFIG_PREEMPT_LAZY | |
17524 | +void resched_curr_lazy(struct rq *rq) | |
17525 | +{ | |
17526 | + struct task_struct *curr = rq->curr; | |
17527 | + int cpu; | |
17528 | + | |
17529 | + if (!sched_feat(PREEMPT_LAZY)) { | |
17530 | + resched_curr(rq); | |
17531 | + return; | |
17532 | + } | |
17533 | + | |
17534 | + lockdep_assert_held(&rq->lock); | |
17535 | + | |
17536 | + if (test_tsk_need_resched(curr)) | |
17537 | + return; | |
17538 | + | |
17539 | + if (test_tsk_need_resched_lazy(curr)) | |
17540 | + return; | |
17541 | + | |
17542 | + set_tsk_need_resched_lazy(curr); | |
17543 | + | |
17544 | + cpu = cpu_of(rq); | |
17545 | + if (cpu == smp_processor_id()) | |
17546 | + return; | |
17547 | + | |
17548 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
17549 | + smp_mb(); | |
17550 | + if (!tsk_is_polling(curr)) | |
17551 | + smp_send_reschedule(cpu); | |
17552 | +} | |
17553 | +#endif | |
17554 | + | |
17555 | void resched_cpu(int cpu) | |
17556 | { | |
17557 | struct rq *rq = cpu_rq(cpu); | |
17558 | @@ -525,11 +565,14 @@ void resched_cpu(int cpu) | |
17559 | */ | |
17560 | int get_nohz_timer_target(void) | |
17561 | { | |
17562 | - int i, cpu = smp_processor_id(); | |
17563 | + int i, cpu; | |
17564 | struct sched_domain *sd; | |
17565 | ||
17566 | + preempt_disable_rt(); | |
17567 | + cpu = smp_processor_id(); | |
17568 | + | |
17569 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
17570 | - return cpu; | |
17571 | + goto preempt_en_rt; | |
17572 | ||
17573 | rcu_read_lock(); | |
17574 | for_each_domain(cpu, sd) { | |
17575 | @@ -548,6 +591,8 @@ int get_nohz_timer_target(void) | |
17576 | cpu = housekeeping_any_cpu(); | |
17577 | unlock: | |
17578 | rcu_read_unlock(); | |
17579 | +preempt_en_rt: | |
17580 | + preempt_enable_rt(); | |
17581 | return cpu; | |
17582 | } | |
17583 | /* | |
c7c16703 | 17584 | @@ -1100,6 +1145,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
1a6e0f06 JK |
17585 | |
17586 | lockdep_assert_held(&p->pi_lock); | |
17587 | ||
17588 | + if (__migrate_disabled(p)) { | |
17589 | + cpumask_copy(&p->cpus_allowed, new_mask); | |
17590 | + return; | |
17591 | + } | |
17592 | + | |
17593 | queued = task_on_rq_queued(p); | |
17594 | running = task_current(rq, p); | |
17595 | ||
c7c16703 JK |
17596 | @@ -1122,6 +1172,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) |
17597 | set_curr_task(rq, p); | |
1a6e0f06 JK |
17598 | } |
17599 | ||
17600 | +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); | |
17601 | +static DEFINE_MUTEX(sched_down_mutex); | |
17602 | +static cpumask_t sched_down_cpumask; | |
17603 | + | |
17604 | +void tell_sched_cpu_down_begin(int cpu) | |
17605 | +{ | |
17606 | + mutex_lock(&sched_down_mutex); | |
17607 | + cpumask_set_cpu(cpu, &sched_down_cpumask); | |
17608 | + mutex_unlock(&sched_down_mutex); | |
17609 | +} | |
17610 | + | |
17611 | +void tell_sched_cpu_down_done(int cpu) | |
17612 | +{ | |
17613 | + mutex_lock(&sched_down_mutex); | |
17614 | + cpumask_clear_cpu(cpu, &sched_down_cpumask); | |
17615 | + mutex_unlock(&sched_down_mutex); | |
17616 | +} | |
17617 | + | |
17618 | +/** | |
17619 | + * migrate_me - try to move the current task off this cpu | |
17620 | + * | |
17621 | + * Used by the pin_current_cpu() code to try to get tasks | |
17622 | + * to move off the current CPU as it is going down. | |
17623 | + * It will only move the task if the task isn't pinned to | |
17624 | + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) | |
17625 | + * and the task has to be in a RUNNING state. Otherwise the | |
17626 | + * movement of the task will wake it up (change its state | |
17627 | + * to running) when the task did not expect it. | |
17628 | + * | |
17629 | + * Returns 1 if it succeeded in moving the current task | |
17630 | + * 0 otherwise. | |
17631 | + */ | |
17632 | +int migrate_me(void) | |
17633 | +{ | |
17634 | + struct task_struct *p = current; | |
17635 | + struct migration_arg arg; | |
17636 | + struct cpumask *cpumask; | |
17637 | + struct cpumask *mask; | |
17638 | + unsigned int dest_cpu; | |
17639 | + struct rq_flags rf; | |
17640 | + struct rq *rq; | |
17641 | + | |
17642 | + /* | |
17643 | + * We can not migrate tasks bounded to a CPU or tasks not | |
17644 | + * running. The movement of the task will wake it up. | |
17645 | + */ | |
17646 | + if (p->flags & PF_NO_SETAFFINITY || p->state) | |
17647 | + return 0; | |
17648 | + | |
17649 | + mutex_lock(&sched_down_mutex); | |
17650 | + rq = task_rq_lock(p, &rf); | |
17651 | + | |
17652 | + cpumask = this_cpu_ptr(&sched_cpumasks); | |
17653 | + mask = &p->cpus_allowed; | |
17654 | + | |
17655 | + cpumask_andnot(cpumask, mask, &sched_down_cpumask); | |
17656 | + | |
17657 | + if (!cpumask_weight(cpumask)) { | |
17658 | + /* It's only on this CPU? */ | |
17659 | + task_rq_unlock(rq, p, &rf); | |
17660 | + mutex_unlock(&sched_down_mutex); | |
17661 | + return 0; | |
17662 | + } | |
17663 | + | |
17664 | + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); | |
17665 | + | |
17666 | + arg.task = p; | |
17667 | + arg.dest_cpu = dest_cpu; | |
17668 | + | |
17669 | + task_rq_unlock(rq, p, &rf); | |
17670 | + | |
17671 | + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | |
17672 | + tlb_migrate_finish(p->mm); | |
17673 | + mutex_unlock(&sched_down_mutex); | |
17674 | + | |
17675 | + return 1; | |
17676 | +} | |
17677 | + | |
17678 | /* | |
17679 | * Change a given task's CPU affinity. Migrate the thread to a | |
17680 | * proper CPU and schedule it away if the CPU it's executing on | |
c7c16703 | 17681 | @@ -1179,7 +1307,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, |
1a6e0f06 JK |
17682 | } |
17683 | ||
17684 | /* Can the task run on the task's current CPU? If so, we're done */ | |
17685 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
17686 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
17687 | goto out; | |
17688 | ||
17689 | dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); | |
c7c16703 | 17690 | @@ -1366,6 +1494,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) |
1a6e0f06 JK |
17691 | return ret; |
17692 | } | |
17693 | ||
17694 | +static bool check_task_state(struct task_struct *p, long match_state) | |
17695 | +{ | |
17696 | + bool match = false; | |
17697 | + | |
17698 | + raw_spin_lock_irq(&p->pi_lock); | |
17699 | + if (p->state == match_state || p->saved_state == match_state) | |
17700 | + match = true; | |
17701 | + raw_spin_unlock_irq(&p->pi_lock); | |
17702 | + | |
17703 | + return match; | |
17704 | +} | |
17705 | + | |
17706 | /* | |
17707 | * wait_task_inactive - wait for a thread to unschedule. | |
17708 | * | |
c7c16703 | 17709 | @@ -1410,7 +1550,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) |
1a6e0f06 JK |
17710 | * is actually now running somewhere else! |
17711 | */ | |
17712 | while (task_running(rq, p)) { | |
17713 | - if (match_state && unlikely(p->state != match_state)) | |
17714 | + if (match_state && !check_task_state(p, match_state)) | |
17715 | return 0; | |
17716 | cpu_relax(); | |
17717 | } | |
c7c16703 | 17718 | @@ -1425,7 +1565,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) |
1a6e0f06 JK |
17719 | running = task_running(rq, p); |
17720 | queued = task_on_rq_queued(p); | |
17721 | ncsw = 0; | |
17722 | - if (!match_state || p->state == match_state) | |
17723 | + if (!match_state || p->state == match_state || | |
17724 | + p->saved_state == match_state) | |
17725 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
17726 | task_rq_unlock(rq, p, &rf); | |
17727 | ||
c7c16703 | 17728 | @@ -1680,10 +1821,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl |
1a6e0f06 JK |
17729 | { |
17730 | activate_task(rq, p, en_flags); | |
17731 | p->on_rq = TASK_ON_RQ_QUEUED; | |
17732 | - | |
17733 | - /* if a worker is waking up, notify workqueue */ | |
17734 | - if (p->flags & PF_WQ_WORKER) | |
17735 | - wq_worker_waking_up(p, cpu_of(rq)); | |
17736 | } | |
17737 | ||
17738 | /* | |
c7c16703 | 17739 | @@ -2018,8 +2155,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
1a6e0f06 JK |
17740 | */ |
17741 | smp_mb__before_spinlock(); | |
17742 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
17743 | - if (!(p->state & state)) | |
17744 | + if (!(p->state & state)) { | |
17745 | + /* | |
17746 | + * The task might be running due to a spinlock sleeper | |
17747 | + * wakeup. Check the saved state and set it to running | |
17748 | + * if the wakeup condition is true. | |
17749 | + */ | |
17750 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
17751 | + if (p->saved_state & state) { | |
17752 | + p->saved_state = TASK_RUNNING; | |
17753 | + success = 1; | |
17754 | + } | |
17755 | + } | |
17756 | goto out; | |
17757 | + } | |
17758 | + | |
17759 | + /* | |
17760 | + * If this is a regular wakeup, then we can unconditionally | |
17761 | + * clear the saved state of a "lock sleeper". | |
17762 | + */ | |
17763 | + if (!(wake_flags & WF_LOCK_SLEEPER)) | |
17764 | + p->saved_state = TASK_RUNNING; | |
17765 | ||
17766 | trace_sched_waking(p); | |
17767 | ||
c7c16703 | 17768 | @@ -2102,53 +2258,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
1a6e0f06 JK |
17769 | } |
17770 | ||
17771 | /** | |
17772 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
17773 | - * @p: the thread to be awakened | |
c7c16703 | 17774 | - * @cookie: context's cookie for pinning |
1a6e0f06 JK |
17775 | - * |
17776 | - * Put @p on the run-queue if it's not already there. The caller must | |
17777 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
17778 | - * the current task. | |
17779 | - */ | |
17780 | -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie) | |
17781 | -{ | |
17782 | - struct rq *rq = task_rq(p); | |
17783 | - | |
17784 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
17785 | - WARN_ON_ONCE(p == current)) | |
17786 | - return; | |
17787 | - | |
17788 | - lockdep_assert_held(&rq->lock); | |
17789 | - | |
17790 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
17791 | - /* | |
17792 | - * This is OK, because current is on_cpu, which avoids it being | |
17793 | - * picked for load-balance and preemption/IRQs are still | |
17794 | - * disabled avoiding further scheduler activity on it and we've | |
17795 | - * not yet picked a replacement task. | |
17796 | - */ | |
17797 | - lockdep_unpin_lock(&rq->lock, cookie); | |
17798 | - raw_spin_unlock(&rq->lock); | |
17799 | - raw_spin_lock(&p->pi_lock); | |
17800 | - raw_spin_lock(&rq->lock); | |
17801 | - lockdep_repin_lock(&rq->lock, cookie); | |
17802 | - } | |
17803 | - | |
17804 | - if (!(p->state & TASK_NORMAL)) | |
17805 | - goto out; | |
17806 | - | |
17807 | - trace_sched_waking(p); | |
17808 | - | |
17809 | - if (!task_on_rq_queued(p)) | |
17810 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP); | |
17811 | - | |
17812 | - ttwu_do_wakeup(rq, p, 0, cookie); | |
c7c16703 | 17813 | - ttwu_stat(p, smp_processor_id(), 0); |
1a6e0f06 JK |
17814 | -out: |
17815 | - raw_spin_unlock(&p->pi_lock); | |
17816 | -} | |
17817 | - | |
17818 | -/** | |
17819 | * wake_up_process - Wake up a specific process | |
17820 | * @p: The process to be woken up. | |
17821 | * | |
c7c16703 | 17822 | @@ -2166,6 +2275,18 @@ int wake_up_process(struct task_struct *p) |
1a6e0f06 JK |
17823 | } |
17824 | EXPORT_SYMBOL(wake_up_process); | |
17825 | ||
17826 | +/** | |
17827 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
17828 | + * @p: The process to be woken up. | |
17829 | + * | |
17830 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate | |
17831 | + * the nature of the wakeup. | |
17832 | + */ | |
17833 | +int wake_up_lock_sleeper(struct task_struct *p) | |
17834 | +{ | |
17835 | + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); | |
17836 | +} | |
17837 | + | |
17838 | int wake_up_state(struct task_struct *p, unsigned int state) | |
17839 | { | |
17840 | return try_to_wake_up(p, state, 0); | |
c7c16703 | 17841 | @@ -2442,6 +2563,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) |
1a6e0f06 JK |
17842 | p->on_cpu = 0; |
17843 | #endif | |
17844 | init_task_preempt_count(p); | |
17845 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
17846 | + task_thread_info(p)->preempt_lazy_count = 0; | |
17847 | +#endif | |
17848 | #ifdef CONFIG_SMP | |
17849 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
17850 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
c7c16703 | 17851 | @@ -2770,21 +2894,16 @@ static struct rq *finish_task_switch(struct task_struct *prev) |
1a6e0f06 JK |
17852 | finish_arch_post_lock_switch(); |
17853 | ||
17854 | fire_sched_in_preempt_notifiers(current); | |
17855 | + /* | |
17856 | + * We use mmdrop_delayed() here so we don't have to do the | |
17857 | + * full __mmdrop() when we are the last user. | |
17858 | + */ | |
17859 | if (mm) | |
17860 | - mmdrop(mm); | |
17861 | + mmdrop_delayed(mm); | |
17862 | if (unlikely(prev_state == TASK_DEAD)) { | |
17863 | if (prev->sched_class->task_dead) | |
17864 | prev->sched_class->task_dead(prev); | |
c7c16703 JK |
17865 | |
17866 | - /* | |
17867 | - * Remove function-return probe instances associated with this | |
17868 | - * task and put them back on the free list. | |
17869 | - */ | |
17870 | - kprobe_flush_task(prev); | |
17871 | - | |
17872 | - /* Task is done with its stack. */ | |
17873 | - put_task_stack(prev); | |
17874 | - | |
17875 | put_task_struct(prev); | |
17876 | } | |
17877 | ||
17878 | @@ -3252,6 +3371,77 @@ static inline void schedule_debug(struct task_struct *prev) | |
17879 | schedstat_inc(this_rq()->sched_count); | |
1a6e0f06 JK |
17880 | } |
17881 | ||
17882 | +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) | |
17883 | + | |
17884 | +void migrate_disable(void) | |
17885 | +{ | |
17886 | + struct task_struct *p = current; | |
17887 | + | |
17888 | + if (in_atomic() || irqs_disabled()) { | |
17889 | +#ifdef CONFIG_SCHED_DEBUG | |
17890 | + p->migrate_disable_atomic++; | |
17891 | +#endif | |
17892 | + return; | |
17893 | + } | |
17894 | + | |
17895 | +#ifdef CONFIG_SCHED_DEBUG | |
17896 | + if (unlikely(p->migrate_disable_atomic)) { | |
17897 | + tracing_off(); | |
17898 | + WARN_ON_ONCE(1); | |
17899 | + } | |
17900 | +#endif | |
17901 | + | |
17902 | + if (p->migrate_disable) { | |
17903 | + p->migrate_disable++; | |
17904 | + return; | |
17905 | + } | |
17906 | + | |
17907 | + preempt_disable(); | |
17908 | + preempt_lazy_disable(); | |
17909 | + pin_current_cpu(); | |
17910 | + p->migrate_disable = 1; | |
17911 | + preempt_enable(); | |
17912 | +} | |
17913 | +EXPORT_SYMBOL(migrate_disable); | |
17914 | + | |
17915 | +void migrate_enable(void) | |
17916 | +{ | |
17917 | + struct task_struct *p = current; | |
17918 | + | |
17919 | + if (in_atomic() || irqs_disabled()) { | |
17920 | +#ifdef CONFIG_SCHED_DEBUG | |
17921 | + p->migrate_disable_atomic--; | |
17922 | +#endif | |
17923 | + return; | |
17924 | + } | |
17925 | + | |
17926 | +#ifdef CONFIG_SCHED_DEBUG | |
17927 | + if (unlikely(p->migrate_disable_atomic)) { | |
17928 | + tracing_off(); | |
17929 | + WARN_ON_ONCE(1); | |
17930 | + } | |
17931 | +#endif | |
17932 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
17933 | + | |
17934 | + if (p->migrate_disable > 1) { | |
17935 | + p->migrate_disable--; | |
17936 | + return; | |
17937 | + } | |
17938 | + | |
17939 | + preempt_disable(); | |
17940 | + /* | |
17941 | + * Clearing migrate_disable causes tsk_cpus_allowed to | |
17942 | + * show the tasks original cpu affinity. | |
17943 | + */ | |
17944 | + p->migrate_disable = 0; | |
17945 | + | |
17946 | + unpin_current_cpu(); | |
17947 | + preempt_enable(); | |
17948 | + preempt_lazy_enable(); | |
17949 | +} | |
17950 | +EXPORT_SYMBOL(migrate_enable); | |
17951 | +#endif | |
17952 | + | |
17953 | /* | |
17954 | * Pick up the highest-prio task: | |
17955 | */ | |
c7c16703 | 17956 | @@ -3368,19 +3558,6 @@ static void __sched notrace __schedule(bool preempt) |
1a6e0f06 JK |
17957 | } else { |
17958 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | |
17959 | prev->on_rq = 0; | |
17960 | - | |
17961 | - /* | |
17962 | - * If a worker went to sleep, notify and ask workqueue | |
17963 | - * whether it wants to wake up a task to maintain | |
17964 | - * concurrency. | |
17965 | - */ | |
17966 | - if (prev->flags & PF_WQ_WORKER) { | |
17967 | - struct task_struct *to_wakeup; | |
17968 | - | |
17969 | - to_wakeup = wq_worker_sleeping(prev); | |
17970 | - if (to_wakeup) | |
17971 | - try_to_wake_up_local(to_wakeup, cookie); | |
17972 | - } | |
17973 | } | |
17974 | switch_count = &prev->nvcsw; | |
17975 | } | |
c7c16703 | 17976 | @@ -3390,6 +3567,7 @@ static void __sched notrace __schedule(bool preempt) |
1a6e0f06 JK |
17977 | |
17978 | next = pick_next_task(rq, prev, cookie); | |
17979 | clear_tsk_need_resched(prev); | |
17980 | + clear_tsk_need_resched_lazy(prev); | |
17981 | clear_preempt_need_resched(); | |
17982 | rq->clock_skip_update = 0; | |
17983 | ||
c7c16703 | 17984 | @@ -3437,9 +3615,20 @@ void __noreturn do_task_dead(void) |
1a6e0f06 JK |
17985 | |
17986 | static inline void sched_submit_work(struct task_struct *tsk) | |
17987 | { | |
17988 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
17989 | + if (!tsk->state) | |
17990 | return; | |
17991 | /* | |
17992 | + * If a worker went to sleep, notify and ask workqueue whether | |
17993 | + * it wants to wake up a task to maintain concurrency. | |
17994 | + */ | |
17995 | + if (tsk->flags & PF_WQ_WORKER) | |
17996 | + wq_worker_sleeping(tsk); | |
17997 | + | |
17998 | + | |
17999 | + if (tsk_is_pi_blocked(tsk)) | |
18000 | + return; | |
18001 | + | |
18002 | + /* | |
18003 | * If we are going to sleep and we have plugged IO queued, | |
18004 | * make sure to submit it to avoid deadlocks. | |
18005 | */ | |
c7c16703 | 18006 | @@ -3447,6 +3636,12 @@ static inline void sched_submit_work(struct task_struct *tsk) |
1a6e0f06 JK |
18007 | blk_schedule_flush_plug(tsk); |
18008 | } | |
18009 | ||
18010 | +static void sched_update_worker(struct task_struct *tsk) | |
18011 | +{ | |
18012 | + if (tsk->flags & PF_WQ_WORKER) | |
18013 | + wq_worker_running(tsk); | |
18014 | +} | |
18015 | + | |
18016 | asmlinkage __visible void __sched schedule(void) | |
18017 | { | |
18018 | struct task_struct *tsk = current; | |
c7c16703 | 18019 | @@ -3457,6 +3652,7 @@ asmlinkage __visible void __sched schedule(void) |
1a6e0f06 JK |
18020 | __schedule(false); |
18021 | sched_preempt_enable_no_resched(); | |
18022 | } while (need_resched()); | |
18023 | + sched_update_worker(tsk); | |
18024 | } | |
18025 | EXPORT_SYMBOL(schedule); | |
18026 | ||
c7c16703 | 18027 | @@ -3520,6 +3716,30 @@ static void __sched notrace preempt_schedule_common(void) |
1a6e0f06 JK |
18028 | } while (need_resched()); |
18029 | } | |
18030 | ||
18031 | +#ifdef CONFIG_PREEMPT_LAZY | |
18032 | +/* | |
18033 | + * If TIF_NEED_RESCHED is then we allow to be scheduled away since this is | |
18034 | + * set by a RT task. Oterwise we try to avoid beeing scheduled out as long as | |
18035 | + * preempt_lazy_count counter >0. | |
18036 | + */ | |
18037 | +static __always_inline int preemptible_lazy(void) | |
18038 | +{ | |
18039 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
18040 | + return 1; | |
18041 | + if (current_thread_info()->preempt_lazy_count) | |
18042 | + return 0; | |
18043 | + return 1; | |
18044 | +} | |
18045 | + | |
18046 | +#else | |
18047 | + | |
18048 | +static inline int preemptible_lazy(void) | |
18049 | +{ | |
18050 | + return 1; | |
18051 | +} | |
18052 | + | |
18053 | +#endif | |
18054 | + | |
18055 | #ifdef CONFIG_PREEMPT | |
18056 | /* | |
18057 | * this is the entry point to schedule() from in-kernel preemption | |
c7c16703 | 18058 | @@ -3534,7 +3754,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) |
1a6e0f06 JK |
18059 | */ |
18060 | if (likely(!preemptible())) | |
18061 | return; | |
18062 | - | |
18063 | + if (!preemptible_lazy()) | |
18064 | + return; | |
18065 | preempt_schedule_common(); | |
18066 | } | |
18067 | NOKPROBE_SYMBOL(preempt_schedule); | |
c7c16703 | 18068 | @@ -3561,6 +3782,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
1a6e0f06 JK |
18069 | if (likely(!preemptible())) |
18070 | return; | |
18071 | ||
18072 | + if (!preemptible_lazy()) | |
18073 | + return; | |
18074 | + | |
18075 | do { | |
18076 | /* | |
18077 | * Because the function tracer can trace preempt_count_sub() | |
c7c16703 | 18078 | @@ -3583,7 +3807,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) |
1a6e0f06 JK |
18079 | * an infinite recursion. |
18080 | */ | |
18081 | prev_ctx = exception_enter(); | |
18082 | + /* | |
18083 | + * The add/subtract must not be traced by the function | |
18084 | + * tracer. But we still want to account for the | |
18085 | + * preempt off latency tracer. Since the _notrace versions | |
18086 | + * of add/subtract skip the accounting for latency tracer | |
18087 | + * we must force it manually. | |
18088 | + */ | |
18089 | + start_critical_timings(); | |
18090 | __schedule(true); | |
18091 | + stop_critical_timings(); | |
18092 | exception_exit(prev_ctx); | |
18093 | ||
18094 | preempt_latency_stop(1); | |
7c18450a JK |
18095 | @@ -3629,10 +3862,25 @@ EXPORT_SYMBOL(default_wake_function); |
18096 | ||
18097 | #ifdef CONFIG_RT_MUTEXES | |
18098 | ||
18099 | +static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) | |
18100 | +{ | |
18101 | + if (pi_task) | |
18102 | + prio = min(prio, pi_task->prio); | |
18103 | + | |
18104 | + return prio; | |
18105 | +} | |
18106 | + | |
18107 | +static inline int rt_effective_prio(struct task_struct *p, int prio) | |
18108 | +{ | |
18109 | + struct task_struct *pi_task = rt_mutex_get_top_task(p); | |
18110 | + | |
18111 | + return __rt_effective_prio(pi_task, prio); | |
18112 | +} | |
18113 | + | |
18114 | /* | |
18115 | * rt_mutex_setprio - set the current priority of a task | |
18116 | - * @p: task | |
18117 | - * @prio: prio value (kernel-internal form) | |
18118 | + * @p: task to boost | |
18119 | + * @pi_task: donor task | |
18120 | * | |
18121 | * This function changes the 'effective' priority of a task. It does | |
18122 | * not touch ->normal_prio like __setscheduler(). | |
18123 | @@ -3640,16 +3888,40 @@ EXPORT_SYMBOL(default_wake_function); | |
18124 | * Used by the rt_mutex code to implement priority inheritance | |
18125 | * logic. Call site only calls if the priority of the task changed. | |
18126 | */ | |
18127 | -void rt_mutex_setprio(struct task_struct *p, int prio) | |
18128 | +void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task) | |
18129 | { | |
18130 | - int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; | |
18131 | + int prio, oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE; | |
18132 | const struct sched_class *prev_class; | |
18133 | struct rq_flags rf; | |
18134 | struct rq *rq; | |
18135 | ||
18136 | - BUG_ON(prio > MAX_PRIO); | |
18137 | + /* XXX used to be waiter->prio, not waiter->task->prio */ | |
18138 | + prio = __rt_effective_prio(pi_task, p->normal_prio); | |
18139 | + | |
18140 | + /* | |
18141 | + * If nothing changed; bail early. | |
18142 | + */ | |
18143 | + if (p->pi_top_task == pi_task && prio == p->prio && !dl_prio(prio)) | |
18144 | + return; | |
18145 | ||
18146 | rq = __task_rq_lock(p, &rf); | |
18147 | + /* | |
18148 | + * Set under pi_lock && rq->lock, such that the value can be used under | |
18149 | + * either lock. | |
18150 | + * | |
18151 | + * Note that there is loads of tricky to make this pointer cache work | |
18152 | + * right. rt_mutex_slowunlock()+rt_mutex_postunlock() work together to | |
18153 | + * ensure a task is de-boosted (pi_task is set to NULL) before the | |
18154 | + * task is allowed to run again (and can exit). This ensures the pointer | |
18155 | + * points to a blocked task -- which guaratees the task is present. | |
18156 | + */ | |
18157 | + p->pi_top_task = pi_task; | |
18158 | + | |
18159 | + /* | |
18160 | + * For FIFO/RR we only need to set prio, if that matches we're done. | |
18161 | + */ | |
18162 | + if (prio == p->prio && !dl_prio(prio)) | |
18163 | + goto out_unlock; | |
18164 | ||
18165 | /* | |
18166 | * Idle task boosting is a nono in general. There is one | |
18167 | @@ -3669,7 +3941,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |
18168 | goto out_unlock; | |
18169 | } | |
18170 | ||
18171 | - trace_sched_pi_setprio(p, prio); | |
18172 | + trace_sched_pi_setprio(p, pi_task); | |
18173 | oldprio = p->prio; | |
18174 | ||
18175 | if (oldprio == prio) | |
18176 | @@ -3693,7 +3965,6 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |
18177 | * running task | |
18178 | */ | |
18179 | if (dl_prio(prio)) { | |
18180 | - struct task_struct *pi_task = rt_mutex_get_top_task(p); | |
18181 | if (!dl_prio(p->normal_prio) || | |
18182 | (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) { | |
18183 | p->dl.dl_boosted = 1; | |
18184 | @@ -3730,6 +4001,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |
18185 | balance_callback(rq); | |
18186 | preempt_enable(); | |
18187 | } | |
18188 | +#else | |
18189 | +static inline int rt_effective_prio(struct task_struct *p, int prio) | |
18190 | +{ | |
18191 | + return prio; | |
18192 | +} | |
18193 | #endif | |
18194 | ||
18195 | void set_user_nice(struct task_struct *p, long nice) | |
18196 | @@ -3974,10 +4250,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, | |
18197 | * Keep a potential priority boosting if called from | |
18198 | * sched_setscheduler(). | |
18199 | */ | |
18200 | + p->prio = normal_prio(p); | |
18201 | if (keep_boost) | |
18202 | - p->prio = rt_mutex_get_effective_prio(p, normal_prio(p)); | |
18203 | - else | |
18204 | - p->prio = normal_prio(p); | |
18205 | + p->prio = rt_effective_prio(p, p->prio); | |
18206 | ||
18207 | if (dl_prio(p->prio)) | |
18208 | p->sched_class = &dl_sched_class; | |
18209 | @@ -4264,7 +4539,7 @@ static int __sched_setscheduler(struct task_struct *p, | |
18210 | * the runqueue. This will be done when the task deboost | |
18211 | * itself. | |
18212 | */ | |
18213 | - new_effective_prio = rt_mutex_get_effective_prio(p, newprio); | |
18214 | + new_effective_prio = rt_effective_prio(p, newprio); | |
18215 | if (new_effective_prio == oldprio) | |
18216 | queue_flags &= ~DEQUEUE_MOVE; | |
18217 | } | |
18218 | @@ -4939,6 +5214,7 @@ int __cond_resched_lock(spinlock_t *lock) | |
1a6e0f06 JK |
18219 | } |
18220 | EXPORT_SYMBOL(__cond_resched_lock); | |
18221 | ||
18222 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
18223 | int __sched __cond_resched_softirq(void) | |
18224 | { | |
18225 | BUG_ON(!in_softirq()); | |
7c18450a | 18226 | @@ -4952,6 +5228,7 @@ int __sched __cond_resched_softirq(void) |
1a6e0f06 JK |
18227 | return 0; |
18228 | } | |
18229 | EXPORT_SYMBOL(__cond_resched_softirq); | |
18230 | +#endif | |
18231 | ||
18232 | /** | |
18233 | * yield - yield the current processor to other threads. | |
7c18450a | 18234 | @@ -5315,7 +5592,9 @@ void init_idle(struct task_struct *idle, int cpu) |
1a6e0f06 JK |
18235 | |
18236 | /* Set the preempt count _outside_ the spinlocks! */ | |
18237 | init_idle_preempt_count(idle, cpu); | |
18238 | - | |
18239 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
18240 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
18241 | +#endif | |
18242 | /* | |
18243 | * The idle tasks have their own, simple scheduling class: | |
18244 | */ | |
7c18450a | 18245 | @@ -5458,6 +5737,8 @@ void sched_setnuma(struct task_struct *p, int nid) |
1a6e0f06 JK |
18246 | #endif /* CONFIG_NUMA_BALANCING */ |
18247 | ||
18248 | #ifdef CONFIG_HOTPLUG_CPU | |
18249 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
18250 | + | |
18251 | /* | |
18252 | * Ensures that the idle task is using init_mm right before its cpu goes | |
18253 | * offline. | |
7c18450a | 18254 | @@ -5472,7 +5753,12 @@ void idle_task_exit(void) |
1a6e0f06 JK |
18255 | switch_mm_irqs_off(mm, &init_mm, current); |
18256 | finish_arch_post_lock_switch(); | |
18257 | } | |
18258 | - mmdrop(mm); | |
18259 | + /* | |
18260 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
18261 | + * call mmdrop() nor mmdrop_delayed() from here. | |
18262 | + */ | |
18263 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
18264 | + | |
18265 | } | |
18266 | ||
18267 | /* | |
7c18450a | 18268 | @@ -7418,6 +7704,10 @@ int sched_cpu_dying(unsigned int cpu) |
1a6e0f06 JK |
18269 | update_max_interval(); |
18270 | nohz_balance_exit_idle(cpu); | |
18271 | hrtick_clear(rq); | |
18272 | + if (per_cpu(idle_last_mm, cpu)) { | |
18273 | + mmdrop_delayed(per_cpu(idle_last_mm, cpu)); | |
18274 | + per_cpu(idle_last_mm, cpu) = NULL; | |
18275 | + } | |
18276 | return 0; | |
18277 | } | |
18278 | #endif | |
7c18450a | 18279 | @@ -7698,7 +7988,7 @@ void __init sched_init(void) |
1a6e0f06 JK |
18280 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
18281 | static inline int preempt_count_equals(int preempt_offset) | |
18282 | { | |
18283 | - int nested = preempt_count() + rcu_preempt_depth(); | |
18284 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
18285 | ||
18286 | return (nested == preempt_offset); | |
18287 | } | |
18288 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c | |
33c7bf0f | 18289 | index c95c5122b105..e00accf92a4b 100644 |
1a6e0f06 JK |
18290 | --- a/kernel/sched/deadline.c |
18291 | +++ b/kernel/sched/deadline.c | |
c7c16703 | 18292 | @@ -687,6 +687,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) |
1a6e0f06 JK |
18293 | |
18294 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
18295 | timer->function = dl_task_timer; | |
18296 | + timer->irqsafe = 1; | |
18297 | } | |
18298 | ||
18299 | static | |
18300 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c | |
c7c16703 | 18301 | index fa178b62ea79..935224123441 100644 |
1a6e0f06 JK |
18302 | --- a/kernel/sched/debug.c |
18303 | +++ b/kernel/sched/debug.c | |
c7c16703 | 18304 | @@ -558,6 +558,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
1a6e0f06 JK |
18305 | P(rt_throttled); |
18306 | PN(rt_time); | |
18307 | PN(rt_runtime); | |
18308 | +#ifdef CONFIG_SMP | |
18309 | + P(rt_nr_migratory); | |
18310 | +#endif | |
18311 | ||
18312 | #undef PN | |
18313 | #undef P | |
c7c16703 | 18314 | @@ -953,6 +956,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) |
1a6e0f06 JK |
18315 | #endif |
18316 | P(policy); | |
18317 | P(prio); | |
18318 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18319 | + P(migrate_disable); | |
18320 | +#endif | |
18321 | + P(nr_cpus_allowed); | |
c7c16703 | 18322 | #undef PN_SCHEDSTAT |
1a6e0f06 JK |
18323 | #undef PN |
18324 | #undef __PN | |
1a6e0f06 | 18325 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c |
c7c16703 | 18326 | index c242944f5cbd..4aeb2e2e41bc 100644 |
1a6e0f06 JK |
18327 | --- a/kernel/sched/fair.c |
18328 | +++ b/kernel/sched/fair.c | |
c7c16703 | 18329 | @@ -3518,7 +3518,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
1a6e0f06 JK |
18330 | ideal_runtime = sched_slice(cfs_rq, curr); |
18331 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
18332 | if (delta_exec > ideal_runtime) { | |
18333 | - resched_curr(rq_of(cfs_rq)); | |
18334 | + resched_curr_lazy(rq_of(cfs_rq)); | |
18335 | /* | |
18336 | * The current task ran long enough, ensure it doesn't get | |
18337 | * re-elected due to buddy favours. | |
c7c16703 | 18338 | @@ -3542,7 +3542,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) |
1a6e0f06 JK |
18339 | return; |
18340 | ||
18341 | if (delta > ideal_runtime) | |
18342 | - resched_curr(rq_of(cfs_rq)); | |
18343 | + resched_curr_lazy(rq_of(cfs_rq)); | |
18344 | } | |
18345 | ||
18346 | static void | |
c7c16703 | 18347 | @@ -3684,7 +3684,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) |
1a6e0f06 JK |
18348 | * validating it and just reschedule. |
18349 | */ | |
18350 | if (queued) { | |
18351 | - resched_curr(rq_of(cfs_rq)); | |
18352 | + resched_curr_lazy(rq_of(cfs_rq)); | |
18353 | return; | |
18354 | } | |
18355 | /* | |
c7c16703 | 18356 | @@ -3866,7 +3866,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) |
1a6e0f06 JK |
18357 | * hierarchy can be throttled |
18358 | */ | |
18359 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
18360 | - resched_curr(rq_of(cfs_rq)); | |
18361 | + resched_curr_lazy(rq_of(cfs_rq)); | |
18362 | } | |
18363 | ||
18364 | static __always_inline | |
c7c16703 | 18365 | @@ -4494,7 +4494,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) |
1a6e0f06 JK |
18366 | |
18367 | if (delta < 0) { | |
18368 | if (rq->curr == p) | |
18369 | - resched_curr(rq); | |
18370 | + resched_curr_lazy(rq); | |
18371 | return; | |
18372 | } | |
18373 | hrtick_start(rq, delta); | |
c7c16703 | 18374 | @@ -5905,7 +5905,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ |
1a6e0f06 JK |
18375 | return; |
18376 | ||
18377 | preempt: | |
18378 | - resched_curr(rq); | |
18379 | + resched_curr_lazy(rq); | |
18380 | /* | |
18381 | * Only set the backward buddy when the current task is still | |
18382 | * on the rq. This can happen when a wakeup gets interleaved | |
c7c16703 | 18383 | @@ -8631,7 +8631,7 @@ static void task_fork_fair(struct task_struct *p) |
1a6e0f06 JK |
18384 | * 'current' within the tree based on its new key value. |
18385 | */ | |
18386 | swap(curr->vruntime, se->vruntime); | |
18387 | - resched_curr(rq); | |
18388 | + resched_curr_lazy(rq); | |
18389 | } | |
18390 | ||
18391 | se->vruntime -= cfs_rq->min_vruntime; | |
c7c16703 | 18392 | @@ -8655,7 +8655,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
1a6e0f06 JK |
18393 | */ |
18394 | if (rq->curr == p) { | |
18395 | if (p->prio > oldprio) | |
18396 | - resched_curr(rq); | |
18397 | + resched_curr_lazy(rq); | |
18398 | } else | |
18399 | check_preempt_curr(rq, p, 0); | |
18400 | } | |
18401 | diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
18402 | index 69631fa46c2f..6d28fcd08872 100644 | |
18403 | --- a/kernel/sched/features.h | |
18404 | +++ b/kernel/sched/features.h | |
18405 | @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true) | |
18406 | */ | |
18407 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
18408 | ||
18409 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18410 | +SCHED_FEAT(TTWU_QUEUE, false) | |
18411 | +# ifdef CONFIG_PREEMPT_LAZY | |
18412 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
18413 | +# endif | |
18414 | +#else | |
18415 | + | |
18416 | /* | |
18417 | * Queue remote wakeups on the target CPU and process them | |
18418 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
18419 | */ | |
18420 | SCHED_FEAT(TTWU_QUEUE, true) | |
18421 | +#endif | |
18422 | ||
18423 | #ifdef HAVE_RT_PUSH_IPI | |
18424 | /* | |
18425 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c | |
33c7bf0f | 18426 | index f139f22ce30d..b0691f4e7d49 100644 |
1a6e0f06 JK |
18427 | --- a/kernel/sched/rt.c |
18428 | +++ b/kernel/sched/rt.c | |
18429 | @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |
18430 | ||
18431 | hrtimer_init(&rt_b->rt_period_timer, | |
18432 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
18433 | + rt_b->rt_period_timer.irqsafe = 1; | |
18434 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
18435 | } | |
18436 | ||
18437 | @@ -101,6 +102,7 @@ void init_rt_rq(struct rt_rq *rt_rq) | |
18438 | rt_rq->push_cpu = nr_cpu_ids; | |
18439 | raw_spin_lock_init(&rt_rq->push_lock); | |
18440 | init_irq_work(&rt_rq->push_work, push_irq_work_func); | |
18441 | + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ; | |
18442 | #endif | |
18443 | #endif /* CONFIG_SMP */ | |
18444 | /* We start is dequeued state, because no RT tasks are queued */ | |
18445 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
c7c16703 | 18446 | index 055f935d4421..19324ac27026 100644 |
1a6e0f06 JK |
18447 | --- a/kernel/sched/sched.h |
18448 | +++ b/kernel/sched/sched.h | |
c7c16703 | 18449 | @@ -1163,6 +1163,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
1a6e0f06 JK |
18450 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ |
18451 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
18452 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
18453 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
18454 | ||
18455 | /* | |
18456 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
c7c16703 | 18457 | @@ -1346,6 +1347,15 @@ extern void init_sched_fair_class(void); |
1a6e0f06 JK |
18458 | extern void resched_curr(struct rq *rq); |
18459 | extern void resched_cpu(int cpu); | |
18460 | ||
18461 | +#ifdef CONFIG_PREEMPT_LAZY | |
18462 | +extern void resched_curr_lazy(struct rq *rq); | |
18463 | +#else | |
18464 | +static inline void resched_curr_lazy(struct rq *rq) | |
18465 | +{ | |
18466 | + resched_curr(rq); | |
18467 | +} | |
18468 | +#endif | |
18469 | + | |
18470 | extern struct rt_bandwidth def_rt_bandwidth; | |
18471 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
18472 | ||
18473 | diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c | |
18474 | index 82f0dff90030..ef027ff3250a 100644 | |
18475 | --- a/kernel/sched/swait.c | |
18476 | +++ b/kernel/sched/swait.c | |
18477 | @@ -1,5 +1,6 @@ | |
18478 | #include <linux/sched.h> | |
18479 | #include <linux/swait.h> | |
18480 | +#include <linux/suspend.h> | |
18481 | ||
18482 | void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
18483 | struct lock_class_key *key) | |
18484 | @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q) | |
18485 | } | |
18486 | EXPORT_SYMBOL(swake_up_locked); | |
18487 | ||
18488 | +void swake_up_all_locked(struct swait_queue_head *q) | |
18489 | +{ | |
18490 | + struct swait_queue *curr; | |
18491 | + int wakes = 0; | |
18492 | + | |
18493 | + while (!list_empty(&q->task_list)) { | |
18494 | + | |
18495 | + curr = list_first_entry(&q->task_list, typeof(*curr), | |
18496 | + task_list); | |
18497 | + wake_up_process(curr->task); | |
18498 | + list_del_init(&curr->task_list); | |
18499 | + wakes++; | |
18500 | + } | |
18501 | + if (pm_in_action) | |
18502 | + return; | |
18503 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
18504 | +} | |
18505 | +EXPORT_SYMBOL(swake_up_all_locked); | |
18506 | + | |
18507 | void swake_up(struct swait_queue_head *q) | |
18508 | { | |
18509 | unsigned long flags; | |
18510 | @@ -54,6 +74,7 @@ void swake_up_all(struct swait_queue_head *q) | |
18511 | if (!swait_active(q)) | |
18512 | return; | |
18513 | ||
18514 | + WARN_ON(irqs_disabled()); | |
18515 | raw_spin_lock_irq(&q->lock); | |
18516 | list_splice_init(&q->task_list, &tmp); | |
18517 | while (!list_empty(&tmp)) { | |
18518 | diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c | |
18519 | new file mode 100644 | |
18520 | index 000000000000..1950f40ca725 | |
18521 | --- /dev/null | |
18522 | +++ b/kernel/sched/swork.c | |
18523 | @@ -0,0 +1,173 @@ | |
18524 | +/* | |
18525 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de | |
18526 | + * | |
18527 | + * Provides a framework for enqueuing callbacks from irq context | |
18528 | + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. | |
18529 | + */ | |
18530 | + | |
18531 | +#include <linux/swait.h> | |
18532 | +#include <linux/swork.h> | |
18533 | +#include <linux/kthread.h> | |
18534 | +#include <linux/slab.h> | |
18535 | +#include <linux/spinlock.h> | |
18536 | +#include <linux/export.h> | |
18537 | + | |
18538 | +#define SWORK_EVENT_PENDING (1 << 0) | |
18539 | + | |
18540 | +static DEFINE_MUTEX(worker_mutex); | |
18541 | +static struct sworker *glob_worker; | |
18542 | + | |
18543 | +struct sworker { | |
18544 | + struct list_head events; | |
18545 | + struct swait_queue_head wq; | |
18546 | + | |
18547 | + raw_spinlock_t lock; | |
18548 | + | |
18549 | + struct task_struct *task; | |
18550 | + int refs; | |
18551 | +}; | |
18552 | + | |
18553 | +static bool swork_readable(struct sworker *worker) | |
18554 | +{ | |
18555 | + bool r; | |
18556 | + | |
18557 | + if (kthread_should_stop()) | |
18558 | + return true; | |
18559 | + | |
18560 | + raw_spin_lock_irq(&worker->lock); | |
18561 | + r = !list_empty(&worker->events); | |
18562 | + raw_spin_unlock_irq(&worker->lock); | |
18563 | + | |
18564 | + return r; | |
18565 | +} | |
18566 | + | |
18567 | +static int swork_kthread(void *arg) | |
18568 | +{ | |
18569 | + struct sworker *worker = arg; | |
18570 | + | |
18571 | + for (;;) { | |
18572 | + swait_event_interruptible(worker->wq, | |
18573 | + swork_readable(worker)); | |
18574 | + if (kthread_should_stop()) | |
18575 | + break; | |
18576 | + | |
18577 | + raw_spin_lock_irq(&worker->lock); | |
18578 | + while (!list_empty(&worker->events)) { | |
18579 | + struct swork_event *sev; | |
18580 | + | |
18581 | + sev = list_first_entry(&worker->events, | |
18582 | + struct swork_event, item); | |
18583 | + list_del(&sev->item); | |
18584 | + raw_spin_unlock_irq(&worker->lock); | |
18585 | + | |
18586 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, | |
18587 | + &sev->flags)); | |
18588 | + sev->func(sev); | |
18589 | + raw_spin_lock_irq(&worker->lock); | |
18590 | + } | |
18591 | + raw_spin_unlock_irq(&worker->lock); | |
18592 | + } | |
18593 | + return 0; | |
18594 | +} | |
18595 | + | |
18596 | +static struct sworker *swork_create(void) | |
18597 | +{ | |
18598 | + struct sworker *worker; | |
18599 | + | |
18600 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); | |
18601 | + if (!worker) | |
18602 | + return ERR_PTR(-ENOMEM); | |
18603 | + | |
18604 | + INIT_LIST_HEAD(&worker->events); | |
18605 | + raw_spin_lock_init(&worker->lock); | |
18606 | + init_swait_queue_head(&worker->wq); | |
18607 | + | |
18608 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); | |
18609 | + if (IS_ERR(worker->task)) { | |
18610 | + kfree(worker); | |
18611 | + return ERR_PTR(-ENOMEM); | |
18612 | + } | |
18613 | + | |
18614 | + return worker; | |
18615 | +} | |
18616 | + | |
18617 | +static void swork_destroy(struct sworker *worker) | |
18618 | +{ | |
18619 | + kthread_stop(worker->task); | |
18620 | + | |
18621 | + WARN_ON(!list_empty(&worker->events)); | |
18622 | + kfree(worker); | |
18623 | +} | |
18624 | + | |
18625 | +/** | |
18626 | + * swork_queue - queue swork | |
18627 | + * | |
18628 | + * Returns %false if @work was already on a queue, %true otherwise. | |
18629 | + * | |
18630 | + * The work is queued and processed on a random CPU | |
18631 | + */ | |
18632 | +bool swork_queue(struct swork_event *sev) | |
18633 | +{ | |
18634 | + unsigned long flags; | |
18635 | + | |
18636 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) | |
18637 | + return false; | |
18638 | + | |
18639 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); | |
18640 | + list_add_tail(&sev->item, &glob_worker->events); | |
18641 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
18642 | + | |
18643 | + swake_up(&glob_worker->wq); | |
18644 | + return true; | |
18645 | +} | |
18646 | +EXPORT_SYMBOL_GPL(swork_queue); | |
18647 | + | |
18648 | +/** | |
18649 | + * swork_get - get an instance of the sworker | |
18650 | + * | |
18651 | + * Returns an negative error code if the initialization if the worker did not | |
18652 | + * work, %0 otherwise. | |
18653 | + * | |
18654 | + */ | |
18655 | +int swork_get(void) | |
18656 | +{ | |
18657 | + struct sworker *worker; | |
18658 | + | |
18659 | + mutex_lock(&worker_mutex); | |
18660 | + if (!glob_worker) { | |
18661 | + worker = swork_create(); | |
18662 | + if (IS_ERR(worker)) { | |
18663 | + mutex_unlock(&worker_mutex); | |
18664 | + return -ENOMEM; | |
18665 | + } | |
18666 | + | |
18667 | + glob_worker = worker; | |
18668 | + } | |
18669 | + | |
18670 | + glob_worker->refs++; | |
18671 | + mutex_unlock(&worker_mutex); | |
18672 | + | |
18673 | + return 0; | |
18674 | +} | |
18675 | +EXPORT_SYMBOL_GPL(swork_get); | |
18676 | + | |
18677 | +/** | |
18678 | + * swork_put - puts an instance of the sworker | |
18679 | + * | |
18680 | + * Will destroy the sworker thread. This function must not be called until all | |
18681 | + * queued events have been completed. | |
18682 | + */ | |
18683 | +void swork_put(void) | |
18684 | +{ | |
18685 | + mutex_lock(&worker_mutex); | |
18686 | + | |
18687 | + glob_worker->refs--; | |
18688 | + if (glob_worker->refs > 0) | |
18689 | + goto out; | |
18690 | + | |
18691 | + swork_destroy(glob_worker); | |
18692 | + glob_worker = NULL; | |
18693 | +out: | |
18694 | + mutex_unlock(&worker_mutex); | |
18695 | +} | |
18696 | +EXPORT_SYMBOL_GPL(swork_put); | |
18697 | diff --git a/kernel/signal.c b/kernel/signal.c | |
33c7bf0f | 18698 | index 0b1415720a15..c884647951f7 100644 |
1a6e0f06 JK |
18699 | --- a/kernel/signal.c |
18700 | +++ b/kernel/signal.c | |
18701 | @@ -14,6 +14,7 @@ | |
18702 | #include <linux/export.h> | |
18703 | #include <linux/init.h> | |
18704 | #include <linux/sched.h> | |
18705 | +#include <linux/sched/rt.h> | |
18706 | #include <linux/fs.h> | |
18707 | #include <linux/tty.h> | |
18708 | #include <linux/binfmts.h> | |
18709 | @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task) | |
18710 | return false; | |
18711 | } | |
18712 | ||
18713 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
18714 | +{ | |
18715 | + struct sigqueue *q = t->sigqueue_cache; | |
18716 | + | |
18717 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) | |
18718 | + return NULL; | |
18719 | + return q; | |
18720 | +} | |
18721 | + | |
18722 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) | |
18723 | +{ | |
18724 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) | |
18725 | + return 0; | |
18726 | + return 1; | |
18727 | +} | |
18728 | + | |
18729 | /* | |
18730 | * allocate a new signal queue record | |
18731 | * - this may be called without locks if and only if t == current, otherwise an | |
18732 | * appropriate lock must be held to stop the target task from exiting | |
18733 | */ | |
18734 | static struct sigqueue * | |
18735 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
18736 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
18737 | + int override_rlimit, int fromslab) | |
18738 | { | |
18739 | struct sigqueue *q = NULL; | |
18740 | struct user_struct *user; | |
18741 | @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
18742 | if (override_rlimit || | |
18743 | atomic_read(&user->sigpending) <= | |
18744 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
18745 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
18746 | + if (!fromslab) | |
18747 | + q = get_task_cache(t); | |
18748 | + if (!q) | |
18749 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
18750 | } else { | |
18751 | print_dropped_signal(sig); | |
18752 | } | |
18753 | @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
18754 | return q; | |
18755 | } | |
18756 | ||
18757 | +static struct sigqueue * | |
18758 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
18759 | + int override_rlimit) | |
18760 | +{ | |
18761 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); | |
18762 | +} | |
18763 | + | |
18764 | static void __sigqueue_free(struct sigqueue *q) | |
18765 | { | |
18766 | if (q->flags & SIGQUEUE_PREALLOC) | |
18767 | @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q) | |
18768 | kmem_cache_free(sigqueue_cachep, q); | |
18769 | } | |
18770 | ||
18771 | +static void sigqueue_free_current(struct sigqueue *q) | |
18772 | +{ | |
18773 | + struct user_struct *up; | |
18774 | + | |
18775 | + if (q->flags & SIGQUEUE_PREALLOC) | |
18776 | + return; | |
18777 | + | |
18778 | + up = q->user; | |
18779 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
18780 | + atomic_dec(&up->sigpending); | |
18781 | + free_uid(up); | |
18782 | + } else | |
18783 | + __sigqueue_free(q); | |
18784 | +} | |
18785 | + | |
18786 | void flush_sigqueue(struct sigpending *queue) | |
18787 | { | |
18788 | struct sigqueue *q; | |
18789 | @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue) | |
18790 | } | |
18791 | ||
18792 | /* | |
18793 | + * Called from __exit_signal. Flush tsk->pending and | |
18794 | + * tsk->sigqueue_cache | |
18795 | + */ | |
18796 | +void flush_task_sigqueue(struct task_struct *tsk) | |
18797 | +{ | |
18798 | + struct sigqueue *q; | |
18799 | + | |
18800 | + flush_sigqueue(&tsk->pending); | |
18801 | + | |
18802 | + q = get_task_cache(tsk); | |
18803 | + if (q) | |
18804 | + kmem_cache_free(sigqueue_cachep, q); | |
18805 | +} | |
18806 | + | |
18807 | +/* | |
18808 | * Flush all pending signals for this kthread. | |
18809 | */ | |
18810 | void flush_signals(struct task_struct *t) | |
18811 | @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | |
18812 | still_pending: | |
18813 | list_del_init(&first->list); | |
18814 | copy_siginfo(info, &first->info); | |
18815 | - __sigqueue_free(first); | |
18816 | + sigqueue_free_current(first); | |
18817 | } else { | |
18818 | /* | |
18819 | * Ok, it wasn't in the queue. This must be | |
18820 | @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |
18821 | { | |
18822 | int signr; | |
18823 | ||
18824 | + WARN_ON_ONCE(tsk != current); | |
18825 | + | |
18826 | /* We only dequeue private signals from ourselves, we don't let | |
18827 | * signalfd steal them | |
18828 | */ | |
18829 | @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | |
18830 | * We don't want to have recursive SIGSEGV's etc, for example, | |
18831 | * that is why we also clear SIGNAL_UNKILLABLE. | |
18832 | */ | |
18833 | -int | |
18834 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
18835 | +static int | |
18836 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
18837 | { | |
18838 | unsigned long int flags; | |
18839 | int ret, blocked, ignored; | |
18840 | @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
18841 | return ret; | |
18842 | } | |
18843 | ||
18844 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
18845 | +{ | |
18846 | +/* | |
18847 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
18848 | + * since it can not enable preemption, and the signal code's spin_locks | |
18849 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
18850 | + * send the signal on exit of the trap. | |
18851 | + */ | |
18852 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
18853 | + if (in_atomic()) { | |
18854 | + if (WARN_ON_ONCE(t != current)) | |
18855 | + return 0; | |
18856 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
18857 | + return 0; | |
18858 | + | |
18859 | + if (is_si_special(info)) { | |
18860 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
18861 | + t->forced_info.si_signo = sig; | |
18862 | + t->forced_info.si_errno = 0; | |
18863 | + t->forced_info.si_code = SI_KERNEL; | |
18864 | + t->forced_info.si_pid = 0; | |
18865 | + t->forced_info.si_uid = 0; | |
18866 | + } else { | |
18867 | + t->forced_info = *info; | |
18868 | + } | |
18869 | + | |
18870 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | |
18871 | + return 0; | |
18872 | + } | |
18873 | +#endif | |
18874 | + return do_force_sig_info(sig, info, t); | |
18875 | +} | |
18876 | + | |
18877 | /* | |
18878 | * Nuke all other threads in the group. | |
18879 | */ | |
18880 | @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
18881 | * Disable interrupts early to avoid deadlocks. | |
18882 | * See rcu_read_unlock() comment header for details. | |
18883 | */ | |
18884 | - local_irq_save(*flags); | |
18885 | + local_irq_save_nort(*flags); | |
18886 | rcu_read_lock(); | |
18887 | sighand = rcu_dereference(tsk->sighand); | |
18888 | if (unlikely(sighand == NULL)) { | |
18889 | rcu_read_unlock(); | |
18890 | - local_irq_restore(*flags); | |
18891 | + local_irq_restore_nort(*flags); | |
18892 | break; | |
18893 | } | |
18894 | /* | |
18895 | @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
18896 | } | |
18897 | spin_unlock(&sighand->siglock); | |
18898 | rcu_read_unlock(); | |
18899 | - local_irq_restore(*flags); | |
18900 | + local_irq_restore_nort(*flags); | |
18901 | } | |
18902 | ||
18903 | return sighand; | |
18904 | @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid); | |
18905 | */ | |
18906 | struct sigqueue *sigqueue_alloc(void) | |
18907 | { | |
18908 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
18909 | + /* Preallocated sigqueue objects always from the slabcache ! */ | |
18910 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
18911 | ||
18912 | if (q) | |
18913 | q->flags |= SIGQUEUE_PREALLOC; | |
18914 | @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |
18915 | if (gstop_done && ptrace_reparented(current)) | |
18916 | do_notify_parent_cldstop(current, false, why); | |
18917 | ||
18918 | - /* | |
18919 | - * Don't want to allow preemption here, because | |
18920 | - * sys_ptrace() needs this task to be inactive. | |
18921 | - * | |
18922 | - * XXX: implement read_unlock_no_resched(). | |
18923 | - */ | |
18924 | - preempt_disable(); | |
18925 | read_unlock(&tasklist_lock); | |
18926 | - preempt_enable_no_resched(); | |
18927 | freezable_schedule(); | |
18928 | } else { | |
18929 | /* | |
18930 | diff --git a/kernel/softirq.c b/kernel/softirq.c | |
1f39f580 | 18931 | index 744fa611cae0..819bd7cf5ad0 100644 |
1a6e0f06 JK |
18932 | --- a/kernel/softirq.c |
18933 | +++ b/kernel/softirq.c | |
18934 | @@ -21,10 +21,12 @@ | |
18935 | #include <linux/freezer.h> | |
18936 | #include <linux/kthread.h> | |
18937 | #include <linux/rcupdate.h> | |
18938 | +#include <linux/delay.h> | |
18939 | #include <linux/ftrace.h> | |
18940 | #include <linux/smp.h> | |
18941 | #include <linux/smpboot.h> | |
18942 | #include <linux/tick.h> | |
18943 | +#include <linux/locallock.h> | |
18944 | #include <linux/irq.h> | |
18945 | ||
18946 | #define CREATE_TRACE_POINTS | |
18947 | @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat); | |
18948 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
18949 | ||
18950 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
18951 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
c7c16703 | 18952 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) |
1a6e0f06 JK |
18953 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); |
18954 | +#endif | |
18955 | ||
18956 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
c7c16703 | 18957 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", |
1a6e0f06 JK |
18958 | "TASKLET", "SCHED", "HRTIMER", "RCU" |
18959 | }; | |
18960 | ||
18961 | +#ifdef CONFIG_NO_HZ_COMMON | |
18962 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
18963 | + | |
18964 | +struct softirq_runner { | |
18965 | + struct task_struct *runner[NR_SOFTIRQS]; | |
18966 | +}; | |
18967 | + | |
18968 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); | |
18969 | + | |
18970 | +static inline void softirq_set_runner(unsigned int sirq) | |
18971 | +{ | |
18972 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
18973 | + | |
18974 | + sr->runner[sirq] = current; | |
18975 | +} | |
18976 | + | |
18977 | +static inline void softirq_clr_runner(unsigned int sirq) | |
18978 | +{ | |
18979 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
18980 | + | |
18981 | + sr->runner[sirq] = NULL; | |
18982 | +} | |
18983 | + | |
18984 | +/* | |
18985 | + * On preempt-rt a softirq running context might be blocked on a | |
18986 | + * lock. There might be no other runnable task on this CPU because the | |
18987 | + * lock owner runs on some other CPU. So we have to go into idle with | |
18988 | + * the pending bit set. Therefor we need to check this otherwise we | |
18989 | + * warn about false positives which confuses users and defeats the | |
18990 | + * whole purpose of this test. | |
18991 | + * | |
18992 | + * This code is called with interrupts disabled. | |
18993 | + */ | |
18994 | +void softirq_check_pending_idle(void) | |
18995 | +{ | |
18996 | + static int rate_limit; | |
18997 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
18998 | + u32 warnpending; | |
18999 | + int i; | |
19000 | + | |
19001 | + if (rate_limit >= 10) | |
19002 | + return; | |
19003 | + | |
19004 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
19005 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
19006 | + struct task_struct *tsk = sr->runner[i]; | |
19007 | + | |
19008 | + /* | |
19009 | + * The wakeup code in rtmutex.c wakes up the task | |
19010 | + * _before_ it sets pi_blocked_on to NULL under | |
19011 | + * tsk->pi_lock. So we need to check for both: state | |
19012 | + * and pi_blocked_on. | |
19013 | + */ | |
19014 | + if (tsk) { | |
19015 | + raw_spin_lock(&tsk->pi_lock); | |
19016 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
19017 | + /* Clear all bits pending in that task */ | |
19018 | + warnpending &= ~(tsk->softirqs_raised); | |
19019 | + warnpending &= ~(1 << i); | |
19020 | + } | |
19021 | + raw_spin_unlock(&tsk->pi_lock); | |
19022 | + } | |
19023 | + } | |
19024 | + | |
19025 | + if (warnpending) { | |
19026 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
19027 | + warnpending); | |
19028 | + rate_limit++; | |
19029 | + } | |
19030 | +} | |
19031 | +# else | |
19032 | +/* | |
19033 | + * On !PREEMPT_RT we just printk rate limited: | |
19034 | + */ | |
19035 | +void softirq_check_pending_idle(void) | |
19036 | +{ | |
19037 | + static int rate_limit; | |
19038 | + | |
19039 | + if (rate_limit < 10 && | |
19040 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
19041 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
19042 | + local_softirq_pending()); | |
19043 | + rate_limit++; | |
19044 | + } | |
19045 | +} | |
19046 | +# endif | |
19047 | + | |
19048 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
19049 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
19050 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
19051 | +#endif | |
19052 | + | |
19053 | /* | |
19054 | * we cannot loop indefinitely here to avoid userspace starvation, | |
19055 | * but we also don't want to introduce a worst case 1/HZ latency | |
1f39f580 | 19056 | @@ -77,6 +175,38 @@ static void wakeup_softirqd(void) |
1a6e0f06 JK |
19057 | wake_up_process(tsk); |
19058 | } | |
19059 | ||
19060 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19061 | +static void wakeup_timer_softirqd(void) | |
19062 | +{ | |
19063 | + /* Interrupts are disabled: no need to stop preemption */ | |
19064 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
19065 | + | |
19066 | + if (tsk && tsk->state != TASK_RUNNING) | |
19067 | + wake_up_process(tsk); | |
19068 | +} | |
19069 | +#endif | |
19070 | + | |
19071 | +static void handle_softirq(unsigned int vec_nr) | |
19072 | +{ | |
19073 | + struct softirq_action *h = softirq_vec + vec_nr; | |
19074 | + int prev_count; | |
19075 | + | |
19076 | + prev_count = preempt_count(); | |
19077 | + | |
19078 | + kstat_incr_softirqs_this_cpu(vec_nr); | |
19079 | + | |
19080 | + trace_softirq_entry(vec_nr); | |
19081 | + h->action(h); | |
19082 | + trace_softirq_exit(vec_nr); | |
19083 | + if (unlikely(prev_count != preempt_count())) { | |
19084 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
19085 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
19086 | + prev_count, preempt_count()); | |
19087 | + preempt_count_set(prev_count); | |
19088 | + } | |
19089 | +} | |
19090 | + | |
1f39f580 | 19091 | +#ifndef CONFIG_PREEMPT_RT_FULL |
c7c16703 JK |
19092 | /* |
19093 | * If ksoftirqd is scheduled, we do not want to process pending softirqs | |
19094 | * right now. Let ksoftirqd handle this at its own rate, to get fairness. | |
1f39f580 | 19095 | @@ -88,6 +218,47 @@ static bool ksoftirqd_running(void) |
c7c16703 JK |
19096 | return tsk && (tsk->state == TASK_RUNNING); |
19097 | } | |
19098 | ||
1a6e0f06 JK |
19099 | +static inline int ksoftirqd_softirq_pending(void) |
19100 | +{ | |
19101 | + return local_softirq_pending(); | |
19102 | +} | |
19103 | + | |
19104 | +static void handle_pending_softirqs(u32 pending) | |
19105 | +{ | |
19106 | + struct softirq_action *h = softirq_vec; | |
19107 | + int softirq_bit; | |
19108 | + | |
19109 | + local_irq_enable(); | |
19110 | + | |
19111 | + h = softirq_vec; | |
19112 | + | |
19113 | + while ((softirq_bit = ffs(pending))) { | |
19114 | + unsigned int vec_nr; | |
19115 | + | |
19116 | + h += softirq_bit - 1; | |
19117 | + vec_nr = h - softirq_vec; | |
19118 | + handle_softirq(vec_nr); | |
19119 | + | |
19120 | + h++; | |
19121 | + pending >>= softirq_bit; | |
19122 | + } | |
19123 | + | |
19124 | + rcu_bh_qs(); | |
19125 | + local_irq_disable(); | |
19126 | +} | |
19127 | + | |
19128 | +static void run_ksoftirqd(unsigned int cpu) | |
19129 | +{ | |
19130 | + local_irq_disable(); | |
19131 | + if (ksoftirqd_softirq_pending()) { | |
19132 | + __do_softirq(); | |
19133 | + local_irq_enable(); | |
19134 | + cond_resched_rcu_qs(); | |
19135 | + return; | |
19136 | + } | |
19137 | + local_irq_enable(); | |
19138 | +} | |
19139 | + | |
19140 | /* | |
19141 | * preempt_count and SOFTIRQ_OFFSET usage: | |
19142 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
c7c16703 | 19143 | @@ -243,10 +414,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) |
1a6e0f06 JK |
19144 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; |
19145 | unsigned long old_flags = current->flags; | |
19146 | int max_restart = MAX_SOFTIRQ_RESTART; | |
19147 | - struct softirq_action *h; | |
19148 | bool in_hardirq; | |
19149 | __u32 pending; | |
19150 | - int softirq_bit; | |
19151 | ||
19152 | /* | |
19153 | * Mask out PF_MEMALLOC s current task context is borrowed for the | |
c7c16703 | 19154 | @@ -265,36 +434,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) |
1a6e0f06 JK |
19155 | /* Reset the pending bitmask before enabling irqs */ |
19156 | set_softirq_pending(0); | |
19157 | ||
19158 | - local_irq_enable(); | |
19159 | - | |
19160 | - h = softirq_vec; | |
19161 | - | |
19162 | - while ((softirq_bit = ffs(pending))) { | |
19163 | - unsigned int vec_nr; | |
19164 | - int prev_count; | |
19165 | - | |
19166 | - h += softirq_bit - 1; | |
19167 | - | |
19168 | - vec_nr = h - softirq_vec; | |
19169 | - prev_count = preempt_count(); | |
19170 | - | |
19171 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
19172 | - | |
19173 | - trace_softirq_entry(vec_nr); | |
19174 | - h->action(h); | |
19175 | - trace_softirq_exit(vec_nr); | |
19176 | - if (unlikely(prev_count != preempt_count())) { | |
19177 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
19178 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
19179 | - prev_count, preempt_count()); | |
19180 | - preempt_count_set(prev_count); | |
19181 | - } | |
19182 | - h++; | |
19183 | - pending >>= softirq_bit; | |
19184 | - } | |
19185 | - | |
19186 | - rcu_bh_qs(); | |
19187 | - local_irq_disable(); | |
19188 | + handle_pending_softirqs(pending); | |
19189 | ||
19190 | pending = local_softirq_pending(); | |
19191 | if (pending) { | |
c7c16703 | 19192 | @@ -331,6 +471,309 @@ asmlinkage __visible void do_softirq(void) |
1a6e0f06 JK |
19193 | } |
19194 | ||
19195 | /* | |
19196 | + * This function must run with irqs disabled! | |
19197 | + */ | |
19198 | +void raise_softirq_irqoff(unsigned int nr) | |
19199 | +{ | |
19200 | + __raise_softirq_irqoff(nr); | |
19201 | + | |
19202 | + /* | |
19203 | + * If we're in an interrupt or softirq, we're done | |
19204 | + * (this also catches softirq-disabled code). We will | |
19205 | + * actually run the softirq once we return from | |
19206 | + * the irq or softirq. | |
19207 | + * | |
19208 | + * Otherwise we wake up ksoftirqd to make sure we | |
19209 | + * schedule the softirq soon. | |
19210 | + */ | |
19211 | + if (!in_interrupt()) | |
19212 | + wakeup_softirqd(); | |
19213 | +} | |
19214 | + | |
19215 | +void __raise_softirq_irqoff(unsigned int nr) | |
19216 | +{ | |
19217 | + trace_softirq_raise(nr); | |
19218 | + or_softirq_pending(1UL << nr); | |
19219 | +} | |
19220 | + | |
19221 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } | |
19222 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
19223 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
19224 | + | |
19225 | +#else /* !PREEMPT_RT_FULL */ | |
19226 | + | |
19227 | +/* | |
19228 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
19229 | + */ | |
19230 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
19231 | + | |
19232 | +void __init softirq_early_init(void) | |
19233 | +{ | |
19234 | + int i; | |
19235 | + | |
19236 | + for (i = 0; i < NR_SOFTIRQS; i++) | |
19237 | + local_irq_lock_init(local_softirq_locks[i]); | |
19238 | +} | |
19239 | + | |
19240 | +static void lock_softirq(int which) | |
19241 | +{ | |
19242 | + local_lock(local_softirq_locks[which]); | |
19243 | +} | |
19244 | + | |
19245 | +static void unlock_softirq(int which) | |
19246 | +{ | |
19247 | + local_unlock(local_softirq_locks[which]); | |
19248 | +} | |
19249 | + | |
19250 | +static void do_single_softirq(int which) | |
19251 | +{ | |
19252 | + unsigned long old_flags = current->flags; | |
19253 | + | |
19254 | + current->flags &= ~PF_MEMALLOC; | |
19255 | + vtime_account_irq_enter(current); | |
19256 | + current->flags |= PF_IN_SOFTIRQ; | |
19257 | + lockdep_softirq_enter(); | |
19258 | + local_irq_enable(); | |
19259 | + handle_softirq(which); | |
19260 | + local_irq_disable(); | |
19261 | + lockdep_softirq_exit(); | |
19262 | + current->flags &= ~PF_IN_SOFTIRQ; | |
19263 | + vtime_account_irq_enter(current); | |
19264 | + tsk_restore_flags(current, old_flags, PF_MEMALLOC); | |
19265 | +} | |
19266 | + | |
19267 | +/* | |
19268 | + * Called with interrupts disabled. Process softirqs which were raised | |
19269 | + * in current context (or on behalf of ksoftirqd). | |
19270 | + */ | |
19271 | +static void do_current_softirqs(void) | |
19272 | +{ | |
19273 | + while (current->softirqs_raised) { | |
19274 | + int i = __ffs(current->softirqs_raised); | |
19275 | + unsigned int pending, mask = (1U << i); | |
19276 | + | |
19277 | + current->softirqs_raised &= ~mask; | |
19278 | + local_irq_enable(); | |
19279 | + | |
19280 | + /* | |
19281 | + * If the lock is contended, we boost the owner to | |
19282 | + * process the softirq or leave the critical section | |
19283 | + * now. | |
19284 | + */ | |
19285 | + lock_softirq(i); | |
19286 | + local_irq_disable(); | |
19287 | + softirq_set_runner(i); | |
19288 | + /* | |
19289 | + * Check with the local_softirq_pending() bits, | |
19290 | + * whether we need to process this still or if someone | |
19291 | + * else took care of it. | |
19292 | + */ | |
19293 | + pending = local_softirq_pending(); | |
19294 | + if (pending & mask) { | |
19295 | + set_softirq_pending(pending & ~mask); | |
19296 | + do_single_softirq(i); | |
19297 | + } | |
19298 | + softirq_clr_runner(i); | |
19299 | + WARN_ON(current->softirq_nestcnt != 1); | |
19300 | + local_irq_enable(); | |
19301 | + unlock_softirq(i); | |
19302 | + local_irq_disable(); | |
19303 | + } | |
19304 | +} | |
19305 | + | |
19306 | +void __local_bh_disable(void) | |
19307 | +{ | |
19308 | + if (++current->softirq_nestcnt == 1) | |
19309 | + migrate_disable(); | |
19310 | +} | |
19311 | +EXPORT_SYMBOL(__local_bh_disable); | |
19312 | + | |
19313 | +void __local_bh_enable(void) | |
19314 | +{ | |
19315 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
19316 | + return; | |
19317 | + | |
19318 | + local_irq_disable(); | |
19319 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
19320 | + do_current_softirqs(); | |
19321 | + local_irq_enable(); | |
19322 | + | |
19323 | + if (--current->softirq_nestcnt == 0) | |
19324 | + migrate_enable(); | |
19325 | +} | |
19326 | +EXPORT_SYMBOL(__local_bh_enable); | |
19327 | + | |
19328 | +void _local_bh_enable(void) | |
19329 | +{ | |
19330 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
19331 | + return; | |
19332 | + if (--current->softirq_nestcnt == 0) | |
19333 | + migrate_enable(); | |
19334 | +} | |
19335 | +EXPORT_SYMBOL(_local_bh_enable); | |
19336 | + | |
19337 | +int in_serving_softirq(void) | |
19338 | +{ | |
19339 | + return current->flags & PF_IN_SOFTIRQ; | |
19340 | +} | |
19341 | +EXPORT_SYMBOL(in_serving_softirq); | |
19342 | + | |
19343 | +/* Called with preemption disabled */ | |
19344 | +static void run_ksoftirqd(unsigned int cpu) | |
19345 | +{ | |
19346 | + local_irq_disable(); | |
19347 | + current->softirq_nestcnt++; | |
19348 | + | |
19349 | + do_current_softirqs(); | |
19350 | + current->softirq_nestcnt--; | |
19351 | + local_irq_enable(); | |
19352 | + cond_resched_rcu_qs(); | |
19353 | +} | |
19354 | + | |
19355 | +/* | |
19356 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
19357 | + * disabled. So the cpu can't go away under us. | |
19358 | + */ | |
19359 | +void thread_do_softirq(void) | |
19360 | +{ | |
19361 | + if (!in_serving_softirq() && current->softirqs_raised) { | |
19362 | + current->softirq_nestcnt++; | |
19363 | + do_current_softirqs(); | |
19364 | + current->softirq_nestcnt--; | |
19365 | + } | |
19366 | +} | |
19367 | + | |
19368 | +static void do_raise_softirq_irqoff(unsigned int nr) | |
19369 | +{ | |
19370 | + unsigned int mask; | |
19371 | + | |
19372 | + mask = 1UL << nr; | |
19373 | + | |
19374 | + trace_softirq_raise(nr); | |
19375 | + or_softirq_pending(mask); | |
19376 | + | |
19377 | + /* | |
19378 | + * If we are not in a hard interrupt and inside a bh disabled | |
19379 | + * region, we simply raise the flag on current. local_bh_enable() | |
19380 | + * will make sure that the softirq is executed. Otherwise we | |
19381 | + * delegate it to ksoftirqd. | |
19382 | + */ | |
19383 | + if (!in_irq() && current->softirq_nestcnt) | |
19384 | + current->softirqs_raised |= mask; | |
19385 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
19386 | + return; | |
19387 | + | |
19388 | + if (mask & TIMER_SOFTIRQS) | |
19389 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
19390 | + else | |
19391 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
19392 | +} | |
19393 | + | |
19394 | +static void wakeup_proper_softirq(unsigned int nr) | |
19395 | +{ | |
19396 | + if ((1UL << nr) & TIMER_SOFTIRQS) | |
19397 | + wakeup_timer_softirqd(); | |
19398 | + else | |
19399 | + wakeup_softirqd(); | |
19400 | +} | |
19401 | + | |
1a6e0f06 JK |
19402 | +void __raise_softirq_irqoff(unsigned int nr) |
19403 | +{ | |
19404 | + do_raise_softirq_irqoff(nr); | |
19405 | + if (!in_irq() && !current->softirq_nestcnt) | |
19406 | + wakeup_proper_softirq(nr); | |
19407 | +} | |
19408 | + | |
19409 | +/* | |
19410 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
19411 | + */ | |
19412 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
19413 | +{ | |
19414 | + unsigned int mask; | |
19415 | + | |
19416 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || | |
19417 | + !__this_cpu_read(ktimer_softirqd))) | |
19418 | + return; | |
19419 | + mask = 1UL << nr; | |
19420 | + | |
19421 | + trace_softirq_raise(nr); | |
19422 | + or_softirq_pending(mask); | |
19423 | + if (mask & TIMER_SOFTIRQS) | |
19424 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
19425 | + else | |
19426 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
19427 | + wakeup_proper_softirq(nr); | |
19428 | +} | |
19429 | + | |
19430 | +/* | |
19431 | + * This function must run with irqs disabled! | |
19432 | + */ | |
19433 | +void raise_softirq_irqoff(unsigned int nr) | |
19434 | +{ | |
19435 | + do_raise_softirq_irqoff(nr); | |
19436 | + | |
19437 | + /* | |
19438 | + * If we're in an hard interrupt we let irq return code deal | |
19439 | + * with the wakeup of ksoftirqd. | |
19440 | + */ | |
19441 | + if (in_irq()) | |
19442 | + return; | |
19443 | + /* | |
19444 | + * If we are in thread context but outside of a bh disabled | |
19445 | + * region, we need to wake ksoftirqd as well. | |
19446 | + * | |
19447 | + * CHECKME: Some of the places which do that could be wrapped | |
19448 | + * into local_bh_disable/enable pairs. Though it's unclear | |
19449 | + * whether this is worth the effort. To find those places just | |
19450 | + * raise a WARN() if the condition is met. | |
19451 | + */ | |
19452 | + if (!current->softirq_nestcnt) | |
19453 | + wakeup_proper_softirq(nr); | |
19454 | +} | |
19455 | + | |
19456 | +static inline int ksoftirqd_softirq_pending(void) | |
19457 | +{ | |
19458 | + return current->softirqs_raised; | |
19459 | +} | |
19460 | + | |
19461 | +static inline void local_bh_disable_nort(void) { } | |
19462 | +static inline void _local_bh_enable_nort(void) { } | |
19463 | + | |
19464 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
19465 | +{ | |
19466 | + /* Take over all but timer pending softirqs when starting */ | |
19467 | + local_irq_disable(); | |
19468 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
19469 | + local_irq_enable(); | |
19470 | +} | |
19471 | + | |
19472 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) | |
19473 | +{ | |
19474 | + struct sched_param param = { .sched_priority = 1 }; | |
19475 | + | |
19476 | + sched_setscheduler(current, SCHED_FIFO, ¶m); | |
19477 | + | |
19478 | + /* Take over timer pending softirqs when starting */ | |
19479 | + local_irq_disable(); | |
19480 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
19481 | + local_irq_enable(); | |
19482 | +} | |
19483 | + | |
19484 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, | |
19485 | + bool online) | |
19486 | +{ | |
19487 | + struct sched_param param = { .sched_priority = 0 }; | |
19488 | + | |
19489 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); | |
19490 | +} | |
19491 | + | |
19492 | +static int ktimer_softirqd_should_run(unsigned int cpu) | |
19493 | +{ | |
19494 | + return current->softirqs_raised; | |
19495 | +} | |
19496 | + | |
19497 | +#endif /* PREEMPT_RT_FULL */ | |
19498 | +/* | |
19499 | * Enter an interrupt context. | |
19500 | */ | |
19501 | void irq_enter(void) | |
c7c16703 | 19502 | @@ -341,9 +784,9 @@ void irq_enter(void) |
1a6e0f06 JK |
19503 | * Prevent raise_softirq from needlessly waking up ksoftirqd |
19504 | * here, as softirq will be serviced on return from interrupt. | |
19505 | */ | |
19506 | - local_bh_disable(); | |
19507 | + local_bh_disable_nort(); | |
19508 | tick_irq_enter(); | |
19509 | - _local_bh_enable(); | |
19510 | + _local_bh_enable_nort(); | |
19511 | } | |
19512 | ||
19513 | __irq_enter(); | |
1f39f580 | 19514 | @@ -351,6 +794,7 @@ void irq_enter(void) |
1a6e0f06 JK |
19515 | |
19516 | static inline void invoke_softirq(void) | |
19517 | { | |
1f39f580 | 19518 | +#ifndef CONFIG_PREEMPT_RT_FULL |
c7c16703 JK |
19519 | if (ksoftirqd_running()) |
19520 | return; | |
1f39f580 JK |
19521 | |
19522 | @@ -373,6 +817,18 @@ static inline void invoke_softirq(void) | |
1a6e0f06 JK |
19523 | } else { |
19524 | wakeup_softirqd(); | |
19525 | } | |
19526 | +#else /* PREEMPT_RT_FULL */ | |
1f39f580 | 19527 | + unsigned long flags; |
1a6e0f06 JK |
19528 | + |
19529 | + local_irq_save(flags); | |
19530 | + if (__this_cpu_read(ksoftirqd) && | |
19531 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
19532 | + wakeup_softirqd(); | |
19533 | + if (__this_cpu_read(ktimer_softirqd) && | |
19534 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
19535 | + wakeup_timer_softirqd(); | |
19536 | + local_irq_restore(flags); | |
19537 | +#endif | |
19538 | } | |
19539 | ||
19540 | static inline void tick_irq_exit(void) | |
1f39f580 | 19541 | @@ -409,26 +865,6 @@ void irq_exit(void) |
1a6e0f06 JK |
19542 | trace_hardirq_exit(); /* must be last! */ |
19543 | } | |
19544 | ||
19545 | -/* | |
19546 | - * This function must run with irqs disabled! | |
19547 | - */ | |
19548 | -inline void raise_softirq_irqoff(unsigned int nr) | |
19549 | -{ | |
19550 | - __raise_softirq_irqoff(nr); | |
19551 | - | |
19552 | - /* | |
19553 | - * If we're in an interrupt or softirq, we're done | |
19554 | - * (this also catches softirq-disabled code). We will | |
19555 | - * actually run the softirq once we return from | |
19556 | - * the irq or softirq. | |
19557 | - * | |
19558 | - * Otherwise we wake up ksoftirqd to make sure we | |
19559 | - * schedule the softirq soon. | |
19560 | - */ | |
19561 | - if (!in_interrupt()) | |
19562 | - wakeup_softirqd(); | |
19563 | -} | |
19564 | - | |
19565 | void raise_softirq(unsigned int nr) | |
19566 | { | |
19567 | unsigned long flags; | |
1f39f580 | 19568 | @@ -438,12 +874,6 @@ void raise_softirq(unsigned int nr) |
1a6e0f06 JK |
19569 | local_irq_restore(flags); |
19570 | } | |
19571 | ||
19572 | -void __raise_softirq_irqoff(unsigned int nr) | |
19573 | -{ | |
19574 | - trace_softirq_raise(nr); | |
19575 | - or_softirq_pending(1UL << nr); | |
19576 | -} | |
19577 | - | |
19578 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
19579 | { | |
19580 | softirq_vec[nr].action = action; | |
1f39f580 | 19581 | @@ -460,15 +890,45 @@ struct tasklet_head { |
1a6e0f06 JK |
19582 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); |
19583 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
19584 | ||
19585 | +static void inline | |
19586 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
19587 | +{ | |
19588 | + if (tasklet_trylock(t)) { | |
19589 | +again: | |
19590 | + /* We may have been preempted before tasklet_trylock | |
19591 | + * and __tasklet_action may have already run. | |
19592 | + * So double check the sched bit while the takslet | |
19593 | + * is locked before adding it to the list. | |
19594 | + */ | |
19595 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
19596 | + t->next = NULL; | |
19597 | + *head->tail = t; | |
19598 | + head->tail = &(t->next); | |
19599 | + raise_softirq_irqoff(nr); | |
19600 | + tasklet_unlock(t); | |
19601 | + } else { | |
19602 | + /* This is subtle. If we hit the corner case above | |
19603 | + * It is possible that we get preempted right here, | |
19604 | + * and another task has successfully called | |
19605 | + * tasklet_schedule(), then this function, and | |
19606 | + * failed on the trylock. Thus we must be sure | |
19607 | + * before releasing the tasklet lock, that the | |
19608 | + * SCHED_BIT is clear. Otherwise the tasklet | |
19609 | + * may get its SCHED_BIT set, but not added to the | |
19610 | + * list | |
19611 | + */ | |
19612 | + if (!tasklet_tryunlock(t)) | |
19613 | + goto again; | |
19614 | + } | |
19615 | + } | |
19616 | +} | |
19617 | + | |
19618 | void __tasklet_schedule(struct tasklet_struct *t) | |
19619 | { | |
19620 | unsigned long flags; | |
19621 | ||
19622 | local_irq_save(flags); | |
19623 | - t->next = NULL; | |
19624 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
19625 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
19626 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
19627 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
19628 | local_irq_restore(flags); | |
19629 | } | |
19630 | EXPORT_SYMBOL(__tasklet_schedule); | |
1f39f580 | 19631 | @@ -478,10 +938,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) |
1a6e0f06 JK |
19632 | unsigned long flags; |
19633 | ||
19634 | local_irq_save(flags); | |
19635 | - t->next = NULL; | |
19636 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
19637 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
19638 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
19639 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
19640 | local_irq_restore(flags); | |
19641 | } | |
19642 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
1f39f580 | 19643 | @@ -490,82 +947,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) |
1a6e0f06 JK |
19644 | { |
19645 | BUG_ON(!irqs_disabled()); | |
19646 | ||
19647 | - t->next = __this_cpu_read(tasklet_hi_vec.head); | |
19648 | - __this_cpu_write(tasklet_hi_vec.head, t); | |
19649 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
19650 | + __tasklet_hi_schedule(t); | |
19651 | } | |
19652 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | |
19653 | ||
c7c16703 | 19654 | -static __latent_entropy void tasklet_action(struct softirq_action *a) |
1a6e0f06 JK |
19655 | +void tasklet_enable(struct tasklet_struct *t) |
19656 | { | |
19657 | - struct tasklet_struct *list; | |
19658 | + if (!atomic_dec_and_test(&t->count)) | |
19659 | + return; | |
19660 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
19661 | + tasklet_schedule(t); | |
19662 | +} | |
19663 | +EXPORT_SYMBOL(tasklet_enable); | |
19664 | ||
19665 | - local_irq_disable(); | |
19666 | - list = __this_cpu_read(tasklet_vec.head); | |
19667 | - __this_cpu_write(tasklet_vec.head, NULL); | |
19668 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
19669 | - local_irq_enable(); | |
19670 | +static void __tasklet_action(struct softirq_action *a, | |
19671 | + struct tasklet_struct *list) | |
19672 | +{ | |
19673 | + int loops = 1000000; | |
19674 | ||
19675 | while (list) { | |
19676 | struct tasklet_struct *t = list; | |
19677 | ||
19678 | list = list->next; | |
19679 | ||
19680 | - if (tasklet_trylock(t)) { | |
19681 | - if (!atomic_read(&t->count)) { | |
19682 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
19683 | - &t->state)) | |
19684 | - BUG(); | |
19685 | - t->func(t->data); | |
19686 | - tasklet_unlock(t); | |
19687 | - continue; | |
19688 | - } | |
19689 | - tasklet_unlock(t); | |
19690 | + /* | |
19691 | + * Should always succeed - after a tasklist got on the | |
19692 | + * list (after getting the SCHED bit set from 0 to 1), | |
19693 | + * nothing but the tasklet softirq it got queued to can | |
19694 | + * lock it: | |
19695 | + */ | |
19696 | + if (!tasklet_trylock(t)) { | |
19697 | + WARN_ON(1); | |
19698 | + continue; | |
19699 | } | |
19700 | ||
19701 | - local_irq_disable(); | |
19702 | t->next = NULL; | |
19703 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
19704 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
19705 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
19706 | - local_irq_enable(); | |
19707 | + | |
19708 | + /* | |
19709 | + * If we cannot handle the tasklet because it's disabled, | |
19710 | + * mark it as pending. tasklet_enable() will later | |
19711 | + * re-schedule the tasklet. | |
19712 | + */ | |
19713 | + if (unlikely(atomic_read(&t->count))) { | |
19714 | +out_disabled: | |
19715 | + /* implicit unlock: */ | |
19716 | + wmb(); | |
19717 | + t->state = TASKLET_STATEF_PENDING; | |
19718 | + continue; | |
19719 | + } | |
19720 | + | |
19721 | + /* | |
19722 | + * After this point on the tasklet might be rescheduled | |
19723 | + * on another CPU, but it can only be added to another | |
19724 | + * CPU's tasklet list if we unlock the tasklet (which we | |
19725 | + * dont do yet). | |
19726 | + */ | |
19727 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
19728 | + WARN_ON(1); | |
19729 | + | |
19730 | +again: | |
19731 | + t->func(t->data); | |
19732 | + | |
19733 | + /* | |
19734 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
19735 | + * another CPU might have scheduled or disabled the tasklet. | |
19736 | + * We only allow the STATE_RUN -> 0 transition here. | |
19737 | + */ | |
19738 | + while (!tasklet_tryunlock(t)) { | |
19739 | + /* | |
19740 | + * If it got disabled meanwhile, bail out: | |
19741 | + */ | |
19742 | + if (atomic_read(&t->count)) | |
19743 | + goto out_disabled; | |
19744 | + /* | |
19745 | + * If it got scheduled meanwhile, re-execute | |
19746 | + * the tasklet function: | |
19747 | + */ | |
19748 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
19749 | + goto again; | |
19750 | + if (!--loops) { | |
19751 | + printk("hm, tasklet state: %08lx\n", t->state); | |
19752 | + WARN_ON(1); | |
19753 | + tasklet_unlock(t); | |
19754 | + break; | |
19755 | + } | |
19756 | + } | |
19757 | } | |
19758 | } | |
19759 | ||
19760 | +static void tasklet_action(struct softirq_action *a) | |
19761 | +{ | |
19762 | + struct tasklet_struct *list; | |
19763 | + | |
19764 | + local_irq_disable(); | |
19765 | + | |
19766 | + list = __this_cpu_read(tasklet_vec.head); | |
19767 | + __this_cpu_write(tasklet_vec.head, NULL); | |
19768 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
19769 | + | |
19770 | + local_irq_enable(); | |
19771 | + | |
19772 | + __tasklet_action(a, list); | |
19773 | +} | |
19774 | + | |
c7c16703 | 19775 | static __latent_entropy void tasklet_hi_action(struct softirq_action *a) |
1a6e0f06 JK |
19776 | { |
19777 | struct tasklet_struct *list; | |
19778 | ||
19779 | local_irq_disable(); | |
19780 | + | |
19781 | list = __this_cpu_read(tasklet_hi_vec.head); | |
19782 | __this_cpu_write(tasklet_hi_vec.head, NULL); | |
19783 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
19784 | + | |
19785 | local_irq_enable(); | |
19786 | ||
19787 | - while (list) { | |
19788 | - struct tasklet_struct *t = list; | |
19789 | - | |
19790 | - list = list->next; | |
19791 | - | |
19792 | - if (tasklet_trylock(t)) { | |
19793 | - if (!atomic_read(&t->count)) { | |
19794 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
19795 | - &t->state)) | |
19796 | - BUG(); | |
19797 | - t->func(t->data); | |
19798 | - tasklet_unlock(t); | |
19799 | - continue; | |
19800 | - } | |
19801 | - tasklet_unlock(t); | |
19802 | - } | |
19803 | - | |
19804 | - local_irq_disable(); | |
19805 | - t->next = NULL; | |
19806 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
19807 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
19808 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
19809 | - local_irq_enable(); | |
19810 | - } | |
19811 | + __tasklet_action(a, list); | |
19812 | } | |
19813 | ||
19814 | void tasklet_init(struct tasklet_struct *t, | |
1f39f580 | 19815 | @@ -586,7 +1083,7 @@ void tasklet_kill(struct tasklet_struct *t) |
1a6e0f06 JK |
19816 | |
19817 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
19818 | do { | |
19819 | - yield(); | |
19820 | + msleep(1); | |
19821 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
19822 | } | |
19823 | tasklet_unlock_wait(t); | |
1f39f580 | 19824 | @@ -660,25 +1157,26 @@ void __init softirq_init(void) |
1a6e0f06 JK |
19825 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); |
19826 | } | |
19827 | ||
19828 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
19829 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
19830 | +{ | |
19831 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
19832 | + /* | |
19833 | + * Hack for now to avoid this busy-loop: | |
19834 | + */ | |
19835 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19836 | + msleep(1); | |
19837 | +#else | |
19838 | + barrier(); | |
19839 | +#endif | |
19840 | + } | |
19841 | +} | |
19842 | +EXPORT_SYMBOL(tasklet_unlock_wait); | |
19843 | +#endif | |
19844 | + | |
19845 | static int ksoftirqd_should_run(unsigned int cpu) | |
19846 | { | |
19847 | - return local_softirq_pending(); | |
19848 | -} | |
19849 | - | |
19850 | -static void run_ksoftirqd(unsigned int cpu) | |
19851 | -{ | |
19852 | - local_irq_disable(); | |
19853 | - if (local_softirq_pending()) { | |
19854 | - /* | |
19855 | - * We can safely run softirq on inline stack, as we are not deep | |
19856 | - * in the task stack here. | |
19857 | - */ | |
19858 | - __do_softirq(); | |
19859 | - local_irq_enable(); | |
19860 | - cond_resched_rcu_qs(); | |
19861 | - return; | |
19862 | - } | |
19863 | - local_irq_enable(); | |
19864 | + return ksoftirqd_softirq_pending(); | |
19865 | } | |
19866 | ||
19867 | #ifdef CONFIG_HOTPLUG_CPU | |
1f39f580 | 19868 | @@ -745,17 +1243,31 @@ static int takeover_tasklets(unsigned int cpu) |
1a6e0f06 JK |
19869 | |
19870 | static struct smp_hotplug_thread softirq_threads = { | |
19871 | .store = &ksoftirqd, | |
19872 | + .setup = ksoftirqd_set_sched_params, | |
19873 | .thread_should_run = ksoftirqd_should_run, | |
19874 | .thread_fn = run_ksoftirqd, | |
19875 | .thread_comm = "ksoftirqd/%u", | |
19876 | }; | |
19877 | ||
19878 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19879 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
19880 | + .store = &ktimer_softirqd, | |
19881 | + .setup = ktimer_softirqd_set_sched_params, | |
19882 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
19883 | + .thread_should_run = ktimer_softirqd_should_run, | |
19884 | + .thread_fn = run_ksoftirqd, | |
19885 | + .thread_comm = "ktimersoftd/%u", | |
19886 | +}; | |
19887 | +#endif | |
19888 | + | |
19889 | static __init int spawn_ksoftirqd(void) | |
19890 | { | |
c7c16703 JK |
19891 | cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL, |
19892 | takeover_tasklets); | |
1a6e0f06 | 19893 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); |
c7c16703 | 19894 | - |
1a6e0f06 JK |
19895 | +#ifdef CONFIG_PREEMPT_RT_FULL |
19896 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
19897 | +#endif | |
1a6e0f06 JK |
19898 | return 0; |
19899 | } | |
c7c16703 | 19900 | early_initcall(spawn_ksoftirqd); |
1a6e0f06 | 19901 | diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c |
c7c16703 | 19902 | index ec9ab2f01489..8b89dbedeaff 100644 |
1a6e0f06 JK |
19903 | --- a/kernel/stop_machine.c |
19904 | +++ b/kernel/stop_machine.c | |
c7c16703 | 19905 | @@ -36,7 +36,7 @@ struct cpu_stop_done { |
1a6e0f06 JK |
19906 | struct cpu_stopper { |
19907 | struct task_struct *thread; | |
19908 | ||
19909 | - spinlock_t lock; | |
19910 | + raw_spinlock_t lock; | |
19911 | bool enabled; /* is this stopper enabled? */ | |
19912 | struct list_head works; /* list of pending works */ | |
19913 | ||
c7c16703 | 19914 | @@ -78,14 +78,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) |
1a6e0f06 JK |
19915 | unsigned long flags; |
19916 | bool enabled; | |
19917 | ||
19918 | - spin_lock_irqsave(&stopper->lock, flags); | |
19919 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
19920 | enabled = stopper->enabled; | |
19921 | if (enabled) | |
19922 | __cpu_stop_queue_work(stopper, work); | |
19923 | else if (work->done) | |
19924 | cpu_stop_signal_done(work->done); | |
19925 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
19926 | ||
19927 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
19928 | return enabled; | |
19929 | } | |
19930 | ||
c7c16703 JK |
19931 | @@ -231,8 +231,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, |
19932 | struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); | |
1a6e0f06 | 19933 | int err; |
c7c16703 | 19934 | retry: |
1a6e0f06 JK |
19935 | - spin_lock_irq(&stopper1->lock); |
19936 | - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
19937 | + raw_spin_lock_irq(&stopper1->lock); | |
19938 | + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
19939 | ||
19940 | err = -ENOENT; | |
19941 | if (!stopper1->enabled || !stopper2->enabled) | |
c7c16703 | 19942 | @@ -255,8 +255,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, |
1a6e0f06 JK |
19943 | __cpu_stop_queue_work(stopper1, work1); |
19944 | __cpu_stop_queue_work(stopper2, work2); | |
19945 | unlock: | |
19946 | - spin_unlock(&stopper2->lock); | |
19947 | - spin_unlock_irq(&stopper1->lock); | |
19948 | + raw_spin_unlock(&stopper2->lock); | |
19949 | + raw_spin_unlock_irq(&stopper1->lock); | |
1a6e0f06 | 19950 | |
c7c16703 JK |
19951 | if (unlikely(err == -EDEADLK)) { |
19952 | while (stop_cpus_in_progress) | |
19953 | @@ -448,9 +448,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |
1a6e0f06 JK |
19954 | unsigned long flags; |
19955 | int run; | |
19956 | ||
19957 | - spin_lock_irqsave(&stopper->lock, flags); | |
19958 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
19959 | run = !list_empty(&stopper->works); | |
19960 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
19961 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
19962 | return run; | |
19963 | } | |
19964 | ||
c7c16703 | 19965 | @@ -461,13 +461,13 @@ static void cpu_stopper_thread(unsigned int cpu) |
1a6e0f06 JK |
19966 | |
19967 | repeat: | |
19968 | work = NULL; | |
19969 | - spin_lock_irq(&stopper->lock); | |
19970 | + raw_spin_lock_irq(&stopper->lock); | |
19971 | if (!list_empty(&stopper->works)) { | |
19972 | work = list_first_entry(&stopper->works, | |
19973 | struct cpu_stop_work, list); | |
19974 | list_del_init(&work->list); | |
19975 | } | |
19976 | - spin_unlock_irq(&stopper->lock); | |
19977 | + raw_spin_unlock_irq(&stopper->lock); | |
19978 | ||
19979 | if (work) { | |
19980 | cpu_stop_fn_t fn = work->fn; | |
c7c16703 | 19981 | @@ -475,6 +475,8 @@ static void cpu_stopper_thread(unsigned int cpu) |
1a6e0f06 JK |
19982 | struct cpu_stop_done *done = work->done; |
19983 | int ret; | |
19984 | ||
c7c16703 | 19985 | + /* XXX */ |
1a6e0f06 JK |
19986 | + |
19987 | /* cpu stop callbacks must not sleep, make in_atomic() == T */ | |
19988 | preempt_count_inc(); | |
19989 | ret = fn(arg); | |
c7c16703 | 19990 | @@ -541,7 +543,7 @@ static int __init cpu_stop_init(void) |
1a6e0f06 JK |
19991 | for_each_possible_cpu(cpu) { |
19992 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
19993 | ||
19994 | - spin_lock_init(&stopper->lock); | |
19995 | + raw_spin_lock_init(&stopper->lock); | |
19996 | INIT_LIST_HEAD(&stopper->works); | |
19997 | } | |
19998 | ||
1a6e0f06 | 19999 | diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c |
c7c16703 | 20000 | index bb5ec425dfe0..8338b14ed3a3 100644 |
1a6e0f06 JK |
20001 | --- a/kernel/time/hrtimer.c |
20002 | +++ b/kernel/time/hrtimer.c | |
20003 | @@ -53,6 +53,7 @@ | |
20004 | #include <asm/uaccess.h> | |
20005 | ||
20006 | #include <trace/events/timer.h> | |
20007 | +#include <trace/events/hist.h> | |
20008 | ||
20009 | #include "tick-internal.h" | |
20010 | ||
20011 | @@ -695,6 +696,29 @@ static void hrtimer_switch_to_hres(void) | |
20012 | retrigger_next_event(NULL); | |
20013 | } | |
20014 | ||
20015 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20016 | + | |
20017 | +static struct swork_event clock_set_delay_work; | |
20018 | + | |
20019 | +static void run_clock_set_delay(struct swork_event *event) | |
20020 | +{ | |
20021 | + clock_was_set(); | |
20022 | +} | |
20023 | + | |
20024 | +void clock_was_set_delayed(void) | |
20025 | +{ | |
20026 | + swork_queue(&clock_set_delay_work); | |
20027 | +} | |
20028 | + | |
20029 | +static __init int create_clock_set_delay_thread(void) | |
20030 | +{ | |
20031 | + WARN_ON(swork_get()); | |
20032 | + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); | |
20033 | + return 0; | |
20034 | +} | |
20035 | +early_initcall(create_clock_set_delay_thread); | |
20036 | +#else /* PREEMPT_RT_FULL */ | |
20037 | + | |
20038 | static void clock_was_set_work(struct work_struct *work) | |
20039 | { | |
20040 | clock_was_set(); | |
20041 | @@ -710,6 +734,7 @@ void clock_was_set_delayed(void) | |
20042 | { | |
20043 | schedule_work(&hrtimer_work); | |
20044 | } | |
20045 | +#endif | |
20046 | ||
20047 | #else | |
20048 | ||
20049 | @@ -719,11 +744,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
20050 | static inline void hrtimer_switch_to_hres(void) { } | |
20051 | static inline void | |
20052 | hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
20053 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
20054 | - struct hrtimer_clock_base *base) | |
20055 | -{ | |
20056 | - return 0; | |
20057 | -} | |
20058 | +static inline void hrtimer_reprogram(struct hrtimer *timer, | |
20059 | + struct hrtimer_clock_base *base) { } | |
20060 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
20061 | static inline void retrigger_next_event(void *arg) { } | |
20062 | ||
20063 | @@ -855,6 +877,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |
20064 | } | |
20065 | EXPORT_SYMBOL_GPL(hrtimer_forward); | |
20066 | ||
20067 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20068 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
20069 | + | |
20070 | +/** | |
20071 | + * hrtimer_wait_for_timer - Wait for a running timer | |
20072 | + * | |
20073 | + * @timer: timer to wait for | |
20074 | + * | |
20075 | + * The function waits in case the timers callback function is | |
20076 | + * currently executed on the waitqueue of the timer base. The | |
20077 | + * waitqueue is woken up after the timer callback function has | |
20078 | + * finished execution. | |
20079 | + */ | |
20080 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) | |
20081 | +{ | |
20082 | + struct hrtimer_clock_base *base = timer->base; | |
20083 | + | |
20084 | + if (base && base->cpu_base && !timer->irqsafe) | |
20085 | + wait_event(base->cpu_base->wait, | |
20086 | + !(hrtimer_callback_running(timer))); | |
20087 | +} | |
20088 | + | |
20089 | +#else | |
20090 | +# define wake_up_timer_waiters(b) do { } while (0) | |
20091 | +#endif | |
20092 | + | |
20093 | /* | |
20094 | * enqueue_hrtimer - internal function to (re)start a timer | |
20095 | * | |
20096 | @@ -896,6 +944,11 @@ static void __remove_hrtimer(struct hrtimer *timer, | |
20097 | if (!(state & HRTIMER_STATE_ENQUEUED)) | |
20098 | return; | |
20099 | ||
20100 | + if (unlikely(!list_empty(&timer->cb_entry))) { | |
20101 | + list_del_init(&timer->cb_entry); | |
20102 | + return; | |
20103 | + } | |
20104 | + | |
20105 | if (!timerqueue_del(&base->active, &timer->node)) | |
20106 | cpu_base->active_bases &= ~(1 << base->index); | |
20107 | ||
20108 | @@ -991,7 +1044,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
20109 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
20110 | ||
20111 | timer_stats_hrtimer_set_start_info(timer); | |
20112 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20113 | + { | |
20114 | + ktime_t now = new_base->get_time(); | |
20115 | ||
20116 | + if (ktime_to_ns(tim) < ktime_to_ns(now)) | |
20117 | + timer->praecox = now; | |
20118 | + else | |
20119 | + timer->praecox = ktime_set(0, 0); | |
20120 | + } | |
20121 | +#endif | |
20122 | leftmost = enqueue_hrtimer(timer, new_base); | |
20123 | if (!leftmost) | |
20124 | goto unlock; | |
20125 | @@ -1063,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer) | |
20126 | ||
20127 | if (ret >= 0) | |
20128 | return ret; | |
20129 | - cpu_relax(); | |
20130 | + hrtimer_wait_for_timer(timer); | |
20131 | } | |
20132 | } | |
20133 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
20134 | @@ -1127,6 +1189,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
20135 | ||
20136 | base = hrtimer_clockid_to_base(clock_id); | |
20137 | timer->base = &cpu_base->clock_base[base]; | |
20138 | + INIT_LIST_HEAD(&timer->cb_entry); | |
20139 | timerqueue_init(&timer->node); | |
20140 | ||
20141 | #ifdef CONFIG_TIMER_STATS | |
20142 | @@ -1167,6 +1230,7 @@ bool hrtimer_active(const struct hrtimer *timer) | |
20143 | seq = raw_read_seqcount_begin(&cpu_base->seq); | |
20144 | ||
20145 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
20146 | + cpu_base->running_soft == timer || | |
20147 | cpu_base->running == timer) | |
20148 | return true; | |
20149 | ||
20150 | @@ -1265,10 +1329,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
20151 | cpu_base->running = NULL; | |
20152 | } | |
20153 | ||
20154 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20155 | +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, | |
20156 | + struct hrtimer_clock_base *base) | |
20157 | +{ | |
20158 | + int leftmost; | |
20159 | + | |
20160 | + if (restart != HRTIMER_NORESTART && | |
20161 | + !(timer->state & HRTIMER_STATE_ENQUEUED)) { | |
20162 | + | |
20163 | + leftmost = enqueue_hrtimer(timer, base); | |
20164 | + if (!leftmost) | |
20165 | + return; | |
20166 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
20167 | + if (!hrtimer_is_hres_active(timer)) { | |
20168 | + /* | |
20169 | + * Kick to reschedule the next tick to handle the new timer | |
20170 | + * on dynticks target. | |
20171 | + */ | |
20172 | + if (base->cpu_base->nohz_active) | |
20173 | + wake_up_nohz_cpu(base->cpu_base->cpu); | |
20174 | + } else { | |
20175 | + | |
20176 | + hrtimer_reprogram(timer, base); | |
20177 | + } | |
20178 | +#endif | |
20179 | + } | |
20180 | +} | |
20181 | + | |
20182 | +/* | |
20183 | + * The changes in mainline which removed the callback modes from | |
20184 | + * hrtimer are not yet working with -rt. The non wakeup_process() | |
20185 | + * based callbacks which involve sleeping locks need to be treated | |
20186 | + * seperately. | |
20187 | + */ | |
20188 | +static void hrtimer_rt_run_pending(void) | |
20189 | +{ | |
20190 | + enum hrtimer_restart (*fn)(struct hrtimer *); | |
20191 | + struct hrtimer_cpu_base *cpu_base; | |
20192 | + struct hrtimer_clock_base *base; | |
20193 | + struct hrtimer *timer; | |
20194 | + int index, restart; | |
20195 | + | |
20196 | + local_irq_disable(); | |
20197 | + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); | |
20198 | + | |
20199 | + raw_spin_lock(&cpu_base->lock); | |
20200 | + | |
20201 | + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | |
20202 | + base = &cpu_base->clock_base[index]; | |
20203 | + | |
20204 | + while (!list_empty(&base->expired)) { | |
20205 | + timer = list_first_entry(&base->expired, | |
20206 | + struct hrtimer, cb_entry); | |
20207 | + | |
20208 | + /* | |
20209 | + * Same as the above __run_hrtimer function | |
20210 | + * just we run with interrupts enabled. | |
20211 | + */ | |
20212 | + debug_deactivate(timer); | |
20213 | + cpu_base->running_soft = timer; | |
20214 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
20215 | + | |
20216 | + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
20217 | + timer_stats_account_hrtimer(timer); | |
20218 | + fn = timer->function; | |
20219 | + | |
20220 | + raw_spin_unlock_irq(&cpu_base->lock); | |
20221 | + restart = fn(timer); | |
20222 | + raw_spin_lock_irq(&cpu_base->lock); | |
20223 | + | |
20224 | + hrtimer_rt_reprogram(restart, timer, base); | |
20225 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
20226 | + | |
20227 | + WARN_ON_ONCE(cpu_base->running_soft != timer); | |
20228 | + cpu_base->running_soft = NULL; | |
20229 | + } | |
20230 | + } | |
20231 | + | |
20232 | + raw_spin_unlock_irq(&cpu_base->lock); | |
20233 | + | |
20234 | + wake_up_timer_waiters(cpu_base); | |
20235 | +} | |
20236 | + | |
20237 | +static int hrtimer_rt_defer(struct hrtimer *timer) | |
20238 | +{ | |
20239 | + if (timer->irqsafe) | |
20240 | + return 0; | |
20241 | + | |
20242 | + __remove_hrtimer(timer, timer->base, timer->state, 0); | |
20243 | + list_add_tail(&timer->cb_entry, &timer->base->expired); | |
20244 | + return 1; | |
20245 | +} | |
20246 | + | |
20247 | +#else | |
20248 | + | |
20249 | +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } | |
20250 | + | |
20251 | +#endif | |
20252 | + | |
20253 | +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); | |
20254 | + | |
20255 | static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
20256 | { | |
20257 | struct hrtimer_clock_base *base = cpu_base->clock_base; | |
20258 | unsigned int active = cpu_base->active_bases; | |
20259 | + int raise = 0; | |
20260 | ||
20261 | for (; active; base++, active >>= 1) { | |
20262 | struct timerqueue_node *node; | |
20263 | @@ -1284,6 +1450,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
20264 | ||
20265 | timer = container_of(node, struct hrtimer, node); | |
20266 | ||
20267 | + trace_hrtimer_interrupt(raw_smp_processor_id(), | |
20268 | + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? | |
20269 | + timer->praecox : hrtimer_get_expires(timer), | |
20270 | + basenow)), | |
20271 | + current, | |
20272 | + timer->function == hrtimer_wakeup ? | |
20273 | + container_of(timer, struct hrtimer_sleeper, | |
20274 | + timer)->task : NULL); | |
20275 | + | |
20276 | /* | |
20277 | * The immediate goal for using the softexpires is | |
20278 | * minimizing wakeups, not running timers at the | |
20279 | @@ -1299,9 +1474,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
20280 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
20281 | break; | |
20282 | ||
20283 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
20284 | + if (!hrtimer_rt_defer(timer)) | |
20285 | + __run_hrtimer(cpu_base, base, timer, &basenow); | |
20286 | + else | |
20287 | + raise = 1; | |
20288 | } | |
20289 | } | |
20290 | + if (raise) | |
20291 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
20292 | } | |
20293 | ||
20294 | #ifdef CONFIG_HIGH_RES_TIMERS | |
20295 | @@ -1464,16 +1644,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |
20296 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |
20297 | { | |
20298 | sl->timer.function = hrtimer_wakeup; | |
20299 | + sl->timer.irqsafe = 1; | |
20300 | sl->task = task; | |
20301 | } | |
20302 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
20303 | ||
20304 | -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | |
20305 | +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, | |
20306 | + unsigned long state) | |
20307 | { | |
20308 | hrtimer_init_sleeper(t, current); | |
20309 | ||
20310 | do { | |
20311 | - set_current_state(TASK_INTERRUPTIBLE); | |
20312 | + set_current_state(state); | |
20313 | hrtimer_start_expires(&t->timer, mode); | |
20314 | ||
20315 | if (likely(t->task)) | |
20316 | @@ -1515,7 +1697,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
20317 | HRTIMER_MODE_ABS); | |
20318 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
20319 | ||
20320 | - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | |
20321 | + /* cpu_chill() does not care about restart state. */ | |
20322 | + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) | |
20323 | goto out; | |
20324 | ||
20325 | rmtp = restart->nanosleep.rmtp; | |
20326 | @@ -1532,8 +1715,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
20327 | return ret; | |
20328 | } | |
20329 | ||
20330 | -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
20331 | - const enum hrtimer_mode mode, const clockid_t clockid) | |
20332 | +static long | |
20333 | +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
20334 | + const enum hrtimer_mode mode, const clockid_t clockid, | |
20335 | + unsigned long state) | |
20336 | { | |
20337 | struct restart_block *restart; | |
20338 | struct hrtimer_sleeper t; | |
20339 | @@ -1546,7 +1731,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
20340 | ||
20341 | hrtimer_init_on_stack(&t.timer, clockid, mode); | |
20342 | hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); | |
20343 | - if (do_nanosleep(&t, mode)) | |
20344 | + if (do_nanosleep(&t, mode, state)) | |
20345 | goto out; | |
20346 | ||
20347 | /* Absolute timers do not update the rmtp value and restart: */ | |
20348 | @@ -1573,6 +1758,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
20349 | return ret; | |
20350 | } | |
20351 | ||
20352 | +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
20353 | + const enum hrtimer_mode mode, const clockid_t clockid) | |
20354 | +{ | |
20355 | + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); | |
20356 | +} | |
20357 | + | |
20358 | SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
20359 | struct timespec __user *, rmtp) | |
20360 | { | |
20361 | @@ -1587,6 +1778,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
20362 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | |
20363 | } | |
20364 | ||
20365 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20366 | +/* | |
20367 | + * Sleep for 1 ms in hope whoever holds what we want will let it go. | |
20368 | + */ | |
20369 | +void cpu_chill(void) | |
20370 | +{ | |
20371 | + struct timespec tu = { | |
20372 | + .tv_nsec = NSEC_PER_MSEC, | |
20373 | + }; | |
20374 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
20375 | + | |
20376 | + current->flags |= PF_NOFREEZE; | |
20377 | + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, | |
20378 | + TASK_UNINTERRUPTIBLE); | |
20379 | + if (!freeze_flag) | |
20380 | + current->flags &= ~PF_NOFREEZE; | |
20381 | +} | |
20382 | +EXPORT_SYMBOL(cpu_chill); | |
20383 | +#endif | |
20384 | + | |
20385 | /* | |
20386 | * Functions related to boot-time initialization: | |
20387 | */ | |
20388 | @@ -1598,10 +1809,14 @@ int hrtimers_prepare_cpu(unsigned int cpu) | |
20389 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | |
20390 | cpu_base->clock_base[i].cpu_base = cpu_base; | |
20391 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
20392 | + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); | |
20393 | } | |
20394 | ||
20395 | cpu_base->cpu = cpu; | |
20396 | hrtimer_init_hres(cpu_base); | |
20397 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20398 | + init_waitqueue_head(&cpu_base->wait); | |
20399 | +#endif | |
20400 | return 0; | |
20401 | } | |
20402 | ||
20403 | @@ -1671,9 +1886,26 @@ int hrtimers_dead_cpu(unsigned int scpu) | |
20404 | ||
20405 | #endif /* CONFIG_HOTPLUG_CPU */ | |
20406 | ||
20407 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20408 | + | |
20409 | +static void run_hrtimer_softirq(struct softirq_action *h) | |
20410 | +{ | |
20411 | + hrtimer_rt_run_pending(); | |
20412 | +} | |
20413 | + | |
20414 | +static void hrtimers_open_softirq(void) | |
20415 | +{ | |
20416 | + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | |
20417 | +} | |
20418 | + | |
20419 | +#else | |
20420 | +static void hrtimers_open_softirq(void) { } | |
20421 | +#endif | |
20422 | + | |
20423 | void __init hrtimers_init(void) | |
20424 | { | |
20425 | hrtimers_prepare_cpu(smp_processor_id()); | |
20426 | + hrtimers_open_softirq(); | |
20427 | } | |
20428 | ||
20429 | /** | |
20430 | diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c | |
20431 | index 1d5c7204ddc9..184de6751180 100644 | |
20432 | --- a/kernel/time/itimer.c | |
20433 | +++ b/kernel/time/itimer.c | |
20434 | @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) | |
20435 | /* We are sharing ->siglock with it_real_fn() */ | |
20436 | if (hrtimer_try_to_cancel(timer) < 0) { | |
20437 | spin_unlock_irq(&tsk->sighand->siglock); | |
20438 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
20439 | goto again; | |
20440 | } | |
20441 | expires = timeval_to_ktime(value->it_value); | |
20442 | diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c | |
20443 | index 555e21f7b966..a5d6435fabbb 100644 | |
20444 | --- a/kernel/time/jiffies.c | |
20445 | +++ b/kernel/time/jiffies.c | |
20446 | @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = { | |
20447 | .max_cycles = 10, | |
20448 | }; | |
20449 | ||
20450 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
20451 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
20452 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
20453 | ||
20454 | #if (BITS_PER_LONG < 64) | |
20455 | u64 get_jiffies_64(void) | |
20456 | @@ -83,9 +84,9 @@ u64 get_jiffies_64(void) | |
20457 | u64 ret; | |
20458 | ||
20459 | do { | |
20460 | - seq = read_seqbegin(&jiffies_lock); | |
20461 | + seq = read_seqcount_begin(&jiffies_seq); | |
20462 | ret = jiffies_64; | |
20463 | - } while (read_seqretry(&jiffies_lock, seq)); | |
20464 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
20465 | return ret; | |
20466 | } | |
20467 | EXPORT_SYMBOL(get_jiffies_64); | |
20468 | diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c | |
20469 | index 6df8927c58a5..05b7391bf9bd 100644 | |
20470 | --- a/kernel/time/ntp.c | |
20471 | +++ b/kernel/time/ntp.c | |
20472 | @@ -17,6 +17,7 @@ | |
20473 | #include <linux/module.h> | |
20474 | #include <linux/rtc.h> | |
20475 | #include <linux/math64.h> | |
20476 | +#include <linux/swork.h> | |
20477 | ||
20478 | #include "ntp_internal.h" | |
20479 | #include "timekeeping_internal.h" | |
20480 | @@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_struct *work) | |
20481 | &sync_cmos_work, timespec64_to_jiffies(&next)); | |
20482 | } | |
20483 | ||
20484 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20485 | + | |
20486 | +static void run_clock_set_delay(struct swork_event *event) | |
20487 | +{ | |
20488 | + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
20489 | +} | |
20490 | + | |
20491 | +static struct swork_event ntp_cmos_swork; | |
20492 | + | |
20493 | +void ntp_notify_cmos_timer(void) | |
20494 | +{ | |
20495 | + swork_queue(&ntp_cmos_swork); | |
20496 | +} | |
20497 | + | |
20498 | +static __init int create_cmos_delay_thread(void) | |
20499 | +{ | |
20500 | + WARN_ON(swork_get()); | |
20501 | + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay); | |
20502 | + return 0; | |
20503 | +} | |
20504 | +early_initcall(create_cmos_delay_thread); | |
20505 | + | |
20506 | +#else | |
20507 | + | |
20508 | void ntp_notify_cmos_timer(void) | |
20509 | { | |
20510 | queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
20511 | } | |
20512 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
20513 | ||
20514 | #else | |
20515 | void ntp_notify_cmos_timer(void) { } | |
20516 | diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c | |
20517 | index 39008d78927a..633f4eaca9e7 100644 | |
20518 | --- a/kernel/time/posix-cpu-timers.c | |
20519 | +++ b/kernel/time/posix-cpu-timers.c | |
20520 | @@ -3,6 +3,7 @@ | |
20521 | */ | |
20522 | ||
20523 | #include <linux/sched.h> | |
20524 | +#include <linux/sched/rt.h> | |
20525 | #include <linux/posix-timers.h> | |
20526 | #include <linux/errno.h> | |
20527 | #include <linux/math64.h> | |
20528 | @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |
20529 | /* | |
20530 | * Disarm any old timer after extracting its expiry time. | |
20531 | */ | |
20532 | - WARN_ON_ONCE(!irqs_disabled()); | |
20533 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
20534 | ||
20535 | ret = 0; | |
20536 | old_incr = timer->it.cpu.incr; | |
20537 | @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |
20538 | /* | |
20539 | * Now re-arm for the new expiry time. | |
20540 | */ | |
20541 | - WARN_ON_ONCE(!irqs_disabled()); | |
20542 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
20543 | arm_timer(timer); | |
20544 | unlock_task_sighand(p, &flags); | |
20545 | ||
20546 | @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |
20547 | * already updated our counts. We need to check if any timers fire now. | |
20548 | * Interrupts are disabled. | |
20549 | */ | |
20550 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
20551 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
20552 | { | |
20553 | LIST_HEAD(firing); | |
20554 | struct k_itimer *timer, *next; | |
20555 | unsigned long flags; | |
20556 | ||
20557 | - WARN_ON_ONCE(!irqs_disabled()); | |
20558 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
20559 | ||
20560 | /* | |
20561 | * The fast path checks that there are no expired thread or thread | |
20562 | @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |
20563 | } | |
20564 | } | |
20565 | ||
20566 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
20567 | +#include <linux/kthread.h> | |
20568 | +#include <linux/cpu.h> | |
20569 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
20570 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
20571 | + | |
20572 | +static int posix_cpu_timers_thread(void *data) | |
20573 | +{ | |
20574 | + int cpu = (long)data; | |
20575 | + | |
20576 | + BUG_ON(per_cpu(posix_timer_task,cpu) != current); | |
20577 | + | |
20578 | + while (!kthread_should_stop()) { | |
20579 | + struct task_struct *tsk = NULL; | |
20580 | + struct task_struct *next = NULL; | |
20581 | + | |
20582 | + if (cpu_is_offline(cpu)) | |
20583 | + goto wait_to_die; | |
20584 | + | |
20585 | + /* grab task list */ | |
20586 | + raw_local_irq_disable(); | |
20587 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
20588 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
20589 | + raw_local_irq_enable(); | |
20590 | + | |
20591 | + /* its possible the list is empty, just return */ | |
20592 | + if (!tsk) { | |
20593 | + set_current_state(TASK_INTERRUPTIBLE); | |
20594 | + schedule(); | |
20595 | + __set_current_state(TASK_RUNNING); | |
20596 | + continue; | |
20597 | + } | |
20598 | + | |
20599 | + /* Process task list */ | |
20600 | + while (1) { | |
20601 | + /* save next */ | |
20602 | + next = tsk->posix_timer_list; | |
20603 | + | |
20604 | + /* run the task timers, clear its ptr and | |
20605 | + * unreference it | |
20606 | + */ | |
20607 | + __run_posix_cpu_timers(tsk); | |
20608 | + tsk->posix_timer_list = NULL; | |
20609 | + put_task_struct(tsk); | |
20610 | + | |
20611 | + /* check if this is the last on the list */ | |
20612 | + if (next == tsk) | |
20613 | + break; | |
20614 | + tsk = next; | |
20615 | + } | |
20616 | + } | |
20617 | + return 0; | |
20618 | + | |
20619 | +wait_to_die: | |
20620 | + /* Wait for kthread_stop */ | |
20621 | + set_current_state(TASK_INTERRUPTIBLE); | |
20622 | + while (!kthread_should_stop()) { | |
20623 | + schedule(); | |
20624 | + set_current_state(TASK_INTERRUPTIBLE); | |
20625 | + } | |
20626 | + __set_current_state(TASK_RUNNING); | |
20627 | + return 0; | |
20628 | +} | |
20629 | + | |
20630 | +static inline int __fastpath_timer_check(struct task_struct *tsk) | |
20631 | +{ | |
20632 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ | |
20633 | + if (unlikely(tsk->exit_state)) | |
20634 | + return 0; | |
20635 | + | |
20636 | + if (!task_cputime_zero(&tsk->cputime_expires)) | |
20637 | + return 1; | |
20638 | + | |
20639 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
20640 | + return 1; | |
20641 | + | |
20642 | + return 0; | |
20643 | +} | |
20644 | + | |
20645 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
20646 | +{ | |
20647 | + unsigned long cpu = smp_processor_id(); | |
20648 | + struct task_struct *tasklist; | |
20649 | + | |
20650 | + BUG_ON(!irqs_disabled()); | |
20651 | + if(!per_cpu(posix_timer_task, cpu)) | |
20652 | + return; | |
20653 | + /* get per-cpu references */ | |
20654 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
20655 | + | |
20656 | + /* check to see if we're already queued */ | |
20657 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
20658 | + get_task_struct(tsk); | |
20659 | + if (tasklist) { | |
20660 | + tsk->posix_timer_list = tasklist; | |
20661 | + } else { | |
20662 | + /* | |
20663 | + * The list is terminated by a self-pointing | |
20664 | + * task_struct | |
20665 | + */ | |
20666 | + tsk->posix_timer_list = tsk; | |
20667 | + } | |
20668 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
20669 | + | |
20670 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
20671 | + } | |
20672 | +} | |
20673 | + | |
20674 | +/* | |
20675 | + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. | |
20676 | + * Here we can start up the necessary migration thread for the new CPU. | |
20677 | + */ | |
20678 | +static int posix_cpu_thread_call(struct notifier_block *nfb, | |
20679 | + unsigned long action, void *hcpu) | |
20680 | +{ | |
20681 | + int cpu = (long)hcpu; | |
20682 | + struct task_struct *p; | |
20683 | + struct sched_param param; | |
20684 | + | |
20685 | + switch (action) { | |
20686 | + case CPU_UP_PREPARE: | |
20687 | + p = kthread_create(posix_cpu_timers_thread, hcpu, | |
20688 | + "posixcputmr/%d",cpu); | |
20689 | + if (IS_ERR(p)) | |
20690 | + return NOTIFY_BAD; | |
20691 | + p->flags |= PF_NOFREEZE; | |
20692 | + kthread_bind(p, cpu); | |
20693 | + /* Must be high prio to avoid getting starved */ | |
20694 | + param.sched_priority = MAX_RT_PRIO-1; | |
20695 | + sched_setscheduler(p, SCHED_FIFO, ¶m); | |
20696 | + per_cpu(posix_timer_task,cpu) = p; | |
20697 | + break; | |
20698 | + case CPU_ONLINE: | |
20699 | + /* Strictly unneccessary, as first user will wake it. */ | |
20700 | + wake_up_process(per_cpu(posix_timer_task,cpu)); | |
20701 | + break; | |
20702 | +#ifdef CONFIG_HOTPLUG_CPU | |
20703 | + case CPU_UP_CANCELED: | |
20704 | + /* Unbind it from offline cpu so it can run. Fall thru. */ | |
20705 | + kthread_bind(per_cpu(posix_timer_task, cpu), | |
20706 | + cpumask_any(cpu_online_mask)); | |
20707 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
20708 | + per_cpu(posix_timer_task,cpu) = NULL; | |
20709 | + break; | |
20710 | + case CPU_DEAD: | |
20711 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
20712 | + per_cpu(posix_timer_task,cpu) = NULL; | |
20713 | + break; | |
20714 | +#endif | |
20715 | + } | |
20716 | + return NOTIFY_OK; | |
20717 | +} | |
20718 | + | |
20719 | +/* Register at highest priority so that task migration (migrate_all_tasks) | |
20720 | + * happens before everything else. | |
20721 | + */ | |
20722 | +static struct notifier_block posix_cpu_thread_notifier = { | |
20723 | + .notifier_call = posix_cpu_thread_call, | |
20724 | + .priority = 10 | |
20725 | +}; | |
20726 | + | |
20727 | +static int __init posix_cpu_thread_init(void) | |
20728 | +{ | |
20729 | + void *hcpu = (void *)(long)smp_processor_id(); | |
20730 | + /* Start one for boot CPU. */ | |
20731 | + unsigned long cpu; | |
20732 | + | |
20733 | + /* init the per-cpu posix_timer_tasklets */ | |
20734 | + for_each_possible_cpu(cpu) | |
20735 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
20736 | + | |
20737 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); | |
20738 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); | |
20739 | + register_cpu_notifier(&posix_cpu_thread_notifier); | |
20740 | + return 0; | |
20741 | +} | |
20742 | +early_initcall(posix_cpu_thread_init); | |
20743 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
20744 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
20745 | +{ | |
20746 | + __run_posix_cpu_timers(tsk); | |
20747 | +} | |
20748 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
20749 | + | |
20750 | /* | |
20751 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
20752 | * The tsk->sighand->siglock must be held by the caller. | |
20753 | diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c | |
20754 | index f2826c35e918..464a98155a0e 100644 | |
20755 | --- a/kernel/time/posix-timers.c | |
20756 | +++ b/kernel/time/posix-timers.c | |
20757 | @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |
20758 | static struct pid *good_sigevent(sigevent_t * event) | |
20759 | { | |
20760 | struct task_struct *rtn = current->group_leader; | |
20761 | + int sig = event->sigev_signo; | |
20762 | ||
20763 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | |
20764 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | |
20765 | @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event) | |
20766 | return NULL; | |
20767 | ||
20768 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | |
20769 | - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | |
20770 | + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || | |
20771 | + sig_kernel_coredump(sig))) | |
20772 | return NULL; | |
20773 | ||
20774 | return task_pid(rtn); | |
20775 | @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) | |
20776 | return overrun; | |
20777 | } | |
20778 | ||
20779 | +/* | |
20780 | + * Protected by RCU! | |
20781 | + */ | |
20782 | +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) | |
20783 | +{ | |
20784 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20785 | + if (kc->timer_set == common_timer_set) | |
20786 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
20787 | + else | |
20788 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
20789 | + schedule_timeout(1); | |
20790 | +#endif | |
20791 | +} | |
20792 | + | |
20793 | /* Set a POSIX.1b interval timer. */ | |
20794 | /* timr->it_lock is taken. */ | |
20795 | static int | |
20796 | @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
20797 | if (!timr) | |
20798 | return -EINVAL; | |
20799 | ||
20800 | + rcu_read_lock(); | |
20801 | kc = clockid_to_kclock(timr->it_clock); | |
20802 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
20803 | error = -EINVAL; | |
20804 | @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
20805 | ||
20806 | unlock_timer(timr, flag); | |
20807 | if (error == TIMER_RETRY) { | |
20808 | + timer_wait_for_callback(kc, timr); | |
20809 | rtn = NULL; // We already got the old time... | |
20810 | + rcu_read_unlock(); | |
20811 | goto retry; | |
20812 | } | |
20813 | + rcu_read_unlock(); | |
20814 | ||
20815 | if (old_setting && !error && | |
20816 | copy_to_user(old_setting, &old_spec, sizeof (old_spec))) | |
20817 | @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) | |
20818 | if (!timer) | |
20819 | return -EINVAL; | |
20820 | ||
20821 | + rcu_read_lock(); | |
20822 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
20823 | unlock_timer(timer, flags); | |
20824 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
20825 | + timer); | |
20826 | + rcu_read_unlock(); | |
20827 | goto retry_delete; | |
20828 | } | |
20829 | + rcu_read_unlock(); | |
20830 | ||
20831 | spin_lock(¤t->sighand->siglock); | |
20832 | list_del(&timer->list); | |
20833 | @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer) | |
20834 | retry_delete: | |
20835 | spin_lock_irqsave(&timer->it_lock, flags); | |
20836 | ||
20837 | - if (timer_delete_hook(timer) == TIMER_RETRY) { | |
20838 | + /* On RT we can race with a deletion */ | |
20839 | + if (!timer->it_signal) { | |
20840 | unlock_timer(timer, flags); | |
20841 | + return; | |
20842 | + } | |
20843 | + | |
20844 | + if (timer_delete_hook(timer) == TIMER_RETRY) { | |
20845 | + rcu_read_lock(); | |
20846 | + unlock_timer(timer, flags); | |
20847 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
20848 | + timer); | |
20849 | + rcu_read_unlock(); | |
20850 | goto retry_delete; | |
20851 | } | |
20852 | list_del(&timer->list); | |
20853 | diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c | |
20854 | index 690b797f522e..fe8ba1619879 100644 | |
20855 | --- a/kernel/time/tick-broadcast-hrtimer.c | |
20856 | +++ b/kernel/time/tick-broadcast-hrtimer.c | |
20857 | @@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void) | |
20858 | { | |
20859 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
20860 | bctimer.function = bc_handler; | |
20861 | + bctimer.irqsafe = true; | |
20862 | clockevents_register_device(&ce_broadcast_hrtimer); | |
20863 | } | |
20864 | diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c | |
20865 | index 4fcd99e12aa0..5a47f2e98faf 100644 | |
20866 | --- a/kernel/time/tick-common.c | |
20867 | +++ b/kernel/time/tick-common.c | |
20868 | @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void) | |
20869 | static void tick_periodic(int cpu) | |
20870 | { | |
20871 | if (tick_do_timer_cpu == cpu) { | |
20872 | - write_seqlock(&jiffies_lock); | |
20873 | + raw_spin_lock(&jiffies_lock); | |
20874 | + write_seqcount_begin(&jiffies_seq); | |
20875 | ||
20876 | /* Keep track of the next tick event */ | |
20877 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
20878 | ||
20879 | do_timer(1); | |
20880 | - write_sequnlock(&jiffies_lock); | |
20881 | + write_seqcount_end(&jiffies_seq); | |
20882 | + raw_spin_unlock(&jiffies_lock); | |
20883 | update_wall_time(); | |
20884 | } | |
20885 | ||
20886 | @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |
20887 | ktime_t next; | |
20888 | ||
20889 | do { | |
20890 | - seq = read_seqbegin(&jiffies_lock); | |
20891 | + seq = read_seqcount_begin(&jiffies_seq); | |
20892 | next = tick_next_period; | |
20893 | - } while (read_seqretry(&jiffies_lock, seq)); | |
20894 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
20895 | ||
20896 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); | |
20897 | ||
20898 | diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c | |
c7c16703 | 20899 | index 3bcb61b52f6c..66d85482a96e 100644 |
1a6e0f06 JK |
20900 | --- a/kernel/time/tick-sched.c |
20901 | +++ b/kernel/time/tick-sched.c | |
20902 | @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) | |
20903 | return; | |
20904 | ||
20905 | /* Reevaluate with jiffies_lock held */ | |
20906 | - write_seqlock(&jiffies_lock); | |
20907 | + raw_spin_lock(&jiffies_lock); | |
20908 | + write_seqcount_begin(&jiffies_seq); | |
20909 | ||
20910 | delta = ktime_sub(now, last_jiffies_update); | |
20911 | if (delta.tv64 >= tick_period.tv64) { | |
20912 | @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now) | |
20913 | /* Keep the tick_next_period variable up to date */ | |
20914 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
20915 | } else { | |
20916 | - write_sequnlock(&jiffies_lock); | |
20917 | + write_seqcount_end(&jiffies_seq); | |
20918 | + raw_spin_unlock(&jiffies_lock); | |
20919 | return; | |
20920 | } | |
20921 | - write_sequnlock(&jiffies_lock); | |
20922 | + write_seqcount_end(&jiffies_seq); | |
20923 | + raw_spin_unlock(&jiffies_lock); | |
20924 | update_wall_time(); | |
20925 | } | |
20926 | ||
20927 | @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void) | |
20928 | { | |
20929 | ktime_t period; | |
20930 | ||
20931 | - write_seqlock(&jiffies_lock); | |
20932 | + raw_spin_lock(&jiffies_lock); | |
20933 | + write_seqcount_begin(&jiffies_seq); | |
20934 | /* Did we start the jiffies update yet ? */ | |
20935 | if (last_jiffies_update.tv64 == 0) | |
20936 | last_jiffies_update = tick_next_period; | |
20937 | period = last_jiffies_update; | |
20938 | - write_sequnlock(&jiffies_lock); | |
20939 | + write_seqcount_end(&jiffies_seq); | |
20940 | + raw_spin_unlock(&jiffies_lock); | |
20941 | return period; | |
20942 | } | |
20943 | ||
c7c16703 | 20944 | @@ -215,6 +220,7 @@ static void nohz_full_kick_func(struct irq_work *work) |
1a6e0f06 JK |
20945 | |
20946 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
20947 | .func = nohz_full_kick_func, | |
20948 | + .flags = IRQ_WORK_HARD_IRQ, | |
20949 | }; | |
20950 | ||
20951 | /* | |
c7c16703 | 20952 | @@ -673,10 +679,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, |
1a6e0f06 JK |
20953 | |
20954 | /* Read jiffies and the time when jiffies were updated last */ | |
20955 | do { | |
20956 | - seq = read_seqbegin(&jiffies_lock); | |
20957 | + seq = read_seqcount_begin(&jiffies_seq); | |
20958 | basemono = last_jiffies_update.tv64; | |
20959 | basejiff = jiffies; | |
20960 | - } while (read_seqretry(&jiffies_lock, seq)); | |
20961 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
20962 | ts->last_jiffies = basejiff; | |
20963 | ||
20964 | if (rcu_needs_cpu(basemono, &next_rcu) || | |
c7c16703 | 20965 | @@ -877,14 +883,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) |
1a6e0f06 JK |
20966 | return false; |
20967 | ||
20968 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
20969 | - static int ratelimit; | |
20970 | - | |
20971 | - if (ratelimit < 10 && | |
20972 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
20973 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
20974 | - (unsigned int) local_softirq_pending()); | |
20975 | - ratelimit++; | |
20976 | - } | |
20977 | + softirq_check_pending_idle(); | |
20978 | return false; | |
20979 | } | |
20980 | ||
c7c16703 | 20981 | @@ -1193,6 +1192,7 @@ void tick_setup_sched_timer(void) |
1a6e0f06 JK |
20982 | * Emulate tick processing via per-CPU hrtimers: |
20983 | */ | |
20984 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
20985 | + ts->sched_timer.irqsafe = 1; | |
20986 | ts->sched_timer.function = tick_sched_timer; | |
20987 | ||
20988 | /* Get the next period (per-CPU) */ | |
20989 | diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c | |
c7c16703 | 20990 | index 46e312e9be38..fa75cf5d9253 100644 |
1a6e0f06 JK |
20991 | --- a/kernel/time/timekeeping.c |
20992 | +++ b/kernel/time/timekeeping.c | |
20993 | @@ -2328,8 +2328,10 @@ EXPORT_SYMBOL(hardpps); | |
20994 | */ | |
20995 | void xtime_update(unsigned long ticks) | |
20996 | { | |
20997 | - write_seqlock(&jiffies_lock); | |
20998 | + raw_spin_lock(&jiffies_lock); | |
20999 | + write_seqcount_begin(&jiffies_seq); | |
21000 | do_timer(ticks); | |
21001 | - write_sequnlock(&jiffies_lock); | |
21002 | + write_seqcount_end(&jiffies_seq); | |
21003 | + raw_spin_unlock(&jiffies_lock); | |
21004 | update_wall_time(); | |
21005 | } | |
21006 | diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h | |
21007 | index 704f595ce83f..763a3e5121ff 100644 | |
21008 | --- a/kernel/time/timekeeping.h | |
21009 | +++ b/kernel/time/timekeeping.h | |
21010 | @@ -19,7 +19,8 @@ extern void timekeeping_resume(void); | |
21011 | extern void do_timer(unsigned long ticks); | |
21012 | extern void update_wall_time(void); | |
21013 | ||
21014 | -extern seqlock_t jiffies_lock; | |
21015 | +extern raw_spinlock_t jiffies_lock; | |
21016 | +extern seqcount_t jiffies_seq; | |
21017 | ||
21018 | #define CS_NAME_LEN 32 | |
21019 | ||
21020 | diff --git a/kernel/time/timer.c b/kernel/time/timer.c | |
5c015b7c | 21021 | index c611c47de884..cdff4411f8f6 100644 |
1a6e0f06 JK |
21022 | --- a/kernel/time/timer.c |
21023 | +++ b/kernel/time/timer.c | |
21024 | @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64); | |
21025 | #endif | |
21026 | ||
21027 | struct timer_base { | |
21028 | - spinlock_t lock; | |
21029 | + raw_spinlock_t lock; | |
21030 | struct timer_list *running_timer; | |
21031 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21032 | + struct swait_queue_head wait_for_running_timer; | |
21033 | +#endif | |
21034 | unsigned long clk; | |
21035 | unsigned long next_expiry; | |
21036 | unsigned int cpu; | |
5c015b7c JK |
21037 | @@ -203,6 +206,8 @@ struct timer_base { |
21038 | bool is_idle; | |
21039 | DECLARE_BITMAP(pending_map, WHEEL_SIZE); | |
21040 | struct hlist_head vectors[WHEEL_SIZE]; | |
21041 | + struct hlist_head expired_lists[LVL_DEPTH]; | |
21042 | + int expired_count; | |
21043 | } ____cacheline_aligned; | |
21044 | ||
21045 | static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]); | |
21046 | @@ -948,10 +953,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, | |
1a6e0f06 JK |
21047 | |
21048 | if (!(tf & TIMER_MIGRATING)) { | |
21049 | base = get_timer_base(tf); | |
21050 | - spin_lock_irqsave(&base->lock, *flags); | |
21051 | + raw_spin_lock_irqsave(&base->lock, *flags); | |
21052 | if (timer->flags == tf) | |
21053 | return base; | |
21054 | - spin_unlock_irqrestore(&base->lock, *flags); | |
21055 | + raw_spin_unlock_irqrestore(&base->lock, *flags); | |
21056 | } | |
21057 | cpu_relax(); | |
21058 | } | |
5c015b7c | 21059 | @@ -1023,9 +1028,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) |
1a6e0f06 JK |
21060 | /* See the comment in lock_timer_base() */ |
21061 | timer->flags |= TIMER_MIGRATING; | |
21062 | ||
21063 | - spin_unlock(&base->lock); | |
21064 | + raw_spin_unlock(&base->lock); | |
21065 | base = new_base; | |
21066 | - spin_lock(&base->lock); | |
21067 | + raw_spin_lock(&base->lock); | |
21068 | WRITE_ONCE(timer->flags, | |
21069 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); | |
21070 | } | |
5c015b7c | 21071 | @@ -1050,7 +1055,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) |
1a6e0f06 JK |
21072 | } |
21073 | ||
21074 | out_unlock: | |
21075 | - spin_unlock_irqrestore(&base->lock, flags); | |
21076 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
21077 | ||
21078 | return ret; | |
21079 | } | |
5c015b7c | 21080 | @@ -1144,19 +1149,46 @@ void add_timer_on(struct timer_list *timer, int cpu) |
1a6e0f06 JK |
21081 | if (base != new_base) { |
21082 | timer->flags |= TIMER_MIGRATING; | |
21083 | ||
21084 | - spin_unlock(&base->lock); | |
21085 | + raw_spin_unlock(&base->lock); | |
21086 | base = new_base; | |
21087 | - spin_lock(&base->lock); | |
21088 | + raw_spin_lock(&base->lock); | |
21089 | WRITE_ONCE(timer->flags, | |
21090 | (timer->flags & ~TIMER_BASEMASK) | cpu); | |
21091 | } | |
21092 | ||
21093 | debug_activate(timer, timer->expires); | |
21094 | internal_add_timer(base, timer); | |
21095 | - spin_unlock_irqrestore(&base->lock, flags); | |
21096 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
21097 | } | |
21098 | EXPORT_SYMBOL_GPL(add_timer_on); | |
21099 | ||
21100 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21101 | +/* | |
21102 | + * Wait for a running timer | |
21103 | + */ | |
21104 | +static void wait_for_running_timer(struct timer_list *timer) | |
21105 | +{ | |
21106 | + struct timer_base *base; | |
21107 | + u32 tf = timer->flags; | |
21108 | + | |
21109 | + if (tf & TIMER_MIGRATING) | |
21110 | + return; | |
21111 | + | |
21112 | + base = get_timer_base(tf); | |
21113 | + swait_event(base->wait_for_running_timer, | |
21114 | + base->running_timer != timer); | |
21115 | +} | |
21116 | + | |
21117 | +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) | |
21118 | +#else | |
21119 | +static inline void wait_for_running_timer(struct timer_list *timer) | |
21120 | +{ | |
21121 | + cpu_relax(); | |
21122 | +} | |
21123 | + | |
21124 | +# define wakeup_timer_waiters(b) do { } while (0) | |
21125 | +#endif | |
21126 | + | |
21127 | /** | |
21128 | * del_timer - deactive a timer. | |
21129 | * @timer: the timer to be deactivated | |
5c015b7c | 21130 | @@ -1180,7 +1212,7 @@ int del_timer(struct timer_list *timer) |
1a6e0f06 JK |
21131 | if (timer_pending(timer)) { |
21132 | base = lock_timer_base(timer, &flags); | |
21133 | ret = detach_if_pending(timer, base, true); | |
21134 | - spin_unlock_irqrestore(&base->lock, flags); | |
21135 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
21136 | } | |
21137 | ||
21138 | return ret; | |
5c015b7c | 21139 | @@ -1208,13 +1240,13 @@ int try_to_del_timer_sync(struct timer_list *timer) |
1a6e0f06 JK |
21140 | timer_stats_timer_clear_start_info(timer); |
21141 | ret = detach_if_pending(timer, base, true); | |
21142 | } | |
21143 | - spin_unlock_irqrestore(&base->lock, flags); | |
21144 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
21145 | ||
21146 | return ret; | |
21147 | } | |
21148 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
21149 | ||
21150 | -#ifdef CONFIG_SMP | |
21151 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
21152 | /** | |
21153 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
21154 | * @timer: the timer to be deactivated | |
5c015b7c | 21155 | @@ -1274,7 +1306,7 @@ int del_timer_sync(struct timer_list *timer) |
1a6e0f06 JK |
21156 | int ret = try_to_del_timer_sync(timer); |
21157 | if (ret >= 0) | |
21158 | return ret; | |
21159 | - cpu_relax(); | |
21160 | + wait_for_running_timer(timer); | |
21161 | } | |
21162 | } | |
21163 | EXPORT_SYMBOL(del_timer_sync); | |
5c015b7c JK |
21164 | @@ -1323,7 +1355,8 @@ static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long), |
21165 | } | |
21166 | } | |
21167 | ||
21168 | -static void expire_timers(struct timer_base *base, struct hlist_head *head) | |
21169 | +static inline void __expire_timers(struct timer_base *base, | |
21170 | + struct hlist_head *head) | |
21171 | { | |
21172 | while (!hlist_empty(head)) { | |
21173 | struct timer_list *timer; | |
21174 | @@ -1339,33 +1372,53 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) | |
1a6e0f06 JK |
21175 | fn = timer->function; |
21176 | data = timer->data; | |
21177 | ||
21178 | - if (timer->flags & TIMER_IRQSAFE) { | |
21179 | - spin_unlock(&base->lock); | |
21180 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && | |
21181 | + timer->flags & TIMER_IRQSAFE) { | |
21182 | + raw_spin_unlock(&base->lock); | |
21183 | call_timer_fn(timer, fn, data); | |
21184 | - spin_lock(&base->lock); | |
21185 | + base->running_timer = NULL; | |
21186 | + raw_spin_lock(&base->lock); | |
21187 | } else { | |
21188 | - spin_unlock_irq(&base->lock); | |
21189 | + raw_spin_unlock_irq(&base->lock); | |
21190 | call_timer_fn(timer, fn, data); | |
21191 | - spin_lock_irq(&base->lock); | |
21192 | + base->running_timer = NULL; | |
21193 | + raw_spin_lock_irq(&base->lock); | |
21194 | } | |
21195 | } | |
21196 | } | |
5c015b7c JK |
21197 | |
21198 | -static int __collect_expired_timers(struct timer_base *base, | |
21199 | - struct hlist_head *heads) | |
21200 | +static void expire_timers(struct timer_base *base) | |
21201 | +{ | |
21202 | + struct hlist_head *head; | |
21203 | + | |
21204 | + while (base->expired_count--) { | |
21205 | + head = base->expired_lists + base->expired_count; | |
21206 | + __expire_timers(base, head); | |
21207 | + } | |
21208 | + base->expired_count = 0; | |
21209 | +} | |
21210 | + | |
21211 | +static void __collect_expired_timers(struct timer_base *base) | |
21212 | { | |
21213 | unsigned long clk = base->clk; | |
21214 | struct hlist_head *vec; | |
21215 | - int i, levels = 0; | |
21216 | + int i; | |
21217 | unsigned int idx; | |
21218 | ||
21219 | + /* | |
21220 | + * expire_timers() must be called at least once before we can | |
21221 | + * collect more timers | |
21222 | + */ | |
21223 | + if (WARN_ON(base->expired_count)) | |
21224 | + return; | |
21225 | + | |
21226 | for (i = 0; i < LVL_DEPTH; i++) { | |
21227 | idx = (clk & LVL_MASK) + i * LVL_SIZE; | |
21228 | ||
21229 | if (__test_and_clear_bit(idx, base->pending_map)) { | |
21230 | vec = base->vectors + idx; | |
21231 | - hlist_move_list(vec, heads++); | |
21232 | - levels++; | |
21233 | + hlist_move_list(vec, | |
21234 | + &base->expired_lists[base->expired_count++]); | |
21235 | } | |
21236 | /* Is it time to look at the next level? */ | |
21237 | if (clk & LVL_CLK_MASK) | |
21238 | @@ -1373,7 +1426,6 @@ static int __collect_expired_timers(struct timer_base *base, | |
21239 | /* Shift clock for the next level granularity */ | |
21240 | clk >>= LVL_CLK_SHIFT; | |
21241 | } | |
21242 | - return levels; | |
21243 | } | |
21244 | ||
21245 | #ifdef CONFIG_NO_HZ_COMMON | |
21246 | @@ -1515,7 +1567,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
1a6e0f06 JK |
21247 | if (cpu_is_offline(smp_processor_id())) |
21248 | return expires; | |
21249 | ||
21250 | - spin_lock(&base->lock); | |
21251 | + raw_spin_lock(&base->lock); | |
21252 | nextevt = __next_timer_interrupt(base); | |
21253 | is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); | |
21254 | base->next_expiry = nextevt; | |
5c015b7c | 21255 | @@ -1543,7 +1595,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) |
1a6e0f06 JK |
21256 | if ((expires - basem) > TICK_NSEC) |
21257 | base->is_idle = true; | |
21258 | } | |
21259 | - spin_unlock(&base->lock); | |
21260 | + raw_spin_unlock(&base->lock); | |
21261 | ||
21262 | return cmp_next_hrtimer_event(basem, expires); | |
21263 | } | |
5c015b7c JK |
21264 | @@ -1566,8 +1618,7 @@ void timer_clear_idle(void) |
21265 | base->is_idle = false; | |
21266 | } | |
21267 | ||
21268 | -static int collect_expired_timers(struct timer_base *base, | |
21269 | - struct hlist_head *heads) | |
21270 | +static void collect_expired_timers(struct timer_base *base) | |
21271 | { | |
21272 | /* | |
21273 | * NOHZ optimization. After a long idle sleep we need to forward the | |
21274 | @@ -1584,20 +1635,49 @@ static int collect_expired_timers(struct timer_base *base, | |
21275 | if (time_after(next, jiffies)) { | |
21276 | /* The call site will increment clock! */ | |
21277 | base->clk = jiffies - 1; | |
21278 | - return 0; | |
21279 | + return; | |
21280 | } | |
21281 | base->clk = next; | |
21282 | } | |
21283 | - return __collect_expired_timers(base, heads); | |
21284 | + __collect_expired_timers(base); | |
21285 | } | |
21286 | #else | |
21287 | -static inline int collect_expired_timers(struct timer_base *base, | |
21288 | - struct hlist_head *heads) | |
21289 | +static inline void collect_expired_timers(struct timer_base *base) | |
21290 | { | |
21291 | - return __collect_expired_timers(base, heads); | |
21292 | + __collect_expired_timers(base); | |
21293 | } | |
21294 | #endif | |
21295 | ||
21296 | +static int find_expired_timers(struct timer_base *base) | |
21297 | +{ | |
21298 | + const unsigned long int end_clk = jiffies; | |
21299 | + | |
21300 | + while (!base->expired_count && time_after_eq(end_clk, base->clk)) { | |
21301 | + collect_expired_timers(base); | |
21302 | + base->clk++; | |
21303 | + } | |
21304 | + | |
21305 | + return base->expired_count; | |
21306 | +} | |
21307 | + | |
21308 | +/* Called from CPU tick routine to quickly collect expired timers */ | |
21309 | +static int tick_find_expired(struct timer_base *base) | |
21310 | +{ | |
21311 | + int count; | |
21312 | + | |
21313 | + raw_spin_lock(&base->lock); | |
21314 | + | |
21315 | + if (unlikely(time_after(jiffies, base->clk + HZ))) { | |
21316 | + /* defer to ktimersoftd; don't spend too long in irq context */ | |
21317 | + count = -1; | |
21318 | + } else | |
21319 | + count = find_expired_timers(base); | |
21320 | + | |
21321 | + raw_spin_unlock(&base->lock); | |
21322 | + | |
21323 | + return count; | |
21324 | +} | |
21325 | + | |
21326 | /* | |
21327 | * Called from the timer interrupt handler to charge one tick to the current | |
21328 | * process. user_tick is 1 if the tick is user time, 0 for system. | |
21329 | @@ -1608,13 +1688,13 @@ void update_process_times(int user_tick) | |
1a6e0f06 JK |
21330 | |
21331 | /* Note: this timer irq context must be accounted for as well. */ | |
21332 | account_process_tick(p, user_tick); | |
21333 | + scheduler_tick(); | |
21334 | run_local_timers(); | |
21335 | rcu_check_callbacks(user_tick); | |
21336 | -#ifdef CONFIG_IRQ_WORK | |
21337 | +#if defined(CONFIG_IRQ_WORK) | |
21338 | if (in_irq()) | |
21339 | irq_work_tick(); | |
21340 | #endif | |
21341 | - scheduler_tick(); | |
21342 | run_posix_cpu_timers(p); | |
21343 | } | |
21344 | ||
5c015b7c JK |
21345 | @@ -1624,24 +1704,13 @@ void update_process_times(int user_tick) |
21346 | */ | |
21347 | static inline void __run_timers(struct timer_base *base) | |
21348 | { | |
21349 | - struct hlist_head heads[LVL_DEPTH]; | |
21350 | - int levels; | |
1a6e0f06 JK |
21351 | + raw_spin_lock_irq(&base->lock); |
21352 | ||
5c015b7c JK |
21353 | - if (!time_after_eq(jiffies, base->clk)) |
21354 | - return; | |
21355 | + while (find_expired_timers(base)) | |
21356 | + expire_timers(base); | |
1a6e0f06 | 21357 | |
5c015b7c JK |
21358 | - spin_lock_irq(&base->lock); |
21359 | - | |
21360 | - while (time_after_eq(jiffies, base->clk)) { | |
21361 | - | |
21362 | - levels = collect_expired_timers(base, heads); | |
21363 | - base->clk++; | |
21364 | - | |
21365 | - while (levels--) | |
21366 | - expire_timers(base, heads + levels); | |
21367 | - } | |
1a6e0f06 JK |
21368 | - base->running_timer = NULL; |
21369 | - spin_unlock_irq(&base->lock); | |
21370 | + raw_spin_unlock_irq(&base->lock); | |
21371 | + wakeup_timer_waiters(base); | |
21372 | } | |
21373 | ||
21374 | /* | |
5c015b7c | 21375 | @@ -1651,6 +1720,8 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h) |
1a6e0f06 JK |
21376 | { |
21377 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | |
21378 | ||
21379 | + irq_work_tick_soft(); | |
21380 | + | |
21381 | __run_timers(base); | |
21382 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | |
21383 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | |
5c015b7c JK |
21384 | @@ -1665,12 +1736,12 @@ void run_local_timers(void) |
21385 | ||
21386 | hrtimer_run_queues(); | |
21387 | /* Raise the softirq only if required. */ | |
21388 | - if (time_before(jiffies, base->clk)) { | |
21389 | + if (time_before(jiffies, base->clk) || !tick_find_expired(base)) { | |
21390 | if (!IS_ENABLED(CONFIG_NO_HZ_COMMON) || !base->nohz_active) | |
21391 | return; | |
21392 | /* CPU is awake, so check the deferrable base. */ | |
21393 | base++; | |
21394 | - if (time_before(jiffies, base->clk)) | |
21395 | + if (time_before(jiffies, base->clk) || !tick_find_expired(base)) | |
21396 | return; | |
21397 | } | |
21398 | raise_softirq(TIMER_SOFTIRQ); | |
21399 | @@ -1836,16 +1907,17 @@ int timers_dead_cpu(unsigned int cpu) | |
1a6e0f06 JK |
21400 | * The caller is globally serialized and nobody else |
21401 | * takes two locks at once, deadlock is not possible. | |
21402 | */ | |
21403 | - spin_lock_irq(&new_base->lock); | |
21404 | - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
21405 | + raw_spin_lock_irq(&new_base->lock); | |
21406 | + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
21407 | ||
21408 | BUG_ON(old_base->running_timer); | |
5c015b7c | 21409 | + BUG_ON(old_base->expired_count); |
1a6e0f06 JK |
21410 | |
21411 | for (i = 0; i < WHEEL_SIZE; i++) | |
21412 | migrate_timer_list(new_base, old_base->vectors + i); | |
21413 | ||
21414 | - spin_unlock(&old_base->lock); | |
21415 | - spin_unlock_irq(&new_base->lock); | |
21416 | + raw_spin_unlock(&old_base->lock); | |
21417 | + raw_spin_unlock_irq(&new_base->lock); | |
21418 | put_cpu_ptr(&timer_bases); | |
21419 | } | |
21420 | return 0; | |
5c015b7c | 21421 | @@ -1861,8 +1933,12 @@ static void __init init_timer_cpu(int cpu) |
1a6e0f06 JK |
21422 | for (i = 0; i < NR_BASES; i++) { |
21423 | base = per_cpu_ptr(&timer_bases[i], cpu); | |
21424 | base->cpu = cpu; | |
21425 | - spin_lock_init(&base->lock); | |
21426 | + raw_spin_lock_init(&base->lock); | |
21427 | base->clk = jiffies; | |
21428 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
21429 | + init_swait_queue_head(&base->wait_for_running_timer); | |
21430 | +#endif | |
5c015b7c | 21431 | + base->expired_count = 0; |
1a6e0f06 JK |
21432 | } |
21433 | } | |
21434 | ||
21435 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig | |
c7c16703 | 21436 | index 2a96b063d659..812e37237eb8 100644 |
1a6e0f06 JK |
21437 | --- a/kernel/trace/Kconfig |
21438 | +++ b/kernel/trace/Kconfig | |
c7c16703 | 21439 | @@ -182,6 +182,24 @@ config IRQSOFF_TRACER |
1a6e0f06 JK |
21440 | enabled. This option and the preempt-off timing option can be |
21441 | used together or separately.) | |
21442 | ||
21443 | +config INTERRUPT_OFF_HIST | |
21444 | + bool "Interrupts-off Latency Histogram" | |
21445 | + depends on IRQSOFF_TRACER | |
21446 | + help | |
21447 | + This option generates continuously updated histograms (one per cpu) | |
21448 | + of the duration of time periods with interrupts disabled. The | |
21449 | + histograms are disabled by default. To enable them, write a non-zero | |
21450 | + number to | |
21451 | + | |
21452 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
21453 | + | |
21454 | + If PREEMPT_OFF_HIST is also selected, additional histograms (one | |
21455 | + per cpu) are generated that accumulate the duration of time periods | |
21456 | + when both interrupts and preemption are disabled. The histogram data | |
21457 | + will be located in the debug file system at | |
21458 | + | |
21459 | + /sys/kernel/debug/tracing/latency_hist/irqsoff | |
21460 | + | |
21461 | config PREEMPT_TRACER | |
21462 | bool "Preemption-off Latency Tracer" | |
21463 | default n | |
c7c16703 | 21464 | @@ -206,6 +224,24 @@ config PREEMPT_TRACER |
1a6e0f06 JK |
21465 | enabled. This option and the irqs-off timing option can be |
21466 | used together or separately.) | |
21467 | ||
21468 | +config PREEMPT_OFF_HIST | |
21469 | + bool "Preemption-off Latency Histogram" | |
21470 | + depends on PREEMPT_TRACER | |
21471 | + help | |
21472 | + This option generates continuously updated histograms (one per cpu) | |
21473 | + of the duration of time periods with preemption disabled. The | |
21474 | + histograms are disabled by default. To enable them, write a non-zero | |
21475 | + number to | |
21476 | + | |
21477 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
21478 | + | |
21479 | + If INTERRUPT_OFF_HIST is also selected, additional histograms (one | |
21480 | + per cpu) are generated that accumulate the duration of time periods | |
21481 | + when both interrupts and preemption are disabled. The histogram data | |
21482 | + will be located in the debug file system at | |
21483 | + | |
21484 | + /sys/kernel/debug/tracing/latency_hist/preemptoff | |
21485 | + | |
21486 | config SCHED_TRACER | |
21487 | bool "Scheduling Latency Tracer" | |
21488 | select GENERIC_TRACER | |
c7c16703 JK |
21489 | @@ -251,6 +287,74 @@ config HWLAT_TRACER |
21490 | file. Every time a latency is greater than tracing_thresh, it will | |
21491 | be recorded into the ring buffer. | |
1a6e0f06 JK |
21492 | |
21493 | +config WAKEUP_LATENCY_HIST | |
21494 | + bool "Scheduling Latency Histogram" | |
21495 | + depends on SCHED_TRACER | |
21496 | + help | |
21497 | + This option generates continuously updated histograms (one per cpu) | |
21498 | + of the scheduling latency of the highest priority task. | |
21499 | + The histograms are disabled by default. To enable them, write a | |
21500 | + non-zero number to | |
21501 | + | |
21502 | + /sys/kernel/debug/tracing/latency_hist/enable/wakeup | |
21503 | + | |
21504 | + Two different algorithms are used, one to determine the latency of | |
21505 | + processes that exclusively use the highest priority of the system and | |
21506 | + another one to determine the latency of processes that share the | |
21507 | + highest system priority with other processes. The former is used to | |
21508 | + improve hardware and system software, the latter to optimize the | |
21509 | + priority design of a given system. The histogram data will be | |
21510 | + located in the debug file system at | |
21511 | + | |
21512 | + /sys/kernel/debug/tracing/latency_hist/wakeup | |
21513 | + | |
21514 | + and | |
21515 | + | |
21516 | + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio | |
21517 | + | |
21518 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
21519 | + Histogram are selected, additional histogram data will be collected | |
21520 | + that contain, in addition to the wakeup latency, the timer latency, in | |
21521 | + case the wakeup was triggered by an expired timer. These histograms | |
21522 | + are available in the | |
21523 | + | |
21524 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
21525 | + | |
21526 | + directory. They reflect the apparent interrupt and scheduling latency | |
21527 | + and are best suitable to determine the worst-case latency of a given | |
21528 | + system. To enable these histograms, write a non-zero number to | |
21529 | + | |
21530 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
21531 | + | |
21532 | +config MISSED_TIMER_OFFSETS_HIST | |
21533 | + depends on HIGH_RES_TIMERS | |
21534 | + select GENERIC_TRACER | |
21535 | + bool "Missed Timer Offsets Histogram" | |
21536 | + help | |
21537 | + Generate a histogram of missed timer offsets in microseconds. The | |
21538 | + histograms are disabled by default. To enable them, write a non-zero | |
21539 | + number to | |
21540 | + | |
21541 | + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets | |
21542 | + | |
21543 | + The histogram data will be located in the debug file system at | |
21544 | + | |
21545 | + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets | |
21546 | + | |
21547 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
21548 | + Histogram are selected, additional histogram data will be collected | |
21549 | + that contain, in addition to the wakeup latency, the timer latency, in | |
21550 | + case the wakeup was triggered by an expired timer. These histograms | |
21551 | + are available in the | |
21552 | + | |
21553 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
21554 | + | |
21555 | + directory. They reflect the apparent interrupt and scheduling latency | |
21556 | + and are best suitable to determine the worst-case latency of a given | |
21557 | + system. To enable these histograms, write a non-zero number to | |
21558 | + | |
21559 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
21560 | + | |
21561 | config ENABLE_DEFAULT_TRACERS | |
21562 | bool "Trace process context switches and events" | |
21563 | depends on !GENERIC_TRACER | |
21564 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile | |
c7c16703 | 21565 | index e57980845549..83af000b783c 100644 |
1a6e0f06 JK |
21566 | --- a/kernel/trace/Makefile |
21567 | +++ b/kernel/trace/Makefile | |
c7c16703 | 21568 | @@ -38,6 +38,10 @@ obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o |
1a6e0f06 JK |
21569 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o |
21570 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |
c7c16703 | 21571 | obj-$(CONFIG_HWLAT_TRACER) += trace_hwlat.o |
1a6e0f06 JK |
21572 | +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o |
21573 | +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o | |
21574 | +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o | |
21575 | +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o | |
21576 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | |
21577 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |
21578 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | |
21579 | diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c | |
21580 | new file mode 100644 | |
21581 | index 000000000000..7f6ee70dea41 | |
21582 | --- /dev/null | |
21583 | +++ b/kernel/trace/latency_hist.c | |
21584 | @@ -0,0 +1,1178 @@ | |
21585 | +/* | |
21586 | + * kernel/trace/latency_hist.c | |
21587 | + * | |
21588 | + * Add support for histograms of preemption-off latency and | |
21589 | + * interrupt-off latency and wakeup latency, it depends on | |
21590 | + * Real-Time Preemption Support. | |
21591 | + * | |
21592 | + * Copyright (C) 2005 MontaVista Software, Inc. | |
21593 | + * Yi Yang <yyang@ch.mvista.com> | |
21594 | + * | |
21595 | + * Converted to work with the new latency tracer. | |
21596 | + * Copyright (C) 2008 Red Hat, Inc. | |
21597 | + * Steven Rostedt <srostedt@redhat.com> | |
21598 | + * | |
21599 | + */ | |
21600 | +#include <linux/module.h> | |
21601 | +#include <linux/debugfs.h> | |
21602 | +#include <linux/seq_file.h> | |
21603 | +#include <linux/percpu.h> | |
21604 | +#include <linux/kallsyms.h> | |
21605 | +#include <linux/uaccess.h> | |
21606 | +#include <linux/sched.h> | |
21607 | +#include <linux/sched/rt.h> | |
21608 | +#include <linux/slab.h> | |
21609 | +#include <linux/atomic.h> | |
21610 | +#include <asm/div64.h> | |
21611 | + | |
21612 | +#include "trace.h" | |
21613 | +#include <trace/events/sched.h> | |
21614 | + | |
21615 | +#define NSECS_PER_USECS 1000L | |
21616 | + | |
21617 | +#define CREATE_TRACE_POINTS | |
21618 | +#include <trace/events/hist.h> | |
21619 | + | |
21620 | +enum { | |
21621 | + IRQSOFF_LATENCY = 0, | |
21622 | + PREEMPTOFF_LATENCY, | |
21623 | + PREEMPTIRQSOFF_LATENCY, | |
21624 | + WAKEUP_LATENCY, | |
21625 | + WAKEUP_LATENCY_SHAREDPRIO, | |
21626 | + MISSED_TIMER_OFFSETS, | |
21627 | + TIMERANDWAKEUP_LATENCY, | |
21628 | + MAX_LATENCY_TYPE, | |
21629 | +}; | |
21630 | + | |
21631 | +#define MAX_ENTRY_NUM 10240 | |
21632 | + | |
21633 | +struct hist_data { | |
21634 | + atomic_t hist_mode; /* 0 log, 1 don't log */ | |
21635 | + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */ | |
21636 | + long min_lat; | |
21637 | + long max_lat; | |
21638 | + unsigned long long below_hist_bound_samples; | |
21639 | + unsigned long long above_hist_bound_samples; | |
21640 | + long long accumulate_lat; | |
21641 | + unsigned long long total_samples; | |
21642 | + unsigned long long hist_array[MAX_ENTRY_NUM]; | |
21643 | +}; | |
21644 | + | |
21645 | +struct enable_data { | |
21646 | + int latency_type; | |
21647 | + int enabled; | |
21648 | +}; | |
21649 | + | |
21650 | +static char *latency_hist_dir_root = "latency_hist"; | |
21651 | + | |
21652 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21653 | +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist); | |
21654 | +static char *irqsoff_hist_dir = "irqsoff"; | |
21655 | +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); | |
21656 | +static DEFINE_PER_CPU(int, hist_irqsoff_counting); | |
21657 | +#endif | |
21658 | + | |
21659 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21660 | +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist); | |
21661 | +static char *preemptoff_hist_dir = "preemptoff"; | |
21662 | +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); | |
21663 | +static DEFINE_PER_CPU(int, hist_preemptoff_counting); | |
21664 | +#endif | |
21665 | + | |
21666 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
21667 | +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist); | |
21668 | +static char *preemptirqsoff_hist_dir = "preemptirqsoff"; | |
21669 | +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); | |
21670 | +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting); | |
21671 | +#endif | |
21672 | + | |
21673 | +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST) | |
21674 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start); | |
21675 | +static struct enable_data preemptirqsoff_enabled_data = { | |
21676 | + .latency_type = PREEMPTIRQSOFF_LATENCY, | |
21677 | + .enabled = 0, | |
21678 | +}; | |
21679 | +#endif | |
21680 | + | |
21681 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
21682 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21683 | +struct maxlatproc_data { | |
21684 | + char comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
21685 | + char current_comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
21686 | + int pid; | |
21687 | + int current_pid; | |
21688 | + int prio; | |
21689 | + int current_prio; | |
21690 | + long latency; | |
21691 | + long timeroffset; | |
21692 | + cycle_t timestamp; | |
21693 | +}; | |
21694 | +#endif | |
21695 | + | |
21696 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21697 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); | |
21698 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); | |
21699 | +static char *wakeup_latency_hist_dir = "wakeup"; | |
21700 | +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; | |
21701 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
21702 | + struct task_struct *p); | |
21703 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
21704 | + bool preempt, struct task_struct *prev, struct task_struct *next); | |
21705 | +static notrace void probe_sched_migrate_task(void *, | |
21706 | + struct task_struct *task, int cpu); | |
21707 | +static struct enable_data wakeup_latency_enabled_data = { | |
21708 | + .latency_type = WAKEUP_LATENCY, | |
21709 | + .enabled = 0, | |
21710 | +}; | |
21711 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc); | |
21712 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio); | |
21713 | +static DEFINE_PER_CPU(struct task_struct *, wakeup_task); | |
21714 | +static DEFINE_PER_CPU(int, wakeup_sharedprio); | |
21715 | +static unsigned long wakeup_pid; | |
21716 | +#endif | |
21717 | + | |
21718 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21719 | +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets); | |
21720 | +static char *missed_timer_offsets_dir = "missed_timer_offsets"; | |
21721 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
21722 | + long long offset, struct task_struct *curr, struct task_struct *task); | |
21723 | +static struct enable_data missed_timer_offsets_enabled_data = { | |
21724 | + .latency_type = MISSED_TIMER_OFFSETS, | |
21725 | + .enabled = 0, | |
21726 | +}; | |
21727 | +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); | |
21728 | +static unsigned long missed_timer_offsets_pid; | |
21729 | +#endif | |
21730 | + | |
21731 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
21732 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21733 | +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); | |
21734 | +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; | |
21735 | +static struct enable_data timerandwakeup_enabled_data = { | |
21736 | + .latency_type = TIMERANDWAKEUP_LATENCY, | |
21737 | + .enabled = 0, | |
21738 | +}; | |
21739 | +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); | |
21740 | +#endif | |
21741 | + | |
/*
 * latency_hist - record one latency sample in a per-CPU histogram
 * @latency_type: which histogram receives the sample (IRQSOFF_LATENCY, ...)
 * @cpu: CPU whose per-CPU histogram is updated
 * @latency: measured latency, in microseconds (before offset correction)
 * @timeroffset: timer-offset component, only used by the task histograms
 * @stop: timestamp (cycle_t) taken when the measurement ended
 * @p: task the sample belongs to, or NULL for irq/preempt-off histograms
 *
 * Called from tracepoint probes (hence notrace).  Samples are dropped
 * while the histogram is frozen for reading or resetting (hist_mode == 0).
 */
void notrace latency_hist(int latency_type, int cpu, long latency,
			  long timeroffset, cycle_t stop,
			  struct task_struct *p)
{
	struct hist_data *my_hist;
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
	struct maxlatproc_data *mp = NULL;
#endif

	if (!cpu_possible(cpu) || latency_type < 0 ||
	    latency_type >= MAX_LATENCY_TYPE)
		return;

	/* Map the latency type to its histogram (and max-latency record). */
	switch (latency_type) {
#ifdef CONFIG_INTERRUPT_OFF_HIST
	case IRQSOFF_LATENCY:
		my_hist = &per_cpu(irqsoff_hist, cpu);
		break;
#endif
#ifdef CONFIG_PREEMPT_OFF_HIST
	case PREEMPTOFF_LATENCY:
		my_hist = &per_cpu(preemptoff_hist, cpu);
		break;
#endif
#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST)
	case PREEMPTIRQSOFF_LATENCY:
		my_hist = &per_cpu(preemptirqsoff_hist, cpu);
		break;
#endif
#ifdef CONFIG_WAKEUP_LATENCY_HIST
	case WAKEUP_LATENCY:
		my_hist = &per_cpu(wakeup_latency_hist, cpu);
		mp = &per_cpu(wakeup_maxlatproc, cpu);
		break;
	case WAKEUP_LATENCY_SHAREDPRIO:
		my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu);
		mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
		break;
#endif
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
	case MISSED_TIMER_OFFSETS:
		my_hist = &per_cpu(missed_timer_offsets, cpu);
		mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
		break;
#endif
#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
	case TIMERANDWAKEUP_LATENCY:
		my_hist = &per_cpu(timerandwakeup_latency_hist, cpu);
		mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
		break;
#endif

	default:
		return;
	}

	/* Shift into the (possibly negative-capable) bucket index space. */
	latency += my_hist->offset;

	/* Histogram frozen by a reader/reset in progress: drop the sample. */
	if (atomic_read(&my_hist->hist_mode) == 0)
		return;

	/* Out-of-range samples are counted but not binned. */
	if (latency < 0 || latency >= MAX_ENTRY_NUM) {
		if (latency < 0)
			my_hist->below_hist_bound_samples++;
		else
			my_hist->above_hist_bound_samples++;
	} else
		my_hist->hist_array[latency]++;

	/* New maximum (or first sample, min_lat still at LONG_MAX sentinel). */
	if (unlikely(latency > my_hist->max_lat ||
	    my_hist->min_lat == LONG_MAX)) {
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
		/* Task-related histograms also remember who was involved. */
		if (latency_type == WAKEUP_LATENCY ||
		    latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
		    latency_type == MISSED_TIMER_OFFSETS ||
		    latency_type == TIMERANDWAKEUP_LATENCY) {
			strncpy(mp->comm, p->comm, sizeof(mp->comm));
			strncpy(mp->current_comm, current->comm,
			    sizeof(mp->current_comm));
			mp->pid = task_pid_nr(p);
			mp->current_pid = task_pid_nr(current);
			mp->prio = p->prio;
			mp->current_prio = current->prio;
			mp->latency = latency;
			mp->timeroffset = timeroffset;
			mp->timestamp = stop;
		}
#endif
		my_hist->max_lat = latency;
	}
	if (unlikely(latency < my_hist->min_lat))
		my_hist->min_lat = latency;
	my_hist->total_samples++;
	my_hist->accumulate_lat += latency;
}
21840 | + | |
/*
 * l_start - seq_file start callback for a histogram file
 *
 * On the first call (*pos == 0) it freezes the histogram (hist_mode--)
 * and emits the summary header (min/avg/max, sample counts).  Returns a
 * heap-allocated copy of the position to iterate the buckets, or NULL
 * past the last bucket.  The copy is freed in l_stop().
 */
static void *l_start(struct seq_file *m, loff_t *pos)
{
	loff_t *index_ptr = NULL;
	loff_t index = *pos;
	struct hist_data *my_hist = m->private;

	if (index == 0) {
		char minstr[32], avgstr[32], maxstr[32];

		/* Stop sampling while the histogram is being read out. */
		atomic_dec(&my_hist->hist_mode);

		if (likely(my_hist->total_samples)) {
			long avg = (long) div64_s64(my_hist->accumulate_lat,
			    my_hist->total_samples);
			/* Undo the bucket-index offset for display. */
			snprintf(minstr, sizeof(minstr), "%ld",
			    my_hist->min_lat - my_hist->offset);
			snprintf(avgstr, sizeof(avgstr), "%ld",
			    avg - my_hist->offset);
			snprintf(maxstr, sizeof(maxstr), "%ld",
			    my_hist->max_lat - my_hist->offset);
		} else {
			strcpy(minstr, "<undef>");
			strcpy(avgstr, minstr);
			strcpy(maxstr, minstr);
		}

		seq_printf(m, "#Minimum latency: %s microseconds\n"
			   "#Average latency: %s microseconds\n"
			   "#Maximum latency: %s microseconds\n"
			   "#Total samples: %llu\n"
			   "#There are %llu samples lower than %ld"
			   " microseconds.\n"
			   "#There are %llu samples greater or equal"
			   " than %ld microseconds.\n"
			   "#usecs\t%16s\n",
			   minstr, avgstr, maxstr,
			   my_hist->total_samples,
			   my_hist->below_hist_bound_samples,
			   -my_hist->offset,
			   my_hist->above_hist_bound_samples,
			   MAX_ENTRY_NUM - my_hist->offset,
			   "samples");
	}
	if (index < MAX_ENTRY_NUM) {
		index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL);
		if (index_ptr)
			*index_ptr = index;
	}

	return index_ptr;
}
21892 | + | |
/*
 * l_next - seq_file next callback: advance to the next histogram bucket.
 * Unfreezes the histogram (hist_mode++) once iteration runs off the end,
 * balancing the atomic_dec() done in l_start().
 */
static void *l_next(struct seq_file *m, void *p, loff_t *pos)
{
	loff_t *index_ptr = p;
	struct hist_data *my_hist = m->private;

	if (++*pos >= MAX_ENTRY_NUM) {
		atomic_inc(&my_hist->hist_mode);
		return NULL;
	}
	*index_ptr = *pos;
	return index_ptr;
}
21905 | + | |
/* l_stop - seq_file stop callback: free the index allocated in l_start(). */
static void l_stop(struct seq_file *m, void *p)
{
	kfree(p);
}
21910 | + | |
/*
 * l_show - seq_file show callback: print one bucket as
 * "<latency usecs>\t<sample count>", undoing the bucket-index offset.
 */
static int l_show(struct seq_file *m, void *p)
{
	int index = *(loff_t *) p;
	struct hist_data *my_hist = m->private;

	seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset,
	    my_hist->hist_array[index]);
	return 0;
}
21920 | + | |
/* seq_file iterator for the per-CPU histogram debugfs files. */
static const struct seq_operations latency_hist_seq_op = {
	.start = l_start,
	.next = l_next,
	.stop = l_stop,
	.show = l_show
};
21927 | + | |
/*
 * latency_hist_open - open callback for a histogram file.
 * Sets up seq_file iteration and stashes the per-CPU hist_data pointer
 * (passed via inode->i_private at debugfs_create_file time) for the
 * seq callbacks.
 */
static int latency_hist_open(struct inode *inode, struct file *file)
{
	int ret;

	ret = seq_open(file, &latency_hist_seq_op);
	if (!ret) {
		struct seq_file *seq = file->private_data;
		seq->private = inode->i_private;
	}
	return ret;
}
21939 | + | |
/* Read-only file operations for the per-CPU histogram debugfs files. */
static const struct file_operations latency_hist_fops = {
	.open = latency_hist_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
21946 | + | |
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
/*
 * clear_maxlatprocdata - reset a max-latency process record to its
 * "empty" state (-1 pids/prios act as the "no sample yet" sentinel,
 * checked by show_maxlatproc()).
 */
static void clear_maxlatprocdata(struct maxlatproc_data *mp)
{
	mp->comm[0] = mp->current_comm[0] = '\0';
	mp->prio = mp->current_prio = mp->pid = mp->current_pid =
	    mp->latency = mp->timeroffset = -1;
	mp->timestamp = 0;
}
#endif
21957 | + | |
/*
 * hist_reset - clear one histogram's buckets and statistics.
 * Sampling is suspended (hist_mode--) for the duration of the memset so
 * latency_hist() cannot write into the histogram mid-reset.
 */
static void hist_reset(struct hist_data *hist)
{
	atomic_dec(&hist->hist_mode);

	memset(hist->hist_array, 0, sizeof(hist->hist_array));
	hist->below_hist_bound_samples = 0ULL;
	hist->above_hist_bound_samples = 0ULL;
	/* LONG_MAX/LONG_MIN sentinels mean "no sample recorded yet". */
	hist->min_lat = LONG_MAX;
	hist->max_lat = LONG_MIN;
	hist->total_samples = 0ULL;
	hist->accumulate_lat = 0LL;

	atomic_inc(&hist->hist_mode);
}
21972 | + | |
21973 | +static ssize_t | |
21974 | +latency_hist_reset(struct file *file, const char __user *a, | |
21975 | + size_t size, loff_t *off) | |
21976 | +{ | |
21977 | + int cpu; | |
21978 | + struct hist_data *hist = NULL; | |
21979 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
21980 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21981 | + struct maxlatproc_data *mp = NULL; | |
21982 | +#endif | |
21983 | + off_t latency_type = (off_t) file->private_data; | |
21984 | + | |
21985 | + for_each_online_cpu(cpu) { | |
21986 | + | |
21987 | + switch (latency_type) { | |
21988 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21989 | + case PREEMPTOFF_LATENCY: | |
21990 | + hist = &per_cpu(preemptoff_hist, cpu); | |
21991 | + break; | |
21992 | +#endif | |
21993 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21994 | + case IRQSOFF_LATENCY: | |
21995 | + hist = &per_cpu(irqsoff_hist, cpu); | |
21996 | + break; | |
21997 | +#endif | |
21998 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
21999 | + case PREEMPTIRQSOFF_LATENCY: | |
22000 | + hist = &per_cpu(preemptirqsoff_hist, cpu); | |
22001 | + break; | |
22002 | +#endif | |
22003 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22004 | + case WAKEUP_LATENCY: | |
22005 | + hist = &per_cpu(wakeup_latency_hist, cpu); | |
22006 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
22007 | + break; | |
22008 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
22009 | + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
22010 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
22011 | + break; | |
22012 | +#endif | |
22013 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
22014 | + case MISSED_TIMER_OFFSETS: | |
22015 | + hist = &per_cpu(missed_timer_offsets, cpu); | |
22016 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
22017 | + break; | |
22018 | +#endif | |
22019 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
22020 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
22021 | + case TIMERANDWAKEUP_LATENCY: | |
22022 | + hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
22023 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
22024 | + break; | |
22025 | +#endif | |
22026 | + } | |
22027 | + | |
22028 | + hist_reset(hist); | |
22029 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
22030 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
22031 | + if (latency_type == WAKEUP_LATENCY || | |
22032 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
22033 | + latency_type == MISSED_TIMER_OFFSETS || | |
22034 | + latency_type == TIMERANDWAKEUP_LATENCY) | |
22035 | + clear_maxlatprocdata(mp); | |
22036 | +#endif | |
22037 | + } | |
22038 | + | |
22039 | + return size; | |
22040 | +} | |
22041 | + | |
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
/*
 * show_pid - read handler for a "pid" filter file: print the current
 * filter PID (0 means "no filter").
 */
static ssize_t
show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;
	unsigned long *this_pid = file->private_data;

	r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

/*
 * do_pid - write handler for a "pid" filter file: parse a decimal PID
 * from userspace and store it in the variable pointed to by
 * file->private_data (wakeup_pid or missed_timer_offsets_pid).
 */
static ssize_t do_pid(struct file *file, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	char buf[64];
	unsigned long pid;
	unsigned long *this_pid = file->private_data;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = '\0';

	if (kstrtoul(buf, 10, &pid))
		return -EINVAL;

	*this_pid = pid;

	return cnt;
}
#endif
22078 | + | |
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
/*
 * show_maxlatproc - read handler for a "max_latency-CPUx" file.
 *
 * Prints one line describing the worst-latency event recorded so far:
 *   "<pid> <rtprio> <latency> (<timeroffset>) <comm>
 *    <- <curr pid> <curr rtprio> <curr comm> <secs>.<usecs>"
 * MAX_RT_PRIO-1 - prio converts the kernel-internal prio back to the
 * user-visible RT priority scale.  Prints "(none)" while the record is
 * still in the -1 sentinel state set by clear_maxlatprocdata().
 */
static ssize_t
show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	int r;
	struct maxlatproc_data *mp = file->private_data;
	/* Two comm strings plus eight numeric fields of up to 8 chars. */
	int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
	unsigned long long t;
	unsigned long usecs, secs;
	char *buf;

	if (mp->pid == -1 || mp->current_pid == -1) {
		buf = "(none)\n";
		return simple_read_from_buffer(ubuf, cnt, ppos, buf,
		    strlen(buf));
	}

	buf = kmalloc(strmaxlen, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	/* Split the nanosecond timestamp into seconds and microseconds. */
	t = ns2usecs(mp->timestamp);
	usecs = do_div(t, USEC_PER_SEC);
	secs = (unsigned long) t;
	r = snprintf(buf, strmaxlen,
	    "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
	    MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
	    mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
	    secs, usecs);
	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	kfree(buf);
	return r;
}
#endif
22114 | + | |
/*
 * show_enable - read handler for an "enable" file: print 0 or 1 for the
 * enable_data stored in file->private_data.
 */
static ssize_t
show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
{
	char buf[64];
	struct enable_data *ed = file->private_data;
	int r;

	r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
22125 | + | |
22126 | +static ssize_t | |
22127 | +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) | |
22128 | +{ | |
22129 | + char buf[64]; | |
22130 | + long enable; | |
22131 | + struct enable_data *ed = file->private_data; | |
22132 | + | |
22133 | + if (cnt >= sizeof(buf)) | |
22134 | + return -EINVAL; | |
22135 | + | |
22136 | + if (copy_from_user(&buf, ubuf, cnt)) | |
22137 | + return -EFAULT; | |
22138 | + | |
22139 | + buf[cnt] = 0; | |
22140 | + | |
22141 | + if (kstrtoul(buf, 10, &enable)) | |
22142 | + return -EINVAL; | |
22143 | + | |
22144 | + if ((enable && ed->enabled) || (!enable && !ed->enabled)) | |
22145 | + return cnt; | |
22146 | + | |
22147 | + if (enable) { | |
22148 | + int ret; | |
22149 | + | |
22150 | + switch (ed->latency_type) { | |
22151 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
22152 | + case PREEMPTIRQSOFF_LATENCY: | |
22153 | + ret = register_trace_preemptirqsoff_hist( | |
22154 | + probe_preemptirqsoff_hist, NULL); | |
22155 | + if (ret) { | |
22156 | + pr_info("wakeup trace: Couldn't assign " | |
22157 | + "probe_preemptirqsoff_hist " | |
22158 | + "to trace_preemptirqsoff_hist\n"); | |
22159 | + return ret; | |
22160 | + } | |
22161 | + break; | |
22162 | +#endif | |
22163 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22164 | + case WAKEUP_LATENCY: | |
22165 | + ret = register_trace_sched_wakeup( | |
22166 | + probe_wakeup_latency_hist_start, NULL); | |
22167 | + if (ret) { | |
22168 | + pr_info("wakeup trace: Couldn't assign " | |
22169 | + "probe_wakeup_latency_hist_start " | |
22170 | + "to trace_sched_wakeup\n"); | |
22171 | + return ret; | |
22172 | + } | |
22173 | + ret = register_trace_sched_wakeup_new( | |
22174 | + probe_wakeup_latency_hist_start, NULL); | |
22175 | + if (ret) { | |
22176 | + pr_info("wakeup trace: Couldn't assign " | |
22177 | + "probe_wakeup_latency_hist_start " | |
22178 | + "to trace_sched_wakeup_new\n"); | |
22179 | + unregister_trace_sched_wakeup( | |
22180 | + probe_wakeup_latency_hist_start, NULL); | |
22181 | + return ret; | |
22182 | + } | |
22183 | + ret = register_trace_sched_switch( | |
22184 | + probe_wakeup_latency_hist_stop, NULL); | |
22185 | + if (ret) { | |
22186 | + pr_info("wakeup trace: Couldn't assign " | |
22187 | + "probe_wakeup_latency_hist_stop " | |
22188 | + "to trace_sched_switch\n"); | |
22189 | + unregister_trace_sched_wakeup( | |
22190 | + probe_wakeup_latency_hist_start, NULL); | |
22191 | + unregister_trace_sched_wakeup_new( | |
22192 | + probe_wakeup_latency_hist_start, NULL); | |
22193 | + return ret; | |
22194 | + } | |
22195 | + ret = register_trace_sched_migrate_task( | |
22196 | + probe_sched_migrate_task, NULL); | |
22197 | + if (ret) { | |
22198 | + pr_info("wakeup trace: Couldn't assign " | |
22199 | + "probe_sched_migrate_task " | |
22200 | + "to trace_sched_migrate_task\n"); | |
22201 | + unregister_trace_sched_wakeup( | |
22202 | + probe_wakeup_latency_hist_start, NULL); | |
22203 | + unregister_trace_sched_wakeup_new( | |
22204 | + probe_wakeup_latency_hist_start, NULL); | |
22205 | + unregister_trace_sched_switch( | |
22206 | + probe_wakeup_latency_hist_stop, NULL); | |
22207 | + return ret; | |
22208 | + } | |
22209 | + break; | |
22210 | +#endif | |
22211 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
22212 | + case MISSED_TIMER_OFFSETS: | |
22213 | + ret = register_trace_hrtimer_interrupt( | |
22214 | + probe_hrtimer_interrupt, NULL); | |
22215 | + if (ret) { | |
22216 | + pr_info("wakeup trace: Couldn't assign " | |
22217 | + "probe_hrtimer_interrupt " | |
22218 | + "to trace_hrtimer_interrupt\n"); | |
22219 | + return ret; | |
22220 | + } | |
22221 | + break; | |
22222 | +#endif | |
22223 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
22224 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
22225 | + case TIMERANDWAKEUP_LATENCY: | |
22226 | + if (!wakeup_latency_enabled_data.enabled || | |
22227 | + !missed_timer_offsets_enabled_data.enabled) | |
22228 | + return -EINVAL; | |
22229 | + break; | |
22230 | +#endif | |
22231 | + default: | |
22232 | + break; | |
22233 | + } | |
22234 | + } else { | |
22235 | + switch (ed->latency_type) { | |
22236 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
22237 | + case PREEMPTIRQSOFF_LATENCY: | |
22238 | + { | |
22239 | + int cpu; | |
22240 | + | |
22241 | + unregister_trace_preemptirqsoff_hist( | |
22242 | + probe_preemptirqsoff_hist, NULL); | |
22243 | + for_each_online_cpu(cpu) { | |
22244 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
22245 | + per_cpu(hist_irqsoff_counting, | |
22246 | + cpu) = 0; | |
22247 | +#endif | |
22248 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
22249 | + per_cpu(hist_preemptoff_counting, | |
22250 | + cpu) = 0; | |
22251 | +#endif | |
22252 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
22253 | + per_cpu(hist_preemptirqsoff_counting, | |
22254 | + cpu) = 0; | |
22255 | +#endif | |
22256 | + } | |
22257 | + } | |
22258 | + break; | |
22259 | +#endif | |
22260 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22261 | + case WAKEUP_LATENCY: | |
22262 | + { | |
22263 | + int cpu; | |
22264 | + | |
22265 | + unregister_trace_sched_wakeup( | |
22266 | + probe_wakeup_latency_hist_start, NULL); | |
22267 | + unregister_trace_sched_wakeup_new( | |
22268 | + probe_wakeup_latency_hist_start, NULL); | |
22269 | + unregister_trace_sched_switch( | |
22270 | + probe_wakeup_latency_hist_stop, NULL); | |
22271 | + unregister_trace_sched_migrate_task( | |
22272 | + probe_sched_migrate_task, NULL); | |
22273 | + | |
22274 | + for_each_online_cpu(cpu) { | |
22275 | + per_cpu(wakeup_task, cpu) = NULL; | |
22276 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
22277 | + } | |
22278 | + } | |
22279 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
22280 | + timerandwakeup_enabled_data.enabled = 0; | |
22281 | +#endif | |
22282 | + break; | |
22283 | +#endif | |
22284 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
22285 | + case MISSED_TIMER_OFFSETS: | |
22286 | + unregister_trace_hrtimer_interrupt( | |
22287 | + probe_hrtimer_interrupt, NULL); | |
22288 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22289 | + timerandwakeup_enabled_data.enabled = 0; | |
22290 | +#endif | |
22291 | + break; | |
22292 | +#endif | |
22293 | + default: | |
22294 | + break; | |
22295 | + } | |
22296 | + } | |
22297 | + ed->enabled = enable; | |
22298 | + return cnt; | |
22299 | +} | |
22300 | + | |
/* Write-only "reset" file: any write clears the histogram. */
static const struct file_operations latency_hist_reset_fops = {
	.open = tracing_open_generic,
	.write = latency_hist_reset,
};
22305 | + | |
/* "enable" file: read current state, write 0/1 to switch probes on/off. */
static const struct file_operations enable_fops = {
	.open = tracing_open_generic,
	.read = show_enable,
	.write = do_enable,
};
22311 | + | |
#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
/* "pid" filter file: restrict sampling to a single PID (0 = all). */
static const struct file_operations pid_fops = {
	.open = tracing_open_generic,
	.read = show_pid,
	.write = do_pid,
};

/* Read-only "max_latency-CPUx" file: worst event recorded on a CPU. */
static const struct file_operations maxlatproc_fops = {
	.open = tracing_open_generic,
	.read = show_maxlatproc,
};
#endif
22325 | + | |
#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST)
/*
 * probe_preemptirqsoff_hist - tracepoint probe for irq/preempt-off sections
 * @v: unused tracepoint argument
 * @reason: what changed (IRQS_OFF/IRQS_ON, PREEMPT_OFF/PREEMPT_ON,
 *	TRACE_START/TRACE_STOP)
 * @starthist: nonzero when a critical section begins, zero when it ends
 *
 * On section start, stores a per-CPU start timestamp for each histogram
 * whose tracked condition just became true; on section end, converts the
 * elapsed cycles to microseconds and feeds the sample to latency_hist().
 * time_set ensures ftrace_now() is called at most once per invocation so
 * that all histograms updated here see the same timestamp.
 */
static notrace void probe_preemptirqsoff_hist(void *v, int reason,
	int starthist)
{
	int cpu = raw_smp_processor_id();
	int time_set = 0;

	if (starthist) {
		cycle_t uninitialized_var(start);

		/* Ignore spurious "start" events outside a real section. */
		if (!preempt_count() && !irqs_disabled())
			return;

#ifdef CONFIG_INTERRUPT_OFF_HIST
		if ((reason == IRQS_OFF || reason == TRACE_START) &&
		    !per_cpu(hist_irqsoff_counting, cpu)) {
			per_cpu(hist_irqsoff_counting, cpu) = 1;
			start = ftrace_now(cpu);
			time_set++;
			per_cpu(hist_irqsoff_start, cpu) = start;
		}
#endif

#ifdef CONFIG_PREEMPT_OFF_HIST
		if ((reason == PREEMPT_OFF || reason == TRACE_START) &&
		    !per_cpu(hist_preemptoff_counting, cpu)) {
			per_cpu(hist_preemptoff_counting, cpu) = 1;
			if (!(time_set++))
				start = ftrace_now(cpu);
			per_cpu(hist_preemptoff_start, cpu) = start;
		}
#endif

#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
		/* Combined section starts once both are counting. */
		if (per_cpu(hist_irqsoff_counting, cpu) &&
		    per_cpu(hist_preemptoff_counting, cpu) &&
		    !per_cpu(hist_preemptirqsoff_counting, cpu)) {
			per_cpu(hist_preemptirqsoff_counting, cpu) = 1;
			if (!time_set)
				start = ftrace_now(cpu);
			per_cpu(hist_preemptirqsoff_start, cpu) = start;
		}
#endif
	} else {
		cycle_t uninitialized_var(stop);

#ifdef CONFIG_INTERRUPT_OFF_HIST
		if ((reason == IRQS_ON || reason == TRACE_STOP) &&
		    per_cpu(hist_irqsoff_counting, cpu)) {
			cycle_t start = per_cpu(hist_irqsoff_start, cpu);

			stop = ftrace_now(cpu);
			time_set++;
			/* start == 0 means no valid start timestamp. */
			if (start) {
				long latency = ((long) (stop - start)) /
				    NSECS_PER_USECS;

				latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
				    stop, NULL);
			}
			per_cpu(hist_irqsoff_counting, cpu) = 0;
		}
#endif

#ifdef CONFIG_PREEMPT_OFF_HIST
		if ((reason == PREEMPT_ON || reason == TRACE_STOP) &&
		    per_cpu(hist_preemptoff_counting, cpu)) {
			cycle_t start = per_cpu(hist_preemptoff_start, cpu);

			if (!(time_set++))
				stop = ftrace_now(cpu);
			if (start) {
				long latency = ((long) (stop - start)) /
				    NSECS_PER_USECS;

				latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
				    0, stop, NULL);
			}
			per_cpu(hist_preemptoff_counting, cpu) = 0;
		}
#endif

#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST)
		/* Combined section ends as soon as either one ends. */
		if ((!per_cpu(hist_irqsoff_counting, cpu) ||
		     !per_cpu(hist_preemptoff_counting, cpu)) &&
		   per_cpu(hist_preemptirqsoff_counting, cpu)) {
			cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu);

			if (!time_set)
				stop = ftrace_now(cpu);
			if (start) {
				long latency = ((long) (stop - start)) /
				    NSECS_PER_USECS;

				latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
				    latency, 0, stop, NULL);
			}
			per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
		}
#endif
	}
}
#endif
22429 | + | |
#ifdef CONFIG_WAKEUP_LATENCY_HIST
/* Protects the per-CPU wakeup_task / wakeup_sharedprio bookkeeping. */
static DEFINE_RAW_SPINLOCK(wakeup_lock);
/*
 * probe_sched_migrate_task - follow a tracked task across CPU migration.
 * If the task we are currently timing on old_cpu is migrated, move the
 * wakeup_task reference to the destination CPU so the latency is closed
 * out there.  Task reference counting: drop the old slot's reference,
 * take one for the new slot.
 */
static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
	int cpu)
{
	int old_cpu = task_cpu(task);

	if (cpu != old_cpu) {
		unsigned long flags;
		struct task_struct *cpu_wakeup_task;

		raw_spin_lock_irqsave(&wakeup_lock, flags);

		cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
		if (task == cpu_wakeup_task) {
			put_task_struct(cpu_wakeup_task);
			per_cpu(wakeup_task, old_cpu) = NULL;
			cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
			get_task_struct(cpu_wakeup_task);
		}

		raw_spin_unlock_irqrestore(&wakeup_lock, flags);
	}
}
22454 | + | |
/*
 * probe_wakeup_latency_hist_start - sched_wakeup(_new) tracepoint probe.
 *
 * Decides whether the woken task @p becomes the task whose wakeup
 * latency we measure on its CPU, and if so timestamps the wakeup.
 * With a PID filter set, only that PID is tracked; otherwise only RT
 * tasks that are the highest-priority contender are tracked.  A task
 * sharing its priority with the current tracked task or with current
 * marks the sample as "shared priority" (separate histogram).
 */
static notrace void probe_wakeup_latency_hist_start(void *v,
	struct task_struct *p)
{
	unsigned long flags;
	struct task_struct *curr = current;
	int cpu = task_cpu(p);
	struct task_struct *cpu_wakeup_task;

	raw_spin_lock_irqsave(&wakeup_lock, flags);

	cpu_wakeup_task = per_cpu(wakeup_task, cpu);

	if (wakeup_pid) {
		if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
		    p->prio == curr->prio)
			per_cpu(wakeup_sharedprio, cpu) = 1;
		/* PID filter active: ignore everything but the chosen PID. */
		if (likely(wakeup_pid != task_pid_nr(p)))
			goto out;
	} else {
		/* Track only RT tasks that outrank the current candidates. */
		if (likely(!rt_task(p)) ||
		    (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) ||
		    p->prio > curr->prio)
			goto out;
		if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) ||
		    p->prio == curr->prio)
			per_cpu(wakeup_sharedprio, cpu) = 1;
	}

	/* Replace the previously tracked task (if any) with @p. */
	if (cpu_wakeup_task)
		put_task_struct(cpu_wakeup_task);
	cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p;
	get_task_struct(cpu_wakeup_task);
	cpu_wakeup_task->preempt_timestamp_hist =
	    ftrace_now(raw_smp_processor_id());
out:
	raw_spin_unlock_irqrestore(&wakeup_lock, flags);
}
22492 | + | |
/*
 * probe_wakeup_latency_hist_stop - sched_switch tracepoint probe.
 *
 * When the tracked task is finally switched in, computes the time from
 * its wakeup timestamp to now and records it in the wakeup histogram
 * (the shared-prio variant if the priority was contended, and also the
 * combined timer+wakeup histogram when that one is enabled).  Switching
 * in a higher-priority task aborts the measurement; an equal-priority
 * task keeps it running but flags shared priority.
 */
static notrace void probe_wakeup_latency_hist_stop(void *v,
	bool preempt, struct task_struct *prev, struct task_struct *next)
{
	unsigned long flags;
	int cpu = task_cpu(next);
	long latency;
	cycle_t stop;
	struct task_struct *cpu_wakeup_task;

	raw_spin_lock_irqsave(&wakeup_lock, flags);

	cpu_wakeup_task = per_cpu(wakeup_task, cpu);

	if (cpu_wakeup_task == NULL)
		goto out;

	/* Already running? */
	if (unlikely(current == cpu_wakeup_task))
		goto out_reset;

	if (next != cpu_wakeup_task) {
		/* A higher-priority task won: abandon this measurement. */
		if (next->prio < cpu_wakeup_task->prio)
			goto out_reset;

		if (next->prio == cpu_wakeup_task->prio)
			per_cpu(wakeup_sharedprio, cpu) = 1;

		goto out;
	}

	if (current->prio == cpu_wakeup_task->prio)
		per_cpu(wakeup_sharedprio, cpu) = 1;

	/*
	 * The task we are waiting for is about to be switched to.
	 * Calculate latency and store it in histogram.
	 */
	stop = ftrace_now(raw_smp_processor_id());

	latency = ((long) (stop - next->preempt_timestamp_hist)) /
	    NSECS_PER_USECS;

	if (per_cpu(wakeup_sharedprio, cpu)) {
		latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
		    next);
		per_cpu(wakeup_sharedprio, cpu) = 0;
	} else {
		latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
		if (timerandwakeup_enabled_data.enabled) {
			latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
			    next->timer_offset + latency, next->timer_offset,
			    stop, next);
		}
#endif
	}

out_reset:
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
	next->timer_offset = 0;
#endif
	/* Drop the reference taken when the task started being tracked. */
	put_task_struct(cpu_wakeup_task);
	per_cpu(wakeup_task, cpu) = NULL;
out:
	raw_spin_unlock_irqrestore(&wakeup_lock, flags);
}
#endif
22560 | + | |
#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
/*
 * probe_hrtimer_interrupt - hrtimer_interrupt tracepoint probe.
 *
 * Records how late an hrtimer fired: latency_ns <= 0 apparently encodes
 * lateness, so -latency_ns is the delay (NOTE(review): confirm the sign
 * convention against the trace_hrtimer_interrupt() caller).  Only timers
 * that wake an RT task which would preempt the currently running task
 * (or ties it but cannot run on this CPU) are counted; an optional PID
 * filter narrows it further.  The offset is also stashed in the task for
 * the combined timer+wakeup histogram.
 */
static notrace void probe_hrtimer_interrupt(void *v, int cpu,
	long long latency_ns, struct task_struct *curr,
	struct task_struct *task)
{
	if (latency_ns <= 0 && task != NULL && rt_task(task) &&
	    (task->prio < curr->prio ||
	    (task->prio == curr->prio &&
	    !cpumask_test_cpu(cpu, &task->cpus_allowed)))) {
		long latency;
		cycle_t now;

		if (missed_timer_offsets_pid) {
			if (likely(missed_timer_offsets_pid !=
			    task_pid_nr(task)))
				return;
		}

		now = ftrace_now(cpu);
		/* Convert nanoseconds of lateness to positive microseconds. */
		latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
		latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
		    task);
#ifdef CONFIG_WAKEUP_LATENCY_HIST
		task->timer_offset = latency;
#endif
	}
}
#endif
22589 | + | |
22590 | +static __init int latency_hist_init(void) | |
22591 | +{ | |
22592 | + struct dentry *latency_hist_root = NULL; | |
22593 | + struct dentry *dentry; | |
22594 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22595 | + struct dentry *dentry_sharedprio; | |
22596 | +#endif | |
22597 | + struct dentry *entry; | |
22598 | + struct dentry *enable_root; | |
22599 | + int i = 0; | |
22600 | + struct hist_data *my_hist; | |
22601 | + char name[64]; | |
22602 | + char *cpufmt = "CPU%d"; | |
22603 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
22604 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
22605 | + char *cpufmt_maxlatproc = "max_latency-CPU%d"; | |
22606 | + struct maxlatproc_data *mp = NULL; | |
22607 | +#endif | |
22608 | + | |
22609 | + dentry = tracing_init_dentry(); | |
22610 | + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry); | |
22611 | + enable_root = debugfs_create_dir("enable", latency_hist_root); | |
22612 | + | |
22613 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
22614 | + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root); | |
22615 | + for_each_possible_cpu(i) { | |
22616 | + sprintf(name, cpufmt, i); | |
22617 | + entry = debugfs_create_file(name, 0444, dentry, | |
22618 | + &per_cpu(irqsoff_hist, i), &latency_hist_fops); | |
22619 | + my_hist = &per_cpu(irqsoff_hist, i); | |
22620 | + atomic_set(&my_hist->hist_mode, 1); | |
22621 | + my_hist->min_lat = LONG_MAX; | |
22622 | + } | |
22623 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22624 | + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops); | |
22625 | +#endif | |
22626 | + | |
22627 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
22628 | + dentry = debugfs_create_dir(preemptoff_hist_dir, | |
22629 | + latency_hist_root); | |
22630 | + for_each_possible_cpu(i) { | |
22631 | + sprintf(name, cpufmt, i); | |
22632 | + entry = debugfs_create_file(name, 0444, dentry, | |
22633 | + &per_cpu(preemptoff_hist, i), &latency_hist_fops); | |
22634 | + my_hist = &per_cpu(preemptoff_hist, i); | |
22635 | + atomic_set(&my_hist->hist_mode, 1); | |
22636 | + my_hist->min_lat = LONG_MAX; | |
22637 | + } | |
22638 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22639 | + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops); | |
22640 | +#endif | |
22641 | + | |
22642 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
22643 | + dentry = debugfs_create_dir(preemptirqsoff_hist_dir, | |
22644 | + latency_hist_root); | |
22645 | + for_each_possible_cpu(i) { | |
22646 | + sprintf(name, cpufmt, i); | |
22647 | + entry = debugfs_create_file(name, 0444, dentry, | |
22648 | + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops); | |
22649 | + my_hist = &per_cpu(preemptirqsoff_hist, i); | |
22650 | + atomic_set(&my_hist->hist_mode, 1); | |
22651 | + my_hist->min_lat = LONG_MAX; | |
22652 | + } | |
22653 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22654 | + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops); | |
22655 | +#endif | |
22656 | + | |
22657 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
22658 | + entry = debugfs_create_file("preemptirqsoff", 0644, | |
22659 | + enable_root, (void *)&preemptirqsoff_enabled_data, | |
22660 | + &enable_fops); | |
22661 | +#endif | |
22662 | + | |
22663 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
22664 | + dentry = debugfs_create_dir(wakeup_latency_hist_dir, | |
22665 | + latency_hist_root); | |
22666 | + dentry_sharedprio = debugfs_create_dir( | |
22667 | + wakeup_latency_hist_dir_sharedprio, dentry); | |
22668 | + for_each_possible_cpu(i) { | |
22669 | + sprintf(name, cpufmt, i); | |
22670 | + | |
22671 | + entry = debugfs_create_file(name, 0444, dentry, | |
22672 | + &per_cpu(wakeup_latency_hist, i), | |
22673 | + &latency_hist_fops); | |
22674 | + my_hist = &per_cpu(wakeup_latency_hist, i); | |
22675 | + atomic_set(&my_hist->hist_mode, 1); | |
22676 | + my_hist->min_lat = LONG_MAX; | |
22677 | + | |
22678 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, | |
22679 | + &per_cpu(wakeup_latency_hist_sharedprio, i), | |
22680 | + &latency_hist_fops); | |
22681 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i); | |
22682 | + atomic_set(&my_hist->hist_mode, 1); | |
22683 | + my_hist->min_lat = LONG_MAX; | |
22684 | + | |
22685 | + sprintf(name, cpufmt_maxlatproc, i); | |
22686 | + | |
22687 | + mp = &per_cpu(wakeup_maxlatproc, i); | |
22688 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
22689 | + &maxlatproc_fops); | |
22690 | + clear_maxlatprocdata(mp); | |
22691 | + | |
22692 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i); | |
22693 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp, | |
22694 | + &maxlatproc_fops); | |
22695 | + clear_maxlatprocdata(mp); | |
22696 | + } | |
22697 | + entry = debugfs_create_file("pid", 0644, dentry, | |
22698 | + (void *)&wakeup_pid, &pid_fops); | |
22699 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22700 | + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops); | |
22701 | + entry = debugfs_create_file("reset", 0644, dentry_sharedprio, | |
22702 | + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops); | |
22703 | + entry = debugfs_create_file("wakeup", 0644, | |
22704 | + enable_root, (void *)&wakeup_latency_enabled_data, | |
22705 | + &enable_fops); | |
22706 | +#endif | |
22707 | + | |
22708 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
22709 | + dentry = debugfs_create_dir(missed_timer_offsets_dir, | |
22710 | + latency_hist_root); | |
22711 | + for_each_possible_cpu(i) { | |
22712 | + sprintf(name, cpufmt, i); | |
22713 | + entry = debugfs_create_file(name, 0444, dentry, | |
22714 | + &per_cpu(missed_timer_offsets, i), &latency_hist_fops); | |
22715 | + my_hist = &per_cpu(missed_timer_offsets, i); | |
22716 | + atomic_set(&my_hist->hist_mode, 1); | |
22717 | + my_hist->min_lat = LONG_MAX; | |
22718 | + | |
22719 | + sprintf(name, cpufmt_maxlatproc, i); | |
22720 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, i); | |
22721 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
22722 | + &maxlatproc_fops); | |
22723 | + clear_maxlatprocdata(mp); | |
22724 | + } | |
22725 | + entry = debugfs_create_file("pid", 0644, dentry, | |
22726 | + (void *)&missed_timer_offsets_pid, &pid_fops); | |
22727 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22728 | + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops); | |
22729 | + entry = debugfs_create_file("missed_timer_offsets", 0644, | |
22730 | + enable_root, (void *)&missed_timer_offsets_enabled_data, | |
22731 | + &enable_fops); | |
22732 | +#endif | |
22733 | + | |
22734 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
22735 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
22736 | + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, | |
22737 | + latency_hist_root); | |
22738 | + for_each_possible_cpu(i) { | |
22739 | + sprintf(name, cpufmt, i); | |
22740 | + entry = debugfs_create_file(name, 0444, dentry, | |
22741 | + &per_cpu(timerandwakeup_latency_hist, i), | |
22742 | + &latency_hist_fops); | |
22743 | + my_hist = &per_cpu(timerandwakeup_latency_hist, i); | |
22744 | + atomic_set(&my_hist->hist_mode, 1); | |
22745 | + my_hist->min_lat = LONG_MAX; | |
22746 | + | |
22747 | + sprintf(name, cpufmt_maxlatproc, i); | |
22748 | + mp = &per_cpu(timerandwakeup_maxlatproc, i); | |
22749 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
22750 | + &maxlatproc_fops); | |
22751 | + clear_maxlatprocdata(mp); | |
22752 | + } | |
22753 | + entry = debugfs_create_file("reset", 0644, dentry, | |
22754 | + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); | |
22755 | + entry = debugfs_create_file("timerandwakeup", 0644, | |
22756 | + enable_root, (void *)&timerandwakeup_enabled_data, | |
22757 | + &enable_fops); | |
22758 | +#endif | |
22759 | + return 0; | |
22760 | +} | |
22761 | + | |
22762 | +device_initcall(latency_hist_init); | |
22763 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | |
7c18450a | 22764 | index 83c60f9013cb..6fb207964a84 100644 |
1a6e0f06 JK |
22765 | --- a/kernel/trace/trace.c |
22766 | +++ b/kernel/trace/trace.c | |
22767 | @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
22768 | struct task_struct *tsk = current; | |
22769 | ||
22770 | entry->preempt_count = pc & 0xff; | |
22771 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
22772 | entry->pid = (tsk) ? tsk->pid : 0; | |
22773 | entry->flags = | |
22774 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
22775 | @@ -1907,8 +1908,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
22776 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | | |
22777 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
22778 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | |
22779 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
22780 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
22781 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
22782 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
22783 | + | |
22784 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
22785 | } | |
22786 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | |
22787 | ||
22788 | @@ -2892,14 +2896,17 @@ get_total_entries(struct trace_buffer *buf, | |
22789 | ||
22790 | static void print_lat_help_header(struct seq_file *m) | |
22791 | { | |
22792 | - seq_puts(m, "# _------=> CPU# \n" | |
22793 | - "# / _-----=> irqs-off \n" | |
22794 | - "# | / _----=> need-resched \n" | |
22795 | - "# || / _---=> hardirq/softirq \n" | |
22796 | - "# ||| / _--=> preempt-depth \n" | |
22797 | - "# |||| / delay \n" | |
22798 | - "# cmd pid ||||| time | caller \n" | |
22799 | - "# \\ / ||||| \\ | / \n"); | |
22800 | + seq_puts(m, "# _--------=> CPU# \n" | |
22801 | + "# / _-------=> irqs-off \n" | |
22802 | + "# | / _------=> need-resched \n" | |
22803 | + "# || / _-----=> need-resched_lazy \n" | |
22804 | + "# ||| / _----=> hardirq/softirq \n" | |
22805 | + "# |||| / _---=> preempt-depth \n" | |
22806 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
22807 | + "# |||||| / _-=> migrate-disable \n" | |
22808 | + "# ||||||| / delay \n" | |
22809 | + "# cmd pid |||||||| time | caller \n" | |
22810 | + "# \\ / |||||||| \\ | / \n"); | |
22811 | } | |
22812 | ||
22813 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |
22814 | @@ -2925,11 +2932,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |
22815 | print_event_info(buf, m); | |
22816 | seq_puts(m, "# _-----=> irqs-off\n" | |
22817 | "# / _----=> need-resched\n" | |
22818 | - "# | / _---=> hardirq/softirq\n" | |
22819 | - "# || / _--=> preempt-depth\n" | |
22820 | - "# ||| / delay\n" | |
22821 | - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" | |
22822 | - "# | | | |||| | |\n"); | |
22823 | + "# |/ _-----=> need-resched_lazy\n" | |
22824 | + "# || / _---=> hardirq/softirq\n" | |
22825 | + "# ||| / _--=> preempt-depth\n" | |
22826 | + "# |||| / _-=> preempt-lazy-depth\n" | |
22827 | + "# ||||| / _-=> migrate-disable \n" | |
22828 | + "# |||||| / delay\n" | |
22829 | + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" | |
22830 | + "# | | | ||||||| | |\n"); | |
22831 | } | |
22832 | ||
22833 | void | |
22834 | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h | |
7c18450a | 22835 | index b0d8576c27ae..702b9376b278 100644 |
1a6e0f06 JK |
22836 | --- a/kernel/trace/trace.h |
22837 | +++ b/kernel/trace/trace.h | |
c7c16703 | 22838 | @@ -124,6 +124,7 @@ struct kretprobe_trace_entry_head { |
1a6e0f06 JK |
22839 | * NEED_RESCHED - reschedule is requested |
22840 | * HARDIRQ - inside an interrupt handler | |
22841 | * SOFTIRQ - inside a softirq handler | |
22842 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
22843 | */ | |
22844 | enum trace_flag_type { | |
22845 | TRACE_FLAG_IRQS_OFF = 0x01, | |
c7c16703 | 22846 | @@ -133,6 +134,7 @@ enum trace_flag_type { |
1a6e0f06 JK |
22847 | TRACE_FLAG_SOFTIRQ = 0x10, |
22848 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
22849 | TRACE_FLAG_NMI = 0x40, | |
22850 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, | |
22851 | }; | |
22852 | ||
22853 | #define TRACE_BUF_SIZE 1024 | |
22854 | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c | |
22855 | index 03c0a48c3ac4..0b85d516b491 100644 | |
22856 | --- a/kernel/trace/trace_events.c | |
22857 | +++ b/kernel/trace/trace_events.c | |
22858 | @@ -187,6 +187,8 @@ static int trace_define_common_fields(void) | |
22859 | __common_field(unsigned char, flags); | |
22860 | __common_field(unsigned char, preempt_count); | |
22861 | __common_field(int, pid); | |
22862 | + __common_field(unsigned short, migrate_disable); | |
22863 | + __common_field(unsigned short, padding); | |
22864 | ||
22865 | return ret; | |
22866 | } | |
22867 | diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c | |
22868 | index 03cdff84d026..940bd10b4406 100644 | |
22869 | --- a/kernel/trace/trace_irqsoff.c | |
22870 | +++ b/kernel/trace/trace_irqsoff.c | |
22871 | @@ -13,6 +13,7 @@ | |
22872 | #include <linux/uaccess.h> | |
22873 | #include <linux/module.h> | |
22874 | #include <linux/ftrace.h> | |
22875 | +#include <trace/events/hist.h> | |
22876 | ||
22877 | #include "trace.h" | |
22878 | ||
22879 | @@ -424,11 +425,13 @@ void start_critical_timings(void) | |
22880 | { | |
22881 | if (preempt_trace() || irq_trace()) | |
22882 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
22883 | + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); | |
22884 | } | |
22885 | EXPORT_SYMBOL_GPL(start_critical_timings); | |
22886 | ||
22887 | void stop_critical_timings(void) | |
22888 | { | |
22889 | + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); | |
22890 | if (preempt_trace() || irq_trace()) | |
22891 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
22892 | } | |
22893 | @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); | |
22894 | #ifdef CONFIG_PROVE_LOCKING | |
22895 | void time_hardirqs_on(unsigned long a0, unsigned long a1) | |
22896 | { | |
22897 | + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); | |
22898 | if (!preempt_trace() && irq_trace()) | |
22899 | stop_critical_timing(a0, a1); | |
22900 | } | |
22901 | @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |
22902 | { | |
22903 | if (!preempt_trace() && irq_trace()) | |
22904 | start_critical_timing(a0, a1); | |
22905 | + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); | |
22906 | } | |
22907 | ||
22908 | #else /* !CONFIG_PROVE_LOCKING */ | |
22909 | @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr) | |
22910 | */ | |
22911 | void trace_hardirqs_on(void) | |
22912 | { | |
22913 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
22914 | if (!preempt_trace() && irq_trace()) | |
22915 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
22916 | } | |
22917 | @@ -480,11 +486,13 @@ void trace_hardirqs_off(void) | |
22918 | { | |
22919 | if (!preempt_trace() && irq_trace()) | |
22920 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
22921 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
22922 | } | |
22923 | EXPORT_SYMBOL(trace_hardirqs_off); | |
22924 | ||
22925 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) | |
22926 | { | |
22927 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
22928 | if (!preempt_trace() && irq_trace()) | |
22929 | stop_critical_timing(CALLER_ADDR0, caller_addr); | |
22930 | } | |
22931 | @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) | |
22932 | { | |
22933 | if (!preempt_trace() && irq_trace()) | |
22934 | start_critical_timing(CALLER_ADDR0, caller_addr); | |
22935 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
22936 | } | |
22937 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
22938 | ||
22939 | @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
22940 | #ifdef CONFIG_PREEMPT_TRACER | |
22941 | void trace_preempt_on(unsigned long a0, unsigned long a1) | |
22942 | { | |
22943 | + trace_preemptirqsoff_hist(PREEMPT_ON, 0); | |
22944 | if (preempt_trace() && !irq_trace()) | |
22945 | stop_critical_timing(a0, a1); | |
22946 | } | |
22947 | ||
22948 | void trace_preempt_off(unsigned long a0, unsigned long a1) | |
22949 | { | |
22950 | + trace_preemptirqsoff_hist(PREEMPT_ON, 1); | |
22951 | if (preempt_trace() && !irq_trace()) | |
22952 | start_critical_timing(a0, a1); | |
22953 | } | |
22954 | diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c | |
c7c16703 | 22955 | index 3fc20422c166..65a6dde71a7d 100644 |
1a6e0f06 JK |
22956 | --- a/kernel/trace/trace_output.c |
22957 | +++ b/kernel/trace/trace_output.c | |
22958 | @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
22959 | { | |
22960 | char hardsoft_irq; | |
22961 | char need_resched; | |
22962 | + char need_resched_lazy; | |
22963 | char irqs_off; | |
22964 | int hardirq; | |
22965 | int softirq; | |
22966 | @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
22967 | break; | |
22968 | } | |
22969 | ||
22970 | + need_resched_lazy = | |
22971 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
22972 | + | |
22973 | hardsoft_irq = | |
22974 | (nmi && hardirq) ? 'Z' : | |
22975 | nmi ? 'z' : | |
22976 | @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
22977 | softirq ? 's' : | |
22978 | '.' ; | |
22979 | ||
22980 | - trace_seq_printf(s, "%c%c%c", | |
22981 | - irqs_off, need_resched, hardsoft_irq); | |
22982 | + trace_seq_printf(s, "%c%c%c%c", | |
22983 | + irqs_off, need_resched, need_resched_lazy, | |
22984 | + hardsoft_irq); | |
22985 | ||
22986 | if (entry->preempt_count) | |
22987 | trace_seq_printf(s, "%x", entry->preempt_count); | |
22988 | else | |
22989 | trace_seq_putc(s, '.'); | |
22990 | ||
22991 | + if (entry->preempt_lazy_count) | |
22992 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
22993 | + else | |
22994 | + trace_seq_putc(s, '.'); | |
22995 | + | |
22996 | + if (entry->migrate_disable) | |
22997 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
22998 | + else | |
22999 | + trace_seq_putc(s, '.'); | |
23000 | + | |
23001 | return !trace_seq_has_overflowed(s); | |
23002 | } | |
23003 | ||
23004 | diff --git a/kernel/user.c b/kernel/user.c | |
23005 | index b069ccbfb0b0..1a2e88e98b5e 100644 | |
23006 | --- a/kernel/user.c | |
23007 | +++ b/kernel/user.c | |
23008 | @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up) | |
23009 | if (!up) | |
23010 | return; | |
23011 | ||
23012 | - local_irq_save(flags); | |
23013 | + local_irq_save_nort(flags); | |
23014 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
23015 | free_user(up, flags); | |
23016 | else | |
23017 | - local_irq_restore(flags); | |
23018 | + local_irq_restore_nort(flags); | |
23019 | } | |
23020 | ||
23021 | struct user_struct *alloc_uid(kuid_t uid) | |
23022 | diff --git a/kernel/watchdog.c b/kernel/watchdog.c | |
c7c16703 | 23023 | index 6d1020c03d41..70c6a2f79f7e 100644 |
1a6e0f06 JK |
23024 | --- a/kernel/watchdog.c |
23025 | +++ b/kernel/watchdog.c | |
23026 | @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts) | |
23027 | ||
23028 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | |
23029 | ||
23030 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); | |
23031 | + | |
23032 | static struct perf_event_attr wd_hw_attr = { | |
23033 | .type = PERF_TYPE_HARDWARE, | |
23034 | .config = PERF_COUNT_HW_CPU_CYCLES, | |
c7c16703 | 23035 | @@ -348,6 +350,13 @@ static void watchdog_overflow_callback(struct perf_event *event, |
1a6e0f06 JK |
23036 | /* only print hardlockups once */ |
23037 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
23038 | return; | |
23039 | + /* | |
23040 | + * If early-printk is enabled then make sure we do not | |
23041 | + * lock up in printk() and kill console logging: | |
23042 | + */ | |
23043 | + printk_kill(); | |
23044 | + | |
23045 | + raw_spin_lock(&watchdog_output_lock); | |
23046 | ||
23047 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
23048 | print_modules(); | |
c7c16703 | 23049 | @@ -365,6 +374,7 @@ static void watchdog_overflow_callback(struct perf_event *event, |
1a6e0f06 JK |
23050 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) |
23051 | trigger_allbutself_cpu_backtrace(); | |
23052 | ||
23053 | + raw_spin_unlock(&watchdog_output_lock); | |
23054 | if (hardlockup_panic) | |
23055 | nmi_panic(regs, "Hard LOCKUP"); | |
23056 | ||
c7c16703 | 23057 | @@ -512,6 +522,7 @@ static void watchdog_enable(unsigned int cpu) |
1a6e0f06 JK |
23058 | /* kick off the timer for the hardlockup detector */ |
23059 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
23060 | hrtimer->function = watchdog_timer_fn; | |
23061 | + hrtimer->irqsafe = 1; | |
23062 | ||
23063 | /* Enable the perf event */ | |
23064 | watchdog_nmi_enable(cpu); | |
23065 | diff --git a/kernel/workqueue.c b/kernel/workqueue.c | |
c7c16703 | 23066 | index 479d840db286..24eba6620a45 100644 |
1a6e0f06 JK |
23067 | --- a/kernel/workqueue.c |
23068 | +++ b/kernel/workqueue.c | |
23069 | @@ -48,6 +48,8 @@ | |
23070 | #include <linux/nodemask.h> | |
23071 | #include <linux/moduleparam.h> | |
23072 | #include <linux/uaccess.h> | |
23073 | +#include <linux/locallock.h> | |
23074 | +#include <linux/delay.h> | |
23075 | ||
23076 | #include "workqueue_internal.h" | |
23077 | ||
23078 | @@ -121,11 +123,16 @@ enum { | |
23079 | * cpu or grabbing pool->lock is enough for read access. If | |
23080 | * POOL_DISASSOCIATED is set, it's identical to L. | |
23081 | * | |
23082 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
23083 | + * the list manipulations against read access from | |
23084 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
23085 | + * pool->lock. | |
23086 | + * | |
23087 | * A: pool->attach_mutex protected. | |
23088 | * | |
23089 | * PL: wq_pool_mutex protected. | |
23090 | * | |
23091 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
23092 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
23093 | * | |
23094 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
23095 | * | |
23096 | @@ -134,7 +141,7 @@ enum { | |
23097 | * | |
23098 | * WQ: wq->mutex protected. | |
23099 | * | |
23100 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
23101 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
23102 | * | |
23103 | * MD: wq_mayday_lock protected. | |
23104 | */ | |
23105 | @@ -185,7 +192,7 @@ struct worker_pool { | |
23106 | atomic_t nr_running ____cacheline_aligned_in_smp; | |
23107 | ||
23108 | /* | |
23109 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
23110 | + * Destruction of pool is RCU protected to allow dereferences | |
23111 | * from get_work_pool(). | |
23112 | */ | |
23113 | struct rcu_head rcu; | |
23114 | @@ -214,7 +221,7 @@ struct pool_workqueue { | |
23115 | /* | |
23116 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
23117 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
23118 | - * itself is also sched-RCU protected so that the first pwq can be | |
23119 | + * itself is also RCU protected so that the first pwq can be | |
23120 | * determined without grabbing wq->mutex. | |
23121 | */ | |
23122 | struct work_struct unbound_release_work; | |
23123 | @@ -348,6 +355,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); | |
23124 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; | |
23125 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
23126 | ||
23127 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
23128 | + | |
23129 | static int worker_thread(void *__worker); | |
23130 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
23131 | ||
23132 | @@ -355,20 +364,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
23133 | #include <trace/events/workqueue.h> | |
23134 | ||
23135 | #define assert_rcu_or_pool_mutex() \ | |
23136 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
23137 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
23138 | !lockdep_is_held(&wq_pool_mutex), \ | |
23139 | - "sched RCU or wq_pool_mutex should be held") | |
23140 | + "RCU or wq_pool_mutex should be held") | |
23141 | ||
23142 | #define assert_rcu_or_wq_mutex(wq) \ | |
23143 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
23144 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
23145 | !lockdep_is_held(&wq->mutex), \ | |
23146 | - "sched RCU or wq->mutex should be held") | |
23147 | + "RCU or wq->mutex should be held") | |
23148 | ||
23149 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
23150 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
23151 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
23152 | !lockdep_is_held(&wq->mutex) && \ | |
23153 | !lockdep_is_held(&wq_pool_mutex), \ | |
23154 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
23155 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
23156 | ||
23157 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
23158 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
23159 | @@ -380,7 +389,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
23160 | * @pool: iteration cursor | |
23161 | * @pi: integer used for iteration | |
23162 | * | |
23163 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
23164 | + * This must be called either with wq_pool_mutex held or RCU read | |
23165 | * locked. If the pool needs to be used beyond the locking in effect, the | |
23166 | * caller is responsible for guaranteeing that the pool stays online. | |
23167 | * | |
23168 | @@ -412,7 +421,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
23169 | * @pwq: iteration cursor | |
23170 | * @wq: the target workqueue | |
23171 | * | |
23172 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
23173 | + * This must be called either with wq->mutex held or RCU read locked. | |
23174 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
23175 | * responsible for guaranteeing that the pwq stays online. | |
23176 | * | |
23177 | @@ -424,6 +433,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
23178 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ | |
23179 | else | |
23180 | ||
23181 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23182 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
23183 | +{ | |
23184 | + preempt_disable(); | |
23185 | +} | |
23186 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
23187 | +{ | |
23188 | + preempt_enable(); | |
23189 | +} | |
23190 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
23191 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
23192 | +#else | |
23193 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
23194 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
23195 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
23196 | +{ | |
23197 | + spin_lock_irq(&pool->lock); | |
23198 | +} | |
23199 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
23200 | +{ | |
23201 | + spin_unlock_irq(&pool->lock); | |
23202 | +} | |
23203 | +#endif | |
23204 | + | |
23205 | + | |
23206 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
23207 | ||
23208 | static struct debug_obj_descr work_debug_descr; | |
23209 | @@ -548,7 +582,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |
23210 | * @wq: the target workqueue | |
23211 | * @node: the node ID | |
23212 | * | |
23213 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
23214 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
23215 | * read locked. | |
23216 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
23217 | * responsible for guaranteeing that the pwq stays online. | |
23218 | @@ -692,8 +726,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) | |
23219 | * @work: the work item of interest | |
23220 | * | |
23221 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
23222 | - * access under sched-RCU read lock. As such, this function should be | |
23223 | - * called under wq_pool_mutex or with preemption disabled. | |
23224 | + * access under RCU read lock. As such, this function should be | |
23225 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
23226 | * | |
23227 | * All fields of the returned pool are accessible as long as the above | |
23228 | * mentioned locking is in effect. If the returned pool needs to be used | |
23229 | @@ -830,50 +864,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool) | |
23230 | */ | |
23231 | static void wake_up_worker(struct worker_pool *pool) | |
23232 | { | |
23233 | - struct worker *worker = first_idle_worker(pool); | |
23234 | + struct worker *worker; | |
23235 | + | |
23236 | + rt_lock_idle_list(pool); | |
23237 | + | |
23238 | + worker = first_idle_worker(pool); | |
23239 | ||
23240 | if (likely(worker)) | |
23241 | wake_up_process(worker->task); | |
23242 | + | |
23243 | + rt_unlock_idle_list(pool); | |
23244 | } | |
23245 | ||
23246 | /** | |
23247 | - * wq_worker_waking_up - a worker is waking up | |
23248 | + * wq_worker_running - a worker is running again | |
23249 | * @task: task waking up | |
23250 | - * @cpu: CPU @task is waking up to | |
23251 | * | |
23252 | - * This function is called during try_to_wake_up() when a worker is | |
23253 | - * being awoken. | |
23254 | - * | |
23255 | - * CONTEXT: | |
23256 | - * spin_lock_irq(rq->lock) | |
23257 | + * This function is called when a worker returns from schedule() | |
23258 | */ | |
23259 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
23260 | +void wq_worker_running(struct task_struct *task) | |
23261 | { | |
23262 | struct worker *worker = kthread_data(task); | |
23263 | ||
23264 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
23265 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
23266 | + if (!worker->sleeping) | |
23267 | + return; | |
23268 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
23269 | atomic_inc(&worker->pool->nr_running); | |
23270 | - } | |
23271 | + worker->sleeping = 0; | |
23272 | } | |
23273 | ||
23274 | /** | |
23275 | * wq_worker_sleeping - a worker is going to sleep | |
23276 | * @task: task going to sleep | |
23277 | * | |
23278 | - * This function is called during schedule() when a busy worker is | |
23279 | - * going to sleep. Worker on the same cpu can be woken up by | |
23280 | - * returning pointer to its task. | |
23281 | - * | |
23282 | - * CONTEXT: | |
23283 | - * spin_lock_irq(rq->lock) | |
23284 | - * | |
23285 | - * Return: | |
23286 | - * Worker task on @cpu to wake up, %NULL if none. | |
23287 | + * This function is called from schedule() when a busy worker is | |
23288 | + * going to sleep. | |
23289 | */ | |
23290 | -struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
23291 | +void wq_worker_sleeping(struct task_struct *task) | |
23292 | { | |
23293 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
23294 | + struct worker *worker = kthread_data(task); | |
23295 | struct worker_pool *pool; | |
23296 | ||
23297 | /* | |
23298 | @@ -882,29 +911,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
23299 | * checking NOT_RUNNING. | |
23300 | */ | |
23301 | if (worker->flags & WORKER_NOT_RUNNING) | |
23302 | - return NULL; | |
23303 | + return; | |
23304 | ||
23305 | pool = worker->pool; | |
23306 | ||
23307 | - /* this can only happen on the local cpu */ | |
23308 | - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) | |
23309 | - return NULL; | |
23310 | + if (WARN_ON_ONCE(worker->sleeping)) | |
23311 | + return; | |
23312 | + | |
23313 | + worker->sleeping = 1; | |
23314 | ||
23315 | /* | |
23316 | * The counterpart of the following dec_and_test, implied mb, | |
23317 | * worklist not empty test sequence is in insert_work(). | |
23318 | * Please read comment there. | |
23319 | - * | |
23320 | - * NOT_RUNNING is clear. This means that we're bound to and | |
23321 | - * running on the local cpu w/ rq lock held and preemption | |
23322 | - * disabled, which in turn means that none else could be | |
23323 | - * manipulating idle_list, so dereferencing idle_list without pool | |
23324 | - * lock is safe. | |
23325 | */ | |
23326 | if (atomic_dec_and_test(&pool->nr_running) && | |
23327 | - !list_empty(&pool->worklist)) | |
23328 | - to_wakeup = first_idle_worker(pool); | |
23329 | - return to_wakeup ? to_wakeup->task : NULL; | |
23330 | + !list_empty(&pool->worklist)) { | |
23331 | + sched_lock_idle_list(pool); | |
23332 | + wake_up_worker(pool); | |
23333 | + sched_unlock_idle_list(pool); | |
23334 | + } | |
23335 | } | |
23336 | ||
23337 | /** | |
c7c16703 | 23338 | @@ -1098,12 +1124,14 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) |
1a6e0f06 JK |
23339 | { |
23340 | if (pwq) { | |
23341 | /* | |
23342 | - * As both pwqs and pools are sched-RCU protected, the | |
23343 | + * As both pwqs and pools are RCU protected, the | |
23344 | * following lock operations are safe. | |
23345 | */ | |
23346 | - spin_lock_irq(&pwq->pool->lock); | |
c7c16703 | 23347 | + rcu_read_lock(); |
1a6e0f06 JK |
23348 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); |
23349 | put_pwq(pwq); | |
23350 | - spin_unlock_irq(&pwq->pool->lock); | |
23351 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
c7c16703 | 23352 | + rcu_read_unlock(); |
1a6e0f06 JK |
23353 | } |
23354 | } | |
23355 | ||
c7c16703 | 23356 | @@ -1207,7 +1235,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
23357 | struct worker_pool *pool; |
23358 | struct pool_workqueue *pwq; | |
23359 | ||
23360 | - local_irq_save(*flags); | |
23361 | + local_lock_irqsave(pendingb_lock, *flags); | |
23362 | ||
23363 | /* try to steal the timer if it exists */ | |
23364 | if (is_dwork) { | |
c7c16703 | 23365 | @@ -1226,6 +1254,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
23366 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) |
23367 | return 0; | |
23368 | ||
23369 | + rcu_read_lock(); | |
23370 | /* | |
23371 | * The queueing is in progress, or it is already queued. Try to | |
23372 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
c7c16703 | 23373 | @@ -1264,14 +1293,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, |
1a6e0f06 JK |
23374 | set_work_pool_and_keep_pending(work, pool->id); |
23375 | ||
23376 | spin_unlock(&pool->lock); | |
23377 | + rcu_read_unlock(); | |
23378 | return 1; | |
23379 | } | |
23380 | spin_unlock(&pool->lock); | |
23381 | fail: | |
23382 | - local_irq_restore(*flags); | |
23383 | + rcu_read_unlock(); | |
23384 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
23385 | if (work_is_canceling(work)) | |
23386 | return -ENOENT; | |
23387 | - cpu_relax(); | |
23388 | + cpu_chill(); | |
23389 | return -EAGAIN; | |
23390 | } | |
23391 | ||
c7c16703 | 23392 | @@ -1373,7 +1404,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23393 | * queued or lose PENDING. Grabbing PENDING and queueing should |
23394 | * happen with IRQ disabled. | |
23395 | */ | |
23396 | - WARN_ON_ONCE(!irqs_disabled()); | |
23397 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
23398 | ||
23399 | debug_work_activate(work); | |
23400 | ||
c7c16703 | 23401 | @@ -1381,6 +1412,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23402 | if (unlikely(wq->flags & __WQ_DRAINING) && |
23403 | WARN_ON_ONCE(!is_chained_work(wq))) | |
23404 | return; | |
23405 | + rcu_read_lock(); | |
23406 | retry: | |
23407 | if (req_cpu == WORK_CPU_UNBOUND) | |
23408 | cpu = wq_select_unbound_cpu(raw_smp_processor_id()); | |
c7c16703 | 23409 | @@ -1437,10 +1469,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23410 | /* pwq determined, queue */ |
23411 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
23412 | ||
23413 | - if (WARN_ON(!list_empty(&work->entry))) { | |
23414 | - spin_unlock(&pwq->pool->lock); | |
23415 | - return; | |
23416 | - } | |
23417 | + if (WARN_ON(!list_empty(&work->entry))) | |
23418 | + goto out; | |
23419 | ||
23420 | pwq->nr_in_flight[pwq->work_color]++; | |
23421 | work_flags = work_color_to_flags(pwq->work_color); | |
c7c16703 | 23422 | @@ -1458,7 +1488,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23423 | |
23424 | insert_work(pwq, work, worklist, work_flags); | |
23425 | ||
23426 | +out: | |
23427 | spin_unlock(&pwq->pool->lock); | |
23428 | + rcu_read_unlock(); | |
23429 | } | |
23430 | ||
23431 | /** | |
c7c16703 | 23432 | @@ -1478,14 +1510,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23433 | bool ret = false; |
23434 | unsigned long flags; | |
23435 | ||
23436 | - local_irq_save(flags); | |
23437 | + local_lock_irqsave(pendingb_lock,flags); | |
23438 | ||
23439 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
23440 | __queue_work(cpu, wq, work); | |
23441 | ret = true; | |
23442 | } | |
23443 | ||
23444 | - local_irq_restore(flags); | |
23445 | + local_unlock_irqrestore(pendingb_lock, flags); | |
23446 | return ret; | |
23447 | } | |
23448 | EXPORT_SYMBOL(queue_work_on); | |
c7c16703 | 23449 | @@ -1552,14 +1584,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23450 | unsigned long flags; |
23451 | ||
23452 | /* read the comment in __queue_work() */ | |
23453 | - local_irq_save(flags); | |
23454 | + local_lock_irqsave(pendingb_lock, flags); | |
23455 | ||
23456 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
23457 | __queue_delayed_work(cpu, wq, dwork, delay); | |
23458 | ret = true; | |
23459 | } | |
23460 | ||
23461 | - local_irq_restore(flags); | |
23462 | + local_unlock_irqrestore(pendingb_lock, flags); | |
23463 | return ret; | |
23464 | } | |
23465 | EXPORT_SYMBOL(queue_delayed_work_on); | |
c7c16703 | 23466 | @@ -1594,7 +1626,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, |
1a6e0f06 JK |
23467 | |
23468 | if (likely(ret >= 0)) { | |
23469 | __queue_delayed_work(cpu, wq, dwork, delay); | |
23470 | - local_irq_restore(flags); | |
23471 | + local_unlock_irqrestore(pendingb_lock, flags); | |
23472 | } | |
23473 | ||
23474 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
c7c16703 | 23475 | @@ -1627,7 +1659,9 @@ static void worker_enter_idle(struct worker *worker) |
1a6e0f06 JK |
23476 | worker->last_active = jiffies; |
23477 | ||
23478 | /* idle_list is LIFO */ | |
23479 | + rt_lock_idle_list(pool); | |
23480 | list_add(&worker->entry, &pool->idle_list); | |
23481 | + rt_unlock_idle_list(pool); | |
23482 | ||
23483 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
23484 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
c7c16703 | 23485 | @@ -1660,7 +1694,9 @@ static void worker_leave_idle(struct worker *worker) |
1a6e0f06 JK |
23486 | return; |
23487 | worker_clr_flags(worker, WORKER_IDLE); | |
23488 | pool->nr_idle--; | |
23489 | + rt_lock_idle_list(pool); | |
23490 | list_del_init(&worker->entry); | |
23491 | + rt_unlock_idle_list(pool); | |
23492 | } | |
23493 | ||
23494 | static struct worker *alloc_worker(int node) | |
c7c16703 | 23495 | @@ -1826,7 +1862,9 @@ static void destroy_worker(struct worker *worker) |
1a6e0f06 JK |
23496 | pool->nr_workers--; |
23497 | pool->nr_idle--; | |
23498 | ||
23499 | + rt_lock_idle_list(pool); | |
23500 | list_del_init(&worker->entry); | |
23501 | + rt_unlock_idle_list(pool); | |
23502 | worker->flags |= WORKER_DIE; | |
23503 | wake_up_process(worker->task); | |
23504 | } | |
c7c16703 | 23505 | @@ -2785,14 +2823,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) |
1a6e0f06 JK |
23506 | |
23507 | might_sleep(); | |
23508 | ||
23509 | - local_irq_disable(); | |
23510 | + rcu_read_lock(); | |
23511 | pool = get_work_pool(work); | |
23512 | if (!pool) { | |
23513 | - local_irq_enable(); | |
23514 | + rcu_read_unlock(); | |
23515 | return false; | |
23516 | } | |
23517 | ||
23518 | - spin_lock(&pool->lock); | |
23519 | + spin_lock_irq(&pool->lock); | |
23520 | /* see the comment in try_to_grab_pending() with the same code */ | |
23521 | pwq = get_work_pwq(work); | |
23522 | if (pwq) { | |
c7c16703 | 23523 | @@ -2821,10 +2859,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) |
1a6e0f06 JK |
23524 | else |
23525 | lock_map_acquire_read(&pwq->wq->lockdep_map); | |
23526 | lock_map_release(&pwq->wq->lockdep_map); | |
23527 | - | |
23528 | + rcu_read_unlock(); | |
23529 | return true; | |
23530 | already_gone: | |
23531 | spin_unlock_irq(&pool->lock); | |
23532 | + rcu_read_unlock(); | |
23533 | return false; | |
23534 | } | |
23535 | ||
c7c16703 | 23536 | @@ -2911,7 +2950,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) |
1a6e0f06 JK |
23537 | |
23538 | /* tell other tasks trying to grab @work to back off */ | |
23539 | mark_work_canceling(work); | |
23540 | - local_irq_restore(flags); | |
23541 | + local_unlock_irqrestore(pendingb_lock, flags); | |
23542 | ||
23543 | flush_work(work); | |
23544 | clear_work_data(work); | |
c7c16703 | 23545 | @@ -2966,10 +3005,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); |
1a6e0f06 JK |
23546 | */ |
23547 | bool flush_delayed_work(struct delayed_work *dwork) | |
23548 | { | |
23549 | - local_irq_disable(); | |
23550 | + local_lock_irq(pendingb_lock); | |
23551 | if (del_timer_sync(&dwork->timer)) | |
23552 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
23553 | - local_irq_enable(); | |
23554 | + local_unlock_irq(pendingb_lock); | |
23555 | return flush_work(&dwork->work); | |
23556 | } | |
23557 | EXPORT_SYMBOL(flush_delayed_work); | |
c7c16703 JK |
23558 | @@ -2987,7 +3026,7 @@ static bool __cancel_work(struct work_struct *work, bool is_dwork) |
23559 | return false; | |
1a6e0f06 | 23560 | |
c7c16703 | 23561 | set_work_pool_and_clear_pending(work, get_work_pool_id(work)); |
1a6e0f06 JK |
23562 | - local_irq_restore(flags); |
23563 | + local_unlock_irqrestore(pendingb_lock, flags); | |
23564 | return ret; | |
23565 | } | |
c7c16703 JK |
23566 | |
23567 | @@ -3245,7 +3284,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |
1a6e0f06 JK |
23568 | * put_unbound_pool - put a worker_pool |
23569 | * @pool: worker_pool to put | |
23570 | * | |
23571 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
23572 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
23573 | * safe manner. get_unbound_pool() calls this function on its failure path | |
23574 | * and this function should be able to release pools which went through, | |
23575 | * successfully or not, init_worker_pool(). | |
c7c16703 | 23576 | @@ -3299,8 +3338,8 @@ static void put_unbound_pool(struct worker_pool *pool) |
1a6e0f06 JK |
23577 | del_timer_sync(&pool->idle_timer); |
23578 | del_timer_sync(&pool->mayday_timer); | |
23579 | ||
23580 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
23581 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
23582 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
23583 | + call_rcu(&pool->rcu, rcu_free_pool); | |
23584 | } | |
23585 | ||
23586 | /** | |
c7c16703 | 23587 | @@ -3407,14 +3446,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) |
1a6e0f06 JK |
23588 | put_unbound_pool(pool); |
23589 | mutex_unlock(&wq_pool_mutex); | |
23590 | ||
23591 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
23592 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
23593 | ||
23594 | /* | |
23595 | * If we're the last pwq going away, @wq is already dead and no one | |
23596 | * is gonna access it anymore. Schedule RCU free. | |
23597 | */ | |
23598 | if (is_last) | |
23599 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
23600 | + call_rcu(&wq->rcu, rcu_free_wq); | |
23601 | } | |
23602 | ||
23603 | /** | |
c7c16703 | 23604 | @@ -4064,7 +4103,7 @@ void destroy_workqueue(struct workqueue_struct *wq) |
1a6e0f06 JK |
23605 | * The base ref is never dropped on per-cpu pwqs. Directly |
23606 | * schedule RCU free. | |
23607 | */ | |
23608 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
23609 | + call_rcu(&wq->rcu, rcu_free_wq); | |
23610 | } else { | |
23611 | /* | |
23612 | * We're the sole accessor of @wq at this point. Directly | |
c7c16703 | 23613 | @@ -4157,7 +4196,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) |
1a6e0f06 JK |
23614 | struct pool_workqueue *pwq; |
23615 | bool ret; | |
23616 | ||
23617 | - rcu_read_lock_sched(); | |
23618 | + rcu_read_lock(); | |
23619 | + preempt_disable(); | |
23620 | ||
23621 | if (cpu == WORK_CPU_UNBOUND) | |
23622 | cpu = smp_processor_id(); | |
c7c16703 | 23623 | @@ -4168,7 +4208,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) |
1a6e0f06 JK |
23624 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); |
23625 | ||
23626 | ret = !list_empty(&pwq->delayed_works); | |
23627 | - rcu_read_unlock_sched(); | |
23628 | + preempt_enable(); | |
23629 | + rcu_read_unlock(); | |
23630 | ||
23631 | return ret; | |
23632 | } | |
c7c16703 | 23633 | @@ -4194,15 +4235,15 @@ unsigned int work_busy(struct work_struct *work) |
1a6e0f06 JK |
23634 | if (work_pending(work)) |
23635 | ret |= WORK_BUSY_PENDING; | |
23636 | ||
23637 | - local_irq_save(flags); | |
23638 | + rcu_read_lock(); | |
23639 | pool = get_work_pool(work); | |
23640 | if (pool) { | |
23641 | - spin_lock(&pool->lock); | |
23642 | + spin_lock_irqsave(&pool->lock, flags); | |
23643 | if (find_worker_executing_work(pool, work)) | |
23644 | ret |= WORK_BUSY_RUNNING; | |
23645 | - spin_unlock(&pool->lock); | |
23646 | + spin_unlock_irqrestore(&pool->lock, flags); | |
23647 | } | |
23648 | - local_irq_restore(flags); | |
23649 | + rcu_read_unlock(); | |
23650 | ||
23651 | return ret; | |
23652 | } | |
c7c16703 | 23653 | @@ -4391,7 +4432,7 @@ void show_workqueue_state(void) |
1a6e0f06 JK |
23654 | unsigned long flags; |
23655 | int pi; | |
23656 | ||
23657 | - rcu_read_lock_sched(); | |
23658 | + rcu_read_lock(); | |
23659 | ||
23660 | pr_info("Showing busy workqueues and worker pools:\n"); | |
23661 | ||
c7c16703 | 23662 | @@ -4444,7 +4485,7 @@ void show_workqueue_state(void) |
1a6e0f06 JK |
23663 | spin_unlock_irqrestore(&pool->lock, flags); |
23664 | } | |
23665 | ||
23666 | - rcu_read_unlock_sched(); | |
23667 | + rcu_read_unlock(); | |
23668 | } | |
23669 | ||
23670 | /* | |
c7c16703 | 23671 | @@ -4782,16 +4823,16 @@ bool freeze_workqueues_busy(void) |
1a6e0f06 JK |
23672 | * nr_active is monotonically decreasing. It's safe |
23673 | * to peek without lock. | |
23674 | */ | |
23675 | - rcu_read_lock_sched(); | |
23676 | + rcu_read_lock(); | |
23677 | for_each_pwq(pwq, wq) { | |
23678 | WARN_ON_ONCE(pwq->nr_active < 0); | |
23679 | if (pwq->nr_active) { | |
23680 | busy = true; | |
23681 | - rcu_read_unlock_sched(); | |
23682 | + rcu_read_unlock(); | |
23683 | goto out_unlock; | |
23684 | } | |
23685 | } | |
23686 | - rcu_read_unlock_sched(); | |
23687 | + rcu_read_unlock(); | |
23688 | } | |
23689 | out_unlock: | |
23690 | mutex_unlock(&wq_pool_mutex); | |
c7c16703 | 23691 | @@ -4981,7 +5022,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, |
1a6e0f06 JK |
23692 | const char *delim = ""; |
23693 | int node, written = 0; | |
23694 | ||
23695 | - rcu_read_lock_sched(); | |
23696 | + get_online_cpus(); | |
23697 | + rcu_read_lock(); | |
23698 | for_each_node(node) { | |
23699 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
23700 | "%s%d:%d", delim, node, | |
c7c16703 | 23701 | @@ -4989,7 +5031,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, |
1a6e0f06 JK |
23702 | delim = " "; |
23703 | } | |
23704 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
23705 | - rcu_read_unlock_sched(); | |
23706 | + rcu_read_unlock(); | |
23707 | + put_online_cpus(); | |
23708 | ||
23709 | return written; | |
23710 | } | |
23711 | diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h | |
23712 | index 8635417c587b..f000c4d6917e 100644 | |
23713 | --- a/kernel/workqueue_internal.h | |
23714 | +++ b/kernel/workqueue_internal.h | |
23715 | @@ -43,6 +43,7 @@ struct worker { | |
23716 | unsigned long last_active; /* L: last active timestamp */ | |
23717 | unsigned int flags; /* X: flags */ | |
23718 | int id; /* I: worker id */ | |
23719 | + int sleeping; /* None */ | |
23720 | ||
23721 | /* | |
23722 | * Opaque string set with work_set_desc(). Printed out with task | |
23723 | @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void) | |
23724 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | |
23725 | * sched/core.c and workqueue.c. | |
23726 | */ | |
23727 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
23728 | -struct task_struct *wq_worker_sleeping(struct task_struct *task); | |
23729 | +void wq_worker_running(struct task_struct *task); | |
23730 | +void wq_worker_sleeping(struct task_struct *task); | |
23731 | ||
23732 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
23733 | diff --git a/lib/Kconfig b/lib/Kconfig | |
c7c16703 | 23734 | index 260a80e313b9..b06becb3f477 100644 |
1a6e0f06 JK |
23735 | --- a/lib/Kconfig |
23736 | +++ b/lib/Kconfig | |
23737 | @@ -400,6 +400,7 @@ config CHECK_SIGNATURE | |
23738 | ||
23739 | config CPUMASK_OFFSTACK | |
23740 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
23741 | + depends on !PREEMPT_RT_FULL | |
23742 | help | |
23743 | Use dynamic allocation for cpumask_var_t, instead of putting | |
23744 | them on the stack. This is a bit more expensive, but avoids | |
1a6e0f06 | 23745 | diff --git a/lib/debugobjects.c b/lib/debugobjects.c |
c7c16703 | 23746 | index 056052dc8e91..d8494e126de8 100644 |
1a6e0f06 JK |
23747 | --- a/lib/debugobjects.c |
23748 | +++ b/lib/debugobjects.c | |
23749 | @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) | |
23750 | struct debug_obj *obj; | |
23751 | unsigned long flags; | |
23752 | ||
23753 | - fill_pool(); | |
23754 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23755 | + if (preempt_count() == 0 && !irqs_disabled()) | |
23756 | +#endif | |
23757 | + fill_pool(); | |
23758 | ||
23759 | db = get_bucket((unsigned long) addr); | |
23760 | ||
23761 | diff --git a/lib/idr.c b/lib/idr.c | |
23762 | index 6098336df267..9decbe914595 100644 | |
23763 | --- a/lib/idr.c | |
23764 | +++ b/lib/idr.c | |
23765 | @@ -30,6 +30,7 @@ | |
23766 | #include <linux/idr.h> | |
23767 | #include <linux/spinlock.h> | |
23768 | #include <linux/percpu.h> | |
23769 | +#include <linux/locallock.h> | |
23770 | ||
23771 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | |
23772 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | |
23773 | @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | |
23774 | static DEFINE_PER_CPU(int, idr_preload_cnt); | |
23775 | static DEFINE_SPINLOCK(simple_ida_lock); | |
23776 | ||
23777 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23778 | +static DEFINE_LOCAL_IRQ_LOCK(idr_lock); | |
23779 | + | |
23780 | +static inline void idr_preload_lock(void) | |
23781 | +{ | |
23782 | + local_lock(idr_lock); | |
23783 | +} | |
23784 | + | |
23785 | +static inline void idr_preload_unlock(void) | |
23786 | +{ | |
23787 | + local_unlock(idr_lock); | |
23788 | +} | |
23789 | + | |
23790 | +void idr_preload_end(void) | |
23791 | +{ | |
23792 | + idr_preload_unlock(); | |
23793 | +} | |
23794 | +EXPORT_SYMBOL(idr_preload_end); | |
23795 | +#else | |
23796 | +static inline void idr_preload_lock(void) | |
23797 | +{ | |
23798 | + preempt_disable(); | |
23799 | +} | |
23800 | + | |
23801 | +static inline void idr_preload_unlock(void) | |
23802 | +{ | |
23803 | + preempt_enable(); | |
23804 | +} | |
23805 | +#endif | |
23806 | + | |
23807 | + | |
23808 | /* the maximum ID which can be allocated given idr->layers */ | |
23809 | static int idr_max(int layers) | |
23810 | { | |
23811 | @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | |
23812 | * context. See idr_preload() for details. | |
23813 | */ | |
23814 | if (!in_interrupt()) { | |
23815 | - preempt_disable(); | |
23816 | + idr_preload_lock(); | |
23817 | new = __this_cpu_read(idr_preload_head); | |
23818 | if (new) { | |
23819 | __this_cpu_write(idr_preload_head, new->ary[0]); | |
23820 | __this_cpu_dec(idr_preload_cnt); | |
23821 | new->ary[0] = NULL; | |
23822 | } | |
23823 | - preempt_enable(); | |
23824 | + idr_preload_unlock(); | |
23825 | if (new) | |
23826 | return new; | |
23827 | } | |
23828 | @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, | |
23829 | idr_mark_full(pa, id); | |
23830 | } | |
23831 | ||
23832 | - | |
23833 | /** | |
23834 | * idr_preload - preload for idr_alloc() | |
23835 | * @gfp_mask: allocation mask to use for preloading | |
23836 | @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask) | |
23837 | WARN_ON_ONCE(in_interrupt()); | |
23838 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | |
23839 | ||
23840 | - preempt_disable(); | |
23841 | + idr_preload_lock(); | |
23842 | ||
23843 | /* | |
23844 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | |
23845 | @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask) | |
23846 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | |
23847 | struct idr_layer *new; | |
23848 | ||
23849 | - preempt_enable(); | |
23850 | + idr_preload_unlock(); | |
23851 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | |
23852 | - preempt_disable(); | |
23853 | + idr_preload_lock(); | |
23854 | if (!new) | |
23855 | break; | |
23856 | ||
23857 | diff --git a/lib/irq_poll.c b/lib/irq_poll.c | |
c7c16703 | 23858 | index 1d6565e81030..b23a79761df7 100644 |
1a6e0f06 JK |
23859 | --- a/lib/irq_poll.c |
23860 | +++ b/lib/irq_poll.c | |
23861 | @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop) | |
23862 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); | |
23863 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
23864 | local_irq_restore(flags); | |
23865 | + preempt_check_resched_rt(); | |
23866 | } | |
23867 | EXPORT_SYMBOL(irq_poll_sched); | |
23868 | ||
23869 | @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop) | |
23870 | local_irq_save(flags); | |
23871 | __irq_poll_complete(iop); | |
23872 | local_irq_restore(flags); | |
23873 | + preempt_check_resched_rt(); | |
23874 | } | |
23875 | EXPORT_SYMBOL(irq_poll_complete); | |
23876 | ||
c7c16703 | 23877 | @@ -95,6 +97,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) |
1a6e0f06 JK |
23878 | } |
23879 | ||
23880 | local_irq_enable(); | |
23881 | + preempt_check_resched_rt(); | |
23882 | ||
23883 | /* Even though interrupts have been re-enabled, this | |
23884 | * access is safe because interrupts can only add new | |
c7c16703 | 23885 | @@ -132,6 +135,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h) |
1a6e0f06 JK |
23886 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); |
23887 | ||
23888 | local_irq_enable(); | |
23889 | + preempt_check_resched_rt(); | |
23890 | } | |
23891 | ||
23892 | /** | |
c7c16703 JK |
23893 | @@ -195,6 +199,7 @@ static int irq_poll_cpu_dead(unsigned int cpu) |
23894 | this_cpu_ptr(&blk_cpu_iopoll)); | |
23895 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
23896 | local_irq_enable(); | |
23897 | + preempt_check_resched_rt(); | |
1a6e0f06 | 23898 | |
c7c16703 JK |
23899 | return 0; |
23900 | } | |
1a6e0f06 | 23901 | diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c |
c7c16703 | 23902 | index f3a217ea0388..4611b156ef79 100644 |
1a6e0f06 JK |
23903 | --- a/lib/locking-selftest.c |
23904 | +++ b/lib/locking-selftest.c | |
23905 | @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) | |
23906 | #include "locking-selftest-spin-hardirq.h" | |
23907 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
23908 | ||
23909 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23910 | + | |
23911 | #include "locking-selftest-rlock-hardirq.h" | |
23912 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
23913 | ||
23914 | @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) | |
23915 | #include "locking-selftest-wlock-softirq.h" | |
23916 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
23917 | ||
23918 | +#endif | |
23919 | + | |
23920 | #undef E1 | |
23921 | #undef E2 | |
23922 | ||
23923 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23924 | /* | |
23925 | * Enabling hardirqs with a softirq-safe lock held: | |
23926 | */ | |
23927 | @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
23928 | #undef E1 | |
23929 | #undef E2 | |
23930 | ||
23931 | +#endif | |
23932 | + | |
23933 | /* | |
23934 | * Enabling irqs with an irq-safe lock held: | |
23935 | */ | |
23936 | @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
23937 | #include "locking-selftest-spin-hardirq.h" | |
23938 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
23939 | ||
23940 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23941 | + | |
23942 | #include "locking-selftest-rlock-hardirq.h" | |
23943 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
23944 | ||
23945 | @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) | |
23946 | #include "locking-selftest-wlock-softirq.h" | |
23947 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
23948 | ||
23949 | +#endif | |
23950 | + | |
23951 | #undef E1 | |
23952 | #undef E2 | |
23953 | ||
23954 | @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
23955 | #include "locking-selftest-spin-hardirq.h" | |
23956 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
23957 | ||
23958 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23959 | + | |
23960 | #include "locking-selftest-rlock-hardirq.h" | |
23961 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
23962 | ||
23963 | @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) | |
23964 | #include "locking-selftest-wlock-softirq.h" | |
23965 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
23966 | ||
23967 | +#endif | |
23968 | + | |
23969 | #undef E1 | |
23970 | #undef E2 | |
23971 | #undef E3 | |
23972 | @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
23973 | #include "locking-selftest-spin-hardirq.h" | |
23974 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
23975 | ||
23976 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23977 | + | |
23978 | #include "locking-selftest-rlock-hardirq.h" | |
23979 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
23980 | ||
23981 | @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) | |
23982 | #include "locking-selftest-wlock-softirq.h" | |
23983 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
23984 | ||
23985 | +#endif | |
23986 | + | |
23987 | #undef E1 | |
23988 | #undef E2 | |
23989 | #undef E3 | |
23990 | ||
23991 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23992 | + | |
23993 | /* | |
23994 | * read-lock / write-lock irq inversion. | |
23995 | * | |
23996 | @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) | |
23997 | #undef E2 | |
23998 | #undef E3 | |
23999 | ||
24000 | +#endif | |
24001 | + | |
24002 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24003 | + | |
24004 | /* | |
24005 | * read-lock / write-lock recursion that is actually safe. | |
24006 | */ | |
24007 | @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |
24008 | #undef E2 | |
24009 | #undef E3 | |
24010 | ||
24011 | +#endif | |
24012 | + | |
24013 | /* | |
24014 | * read-lock / write-lock recursion that is unsafe. | |
24015 | */ | |
24016 | @@ -1858,6 +1885,7 @@ void locking_selftest(void) | |
24017 | ||
24018 | printk(" --------------------------------------------------------------------------\n"); | |
24019 | ||
24020 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24021 | /* | |
24022 | * irq-context testcases: | |
24023 | */ | |
24024 | @@ -1870,6 +1898,28 @@ void locking_selftest(void) | |
24025 | ||
24026 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
24027 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
24028 | +#else | |
24029 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
24030 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
24031 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
24032 | + | |
24033 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
24034 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
24035 | + | |
24036 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
24037 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
24038 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
24039 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
24040 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
24041 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
24042 | + | |
24043 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
24044 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
24045 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
24046 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
24047 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
24048 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
24049 | +#endif | |
24050 | ||
24051 | ww_tests(); | |
24052 | ||
24053 | diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c | |
24054 | index 6d40944960de..822a2c027e72 100644 | |
24055 | --- a/lib/percpu_ida.c | |
24056 | +++ b/lib/percpu_ida.c | |
24057 | @@ -26,6 +26,9 @@ | |
24058 | #include <linux/string.h> | |
24059 | #include <linux/spinlock.h> | |
24060 | #include <linux/percpu_ida.h> | |
24061 | +#include <linux/locallock.h> | |
24062 | + | |
24063 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
24064 | ||
24065 | struct percpu_ida_cpu { | |
24066 | /* | |
24067 | @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
24068 | unsigned long flags; | |
24069 | int tag; | |
24070 | ||
24071 | - local_irq_save(flags); | |
24072 | + local_lock_irqsave(irq_off_lock, flags); | |
24073 | tags = this_cpu_ptr(pool->tag_cpu); | |
24074 | ||
24075 | /* Fastpath */ | |
24076 | tag = alloc_local_tag(tags); | |
24077 | if (likely(tag >= 0)) { | |
24078 | - local_irq_restore(flags); | |
24079 | + local_unlock_irqrestore(irq_off_lock, flags); | |
24080 | return tag; | |
24081 | } | |
24082 | ||
24083 | @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
24084 | ||
24085 | if (!tags->nr_free) | |
24086 | alloc_global_tags(pool, tags); | |
24087 | + | |
24088 | if (!tags->nr_free) | |
24089 | steal_tags(pool, tags); | |
24090 | ||
24091 | @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
24092 | } | |
24093 | ||
24094 | spin_unlock(&pool->lock); | |
24095 | - local_irq_restore(flags); | |
24096 | + local_unlock_irqrestore(irq_off_lock, flags); | |
24097 | ||
24098 | if (tag >= 0 || state == TASK_RUNNING) | |
24099 | break; | |
24100 | @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
24101 | ||
24102 | schedule(); | |
24103 | ||
24104 | - local_irq_save(flags); | |
24105 | + local_lock_irqsave(irq_off_lock, flags); | |
24106 | tags = this_cpu_ptr(pool->tag_cpu); | |
24107 | } | |
24108 | if (state != TASK_RUNNING) | |
24109 | @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
24110 | ||
24111 | BUG_ON(tag >= pool->nr_tags); | |
24112 | ||
24113 | - local_irq_save(flags); | |
24114 | + local_lock_irqsave(irq_off_lock, flags); | |
24115 | tags = this_cpu_ptr(pool->tag_cpu); | |
24116 | ||
24117 | spin_lock(&tags->lock); | |
24118 | @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
24119 | spin_unlock(&pool->lock); | |
24120 | } | |
24121 | ||
24122 | - local_irq_restore(flags); | |
24123 | + local_unlock_irqrestore(irq_off_lock, flags); | |
24124 | } | |
24125 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
24126 | ||
24127 | @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
24128 | struct percpu_ida_cpu *remote; | |
24129 | unsigned cpu, i, err = 0; | |
24130 | ||
24131 | - local_irq_save(flags); | |
24132 | + local_lock_irqsave(irq_off_lock, flags); | |
24133 | for_each_possible_cpu(cpu) { | |
24134 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
24135 | spin_lock(&remote->lock); | |
24136 | @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
24137 | } | |
24138 | spin_unlock(&pool->lock); | |
24139 | out: | |
24140 | - local_irq_restore(flags); | |
24141 | + local_unlock_irqrestore(irq_off_lock, flags); | |
24142 | return err; | |
24143 | } | |
24144 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
24145 | diff --git a/lib/radix-tree.c b/lib/radix-tree.c | |
1f39f580 | 24146 | index 8e6d552c40dd..741da5a77fd5 100644 |
1a6e0f06 JK |
24147 | --- a/lib/radix-tree.c |
24148 | +++ b/lib/radix-tree.c | |
1f39f580 JK |
24149 | @@ -36,7 +36,7 @@ |
24150 | #include <linux/bitops.h> | |
24151 | #include <linux/rcupdate.h> | |
24152 | #include <linux/preempt.h> /* in_interrupt() */ | |
24153 | - | |
24154 | +#include <linux/locallock.h> | |
24155 | ||
24156 | /* Number of nodes in fully populated tree of given height */ | |
24157 | static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; | |
24158 | @@ -68,6 +68,7 @@ struct radix_tree_preload { | |
24159 | struct radix_tree_node *nodes; | |
24160 | }; | |
24161 | static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; | |
24162 | +static DEFINE_LOCAL_IRQ_LOCK(radix_tree_preloads_lock); | |
24163 | ||
24164 | static inline void *node_to_entry(void *ptr) | |
24165 | { | |
24166 | @@ -290,13 +291,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |
1a6e0f06 JK |
24167 | * succeed in getting a node here (and never reach |
24168 | * kmem_cache_alloc) | |
24169 | */ | |
24170 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
1f39f580 | 24171 | + rtp = &get_locked_var(radix_tree_preloads_lock, radix_tree_preloads); |
1a6e0f06 JK |
24172 | if (rtp->nr) { |
24173 | ret = rtp->nodes; | |
24174 | rtp->nodes = ret->private_data; | |
24175 | ret->private_data = NULL; | |
24176 | rtp->nr--; | |
24177 | } | |
1f39f580 | 24178 | + put_locked_var(radix_tree_preloads_lock, radix_tree_preloads); |
1a6e0f06 JK |
24179 | /* |
24180 | * Update the allocation stack trace as this is more useful | |
24181 | * for debugging. | |
1f39f580 JK |
24182 | @@ -357,14 +359,14 @@ static int __radix_tree_preload(gfp_t gfp_mask, int nr) |
24183 | */ | |
24184 | gfp_mask &= ~__GFP_ACCOUNT; | |
24185 | ||
24186 | - preempt_disable(); | |
24187 | + local_lock(radix_tree_preloads_lock); | |
24188 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
24189 | while (rtp->nr < nr) { | |
24190 | - preempt_enable(); | |
24191 | + local_unlock(radix_tree_preloads_lock); | |
24192 | node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); | |
24193 | if (node == NULL) | |
24194 | goto out; | |
24195 | - preempt_disable(); | |
24196 | + local_lock(radix_tree_preloads_lock); | |
24197 | rtp = this_cpu_ptr(&radix_tree_preloads); | |
24198 | if (rtp->nr < nr) { | |
24199 | node->private_data = rtp->nodes; | |
24200 | @@ -406,7 +408,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) | |
24201 | if (gfpflags_allow_blocking(gfp_mask)) | |
24202 | return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE); | |
24203 | /* Preloading doesn't help anything with this gfp mask, skip it */ | |
24204 | - preempt_disable(); | |
24205 | + local_lock(radix_tree_preloads_lock); | |
24206 | return 0; | |
1a6e0f06 | 24207 | } |
1f39f580 JK |
24208 | EXPORT_SYMBOL(radix_tree_maybe_preload); |
24209 | @@ -422,7 +424,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
1a6e0f06 | 24210 | |
1f39f580 JK |
24211 | /* Preloading doesn't help anything with this gfp mask, skip it */ |
24212 | if (!gfpflags_allow_blocking(gfp_mask)) { | |
24213 | - preempt_disable(); | |
24214 | + local_lock(radix_tree_preloads_lock); | |
24215 | return 0; | |
24216 | } | |
1a6e0f06 | 24217 | |
1f39f580 | 24218 | @@ -456,6 +458,12 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) |
1a6e0f06 JK |
24219 | return __radix_tree_preload(gfp_mask, nr_nodes); |
24220 | } | |
1a6e0f06 | 24221 | |
1f39f580 JK |
24222 | +void radix_tree_preload_end(void) |
24223 | +{ | |
24224 | + local_unlock(radix_tree_preloads_lock); | |
24225 | +} | |
24226 | +EXPORT_SYMBOL(radix_tree_preload_end); | |
24227 | + | |
1a6e0f06 JK |
24228 | /* |
24229 | * The maximum index which can be stored in a radix tree | |
1f39f580 | 24230 | */ |
1a6e0f06 JK |
24231 | diff --git a/lib/scatterlist.c b/lib/scatterlist.c |
24232 | index 004fc70fc56a..ccc46992a517 100644 | |
24233 | --- a/lib/scatterlist.c | |
24234 | +++ b/lib/scatterlist.c | |
24235 | @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) | |
24236 | flush_kernel_dcache_page(miter->page); | |
24237 | ||
24238 | if (miter->__flags & SG_MITER_ATOMIC) { | |
24239 | - WARN_ON_ONCE(preemptible()); | |
24240 | + WARN_ON_ONCE(!pagefault_disabled()); | |
24241 | kunmap_atomic(miter->addr); | |
24242 | } else | |
24243 | kunmap(miter->page); | |
24244 | @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
24245 | if (!sg_miter_skip(&miter, skip)) | |
24246 | return false; | |
24247 | ||
24248 | - local_irq_save(flags); | |
24249 | + local_irq_save_nort(flags); | |
24250 | ||
24251 | while (sg_miter_next(&miter) && offset < buflen) { | |
24252 | unsigned int len; | |
24253 | @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
24254 | ||
24255 | sg_miter_stop(&miter); | |
24256 | ||
24257 | - local_irq_restore(flags); | |
24258 | + local_irq_restore_nort(flags); | |
24259 | return offset; | |
24260 | } | |
24261 | EXPORT_SYMBOL(sg_copy_buffer); | |
24262 | diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c | |
24263 | index 1afec32de6f2..11fa431046a8 100644 | |
24264 | --- a/lib/smp_processor_id.c | |
24265 | +++ b/lib/smp_processor_id.c | |
24266 | @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, | |
24267 | if (!printk_ratelimit()) | |
24268 | goto out_enable; | |
24269 | ||
24270 | - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", | |
24271 | - what1, what2, preempt_count() - 1, current->comm, current->pid); | |
24272 | + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n", | |
24273 | + what1, what2, preempt_count() - 1, __migrate_disabled(current), | |
24274 | + current->comm, current->pid); | |
24275 | ||
24276 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | |
24277 | dump_stack(); | |
24278 | diff --git a/localversion-rt b/localversion-rt | |
24279 | new file mode 100644 | |
7c18450a | 24280 | index 000000000000..9e7cd66d9f44 |
1a6e0f06 JK |
24281 | --- /dev/null |
24282 | +++ b/localversion-rt | |
24283 | @@ -0,0 +1 @@ | |
7c18450a | 24284 | +-rt18 |
1a6e0f06 | 24285 | diff --git a/mm/Kconfig b/mm/Kconfig |
c7c16703 | 24286 | index 86e3e0e74d20..77e5862a1ed2 100644 |
1a6e0f06 JK |
24287 | --- a/mm/Kconfig |
24288 | +++ b/mm/Kconfig | |
24289 | @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS | |
24290 | ||
24291 | config TRANSPARENT_HUGEPAGE | |
24292 | bool "Transparent Hugepage Support" | |
24293 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
24294 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
24295 | select COMPACTION | |
24296 | select RADIX_TREE_MULTIORDER | |
24297 | help | |
24298 | diff --git a/mm/backing-dev.c b/mm/backing-dev.c | |
5c015b7c | 24299 | index 6ff2d7744223..b5a91dd53b5f 100644 |
1a6e0f06 JK |
24300 | --- a/mm/backing-dev.c |
24301 | +++ b/mm/backing-dev.c | |
24302 | @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) | |
24303 | { | |
24304 | unsigned long flags; | |
24305 | ||
24306 | - local_irq_save(flags); | |
24307 | + local_irq_save_nort(flags); | |
24308 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
24309 | - local_irq_restore(flags); | |
24310 | + local_irq_restore_nort(flags); | |
24311 | return; | |
24312 | } | |
24313 | ||
24314 | diff --git a/mm/compaction.c b/mm/compaction.c | |
c7c16703 | 24315 | index 70e6bec46dc2..6678ed58b7c6 100644 |
1a6e0f06 JK |
24316 | --- a/mm/compaction.c |
24317 | +++ b/mm/compaction.c | |
c7c16703 | 24318 | @@ -1593,10 +1593,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro |
1a6e0f06 JK |
24319 | block_start_pfn(cc->migrate_pfn, cc->order); |
24320 | ||
24321 | if (cc->last_migrated_pfn < current_block_start) { | |
24322 | - cpu = get_cpu(); | |
24323 | + cpu = get_cpu_light(); | |
24324 | + local_lock_irq(swapvec_lock); | |
24325 | lru_add_drain_cpu(cpu); | |
24326 | + local_unlock_irq(swapvec_lock); | |
24327 | drain_local_pages(zone); | |
24328 | - put_cpu(); | |
24329 | + put_cpu_light(); | |
24330 | /* No more flushing until we migrate again */ | |
24331 | cc->last_migrated_pfn = 0; | |
24332 | } | |
24333 | diff --git a/mm/filemap.c b/mm/filemap.c | |
33c7bf0f | 24334 | index edfb90e3830c..a8d2c7a73d54 100644 |
1a6e0f06 JK |
24335 | --- a/mm/filemap.c |
24336 | +++ b/mm/filemap.c | |
24337 | @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping, | |
24338 | * node->private_list is protected by | |
24339 | * mapping->tree_lock. | |
24340 | */ | |
24341 | - if (!list_empty(&node->private_list)) | |
24342 | - list_lru_del(&workingset_shadow_nodes, | |
24343 | + if (!list_empty(&node->private_list)) { | |
24344 | + local_lock(workingset_shadow_lock); | |
24345 | + list_lru_del(&__workingset_shadow_nodes, | |
24346 | &node->private_list); | |
24347 | + local_unlock(workingset_shadow_lock); | |
24348 | + } | |
24349 | } | |
24350 | return 0; | |
24351 | } | |
24352 | @@ -217,8 +220,10 @@ static void page_cache_tree_delete(struct address_space *mapping, | |
24353 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | |
24354 | list_empty(&node->private_list)) { | |
24355 | node->private_data = mapping; | |
24356 | - list_lru_add(&workingset_shadow_nodes, | |
24357 | - &node->private_list); | |
24358 | + local_lock(workingset_shadow_lock); | |
24359 | + list_lru_add(&__workingset_shadow_nodes, | |
24360 | + &node->private_list); | |
24361 | + local_unlock(workingset_shadow_lock); | |
24362 | } | |
24363 | } | |
24364 | ||
24365 | diff --git a/mm/highmem.c b/mm/highmem.c | |
24366 | index 50b4ca6787f0..77518a3b35a1 100644 | |
24367 | --- a/mm/highmem.c | |
24368 | +++ b/mm/highmem.c | |
24369 | @@ -29,10 +29,11 @@ | |
24370 | #include <linux/kgdb.h> | |
24371 | #include <asm/tlbflush.h> | |
24372 | ||
24373 | - | |
24374 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24375 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
24376 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
24377 | #endif | |
24378 | +#endif | |
24379 | ||
24380 | /* | |
24381 | * Virtual_count is not a pure "count". | |
24382 | @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) | |
24383 | unsigned long totalhigh_pages __read_mostly; | |
24384 | EXPORT_SYMBOL(totalhigh_pages); | |
24385 | ||
24386 | - | |
24387 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24388 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
24389 | +#endif | |
24390 | ||
24391 | unsigned int nr_free_highpages (void) | |
24392 | { | |
24393 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c | |
7c18450a | 24394 | index 47559cc0cdcc..1f2ebc924916 100644 |
1a6e0f06 JK |
24395 | --- a/mm/memcontrol.c |
24396 | +++ b/mm/memcontrol.c | |
24397 | @@ -67,6 +67,7 @@ | |
24398 | #include <net/sock.h> | |
24399 | #include <net/ip.h> | |
24400 | #include "slab.h" | |
24401 | +#include <linux/locallock.h> | |
24402 | ||
24403 | #include <asm/uaccess.h> | |
24404 | ||
24405 | @@ -92,6 +93,8 @@ int do_swap_account __read_mostly; | |
24406 | #define do_swap_account 0 | |
24407 | #endif | |
24408 | ||
24409 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
24410 | + | |
24411 | /* Whether legacy memory+swap accounting is active */ | |
24412 | static bool do_memsw_account(void) | |
24413 | { | |
1f39f580 | 24414 | @@ -1692,6 +1695,7 @@ struct memcg_stock_pcp { |
1a6e0f06 JK |
24415 | #define FLUSHING_CACHED_CHARGE 0 |
24416 | }; | |
24417 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | |
24418 | +static DEFINE_LOCAL_IRQ_LOCK(memcg_stock_ll); | |
24419 | static DEFINE_MUTEX(percpu_charge_mutex); | |
24420 | ||
24421 | /** | |
1f39f580 | 24422 | @@ -1714,7 +1718,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
24423 | if (nr_pages > CHARGE_BATCH) |
24424 | return ret; | |
24425 | ||
24426 | - local_irq_save(flags); | |
24427 | + local_lock_irqsave(memcg_stock_ll, flags); | |
24428 | ||
24429 | stock = this_cpu_ptr(&memcg_stock); | |
24430 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { | |
1f39f580 | 24431 | @@ -1722,7 +1726,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
24432 | ret = true; |
24433 | } | |
24434 | ||
24435 | - local_irq_restore(flags); | |
24436 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
24437 | ||
24438 | return ret; | |
24439 | } | |
1f39f580 | 24440 | @@ -1749,13 +1753,13 @@ static void drain_local_stock(struct work_struct *dummy) |
1a6e0f06 JK |
24441 | struct memcg_stock_pcp *stock; |
24442 | unsigned long flags; | |
24443 | ||
24444 | - local_irq_save(flags); | |
24445 | + local_lock_irqsave(memcg_stock_ll, flags); | |
24446 | ||
24447 | stock = this_cpu_ptr(&memcg_stock); | |
24448 | drain_stock(stock); | |
24449 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | |
24450 | ||
24451 | - local_irq_restore(flags); | |
24452 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
24453 | } | |
24454 | ||
24455 | /* | |
1f39f580 | 24456 | @@ -1767,7 +1771,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
24457 | struct memcg_stock_pcp *stock; |
24458 | unsigned long flags; | |
24459 | ||
24460 | - local_irq_save(flags); | |
24461 | + local_lock_irqsave(memcg_stock_ll, flags); | |
24462 | ||
24463 | stock = this_cpu_ptr(&memcg_stock); | |
24464 | if (stock->cached != memcg) { /* reset if necessary */ | |
1f39f580 | 24465 | @@ -1776,7 +1780,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
1a6e0f06 JK |
24466 | } |
24467 | stock->nr_pages += nr_pages; | |
24468 | ||
24469 | - local_irq_restore(flags); | |
24470 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
24471 | } | |
24472 | ||
24473 | /* | |
1f39f580 | 24474 | @@ -1792,7 +1796,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) |
1a6e0f06 JK |
24475 | return; |
24476 | /* Notify other cpus that system-wide "drain" is running */ | |
24477 | get_online_cpus(); | |
24478 | - curcpu = get_cpu(); | |
24479 | + curcpu = get_cpu_light(); | |
24480 | for_each_online_cpu(cpu) { | |
24481 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
24482 | struct mem_cgroup *memcg; | |
1f39f580 | 24483 | @@ -1809,7 +1813,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) |
1a6e0f06 JK |
24484 | schedule_work_on(cpu, &stock->work); |
24485 | } | |
24486 | } | |
24487 | - put_cpu(); | |
24488 | + put_cpu_light(); | |
24489 | put_online_cpus(); | |
24490 | mutex_unlock(&percpu_charge_mutex); | |
24491 | } | |
7c18450a | 24492 | @@ -4555,12 +4559,12 @@ static int mem_cgroup_move_account(struct page *page, |
1a6e0f06 JK |
24493 | |
24494 | ret = 0; | |
24495 | ||
24496 | - local_irq_disable(); | |
24497 | + local_lock_irq(event_lock); | |
24498 | mem_cgroup_charge_statistics(to, page, compound, nr_pages); | |
24499 | memcg_check_events(to, page); | |
24500 | mem_cgroup_charge_statistics(from, page, compound, -nr_pages); | |
24501 | memcg_check_events(from, page); | |
24502 | - local_irq_enable(); | |
24503 | + local_unlock_irq(event_lock); | |
24504 | out_unlock: | |
24505 | unlock_page(page); | |
24506 | out: | |
7c18450a | 24507 | @@ -5435,10 +5439,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, |
1a6e0f06 JK |
24508 | |
24509 | commit_charge(page, memcg, lrucare); | |
24510 | ||
24511 | - local_irq_disable(); | |
24512 | + local_lock_irq(event_lock); | |
24513 | mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); | |
24514 | memcg_check_events(memcg, page); | |
24515 | - local_irq_enable(); | |
24516 | + local_unlock_irq(event_lock); | |
24517 | ||
24518 | if (do_memsw_account() && PageSwapCache(page)) { | |
24519 | swp_entry_t entry = { .val = page_private(page) }; | |
7c18450a | 24520 | @@ -5494,14 +5498,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, |
1a6e0f06 JK |
24521 | memcg_oom_recover(memcg); |
24522 | } | |
24523 | ||
24524 | - local_irq_save(flags); | |
24525 | + local_lock_irqsave(event_lock, flags); | |
24526 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | |
24527 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); | |
24528 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); | |
24529 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); | |
24530 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); | |
24531 | memcg_check_events(memcg, dummy_page); | |
24532 | - local_irq_restore(flags); | |
24533 | + local_unlock_irqrestore(event_lock, flags); | |
24534 | ||
24535 | if (!mem_cgroup_is_root(memcg)) | |
24536 | css_put_many(&memcg->css, nr_pages); | |
7c18450a | 24537 | @@ -5656,10 +5660,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) |
1a6e0f06 JK |
24538 | |
24539 | commit_charge(newpage, memcg, false); | |
24540 | ||
24541 | - local_irq_save(flags); | |
24542 | + local_lock_irqsave(event_lock, flags); | |
24543 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | |
24544 | memcg_check_events(memcg, newpage); | |
24545 | - local_irq_restore(flags); | |
24546 | + local_unlock_irqrestore(event_lock, flags); | |
24547 | } | |
24548 | ||
24549 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | |
7c18450a | 24550 | @@ -5850,6 +5854,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) |
1a6e0f06 JK |
24551 | { |
24552 | struct mem_cgroup *memcg, *swap_memcg; | |
24553 | unsigned short oldid; | |
24554 | + unsigned long flags; | |
24555 | ||
24556 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
24557 | VM_BUG_ON_PAGE(page_count(page), page); | |
7c18450a | 24558 | @@ -5890,12 +5895,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) |
1a6e0f06 JK |
24559 | * important here to have the interrupts disabled because it is the |
24560 | * only synchronisation we have for udpating the per-CPU variables. | |
24561 | */ | |
24562 | + local_lock_irqsave(event_lock, flags); | |
24563 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
24564 | VM_BUG_ON(!irqs_disabled()); | |
24565 | +#endif | |
24566 | mem_cgroup_charge_statistics(memcg, page, false, -1); | |
24567 | memcg_check_events(memcg, page); | |
24568 | ||
24569 | if (!mem_cgroup_is_root(memcg)) | |
24570 | css_put(&memcg->css); | |
24571 | + local_unlock_irqrestore(event_lock, flags); | |
24572 | } | |
24573 | ||
24574 | /* | |
24575 | diff --git a/mm/mmu_context.c b/mm/mmu_context.c | |
24576 | index 6f4d27c5bb32..5cd25c745a8f 100644 | |
24577 | --- a/mm/mmu_context.c | |
24578 | +++ b/mm/mmu_context.c | |
24579 | @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm) | |
24580 | struct task_struct *tsk = current; | |
24581 | ||
24582 | task_lock(tsk); | |
24583 | + preempt_disable_rt(); | |
24584 | active_mm = tsk->active_mm; | |
24585 | if (active_mm != mm) { | |
24586 | atomic_inc(&mm->mm_count); | |
24587 | @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm) | |
24588 | } | |
24589 | tsk->mm = mm; | |
24590 | switch_mm(active_mm, mm, tsk); | |
24591 | + preempt_enable_rt(); | |
24592 | task_unlock(tsk); | |
24593 | #ifdef finish_arch_post_lock_switch | |
24594 | finish_arch_post_lock_switch(); | |
24595 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
7c18450a | 24596 | index e5b159b88e39..b9946dcb1099 100644 |
1a6e0f06 JK |
24597 | --- a/mm/page_alloc.c |
24598 | +++ b/mm/page_alloc.c | |
24599 | @@ -61,6 +61,7 @@ | |
24600 | #include <linux/page_ext.h> | |
24601 | #include <linux/hugetlb.h> | |
24602 | #include <linux/sched/rt.h> | |
24603 | +#include <linux/locallock.h> | |
24604 | #include <linux/page_owner.h> | |
24605 | #include <linux/kthread.h> | |
24606 | #include <linux/memcontrol.h> | |
c7c16703 | 24607 | @@ -281,6 +282,18 @@ EXPORT_SYMBOL(nr_node_ids); |
1a6e0f06 JK |
24608 | EXPORT_SYMBOL(nr_online_nodes); |
24609 | #endif | |
24610 | ||
24611 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
24612 | + | |
24613 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24614 | +# define cpu_lock_irqsave(cpu, flags) \ | |
24615 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
24616 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
24617 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
24618 | +#else | |
24619 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
24620 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
24621 | +#endif | |
24622 | + | |
24623 | int page_group_by_mobility_disabled __read_mostly; | |
24624 | ||
24625 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
c7c16703 | 24626 | @@ -1072,7 +1085,7 @@ static bool bulkfree_pcp_prepare(struct page *page) |
1a6e0f06 JK |
24627 | #endif /* CONFIG_DEBUG_VM */ |
24628 | ||
24629 | /* | |
24630 | - * Frees a number of pages from the PCP lists | |
24631 | + * Frees a number of pages which have been collected from the pcp lists. | |
24632 | * Assumes all pages on list are in same zone, and of same order. | |
24633 | * count is the number of pages to free. | |
24634 | * | |
c7c16703 | 24635 | @@ -1083,19 +1096,58 @@ static bool bulkfree_pcp_prepare(struct page *page) |
1a6e0f06 JK |
24636 | * pinned" detection logic. |
24637 | */ | |
24638 | static void free_pcppages_bulk(struct zone *zone, int count, | |
24639 | - struct per_cpu_pages *pcp) | |
24640 | + struct list_head *list) | |
24641 | { | |
24642 | - int migratetype = 0; | |
24643 | - int batch_free = 0; | |
24644 | unsigned long nr_scanned; | |
24645 | bool isolated_pageblocks; | |
24646 | + unsigned long flags; | |
24647 | + | |
24648 | + spin_lock_irqsave(&zone->lock, flags); | |
24649 | ||
24650 | - spin_lock(&zone->lock); | |
24651 | isolated_pageblocks = has_isolate_pageblock(zone); | |
24652 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
24653 | if (nr_scanned) | |
24654 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
24655 | ||
24656 | + while (!list_empty(list)) { | |
24657 | + struct page *page; | |
24658 | + int mt; /* migratetype of the to-be-freed page */ | |
24659 | + | |
24660 | + page = list_first_entry(list, struct page, lru); | |
24661 | + /* must delete as __free_one_page list manipulates */ | |
24662 | + list_del(&page->lru); | |
24663 | + | |
24664 | + mt = get_pcppage_migratetype(page); | |
24665 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
24666 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
24667 | + /* Pageblock could have been isolated meanwhile */ | |
24668 | + if (unlikely(isolated_pageblocks)) | |
24669 | + mt = get_pageblock_migratetype(page); | |
24670 | + | |
24671 | + if (bulkfree_pcp_prepare(page)) | |
24672 | + continue; | |
24673 | + | |
24674 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
24675 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
24676 | + count--; | |
24677 | + } | |
24678 | + WARN_ON(count != 0); | |
24679 | + spin_unlock_irqrestore(&zone->lock, flags); | |
24680 | +} | |
24681 | + | |
24682 | +/* | |
24683 | + * Moves a number of pages from the PCP lists to free list which | |
24684 | + * is freed outside of the locked region. | |
24685 | + * | |
24686 | + * Assumes all pages on list are in same zone, and of same order. | |
24687 | + * count is the number of pages to free. | |
24688 | + */ | |
24689 | +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, | |
24690 | + struct list_head *dst) | |
24691 | +{ | |
24692 | + int migratetype = 0; | |
24693 | + int batch_free = 0; | |
24694 | + | |
24695 | while (count) { | |
24696 | struct page *page; | |
24697 | struct list_head *list; | |
c7c16703 | 24698 | @@ -1111,7 +1163,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, |
1a6e0f06 JK |
24699 | batch_free++; |
24700 | if (++migratetype == MIGRATE_PCPTYPES) | |
24701 | migratetype = 0; | |
24702 | - list = &pcp->lists[migratetype]; | |
24703 | + list = &src->lists[migratetype]; | |
24704 | } while (list_empty(list)); | |
24705 | ||
24706 | /* This is the only non-empty list. Free them all. */ | |
c7c16703 | 24707 | @@ -1119,27 +1171,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, |
1a6e0f06 JK |
24708 | batch_free = count; |
24709 | ||
24710 | do { | |
24711 | - int mt; /* migratetype of the to-be-freed page */ | |
24712 | - | |
24713 | page = list_last_entry(list, struct page, lru); | |
24714 | - /* must delete as __free_one_page list manipulates */ | |
24715 | list_del(&page->lru); | |
24716 | ||
24717 | - mt = get_pcppage_migratetype(page); | |
24718 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
24719 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
24720 | - /* Pageblock could have been isolated meanwhile */ | |
24721 | - if (unlikely(isolated_pageblocks)) | |
24722 | - mt = get_pageblock_migratetype(page); | |
24723 | - | |
24724 | - if (bulkfree_pcp_prepare(page)) | |
24725 | - continue; | |
24726 | - | |
24727 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
24728 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
24729 | + list_add(&page->lru, dst); | |
24730 | } while (--count && --batch_free && !list_empty(list)); | |
24731 | } | |
24732 | - spin_unlock(&zone->lock); | |
24733 | } | |
24734 | ||
24735 | static void free_one_page(struct zone *zone, | |
c7c16703 | 24736 | @@ -1148,7 +1185,9 @@ static void free_one_page(struct zone *zone, |
1a6e0f06 JK |
24737 | int migratetype) |
24738 | { | |
24739 | unsigned long nr_scanned; | |
24740 | - spin_lock(&zone->lock); | |
24741 | + unsigned long flags; | |
24742 | + | |
24743 | + spin_lock_irqsave(&zone->lock, flags); | |
24744 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
24745 | if (nr_scanned) | |
24746 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
c7c16703 | 24747 | @@ -1158,7 +1197,7 @@ static void free_one_page(struct zone *zone, |
1a6e0f06 JK |
24748 | migratetype = get_pfnblock_migratetype(page, pfn); |
24749 | } | |
24750 | __free_one_page(page, pfn, zone, order, migratetype); | |
24751 | - spin_unlock(&zone->lock); | |
24752 | + spin_unlock_irqrestore(&zone->lock, flags); | |
24753 | } | |
24754 | ||
24755 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |
c7c16703 | 24756 | @@ -1244,10 +1283,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) |
1a6e0f06 JK |
24757 | return; |
24758 | ||
24759 | migratetype = get_pfnblock_migratetype(page, pfn); | |
24760 | - local_irq_save(flags); | |
24761 | + local_lock_irqsave(pa_lock, flags); | |
24762 | __count_vm_events(PGFREE, 1 << order); | |
24763 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
24764 | - local_irq_restore(flags); | |
24765 | + local_unlock_irqrestore(pa_lock, flags); | |
24766 | } | |
24767 | ||
24768 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |
c7c16703 | 24769 | @@ -2246,16 +2285,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, |
1a6e0f06 JK |
24770 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) |
24771 | { | |
24772 | unsigned long flags; | |
24773 | + LIST_HEAD(dst); | |
24774 | int to_drain, batch; | |
24775 | ||
24776 | - local_irq_save(flags); | |
24777 | + local_lock_irqsave(pa_lock, flags); | |
24778 | batch = READ_ONCE(pcp->batch); | |
24779 | to_drain = min(pcp->count, batch); | |
24780 | if (to_drain > 0) { | |
24781 | - free_pcppages_bulk(zone, to_drain, pcp); | |
24782 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
24783 | pcp->count -= to_drain; | |
24784 | } | |
24785 | - local_irq_restore(flags); | |
24786 | + local_unlock_irqrestore(pa_lock, flags); | |
24787 | + free_pcppages_bulk(zone, to_drain, &dst); | |
24788 | } | |
24789 | #endif | |
24790 | ||
c7c16703 | 24791 | @@ -2271,16 +2312,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) |
1a6e0f06 JK |
24792 | unsigned long flags; |
24793 | struct per_cpu_pageset *pset; | |
24794 | struct per_cpu_pages *pcp; | |
24795 | + LIST_HEAD(dst); | |
24796 | + int count; | |
24797 | ||
24798 | - local_irq_save(flags); | |
24799 | + cpu_lock_irqsave(cpu, flags); | |
24800 | pset = per_cpu_ptr(zone->pageset, cpu); | |
24801 | ||
24802 | pcp = &pset->pcp; | |
24803 | - if (pcp->count) { | |
24804 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
24805 | + count = pcp->count; | |
24806 | + if (count) { | |
24807 | + isolate_pcp_pages(count, pcp, &dst); | |
24808 | pcp->count = 0; | |
24809 | } | |
24810 | - local_irq_restore(flags); | |
24811 | + cpu_unlock_irqrestore(cpu, flags); | |
24812 | + if (count) | |
24813 | + free_pcppages_bulk(zone, count, &dst); | |
24814 | } | |
24815 | ||
24816 | /* | |
c7c16703 | 24817 | @@ -2366,8 +2412,17 @@ void drain_all_pages(struct zone *zone) |
1a6e0f06 JK |
24818 | else |
24819 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
24820 | } | |
24821 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
24822 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | |
24823 | zone, 1); | |
24824 | +#else | |
24825 | + for_each_cpu(cpu, &cpus_with_pcps) { | |
24826 | + if (zone) | |
24827 | + drain_pages_zone(cpu, zone); | |
24828 | + else | |
24829 | + drain_pages(cpu); | |
24830 | + } | |
24831 | +#endif | |
24832 | } | |
24833 | ||
24834 | #ifdef CONFIG_HIBERNATION | |
c7c16703 | 24835 | @@ -2427,7 +2482,7 @@ void free_hot_cold_page(struct page *page, bool cold) |
1a6e0f06 JK |
24836 | |
24837 | migratetype = get_pfnblock_migratetype(page, pfn); | |
24838 | set_pcppage_migratetype(page, migratetype); | |
24839 | - local_irq_save(flags); | |
24840 | + local_lock_irqsave(pa_lock, flags); | |
24841 | __count_vm_event(PGFREE); | |
24842 | ||
24843 | /* | |
c7c16703 | 24844 | @@ -2453,12 +2508,17 @@ void free_hot_cold_page(struct page *page, bool cold) |
1a6e0f06 JK |
24845 | pcp->count++; |
24846 | if (pcp->count >= pcp->high) { | |
24847 | unsigned long batch = READ_ONCE(pcp->batch); | |
24848 | - free_pcppages_bulk(zone, batch, pcp); | |
24849 | + LIST_HEAD(dst); | |
24850 | + | |
24851 | + isolate_pcp_pages(batch, pcp, &dst); | |
24852 | pcp->count -= batch; | |
24853 | + local_unlock_irqrestore(pa_lock, flags); | |
24854 | + free_pcppages_bulk(zone, batch, &dst); | |
24855 | + return; | |
24856 | } | |
24857 | ||
24858 | out: | |
24859 | - local_irq_restore(flags); | |
24860 | + local_unlock_irqrestore(pa_lock, flags); | |
24861 | } | |
24862 | ||
24863 | /* | |
c7c16703 | 24864 | @@ -2600,7 +2660,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
24865 | struct per_cpu_pages *pcp; |
24866 | struct list_head *list; | |
24867 | ||
24868 | - local_irq_save(flags); | |
24869 | + local_lock_irqsave(pa_lock, flags); | |
24870 | do { | |
24871 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
24872 | list = &pcp->lists[migratetype]; | |
c7c16703 | 24873 | @@ -2627,7 +2687,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
24874 | * allocate greater than order-1 page units with __GFP_NOFAIL. |
24875 | */ | |
24876 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | |
24877 | - spin_lock_irqsave(&zone->lock, flags); | |
24878 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
24879 | ||
24880 | do { | |
24881 | page = NULL; | |
c7c16703 | 24882 | @@ -2639,22 +2699,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, |
1a6e0f06 JK |
24883 | if (!page) |
24884 | page = __rmqueue(zone, order, migratetype); | |
24885 | } while (page && check_new_pages(page, order)); | |
24886 | - spin_unlock(&zone->lock); | |
24887 | - if (!page) | |
24888 | + if (!page) { | |
24889 | + spin_unlock(&zone->lock); | |
24890 | goto failed; | |
24891 | + } | |
24892 | __mod_zone_freepage_state(zone, -(1 << order), | |
24893 | get_pcppage_migratetype(page)); | |
24894 | + spin_unlock(&zone->lock); | |
24895 | } | |
24896 | ||
24897 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
24898 | zone_statistics(preferred_zone, zone, gfp_flags); | |
24899 | - local_irq_restore(flags); | |
24900 | + local_unlock_irqrestore(pa_lock, flags); | |
24901 | ||
24902 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | |
24903 | return page; | |
24904 | ||
24905 | failed: | |
24906 | - local_irq_restore(flags); | |
24907 | + local_unlock_irqrestore(pa_lock, flags); | |
24908 | return NULL; | |
24909 | } | |
24910 | ||
5c015b7c | 24911 | @@ -6531,7 +6593,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, |
1a6e0f06 JK |
24912 | int cpu = (unsigned long)hcpu; |
24913 | ||
24914 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
24915 | + local_lock_irq_on(swapvec_lock, cpu); | |
24916 | lru_add_drain_cpu(cpu); | |
24917 | + local_unlock_irq_on(swapvec_lock, cpu); | |
24918 | drain_pages(cpu); | |
24919 | ||
24920 | /* | |
5c015b7c | 24921 | @@ -6557,6 +6621,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, |
1a6e0f06 JK |
24922 | void __init page_alloc_init(void) |
24923 | { | |
24924 | hotcpu_notifier(page_alloc_cpu_notify, 0); | |
24925 | + local_irq_lock_init(pa_lock); | |
24926 | } | |
24927 | ||
24928 | /* | |
5c015b7c | 24929 | @@ -7385,7 +7450,7 @@ void zone_pcp_reset(struct zone *zone) |
1a6e0f06 JK |
24930 | struct per_cpu_pageset *pset; |
24931 | ||
24932 | /* avoid races with drain_pages() */ | |
24933 | - local_irq_save(flags); | |
24934 | + local_lock_irqsave(pa_lock, flags); | |
24935 | if (zone->pageset != &boot_pageset) { | |
24936 | for_each_online_cpu(cpu) { | |
24937 | pset = per_cpu_ptr(zone->pageset, cpu); | |
5c015b7c | 24938 | @@ -7394,7 +7459,7 @@ void zone_pcp_reset(struct zone *zone) |
1a6e0f06 JK |
24939 | free_percpu(zone->pageset); |
24940 | zone->pageset = &boot_pageset; | |
24941 | } | |
24942 | - local_irq_restore(flags); | |
24943 | + local_unlock_irqrestore(pa_lock, flags); | |
24944 | } | |
24945 | ||
24946 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
5c015b7c | 24947 | diff --git a/mm/percpu.c b/mm/percpu.c |
33c7bf0f | 24948 | index f014cebbf405..4e739fcf91bf 100644 |
5c015b7c JK |
24949 | --- a/mm/percpu.c |
24950 | +++ b/mm/percpu.c | |
33c7bf0f | 24951 | @@ -1283,6 +1283,31 @@ void free_percpu(void __percpu *ptr) |
5c015b7c JK |
24952 | } |
24953 | EXPORT_SYMBOL_GPL(free_percpu); | |
24954 | ||
24955 | +bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) | |
24956 | +{ | |
24957 | +#ifdef CONFIG_SMP | |
24958 | + const size_t static_size = __per_cpu_end - __per_cpu_start; | |
24959 | + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | |
24960 | + unsigned int cpu; | |
24961 | + | |
24962 | + for_each_possible_cpu(cpu) { | |
24963 | + void *start = per_cpu_ptr(base, cpu); | |
24964 | + void *va = (void *)addr; | |
24965 | + | |
24966 | + if (va >= start && va < start + static_size) { | |
33c7bf0f | 24967 | + if (can_addr) { |
5c015b7c | 24968 | + *can_addr = (unsigned long) (va - start); |
33c7bf0f JK |
24969 | + *can_addr += (unsigned long) |
24970 | + per_cpu_ptr(base, get_boot_cpu_id()); | |
24971 | + } | |
5c015b7c JK |
24972 | + return true; |
24973 | + } | |
24974 | + } | |
24975 | +#endif | |
24976 | + /* on UP, can't distinguish from other static vars, always false */ | |
24977 | + return false; | |
24978 | +} | |
24979 | + | |
24980 | /** | |
24981 | * is_kernel_percpu_address - test whether address is from static percpu area | |
24982 | * @addr: address to test | |
33c7bf0f | 24983 | @@ -1296,20 +1321,7 @@ EXPORT_SYMBOL_GPL(free_percpu); |
5c015b7c JK |
24984 | */ |
24985 | bool is_kernel_percpu_address(unsigned long addr) | |
24986 | { | |
24987 | -#ifdef CONFIG_SMP | |
24988 | - const size_t static_size = __per_cpu_end - __per_cpu_start; | |
24989 | - void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); | |
24990 | - unsigned int cpu; | |
24991 | - | |
24992 | - for_each_possible_cpu(cpu) { | |
24993 | - void *start = per_cpu_ptr(base, cpu); | |
24994 | - | |
24995 | - if ((void *)addr >= start && (void *)addr < start + static_size) | |
24996 | - return true; | |
24997 | - } | |
24998 | -#endif | |
24999 | - /* on UP, can't distinguish from other static vars, always false */ | |
25000 | - return false; | |
25001 | + return __is_kernel_percpu_address(addr, NULL); | |
25002 | } | |
25003 | ||
25004 | /** | |
1a6e0f06 | 25005 | diff --git a/mm/slab.h b/mm/slab.h |
33c7bf0f | 25006 | index ceb7d70cdb76..dfd281e43fbe 100644 |
1a6e0f06 JK |
25007 | --- a/mm/slab.h |
25008 | +++ b/mm/slab.h | |
25009 | @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |
25010 | * The slab lists for all objects. | |
25011 | */ | |
25012 | struct kmem_cache_node { | |
25013 | +#ifdef CONFIG_SLUB | |
25014 | + raw_spinlock_t list_lock; | |
25015 | +#else | |
25016 | spinlock_t list_lock; | |
25017 | +#endif | |
25018 | ||
25019 | #ifdef CONFIG_SLAB | |
25020 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
25021 | diff --git a/mm/slub.c b/mm/slub.c | |
33c7bf0f | 25022 | index 58c7526f8de2..6d72b7f87129 100644 |
1a6e0f06 JK |
25023 | --- a/mm/slub.c |
25024 | +++ b/mm/slub.c | |
c7c16703 | 25025 | @@ -1141,7 +1141,7 @@ static noinline int free_debug_processing( |
1a6e0f06 JK |
25026 | unsigned long uninitialized_var(flags); |
25027 | int ret = 0; | |
25028 | ||
25029 | - spin_lock_irqsave(&n->list_lock, flags); | |
25030 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25031 | slab_lock(page); | |
25032 | ||
25033 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | |
c7c16703 | 25034 | @@ -1176,7 +1176,7 @@ static noinline int free_debug_processing( |
1a6e0f06 JK |
25035 | bulk_cnt, cnt); |
25036 | ||
25037 | slab_unlock(page); | |
25038 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25039 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25040 | if (!ret) | |
25041 | slab_fix(s, "Object at 0x%p not freed", object); | |
25042 | return ret; | |
c7c16703 | 25043 | @@ -1304,6 +1304,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, |
1a6e0f06 JK |
25044 | |
25045 | #endif /* CONFIG_SLUB_DEBUG */ | |
25046 | ||
25047 | +struct slub_free_list { | |
25048 | + raw_spinlock_t lock; | |
25049 | + struct list_head list; | |
25050 | +}; | |
25051 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
25052 | + | |
25053 | /* | |
25054 | * Hooks for other subsystems that check memory allocations. In a typical | |
25055 | * production configuration these hooks all should produce no code at all. | |
5c015b7c | 25056 | @@ -1527,10 +1533,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) |
1a6e0f06 JK |
25057 | void *start, *p; |
25058 | int idx, order; | |
25059 | bool shuffle; | |
25060 | + bool enableirqs = false; | |
25061 | ||
25062 | flags &= gfp_allowed_mask; | |
25063 | ||
25064 | if (gfpflags_allow_blocking(flags)) | |
25065 | + enableirqs = true; | |
25066 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25067 | + if (system_state == SYSTEM_RUNNING) | |
25068 | + enableirqs = true; | |
25069 | +#endif | |
25070 | + if (enableirqs) | |
25071 | local_irq_enable(); | |
25072 | ||
25073 | flags |= s->allocflags; | |
5c015b7c | 25074 | @@ -1605,7 +1618,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) |
1a6e0f06 JK |
25075 | page->frozen = 1; |
25076 | ||
25077 | out: | |
25078 | - if (gfpflags_allow_blocking(flags)) | |
25079 | + if (enableirqs) | |
25080 | local_irq_disable(); | |
25081 | if (!page) | |
25082 | return NULL; | |
5c015b7c | 25083 | @@ -1664,6 +1677,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
25084 | __free_pages(page, order); |
25085 | } | |
25086 | ||
25087 | +static void free_delayed(struct list_head *h) | |
25088 | +{ | |
25089 | + while(!list_empty(h)) { | |
25090 | + struct page *page = list_first_entry(h, struct page, lru); | |
25091 | + | |
25092 | + list_del(&page->lru); | |
25093 | + __free_slab(page->slab_cache, page); | |
25094 | + } | |
25095 | +} | |
25096 | + | |
25097 | #define need_reserve_slab_rcu \ | |
25098 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
25099 | ||
5c015b7c | 25100 | @@ -1695,6 +1718,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
25101 | } |
25102 | ||
25103 | call_rcu(head, rcu_free_slab); | |
25104 | + } else if (irqs_disabled()) { | |
25105 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
25106 | + | |
25107 | + raw_spin_lock(&f->lock); | |
25108 | + list_add(&page->lru, &f->list); | |
25109 | + raw_spin_unlock(&f->lock); | |
25110 | } else | |
25111 | __free_slab(s, page); | |
25112 | } | |
5c015b7c | 25113 | @@ -1802,7 +1831,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, |
1a6e0f06 JK |
25114 | if (!n || !n->nr_partial) |
25115 | return NULL; | |
25116 | ||
25117 | - spin_lock(&n->list_lock); | |
25118 | + raw_spin_lock(&n->list_lock); | |
25119 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
25120 | void *t; | |
25121 | ||
5c015b7c | 25122 | @@ -1827,7 +1856,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, |
1a6e0f06 JK |
25123 | break; |
25124 | ||
25125 | } | |
25126 | - spin_unlock(&n->list_lock); | |
25127 | + raw_spin_unlock(&n->list_lock); | |
25128 | return object; | |
25129 | } | |
25130 | ||
5c015b7c | 25131 | @@ -2073,7 +2102,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25132 | * that acquire_slab() will see a slab page that |
25133 | * is frozen | |
25134 | */ | |
25135 | - spin_lock(&n->list_lock); | |
25136 | + raw_spin_lock(&n->list_lock); | |
25137 | } | |
25138 | } else { | |
25139 | m = M_FULL; | |
5c015b7c | 25140 | @@ -2084,7 +2113,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25141 | * slabs from diagnostic functions will not see |
25142 | * any frozen slabs. | |
25143 | */ | |
25144 | - spin_lock(&n->list_lock); | |
25145 | + raw_spin_lock(&n->list_lock); | |
25146 | } | |
25147 | } | |
25148 | ||
5c015b7c | 25149 | @@ -2119,7 +2148,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25150 | goto redo; |
25151 | ||
25152 | if (lock) | |
25153 | - spin_unlock(&n->list_lock); | |
25154 | + raw_spin_unlock(&n->list_lock); | |
25155 | ||
25156 | if (m == M_FREE) { | |
25157 | stat(s, DEACTIVATE_EMPTY); | |
5c015b7c | 25158 | @@ -2151,10 +2180,10 @@ static void unfreeze_partials(struct kmem_cache *s, |
1a6e0f06 JK |
25159 | n2 = get_node(s, page_to_nid(page)); |
25160 | if (n != n2) { | |
25161 | if (n) | |
25162 | - spin_unlock(&n->list_lock); | |
25163 | + raw_spin_unlock(&n->list_lock); | |
25164 | ||
25165 | n = n2; | |
25166 | - spin_lock(&n->list_lock); | |
25167 | + raw_spin_lock(&n->list_lock); | |
25168 | } | |
25169 | ||
25170 | do { | |
5c015b7c | 25171 | @@ -2183,7 +2212,7 @@ static void unfreeze_partials(struct kmem_cache *s, |
1a6e0f06 JK |
25172 | } |
25173 | ||
25174 | if (n) | |
25175 | - spin_unlock(&n->list_lock); | |
25176 | + raw_spin_unlock(&n->list_lock); | |
25177 | ||
25178 | while (discard_page) { | |
25179 | page = discard_page; | |
5c015b7c | 25180 | @@ -2222,14 +2251,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) |
1a6e0f06 JK |
25181 | pobjects = oldpage->pobjects; |
25182 | pages = oldpage->pages; | |
25183 | if (drain && pobjects > s->cpu_partial) { | |
25184 | + struct slub_free_list *f; | |
25185 | unsigned long flags; | |
25186 | + LIST_HEAD(tofree); | |
25187 | /* | |
25188 | * partial array is full. Move the existing | |
25189 | * set to the per node partial list. | |
25190 | */ | |
25191 | local_irq_save(flags); | |
25192 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
25193 | + f = this_cpu_ptr(&slub_free_list); | |
25194 | + raw_spin_lock(&f->lock); | |
25195 | + list_splice_init(&f->list, &tofree); | |
25196 | + raw_spin_unlock(&f->lock); | |
25197 | local_irq_restore(flags); | |
25198 | + free_delayed(&tofree); | |
25199 | oldpage = NULL; | |
25200 | pobjects = 0; | |
25201 | pages = 0; | |
5c015b7c | 25202 | @@ -2301,7 +2337,22 @@ static bool has_cpu_slab(int cpu, void *info) |
1a6e0f06 JK |
25203 | |
25204 | static void flush_all(struct kmem_cache *s) | |
25205 | { | |
25206 | + LIST_HEAD(tofree); | |
25207 | + int cpu; | |
25208 | + | |
25209 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
25210 | + for_each_online_cpu(cpu) { | |
25211 | + struct slub_free_list *f; | |
25212 | + | |
25213 | + if (!has_cpu_slab(cpu, s)) | |
25214 | + continue; | |
25215 | + | |
25216 | + f = &per_cpu(slub_free_list, cpu); | |
25217 | + raw_spin_lock_irq(&f->lock); | |
25218 | + list_splice_init(&f->list, &tofree); | |
25219 | + raw_spin_unlock_irq(&f->lock); | |
25220 | + free_delayed(&tofree); | |
25221 | + } | |
25222 | } | |
25223 | ||
25224 | /* | |
5c015b7c | 25225 | @@ -2356,10 +2407,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, |
1a6e0f06 JK |
25226 | unsigned long x = 0; |
25227 | struct page *page; | |
25228 | ||
25229 | - spin_lock_irqsave(&n->list_lock, flags); | |
25230 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25231 | list_for_each_entry(page, &n->partial, lru) | |
25232 | x += get_count(page); | |
25233 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25234 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25235 | return x; | |
25236 | } | |
25237 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
5c015b7c | 25238 | @@ -2497,8 +2548,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) |
1a6e0f06 JK |
25239 | * already disabled (which is the case for bulk allocation). |
25240 | */ | |
25241 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
25242 | - unsigned long addr, struct kmem_cache_cpu *c) | |
25243 | + unsigned long addr, struct kmem_cache_cpu *c, | |
25244 | + struct list_head *to_free) | |
25245 | { | |
25246 | + struct slub_free_list *f; | |
25247 | void *freelist; | |
25248 | struct page *page; | |
25249 | ||
5c015b7c | 25250 | @@ -2558,6 +2611,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
25251 | VM_BUG_ON(!c->page->frozen); |
25252 | c->freelist = get_freepointer(s, freelist); | |
25253 | c->tid = next_tid(c->tid); | |
25254 | + | |
25255 | +out: | |
25256 | + f = this_cpu_ptr(&slub_free_list); | |
25257 | + raw_spin_lock(&f->lock); | |
25258 | + list_splice_init(&f->list, to_free); | |
25259 | + raw_spin_unlock(&f->lock); | |
25260 | + | |
25261 | return freelist; | |
25262 | ||
25263 | new_slab: | |
5c015b7c | 25264 | @@ -2589,7 +2649,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
25265 | deactivate_slab(s, page, get_freepointer(s, freelist)); |
25266 | c->page = NULL; | |
25267 | c->freelist = NULL; | |
25268 | - return freelist; | |
25269 | + goto out; | |
25270 | } | |
25271 | ||
25272 | /* | |
5c015b7c | 25273 | @@ -2601,6 +2661,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
25274 | { |
25275 | void *p; | |
25276 | unsigned long flags; | |
25277 | + LIST_HEAD(tofree); | |
25278 | ||
25279 | local_irq_save(flags); | |
25280 | #ifdef CONFIG_PREEMPT | |
5c015b7c | 25281 | @@ -2612,8 +2673,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, |
1a6e0f06 JK |
25282 | c = this_cpu_ptr(s->cpu_slab); |
25283 | #endif | |
25284 | ||
25285 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
25286 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
25287 | local_irq_restore(flags); | |
25288 | + free_delayed(&tofree); | |
25289 | return p; | |
25290 | } | |
25291 | ||
5c015b7c | 25292 | @@ -2799,7 +2861,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25293 | |
25294 | do { | |
25295 | if (unlikely(n)) { | |
25296 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25297 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25298 | n = NULL; | |
25299 | } | |
25300 | prior = page->freelist; | |
5c015b7c | 25301 | @@ -2831,7 +2893,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25302 | * Otherwise the list_lock will synchronize with |
25303 | * other processors updating the list of slabs. | |
25304 | */ | |
25305 | - spin_lock_irqsave(&n->list_lock, flags); | |
25306 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25307 | ||
25308 | } | |
25309 | } | |
5c015b7c | 25310 | @@ -2873,7 +2935,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25311 | add_partial(n, page, DEACTIVATE_TO_TAIL); |
25312 | stat(s, FREE_ADD_PARTIAL); | |
25313 | } | |
25314 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25315 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25316 | return; | |
25317 | ||
25318 | slab_empty: | |
5c015b7c | 25319 | @@ -2888,7 +2950,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25320 | remove_full(s, n, page); |
25321 | } | |
25322 | ||
25323 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25324 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25325 | stat(s, FREE_SLAB); | |
25326 | discard_slab(s, page); | |
25327 | } | |
5c015b7c | 25328 | @@ -3093,6 +3155,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
25329 | void **p) |
25330 | { | |
25331 | struct kmem_cache_cpu *c; | |
25332 | + LIST_HEAD(to_free); | |
25333 | int i; | |
25334 | ||
25335 | /* memcg and kmem_cache debug support */ | |
5c015b7c | 25336 | @@ -3116,7 +3179,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
25337 | * of re-populating per CPU c->freelist |
25338 | */ | |
25339 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
25340 | - _RET_IP_, c); | |
25341 | + _RET_IP_, c, &to_free); | |
25342 | if (unlikely(!p[i])) | |
25343 | goto error; | |
25344 | ||
5c015b7c | 25345 | @@ -3128,6 +3191,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, |
1a6e0f06 JK |
25346 | } |
25347 | c->tid = next_tid(c->tid); | |
25348 | local_irq_enable(); | |
25349 | + free_delayed(&to_free); | |
25350 | ||
25351 | /* Clear memory outside IRQ disabled fastpath loop */ | |
25352 | if (unlikely(flags & __GFP_ZERO)) { | |
5c015b7c | 25353 | @@ -3275,7 +3339,7 @@ static void |
1a6e0f06 JK |
25354 | init_kmem_cache_node(struct kmem_cache_node *n) |
25355 | { | |
25356 | n->nr_partial = 0; | |
25357 | - spin_lock_init(&n->list_lock); | |
25358 | + raw_spin_lock_init(&n->list_lock); | |
25359 | INIT_LIST_HEAD(&n->partial); | |
25360 | #ifdef CONFIG_SLUB_DEBUG | |
25361 | atomic_long_set(&n->nr_slabs, 0); | |
5c015b7c | 25362 | @@ -3619,6 +3683,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25363 | const char *text) |
25364 | { | |
25365 | #ifdef CONFIG_SLUB_DEBUG | |
25366 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25367 | + /* XXX move out of irq-off section */ | |
25368 | + slab_err(s, page, text, s->name); | |
25369 | +#else | |
25370 | void *addr = page_address(page); | |
25371 | void *p; | |
25372 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * | |
5c015b7c | 25373 | @@ -3639,6 +3707,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, |
1a6e0f06 JK |
25374 | slab_unlock(page); |
25375 | kfree(map); | |
25376 | #endif | |
25377 | +#endif | |
25378 | } | |
25379 | ||
25380 | /* | |
5c015b7c | 25381 | @@ -3652,7 +3721,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
1a6e0f06 JK |
25382 | struct page *page, *h; |
25383 | ||
25384 | BUG_ON(irqs_disabled()); | |
25385 | - spin_lock_irq(&n->list_lock); | |
25386 | + raw_spin_lock_irq(&n->list_lock); | |
25387 | list_for_each_entry_safe(page, h, &n->partial, lru) { | |
25388 | if (!page->inuse) { | |
25389 | remove_partial(n, page); | |
5c015b7c | 25390 | @@ -3662,7 +3731,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) |
1a6e0f06 JK |
25391 | "Objects remaining in %s on __kmem_cache_shutdown()"); |
25392 | } | |
25393 | } | |
25394 | - spin_unlock_irq(&n->list_lock); | |
25395 | + raw_spin_unlock_irq(&n->list_lock); | |
25396 | ||
25397 | list_for_each_entry_safe(page, h, &discard, lru) | |
25398 | discard_slab(s, page); | |
33c7bf0f | 25399 | @@ -3905,7 +3974,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) |
1a6e0f06 JK |
25400 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) |
25401 | INIT_LIST_HEAD(promote + i); | |
25402 | ||
25403 | - spin_lock_irqsave(&n->list_lock, flags); | |
25404 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25405 | ||
25406 | /* | |
25407 | * Build lists of slabs to discard or promote. | |
33c7bf0f | 25408 | @@ -3936,7 +4005,7 @@ int __kmem_cache_shrink(struct kmem_cache *s) |
1a6e0f06 JK |
25409 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) |
25410 | list_splice(promote + i, &n->partial); | |
25411 | ||
25412 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25413 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25414 | ||
25415 | /* Release empty slabs */ | |
25416 | list_for_each_entry_safe(page, t, &discard, lru) | |
33c7bf0f | 25417 | @@ -4112,6 +4181,12 @@ void __init kmem_cache_init(void) |
1a6e0f06 JK |
25418 | { |
25419 | static __initdata struct kmem_cache boot_kmem_cache, | |
25420 | boot_kmem_cache_node; | |
25421 | + int cpu; | |
25422 | + | |
25423 | + for_each_possible_cpu(cpu) { | |
25424 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
25425 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
25426 | + } | |
25427 | ||
25428 | if (debug_guardpage_minorder()) | |
25429 | slub_max_order = 0; | |
33c7bf0f | 25430 | @@ -4320,7 +4395,7 @@ static int validate_slab_node(struct kmem_cache *s, |
1a6e0f06 JK |
25431 | struct page *page; |
25432 | unsigned long flags; | |
25433 | ||
25434 | - spin_lock_irqsave(&n->list_lock, flags); | |
25435 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25436 | ||
25437 | list_for_each_entry(page, &n->partial, lru) { | |
25438 | validate_slab_slab(s, page, map); | |
33c7bf0f | 25439 | @@ -4342,7 +4417,7 @@ static int validate_slab_node(struct kmem_cache *s, |
1a6e0f06 JK |
25440 | s->name, count, atomic_long_read(&n->nr_slabs)); |
25441 | ||
25442 | out: | |
25443 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25444 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25445 | return count; | |
25446 | } | |
25447 | ||
33c7bf0f | 25448 | @@ -4530,12 +4605,12 @@ static int list_locations(struct kmem_cache *s, char *buf, |
1a6e0f06 JK |
25449 | if (!atomic_long_read(&n->nr_slabs)) |
25450 | continue; | |
25451 | ||
25452 | - spin_lock_irqsave(&n->list_lock, flags); | |
25453 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
25454 | list_for_each_entry(page, &n->partial, lru) | |
25455 | process_slab(&t, s, page, alloc, map); | |
25456 | list_for_each_entry(page, &n->full, lru) | |
25457 | process_slab(&t, s, page, alloc, map); | |
25458 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
25459 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
25460 | } | |
25461 | ||
25462 | for (i = 0; i < t.count; i++) { | |
25463 | diff --git a/mm/swap.c b/mm/swap.c | |
c7c16703 | 25464 | index 4dcf852e1e6d..69c3a5b24060 100644 |
1a6e0f06 JK |
25465 | --- a/mm/swap.c |
25466 | +++ b/mm/swap.c | |
25467 | @@ -32,6 +32,7 @@ | |
25468 | #include <linux/memcontrol.h> | |
25469 | #include <linux/gfp.h> | |
25470 | #include <linux/uio.h> | |
25471 | +#include <linux/locallock.h> | |
25472 | #include <linux/hugetlb.h> | |
25473 | #include <linux/page_idle.h> | |
25474 | ||
25475 | @@ -50,6 +51,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); | |
25476 | #ifdef CONFIG_SMP | |
25477 | static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
25478 | #endif | |
25479 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
25480 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
25481 | ||
25482 | /* | |
25483 | * This path almost never happens for VM activity - pages are normally | |
25484 | @@ -240,11 +243,11 @@ void rotate_reclaimable_page(struct page *page) | |
25485 | unsigned long flags; | |
25486 | ||
25487 | get_page(page); | |
25488 | - local_irq_save(flags); | |
25489 | + local_lock_irqsave(rotate_lock, flags); | |
25490 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
25491 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
25492 | pagevec_move_tail(pvec); | |
25493 | - local_irq_restore(flags); | |
25494 | + local_unlock_irqrestore(rotate_lock, flags); | |
25495 | } | |
25496 | } | |
25497 | ||
25498 | @@ -294,12 +297,13 @@ void activate_page(struct page *page) | |
25499 | { | |
25500 | page = compound_head(page); | |
25501 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
25502 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
25503 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
25504 | + activate_page_pvecs); | |
25505 | ||
25506 | get_page(page); | |
25507 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
25508 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
25509 | - put_cpu_var(activate_page_pvecs); | |
25510 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
25511 | } | |
25512 | } | |
25513 | ||
25514 | @@ -326,7 +330,7 @@ void activate_page(struct page *page) | |
25515 | ||
25516 | static void __lru_cache_activate_page(struct page *page) | |
25517 | { | |
25518 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
25519 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
25520 | int i; | |
25521 | ||
25522 | /* | |
25523 | @@ -348,7 +352,7 @@ static void __lru_cache_activate_page(struct page *page) | |
25524 | } | |
25525 | } | |
25526 | ||
25527 | - put_cpu_var(lru_add_pvec); | |
25528 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
25529 | } | |
25530 | ||
25531 | /* | |
25532 | @@ -390,12 +394,12 @@ EXPORT_SYMBOL(mark_page_accessed); | |
25533 | ||
25534 | static void __lru_cache_add(struct page *page) | |
25535 | { | |
25536 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
25537 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
25538 | ||
25539 | get_page(page); | |
25540 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
25541 | __pagevec_lru_add(pvec); | |
25542 | - put_cpu_var(lru_add_pvec); | |
25543 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
25544 | } | |
25545 | ||
25546 | /** | |
25547 | @@ -593,9 +597,15 @@ void lru_add_drain_cpu(int cpu) | |
25548 | unsigned long flags; | |
25549 | ||
25550 | /* No harm done if a racing interrupt already did this */ | |
25551 | - local_irq_save(flags); | |
25552 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25553 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
25554 | pagevec_move_tail(pvec); | |
25555 | - local_irq_restore(flags); | |
25556 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
25557 | +#else | |
25558 | + local_lock_irqsave(rotate_lock, flags); | |
25559 | + pagevec_move_tail(pvec); | |
25560 | + local_unlock_irqrestore(rotate_lock, flags); | |
25561 | +#endif | |
25562 | } | |
25563 | ||
25564 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
25565 | @@ -627,11 +637,12 @@ void deactivate_file_page(struct page *page) | |
25566 | return; | |
25567 | ||
25568 | if (likely(get_page_unless_zero(page))) { | |
25569 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
25570 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
25571 | + lru_deactivate_file_pvecs); | |
25572 | ||
25573 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
25574 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
25575 | - put_cpu_var(lru_deactivate_file_pvecs); | |
25576 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
25577 | } | |
25578 | } | |
25579 | ||
25580 | @@ -646,27 +657,31 @@ void deactivate_file_page(struct page *page) | |
25581 | void deactivate_page(struct page *page) | |
25582 | { | |
25583 | if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { | |
25584 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); | |
25585 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
25586 | + lru_deactivate_pvecs); | |
25587 | ||
25588 | get_page(page); | |
25589 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
25590 | pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); | |
25591 | - put_cpu_var(lru_deactivate_pvecs); | |
25592 | + put_locked_var(swapvec_lock, lru_deactivate_pvecs); | |
25593 | } | |
25594 | } | |
25595 | ||
25596 | void lru_add_drain(void) | |
25597 | { | |
25598 | - lru_add_drain_cpu(get_cpu()); | |
25599 | - put_cpu(); | |
25600 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
25601 | + local_unlock_cpu(swapvec_lock); | |
25602 | } | |
25603 | ||
25604 | -static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
25605 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25606 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
25607 | { | |
25608 | - lru_add_drain(); | |
25609 | + local_lock_on(swapvec_lock, cpu); | |
25610 | + lru_add_drain_cpu(cpu); | |
25611 | + local_unlock_on(swapvec_lock, cpu); | |
25612 | } | |
25613 | ||
25614 | -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
25615 | +#else | |
25616 | ||
25617 | /* | |
25618 | * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM | |
25619 | @@ -686,6 +701,22 @@ static int __init lru_init(void) | |
25620 | } | |
25621 | early_initcall(lru_init); | |
25622 | ||
25623 | +static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
25624 | +{ | |
25625 | + lru_add_drain(); | |
25626 | +} | |
25627 | + | |
25628 | +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
25629 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
25630 | +{ | |
25631 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
25632 | + | |
25633 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
25634 | + queue_work_on(cpu, lru_add_drain_wq, work); | |
25635 | + cpumask_set_cpu(cpu, has_work); | |
25636 | +} | |
25637 | +#endif | |
25638 | + | |
25639 | void lru_add_drain_all(void) | |
25640 | { | |
25641 | static DEFINE_MUTEX(lock); | |
25642 | @@ -697,21 +728,18 @@ void lru_add_drain_all(void) | |
25643 | cpumask_clear(&has_work); | |
25644 | ||
25645 | for_each_online_cpu(cpu) { | |
25646 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
25647 | - | |
25648 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || | |
25649 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
25650 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
25651 | pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || | |
25652 | - need_activate_page_drain(cpu)) { | |
25653 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
25654 | - queue_work_on(cpu, lru_add_drain_wq, work); | |
25655 | - cpumask_set_cpu(cpu, &has_work); | |
25656 | - } | |
25657 | + need_activate_page_drain(cpu)) | |
25658 | + remote_lru_add_drain(cpu, &has_work); | |
25659 | } | |
25660 | ||
25661 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
25662 | for_each_cpu(cpu, &has_work) | |
25663 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
25664 | +#endif | |
25665 | ||
25666 | put_online_cpus(); | |
25667 | mutex_unlock(&lock); | |
25668 | diff --git a/mm/truncate.c b/mm/truncate.c | |
c7c16703 | 25669 | index 8d8c62d89e6d..5bf1bd25d077 100644 |
1a6e0f06 JK |
25670 | --- a/mm/truncate.c |
25671 | +++ b/mm/truncate.c | |
25672 | @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping, | |
25673 | * protected by mapping->tree_lock. | |
25674 | */ | |
25675 | if (!workingset_node_shadows(node) && | |
25676 | - !list_empty(&node->private_list)) | |
25677 | - list_lru_del(&workingset_shadow_nodes, | |
25678 | + !list_empty(&node->private_list)) { | |
25679 | + local_lock(workingset_shadow_lock); | |
25680 | + list_lru_del(&__workingset_shadow_nodes, | |
25681 | &node->private_list); | |
25682 | + local_unlock(workingset_shadow_lock); | |
25683 | + } | |
25684 | __radix_tree_delete_node(&mapping->page_tree, node); | |
25685 | unlock: | |
25686 | spin_unlock_irq(&mapping->tree_lock); | |
25687 | diff --git a/mm/vmalloc.c b/mm/vmalloc.c | |
c7c16703 | 25688 | index f2481cb4e6b2..db4de08fa97c 100644 |
1a6e0f06 JK |
25689 | --- a/mm/vmalloc.c |
25690 | +++ b/mm/vmalloc.c | |
25691 | @@ -845,7 +845,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
25692 | struct vmap_block *vb; | |
25693 | struct vmap_area *va; | |
25694 | unsigned long vb_idx; | |
25695 | - int node, err; | |
25696 | + int node, err, cpu; | |
25697 | void *vaddr; | |
25698 | ||
25699 | node = numa_node_id(); | |
25700 | @@ -888,11 +888,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
25701 | BUG_ON(err); | |
25702 | radix_tree_preload_end(); | |
25703 | ||
25704 | - vbq = &get_cpu_var(vmap_block_queue); | |
25705 | + cpu = get_cpu_light(); | |
25706 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
25707 | spin_lock(&vbq->lock); | |
25708 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
25709 | spin_unlock(&vbq->lock); | |
25710 | - put_cpu_var(vmap_block_queue); | |
25711 | + put_cpu_light(); | |
25712 | ||
25713 | return vaddr; | |
25714 | } | |
25715 | @@ -961,6 +962,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
25716 | struct vmap_block *vb; | |
25717 | void *vaddr = NULL; | |
25718 | unsigned int order; | |
25719 | + int cpu; | |
25720 | ||
25721 | BUG_ON(offset_in_page(size)); | |
25722 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
25723 | @@ -975,7 +977,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
25724 | order = get_order(size); | |
25725 | ||
25726 | rcu_read_lock(); | |
25727 | - vbq = &get_cpu_var(vmap_block_queue); | |
25728 | + cpu = get_cpu_light(); | |
25729 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
25730 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
25731 | unsigned long pages_off; | |
25732 | ||
25733 | @@ -998,7 +1001,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
25734 | break; | |
25735 | } | |
25736 | ||
25737 | - put_cpu_var(vmap_block_queue); | |
25738 | + put_cpu_light(); | |
25739 | rcu_read_unlock(); | |
25740 | ||
25741 | /* Allocate new block if nothing was found */ | |
25742 | diff --git a/mm/vmstat.c b/mm/vmstat.c | |
c7c16703 | 25743 | index 604f26a4f696..312006d2db50 100644 |
1a6e0f06 JK |
25744 | --- a/mm/vmstat.c |
25745 | +++ b/mm/vmstat.c | |
25746 | @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
25747 | long x; | |
25748 | long t; | |
25749 | ||
25750 | + preempt_disable_rt(); | |
25751 | x = delta + __this_cpu_read(*p); | |
25752 | ||
25753 | t = __this_cpu_read(pcp->stat_threshold); | |
25754 | @@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
25755 | x = 0; | |
25756 | } | |
25757 | __this_cpu_write(*p, x); | |
25758 | + preempt_enable_rt(); | |
25759 | } | |
25760 | EXPORT_SYMBOL(__mod_zone_page_state); | |
25761 | ||
25762 | @@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
25763 | long x; | |
25764 | long t; | |
25765 | ||
25766 | + preempt_disable_rt(); | |
25767 | x = delta + __this_cpu_read(*p); | |
25768 | ||
25769 | t = __this_cpu_read(pcp->stat_threshold); | |
25770 | @@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
25771 | x = 0; | |
25772 | } | |
25773 | __this_cpu_write(*p, x); | |
25774 | + preempt_enable_rt(); | |
25775 | } | |
25776 | EXPORT_SYMBOL(__mod_node_page_state); | |
25777 | ||
25778 | @@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
25779 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
25780 | s8 v, t; | |
25781 | ||
25782 | + preempt_disable_rt(); | |
25783 | v = __this_cpu_inc_return(*p); | |
25784 | t = __this_cpu_read(pcp->stat_threshold); | |
25785 | if (unlikely(v > t)) { | |
25786 | @@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
25787 | zone_page_state_add(v + overstep, zone, item); | |
25788 | __this_cpu_write(*p, -overstep); | |
25789 | } | |
25790 | + preempt_enable_rt(); | |
25791 | } | |
25792 | ||
25793 | void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25794 | @@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25795 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
25796 | s8 v, t; | |
25797 | ||
25798 | + preempt_disable_rt(); | |
25799 | v = __this_cpu_inc_return(*p); | |
25800 | t = __this_cpu_read(pcp->stat_threshold); | |
25801 | if (unlikely(v > t)) { | |
25802 | @@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25803 | node_page_state_add(v + overstep, pgdat, item); | |
25804 | __this_cpu_write(*p, -overstep); | |
25805 | } | |
25806 | + preempt_enable_rt(); | |
25807 | } | |
25808 | ||
25809 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
25810 | @@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
25811 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
25812 | s8 v, t; | |
25813 | ||
25814 | + preempt_disable_rt(); | |
25815 | v = __this_cpu_dec_return(*p); | |
25816 | t = __this_cpu_read(pcp->stat_threshold); | |
25817 | if (unlikely(v < - t)) { | |
25818 | @@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
25819 | zone_page_state_add(v - overstep, zone, item); | |
25820 | __this_cpu_write(*p, overstep); | |
25821 | } | |
25822 | + preempt_enable_rt(); | |
25823 | } | |
25824 | ||
25825 | void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25826 | @@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25827 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
25828 | s8 v, t; | |
25829 | ||
25830 | + preempt_disable_rt(); | |
25831 | v = __this_cpu_dec_return(*p); | |
25832 | t = __this_cpu_read(pcp->stat_threshold); | |
25833 | if (unlikely(v < - t)) { | |
25834 | @@ -374,6 +385,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
25835 | node_page_state_add(v - overstep, pgdat, item); | |
25836 | __this_cpu_write(*p, overstep); | |
25837 | } | |
25838 | + preempt_enable_rt(); | |
25839 | } | |
25840 | ||
25841 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
25842 | diff --git a/mm/workingset.c b/mm/workingset.c | |
7c18450a | 25843 | index 4c4f05655e6e..b97b1e87b54c 100644 |
1a6e0f06 JK |
25844 | --- a/mm/workingset.c |
25845 | +++ b/mm/workingset.c | |
25846 | @@ -334,7 +334,8 @@ void workingset_activation(struct page *page) | |
25847 | * point where they would still be useful. | |
25848 | */ | |
25849 | ||
25850 | -struct list_lru workingset_shadow_nodes; | |
25851 | +struct list_lru __workingset_shadow_nodes; | |
25852 | +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
25853 | ||
25854 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
25855 | struct shrink_control *sc) | |
25856 | @@ -344,9 +345,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
25857 | unsigned long pages; | |
25858 | ||
25859 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
25860 | - local_irq_disable(); | |
25861 | - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | |
25862 | - local_irq_enable(); | |
25863 | + local_lock_irq(workingset_shadow_lock); | |
25864 | + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); | |
25865 | + local_unlock_irq(workingset_shadow_lock); | |
25866 | ||
c7c16703 | 25867 | if (sc->memcg) { |
1a6e0f06 JK |
25868 | pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, |
25869 | @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |
25870 | spin_unlock(&mapping->tree_lock); | |
25871 | ret = LRU_REMOVED_RETRY; | |
25872 | out: | |
25873 | - local_irq_enable(); | |
25874 | + local_unlock_irq(workingset_shadow_lock); | |
25875 | cond_resched(); | |
25876 | - local_irq_disable(); | |
25877 | + local_lock_irq(workingset_shadow_lock); | |
25878 | spin_lock(lru_lock); | |
25879 | return ret; | |
25880 | } | |
25881 | @@ -451,10 +452,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |
25882 | unsigned long ret; | |
25883 | ||
25884 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
25885 | - local_irq_disable(); | |
25886 | - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | |
25887 | + local_lock_irq(workingset_shadow_lock); | |
25888 | + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc, | |
25889 | shadow_lru_isolate, NULL); | |
25890 | - local_irq_enable(); | |
25891 | + local_unlock_irq(workingset_shadow_lock); | |
25892 | return ret; | |
25893 | } | |
25894 | ||
25895 | @@ -492,7 +493,7 @@ static int __init workingset_init(void) | |
25896 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | |
25897 | timestamp_bits, max_order, bucket_order); | |
25898 | ||
7c18450a JK |
25899 | - ret = __list_lru_init(&workingset_shadow_nodes, true, &shadow_nodes_key); |
25900 | + ret = __list_lru_init(&__workingset_shadow_nodes, true, &shadow_nodes_key); | |
1a6e0f06 JK |
25901 | if (ret) |
25902 | goto err; | |
25903 | ret = register_shrinker(&workingset_shadow_shrinker); | |
25904 | @@ -500,7 +501,7 @@ static int __init workingset_init(void) | |
25905 | goto err_list_lru; | |
25906 | return 0; | |
25907 | err_list_lru: | |
25908 | - list_lru_destroy(&workingset_shadow_nodes); | |
25909 | + list_lru_destroy(&__workingset_shadow_nodes); | |
25910 | err: | |
25911 | return ret; | |
25912 | } | |
25913 | diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c | |
7c18450a | 25914 | index 1689bb58e0d1..e52a8cb6aa5a 100644 |
1a6e0f06 JK |
25915 | --- a/mm/zsmalloc.c |
25916 | +++ b/mm/zsmalloc.c | |
25917 | @@ -53,6 +53,7 @@ | |
25918 | #include <linux/mount.h> | |
25919 | #include <linux/migrate.h> | |
25920 | #include <linux/pagemap.h> | |
25921 | +#include <linux/locallock.h> | |
25922 | ||
25923 | #define ZSPAGE_MAGIC 0x58 | |
25924 | ||
25925 | @@ -70,9 +71,22 @@ | |
25926 | */ | |
25927 | #define ZS_MAX_ZSPAGE_ORDER 2 | |
25928 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | |
25929 | - | |
25930 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | |
25931 | ||
25932 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25933 | + | |
25934 | +struct zsmalloc_handle { | |
25935 | + unsigned long addr; | |
25936 | + struct mutex lock; | |
25937 | +}; | |
25938 | + | |
25939 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) | |
25940 | + | |
25941 | +#else | |
25942 | + | |
25943 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) | |
25944 | +#endif | |
25945 | + | |
25946 | /* | |
25947 | * Object location (<PFN>, <obj_idx>) is encoded as | |
25948 | * as single (unsigned long) handle value. | |
25949 | @@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} | |
25950 | ||
25951 | static int create_cache(struct zs_pool *pool) | |
25952 | { | |
25953 | - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | |
25954 | + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, | |
25955 | 0, 0, NULL); | |
25956 | if (!pool->handle_cachep) | |
25957 | return 1; | |
25958 | @@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool *pool) | |
25959 | ||
25960 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) | |
25961 | { | |
25962 | - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | |
25963 | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
25964 | + void *p; | |
25965 | + | |
25966 | + p = kmem_cache_alloc(pool->handle_cachep, | |
25967 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
25968 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25969 | + if (p) { | |
25970 | + struct zsmalloc_handle *zh = p; | |
25971 | + | |
25972 | + mutex_init(&zh->lock); | |
25973 | + } | |
25974 | +#endif | |
25975 | + return (unsigned long)p; | |
25976 | } | |
25977 | ||
25978 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25979 | +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) | |
25980 | +{ | |
25981 | + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); | |
25982 | +} | |
25983 | +#endif | |
25984 | + | |
25985 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) | |
25986 | { | |
25987 | kmem_cache_free(pool->handle_cachep, (void *)handle); | |
25988 | @@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) | |
25989 | ||
25990 | static void record_obj(unsigned long handle, unsigned long obj) | |
25991 | { | |
25992 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25993 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
25994 | + | |
25995 | + WRITE_ONCE(zh->addr, obj); | |
25996 | +#else | |
25997 | /* | |
25998 | * lsb of @obj represents handle lock while other bits | |
25999 | * represent object value the handle is pointing so | |
26000 | * updating shouldn't do store tearing. | |
26001 | */ | |
26002 | WRITE_ONCE(*(unsigned long *)handle, obj); | |
26003 | +#endif | |
26004 | } | |
26005 | ||
26006 | /* zpool driver */ | |
26007 | @@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc"); | |
26008 | ||
26009 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
26010 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
26011 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
26012 | ||
26013 | static bool is_zspage_isolated(struct zspage *zspage) | |
26014 | { | |
26015 | @@ -902,7 +940,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) | |
26016 | ||
26017 | static unsigned long handle_to_obj(unsigned long handle) | |
26018 | { | |
26019 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26020 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
26021 | + | |
26022 | + return zh->addr; | |
26023 | +#else | |
26024 | return *(unsigned long *)handle; | |
26025 | +#endif | |
26026 | } | |
26027 | ||
26028 | static unsigned long obj_to_head(struct page *page, void *obj) | |
26029 | @@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) | |
26030 | ||
26031 | static inline int testpin_tag(unsigned long handle) | |
26032 | { | |
26033 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26034 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
26035 | + | |
26036 | + return mutex_is_locked(&zh->lock); | |
26037 | +#else | |
26038 | return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); | |
26039 | +#endif | |
26040 | } | |
26041 | ||
26042 | static inline int trypin_tag(unsigned long handle) | |
26043 | { | |
26044 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26045 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
26046 | + | |
26047 | + return mutex_trylock(&zh->lock); | |
26048 | +#else | |
26049 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
26050 | +#endif | |
26051 | } | |
26052 | ||
26053 | static void pin_tag(unsigned long handle) | |
26054 | { | |
26055 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26056 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
26057 | + | |
26058 | + return mutex_lock(&zh->lock); | |
26059 | +#else | |
26060 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
26061 | +#endif | |
26062 | } | |
26063 | ||
26064 | static void unpin_tag(unsigned long handle) | |
26065 | { | |
26066 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26067 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
26068 | + | |
26069 | + return mutex_unlock(&zh->lock); | |
26070 | +#else | |
26071 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
26072 | +#endif | |
26073 | } | |
26074 | ||
26075 | static void reset_page(struct page *page) | |
26076 | @@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |
26077 | class = pool->size_class[class_idx]; | |
26078 | off = (class->size * obj_idx) & ~PAGE_MASK; | |
26079 | ||
26080 | - area = &get_cpu_var(zs_map_area); | |
26081 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); | |
26082 | area->vm_mm = mm; | |
26083 | if (off + class->size <= PAGE_SIZE) { | |
26084 | /* this object is contained entirely within a page */ | |
26085 | @@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |
26086 | ||
26087 | __zs_unmap_object(area, pages, off, class->size); | |
26088 | } | |
26089 | - put_cpu_var(zs_map_area); | |
26090 | + put_locked_var(zs_map_area_lock, zs_map_area); | |
26091 | ||
26092 | migrate_read_unlock(zspage); | |
26093 | unpin_tag(handle); | |
26094 | diff --git a/net/core/dev.c b/net/core/dev.c | |
33c7bf0f | 26095 | index 2e04fd188081..3ba60ef8c79e 100644 |
1a6e0f06 JK |
26096 | --- a/net/core/dev.c |
26097 | +++ b/net/core/dev.c | |
26098 | @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS; | |
26099 | static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); | |
26100 | ||
26101 | static seqcount_t devnet_rename_seq; | |
26102 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
26103 | ||
26104 | static inline void dev_base_seq_inc(struct net *net) | |
26105 | { | |
26106 | @@ -211,14 +212,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |
26107 | static inline void rps_lock(struct softnet_data *sd) | |
26108 | { | |
26109 | #ifdef CONFIG_RPS | |
26110 | - spin_lock(&sd->input_pkt_queue.lock); | |
26111 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
26112 | #endif | |
26113 | } | |
26114 | ||
26115 | static inline void rps_unlock(struct softnet_data *sd) | |
26116 | { | |
26117 | #ifdef CONFIG_RPS | |
26118 | - spin_unlock(&sd->input_pkt_queue.lock); | |
26119 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
26120 | #endif | |
26121 | } | |
26122 | ||
26123 | @@ -888,7 +889,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex) | |
26124 | strcpy(name, dev->name); | |
26125 | rcu_read_unlock(); | |
26126 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
26127 | - cond_resched(); | |
26128 | + mutex_lock(&devnet_rename_mutex); | |
26129 | + mutex_unlock(&devnet_rename_mutex); | |
26130 | goto retry; | |
26131 | } | |
26132 | ||
26133 | @@ -1157,20 +1159,17 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
26134 | if (dev->flags & IFF_UP) | |
26135 | return -EBUSY; | |
26136 | ||
26137 | - write_seqcount_begin(&devnet_rename_seq); | |
26138 | + mutex_lock(&devnet_rename_mutex); | |
26139 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
26140 | ||
26141 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
26142 | - write_seqcount_end(&devnet_rename_seq); | |
26143 | - return 0; | |
26144 | - } | |
26145 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
26146 | + goto outunlock; | |
26147 | ||
26148 | memcpy(oldname, dev->name, IFNAMSIZ); | |
26149 | ||
26150 | err = dev_get_valid_name(net, dev, newname); | |
26151 | - if (err < 0) { | |
26152 | - write_seqcount_end(&devnet_rename_seq); | |
26153 | - return err; | |
26154 | - } | |
26155 | + if (err < 0) | |
26156 | + goto outunlock; | |
26157 | ||
26158 | if (oldname[0] && !strchr(oldname, '%')) | |
26159 | netdev_info(dev, "renamed from %s\n", oldname); | |
26160 | @@ -1183,11 +1182,12 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
26161 | if (ret) { | |
26162 | memcpy(dev->name, oldname, IFNAMSIZ); | |
26163 | dev->name_assign_type = old_assign_type; | |
26164 | - write_seqcount_end(&devnet_rename_seq); | |
26165 | - return ret; | |
26166 | + err = ret; | |
26167 | + goto outunlock; | |
26168 | } | |
26169 | ||
26170 | - write_seqcount_end(&devnet_rename_seq); | |
26171 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
26172 | + mutex_unlock(&devnet_rename_mutex); | |
26173 | ||
26174 | netdev_adjacent_rename_links(dev, oldname); | |
26175 | ||
26176 | @@ -1208,7 +1208,8 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
26177 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | |
26178 | if (err >= 0) { | |
26179 | err = ret; | |
26180 | - write_seqcount_begin(&devnet_rename_seq); | |
26181 | + mutex_lock(&devnet_rename_mutex); | |
26182 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
26183 | memcpy(dev->name, oldname, IFNAMSIZ); | |
26184 | memcpy(oldname, newname, IFNAMSIZ); | |
26185 | dev->name_assign_type = old_assign_type; | |
26186 | @@ -1221,6 +1222,11 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
26187 | } | |
26188 | ||
26189 | return err; | |
26190 | + | |
26191 | +outunlock: | |
26192 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
26193 | + mutex_unlock(&devnet_rename_mutex); | |
26194 | + return err; | |
26195 | } | |
26196 | ||
26197 | /** | |
33c7bf0f | 26198 | @@ -2285,6 +2291,7 @@ static void __netif_reschedule(struct Qdisc *q) |
1a6e0f06 JK |
26199 | sd->output_queue_tailp = &q->next_sched; |
26200 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
26201 | local_irq_restore(flags); | |
26202 | + preempt_check_resched_rt(); | |
26203 | } | |
26204 | ||
26205 | void __netif_schedule(struct Qdisc *q) | |
33c7bf0f | 26206 | @@ -2366,6 +2373,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) |
1a6e0f06 JK |
26207 | __this_cpu_write(softnet_data.completion_queue, skb); |
26208 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
26209 | local_irq_restore(flags); | |
26210 | + preempt_check_resched_rt(); | |
26211 | } | |
26212 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
26213 | ||
33c7bf0f | 26214 | @@ -3100,7 +3108,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, |
1a6e0f06 JK |
26215 | * This permits qdisc->running owner to get the lock more |
26216 | * often and dequeue packets faster. | |
26217 | */ | |
26218 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26219 | + contended = true; | |
26220 | +#else | |
26221 | contended = qdisc_is_running(q); | |
26222 | +#endif | |
26223 | if (unlikely(contended)) | |
26224 | spin_lock(&q->busylock); | |
26225 | ||
33c7bf0f | 26226 | @@ -3163,8 +3175,10 @@ static void skb_update_prio(struct sk_buff *skb) |
1a6e0f06 JK |
26227 | #define skb_update_prio(skb) |
26228 | #endif | |
26229 | ||
26230 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
26231 | DEFINE_PER_CPU(int, xmit_recursion); | |
26232 | EXPORT_SYMBOL(xmit_recursion); | |
26233 | +#endif | |
26234 | ||
26235 | /** | |
26236 | * dev_loopback_xmit - loop back @skb | |
33c7bf0f | 26237 | @@ -3398,8 +3412,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) |
1a6e0f06 JK |
26238 | int cpu = smp_processor_id(); /* ok because BHs are off */ |
26239 | ||
26240 | if (txq->xmit_lock_owner != cpu) { | |
26241 | - if (unlikely(__this_cpu_read(xmit_recursion) > | |
26242 | - XMIT_RECURSION_LIMIT)) | |
26243 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) | |
26244 | goto recursion_alert; | |
26245 | ||
26246 | skb = validate_xmit_skb(skb, dev); | |
33c7bf0f | 26247 | @@ -3409,9 +3422,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) |
1a6e0f06 JK |
26248 | HARD_TX_LOCK(dev, txq, cpu); |
26249 | ||
26250 | if (!netif_xmit_stopped(txq)) { | |
26251 | - __this_cpu_inc(xmit_recursion); | |
26252 | + xmit_rec_inc(); | |
26253 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
26254 | - __this_cpu_dec(xmit_recursion); | |
26255 | + xmit_rec_dec(); | |
26256 | if (dev_xmit_complete(rc)) { | |
26257 | HARD_TX_UNLOCK(dev, txq); | |
26258 | goto out; | |
33c7bf0f | 26259 | @@ -3785,6 +3798,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, |
1a6e0f06 JK |
26260 | rps_unlock(sd); |
26261 | ||
26262 | local_irq_restore(flags); | |
26263 | + preempt_check_resched_rt(); | |
26264 | ||
26265 | atomic_long_inc(&skb->dev->rx_dropped); | |
26266 | kfree_skb(skb); | |
33c7bf0f | 26267 | @@ -3803,7 +3817,7 @@ static int netif_rx_internal(struct sk_buff *skb) |
1a6e0f06 JK |
26268 | struct rps_dev_flow voidflow, *rflow = &voidflow; |
26269 | int cpu; | |
26270 | ||
26271 | - preempt_disable(); | |
26272 | + migrate_disable(); | |
26273 | rcu_read_lock(); | |
26274 | ||
26275 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
33c7bf0f | 26276 | @@ -3813,13 +3827,13 @@ static int netif_rx_internal(struct sk_buff *skb) |
1a6e0f06 JK |
26277 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); |
26278 | ||
26279 | rcu_read_unlock(); | |
26280 | - preempt_enable(); | |
26281 | + migrate_enable(); | |
26282 | } else | |
26283 | #endif | |
26284 | { | |
26285 | unsigned int qtail; | |
26286 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | |
26287 | - put_cpu(); | |
26288 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
26289 | + put_cpu_light(); | |
26290 | } | |
26291 | return ret; | |
26292 | } | |
33c7bf0f | 26293 | @@ -3853,11 +3867,9 @@ int netif_rx_ni(struct sk_buff *skb) |
1a6e0f06 JK |
26294 | |
26295 | trace_netif_rx_ni_entry(skb); | |
26296 | ||
26297 | - preempt_disable(); | |
26298 | + local_bh_disable(); | |
26299 | err = netif_rx_internal(skb); | |
26300 | - if (local_softirq_pending()) | |
26301 | - do_softirq(); | |
26302 | - preempt_enable(); | |
26303 | + local_bh_enable(); | |
26304 | ||
26305 | return err; | |
26306 | } | |
33c7bf0f | 26307 | @@ -4336,7 +4348,7 @@ static void flush_backlog(struct work_struct *work) |
1a6e0f06 | 26308 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { |
c7c16703 | 26309 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
26310 | __skb_unlink(skb, &sd->input_pkt_queue); |
26311 | - kfree_skb(skb); | |
26312 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
26313 | input_queue_head_incr(sd); | |
26314 | } | |
26315 | } | |
33c7bf0f | 26316 | @@ -4346,11 +4358,14 @@ static void flush_backlog(struct work_struct *work) |
1a6e0f06 | 26317 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { |
c7c16703 | 26318 | if (skb->dev->reg_state == NETREG_UNREGISTERING) { |
1a6e0f06 JK |
26319 | __skb_unlink(skb, &sd->process_queue); |
26320 | - kfree_skb(skb); | |
26321 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
26322 | input_queue_head_incr(sd); | |
26323 | } | |
26324 | } | |
1a6e0f06 JK |
26325 | + if (!skb_queue_empty(&sd->tofree_queue)) |
26326 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
c7c16703 JK |
26327 | local_bh_enable(); |
26328 | + | |
1a6e0f06 JK |
26329 | } |
26330 | ||
c7c16703 | 26331 | static void flush_all_backlogs(void) |
33c7bf0f | 26332 | @@ -4831,6 +4846,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) |
1a6e0f06 JK |
26333 | sd->rps_ipi_list = NULL; |
26334 | ||
26335 | local_irq_enable(); | |
26336 | + preempt_check_resched_rt(); | |
26337 | ||
26338 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
26339 | while (remsd) { | |
33c7bf0f | 26340 | @@ -4844,6 +4860,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) |
1a6e0f06 JK |
26341 | } else |
26342 | #endif | |
26343 | local_irq_enable(); | |
26344 | + preempt_check_resched_rt(); | |
26345 | } | |
26346 | ||
26347 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
33c7bf0f | 26348 | @@ -4873,7 +4890,9 @@ static int process_backlog(struct napi_struct *napi, int quota) |
c7c16703 JK |
26349 | while (again) { |
26350 | struct sk_buff *skb; | |
26351 | ||
26352 | + local_irq_disable(); | |
26353 | while ((skb = __skb_dequeue(&sd->process_queue))) { | |
26354 | + local_irq_enable(); | |
26355 | rcu_read_lock(); | |
26356 | __netif_receive_skb(skb); | |
26357 | rcu_read_unlock(); | |
33c7bf0f | 26358 | @@ -4881,9 +4900,9 @@ static int process_backlog(struct napi_struct *napi, int quota) |
c7c16703 JK |
26359 | if (++work >= quota) |
26360 | return work; | |
26361 | ||
26362 | + local_irq_disable(); | |
26363 | } | |
26364 | ||
26365 | - local_irq_disable(); | |
26366 | rps_lock(sd); | |
26367 | if (skb_queue_empty(&sd->input_pkt_queue)) { | |
26368 | /* | |
33c7bf0f | 26369 | @@ -4921,9 +4940,11 @@ void __napi_schedule(struct napi_struct *n) |
1a6e0f06 JK |
26370 | local_irq_save(flags); |
26371 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
26372 | local_irq_restore(flags); | |
26373 | + preempt_check_resched_rt(); | |
26374 | } | |
26375 | EXPORT_SYMBOL(__napi_schedule); | |
26376 | ||
c7c16703 JK |
26377 | +#ifndef CONFIG_PREEMPT_RT_FULL |
26378 | /** | |
26379 | * __napi_schedule_irqoff - schedule for receive | |
26380 | * @n: entry to schedule | |
33c7bf0f | 26381 | @@ -4935,6 +4956,7 @@ void __napi_schedule_irqoff(struct napi_struct *n) |
c7c16703 JK |
26382 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); |
26383 | } | |
26384 | EXPORT_SYMBOL(__napi_schedule_irqoff); | |
26385 | +#endif | |
26386 | ||
26387 | void __napi_complete(struct napi_struct *n) | |
26388 | { | |
33c7bf0f | 26389 | @@ -5224,13 +5246,21 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) |
c7c16703 JK |
26390 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); |
26391 | unsigned long time_limit = jiffies + 2; | |
26392 | int budget = netdev_budget; | |
26393 | + struct sk_buff_head tofree_q; | |
26394 | + struct sk_buff *skb; | |
26395 | LIST_HEAD(list); | |
26396 | LIST_HEAD(repoll); | |
26397 | ||
26398 | + __skb_queue_head_init(&tofree_q); | |
26399 | + | |
26400 | local_irq_disable(); | |
26401 | + skb_queue_splice_init(&sd->tofree_queue, &tofree_q); | |
26402 | list_splice_init(&sd->poll_list, &list); | |
26403 | local_irq_enable(); | |
26404 | ||
26405 | + while ((skb = __skb_dequeue(&tofree_q))) | |
26406 | + kfree_skb(skb); | |
26407 | + | |
26408 | for (;;) { | |
26409 | struct napi_struct *n; | |
26410 | ||
33c7bf0f | 26411 | @@ -5261,7 +5291,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) |
1a6e0f06 JK |
26412 | list_splice_tail(&repoll, &list); |
26413 | list_splice(&list, &sd->poll_list); | |
26414 | if (!list_empty(&sd->poll_list)) | |
26415 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
26416 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
26417 | ||
26418 | net_rps_action_and_irq_enable(sd); | |
26419 | } | |
33c7bf0f | 26420 | @@ -8022,16 +8052,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, |
1a6e0f06 JK |
26421 | |
26422 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
26423 | local_irq_enable(); | |
26424 | + preempt_check_resched_rt(); | |
26425 | ||
26426 | /* Process offline CPU's input_pkt_queue */ | |
26427 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | |
26428 | netif_rx_ni(skb); | |
26429 | input_queue_head_incr(oldsd); | |
26430 | } | |
26431 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
26432 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
26433 | netif_rx_ni(skb); | |
26434 | input_queue_head_incr(oldsd); | |
26435 | } | |
26436 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
26437 | + kfree_skb(skb); | |
26438 | + } | |
26439 | ||
26440 | return NOTIFY_OK; | |
26441 | } | |
33c7bf0f | 26442 | @@ -8336,8 +8370,9 @@ static int __init net_dev_init(void) |
c7c16703 JK |
26443 | |
26444 | INIT_WORK(flush, flush_backlog); | |
1a6e0f06 JK |
26445 | |
26446 | - skb_queue_head_init(&sd->input_pkt_queue); | |
26447 | - skb_queue_head_init(&sd->process_queue); | |
26448 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
26449 | + skb_queue_head_init_raw(&sd->process_queue); | |
26450 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
26451 | INIT_LIST_HEAD(&sd->poll_list); | |
26452 | sd->output_queue_tailp = &sd->output_queue; | |
26453 | #ifdef CONFIG_RPS | |
26454 | diff --git a/net/core/filter.c b/net/core/filter.c | |
c7c16703 | 26455 | index b391209838ef..b86e9681a88e 100644 |
1a6e0f06 JK |
26456 | --- a/net/core/filter.c |
26457 | +++ b/net/core/filter.c | |
c7c16703 | 26458 | @@ -1645,7 +1645,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) |
1a6e0f06 JK |
26459 | { |
26460 | int ret; | |
26461 | ||
26462 | - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { | |
26463 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) { | |
26464 | net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); | |
26465 | kfree_skb(skb); | |
26466 | return -ENETDOWN; | |
c7c16703 | 26467 | @@ -1653,9 +1653,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) |
1a6e0f06 JK |
26468 | |
26469 | skb->dev = dev; | |
26470 | ||
26471 | - __this_cpu_inc(xmit_recursion); | |
26472 | + xmit_rec_inc(); | |
26473 | ret = dev_queue_xmit(skb); | |
26474 | - __this_cpu_dec(xmit_recursion); | |
26475 | + xmit_rec_dec(); | |
26476 | ||
26477 | return ret; | |
26478 | } | |
26479 | diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c | |
26480 | index cad8e791f28e..2a9364fe62a5 100644 | |
26481 | --- a/net/core/gen_estimator.c | |
26482 | +++ b/net/core/gen_estimator.c | |
26483 | @@ -84,7 +84,7 @@ struct gen_estimator | |
26484 | struct gnet_stats_basic_packed *bstats; | |
26485 | struct gnet_stats_rate_est64 *rate_est; | |
26486 | spinlock_t *stats_lock; | |
26487 | - seqcount_t *running; | |
26488 | + net_seqlock_t *running; | |
26489 | int ewma_log; | |
26490 | u32 last_packets; | |
26491 | unsigned long avpps; | |
26492 | @@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
26493 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
26494 | struct gnet_stats_rate_est64 *rate_est, | |
26495 | spinlock_t *stats_lock, | |
26496 | - seqcount_t *running, | |
26497 | + net_seqlock_t *running, | |
26498 | struct nlattr *opt) | |
26499 | { | |
26500 | struct gen_estimator *est; | |
26501 | @@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
26502 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
26503 | struct gnet_stats_rate_est64 *rate_est, | |
26504 | spinlock_t *stats_lock, | |
26505 | - seqcount_t *running, struct nlattr *opt) | |
26506 | + net_seqlock_t *running, struct nlattr *opt) | |
26507 | { | |
26508 | gen_kill_estimator(bstats, rate_est); | |
26509 | return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); | |
26510 | diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c | |
26511 | index 508e051304fb..bc3b17b78c94 100644 | |
26512 | --- a/net/core/gen_stats.c | |
26513 | +++ b/net/core/gen_stats.c | |
26514 | @@ -130,7 +130,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, | |
26515 | } | |
26516 | ||
26517 | void | |
26518 | -__gnet_stats_copy_basic(const seqcount_t *running, | |
26519 | +__gnet_stats_copy_basic(net_seqlock_t *running, | |
26520 | struct gnet_stats_basic_packed *bstats, | |
26521 | struct gnet_stats_basic_cpu __percpu *cpu, | |
26522 | struct gnet_stats_basic_packed *b) | |
26523 | @@ -143,10 +143,10 @@ __gnet_stats_copy_basic(const seqcount_t *running, | |
26524 | } | |
26525 | do { | |
26526 | if (running) | |
26527 | - seq = read_seqcount_begin(running); | |
26528 | + seq = net_seq_begin(running); | |
26529 | bstats->bytes = b->bytes; | |
26530 | bstats->packets = b->packets; | |
26531 | - } while (running && read_seqcount_retry(running, seq)); | |
26532 | + } while (running && net_seq_retry(running, seq)); | |
26533 | } | |
26534 | EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
26535 | ||
26536 | @@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
26537 | * if the room in the socket buffer was not sufficient. | |
26538 | */ | |
26539 | int | |
26540 | -gnet_stats_copy_basic(const seqcount_t *running, | |
26541 | +gnet_stats_copy_basic(net_seqlock_t *running, | |
26542 | struct gnet_dump *d, | |
26543 | struct gnet_stats_basic_cpu __percpu *cpu, | |
26544 | struct gnet_stats_basic_packed *b) | |
26545 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | |
7c18450a | 26546 | index fe008f1bd930..9fa6bea3dd3f 100644 |
1a6e0f06 JK |
26547 | --- a/net/core/skbuff.c |
26548 | +++ b/net/core/skbuff.c | |
26549 | @@ -64,6 +64,7 @@ | |
26550 | #include <linux/errqueue.h> | |
26551 | #include <linux/prefetch.h> | |
26552 | #include <linux/if_vlan.h> | |
26553 | +#include <linux/locallock.h> | |
26554 | ||
26555 | #include <net/protocol.h> | |
26556 | #include <net/dst.h> | |
26557 | @@ -360,6 +361,8 @@ struct napi_alloc_cache { | |
26558 | ||
26559 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
26560 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); | |
26561 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
26562 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
26563 | ||
26564 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
26565 | { | |
26566 | @@ -367,10 +370,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
26567 | unsigned long flags; | |
26568 | void *data; | |
26569 | ||
26570 | - local_irq_save(flags); | |
26571 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
26572 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
26573 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
26574 | - local_irq_restore(flags); | |
26575 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
26576 | return data; | |
26577 | } | |
26578 | ||
26579 | @@ -389,9 +392,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |
26580 | ||
26581 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
26582 | { | |
26583 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
26584 | + struct napi_alloc_cache *nc; | |
26585 | + void *data; | |
26586 | ||
26587 | - return __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
26588 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26589 | + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
26590 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26591 | + return data; | |
26592 | } | |
26593 | ||
26594 | void *napi_alloc_frag(unsigned int fragsz) | |
26595 | @@ -438,13 +445,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
26596 | if (sk_memalloc_socks()) | |
26597 | gfp_mask |= __GFP_MEMALLOC; | |
26598 | ||
26599 | - local_irq_save(flags); | |
26600 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
26601 | ||
26602 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
26603 | data = __alloc_page_frag(nc, len, gfp_mask); | |
26604 | pfmemalloc = nc->pfmemalloc; | |
26605 | ||
26606 | - local_irq_restore(flags); | |
26607 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
26608 | ||
26609 | if (unlikely(!data)) | |
26610 | return NULL; | |
26611 | @@ -485,9 +492,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |
26612 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
26613 | gfp_t gfp_mask) | |
26614 | { | |
26615 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
26616 | + struct napi_alloc_cache *nc; | |
26617 | struct sk_buff *skb; | |
26618 | void *data; | |
26619 | + bool pfmemalloc; | |
26620 | ||
26621 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
26622 | ||
26623 | @@ -505,7 +513,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
26624 | if (sk_memalloc_socks()) | |
26625 | gfp_mask |= __GFP_MEMALLOC; | |
26626 | ||
26627 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26628 | data = __alloc_page_frag(&nc->page, len, gfp_mask); | |
26629 | + pfmemalloc = nc->page.pfmemalloc; | |
26630 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26631 | if (unlikely(!data)) | |
26632 | return NULL; | |
26633 | ||
26634 | @@ -516,7 +527,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
26635 | } | |
26636 | ||
26637 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
26638 | - if (nc->page.pfmemalloc) | |
26639 | + if (pfmemalloc) | |
26640 | skb->pfmemalloc = 1; | |
26641 | skb->head_frag = 1; | |
26642 | ||
26643 | @@ -760,23 +771,26 @@ EXPORT_SYMBOL(consume_skb); | |
26644 | ||
26645 | void __kfree_skb_flush(void) | |
26646 | { | |
26647 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
26648 | + struct napi_alloc_cache *nc; | |
26649 | ||
26650 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26651 | /* flush skb_cache if containing objects */ | |
26652 | if (nc->skb_count) { | |
26653 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | |
26654 | nc->skb_cache); | |
26655 | nc->skb_count = 0; | |
26656 | } | |
26657 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26658 | } | |
26659 | ||
26660 | static inline void _kfree_skb_defer(struct sk_buff *skb) | |
26661 | { | |
26662 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
26663 | + struct napi_alloc_cache *nc; | |
26664 | ||
26665 | /* drop skb->head and call any destructors for packet */ | |
26666 | skb_release_all(skb); | |
26667 | ||
26668 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26669 | /* record skb to CPU local list */ | |
26670 | nc->skb_cache[nc->skb_count++] = skb; | |
26671 | ||
26672 | @@ -791,6 +805,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb) | |
26673 | nc->skb_cache); | |
26674 | nc->skb_count = 0; | |
26675 | } | |
26676 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
26677 | } | |
26678 | void __kfree_skb_defer(struct sk_buff *skb) | |
26679 | { | |
26680 | diff --git a/net/core/sock.c b/net/core/sock.c | |
33c7bf0f | 26681 | index 470a2043b846..2b09a5a33d8d 100644 |
1a6e0f06 JK |
26682 | --- a/net/core/sock.c |
26683 | +++ b/net/core/sock.c | |
33c7bf0f | 26684 | @@ -2499,12 +2499,11 @@ void lock_sock_nested(struct sock *sk, int subclass) |
1a6e0f06 JK |
26685 | if (sk->sk_lock.owned) |
26686 | __lock_sock(sk); | |
26687 | sk->sk_lock.owned = 1; | |
26688 | - spin_unlock(&sk->sk_lock.slock); | |
26689 | + spin_unlock_bh(&sk->sk_lock.slock); | |
26690 | /* | |
26691 | * The sk_lock has mutex_lock() semantics here: | |
26692 | */ | |
26693 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
26694 | - local_bh_enable(); | |
26695 | } | |
26696 | EXPORT_SYMBOL(lock_sock_nested); | |
26697 | ||
26698 | diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c | |
c7c16703 | 26699 | index 48734ee6293f..e6864ff11352 100644 |
1a6e0f06 JK |
26700 | --- a/net/ipv4/icmp.c |
26701 | +++ b/net/ipv4/icmp.c | |
26702 | @@ -69,6 +69,7 @@ | |
26703 | #include <linux/jiffies.h> | |
26704 | #include <linux/kernel.h> | |
26705 | #include <linux/fcntl.h> | |
26706 | +#include <linux/sysrq.h> | |
26707 | #include <linux/socket.h> | |
26708 | #include <linux/in.h> | |
26709 | #include <linux/inet.h> | |
26710 | @@ -77,6 +78,7 @@ | |
26711 | #include <linux/string.h> | |
26712 | #include <linux/netfilter_ipv4.h> | |
26713 | #include <linux/slab.h> | |
26714 | +#include <linux/locallock.h> | |
26715 | #include <net/snmp.h> | |
26716 | #include <net/ip.h> | |
26717 | #include <net/route.h> | |
26718 | @@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |
26719 | * | |
26720 | * On SMP we have one ICMP socket per-cpu. | |
26721 | */ | |
26722 | +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); | |
26723 | + | |
26724 | static struct sock *icmp_sk(struct net *net) | |
26725 | { | |
26726 | return *this_cpu_ptr(net->ipv4.icmp_sk); | |
26727 | @@ -215,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
26728 | ||
26729 | local_bh_disable(); | |
26730 | ||
26731 | + local_lock(icmp_sk_lock); | |
26732 | sk = icmp_sk(net); | |
26733 | ||
26734 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { | |
26735 | /* This can happen if the output path signals a | |
26736 | * dst_link_failure() for an outgoing ICMP packet. | |
26737 | */ | |
26738 | + local_unlock(icmp_sk_lock); | |
26739 | local_bh_enable(); | |
26740 | return NULL; | |
26741 | } | |
26742 | @@ -230,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
26743 | static inline void icmp_xmit_unlock(struct sock *sk) | |
26744 | { | |
26745 | spin_unlock_bh(&sk->sk_lock.slock); | |
26746 | + local_unlock(icmp_sk_lock); | |
26747 | } | |
26748 | ||
26749 | int sysctl_icmp_msgs_per_sec __read_mostly = 1000; | |
26750 | @@ -358,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
26751 | struct sock *sk; | |
26752 | struct sk_buff *skb; | |
26753 | ||
26754 | + local_lock(icmp_sk_lock); | |
26755 | sk = icmp_sk(dev_net((*rt)->dst.dev)); | |
26756 | if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, | |
26757 | icmp_param->data_len+icmp_param->head_len, | |
26758 | @@ -380,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
26759 | skb->ip_summed = CHECKSUM_NONE; | |
26760 | ip_push_pending_frames(sk, fl4); | |
26761 | } | |
26762 | + local_unlock(icmp_sk_lock); | |
26763 | } | |
26764 | ||
26765 | /* | |
26766 | @@ -891,6 +900,30 @@ static bool icmp_redirect(struct sk_buff *skb) | |
26767 | } | |
26768 | ||
26769 | /* | |
26770 | + * 32bit and 64bit have different timestamp length, so we check for | |
26771 | + * the cookie at offset 20 and verify it is repeated at offset 50 | |
26772 | + */ | |
26773 | +#define CO_POS0 20 | |
26774 | +#define CO_POS1 50 | |
26775 | +#define CO_SIZE sizeof(int) | |
26776 | +#define ICMP_SYSRQ_SIZE 57 | |
26777 | + | |
26778 | +/* | |
26779 | + * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie | |
26780 | + * pattern and if it matches send the next byte as a trigger to sysrq. | |
26781 | + */ | |
26782 | +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb) | |
26783 | +{ | |
26784 | + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq); | |
26785 | + char *p = skb->data; | |
26786 | + | |
26787 | + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) && | |
26788 | + !memcmp(&cookie, p + CO_POS1, CO_SIZE) && | |
26789 | + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE]) | |
26790 | + handle_sysrq(p[CO_POS0 + CO_SIZE]); | |
26791 | +} | |
26792 | + | |
26793 | +/* | |
26794 | * Handle ICMP_ECHO ("ping") requests. | |
26795 | * | |
26796 | * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo | |
26797 | @@ -917,6 +950,11 @@ static bool icmp_echo(struct sk_buff *skb) | |
26798 | icmp_param.data_len = skb->len; | |
26799 | icmp_param.head_len = sizeof(struct icmphdr); | |
26800 | icmp_reply(&icmp_param, skb); | |
26801 | + | |
26802 | + if (skb->len == ICMP_SYSRQ_SIZE && | |
26803 | + net->ipv4.sysctl_icmp_echo_sysrq) { | |
26804 | + icmp_check_sysrq(net, skb); | |
26805 | + } | |
26806 | } | |
26807 | /* should there be an ICMP stat for ignored echos? */ | |
26808 | return true; | |
26809 | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | |
c7c16703 | 26810 | index 80bc36b25de2..215b90adfb05 100644 |
1a6e0f06 JK |
26811 | --- a/net/ipv4/sysctl_net_ipv4.c |
26812 | +++ b/net/ipv4/sysctl_net_ipv4.c | |
26813 | @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = { | |
26814 | .proc_handler = proc_dointvec | |
26815 | }, | |
26816 | { | |
26817 | + .procname = "icmp_echo_sysrq", | |
26818 | + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq, | |
26819 | + .maxlen = sizeof(int), | |
26820 | + .mode = 0644, | |
26821 | + .proc_handler = proc_dointvec | |
26822 | + }, | |
26823 | + { | |
26824 | .procname = "icmp_ignore_bogus_error_responses", | |
26825 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | |
26826 | .maxlen = sizeof(int), | |
26827 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | |
33c7bf0f | 26828 | index 6988566dc72f..672fffcde28c 100644 |
1a6e0f06 JK |
26829 | --- a/net/ipv4/tcp_ipv4.c |
26830 | +++ b/net/ipv4/tcp_ipv4.c | |
26831 | @@ -62,6 +62,7 @@ | |
26832 | #include <linux/init.h> | |
26833 | #include <linux/times.h> | |
26834 | #include <linux/slab.h> | |
26835 | +#include <linux/locallock.h> | |
26836 | ||
26837 | #include <net/net_namespace.h> | |
26838 | #include <net/icmp.h> | |
33c7bf0f | 26839 | @@ -568,6 +569,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
26840 | } |
26841 | EXPORT_SYMBOL(tcp_v4_send_check); | |
26842 | ||
26843 | +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); | |
26844 | /* | |
26845 | * This routine will send an RST to the other tcp. | |
26846 | * | |
33c7bf0f | 26847 | @@ -695,6 +697,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
26848 | offsetof(struct inet_timewait_sock, tw_bound_dev_if)); |
26849 | ||
26850 | arg.tos = ip_hdr(skb)->tos; | |
26851 | + | |
26852 | + local_lock(tcp_sk_lock); | |
26853 | local_bh_disable(); | |
26854 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
26855 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
33c7bf0f | 26856 | @@ -704,6 +708,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) |
1a6e0f06 JK |
26857 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); |
26858 | __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); | |
26859 | local_bh_enable(); | |
26860 | + local_unlock(tcp_sk_lock); | |
26861 | ||
26862 | #ifdef CONFIG_TCP_MD5SIG | |
26863 | out: | |
33c7bf0f | 26864 | @@ -779,6 +784,7 @@ static void tcp_v4_send_ack(struct net *net, |
1a6e0f06 JK |
26865 | if (oif) |
26866 | arg.bound_dev_if = oif; | |
26867 | arg.tos = tos; | |
26868 | + local_lock(tcp_sk_lock); | |
26869 | local_bh_disable(); | |
26870 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
26871 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
33c7bf0f | 26872 | @@ -787,6 +793,7 @@ static void tcp_v4_send_ack(struct net *net, |
1a6e0f06 JK |
26873 | |
26874 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
26875 | local_bh_enable(); | |
26876 | + local_unlock(tcp_sk_lock); | |
26877 | } | |
26878 | ||
26879 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |
26880 | diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c | |
7c18450a | 26881 | index acaaf616da71..09020dbcc089 100644 |
1a6e0f06 JK |
26882 | --- a/net/mac80211/rx.c |
26883 | +++ b/net/mac80211/rx.c | |
7c18450a | 26884 | @@ -4230,7 +4230,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, |
1a6e0f06 JK |
26885 | struct ieee80211_supported_band *sband; |
26886 | struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); | |
26887 | ||
26888 | - WARN_ON_ONCE(softirq_count() == 0); | |
26889 | + WARN_ON_ONCE_NONRT(softirq_count() == 0); | |
26890 | ||
26891 | if (WARN_ON(status->band >= NUM_NL80211_BANDS)) | |
26892 | goto drop; | |
26893 | diff --git a/net/netfilter/core.c b/net/netfilter/core.c | |
c7c16703 | 26894 | index 004af030ef1a..b64f751bda45 100644 |
1a6e0f06 JK |
26895 | --- a/net/netfilter/core.c |
26896 | +++ b/net/netfilter/core.c | |
c7c16703 | 26897 | @@ -22,12 +22,18 @@ |
1a6e0f06 JK |
26898 | #include <linux/proc_fs.h> |
26899 | #include <linux/mutex.h> | |
26900 | #include <linux/slab.h> | |
26901 | +#include <linux/locallock.h> | |
c7c16703 | 26902 | #include <linux/rcupdate.h> |
1a6e0f06 JK |
26903 | #include <net/net_namespace.h> |
26904 | #include <net/sock.h> | |
26905 | ||
26906 | #include "nf_internals.h" | |
26907 | ||
26908 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
26909 | +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock); | |
26910 | +EXPORT_PER_CPU_SYMBOL(xt_write_lock); | |
26911 | +#endif | |
26912 | + | |
26913 | static DEFINE_MUTEX(afinfo_mutex); | |
26914 | ||
26915 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
26916 | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c | |
7c18450a | 26917 | index cb76ff3088e9..3f42c5b1af55 100644 |
1a6e0f06 JK |
26918 | --- a/net/packet/af_packet.c |
26919 | +++ b/net/packet/af_packet.c | |
26920 | @@ -63,6 +63,7 @@ | |
26921 | #include <linux/if_packet.h> | |
26922 | #include <linux/wireless.h> | |
26923 | #include <linux/kernel.h> | |
26924 | +#include <linux/delay.h> | |
26925 | #include <linux/kmod.h> | |
26926 | #include <linux/slab.h> | |
26927 | #include <linux/vmalloc.h> | |
c7c16703 | 26928 | @@ -694,7 +695,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) |
1a6e0f06 JK |
26929 | if (BLOCK_NUM_PKTS(pbd)) { |
26930 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
26931 | /* Waiting for skb_copy_bits to finish... */ | |
26932 | - cpu_relax(); | |
26933 | + cpu_chill(); | |
26934 | } | |
26935 | } | |
26936 | ||
c7c16703 | 26937 | @@ -956,7 +957,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, |
1a6e0f06 JK |
26938 | if (!(status & TP_STATUS_BLK_TMO)) { |
26939 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
26940 | /* Waiting for skb_copy_bits to finish... */ | |
26941 | - cpu_relax(); | |
26942 | + cpu_chill(); | |
26943 | } | |
26944 | } | |
26945 | prb_close_block(pkc, pbd, po, status); | |
26946 | diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c | |
26947 | index 977f69886c00..f3e7a36b0396 100644 | |
26948 | --- a/net/rds/ib_rdma.c | |
26949 | +++ b/net/rds/ib_rdma.c | |
26950 | @@ -34,6 +34,7 @@ | |
26951 | #include <linux/slab.h> | |
26952 | #include <linux/rculist.h> | |
26953 | #include <linux/llist.h> | |
26954 | +#include <linux/delay.h> | |
26955 | ||
26956 | #include "rds_single_path.h" | |
26957 | #include "ib_mr.h" | |
26958 | @@ -210,7 +211,7 @@ static inline void wait_clean_list_grace(void) | |
26959 | for_each_online_cpu(cpu) { | |
26960 | flag = &per_cpu(clean_list_grace, cpu); | |
26961 | while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) | |
26962 | - cpu_relax(); | |
26963 | + cpu_chill(); | |
26964 | } | |
26965 | } | |
26966 | ||
26967 | diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c | |
c7c16703 | 26968 | index 7d921e56e715..13df56a738e5 100644 |
1a6e0f06 JK |
26969 | --- a/net/rxrpc/security.c |
26970 | +++ b/net/rxrpc/security.c | |
26971 | @@ -19,9 +19,6 @@ | |
26972 | #include <keys/rxrpc-type.h> | |
26973 | #include "ar-internal.h" | |
26974 | ||
26975 | -static LIST_HEAD(rxrpc_security_methods); | |
26976 | -static DECLARE_RWSEM(rxrpc_security_sem); | |
26977 | - | |
26978 | static const struct rxrpc_security *rxrpc_security_types[] = { | |
26979 | [RXRPC_SECURITY_NONE] = &rxrpc_no_security, | |
26980 | #ifdef CONFIG_RXKAD | |
26981 | diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c | |
c7c16703 | 26982 | index 206dc24add3a..00ea9bde5bb3 100644 |
1a6e0f06 JK |
26983 | --- a/net/sched/sch_api.c |
26984 | +++ b/net/sched/sch_api.c | |
c7c16703 | 26985 | @@ -981,7 +981,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, |
1a6e0f06 JK |
26986 | rcu_assign_pointer(sch->stab, stab); |
26987 | } | |
26988 | if (tca[TCA_RATE]) { | |
26989 | - seqcount_t *running; | |
26990 | + net_seqlock_t *running; | |
26991 | ||
26992 | err = -EOPNOTSUPP; | |
26993 | if (sch->flags & TCQ_F_MQROOT) | |
26994 | diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c | |
c7c16703 | 26995 | index 6cfb6e9038c2..20727e1347de 100644 |
1a6e0f06 JK |
26996 | --- a/net/sched/sch_generic.c |
26997 | +++ b/net/sched/sch_generic.c | |
c7c16703 JK |
26998 | @@ -425,7 +425,11 @@ struct Qdisc noop_qdisc = { |
26999 | .ops = &noop_qdisc_ops, | |
1a6e0f06 JK |
27000 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), |
27001 | .dev_queue = &noop_netdev_queue, | |
27002 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
27003 | + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), | |
27004 | +#else | |
27005 | .running = SEQCNT_ZERO(noop_qdisc.running), | |
27006 | +#endif | |
27007 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | |
27008 | }; | |
27009 | EXPORT_SYMBOL(noop_qdisc); | |
c7c16703 | 27010 | @@ -624,9 +628,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
1a6e0f06 JK |
27011 | lockdep_set_class(&sch->busylock, |
27012 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | |
27013 | ||
27014 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
27015 | + seqlock_init(&sch->running); | |
27016 | + lockdep_set_class(&sch->running.seqcount, | |
27017 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
27018 | + lockdep_set_class(&sch->running.lock, | |
27019 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
27020 | +#else | |
27021 | seqcount_init(&sch->running); | |
27022 | lockdep_set_class(&sch->running, | |
27023 | dev->qdisc_running_key ?: &qdisc_running_key); | |
27024 | +#endif | |
27025 | ||
27026 | sch->ops = ops; | |
27027 | sch->enqueue = ops->enqueue; | |
c7c16703 | 27028 | @@ -925,7 +937,7 @@ void dev_deactivate_many(struct list_head *head) |
1a6e0f06 JK |
27029 | /* Wait for outstanding qdisc_run calls. */ |
27030 | list_for_each_entry(dev, head, close_list) | |
27031 | while (some_qdisc_is_busy(dev)) | |
27032 | - yield(); | |
27033 | + msleep(1); | |
27034 | } | |
27035 | ||
27036 | void dev_deactivate(struct net_device *dev) | |
27037 | diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c | |
1f39f580 | 27038 | index 9c9db55a0c1e..e6583b018a72 100644 |
1a6e0f06 JK |
27039 | --- a/net/sunrpc/svc_xprt.c |
27040 | +++ b/net/sunrpc/svc_xprt.c | |
27041 | @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
27042 | goto out; | |
27043 | } | |
27044 | ||
27045 | - cpu = get_cpu(); | |
27046 | + cpu = get_cpu_light(); | |
27047 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
27048 | ||
27049 | atomic_long_inc(&pool->sp_stats.packets); | |
27050 | @@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
27051 | ||
27052 | atomic_long_inc(&pool->sp_stats.threads_woken); | |
27053 | wake_up_process(rqstp->rq_task); | |
27054 | - put_cpu(); | |
27055 | + put_cpu_light(); | |
27056 | goto out; | |
27057 | } | |
27058 | rcu_read_unlock(); | |
27059 | @@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
27060 | goto redo_search; | |
27061 | } | |
27062 | rqstp = NULL; | |
27063 | - put_cpu(); | |
27064 | + put_cpu_light(); | |
27065 | out: | |
27066 | trace_svc_xprt_do_enqueue(xprt, rqstp); | |
27067 | } | |
27068 | diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h | |
27069 | index 6fdc97ef6023..523e0420d7f0 100755 | |
27070 | --- a/scripts/mkcompile_h | |
27071 | +++ b/scripts/mkcompile_h | |
27072 | @@ -4,7 +4,8 @@ TARGET=$1 | |
27073 | ARCH=$2 | |
27074 | SMP=$3 | |
27075 | PREEMPT=$4 | |
27076 | -CC=$5 | |
27077 | +RT=$5 | |
27078 | +CC=$6 | |
27079 | ||
27080 | vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } | |
27081 | ||
27082 | @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION" | |
27083 | CONFIG_FLAGS="" | |
27084 | if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi | |
27085 | if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi | |
27086 | +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi | |
27087 | UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" | |
27088 | ||
27089 | # Truncate to maximum length | |
27090 | diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c | |
c7c16703 | 27091 | index 9d33c1e85c79..3d307bda86f9 100644 |
1a6e0f06 JK |
27092 | --- a/sound/core/pcm_native.c |
27093 | +++ b/sound/core/pcm_native.c | |
27094 | @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); | |
27095 | void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) | |
27096 | { | |
27097 | if (!substream->pcm->nonatomic) | |
27098 | - local_irq_disable(); | |
27099 | + local_irq_disable_nort(); | |
27100 | snd_pcm_stream_lock(substream); | |
27101 | } | |
27102 | EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); | |
27103 | @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) | |
27104 | { | |
27105 | snd_pcm_stream_unlock(substream); | |
27106 | if (!substream->pcm->nonatomic) | |
27107 | - local_irq_enable(); | |
27108 | + local_irq_enable_nort(); | |
27109 | } | |
27110 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); | |
27111 | ||
27112 | @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) | |
27113 | { | |
27114 | unsigned long flags = 0; | |
27115 | if (!substream->pcm->nonatomic) | |
27116 | - local_irq_save(flags); | |
27117 | + local_irq_save_nort(flags); | |
27118 | snd_pcm_stream_lock(substream); | |
27119 | return flags; | |
27120 | } | |
27121 | @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, | |
27122 | { | |
27123 | snd_pcm_stream_unlock(substream); | |
27124 | if (!substream->pcm->nonatomic) | |
27125 | - local_irq_restore(flags); | |
27126 | + local_irq_restore_nort(flags); | |
27127 | } | |
27128 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); | |
27129 |