1 | diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt |
2 | new file mode 100644 | |
3 | index 000000000000..cb61516483d3 | |
4 | --- /dev/null | |
5 | +++ b/Documentation/hwlat_detector.txt | |
6 | @@ -0,0 +1,64 @@ | |
7 | +Introduction: | |
8 | +------------- | |
9 | + | |
10 | +The module hwlat_detector is a special purpose kernel module that is used to | |
11 | +detect large system latencies induced by the behavior of certain underlying | |
12 | +hardware or firmware, independent of Linux itself. The code was developed | |
13 | +originally to detect SMIs (System Management Interrupts) on x86 systems, | |
14 | +however there is nothing x86-specific about this patchset. It was |
15 | +originally written for use by the "RT" patch since the Real Time | |
16 | +kernel is highly latency sensitive. | |
17 | + | |
18 | +SMIs are usually not serviced by the Linux kernel, which typically does not | |
19 | +even know that they are occurring. SMIs are instead set up by BIOS code |
20 | +and are serviced by BIOS code, usually for "critical" events such as | |
21 | +management of thermal sensors and fans. Sometimes though, SMIs are used for | |
22 | +other tasks and those tasks can spend an inordinate amount of time in the | |
23 | +handler (sometimes measured in milliseconds). Obviously this is a problem if | |
24 | +you are trying to keep event service latencies down in the microsecond range. | |
25 | + | |
26 | +The hardware latency detector works by hogging all of the CPUs for configurable |
27 | +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter | |
28 | +for some period, then looking for gaps in the TSC data. Any gap indicates a | |
29 | +time when the polling was interrupted, and since the machine is stopped and |
30 | +interrupts are turned off, the only thing that could do that would be an SMI. |
31 | + | |
32 | +Note that the SMI detector should *NEVER* be used in a production environment. | |
33 | +It is intended to be run manually to determine if the hardware platform has a | |
34 | +problem with long system firmware service routines. | |
35 | + | |
36 | +Usage: | |
37 | +------ | |
38 | + | |
39 | +Loading the module hwlat_detector with the parameter "enabled=1" (or toggling |
40 | +on the "enable" entry in the "hwlat_detector" debugfs directory) is the only |
41 | +step required to start the hwlat_detector. It is possible to redefine the |
42 | +threshold in microseconds (us) above which latency spikes will be taken | |
43 | +into account (parameter "threshold="). | |
44 | + | |
45 | +Example: | |
46 | + | |
47 | + # modprobe hwlat_detector enabled=1 threshold=100 | |
48 | + | |
49 | +After the module is loaded, it creates a directory named "hwlat_detector" under | |
50 | +the debugfs mountpoint ("/debug/hwlat_detector" in this text). It is necessary |
51 | +to have debugfs mounted, which might be on /sys/kernel/debug on your system. |
52 | + | |
53 | +The /debug/hwlat_detector interface contains the following files: | |
54 | + | |
55 | +count - number of latency spikes observed since last reset | |
56 | +enable - a global enable/disable toggle (0/1), resets count | |
57 | +max - maximum hardware latency actually observed (usecs) | |
58 | +sample - a pipe from which to read current raw sample data | |
59 | + in the format <timestamp> <latency observed usecs> | |
60 | + (can be opened O_NONBLOCK for a single sample) | |
61 | +threshold - minimum latency value to be considered (usecs) | |
62 | +width - time period to sample with CPUs held (usecs) | |
63 | + must be less than the total window size (enforced) | |
64 | +window - total period of sampling, width being inside (usecs) | |
65 | + | |
66 | +By default we will set width to 500,000 and window to 1,000,000, meaning that | |
67 | +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we | |
68 | +observe any latencies that exceed the threshold (initially 100 usecs), | |
69 | +then we write to a global sample ring buffer of 8K samples, which is | |
70 | +consumed by reading from the "sample" (pipe) debugfs file interface. | |
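As an illustration of the debugfs interface described above, a minimal shell session might look like the following (a sketch only; paths assume debugfs is mounted on /debug as in this text, so adjust if your system uses /sys/kernel/debug):

   # modprobe hwlat_detector
   # cd /debug/hwlat_detector
   # echo 200 > threshold      # only record spikes of 200 usecs or more
   # echo 500000 > width       # hold the CPUs for 0.5s ...
   # echo 1000000 > window     # ... out of every 1s (width must stay below window)
   # echo 1 > enable           # start sampling (resets count)
   # cat sample                # blocks until a sample above threshold is recorded
   # cat count max             # spikes seen so far and the worst latency observed
   # echo 0 > enable           # stop sampling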
71 | diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt | |
72 | index 3a3b30ac2a75..9e0745cafbd8 100644 | |
73 | --- a/Documentation/sysrq.txt | |
74 | +++ b/Documentation/sysrq.txt | |
75 | @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>, | |
76 | On other - If you know of the key combos for other architectures, please | |
77 | let me know so I can add them to this section. | |
78 | ||
79 | -On all - write a character to /proc/sysrq-trigger. e.g.: | |
80 | - | |
81 | +On all - write a character to /proc/sysrq-trigger, e.g.: | |
82 | echo t > /proc/sysrq-trigger | |
83 | ||
84 | +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g. | |
85 | + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq | |
86 | + Send an ICMP echo request with this pattern plus the particular | |
87 | + SysRq command key. Example: | |
88 | + # ping -c1 -s57 -p0102030468 | |
89 | + will trigger the SysRq-H (help) command (0x68 is the ASCII code of 'h'). |
90 | + | |
91 | + | |
92 | * What are the 'command' keys? | |
93 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
94 | 'b' - Will immediately reboot the system without syncing or unmounting | |
95 | diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt | |
96 | new file mode 100644 | |
97 | index 000000000000..6f2aeabf7faa | |
98 | --- /dev/null | |
99 | +++ b/Documentation/trace/histograms.txt | |
100 | @@ -0,0 +1,186 @@ | |
101 | + Using the Linux Kernel Latency Histograms | |
102 | + | |
103 | + | |
104 | +This document gives a short explanation of how to enable, configure and use |
105 | +latency histograms. Latency histograms are primarily relevant in the | |
106 | +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) | |
107 | +and are used in the quality management of the Linux real-time | |
108 | +capabilities. | |
109 | + | |
110 | + | |
111 | +* Purpose of latency histograms | |
112 | + | |
113 | +A latency histogram continuously accumulates the frequencies of latency | |
114 | +data. There are two types of histograms: |
115 | +- potential sources of latencies | |
116 | +- effective latencies | |
117 | + | |
118 | + | |
119 | +* Potential sources of latencies | |
120 | + | |
121 | +Potential sources of latencies are code segments where interrupts, | |
122 | +preemption or both are disabled (aka critical sections). To create | |
123 | +histograms of potential sources of latency, the kernel stores the time | |
124 | +stamp at the start of a critical section, determines the time elapsed | |
125 | +when the end of the section is reached, and increments the frequency | |
126 | +counter of that latency value - irrespective of whether any concurrently | |
127 | +running process is affected by latency or not. | |
128 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
129 | + CONFIG_INTERRUPT_OFF_LATENCY | |
130 | + CONFIG_PREEMPT_OFF_LATENCY | |
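For example, a kernel with these histograms can be prepared by switching on the corresponding options before building (a sketch; scripts/config is the helper shipped in the kernel source tree):

   $ scripts/config --enable CONFIG_INTERRUPT_OFF_LATENCY \
                    --enable CONFIG_PREEMPT_OFF_LATENCY
   $ make olddefconfig && make -j"$(nproc)"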
131 | + | |
132 | + | |
133 | +* Effective latencies | |
134 | + | |
135 | +Effective latencies are those actually occurring during wakeup of a process. To |
136 | +determine effective latencies, the kernel stores the time stamp when a | |
137 | +process is scheduled to be woken up, and determines the duration of the | |
138 | +wakeup time shortly before control is passed over to this process. Note | |
139 | +that the apparent latency in user space may be somewhat longer, since the | |
140 | +process may be interrupted after control is passed over to it but before | |
141 | +the execution in user space takes place. Simply measuring the interval | |
142 | +between enqueuing and wakeup may also not be appropriate in cases when a |
143 | +process is scheduled as a result of a timer expiration. The timer may have | |
144 | +missed its deadline, e.g. due to disabled interrupts, but this latency | |
145 | +would not be registered. Therefore, the offsets of missed timers are | |
146 | +recorded in a separate histogram. If both wakeup latency and missed timer | |
147 | +offsets are configured and enabled, a third histogram may be enabled that | |
148 | +records the overall latency as a sum of the timer latency, if any, and the | |
149 | +wakeup latency. This histogram is called "timerandwakeup". | |
150 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
151 | + CONFIG_WAKEUP_LATENCY | |
152 | + CONFIG_MISSED_TIMER_OFFSETS |
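As a hypothetical worked example: if a timer fires 40 usecs after its programmed expiry and the woken process then waits a further 15 usecs before it actually gets the CPU, the missed_timer_offsets histogram records 40, the wakeup histogram records 15, and the timerandwakeup histogram records the sum, 55.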
153 | + | |
154 | + | |
155 | +* Usage | |
156 | + | |
157 | +The interface to the administration of the latency histograms is located | |
158 | +in the debugfs file system. To mount it, either enter | |
159 | + | |
160 | +mount -t sysfs nodev /sys | |
161 | +mount -t debugfs nodev /sys/kernel/debug | |
162 | + | |
163 | +from shell command line level, or add | |
164 | + | |
165 | +nodev /sys sysfs defaults 0 0 | |
166 | +nodev /sys/kernel/debug debugfs defaults 0 0 | |
167 | + | |
168 | +to the file /etc/fstab. All latency histogram related files are then | |
169 | +available in the directory /sys/kernel/debug/tracing/latency_hist. A | |
170 | +particular histogram type is enabled by writing non-zero to the related | |
171 | +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. | |
172 | +Select "preemptirqsoff" for the histograms of potential sources of | |
173 | +latencies and "wakeup" for histograms of effective latencies etc. The | |
174 | +histogram data - one per CPU - are available in the files | |
175 | + | |
176 | +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx | |
177 | +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx | |
178 | +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx | |
179 | +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx | |
180 | +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx | |
181 | +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx | |
182 | +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx | |
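Putting the above together, the wakeup histograms could, for example, be enabled and inspected as follows (a sketch using the paths listed above):

   # cd /sys/kernel/debug/tracing/latency_hist
   # echo 1 > enable/wakeup       # switch the wakeup histograms on
   # sleep 60                     # let the system collect samples
   # head -20 wakeup/CPU0         # inspect the histogram data of CPU0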
183 | + | |
184 | +The histograms are reset by writing non-zero to the file "reset" in a | |
185 | +particular latency directory. To reset all latency data, use | |
186 | + | |
187 | +#!/bin/sh | |
188 | + | |
189 | +TRACINGDIR=/sys/kernel/debug/tracing | |
190 | +HISTDIR=$TRACINGDIR/latency_hist | |
191 | + | |
192 | +if test -d $HISTDIR | |
193 | +then | |
194 | + cd $HISTDIR | |
195 | + for i in `find . | grep /reset$` | |
196 | + do | |
197 | + echo 1 >$i | |
198 | + done | |
199 | +fi | |
200 | + | |
201 | + | |
202 | +* Data format | |
203 | + | |
204 | +Latency data are stored with a resolution of one microsecond. The | |
205 | +maximum latency is 10,240 microseconds. The data are only valid if the |
206 | +overflow register is empty. Every output line contains the latency in |
207 | +microseconds in the first column and the number of samples in the second |
208 | +column. To display only lines with a positive latency count, use, for |
209 | +example, | |
210 | + | |
211 | +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 | |
212 | + | |
213 | +#Minimum latency: 0 microseconds. | |
214 | +#Average latency: 0 microseconds. | |
215 | +#Maximum latency: 25 microseconds. | |
216 | +#Total samples: 3104770694 | |
217 | +#There are 0 samples greater or equal than 10240 microseconds | |
218 | +#usecs samples | |
219 | + 0 2984486876 | |
220 | + 1 49843506 | |
221 | + 2 58219047 | |
222 | + 3 5348126 | |
223 | + 4 2187960 | |
224 | + 5 3388262 | |
225 | + 6 959289 | |
226 | + 7 208294 | |
227 | + 8 40420 | |
228 | + 9 4485 | |
229 | + 10 14918 | |
230 | + 11 18340 | |
231 | + 12 25052 | |
232 | + 13 19455 | |
233 | + 14 5602 | |
234 | + 15 969 | |
235 | + 16 47 | |
236 | + 17 18 | |
237 | + 18 14 | |
238 | + 19 1 | |
239 | + 20 3 | |
240 | + 21 2 | |
241 | + 22 5 | |
242 | + 23 2 | |
243 | + 25 1 | |
244 | + | |
245 | + | |
246 | +* Wakeup latency of a selected process | |
247 | + | |
248 | +To only collect wakeup latency data of a particular process, write the | |
249 | +PID of the requested process to | |
250 | + | |
251 | +/sys/kernel/debug/tracing/latency_hist/wakeup/pid | |
252 | + | |
253 | +PIDs are not considered if this variable is set to 0. |
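For example, to restrict the recording to a single real-time task and later switch back to all processes (a sketch; cyclictest is only a placeholder for whatever task is of interest):

   # echo $(pidof -s cyclictest) > /sys/kernel/debug/tracing/latency_hist/wakeup/pid
   # ... run the measurement ...
   # echo 0 > /sys/kernel/debug/tracing/latency_hist/wakeup/pid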
254 | + | |
255 | + | |
256 | +* Details of the process with the highest wakeup latency so far | |
257 | + | |
258 | +Selected data of the process that suffered from the highest wakeup | |
259 | +latency that occurred on a particular CPU are available in the file |
260 | + | |
261 | +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx. | |
262 | + | |
263 | +In addition, other relevant system data at the time when the | |
264 | +latency occurred are given. | |
265 | + | |
266 | +The format of the data is (all in one line): | |
267 | +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \ | |
268 | +<- <PID> <Priority> <Command> <Timestamp> | |
269 | + | |
270 | +The value of <Timeroffset> is only relevant in the combined timer | |
271 | +and wakeup latency recording. In the wakeup recording, it is | |
272 | +always 0, in the missed_timer_offsets recording, it is the same | |
273 | +as <Latency>. | |
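A hypothetical line in this format might read

   1129 98 17 (0) cyclictest <- 1048 120 kworker/3:1 471.208974

i.e. PID 1129 with priority 98 suffered a 17 usecs wakeup latency (timer offset 0 in the plain wakeup recording), and the task running before the switch was kworker/3:1; the values are purely illustrative.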
274 | + | |
275 | +When retrospectively searching for the origin of a latency while |
276 | +tracing was not enabled, it may be helpful to know the name and |
277 | +some basic data of the task that (finally) switched to the |
278 | +late real-time task. In addition to the victim's data, the |
279 | +data of the possible culprit are therefore displayed after the |
280 | +"<-" symbol. |
281 | + | |
282 | +Finally, the timestamp of the time when the latency occurred | |
283 | +in <seconds>.<microseconds> after the most recent system boot | |
284 | +is provided. | |
285 | + | |
286 | +These data are also reset when the wakeup histogram is reset. | |
287 | diff --git a/Makefile b/Makefile | |
288 | index b249529204cd..5d699d055995 100644 | |
289 | --- a/Makefile | |
290 | +++ b/Makefile | |
291 | @@ -398,12 +398,12 @@ KBUILD_CPPFLAGS := -D__KERNEL__ | |
292 | KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ | |
293 | -fno-strict-aliasing -fno-common \ | |
294 | -Werror-implicit-function-declaration \ | |
295 | - -Wno-format-security \ | |
296 | + -Wno-format-security -fno-PIE \ | |
297 | -std=gnu89 | |
298 | ||
299 | KBUILD_AFLAGS_KERNEL := | |
300 | KBUILD_CFLAGS_KERNEL := | |
301 | -KBUILD_AFLAGS := -D__ASSEMBLY__ | |
302 | +KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE | |
303 | KBUILD_AFLAGS_MODULE := -DMODULE | |
304 | KBUILD_CFLAGS_MODULE := -DMODULE | |
305 | KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds | |
306 | diff --git a/arch/Kconfig b/arch/Kconfig | |
307 | index fd6e9712af81..085134ee13e9 100644 | |
308 | --- a/arch/Kconfig | |
309 | +++ b/arch/Kconfig | |
310 | @@ -9,6 +9,7 @@ config OPROFILE | |
311 | tristate "OProfile system profiling" | |
312 | depends on PROFILING | |
313 | depends on HAVE_OPROFILE | |
314 | + depends on !PREEMPT_RT_FULL | |
315 | select RING_BUFFER | |
316 | select RING_BUFFER_ALLOW_SWAP | |
317 | help | |
318 | @@ -52,6 +53,7 @@ config KPROBES | |
319 | config JUMP_LABEL | |
320 | bool "Optimize very unlikely/likely branches" | |
321 | depends on HAVE_ARCH_JUMP_LABEL | |
322 | + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) | |
323 | help | |
324 | This option enables a transparent branch optimization that | |
325 | makes certain almost-always-true or almost-always-false branch | |
326 | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig | |
327 | index a9c4e48bb7ec..6eefe4f32302 100644 | |
328 | --- a/arch/arm/Kconfig | |
329 | +++ b/arch/arm/Kconfig | |
330 | @@ -36,7 +36,7 @@ config ARM | |
331 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
332 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
333 | select HAVE_ARCH_HARDENED_USERCOPY | |
334 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU | |
335 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT_BASE | |
336 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU | |
337 | select HAVE_ARCH_MMAP_RND_BITS if MMU | |
338 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
339 | @@ -75,6 +75,7 @@ config ARM | |
340 | select HAVE_PERF_EVENTS | |
341 | select HAVE_PERF_REGS | |
342 | select HAVE_PERF_USER_STACK_DUMP | |
343 | + select HAVE_PREEMPT_LAZY | |
344 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
345 | select HAVE_REGS_AND_STACK_ACCESS_API | |
346 | select HAVE_SYSCALL_TRACEPOINTS | |
347 | diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h | |
348 | index 12ebfcc1d539..c962084605bc 100644 | |
349 | --- a/arch/arm/include/asm/switch_to.h | |
350 | +++ b/arch/arm/include/asm/switch_to.h | |
351 | @@ -3,6 +3,13 @@ | |
352 | ||
353 | #include <linux/thread_info.h> | |
354 | ||
355 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
356 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
357 | +#else | |
358 | +static inline void | |
359 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
360 | +#endif | |
361 | + | |
362 | /* | |
363 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
364 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
365 | @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info | |
366 | #define switch_to(prev,next,last) \ | |
367 | do { \ | |
368 | __complete_pending_tlbi(); \ | |
369 | + switch_kmaps(prev, next); \ | |
370 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
371 | } while (0) | |
372 | ||
373 | diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h | |
374 | index 776757d1604a..1f36a4eccc72 100644 | |
375 | --- a/arch/arm/include/asm/thread_info.h | |
376 | +++ b/arch/arm/include/asm/thread_info.h | |
377 | @@ -49,6 +49,7 @@ struct cpu_context_save { | |
378 | struct thread_info { | |
379 | unsigned long flags; /* low level flags */ | |
380 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
381 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
382 | mm_segment_t addr_limit; /* address limit */ | |
383 | struct task_struct *task; /* main task structure */ | |
384 | __u32 cpu; /* cpu */ | |
385 | @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
386 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ | |
387 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
388 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
389 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
390 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
391 | +#define TIF_NEED_RESCHED_LAZY 7 | |
392 | ||
393 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
394 | #define TIF_USING_IWMMXT 17 | |
395 | @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
396 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | |
397 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
398 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
399 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
400 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
401 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
402 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
403 | @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
404 | * Change these and you break ASM code in entry-common.S | |
405 | */ | |
406 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
407 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
408 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
409 | + _TIF_NEED_RESCHED_LAZY) | |
410 | ||
411 | #endif /* __KERNEL__ */ | |
412 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
413 | diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c | |
414 | index 608008229c7d..3866da3f7bb7 100644 | |
415 | --- a/arch/arm/kernel/asm-offsets.c | |
416 | +++ b/arch/arm/kernel/asm-offsets.c | |
417 | @@ -65,6 +65,7 @@ int main(void) | |
418 | BLANK(); | |
419 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
420 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
421 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
422 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
423 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
424 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
425 | diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S | |
426 | index 9f157e7c51e7..468e224d76aa 100644 | |
427 | --- a/arch/arm/kernel/entry-armv.S | |
428 | +++ b/arch/arm/kernel/entry-armv.S | |
429 | @@ -220,11 +220,18 @@ ENDPROC(__dabt_svc) | |
430 | ||
431 | #ifdef CONFIG_PREEMPT | |
432 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
433 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
434 | teq r8, #0 @ if preempt count != 0 | |
435 | + bne 1f @ return from exception | |
436 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
437 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
438 | + blne svc_preempt @ preempt! | |
439 | + | |
440 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
441 | + teq r8, #0 @ if preempt lazy count != 0 | |
442 | movne r0, #0 @ force flags to 0 | |
443 | - tst r0, #_TIF_NEED_RESCHED | |
444 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
445 | blne svc_preempt | |
446 | +1: | |
447 | #endif | |
448 | ||
449 | svc_exit r5, irq = 1 @ return from exception | |
450 | @@ -239,8 +246,14 @@ ENDPROC(__irq_svc) | |
451 | 1: bl preempt_schedule_irq @ irq en/disable is done inside | |
452 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
453 | tst r0, #_TIF_NEED_RESCHED | |
454 | + bne 1b | |
455 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
456 | reteq r8 @ go again | |
457 | - b 1b | |
458 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
459 | + teq r0, #0 @ if preempt lazy count != 0 | |
460 | + beq 1b | |
461 | + ret r8 @ go again | |
462 | + | |
463 | #endif | |
464 | ||
465 | __und_fault: | |
466 | diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S | |
467 | index 10c3283d6c19..8872937862cc 100644 | |
468 | --- a/arch/arm/kernel/entry-common.S | |
469 | +++ b/arch/arm/kernel/entry-common.S | |
470 | @@ -36,7 +36,9 @@ | |
471 | UNWIND(.cantunwind ) | |
472 | disable_irq_notrace @ disable interrupts | |
473 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
474 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
475 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
476 | + bne fast_work_pending | |
477 | + tst r1, #_TIF_SECCOMP | |
478 | bne fast_work_pending | |
479 | ||
480 | /* perform architecture specific actions before user return */ | |
481 | @@ -62,8 +64,11 @@ ENDPROC(ret_fast_syscall) | |
482 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | |
483 | disable_irq_notrace @ disable interrupts | |
484 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
485 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
486 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
487 | + bne do_slower_path | |
488 | + tst r1, #_TIF_SECCOMP | |
489 | beq no_work_pending | |
490 | +do_slower_path: | |
491 | UNWIND(.fnend ) | |
492 | ENDPROC(ret_fast_syscall) | |
493 | ||
494 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c | |
495 | index 612eb530f33f..cd3006dc1fd3 100644 | |
496 | --- a/arch/arm/kernel/process.c | |
497 | +++ b/arch/arm/kernel/process.c | |
498 | @@ -323,6 +323,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |
499 | } | |
500 | ||
501 | #ifdef CONFIG_MMU | |
502 | +/* | |
503 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
504 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
505 | + * fail. | |
506 | + */ | |
507 | +static int __init vectors_user_mapping_init_page(void) | |
508 | +{ | |
509 | + struct page *page; | |
510 | + unsigned long addr = 0xffff0000; | |
511 | + pgd_t *pgd; | |
512 | + pud_t *pud; | |
513 | + pmd_t *pmd; | |
514 | + | |
515 | + pgd = pgd_offset_k(addr); | |
516 | + pud = pud_offset(pgd, addr); | |
517 | + pmd = pmd_offset(pud, addr); | |
518 | + page = pmd_page(*(pmd)); | |
519 | + | |
520 | + pgtable_page_ctor(page); | |
521 | + | |
522 | + return 0; | |
523 | +} | |
524 | +late_initcall(vectors_user_mapping_init_page); | |
525 | + | |
526 | #ifdef CONFIG_KUSER_HELPERS | |
527 | /* | |
528 | * The vectors page is always readable from user space for the | |
529 | diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c | |
530 | index 7b8f2141427b..96541e00b74a 100644 | |
531 | --- a/arch/arm/kernel/signal.c | |
532 | +++ b/arch/arm/kernel/signal.c | |
533 | @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | |
534 | */ | |
535 | trace_hardirqs_off(); | |
536 | do { | |
537 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
538 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
539 | + _TIF_NEED_RESCHED_LAZY))) { | |
540 | schedule(); | |
541 | } else { | |
542 | if (unlikely(!user_mode(regs))) | |
543 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
544 | index 861521606c6d..e5ca865d321b 100644 | |
545 | --- a/arch/arm/kernel/smp.c | |
546 | +++ b/arch/arm/kernel/smp.c | |
547 | @@ -234,8 +234,6 @@ int __cpu_disable(void) | |
548 | flush_cache_louis(); | |
549 | local_flush_tlb_all(); | |
550 | ||
551 | - clear_tasks_mm_cpumask(cpu); | |
552 | - | |
553 | return 0; | |
554 | } | |
555 | ||
556 | @@ -251,6 +249,9 @@ void __cpu_die(unsigned int cpu) | |
557 | pr_err("CPU%u: cpu didn't die\n", cpu); | |
558 | return; | |
559 | } | |
560 | + | |
561 | + clear_tasks_mm_cpumask(cpu); | |
562 | + | |
563 | pr_notice("CPU%u: shutdown\n", cpu); | |
564 | ||
565 | /* | |
566 | diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c | |
567 | index 0bee233fef9a..314cfb232a63 100644 | |
568 | --- a/arch/arm/kernel/unwind.c | |
569 | +++ b/arch/arm/kernel/unwind.c | |
570 | @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; | |
571 | static const struct unwind_idx *__origin_unwind_idx; | |
572 | extern const struct unwind_idx __stop_unwind_idx[]; | |
573 | ||
574 | -static DEFINE_SPINLOCK(unwind_lock); | |
575 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
576 | static LIST_HEAD(unwind_tables); | |
577 | ||
578 | /* Convert a prel31 symbol to an absolute address */ | |
579 | @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
580 | /* module unwind tables */ | |
581 | struct unwind_table *table; | |
582 | ||
583 | - spin_lock_irqsave(&unwind_lock, flags); | |
584 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
585 | list_for_each_entry(table, &unwind_tables, list) { | |
586 | if (addr >= table->begin_addr && | |
587 | addr < table->end_addr) { | |
588 | @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
589 | break; | |
590 | } | |
591 | } | |
592 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
593 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
594 | } | |
595 | ||
596 | pr_debug("%s: idx = %p\n", __func__, idx); | |
597 | @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, | |
598 | tab->begin_addr = text_addr; | |
599 | tab->end_addr = text_addr + text_size; | |
600 | ||
601 | - spin_lock_irqsave(&unwind_lock, flags); | |
602 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
603 | list_add_tail(&tab->list, &unwind_tables); | |
604 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
605 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
606 | ||
607 | return tab; | |
608 | } | |
609 | @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) | |
610 | if (!tab) | |
611 | return; | |
612 | ||
613 | - spin_lock_irqsave(&unwind_lock, flags); | |
614 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
615 | list_del(&tab->list); | |
616 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
617 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
618 | ||
619 | kfree(tab); | |
620 | } | |
621 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | |
622 | index c94b90d43772..244dde72018a 100644 | |
623 | --- a/arch/arm/kvm/arm.c | |
624 | +++ b/arch/arm/kvm/arm.c | |
625 | @@ -584,7 +584,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
626 | * involves poking the GIC, which must be done in a | |
627 | * non-preemptible context. | |
628 | */ | |
629 | - preempt_disable(); | |
630 | + migrate_disable(); | |
631 | kvm_pmu_flush_hwstate(vcpu); | |
632 | kvm_timer_flush_hwstate(vcpu); | |
633 | kvm_vgic_flush_hwstate(vcpu); | |
634 | @@ -605,7 +605,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
635 | kvm_pmu_sync_hwstate(vcpu); | |
636 | kvm_timer_sync_hwstate(vcpu); | |
637 | kvm_vgic_sync_hwstate(vcpu); | |
638 | - preempt_enable(); | |
639 | + migrate_enable(); | |
640 | continue; | |
641 | } | |
642 | ||
643 | @@ -661,7 +661,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
644 | ||
645 | kvm_vgic_sync_hwstate(vcpu); | |
646 | ||
647 | - preempt_enable(); | |
648 | + migrate_enable(); | |
649 | ||
650 | ret = handle_exit(vcpu, run, ret); | |
651 | } | |
652 | diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c | |
653 | index 98ffe1e62ad5..df9769ddece5 100644 | |
654 | --- a/arch/arm/mach-exynos/platsmp.c | |
655 | +++ b/arch/arm/mach-exynos/platsmp.c | |
656 | @@ -229,7 +229,7 @@ static void __iomem *scu_base_addr(void) | |
657 | return (void __iomem *)(S5P_VA_SCU); | |
658 | } | |
659 | ||
660 | -static DEFINE_SPINLOCK(boot_lock); | |
661 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
662 | ||
663 | static void exynos_secondary_init(unsigned int cpu) | |
664 | { | |
665 | @@ -242,8 +242,8 @@ static void exynos_secondary_init(unsigned int cpu) | |
666 | /* | |
667 | * Synchronise with the boot thread. | |
668 | */ | |
669 | - spin_lock(&boot_lock); | |
670 | - spin_unlock(&boot_lock); | |
671 | + raw_spin_lock(&boot_lock); | |
672 | + raw_spin_unlock(&boot_lock); | |
673 | } | |
674 | ||
675 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
676 | @@ -307,7 +307,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
677 | * Set synchronisation state between this boot processor | |
678 | * and the secondary one | |
679 | */ | |
680 | - spin_lock(&boot_lock); | |
681 | + raw_spin_lock(&boot_lock); | |
682 | ||
683 | /* | |
684 | * The secondary processor is waiting to be released from | |
685 | @@ -334,7 +334,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
686 | ||
687 | if (timeout == 0) { | |
688 | printk(KERN_ERR "cpu1 power enable failed"); | |
689 | - spin_unlock(&boot_lock); | |
690 | + raw_spin_unlock(&boot_lock); | |
691 | return -ETIMEDOUT; | |
692 | } | |
693 | } | |
694 | @@ -380,7 +380,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
695 | * calibrations, then wait for it to finish | |
696 | */ | |
697 | fail: | |
698 | - spin_unlock(&boot_lock); | |
699 | + raw_spin_unlock(&boot_lock); | |
700 | ||
701 | return pen_release != -1 ? ret : 0; | |
702 | } | |
703 | diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c | |
704 | index 4b653a8cb75c..b03d5a922cb1 100644 | |
705 | --- a/arch/arm/mach-hisi/platmcpm.c | |
706 | +++ b/arch/arm/mach-hisi/platmcpm.c | |
707 | @@ -61,7 +61,7 @@ | |
708 | ||
709 | static void __iomem *sysctrl, *fabric; | |
710 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
711 | -static DEFINE_SPINLOCK(boot_lock); | |
712 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
713 | static u32 fabric_phys_addr; | |
714 | /* | |
715 | * [0]: bootwrapper physical address | |
716 | @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
717 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) | |
718 | return -EINVAL; | |
719 | ||
720 | - spin_lock_irq(&boot_lock); | |
721 | + raw_spin_lock_irq(&boot_lock); | |
722 | ||
723 | if (hip04_cpu_table[cluster][cpu]) | |
724 | goto out; | |
725 | @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
726 | ||
727 | out: | |
728 | hip04_cpu_table[cluster][cpu]++; | |
729 | - spin_unlock_irq(&boot_lock); | |
730 | + raw_spin_unlock_irq(&boot_lock); | |
731 | ||
732 | return 0; | |
733 | } | |
734 | @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
735 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); | |
736 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
737 | ||
738 | - spin_lock(&boot_lock); | |
739 | + raw_spin_lock(&boot_lock); | |
740 | hip04_cpu_table[cluster][cpu]--; | |
741 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
742 | /* A power_up request went ahead of us. */ | |
743 | - spin_unlock(&boot_lock); | |
744 | + raw_spin_unlock(&boot_lock); | |
745 | return; | |
746 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
747 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
748 | @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
749 | } | |
750 | ||
751 | last_man = hip04_cluster_is_down(cluster); | |
752 | - spin_unlock(&boot_lock); | |
753 | + raw_spin_unlock(&boot_lock); | |
754 | if (last_man) { | |
755 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
756 | asm volatile( | |
757 | @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
758 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); | |
759 | ||
760 | count = TIMEOUT_MSEC / POLL_MSEC; | |
761 | - spin_lock_irq(&boot_lock); | |
762 | + raw_spin_lock_irq(&boot_lock); | |
763 | for (tries = 0; tries < count; tries++) { | |
764 | if (hip04_cpu_table[cluster][cpu]) | |
765 | goto err; | |
766 | @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
767 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); | |
768 | if (data & CORE_WFI_STATUS(cpu)) | |
769 | break; | |
770 | - spin_unlock_irq(&boot_lock); | |
771 | + raw_spin_unlock_irq(&boot_lock); | |
772 | /* Wait for clean L2 when the whole cluster is down. */ | |
773 | msleep(POLL_MSEC); | |
774 | - spin_lock_irq(&boot_lock); | |
775 | + raw_spin_lock_irq(&boot_lock); | |
776 | } | |
777 | if (tries >= count) | |
778 | goto err; | |
779 | @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
780 | goto err; | |
781 | if (hip04_cluster_is_down(cluster)) | |
782 | hip04_set_snoop_filter(cluster, 0); | |
783 | - spin_unlock_irq(&boot_lock); | |
784 | + raw_spin_unlock_irq(&boot_lock); | |
785 | return 1; | |
786 | err: | |
787 | - spin_unlock_irq(&boot_lock); | |
788 | + raw_spin_unlock_irq(&boot_lock); | |
789 | return 0; | |
790 | } | |
791 | #endif | |
792 | diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c | |
793 | index b4de3da6dffa..b52893319d75 100644 | |
794 | --- a/arch/arm/mach-omap2/omap-smp.c | |
795 | +++ b/arch/arm/mach-omap2/omap-smp.c | |
796 | @@ -64,7 +64,7 @@ static const struct omap_smp_config omap5_cfg __initconst = { | |
797 | .startup_addr = omap5_secondary_startup, | |
798 | }; | |
799 | ||
800 | -static DEFINE_SPINLOCK(boot_lock); | |
801 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
802 | ||
803 | void __iomem *omap4_get_scu_base(void) | |
804 | { | |
805 | @@ -131,8 +131,8 @@ static void omap4_secondary_init(unsigned int cpu) | |
806 | /* | |
807 | * Synchronise with the boot thread. | |
808 | */ | |
809 | - spin_lock(&boot_lock); | |
810 | - spin_unlock(&boot_lock); | |
811 | + raw_spin_lock(&boot_lock); | |
812 | + raw_spin_unlock(&boot_lock); | |
813 | } | |
814 | ||
815 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
816 | @@ -146,7 +146,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
817 | * Set synchronisation state between this boot processor | |
818 | * and the secondary one | |
819 | */ | |
820 | - spin_lock(&boot_lock); | |
821 | + raw_spin_lock(&boot_lock); | |
822 | ||
823 | /* | |
824 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
825 | @@ -223,7 +223,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
826 | * Now the secondary core is starting up let it run its | |
827 | * calibrations, then wait for it to finish | |
828 | */ | |
829 | - spin_unlock(&boot_lock); | |
830 | + raw_spin_unlock(&boot_lock); | |
831 | ||
832 | return 0; | |
833 | } | |
834 | diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c | |
835 | index 0875b99add18..18b6d98d2581 100644 | |
836 | --- a/arch/arm/mach-prima2/platsmp.c | |
837 | +++ b/arch/arm/mach-prima2/platsmp.c | |
838 | @@ -22,7 +22,7 @@ | |
839 | ||
840 | static void __iomem *clk_base; | |
841 | ||
842 | -static DEFINE_SPINLOCK(boot_lock); | |
843 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
844 | ||
845 | static void sirfsoc_secondary_init(unsigned int cpu) | |
846 | { | |
847 | @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) | |
848 | /* | |
849 | * Synchronise with the boot thread. | |
850 | */ | |
851 | - spin_lock(&boot_lock); | |
852 | - spin_unlock(&boot_lock); | |
853 | + raw_spin_lock(&boot_lock); | |
854 | + raw_spin_unlock(&boot_lock); | |
855 | } | |
856 | ||
857 | static const struct of_device_id clk_ids[] = { | |
858 | @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
859 | /* make sure write buffer is drained */ | |
860 | mb(); | |
861 | ||
862 | - spin_lock(&boot_lock); | |
863 | + raw_spin_lock(&boot_lock); | |
864 | ||
865 | /* | |
866 | * The secondary processor is waiting to be released from | |
867 | @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
868 | * now the secondary core is starting up let it run its | |
869 | * calibrations, then wait for it to finish | |
870 | */ | |
871 | - spin_unlock(&boot_lock); | |
872 | + raw_spin_unlock(&boot_lock); | |
873 | ||
874 | return pen_release != -1 ? -ENOSYS : 0; | |
875 | } | |
876 | diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c | |
877 | index 5494c9e0c909..e8ce157d3548 100644 | |
878 | --- a/arch/arm/mach-qcom/platsmp.c | |
879 | +++ b/arch/arm/mach-qcom/platsmp.c | |
880 | @@ -46,7 +46,7 @@ | |
881 | ||
882 | extern void secondary_startup_arm(void); | |
883 | ||
884 | -static DEFINE_SPINLOCK(boot_lock); | |
885 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
886 | ||
887 | #ifdef CONFIG_HOTPLUG_CPU | |
888 | static void qcom_cpu_die(unsigned int cpu) | |
889 | @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) | |
890 | /* | |
891 | * Synchronise with the boot thread. | |
892 | */ | |
893 | - spin_lock(&boot_lock); | |
894 | - spin_unlock(&boot_lock); | |
895 | + raw_spin_lock(&boot_lock); | |
896 | + raw_spin_unlock(&boot_lock); | |
897 | } | |
898 | ||
899 | static int scss_release_secondary(unsigned int cpu) | |
900 | @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
901 | * set synchronisation state between this boot processor | |
902 | * and the secondary one | |
903 | */ | |
904 | - spin_lock(&boot_lock); | |
905 | + raw_spin_lock(&boot_lock); | |
906 | ||
907 | /* | |
908 | * Send the secondary CPU a soft interrupt, thereby causing | |
909 | @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
910 | * now the secondary core is starting up let it run its | |
911 | * calibrations, then wait for it to finish | |
912 | */ | |
913 | - spin_unlock(&boot_lock); | |
914 | + raw_spin_unlock(&boot_lock); | |
915 | ||
916 | return ret; | |
917 | } | |
918 | diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c | |
919 | index 8d1e2d551786..7fa56cc78118 100644 | |
920 | --- a/arch/arm/mach-spear/platsmp.c | |
921 | +++ b/arch/arm/mach-spear/platsmp.c | |
922 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
923 | sync_cache_w(&pen_release); | |
924 | } | |
925 | ||
926 | -static DEFINE_SPINLOCK(boot_lock); | |
927 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
928 | ||
929 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
930 | ||
931 | @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) | |
932 | /* | |
933 | * Synchronise with the boot thread. | |
934 | */ | |
935 | - spin_lock(&boot_lock); | |
936 | - spin_unlock(&boot_lock); | |
937 | + raw_spin_lock(&boot_lock); | |
938 | + raw_spin_unlock(&boot_lock); | |
939 | } | |
940 | ||
941 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
942 | @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
943 | * set synchronisation state between this boot processor | |
944 | * and the secondary one | |
945 | */ | |
946 | - spin_lock(&boot_lock); | |
947 | + raw_spin_lock(&boot_lock); | |
948 | ||
949 | /* | |
950 | * The secondary processor is waiting to be released from | |
951 | @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
952 | * now the secondary core is starting up let it run its | |
953 | * calibrations, then wait for it to finish | |
954 | */ | |
955 | - spin_unlock(&boot_lock); | |
956 | + raw_spin_unlock(&boot_lock); | |
957 | ||
958 | return pen_release != -1 ? -ENOSYS : 0; | |
959 | } | |
960 | diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c | |
961 | index ea5a2277ee46..b988e081ac79 100644 | |
962 | --- a/arch/arm/mach-sti/platsmp.c | |
963 | +++ b/arch/arm/mach-sti/platsmp.c | |
964 | @@ -35,7 +35,7 @@ static void write_pen_release(int val) | |
965 | sync_cache_w(&pen_release); | |
966 | } | |
967 | ||
968 | -static DEFINE_SPINLOCK(boot_lock); | |
969 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
970 | ||
971 | static void sti_secondary_init(unsigned int cpu) | |
972 | { | |
973 | @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) | |
974 | /* | |
975 | * Synchronise with the boot thread. | |
976 | */ | |
977 | - spin_lock(&boot_lock); | |
978 | - spin_unlock(&boot_lock); | |
979 | + raw_spin_lock(&boot_lock); | |
980 | + raw_spin_unlock(&boot_lock); | |
981 | } | |
982 | ||
983 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
984 | @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
985 | * set synchronisation state between this boot processor | |
986 | * and the secondary one | |
987 | */ | |
988 | - spin_lock(&boot_lock); | |
989 | + raw_spin_lock(&boot_lock); | |
990 | ||
991 | /* | |
992 | * The secondary processor is waiting to be released from | |
993 | @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
994 | * now the secondary core is starting up let it run its | |
995 | * calibrations, then wait for it to finish | |
996 | */ | |
997 | - spin_unlock(&boot_lock); | |
998 | + raw_spin_unlock(&boot_lock); | |
999 | ||
1000 | return pen_release != -1 ? -ENOSYS : 0; | |
1001 | } | |
1002 | diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c | |
1003 | index 3a2e678b8d30..3ed1e9ba6a01 100644 | |
1004 | --- a/arch/arm/mm/fault.c | |
1005 | +++ b/arch/arm/mm/fault.c | |
1006 | @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
1007 | if (addr < TASK_SIZE) | |
1008 | return do_page_fault(addr, fsr, regs); | |
1009 | ||
1010 | + if (interrupts_enabled(regs)) | |
1011 | + local_irq_enable(); | |
1012 | + | |
1013 | if (user_mode(regs)) | |
1014 | goto bad_area; | |
1015 | ||
1016 | @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
1017 | static int | |
1018 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
1019 | { | |
1020 | + if (interrupts_enabled(regs)) | |
1021 | + local_irq_enable(); | |
1022 | + | |
1023 | do_bad_area(addr, fsr, regs); | |
1024 | return 0; | |
1025 | } | |
1026 | diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c | |
1027 | index d02f8187b1cc..542692dbd40a 100644 | |
1028 | --- a/arch/arm/mm/highmem.c | |
1029 | +++ b/arch/arm/mm/highmem.c | |
1030 | @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) | |
1031 | return *ptep; | |
1032 | } | |
1033 | ||
1034 | +static unsigned int fixmap_idx(int type) | |
1035 | +{ | |
1036 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1037 | +} | |
1038 | + | |
1039 | void *kmap(struct page *page) | |
1040 | { | |
1041 | might_sleep(); | |
1042 | @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); | |
1043 | ||
1044 | void *kmap_atomic(struct page *page) | |
1045 | { | |
1046 | + pte_t pte = mk_pte(page, kmap_prot); | |
1047 | unsigned int idx; | |
1048 | unsigned long vaddr; | |
1049 | void *kmap; | |
1050 | int type; | |
1051 | ||
1052 | - preempt_disable(); | |
1053 | + preempt_disable_nort(); | |
1054 | pagefault_disable(); | |
1055 | if (!PageHighMem(page)) | |
1056 | return page_address(page); | |
1057 | @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) | |
1058 | ||
1059 | type = kmap_atomic_idx_push(); | |
1060 | ||
1061 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1062 | + idx = fixmap_idx(type); | |
1063 | vaddr = __fix_to_virt(idx); | |
1064 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1065 | /* | |
1066 | @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) | |
1067 | * in place, so the contained TLB flush ensures the TLB is updated | |
1068 | * with the new mapping. | |
1069 | */ | |
1070 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
1071 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1072 | + current->kmap_pte[type] = pte; | |
1073 | +#endif | |
1074 | + set_fixmap_pte(idx, pte); | |
1075 | ||
1076 | return (void *)vaddr; | |
1077 | } | |
1078 | @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) | |
1079 | ||
1080 | if (kvaddr >= (void *)FIXADDR_START) { | |
1081 | type = kmap_atomic_idx(); | |
1082 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1083 | + idx = fixmap_idx(type); | |
1084 | ||
1085 | if (cache_is_vivt()) | |
1086 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
1087 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1088 | + current->kmap_pte[type] = __pte(0); | |
1089 | +#endif | |
1090 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1091 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
1092 | - set_fixmap_pte(idx, __pte(0)); | |
1093 | #else | |
1094 | (void) idx; /* to kill a warning */ | |
1095 | #endif | |
1096 | + set_fixmap_pte(idx, __pte(0)); | |
1097 | kmap_atomic_idx_pop(); | |
1098 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
1099 | /* this address was obtained through kmap_high_get() */ | |
1100 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
1101 | } | |
1102 | pagefault_enable(); | |
1103 | - preempt_enable(); | |
1104 | + preempt_enable_nort(); | |
1105 | } | |
1106 | EXPORT_SYMBOL(__kunmap_atomic); | |
1107 | ||
1108 | void *kmap_atomic_pfn(unsigned long pfn) | |
1109 | { | |
1110 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
1111 | unsigned long vaddr; | |
1112 | int idx, type; | |
1113 | struct page *page = pfn_to_page(pfn); | |
1114 | ||
1115 | - preempt_disable(); | |
1116 | + preempt_disable_nort(); | |
1117 | pagefault_disable(); | |
1118 | if (!PageHighMem(page)) | |
1119 | return page_address(page); | |
1120 | ||
1121 | type = kmap_atomic_idx_push(); | |
1122 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1123 | + idx = fixmap_idx(type); | |
1124 | vaddr = __fix_to_virt(idx); | |
1125 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1126 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
1127 | #endif | |
1128 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
1129 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1130 | + current->kmap_pte[type] = pte; | |
1131 | +#endif | |
1132 | + set_fixmap_pte(idx, pte); | |
1133 | ||
1134 | return (void *)vaddr; | |
1135 | } | |
1136 | +#if defined CONFIG_PREEMPT_RT_FULL | |
1137 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
1138 | +{ | |
1139 | + int i; | |
1140 | + | |
1141 | + /* | |
1142 | + * Clear @prev's kmap_atomic mappings | |
1143 | + */ | |
1144 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
1145 | + int idx = fixmap_idx(i); | |
1146 | + | |
1147 | + set_fixmap_pte(idx, __pte(0)); | |
1148 | + } | |
1149 | + /* | |
1150 | + * Restore @next_p's kmap_atomic mappings | |
1151 | + */ | |
1152 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
1153 | + int idx = fixmap_idx(i); | |
1154 | + | |
1155 | + if (!pte_none(next_p->kmap_pte[i])) | |
1156 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
1157 | + } | |
1158 | +} | |
1159 | +#endif | |
1160 | diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c | |
1161 | index c2366510187a..6b60f582b738 100644 | |
1162 | --- a/arch/arm/plat-versatile/platsmp.c | |
1163 | +++ b/arch/arm/plat-versatile/platsmp.c | |
1164 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
1165 | sync_cache_w(&pen_release); | |
1166 | } | |
1167 | ||
1168 | -static DEFINE_SPINLOCK(boot_lock); | |
1169 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1170 | ||
1171 | void versatile_secondary_init(unsigned int cpu) | |
1172 | { | |
1173 | @@ -45,8 +45,8 @@ void versatile_secondary_init(unsigned int cpu) | |
1174 | /* | |
1175 | * Synchronise with the boot thread. | |
1176 | */ | |
1177 | - spin_lock(&boot_lock); | |
1178 | - spin_unlock(&boot_lock); | |
1179 | + raw_spin_lock(&boot_lock); | |
1180 | + raw_spin_unlock(&boot_lock); | |
1181 | } | |
1182 | ||
1183 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1184 | @@ -57,7 +57,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1185 | * Set synchronisation state between this boot processor | |
1186 | * and the secondary one | |
1187 | */ | |
1188 | - spin_lock(&boot_lock); | |
1189 | + raw_spin_lock(&boot_lock); | |
1190 | ||
1191 | /* | |
1192 | * This is really belt and braces; we hold unintended secondary | |
1193 | @@ -87,7 +87,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1194 | * now the secondary core is starting up let it run its | |
1195 | * calibrations, then wait for it to finish | |
1196 | */ | |
1197 | - spin_unlock(&boot_lock); | |
1198 | + raw_spin_unlock(&boot_lock); | |
1199 | ||
1200 | return pen_release != -1 ? -ENOSYS : 0; | |
1201 | } | |
1202 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | |
1203 | index bc3f00f586f1..0f3df6d5154a 100644 | |
1204 | --- a/arch/arm64/Kconfig | |
1205 | +++ b/arch/arm64/Kconfig | |
1206 | @@ -90,6 +90,7 @@ config ARM64 | |
1207 | select HAVE_PERF_EVENTS | |
1208 | select HAVE_PERF_REGS | |
1209 | select HAVE_PERF_USER_STACK_DUMP | |
1210 | + select HAVE_PREEMPT_LAZY | |
1211 | select HAVE_REGS_AND_STACK_ACCESS_API | |
1212 | select HAVE_RCU_TABLE_FREE | |
1213 | select HAVE_SYSCALL_TRACEPOINTS | |
1214 | @@ -689,7 +690,7 @@ config XEN_DOM0 | |
1215 | ||
1216 | config XEN | |
1217 | bool "Xen guest support on ARM64" | |
1218 | - depends on ARM64 && OF | |
1219 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1220 | select SWIOTLB_XEN | |
1221 | select PARAVIRT | |
1222 | help | |
1223 | diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h | |
1224 | index abd64bd1f6d9..9170788ffa37 100644 | |
1225 | --- a/arch/arm64/include/asm/thread_info.h | |
1226 | +++ b/arch/arm64/include/asm/thread_info.h | |
1227 | @@ -49,6 +49,7 @@ struct thread_info { | |
1228 | mm_segment_t addr_limit; /* address limit */ | |
1229 | struct task_struct *task; /* main task structure */ | |
1230 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
1231 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1232 | int cpu; /* cpu */ | |
1233 | }; | |
1234 | ||
1235 | @@ -109,6 +110,7 @@ static inline struct thread_info *current_thread_info(void) | |
1236 | #define TIF_NEED_RESCHED 1 | |
1237 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | |
1238 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ | |
1239 | +#define TIF_NEED_RESCHED_LAZY 4 | |
1240 | #define TIF_NOHZ 7 | |
1241 | #define TIF_SYSCALL_TRACE 8 | |
1242 | #define TIF_SYSCALL_AUDIT 9 | |
1243 | @@ -124,6 +126,7 @@ static inline struct thread_info *current_thread_info(void) | |
1244 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
1245 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1246 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1247 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1248 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1249 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1250 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
1251 | @@ -132,7 +135,8 @@ static inline struct thread_info *current_thread_info(void) | |
1252 | #define _TIF_32BIT (1 << TIF_32BIT) | |
1253 | ||
1254 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
1255 | - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) | |
1256 | + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ | |
1257 | + _TIF_NEED_RESCHED_LAZY) | |
1258 | ||
1259 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1260 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
1261 | diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c | |
1262 | index 05070b72fc28..acfeddb1283a 100644 | |
1263 | --- a/arch/arm64/kernel/asm-offsets.c | |
1264 | +++ b/arch/arm64/kernel/asm-offsets.c | |
1265 | @@ -37,6 +37,7 @@ int main(void) | |
1266 | BLANK(); | |
1267 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1268 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1269 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1270 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
1271 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1272 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1273 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | |
1274 | index 441420ca7d08..404792bdca99 100644 | |
1275 | --- a/arch/arm64/kernel/entry.S | |
1276 | +++ b/arch/arm64/kernel/entry.S | |
1277 | @@ -434,11 +434,16 @@ ENDPROC(el1_sync) | |
1278 | ||
1279 | #ifdef CONFIG_PREEMPT | |
1280 | ldr w24, [tsk, #TI_PREEMPT] // get preempt count | |
1281 | - cbnz w24, 1f // preempt count != 0 | |
1282 | + cbnz w24, 2f // preempt count != 0 | |
1283 | ldr x0, [tsk, #TI_FLAGS] // get flags | |
1284 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1285 | - bl el1_preempt | |
1286 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1287 | + | |
1288 | + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count | |
1289 | + cbnz w24, 2f // preempt lazy count != 0 | |
1290 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1291 | 1: | |
1292 | + bl el1_preempt | |
1293 | +2: | |
1294 | #endif | |
1295 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1296 | bl trace_hardirqs_on | |
1297 | @@ -452,6 +457,7 @@ ENDPROC(el1_irq) | |
1298 | 1: bl preempt_schedule_irq // irq en/disable is done inside | |
1299 | ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS | |
1300 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? | |
1301 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1302 | ret x24 | |
1303 | #endif | |
1304 | ||
1305 | @@ -708,6 +714,7 @@ ENDPROC(cpu_switch_to) | |
1306 | */ | |
1307 | work_pending: | |
1308 | tbnz x1, #TIF_NEED_RESCHED, work_resched | |
1309 | + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched | |
1310 | /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ | |
1311 | mov x0, sp // 'regs' | |
1312 | enable_irq // enable interrupts for do_notify_resume() | |
1313 | diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig | |
1314 | index 212ff92920d2..71ad38d3d76b 100644 | |
1315 | --- a/arch/mips/Kconfig | |
1316 | +++ b/arch/mips/Kconfig | |
1317 | @@ -2480,7 +2480,7 @@ config MIPS_ASID_BITS_VARIABLE | |
1318 | # | |
1319 | config HIGHMEM | |
1320 | bool "High Memory Support" | |
1321 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1322 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1323 | ||
1324 | config CPU_SUPPORTS_HIGHMEM | |
1325 | bool | |
1326 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | |
1327 | index 792cb1768c8f..ddf5a0fdb25a 100644 | |
1328 | --- a/arch/powerpc/Kconfig | |
1329 | +++ b/arch/powerpc/Kconfig | |
1330 | @@ -57,10 +57,11 @@ config LOCKDEP_SUPPORT | |
1331 | ||
1332 | config RWSEM_GENERIC_SPINLOCK | |
1333 | bool | |
1334 | + default y if PREEMPT_RT_FULL | |
1335 | ||
1336 | config RWSEM_XCHGADD_ALGORITHM | |
1337 | bool | |
1338 | - default y | |
1339 | + default y if !PREEMPT_RT_FULL | |
1340 | ||
1341 | config GENERIC_LOCKBREAK | |
1342 | bool | |
1343 | @@ -140,6 +141,7 @@ config PPC | |
1344 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST | |
1345 | select GENERIC_STRNCPY_FROM_USER | |
1346 | select GENERIC_STRNLEN_USER | |
1347 | + select HAVE_PREEMPT_LAZY | |
1348 | select HAVE_MOD_ARCH_SPECIFIC | |
1349 | select MODULES_USE_ELF_RELA | |
1350 | select CLONE_BACKWARDS | |
1351 | @@ -326,7 +328,7 @@ menu "Kernel options" | |
1352 | ||
1353 | config HIGHMEM | |
1354 | bool "High memory support" | |
1355 | - depends on PPC32 | |
1356 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1357 | ||
1358 | source kernel/Kconfig.hz | |
1359 | source kernel/Kconfig.preempt | |
1360 | diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h | |
1361 | index 87e4b2d8dcd4..981e501a4359 100644 | |
1362 | --- a/arch/powerpc/include/asm/thread_info.h | |
1363 | +++ b/arch/powerpc/include/asm/thread_info.h | |
1364 | @@ -43,6 +43,8 @@ struct thread_info { | |
1365 | int cpu; /* cpu we're on */ | |
1366 | int preempt_count; /* 0 => preemptable, | |
1367 | <0 => BUG */ | |
1368 | + int preempt_lazy_count; /* 0 => preemptable, | |
1369 | + <0 => BUG */ | |
1370 | unsigned long local_flags; /* private flags for thread */ | |
1371 | #ifdef CONFIG_LIVEPATCH | |
1372 | unsigned long *livepatch_sp; | |
1373 | @@ -88,8 +90,7 @@ static inline struct thread_info *current_thread_info(void) | |
1374 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ | |
1375 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1376 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1377 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1378 | - TIF_NEED_RESCHED */ | |
1379 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1380 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1381 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
1382 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
1383 | @@ -107,6 +108,8 @@ static inline struct thread_info *current_thread_info(void) | |
1384 | #if defined(CONFIG_PPC64) | |
1385 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1386 | #endif | |
1387 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1388 | + TIF_NEED_RESCHED */ | |
1389 | ||
1390 | /* as above, but as bit values */ | |
1391 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
1392 | @@ -125,14 +128,16 @@ static inline struct thread_info *current_thread_info(void) | |
1393 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) | |
1394 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1395 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1396 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1397 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1398 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1399 | _TIF_NOHZ) | |
1400 | ||
1401 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1402 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1403 | - _TIF_RESTORE_TM) | |
1404 | + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) | |
1405 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
1406 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1407 | ||
1408 | /* Bits in local_flags */ | |
1409 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
1410 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | |
1411 | index b89d14c0352c..81ae8f4c88f6 100644 | |
1412 | --- a/arch/powerpc/kernel/asm-offsets.c | |
1413 | +++ b/arch/powerpc/kernel/asm-offsets.c | |
1414 | @@ -156,6 +156,7 @@ int main(void) | |
1415 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1416 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | |
1417 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1418 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1419 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1420 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1421 | ||
1422 | diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S | |
1423 | index 9899032230b4..f95b93f46c47 100644 | |
1424 | --- a/arch/powerpc/kernel/entry_32.S | |
1425 | +++ b/arch/powerpc/kernel/entry_32.S | |
1426 | @@ -835,7 +835,14 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1427 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1428 | bne restore | |
1429 | andi. r8,r8,_TIF_NEED_RESCHED | |
1430 | + bne+ 1f | |
1431 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1432 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1433 | + bne restore | |
1434 | + lwz r0,TI_FLAGS(r9) | |
1435 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1436 | beq+ restore | |
1437 | +1: | |
1438 | lwz r3,_MSR(r1) | |
1439 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1440 | beq restore /* don't schedule if so */ | |
1441 | @@ -846,11 +853,11 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ | |
1442 | */ | |
1443 | bl trace_hardirqs_off | |
1444 | #endif | |
1445 | -1: bl preempt_schedule_irq | |
1446 | +2: bl preempt_schedule_irq | |
1447 | CURRENT_THREAD_INFO(r9, r1) | |
1448 | lwz r3,TI_FLAGS(r9) | |
1449 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1450 | - bne- 1b | |
1451 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1452 | + bne- 2b | |
1453 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1454 | /* And now, to properly rebalance the above, we tell lockdep they | |
1455 | * are being turned back on, which will happen when we return | |
1456 | @@ -1171,7 +1178,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) | |
1457 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ | |
1458 | ||
1459 | do_work: /* r10 contains MSR_KERNEL here */ | |
1460 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1461 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1462 | beq do_user_signal | |
1463 | ||
1464 | do_resched: /* r10 contains MSR_KERNEL here */ | |
1465 | @@ -1192,7 +1199,7 @@ do_resched: /* r10 contains MSR_KERNEL here */ | |
1466 | MTMSRD(r10) /* disable interrupts */ | |
1467 | CURRENT_THREAD_INFO(r9, r1) | |
1468 | lwz r9,TI_FLAGS(r9) | |
1469 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1470 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1471 | bne- do_resched | |
1472 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1473 | beq restore_user | |
1474 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | |
1475 | index 5afd03e5e8b8..f5d4c2a033ef 100644 | |
1476 | --- a/arch/powerpc/kernel/entry_64.S | |
1477 | +++ b/arch/powerpc/kernel/entry_64.S | |
1478 | @@ -657,7 +657,7 @@ _GLOBAL(ret_from_except_lite) | |
1479 | bl restore_math | |
1480 | b restore | |
1481 | #endif | |
1482 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1483 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1484 | beq 2f | |
1485 | bl restore_interrupts | |
1486 | SCHEDULE_USER | |
1487 | @@ -719,10 +719,18 @@ _GLOBAL(ret_from_except_lite) | |
1488 | ||
1489 | #ifdef CONFIG_PREEMPT | |
1490 | /* Check if we need to preempt */ | |
1491 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1492 | - beq+ restore | |
1493 | - /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1494 | lwz r8,TI_PREEMPT(r9) | |
1495 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ | |
1496 | + bne restore | |
1497 | + andi. r0,r4,_TIF_NEED_RESCHED | |
1498 | + bne+ check_count | |
1499 | + | |
1500 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
1501 | + beq+ restore | |
1502 | + lwz r8,TI_PREEMPT_LAZY(r9) | |
1503 | + | |
1504 | + /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1505 | +check_count: | |
1506 | cmpwi cr1,r8,0 | |
1507 | ld r0,SOFTE(r1) | |
1508 | cmpdi r0,0 | |
1509 | @@ -739,7 +747,7 @@ _GLOBAL(ret_from_except_lite) | |
1510 | /* Re-test flags and eventually loop */ | |
1511 | CURRENT_THREAD_INFO(r9, r1) | |
1512 | ld r4,TI_FLAGS(r9) | |
1513 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1514 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1515 | bne 1b | |
1516 | ||
1517 | /* | |
1518 | diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c | |
1519 | index 08887cf2b20e..f1770ea2d094 100644 | |
1520 | --- a/arch/powerpc/kernel/irq.c | |
1521 | +++ b/arch/powerpc/kernel/irq.c | |
1522 | @@ -633,6 +633,7 @@ void irq_ctx_init(void) | |
1523 | } | |
1524 | } | |
1525 | ||
1526 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1527 | void do_softirq_own_stack(void) | |
1528 | { | |
1529 | struct thread_info *curtp, *irqtp; | |
1530 | @@ -650,6 +651,7 @@ void do_softirq_own_stack(void) | |
1531 | if (irqtp->flags) | |
1532 | set_bits(irqtp->flags, &curtp->flags); | |
1533 | } | |
1534 | +#endif | |
1535 | ||
1536 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1537 | { | |
1538 | diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S | |
1539 | index d9c912b6e632..7b2e997a5083 100644 | |
1540 | --- a/arch/powerpc/kernel/misc_32.S | |
1541 | +++ b/arch/powerpc/kernel/misc_32.S | |
1542 | @@ -40,6 +40,7 @@ | |
1543 | * We store the saved ksp_limit in the unused part | |
1544 | * of the STACK_FRAME_OVERHEAD | |
1545 | */ | |
1546 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1547 | _GLOBAL(call_do_softirq) | |
1548 | mflr r0 | |
1549 | stw r0,4(r1) | |
1550 | @@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq) | |
1551 | stw r10,THREAD+KSP_LIMIT(r2) | |
1552 | mtlr r0 | |
1553 | blr | |
1554 | +#endif | |
1555 | ||
1556 | /* | |
1557 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
1558 | diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S | |
1559 | index cb195157b318..c919a2bfd0ca 100644 | |
1560 | --- a/arch/powerpc/kernel/misc_64.S | |
1561 | +++ b/arch/powerpc/kernel/misc_64.S | |
1562 | @@ -30,6 +30,7 @@ | |
1563 | ||
1564 | .text | |
1565 | ||
1566 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1567 | _GLOBAL(call_do_softirq) | |
1568 | mflr r0 | |
1569 | std r0,16(r1) | |
1570 | @@ -40,6 +41,7 @@ _GLOBAL(call_do_softirq) | |
1571 | ld r0,16(r1) | |
1572 | mtlr r0 | |
1573 | blr | |
1574 | +#endif | |
1575 | ||
1576 | _GLOBAL(call_do_irq) | |
1577 | mflr r0 | |
1578 | diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig | |
1579 | index c2024ac9d4e8..2303788da7e1 100644 | |
1580 | --- a/arch/powerpc/kvm/Kconfig | |
1581 | +++ b/arch/powerpc/kvm/Kconfig | |
1582 | @@ -172,6 +172,7 @@ config KVM_E500MC | |
1583 | config KVM_MPIC | |
1584 | bool "KVM in-kernel MPIC emulation" | |
1585 | depends on KVM && E500 | |
1586 | + depends on !PREEMPT_RT_FULL | |
1587 | select HAVE_KVM_IRQCHIP | |
1588 | select HAVE_KVM_IRQFD | |
1589 | select HAVE_KVM_IRQ_ROUTING | |
1590 | diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c | |
1591 | index 57caaf11a83f..030c9bfe52e3 100644 | |
1592 | --- a/arch/powerpc/platforms/ps3/device-init.c | |
1593 | +++ b/arch/powerpc/platforms/ps3/device-init.c | |
1594 | @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, | |
1595 | } | |
1596 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
1597 | ||
1598 | - res = wait_event_interruptible(dev->done.wait, | |
1599 | + res = swait_event_interruptible(dev->done.wait, | |
1600 | dev->done.done || kthread_should_stop()); | |
1601 | if (kthread_should_stop()) | |
1602 | res = -EINTR; | |
1603 | diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c | |
1604 | index 6c0378c0b8b5..abd58b4dff97 100644 | |
1605 | --- a/arch/sh/kernel/irq.c | |
1606 | +++ b/arch/sh/kernel/irq.c | |
1607 | @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) | |
1608 | hardirq_ctx[cpu] = NULL; | |
1609 | } | |
1610 | ||
1611 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1612 | void do_softirq_own_stack(void) | |
1613 | { | |
1614 | struct thread_info *curctx; | |
1615 | @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) | |
1616 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" | |
1617 | ); | |
1618 | } | |
1619 | +#endif | |
1620 | #else | |
1621 | static inline void handle_one_irq(unsigned int irq) | |
1622 | { | |
1623 | diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig | |
1624 | index 59b09600dd32..1b073eb3dc2a 100644 | |
1625 | --- a/arch/sparc/Kconfig | |
1626 | +++ b/arch/sparc/Kconfig | |
1627 | @@ -187,12 +187,10 @@ config NR_CPUS | |
1628 | source kernel/Kconfig.hz | |
1629 | ||
1630 | config RWSEM_GENERIC_SPINLOCK | |
1631 | - bool | |
1632 | - default y if SPARC32 | |
1633 | + def_bool PREEMPT_RT_FULL | |
1634 | ||
1635 | config RWSEM_XCHGADD_ALGORITHM | |
1636 | - bool | |
1637 | - default y if SPARC64 | |
1638 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1639 | ||
1640 | config GENERIC_HWEIGHT | |
1641 | bool | |
1642 | diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c | |
1643 | index 34a7930b76ef..773740521008 100644 | |
1644 | --- a/arch/sparc/kernel/irq_64.c | |
1645 | +++ b/arch/sparc/kernel/irq_64.c | |
1646 | @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) | |
1647 | set_irq_regs(old_regs); | |
1648 | } | |
1649 | ||
1650 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1651 | void do_softirq_own_stack(void) | |
1652 | { | |
1653 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
1654 | @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) | |
1655 | __asm__ __volatile__("mov %0, %%sp" | |
1656 | : : "r" (orig_sp)); | |
1657 | } | |
1658 | +#endif | |
1659 | ||
1660 | #ifdef CONFIG_HOTPLUG_CPU | |
1661 | void fixup_irqs(void) | |
1662 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | |
1663 | index 2a1f0ce7c59a..bd4ab87efb31 100644 | |
1664 | --- a/arch/x86/Kconfig | |
1665 | +++ b/arch/x86/Kconfig | |
1666 | @@ -17,6 +17,7 @@ config X86_64 | |
1667 | ### Arch settings | |
1668 | config X86 | |
1669 | def_bool y | |
1670 | + select HAVE_PREEMPT_LAZY | |
1671 | select ACPI_LEGACY_TABLES_LOOKUP if ACPI | |
1672 | select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI | |
1673 | select ANON_INODES | |
1674 | @@ -231,8 +232,11 @@ config ARCH_MAY_HAVE_PC_FDC | |
1675 | def_bool y | |
1676 | depends on ISA_DMA_API | |
1677 | ||
1678 | +config RWSEM_GENERIC_SPINLOCK | |
1679 | + def_bool PREEMPT_RT_FULL | |
1680 | + | |
1681 | config RWSEM_XCHGADD_ALGORITHM | |
1682 | - def_bool y | |
1683 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
1684 | ||
1685 | config GENERIC_CALIBRATE_DELAY | |
1686 | def_bool y | |
1687 | @@ -885,7 +889,7 @@ config IOMMU_HELPER | |
1688 | config MAXSMP | |
1689 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
1690 | depends on X86_64 && SMP && DEBUG_KERNEL | |
1691 | - select CPUMASK_OFFSTACK | |
1692 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
1693 | ---help--- | |
1694 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
1695 | If unsure, say N. | |
1696 | diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c | |
1697 | index 0ab5ee1c26af..fff8f6f1f90c 100644 | |
1698 | --- a/arch/x86/crypto/aesni-intel_glue.c | |
1699 | +++ b/arch/x86/crypto/aesni-intel_glue.c | |
1700 | @@ -372,14 +372,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |
1701 | err = blkcipher_walk_virt(desc, &walk); | |
1702 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1703 | ||
1704 | - kernel_fpu_begin(); | |
1705 | while ((nbytes = walk.nbytes)) { | |
1706 | + kernel_fpu_begin(); | |
1707 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1708 | - nbytes & AES_BLOCK_MASK); | |
1709 | + nbytes & AES_BLOCK_MASK); | |
1710 | + kernel_fpu_end(); | |
1711 | nbytes &= AES_BLOCK_SIZE - 1; | |
1712 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1713 | } | |
1714 | - kernel_fpu_end(); | |
1715 | ||
1716 | return err; | |
1717 | } | |
1718 | @@ -396,14 +396,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |
1719 | err = blkcipher_walk_virt(desc, &walk); | |
1720 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1721 | ||
1722 | - kernel_fpu_begin(); | |
1723 | while ((nbytes = walk.nbytes)) { | |
1724 | + kernel_fpu_begin(); | |
1725 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1726 | nbytes & AES_BLOCK_MASK); | |
1727 | + kernel_fpu_end(); | |
1728 | nbytes &= AES_BLOCK_SIZE - 1; | |
1729 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1730 | } | |
1731 | - kernel_fpu_end(); | |
1732 | ||
1733 | return err; | |
1734 | } | |
1735 | @@ -420,14 +420,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |
1736 | err = blkcipher_walk_virt(desc, &walk); | |
1737 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1738 | ||
1739 | - kernel_fpu_begin(); | |
1740 | while ((nbytes = walk.nbytes)) { | |
1741 | + kernel_fpu_begin(); | |
1742 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1743 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1744 | + kernel_fpu_end(); | |
1745 | nbytes &= AES_BLOCK_SIZE - 1; | |
1746 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1747 | } | |
1748 | - kernel_fpu_end(); | |
1749 | ||
1750 | return err; | |
1751 | } | |
1752 | @@ -444,14 +444,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |
1753 | err = blkcipher_walk_virt(desc, &walk); | |
1754 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1755 | ||
1756 | - kernel_fpu_begin(); | |
1757 | while ((nbytes = walk.nbytes)) { | |
1758 | + kernel_fpu_begin(); | |
1759 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1760 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1761 | + kernel_fpu_end(); | |
1762 | nbytes &= AES_BLOCK_SIZE - 1; | |
1763 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1764 | } | |
1765 | - kernel_fpu_end(); | |
1766 | ||
1767 | return err; | |
1768 | } | |
1769 | @@ -503,18 +503,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |
1770 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | |
1771 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1772 | ||
1773 | - kernel_fpu_begin(); | |
1774 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
1775 | + kernel_fpu_begin(); | |
1776 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
1777 | nbytes & AES_BLOCK_MASK, walk.iv); | |
1778 | + kernel_fpu_end(); | |
1779 | nbytes &= AES_BLOCK_SIZE - 1; | |
1780 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1781 | } | |
1782 | if (walk.nbytes) { | |
1783 | + kernel_fpu_begin(); | |
1784 | ctr_crypt_final(ctx, &walk); | |
1785 | + kernel_fpu_end(); | |
1786 | err = blkcipher_walk_done(desc, &walk, 0); | |
1787 | } | |
1788 | - kernel_fpu_end(); | |
1789 | ||
1790 | return err; | |
1791 | } | |
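Every hunk in the aesni glue above makes the same move: kernel_fpu_begin()/kernel_fpu_end() shift from wrapping the whole walk loop to wrapping a single iteration, so the non-preemptible FPU section covers one batch of blocks at a time rather than the entire request. A rough user-space sketch of that shape, with hypothetical enter_atomic()/leave_atomic()/process_one_batch() helpers standing in for the kernel calls:

    #include <stdio.h>

    /* Hypothetical stand-ins for kernel_fpu_begin()/kernel_fpu_end(). */
    static void enter_atomic(void) { /* disable preemption, take the FPU */ }
    static void leave_atomic(void) { /* release the FPU, allow preemption */ }

    static void process_one_batch(int i) { printf("batch %d\n", i); }

    /* Shape of the change: the atomic window covers one batch, not all of them. */
    static void crypt_all_batches(int nbatches)
    {
        for (int i = 0; i < nbatches; i++) {
            enter_atomic();
            process_one_batch(i);    /* aesni_ecb_enc() etc. in the real code */
            leave_atomic();          /* a preemption point between batches */
        }
    }

    int main(void) { crypt_all_batches(3); return 0; }

The extra begin/end pairs cost a little throughput but bound the preemption-off latency, which is what the RT configuration cares about.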
1792 | diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c | |
1793 | index 8648158f3916..d7699130ee36 100644 | |
1794 | --- a/arch/x86/crypto/cast5_avx_glue.c | |
1795 | +++ b/arch/x86/crypto/cast5_avx_glue.c | |
1796 | @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) | |
1797 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1798 | bool enc) | |
1799 | { | |
1800 | - bool fpu_enabled = false; | |
1801 | + bool fpu_enabled; | |
1802 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1803 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
1804 | unsigned int nbytes; | |
1805 | @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1806 | u8 *wsrc = walk->src.virt.addr; | |
1807 | u8 *wdst = walk->dst.virt.addr; | |
1808 | ||
1809 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1810 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1811 | ||
1812 | /* Process multi-block batch */ | |
1813 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
1814 | @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
1815 | } while (nbytes >= bsize); | |
1816 | ||
1817 | done: | |
1818 | + cast5_fpu_end(fpu_enabled); | |
1819 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1820 | } | |
1821 | - | |
1822 | - cast5_fpu_end(fpu_enabled); | |
1823 | return err; | |
1824 | } | |
1825 | ||
1826 | @@ -227,7 +226,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |
1827 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1828 | struct scatterlist *src, unsigned int nbytes) | |
1829 | { | |
1830 | - bool fpu_enabled = false; | |
1831 | + bool fpu_enabled; | |
1832 | struct blkcipher_walk walk; | |
1833 | int err; | |
1834 | ||
1835 | @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1836 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1837 | ||
1838 | while ((nbytes = walk.nbytes)) { | |
1839 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1840 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1841 | nbytes = __cbc_decrypt(desc, &walk); | |
1842 | + cast5_fpu_end(fpu_enabled); | |
1843 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1844 | } | |
1845 | - | |
1846 | - cast5_fpu_end(fpu_enabled); | |
1847 | return err; | |
1848 | } | |
1849 | ||
1850 | @@ -311,7 +309,7 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc, | |
1851 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1852 | struct scatterlist *src, unsigned int nbytes) | |
1853 | { | |
1854 | - bool fpu_enabled = false; | |
1855 | + bool fpu_enabled; | |
1856 | struct blkcipher_walk walk; | |
1857 | int err; | |
1858 | ||
1859 | @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
1860 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
1861 | ||
1862 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
1863 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
1864 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
1865 | nbytes = __ctr_crypt(desc, &walk); | |
1866 | + cast5_fpu_end(fpu_enabled); | |
1867 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1868 | } | |
1869 | ||
1870 | - cast5_fpu_end(fpu_enabled); | |
1871 | - | |
1872 | if (walk.nbytes) { | |
1873 | ctr_crypt_final(desc, &walk); | |
1874 | err = blkcipher_walk_done(desc, &walk, 0); | |
1875 | diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c | |
1876 | index 6a85598931b5..3a506ce7ed93 100644 | |
1877 | --- a/arch/x86/crypto/glue_helper.c | |
1878 | +++ b/arch/x86/crypto/glue_helper.c | |
1879 | @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1880 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | |
1881 | const unsigned int bsize = 128 / 8; | |
1882 | unsigned int nbytes, i, func_bytes; | |
1883 | - bool fpu_enabled = false; | |
1884 | + bool fpu_enabled; | |
1885 | int err; | |
1886 | ||
1887 | err = blkcipher_walk_virt(desc, walk); | |
1888 | @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1889 | u8 *wdst = walk->dst.virt.addr; | |
1890 | ||
1891 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1892 | - desc, fpu_enabled, nbytes); | |
1893 | + desc, false, nbytes); | |
1894 | ||
1895 | for (i = 0; i < gctx->num_funcs; i++) { | |
1896 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
1897 | @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
1898 | } | |
1899 | ||
1900 | done: | |
1901 | + glue_fpu_end(fpu_enabled); | |
1902 | err = blkcipher_walk_done(desc, walk, nbytes); | |
1903 | } | |
1904 | ||
1905 | - glue_fpu_end(fpu_enabled); | |
1906 | return err; | |
1907 | } | |
1908 | ||
1909 | @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1910 | struct scatterlist *src, unsigned int nbytes) | |
1911 | { | |
1912 | const unsigned int bsize = 128 / 8; | |
1913 | - bool fpu_enabled = false; | |
1914 | + bool fpu_enabled; | |
1915 | struct blkcipher_walk walk; | |
1916 | int err; | |
1917 | ||
1918 | @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
1919 | ||
1920 | while ((nbytes = walk.nbytes)) { | |
1921 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1922 | - desc, fpu_enabled, nbytes); | |
1923 | + desc, false, nbytes); | |
1924 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
1925 | + glue_fpu_end(fpu_enabled); | |
1926 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1927 | } | |
1928 | ||
1929 | - glue_fpu_end(fpu_enabled); | |
1930 | return err; | |
1931 | } | |
1932 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
1933 | @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1934 | struct scatterlist *src, unsigned int nbytes) | |
1935 | { | |
1936 | const unsigned int bsize = 128 / 8; | |
1937 | - bool fpu_enabled = false; | |
1938 | + bool fpu_enabled; | |
1939 | struct blkcipher_walk walk; | |
1940 | int err; | |
1941 | ||
1942 | @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
1943 | ||
1944 | while ((nbytes = walk.nbytes) >= bsize) { | |
1945 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1946 | - desc, fpu_enabled, nbytes); | |
1947 | + desc, false, nbytes); | |
1948 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
1949 | + glue_fpu_end(fpu_enabled); | |
1950 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1951 | } | |
1952 | ||
1953 | - glue_fpu_end(fpu_enabled); | |
1954 | - | |
1955 | if (walk.nbytes) { | |
1956 | glue_ctr_crypt_final_128bit( | |
1957 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
1958 | @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1959 | void *tweak_ctx, void *crypt_ctx) | |
1960 | { | |
1961 | const unsigned int bsize = 128 / 8; | |
1962 | - bool fpu_enabled = false; | |
1963 | + bool fpu_enabled; | |
1964 | struct blkcipher_walk walk; | |
1965 | int err; | |
1966 | ||
1967 | @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
1968 | ||
1969 | /* set minimum length to bsize, for tweak_fn */ | |
1970 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1971 | - desc, fpu_enabled, | |
1972 | + desc, false, | |
1973 | nbytes < bsize ? bsize : nbytes); | |
1974 | - | |
1975 | /* calculate first value of T */ | |
1976 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
1977 | + glue_fpu_end(fpu_enabled); | |
1978 | ||
1979 | while (nbytes) { | |
1980 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
1981 | + desc, false, nbytes); | |
1982 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
1983 | ||
1984 | + glue_fpu_end(fpu_enabled); | |
1985 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
1986 | nbytes = walk.nbytes; | |
1987 | } | |
1988 | - | |
1989 | - glue_fpu_end(fpu_enabled); | |
1990 | - | |
1991 | return err; | |
1992 | } | |
1993 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
1994 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | |
1995 | index 1433f6b4607d..f963fde8e4fa 100644 | |
1996 | --- a/arch/x86/entry/common.c | |
1997 | +++ b/arch/x86/entry/common.c | |
1998 | @@ -136,7 +136,7 @@ static long syscall_trace_enter(struct pt_regs *regs) | |
1999 | ||
2000 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
2001 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
2002 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) | |
2003 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) | |
2004 | ||
2005 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
2006 | { | |
2007 | @@ -152,9 +152,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
2008 | /* We have work to do. */ | |
2009 | local_irq_enable(); | |
2010 | ||
2011 | - if (cached_flags & _TIF_NEED_RESCHED) | |
2012 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
2013 | schedule(); | |
2014 | ||
2015 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
2016 | + if (unlikely(current->forced_info.si_signo)) { | |
2017 | + struct task_struct *t = current; | |
2018 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
2019 | + t->forced_info.si_signo = 0; | |
2020 | + } | |
2021 | +#endif | |
2022 | if (cached_flags & _TIF_UPROBE) | |
2023 | uprobe_notify_resume(regs); | |
2024 | ||
2025 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
2026 | index 0b56666e6039..1d8ee026c9c5 100644 | |
2027 | --- a/arch/x86/entry/entry_32.S | |
2028 | +++ b/arch/x86/entry/entry_32.S | |
2029 | @@ -271,8 +271,25 @@ END(ret_from_exception) | |
2030 | ENTRY(resume_kernel) | |
2031 | DISABLE_INTERRUPTS(CLBR_ANY) | |
2032 | need_resched: | |
2033 | + # preempt count == 0 + NEED_RS set? | |
2034 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
2035 | +#ifndef CONFIG_PREEMPT_LAZY | |
2036 | jnz restore_all | |
2037 | +#else | |
2038 | + jz test_int_off | |
2039 | + | |
2040 | + # at least preempt count == 0 ? | |
2041 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2042 | + jne restore_all | |
2043 | + | |
2044 | + GET_THREAD_INFO(%ebp) | |
2045 | + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
2046 | + jnz restore_all | |
2047 | + | |
2048 | + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp) | |
2049 | + jz restore_all | |
2050 | +test_int_off: | |
2051 | +#endif | |
2052 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2053 | jz restore_all | |
2054 | call preempt_schedule_irq | |
2055 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
2056 | index 02fff3ebfb87..81ec3d016df0 100644 | |
2057 | --- a/arch/x86/entry/entry_64.S | |
2058 | +++ b/arch/x86/entry/entry_64.S | |
2059 | @@ -512,7 +512,23 @@ GLOBAL(retint_user) | |
2060 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ | |
2061 | jnc 1f | |
2062 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2063 | +#ifndef CONFIG_PREEMPT_LAZY | |
2064 | jnz 1f | |
2065 | +#else | |
2066 | + jz do_preempt_schedule_irq | |
2067 | + | |
2068 | + # at least preempt count == 0 ? | |
2069 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2070 | + jnz 1f | |
2071 | + | |
2072 | + GET_THREAD_INFO(%rcx) | |
2073 | + cmpl $0, TI_preempt_lazy_count(%rcx) | |
2074 | + jnz 1f | |
2075 | + | |
2076 | + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) | |
2077 | + jnc 1f | |
2078 | +do_preempt_schedule_irq: | |
2079 | +#endif | |
2080 | call preempt_schedule_irq | |
2081 | jmp 0b | |
2082 | 1: | |
2083 | @@ -817,6 +833,7 @@ END(native_load_gs_index) | |
2084 | jmp 2b | |
2085 | .previous | |
2086 | ||
2087 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2088 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2089 | ENTRY(do_softirq_own_stack) | |
2090 | pushq %rbp | |
2091 | @@ -829,6 +846,7 @@ ENTRY(do_softirq_own_stack) | |
2092 | decl PER_CPU_VAR(irq_count) | |
2093 | ret | |
2094 | END(do_softirq_own_stack) | |
2095 | +#endif | |
2096 | ||
2097 | #ifdef CONFIG_XEN | |
2098 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | |
2099 | diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h | |
2100 | index 17f218645701..11bd1b7ee6eb 100644 | |
2101 | --- a/arch/x86/include/asm/preempt.h | |
2102 | +++ b/arch/x86/include/asm/preempt.h | |
2103 | @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) | |
2104 | * a decrement which hits zero means we have no preempt_count and should | |
2105 | * reschedule. | |
2106 | */ | |
2107 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2108 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2109 | { | |
2110 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), e); | |
2111 | } | |
2112 | ||
2113 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2114 | +{ | |
2115 | + if (____preempt_count_dec_and_test()) | |
2116 | + return true; | |
2117 | +#ifdef CONFIG_PREEMPT_LAZY | |
2118 | + if (current_thread_info()->preempt_lazy_count) | |
2119 | + return false; | |
2120 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2121 | +#else | |
2122 | + return false; | |
2123 | +#endif | |
2124 | +} | |
2125 | + | |
2126 | /* | |
2127 | * Returns true when we need to resched and can (barring IRQ state). | |
2128 | */ | |
2129 | static __always_inline bool should_resched(int preempt_offset) | |
2130 | { | |
2131 | +#ifdef CONFIG_PREEMPT_LAZY | |
2132 | + u32 tmp; | |
2133 | + | |
2134 | + tmp = raw_cpu_read_4(__preempt_count); | |
2135 | + if (tmp == preempt_offset) | |
2136 | + return true; | |
2137 | + | |
2138 | + /* preempt count == 0 ? */ | |
2139 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2140 | + if (tmp) | |
2141 | + return false; | |
2142 | + if (current_thread_info()->preempt_lazy_count) | |
2143 | + return false; | |
2144 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2145 | +#else | |
2146 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2147 | +#endif | |
2148 | } | |
2149 | ||
2150 | #ifdef CONFIG_PREEMPT | |
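The preempt.h hunk above layers the lazy check underneath the existing one: only once the ordinary preempt counter allows preemption does the code consult preempt_lazy_count and TIF_NEED_RESCHED_LAZY. A compilable sketch of that ordering follows; struct lazy_state and should_resched_sketch() are hypothetical stand-ins, and the inverted NEED_RESCHED encoding folded into __preempt_count on x86 is deliberately glossed over.

    #include <stdbool.h>
    #include <stdio.h>

    struct lazy_state {
        unsigned int preempt_count;   /* nesting of preempt_disable()      */
        int preempt_lazy_count;       /* nesting of lazy-preempt disabling */
        bool need_resched;            /* TIF_NEED_RESCHED                  */
        bool need_resched_lazy;       /* TIF_NEED_RESCHED_LAZY             */
    };

    static bool should_resched_sketch(const struct lazy_state *s,
                                      unsigned int preempt_offset)
    {
        if (s->need_resched && s->preempt_count == preempt_offset)
            return true;              /* ordinary, immediate preemption     */
        if (s->preempt_count)
            return false;             /* still inside preempt_disable()     */
        if (s->preempt_lazy_count)
            return false;             /* inside a lazy-disabled region      */
        return s->need_resched_lazy;  /* only the lazy request remains      */
    }

    int main(void)
    {
        struct lazy_state s = { .preempt_count = 0, .preempt_lazy_count = 0,
                                .need_resched = false, .need_resched_lazy = true };
        printf("%d\n", should_resched_sketch(&s, 0));   /* prints 1 */
        return 0;
    }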
2151 | diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h | |
2152 | index dd1e7d6387ab..d59bedb28bab 100644 | |
2153 | --- a/arch/x86/include/asm/signal.h | |
2154 | +++ b/arch/x86/include/asm/signal.h | |
2155 | @@ -23,6 +23,19 @@ typedef struct { | |
2156 | unsigned long sig[_NSIG_WORDS]; | |
2157 | } sigset_t; | |
2158 | ||
2159 | +/* | |
2160 | + * Because some traps use the IST stack, we must keep preemption | |
2161 | + * disabled while calling do_trap(), but do_trap() may call | |
2162 | + * force_sig_info() which will grab the signal spin_locks for the | |
2163 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2164 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2165 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2166 | + * trap. | |
2167 | + */ | |
2168 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2169 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2170 | +#endif | |
2171 | + | |
2172 | #ifndef CONFIG_COMPAT | |
2173 | typedef sigset_t compat_sigset_t; | |
2174 | #endif | |
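The comment added to signal.h above describes a stash-and-deliver scheme: on PREEMPT_RT_FULL the trap path records the signal in the task instead of calling force_sig_info() with preemption disabled, and the exit-to-usermode loop (the common.c hunk earlier in this diff) delivers it. A toy, user-space shaped sketch of that hand-off, with a hypothetical pending_signal stash in place of current->forced_info:

    #include <stdio.h>

    /* Hypothetical per-task stash, mirroring current->forced_info. */
    struct pending_signal {
        int signo;                  /* 0 means "nothing stashed" */
    };

    static struct pending_signal forced_info;

    /* Trap context on RT: cannot take sleeping locks, so only record the signal. */
    static void stash_signal(int signo)
    {
        forced_info.signo = signo;
    }

    /* Exit-to-usermode path: safe context, deliver and clear the stash. */
    static void deliver_stashed_signal(void)
    {
        if (forced_info.signo) {
            printf("delivering signal %d\n", forced_info.signo);  /* force_sig_info() in the kernel */
            forced_info.signo = 0;
        }
    }

    int main(void)
    {
        stash_signal(11);
        deliver_stashed_signal();
        return 0;
    }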
2175 | diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h | |
2176 | index 58505f01962f..02fa39652cd6 100644 | |
2177 | --- a/arch/x86/include/asm/stackprotector.h | |
2178 | +++ b/arch/x86/include/asm/stackprotector.h | |
2179 | @@ -59,7 +59,7 @@ | |
2180 | */ | |
2181 | static __always_inline void boot_init_stack_canary(void) | |
2182 | { | |
2183 | - u64 canary; | |
2184 | + u64 uninitialized_var(canary); | |
2185 | u64 tsc; | |
2186 | ||
2187 | #ifdef CONFIG_X86_64 | |
2188 | @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) | |
2189 | * of randomness. The TSC only matters for very early init, | |
2190 | * there it already has some randomness on most systems. Later | |
2191 | * on during the bootup the random pool has true entropy too. | |
2192 | + * | |
2193 | + * For preempt-rt we need to weaken the randomness a bit, as | |
2194 | + * we can't call into the random generator from atomic context | |
2195 | + * due to locking constraints. We just leave canary | |
2196 | + * uninitialized and use the TSC based randomness on top of it. | |
2197 | */ | |
2198 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2199 | get_random_bytes(&canary, sizeof(canary)); | |
2200 | +#endif | |
2201 | tsc = rdtsc(); | |
2202 | canary += tsc + (tsc << 32UL); | |
2203 | ||
2204 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | |
2205 | index 8b7c8d8e0852..631059ef61da 100644 | |
2206 | --- a/arch/x86/include/asm/thread_info.h | |
2207 | +++ b/arch/x86/include/asm/thread_info.h | |
2208 | @@ -57,6 +57,8 @@ struct thread_info { | |
2209 | __u32 flags; /* low level flags */ | |
2210 | __u32 status; /* thread synchronous flags */ | |
2211 | __u32 cpu; /* current CPU */ | |
2212 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
2213 | + <0 => BUG */ | |
2214 | }; | |
2215 | ||
2216 | #define INIT_THREAD_INFO(tsk) \ | |
2217 | @@ -73,6 +75,10 @@ struct thread_info { | |
2218 | ||
2219 | #include <asm/asm-offsets.h> | |
2220 | ||
2221 | +#define GET_THREAD_INFO(reg) \ | |
2222 | + _ASM_MOV PER_CPU_VAR(cpu_current_top_of_stack),reg ; \ | |
2223 | + _ASM_SUB $(THREAD_SIZE),reg ; | |
2224 | + | |
2225 | #endif | |
2226 | ||
2227 | /* | |
2228 | @@ -91,6 +97,7 @@ struct thread_info { | |
2229 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | |
2230 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2231 | #define TIF_SECCOMP 8 /* secure computing */ | |
2232 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2233 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2234 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
2235 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
2236 | @@ -115,6 +122,7 @@ struct thread_info { | |
2237 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) | |
2238 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2239 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2240 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2241 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2242 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
2243 | #define _TIF_NOTSC (1 << TIF_NOTSC) | |
2244 | @@ -151,6 +159,8 @@ struct thread_info { | |
2245 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | |
2246 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2247 | ||
2248 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2249 | + | |
2250 | #define STACK_WARN (THREAD_SIZE/8) | |
2251 | ||
2252 | /* | |
2253 | diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h | |
2254 | index cc44d926c17e..df278aa0f638 100644 | |
2255 | --- a/arch/x86/include/asm/uv/uv_bau.h | |
2256 | +++ b/arch/x86/include/asm/uv/uv_bau.h | |
2257 | @@ -615,9 +615,9 @@ struct bau_control { | |
2258 | cycles_t send_message; | |
2259 | cycles_t period_end; | |
2260 | cycles_t period_time; | |
2261 | - spinlock_t uvhub_lock; | |
2262 | - spinlock_t queue_lock; | |
2263 | - spinlock_t disable_lock; | |
2264 | + raw_spinlock_t uvhub_lock; | |
2265 | + raw_spinlock_t queue_lock; | |
2266 | + raw_spinlock_t disable_lock; | |
2267 | /* tunables */ | |
2268 | int max_concurr; | |
2269 | int max_concurr_const; | |
2270 | @@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v) | |
2271 | * to be lowered below the current 'v'. atomic_add_unless can only stop | |
2272 | * on equal. | |
2273 | */ | |
2274 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2275 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2276 | { | |
2277 | - spin_lock(lock); | |
2278 | + raw_spin_lock(lock); | |
2279 | if (atomic_read(v) >= u) { | |
2280 | - spin_unlock(lock); | |
2281 | + raw_spin_unlock(lock); | |
2282 | return 0; | |
2283 | } | |
2284 | atomic_inc(v); | |
2285 | - spin_unlock(lock); | |
2286 | + raw_spin_unlock(lock); | |
2287 | return 1; | |
2288 | } | |
2289 | ||
2290 | diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c | |
2291 | index fbd19444403f..e78f477a4ae3 100644 | |
2292 | --- a/arch/x86/kernel/acpi/boot.c | |
2293 | +++ b/arch/x86/kernel/acpi/boot.c | |
2294 | @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |
2295 | * ->ioapic_mutex | |
2296 | * ->ioapic_lock | |
2297 | */ | |
2298 | +#ifdef CONFIG_X86_IO_APIC | |
2299 | static DEFINE_MUTEX(acpi_ioapic_lock); | |
2300 | +#endif | |
2301 | ||
2302 | /* -------------------------------------------------------------------------- | |
2303 | Boot-time Configuration | |
2304 | diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c | |
2305 | index 48e6d84f173e..0b5a8b994f65 100644 | |
2306 | --- a/arch/x86/kernel/apic/io_apic.c | |
2307 | +++ b/arch/x86/kernel/apic/io_apic.c | |
2308 | @@ -1712,7 +1712,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) | |
2309 | static inline bool ioapic_irqd_mask(struct irq_data *data) | |
2310 | { | |
2311 | /* If we are moving the irq we need to mask it */ | |
2312 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2313 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2314 | + !irqd_irq_inprogress(data))) { | |
2315 | mask_ioapic_irq(data); | |
2316 | return true; | |
2317 | } | |
2318 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
2319 | index 2bd5c6ff7ee7..a2c317f5839b 100644 | |
2320 | --- a/arch/x86/kernel/asm-offsets.c | |
2321 | +++ b/arch/x86/kernel/asm-offsets.c | |
2322 | @@ -31,6 +31,7 @@ void common(void) { | |
2323 | BLANK(); | |
2324 | OFFSET(TI_flags, thread_info, flags); | |
2325 | OFFSET(TI_status, thread_info, status); | |
2326 | + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); | |
2327 | ||
2328 | BLANK(); | |
2329 | OFFSET(TASK_addr_limit, task_struct, thread.addr_limit); | |
2330 | @@ -88,4 +89,5 @@ void common(void) { | |
2331 | ||
2332 | BLANK(); | |
2333 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2334 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
2335 | } | |
2336 | diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c | |
2337 | index 79d8ec849468..accbf0e806d0 100644 | |
2338 | --- a/arch/x86/kernel/cpu/mcheck/mce.c | |
2339 | +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |
2340 | @@ -41,6 +41,8 @@ | |
2341 | #include <linux/debugfs.h> | |
2342 | #include <linux/irq_work.h> | |
2343 | #include <linux/export.h> | |
2344 | +#include <linux/jiffies.h> | |
2345 | +#include <linux/swork.h> | |
2346 | ||
2347 | #include <asm/processor.h> | |
2348 | #include <asm/traps.h> | |
2349 | @@ -1291,7 +1293,7 @@ void mce_log_therm_throt_event(__u64 status) | |
2350 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; | |
2351 | ||
2352 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2353 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2354 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2355 | ||
2356 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2357 | { | |
2358 | @@ -1300,32 +1302,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2359 | ||
2360 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2361 | ||
2362 | -static void __restart_timer(struct timer_list *t, unsigned long interval) | |
2363 | +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) | |
2364 | { | |
2365 | - unsigned long when = jiffies + interval; | |
2366 | - unsigned long flags; | |
2367 | - | |
2368 | - local_irq_save(flags); | |
2369 | - | |
2370 | - if (timer_pending(t)) { | |
2371 | - if (time_before(when, t->expires)) | |
2372 | - mod_timer(t, when); | |
2373 | - } else { | |
2374 | - t->expires = round_jiffies(when); | |
2375 | - add_timer_on(t, smp_processor_id()); | |
2376 | - } | |
2377 | - | |
2378 | - local_irq_restore(flags); | |
2379 | + if (!interval) | |
2380 | + return HRTIMER_NORESTART; | |
2381 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); | |
2382 | + return HRTIMER_RESTART; | |
2383 | } | |
2384 | ||
2385 | -static void mce_timer_fn(unsigned long data) | |
2386 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2387 | { | |
2388 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2389 | - int cpu = smp_processor_id(); | |
2390 | unsigned long iv; | |
2391 | ||
2392 | - WARN_ON(cpu != data); | |
2393 | - | |
2394 | iv = __this_cpu_read(mce_next_interval); | |
2395 | ||
2396 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
2397 | @@ -1348,7 +1336,7 @@ static void mce_timer_fn(unsigned long data) | |
2398 | ||
2399 | done: | |
2400 | __this_cpu_write(mce_next_interval, iv); | |
2401 | - __restart_timer(t, iv); | |
2402 | + return __restart_timer(timer, iv); | |
2403 | } | |
2404 | ||
2405 | /* | |
2406 | @@ -1356,7 +1344,7 @@ static void mce_timer_fn(unsigned long data) | |
2407 | */ | |
2408 | void mce_timer_kick(unsigned long interval) | |
2409 | { | |
2410 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2411 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2412 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
2413 | ||
2414 | __restart_timer(t, interval); | |
2415 | @@ -1371,7 +1359,7 @@ static void mce_timer_delete_all(void) | |
2416 | int cpu; | |
2417 | ||
2418 | for_each_online_cpu(cpu) | |
2419 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2420 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2421 | } | |
2422 | ||
2423 | static void mce_do_trigger(struct work_struct *work) | |
2424 | @@ -1381,6 +1369,56 @@ static void mce_do_trigger(struct work_struct *work) | |
2425 | ||
2426 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2427 | ||
2428 | +static void __mce_notify_work(struct swork_event *event) | |
2429 | +{ | |
2430 | + /* Not more than two messages every minute */ | |
2431 | + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2432 | + | |
2433 | + /* wake processes polling /dev/mcelog */ | |
2434 | + wake_up_interruptible(&mce_chrdev_wait); | |
2435 | + | |
2436 | + /* | |
2437 | + * There is no risk of missing notifications because | |
2438 | + * work_pending is always cleared before the function is | |
2439 | + * executed. | |
2440 | + */ | |
2441 | + if (mce_helper[0] && !work_pending(&mce_trigger_work)) | |
2442 | + schedule_work(&mce_trigger_work); | |
2443 | + | |
2444 | + if (__ratelimit(&ratelimit)) | |
2445 | + pr_info(HW_ERR "Machine check events logged\n"); | |
2446 | +} | |
2447 | + | |
2448 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2449 | +static bool notify_work_ready __read_mostly; | |
2450 | +static struct swork_event notify_work; | |
2451 | + | |
2452 | +static int mce_notify_work_init(void) | |
2453 | +{ | |
2454 | + int err; | |
2455 | + | |
2456 | + err = swork_get(); | |
2457 | + if (err) | |
2458 | + return err; | |
2459 | + | |
2460 | + INIT_SWORK(¬ify_work, __mce_notify_work); | |
2461 | + notify_work_ready = true; | |
2462 | + return 0; | |
2463 | +} | |
2464 | + | |
2465 | +static void mce_notify_work(void) | |
2466 | +{ | |
2467 | + if (notify_work_ready) | |
2468 | + swork_queue(¬ify_work); | |
2469 | +} | |
2470 | +#else | |
2471 | +static void mce_notify_work(void) | |
2472 | +{ | |
2473 | + __mce_notify_work(NULL); | |
2474 | +} | |
2475 | +static inline int mce_notify_work_init(void) { return 0; } | |
2476 | +#endif | |
2477 | + | |
2478 | /* | |
2479 | * Notify the user(s) about new machine check events. | |
2480 | * Can be called from interrupt context, but not from machine check/NMI | |
2481 | @@ -1388,19 +1426,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2482 | */ | |
2483 | int mce_notify_irq(void) | |
2484 | { | |
2485 | - /* Not more than two messages every minute */ | |
2486 | - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2487 | - | |
2488 | if (test_and_clear_bit(0, &mce_need_notify)) { | |
2489 | - /* wake processes polling /dev/mcelog */ | |
2490 | - wake_up_interruptible(&mce_chrdev_wait); | |
2491 | - | |
2492 | - if (mce_helper[0]) | |
2493 | - schedule_work(&mce_trigger_work); | |
2494 | - | |
2495 | - if (__ratelimit(&ratelimit)) | |
2496 | - pr_info(HW_ERR "Machine check events logged\n"); | |
2497 | - | |
2498 | + mce_notify_work(); | |
2499 | return 1; | |
2500 | } | |
2501 | return 0; | |
2502 | @@ -1717,7 +1744,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) | |
2503 | } | |
2504 | } | |
2505 | ||
2506 | -static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2507 | +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) | |
2508 | { | |
2509 | unsigned long iv = check_interval * HZ; | |
2510 | ||
2511 | @@ -1726,16 +1753,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2512 | ||
2513 | per_cpu(mce_next_interval, cpu) = iv; | |
2514 | ||
2515 | - t->expires = round_jiffies(jiffies + iv); | |
2516 | - add_timer_on(t, cpu); | |
2517 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
2518 | + 0, HRTIMER_MODE_REL_PINNED); | |
2519 | } | |
2520 | ||
2521 | static void __mcheck_cpu_init_timer(void) | |
2522 | { | |
2523 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2524 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2525 | unsigned int cpu = smp_processor_id(); | |
2526 | ||
2527 | - setup_pinned_timer(t, mce_timer_fn, cpu); | |
2528 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
2529 | + t->function = mce_timer_fn; | |
2530 | mce_start_timer(cpu, t); | |
2531 | } | |
2532 | ||
2533 | @@ -2459,6 +2487,8 @@ static void mce_disable_cpu(void *h) | |
2534 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | |
2535 | return; | |
2536 | ||
2537 | + hrtimer_cancel(this_cpu_ptr(&mce_timer)); | |
2538 | + | |
2539 | if (!(action & CPU_TASKS_FROZEN)) | |
2540 | cmci_clear(); | |
2541 | ||
2542 | @@ -2481,6 +2511,7 @@ static void mce_reenable_cpu(void *h) | |
2543 | if (b->init) | |
2544 | wrmsrl(msr_ops.ctl(i), b->ctl); | |
2545 | } | |
2546 | + __mcheck_cpu_init_timer(); | |
2547 | } | |
2548 | ||
2549 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
2550 | @@ -2488,7 +2519,6 @@ static int | |
2551 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
2552 | { | |
2553 | unsigned int cpu = (unsigned long)hcpu; | |
2554 | - struct timer_list *t = &per_cpu(mce_timer, cpu); | |
2555 | ||
2556 | switch (action & ~CPU_TASKS_FROZEN) { | |
2557 | case CPU_ONLINE: | |
2558 | @@ -2508,11 +2538,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
2559 | break; | |
2560 | case CPU_DOWN_PREPARE: | |
2561 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | |
2562 | - del_timer_sync(t); | |
2563 | break; | |
2564 | case CPU_DOWN_FAILED: | |
2565 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | |
2566 | - mce_start_timer(cpu, t); | |
2567 | break; | |
2568 | } | |
2569 | ||
2570 | @@ -2551,6 +2579,10 @@ static __init int mcheck_init_device(void) | |
2571 | goto err_out; | |
2572 | } | |
2573 | ||
2574 | + err = mce_notify_work_init(); | |
2575 | + if (err) | |
2576 | + goto err_out; | |
2577 | + | |
2578 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { | |
2579 | err = -ENOMEM; | |
2580 | goto err_out; | |
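The mce.c changes above do two related things: the per-CPU timer_list becomes an hrtimer whose handler re-arms itself by forwarding the expiry and returning HRTIMER_RESTART, and the user-visible notification is routed through a swork item on PREEMPT_RT_FULL so it never runs from a context that cannot take sleeping locks. The fragment below sketches just the re-arm pattern under hypothetical names; it is an illustration of the control flow, not the kernel API.

    #include <stdio.h>

    /* Hypothetical return codes mirroring HRTIMER_NORESTART / HRTIMER_RESTART. */
    enum restart_sketch { SKETCH_NORESTART, SKETCH_RESTART };

    /* Shape of __restart_timer() above: a zero interval stops the polling timer,
     * anything else pushes the expiry forward and asks the core to re-arm it
     * (hrtimer_forward_now() in the real code). */
    static enum restart_sketch restart_timer_sketch(unsigned long interval_ns,
                                                    unsigned long now_ns,
                                                    unsigned long *expiry_ns)
    {
        if (!interval_ns)
            return SKETCH_NORESTART;
        *expiry_ns = now_ns + interval_ns;
        return SKETCH_RESTART;
    }

    int main(void)
    {
        unsigned long expiry = 0;

        if (restart_timer_sketch(1000000UL, 5000UL, &expiry) == SKETCH_RESTART)
            printf("re-armed for %lu ns\n", expiry);
        return 0;
    }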
2581 | diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c | |
2582 | index 09675712eba8..eea7557b355d 100644 | |
2583 | --- a/arch/x86/kernel/dumpstack_32.c | |
2584 | +++ b/arch/x86/kernel/dumpstack_32.c | |
2585 | @@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
2586 | unsigned long *stack, unsigned long bp, | |
2587 | const struct stacktrace_ops *ops, void *data) | |
2588 | { | |
2589 | - const unsigned cpu = get_cpu(); | |
2590 | + const unsigned cpu = get_cpu_light(); | |
2591 | int graph = 0; | |
2592 | u32 *prev_esp; | |
2593 | ||
2594 | @@ -84,7 +84,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
2595 | break; | |
2596 | touch_nmi_watchdog(); | |
2597 | } | |
2598 | - put_cpu(); | |
2599 | + put_cpu_light(); | |
2600 | } | |
2601 | EXPORT_SYMBOL(dump_trace); | |
2602 | ||
2603 | diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c | |
2604 | index 9ee4520ce83c..2cd610b68868 100644 | |
2605 | --- a/arch/x86/kernel/dumpstack_64.c | |
2606 | +++ b/arch/x86/kernel/dumpstack_64.c | |
2607 | @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
2608 | unsigned long *stack, unsigned long bp, | |
2609 | const struct stacktrace_ops *ops, void *data) | |
2610 | { | |
2611 | - const unsigned cpu = get_cpu(); | |
2612 | + const unsigned cpu = get_cpu_light(); | |
2613 | unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); | |
2614 | unsigned long dummy; | |
2615 | unsigned used = 0; | |
2616 | @@ -239,7 +239,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
2617 | * This handles the process stack: | |
2618 | */ | |
2619 | bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph); | |
2620 | - put_cpu(); | |
2621 | + put_cpu_light(); | |
2622 | } | |
2623 | EXPORT_SYMBOL(dump_trace); | |
2624 | ||
2625 | @@ -253,7 +253,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
2626 | int cpu; | |
2627 | int i; | |
2628 | ||
2629 | - preempt_disable(); | |
2630 | + migrate_disable(); | |
2631 | cpu = smp_processor_id(); | |
2632 | ||
2633 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | |
2634 | @@ -299,7 +299,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
2635 | stack++; | |
2636 | touch_nmi_watchdog(); | |
2637 | } | |
2638 | - preempt_enable(); | |
2639 | + migrate_enable(); | |
2640 | ||
2641 | pr_cont("\n"); | |
2642 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | |
2643 | diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c | |
2644 | index 1f38d9a4d9de..053bf3b2ef39 100644 | |
2645 | --- a/arch/x86/kernel/irq_32.c | |
2646 | +++ b/arch/x86/kernel/irq_32.c | |
2647 | @@ -127,6 +127,7 @@ void irq_ctx_init(int cpu) | |
2648 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | |
2649 | } | |
2650 | ||
2651 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2652 | void do_softirq_own_stack(void) | |
2653 | { | |
2654 | struct irq_stack *irqstk; | |
2655 | @@ -143,6 +144,7 @@ void do_softirq_own_stack(void) | |
2656 | ||
2657 | call_on_stack(__do_softirq, isp); | |
2658 | } | |
2659 | +#endif | |
2660 | ||
2661 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
2662 | { | |
2663 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | |
2664 | index d86be29c38c7..b0e29d1a0571 100644 | |
2665 | --- a/arch/x86/kernel/process_32.c | |
2666 | +++ b/arch/x86/kernel/process_32.c | |
2667 | @@ -35,6 +35,7 @@ | |
2668 | #include <linux/uaccess.h> | |
2669 | #include <linux/io.h> | |
2670 | #include <linux/kdebug.h> | |
2671 | +#include <linux/highmem.h> | |
2672 | ||
2673 | #include <asm/pgtable.h> | |
2674 | #include <asm/ldt.h> | |
2675 | @@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |
2676 | } | |
2677 | EXPORT_SYMBOL_GPL(start_thread); | |
2678 | ||
2679 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2680 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
2681 | +{ | |
2682 | + int i; | |
2683 | + | |
2684 | + /* | |
2685 | + * Clear @prev's kmap_atomic mappings | |
2686 | + */ | |
2687 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
2688 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2689 | + pte_t *ptep = kmap_pte - idx; | |
2690 | + | |
2691 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
2692 | + } | |
2693 | + /* | |
2694 | + * Restore @next_p's kmap_atomic mappings | |
2695 | + */ | |
2696 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
2697 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
2698 | + | |
2699 | + if (!pte_none(next_p->kmap_pte[i])) | |
2700 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
2701 | + } | |
2702 | +} | |
2703 | +#else | |
2704 | +static inline void | |
2705 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
2706 | +#endif | |
2707 | + | |
2708 | ||
2709 | /* | |
2710 | * switch_to(x,y) should switch tasks from x to y. | |
2711 | @@ -286,6 +316,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
2712 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | |
2713 | __switch_to_xtra(prev_p, next_p, tss); | |
2714 | ||
2715 | + switch_kmaps(prev_p, next_p); | |
2716 | + | |
2717 | /* | |
2718 | * Leave lazy mode, flushing any hypercalls made here. | |
2719 | * This must be done before restoring TLS segments so | |
2720 | diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c | |
2721 | index b62c85229711..d907b281a9d6 100644 | |
2722 | --- a/arch/x86/kvm/lapic.c | |
2723 | +++ b/arch/x86/kvm/lapic.c | |
2724 | @@ -1938,6 +1938,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |
2725 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | |
2726 | HRTIMER_MODE_ABS_PINNED); | |
2727 | apic->lapic_timer.timer.function = apic_timer_fn; | |
2728 | + apic->lapic_timer.timer.irqsafe = 1; | |
2729 | ||
2730 | /* | |
2731 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | |
2732 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | |
2733 | index 699f8726539a..24f30c86510c 100644 | |
2734 | --- a/arch/x86/kvm/x86.c | |
2735 | +++ b/arch/x86/kvm/x86.c | |
2736 | @@ -5865,6 +5865,13 @@ int kvm_arch_init(void *opaque) | |
2737 | goto out; | |
2738 | } | |
2739 | ||
2740 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2741 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
2742 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
2743 | + return -EOPNOTSUPP; | |
2744 | + } | |
2745 | +#endif | |
2746 | + | |
2747 | r = kvm_mmu_module_init(); | |
2748 | if (r) | |
2749 | goto out_free_percpu; | |
2750 | diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c | |
2751 | index 6d18b70ed5a9..f752724c22e8 100644 | |
2752 | --- a/arch/x86/mm/highmem_32.c | |
2753 | +++ b/arch/x86/mm/highmem_32.c | |
2754 | @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); | |
2755 | */ | |
2756 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2757 | { | |
2758 | + pte_t pte = mk_pte(page, prot); | |
2759 | unsigned long vaddr; | |
2760 | int idx, type; | |
2761 | ||
2762 | - preempt_disable(); | |
2763 | + preempt_disable_nort(); | |
2764 | pagefault_disable(); | |
2765 | ||
2766 | if (!PageHighMem(page)) | |
2767 | @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
2768 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
2769 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2770 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
2771 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
2772 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2773 | + current->kmap_pte[type] = pte; | |
2774 | +#endif | |
2775 | + set_pte(kmap_pte-idx, pte); | |
2776 | arch_flush_lazy_mmu_mode(); | |
2777 | ||
2778 | return (void *)vaddr; | |
2779 | @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) | |
2780 | * is a bad idea also, in case the page changes cacheability | |
2781 | * attributes or becomes a protected page in a hypervisor. | |
2782 | */ | |
2783 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2784 | + current->kmap_pte[type] = __pte(0); | |
2785 | +#endif | |
2786 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2787 | kmap_atomic_idx_pop(); | |
2788 | arch_flush_lazy_mmu_mode(); | |
2789 | @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) | |
2790 | #endif | |
2791 | ||
2792 | pagefault_enable(); | |
2793 | - preempt_enable(); | |
2794 | + preempt_enable_nort(); | |
2795 | } | |
2796 | EXPORT_SYMBOL(__kunmap_atomic); | |
2797 | ||
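The kmap_atomic_prot()/__kunmap_atomic() hunks above pair with the switch_kmaps() helper added to process_32.c earlier in this patch: on PREEMPT_RT the mapping section stays preemptible, so the pte being installed is also recorded in current->kmap_pte[type] and re-installed on the next CPU after a context switch. The *_nort helpers are assumed to expand roughly as follows (simplified; the real definitions are part of the RT patch's preempt.h changes, not shown here):

    #ifndef CONFIG_PREEMPT_RT_FULL
    # define preempt_disable_nort()        preempt_disable()
    # define preempt_enable_nort()         preempt_enable()
    #else
    /* RT: leave the section preemptible; the per-task kmap_pte[] copy plus
     * switch_kmaps() keep the fixmap slot consistent across a switch. */
    # define preempt_disable_nort()        barrier()
    # define preempt_enable_nort()         barrier()
    #endif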
2798 | diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c | |
2799 | index ada98b39b8ad..585f6829653b 100644 | |
2800 | --- a/arch/x86/mm/iomap_32.c | |
2801 | +++ b/arch/x86/mm/iomap_32.c | |
2802 | @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); | |
2803 | ||
2804 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2805 | { | |
2806 | + pte_t pte = pfn_pte(pfn, prot); | |
2807 | unsigned long vaddr; | |
2808 | int idx, type; | |
2809 | ||
2810 | @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
2811 | type = kmap_atomic_idx_push(); | |
2812 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
2813 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
2814 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
2815 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
2816 | + | |
2817 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2818 | + current->kmap_pte[type] = pte; | |
2819 | +#endif | |
2820 | + set_pte(kmap_pte - idx, pte); | |
2821 | arch_flush_lazy_mmu_mode(); | |
2822 | ||
2823 | return (void *)vaddr; | |
2824 | @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) | |
2825 | * is a bad idea also, in case the page changes cacheability | |
2826 | * attributes or becomes a protected page in a hypervisor. | |
2827 | */ | |
2828 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2829 | + current->kmap_pte[type] = __pte(0); | |
2830 | +#endif | |
2831 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
2832 | kmap_atomic_idx_pop(); | |
2833 | } | |
2834 | diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c | |
2835 | index fdb4d42b4ce5..8ab90fbecff0 100644 | |
2836 | --- a/arch/x86/platform/uv/tlb_uv.c | |
2837 | +++ b/arch/x86/platform/uv/tlb_uv.c | |
2838 | @@ -729,9 +729,9 @@ static void destination_plugged(struct bau_desc *bau_desc, | |
2839 | ||
2840 | quiesce_local_uvhub(hmaster); | |
2841 | ||
2842 | - spin_lock(&hmaster->queue_lock); | |
2843 | + raw_spin_lock(&hmaster->queue_lock); | |
2844 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2845 | - spin_unlock(&hmaster->queue_lock); | |
2846 | + raw_spin_unlock(&hmaster->queue_lock); | |
2847 | ||
2848 | end_uvhub_quiesce(hmaster); | |
2849 | ||
2850 | @@ -751,9 +751,9 @@ static void destination_timeout(struct bau_desc *bau_desc, | |
2851 | ||
2852 | quiesce_local_uvhub(hmaster); | |
2853 | ||
2854 | - spin_lock(&hmaster->queue_lock); | |
2855 | + raw_spin_lock(&hmaster->queue_lock); | |
2856 | reset_with_ipi(&bau_desc->distribution, bcp); | |
2857 | - spin_unlock(&hmaster->queue_lock); | |
2858 | + raw_spin_unlock(&hmaster->queue_lock); | |
2859 | ||
2860 | end_uvhub_quiesce(hmaster); | |
2861 | ||
2862 | @@ -774,7 +774,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
2863 | cycles_t tm1; | |
2864 | ||
2865 | hmaster = bcp->uvhub_master; | |
2866 | - spin_lock(&hmaster->disable_lock); | |
2867 | + raw_spin_lock(&hmaster->disable_lock); | |
2868 | if (!bcp->baudisabled) { | |
2869 | stat->s_bau_disabled++; | |
2870 | tm1 = get_cycles(); | |
2871 | @@ -787,7 +787,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
2872 | } | |
2873 | } | |
2874 | } | |
2875 | - spin_unlock(&hmaster->disable_lock); | |
2876 | + raw_spin_unlock(&hmaster->disable_lock); | |
2877 | } | |
2878 | ||
2879 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
2880 | @@ -850,7 +850,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, | |
2881 | */ | |
2882 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
2883 | { | |
2884 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
2885 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
2886 | atomic_t *v; | |
2887 | ||
2888 | v = &hmaster->active_descriptor_count; | |
2889 | @@ -983,7 +983,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
2890 | struct bau_control *hmaster; | |
2891 | ||
2892 | hmaster = bcp->uvhub_master; | |
2893 | - spin_lock(&hmaster->disable_lock); | |
2894 | + raw_spin_lock(&hmaster->disable_lock); | |
2895 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
2896 | stat->s_bau_reenabled++; | |
2897 | for_each_present_cpu(tcpu) { | |
2898 | @@ -995,10 +995,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
2899 | tbcp->period_giveups = 0; | |
2900 | } | |
2901 | } | |
2902 | - spin_unlock(&hmaster->disable_lock); | |
2903 | + raw_spin_unlock(&hmaster->disable_lock); | |
2904 | return 0; | |
2905 | } | |
2906 | - spin_unlock(&hmaster->disable_lock); | |
2907 | + raw_spin_unlock(&hmaster->disable_lock); | |
2908 | return -1; | |
2909 | } | |
2910 | ||
2911 | @@ -1916,9 +1916,9 @@ static void __init init_per_cpu_tunables(void) | |
2912 | bcp->cong_reps = congested_reps; | |
2913 | bcp->disabled_period = sec_2_cycles(disabled_period); | |
2914 | bcp->giveup_limit = giveup_limit; | |
2915 | - spin_lock_init(&bcp->queue_lock); | |
2916 | - spin_lock_init(&bcp->uvhub_lock); | |
2917 | - spin_lock_init(&bcp->disable_lock); | |
2918 | + raw_spin_lock_init(&bcp->queue_lock); | |
2919 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
2920 | + raw_spin_lock_init(&bcp->disable_lock); | |
2921 | } | |
2922 | } | |
2923 | ||
2924 | diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c | |
2925 | index b333fc45f9ec..8b85916e6986 100644 | |
2926 | --- a/arch/x86/platform/uv/uv_time.c | |
2927 | +++ b/arch/x86/platform/uv/uv_time.c | |
2928 | @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | |
2929 | ||
2930 | /* There is one of these allocated per node */ | |
2931 | struct uv_rtc_timer_head { | |
2932 | - spinlock_t lock; | |
2933 | + raw_spinlock_t lock; | |
2934 | /* next cpu waiting for timer, local node relative: */ | |
2935 | int next_cpu; | |
2936 | /* number of cpus on this node: */ | |
2937 | @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) | |
2938 | uv_rtc_deallocate_timers(); | |
2939 | return -ENOMEM; | |
2940 | } | |
2941 | - spin_lock_init(&head->lock); | |
2942 | + raw_spin_lock_init(&head->lock); | |
2943 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
2944 | head->next_cpu = -1; | |
2945 | blade_info[bid] = head; | |
2946 | @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2947 | unsigned long flags; | |
2948 | int next_cpu; | |
2949 | ||
2950 | - spin_lock_irqsave(&head->lock, flags); | |
2951 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2952 | ||
2953 | next_cpu = head->next_cpu; | |
2954 | *t = expires; | |
2955 | @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
2956 | if (uv_setup_intr(cpu, expires)) { | |
2957 | *t = ULLONG_MAX; | |
2958 | uv_rtc_find_next_timer(head, pnode); | |
2959 | - spin_unlock_irqrestore(&head->lock, flags); | |
2960 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2961 | return -ETIME; | |
2962 | } | |
2963 | } | |
2964 | ||
2965 | - spin_unlock_irqrestore(&head->lock, flags); | |
2966 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2967 | return 0; | |
2968 | } | |
2969 | ||
2970 | @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2971 | unsigned long flags; | |
2972 | int rc = 0; | |
2973 | ||
2974 | - spin_lock_irqsave(&head->lock, flags); | |
2975 | + raw_spin_lock_irqsave(&head->lock, flags); | |
2976 | ||
2977 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
2978 | rc = 1; | |
2979 | @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2980 | uv_rtc_find_next_timer(head, pnode); | |
2981 | } | |
2982 | ||
2983 | - spin_unlock_irqrestore(&head->lock, flags); | |
2984 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
2985 | ||
2986 | return rc; | |
2987 | } | |
2988 | @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
2989 | static cycle_t uv_read_rtc(struct clocksource *cs) | |
2990 | { | |
2991 | unsigned long offset; | |
2992 | + cycle_t cycles; | |
2993 | ||
2994 | + preempt_disable(); | |
2995 | if (uv_get_min_hub_revision_id() == 1) | |
2996 | offset = 0; | |
2997 | else | |
2998 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
2999 | ||
3000 | - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
3001 | + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
3002 | + preempt_enable(); | |
3003 | + | |
3004 | + return cycles; | |
3005 | } | |
3006 | ||
3007 | /* | |
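The tlb_uv.c and uv_time.c hunks above convert locks that are taken with interrupts disabled, including from the RTC timer interrupt path, to raw_spinlock_t. On PREEMPT_RT an ordinary spinlock_t becomes a sleeping lock, which cannot be acquired in such contexts, so these locks have to stay raw spinning locks; uv_read_rtc() additionally gains an explicit preempt_disable() so the per-blade MMR offset and the read happen on the same CPU. A condensed sketch of the locking pattern (struct and function names are illustrative, not the driver's):

    struct node_timer_head {
            raw_spinlock_t lock;    /* raw: taken with IRQs off, also on RT */
            int next_cpu;
    };

    static void arm_node_timer(struct node_timer_head *head, int cpu)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&head->lock, flags);
            head->next_cpu = cpu;
            raw_spin_unlock_irqrestore(&head->lock, flags);
    }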
3008 | diff --git a/block/blk-core.c b/block/blk-core.c | |
3009 | index 36c7ac328d8c..caa5fc1be2a2 100644 | |
3010 | --- a/block/blk-core.c | |
3011 | +++ b/block/blk-core.c | |
3012 | @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |
3013 | ||
3014 | INIT_LIST_HEAD(&rq->queuelist); | |
3015 | INIT_LIST_HEAD(&rq->timeout_list); | |
3016 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3017 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3018 | +#endif | |
3019 | rq->cpu = -1; | |
3020 | rq->q = q; | |
3021 | rq->__sector = (sector_t) -1; | |
3022 | @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async); | |
3023 | **/ | |
3024 | void blk_start_queue(struct request_queue *q) | |
3025 | { | |
3026 | - WARN_ON(!irqs_disabled()); | |
3027 | + WARN_ON_NONRT(!irqs_disabled()); | |
3028 | ||
3029 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
3030 | __blk_run_queue(q); | |
3031 | @@ -659,7 +662,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait) | |
3032 | if (nowait) | |
3033 | return -EBUSY; | |
3034 | ||
3035 | - ret = wait_event_interruptible(q->mq_freeze_wq, | |
3036 | + ret = swait_event_interruptible(q->mq_freeze_wq, | |
3037 | !atomic_read(&q->mq_freeze_depth) || | |
3038 | blk_queue_dying(q)); | |
3039 | if (blk_queue_dying(q)) | |
3040 | @@ -679,7 +682,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
3041 | struct request_queue *q = | |
3042 | container_of(ref, struct request_queue, q_usage_counter); | |
3043 | ||
3044 | - wake_up_all(&q->mq_freeze_wq); | |
3045 | + swake_up_all(&q->mq_freeze_wq); | |
3046 | } | |
3047 | ||
3048 | static void blk_rq_timed_out_timer(unsigned long data) | |
3049 | @@ -748,7 +751,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
3050 | q->bypass_depth = 1; | |
3051 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | |
3052 | ||
3053 | - init_waitqueue_head(&q->mq_freeze_wq); | |
3054 | + init_swait_queue_head(&q->mq_freeze_wq); | |
3055 | ||
3056 | /* | |
3057 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
3058 | @@ -3171,7 +3174,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, | |
3059 | blk_run_queue_async(q); | |
3060 | else | |
3061 | __blk_run_queue(q); | |
3062 | - spin_unlock(q->queue_lock); | |
3063 | + spin_unlock_irq(q->queue_lock); | |
3064 | } | |
3065 | ||
3066 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
3067 | @@ -3219,7 +3222,6 @@ EXPORT_SYMBOL(blk_check_plugged); | |
3068 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3069 | { | |
3070 | struct request_queue *q; | |
3071 | - unsigned long flags; | |
3072 | struct request *rq; | |
3073 | LIST_HEAD(list); | |
3074 | unsigned int depth; | |
3075 | @@ -3239,11 +3241,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3076 | q = NULL; | |
3077 | depth = 0; | |
3078 | ||
3079 | - /* | |
3080 | - * Save and disable interrupts here, to avoid doing it for every | |
3081 | - * queue lock we have to take. | |
3082 | - */ | |
3083 | - local_irq_save(flags); | |
3084 | while (!list_empty(&list)) { | |
3085 | rq = list_entry_rq(list.next); | |
3086 | list_del_init(&rq->queuelist); | |
3087 | @@ -3256,7 +3253,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3088 | queue_unplugged(q, depth, from_schedule); | |
3089 | q = rq->q; | |
3090 | depth = 0; | |
3091 | - spin_lock(q->queue_lock); | |
3092 | + spin_lock_irq(q->queue_lock); | |
3093 | } | |
3094 | ||
3095 | /* | |
3096 | @@ -3283,8 +3280,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3097 | */ | |
3098 | if (q) | |
3099 | queue_unplugged(q, depth, from_schedule); | |
3100 | - | |
3101 | - local_irq_restore(flags); | |
3102 | } | |
3103 | ||
3104 | void blk_finish_plug(struct blk_plug *plug) | |
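Two RT-relevant changes run through the blk-core.c hunks above: the queue-freeze waitqueue becomes a simple waitqueue (swait), whose wake-all path does bounded work and can be called from the percpu_ref release callback on RT, and blk_flush_plug_list() no longer disables interrupts across the whole loop, taking each queue_lock with spin_lock_irq()/spin_unlock_irq() instead. A minimal sketch of the swait usage assumed here (names are illustrative):

    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(freeze_wq);
    static atomic_t freeze_depth = ATOMIC_INIT(1);

    static void wait_until_unfrozen(void)
    {
            swait_event(freeze_wq, atomic_read(&freeze_depth) == 0);
    }

    static void unfreeze(void)
    {
            if (atomic_dec_and_test(&freeze_depth))
                    swake_up_all(&freeze_wq);
    }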
3105 | diff --git a/block/blk-ioc.c b/block/blk-ioc.c | |
3106 | index 381cb50a673c..dc8785233d94 100644 | |
3107 | --- a/block/blk-ioc.c | |
3108 | +++ b/block/blk-ioc.c | |
3109 | @@ -7,6 +7,7 @@ | |
3110 | #include <linux/bio.h> | |
3111 | #include <linux/blkdev.h> | |
3112 | #include <linux/slab.h> | |
3113 | +#include <linux/delay.h> | |
3114 | ||
3115 | #include "blk.h" | |
3116 | ||
3117 | @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work) | |
3118 | spin_unlock(q->queue_lock); | |
3119 | } else { | |
3120 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3121 | - cpu_relax(); | |
3122 | + cpu_chill(); | |
3123 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3124 | } | |
3125 | } | |
3126 | @@ -187,7 +188,7 @@ void put_io_context_active(struct io_context *ioc) | |
3127 | spin_unlock(icq->q->queue_lock); | |
3128 | } else { | |
3129 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3130 | - cpu_relax(); | |
3131 | + cpu_chill(); | |
3132 | goto retry; | |
3133 | } | |
3134 | } | |
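The blk-ioc.c hunks above swap cpu_relax() for cpu_chill() in trylock-retry loops (hence the new linux/delay.h include). On PREEMPT_RT the holder of the contended lock may be a preempted task on the same CPU, so busy-waiting can livelock; cpu_chill() sleeps for a short while before retrying. The protected retry shape is roughly this (condensed, with an illustrative function name):

    static void release_icq(struct io_context *ioc, struct io_cq *icq)
    {
            unsigned long flags;

    retry:
            spin_lock_irqsave(&ioc->lock, flags);
            if (!spin_trylock(icq->q->queue_lock)) {
                    spin_unlock_irqrestore(&ioc->lock, flags);
                    cpu_chill();    /* RT: sleep briefly instead of spinning */
                    goto retry;
            }
            /* ... drop the icq with both locks held ... */
            spin_unlock(icq->q->queue_lock);
            spin_unlock_irqrestore(&ioc->lock, flags);
    }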
3135 | diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c | |
3136 | index bb3ed488f7b5..628c6c13c482 100644 | |
3137 | --- a/block/blk-mq-cpu.c | |
3138 | +++ b/block/blk-mq-cpu.c | |
3139 | @@ -16,7 +16,7 @@ | |
3140 | #include "blk-mq.h" | |
3141 | ||
3142 | static LIST_HEAD(blk_mq_cpu_notify_list); | |
3143 | -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); | |
3144 | +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); | |
3145 | ||
3146 | static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
3147 | unsigned long action, void *hcpu) | |
3148 | @@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
3149 | struct blk_mq_cpu_notifier *notify; | |
3150 | int ret = NOTIFY_OK; | |
3151 | ||
3152 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
3153 | + if (action != CPU_POST_DEAD) | |
3154 | + return NOTIFY_OK; | |
3155 | + | |
3156 | + spin_lock(&blk_mq_cpu_notify_lock); | |
3157 | ||
3158 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { | |
3159 | ret = notify->notify(notify->data, action, cpu); | |
3160 | @@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
3161 | break; | |
3162 | } | |
3163 | ||
3164 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
3165 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
3166 | return ret; | |
3167 | } | |
3168 | ||
3169 | @@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | |
3170 | { | |
3171 | BUG_ON(!notifier->notify); | |
3172 | ||
3173 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
3174 | + spin_lock(&blk_mq_cpu_notify_lock); | |
3175 | list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); | |
3176 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
3177 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
3178 | } | |
3179 | ||
3180 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | |
3181 | { | |
3182 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
3183 | + spin_lock(&blk_mq_cpu_notify_lock); | |
3184 | list_del(¬ifier->list); | |
3185 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
3186 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
3187 | } | |
3188 | ||
3189 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | |
3190 | diff --git a/block/blk-mq.c b/block/blk-mq.c | |
3191 | index c207fa9870eb..ac71b0455e9f 100644 | |
3192 | --- a/block/blk-mq.c | |
3193 | +++ b/block/blk-mq.c | |
3194 | @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); | |
3195 | ||
3196 | static void blk_mq_freeze_queue_wait(struct request_queue *q) | |
3197 | { | |
3198 | - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3199 | + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
3200 | } | |
3201 | ||
3202 | /* | |
3203 | @@ -130,7 +130,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) | |
3204 | WARN_ON_ONCE(freeze_depth < 0); | |
3205 | if (!freeze_depth) { | |
3206 | percpu_ref_reinit(&q->q_usage_counter); | |
3207 | - wake_up_all(&q->mq_freeze_wq); | |
3208 | + swake_up_all(&q->mq_freeze_wq); | |
3209 | } | |
3210 | } | |
3211 | EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); | |
3212 | @@ -149,7 +149,7 @@ void blk_mq_wake_waiters(struct request_queue *q) | |
3213 | * dying, we need to ensure that processes currently waiting on | |
3214 | * the queue are notified as well. | |
3215 | */ | |
3216 | - wake_up_all(&q->mq_freeze_wq); | |
3217 | + swake_up_all(&q->mq_freeze_wq); | |
3218 | } | |
3219 | ||
3220 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |
3221 | @@ -197,6 +197,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |
3222 | rq->resid_len = 0; | |
3223 | rq->sense = NULL; | |
3224 | ||
3225 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3226 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3227 | +#endif | |
3228 | INIT_LIST_HEAD(&rq->timeout_list); | |
3229 | rq->timeout = 0; | |
3230 | ||
3231 | @@ -379,6 +382,17 @@ void blk_mq_end_request(struct request *rq, int error) | |
3232 | } | |
3233 | EXPORT_SYMBOL(blk_mq_end_request); | |
3234 | ||
3235 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3236 | + | |
3237 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
3238 | +{ | |
3239 | + struct request *rq = container_of(work, struct request, work); | |
3240 | + | |
3241 | + rq->q->softirq_done_fn(rq); | |
3242 | +} | |
3243 | + | |
3244 | +#else | |
3245 | + | |
3246 | static void __blk_mq_complete_request_remote(void *data) | |
3247 | { | |
3248 | struct request *rq = data; | |
3249 | @@ -386,6 +400,8 @@ static void __blk_mq_complete_request_remote(void *data) | |
3250 | rq->q->softirq_done_fn(rq); | |
3251 | } | |
3252 | ||
3253 | +#endif | |
3254 | + | |
3255 | static void blk_mq_ipi_complete_request(struct request *rq) | |
3256 | { | |
3257 | struct blk_mq_ctx *ctx = rq->mq_ctx; | |
3258 | @@ -397,19 +413,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) | |
3259 | return; | |
3260 | } | |
3261 | ||
3262 | - cpu = get_cpu(); | |
3263 | + cpu = get_cpu_light(); | |
3264 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
3265 | shared = cpus_share_cache(cpu, ctx->cpu); | |
3266 | ||
3267 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
3268 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3269 | + schedule_work_on(ctx->cpu, &rq->work); | |
3270 | +#else | |
3271 | rq->csd.func = __blk_mq_complete_request_remote; | |
3272 | rq->csd.info = rq; | |
3273 | rq->csd.flags = 0; | |
3274 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
3275 | +#endif | |
3276 | } else { | |
3277 | rq->q->softirq_done_fn(rq); | |
3278 | } | |
3279 | - put_cpu(); | |
3280 | + put_cpu_light(); | |
3281 | } | |
3282 | ||
3283 | static void __blk_mq_complete_request(struct request *rq) | |
3284 | @@ -938,14 +958,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | |
3285 | return; | |
3286 | ||
3287 | if (!async) { | |
3288 | - int cpu = get_cpu(); | |
3289 | + int cpu = get_cpu_light(); | |
3290 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
3291 | __blk_mq_run_hw_queue(hctx); | |
3292 | - put_cpu(); | |
3293 | + put_cpu_light(); | |
3294 | return; | |
3295 | } | |
3296 | ||
3297 | - put_cpu(); | |
3298 | + put_cpu_light(); | |
3299 | } | |
3300 | ||
3301 | kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), | |
3302 | @@ -1667,7 +1687,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, | |
3303 | { | |
3304 | struct blk_mq_hw_ctx *hctx = data; | |
3305 | ||
3306 | - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | |
3307 | + if (action == CPU_POST_DEAD) | |
3308 | return blk_mq_hctx_cpu_offline(hctx, cpu); | |
3309 | ||
3310 | /* | |
3311 | diff --git a/block/blk-mq.h b/block/blk-mq.h | |
3312 | index 9087b11037b7..0401d76e827c 100644 | |
3313 | --- a/block/blk-mq.h | |
3314 | +++ b/block/blk-mq.h | |
3315 | @@ -86,12 +86,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, | |
3316 | */ | |
3317 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
3318 | { | |
3319 | - return __blk_mq_get_ctx(q, get_cpu()); | |
3320 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
3321 | } | |
3322 | ||
3323 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
3324 | { | |
3325 | - put_cpu(); | |
3326 | + put_cpu_light(); | |
3327 | } | |
3328 | ||
3329 | struct blk_mq_alloc_data { | |
3330 | diff --git a/block/blk-softirq.c b/block/blk-softirq.c | |
3331 | index 53b1737e978d..81c3c0a62edf 100644 | |
3332 | --- a/block/blk-softirq.c | |
3333 | +++ b/block/blk-softirq.c | |
3334 | @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) | |
3335 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3336 | ||
3337 | local_irq_restore(flags); | |
3338 | + preempt_check_resched_rt(); | |
3339 | } | |
3340 | ||
3341 | /* | |
3342 | @@ -93,6 +94,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | |
3343 | this_cpu_ptr(&blk_cpu_done)); | |
3344 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
3345 | local_irq_enable(); | |
3346 | + preempt_check_resched_rt(); | |
3347 | } | |
3348 | ||
3349 | return NOTIFY_OK; | |
3350 | @@ -150,6 +152,7 @@ void __blk_complete_request(struct request *req) | |
3351 | goto do_local; | |
3352 | ||
3353 | local_irq_restore(flags); | |
3354 | + preempt_check_resched_rt(); | |
3355 | } | |
3356 | ||
3357 | /** | |
3358 | diff --git a/block/bounce.c b/block/bounce.c | |
3359 | index 1cb5dd3a5da1..2f1ec8a67cbe 100644 | |
3360 | --- a/block/bounce.c | |
3361 | +++ b/block/bounce.c | |
3362 | @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | |
3363 | unsigned long flags; | |
3364 | unsigned char *vto; | |
3365 | ||
3366 | - local_irq_save(flags); | |
3367 | + local_irq_save_nort(flags); | |
3368 | vto = kmap_atomic(to->bv_page); | |
3369 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
3370 | kunmap_atomic(vto); | |
3371 | - local_irq_restore(flags); | |
3372 | + local_irq_restore_nort(flags); | |
3373 | } | |
3374 | ||
3375 | #else /* CONFIG_HIGHMEM */ | |
3376 | diff --git a/crypto/algapi.c b/crypto/algapi.c | |
3377 | index df939b54b09f..efe5e06adcf7 100644 | |
3378 | --- a/crypto/algapi.c | |
3379 | +++ b/crypto/algapi.c | |
3380 | @@ -718,13 +718,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); | |
3381 | ||
3382 | int crypto_register_notifier(struct notifier_block *nb) | |
3383 | { | |
3384 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
3385 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
3386 | } | |
3387 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
3388 | ||
3389 | int crypto_unregister_notifier(struct notifier_block *nb) | |
3390 | { | |
3391 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
3392 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
3393 | } | |
3394 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
3395 | ||
3396 | diff --git a/crypto/api.c b/crypto/api.c | |
3397 | index bbc147cb5dec..bc1a848f02ec 100644 | |
3398 | --- a/crypto/api.c | |
3399 | +++ b/crypto/api.c | |
3400 | @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); | |
3401 | DECLARE_RWSEM(crypto_alg_sem); | |
3402 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
3403 | ||
3404 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
3405 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
3406 | EXPORT_SYMBOL_GPL(crypto_chain); | |
3407 | ||
3408 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
3409 | @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v) | |
3410 | { | |
3411 | int ok; | |
3412 | ||
3413 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3414 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3415 | if (ok == NOTIFY_DONE) { | |
3416 | request_module("cryptomgr"); | |
3417 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
3418 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
3419 | } | |
3420 | ||
3421 | return ok; | |
3422 | diff --git a/crypto/internal.h b/crypto/internal.h | |
3423 | index 7eefcdb00227..0ecc7f5a2f40 100644 | |
3424 | --- a/crypto/internal.h | |
3425 | +++ b/crypto/internal.h | |
3426 | @@ -47,7 +47,7 @@ struct crypto_larval { | |
3427 | ||
3428 | extern struct list_head crypto_alg_list; | |
3429 | extern struct rw_semaphore crypto_alg_sem; | |
3430 | -extern struct blocking_notifier_head crypto_chain; | |
3431 | +extern struct srcu_notifier_head crypto_chain; | |
3432 | ||
3433 | #ifdef CONFIG_PROC_FS | |
3434 | void __init crypto_init_proc(void); | |
3435 | @@ -146,7 +146,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg) | |
3436 | ||
3437 | static inline void crypto_notify(unsigned long val, void *v) | |
3438 | { | |
3439 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
3440 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
3441 | } | |
3442 | ||
3443 | #endif /* _CRYPTO_INTERNAL_H */ | |
3444 | diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h | |
3445 | index fded776236e2..bda523219d50 100644 | |
3446 | --- a/drivers/acpi/acpica/acglobal.h | |
3447 | +++ b/drivers/acpi/acpica/acglobal.h | |
3448 | @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); | |
3449 | * interrupt level | |
3450 | */ | |
3451 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
3452 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3453 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
3454 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
3455 | ||
3456 | /* Mutex for _OSI support */ | |
3457 | diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c | |
3458 | index 3b7fb99362b6..696bf8e62afb 100644 | |
3459 | --- a/drivers/acpi/acpica/hwregs.c | |
3460 | +++ b/drivers/acpi/acpica/hwregs.c | |
3461 | @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void) | |
3462 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
3463 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
3464 | ||
3465 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3466 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3467 | ||
3468 | /* Clear the fixed events in PM1 A/B */ | |
3469 | ||
3470 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, | |
3471 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
3472 | ||
3473 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3474 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3475 | ||
3476 | if (ACPI_FAILURE(status)) { | |
3477 | goto exit; | |
3478 | diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c | |
3479 | index 98c26ff39409..6e236f2ea791 100644 | |
3480 | --- a/drivers/acpi/acpica/hwxface.c | |
3481 | +++ b/drivers/acpi/acpica/hwxface.c | |
3482 | @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3483 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
3484 | } | |
3485 | ||
3486 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
3487 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
3488 | ||
3489 | /* | |
3490 | * At this point, we know that the parent register is one of the | |
3491 | @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
3492 | ||
3493 | unlock_and_exit: | |
3494 | ||
3495 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
3496 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
3497 | return_ACPI_STATUS(status); | |
3498 | } | |
3499 | ||
3500 | diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c | |
3501 | index 15073375bd00..357e7ca5a587 100644 | |
3502 | --- a/drivers/acpi/acpica/utmutex.c | |
3503 | +++ b/drivers/acpi/acpica/utmutex.c | |
3504 | @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) | |
3505 | return_ACPI_STATUS (status); | |
3506 | } | |
3507 | ||
3508 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
3509 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
3510 | if (ACPI_FAILURE (status)) { | |
3511 | return_ACPI_STATUS (status); | |
3512 | } | |
3513 | @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) | |
3514 | /* Delete the spinlocks */ | |
3515 | ||
3516 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
3517 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
3518 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
3519 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
3520 | ||
3521 | /* Delete the reader/writer lock */ | |
3522 | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c | |
3523 | index 051b6158d1b7..7ad293bef6ed 100644 | |
3524 | --- a/drivers/ata/libata-sff.c | |
3525 | +++ b/drivers/ata/libata-sff.c | |
3526 | @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, | |
3527 | unsigned long flags; | |
3528 | unsigned int consumed; | |
3529 | ||
3530 | - local_irq_save(flags); | |
3531 | + local_irq_save_nort(flags); | |
3532 | consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); | |
3533 | - local_irq_restore(flags); | |
3534 | + local_irq_restore_nort(flags); | |
3535 | ||
3536 | return consumed; | |
3537 | } | |
3538 | @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3539 | unsigned long flags; | |
3540 | ||
3541 | /* FIXME: use a bounce buffer */ | |
3542 | - local_irq_save(flags); | |
3543 | + local_irq_save_nort(flags); | |
3544 | buf = kmap_atomic(page); | |
3545 | ||
3546 | /* do the actual data transfer */ | |
3547 | @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
3548 | do_write); | |
3549 | ||
3550 | kunmap_atomic(buf); | |
3551 | - local_irq_restore(flags); | |
3552 | + local_irq_restore_nort(flags); | |
3553 | } else { | |
3554 | buf = page_address(page); | |
3555 | ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, | |
3556 | @@ -864,7 +864,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3557 | unsigned long flags; | |
3558 | ||
3559 | /* FIXME: use bounce buffer */ | |
3560 | - local_irq_save(flags); | |
3561 | + local_irq_save_nort(flags); | |
3562 | buf = kmap_atomic(page); | |
3563 | ||
3564 | /* do the actual data transfer */ | |
3565 | @@ -872,7 +872,7 @@ static int __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) | |
3566 | count, rw); | |
3567 | ||
3568 | kunmap_atomic(buf); | |
3569 | - local_irq_restore(flags); | |
3570 | + local_irq_restore_nort(flags); | |
3571 | } else { | |
3572 | buf = page_address(page); | |
3573 | consumed = ap->ops->sff_data_xfer(dev, buf + offset, | |
3574 | diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c | |
3575 | index 4b5cd3a7b2b6..fa8329ad79fd 100644 | |
3576 | --- a/drivers/block/zram/zcomp.c | |
3577 | +++ b/drivers/block/zram/zcomp.c | |
3578 | @@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf) | |
3579 | ||
3580 | struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) | |
3581 | { | |
3582 | - return *get_cpu_ptr(comp->stream); | |
3583 | + struct zcomp_strm *zstrm; | |
3584 | + | |
3585 | + zstrm = *this_cpu_ptr(comp->stream); | |
3586 | + spin_lock(&zstrm->zcomp_lock); | |
3587 | + return zstrm; | |
3588 | } | |
3589 | ||
3590 | void zcomp_stream_put(struct zcomp *comp) | |
3591 | { | |
3592 | - put_cpu_ptr(comp->stream); | |
3593 | + struct zcomp_strm *zstrm; | |
3594 | + | |
3595 | + zstrm = *this_cpu_ptr(comp->stream); | |
3596 | + spin_unlock(&zstrm->zcomp_lock); | |
3597 | } | |
3598 | ||
3599 | int zcomp_compress(struct zcomp_strm *zstrm, | |
3600 | @@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, | |
3601 | pr_err("Can't allocate a compression stream\n"); | |
3602 | return NOTIFY_BAD; | |
3603 | } | |
3604 | + spin_lock_init(&zstrm->zcomp_lock); | |
3605 | *per_cpu_ptr(comp->stream, cpu) = zstrm; | |
3606 | break; | |
3607 | case CPU_DEAD: | |
3608 | diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h | |
3609 | index 478cac2ed465..f7a6efdc3285 100644 | |
3610 | --- a/drivers/block/zram/zcomp.h | |
3611 | +++ b/drivers/block/zram/zcomp.h | |
3612 | @@ -14,6 +14,7 @@ struct zcomp_strm { | |
3613 | /* compression/decompression buffer */ | |
3614 | void *buffer; | |
3615 | struct crypto_comp *tfm; | |
3616 | + spinlock_t zcomp_lock; | |
3617 | }; | |
3618 | ||
3619 | /* dynamic per-device compression frontend */ | |
3620 | diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c | |
3621 | index 04365b17ee67..b4a0577a4dbc 100644 | |
3622 | --- a/drivers/block/zram/zram_drv.c | |
3623 | +++ b/drivers/block/zram/zram_drv.c | |
3624 | @@ -519,6 +519,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) | |
3625 | goto out_error; | |
3626 | } | |
3627 | ||
3628 | + zram_meta_init_table_locks(meta, disksize); | |
3629 | + | |
3630 | return meta; | |
3631 | ||
3632 | out_error: | |
3633 | @@ -566,28 +568,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) | |
3634 | struct zram_meta *meta = zram->meta; | |
3635 | unsigned long handle; | |
3636 | unsigned int size; | |
3637 | + struct zcomp_strm *zstrm; | |
3638 | ||
3639 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3640 | + zram_lock_table(&meta->table[index]); | |
3641 | handle = meta->table[index].handle; | |
3642 | size = zram_get_obj_size(meta, index); | |
3643 | ||
3644 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3645 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3646 | + zram_unlock_table(&meta->table[index]); | |
3647 | clear_page(mem); | |
3648 | return 0; | |
3649 | } | |
3650 | ||
3651 | + zstrm = zcomp_stream_get(zram->comp); | |
3652 | cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); | |
3653 | if (size == PAGE_SIZE) { | |
3654 | copy_page(mem, cmem); | |
3655 | } else { | |
3656 | - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); | |
3657 | - | |
3658 | ret = zcomp_decompress(zstrm, cmem, size, mem); | |
3659 | - zcomp_stream_put(zram->comp); | |
3660 | } | |
3661 | zs_unmap_object(meta->mem_pool, handle); | |
3662 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3663 | + zcomp_stream_put(zram->comp); | |
3664 | + zram_unlock_table(&meta->table[index]); | |
3665 | ||
3666 | /* Should NEVER happen. Return bio error if it does. */ | |
3667 | if (unlikely(ret)) { | |
3668 | @@ -607,14 +609,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, | |
3669 | struct zram_meta *meta = zram->meta; | |
3670 | page = bvec->bv_page; | |
3671 | ||
3672 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3673 | + zram_lock_table(&meta->table[index]); | |
3674 | if (unlikely(!meta->table[index].handle) || | |
3675 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
3676 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3677 | + zram_unlock_table(&meta->table[index]); | |
3678 | handle_zero_page(bvec); | |
3679 | return 0; | |
3680 | } | |
3681 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3682 | + zram_unlock_table(&meta->table[index]); | |
3683 | ||
3684 | if (is_partial_io(bvec)) | |
3685 | /* Use a temporary buffer to decompress the page */ | |
3686 | @@ -691,10 +693,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3687 | if (user_mem) | |
3688 | kunmap_atomic(user_mem); | |
3689 | /* Free memory associated with this sector now. */ | |
3690 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3691 | + zram_lock_table(&meta->table[index]); | |
3692 | zram_free_page(zram, index); | |
3693 | zram_set_flag(meta, index, ZRAM_ZERO); | |
3694 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3695 | + zram_unlock_table(&meta->table[index]); | |
3696 | ||
3697 | atomic64_inc(&zram->stats.zero_pages); | |
3698 | ret = 0; | |
3699 | @@ -785,12 +787,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
3700 | * Free memory associated with this sector | |
3701 | * before overwriting unused sectors. | |
3702 | */ | |
3703 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3704 | + zram_lock_table(&meta->table[index]); | |
3705 | zram_free_page(zram, index); | |
3706 | ||
3707 | meta->table[index].handle = handle; | |
3708 | zram_set_obj_size(meta, index, clen); | |
3709 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3710 | + zram_unlock_table(&meta->table[index]); | |
3711 | ||
3712 | /* Update stats */ | |
3713 | atomic64_add(clen, &zram->stats.compr_data_size); | |
3714 | @@ -833,9 +835,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, | |
3715 | } | |
3716 | ||
3717 | while (n >= PAGE_SIZE) { | |
3718 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3719 | + zram_lock_table(&meta->table[index]); | |
3720 | zram_free_page(zram, index); | |
3721 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3722 | + zram_unlock_table(&meta->table[index]); | |
3723 | atomic64_inc(&zram->stats.notify_free); | |
3724 | index++; | |
3725 | n -= PAGE_SIZE; | |
3726 | @@ -964,9 +966,9 @@ static void zram_slot_free_notify(struct block_device *bdev, | |
3727 | zram = bdev->bd_disk->private_data; | |
3728 | meta = zram->meta; | |
3729 | ||
3730 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
3731 | + zram_lock_table(&meta->table[index]); | |
3732 | zram_free_page(zram, index); | |
3733 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
3734 | + zram_unlock_table(&meta->table[index]); | |
3735 | atomic64_inc(&zram->stats.notify_free); | |
3736 | } | |
3737 | ||
3738 | diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h | |
3739 | index 74fcf10da374..fd4020c99b9e 100644 | |
3740 | --- a/drivers/block/zram/zram_drv.h | |
3741 | +++ b/drivers/block/zram/zram_drv.h | |
3742 | @@ -73,6 +73,9 @@ enum zram_pageflags { | |
3743 | struct zram_table_entry { | |
3744 | unsigned long handle; | |
3745 | unsigned long value; | |
3746 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
3747 | + spinlock_t lock; | |
3748 | +#endif | |
3749 | }; | |
3750 | ||
3751 | struct zram_stats { | |
3752 | @@ -120,4 +123,42 @@ struct zram { | |
3753 | */ | |
3754 | bool claim; /* Protected by bdev->bd_mutex */ | |
3755 | }; | |
3756 | + | |
3757 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
3758 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3759 | +{ | |
3760 | + bit_spin_lock(ZRAM_ACCESS, &table->value); | |
3761 | +} | |
3762 | + | |
3763 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3764 | +{ | |
3765 | + bit_spin_unlock(ZRAM_ACCESS, &table->value); | |
3766 | +} | |
3767 | + | |
3768 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } | |
3769 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
3770 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
3771 | +{ | |
3772 | + spin_lock(&table->lock); | |
3773 | + __set_bit(ZRAM_ACCESS, &table->value); | |
3774 | +} | |
3775 | + | |
3776 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
3777 | +{ | |
3778 | + __clear_bit(ZRAM_ACCESS, &table->value); | |
3779 | + spin_unlock(&table->lock); | |
3780 | +} | |
3781 | + | |
3782 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) | |
3783 | +{ | |
3784 | + size_t num_pages = disksize >> PAGE_SHIFT; | |
3785 | + size_t index; | |
3786 | + | |
3787 | + for (index = 0; index < num_pages; index++) { | |
3788 | + spinlock_t *lock = &meta->table[index].lock; | |
3789 | + spin_lock_init(lock); | |
3790 | + } | |
3791 | +} | |
3792 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
3793 | + | |
3794 | #endif | |
3795 | diff --git a/drivers/char/random.c b/drivers/char/random.c | |
3796 | index 3efb3bf0ab83..c894d2e266f3 100644 | |
3797 | --- a/drivers/char/random.c | |
3798 | +++ b/drivers/char/random.c | |
3799 | @@ -1028,8 +1028,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3800 | } sample; | |
3801 | long delta, delta2, delta3; | |
3802 | ||
3803 | - preempt_disable(); | |
3804 | - | |
3805 | sample.jiffies = jiffies; | |
3806 | sample.cycles = random_get_entropy(); | |
3807 | sample.num = num; | |
3808 | @@ -1070,7 +1068,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
3809 | */ | |
3810 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
3811 | } | |
3812 | - preempt_enable(); | |
3813 | } | |
3814 | ||
3815 | void add_input_randomness(unsigned int type, unsigned int code, | |
3816 | @@ -1123,28 +1120,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) | |
3817 | return *(ptr + f->reg_idx++); | |
3818 | } | |
3819 | ||
3820 | -void add_interrupt_randomness(int irq, int irq_flags) | |
3821 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
3822 | { | |
3823 | struct entropy_store *r; | |
3824 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
3825 | - struct pt_regs *regs = get_irq_regs(); | |
3826 | unsigned long now = jiffies; | |
3827 | cycles_t cycles = random_get_entropy(); | |
3828 | __u32 c_high, j_high; | |
3829 | - __u64 ip; | |
3830 | unsigned long seed; | |
3831 | int credit = 0; | |
3832 | ||
3833 | if (cycles == 0) | |
3834 | - cycles = get_reg(fast_pool, regs); | |
3835 | + cycles = get_reg(fast_pool, NULL); | |
3836 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
3837 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
3838 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
3839 | fast_pool->pool[1] ^= now ^ c_high; | |
3840 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
3841 | + if (!ip) | |
3842 | + ip = _RET_IP_; | |
3843 | fast_pool->pool[2] ^= ip; | |
3844 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
3845 | - get_reg(fast_pool, regs); | |
3846 | + get_reg(fast_pool, NULL); | |
3847 | ||
3848 | fast_mix(fast_pool); | |
3849 | add_interrupt_bench(cycles); | |
3850 | diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c | |
3851 | index 4da2af9694a2..5b6f57f500b8 100644 | |
3852 | --- a/drivers/clocksource/tcb_clksrc.c | |
3853 | +++ b/drivers/clocksource/tcb_clksrc.c | |
3854 | @@ -23,8 +23,7 @@ | |
3855 | * this 32 bit free-running counter. the second channel is not used. | |
3856 | * | |
3857 | * - The third channel may be used to provide a 16-bit clockevent | |
3858 | - * source, used in either periodic or oneshot mode. This runs | |
3859 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
3860 | + * source, used in either periodic or oneshot mode. | |
3861 | * | |
3862 | * A boot clocksource and clockevent source are also currently needed, | |
3863 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
3864 | @@ -74,6 +73,8 @@ static struct clocksource clksrc = { | |
3865 | struct tc_clkevt_device { | |
3866 | struct clock_event_device clkevt; | |
3867 | struct clk *clk; | |
3868 | + bool clk_enabled; | |
3869 | + u32 freq; | |
3870 | void __iomem *regs; | |
3871 | }; | |
3872 | ||
3873 | @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) | |
3874 | return container_of(clkevt, struct tc_clkevt_device, clkevt); | |
3875 | } | |
3876 | ||
3877 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
3878 | - * because using one of the divided clocks would usually mean the | |
3879 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
3880 | - * | |
3881 | - * A divided clock could be good for high resolution timers, since | |
3882 | - * 30.5 usec resolution can seem "low". | |
3883 | - */ | |
3884 | static u32 timer_clock; | |
3885 | ||
3886 | +static void tc_clk_disable(struct clock_event_device *d) | |
3887 | +{ | |
3888 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3889 | + | |
3890 | + clk_disable(tcd->clk); | |
3891 | + tcd->clk_enabled = false; | |
3892 | +} | |
3893 | + | |
3894 | +static void tc_clk_enable(struct clock_event_device *d) | |
3895 | +{ | |
3896 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3897 | + | |
3898 | + if (tcd->clk_enabled) | |
3899 | + return; | |
3900 | + clk_enable(tcd->clk); | |
3901 | + tcd->clk_enabled = true; | |
3902 | +} | |
3903 | + | |
3904 | static int tc_shutdown(struct clock_event_device *d) | |
3905 | { | |
3906 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
3907 | @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) | |
3908 | ||
3909 | __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); | |
3910 | __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
3911 | + return 0; | |
3912 | +} | |
3913 | + | |
3914 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
3915 | +{ | |
3916 | + tc_shutdown(d); | |
3917 | if (!clockevent_state_detached(d)) | |
3918 | - clk_disable(tcd->clk); | |
3919 | + tc_clk_disable(d); | |
3920 | ||
3921 | return 0; | |
3922 | } | |
3923 | @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) | |
3924 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) | |
3925 | tc_shutdown(d); | |
3926 | ||
3927 | - clk_enable(tcd->clk); | |
3928 | + tc_clk_enable(d); | |
3929 | ||
3930 | - /* slow clock, count up to RC, then irq and stop */ | |
3931 | + /* count up to RC, then irq and stop */ | |
3932 | __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | | |
3933 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); | |
3934 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3935 | @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) | |
3936 | /* By not making the gentime core emulate periodic mode on top | |
3937 | * of oneshot, we get lower overhead and improved accuracy. | |
3938 | */ | |
3939 | - clk_enable(tcd->clk); | |
3940 | + tc_clk_enable(d); | |
3941 | ||
3942 | - /* slow clock, count up to RC, then irq and restart */ | |
3943 | + /* count up to RC, then irq and restart */ | |
3944 | __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, | |
3945 | regs + ATMEL_TC_REG(2, CMR)); | |
3946 | - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3947 | + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
3948 | ||
3949 | /* Enable clock and interrupts on RC compare */ | |
3950 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
3951 | @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { | |
3952 | .features = CLOCK_EVT_FEAT_PERIODIC | | |
3953 | CLOCK_EVT_FEAT_ONESHOT, | |
3954 | /* Should be lower than at91rm9200's system timer */ | |
3955 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
3956 | .rating = 125, | |
3957 | +#else | |
3958 | + .rating = 200, | |
3959 | +#endif | |
3960 | .set_next_event = tc_next_event, | |
3961 | - .set_state_shutdown = tc_shutdown, | |
3962 | + .set_state_shutdown = tc_shutdown_clk_off, | |
3963 | .set_state_periodic = tc_set_periodic, | |
3964 | .set_state_oneshot = tc_set_oneshot, | |
3965 | }, | |
3966 | @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) | |
3967 | return IRQ_NONE; | |
3968 | } | |
3969 | ||
3970 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3971 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
3972 | { | |
3973 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
3974 | int ret; | |
3975 | struct clk *t2_clk = tc->clk[2]; | |
3976 | int irq = tc->irq[2]; | |
3977 | @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3978 | clkevt.regs = tc->regs; | |
3979 | clkevt.clk = t2_clk; | |
3980 | ||
3981 | - timer_clock = clk32k_divisor_idx; | |
3982 | + timer_clock = divisor_idx; | |
3983 | + if (!divisor) | |
3984 | + clkevt.freq = 32768; | |
3985 | + else | |
3986 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
3987 | ||
3988 | clkevt.clkevt.cpumask = cpumask_of(0); | |
3989 | ||
3990 | @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
3991 | return ret; | |
3992 | } | |
3993 | ||
3994 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
3995 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
3996 | ||
3997 | return ret; | |
3998 | } | |
3999 | @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) | |
4000 | goto err_disable_t1; | |
4001 | ||
4002 | /* channel 2: periodic and oneshot timer support */ | |
4003 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
4004 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
4005 | +#else | |
4006 | + ret = setup_clkevents(tc, best_divisor_idx); | |
4007 | +#endif | |
4008 | if (ret) | |
4009 | goto err_unregister_clksrc; | |
4010 | ||
4011 | diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c | |
4012 | index 7f0f5b26d8c5..1553f19e73e7 100644 | |
4013 | --- a/drivers/clocksource/timer-atmel-pit.c | |
4014 | +++ b/drivers/clocksource/timer-atmel-pit.c | |
4015 | @@ -46,6 +46,7 @@ struct pit_data { | |
4016 | u32 cycle; | |
4017 | u32 cnt; | |
4018 | unsigned int irq; | |
4019 | + bool irq_requested; | |
4020 | struct clk *mck; | |
4021 | }; | |
4022 | ||
4023 | @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) | |
4024 | ||
4025 | /* disable irq, leaving the clocksource active */ | |
4026 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
4027 | + if (data->irq_requested) { | |
4028 | + free_irq(data->irq, data); | |
4029 | + data->irq_requested = false; | |
4030 | + } | |
4031 | return 0; | |
4032 | } | |
4033 | ||
4034 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
4035 | /* | |
4036 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
4037 | */ | |
4038 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
4039 | { | |
4040 | struct pit_data *data = clkevt_to_pit_data(dev); | |
4041 | + int ret; | |
4042 | + | |
4043 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
4044 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4045 | + "at91_tick", data); | |
4046 | + if (ret) | |
4047 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
4048 | + | |
4049 | + data->irq_requested = true; | |
4050 | ||
4051 | /* update clocksource counter */ | |
4052 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
4053 | @@ -211,15 +226,6 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data) | |
4054 | return ret; | |
4055 | } | |
4056 | ||
4057 | - /* Set up irq handler */ | |
4058 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
4059 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4060 | - "at91_tick", data); | |
4061 | - if (ret) { | |
4062 | - pr_err("Unable to setup IRQ\n"); | |
4063 | - return ret; | |
4064 | - } | |
4065 | - | |
4066 | /* Set up and register clockevents */ | |
4067 | data->clkevt.name = "pit"; | |
4068 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
4069 | diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c | |
4070 | index e90ab5b63a90..9e124087c55f 100644 | |
4071 | --- a/drivers/clocksource/timer-atmel-st.c | |
4072 | +++ b/drivers/clocksource/timer-atmel-st.c | |
4073 | @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) | |
4074 | last_crtr = read_CRTR(); | |
4075 | } | |
4076 | ||
4077 | +static int atmel_st_irq; | |
4078 | + | |
4079 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
4080 | { | |
4081 | clkdev32k_disable_and_flush_irq(); | |
4082 | irqmask = 0; | |
4083 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
4084 | + free_irq(atmel_st_irq, regmap_st); | |
4085 | return 0; | |
4086 | } | |
4087 | ||
4088 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
4089 | { | |
4090 | + int ret; | |
4091 | + | |
4092 | clkdev32k_disable_and_flush_irq(); | |
4093 | ||
4094 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
4095 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4096 | + "at91_tick", regmap_st); | |
4097 | + if (ret) | |
4098 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
4099 | + | |
4100 | /* | |
4101 | * ALM for oneshot irqs, set by next_event() | |
4102 | * before 32 seconds have passed. | |
4103 | @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
4104 | ||
4105 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
4106 | { | |
4107 | + int ret; | |
4108 | + | |
4109 | clkdev32k_disable_and_flush_irq(); | |
4110 | ||
4111 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
4112 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4113 | + "at91_tick", regmap_st); | |
4114 | + if (ret) | |
4115 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
4116 | + | |
4117 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
4118 | irqmask = AT91_ST_PITS; | |
4119 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
4120 | @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
4121 | { | |
4122 | struct clk *sclk; | |
4123 | unsigned int sclk_rate, val; | |
4124 | - int irq, ret; | |
4125 | + int ret; | |
4126 | ||
4127 | regmap_st = syscon_node_to_regmap(node); | |
4128 | if (IS_ERR(regmap_st)) { | |
4129 | @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node) | |
4130 | regmap_read(regmap_st, AT91_ST_SR, &val); | |
4131 | ||
4132 | /* Get the interrupts property */ | |
4133 | - irq = irq_of_parse_and_map(node, 0); | |
4134 | - if (!irq) { | |
4135 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
4136 | + if (!atmel_st_irq) { | |
4137 | pr_err("Unable to get IRQ from DT\n"); | |
4138 | return -EINVAL; | |
4139 | } | |
4140 | ||
4141 | - /* Make IRQs happen for the system timer */ | |
4142 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
4143 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
4144 | - "at91_tick", regmap_st); | |
4145 | - if (ret) { | |
4146 | - pr_err("Unable to setup IRQ\n"); | |
4147 | - return ret; | |
4148 | - } | |
4149 | - | |
4150 | sclk = of_clk_get(node, 0); | |
4151 | if (IS_ERR(sclk)) { | |
4152 | pr_err("Unable to get slow clock\n"); | |
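The two Atmel timer hunks above move the tick IRQ setup out of the probe path: the interrupt is requested when the clockevent is actually programmed (periodic or oneshot) and released again in the shutdown callback. Below is a minimal sketch of that pattern; the driver, handler and flag names are hypothetical, only request_irq()/free_irq() and the clock_event_device callbacks are real kernel APIs.

#include <linux/clockchips.h>
#include <linux/interrupt.h>

static int tick_irq;			/* hypothetical: parsed from DT at init */
static bool tick_irq_requested;

static irqreturn_t tick_handler(int irq, void *dev_id)
{
	/* ... acknowledge the hardware, call dev->event_handler() ... */
	return IRQ_HANDLED;
}

static int tick_set_periodic(struct clock_event_device *dev)
{
	int ret;

	if (!tick_irq_requested) {
		ret = request_irq(tick_irq, tick_handler,
				  IRQF_TIMER | IRQF_IRQPOLL, "tick", dev);
		if (ret)
			return ret;
		tick_irq_requested = true;
	}
	/* ... program the hardware for 1/HZ interrupts ... */
	return 0;
}

static int tick_shutdown(struct clock_event_device *dev)
{
	/* ... stop the hardware ... */
	if (tick_irq_requested) {
		free_irq(tick_irq, dev);
		tick_irq_requested = false;
	}
	return 0;
}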
4153 | diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c | |
4154 | index a782ce87715c..19d265948526 100644 | |
4155 | --- a/drivers/connector/cn_proc.c | |
4156 | +++ b/drivers/connector/cn_proc.c | |
4157 | @@ -32,6 +32,7 @@ | |
4158 | #include <linux/pid_namespace.h> | |
4159 | ||
4160 | #include <linux/cn_proc.h> | |
4161 | +#include <linux/locallock.h> | |
4162 | ||
4163 | /* | |
4164 | * Size of a cn_msg followed by a proc_event structure. Since the | |
4165 | @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; | |
4166 | ||
4167 | /* proc_event_counts is used as the sequence number of the netlink message */ | |
4168 | static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; | |
4169 | +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); | |
4170 | ||
4171 | static inline void send_msg(struct cn_msg *msg) | |
4172 | { | |
4173 | - preempt_disable(); | |
4174 | + local_lock(send_msg_lock); | |
4175 | ||
4176 | msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; | |
4177 | ((struct proc_event *)msg->data)->cpu = smp_processor_id(); | |
4178 | @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg) | |
4179 | */ | |
4180 | cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); | |
4181 | ||
4182 | - preempt_enable(); | |
4183 | + local_unlock(send_msg_lock); | |
4184 | } | |
4185 | ||
4186 | void proc_fork_connector(struct task_struct *task) | |
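The cn_proc hunk above swaps preempt_disable()/preempt_enable() for a local lock so the section stays preemptible on PREEMPT_RT while still serializing the per-CPU sequence counter. A stripped-down sketch of the same pattern follows; it assumes the RT tree's <linux/locallock.h> helpers seen in the diff (DEFINE_LOCAL_IRQ_LOCK, local_lock, local_unlock), everything else is illustrative.

#include <linux/locallock.h>	/* RT-tree header, as included by the hunk above */
#include <linux/percpu.h>

static DEFINE_PER_CPU(u32, msg_seq);
static DEFINE_LOCAL_IRQ_LOCK(msg_seq_lock);

static u32 next_seq(void)
{
	u32 seq;

	/* !RT: behaves like preempt_disable(); RT: takes a per-CPU lock
	 * (a sleeping lock there), so the task may be preempted but no
	 * other task can enter this section on the same CPU. */
	local_lock(msg_seq_lock);
	seq = __this_cpu_inc_return(msg_seq) - 1;
	local_unlock(msg_seq_lock);

	return seq;
}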
4187 | diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 | |
4188 | index adbd1de1cea5..1fac5074f2cf 100644 | |
4189 | --- a/drivers/cpufreq/Kconfig.x86 | |
4190 | +++ b/drivers/cpufreq/Kconfig.x86 | |
4191 | @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI | |
4192 | ||
4193 | config X86_POWERNOW_K8 | |
4194 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
4195 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
4196 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
4197 | help | |
4198 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
4199 | Support for K10 and newer processors is now in acpi-cpufreq. | |
4200 | diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4201 | index b35e5b6475b2..ce60807fb1d4 100644 | |
4202 | --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4203 | +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
4204 | @@ -1302,7 +1302,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, | |
4205 | if (ret) | |
4206 | return ret; | |
4207 | ||
4208 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
4209 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); | |
4210 | +#endif | |
4211 | ||
4212 | i915_gem_execbuffer_move_to_active(vmas, params->request); | |
4213 | ||
4214 | diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4215 | index 6f10b421487b..dd3a9a6ace11 100644 | |
4216 | --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4217 | +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
4218 | @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4219 | if (!mutex_is_locked(mutex)) | |
4220 | return false; | |
4221 | ||
4222 | -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) | |
4223 | +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4224 | return mutex->owner == task; | |
4225 | #else | |
4226 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4227 | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c | |
4228 | index 1c2aec392412..1d85c0c791f1 100644 | |
4229 | --- a/drivers/gpu/drm/i915/i915_irq.c | |
4230 | +++ b/drivers/gpu/drm/i915/i915_irq.c | |
4231 | @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4232 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); | |
4233 | ||
4234 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4235 | + preempt_disable_rt(); | |
4236 | ||
4237 | /* Get optional system timestamp before query. */ | |
4238 | if (stime) | |
4239 | @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4240 | *etime = ktime_get(); | |
4241 | ||
4242 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4243 | + preempt_enable_rt(); | |
4244 | ||
4245 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
4246 | ||
4247 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c | |
4248 | index e9a64fba6333..2aac27b13d86 100644 | |
4249 | --- a/drivers/gpu/drm/i915/intel_display.c | |
4250 | +++ b/drivers/gpu/drm/i915/intel_display.c | |
4251 | @@ -11647,7 +11647,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) | |
4252 | struct intel_crtc *intel_crtc = to_intel_crtc(crtc); | |
4253 | struct intel_flip_work *work; | |
4254 | ||
4255 | - WARN_ON(!in_interrupt()); | |
4256 | + WARN_ON_NONRT(!in_interrupt()); | |
4257 | ||
4258 | if (crtc == NULL) | |
4259 | return; | |
4260 | diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c | |
4261 | index 4178849631ad..0eb939c92544 100644 | |
4262 | --- a/drivers/gpu/drm/i915/intel_sprite.c | |
4263 | +++ b/drivers/gpu/drm/i915/intel_sprite.c | |
4264 | @@ -38,6 +38,7 @@ | |
4265 | #include "intel_drv.h" | |
4266 | #include <drm/i915_drm.h> | |
4267 | #include "i915_drv.h" | |
4268 | +#include <linux/locallock.h> | |
4269 | ||
4270 | static bool | |
4271 | format_is_yuv(uint32_t format) | |
4272 | @@ -64,6 +65,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, | |
4273 | 1000 * adjusted_mode->crtc_htotal); | |
4274 | } | |
4275 | ||
4276 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); | |
4277 | + | |
4278 | /** | |
4279 | * intel_pipe_update_start() - start update of a set of display registers | |
4280 | * @crtc: the crtc of which the registers are going to be updated | |
4281 | @@ -94,7 +97,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
4282 | min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); | |
4283 | max = vblank_start - 1; | |
4284 | ||
4285 | - local_irq_disable(); | |
4286 | + local_lock_irq(pipe_update_lock); | |
4287 | ||
4288 | if (min <= 0 || max <= 0) | |
4289 | return; | |
4290 | @@ -124,11 +127,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
4291 | break; | |
4292 | } | |
4293 | ||
4294 | - local_irq_enable(); | |
4295 | + local_unlock_irq(pipe_update_lock); | |
4296 | ||
4297 | timeout = schedule_timeout(timeout); | |
4298 | ||
4299 | - local_irq_disable(); | |
4300 | + local_lock_irq(pipe_update_lock); | |
4301 | } | |
4302 | ||
4303 | finish_wait(wq, &wait); | |
4304 | @@ -180,7 +183,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work | |
4305 | crtc->base.state->event = NULL; | |
4306 | } | |
4307 | ||
4308 | - local_irq_enable(); | |
4309 | + local_unlock_irq(pipe_update_lock); | |
4310 | ||
4311 | if (crtc->debug.start_vbl_count && | |
4312 | crtc->debug.start_vbl_count != end_vbl_count) { | |
4313 | diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4314 | index 283d2841ba58..d01f6ed1977e 100644 | |
4315 | --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4316 | +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c | |
4317 | @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
4318 | if (!mutex_is_locked(mutex)) | |
4319 | return false; | |
4320 | ||
4321 | -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) | |
4322 | +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
4323 | return mutex->owner == task; | |
4324 | #else | |
4325 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
4326 | diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c | |
4327 | index c3206fb8f4cf..6e2423186e2a 100644 | |
4328 | --- a/drivers/gpu/drm/radeon/radeon_display.c | |
4329 | +++ b/drivers/gpu/drm/radeon/radeon_display.c | |
4330 | @@ -1869,6 +1869,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4331 | struct radeon_device *rdev = dev->dev_private; | |
4332 | ||
4333 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
4334 | + preempt_disable_rt(); | |
4335 | ||
4336 | /* Get optional system timestamp before query. */ | |
4337 | if (stime) | |
4338 | @@ -1961,6 +1962,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
4339 | *etime = ktime_get(); | |
4340 | ||
4341 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
4342 | + preempt_enable_rt(); | |
4343 | ||
4344 | /* Decode into vertical and horizontal scanout position. */ | |
4345 | *vpos = position & 0x1fff; | |
4346 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | |
4347 | index e82f7e1c217c..b57d917b6ab7 100644 | |
4348 | --- a/drivers/hv/vmbus_drv.c | |
4349 | +++ b/drivers/hv/vmbus_drv.c | |
4350 | @@ -761,6 +761,8 @@ static void vmbus_isr(void) | |
4351 | void *page_addr; | |
4352 | struct hv_message *msg; | |
4353 | union hv_synic_event_flags *event; | |
4354 | + struct pt_regs *regs = get_irq_regs(); | |
4355 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
4356 | bool handled = false; | |
4357 | ||
4358 | page_addr = hv_context.synic_event_page[cpu]; | |
4359 | @@ -808,7 +810,7 @@ static void vmbus_isr(void) | |
4360 | tasklet_schedule(hv_context.msg_dpc[cpu]); | |
4361 | } | |
4362 | ||
4363 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
4364 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); | |
4365 | } | |
4366 | ||
4367 | ||
4368 | diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c | |
4369 | index 36f76e28a0bf..394f142f90c7 100644 | |
4370 | --- a/drivers/ide/alim15x3.c | |
4371 | +++ b/drivers/ide/alim15x3.c | |
4372 | @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4373 | ||
4374 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
4375 | ||
4376 | - local_irq_save(flags); | |
4377 | + local_irq_save_nort(flags); | |
4378 | ||
4379 | if (m5229_revision < 0xC2) { | |
4380 | /* | |
4381 | @@ -325,7 +325,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
4382 | } | |
4383 | pci_dev_put(north); | |
4384 | pci_dev_put(isa_dev); | |
4385 | - local_irq_restore(flags); | |
4386 | + local_irq_restore_nort(flags); | |
4387 | return 0; | |
4388 | } | |
4389 | ||
4390 | diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c | |
4391 | index 0ceae5cbd89a..c212e85d7f3e 100644 | |
4392 | --- a/drivers/ide/hpt366.c | |
4393 | +++ b/drivers/ide/hpt366.c | |
4394 | @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4395 | ||
4396 | dma_old = inb(base + 2); | |
4397 | ||
4398 | - local_irq_save(flags); | |
4399 | + local_irq_save_nort(flags); | |
4400 | ||
4401 | dma_new = dma_old; | |
4402 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
4403 | @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
4404 | if (dma_new != dma_old) | |
4405 | outb(dma_new, base + 2); | |
4406 | ||
4407 | - local_irq_restore(flags); | |
4408 | + local_irq_restore_nort(flags); | |
4409 | ||
4410 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
4411 | hwif->name, base, base + 7); | |
4412 | diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c | |
4413 | index 19763977568c..4169433faab5 100644 | |
4414 | --- a/drivers/ide/ide-io-std.c | |
4415 | +++ b/drivers/ide/ide-io-std.c | |
4416 | @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4417 | unsigned long uninitialized_var(flags); | |
4418 | ||
4419 | if ((io_32bit & 2) && !mmio) { | |
4420 | - local_irq_save(flags); | |
4421 | + local_irq_save_nort(flags); | |
4422 | ata_vlb_sync(io_ports->nsect_addr); | |
4423 | } | |
4424 | ||
4425 | @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4426 | insl(data_addr, buf, words); | |
4427 | ||
4428 | if ((io_32bit & 2) && !mmio) | |
4429 | - local_irq_restore(flags); | |
4430 | + local_irq_restore_nort(flags); | |
4431 | ||
4432 | if (((len + 1) & 3) < 2) | |
4433 | return; | |
4434 | @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4435 | unsigned long uninitialized_var(flags); | |
4436 | ||
4437 | if ((io_32bit & 2) && !mmio) { | |
4438 | - local_irq_save(flags); | |
4439 | + local_irq_save_nort(flags); | |
4440 | ata_vlb_sync(io_ports->nsect_addr); | |
4441 | } | |
4442 | ||
4443 | @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
4444 | outsl(data_addr, buf, words); | |
4445 | ||
4446 | if ((io_32bit & 2) && !mmio) | |
4447 | - local_irq_restore(flags); | |
4448 | + local_irq_restore_nort(flags); | |
4449 | ||
4450 | if (((len + 1) & 3) < 2) | |
4451 | return; | |
4452 | diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c | |
4453 | index 669ea1e45795..e12e43e62245 100644 | |
4454 | --- a/drivers/ide/ide-io.c | |
4455 | +++ b/drivers/ide/ide-io.c | |
4456 | @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) | |
4457 | /* disable_irq_nosync ?? */ | |
4458 | disable_irq(hwif->irq); | |
4459 | /* local CPU only, as if we were handling an interrupt */ | |
4460 | - local_irq_disable(); | |
4461 | + local_irq_disable_nort(); | |
4462 | if (hwif->polling) { | |
4463 | startstop = handler(drive); | |
4464 | } else if (drive_is_ready(drive)) { | |
4465 | diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c | |
4466 | index 376f2dc410c5..f014dd1b73dc 100644 | |
4467 | --- a/drivers/ide/ide-iops.c | |
4468 | +++ b/drivers/ide/ide-iops.c | |
4469 | @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, | |
4470 | if ((stat & ATA_BUSY) == 0) | |
4471 | break; | |
4472 | ||
4473 | - local_irq_restore(flags); | |
4474 | + local_irq_restore_nort(flags); | |
4475 | *rstat = stat; | |
4476 | return -EBUSY; | |
4477 | } | |
4478 | } | |
4479 | - local_irq_restore(flags); | |
4480 | + local_irq_restore_nort(flags); | |
4481 | } | |
4482 | /* | |
4483 | * Allow status to settle, then read it again. | |
4484 | diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c | |
4485 | index 0b63facd1d87..4ceba37afc0c 100644 | |
4486 | --- a/drivers/ide/ide-probe.c | |
4487 | +++ b/drivers/ide/ide-probe.c | |
4488 | @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) | |
4489 | int bswap = 1; | |
4490 | ||
4491 | /* local CPU only; some systems need this */ | |
4492 | - local_irq_save(flags); | |
4493 | + local_irq_save_nort(flags); | |
4494 | /* read 512 bytes of id info */ | |
4495 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
4496 | - local_irq_restore(flags); | |
4497 | + local_irq_restore_nort(flags); | |
4498 | ||
4499 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
4500 | #ifdef DEBUG | |
4501 | diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c | |
4502 | index a716693417a3..be0568c722d6 100644 | |
4503 | --- a/drivers/ide/ide-taskfile.c | |
4504 | +++ b/drivers/ide/ide-taskfile.c | |
4505 | @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4506 | ||
4507 | page_is_high = PageHighMem(page); | |
4508 | if (page_is_high) | |
4509 | - local_irq_save(flags); | |
4510 | + local_irq_save_nort(flags); | |
4511 | ||
4512 | buf = kmap_atomic(page) + offset; | |
4513 | ||
4514 | @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
4515 | kunmap_atomic(buf); | |
4516 | ||
4517 | if (page_is_high) | |
4518 | - local_irq_restore(flags); | |
4519 | + local_irq_restore_nort(flags); | |
4520 | ||
4521 | len -= nr_bytes; | |
4522 | } | |
4523 | @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, | |
4524 | } | |
4525 | ||
4526 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
4527 | - local_irq_disable(); | |
4528 | + local_irq_disable_nort(); | |
4529 | ||
4530 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
4531 | ||
4532 | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4533 | index d3394b6add24..506bfba6ec9f 100644 | |
4534 | --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4535 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
4536 | @@ -897,7 +897,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
4537 | ||
4538 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
4539 | ||
4540 | - local_irq_save(flags); | |
4541 | + local_irq_save_nort(flags); | |
4542 | netif_addr_lock(dev); | |
4543 | spin_lock(&priv->lock); | |
4544 | ||
4545 | @@ -979,7 +979,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
4546 | ||
4547 | spin_unlock(&priv->lock); | |
4548 | netif_addr_unlock(dev); | |
4549 | - local_irq_restore(flags); | |
4550 | + local_irq_restore_nort(flags); | |
4551 | ||
4552 | /* | |
4553 | * make sure the in-flight joins have finished before we attempt | |
4554 | diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c | |
4555 | index 4a2a9e370be7..e970d9afd179 100644 | |
4556 | --- a/drivers/input/gameport/gameport.c | |
4557 | +++ b/drivers/input/gameport/gameport.c | |
4558 | @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) | |
4559 | tx = ~0; | |
4560 | ||
4561 | for (i = 0; i < 50; i++) { | |
4562 | - local_irq_save(flags); | |
4563 | + local_irq_save_nort(flags); | |
4564 | t1 = ktime_get_ns(); | |
4565 | for (t = 0; t < 50; t++) | |
4566 | gameport_read(gameport); | |
4567 | t2 = ktime_get_ns(); | |
4568 | t3 = ktime_get_ns(); | |
4569 | - local_irq_restore(flags); | |
4570 | + local_irq_restore_nort(flags); | |
4571 | udelay(i * 10); | |
4572 | t = (t2 - t1) - (t3 - t2); | |
4573 | if (t < tx) | |
4574 | @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4575 | tx = 1 << 30; | |
4576 | ||
4577 | for(i = 0; i < 50; i++) { | |
4578 | - local_irq_save(flags); | |
4579 | + local_irq_save_nort(flags); | |
4580 | GET_TIME(t1); | |
4581 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4582 | GET_TIME(t2); | |
4583 | GET_TIME(t3); | |
4584 | - local_irq_restore(flags); | |
4585 | + local_irq_restore_nort(flags); | |
4586 | udelay(i * 10); | |
4587 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
4588 | } | |
4589 | @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
4590 | tx = 1 << 30; | |
4591 | ||
4592 | for(i = 0; i < 50; i++) { | |
4593 | - local_irq_save(flags); | |
4594 | + local_irq_save_nort(flags); | |
4595 | t1 = rdtsc(); | |
4596 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
4597 | t2 = rdtsc(); | |
4598 | - local_irq_restore(flags); | |
4599 | + local_irq_restore_nort(flags); | |
4600 | udelay(i * 10); | |
4601 | if (t2 - t1 < tx) tx = t2 - t1; | |
4602 | } | |
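The IDE and gameport hunks above replace local_irq_save()/restore() with their _nort() variants, which only disable interrupts on a non-RT kernel. As a rough illustration (an assumption about how the RT tree defines these wrappers, not quoted from it), they behave along these lines:

#ifdef CONFIG_PREEMPT_RT_FULL
# define local_irq_disable_nort()	do { } while (0)
# define local_irq_enable_nort()	do { } while (0)
# define local_irq_save_nort(flags)	local_save_flags(flags)
# define local_irq_restore_nort(flags)	(void)(flags)
#else
# define local_irq_disable_nort()	local_irq_disable()
# define local_irq_enable_nort()	local_irq_enable()
# define local_irq_save_nort(flags)	local_irq_save(flags)
# define local_irq_restore_nort(flags)	local_irq_restore(flags)
#endif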
4603 | diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c | |
4604 | index 96de97a46079..a6ec875d941b 100644 | |
4605 | --- a/drivers/iommu/amd_iommu.c | |
4606 | +++ b/drivers/iommu/amd_iommu.c | |
4607 | @@ -1832,10 +1832,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, | |
4608 | int ret; | |
4609 | ||
4610 | /* | |
4611 | - * Must be called with IRQs disabled. Warn here to detect early | |
4612 | - * when its not. | |
4613 | + * Must be called with IRQs disabled on a non-RT kernel. Warn here to | |
4614 | + * detect early when it's not. | |
4615 | */ | |
4616 | - WARN_ON(!irqs_disabled()); | |
4617 | + WARN_ON_NONRT(!irqs_disabled()); | |
4618 | ||
4619 | /* lock domain */ | |
4620 | spin_lock(&domain->lock); | |
4621 | @@ -2003,10 +2003,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) | |
4622 | struct protection_domain *domain; | |
4623 | ||
4624 | /* | |
4625 | - * Must be called with IRQs disabled. Warn here to detect early | |
4626 | - * when its not. | |
4627 | + * Must be called with IRQs disabled on a non-RT kernel. Warn here to | |
4628 | + * detect early when it's not. | |
4629 | */ | |
4630 | - WARN_ON(!irqs_disabled()); | |
4631 | + WARN_ON_NONRT(!irqs_disabled()); | |
4632 | ||
4633 | if (WARN_ON(!dev_data->domain)) | |
4634 | return; | |
4635 | diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c | |
4636 | index ebb5bf3ddbd9..598f5df45f6b 100644 | |
4637 | --- a/drivers/iommu/intel-iommu.c | |
4638 | +++ b/drivers/iommu/intel-iommu.c | |
4639 | @@ -479,7 +479,7 @@ struct deferred_flush_data { | |
4640 | struct deferred_flush_table *tables; | |
4641 | }; | |
4642 | ||
4643 | -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4644 | +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); | |
4645 | ||
4646 | /* bitmap for indexing intel_iommus */ | |
4647 | static int g_num_of_iommus; | |
4648 | @@ -3626,10 +3626,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, | |
4649 | struct intel_iommu *iommu; | |
4650 | struct deferred_flush_entry *entry; | |
4651 | struct deferred_flush_data *flush_data; | |
4652 | - unsigned int cpuid; | |
4653 | ||
4654 | - cpuid = get_cpu(); | |
4655 | - flush_data = per_cpu_ptr(&deferred_flush, cpuid); | |
4656 | + flush_data = raw_cpu_ptr(&deferred_flush); | |
4657 | ||
4658 | /* Flush all CPUs' entries to avoid deferring too much. If | |
4659 | * this becomes a bottleneck, can just flush us, and rely on | |
4660 | @@ -3662,8 +3660,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, | |
4661 | } | |
4662 | flush_data->size++; | |
4663 | spin_unlock_irqrestore(&flush_data->lock, flags); | |
4664 | - | |
4665 | - put_cpu(); | |
4666 | } | |
4667 | ||
4668 | static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) | |
4669 | diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c | |
4670 | index e23001bfcfee..359d5d169ec0 100644 | |
4671 | --- a/drivers/iommu/iova.c | |
4672 | +++ b/drivers/iommu/iova.c | |
4673 | @@ -22,6 +22,7 @@ | |
4674 | #include <linux/slab.h> | |
4675 | #include <linux/smp.h> | |
4676 | #include <linux/bitops.h> | |
4677 | +#include <linux/cpu.h> | |
4678 | ||
4679 | static bool iova_rcache_insert(struct iova_domain *iovad, | |
4680 | unsigned long pfn, | |
4681 | @@ -420,10 +421,8 @@ alloc_iova_fast(struct iova_domain *iovad, unsigned long size, | |
4682 | ||
4683 | /* Try replenishing IOVAs by flushing rcache. */ | |
4684 | flushed_rcache = true; | |
4685 | - preempt_disable(); | |
4686 | for_each_online_cpu(cpu) | |
4687 | free_cpu_cached_iovas(cpu, iovad); | |
4688 | - preempt_enable(); | |
4689 | goto retry; | |
4690 | } | |
4691 | ||
4692 | @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4693 | bool can_insert = false; | |
4694 | unsigned long flags; | |
4695 | ||
4696 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4697 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4698 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4699 | ||
4700 | if (!iova_magazine_full(cpu_rcache->loaded)) { | |
4701 | @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, | |
4702 | iova_magazine_push(cpu_rcache->loaded, iova_pfn); | |
4703 | ||
4704 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4705 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4706 | ||
4707 | if (mag_to_free) { | |
4708 | iova_magazine_free_pfns(mag_to_free, iovad); | |
4709 | @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4710 | bool has_pfn = false; | |
4711 | unsigned long flags; | |
4712 | ||
4713 | - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); | |
4714 | + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); | |
4715 | spin_lock_irqsave(&cpu_rcache->lock, flags); | |
4716 | ||
4717 | if (!iova_magazine_empty(cpu_rcache->loaded)) { | |
4718 | @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, | |
4719 | iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); | |
4720 | ||
4721 | spin_unlock_irqrestore(&cpu_rcache->lock, flags); | |
4722 | - put_cpu_ptr(rcache->cpu_rcaches); | |
4723 | ||
4724 | return iova_pfn; | |
4725 | } | |
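In the iova hunks above, get_cpu_ptr()/put_cpu_ptr() are dropped because the per-CPU magazine is already serialized by cpu_rcache->lock; pinning the task with preemption disabled adds nothing and hurts RT. A small sketch of that reasoning, with made-up structure and function names:

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct cpu_cache {
	spinlock_t lock;
	unsigned long count;
};
static DEFINE_PER_CPU(struct cpu_cache, cache);

static void cache_add_one(void)
{
	/* raw_cpu_ptr(): no preempt_disable(); migrating to another CPU
	 * afterwards is harmless because the lock protects the data. */
	struct cpu_cache *c = raw_cpu_ptr(&cache);
	unsigned long flags;

	spin_lock_irqsave(&c->lock, flags);
	c->count++;
	spin_unlock_irqrestore(&c->lock, flags);
}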
4726 | diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig | |
4727 | index 3f9ddb9fafa7..09da5b6b44a1 100644 | |
4728 | --- a/drivers/leds/trigger/Kconfig | |
4729 | +++ b/drivers/leds/trigger/Kconfig | |
4730 | @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT | |
4731 | ||
4732 | config LEDS_TRIGGER_CPU | |
4733 | bool "LED CPU Trigger" | |
4734 | - depends on LEDS_TRIGGERS | |
4735 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
4736 | help | |
4737 | This allows LEDs to be controlled by active CPUs. This shows | |
4738 | the active CPUs across an array of LEDs so you can see which | |
4739 | diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig | |
4740 | index 4d200883c505..98b64ed5cb81 100644 | |
4741 | --- a/drivers/md/bcache/Kconfig | |
4742 | +++ b/drivers/md/bcache/Kconfig | |
4743 | @@ -1,6 +1,7 @@ | |
4744 | ||
4745 | config BCACHE | |
4746 | tristate "Block device as cache" | |
4747 | + depends on !PREEMPT_RT_FULL | |
4748 | ---help--- | |
4749 | Allows a block device to be used as cache for other devices; uses | |
4750 | a btree for indexing and the layout is optimized for SSDs. | |
4751 | diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c | |
4752 | index 5da86c8b6545..2aa092f2977e 100644 | |
4753 | --- a/drivers/md/dm-rq.c | |
4754 | +++ b/drivers/md/dm-rq.c | |
4755 | @@ -811,7 +811,7 @@ static void dm_old_request_fn(struct request_queue *q) | |
4756 | /* Establish tio->ti before queuing work (map_tio_request) */ | |
4757 | tio->ti = ti; | |
4758 | queue_kthread_work(&md->kworker, &tio->work); | |
4759 | - BUG_ON(!irqs_disabled()); | |
4760 | + BUG_ON_NONRT(!irqs_disabled()); | |
4761 | } | |
4762 | } | |
4763 | ||
4764 | diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c | |
4765 | index ee7fc3701700..ae59c9e13911 100644 | |
4766 | --- a/drivers/md/raid5.c | |
4767 | +++ b/drivers/md/raid5.c | |
4768 | @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4769 | struct raid5_percpu *percpu; | |
4770 | unsigned long cpu; | |
4771 | ||
4772 | - cpu = get_cpu(); | |
4773 | + cpu = get_cpu_light(); | |
4774 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
4775 | + spin_lock(&percpu->lock); | |
4776 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
4777 | ops_run_biofill(sh); | |
4778 | overlap_clear++; | |
4779 | @@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
4780 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
4781 | wake_up(&sh->raid_conf->wait_for_overlap); | |
4782 | } | |
4783 | - put_cpu(); | |
4784 | + spin_unlock(&percpu->lock); | |
4785 | + put_cpu_light(); | |
4786 | } | |
4787 | ||
4788 | static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, | |
4789 | @@ -6438,6 +6440,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) | |
4790 | __func__, cpu); | |
4791 | break; | |
4792 | } | |
4793 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); | |
4794 | } | |
4795 | put_online_cpus(); | |
4796 | ||
4797 | diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h | |
4798 | index 517d4b68a1be..efe91887ecd7 100644 | |
4799 | --- a/drivers/md/raid5.h | |
4800 | +++ b/drivers/md/raid5.h | |
4801 | @@ -504,6 +504,7 @@ struct r5conf { | |
4802 | int recovery_disabled; | |
4803 | /* per cpu variables */ | |
4804 | struct raid5_percpu { | |
4805 | + spinlock_t lock; /* Protection for -RT */ | |
4806 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
4807 | struct flex_array *scribble; /* space for constructing buffer | |
4808 | * lists and performing address | |
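The raid5 hunks above make the per-CPU scratch area RT-safe by adding a spinlock to struct raid5_percpu and switching get_cpu() to get_cpu_light() (an RT-tree helper; the assumption here is that it avoids disabling preemption and relies on the lock for exclusion). A condensed sketch of the pattern, with hypothetical names:

#include <linux/percpu.h>
#include <linux/spinlock.h>

struct scratch {
	spinlock_t lock;	/* protects buf on RT and !RT alike */
	void *buf;
};
static DEFINE_PER_CPU(struct scratch, scratch_area);

static void run_ops(void)
{
	int cpu = get_cpu_light();	/* RT-tree helper, not mainline get_cpu() */
	struct scratch *s = per_cpu_ptr(&scratch_area, cpu);

	spin_lock(&s->lock);		/* sleeping lock on RT, section stays preemptible */
	/* ... use s->buf ... */
	spin_unlock(&s->lock);
	put_cpu_light();
}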
4809 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig | |
4810 | index d00252828966..9faab404faac 100644 | |
4811 | --- a/drivers/misc/Kconfig | |
4812 | +++ b/drivers/misc/Kconfig | |
4813 | @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI | |
4814 | config ATMEL_TCLIB | |
4815 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
4816 | depends on (AVR32 || ARCH_AT91) | |
4817 | + default y if PREEMPT_RT_FULL | |
4818 | help | |
4819 | Select this if you want a library to allocate the Timer/Counter | |
4820 | blocks found on many Atmel processors. This facilitates using | |
4821 | @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC | |
4822 | are combined to make a single 32-bit timer. | |
4823 | ||
4824 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
4825 | - may be used as a clock event device supporting oneshot mode | |
4826 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
4827 | + may be used as a clock event device supporting oneshot mode. | |
4828 | ||
4829 | config ATMEL_TCB_CLKSRC_BLOCK | |
4830 | int | |
4831 | @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK | |
4832 | TC can be used for other purposes, such as PWM generation and | |
4833 | interval timing. | |
4834 | ||
4835 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
4836 | + bool "TC Block use 32 KiHz clock" | |
4837 | + depends on ATMEL_TCB_CLKSRC | |
4838 | + default y if !PREEMPT_RT_FULL | |
4839 | + help | |
4840 | + Select this to use 32 KiHz base clock rate as TC block clock | |
4841 | + source for clock events. | |
4842 | + | |
4843 | + | |
4844 | config DUMMY_IRQ | |
4845 | tristate "Dummy IRQ handler" | |
4846 | default n | |
4847 | @@ -114,6 +123,35 @@ config IBM_ASM | |
4848 | for information on the specific driver level and support statement | |
4849 | for your IBM server. | |
4850 | ||
4851 | +config HWLAT_DETECTOR | |
4852 | + tristate "Testing module to detect hardware-induced latencies" | |
4853 | + depends on DEBUG_FS | |
4854 | + depends on RING_BUFFER | |
4855 | + default m | |
4856 | + ---help--- | |
4857 | + A simple hardware latency detector. Use this module to detect | |
4858 | + large latencies introduced by the behavior of the underlying | |
4859 | + system firmware external to Linux. We do this by periodically | |
4860 | + using stop_machine to grab all available CPUs and measuring | |
4861 | + for unexplainable gaps in the CPU timestamp counter(s). By | |
4862 | + default, the module is not enabled until the "enable" file | |
4863 | + within the "hwlat_detector" debugfs directory is toggled. | |
4864 | + | |
4865 | + This module is often used to detect SMI (System Management | |
4866 | + Interrupts) on x86 systems, though is not x86 specific. To | |
4867 | + this end, we default to using a sample window of 1 second, | |
4868 | + during which we will sample for 0.5 seconds. If an SMI or | |
4869 | + similar event occurs during that time, it is recorded | |
4870 | + into an 8K-sample global ring buffer until retrieved. | |
4871 | + | |
4872 | + WARNING: This software should never be enabled (it can be built | |
4873 | + but should not be turned on after it is loaded) in a production | |
4874 | + environment where high latencies are a concern since the | |
4875 | + sampling mechanism actually introduces latencies for | |
4876 | + regular tasks while the CPU(s) are being held. | |
4877 | + | |
4878 | + If unsure, say N | |
4879 | + | |
4880 | config PHANTOM | |
4881 | tristate "Sensable PHANToM (PCI)" | |
4882 | depends on PCI | |
4883 | diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile | |
4884 | index fb32516ddfe2..8643df9af3c4 100644 | |
4885 | --- a/drivers/misc/Makefile | |
4886 | +++ b/drivers/misc/Makefile | |
4887 | @@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT) += c2port/ | |
4888 | obj-$(CONFIG_HMC6352) += hmc6352.o | |
4889 | obj-y += eeprom/ | |
4890 | obj-y += cb710/ | |
4891 | +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o | |
4892 | obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o | |
4893 | obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o | |
4894 | obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o | |
4895 | diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c | |
4896 | new file mode 100644 | |
4897 | index 000000000000..52f5ad5fd9c0 | |
4898 | --- /dev/null | |
4899 | +++ b/drivers/misc/hwlat_detector.c | |
4900 | @@ -0,0 +1,1240 @@ | |
4901 | +/* | |
4902 | + * hwlat_detector.c - A simple Hardware Latency detector. | |
4903 | + * | |
4904 | + * Use this module to detect large system latencies induced by the behavior of | |
4905 | + * certain underlying system hardware or firmware, independent of Linux itself. | |
4906 | + * The code was developed originally to detect the presence of SMIs on Intel | |
4907 | + * and AMD systems, although there is no dependency upon x86 herein. | |
4908 | + * | |
4909 | + * The classical example usage of this module is in detecting the presence of | |
4910 | + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a | |
4911 | + * somewhat special form of hardware interrupt spawned from earlier CPU debug | |
4912 | + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge | |
4913 | + * LPC (or other device) to generate a special interrupt under certain | |
4914 | + * circumstances, for example, upon expiration of a special SMI timer device, | |
4915 | + * due to certain external thermal readings, on certain I/O address accesses, | |
4916 | + * and other situations. An SMI hits a special CPU pin, triggers a special | |
4917 | + * SMI mode (complete with special memory map), and the OS is unaware. | |
4918 | + * | |
4919 | + * Although certain hardware-inducing latencies are necessary (for example, | |
4920 | + * a modern system often requires an SMI handler for correct thermal control | |
4921 | + * and remote management) they can wreak havoc upon any OS-level performance | |
4922 | + * guarantees toward low-latency, especially when the OS is not even made | |
4923 | + * aware of the presence of these interrupts. For this reason, we need a | |
4924 | + * somewhat brute force mechanism to detect these interrupts. In this case, | |
4925 | + * we do it by hogging all of the CPU(s) for configurable timer intervals, | |
4926 | + * sampling the built-in CPU timer, looking for discontiguous readings. | |
4927 | + * | |
4928 | + * WARNING: This implementation necessarily introduces latencies. Therefore, | |
4929 | + * you should NEVER use this module in a production environment | |
4930 | + * requiring any kind of low-latency performance guarantee(s). | |
4931 | + * | |
4932 | + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> | |
4933 | + * | |
4934 | + * Includes useful feedback from Clark Williams <clark@redhat.com> | |
4935 | + * | |
4936 | + * This file is licensed under the terms of the GNU General Public | |
4937 | + * License version 2. This program is licensed "as is" without any | |
4938 | + * warranty of any kind, whether express or implied. | |
4939 | + */ | |
4940 | + | |
4941 | +#include <linux/module.h> | |
4942 | +#include <linux/init.h> | |
4943 | +#include <linux/ring_buffer.h> | |
4944 | +#include <linux/time.h> | |
4945 | +#include <linux/hrtimer.h> | |
4946 | +#include <linux/kthread.h> | |
4947 | +#include <linux/debugfs.h> | |
4948 | +#include <linux/seq_file.h> | |
4949 | +#include <linux/uaccess.h> | |
4950 | +#include <linux/version.h> | |
4951 | +#include <linux/delay.h> | |
4952 | +#include <linux/slab.h> | |
4953 | +#include <linux/trace_clock.h> | |
4954 | + | |
4955 | +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ | |
4956 | +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ | |
4957 | +#define U64STR_SIZE 22 /* 20 digits max */ | |
4958 | + | |
4959 | +#define VERSION "1.0.0" | |
4960 | +#define BANNER "hwlat_detector: " | |
4961 | +#define DRVNAME "hwlat_detector" | |
4962 | +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ | |
4963 | +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ | |
4964 | +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ | |
4965 | + | |
4966 | +/* Module metadata */ | |
4967 | + | |
4968 | +MODULE_LICENSE("GPL"); | |
4969 | +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); | |
4970 | +MODULE_DESCRIPTION("A simple hardware latency detector"); | |
4971 | +MODULE_VERSION(VERSION); | |
4972 | + | |
4973 | +/* Module parameters */ | |
4974 | + | |
4975 | +static int debug; | |
4976 | +static int enabled; | |
4977 | +static int threshold; | |
4978 | + | |
4979 | +module_param(debug, int, 0); /* enable debug */ | |
4980 | +module_param(enabled, int, 0); /* enable detector */ | |
4981 | +module_param(threshold, int, 0); /* latency threshold */ | |
4982 | + | |
4983 | +/* Buffering and sampling */ | |
4984 | + | |
4985 | +static struct ring_buffer *ring_buffer; /* sample buffer */ | |
4986 | +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ | |
4987 | +static unsigned long buf_size = BUF_SIZE_DEFAULT; | |
4988 | +static struct task_struct *kthread; /* sampling thread */ | |
4989 | + | |
4990 | +/* DebugFS filesystem entries */ | |
4991 | + | |
4992 | +static struct dentry *debug_dir; /* debugfs directory */ | |
4993 | +static struct dentry *debug_max; /* maximum TSC delta */ | |
4994 | +static struct dentry *debug_count; /* total detect count */ | |
4995 | +static struct dentry *debug_sample_width; /* sample width us */ | |
4996 | +static struct dentry *debug_sample_window; /* sample window us */ | |
4997 | +static struct dentry *debug_sample; /* raw samples us */ | |
4998 | +static struct dentry *debug_threshold; /* threshold us */ | |
4999 | +static struct dentry *debug_enable; /* enable/disable */ | |
5000 | + | |
5001 | +/* Individual samples and global state */ | |
5002 | + | |
5003 | +struct sample; /* latency sample */ | |
5004 | +struct data; /* Global state */ | |
5005 | + | |
5006 | +/* Sampling functions */ | |
5007 | +static int __buffer_add_sample(struct sample *sample); | |
5008 | +static struct sample *buffer_get_sample(struct sample *sample); | |
5009 | + | |
5010 | +/* Threading and state */ | |
5011 | +static int kthread_fn(void *unused); | |
5012 | +static int start_kthread(void); | |
5013 | +static int stop_kthread(void); | |
5014 | +static void __reset_stats(void); | |
5015 | +static int init_stats(void); | |
5016 | + | |
5017 | +/* Debugfs interface */ | |
5018 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, | |
5019 | + size_t cnt, loff_t *ppos, const u64 *entry); | |
5020 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, | |
5021 | + size_t cnt, loff_t *ppos, u64 *entry); | |
5022 | +static int debug_sample_fopen(struct inode *inode, struct file *filp); | |
5023 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, | |
5024 | + size_t cnt, loff_t *ppos); | |
5025 | +static int debug_sample_release(struct inode *inode, struct file *filp); | |
5026 | +static int debug_enable_fopen(struct inode *inode, struct file *filp); | |
5027 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, | |
5028 | + size_t cnt, loff_t *ppos); | |
5029 | +static ssize_t debug_enable_fwrite(struct file *file, | |
5030 | + const char __user *user_buffer, | |
5031 | + size_t user_size, loff_t *offset); | |
5032 | + | |
5033 | +/* Initialization functions */ | |
5034 | +static int init_debugfs(void); | |
5035 | +static void free_debugfs(void); | |
5036 | +static int detector_init(void); | |
5037 | +static void detector_exit(void); | |
5038 | + | |
5039 | +/* Individual latency samples are stored here when detected and packed into | |
5040 | + * the ring_buffer circular buffer, where they are overwritten when | |
5041 | + * more than buf_size/sizeof(sample) samples are received. */ | |
5042 | +struct sample { | |
5043 | + u64 seqnum; /* unique sequence */ | |
5044 | + u64 duration; /* ktime delta */ | |
5045 | + u64 outer_duration; /* ktime delta (outer loop) */ | |
5046 | + struct timespec timestamp; /* wall time */ | |
5047 | + unsigned long lost; | |
5048 | +}; | |
5049 | + | |
5050 | +/* keep the global state somewhere. */ | |
5051 | +static struct data { | |
5052 | + | |
5053 | + struct mutex lock; /* protect changes */ | |
5054 | + | |
5055 | + u64 count; /* total since reset */ | |
5056 | + u64 max_sample; /* max hardware latency */ | |
5057 | + u64 threshold; /* sample threshold level */ | |
5058 | + | |
5059 | + u64 sample_window; /* total sampling window (on+off) */ | |
5060 | + u64 sample_width; /* active sampling portion of window */ | |
5061 | + | |
5062 | + atomic_t sample_open; /* whether the sample file is open */ | |
5063 | + | |
5064 | + wait_queue_head_t wq; /* waitqueue for new sample values */ | |
5065 | + | |
5066 | +} data; | |
5067 | + | |
5068 | +/** | |
5069 | + * __buffer_add_sample - add a new latency sample recording to the ring buffer | |
5070 | + * @sample: The new latency sample value | |
5071 | + * | |
5072 | + * This receives a new latency sample and records it in a global ring buffer. | |
5073 | + * No additional locking is used in this case. | |
5074 | + */ | |
5075 | +static int __buffer_add_sample(struct sample *sample) | |
5076 | +{ | |
5077 | + return ring_buffer_write(ring_buffer, | |
5078 | + sizeof(struct sample), sample); | |
5079 | +} | |
5080 | + | |
5081 | +/** | |
5082 | + * buffer_get_sample - remove a hardware latency sample from the ring buffer | |
5083 | + * @sample: Pre-allocated storage for the sample | |
5084 | + * | |
5085 | + * This retrieves a hardware latency sample from the global circular buffer | |
5086 | + */ | |
5087 | +static struct sample *buffer_get_sample(struct sample *sample) | |
5088 | +{ | |
5089 | + struct ring_buffer_event *e = NULL; | |
5090 | + struct sample *s = NULL; | |
5091 | + unsigned int cpu = 0; | |
5092 | + | |
5093 | + if (!sample) | |
5094 | + return NULL; | |
5095 | + | |
5096 | + mutex_lock(&ring_buffer_mutex); | |
5097 | + for_each_online_cpu(cpu) { | |
5098 | + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); | |
5099 | + if (e) | |
5100 | + break; | |
5101 | + } | |
5102 | + | |
5103 | + if (e) { | |
5104 | + s = ring_buffer_event_data(e); | |
5105 | + memcpy(sample, s, sizeof(struct sample)); | |
5106 | + } else | |
5107 | + sample = NULL; | |
5108 | + mutex_unlock(&ring_buffer_mutex); | |
5109 | + | |
5110 | + return sample; | |
5111 | +} | |
5112 | + | |
5113 | +#ifndef CONFIG_TRACING | |
5114 | +#define time_type ktime_t | |
5115 | +#define time_get() ktime_get() | |
5116 | +#define time_to_us(x) ktime_to_us(x) | |
5117 | +#define time_sub(a, b) ktime_sub(a, b) | |
5118 | +#define init_time(a, b) (a).tv64 = b | |
5119 | +#define time_u64(a) ((a).tv64) | |
5120 | +#else | |
5121 | +#define time_type u64 | |
5122 | +#define time_get() trace_clock_local() | |
5123 | +#define time_to_us(x) div_u64(x, 1000) | |
5124 | +#define time_sub(a, b) ((a) - (b)) | |
5125 | +#define init_time(a, b) (a = b) | |
5126 | +#define time_u64(a) a | |
5127 | +#endif | |
5128 | +/** | |
5129 | + * get_sample - sample the CPU TSC and look for likely hardware latencies | |
5130 | + * | |
5131 | + * Used to repeatedly capture the CPU TSC (or similar), looking for potential | |
5132 | + * hardware-induced latency. Called with interrupts disabled and with | |
5133 | + * data.lock held. | |
5134 | + */ | |
5135 | +static int get_sample(void) | |
5136 | +{ | |
5137 | + time_type start, t1, t2, last_t2; | |
5138 | + s64 diff, total = 0; | |
5139 | + u64 sample = 0; | |
5140 | + u64 outer_sample = 0; | |
5141 | + int ret = -1; | |
5142 | + | |
5143 | + init_time(last_t2, 0); | |
5144 | + start = time_get(); /* start timestamp */ | |
5145 | + | |
5146 | + do { | |
5147 | + | |
5148 | + t1 = time_get(); /* we'll look for a discontinuity */ | |
5149 | + t2 = time_get(); | |
5150 | + | |
5151 | + if (time_u64(last_t2)) { | |
5152 | + /* Check the delta from outer loop (t2 to next t1) */ | |
5153 | + diff = time_to_us(time_sub(t1, last_t2)); | |
5154 | + /* This shouldn't happen */ | |
5155 | + if (diff < 0) { | |
5156 | + pr_err(BANNER "time running backwards\n"); | |
5157 | + goto out; | |
5158 | + } | |
5159 | + if (diff > outer_sample) | |
5160 | + outer_sample = diff; | |
5161 | + } | |
5162 | + last_t2 = t2; | |
5163 | + | |
5164 | + total = time_to_us(time_sub(t2, start)); /* sample width */ | |
5165 | + | |
5166 | + /* This checks the inner loop (t1 to t2) */ | |
5167 | + diff = time_to_us(time_sub(t2, t1)); /* current diff */ | |
5168 | + | |
5169 | + /* This shouldn't happen */ | |
5170 | + if (diff < 0) { | |
5171 | + pr_err(BANNER "time running backwards\n"); | |
5172 | + goto out; | |
5173 | + } | |
5174 | + | |
5175 | + if (diff > sample) | |
5176 | + sample = diff; /* only want highest value */ | |
5177 | + | |
5178 | + } while (total <= data.sample_width); | |
5179 | + | |
5180 | + ret = 0; | |
5181 | + | |
5182 | + /* If we exceed the threshold value, we have found a hardware latency */ | |
5183 | + if (sample > data.threshold || outer_sample > data.threshold) { | |
5184 | + struct sample s; | |
5185 | + | |
5186 | + ret = 1; | |
5187 | + | |
5188 | + data.count++; | |
5189 | + s.seqnum = data.count; | |
5190 | + s.duration = sample; | |
5191 | + s.outer_duration = outer_sample; | |
5192 | + s.timestamp = CURRENT_TIME; | |
5193 | + __buffer_add_sample(&s); | |
5194 | + | |
5195 | + /* Keep a running maximum of the largest hardware latency recorded */ | |
5196 | + if (sample > data.max_sample) | |
5197 | + data.max_sample = sample; | |
5198 | + } | |
5199 | + | |
5200 | +out: | |
5201 | + return ret; | |
5202 | +} | |
5203 | + | |
5204 | +/* | |
5205 | + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread | |
5206 | + * @unused: A required part of the kthread API. | |
5207 | + * | |
5208 | + * Used to periodically sample the CPU TSC via a call to get_sample. We | |
5209 | + * disable interrupts, which does (intentionally) introduce latency since we | |
5210 | + * need to ensure nothing else might be running (and thus pre-empting). | |
5211 | + * Obviously this should never be used in production environments. | |
5212 | + * | |
5213 | + * Currently this runs on whichever CPU it was scheduled on, since most | |
5214 | + * real-world hardware latency situations occur across several CPUs, | |
5215 | + * but we might later generalize this if we find there are actually | |
5216 | + * systems with alternate SMI delivery or other hardware latencies. | |
5217 | + */ | |
5218 | +static int kthread_fn(void *unused) | |
5219 | +{ | |
5220 | + int ret; | |
5221 | + u64 interval; | |
5222 | + | |
5223 | + while (!kthread_should_stop()) { | |
5224 | + | |
5225 | + mutex_lock(&data.lock); | |
5226 | + | |
5227 | + local_irq_disable(); | |
5228 | + ret = get_sample(); | |
5229 | + local_irq_enable(); | |
5230 | + | |
5231 | + if (ret > 0) | |
5232 | + wake_up(&data.wq); /* wake up reader(s) */ | |
5233 | + | |
5234 | + interval = data.sample_window - data.sample_width; | |
5235 | + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ | |
5236 | + | |
5237 | + mutex_unlock(&data.lock); | |
5238 | + | |
5239 | + if (msleep_interruptible(interval)) | |
5240 | + break; | |
5241 | + } | |
5242 | + | |
5243 | + return 0; | |
5244 | +} | |
5245 | + | |
5246 | +/** | |
5247 | + * start_kthread - Kick off the hardware latency sampling/detector kthread | |
5248 | + * | |
5249 | + * This starts a kernel thread that will sit and sample the CPU timestamp | |
5250 | + * counter (TSC or similar) and look for potential hardware latencies. | |
5251 | + */ | |
5252 | +static int start_kthread(void) | |
5253 | +{ | |
5254 | + kthread = kthread_run(kthread_fn, NULL, | |
5255 | + DRVNAME); | |
5256 | + if (IS_ERR(kthread)) { | |
5257 | + pr_err(BANNER "could not start sampling thread\n"); | |
5258 | + enabled = 0; | |
5259 | + return -ENOMEM; | |
5260 | + } | |
5261 | + | |
5262 | + return 0; | |
5263 | +} | |
5264 | + | |
5265 | +/** | |
5266 | + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop | |
5267 | + * | |
5268 | + * This kicks the running hardware latency sampling/detector kernel thread and | |
5269 | + * tells it to stop sampling now. Use this on unload and at system shutdown. | |
5270 | + */ | |
5271 | +static int stop_kthread(void) | |
5272 | +{ | |
5273 | + int ret; | |
5274 | + | |
5275 | + ret = kthread_stop(kthread); | |
5276 | + | |
5277 | + return ret; | |
5278 | +} | |
5279 | + | |
5280 | +/** | |
5281 | + * __reset_stats - Reset statistics for the hardware latency detector | |
5282 | + * | |
5283 | + * We use data to store various statistics and global state. We call this | |
5284 | + * function in order to reset those when "enable" is toggled on or off, and | |
5285 | + * also at initialization. Should be called with data.lock held. | |
5286 | + */ | |
5287 | +static void __reset_stats(void) | |
5288 | +{ | |
5289 | + data.count = 0; | |
5290 | + data.max_sample = 0; | |
5291 | + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ | |
5292 | +} | |
5293 | + | |
5294 | +/** | |
5295 | + * init_stats - Setup global state statistics for the hardware latency detector | |
5296 | + * | |
5297 | + * We use data to store various statistics and global state. We also use | |
5298 | + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware | |
5299 | + * induced system latencies. This function initializes these structures and | |
5300 | + * allocates the global ring buffer also. | |
5301 | + */ | |
5302 | +static int init_stats(void) | |
5303 | +{ | |
5304 | + int ret = -ENOMEM; | |
5305 | + | |
5306 | + mutex_init(&data.lock); | |
5307 | + init_waitqueue_head(&data.wq); | |
5308 | + atomic_set(&data.sample_open, 0); | |
5309 | + | |
5310 | + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); | |
5311 | + | |
5312 | + if (WARN(!ring_buffer, KERN_ERR BANNER | |
5313 | + "failed to allocate ring buffer!\n")) | |
5314 | + goto out; | |
5315 | + | |
5316 | + __reset_stats(); | |
5317 | + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ | |
5318 | + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ | |
5319 | + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ | |
5320 | + | |
5321 | + ret = 0; | |
5322 | + | |
5323 | +out: | |
5324 | + return ret; | |
5325 | + | |
5326 | +} | |
5327 | + | |
5328 | +/* | |
5329 | + * simple_data_read - Wrapper read function for global state debugfs entries | |
5330 | + * @filp: The active open file structure for the debugfs "file" | |
5331 | + * @ubuf: The userspace provided buffer to read value into | |
5332 | + * @cnt: The maximum number of bytes to read | |
5333 | + * @ppos: The current "file" position | |
5334 | + * @entry: The entry to read from | |
5335 | + * | |
5336 | + * This function provides a generic read implementation for the global state | |
5337 | + * "data" structure debugfs filesystem entries. It would be nice to use | |
5338 | + * simple_attr_read directly, but we need to make sure that the data.lock | |
5339 | + * is held during the actual read. | |
5340 | + */ | |
5341 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, | |
5342 | + size_t cnt, loff_t *ppos, const u64 *entry) | |
5343 | +{ | |
5344 | + char buf[U64STR_SIZE]; | |
5345 | + u64 val = 0; | |
5346 | + int len = 0; | |
5347 | + | |
5348 | + memset(buf, 0, sizeof(buf)); | |
5349 | + | |
5350 | + if (!entry) | |
5351 | + return -EFAULT; | |
5352 | + | |
5353 | + mutex_lock(&data.lock); | |
5354 | + val = *entry; | |
5355 | + mutex_unlock(&data.lock); | |
5356 | + | |
5357 | + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); | |
5358 | + | |
5359 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); | |
5360 | + | |
5361 | +} | |
5362 | + | |
5363 | +/* | |
5364 | + * simple_data_write - Wrapper write function for global state debugfs entries | |
5365 | + * @filp: The active open file structure for the debugfs "file" | |
5366 | + * @ubuf: The userspace provided buffer to write value from | |
5367 | + * @cnt: The maximum number of bytes to write | |
5368 | + * @ppos: The current "file" position | |
5369 | + * @entry: The entry to write to | |
5370 | + * | |
5371 | + * This function provides a generic write implementation for the global state | |
5372 | + * "data" structure debugfs filesystem entries. It would be nice to use | |
5373 | + * simple_attr_write directly, but we need to make sure that the data.lock | |
5374 | + * is held during the actual write. | |
5375 | + */ | |
5376 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, | |
5377 | + size_t cnt, loff_t *ppos, u64 *entry) | |
5378 | +{ | |
5379 | + char buf[U64STR_SIZE]; | |
5380 | + int csize = min(cnt, sizeof(buf)); | |
5381 | + u64 val = 0; | |
5382 | + int err = 0; | |
5383 | + | |
5384 | + memset(buf, '\0', sizeof(buf)); | |
5385 | + if (copy_from_user(buf, ubuf, csize)) | |
5386 | + return -EFAULT; | |
5387 | + | |
5388 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
5389 | + err = kstrtoull(buf, 10, &val); | |
5390 | + if (err) | |
5391 | + return -EINVAL; | |
5392 | + | |
5393 | + mutex_lock(&data.lock); | |
5394 | + *entry = val; | |
5395 | + mutex_unlock(&data.lock); | |
5396 | + | |
5397 | + return csize; | |
5398 | +} | |
5399 | + | |
5400 | +/** | |
5401 | + * debug_count_fopen - Open function for "count" debugfs entry | |
5402 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5403 | + * @filp: The active open file structure for the debugfs "file" | |
5404 | + * | |
5405 | + * This function provides an open implementation for the "count" debugfs | |
5406 | + * interface to the hardware latency detector. | |
5407 | + */ | |
5408 | +static int debug_count_fopen(struct inode *inode, struct file *filp) | |
5409 | +{ | |
5410 | + return 0; | |
5411 | +} | |
5412 | + | |
5413 | +/** | |
5414 | + * debug_count_fread - Read function for "count" debugfs entry | |
5415 | + * @filp: The active open file structure for the debugfs "file" | |
5416 | + * @ubuf: The userspace provided buffer to read value into | |
5417 | + * @cnt: The maximum number of bytes to read | |
5418 | + * @ppos: The current "file" position | |
5419 | + * | |
5420 | + * This function provides a read implementation for the "count" debugfs | |
5421 | + * interface to the hardware latency detector. Can be used to read the | |
5422 | + * number of latency readings exceeding the configured threshold since | |
5423 | + * the detector was last reset (e.g. by writing a zero into "count"). | |
5424 | + */ | |
5425 | +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, | |
5426 | + size_t cnt, loff_t *ppos) | |
5427 | +{ | |
5428 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); | |
5429 | +} | |
5430 | + | |
5431 | +/** | |
5432 | + * debug_count_fwrite - Write function for "count" debugfs entry | |
5433 | + * @filp: The active open file structure for the debugfs "file" | |
5434 | + * @ubuf: The user buffer that contains the value to write | |
5435 | + * @cnt: The maximum number of bytes to write to "file" | |
5436 | + * @ppos: The current position in the debugfs "file" | |
5437 | + * | |
5438 | + * This function provides a write implementation for the "count" debugfs | |
5439 | + * interface to the hardware latency detector. Can be used to write a | |
5440 | + * desired value, especially to zero the total count. | |
5441 | + */ | |
5442 | +static ssize_t debug_count_fwrite(struct file *filp, | |
5443 | + const char __user *ubuf, | |
5444 | + size_t cnt, | |
5445 | + loff_t *ppos) | |
5446 | +{ | |
5447 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); | |
5448 | +} | |
5449 | + | |
5450 | +/** | |
5451 | + * debug_enable_fopen - Dummy open function for "enable" debugfs interface | |
5452 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5453 | + * @filp: The active open file structure for the debugfs "file" | |
5454 | + * | |
5455 | + * This function provides an open implementation for the "enable" debugfs | |
5456 | + * interface to the hardware latency detector. | |
5457 | + */ | |
5458 | +static int debug_enable_fopen(struct inode *inode, struct file *filp) | |
5459 | +{ | |
5460 | + return 0; | |
5461 | +} | |
5462 | + | |
5463 | +/** | |
5464 | + * debug_enable_fread - Read function for "enable" debugfs interface | |
5465 | + * @filp: The active open file structure for the debugfs "file" | |
5466 | + * @ubuf: The userspace provided buffer to read value into | |
5467 | + * @cnt: The maximum number of bytes to read | |
5468 | + * @ppos: The current "file" position | |
5469 | + * | |
5470 | + * This function provides a read implementation for the "enable" debugfs | |
5471 | + * interface to the hardware latency detector. Can be used to determine | |
5472 | + * whether the detector is currently enabled ("0\n" or "1\n" returned). | |
5473 | + */ | |
5474 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, | |
5475 | + size_t cnt, loff_t *ppos) | |
5476 | +{ | |
5477 | + char buf[4]; | |
5478 | + | |
5479 | + if ((cnt < sizeof(buf)) || (*ppos)) | |
5480 | + return 0; | |
5481 | + | |
5482 | + buf[0] = enabled ? '1' : '0'; | |
5483 | + buf[1] = '\n'; | |
5484 | + buf[2] = '\0'; | |
5485 | + if (copy_to_user(ubuf, buf, strlen(buf))) | |
5486 | + return -EFAULT; | |
5487 | + return *ppos = strlen(buf); | |
5488 | +} | |
5489 | + | |
5490 | +/** | |
5491 | + * debug_enable_fwrite - Write function for "enable" debugfs interface | |
5492 | + * @filp: The active open file structure for the debugfs "file" | |
5493 | + * @ubuf: The user buffer that contains the value to write | |
5494 | + * @cnt: The maximum number of bytes to write to "file" | |
5495 | + * @ppos: The current position in the debugfs "file" | |
5496 | + * | |
5497 | + * This function provides a write implementation for the "enable" debugfs | |
5498 | + * interface to the hardware latency detector. Can be used to enable or | |
5499 | + * disable the detector, which will have the side-effect of possibly | |
5500 | + * also resetting the global stats and kicking off the measuring | |
5501 | + * kthread (on an enable) or the converse (upon a disable). | |
5502 | + */ | |
5503 | +static ssize_t debug_enable_fwrite(struct file *filp, | |
5504 | + const char __user *ubuf, | |
5505 | + size_t cnt, | |
5506 | + loff_t *ppos) | |
5507 | +{ | |
5508 | + char buf[4]; | |
5509 | + int csize = min(cnt, sizeof(buf)); | |
5510 | + long val = 0; | |
5511 | + int err = 0; | |
5512 | + | |
5513 | + memset(buf, '\0', sizeof(buf)); | |
5514 | + if (copy_from_user(buf, ubuf, csize)) | |
5515 | + return -EFAULT; | |
5516 | + | |
5517 | + buf[sizeof(buf)-1] = '\0'; /* just in case */ | |
5518 | + err = kstrtoul(buf, 10, &val); | |
5519 | + if (err) | |
5520 | + return -EINVAL; | |
5521 | + | |
5522 | + if (val) { | |
5523 | + if (enabled) | |
5524 | + goto unlock; | |
5525 | + enabled = 1; | |
5526 | + __reset_stats(); | |
5527 | + if (start_kthread()) | |
5528 | + return -EFAULT; | |
5529 | + } else { | |
5530 | + if (!enabled) | |
5531 | + goto unlock; | |
5532 | + enabled = 0; | |
5533 | + err = stop_kthread(); | |
5534 | + if (err) { | |
5535 | + pr_err(BANNER "cannot stop kthread\n"); | |
5536 | + return -EFAULT; | |
5537 | + } | |
5538 | + wake_up(&data.wq); /* reader(s) should return */ | |
5539 | + } | |
5540 | +unlock: | |
5541 | + return csize; | |
5542 | +} | |
5543 | + | |
5544 | +/** | |
5545 | + * debug_max_fopen - Open function for "max" debugfs entry | |
5546 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5547 | + * @filp: The active open file structure for the debugfs "file" | |
5548 | + * | |
5549 | + * This function provides an open implementation for the "max" debugfs | |
5550 | + * interface to the hardware latency detector. | |
5551 | + */ | |
5552 | +static int debug_max_fopen(struct inode *inode, struct file *filp) | |
5553 | +{ | |
5554 | + return 0; | |
5555 | +} | |
5556 | + | |
5557 | +/** | |
5558 | + * debug_max_fread - Read function for "max" debugfs entry | |
5559 | + * @filp: The active open file structure for the debugfs "file" | |
5560 | + * @ubuf: The userspace provided buffer to read value into | |
5561 | + * @cnt: The maximum number of bytes to read | |
5562 | + * @ppos: The current "file" position | |
5563 | + * | |
5564 | + * This function provides a read implementation for the "max" debugfs | |
5565 | + * interface to the hardware latency detector. Can be used to determine | |
5566 | + * the maximum latency value observed since it was last reset. | |
5567 | + */ | |
5568 | +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, | |
5569 | + size_t cnt, loff_t *ppos) | |
5570 | +{ | |
5571 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); | |
5572 | +} | |
5573 | + | |
5574 | +/** | |
5575 | + * debug_max_fwrite - Write function for "max" debugfs entry | |
5576 | + * @filp: The active open file structure for the debugfs "file" | |
5577 | + * @ubuf: The user buffer that contains the value to write | |
5578 | + * @cnt: The maximum number of bytes to write to "file" | |
5579 | + * @ppos: The current position in the debugfs "file" | |
5580 | + * | |
5581 | + * This function provides a write implementation for the "max" debugfs | |
5582 | + * interface to the hardware latency detector. Can be used to reset the | |
5583 | + * maximum or set it to some other desired value - if, then, subsequent | |
5584 | + * measurements exceed this value, the maximum will be updated. | |
5585 | + */ | |
5586 | +static ssize_t debug_max_fwrite(struct file *filp, | |
5587 | + const char __user *ubuf, | |
5588 | + size_t cnt, | |
5589 | + loff_t *ppos) | |
5590 | +{ | |
5591 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); | |
5592 | +} | |
5593 | + | |
5594 | + | |
5595 | +/** | |
5596 | + * debug_sample_fopen - An open function for "sample" debugfs interface | |
5597 | + * @inode: The in-kernel inode representation of this debugfs "file" | |
5598 | + * @filp: The active open file structure for the debugfs "file" | |
5599 | + * | |
5600 | + * This function handles opening the "sample" file within the hardware | |
5601 | + * latency detector debugfs directory interface. This file is used to read | |
5602 | + * raw samples from the global ring_buffer and allows the user to see a | |
5603 | + * running latency history. Can be opened blocking or non-blocking, | |
5604 | + * affecting whether a read waits for new samples or returns immediately. | |
5605 | + * Implements simple locking to prevent multiple simultaneous use. | |
5606 | + */ | |
5607 | +static int debug_sample_fopen(struct inode *inode, struct file *filp) | |
5608 | +{ | |
5609 | + if (!atomic_add_unless(&data.sample_open, 1, 1)) | |
5610 | + return -EBUSY; | |
5611 | + else | |
5612 | + return 0; | |
5613 | +} | |
5614 | + | |
5615 | +/** | |
5616 | + * debug_sample_fread - A read function for "sample" debugfs interface | |
5617 | + * @filp: The active open file structure for the debugfs "file" | |
5618 | + * @ubuf: The user buffer that will contain the samples read | |
5619 | + * @cnt: The maximum bytes to read from the debugfs "file" | |
5620 | + * @ppos: The current position in the debugfs "file" | |
5621 | + * | |
5622 | + * This function handles reading from the "sample" file within the hardware | |
5623 | + * latency detector debugfs directory interface. This file is used to read | |
5624 | + * raw samples from the global ring_buffer and allows the user to see a | |
5625 | + * running latency history. By default this will block pending a new | |
5626 | + * value written into the sample buffer, unless there are already a | |
5627 | + * number of value(s) waiting in the buffer, or the sample file was | |
5628 | + * previously opened in a non-blocking mode of operation. | |
5629 | + */ | |
5630 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, | |
5631 | + size_t cnt, loff_t *ppos) | |
5632 | +{ | |
5633 | + int len = 0; | |
5634 | + char buf[64]; | |
5635 | + struct sample *sample = NULL; | |
5636 | + | |
5637 | + if (!enabled) | |
5638 | + return 0; | |
5639 | + | |
5640 | + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); | |
5641 | + if (!sample) | |
5642 | + return -ENOMEM; | |
5643 | + | |
5644 | + while (!buffer_get_sample(sample)) { | |
5645 | + | |
5646 | + DEFINE_WAIT(wait); | |
5647 | + | |
5648 | + if (filp->f_flags & O_NONBLOCK) { | |
5649 | + len = -EAGAIN; | |
5650 | + goto out; | |
5651 | + } | |
5652 | + | |
5653 | + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); | |
5654 | + schedule(); | |
5655 | + finish_wait(&data.wq, &wait); | |
5656 | + | |
5657 | + if (signal_pending(current)) { | |
5658 | + len = -EINTR; | |
5659 | + goto out; | |
5660 | + } | |
5661 | + | |
5662 | + if (!enabled) { /* enable was toggled */ | |
5663 | + len = 0; | |
5664 | + goto out; | |
5665 | + } | |
5666 | + } | |
5667 | + | |
5668 | + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", | |
5669 | + sample->timestamp.tv_sec, | |
5670 | + sample->timestamp.tv_nsec, | |
5671 | + sample->duration, | |
5672 | + sample->outer_duration); | |
5673 | + | |
5674 | + | |
5675 | + /* handling partial reads is more trouble than it's worth */ | |
5676 | + if (len > cnt) | |
5677 | + goto out; | |
5678 | + | |
5679 | + if (copy_to_user(ubuf, buf, len)) | |
5680 | + len = -EFAULT; | |
5681 | + | |
5682 | +out: | |
5683 | + kfree(sample); | |
5684 | + return len; | |
5685 | +} | |
5686 | + | |
5687 | +/** | |
5688 | + * debug_sample_release - Release function for "sample" debugfs interface | |
5689 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5690 | + * @filp: The active open file structure for the debugfs "file" | |
5691 | + * | |
5692 | + * This function completes the close of the debugfs interface "sample" file. | |
5693 | + * Frees the sample_open "lock" so that other users may open the interface. | |
5694 | + */ | |
5695 | +static int debug_sample_release(struct inode *inode, struct file *filp) | |
5696 | +{ | |
5697 | + atomic_dec(&data.sample_open); | |
5698 | + | |
5699 | + return 0; | |
5700 | +} | |
5701 | + | |
5702 | +/** | |
5703 | + * debug_threshold_fopen - Open function for "threshold" debugfs entry | |
5704 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5705 | + * @filp: The active open file structure for the debugfs "file" | |
5706 | + * | |
5707 | + * This function provides an open implementation for the "threshold" debugfs | |
5708 | + * interface to the hardware latency detector. | |
5709 | + */ | |
5710 | +static int debug_threshold_fopen(struct inode *inode, struct file *filp) | |
5711 | +{ | |
5712 | + return 0; | |
5713 | +} | |
5714 | + | |
5715 | +/** | |
5716 | + * debug_threshold_fread - Read function for "threshold" debugfs entry | |
5717 | + * @filp: The active open file structure for the debugfs "file" | |
5718 | + * @ubuf: The userspace provided buffer to read value into | |
5719 | + * @cnt: The maximum number of bytes to read | |
5720 | + * @ppos: The current "file" position | |
5721 | + * | |
5722 | + * This function provides a read implementation for the "threshold" debugfs | |
5723 | + * interface to the hardware latency detector. It can be used to determine | |
5724 | + * the current threshold level at which a latency will be recorded in the | |
5725 | + * global ring buffer, typically on the order of 10us. | |
5726 | + */ | |
5727 | +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, | |
5728 | + size_t cnt, loff_t *ppos) | |
5729 | +{ | |
5730 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); | |
5731 | +} | |
5732 | + | |
5733 | +/** | |
5734 | + * debug_threshold_fwrite - Write function for "threshold" debugfs entry | |
5735 | + * @filp: The active open file structure for the debugfs "file" | |
5736 | + * @ubuf: The user buffer that contains the value to write | |
5737 | + * @cnt: The maximum number of bytes to write to "file" | |
5738 | + * @ppos: The current position in the debugfs "file" | |
5739 | + * | |
5740 | + * This function provides a write implementation for the "threshold" debugfs | |
5741 | + * interface to the hardware latency detector. It can be used to configure | |
5742 | + * the threshold level at which any subsequently detected latencies will | |
5743 | + * be recorded into the global ring buffer. | |
5744 | + */ | |
5745 | +static ssize_t debug_threshold_fwrite(struct file *filp, | |
5746 | + const char __user *ubuf, | |
5747 | + size_t cnt, | |
5748 | + loff_t *ppos) | |
5749 | +{ | |
5750 | + int ret; | |
5751 | + | |
5752 | + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); | |
5753 | + | |
5754 | + if (enabled) | |
5755 | + wake_up_process(kthread); | |
5756 | + | |
5757 | + return ret; | |
5758 | +} | |
5759 | + | |
5760 | +/** | |
5761 | + * debug_width_fopen - Open function for "width" debugfs entry | |
5762 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5763 | + * @filp: The active open file structure for the debugfs "file" | |
5764 | + * | |
5765 | + * This function provides an open implementation for the "width" debugfs | |
5766 | + * interface to the hardware latency detector. | |
5767 | + */ | |
5768 | +static int debug_width_fopen(struct inode *inode, struct file *filp) | |
5769 | +{ | |
5770 | + return 0; | |
5771 | +} | |
5772 | + | |
5773 | +/** | |
5774 | + * debug_width_fread - Read function for "width" debugfs entry | |
5775 | + * @filp: The active open file structure for the debugfs "file" | |
5776 | + * @ubuf: The userspace provided buffer to read value into | |
5777 | + * @cnt: The maximum number of bytes to read | |
5778 | + * @ppos: The current "file" position | |
5779 | + * | |
5780 | + * This function provides a read implementation for the "width" debugfs | |
5781 | + * interface to the hardware latency detector. It can be used to determine | |
5782 | + * for how many us of the total window us we will actively sample for any | |
5783 | + * hardware-induced latency periods. Obviously, it is not possible to | |
5784 | + * sample constantly and have the system respond to a sample reader, or, | |
5785 | + * worse, without having the system appear to have gone out to lunch. | |
5786 | + */ | |
5787 | +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, | |
5788 | + size_t cnt, loff_t *ppos) | |
5789 | +{ | |
5790 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); | |
5791 | +} | |
5792 | + | |
5793 | +/** | |
5794 | + * debug_width_fwrite - Write function for "width" debugfs entry | |
5795 | + * @filp: The active open file structure for the debugfs "file" | |
5796 | + * @ubuf: The user buffer that contains the value to write | |
5797 | + * @cnt: The maximum number of bytes to write to "file" | |
5798 | + * @ppos: The current position in the debugfs "file" | |
5799 | + * | |
5800 | + * This function provides a write implementation for the "width" debugfs | |
5801 | + * interface to the hardware latency detector. It can be used to configure | |
5802 | + * for how many us of the total window us we will actively sample for any | |
5803 | + * hardware-induced latency periods. Obviously, it is not possible to | |
5804 | + * sample constantly and have the system respond to a sample reader, or, | |
5805 | + * worse, without having the system appear to have gone out to lunch. It | |
5806 | + * is enforced that width is less than the total window size. | |
5807 | + */ | |
5808 | +static ssize_t debug_width_fwrite(struct file *filp, | |
5809 | + const char __user *ubuf, | |
5810 | + size_t cnt, | |
5811 | + loff_t *ppos) | |
5812 | +{ | |
5813 | + char buf[U64STR_SIZE]; | |
5814 | + int csize = min(cnt, sizeof(buf)); | |
5815 | + u64 val = 0; | |
5816 | + int err = 0; | |
5817 | + | |
5818 | + memset(buf, '\0', sizeof(buf)); | |
5819 | + if (copy_from_user(buf, ubuf, csize)) | |
5820 | + return -EFAULT; | |
5821 | + | |
5822 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
5823 | + err = kstrtoull(buf, 10, &val); | |
5824 | + if (err) | |
5825 | + return -EINVAL; | |
5826 | + | |
5827 | + mutex_lock(&data.lock); | |
5828 | + if (val < data.sample_window) | |
5829 | + data.sample_width = val; | |
5830 | + else { | |
5831 | + mutex_unlock(&data.lock); | |
5832 | + return -EINVAL; | |
5833 | + } | |
5834 | + mutex_unlock(&data.lock); | |
5835 | + | |
5836 | + if (enabled) | |
5837 | + wake_up_process(kthread); | |
5838 | + | |
5839 | + return csize; | |
5840 | +} | |
5841 | + | |
5842 | +/** | |
5843 | + * debug_window_fopen - Open function for "window" debugfs entry | |
5844 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
5845 | + * @filp: The active open file structure for the debugfs "file" | |
5846 | + * | |
5847 | + * This function provides an open implementation for the "window" debugfs | |
5848 | + * interface to the hardware latency detector. The window is the total time | |
5849 | + * in us that will be considered one sample period. Conceptually, windows | |
5850 | + * occur back-to-back and contain a sample width period during which | |
5851 | + * actual sampling occurs. | |
5852 | + */ | |
5853 | +static int debug_window_fopen(struct inode *inode, struct file *filp) | |
5854 | +{ | |
5855 | + return 0; | |
5856 | +} | |
5857 | + | |
5858 | +/** | |
5859 | + * debug_window_fread - Read function for "window" debugfs entry | |
5860 | + * @filp: The active open file structure for the debugfs "file" | |
5861 | + * @ubuf: The userspace provided buffer to read value into | |
5862 | + * @cnt: The maximum number of bytes to read | |
5863 | + * @ppos: The current "file" position | |
5864 | + * | |
5865 | + * This function provides a read implementation for the "window" debugfs | |
5866 | + * interface to the hardware latency detector. The window is the total time | |
5867 | + * in us that will be considered one sample period. Conceptually, windows | |
5868 | + * occur back-to-back and contain a sample width period during which | |
5869 | + * actual sampling occurs. Can be used to read the total window size. | |
5870 | + */ | |
5871 | +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, | |
5872 | + size_t cnt, loff_t *ppos) | |
5873 | +{ | |
5874 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); | |
5875 | +} | |
5876 | + | |
5877 | +/** | |
5878 | + * debug_window_fwrite - Write function for "window" debugfs entry | |
5879 | + * @filp: The active open file structure for the debugfs "file" | |
5880 | + * @ubuf: The user buffer that contains the value to write | |
5881 | + * @cnt: The maximum number of bytes to write to "file" | |
5882 | + * @ppos: The current position in the debugfs "file" | |
5883 | + * | |
5884 | + * This function provides a write implementation for the "window" debufds | |
5885 | + * interface to the hardware latency detetector. The window is the total time | |
5886 | + * in us that will be considered one sample period. Conceptually, windows | |
5887 | + * occur back-to-back and contain a sample width period during which | |
5888 | + * actual sampling occurs. Can be used to write a new total window size. It | |
5889 | + * is enforced that any value written must be greater than the sample width | |
5890 | + * size, or an error results. | |
5891 | + */ | |
5892 | +static ssize_t debug_window_fwrite(struct file *filp, | |
5893 | + const char __user *ubuf, | |
5894 | + size_t cnt, | |
5895 | + loff_t *ppos) | |
5896 | +{ | |
5897 | + char buf[U64STR_SIZE]; | |
5898 | + int csize = min(cnt, sizeof(buf)); | |
5899 | + u64 val = 0; | |
5900 | + int err = 0; | |
5901 | + | |
5902 | + memset(buf, '\0', sizeof(buf)); | |
5903 | + if (copy_from_user(buf, ubuf, csize)) | |
5904 | + return -EFAULT; | |
5905 | + | |
5906 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
5907 | + err = kstrtoull(buf, 10, &val); | |
5908 | + if (err) | |
5909 | + return -EINVAL; | |
5910 | + | |
5911 | + mutex_lock(&data.lock); | |
5912 | + if (data.sample_width < val) | |
5913 | + data.sample_window = val; | |
5914 | + else { | |
5915 | + mutex_unlock(&data.lock); | |
5916 | + return -EINVAL; | |
5917 | + } | |
5918 | + mutex_unlock(&data.lock); | |
5919 | + | |
5920 | + return csize; | |
5921 | +} | |
5922 | + | |
5923 | +/* | |
5924 | + * Function pointers for the "count" debugfs file operations | |
5925 | + */ | |
5926 | +static const struct file_operations count_fops = { | |
5927 | + .open = debug_count_fopen, | |
5928 | + .read = debug_count_fread, | |
5929 | + .write = debug_count_fwrite, | |
5930 | + .owner = THIS_MODULE, | |
5931 | +}; | |
5932 | + | |
5933 | +/* | |
5934 | + * Function pointers for the "enable" debugfs file operations | |
5935 | + */ | |
5936 | +static const struct file_operations enable_fops = { | |
5937 | + .open = debug_enable_fopen, | |
5938 | + .read = debug_enable_fread, | |
5939 | + .write = debug_enable_fwrite, | |
5940 | + .owner = THIS_MODULE, | |
5941 | +}; | |
5942 | + | |
5943 | +/* | |
5944 | + * Function pointers for the "max" debugfs file operations | |
5945 | + */ | |
5946 | +static const struct file_operations max_fops = { | |
5947 | + .open = debug_max_fopen, | |
5948 | + .read = debug_max_fread, | |
5949 | + .write = debug_max_fwrite, | |
5950 | + .owner = THIS_MODULE, | |
5951 | +}; | |
5952 | + | |
5953 | +/* | |
5954 | + * Function pointers for the "sample" debugfs file operations | |
5955 | + */ | |
5956 | +static const struct file_operations sample_fops = { | |
5957 | + .open = debug_sample_fopen, | |
5958 | + .read = debug_sample_fread, | |
5959 | + .release = debug_sample_release, | |
5960 | + .owner = THIS_MODULE, | |
5961 | +}; | |
5962 | + | |
5963 | +/* | |
5964 | + * Function pointers for the "threshold" debugfs file operations | |
5965 | + */ | |
5966 | +static const struct file_operations threshold_fops = { | |
5967 | + .open = debug_threshold_fopen, | |
5968 | + .read = debug_threshold_fread, | |
5969 | + .write = debug_threshold_fwrite, | |
5970 | + .owner = THIS_MODULE, | |
5971 | +}; | |
5972 | + | |
5973 | +/* | |
5974 | + * Function pointers for the "width" debugfs file operations | |
5975 | + */ | |
5976 | +static const struct file_operations width_fops = { | |
5977 | + .open = debug_width_fopen, | |
5978 | + .read = debug_width_fread, | |
5979 | + .write = debug_width_fwrite, | |
5980 | + .owner = THIS_MODULE, | |
5981 | +}; | |
5982 | + | |
5983 | +/* | |
5984 | + * Function pointers for the "window" debugfs file operations | |
5985 | + */ | |
5986 | +static const struct file_operations window_fops = { | |
5987 | + .open = debug_window_fopen, | |
5988 | + .read = debug_window_fread, | |
5989 | + .write = debug_window_fwrite, | |
5990 | + .owner = THIS_MODULE, | |
5991 | +}; | |
5992 | + | |
5993 | +/** | |
5994 | + * init_debugfs - A function to initialize the debugfs interface files | |
5995 | + * | |
5996 | + * This function creates entries in debugfs for "hwlat_detector", including | |
5997 | + * files to read values from the detector, current samples, and the | |
5998 | + * maximum sample that has been captured since the hardware latency | |
5999 | + * detector was started. | |
6000 | + */ | |
6001 | +static int init_debugfs(void) | |
6002 | +{ | |
6003 | + int ret = -ENOMEM; | |
6004 | + | |
6005 | + debug_dir = debugfs_create_dir(DRVNAME, NULL); | |
6006 | + if (!debug_dir) | |
6007 | + goto err_debug_dir; | |
6008 | + | |
6009 | + debug_sample = debugfs_create_file("sample", 0444, | |
6010 | + debug_dir, NULL, | |
6011 | + &sample_fops); | |
6012 | + if (!debug_sample) | |
6013 | + goto err_sample; | |
6014 | + | |
6015 | + debug_count = debugfs_create_file("count", 0444, | |
6016 | + debug_dir, NULL, | |
6017 | + &count_fops); | |
6018 | + if (!debug_count) | |
6019 | + goto err_count; | |
6020 | + | |
6021 | + debug_max = debugfs_create_file("max", 0444, | |
6022 | + debug_dir, NULL, | |
6023 | + &max_fops); | |
6024 | + if (!debug_max) | |
6025 | + goto err_max; | |
6026 | + | |
6027 | + debug_sample_window = debugfs_create_file("window", 0644, | |
6028 | + debug_dir, NULL, | |
6029 | + &window_fops); | |
6030 | + if (!debug_sample_window) | |
6031 | + goto err_window; | |
6032 | + | |
6033 | + debug_sample_width = debugfs_create_file("width", 0644, | |
6034 | + debug_dir, NULL, | |
6035 | + &width_fops); | |
6036 | + if (!debug_sample_width) | |
6037 | + goto err_width; | |
6038 | + | |
6039 | + debug_threshold = debugfs_create_file("threshold", 0644, | |
6040 | + debug_dir, NULL, | |
6041 | + &threshold_fops); | |
6042 | + if (!debug_threshold) | |
6043 | + goto err_threshold; | |
6044 | + | |
6045 | + debug_enable = debugfs_create_file("enable", 0644, | |
6046 | + debug_dir, &enabled, | |
6047 | + &enable_fops); | |
6048 | + if (!debug_enable) | |
6049 | + goto err_enable; | |
6050 | + | |
6051 | + else { | |
6052 | + ret = 0; | |
6053 | + goto out; | |
6054 | + } | |
6055 | + | |
6056 | +err_enable: | |
6057 | + debugfs_remove(debug_threshold); | |
6058 | +err_threshold: | |
6059 | + debugfs_remove(debug_sample_width); | |
6060 | +err_width: | |
6061 | + debugfs_remove(debug_sample_window); | |
6062 | +err_window: | |
6063 | + debugfs_remove(debug_max); | |
6064 | +err_max: | |
6065 | + debugfs_remove(debug_count); | |
6066 | +err_count: | |
6067 | + debugfs_remove(debug_sample); | |
6068 | +err_sample: | |
6069 | + debugfs_remove(debug_dir); | |
6070 | +err_debug_dir: | |
6071 | +out: | |
6072 | + return ret; | |
6073 | +} | |
6074 | + | |
6075 | +/** | |
6076 | + * free_debugfs - A function to cleanup the debugfs file interface | |
6077 | + */ | |
6078 | +static void free_debugfs(void) | |
6079 | +{ | |
6080 | + /* could also use a debugfs_remove_recursive */ | |
6081 | + debugfs_remove(debug_enable); | |
6082 | + debugfs_remove(debug_threshold); | |
6083 | + debugfs_remove(debug_sample_width); | |
6084 | + debugfs_remove(debug_sample_window); | |
6085 | + debugfs_remove(debug_max); | |
6086 | + debugfs_remove(debug_count); | |
6087 | + debugfs_remove(debug_sample); | |
6088 | + debugfs_remove(debug_dir); | |
6089 | +} | |
6090 | + | |
6091 | +/** | |
6092 | + * detector_init - Standard module initialization code | |
6093 | + */ | |
6094 | +static int detector_init(void) | |
6095 | +{ | |
6096 | + int ret = -ENOMEM; | |
6097 | + | |
6098 | + pr_info(BANNER "version %s\n", VERSION); | |
6099 | + | |
6100 | + ret = init_stats(); | |
6101 | + if (ret) | |
6102 | + goto out; | |
6103 | + | |
6104 | + ret = init_debugfs(); | |
6105 | + if (ret) | |
6106 | + goto err_stats; | |
6107 | + | |
6108 | + if (enabled) | |
6109 | + ret = start_kthread(); | |
6110 | + | |
6111 | + goto out; | |
6112 | + | |
6113 | +err_stats: | |
6114 | + ring_buffer_free(ring_buffer); | |
6115 | +out: | |
6116 | + return ret; | |
6117 | + | |
6118 | +} | |
6119 | + | |
6120 | +/** | |
6121 | + * detector_exit - Standard module cleanup code | |
6122 | + */ | |
6123 | +static void detector_exit(void) | |
6124 | +{ | |
6125 | + int err; | |
6126 | + | |
6127 | + if (enabled) { | |
6128 | + enabled = 0; | |
6129 | + err = stop_kthread(); | |
6130 | + if (err) | |
6131 | + pr_err(BANNER "cannot stop kthread\n"); | |
6132 | + } | |
6133 | + | |
6134 | + free_debugfs(); | |
6135 | + ring_buffer_free(ring_buffer); /* free up the ring buffer */ | |
6136 | + | |
6137 | +} | |
6138 | + | |
6139 | +module_init(detector_init); | |
6140 | +module_exit(detector_exit); | |
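
For illustration only (this is not part of the commit): given the debugfs files created by init_debugfs() above, a minimal userspace reader of the "sample" pipe might look like the sketch below. The mount point /sys/kernel/debug is an assumption here; debugfs may be mounted elsewhere on a given system.

    /*
     * Hypothetical userspace reader for the hwlat_detector "sample" pipe.
     * Each read() returns one record in the format produced by
     * debug_sample_fread(): <sec>.<nsec>\t<duration>\t<outer_duration>\n
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* Blocking open: each read() waits for the next latency record. */
            int fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);
            char buf[64];
            ssize_t n;

            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            /* read() returns 0 once the detector is disabled. */
            while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
                    buf[n] = '\0';
                    fputs(buf, stdout);
            }

            close(fd);
            return 0;
    }
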
6141 | diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c | |
6142 | index df990bb8c873..1a162709a85e 100644 | |
6143 | --- a/drivers/mmc/host/mmci.c | |
6144 | +++ b/drivers/mmc/host/mmci.c | |
6145 | @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
6146 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
6147 | struct variant_data *variant = host->variant; | |
6148 | void __iomem *base = host->base; | |
6149 | - unsigned long flags; | |
6150 | u32 status; | |
6151 | ||
6152 | status = readl(base + MMCISTATUS); | |
6153 | ||
6154 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
6155 | ||
6156 | - local_irq_save(flags); | |
6157 | - | |
6158 | do { | |
6159 | unsigned int remain, len; | |
6160 | char *buffer; | |
6161 | @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
6162 | ||
6163 | sg_miter_stop(sg_miter); | |
6164 | ||
6165 | - local_irq_restore(flags); | |
6166 | - | |
6167 | /* | |
6168 | * If we have less than the fifo 'half-full' threshold to transfer, | |
6169 | * trigger a PIO interrupt as soon as any data is available. | |
6170 | diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c | |
6171 | index 25c55ab05c7d..5a1d117a8744 100644 | |
6172 | --- a/drivers/net/ethernet/3com/3c59x.c | |
6173 | +++ b/drivers/net/ethernet/3com/3c59x.c | |
6174 | @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) | |
6175 | { | |
6176 | struct vortex_private *vp = netdev_priv(dev); | |
6177 | unsigned long flags; | |
6178 | - local_irq_save(flags); | |
6179 | + local_irq_save_nort(flags); | |
6180 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
6181 | - local_irq_restore(flags); | |
6182 | + local_irq_restore_nort(flags); | |
6183 | } | |
6184 | #endif | |
6185 | ||
6186 | @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev) | |
6187 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
6188 | */ | |
6189 | unsigned long flags; | |
6190 | - local_irq_save(flags); | |
6191 | + local_irq_save_nort(flags); | |
6192 | if (vp->full_bus_master_tx) | |
6193 | boomerang_interrupt(dev->irq, dev); | |
6194 | else | |
6195 | vortex_interrupt(dev->irq, dev); | |
6196 | - local_irq_restore(flags); | |
6197 | + local_irq_restore_nort(flags); | |
6198 | } | |
6199 | } | |
6200 | ||
6201 | diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c | |
6202 | index da4c2d8a4173..1420dfb56bac 100644 | |
6203 | --- a/drivers/net/ethernet/realtek/8139too.c | |
6204 | +++ b/drivers/net/ethernet/realtek/8139too.c | |
6205 | @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev) | |
6206 | struct rtl8139_private *tp = netdev_priv(dev); | |
6207 | const int irq = tp->pci_dev->irq; | |
6208 | ||
6209 | - disable_irq(irq); | |
6210 | + disable_irq_nosync(irq); | |
6211 | rtl8139_interrupt(irq, dev); | |
6212 | enable_irq(irq); | |
6213 | } | |
6214 | diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
6215 | index 56f109bc8394..02afc796bc71 100644 | |
6216 | --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
6217 | +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c | |
6218 | @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, | |
6219 | while (!ctx->done.done && msecs--) | |
6220 | udelay(1000); | |
6221 | } else { | |
6222 | - wait_event_interruptible(ctx->done.wait, | |
6223 | + swait_event_interruptible(ctx->done.wait, | |
6224 | ctx->done.done); | |
6225 | } | |
6226 | break; | |
6227 | diff --git a/drivers/pci/access.c b/drivers/pci/access.c | |
6228 | index d11cdbb8fba3..223bbb9acb03 100644 | |
6229 | --- a/drivers/pci/access.c | |
6230 | +++ b/drivers/pci/access.c | |
6231 | @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) | |
6232 | WARN_ON(!dev->block_cfg_access); | |
6233 | ||
6234 | dev->block_cfg_access = 0; | |
6235 | - wake_up_all(&pci_cfg_wait); | |
6236 | + wake_up_all_locked(&pci_cfg_wait); | |
6237 | raw_spin_unlock_irqrestore(&pci_lock, flags); | |
6238 | } | |
6239 | EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); | |
6240 | diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c | |
6241 | index 9bd41a35a78a..8e2d436c2e3f 100644 | |
6242 | --- a/drivers/scsi/fcoe/fcoe.c | |
6243 | +++ b/drivers/scsi/fcoe/fcoe.c | |
6244 | @@ -1455,11 +1455,11 @@ static int fcoe_rcv(struct sk_buff *skb, struct net_device *netdev, | |
6245 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
6246 | { | |
6247 | struct fcoe_percpu_s *fps; | |
6248 | - int rc; | |
6249 | + int rc, cpu = get_cpu_light(); | |
6250 | ||
6251 | - fps = &get_cpu_var(fcoe_percpu); | |
6252 | + fps = &per_cpu(fcoe_percpu, cpu); | |
6253 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
6254 | - put_cpu_var(fcoe_percpu); | |
6255 | + put_cpu_light(); | |
6256 | ||
6257 | return rc; | |
6258 | } | |
6259 | @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, | |
6260 | return 0; | |
6261 | } | |
6262 | ||
6263 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
6264 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
6265 | stats->InvalidCRCCount++; | |
6266 | if (stats->InvalidCRCCount < 5) | |
6267 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
6268 | - put_cpu(); | |
6269 | + put_cpu_light(); | |
6270 | return -EINVAL; | |
6271 | } | |
6272 | ||
6273 | @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
6274 | */ | |
6275 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
6276 | ||
6277 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
6278 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
6279 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
6280 | if (stats->ErrorFrames < 5) | |
6281 | printk(KERN_WARNING "fcoe: FCoE version " | |
6282 | @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
6283 | goto drop; | |
6284 | ||
6285 | if (!fcoe_filter_frames(lport, fp)) { | |
6286 | - put_cpu(); | |
6287 | + put_cpu_light(); | |
6288 | fc_exch_recv(lport, fp); | |
6289 | return; | |
6290 | } | |
6291 | drop: | |
6292 | stats->ErrorFrames++; | |
6293 | - put_cpu(); | |
6294 | + put_cpu_light(); | |
6295 | kfree_skb(skb); | |
6296 | } | |
6297 | ||
6298 | diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c | |
6299 | index dcf36537a767..1a1f2e46452c 100644 | |
6300 | --- a/drivers/scsi/fcoe/fcoe_ctlr.c | |
6301 | +++ b/drivers/scsi/fcoe/fcoe_ctlr.c | |
6302 | @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
6303 | ||
6304 | INIT_LIST_HEAD(&del_list); | |
6305 | ||
6306 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
6307 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
6308 | ||
6309 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
6310 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
6311 | @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
6312 | sel_time = fcf->time; | |
6313 | } | |
6314 | } | |
6315 | - put_cpu(); | |
6316 | + put_cpu_light(); | |
6317 | ||
6318 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
6319 | /* Removes fcf from current list */ | |
6320 | diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c | |
6321 | index e72673b0a8fb..da598a6caa22 100644 | |
6322 | --- a/drivers/scsi/libfc/fc_exch.c | |
6323 | +++ b/drivers/scsi/libfc/fc_exch.c | |
6324 | @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, | |
6325 | } | |
6326 | memset(ep, 0, sizeof(*ep)); | |
6327 | ||
6328 | - cpu = get_cpu(); | |
6329 | + cpu = get_cpu_light(); | |
6330 | pool = per_cpu_ptr(mp->pool, cpu); | |
6331 | spin_lock_bh(&pool->lock); | |
6332 | - put_cpu(); | |
6333 | + put_cpu_light(); | |
6334 | ||
6335 | /* peek cache of free slot */ | |
6336 | if (pool->left != FC_XID_UNKNOWN) { | |
6337 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | |
6338 | index 763f012fdeca..d0f61b595470 100644 | |
6339 | --- a/drivers/scsi/libsas/sas_ata.c | |
6340 | +++ b/drivers/scsi/libsas/sas_ata.c | |
6341 | @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
6342 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
6343 | * unconditionally re-enable interrupts | |
6344 | */ | |
6345 | - local_irq_save(flags); | |
6346 | + local_irq_save_nort(flags); | |
6347 | spin_unlock(ap->lock); | |
6348 | ||
6349 | /* If the device fell off, no sense in issuing commands */ | |
6350 | @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
6351 | ||
6352 | out: | |
6353 | spin_lock(ap->lock); | |
6354 | - local_irq_restore(flags); | |
6355 | + local_irq_restore_nort(flags); | |
6356 | return ret; | |
6357 | } | |
6358 | ||
6359 | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h | |
6360 | index edc48f3b8230..ee5c6f9dfb6f 100644 | |
6361 | --- a/drivers/scsi/qla2xxx/qla_inline.h | |
6362 | +++ b/drivers/scsi/qla2xxx/qla_inline.h | |
6363 | @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) | |
6364 | { | |
6365 | unsigned long flags; | |
6366 | struct qla_hw_data *ha = rsp->hw; | |
6367 | - local_irq_save(flags); | |
6368 | + local_irq_save_nort(flags); | |
6369 | if (IS_P3P_TYPE(ha)) | |
6370 | qla82xx_poll(0, rsp); | |
6371 | else | |
6372 | ha->isp_ops->intr_handler(0, rsp); | |
6373 | - local_irq_restore(flags); | |
6374 | + local_irq_restore_nort(flags); | |
6375 | } | |
6376 | ||
6377 | static inline uint8_t * | |
6378 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c | |
6379 | index 987f1c729e9c..18391e07d70f 100644 | |
6380 | --- a/drivers/scsi/qla2xxx/qla_isr.c | |
6381 | +++ b/drivers/scsi/qla2xxx/qla_isr.c | |
6382 | @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) | |
6383 | * kref_put(). | |
6384 | */ | |
6385 | kref_get(&qentry->irq_notify.kref); | |
6386 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
6387 | + swork_queue(&qentry->irq_notify.swork); | |
6388 | +#else | |
6389 | schedule_work(&qentry->irq_notify.work); | |
6390 | +#endif | |
6391 | } | |
6392 | ||
6393 | /* | |
6394 | diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c | |
6395 | index 97f0a2bd93ed..a4f45aaa9ad4 100644 | |
6396 | --- a/drivers/thermal/x86_pkg_temp_thermal.c | |
6397 | +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |
6398 | @@ -29,6 +29,7 @@ | |
6399 | #include <linux/pm.h> | |
6400 | #include <linux/thermal.h> | |
6401 | #include <linux/debugfs.h> | |
6402 | +#include <linux/swork.h> | |
6403 | #include <asm/cpu_device_id.h> | |
6404 | #include <asm/mce.h> | |
6405 | ||
6406 | @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) | |
6407 | } | |
6408 | } | |
6409 | ||
6410 | -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
6411 | +static void platform_thermal_notify_work(struct swork_event *event) | |
6412 | { | |
6413 | unsigned long flags; | |
6414 | int cpu = smp_processor_id(); | |
6415 | @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
6416 | pkg_work_scheduled[phy_id]) { | |
6417 | disable_pkg_thres_interrupt(); | |
6418 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
6419 | - return -EINVAL; | |
6420 | + return; | |
6421 | } | |
6422 | pkg_work_scheduled[phy_id] = 1; | |
6423 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
6424 | @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
6425 | schedule_delayed_work_on(cpu, | |
6426 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | |
6427 | msecs_to_jiffies(notify_delay_ms)); | |
6428 | +} | |
6429 | + | |
6430 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6431 | +static struct swork_event notify_work; | |
6432 | + | |
6433 | +static int thermal_notify_work_init(void) | |
6434 | +{ | |
6435 | + int err; | |
6436 | + | |
6437 | + err = swork_get(); | |
6438 | + if (err) | |
6439 | + return err; | |
6440 | + | |
6441 | + INIT_SWORK(¬ify_work, platform_thermal_notify_work); | |
6442 | return 0; | |
6443 | } | |
6444 | ||
6445 | +static void thermal_notify_work_cleanup(void) | |
6446 | +{ | |
6447 | + swork_put(); | |
6448 | +} | |
6449 | + | |
6450 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
6451 | +{ | |
6452 | + swork_queue(¬ify_work); | |
6453 | + return 0; | |
6454 | +} | |
6455 | + | |
6456 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
6457 | + | |
6458 | +static int thermal_notify_work_init(void) { return 0; } | |
6459 | + | |
6460 | +static void thermal_notify_work_cleanup(void) { } | |
6461 | + | |
6462 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
6463 | +{ | |
6464 | + platform_thermal_notify_work(NULL); | |
6465 | + | |
6466 | + return 0; | |
6467 | +} | |
6468 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
6469 | + | |
6470 | static int find_siblings_cpu(int cpu) | |
6471 | { | |
6472 | int i; | |
6473 | @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) | |
6474 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | |
6475 | return -ENODEV; | |
6476 | ||
6477 | + if (!thermal_notify_work_init()) | |
6478 | + return -ENODEV; | |
6479 | + | |
6480 | spin_lock_init(&pkg_work_lock); | |
6481 | platform_thermal_package_notify = | |
6482 | pkg_temp_thermal_platform_thermal_notify; | |
6483 | @@ -608,7 +651,7 @@ static int __init pkg_temp_thermal_init(void) | |
6484 | kfree(pkg_work_scheduled); | |
6485 | platform_thermal_package_notify = NULL; | |
6486 | platform_thermal_package_rate_control = NULL; | |
6487 | - | |
6488 | + thermal_notify_work_cleanup(); | |
6489 | return -ENODEV; | |
6490 | } | |
6491 | ||
6492 | @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) | |
6493 | mutex_unlock(&phy_dev_list_mutex); | |
6494 | platform_thermal_package_notify = NULL; | |
6495 | platform_thermal_package_rate_control = NULL; | |
6496 | + thermal_notify_work_cleanup(); | |
6497 | for_each_online_cpu(i) | |
6498 | cancel_delayed_work_sync( | |
6499 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | |
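
For illustration only (not part of the commit): the x86_pkg_temp_thermal hunks above defer the MSR thermal notification to an swork item when PREEMPT_RT_FULL is set, so the heavy work runs in a preemptible context. A condensed, hypothetical sketch of that deferral pattern, using only the swork calls visible in this series (swork_get, INIT_SWORK, swork_queue, swork_put), follows; the function and variable names are illustrative.

    /* Hypothetical sketch of the swork deferral pattern (RT-specific API). */
    #include <linux/swork.h>

    static struct swork_event example_work;

    /* Runs in a preemptible context, on the swork helper thread. */
    static void example_work_fn(struct swork_event *event)
    {
            /* heavy or sleeping work that must not run in the notifier */
    }

    static int example_init(void)
    {
            int err = swork_get();          /* bring up the swork infrastructure */

            if (err)
                    return err;
            INIT_SWORK(&example_work, example_work_fn);
            return 0;
    }

    /* Called from the original (atomic) notification path. */
    static void example_notify(void)
    {
            swork_queue(&example_work);
    }

    static void example_cleanup(void)
    {
            swork_put();
    }
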
6500 | diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c | |
6501 | index dcf43f66404f..a9ae57122841 100644 | |
6502 | --- a/drivers/tty/serial/8250/8250_core.c | |
6503 | +++ b/drivers/tty/serial/8250/8250_core.c | |
6504 | @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; | |
6505 | ||
6506 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
6507 | ||
6508 | -#define PASS_LIMIT 512 | |
6509 | +/* | |
6510 | + * On -rt we can have more delays, and legitimately | |
6511 | + * so - so don't drop work spuriously and spam the | |
6512 | + * syslog: | |
6513 | + */ | |
6514 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
6515 | +# define PASS_LIMIT 1000000 | |
6516 | +#else | |
6517 | +# define PASS_LIMIT 512 | |
6518 | +#endif | |
6519 | ||
6520 | #include <asm/serial.h> | |
6521 | /* | |
6522 | diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c | |
6523 | index 858a54633664..fc44fb59aef6 100644 | |
6524 | --- a/drivers/tty/serial/8250/8250_port.c | |
6525 | +++ b/drivers/tty/serial/8250/8250_port.c | |
6526 | @@ -35,6 +35,7 @@ | |
6527 | #include <linux/nmi.h> | |
6528 | #include <linux/mutex.h> | |
6529 | #include <linux/slab.h> | |
6530 | +#include <linux/kdb.h> | |
6531 | #include <linux/uaccess.h> | |
6532 | #include <linux/pm_runtime.h> | |
6533 | #include <linux/timer.h> | |
6534 | @@ -3109,9 +3110,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, | |
6535 | ||
6536 | serial8250_rpm_get(up); | |
6537 | ||
6538 | - if (port->sysrq) | |
6539 | + if (port->sysrq || oops_in_progress) | |
6540 | locked = 0; | |
6541 | - else if (oops_in_progress) | |
6542 | + else if (in_kdb_printk()) | |
6543 | locked = spin_trylock_irqsave(&port->lock, flags); | |
6544 | else | |
6545 | spin_lock_irqsave(&port->lock, flags); | |
6546 | diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c | |
6547 | index 8a9e213387a7..dd1f9a426b74 100644 | |
6548 | --- a/drivers/tty/serial/amba-pl011.c | |
6549 | +++ b/drivers/tty/serial/amba-pl011.c | |
6550 | @@ -2167,13 +2167,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
6551 | ||
6552 | clk_enable(uap->clk); | |
6553 | ||
6554 | - local_irq_save(flags); | |
6555 | + /* | |
6556 | + * local_irq_save(flags); | |
6557 | + * | |
6558 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
6559 | + * handling then interrupts are already disabled. Aside from | |
6560 | + * that the port.sysrq check is racy on SMP regardless. | |
6561 | + */ | |
6562 | if (uap->port.sysrq) | |
6563 | locked = 0; | |
6564 | else if (oops_in_progress) | |
6565 | - locked = spin_trylock(&uap->port.lock); | |
6566 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
6567 | else | |
6568 | - spin_lock(&uap->port.lock); | |
6569 | + spin_lock_irqsave(&uap->port.lock, flags); | |
6570 | ||
6571 | /* | |
6572 | * First save the CR then disable the interrupts | |
6573 | @@ -2197,8 +2203,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
6574 | pl011_write(old_cr, uap, REG_CR); | |
6575 | ||
6576 | if (locked) | |
6577 | - spin_unlock(&uap->port.lock); | |
6578 | - local_irq_restore(flags); | |
6579 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
6580 | ||
6581 | clk_disable(uap->clk); | |
6582 | } | |
6583 | diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c | |
6584 | index a2a529994ba5..0ee7c4c518df 100644 | |
6585 | --- a/drivers/tty/serial/omap-serial.c | |
6586 | +++ b/drivers/tty/serial/omap-serial.c | |
6587 | @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, | |
6588 | ||
6589 | pm_runtime_get_sync(up->dev); | |
6590 | ||
6591 | - local_irq_save(flags); | |
6592 | - if (up->port.sysrq) | |
6593 | - locked = 0; | |
6594 | - else if (oops_in_progress) | |
6595 | - locked = spin_trylock(&up->port.lock); | |
6596 | + if (up->port.sysrq || oops_in_progress) | |
6597 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
6598 | else | |
6599 | - spin_lock(&up->port.lock); | |
6600 | + spin_lock_irqsave(&up->port.lock, flags); | |
6601 | ||
6602 | /* | |
6603 | * First save the IER then disable the interrupts | |
6604 | @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, | |
6605 | pm_runtime_mark_last_busy(up->dev); | |
6606 | pm_runtime_put_autosuspend(up->dev); | |
6607 | if (locked) | |
6608 | - spin_unlock(&up->port.lock); | |
6609 | - local_irq_restore(flags); | |
6610 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
6611 | } | |
6612 | ||
6613 | static int __init | |
6614 | diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c | |
6615 | index f36e6df2fa90..e086ea4d2997 100644 | |
6616 | --- a/drivers/tty/serial/sc16is7xx.c | |
6617 | +++ b/drivers/tty/serial/sc16is7xx.c | |
6618 | @@ -1240,7 +1240,7 @@ static int sc16is7xx_probe(struct device *dev, | |
6619 | ||
6620 | /* Setup interrupt */ | |
6621 | ret = devm_request_irq(dev, irq, sc16is7xx_irq, | |
6622 | - IRQF_ONESHOT | flags, dev_name(dev), s); | |
6623 | + flags, dev_name(dev), s); | |
6624 | if (!ret) | |
6625 | return 0; | |
6626 | ||
6627 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c | |
6628 | index d2e3f655c26f..fdd027a9bbd7 100644 | |
6629 | --- a/drivers/usb/core/hcd.c | |
6630 | +++ b/drivers/usb/core/hcd.c | |
6631 | @@ -1760,9 +1760,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) | |
6632 | * and no one may trigger the above deadlock situation when | |
6633 | * running complete() in tasklet. | |
6634 | */ | |
6635 | - local_irq_save(flags); | |
6636 | + local_irq_save_nort(flags); | |
6637 | urb->complete(urb); | |
6638 | - local_irq_restore(flags); | |
6639 | + local_irq_restore_nort(flags); | |
6640 | ||
6641 | usb_anchor_resume_wakeups(anchor); | |
6642 | atomic_dec(&urb->use_count); | |
6643 | diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c | |
6644 | index 5c8429f23a89..fa835fb1a186 100644 | |
6645 | --- a/drivers/usb/gadget/function/f_fs.c | |
6646 | +++ b/drivers/usb/gadget/function/f_fs.c | |
6647 | @@ -1509,7 +1509,7 @@ static void ffs_data_put(struct ffs_data *ffs) | |
6648 | pr_info("%s(): freeing\n", __func__); | |
6649 | ffs_data_clear(ffs); | |
6650 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
6651 | - waitqueue_active(&ffs->ep0req_completion.wait)); | |
6652 | + swait_active(&ffs->ep0req_completion.wait)); | |
6653 | kfree(ffs->dev_name); | |
6654 | kfree(ffs); | |
6655 | } | |
6656 | diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c | |
6657 | index 16104b5ebdcb..5c506c2b88ad 100644 | |
6658 | --- a/drivers/usb/gadget/legacy/inode.c | |
6659 | +++ b/drivers/usb/gadget/legacy/inode.c | |
6660 | @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
6661 | spin_unlock_irq (&epdata->dev->lock); | |
6662 | ||
6663 | if (likely (value == 0)) { | |
6664 | - value = wait_event_interruptible (done.wait, done.done); | |
6665 | + value = swait_event_interruptible (done.wait, done.done); | |
6666 | if (value != 0) { | |
6667 | spin_lock_irq (&epdata->dev->lock); | |
6668 | if (likely (epdata->ep != NULL)) { | |
6669 | @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
6670 | usb_ep_dequeue (epdata->ep, epdata->req); | |
6671 | spin_unlock_irq (&epdata->dev->lock); | |
6672 | ||
6673 | - wait_event (done.wait, done.done); | |
6674 | + swait_event (done.wait, done.done); | |
6675 | if (epdata->status == -ECONNRESET) | |
6676 | epdata->status = -EINTR; | |
6677 | } else { | |
6678 | diff --git a/fs/aio.c b/fs/aio.c | |
6679 | index 4fe81d1c60f9..e68c06a4a017 100644 | |
6680 | --- a/fs/aio.c | |
6681 | +++ b/fs/aio.c | |
6682 | @@ -40,6 +40,7 @@ | |
6683 | #include <linux/ramfs.h> | |
6684 | #include <linux/percpu-refcount.h> | |
6685 | #include <linux/mount.h> | |
6686 | +#include <linux/swork.h> | |
6687 | ||
6688 | #include <asm/kmap_types.h> | |
6689 | #include <asm/uaccess.h> | |
6690 | @@ -115,7 +116,7 @@ struct kioctx { | |
6691 | struct page **ring_pages; | |
6692 | long nr_pages; | |
6693 | ||
6694 | - struct work_struct free_work; | |
6695 | + struct swork_event free_work; | |
6696 | ||
6697 | /* | |
6698 | * signals when all in-flight requests are done | |
6699 | @@ -258,6 +259,7 @@ static int __init aio_setup(void) | |
6700 | .mount = aio_mount, | |
6701 | .kill_sb = kill_anon_super, | |
6702 | }; | |
6703 | + BUG_ON(swork_get()); | |
6704 | aio_mnt = kern_mount(&aio_fs); | |
6705 | if (IS_ERR(aio_mnt)) | |
6706 | panic("Failed to create aio fs mount."); | |
6707 | @@ -578,9 +580,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) | |
6708 | return cancel(&kiocb->common); | |
6709 | } | |
6710 | ||
6711 | -static void free_ioctx(struct work_struct *work) | |
6712 | +static void free_ioctx(struct swork_event *sev) | |
6713 | { | |
6714 | - struct kioctx *ctx = container_of(work, struct kioctx, free_work); | |
6715 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
6716 | ||
6717 | pr_debug("freeing %p\n", ctx); | |
6718 | ||
6719 | @@ -599,8 +601,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
6720 | if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) | |
6721 | complete(&ctx->rq_wait->comp); | |
6722 | ||
6723 | - INIT_WORK(&ctx->free_work, free_ioctx); | |
6724 | - schedule_work(&ctx->free_work); | |
6725 | + INIT_SWORK(&ctx->free_work, free_ioctx); | |
6726 | + swork_queue(&ctx->free_work); | |
6727 | } | |
6728 | ||
6729 | /* | |
6730 | @@ -608,9 +610,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
6731 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - | |
6732 | * now it's safe to cancel any that need to be. | |
6733 | */ | |
6734 | -static void free_ioctx_users(struct percpu_ref *ref) | |
6735 | +static void free_ioctx_users_work(struct swork_event *sev) | |
6736 | { | |
6737 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
6738 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
6739 | struct aio_kiocb *req; | |
6740 | ||
6741 | spin_lock_irq(&ctx->ctx_lock); | |
6742 | @@ -629,6 +631,14 @@ static void free_ioctx_users(struct percpu_ref *ref) | |
6743 | percpu_ref_put(&ctx->reqs); | |
6744 | } | |
6745 | ||
6746 | +static void free_ioctx_users(struct percpu_ref *ref) | |
6747 | +{ | |
6748 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
6749 | + | |
6750 | + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); | |
6751 | + swork_queue(&ctx->free_work); | |
6752 | +} | |
6753 | + | |
6754 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
6755 | { | |
6756 | unsigned i, new_nr; | |
6757 | diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h | |
6758 | index a439548de785..7c392647d03b 100644 | |
6759 | --- a/fs/autofs4/autofs_i.h | |
6760 | +++ b/fs/autofs4/autofs_i.h | |
6761 | @@ -30,6 +30,7 @@ | |
6762 | #include <linux/sched.h> | |
6763 | #include <linux/mount.h> | |
6764 | #include <linux/namei.h> | |
6765 | +#include <linux/delay.h> | |
6766 | #include <asm/current.h> | |
6767 | #include <linux/uaccess.h> | |
6768 | ||
6769 | diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c | |
6770 | index d8e6d421c27f..2e689ab1306b 100644 | |
6771 | --- a/fs/autofs4/expire.c | |
6772 | +++ b/fs/autofs4/expire.c | |
6773 | @@ -148,7 +148,7 @@ static struct dentry *get_next_positive_dentry(struct dentry *prev, | |
6774 | parent = p->d_parent; | |
6775 | if (!spin_trylock(&parent->d_lock)) { | |
6776 | spin_unlock(&p->d_lock); | |
6777 | - cpu_relax(); | |
6778 | + cpu_chill(); | |
6779 | goto relock; | |
6780 | } | |
6781 | spin_unlock(&p->d_lock); | |
6782 | diff --git a/fs/buffer.c b/fs/buffer.c | |
6783 | index 9c8eb9b6db6a..d15d77f72cf7 100644 | |
6784 | --- a/fs/buffer.c | |
6785 | +++ b/fs/buffer.c | |
6786 | @@ -301,8 +301,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6787 | * decide that the page is now completely done. | |
6788 | */ | |
6789 | first = page_buffers(page); | |
6790 | - local_irq_save(flags); | |
6791 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
6792 | + flags = bh_uptodate_lock_irqsave(first); | |
6793 | clear_buffer_async_read(bh); | |
6794 | unlock_buffer(bh); | |
6795 | tmp = bh; | |
6796 | @@ -315,8 +314,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6797 | } | |
6798 | tmp = tmp->b_this_page; | |
6799 | } while (tmp != bh); | |
6800 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6801 | - local_irq_restore(flags); | |
6802 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6803 | ||
6804 | /* | |
6805 | * If none of the buffers had errors and they are all | |
6806 | @@ -328,9 +326,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
6807 | return; | |
6808 | ||
6809 | still_busy: | |
6810 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6811 | - local_irq_restore(flags); | |
6812 | - return; | |
6813 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6814 | } | |
6815 | ||
6816 | /* | |
6817 | @@ -358,8 +354,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
6818 | } | |
6819 | ||
6820 | first = page_buffers(page); | |
6821 | - local_irq_save(flags); | |
6822 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
6823 | + flags = bh_uptodate_lock_irqsave(first); | |
6824 | ||
6825 | clear_buffer_async_write(bh); | |
6826 | unlock_buffer(bh); | |
6827 | @@ -371,15 +366,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
6828 | } | |
6829 | tmp = tmp->b_this_page; | |
6830 | } | |
6831 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6832 | - local_irq_restore(flags); | |
6833 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6834 | end_page_writeback(page); | |
6835 | return; | |
6836 | ||
6837 | still_busy: | |
6838 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
6839 | - local_irq_restore(flags); | |
6840 | - return; | |
6841 | + bh_uptodate_unlock_irqrestore(first, flags); | |
6842 | } | |
6843 | EXPORT_SYMBOL(end_buffer_async_write); | |
6844 | ||
6845 | @@ -3384,6 +3376,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) | |
6846 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); | |
6847 | if (ret) { | |
6848 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
6849 | + buffer_head_init_locks(ret); | |
6850 | preempt_disable(); | |
6851 | __this_cpu_inc(bh_accounting.nr); | |
6852 | recalc_bh_state(); | |
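The fs/buffer.c changes above replace open-coded local_irq_save() + bit_spin_lock(BH_Uptodate_Lock) pairs with the bh_uptodate_lock_irqsave()/bh_uptodate_unlock_irqrestore() helpers added to linux/buffer_head.h further down in this series. A hedged usage sketch of those helpers; the completion function itself is hypothetical:

#include <linux/buffer_head.h>

/* Illustrative only: serialise per-page buffer state in an end_io path. */
static void example_end_buffer(struct buffer_head *bh, struct page *page)
{
	struct buffer_head *first = page_buffers(page);
	unsigned long flags;

	flags = bh_uptodate_lock_irqsave(first);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	bh_uptodate_unlock_irqrestore(first, flags);
}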
6853 | diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c | |
6854 | index 8f6a2a5863b9..4217828d0b68 100644 | |
6855 | --- a/fs/cifs/readdir.c | |
6856 | +++ b/fs/cifs/readdir.c | |
6857 | @@ -80,7 +80,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, | |
6858 | struct inode *inode; | |
6859 | struct super_block *sb = parent->d_sb; | |
6860 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | |
6861 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
6862 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
6863 | ||
6864 | cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); | |
6865 | ||
6866 | diff --git a/fs/dcache.c b/fs/dcache.c | |
6867 | index 5c7cc953ac81..a9bb31f1c1af 100644 | |
6868 | --- a/fs/dcache.c | |
6869 | +++ b/fs/dcache.c | |
6870 | @@ -19,6 +19,7 @@ | |
6871 | #include <linux/mm.h> | |
6872 | #include <linux/fs.h> | |
6873 | #include <linux/fsnotify.h> | |
6874 | +#include <linux/delay.h> | |
6875 | #include <linux/slab.h> | |
6876 | #include <linux/init.h> | |
6877 | #include <linux/hash.h> | |
6878 | @@ -750,6 +751,8 @@ static inline bool fast_dput(struct dentry *dentry) | |
6879 | */ | |
6880 | void dput(struct dentry *dentry) | |
6881 | { | |
6882 | + struct dentry *parent; | |
6883 | + | |
6884 | if (unlikely(!dentry)) | |
6885 | return; | |
6886 | ||
6887 | @@ -788,9 +791,18 @@ void dput(struct dentry *dentry) | |
6888 | return; | |
6889 | ||
6890 | kill_it: | |
6891 | - dentry = dentry_kill(dentry); | |
6892 | - if (dentry) { | |
6893 | - cond_resched(); | |
6894 | + parent = dentry_kill(dentry); | |
6895 | + if (parent) { | |
6896 | + int r; | |
6897 | + | |
6898 | + if (parent == dentry) { | |
6899 | + /* the task with the highest priority won't schedule */ | |
6900 | + r = cond_resched(); | |
6901 | + if (!r) | |
6902 | + cpu_chill(); | |
6903 | + } else { | |
6904 | + dentry = parent; | |
6905 | + } | |
6906 | goto repeat; | |
6907 | } | |
6908 | } | |
6909 | @@ -2321,7 +2333,7 @@ void d_delete(struct dentry * dentry) | |
6910 | if (dentry->d_lockref.count == 1) { | |
6911 | if (!spin_trylock(&inode->i_lock)) { | |
6912 | spin_unlock(&dentry->d_lock); | |
6913 | - cpu_relax(); | |
6914 | + cpu_chill(); | |
6915 | goto again; | |
6916 | } | |
6917 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
6918 | @@ -2381,21 +2393,24 @@ static inline void end_dir_add(struct inode *dir, unsigned n) | |
6919 | ||
6920 | static void d_wait_lookup(struct dentry *dentry) | |
6921 | { | |
6922 | - if (d_in_lookup(dentry)) { | |
6923 | - DECLARE_WAITQUEUE(wait, current); | |
6924 | - add_wait_queue(dentry->d_wait, &wait); | |
6925 | - do { | |
6926 | - set_current_state(TASK_UNINTERRUPTIBLE); | |
6927 | - spin_unlock(&dentry->d_lock); | |
6928 | - schedule(); | |
6929 | - spin_lock(&dentry->d_lock); | |
6930 | - } while (d_in_lookup(dentry)); | |
6931 | - } | |
6932 | + struct swait_queue __wait; | |
6933 | + | |
6934 | + if (!d_in_lookup(dentry)) | |
6935 | + return; | |
6936 | + | |
6937 | + INIT_LIST_HEAD(&__wait.task_list); | |
6938 | + do { | |
6939 | + prepare_to_swait(dentry->d_wait, &__wait, TASK_UNINTERRUPTIBLE); | |
6940 | + spin_unlock(&dentry->d_lock); | |
6941 | + schedule(); | |
6942 | + spin_lock(&dentry->d_lock); | |
6943 | + } while (d_in_lookup(dentry)); | |
6944 | + finish_swait(dentry->d_wait, &__wait); | |
6945 | } | |
6946 | ||
6947 | struct dentry *d_alloc_parallel(struct dentry *parent, | |
6948 | const struct qstr *name, | |
6949 | - wait_queue_head_t *wq) | |
6950 | + struct swait_queue_head *wq) | |
6951 | { | |
6952 | unsigned int hash = name->hash; | |
6953 | struct hlist_bl_head *b = in_lookup_hash(parent, hash); | |
6954 | @@ -2504,7 +2519,7 @@ void __d_lookup_done(struct dentry *dentry) | |
6955 | hlist_bl_lock(b); | |
6956 | dentry->d_flags &= ~DCACHE_PAR_LOOKUP; | |
6957 | __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); | |
6958 | - wake_up_all(dentry->d_wait); | |
6959 | + swake_up_all(dentry->d_wait); | |
6960 | dentry->d_wait = NULL; | |
6961 | hlist_bl_unlock(b); | |
6962 | INIT_HLIST_NODE(&dentry->d_u.d_alias); | |
6963 | @@ -3601,6 +3616,11 @@ EXPORT_SYMBOL(d_genocide); | |
6964 | ||
6965 | void __init vfs_caches_init_early(void) | |
6966 | { | |
6967 | + int i; | |
6968 | + | |
6969 | + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) | |
6970 | + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); | |
6971 | + | |
6972 | dcache_init_early(); | |
6973 | inode_init_early(); | |
6974 | } | |
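The dcache changes above switch d_alloc_parallel() and its in-lookup waiters from wait_queue_head_t to the simple waitqueue (swait) API, whose wakeup path is short and safe under a raw spinlock on RT. A minimal swait wait/wake sketch under that assumption; the condition and names are hypothetical:

#include <linux/swait.h>
#include <linux/sched.h>

static DECLARE_SWAIT_QUEUE_HEAD(example_wq);
static bool example_done;

static void example_wait(void)
{
	DECLARE_SWAITQUEUE(wait);

	for (;;) {
		prepare_to_swait(&example_wq, &wait, TASK_UNINTERRUPTIBLE);
		if (READ_ONCE(example_done))
			break;
		schedule();
	}
	finish_swait(&example_wq, &wait);
}

static void example_complete(void)
{
	WRITE_ONCE(example_done, true);
	swake_up_all(&example_wq);	/* wake every waiter on the swait head */
}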
6975 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
6976 | index 10db91218933..42af0a06f657 100644 | |
6977 | --- a/fs/eventpoll.c | |
6978 | +++ b/fs/eventpoll.c | |
6979 | @@ -510,12 +510,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | |
6980 | */ | |
6981 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
6982 | { | |
6983 | - int this_cpu = get_cpu(); | |
6984 | + int this_cpu = get_cpu_light(); | |
6985 | ||
6986 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
6987 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
6988 | ||
6989 | - put_cpu(); | |
6990 | + put_cpu_light(); | |
6991 | } | |
6992 | ||
6993 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
6994 | diff --git a/fs/exec.c b/fs/exec.c | |
6995 | index 6fcfb3f7b137..751370a71ec5 100644 | |
6996 | --- a/fs/exec.c | |
6997 | +++ b/fs/exec.c | |
6998 | @@ -1012,12 +1012,14 @@ static int exec_mmap(struct mm_struct *mm) | |
6999 | } | |
7000 | } | |
7001 | task_lock(tsk); | |
7002 | + preempt_disable_rt(); | |
7003 | active_mm = tsk->active_mm; | |
7004 | tsk->mm = mm; | |
7005 | tsk->active_mm = mm; | |
7006 | activate_mm(active_mm, mm); | |
7007 | tsk->mm->vmacache_seqnum = 0; | |
7008 | vmacache_flush(tsk); | |
7009 | + preempt_enable_rt(); | |
7010 | task_unlock(tsk); | |
7011 | if (old_mm) { | |
7012 | up_read(&old_mm->mmap_sem); | |
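exec_mmap() above uses the preempt_disable_rt()/preempt_enable_rt() pair, part of the *_rt/*_nort convention used throughout this series: the _rt variants take effect only on an RT kernel, the _nort variants only on a mainline one, so non-RT behaviour is unchanged. The real definitions live elsewhere in the series; the sketch below only illustrates the intent and the exact config symbol may differ:

/* Sketch of the convention, not the patch's literal definitions. */
#ifdef CONFIG_PREEMPT_RT_BASE
# define preempt_disable_rt()		preempt_disable()
# define preempt_enable_rt()		preempt_enable()
# define preempt_disable_nort()	barrier()
# define preempt_enable_nort()		barrier()
#else
# define preempt_disable_rt()		barrier()
# define preempt_enable_rt()		barrier()
# define preempt_disable_nort()	preempt_disable()
# define preempt_enable_nort()		preempt_enable()
#endif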
7013 | diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c | |
7014 | index 4ff9251e9d3a..8fe489ec2ef1 100644 | |
7015 | --- a/fs/fuse/dir.c | |
7016 | +++ b/fs/fuse/dir.c | |
7017 | @@ -1174,7 +1174,7 @@ static int fuse_direntplus_link(struct file *file, | |
7018 | struct inode *dir = d_inode(parent); | |
7019 | struct fuse_conn *fc; | |
7020 | struct inode *inode; | |
7021 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7022 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7023 | ||
7024 | if (!o->nodeid) { | |
7025 | /* | |
7026 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c | |
7027 | index 684996c8a3a4..6e18a06aaabe 100644 | |
7028 | --- a/fs/jbd2/checkpoint.c | |
7029 | +++ b/fs/jbd2/checkpoint.c | |
7030 | @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |
7031 | nblocks = jbd2_space_needed(journal); | |
7032 | while (jbd2_log_space_left(journal) < nblocks) { | |
7033 | write_unlock(&journal->j_state_lock); | |
7034 | + if (current->plug) | |
7035 | + io_schedule(); | |
7036 | mutex_lock(&journal->j_checkpoint_mutex); | |
7037 | ||
7038 | /* | |
7039 | diff --git a/fs/namei.c b/fs/namei.c | |
7040 | index adb04146df09..a89dfaf9f209 100644 | |
7041 | --- a/fs/namei.c | |
7042 | +++ b/fs/namei.c | |
7043 | @@ -1629,7 +1629,7 @@ static struct dentry *lookup_slow(const struct qstr *name, | |
7044 | { | |
7045 | struct dentry *dentry = ERR_PTR(-ENOENT), *old; | |
7046 | struct inode *inode = dir->d_inode; | |
7047 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7048 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7049 | ||
7050 | inode_lock_shared(inode); | |
7051 | /* Don't go there if it's already dead */ | |
7052 | @@ -3086,7 +3086,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |
7053 | struct dentry *dentry; | |
7054 | int error, create_error = 0; | |
7055 | umode_t mode = op->mode; | |
7056 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7057 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7058 | ||
7059 | if (unlikely(IS_DEADDIR(dir_inode))) | |
7060 | return -ENOENT; | |
7061 | diff --git a/fs/namespace.c b/fs/namespace.c | |
7062 | index 7bb2cda3bfef..cf79b18e7b58 100644 | |
7063 | --- a/fs/namespace.c | |
7064 | +++ b/fs/namespace.c | |
7065 | @@ -14,6 +14,7 @@ | |
7066 | #include <linux/mnt_namespace.h> | |
7067 | #include <linux/user_namespace.h> | |
7068 | #include <linux/namei.h> | |
7069 | +#include <linux/delay.h> | |
7070 | #include <linux/security.h> | |
7071 | #include <linux/idr.h> | |
7072 | #include <linux/init.h> /* init_rootfs */ | |
7073 | @@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m) | |
7074 | * incremented count after it has set MNT_WRITE_HOLD. | |
7075 | */ | |
7076 | smp_mb(); | |
7077 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
7078 | - cpu_relax(); | |
7079 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
7080 | + preempt_enable(); | |
7081 | + cpu_chill(); | |
7082 | + preempt_disable(); | |
7083 | + } | |
7084 | /* | |
7085 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
7086 | * be set to match its requirements. So we must not load that until | |
7087 | diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c | |
7088 | index b9c65421ed81..03ffe8af8785 100644 | |
7089 | --- a/fs/nfs/delegation.c | |
7090 | +++ b/fs/nfs/delegation.c | |
7091 | @@ -150,11 +150,11 @@ static int nfs_delegation_claim_opens(struct inode *inode, | |
7092 | sp = state->owner; | |
7093 | /* Block nfs4_proc_unlck */ | |
7094 | mutex_lock(&sp->so_delegreturn_mutex); | |
7095 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
7096 | + seq = read_seqbegin(&sp->so_reclaim_seqlock); | |
7097 | err = nfs4_open_delegation_recall(ctx, state, stateid, type); | |
7098 | if (!err) | |
7099 | err = nfs_delegation_claim_locks(ctx, state, stateid); | |
7100 | - if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
7101 | + if (!err && read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
7102 | err = -EAGAIN; | |
7103 | mutex_unlock(&sp->so_delegreturn_mutex); | |
7104 | put_nfs_open_context(ctx); | |
7105 | diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c | |
7106 | index 6bc5a68e39f1..ce6488e07a13 100644 | |
7107 | --- a/fs/nfs/dir.c | |
7108 | +++ b/fs/nfs/dir.c | |
7109 | @@ -485,7 +485,7 @@ static | |
7110 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | |
7111 | { | |
7112 | struct qstr filename = QSTR_INIT(entry->name, entry->len); | |
7113 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7114 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7115 | struct dentry *dentry; | |
7116 | struct dentry *alias; | |
7117 | struct inode *dir = d_inode(parent); | |
7118 | @@ -1490,7 +1490,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, | |
7119 | struct file *file, unsigned open_flags, | |
7120 | umode_t mode, int *opened) | |
7121 | { | |
7122 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7123 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7124 | struct nfs_open_context *ctx; | |
7125 | struct dentry *res; | |
7126 | struct iattr attr = { .ia_valid = ATTR_OPEN }; | |
7127 | @@ -1805,7 +1805,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |
7128 | ||
7129 | trace_nfs_rmdir_enter(dir, dentry); | |
7130 | if (d_really_is_positive(dentry)) { | |
7131 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7132 | + down(&NFS_I(d_inode(dentry))->rmdir_sem); | |
7133 | +#else | |
7134 | down_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
7135 | +#endif | |
7136 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
7137 | /* Ensure the VFS deletes this inode */ | |
7138 | switch (error) { | |
7139 | @@ -1815,7 +1819,11 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |
7140 | case -ENOENT: | |
7141 | nfs_dentry_handle_enoent(dentry); | |
7142 | } | |
7143 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7144 | + up(&NFS_I(d_inode(dentry))->rmdir_sem); | |
7145 | +#else | |
7146 | up_write(&NFS_I(d_inode(dentry))->rmdir_sem); | |
7147 | +#endif | |
7148 | } else | |
7149 | error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); | |
7150 | trace_nfs_rmdir_exit(dir, dentry, error); | |
7151 | diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c | |
7152 | index bf4ec5ecc97e..36cd5fc9192c 100644 | |
7153 | --- a/fs/nfs/inode.c | |
7154 | +++ b/fs/nfs/inode.c | |
7155 | @@ -1957,7 +1957,11 @@ static void init_once(void *foo) | |
7156 | nfsi->nrequests = 0; | |
7157 | nfsi->commit_info.ncommit = 0; | |
7158 | atomic_set(&nfsi->commit_info.rpcs_out, 0); | |
7159 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7160 | + sema_init(&nfsi->rmdir_sem, 1); | |
7161 | +#else | |
7162 | init_rwsem(&nfsi->rmdir_sem); | |
7163 | +#endif | |
7164 | nfs4_init_once(nfsi); | |
7165 | } | |
7166 | ||
7167 | diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h | |
7168 | index 9bf64eacba5b..041da5cb80f5 100644 | |
7169 | --- a/fs/nfs/nfs4_fs.h | |
7170 | +++ b/fs/nfs/nfs4_fs.h | |
7171 | @@ -107,7 +107,7 @@ struct nfs4_state_owner { | |
7172 | unsigned long so_flags; | |
7173 | struct list_head so_states; | |
7174 | struct nfs_seqid_counter so_seqid; | |
7175 | - seqcount_t so_reclaim_seqcount; | |
7176 | + seqlock_t so_reclaim_seqlock; | |
7177 | struct mutex so_delegreturn_mutex; | |
7178 | }; | |
7179 | ||
7180 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | |
7181 | index a9dec32ba9ba..49b64dfb307c 100644 | |
7182 | --- a/fs/nfs/nfs4proc.c | |
7183 | +++ b/fs/nfs/nfs4proc.c | |
7184 | @@ -2525,7 +2525,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |
7185 | unsigned int seq; | |
7186 | int ret; | |
7187 | ||
7188 | - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); | |
7189 | + seq = raw_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
7190 | ||
7191 | ret = _nfs4_proc_open(opendata); | |
7192 | if (ret != 0) | |
7193 | @@ -2561,7 +2561,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |
7194 | ctx->state = state; | |
7195 | if (d_inode(dentry) == state->inode) { | |
7196 | nfs_inode_attach_open_context(ctx); | |
7197 | - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) | |
7198 | + if (read_seqretry(&sp->so_reclaim_seqlock, seq)) | |
7199 | nfs4_schedule_stateid_recovery(server, state); | |
7200 | } | |
7201 | out: | |
7202 | diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c | |
7203 | index 8353f33f0466..657e13ed4b5d 100644 | |
7204 | --- a/fs/nfs/nfs4state.c | |
7205 | +++ b/fs/nfs/nfs4state.c | |
7206 | @@ -488,7 +488,7 @@ nfs4_alloc_state_owner(struct nfs_server *server, | |
7207 | nfs4_init_seqid_counter(&sp->so_seqid); | |
7208 | atomic_set(&sp->so_count, 1); | |
7209 | INIT_LIST_HEAD(&sp->so_lru); | |
7210 | - seqcount_init(&sp->so_reclaim_seqcount); | |
7211 | + seqlock_init(&sp->so_reclaim_seqlock); | |
7212 | mutex_init(&sp->so_delegreturn_mutex); | |
7213 | return sp; | |
7214 | } | |
7215 | @@ -1459,8 +1459,12 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |
7216 | * recovering after a network partition or a reboot from a | |
7217 | * server that doesn't support a grace period. | |
7218 | */ | |
7219 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7220 | + write_seqlock(&sp->so_reclaim_seqlock); | |
7221 | +#else | |
7222 | + write_seqcount_begin(&sp->so_reclaim_seqlock.seqcount); | |
7223 | +#endif | |
7224 | spin_lock(&sp->so_lock); | |
7225 | - raw_write_seqcount_begin(&sp->so_reclaim_seqcount); | |
7226 | restart: | |
7227 | list_for_each_entry(state, &sp->so_states, open_states) { | |
7228 | if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) | |
7229 | @@ -1528,14 +1532,20 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs | |
7230 | spin_lock(&sp->so_lock); | |
7231 | goto restart; | |
7232 | } | |
7233 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
7234 | spin_unlock(&sp->so_lock); | |
7235 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7236 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
7237 | +#else | |
7238 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
7239 | +#endif | |
7240 | return 0; | |
7241 | out_err: | |
7242 | nfs4_put_open_state(state); | |
7243 | - spin_lock(&sp->so_lock); | |
7244 | - raw_write_seqcount_end(&sp->so_reclaim_seqcount); | |
7245 | - spin_unlock(&sp->so_lock); | |
7246 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7247 | + write_sequnlock(&sp->so_reclaim_seqlock); | |
7248 | +#else | |
7249 | + write_seqcount_end(&sp->so_reclaim_seqlock.seqcount); | |
7250 | +#endif | |
7251 | return status; | |
7252 | } | |
7253 | ||
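The NFS changes above promote so_reclaim_seqcount to a full seqlock_t so that the writer side can take the embedded spinlock on RT instead of relying on preemption being disabled around the write section. A generic reader/writer sketch with the seqlock API; the data and names are hypothetical:

#include <linux/seqlock.h>

static DEFINE_SEQLOCK(example_seqlock);
static u64 example_value;

static void example_write(u64 v)
{
	write_seqlock(&example_seqlock);	/* takes the internal spinlock */
	example_value = v;
	write_sequnlock(&example_seqlock);
}

static u64 example_read(void)
{
	unsigned int seq;
	u64 v;

	do {
		seq = read_seqbegin(&example_seqlock);
		v = example_value;
	} while (read_seqretry(&example_seqlock, seq));	/* retry if a write raced */

	return v;
}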
7254 | diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c | |
7255 | index 191aa577dd1f..58990c8f52e0 100644 | |
7256 | --- a/fs/nfs/unlink.c | |
7257 | +++ b/fs/nfs/unlink.c | |
7258 | @@ -12,7 +12,7 @@ | |
7259 | #include <linux/sunrpc/clnt.h> | |
7260 | #include <linux/nfs_fs.h> | |
7261 | #include <linux/sched.h> | |
7262 | -#include <linux/wait.h> | |
7263 | +#include <linux/swait.h> | |
7264 | #include <linux/namei.h> | |
7265 | #include <linux/fsnotify.h> | |
7266 | ||
7267 | @@ -51,6 +51,29 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) | |
7268 | rpc_restart_call_prepare(task); | |
7269 | } | |
7270 | ||
7271 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7272 | +static void nfs_down_anon(struct semaphore *sema) | |
7273 | +{ | |
7274 | + down(sema); | |
7275 | +} | |
7276 | + | |
7277 | +static void nfs_up_anon(struct semaphore *sema) | |
7278 | +{ | |
7279 | + up(sema); | |
7280 | +} | |
7281 | + | |
7282 | +#else | |
7283 | +static void nfs_down_anon(struct rw_semaphore *rwsem) | |
7284 | +{ | |
7285 | + down_read_non_owner(rwsem); | |
7286 | +} | |
7287 | + | |
7288 | +static void nfs_up_anon(struct rw_semaphore *rwsem) | |
7289 | +{ | |
7290 | + up_read_non_owner(rwsem); | |
7291 | +} | |
7292 | +#endif | |
7293 | + | |
7294 | /** | |
7295 | * nfs_async_unlink_release - Release the sillydelete data. | |
7296 | * @task: rpc_task of the sillydelete | |
7297 | @@ -64,7 +87,7 @@ static void nfs_async_unlink_release(void *calldata) | |
7298 | struct dentry *dentry = data->dentry; | |
7299 | struct super_block *sb = dentry->d_sb; | |
7300 | ||
7301 | - up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
7302 | + nfs_up_anon(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); | |
7303 | d_lookup_done(dentry); | |
7304 | nfs_free_unlinkdata(data); | |
7305 | dput(dentry); | |
7306 | @@ -117,10 +140,10 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
7307 | struct inode *dir = d_inode(dentry->d_parent); | |
7308 | struct dentry *alias; | |
7309 | ||
7310 | - down_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
7311 | + nfs_down_anon(&NFS_I(dir)->rmdir_sem); | |
7312 | alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); | |
7313 | if (IS_ERR(alias)) { | |
7314 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
7315 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
7316 | return 0; | |
7317 | } | |
7318 | if (!d_in_lookup(alias)) { | |
7319 | @@ -142,7 +165,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) | |
7320 | ret = 0; | |
7321 | spin_unlock(&alias->d_lock); | |
7322 | dput(alias); | |
7323 | - up_read_non_owner(&NFS_I(dir)->rmdir_sem); | |
7324 | + nfs_up_anon(&NFS_I(dir)->rmdir_sem); | |
7325 | /* | |
7326 | * If we'd displaced old cached devname, free it. At that | |
7327 | * point dentry is definitely not a root, so we won't need | |
7328 | @@ -182,7 +205,7 @@ nfs_async_unlink(struct dentry *dentry, const struct qstr *name) | |
7329 | goto out_free_name; | |
7330 | } | |
7331 | data->res.dir_attr = &data->dir_attr; | |
7332 | - init_waitqueue_head(&data->wq); | |
7333 | + init_swait_queue_head(&data->wq); | |
7334 | ||
7335 | status = -EBUSY; | |
7336 | spin_lock(&dentry->d_lock); | |
7337 | diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c | |
7338 | index fe251f187ff8..e89da4fb14c2 100644 | |
7339 | --- a/fs/ntfs/aops.c | |
7340 | +++ b/fs/ntfs/aops.c | |
7341 | @@ -92,13 +92,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
7342 | ofs = 0; | |
7343 | if (file_ofs < init_size) | |
7344 | ofs = init_size - file_ofs; | |
7345 | - local_irq_save(flags); | |
7346 | + local_irq_save_nort(flags); | |
7347 | kaddr = kmap_atomic(page); | |
7348 | memset(kaddr + bh_offset(bh) + ofs, 0, | |
7349 | bh->b_size - ofs); | |
7350 | flush_dcache_page(page); | |
7351 | kunmap_atomic(kaddr); | |
7352 | - local_irq_restore(flags); | |
7353 | + local_irq_restore_nort(flags); | |
7354 | } | |
7355 | } else { | |
7356 | clear_buffer_uptodate(bh); | |
7357 | @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
7358 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
7359 | } | |
7360 | first = page_buffers(page); | |
7361 | - local_irq_save(flags); | |
7362 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
7363 | + flags = bh_uptodate_lock_irqsave(first); | |
7364 | clear_buffer_async_read(bh); | |
7365 | unlock_buffer(bh); | |
7366 | tmp = bh; | |
7367 | @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
7368 | } | |
7369 | tmp = tmp->b_this_page; | |
7370 | } while (tmp != bh); | |
7371 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
7372 | - local_irq_restore(flags); | |
7373 | + bh_uptodate_unlock_irqrestore(first, flags); | |
7374 | /* | |
7375 | * If none of the buffers had errors then we can set the page uptodate, | |
7376 | * but we first have to perform the post read mst fixups, if the | |
7377 | @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
7378 | recs = PAGE_SIZE / rec_size; | |
7379 | /* Should have been verified before we got here... */ | |
7380 | BUG_ON(!recs); | |
7381 | - local_irq_save(flags); | |
7382 | + local_irq_save_nort(flags); | |
7383 | kaddr = kmap_atomic(page); | |
7384 | for (i = 0; i < recs; i++) | |
7385 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
7386 | i * rec_size), rec_size); | |
7387 | kunmap_atomic(kaddr); | |
7388 | - local_irq_restore(flags); | |
7389 | + local_irq_restore_nort(flags); | |
7390 | flush_dcache_page(page); | |
7391 | if (likely(page_uptodate && !PageError(page))) | |
7392 | SetPageUptodate(page); | |
7393 | @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
7394 | unlock_page(page); | |
7395 | return; | |
7396 | still_busy: | |
7397 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
7398 | - local_irq_restore(flags); | |
7399 | - return; | |
7400 | + bh_uptodate_unlock_irqrestore(first, flags); | |
7401 | } | |
7402 | ||
7403 | /** | |
7404 | diff --git a/fs/proc/base.c b/fs/proc/base.c | |
7405 | index ac0df4dde823..ad1a4723ffdd 100644 | |
7406 | --- a/fs/proc/base.c | |
7407 | +++ b/fs/proc/base.c | |
7408 | @@ -1819,7 +1819,7 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, | |
7409 | ||
7410 | child = d_hash_and_lookup(dir, &qname); | |
7411 | if (!child) { | |
7412 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7413 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7414 | child = d_alloc_parallel(dir, &qname, &wq); | |
7415 | if (IS_ERR(child)) | |
7416 | goto end_instantiate; | |
7417 | diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c | |
7418 | index 1b93650dda2f..c553bf3ea541 100644 | |
7419 | --- a/fs/proc/proc_sysctl.c | |
7420 | +++ b/fs/proc/proc_sysctl.c | |
7421 | @@ -627,7 +627,7 @@ static bool proc_sys_fill_cache(struct file *file, | |
7422 | ||
7423 | child = d_lookup(dir, &qname); | |
7424 | if (!child) { | |
7425 | - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); | |
7426 | + DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(wq); | |
7427 | child = d_alloc_parallel(dir, &qname, &wq); | |
7428 | if (IS_ERR(child)) | |
7429 | return false; | |
7430 | diff --git a/fs/timerfd.c b/fs/timerfd.c | |
7431 | index 9ae4abb4110b..8644b67c48fd 100644 | |
7432 | --- a/fs/timerfd.c | |
7433 | +++ b/fs/timerfd.c | |
7434 | @@ -460,7 +460,10 @@ static int do_timerfd_settime(int ufd, int flags, | |
7435 | break; | |
7436 | } | |
7437 | spin_unlock_irq(&ctx->wqh.lock); | |
7438 | - cpu_relax(); | |
7439 | + if (isalarm(ctx)) | |
7440 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
7441 | + else | |
7442 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
7443 | } | |
7444 | ||
7445 | /* | |
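In the timerfd hunk above, spinning with cpu_relax() until a timer callback finishes can deadlock on RT, where the callback runs in a preemptible softirq thread; hrtimer_wait_for_timer(), declared later in linux/hrtimer.h, blocks on the timer base's waitqueue instead and degrades to cpu_relax() on !RT. A hedged cancel-and-wait sketch using that helper; the caller is hypothetical:

#include <linux/hrtimer.h>

/* Illustrative only: stop a timer whose callback might be running. */
static void example_cancel_sync(struct hrtimer *timer)
{
	while (hrtimer_try_to_cancel(timer) < 0) {
		/* callback in progress: wait for it rather than spinning */
		hrtimer_wait_for_timer(timer);
	}
}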
7446 | diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h | |
7447 | index 93b61b1f2beb..58270adb46ce 100644 | |
7448 | --- a/include/acpi/platform/aclinux.h | |
7449 | +++ b/include/acpi/platform/aclinux.h | |
7450 | @@ -131,6 +131,7 @@ | |
7451 | ||
7452 | #define acpi_cache_t struct kmem_cache | |
7453 | #define acpi_spinlock spinlock_t * | |
7454 | +#define acpi_raw_spinlock raw_spinlock_t * | |
7455 | #define acpi_cpu_flags unsigned long | |
7456 | ||
7457 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
7458 | @@ -149,6 +150,20 @@ | |
7459 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id | |
7460 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
7461 | ||
7462 | +#define acpi_os_create_raw_lock(__handle) \ | |
7463 | +({ \ | |
7464 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
7465 | + \ | |
7466 | + if (lock) { \ | |
7467 | + *(__handle) = lock; \ | |
7468 | + raw_spin_lock_init(*(__handle)); \ | |
7469 | + } \ | |
7470 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
7471 | + }) | |
7472 | + | |
7473 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
7474 | + | |
7475 | + | |
7476 | /* | |
7477 | * OSL interfaces used by debugger/disassembler | |
7478 | */ | |
7479 | diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h | |
7480 | index 6f96247226a4..fa53a21263c2 100644 | |
7481 | --- a/include/asm-generic/bug.h | |
7482 | +++ b/include/asm-generic/bug.h | |
7483 | @@ -215,6 +215,20 @@ void __warn(const char *file, int line, void *caller, unsigned taint, | |
7484 | # define WARN_ON_SMP(x) ({0;}) | |
7485 | #endif | |
7486 | ||
7487 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7488 | +# define BUG_ON_RT(c) BUG_ON(c) | |
7489 | +# define BUG_ON_NONRT(c) do { } while (0) | |
7490 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
7491 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
7492 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
7493 | +#else | |
7494 | +# define BUG_ON_RT(c) do { } while (0) | |
7495 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
7496 | +# define WARN_ON_RT(condition) do { } while (0) | |
7497 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
7498 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
7499 | +#endif | |
7500 | + | |
7501 | #endif /* __ASSEMBLY__ */ | |
7502 | ||
7503 | #endif | |
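The BUG_ON_RT/WARN_ON_NONRT family above lets an assertion stay active only on the configuration where its precondition still holds; for example, a check that hard interrupts are off keeps warning on mainline but is compiled out on RT, where the same section runs under a sleeping lock. A small hypothetical usage sketch:

#include <linux/bug.h>
#include <linux/irqflags.h>

static void example_update(void)
{
	/* On !RT this path runs with hard interrupts disabled; on RT it does not. */
	WARN_ON_NONRT(!irqs_disabled());

	/* ... per-cpu or otherwise irq-protected update ... */
}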
7504 | diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h | |
7505 | index e43bbffb5b7a..c23892264109 100644 | |
7506 | --- a/include/linux/blk-mq.h | |
7507 | +++ b/include/linux/blk-mq.h | |
7508 | @@ -222,6 +222,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) | |
7509 | ||
7510 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); | |
7511 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); | |
7512 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); | |
7513 | ||
7514 | int blk_mq_request_started(struct request *rq); | |
7515 | void blk_mq_start_request(struct request *rq); | |
7516 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h | |
7517 | index e79055c8b577..8583c1af14ad 100644 | |
7518 | --- a/include/linux/blkdev.h | |
7519 | +++ b/include/linux/blkdev.h | |
7520 | @@ -89,6 +89,7 @@ struct request { | |
7521 | struct list_head queuelist; | |
7522 | union { | |
7523 | struct call_single_data csd; | |
7524 | + struct work_struct work; | |
7525 | u64 fifo_time; | |
7526 | }; | |
7527 | ||
7528 | @@ -467,7 +468,7 @@ struct request_queue { | |
7529 | struct throtl_data *td; | |
7530 | #endif | |
7531 | struct rcu_head rcu_head; | |
7532 | - wait_queue_head_t mq_freeze_wq; | |
7533 | + struct swait_queue_head mq_freeze_wq; | |
7534 | struct percpu_ref q_usage_counter; | |
7535 | struct list_head all_q_node; | |
7536 | ||
7537 | diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h | |
7538 | index 8fdcb783197d..d07dbeec7bc1 100644 | |
7539 | --- a/include/linux/bottom_half.h | |
7540 | +++ b/include/linux/bottom_half.h | |
7541 | @@ -3,6 +3,39 @@ | |
7542 | ||
7543 | #include <linux/preempt.h> | |
7544 | ||
7545 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7546 | + | |
7547 | +extern void __local_bh_disable(void); | |
7548 | +extern void _local_bh_enable(void); | |
7549 | +extern void __local_bh_enable(void); | |
7550 | + | |
7551 | +static inline void local_bh_disable(void) | |
7552 | +{ | |
7553 | + __local_bh_disable(); | |
7554 | +} | |
7555 | + | |
7556 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
7557 | +{ | |
7558 | + __local_bh_disable(); | |
7559 | +} | |
7560 | + | |
7561 | +static inline void local_bh_enable(void) | |
7562 | +{ | |
7563 | + __local_bh_enable(); | |
7564 | +} | |
7565 | + | |
7566 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
7567 | +{ | |
7568 | + __local_bh_enable(); | |
7569 | +} | |
7570 | + | |
7571 | +static inline void local_bh_enable_ip(unsigned long ip) | |
7572 | +{ | |
7573 | + __local_bh_enable(); | |
7574 | +} | |
7575 | + | |
7576 | +#else | |
7577 | + | |
7578 | #ifdef CONFIG_TRACE_IRQFLAGS | |
7579 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
7580 | #else | |
7581 | @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) | |
7582 | { | |
7583 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
7584 | } | |
7585 | +#endif | |
7586 | ||
7587 | #endif /* _LINUX_BH_H */ | |
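On RT the local_bh_*() entry points above all funnel into a __local_bh_disable()/__local_bh_enable() pair implemented in the softirq code, so a bottom-half section becomes preemptible while still excluding this CPU's softirqs. Callers are unchanged; a minimal hypothetical example:

#include <linux/bottom_half.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_count);

static void example_bump(void)
{
	local_bh_disable();		/* keep softirqs away from the counter */
	__this_cpu_inc(example_count);
	local_bh_enable();
}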
7588 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
7589 | index ebbacd14d450..be5e87f6360a 100644 | |
7590 | --- a/include/linux/buffer_head.h | |
7591 | +++ b/include/linux/buffer_head.h | |
7592 | @@ -75,8 +75,50 @@ struct buffer_head { | |
7593 | struct address_space *b_assoc_map; /* mapping this buffer is | |
7594 | associated with */ | |
7595 | atomic_t b_count; /* users using this buffer_head */ | |
7596 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7597 | + spinlock_t b_uptodate_lock; | |
7598 | +#if IS_ENABLED(CONFIG_JBD2) | |
7599 | + spinlock_t b_state_lock; | |
7600 | + spinlock_t b_journal_head_lock; | |
7601 | +#endif | |
7602 | +#endif | |
7603 | }; | |
7604 | ||
7605 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
7606 | +{ | |
7607 | + unsigned long flags; | |
7608 | + | |
7609 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7610 | + local_irq_save(flags); | |
7611 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
7612 | +#else | |
7613 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
7614 | +#endif | |
7615 | + return flags; | |
7616 | +} | |
7617 | + | |
7618 | +static inline void | |
7619 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
7620 | +{ | |
7621 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
7622 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
7623 | + local_irq_restore(flags); | |
7624 | +#else | |
7625 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
7626 | +#endif | |
7627 | +} | |
7628 | + | |
7629 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
7630 | +{ | |
7631 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7632 | + spin_lock_init(&bh->b_uptodate_lock); | |
7633 | +#if IS_ENABLED(CONFIG_JBD2) | |
7634 | + spin_lock_init(&bh->b_state_lock); | |
7635 | + spin_lock_init(&bh->b_journal_head_lock); | |
7636 | +#endif | |
7637 | +#endif | |
7638 | +} | |
7639 | + | |
7640 | /* | |
7641 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
7642 | * and buffer_foo() functions. | |
7643 | diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h | |
7644 | index 5b17de62c962..56027cc01a56 100644 | |
7645 | --- a/include/linux/cgroup-defs.h | |
7646 | +++ b/include/linux/cgroup-defs.h | |
7647 | @@ -16,6 +16,7 @@ | |
7648 | #include <linux/percpu-refcount.h> | |
7649 | #include <linux/percpu-rwsem.h> | |
7650 | #include <linux/workqueue.h> | |
7651 | +#include <linux/swork.h> | |
7652 | ||
7653 | #ifdef CONFIG_CGROUPS | |
7654 | ||
7655 | @@ -137,6 +138,7 @@ struct cgroup_subsys_state { | |
7656 | /* percpu_ref killing and RCU release */ | |
7657 | struct rcu_head rcu_head; | |
7658 | struct work_struct destroy_work; | |
7659 | + struct swork_event destroy_swork; | |
7660 | }; | |
7661 | ||
7662 | /* | |
7663 | diff --git a/include/linux/completion.h b/include/linux/completion.h | |
7664 | index 5d5aaae3af43..3bca1590e29f 100644 | |
7665 | --- a/include/linux/completion.h | |
7666 | +++ b/include/linux/completion.h | |
7667 | @@ -7,8 +7,7 @@ | |
7668 | * Atomic wait-for-completion handler data structures. | |
7669 | * See kernel/sched/completion.c for details. | |
7670 | */ | |
7671 | - | |
7672 | -#include <linux/wait.h> | |
7673 | +#include <linux/swait.h> | |
7674 | ||
7675 | /* | |
7676 | * struct completion - structure used to maintain state for a "completion" | |
7677 | @@ -24,11 +23,11 @@ | |
7678 | */ | |
7679 | struct completion { | |
7680 | unsigned int done; | |
7681 | - wait_queue_head_t wait; | |
7682 | + struct swait_queue_head wait; | |
7683 | }; | |
7684 | ||
7685 | #define COMPLETION_INITIALIZER(work) \ | |
7686 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
7687 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
7688 | ||
7689 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
7690 | ({ init_completion(&work); work; }) | |
7691 | @@ -73,7 +72,7 @@ struct completion { | |
7692 | static inline void init_completion(struct completion *x) | |
7693 | { | |
7694 | x->done = 0; | |
7695 | - init_waitqueue_head(&x->wait); | |
7696 | + init_swait_queue_head(&x->wait); | |
7697 | } | |
7698 | ||
7699 | /** | |
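Completions keep their external API in the hunk above; only the backing waitqueue becomes a simple waitqueue so the wake side stays raw-spinlock-safe on RT. Standard usage is therefore unaffected, as in this hypothetical sketch:

#include <linux/completion.h>

static DECLARE_COMPLETION(example_done);

static int example_waiter(void *unused)
{
	wait_for_completion(&example_done);	/* sleeps on the swait-backed head */
	return 0;
}

static void example_signal(void)
{
	complete(&example_done);		/* wakes exactly one waiter */
}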
7700 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | |
7701 | index 797d9c8e9a1b..6eabd9e8a98b 100644 | |
7702 | --- a/include/linux/cpu.h | |
7703 | +++ b/include/linux/cpu.h | |
7704 | @@ -201,6 +201,8 @@ extern void get_online_cpus(void); | |
7705 | extern void put_online_cpus(void); | |
7706 | extern void cpu_hotplug_disable(void); | |
7707 | extern void cpu_hotplug_enable(void); | |
7708 | +extern void pin_current_cpu(void); | |
7709 | +extern void unpin_current_cpu(void); | |
7710 | #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) | |
7711 | #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) | |
7712 | #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) | |
7713 | @@ -218,6 +220,8 @@ static inline void cpu_hotplug_done(void) {} | |
7714 | #define put_online_cpus() do { } while (0) | |
7715 | #define cpu_hotplug_disable() do { } while (0) | |
7716 | #define cpu_hotplug_enable() do { } while (0) | |
7717 | +static inline void pin_current_cpu(void) { } | |
7718 | +static inline void unpin_current_cpu(void) { } | |
7719 | #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
7720 | #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
7721 | /* These aren't inline functions due to a GCC bug. */ | |
7722 | diff --git a/include/linux/dcache.h b/include/linux/dcache.h | |
7723 | index 5ff3e9a4fe5f..ed0431599fd7 100644 | |
7724 | --- a/include/linux/dcache.h | |
7725 | +++ b/include/linux/dcache.h | |
7726 | @@ -11,6 +11,7 @@ | |
7727 | #include <linux/rcupdate.h> | |
7728 | #include <linux/lockref.h> | |
7729 | #include <linux/stringhash.h> | |
7730 | +#include <linux/wait.h> | |
7731 | ||
7732 | struct path; | |
7733 | struct vfsmount; | |
7734 | @@ -100,7 +101,7 @@ struct dentry { | |
7735 | ||
7736 | union { | |
7737 | struct list_head d_lru; /* LRU list */ | |
7738 | - wait_queue_head_t *d_wait; /* in-lookup ones only */ | |
7739 | + struct swait_queue_head *d_wait; /* in-lookup ones only */ | |
7740 | }; | |
7741 | struct list_head d_child; /* child of parent list */ | |
7742 | struct list_head d_subdirs; /* our children */ | |
7743 | @@ -230,7 +231,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op | |
7744 | extern struct dentry * d_alloc(struct dentry *, const struct qstr *); | |
7745 | extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); | |
7746 | extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, | |
7747 | - wait_queue_head_t *); | |
7748 | + struct swait_queue_head *); | |
7749 | extern struct dentry * d_splice_alias(struct inode *, struct dentry *); | |
7750 | extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); | |
7751 | extern struct dentry * d_exact_alias(struct dentry *, struct inode *); | |
7752 | diff --git a/include/linux/delay.h b/include/linux/delay.h | |
7753 | index a6ecb34cf547..37caab306336 100644 | |
7754 | --- a/include/linux/delay.h | |
7755 | +++ b/include/linux/delay.h | |
7756 | @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) | |
7757 | msleep(seconds * 1000); | |
7758 | } | |
7759 | ||
7760 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7761 | +extern void cpu_chill(void); | |
7762 | +#else | |
7763 | +# define cpu_chill() cpu_relax() | |
7764 | +#endif | |
7765 | + | |
7766 | #endif /* defined(_LINUX_DELAY_H) */ | |
7767 | diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h | |
7768 | index 7d565afe35d2..8e31b4d245d2 100644 | |
7769 | --- a/include/linux/ftrace.h | |
7770 | +++ b/include/linux/ftrace.h | |
7771 | @@ -714,6 +714,7 @@ static inline void __ftrace_enabled_restore(int enabled) | |
7772 | #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) | |
7773 | #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) | |
7774 | ||
7775 | +#ifdef CONFIG_USING_GET_LOCK_PARENT_IP | |
7776 | static inline unsigned long get_lock_parent_ip(void) | |
7777 | { | |
7778 | unsigned long addr = CALLER_ADDR0; | |
7779 | @@ -725,6 +726,7 @@ static inline unsigned long get_lock_parent_ip(void) | |
7780 | return addr; | |
7781 | return CALLER_ADDR2; | |
7782 | } | |
7783 | +#endif | |
7784 | ||
7785 | #ifdef CONFIG_IRQSOFF_TRACER | |
7786 | extern void time_hardirqs_on(unsigned long a0, unsigned long a1); | |
7787 | diff --git a/include/linux/highmem.h b/include/linux/highmem.h | |
7788 | index bb3f3297062a..a117a33ef72c 100644 | |
7789 | --- a/include/linux/highmem.h | |
7790 | +++ b/include/linux/highmem.h | |
7791 | @@ -7,6 +7,7 @@ | |
7792 | #include <linux/mm.h> | |
7793 | #include <linux/uaccess.h> | |
7794 | #include <linux/hardirq.h> | |
7795 | +#include <linux/sched.h> | |
7796 | ||
7797 | #include <asm/cacheflush.h> | |
7798 | ||
7799 | @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) | |
7800 | ||
7801 | static inline void *kmap_atomic(struct page *page) | |
7802 | { | |
7803 | - preempt_disable(); | |
7804 | + preempt_disable_nort(); | |
7805 | pagefault_disable(); | |
7806 | return page_address(page); | |
7807 | } | |
7808 | @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |
7809 | static inline void __kunmap_atomic(void *addr) | |
7810 | { | |
7811 | pagefault_enable(); | |
7812 | - preempt_enable(); | |
7813 | + preempt_enable_nort(); | |
7814 | } | |
7815 | ||
7816 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
7817 | @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) | |
7818 | ||
7819 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
7820 | ||
7821 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
7822 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
7823 | +#endif | |
7824 | ||
7825 | static inline int kmap_atomic_idx_push(void) | |
7826 | { | |
7827 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
7828 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
7829 | ||
7830 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
7831 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
7832 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
7833 | BUG_ON(idx >= KM_TYPE_NR); | |
7834 | -#endif | |
7835 | +# endif | |
7836 | return idx; | |
7837 | +#else | |
7838 | + current->kmap_idx++; | |
7839 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
7840 | + return current->kmap_idx - 1; | |
7841 | +#endif | |
7842 | } | |
7843 | ||
7844 | static inline int kmap_atomic_idx(void) | |
7845 | { | |
7846 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
7847 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
7848 | +#else | |
7849 | + return current->kmap_idx - 1; | |
7850 | +#endif | |
7851 | } | |
7852 | ||
7853 | static inline void kmap_atomic_idx_pop(void) | |
7854 | { | |
7855 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
7856 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
7857 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
7858 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
7859 | ||
7860 | BUG_ON(idx < 0); | |
7861 | -#else | |
7862 | +# else | |
7863 | __this_cpu_dec(__kmap_atomic_idx); | |
7864 | +# endif | |
7865 | +#else | |
7866 | + current->kmap_idx--; | |
7867 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
7868 | + BUG_ON(current->kmap_idx < 0); | |
7869 | +# endif | |
7870 | #endif | |
7871 | } | |
7872 | ||
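With the highmem changes above, kmap_atomic() no longer disables preemption on RT and the atomic-kmap stack index moves from a per-CPU variable into the task, so a mapping survives being preempted. Callers look the same either way; a hypothetical example:

#include <linux/highmem.h>
#include <linux/string.h>

/* Illustrative only: zero a (possibly highmem) page. */
static void example_zero_page(struct page *page)
{
	void *kaddr = kmap_atomic(page);

	memset(kaddr, 0, PAGE_SIZE);
	kunmap_atomic(kaddr);
}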
7873 | diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h | |
7874 | index 5e00f80b1535..65d0671f20b4 100644 | |
7875 | --- a/include/linux/hrtimer.h | |
7876 | +++ b/include/linux/hrtimer.h | |
7877 | @@ -87,6 +87,9 @@ enum hrtimer_restart { | |
7878 | * @function: timer expiry callback function | |
7879 | * @base: pointer to the timer base (per cpu and per clock) | |
7880 | * @state: state information (See bit values above) | |
7881 | + * @cb_entry: list entry to defer timers from hardirq context | |
7882 | + * @irqsafe: timer can run in hardirq context | |
7883 | + * @praecox: timer expiry time if expired at the time of programming | |
7884 | * @is_rel: Set if the timer was armed relative | |
7885 | * @start_pid: timer statistics field to store the pid of the task which | |
7886 | * started the timer | |
7887 | @@ -103,6 +106,11 @@ struct hrtimer { | |
7888 | enum hrtimer_restart (*function)(struct hrtimer *); | |
7889 | struct hrtimer_clock_base *base; | |
7890 | u8 state; | |
7891 | + struct list_head cb_entry; | |
7892 | + int irqsafe; | |
7893 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
7894 | + ktime_t praecox; | |
7895 | +#endif | |
7896 | u8 is_rel; | |
7897 | #ifdef CONFIG_TIMER_STATS | |
7898 | int start_pid; | |
7899 | @@ -123,11 +131,7 @@ struct hrtimer_sleeper { | |
7900 | struct task_struct *task; | |
7901 | }; | |
7902 | ||
7903 | -#ifdef CONFIG_64BIT | |
7904 | # define HRTIMER_CLOCK_BASE_ALIGN 64 | |
7905 | -#else | |
7906 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
7907 | -#endif | |
7908 | ||
7909 | /** | |
7910 | * struct hrtimer_clock_base - the timer base for a specific clock | |
7911 | @@ -136,6 +140,7 @@ struct hrtimer_sleeper { | |
7912 | * timer to a base on another cpu. | |
7913 | * @clockid: clock id for per_cpu support | |
7914 | * @active: red black tree root node for the active timers | |
7915 | + * @expired: list head for deferred timers. | |
7916 | * @get_time: function to retrieve the current time of the clock | |
7917 | * @offset: offset of this clock to the monotonic base | |
7918 | */ | |
7919 | @@ -144,6 +149,7 @@ struct hrtimer_clock_base { | |
7920 | int index; | |
7921 | clockid_t clockid; | |
7922 | struct timerqueue_head active; | |
7923 | + struct list_head expired; | |
7924 | ktime_t (*get_time)(void); | |
7925 | ktime_t offset; | |
7926 | } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
7927 | @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { | |
7928 | raw_spinlock_t lock; | |
7929 | seqcount_t seq; | |
7930 | struct hrtimer *running; | |
7931 | + struct hrtimer *running_soft; | |
7932 | unsigned int cpu; | |
7933 | unsigned int active_bases; | |
7934 | unsigned int clock_was_set_seq; | |
7935 | @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { | |
7936 | unsigned int nr_hangs; | |
7937 | unsigned int max_hang_time; | |
7938 | #endif | |
7939 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7940 | + wait_queue_head_t wait; | |
7941 | +#endif | |
7942 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
7943 | } ____cacheline_aligned; | |
7944 | ||
7945 | @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) | |
7946 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
7947 | } | |
7948 | ||
7949 | +/* Softirq preemption could deadlock timer removal */ | |
7950 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7951 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
7952 | +#else | |
7953 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
7954 | +#endif | |
7955 | + | |
7956 | /* Query timers: */ | |
7957 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
7958 | ||
7959 | @@ -436,7 +453,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) | |
7960 | * Helper function to check, whether the timer is running the callback | |
7961 | * function | |
7962 | */ | |
7963 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
7964 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
7965 | { | |
7966 | return timer->base->cpu_base->running == timer; | |
7967 | } | |
7968 | diff --git a/include/linux/idr.h b/include/linux/idr.h | |
7969 | index 083d61e92706..5899796f50cb 100644 | |
7970 | --- a/include/linux/idr.h | |
7971 | +++ b/include/linux/idr.h | |
7972 | @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); | |
7973 | * Each idr_preload() should be matched with an invocation of this | |
7974 | * function. See idr_preload() for details. | |
7975 | */ | |
7976 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
7977 | +void idr_preload_end(void); | |
7978 | +#else | |
7979 | static inline void idr_preload_end(void) | |
7980 | { | |
7981 | preempt_enable(); | |
7982 | } | |
7983 | +#endif | |
7984 | ||
7985 | /** | |
7986 | * idr_find - return pointer for given id | |
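idr_preload() keeps preemption disabled across the allocation on mainline, which is why idr_preload_end() is plain preempt_enable(); the hunk above makes it out-of-line on RT so that window can be made preemptible. The documented calling pattern is unchanged, for example:

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_IDR(example_idr);
static DEFINE_SPINLOCK(example_lock);

static int example_store(void *ptr)
{
	int id;

	idr_preload(GFP_KERNEL);		/* preallocate outside the lock */
	spin_lock(&example_lock);
	id = idr_alloc(&example_idr, ptr, 0, 0, GFP_NOWAIT);
	spin_unlock(&example_lock);
	idr_preload_end();

	return id;	/* >= 0 on success, negative errno on failure */
}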
7987 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | |
7988 | index f8834f820ec2..a688d5e19578 100644 | |
7989 | --- a/include/linux/init_task.h | |
7990 | +++ b/include/linux/init_task.h | |
7991 | @@ -148,6 +148,12 @@ extern struct task_group root_task_group; | |
7992 | # define INIT_PERF_EVENTS(tsk) | |
7993 | #endif | |
7994 | ||
7995 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
7996 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
7997 | +#else | |
7998 | +# define INIT_TIMER_LIST | |
7999 | +#endif | |
8000 | + | |
8001 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
8002 | # define INIT_VTIME(tsk) \ | |
8003 | .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ | |
8004 | @@ -239,6 +245,7 @@ extern struct task_group root_task_group; | |
8005 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | |
8006 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
8007 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
8008 | + INIT_TIMER_LIST \ | |
8009 | .pids = { \ | |
8010 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
8011 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
8012 | diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h | |
8013 | index b6683f0ffc9f..c0a351daf736 100644 | |
8014 | --- a/include/linux/interrupt.h | |
8015 | +++ b/include/linux/interrupt.h | |
8016 | @@ -14,6 +14,7 @@ | |
8017 | #include <linux/hrtimer.h> | |
8018 | #include <linux/kref.h> | |
8019 | #include <linux/workqueue.h> | |
8020 | +#include <linux/swork.h> | |
8021 | ||
8022 | #include <linux/atomic.h> | |
8023 | #include <asm/ptrace.h> | |
8024 | @@ -61,6 +62,7 @@ | |
8025 | * interrupt handler after suspending interrupts. For system | |
8026 | * wakeup devices users need to implement wakeup detection in | |
8027 | * their interrupt handlers. | |
8028 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
8029 | */ | |
8030 | #define IRQF_SHARED 0x00000080 | |
8031 | #define IRQF_PROBE_SHARED 0x00000100 | |
8032 | @@ -74,6 +76,7 @@ | |
8033 | #define IRQF_NO_THREAD 0x00010000 | |
8034 | #define IRQF_EARLY_RESUME 0x00020000 | |
8035 | #define IRQF_COND_SUSPEND 0x00040000 | |
8036 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
8037 | ||
8038 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
8039 | ||
8040 | @@ -196,7 +199,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); | |
8041 | #ifdef CONFIG_LOCKDEP | |
8042 | # define local_irq_enable_in_hardirq() do { } while (0) | |
8043 | #else | |
8044 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
8045 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
8046 | #endif | |
8047 | ||
8048 | extern void disable_irq_nosync(unsigned int irq); | |
8049 | @@ -216,6 +219,7 @@ extern void resume_device_irqs(void); | |
8050 | * struct irq_affinity_notify - context for notification of IRQ affinity changes | |
8051 | * @irq: Interrupt to which notification applies | |
8052 | * @kref: Reference count, for internal use | |
8053 | + * @swork: Swork item, for internal use | |
8054 | * @work: Work item, for internal use | |
8055 | * @notify: Function to be called on change. This will be | |
8056 | * called in process context. | |
8057 | @@ -227,7 +231,11 @@ extern void resume_device_irqs(void); | |
8058 | struct irq_affinity_notify { | |
8059 | unsigned int irq; | |
8060 | struct kref kref; | |
8061 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8062 | + struct swork_event swork; | |
8063 | +#else | |
8064 | struct work_struct work; | |
8065 | +#endif | |
8066 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
8067 | void (*release)(struct kref *ref); | |
8068 | }; | |
8069 | @@ -398,9 +406,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
8070 | bool state); | |
8071 | ||
8072 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
8073 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
8074 | extern bool force_irqthreads; | |
8075 | +# else | |
8076 | +# define force_irqthreads (true) | |
8077 | +# endif | |
8078 | #else | |
8079 | -#define force_irqthreads (0) | |
8080 | +#define force_irqthreads (false) | |
8081 | #endif | |
8082 | ||
8083 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
8084 | @@ -457,9 +469,10 @@ struct softirq_action | |
8085 | void (*action)(struct softirq_action *); | |
8086 | }; | |
8087 | ||
8088 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8089 | asmlinkage void do_softirq(void); | |
8090 | asmlinkage void __do_softirq(void); | |
8091 | - | |
8092 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
8093 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
8094 | void do_softirq_own_stack(void); | |
8095 | #else | |
8096 | @@ -468,13 +481,25 @@ static inline void do_softirq_own_stack(void) | |
8097 | __do_softirq(); | |
8098 | } | |
8099 | #endif | |
8100 | +#else | |
8101 | +extern void thread_do_softirq(void); | |
8102 | +#endif | |
8103 | ||
8104 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
8105 | extern void softirq_init(void); | |
8106 | extern void __raise_softirq_irqoff(unsigned int nr); | |
8107 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8108 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
8109 | +#else | |
8110 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
8111 | +{ | |
8112 | + __raise_softirq_irqoff(nr); | |
8113 | +} | |
8114 | +#endif | |
8115 | ||
8116 | extern void raise_softirq_irqoff(unsigned int nr); | |
8117 | extern void raise_softirq(unsigned int nr); | |
8118 | +extern void softirq_check_pending_idle(void); | |
8119 | ||
8120 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
8121 | ||
8122 | @@ -496,8 +521,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) | |
8123 | to be executed on some cpu at least once after this. | |
8124 | * If the tasklet is already scheduled, but its execution is still not | |
8125 | started, it will be executed only once. | |
8126 | - * If this tasklet is already running on another CPU (or schedule is called | |
8127 | - from tasklet itself), it is rescheduled for later. | |
8128 | + * If this tasklet is already running on another CPU, it is rescheduled | |
8129 | + for later. | |
8130 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
8131 | * Tasklet is strictly serialized wrt itself, but not | |
8132 | wrt another tasklets. If client needs some intertask synchronization, | |
8133 | he makes it with spinlocks. | |
8134 | @@ -522,27 +548,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } | |
8135 | enum | |
8136 | { | |
8137 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
8138 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
8139 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
8140 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
8141 | }; | |
8142 | ||
8143 | -#ifdef CONFIG_SMP | |
8144 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
8145 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
8146 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
8147 | + | |
8148 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
8149 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
8150 | { | |
8151 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
8152 | } | |
8153 | ||
8154 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
8155 | +{ | |
8156 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
8157 | +} | |
8158 | + | |
8159 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
8160 | { | |
8161 | smp_mb__before_atomic(); | |
8162 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
8163 | } | |
8164 | ||
8165 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
8166 | -{ | |
8167 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
8168 | -} | |
8169 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
8170 | + | |
8171 | #else | |
8172 | #define tasklet_trylock(t) 1 | |
8173 | +#define tasklet_tryunlock(t) 1 | |
8174 | #define tasklet_unlock_wait(t) do { } while (0) | |
8175 | #define tasklet_unlock(t) do { } while (0) | |
8176 | #endif | |
8177 | @@ -591,12 +626,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |
8178 | smp_mb(); | |
8179 | } | |
8180 | ||
8181 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
8182 | -{ | |
8183 | - smp_mb__before_atomic(); | |
8184 | - atomic_dec(&t->count); | |
8185 | -} | |
8186 | - | |
8187 | +extern void tasklet_enable(struct tasklet_struct *t); | |
8188 | extern void tasklet_kill(struct tasklet_struct *t); | |
8189 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
8190 | extern void tasklet_init(struct tasklet_struct *t, | |
8191 | @@ -627,6 +657,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |
8192 | tasklet_kill(&ttimer->tasklet); | |
8193 | } | |
8194 | ||
8195 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8196 | +extern void softirq_early_init(void); | |
8197 | +#else | |
8198 | +static inline void softirq_early_init(void) { } | |
8199 | +#endif | |
8200 | + | |
8201 | /* | |
8202 | * Autoprobing for irqs: | |
8203 | * | |
8204 | diff --git a/include/linux/irq.h b/include/linux/irq.h | |
8205 | index 0ac26c892fe2..ede85f106aef 100644 | |
8206 | --- a/include/linux/irq.h | |
8207 | +++ b/include/linux/irq.h | |
8208 | @@ -72,6 +72,7 @@ enum irqchip_irq_state; | |
8209 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
8210 | * it from the spurious interrupt detection | |
8211 | * mechanism and from core side polling. | |
8212 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
8213 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
8214 | */ | |
8215 | enum { | |
8216 | @@ -99,13 +100,14 @@ enum { | |
8217 | IRQ_PER_CPU_DEVID = (1 << 17), | |
8218 | IRQ_IS_POLLED = (1 << 18), | |
8219 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
8220 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
8221 | }; | |
8222 | ||
8223 | #define IRQF_MODIFY_MASK \ | |
8224 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
8225 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
8226 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
8227 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
8228 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
8229 | ||
8230 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
8231 | ||
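IRQ_NO_SOFTIRQ_CALL lets an interrupt be marked so that, on RT, returning from its threaded handler does not run pending softirqs in that thread. A hedged sketch of how platform code might set the flag, assuming the generic irq_set_status_flags() helper and an irq number obtained elsewhere (not part of the patch):

        #include <linux/irq.h>

        /* Illustrative: keep softirq processing out of this IRQ's thread (RT). */
        static void mark_irq_no_softirq(unsigned int irq)
        {
                irq_set_status_flags(irq, IRQ_NO_SOFTIRQ_CALL);
        }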
8232 | diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h | |
8233 | index 47b9ebd4a74f..2543aab05daa 100644 | |
8234 | --- a/include/linux/irq_work.h | |
8235 | +++ b/include/linux/irq_work.h | |
8236 | @@ -16,6 +16,7 @@ | |
8237 | #define IRQ_WORK_BUSY 2UL | |
8238 | #define IRQ_WORK_FLAGS 3UL | |
8239 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
8240 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
8241 | ||
8242 | struct irq_work { | |
8243 | unsigned long flags; | |
8244 | @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } | |
8245 | static inline void irq_work_run(void) { } | |
8246 | #endif | |
8247 | ||
8248 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
8249 | +void irq_work_tick_soft(void); | |
8250 | +#else | |
8251 | +static inline void irq_work_tick_soft(void) { } | |
8252 | +#endif | |
8253 | + | |
8254 | #endif /* _LINUX_IRQ_WORK_H */ | |
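On RT most irq_work items are deferred to softirq context; IRQ_WORK_HARD_IRQ marks the ones that must still run from the hard interrupt, and irq_work_tick_soft() gives the tick a hook to flush the deferred ones. A small sketch of flagging an item, with invented names for the callback and the work item:

        #include <linux/irq_work.h>

        static void my_hard_irq_callback(struct irq_work *work)
        {
                /* runs in hard interrupt context even with PREEMPT_RT_FULL */
        }

        /* Flag the item so the RT core does not push it to the softirq list. */
        static struct irq_work my_work = {
                .flags = IRQ_WORK_HARD_IRQ,
                .func  = my_hard_irq_callback,
        };

        static void kick_it(void)
        {
                irq_work_queue(&my_work);
        }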
8255 | diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h | |
8256 | index b51beebf9804..219d9824f762 100644 | |
8257 | --- a/include/linux/irqdesc.h | |
8258 | +++ b/include/linux/irqdesc.h | |
8259 | @@ -64,6 +64,7 @@ struct irq_desc { | |
8260 | unsigned int irqs_unhandled; | |
8261 | atomic_t threads_handled; | |
8262 | int threads_handled_last; | |
8263 | + u64 random_ip; | |
8264 | raw_spinlock_t lock; | |
8265 | struct cpumask *percpu_enabled; | |
8266 | const struct cpumask *percpu_affinity; | |
8267 | diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h | |
8268 | index 5dd1272d1ab2..9b77034f7c5e 100644 | |
8269 | --- a/include/linux/irqflags.h | |
8270 | +++ b/include/linux/irqflags.h | |
8271 | @@ -25,8 +25,6 @@ | |
8272 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | |
8273 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | |
8274 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | |
8275 | -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
8276 | -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
8277 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
8278 | #else | |
8279 | # define trace_hardirqs_on() do { } while (0) | |
8280 | @@ -39,9 +37,15 @@ | |
8281 | # define trace_softirqs_enabled(p) 0 | |
8282 | # define trace_hardirq_enter() do { } while (0) | |
8283 | # define trace_hardirq_exit() do { } while (0) | |
8284 | +# define INIT_TRACE_IRQFLAGS | |
8285 | +#endif | |
8286 | + | |
8287 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
8288 | +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
8289 | +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
8290 | +#else | |
8291 | # define lockdep_softirq_enter() do { } while (0) | |
8292 | # define lockdep_softirq_exit() do { } while (0) | |
8293 | -# define INIT_TRACE_IRQFLAGS | |
8294 | #endif | |
8295 | ||
8296 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
8297 | @@ -148,4 +152,23 @@ | |
8298 | ||
8299 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
8300 | ||
8301 | +/* | |
8302 | + * local_irq* variants depending on RT/!RT | |
8303 | + */ | |
8304 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8305 | +# define local_irq_disable_nort() do { } while (0) | |
8306 | +# define local_irq_enable_nort() do { } while (0) | |
8307 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
8308 | +# define local_irq_restore_nort(flags) (void)(flags) | |
8309 | +# define local_irq_disable_rt() local_irq_disable() | |
8310 | +# define local_irq_enable_rt() local_irq_enable() | |
8311 | +#else | |
8312 | +# define local_irq_disable_nort() local_irq_disable() | |
8313 | +# define local_irq_enable_nort() local_irq_enable() | |
8314 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
8315 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
8316 | +# define local_irq_disable_rt() do { } while (0) | |
8317 | +# define local_irq_enable_rt() do { } while (0) | |
8318 | +#endif | |
8319 | + | |
8320 | #endif | |
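The _nort/_rt variants let code keep hard-interrupt disabling on !RT while turning it into a no-op (or flags bookkeeping only) on RT, where the data is expected to be protected by a sleeping lock instead. A minimal sketch of the conversion pattern; the variable and function names are invented:

        #include <linux/irqflags.h>

        static unsigned long event_count;

        static void bump_event_count(void)
        {
                unsigned long flags;

                /*
                 * !RT: a real local_irq_save().  RT: only saves the flags word;
                 * hard interrupts stay enabled because event_count is assumed
                 * to be protected by other (sleeping) locking on RT.
                 */
                local_irq_save_nort(flags);
                event_count++;
                local_irq_restore_nort(flags);
        }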
8321 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | |
8322 | index dfaa1f4dcb0c..d57dd06544a1 100644 | |
8323 | --- a/include/linux/jbd2.h | |
8324 | +++ b/include/linux/jbd2.h | |
8325 | @@ -347,32 +347,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) | |
8326 | ||
8327 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
8328 | { | |
8329 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8330 | bit_spin_lock(BH_State, &bh->b_state); | |
8331 | +#else | |
8332 | + spin_lock(&bh->b_state_lock); | |
8333 | +#endif | |
8334 | } | |
8335 | ||
8336 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
8337 | { | |
8338 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8339 | return bit_spin_trylock(BH_State, &bh->b_state); | |
8340 | +#else | |
8341 | + return spin_trylock(&bh->b_state_lock); | |
8342 | +#endif | |
8343 | } | |
8344 | ||
8345 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
8346 | { | |
8347 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8348 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
8349 | +#else | |
8350 | + return spin_is_locked(&bh->b_state_lock); | |
8351 | +#endif | |
8352 | } | |
8353 | ||
8354 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
8355 | { | |
8356 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8357 | bit_spin_unlock(BH_State, &bh->b_state); | |
8358 | +#else | |
8359 | + spin_unlock(&bh->b_state_lock); | |
8360 | +#endif | |
8361 | } | |
8362 | ||
8363 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
8364 | { | |
8365 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8366 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
8367 | +#else | |
8368 | + spin_lock(&bh->b_journal_head_lock); | |
8369 | +#endif | |
8370 | } | |
8371 | ||
8372 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
8373 | { | |
8374 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8375 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
8376 | +#else | |
8377 | + spin_unlock(&bh->b_journal_head_lock); | |
8378 | +#endif | |
8379 | } | |
8380 | ||
8381 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
8382 | diff --git a/include/linux/kdb.h b/include/linux/kdb.h | |
8383 | index 410decacff8f..0861bebfc188 100644 | |
8384 | --- a/include/linux/kdb.h | |
8385 | +++ b/include/linux/kdb.h | |
8386 | @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, | |
8387 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
8388 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
8389 | ||
8390 | +#define in_kdb_printk() (kdb_trap_printk) | |
8391 | extern void kdb_init(int level); | |
8392 | ||
8393 | /* Access to kdb specific polling devices */ | |
8394 | @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, | |
8395 | extern int kdb_unregister(char *); | |
8396 | #else /* ! CONFIG_KGDB_KDB */ | |
8397 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
8398 | +#define in_kdb_printk() (0) | |
8399 | static inline void kdb_init(int level) {} | |
8400 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
8401 | char *help, short minlen) { return 0; } | |
8402 | diff --git a/include/linux/kernel.h b/include/linux/kernel.h | |
8403 | index d96a6118d26a..37de2ce2d290 100644 | |
8404 | --- a/include/linux/kernel.h | |
8405 | +++ b/include/linux/kernel.h | |
8406 | @@ -194,6 +194,9 @@ extern int _cond_resched(void); | |
8407 | */ | |
8408 | # define might_sleep() \ | |
8409 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
8410 | + | |
8411 | +# define might_sleep_no_state_check() \ | |
8412 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
8413 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
8414 | #else | |
8415 | static inline void ___might_sleep(const char *file, int line, | |
8416 | @@ -201,6 +204,7 @@ extern int _cond_resched(void); | |
8417 | static inline void __might_sleep(const char *file, int line, | |
8418 | int preempt_offset) { } | |
8419 | # define might_sleep() do { might_resched(); } while (0) | |
8420 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
8421 | # define sched_annotate_sleep() do { } while (0) | |
8422 | #endif | |
8423 | ||
8424 | @@ -491,6 +495,7 @@ extern enum system_states { | |
8425 | SYSTEM_HALT, | |
8426 | SYSTEM_POWER_OFF, | |
8427 | SYSTEM_RESTART, | |
8428 | + SYSTEM_SUSPEND, | |
8429 | } system_state; | |
8430 | ||
8431 | #define TAINT_PROPRIETARY_MODULE 0 | |
8432 | diff --git a/include/linux/lglock.h b/include/linux/lglock.h | |
8433 | index c92ebd100d9b..6f035f635d0e 100644 | |
8434 | --- a/include/linux/lglock.h | |
8435 | +++ b/include/linux/lglock.h | |
8436 | @@ -34,13 +34,30 @@ | |
8437 | #endif | |
8438 | ||
8439 | struct lglock { | |
8440 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8441 | + struct rt_mutex __percpu *lock; | |
8442 | +#else | |
8443 | arch_spinlock_t __percpu *lock; | |
8444 | +#endif | |
8445 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8446 | struct lock_class_key lock_key; | |
8447 | struct lockdep_map lock_dep_map; | |
8448 | #endif | |
8449 | }; | |
8450 | ||
8451 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8452 | +# define DEFINE_LGLOCK(name) \ | |
8453 | + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ | |
8454 | + = __RT_MUTEX_INITIALIZER( name ## _lock); \ | |
8455 | + struct lglock name = { .lock = &name ## _lock } | |
8456 | + | |
8457 | +# define DEFINE_STATIC_LGLOCK(name) \ | |
8458 | + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ | |
8459 | + = __RT_MUTEX_INITIALIZER( name ## _lock); \ | |
8460 | + static struct lglock name = { .lock = &name ## _lock } | |
8461 | + | |
8462 | +#else | |
8463 | + | |
8464 | #define DEFINE_LGLOCK(name) \ | |
8465 | static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ | |
8466 | = __ARCH_SPIN_LOCK_UNLOCKED; \ | |
8467 | @@ -50,6 +67,7 @@ struct lglock { | |
8468 | static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ | |
8469 | = __ARCH_SPIN_LOCK_UNLOCKED; \ | |
8470 | static struct lglock name = { .lock = &name ## _lock } | |
8471 | +#endif | |
8472 | ||
8473 | void lg_lock_init(struct lglock *lg, char *name); | |
8474 | ||
8475 | @@ -64,6 +82,12 @@ void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); | |
8476 | void lg_global_lock(struct lglock *lg); | |
8477 | void lg_global_unlock(struct lglock *lg); | |
8478 | ||
8479 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
8480 | +#define lg_global_trylock_relax(name) lg_global_lock(name) | |
8481 | +#else | |
8482 | +void lg_global_trylock_relax(struct lglock *lg); | |
8483 | +#endif | |
8484 | + | |
8485 | #else | |
8486 | /* When !CONFIG_SMP, map lglock to spinlock */ | |
8487 | #define lglock spinlock | |
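With PREEMPT_RT_FULL the per-CPU arch_spinlock_t inside an lglock becomes a per-CPU rt_mutex, so lg_local_lock() may sleep instead of spinning; the caller-side API is unchanged. A minimal sketch with an invented lock name (the lg_lock_init() call needed for lockdep is not shown):

        #include <linux/lglock.h>

        DEFINE_STATIC_LGLOCK(my_lglock);

        static void touch_local_share(void)
        {
                /* per-CPU fast path: arch spinlock on !RT, rt_mutex on RT */
                lg_local_lock(&my_lglock);
                /* ... operate on this CPU's share of the data ... */
                lg_local_unlock(&my_lglock);
        }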
8488 | diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h | |
8489 | index cb483305e1f5..4e5062316bb6 100644 | |
8490 | --- a/include/linux/list_bl.h | |
8491 | +++ b/include/linux/list_bl.h | |
8492 | @@ -2,6 +2,7 @@ | |
8493 | #define _LINUX_LIST_BL_H | |
8494 | ||
8495 | #include <linux/list.h> | |
8496 | +#include <linux/spinlock.h> | |
8497 | #include <linux/bit_spinlock.h> | |
8498 | ||
8499 | /* | |
8500 | @@ -32,13 +33,24 @@ | |
8501 | ||
8502 | struct hlist_bl_head { | |
8503 | struct hlist_bl_node *first; | |
8504 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8505 | + raw_spinlock_t lock; | |
8506 | +#endif | |
8507 | }; | |
8508 | ||
8509 | struct hlist_bl_node { | |
8510 | struct hlist_bl_node *next, **pprev; | |
8511 | }; | |
8512 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
8513 | - ((ptr)->first = NULL) | |
8514 | + | |
8515 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8516 | +#define INIT_HLIST_BL_HEAD(h) \ | |
8517 | +do { \ | |
8518 | + (h)->first = NULL; \ | |
8519 | + raw_spin_lock_init(&(h)->lock); \ | |
8520 | +} while (0) | |
8521 | +#else | |
8522 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
8523 | +#endif | |
8524 | ||
8525 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
8526 | { | |
8527 | @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) | |
8528 | ||
8529 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
8530 | { | |
8531 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8532 | bit_spin_lock(0, (unsigned long *)b); | |
8533 | +#else | |
8534 | + raw_spin_lock(&b->lock); | |
8535 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
8536 | + __set_bit(0, (unsigned long *)b); | |
8537 | +#endif | |
8538 | +#endif | |
8539 | } | |
8540 | ||
8541 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
8542 | { | |
8543 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8544 | __bit_spin_unlock(0, (unsigned long *)b); | |
8545 | +#else | |
8546 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
8547 | + __clear_bit(0, (unsigned long *)b); | |
8548 | +#endif | |
8549 | + raw_spin_unlock(&b->lock); | |
8550 | +#endif | |
8551 | } | |
8552 | ||
8553 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
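On RT the bit spinlock hidden in bit 0 of the head pointer is replaced by a real raw spinlock embedded in hlist_bl_head, which is why INIT_HLIST_BL_HEAD() now has to initialize the lock as well. Callers are unaffected; a small sketch with an invented bucket:

        #include <linux/list_bl.h>

        static struct hlist_bl_head bucket;

        static void bucket_setup(void)
        {
                INIT_HLIST_BL_HEAD(&bucket);    /* also inits b->lock on RT */
        }

        static void bucket_add(struct hlist_bl_node *n)
        {
                hlist_bl_lock(&bucket);         /* bit spinlock on !RT, raw spinlock on RT */
                hlist_bl_add_head(n, &bucket);
                hlist_bl_unlock(&bucket);
        }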
8554 | diff --git a/include/linux/locallock.h b/include/linux/locallock.h | |
8555 | new file mode 100644 | |
8556 | index 000000000000..845c77f1a5ca | |
8557 | --- /dev/null | |
8558 | +++ b/include/linux/locallock.h | |
8559 | @@ -0,0 +1,278 @@ | |
8560 | +#ifndef _LINUX_LOCALLOCK_H | |
8561 | +#define _LINUX_LOCALLOCK_H | |
8562 | + | |
8563 | +#include <linux/percpu.h> | |
8564 | +#include <linux/spinlock.h> | |
8565 | + | |
8566 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8567 | + | |
8568 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
8569 | +# define LL_WARN(cond) WARN_ON(cond) | |
8570 | +#else | |
8571 | +# define LL_WARN(cond) do { } while (0) | |
8572 | +#endif | |
8573 | + | |
8574 | +/* | |
8575 | + * per cpu lock based substitute for local_irq_*() | |
8576 | + */ | |
8577 | +struct local_irq_lock { | |
8578 | + spinlock_t lock; | |
8579 | + struct task_struct *owner; | |
8580 | + int nestcnt; | |
8581 | + unsigned long flags; | |
8582 | +}; | |
8583 | + | |
8584 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
8585 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
8586 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
8587 | + | |
8588 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
8589 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
8590 | + | |
8591 | +#define local_irq_lock_init(lvar) \ | |
8592 | + do { \ | |
8593 | + int __cpu; \ | |
8594 | + for_each_possible_cpu(__cpu) \ | |
8595 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
8596 | + } while (0) | |
8597 | + | |
8598 | +/* | |
8599 | + * spin_lock|trylock|unlock_local flavour that does not migrate disable | |
8600 | + * used for __local_lock|trylock|unlock where get_local_var/put_local_var | |
8601 | + * already takes care of the migrate_disable/enable | |
8602 | + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. | |
8603 | + */ | |
8604 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8605 | +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) | |
8606 | +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) | |
8607 | +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) | |
8608 | +#else | |
8609 | +# define spin_lock_local(lock) spin_lock(lock) | |
8610 | +# define spin_trylock_local(lock) spin_trylock(lock) | |
8611 | +# define spin_unlock_local(lock) spin_unlock(lock) | |
8612 | +#endif | |
8613 | + | |
8614 | +static inline void __local_lock(struct local_irq_lock *lv) | |
8615 | +{ | |
8616 | + if (lv->owner != current) { | |
8617 | + spin_lock_local(&lv->lock); | |
8618 | + LL_WARN(lv->owner); | |
8619 | + LL_WARN(lv->nestcnt); | |
8620 | + lv->owner = current; | |
8621 | + } | |
8622 | + lv->nestcnt++; | |
8623 | +} | |
8624 | + | |
8625 | +#define local_lock(lvar) \ | |
8626 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
8627 | + | |
8628 | +#define local_lock_on(lvar, cpu) \ | |
8629 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
8630 | + | |
8631 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
8632 | +{ | |
8633 | + if (lv->owner != current && spin_trylock_local(&lv->lock)) { | |
8634 | + LL_WARN(lv->owner); | |
8635 | + LL_WARN(lv->nestcnt); | |
8636 | + lv->owner = current; | |
8637 | + lv->nestcnt = 1; | |
8638 | + return 1; | |
8639 | + } | |
8640 | + return 0; | |
8641 | +} | |
8642 | + | |
8643 | +#define local_trylock(lvar) \ | |
8644 | + ({ \ | |
8645 | + int __locked; \ | |
8646 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
8647 | + if (!__locked) \ | |
8648 | + put_local_var(lvar); \ | |
8649 | + __locked; \ | |
8650 | + }) | |
8651 | + | |
8652 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
8653 | +{ | |
8654 | + LL_WARN(lv->nestcnt == 0); | |
8655 | + LL_WARN(lv->owner != current); | |
8656 | + if (--lv->nestcnt) | |
8657 | + return; | |
8658 | + | |
8659 | + lv->owner = NULL; | |
8660 | + spin_unlock_local(&lv->lock); | |
8661 | +} | |
8662 | + | |
8663 | +#define local_unlock(lvar) \ | |
8664 | + do { \ | |
8665 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
8666 | + put_local_var(lvar); \ | |
8667 | + } while (0) | |
8668 | + | |
8669 | +#define local_unlock_on(lvar, cpu) \ | |
8670 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
8671 | + | |
8672 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
8673 | +{ | |
8674 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
8675 | + LL_WARN(lv->owner); | |
8676 | + LL_WARN(lv->nestcnt); | |
8677 | + lv->owner = current; | |
8678 | + lv->nestcnt = 1; | |
8679 | +} | |
8680 | + | |
8681 | +#define local_lock_irq(lvar) \ | |
8682 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
8683 | + | |
8684 | +#define local_lock_irq_on(lvar, cpu) \ | |
8685 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
8686 | + | |
8687 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
8688 | +{ | |
8689 | + LL_WARN(!lv->nestcnt); | |
8690 | + LL_WARN(lv->owner != current); | |
8691 | + lv->owner = NULL; | |
8692 | + lv->nestcnt = 0; | |
8693 | + spin_unlock_irq(&lv->lock); | |
8694 | +} | |
8695 | + | |
8696 | +#define local_unlock_irq(lvar) \ | |
8697 | + do { \ | |
8698 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
8699 | + put_local_var(lvar); \ | |
8700 | + } while (0) | |
8701 | + | |
8702 | +#define local_unlock_irq_on(lvar, cpu) \ | |
8703 | + do { \ | |
8704 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
8705 | + } while (0) | |
8706 | + | |
8707 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
8708 | +{ | |
8709 | + if (lv->owner != current) { | |
8710 | + __local_lock_irq(lv); | |
8711 | + return 0; | |
8712 | + } else { | |
8713 | + lv->nestcnt++; | |
8714 | + return 1; | |
8715 | + } | |
8716 | +} | |
8717 | + | |
8718 | +#define local_lock_irqsave(lvar, _flags) \ | |
8719 | + do { \ | |
8720 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
8721 | + put_local_var(lvar); \ | |
8722 | + _flags = __this_cpu_read(lvar.flags); \ | |
8723 | + } while (0) | |
8724 | + | |
8725 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
8726 | + do { \ | |
8727 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
8728 | + _flags = per_cpu(lvar, cpu).flags; \ | |
8729 | + } while (0) | |
8730 | + | |
8731 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
8732 | + unsigned long flags) | |
8733 | +{ | |
8734 | + LL_WARN(!lv->nestcnt); | |
8735 | + LL_WARN(lv->owner != current); | |
8736 | + if (--lv->nestcnt) | |
8737 | + return 0; | |
8738 | + | |
8739 | + lv->owner = NULL; | |
8740 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
8741 | + return 1; | |
8742 | +} | |
8743 | + | |
8744 | +#define local_unlock_irqrestore(lvar, flags) \ | |
8745 | + do { \ | |
8746 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
8747 | + put_local_var(lvar); \ | |
8748 | + } while (0) | |
8749 | + | |
8750 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
8751 | + do { \ | |
8752 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
8753 | + } while (0) | |
8754 | + | |
8755 | +#define local_spin_trylock_irq(lvar, lock) \ | |
8756 | + ({ \ | |
8757 | + int __locked; \ | |
8758 | + local_lock_irq(lvar); \ | |
8759 | + __locked = spin_trylock(lock); \ | |
8760 | + if (!__locked) \ | |
8761 | + local_unlock_irq(lvar); \ | |
8762 | + __locked; \ | |
8763 | + }) | |
8764 | + | |
8765 | +#define local_spin_lock_irq(lvar, lock) \ | |
8766 | + do { \ | |
8767 | + local_lock_irq(lvar); \ | |
8768 | + spin_lock(lock); \ | |
8769 | + } while (0) | |
8770 | + | |
8771 | +#define local_spin_unlock_irq(lvar, lock) \ | |
8772 | + do { \ | |
8773 | + spin_unlock(lock); \ | |
8774 | + local_unlock_irq(lvar); \ | |
8775 | + } while (0) | |
8776 | + | |
8777 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
8778 | + do { \ | |
8779 | + local_lock_irqsave(lvar, flags); \ | |
8780 | + spin_lock(lock); \ | |
8781 | + } while (0) | |
8782 | + | |
8783 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
8784 | + do { \ | |
8785 | + spin_unlock(lock); \ | |
8786 | + local_unlock_irqrestore(lvar, flags); \ | |
8787 | + } while (0) | |
8788 | + | |
8789 | +#define get_locked_var(lvar, var) \ | |
8790 | + (*({ \ | |
8791 | + local_lock(lvar); \ | |
8792 | + this_cpu_ptr(&var); \ | |
8793 | + })) | |
8794 | + | |
8795 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
8796 | + | |
8797 | +#define local_lock_cpu(lvar) \ | |
8798 | + ({ \ | |
8799 | + local_lock(lvar); \ | |
8800 | + smp_processor_id(); \ | |
8801 | + }) | |
8802 | + | |
8803 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
8804 | + | |
8805 | +#else /* PREEMPT_RT_BASE */ | |
8806 | + | |
8807 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
8808 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
8809 | + | |
8810 | +static inline void local_irq_lock_init(int lvar) { } | |
8811 | + | |
8812 | +#define local_lock(lvar) preempt_disable() | |
8813 | +#define local_unlock(lvar) preempt_enable() | |
8814 | +#define local_lock_irq(lvar) local_irq_disable() | |
8815 | +#define local_lock_irq_on(lvar, cpu) local_irq_disable() | |
8816 | +#define local_unlock_irq(lvar) local_irq_enable() | |
8817 | +#define local_unlock_irq_on(lvar, cpu) local_irq_enable() | |
8818 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
8819 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
8820 | + | |
8821 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
8822 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
8823 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
8824 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
8825 | + spin_lock_irqsave(lock, flags) | |
8826 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
8827 | + spin_unlock_irqrestore(lock, flags) | |
8828 | + | |
8829 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
8830 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
8831 | + | |
8832 | +#define local_lock_cpu(lvar) get_cpu() | |
8833 | +#define local_unlock_cpu(lvar) put_cpu() | |
8834 | + | |
8835 | +#endif | |
8836 | + | |
8837 | +#endif | |
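locallock.h is the core of the per-CPU data strategy on RT: a "local lock" compiles down to preempt/irq disabling on !RT, but becomes a per-CPU spinlock_t (a sleeping lock) plus migration disabling on RT, so the critical section stays preemptible. A minimal usage sketch; the lock, the per-CPU list and the function are invented, and per-CPU list initialization is not shown:

        #include <linux/list.h>
        #include <linux/locallock.h>
        #include <linux/percpu.h>

        static DEFINE_PER_CPU(struct list_head, pending_items);
        static DEFINE_LOCAL_IRQ_LOCK(pending_lock);

        static void queue_item(struct list_head *item)
        {
                unsigned long flags;

                /* !RT: local_irq_save(); RT: per-CPU sleeping lock, preemptible */
                local_lock_irqsave(pending_lock, flags);
                list_add_tail(item, this_cpu_ptr(&pending_items));
                local_unlock_irqrestore(pending_lock, flags);
        }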
8838 | diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h | |
8839 | index 903200f4ec41..df670d441fc9 100644 | |
8840 | --- a/include/linux/mm_types.h | |
8841 | +++ b/include/linux/mm_types.h | |
8842 | @@ -11,6 +11,7 @@ | |
8843 | #include <linux/completion.h> | |
8844 | #include <linux/cpumask.h> | |
8845 | #include <linux/uprobes.h> | |
8846 | +#include <linux/rcupdate.h> | |
8847 | #include <linux/page-flags-layout.h> | |
8848 | #include <linux/workqueue.h> | |
8849 | #include <asm/page.h> | |
8850 | @@ -508,6 +509,9 @@ struct mm_struct { | |
8851 | bool tlb_flush_pending; | |
8852 | #endif | |
8853 | struct uprobes_state uprobes_state; | |
8854 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
8855 | + struct rcu_head delayed_drop; | |
8856 | +#endif | |
8857 | #ifdef CONFIG_X86_INTEL_MPX | |
8858 | /* address of the bounds directory */ | |
8859 | void __user *bd_addr; | |
8860 | diff --git a/include/linux/mutex.h b/include/linux/mutex.h | |
8861 | index 2cb7531e7d7a..b3fdfc820216 100644 | |
8862 | --- a/include/linux/mutex.h | |
8863 | +++ b/include/linux/mutex.h | |
8864 | @@ -19,6 +19,17 @@ | |
8865 | #include <asm/processor.h> | |
8866 | #include <linux/osq_lock.h> | |
8867 | ||
8868 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8869 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
8870 | + , .dep_map = { .name = #lockname } | |
8871 | +#else | |
8872 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
8873 | +#endif | |
8874 | + | |
8875 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
8876 | +# include <linux/mutex_rt.h> | |
8877 | +#else | |
8878 | + | |
8879 | /* | |
8880 | * Simple, straightforward mutexes with strict semantics: | |
8881 | * | |
8882 | @@ -99,13 +110,6 @@ do { \ | |
8883 | static inline void mutex_destroy(struct mutex *lock) {} | |
8884 | #endif | |
8885 | ||
8886 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8887 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
8888 | - , .dep_map = { .name = #lockname } | |
8889 | -#else | |
8890 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
8891 | -#endif | |
8892 | - | |
8893 | #define __MUTEX_INITIALIZER(lockname) \ | |
8894 | { .count = ATOMIC_INIT(1) \ | |
8895 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
8896 | @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |
8897 | extern int mutex_trylock(struct mutex *lock); | |
8898 | extern void mutex_unlock(struct mutex *lock); | |
8899 | ||
8900 | +#endif /* !PREEMPT_RT_FULL */ | |
8901 | + | |
8902 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
8903 | ||
8904 | #endif /* __LINUX_MUTEX_H */ | |
8905 | diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h | |
8906 | new file mode 100644 | |
8907 | index 000000000000..c38a44b14da5 | |
8908 | --- /dev/null | |
8909 | +++ b/include/linux/mutex_rt.h | |
8910 | @@ -0,0 +1,84 @@ | |
8911 | +#ifndef __LINUX_MUTEX_RT_H | |
8912 | +#define __LINUX_MUTEX_RT_H | |
8913 | + | |
8914 | +#ifndef __LINUX_MUTEX_H | |
8915 | +#error "Please include mutex.h" | |
8916 | +#endif | |
8917 | + | |
8918 | +#include <linux/rtmutex.h> | |
8919 | + | |
8920 | +/* FIXME: Just for __lockfunc */ | |
8921 | +#include <linux/spinlock.h> | |
8922 | + | |
8923 | +struct mutex { | |
8924 | + struct rt_mutex lock; | |
8925 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8926 | + struct lockdep_map dep_map; | |
8927 | +#endif | |
8928 | +}; | |
8929 | + | |
8930 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
8931 | + { \ | |
8932 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
8933 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
8934 | + } | |
8935 | + | |
8936 | +#define DEFINE_MUTEX(mutexname) \ | |
8937 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
8938 | + | |
8939 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
8940 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
8941 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
8942 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
8943 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
8944 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
8945 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
8946 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
8947 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
8948 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
8949 | + | |
8950 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
8951 | +#define mutex_lock(l) _mutex_lock(l) | |
8952 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
8953 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
8954 | +#define mutex_trylock(l) _mutex_trylock(l) | |
8955 | +#define mutex_unlock(l) _mutex_unlock(l) | |
8956 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) | |
8957 | + | |
8958 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
8959 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
8960 | +# define mutex_lock_interruptible_nested(l, s) \ | |
8961 | + _mutex_lock_interruptible_nested(l, s) | |
8962 | +# define mutex_lock_killable_nested(l, s) \ | |
8963 | + _mutex_lock_killable_nested(l, s) | |
8964 | + | |
8965 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
8966 | +do { \ | |
8967 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
8968 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
8969 | +} while (0) | |
8970 | + | |
8971 | +#else | |
8972 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
8973 | +# define mutex_lock_interruptible_nested(l, s) \ | |
8974 | + _mutex_lock_interruptible(l) | |
8975 | +# define mutex_lock_killable_nested(l, s) \ | |
8976 | + _mutex_lock_killable(l) | |
8977 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
8978 | +#endif | |
8979 | + | |
8980 | +# define mutex_init(mutex) \ | |
8981 | +do { \ | |
8982 | + static struct lock_class_key __key; \ | |
8983 | + \ | |
8984 | + rt_mutex_init(&(mutex)->lock); \ | |
8985 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
8986 | +} while (0) | |
8987 | + | |
8988 | +# define __mutex_init(mutex, name, key) \ | |
8989 | +do { \ | |
8990 | + rt_mutex_init(&(mutex)->lock); \ | |
8991 | + __mutex_do_init((mutex), name, key); \ | |
8992 | +} while (0) | |
8993 | + | |
8994 | +#endif | |
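With PREEMPT_RT_FULL a struct mutex simply wraps an rt_mutex (priority inheriting), and the whole regular API is mapped onto the _mutex_*() functions above, so existing users compile and behave the same. A sketch of an unchanged caller, with invented names:

        #include <linux/mutex.h>

        static DEFINE_MUTEX(cfg_lock);          /* rt_mutex-backed on RT */
        static int cfg_value;

        static void set_cfg(int v)
        {
                mutex_lock(&cfg_lock);          /* PI-aware rt_mutex lock on RT */
                cfg_value = v;
                mutex_unlock(&cfg_lock);
        }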
8995 | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h | |
8996 | index e8d79d4ebcfe..2ae8fa187016 100644 | |
8997 | --- a/include/linux/netdevice.h | |
8998 | +++ b/include/linux/netdevice.h | |
8999 | @@ -2409,14 +2409,53 @@ void netdev_freemem(struct net_device *dev); | |
9000 | void synchronize_net(void); | |
9001 | int init_dummy_netdev(struct net_device *dev); | |
9002 | ||
9003 | -DECLARE_PER_CPU(int, xmit_recursion); | |
9004 | #define XMIT_RECURSION_LIMIT 10 | |
9005 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9006 | +static inline int dev_recursion_level(void) | |
9007 | +{ | |
9008 | + return current->xmit_recursion; | |
9009 | +} | |
9010 | + | |
9011 | +static inline int xmit_rec_read(void) | |
9012 | +{ | |
9013 | + return current->xmit_recursion; | |
9014 | +} | |
9015 | + | |
9016 | +static inline void xmit_rec_inc(void) | |
9017 | +{ | |
9018 | + current->xmit_recursion++; | |
9019 | +} | |
9020 | + | |
9021 | +static inline void xmit_rec_dec(void) | |
9022 | +{ | |
9023 | + current->xmit_recursion--; | |
9024 | +} | |
9025 | + | |
9026 | +#else | |
9027 | + | |
9028 | +DECLARE_PER_CPU(int, xmit_recursion); | |
9029 | ||
9030 | static inline int dev_recursion_level(void) | |
9031 | { | |
9032 | return this_cpu_read(xmit_recursion); | |
9033 | } | |
9034 | ||
9035 | +static inline int xmit_rec_read(void) | |
9036 | +{ | |
9037 | + return __this_cpu_read(xmit_recursion); | |
9038 | +} | |
9039 | + | |
9040 | +static inline void xmit_rec_inc(void) | |
9041 | +{ | |
9042 | + __this_cpu_inc(xmit_recursion); | |
9043 | +} | |
9044 | + | |
9045 | +static inline void xmit_rec_dec(void) | |
9046 | +{ | |
9047 | + __this_cpu_dec(xmit_recursion); | |
9048 | +} | |
9049 | +#endif | |
9050 | + | |
9051 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
9052 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
9053 | struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); | |
9054 | @@ -2794,6 +2833,7 @@ struct softnet_data { | |
9055 | unsigned int dropped; | |
9056 | struct sk_buff_head input_pkt_queue; | |
9057 | struct napi_struct backlog; | |
9058 | + struct sk_buff_head tofree_queue; | |
9059 | ||
9060 | }; | |
9061 | ||
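Because the RT transmit path may be preempted and migrated, the xmit recursion counter moves from a per-CPU variable into task_struct (current->xmit_recursion); the xmit_rec_*() helpers hide that difference from net/core/dev.c. A sketch of the guard pattern those helpers support; the functions below are invented and only illustrate the API, the real check sits around the device xmit call in net/core/dev.c:

        #include <linux/netdevice.h>

        static bool may_recurse_into_xmit(void)
        {
                return xmit_rec_read() <= XMIT_RECURSION_LIMIT;
        }

        static void guarded_xmit_section(void)
        {
                if (!may_recurse_into_xmit())
                        return;                 /* too deep: drop rather than recurse */

                xmit_rec_inc();
                /* ... hand the skb to the device's transmit path ... */
                xmit_rec_dec();
        }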
9062 | diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h | |
9063 | index 2ad1a2b289b5..b4d10155af54 100644 | |
9064 | --- a/include/linux/netfilter/x_tables.h | |
9065 | +++ b/include/linux/netfilter/x_tables.h | |
9066 | @@ -4,6 +4,7 @@ | |
9067 | ||
9068 | #include <linux/netdevice.h> | |
9069 | #include <linux/static_key.h> | |
9070 | +#include <linux/locallock.h> | |
9071 | #include <uapi/linux/netfilter/x_tables.h> | |
9072 | ||
9073 | /* Test a struct->invflags and a boolean for inequality */ | |
9074 | @@ -300,6 +301,8 @@ void xt_free_table_info(struct xt_table_info *info); | |
9075 | */ | |
9076 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
9077 | ||
9078 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); | |
9079 | + | |
9080 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
9081 | * | |
9082 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
9083 | @@ -320,6 +323,9 @@ static inline unsigned int xt_write_recseq_begin(void) | |
9084 | { | |
9085 | unsigned int addend; | |
9086 | ||
9087 | + /* RT protection */ | |
9088 | + local_lock(xt_write_lock); | |
9089 | + | |
9090 | /* | |
9091 | * Low order bit of sequence is set if we already | |
9092 | * called xt_write_recseq_begin(). | |
9093 | @@ -350,6 +356,7 @@ static inline void xt_write_recseq_end(unsigned int addend) | |
9094 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
9095 | smp_wmb(); | |
9096 | __this_cpu_add(xt_recseq.sequence, addend); | |
9097 | + local_unlock(xt_write_lock); | |
9098 | } | |
9099 | ||
9100 | /* | |
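xt_write_recseq_begin()/end() already bump a per-CPU seqcount around rule traversal; on RT the writer could be preempted inside that window and leave readers spinning, so a local lock (xt_write_lock) is taken for the duration. The caller pattern stays what ipt_do_table()-style code already does; a sketch with the traversal itself elided:

        #include <linux/bottom_half.h>
        #include <linux/netfilter/x_tables.h>

        static void walk_rules_example(void)
        {
                unsigned int addend;

                local_bh_disable();
                addend = xt_write_recseq_begin();       /* takes xt_write_lock on RT */
                /* ... traverse the ruleset, update per-CPU counters ... */
                xt_write_recseq_end(addend);            /* drops xt_write_lock on RT */
                local_bh_enable();
        }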
9101 | diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h | |
9102 | index 810124b33327..d54ca43d571f 100644 | |
9103 | --- a/include/linux/nfs_fs.h | |
9104 | +++ b/include/linux/nfs_fs.h | |
9105 | @@ -165,7 +165,11 @@ struct nfs_inode { | |
9106 | ||
9107 | /* Readers: in-flight sillydelete RPC calls */ | |
9108 | /* Writers: rmdir */ | |
9109 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9110 | + struct semaphore rmdir_sem; | |
9111 | +#else | |
9112 | struct rw_semaphore rmdir_sem; | |
9113 | +#endif | |
9114 | ||
9115 | #if IS_ENABLED(CONFIG_NFS_V4) | |
9116 | struct nfs4_cached_acl *nfs4_acl; | |
9117 | diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h | |
9118 | index 7cc0deee5bde..a20f49ee69ee 100644 | |
9119 | --- a/include/linux/nfs_xdr.h | |
9120 | +++ b/include/linux/nfs_xdr.h | |
9121 | @@ -1484,7 +1484,7 @@ struct nfs_unlinkdata { | |
9122 | struct nfs_removeargs args; | |
9123 | struct nfs_removeres res; | |
9124 | struct dentry *dentry; | |
9125 | - wait_queue_head_t wq; | |
9126 | + struct swait_queue_head wq; | |
9127 | struct rpc_cred *cred; | |
9128 | struct nfs_fattr dir_attr; | |
9129 | long timeout; | |
9130 | diff --git a/include/linux/notifier.h b/include/linux/notifier.h | |
9131 | index 4149868de4e6..babe5b9bcb91 100644 | |
9132 | --- a/include/linux/notifier.h | |
9133 | +++ b/include/linux/notifier.h | |
9134 | @@ -6,7 +6,7 @@ | |
9135 | * | |
9136 | * Alan Cox <Alan.Cox@linux.org> | |
9137 | */ | |
9138 | - | |
9139 | + | |
9140 | #ifndef _LINUX_NOTIFIER_H | |
9141 | #define _LINUX_NOTIFIER_H | |
9142 | #include <linux/errno.h> | |
9143 | @@ -42,9 +42,7 @@ | |
9144 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
9145 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
9146 | * SRCU notifier chains should be used when the chain will be called very | |
9147 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
9148 | - * chains are slightly more difficult to use because they require special | |
9149 | - * runtime initialization. | |
9150 | + * often but notifier_blocks will seldom be removed. | |
9151 | */ | |
9152 | ||
9153 | struct notifier_block; | |
9154 | @@ -90,7 +88,7 @@ struct srcu_notifier_head { | |
9155 | (name)->head = NULL; \ | |
9156 | } while (0) | |
9157 | ||
9158 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ | |
9159 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
9160 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
9161 | #define srcu_cleanup_notifier_head(name) \ | |
9162 | cleanup_srcu_struct(&(name)->srcu); | |
9163 | @@ -103,7 +101,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
9164 | .head = NULL } | |
9165 | #define RAW_NOTIFIER_INIT(name) { \ | |
9166 | .head = NULL } | |
9167 | -/* srcu_notifier_heads cannot be initialized statically */ | |
9168 | + | |
9169 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
9170 | + { \ | |
9171 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
9172 | + .head = NULL, \ | |
9173 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
9174 | + } | |
9175 | ||
9176 | #define ATOMIC_NOTIFIER_HEAD(name) \ | |
9177 | struct atomic_notifier_head name = \ | |
9178 | @@ -115,6 +119,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
9179 | struct raw_notifier_head name = \ | |
9180 | RAW_NOTIFIER_INIT(name) | |
9181 | ||
9182 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
9183 | + static DEFINE_PER_CPU(struct srcu_struct_array, \ | |
9184 | + name##_head_srcu_array); \ | |
9185 | + mod struct srcu_notifier_head name = \ | |
9186 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) | |
9187 | + | |
9188 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
9189 | + _SRCU_NOTIFIER_HEAD(name, ) | |
9190 | + | |
9191 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
9192 | + _SRCU_NOTIFIER_HEAD(name, static) | |
9193 | + | |
9194 | #ifdef __KERNEL__ | |
9195 | ||
9196 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | |
9197 | @@ -184,12 +200,12 @@ static inline int notifier_to_errno(int ret) | |
9198 | ||
9199 | /* | |
9200 | * Declared notifiers so far. I can imagine quite a few more chains | |
9201 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
9202 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
9203 | * device units up), device [un]mount chain, module load/unload chain, | |
9204 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
9205 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
9206 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
9207 | */ | |
9208 | - | |
9209 | + | |
9210 | /* CPU notifiers are defined in include/linux/cpu.h. */ | |
9211 | ||
9212 | /* netdevice notifiers are defined in include/linux/netdevice.h */ | |
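SRCU notifier heads could previously only be set up at runtime with srcu_init_notifier_head(); the new SRCU_NOTIFIER_INIT/_HEAD macros allow static definition, which RT needs when converting atomic notifier chains (whose callbacks may now sleep) to SRCU chains. A sketch with invented names, assuming the __SRCU_STRUCT_INIT() initializer referenced above is available:

        #include <linux/notifier.h>

        SRCU_NOTIFIER_HEAD_STATIC(my_chain);    /* no runtime init call needed */

        static int my_event_cb(struct notifier_block *nb, unsigned long ev, void *data)
        {
                return NOTIFY_OK;
        }

        static struct notifier_block my_nb = { .notifier_call = my_event_cb };

        static void wire_up_and_fire(void)
        {
                srcu_notifier_chain_register(&my_chain, &my_nb);
                srcu_notifier_call_chain(&my_chain, 0, NULL);
        }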
9213 | diff --git a/include/linux/percpu.h b/include/linux/percpu.h | |
9214 | index 56939d3f6e53..1c7e33fc83e4 100644 | |
9215 | --- a/include/linux/percpu.h | |
9216 | +++ b/include/linux/percpu.h | |
9217 | @@ -18,6 +18,35 @@ | |
9218 | #define PERCPU_MODULE_RESERVE 0 | |
9219 | #endif | |
9220 | ||
9221 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9222 | + | |
9223 | +#define get_local_var(var) (*({ \ | |
9224 | + migrate_disable(); \ | |
9225 | + this_cpu_ptr(&var); })) | |
9226 | + | |
9227 | +#define put_local_var(var) do { \ | |
9228 | + (void)&(var); \ | |
9229 | + migrate_enable(); \ | |
9230 | +} while (0) | |
9231 | + | |
9232 | +# define get_local_ptr(var) ({ \ | |
9233 | + migrate_disable(); \ | |
9234 | + this_cpu_ptr(var); }) | |
9235 | + | |
9236 | +# define put_local_ptr(var) do { \ | |
9237 | + (void)(var); \ | |
9238 | + migrate_enable(); \ | |
9239 | +} while (0) | |
9240 | + | |
9241 | +#else | |
9242 | + | |
9243 | +#define get_local_var(var) get_cpu_var(var) | |
9244 | +#define put_local_var(var) put_cpu_var(var) | |
9245 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
9246 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
9247 | + | |
9248 | +#endif | |
9249 | + | |
9250 | /* minimum unit size, also is the maximum supported allocation size */ | |
9251 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
9252 | ||
9253 | diff --git a/include/linux/pid.h b/include/linux/pid.h | |
9254 | index 23705a53abba..2cc64b779f03 100644 | |
9255 | --- a/include/linux/pid.h | |
9256 | +++ b/include/linux/pid.h | |
9257 | @@ -2,6 +2,7 @@ | |
9258 | #define _LINUX_PID_H | |
9259 | ||
9260 | #include <linux/rcupdate.h> | |
9261 | +#include <linux/atomic.h> | |
9262 | ||
9263 | enum pid_type | |
9264 | { | |
9265 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h | |
9266 | index 75e4e30677f1..1cfb1cb72354 100644 | |
9267 | --- a/include/linux/preempt.h | |
9268 | +++ b/include/linux/preempt.h | |
9269 | @@ -50,7 +50,11 @@ | |
9270 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
9271 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
9272 | ||
9273 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
9274 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9275 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
9276 | +#else | |
9277 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
9278 | +#endif | |
9279 | ||
9280 | /* We use the MSB mostly because its available */ | |
9281 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
9282 | @@ -59,9 +63,15 @@ | |
9283 | #include <asm/preempt.h> | |
9284 | ||
9285 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | |
9286 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
9287 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
9288 | | NMI_MASK)) | |
9289 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9290 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
9291 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
9292 | +#else | |
9293 | +# define softirq_count() (0UL) | |
9294 | +extern int in_serving_softirq(void); | |
9295 | +#endif | |
9296 | ||
9297 | /* | |
9298 | * Are we doing bottom half or hardware interrupt processing? | |
9299 | @@ -72,7 +82,6 @@ | |
9300 | #define in_irq() (hardirq_count()) | |
9301 | #define in_softirq() (softirq_count()) | |
9302 | #define in_interrupt() (irq_count()) | |
9303 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
9304 | ||
9305 | /* | |
9306 | * Are we in NMI context? | |
9307 | @@ -91,7 +100,11 @@ | |
9308 | /* | |
9309 | * The preempt_count offset after spin_lock() | |
9310 | */ | |
9311 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
9312 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
9313 | +#else | |
9314 | +#define PREEMPT_LOCK_OFFSET 0 | |
9315 | +#endif | |
9316 | ||
9317 | /* | |
9318 | * The preempt_count offset needed for things like: | |
9319 | @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val); | |
9320 | #define preempt_count_inc() preempt_count_add(1) | |
9321 | #define preempt_count_dec() preempt_count_sub(1) | |
9322 | ||
9323 | +#ifdef CONFIG_PREEMPT_LAZY | |
9324 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
9325 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
9326 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
9327 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
9328 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
9329 | +#else | |
9330 | +#define add_preempt_lazy_count(val) do { } while (0) | |
9331 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
9332 | +#define inc_preempt_lazy_count() do { } while (0) | |
9333 | +#define dec_preempt_lazy_count() do { } while (0) | |
9334 | +#define preempt_lazy_count() (0) | |
9335 | +#endif | |
9336 | + | |
9337 | #ifdef CONFIG_PREEMPT_COUNT | |
9338 | ||
9339 | #define preempt_disable() \ | |
9340 | @@ -148,13 +175,25 @@ do { \ | |
9341 | barrier(); \ | |
9342 | } while (0) | |
9343 | ||
9344 | +#define preempt_lazy_disable() \ | |
9345 | +do { \ | |
9346 | + inc_preempt_lazy_count(); \ | |
9347 | + barrier(); \ | |
9348 | +} while (0) | |
9349 | + | |
9350 | #define sched_preempt_enable_no_resched() \ | |
9351 | do { \ | |
9352 | barrier(); \ | |
9353 | preempt_count_dec(); \ | |
9354 | } while (0) | |
9355 | ||
9356 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
9357 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
9358 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
9359 | +# define preempt_check_resched_rt() preempt_check_resched() | |
9360 | +#else | |
9361 | +# define preempt_enable_no_resched() preempt_enable() | |
9362 | +# define preempt_check_resched_rt() barrier(); | |
9363 | +#endif | |
9364 | ||
9365 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | |
9366 | ||
9367 | @@ -179,6 +218,13 @@ do { \ | |
9368 | __preempt_schedule(); \ | |
9369 | } while (0) | |
9370 | ||
9371 | +#define preempt_lazy_enable() \ | |
9372 | +do { \ | |
9373 | + dec_preempt_lazy_count(); \ | |
9374 | + barrier(); \ | |
9375 | + preempt_check_resched(); \ | |
9376 | +} while (0) | |
9377 | + | |
9378 | #else /* !CONFIG_PREEMPT */ | |
9379 | #define preempt_enable() \ | |
9380 | do { \ | |
9381 | @@ -224,6 +270,7 @@ do { \ | |
9382 | #define preempt_disable_notrace() barrier() | |
9383 | #define preempt_enable_no_resched_notrace() barrier() | |
9384 | #define preempt_enable_notrace() barrier() | |
9385 | +#define preempt_check_resched_rt() barrier() | |
9386 | #define preemptible() 0 | |
9387 | ||
9388 | #endif /* CONFIG_PREEMPT_COUNT */ | |
9389 | @@ -244,10 +291,31 @@ do { \ | |
9390 | } while (0) | |
9391 | #define preempt_fold_need_resched() \ | |
9392 | do { \ | |
9393 | - if (tif_need_resched()) \ | |
9394 | + if (tif_need_resched_now()) \ | |
9395 | set_preempt_need_resched(); \ | |
9396 | } while (0) | |
9397 | ||
9398 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9399 | +# define preempt_disable_rt() preempt_disable() | |
9400 | +# define preempt_enable_rt() preempt_enable() | |
9401 | +# define preempt_disable_nort() barrier() | |
9402 | +# define preempt_enable_nort() barrier() | |
9403 | +# ifdef CONFIG_SMP | |
9404 | + extern void migrate_disable(void); | |
9405 | + extern void migrate_enable(void); | |
9406 | +# else /* CONFIG_SMP */ | |
9407 | +# define migrate_disable() barrier() | |
9408 | +# define migrate_enable() barrier() | |
9409 | +# endif /* CONFIG_SMP */ | |
9410 | +#else | |
9411 | +# define preempt_disable_rt() barrier() | |
9412 | +# define preempt_enable_rt() barrier() | |
9413 | +# define preempt_disable_nort() preempt_disable() | |
9414 | +# define preempt_enable_nort() preempt_enable() | |
9415 | +# define migrate_disable() preempt_disable() | |
9416 | +# define migrate_enable() preempt_enable() | |
9417 | +#endif | |
9418 | + | |
9419 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
9420 | ||
9421 | struct preempt_notifier; | |
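migrate_disable()/migrate_enable() are the RT counterpart to preempt_disable(): the task is pinned to its current CPU but remains preemptible and may take sleeping locks; on !RT they simply map to preempt_disable()/enable(). A sketch of the intended usage pattern (the function is invented; RT also relaxes the smp_processor_id() debug check for this case):

        #include <linux/preempt.h>
        #include <linux/printk.h>
        #include <linux/smp.h>

        static void poke_this_cpu(void)
        {
                migrate_disable();      /* stay on this CPU; still preemptible on RT */
                /* smp_processor_id() is stable inside this section */
                pr_debug("running on CPU %d\n", smp_processor_id());
                migrate_enable();
        }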
9422 | diff --git a/include/linux/printk.h b/include/linux/printk.h | |
9423 | index 696a56be7d3e..310aa321ef0c 100644 | |
9424 | --- a/include/linux/printk.h | |
9425 | +++ b/include/linux/printk.h | |
9426 | @@ -125,9 +125,11 @@ struct va_format { | |
9427 | #ifdef CONFIG_EARLY_PRINTK | |
9428 | extern asmlinkage __printf(1, 2) | |
9429 | void early_printk(const char *fmt, ...); | |
9430 | +extern void printk_kill(void); | |
9431 | #else | |
9432 | static inline __printf(1, 2) __cold | |
9433 | void early_printk(const char *s, ...) { } | |
9434 | +static inline void printk_kill(void) { } | |
9435 | #endif | |
9436 | ||
9437 | #ifdef CONFIG_PRINTK_NMI | |
9438 | diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h | |
9439 | index 52b97db93830..fd9ea1c68db6 100644 | |
9440 | --- a/include/linux/radix-tree.h | |
9441 | +++ b/include/linux/radix-tree.h | |
9442 | @@ -289,9 +289,19 @@ unsigned int radix_tree_gang_lookup(struct radix_tree_root *root, | |
9443 | unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, | |
9444 | void ***results, unsigned long *indices, | |
9445 | unsigned long first_index, unsigned int max_items); | |
9446 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9447 | +static inline int radix_tree_preload(gfp_t gm) { return 0; } | |
9448 | +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; } | |
9449 | +static inline int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
9450 | +{ | |
9451 | + return 0; | |
9452 | +}; | |
9453 | + | |
9454 | +#else | |
9455 | int radix_tree_preload(gfp_t gfp_mask); | |
9456 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
9457 | int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order); | |
9458 | +#endif | |
9459 | void radix_tree_init(void); | |
9460 | void *radix_tree_tag_set(struct radix_tree_root *root, | |
9461 | unsigned long index, unsigned int tag); | |
9462 | @@ -316,7 +326,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); | |
9463 | ||
9464 | static inline void radix_tree_preload_end(void) | |
9465 | { | |
9466 | - preempt_enable(); | |
9467 | + preempt_enable_nort(); | |
9468 | } | |
9469 | ||
9470 | /** | |
9471 | diff --git a/include/linux/random.h b/include/linux/random.h | |
9472 | index 3d6e9815cd85..f6e8860b6494 100644 | |
9473 | --- a/include/linux/random.h | |
9474 | +++ b/include/linux/random.h | |
9475 | @@ -20,7 +20,7 @@ struct random_ready_callback { | |
9476 | extern void add_device_randomness(const void *, unsigned int); | |
9477 | extern void add_input_randomness(unsigned int type, unsigned int code, | |
9478 | unsigned int value); | |
9479 | -extern void add_interrupt_randomness(int irq, int irq_flags); | |
9480 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip); | |
9481 | ||
9482 | extern void get_random_bytes(void *buf, int nbytes); | |
9483 | extern int add_random_ready_callback(struct random_ready_callback *rdy); | |
9484 | diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h | |
9485 | index e585018498d5..25c64474fc27 100644 | |
9486 | --- a/include/linux/rbtree.h | |
9487 | +++ b/include/linux/rbtree.h | |
9488 | @@ -31,7 +31,7 @@ | |
9489 | ||
9490 | #include <linux/kernel.h> | |
9491 | #include <linux/stddef.h> | |
9492 | -#include <linux/rcupdate.h> | |
9493 | +#include <linux/rcu_assign_pointer.h> | |
9494 | ||
9495 | struct rb_node { | |
9496 | unsigned long __rb_parent_color; | |
9497 | diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h | |
9498 | index d076183e49be..36bfb4dd57ae 100644 | |
9499 | --- a/include/linux/rbtree_augmented.h | |
9500 | +++ b/include/linux/rbtree_augmented.h | |
9501 | @@ -26,6 +26,7 @@ | |
9502 | ||
9503 | #include <linux/compiler.h> | |
9504 | #include <linux/rbtree.h> | |
9505 | +#include <linux/rcupdate.h> | |
9506 | ||
9507 | /* | |
9508 | * Please note - only struct rb_augment_callbacks and the prototypes for | |
9509 | diff --git a/include/linux/rcu_assign_pointer.h b/include/linux/rcu_assign_pointer.h | |
9510 | new file mode 100644 | |
9511 | index 000000000000..7066962a4379 | |
9512 | --- /dev/null | |
9513 | +++ b/include/linux/rcu_assign_pointer.h | |
9514 | @@ -0,0 +1,54 @@ | |
9515 | +#ifndef __LINUX_RCU_ASSIGN_POINTER_H__ | |
9516 | +#define __LINUX_RCU_ASSIGN_POINTER_H__ | |
9517 | +#include <linux/compiler.h> | |
9518 | +#include <asm/barrier.h> | |
9519 | + | |
9520 | +/** | |
9521 | + * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
9522 | + * @v: The value to statically initialize with. | |
9523 | + */ | |
9524 | +#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
9525 | + | |
9526 | +/** | |
9527 | + * rcu_assign_pointer() - assign to RCU-protected pointer | |
9528 | + * @p: pointer to assign to | |
9529 | + * @v: value to assign (publish) | |
9530 | + * | |
9531 | + * Assigns the specified value to the specified RCU-protected | |
9532 | + * pointer, ensuring that any concurrent RCU readers will see | |
9533 | + * any prior initialization. | |
9534 | + * | |
9535 | + * Inserts memory barriers on architectures that require them | |
9536 | + * (which is most of them), and also prevents the compiler from | |
9537 | + * reordering the code that initializes the structure after the pointer | |
9538 | + * assignment. More importantly, this call documents which pointers | |
9539 | + * will be dereferenced by RCU read-side code. | |
9540 | + * | |
9541 | + * In some special cases, you may use RCU_INIT_POINTER() instead | |
9542 | + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
9543 | + * to the fact that it does not constrain either the CPU or the compiler. | |
9544 | + * That said, using RCU_INIT_POINTER() when you should have used | |
9545 | + * rcu_assign_pointer() is a very bad thing that results in | |
9546 | + * impossible-to-diagnose memory corruption. So please be careful. | |
9547 | + * See the RCU_INIT_POINTER() comment header for details. | |
9548 | + * | |
9549 | + * Note that rcu_assign_pointer() evaluates each of its arguments only | |
9550 | + * once, appearances notwithstanding. One of the "extra" evaluations | |
9551 | + * is in typeof() and the other visible only to sparse (__CHECKER__), | |
9552 | + * neither of which actually execute the argument. As with most cpp | |
9553 | + * macros, this execute-arguments-only-once property is important, so | |
9554 | + * please be careful when making changes to rcu_assign_pointer() and the | |
9555 | + * other macros that it invokes. | |
9556 | + */ | |
9557 | +#define rcu_assign_pointer(p, v) \ | |
9558 | +({ \ | |
9559 | + uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
9560 | + \ | |
9561 | + if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
9562 | + WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
9563 | + else \ | |
9564 | + smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
9565 | + _r_a_p__v; \ | |
9566 | +}) | |
9567 | + | |
9568 | +#endif | |
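rcu_assign_pointer() moves into its own small header so low-level headers (rbtree.h above) can publish RCU-protected pointers without dragging in all of rcupdate.h. Its semantics are unchanged; a short usage sketch with an invented config structure (freeing the old pointer after a grace period is not shown):

        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct cfg {
                int threshold;
        };

        static struct cfg __rcu *active_cfg;

        static int publish_cfg(int threshold)
        {
                struct cfg *nc = kmalloc(sizeof(*nc), GFP_KERNEL);

                if (!nc)
                        return -ENOMEM;
                nc->threshold = threshold;

                /* pairs with rcu_dereference() on the reader side */
                rcu_assign_pointer(active_cfg, nc);
                return 0;
        }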
9569 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | |
9570 | index 1aa62e1a761b..2a614acb433e 100644 | |
9571 | --- a/include/linux/rcupdate.h | |
9572 | +++ b/include/linux/rcupdate.h | |
9573 | @@ -46,6 +46,7 @@ | |
9574 | #include <linux/compiler.h> | |
9575 | #include <linux/ktime.h> | |
9576 | #include <linux/irqflags.h> | |
9577 | +#include <linux/rcu_assign_pointer.h> | |
9578 | ||
9579 | #include <asm/barrier.h> | |
9580 | ||
9581 | @@ -178,6 +179,9 @@ void call_rcu(struct rcu_head *head, | |
9582 | ||
9583 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
9584 | ||
9585 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9586 | +#define call_rcu_bh call_rcu | |
9587 | +#else | |
9588 | /** | |
9589 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
9590 | * @head: structure to be used for queueing the RCU updates. | |
9591 | @@ -201,6 +205,7 @@ void call_rcu(struct rcu_head *head, | |
9592 | */ | |
9593 | void call_rcu_bh(struct rcu_head *head, | |
9594 | rcu_callback_t func); | |
9595 | +#endif | |
9596 | ||
9597 | /** | |
9598 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | |
9599 | @@ -301,6 +306,11 @@ void synchronize_rcu(void); | |
9600 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
9601 | */ | |
9602 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
9603 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9604 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
9605 | +#else | |
9606 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
9607 | +#endif | |
9608 | ||
9609 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | |
9610 | ||
9611 | @@ -326,6 +336,8 @@ static inline int rcu_preempt_depth(void) | |
9612 | return 0; | |
9613 | } | |
9614 | ||
9615 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
9616 | + | |
9617 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
9618 | ||
9619 | /* Internal to kernel */ | |
9620 | @@ -500,7 +512,14 @@ extern struct lockdep_map rcu_callback_map; | |
9621 | int debug_lockdep_rcu_enabled(void); | |
9622 | ||
9623 | int rcu_read_lock_held(void); | |
9624 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9625 | +static inline int rcu_read_lock_bh_held(void) | |
9626 | +{ | |
9627 | + return rcu_read_lock_held(); | |
9628 | +} | |
9629 | +#else | |
9630 | int rcu_read_lock_bh_held(void); | |
9631 | +#endif | |
9632 | ||
9633 | /** | |
9634 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
9635 | @@ -621,54 +640,6 @@ static inline void rcu_preempt_sleep_check(void) | |
9636 | }) | |
9637 | ||
9638 | /** | |
9639 | - * RCU_INITIALIZER() - statically initialize an RCU-protected global variable | |
9640 | - * @v: The value to statically initialize with. | |
9641 | - */ | |
9642 | -#define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) | |
9643 | - | |
9644 | -/** | |
9645 | - * rcu_assign_pointer() - assign to RCU-protected pointer | |
9646 | - * @p: pointer to assign to | |
9647 | - * @v: value to assign (publish) | |
9648 | - * | |
9649 | - * Assigns the specified value to the specified RCU-protected | |
9650 | - * pointer, ensuring that any concurrent RCU readers will see | |
9651 | - * any prior initialization. | |
9652 | - * | |
9653 | - * Inserts memory barriers on architectures that require them | |
9654 | - * (which is most of them), and also prevents the compiler from | |
9655 | - * reordering the code that initializes the structure after the pointer | |
9656 | - * assignment. More importantly, this call documents which pointers | |
9657 | - * will be dereferenced by RCU read-side code. | |
9658 | - * | |
9659 | - * In some special cases, you may use RCU_INIT_POINTER() instead | |
9660 | - * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due | |
9661 | - * to the fact that it does not constrain either the CPU or the compiler. | |
9662 | - * That said, using RCU_INIT_POINTER() when you should have used | |
9663 | - * rcu_assign_pointer() is a very bad thing that results in | |
9664 | - * impossible-to-diagnose memory corruption. So please be careful. | |
9665 | - * See the RCU_INIT_POINTER() comment header for details. | |
9666 | - * | |
9667 | - * Note that rcu_assign_pointer() evaluates each of its arguments only | |
9668 | - * once, appearances notwithstanding. One of the "extra" evaluations | |
9669 | - * is in typeof() and the other visible only to sparse (__CHECKER__), | |
9670 | - * neither of which actually execute the argument. As with most cpp | |
9671 | - * macros, this execute-arguments-only-once property is important, so | |
9672 | - * please be careful when making changes to rcu_assign_pointer() and the | |
9673 | - * other macros that it invokes. | |
9674 | - */ | |
9675 | -#define rcu_assign_pointer(p, v) \ | |
9676 | -({ \ | |
9677 | - uintptr_t _r_a_p__v = (uintptr_t)(v); \ | |
9678 | - \ | |
9679 | - if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL) \ | |
9680 | - WRITE_ONCE((p), (typeof(p))(_r_a_p__v)); \ | |
9681 | - else \ | |
9682 | - smp_store_release(&p, RCU_INITIALIZER((typeof(p))_r_a_p__v)); \ | |
9683 | - _r_a_p__v; \ | |
9684 | -}) | |
9685 | - | |
9686 | -/** | |
9687 | * rcu_access_pointer() - fetch RCU pointer with no dereferencing | |
9688 | * @p: The pointer to read | |
9689 | * | |
9690 | @@ -946,10 +917,14 @@ static inline void rcu_read_unlock(void) | |
9691 | static inline void rcu_read_lock_bh(void) | |
9692 | { | |
9693 | local_bh_disable(); | |
9694 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9695 | + rcu_read_lock(); | |
9696 | +#else | |
9697 | __acquire(RCU_BH); | |
9698 | rcu_lock_acquire(&rcu_bh_lock_map); | |
9699 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
9700 | "rcu_read_lock_bh() used illegally while idle"); | |
9701 | +#endif | |
9702 | } | |
9703 | ||
9704 | /* | |
9705 | @@ -959,10 +934,14 @@ static inline void rcu_read_lock_bh(void) | |
9706 | */ | |
9707 | static inline void rcu_read_unlock_bh(void) | |
9708 | { | |
9709 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9710 | + rcu_read_unlock(); | |
9711 | +#else | |
9712 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
9713 | "rcu_read_unlock_bh() used illegally while idle"); | |
9714 | rcu_lock_release(&rcu_bh_lock_map); | |
9715 | __release(RCU_BH); | |
9716 | +#endif | |
9717 | local_bh_enable(); | |
9718 | } | |
9719 | ||
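
A pattern that repeats throughout this rcupdate.h hunk (and much of the series) is that PREEMPT_RT_FULL folds the softirq (_bh) flavor of an API onto the plain flavor: call_rcu_bh becomes a #define alias for call_rcu, and rcu_read_lock_bh()/rcu_read_unlock_bh() take a normal rcu_read_lock() inside the local_bh_disable() section instead of using the bh-flavored machinery. The sketch below shows only the compile-time aliasing shape of that change; CONFIG_PREEMPT_RT_FULL is treated as an ordinary macro and the functions are invented for the example.

#include <stdio.h>

/* Plain flavor, always present. */
static void queue_work_plain(const char *what)
{
	printf("plain: %s\n", what);
}

#ifdef CONFIG_PREEMPT_RT_FULL
/* RT: the _bh flavor is just an alias; callers need no source changes. */
#define queue_work_bh	queue_work_plain
#else
/* Non-RT: the _bh flavor keeps its own implementation. */
static void queue_work_bh(const char *what)
{
	printf("bh: %s\n", what);
}
#endif

int main(void)
{
	queue_work_plain("normal work");
	queue_work_bh("bh work");	/* same call site in both configurations */
	return 0;
}
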
9720 | diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h | |
9721 | index 63a4e4cf40a5..08ab12df2863 100644 | |
9722 | --- a/include/linux/rcutree.h | |
9723 | +++ b/include/linux/rcutree.h | |
9724 | @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) | |
9725 | rcu_note_context_switch(); | |
9726 | } | |
9727 | ||
9728 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9729 | +# define synchronize_rcu_bh synchronize_rcu | |
9730 | +#else | |
9731 | void synchronize_rcu_bh(void); | |
9732 | +#endif | |
9733 | void synchronize_sched_expedited(void); | |
9734 | void synchronize_rcu_expedited(void); | |
9735 | ||
9736 | @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |
9737 | } | |
9738 | ||
9739 | void rcu_barrier(void); | |
9740 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
9741 | +# define rcu_barrier_bh rcu_barrier | |
9742 | +#else | |
9743 | void rcu_barrier_bh(void); | |
9744 | +#endif | |
9745 | void rcu_barrier_sched(void); | |
9746 | unsigned long get_state_synchronize_rcu(void); | |
9747 | void cond_synchronize_rcu(unsigned long oldstate); | |
9748 | @@ -82,17 +90,14 @@ void cond_synchronize_sched(unsigned long oldstate); | |
9749 | extern unsigned long rcutorture_testseq; | |
9750 | extern unsigned long rcutorture_vernum; | |
9751 | unsigned long rcu_batches_started(void); | |
9752 | -unsigned long rcu_batches_started_bh(void); | |
9753 | unsigned long rcu_batches_started_sched(void); | |
9754 | unsigned long rcu_batches_completed(void); | |
9755 | -unsigned long rcu_batches_completed_bh(void); | |
9756 | unsigned long rcu_batches_completed_sched(void); | |
9757 | unsigned long rcu_exp_batches_completed(void); | |
9758 | unsigned long rcu_exp_batches_completed_sched(void); | |
9759 | void show_rcu_gp_kthreads(void); | |
9760 | ||
9761 | void rcu_force_quiescent_state(void); | |
9762 | -void rcu_bh_force_quiescent_state(void); | |
9763 | void rcu_sched_force_quiescent_state(void); | |
9764 | ||
9765 | void rcu_idle_enter(void); | |
9766 | @@ -109,6 +114,16 @@ extern int rcu_scheduler_active __read_mostly; | |
9767 | ||
9768 | bool rcu_is_watching(void); | |
9769 | ||
9770 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
9771 | +void rcu_bh_force_quiescent_state(void); | |
9772 | +unsigned long rcu_batches_started_bh(void); | |
9773 | +unsigned long rcu_batches_completed_bh(void); | |
9774 | +#else | |
9775 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state | |
9776 | +# define rcu_batches_completed_bh rcu_batches_completed | |
10777 | +# define rcu_batches_started_bh rcu_batches_started | |
9778 | +#endif | |
9779 | + | |
9780 | void rcu_all_qs(void); | |
9781 | ||
9782 | /* RCUtree hotplug events */ | |
9783 | diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h | |
9784 | index 1abba5ce2a2f..30211c627511 100644 | |
9785 | --- a/include/linux/rtmutex.h | |
9786 | +++ b/include/linux/rtmutex.h | |
9787 | @@ -13,11 +13,15 @@ | |
9788 | #define __LINUX_RT_MUTEX_H | |
9789 | ||
9790 | #include <linux/linkage.h> | |
9791 | +#include <linux/spinlock_types_raw.h> | |
9792 | #include <linux/rbtree.h> | |
9793 | -#include <linux/spinlock_types.h> | |
9794 | ||
9795 | extern int max_lock_depth; /* for sysctl */ | |
9796 | ||
9797 | +#ifdef CONFIG_DEBUG_MUTEXES | |
9798 | +#include <linux/debug_locks.h> | |
9799 | +#endif | |
9800 | + | |
9801 | /** | |
9802 | * The rt_mutex structure | |
9803 | * | |
9804 | @@ -31,8 +35,8 @@ struct rt_mutex { | |
9805 | struct rb_root waiters; | |
9806 | struct rb_node *waiters_leftmost; | |
9807 | struct task_struct *owner; | |
9808 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
9809 | int save_state; | |
9810 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
9811 | const char *name, *file; | |
9812 | int line; | |
9813 | void *magic; | |
9814 | @@ -55,22 +59,33 @@ struct hrtimer_sleeper; | |
9815 | # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) | |
9816 | #endif | |
9817 | ||
9818 | +# define rt_mutex_init(mutex) \ | |
9819 | + do { \ | |
9820 | + raw_spin_lock_init(&(mutex)->wait_lock); \ | |
9821 | + __rt_mutex_init(mutex, #mutex); \ | |
9822 | + } while (0) | |
9823 | + | |
9824 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
9825 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
9826 | , .name = #mutexname, .file = __FILE__, .line = __LINE__ | |
9827 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) | |
9828 | extern void rt_mutex_debug_task_free(struct task_struct *tsk); | |
9829 | #else | |
9830 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
9831 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) | |
9832 | # define rt_mutex_debug_task_free(t) do { } while (0) | |
9833 | #endif | |
9834 | ||
9835 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
9836 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
9837 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
9838 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
9839 | , .waiters = RB_ROOT \ | |
9840 | , .owner = NULL \ | |
9841 | - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} | |
9842 | + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
9843 | + | |
9844 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
9845 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
9846 | + | |
9847 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ | |
9848 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
9849 | + , .save_state = 1 } | |
9850 | ||
9851 | #define DEFINE_RT_MUTEX(mutexname) \ | |
9852 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
9853 | @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); | |
9854 | ||
9855 | extern void rt_mutex_lock(struct rt_mutex *lock); | |
9856 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); | |
9857 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
9858 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
9859 | struct hrtimer_sleeper *timeout); | |
9860 | ||
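
The rtmutex.h change above splits the old initializer into a __RT_MUTEX_INITIALIZER_PLAIN() core that both variants share, so the new __RT_MUTEX_INITIALIZER_SAVE_STATE() can append .save_state = 1 without repeating the field list, and rt_mutex_init() becomes one unconditional macro that also initializes the wait_lock. Below is a small, self-contained sketch of that macro-composition idiom with an invented struct; it mirrors the shape of the macros, not the kernel types.

#include <stdio.h>

struct lock {
	int owner;
	int waiters;
	int save_state;
};

/* Shared core of the initializer: no surrounding braces, so callers
 * can append extra designated initializers. */
#define LOCK_INITIALIZER_PLAIN		\
	.owner = -1,			\
	.waiters = 0

/* Plain variant just wraps the core in braces. */
#define LOCK_INITIALIZER		\
	{ LOCK_INITIALIZER_PLAIN }

/* Extended variant appends one more field. */
#define LOCK_INITIALIZER_SAVE_STATE	\
	{ LOCK_INITIALIZER_PLAIN, .save_state = 1 }

static struct lock a = LOCK_INITIALIZER;
static struct lock b = LOCK_INITIALIZER_SAVE_STATE;

int main(void)
{
	printf("a.save_state=%d b.save_state=%d\n",
	       a.save_state, b.save_state);
	return 0;
}
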
9861 | diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h | |
9862 | new file mode 100644 | |
9863 | index 000000000000..49ed2d45d3be | |
9864 | --- /dev/null | |
9865 | +++ b/include/linux/rwlock_rt.h | |
9866 | @@ -0,0 +1,99 @@ | |
9867 | +#ifndef __LINUX_RWLOCK_RT_H | |
9868 | +#define __LINUX_RWLOCK_RT_H | |
9869 | + | |
9870 | +#ifndef __LINUX_SPINLOCK_H | |
9871 | +#error Do not include directly. Use spinlock.h | |
9872 | +#endif | |
9873 | + | |
9874 | +#define rwlock_init(rwl) \ | |
9875 | +do { \ | |
9876 | + static struct lock_class_key __key; \ | |
9877 | + \ | |
9878 | + rt_mutex_init(&(rwl)->lock); \ | |
9879 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
9880 | +} while (0) | |
9881 | + | |
9882 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
9883 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
9884 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
9885 | +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags); | |
9886 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
9887 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
9888 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
9889 | +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); | |
9890 | +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); | |
9891 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
9892 | + | |
9893 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
9894 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
9895 | + | |
9896 | +#define write_trylock_irqsave(lock, flags) \ | |
9897 | + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) | |
9898 | + | |
9899 | +#define read_lock_irqsave(lock, flags) \ | |
9900 | + do { \ | |
9901 | + typecheck(unsigned long, flags); \ | |
9902 | + flags = rt_read_lock_irqsave(lock); \ | |
9903 | + } while (0) | |
9904 | + | |
9905 | +#define write_lock_irqsave(lock, flags) \ | |
9906 | + do { \ | |
9907 | + typecheck(unsigned long, flags); \ | |
9908 | + flags = rt_write_lock_irqsave(lock); \ | |
9909 | + } while (0) | |
9910 | + | |
9911 | +#define read_lock(lock) rt_read_lock(lock) | |
9912 | + | |
9913 | +#define read_lock_bh(lock) \ | |
9914 | + do { \ | |
9915 | + local_bh_disable(); \ | |
9916 | + rt_read_lock(lock); \ | |
9917 | + } while (0) | |
9918 | + | |
9919 | +#define read_lock_irq(lock) read_lock(lock) | |
9920 | + | |
9921 | +#define write_lock(lock) rt_write_lock(lock) | |
9922 | + | |
9923 | +#define write_lock_bh(lock) \ | |
9924 | + do { \ | |
9925 | + local_bh_disable(); \ | |
9926 | + rt_write_lock(lock); \ | |
9927 | + } while (0) | |
9928 | + | |
9929 | +#define write_lock_irq(lock) write_lock(lock) | |
9930 | + | |
9931 | +#define read_unlock(lock) rt_read_unlock(lock) | |
9932 | + | |
9933 | +#define read_unlock_bh(lock) \ | |
9934 | + do { \ | |
9935 | + rt_read_unlock(lock); \ | |
9936 | + local_bh_enable(); \ | |
9937 | + } while (0) | |
9938 | + | |
9939 | +#define read_unlock_irq(lock) read_unlock(lock) | |
9940 | + | |
9941 | +#define write_unlock(lock) rt_write_unlock(lock) | |
9942 | + | |
9943 | +#define write_unlock_bh(lock) \ | |
9944 | + do { \ | |
9945 | + rt_write_unlock(lock); \ | |
9946 | + local_bh_enable(); \ | |
9947 | + } while (0) | |
9948 | + | |
9949 | +#define write_unlock_irq(lock) write_unlock(lock) | |
9950 | + | |
9951 | +#define read_unlock_irqrestore(lock, flags) \ | |
9952 | + do { \ | |
9953 | + typecheck(unsigned long, flags); \ | |
9954 | + (void) flags; \ | |
9955 | + rt_read_unlock(lock); \ | |
9956 | + } while (0) | |
9957 | + | |
9958 | +#define write_unlock_irqrestore(lock, flags) \ | |
9959 | + do { \ | |
9960 | + typecheck(unsigned long, flags); \ | |
9961 | + (void) flags; \ | |
9962 | + rt_write_unlock(lock); \ | |
9963 | + } while (0) | |
9964 | + | |
9965 | +#endif | |
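
The new rwlock_rt.h maps the entire rwlock API onto sleeping rt_mutex based primitives: the _irq variants collapse onto the plain ones (interrupts are never disabled), the _bh variants only wrap the plain lock in local_bh_disable()/local_bh_enable(), and the _irqsave variants keep the flags argument for source compatibility. The userspace sketch below imitates that wrapper layering on top of a pthread rwlock; every name in it is invented, and it only shows how the many variants funnel into one lock/unlock pair.

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t lk = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-ins for local_bh_disable()/local_bh_enable(). */
static void bh_disable(void) { printf("bh off\n"); }
static void bh_enable(void)  { printf("bh on\n"); }

/* Plain variants: the only place the real lock is touched. */
#define my_read_lock()		pthread_rwlock_rdlock(&lk)
#define my_read_unlock()	pthread_rwlock_unlock(&lk)

/* _irq variants collapse onto the plain ones. */
#define my_read_lock_irq()	my_read_lock()
#define my_read_unlock_irq()	my_read_unlock()

/* _bh variants only add the disable/enable bracket. */
#define my_read_lock_bh()	do { bh_disable(); my_read_lock(); } while (0)
#define my_read_unlock_bh()	do { my_read_unlock(); bh_enable(); } while (0)

int main(void)
{
	my_read_lock_bh();
	printf("in read section\n");
	my_read_unlock_bh();
	return 0;
}
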
9966 | diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h | |
9967 | index cc0072e93e36..5317cd957292 100644 | |
9968 | --- a/include/linux/rwlock_types.h | |
9969 | +++ b/include/linux/rwlock_types.h | |
9970 | @@ -1,6 +1,10 @@ | |
9971 | #ifndef __LINUX_RWLOCK_TYPES_H | |
9972 | #define __LINUX_RWLOCK_TYPES_H | |
9973 | ||
9974 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) | |
9975 | +# error "Do not include directly, include spinlock_types.h" | |
9976 | +#endif | |
9977 | + | |
9978 | /* | |
9979 | * include/linux/rwlock_types.h - generic rwlock type definitions | |
9980 | * and initializers | |
9981 | diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h | |
9982 | new file mode 100644 | |
9983 | index 000000000000..51b28d775fe1 | |
9984 | --- /dev/null | |
9985 | +++ b/include/linux/rwlock_types_rt.h | |
9986 | @@ -0,0 +1,33 @@ | |
9987 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
9988 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
9989 | + | |
9990 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
9991 | +#error "Do not include directly. Include spinlock_types.h instead" | |
9992 | +#endif | |
9993 | + | |
9994 | +/* | |
9995 | + * rwlocks - rtmutex which allows single reader recursion | |
9996 | + */ | |
9997 | +typedef struct { | |
9998 | + struct rt_mutex lock; | |
9999 | + int read_depth; | |
10000 | + unsigned int break_lock; | |
10001 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10002 | + struct lockdep_map dep_map; | |
10003 | +#endif | |
10004 | +} rwlock_t; | |
10005 | + | |
10006 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10007 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
10008 | +#else | |
10009 | +# define RW_DEP_MAP_INIT(lockname) | |
10010 | +#endif | |
10011 | + | |
10012 | +#define __RW_LOCK_UNLOCKED(name) \ | |
10013 | + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ | |
10014 | + RW_DEP_MAP_INIT(name) } | |
10015 | + | |
10016 | +#define DEFINE_RWLOCK(name) \ | |
10017 | + rwlock_t name = __RW_LOCK_UNLOCKED(name) | |
10018 | + | |
10019 | +#endif | |
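
rwlock_types_rt.h also uses a common kernel idiom for optional debug state: the dep_map field exists only under CONFIG_DEBUG_LOCK_ALLOC, and RW_DEP_MAP_INIT() expands to a designated initializer when the field exists and to nothing otherwise, so __RW_LOCK_UNLOCKED() is valid in both configurations. A minimal sketch of that idiom follows, using an invented DEBUG_LOCKS switch in place of the kernel config symbol.

#include <stdio.h>

struct dep_map {
	const char *name;
};

typedef struct {
	int locked;
#ifdef DEBUG_LOCKS
	struct dep_map dep_map;
#endif
} mylock_t;

#ifdef DEBUG_LOCKS
/* Expands to a trailing initializer when the field exists... */
# define DEP_MAP_INIT(lockname)	, .dep_map = { .name = #lockname }
#else
/* ...and to nothing when it does not. */
# define DEP_MAP_INIT(lockname)
#endif

#define MYLOCK_UNLOCKED(name)	{ .locked = 0 DEP_MAP_INIT(name) }
#define DEFINE_MYLOCK(name)	mylock_t name = MYLOCK_UNLOCKED(name)

static DEFINE_MYLOCK(test_lock);

int main(void)
{
	printf("locked=%d\n", test_lock.locked);
	return 0;
}
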
10020 | diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h | |
10021 | index dd1d14250340..8e1f44ff1f2f 100644 | |
10022 | --- a/include/linux/rwsem.h | |
10023 | +++ b/include/linux/rwsem.h | |
10024 | @@ -19,6 +19,10 @@ | |
10025 | #include <linux/osq_lock.h> | |
10026 | #endif | |
10027 | ||
10028 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10029 | +#include <linux/rwsem_rt.h> | |
10030 | +#else /* PREEMPT_RT_FULL */ | |
10031 | + | |
10032 | struct rw_semaphore; | |
10033 | ||
10034 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | |
10035 | @@ -184,4 +188,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem); | |
10036 | # define up_read_non_owner(sem) up_read(sem) | |
10037 | #endif | |
10038 | ||
10039 | +#endif /* !PREEMPT_RT_FULL */ | |
10040 | + | |
10041 | #endif /* _LINUX_RWSEM_H */ | |
10042 | diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h | |
10043 | new file mode 100644 | |
10044 | index 000000000000..e26bd95a57c3 | |
10045 | --- /dev/null | |
10046 | +++ b/include/linux/rwsem_rt.h | |
10047 | @@ -0,0 +1,167 @@ | |
10048 | +#ifndef _LINUX_RWSEM_RT_H | |
10049 | +#define _LINUX_RWSEM_RT_H | |
10050 | + | |
10051 | +#ifndef _LINUX_RWSEM_H | |
10052 | +#error "Include rwsem.h" | |
10053 | +#endif | |
10054 | + | |
10055 | +/* | |
10056 | + * RW-semaphores are a spinlock plus a reader-depth count. | |
10057 | + * | |
10058 | + * Note that the semantics are different from the usual | |
10059 | + * Linux rw-sems: in PREEMPT_RT mode we do not allow | |
10060 | + * multiple readers to hold the lock at once; we only allow | |
10061 | + * a read-lock owner to read-lock recursively. This is | |
10062 | + * better for latency, makes the implementation inherently | |
10063 | + * fair and makes it simpler as well. | |
10064 | + */ | |
10065 | + | |
10066 | +#include <linux/rtmutex.h> | |
10067 | + | |
10068 | +struct rw_semaphore { | |
10069 | + struct rt_mutex lock; | |
10070 | + int read_depth; | |
10071 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10072 | + struct lockdep_map dep_map; | |
10073 | +#endif | |
10074 | +}; | |
10075 | + | |
10076 | +#define __RWSEM_INITIALIZER(name) \ | |
10077 | + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ | |
10078 | + RW_DEP_MAP_INIT(name) } | |
10079 | + | |
10080 | +#define DECLARE_RWSEM(lockname) \ | |
10081 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
10082 | + | |
10083 | +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
10084 | + struct lock_class_key *key); | |
10085 | + | |
10086 | +#define __rt_init_rwsem(sem, name, key) \ | |
10087 | + do { \ | |
10088 | + rt_mutex_init(&(sem)->lock); \ | |
10089 | + __rt_rwsem_init((sem), (name), (key));\ | |
10090 | + } while (0) | |
10091 | + | |
10092 | +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) | |
10093 | + | |
10094 | +# define rt_init_rwsem(sem) \ | |
10095 | +do { \ | |
10096 | + static struct lock_class_key __key; \ | |
10097 | + \ | |
10098 | + __rt_init_rwsem((sem), #sem, &__key); \ | |
10099 | +} while (0) | |
10100 | + | |
10101 | +extern void rt_down_write(struct rw_semaphore *rwsem); | |
10102 | +extern int rt_down_write_killable(struct rw_semaphore *rwsem); | |
10103 | +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); | |
10104 | +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); | |
10105 | +extern int rt_down_write_killable_nested(struct rw_semaphore *rwsem, | |
10106 | + int subclass); | |
10107 | +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
10108 | + struct lockdep_map *nest); | |
10109 | +extern void rt__down_read(struct rw_semaphore *rwsem); | |
10110 | +extern void rt_down_read(struct rw_semaphore *rwsem); | |
10111 | +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); | |
10112 | +extern int rt__down_read_trylock(struct rw_semaphore *rwsem); | |
10113 | +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); | |
10114 | +extern void __rt_up_read(struct rw_semaphore *rwsem); | |
10115 | +extern void rt_up_read(struct rw_semaphore *rwsem); | |
10116 | +extern void rt_up_write(struct rw_semaphore *rwsem); | |
10117 | +extern void rt_downgrade_write(struct rw_semaphore *rwsem); | |
10118 | + | |
10119 | +#define init_rwsem(sem) rt_init_rwsem(sem) | |
10120 | +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) | |
10121 | + | |
10122 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) | |
10123 | +{ | |
10124 | + /* rt_mutex_has_waiters() */ | |
10125 | + return !RB_EMPTY_ROOT(&sem->lock.waiters); | |
10126 | +} | |
10127 | + | |
10128 | +static inline void __down_read(struct rw_semaphore *sem) | |
10129 | +{ | |
10130 | + rt__down_read(sem); | |
10131 | +} | |
10132 | + | |
10133 | +static inline void down_read(struct rw_semaphore *sem) | |
10134 | +{ | |
10135 | + rt_down_read(sem); | |
10136 | +} | |
10137 | + | |
10138 | +static inline int __down_read_trylock(struct rw_semaphore *sem) | |
10139 | +{ | |
10140 | + return rt__down_read_trylock(sem); | |
10141 | +} | |
10142 | + | |
10143 | +static inline int down_read_trylock(struct rw_semaphore *sem) | |
10144 | +{ | |
10145 | + return rt_down_read_trylock(sem); | |
10146 | +} | |
10147 | + | |
10148 | +static inline void down_write(struct rw_semaphore *sem) | |
10149 | +{ | |
10150 | + rt_down_write(sem); | |
10151 | +} | |
10152 | + | |
10153 | +static inline int down_write_killable(struct rw_semaphore *sem) | |
10154 | +{ | |
10155 | + return rt_down_write_killable(sem); | |
10156 | +} | |
10157 | + | |
10158 | +static inline int down_write_trylock(struct rw_semaphore *sem) | |
10159 | +{ | |
10160 | + return rt_down_write_trylock(sem); | |
10161 | +} | |
10162 | + | |
10163 | +static inline void __up_read(struct rw_semaphore *sem) | |
10164 | +{ | |
10165 | + __rt_up_read(sem); | |
10166 | +} | |
10167 | + | |
10168 | +static inline void up_read(struct rw_semaphore *sem) | |
10169 | +{ | |
10170 | + rt_up_read(sem); | |
10171 | +} | |
10172 | + | |
10173 | +static inline void up_write(struct rw_semaphore *sem) | |
10174 | +{ | |
10175 | + rt_up_write(sem); | |
10176 | +} | |
10177 | + | |
10178 | +static inline void downgrade_write(struct rw_semaphore *sem) | |
10179 | +{ | |
10180 | + rt_downgrade_write(sem); | |
10181 | +} | |
10182 | + | |
10183 | +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) | |
10184 | +{ | |
10185 | + return rt_down_read_nested(sem, subclass); | |
10186 | +} | |
10187 | + | |
10188 | +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) | |
10189 | +{ | |
10190 | + rt_down_write_nested(sem, subclass); | |
10191 | +} | |
10192 | + | |
10193 | +static inline int down_write_killable_nested(struct rw_semaphore *sem, | |
10194 | + int subclass) | |
10195 | +{ | |
10196 | + return rt_down_write_killable_nested(sem, subclass); | |
10197 | +} | |
10198 | + | |
10199 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
10200 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
10201 | + struct rw_semaphore *nest_lock) | |
10202 | +{ | |
10203 | + rt_down_write_nested_lock(sem, &nest_lock->dep_map); | |
10204 | +} | |
10205 | + | |
10206 | +#else | |
10207 | + | |
10208 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
10209 | + struct rw_semaphore *nest_lock) | |
10210 | +{ | |
10211 | + rt_down_write_nested_lock(sem, NULL); | |
10212 | +} | |
10213 | +#endif | |
10214 | +#endif | |
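
The comment at the top of rwsem_rt.h states the RT semantics: at most one reader holds the lock, but that reader may take it again recursively, which is what read_depth tracks on top of the underlying rt_mutex. The sketch below models only that owner-plus-depth bookkeeping with a pthread mutex; it is not the kernel implementation and in particular has none of the priority inheritance that the real rt_mutex provides.

#include <pthread.h>
#include <stdio.h>

struct rt_rwsem {
	pthread_mutex_t lock;	/* stands in for the rt_mutex */
	pthread_t reader;	/* current read-side owner, if any */
	int read_depth;		/* recursion count of that owner */
};

static struct rt_rwsem sem = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
};

static void my_down_read(struct rt_rwsem *s)
{
	/* "Am I already the read-side owner?" check, as in rt_down_read():
	 * if so, only the depth is bumped and the lock is not taken again. */
	if (s->read_depth && pthread_equal(s->reader, pthread_self())) {
		s->read_depth++;
		return;
	}
	pthread_mutex_lock(&s->lock);
	s->reader = pthread_self();
	s->read_depth = 1;
}

static void my_up_read(struct rt_rwsem *s)
{
	if (--s->read_depth == 0)
		pthread_mutex_unlock(&s->lock);
}

int main(void)
{
	my_down_read(&sem);
	my_down_read(&sem);	/* nested read lock by the same task */
	printf("depth=%d\n", sem.read_depth);
	my_up_read(&sem);
	my_up_read(&sem);
	return 0;
}
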
10215 | diff --git a/include/linux/sched.h b/include/linux/sched.h | |
10216 | index 62c68e513e39..c873ce0183ab 100644 | |
10217 | --- a/include/linux/sched.h | |
10218 | +++ b/include/linux/sched.h | |
10219 | @@ -26,6 +26,7 @@ struct sched_param { | |
10220 | #include <linux/nodemask.h> | |
10221 | #include <linux/mm_types.h> | |
10222 | #include <linux/preempt.h> | |
10223 | +#include <asm/kmap_types.h> | |
10224 | ||
10225 | #include <asm/page.h> | |
10226 | #include <asm/ptrace.h> | |
10227 | @@ -243,10 +244,7 @@ extern char ___assert_task_state[1 - 2*!!( | |
10228 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | |
10229 | __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) | |
10230 | ||
10231 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | |
10232 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
10233 | -#define task_is_stopped_or_traced(task) \ | |
10234 | - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | |
10235 | #define task_contributes_to_load(task) \ | |
10236 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
10237 | (task->flags & PF_FROZEN) == 0 && \ | |
10238 | @@ -312,6 +310,11 @@ extern char ___assert_task_state[1 - 2*!!( | |
10239 | ||
10240 | #endif | |
10241 | ||
10242 | +#define __set_current_state_no_track(state_value) \ | |
10243 | + do { current->state = (state_value); } while (0) | |
10244 | +#define set_current_state_no_track(state_value) \ | |
10245 | + set_mb(current->state, (state_value)) | |
10246 | + | |
10247 | /* Task command name length */ | |
10248 | #define TASK_COMM_LEN 16 | |
10249 | ||
10250 | @@ -1009,8 +1012,18 @@ struct wake_q_head { | |
10251 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } | |
10252 | ||
10253 | extern void wake_q_add(struct wake_q_head *head, | |
10254 | - struct task_struct *task); | |
10255 | -extern void wake_up_q(struct wake_q_head *head); | |
10256 | + struct task_struct *task); | |
10257 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); | |
10258 | + | |
10259 | +static inline void wake_up_q(struct wake_q_head *head) | |
10260 | +{ | |
10261 | + __wake_up_q(head, false); | |
10262 | +} | |
10263 | + | |
10264 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) | |
10265 | +{ | |
10266 | + __wake_up_q(head, true); | |
10267 | +} | |
10268 | ||
10269 | /* | |
10270 | * sched-domains (multiprocessor balancing) declarations: | |
10271 | @@ -1459,6 +1472,7 @@ struct tlbflush_unmap_batch { | |
10272 | ||
10273 | struct task_struct { | |
10274 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | |
10275 | + volatile long saved_state; /* saved state for "spinlock sleepers" */ | |
10276 | void *stack; | |
10277 | atomic_t usage; | |
10278 | unsigned int flags; /* per process flags, defined below */ | |
10279 | @@ -1495,6 +1509,12 @@ struct task_struct { | |
10280 | #endif | |
10281 | ||
10282 | unsigned int policy; | |
10283 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10284 | + int migrate_disable; | |
10285 | +# ifdef CONFIG_SCHED_DEBUG | |
10286 | + int migrate_disable_atomic; | |
10287 | +# endif | |
10288 | +#endif | |
10289 | int nr_cpus_allowed; | |
10290 | cpumask_t cpus_allowed; | |
10291 | ||
10292 | @@ -1629,6 +1649,9 @@ struct task_struct { | |
10293 | ||
10294 | struct task_cputime cputime_expires; | |
10295 | struct list_head cpu_timers[3]; | |
10296 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10297 | + struct task_struct *posix_timer_list; | |
10298 | +#endif | |
10299 | ||
10300 | /* process credentials */ | |
10301 | const struct cred __rcu *real_cred; /* objective and real subjective task | |
10302 | @@ -1659,10 +1682,15 @@ struct task_struct { | |
10303 | /* signal handlers */ | |
10304 | struct signal_struct *signal; | |
10305 | struct sighand_struct *sighand; | |
10306 | + struct sigqueue *sigqueue_cache; | |
10307 | ||
10308 | sigset_t blocked, real_blocked; | |
10309 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | |
10310 | struct sigpending pending; | |
10311 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10312 | + /* TODO: move me into ->restart_block ? */ | |
10313 | + struct siginfo forced_info; | |
10314 | +#endif | |
10315 | ||
10316 | unsigned long sas_ss_sp; | |
10317 | size_t sas_ss_size; | |
10318 | @@ -1891,6 +1919,12 @@ struct task_struct { | |
10319 | /* bitmask and counter of trace recursion */ | |
10320 | unsigned long trace_recursion; | |
10321 | #endif /* CONFIG_TRACING */ | |
10322 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
10323 | + u64 preempt_timestamp_hist; | |
10324 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
10325 | + long timer_offset; | |
10326 | +#endif | |
10327 | +#endif | |
10328 | #ifdef CONFIG_KCOV | |
10329 | /* Coverage collection mode enabled for this task (0 if disabled). */ | |
10330 | enum kcov_mode kcov_mode; | |
10331 | @@ -1916,9 +1950,23 @@ struct task_struct { | |
10332 | unsigned int sequential_io; | |
10333 | unsigned int sequential_io_avg; | |
10334 | #endif | |
10335 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10336 | + struct rcu_head put_rcu; | |
10337 | + int softirq_nestcnt; | |
10338 | + unsigned int softirqs_raised; | |
10339 | +#endif | |
10340 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10341 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
10342 | + int kmap_idx; | |
10343 | + pte_t kmap_pte[KM_TYPE_NR]; | |
10344 | +# endif | |
10345 | +#endif | |
10346 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
10347 | unsigned long task_state_change; | |
10348 | #endif | |
10349 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10350 | + int xmit_recursion; | |
10351 | +#endif | |
10352 | int pagefault_disabled; | |
10353 | #ifdef CONFIG_MMU | |
10354 | struct task_struct *oom_reaper_list; | |
10355 | @@ -1939,14 +1987,6 @@ extern int arch_task_struct_size __read_mostly; | |
10356 | # define arch_task_struct_size (sizeof(struct task_struct)) | |
10357 | #endif | |
10358 | ||
10359 | -/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
10360 | -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | |
10361 | - | |
10362 | -static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
10363 | -{ | |
10364 | - return p->nr_cpus_allowed; | |
10365 | -} | |
10366 | - | |
10367 | #define TNF_MIGRATED 0x01 | |
10368 | #define TNF_NO_GROUP 0x02 | |
10369 | #define TNF_SHARED 0x04 | |
10370 | @@ -2162,6 +2202,15 @@ extern struct pid *cad_pid; | |
10371 | extern void free_task(struct task_struct *tsk); | |
10372 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | |
10373 | ||
10374 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10375 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
10376 | + | |
10377 | +static inline void put_task_struct(struct task_struct *t) | |
10378 | +{ | |
10379 | + if (atomic_dec_and_test(&t->usage)) | |
10380 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
10381 | +} | |
10382 | +#else | |
10383 | extern void __put_task_struct(struct task_struct *t); | |
10384 | ||
10385 | static inline void put_task_struct(struct task_struct *t) | |
10386 | @@ -2169,6 +2218,7 @@ static inline void put_task_struct(struct task_struct *t) | |
10387 | if (atomic_dec_and_test(&t->usage)) | |
10388 | __put_task_struct(t); | |
10389 | } | |
10390 | +#endif | |
10391 | ||
10392 | struct task_struct *task_rcu_dereference(struct task_struct **ptask); | |
10393 | struct task_struct *try_get_task_struct(struct task_struct **ptask); | |
10394 | @@ -2210,6 +2260,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, | |
10395 | /* | |
10396 | * Per process flags | |
10397 | */ | |
10398 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
10399 | #define PF_EXITING 0x00000004 /* getting shut down */ | |
10400 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | |
10401 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | |
10402 | @@ -2378,6 +2429,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, | |
10403 | ||
10404 | extern int set_cpus_allowed_ptr(struct task_struct *p, | |
10405 | const struct cpumask *new_mask); | |
10406 | +int migrate_me(void); | |
10407 | +void tell_sched_cpu_down_begin(int cpu); | |
10408 | +void tell_sched_cpu_down_done(int cpu); | |
10409 | + | |
10410 | #else | |
10411 | static inline void do_set_cpus_allowed(struct task_struct *p, | |
10412 | const struct cpumask *new_mask) | |
10413 | @@ -2390,6 +2445,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, | |
10414 | return -EINVAL; | |
10415 | return 0; | |
10416 | } | |
10417 | +static inline int migrate_me(void) { return 0; } | |
10418 | +static inline void tell_sched_cpu_down_begin(int cpu) { } | |
10419 | +static inline void tell_sched_cpu_down_done(int cpu) { } | |
10420 | #endif | |
10421 | ||
10422 | #ifdef CONFIG_NO_HZ_COMMON | |
10423 | @@ -2624,6 +2682,7 @@ extern void xtime_update(unsigned long ticks); | |
10424 | ||
10425 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
10426 | extern int wake_up_process(struct task_struct *tsk); | |
10427 | +extern int wake_up_lock_sleeper(struct task_struct * tsk); | |
10428 | extern void wake_up_new_task(struct task_struct *tsk); | |
10429 | #ifdef CONFIG_SMP | |
10430 | extern void kick_process(struct task_struct *tsk); | |
10431 | @@ -2832,6 +2891,17 @@ static inline void mmdrop(struct mm_struct *mm) | |
10432 | __mmdrop(mm); | |
10433 | } | |
10434 | ||
10435 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
10436 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
10437 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
10438 | +{ | |
10439 | + if (atomic_dec_and_test(&mm->mm_count)) | |
10440 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
10441 | +} | |
10442 | +#else | |
10443 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
10444 | +#endif | |
10445 | + | |
10446 | static inline bool mmget_not_zero(struct mm_struct *mm) | |
10447 | { | |
10448 | return atomic_inc_not_zero(&mm->mm_users); | |
10449 | @@ -3168,6 +3238,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) | |
10450 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); | |
10451 | } | |
10452 | ||
10453 | +#ifdef CONFIG_PREEMPT_LAZY | |
10454 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
10455 | +{ | |
10456 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
10457 | +} | |
10458 | + | |
10459 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
10460 | +{ | |
10461 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
10462 | +} | |
10463 | + | |
10464 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
10465 | +{ | |
10466 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
10467 | +} | |
10468 | + | |
10469 | +static inline int need_resched_lazy(void) | |
10470 | +{ | |
10471 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
10472 | +} | |
10473 | + | |
10474 | +static inline int need_resched_now(void) | |
10475 | +{ | |
10476 | + return test_thread_flag(TIF_NEED_RESCHED); | |
10477 | +} | |
10478 | + | |
10479 | +#else | |
10480 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
10481 | +static inline int need_resched_lazy(void) { return 0; } | |
10482 | + | |
10483 | +static inline int need_resched_now(void) | |
10484 | +{ | |
10485 | + return test_thread_flag(TIF_NEED_RESCHED); | |
10486 | +} | |
10487 | + | |
10488 | +#endif | |
10489 | + | |
10490 | static inline int restart_syscall(void) | |
10491 | { | |
10492 | set_tsk_thread_flag(current, TIF_SIGPENDING); | |
10493 | @@ -3199,6 +3306,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) | |
10494 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); | |
10495 | } | |
10496 | ||
10497 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
10498 | +{ | |
10499 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
10500 | + return true; | |
10501 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10502 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
10503 | + return true; | |
10504 | +#endif | |
10505 | + return false; | |
10506 | +} | |
10507 | + | |
10508 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
10509 | +{ | |
10510 | + bool traced_stopped; | |
10511 | + | |
10512 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10513 | + unsigned long flags; | |
10514 | + | |
10515 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
10516 | + traced_stopped = __task_is_stopped_or_traced(task); | |
10517 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
10518 | +#else | |
10519 | + traced_stopped = __task_is_stopped_or_traced(task); | |
10520 | +#endif | |
10521 | + return traced_stopped; | |
10522 | +} | |
10523 | + | |
10524 | +static inline bool task_is_traced(struct task_struct *task) | |
10525 | +{ | |
10526 | + bool traced = false; | |
10527 | + | |
10528 | + if (task->state & __TASK_TRACED) | |
10529 | + return true; | |
10530 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10531 | + /* in case the task is sleeping on tasklist_lock */ | |
10532 | + raw_spin_lock_irq(&task->pi_lock); | |
10533 | + if (task->state & __TASK_TRACED) | |
10534 | + traced = true; | |
10535 | + else if (task->saved_state & __TASK_TRACED) | |
10536 | + traced = true; | |
10537 | + raw_spin_unlock_irq(&task->pi_lock); | |
10538 | +#endif | |
10539 | + return traced; | |
10540 | +} | |
10541 | + | |
10542 | /* | |
10543 | * cond_resched() and cond_resched_lock(): latency reduction via | |
10544 | * explicit rescheduling in places that are safe. The return | |
10545 | @@ -3220,12 +3372,16 @@ extern int __cond_resched_lock(spinlock_t *lock); | |
10546 | __cond_resched_lock(lock); \ | |
10547 | }) | |
10548 | ||
10549 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
10550 | extern int __cond_resched_softirq(void); | |
10551 | ||
10552 | #define cond_resched_softirq() ({ \ | |
10553 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
10554 | __cond_resched_softirq(); \ | |
10555 | }) | |
10556 | +#else | |
10557 | +# define cond_resched_softirq() cond_resched() | |
10558 | +#endif | |
10559 | ||
10560 | static inline void cond_resched_rcu(void) | |
10561 | { | |
10562 | @@ -3387,6 +3543,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) | |
10563 | ||
10564 | #endif /* CONFIG_SMP */ | |
10565 | ||
10566 | +static inline int __migrate_disabled(struct task_struct *p) | |
10567 | +{ | |
10568 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10569 | + return p->migrate_disable; | |
10570 | +#else | |
10571 | + return 0; | |
10572 | +#endif | |
10573 | +} | |
10574 | + | |
10575 | +/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
10576 | +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) | |
10577 | +{ | |
10578 | + if (__migrate_disabled(p)) | |
10579 | + return cpumask_of(task_cpu(p)); | |
10580 | + | |
10581 | + return &p->cpus_allowed; | |
10582 | +} | |
10583 | + | |
10584 | +static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
10585 | +{ | |
10586 | + if (__migrate_disabled(p)) | |
10587 | + return 1; | |
10588 | + return p->nr_cpus_allowed; | |
10589 | +} | |
10590 | + | |
10591 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | |
10592 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |
10593 | ||
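
The sched.h hunk moves tsk_cpus_allowed() and tsk_nr_cpus_allowed() here and makes them aware of migrate_disable(): while a task has migration disabled it is effectively pinned, so the accessors report only the current CPU rather than the full affinity mask. The snippet below is a reduced, self-contained model of that decision; struct task and its fields are invented stand-ins for the corresponding task_struct members.

#include <stdio.h>

#define NR_CPUS 4

struct task {
	unsigned long cpus_allowed;	/* bitmask of permitted CPUs */
	int nr_cpus_allowed;
	int cpu;			/* CPU the task currently runs on */
	int migrate_disable;		/* depth of migrate_disable() nesting */
};

/* While migration is disabled the task is effectively pinned, so the
 * accessor reports only the current CPU instead of the full mask. */
static unsigned long task_cpus_allowed(const struct task *p)
{
	if (p->migrate_disable)
		return 1UL << p->cpu;
	return p->cpus_allowed;
}

static int task_nr_cpus_allowed(const struct task *p)
{
	return p->migrate_disable ? 1 : p->nr_cpus_allowed;
}

int main(void)
{
	struct task t = {
		.cpus_allowed = 0xf, .nr_cpus_allowed = NR_CPUS, .cpu = 2,
	};

	printf("mask=%#lx nr=%d\n", task_cpus_allowed(&t), task_nr_cpus_allowed(&t));
	t.migrate_disable = 1;
	printf("mask=%#lx nr=%d\n", task_cpus_allowed(&t), task_nr_cpus_allowed(&t));
	return 0;
}
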
10594 | diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h | |
10595 | index ead97654c4e9..3d7223ffdd3b 100644 | |
10596 | --- a/include/linux/seqlock.h | |
10597 | +++ b/include/linux/seqlock.h | |
10598 | @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | |
10599 | return __read_seqcount_retry(s, start); | |
10600 | } | |
10601 | ||
10602 | - | |
10603 | - | |
10604 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
10605 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
10606 | { | |
10607 | s->sequence++; | |
10608 | smp_wmb(); | |
10609 | } | |
10610 | ||
10611 | -static inline void raw_write_seqcount_end(seqcount_t *s) | |
10612 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
10613 | +{ | |
10614 | + preempt_disable_rt(); | |
10615 | + __raw_write_seqcount_begin(s); | |
10616 | +} | |
10617 | + | |
10618 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
10619 | { | |
10620 | smp_wmb(); | |
10621 | s->sequence++; | |
10622 | } | |
10623 | ||
10624 | +static inline void raw_write_seqcount_end(seqcount_t *s) | |
10625 | +{ | |
10626 | + __raw_write_seqcount_end(s); | |
10627 | + preempt_enable_rt(); | |
10628 | +} | |
10629 | + | |
10630 | /** | |
10631 | * raw_write_seqcount_barrier - do a seq write barrier | |
10632 | * @s: pointer to seqcount_t | |
10633 | @@ -428,10 +438,32 @@ typedef struct { | |
10634 | /* | |
10635 | * Read side functions for starting and finalizing a read side section. | |
10636 | */ | |
10637 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
10638 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
10639 | { | |
10640 | return read_seqcount_begin(&sl->seqcount); | |
10641 | } | |
10642 | +#else | |
10643 | +/* | |
10644 | + * Starvation safe read side for RT | |
10645 | + */ | |
10646 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
10647 | +{ | |
10648 | + unsigned ret; | |
10649 | + | |
10650 | +repeat: | |
10651 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
10652 | + if (unlikely(ret & 1)) { | |
10653 | + /* | |
10654 | + * Take the lock and let the writer proceed (i.e. possibly | |
10655 | + * boost it), otherwise we could loop here forever. | |
10656 | + */ | |
10657 | + spin_unlock_wait(&sl->lock); | |
10658 | + goto repeat; | |
10659 | + } | |
10660 | + return ret; | |
10661 | +} | |
10662 | +#endif | |
10663 | ||
10664 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
10665 | { | |
10666 | @@ -446,36 +478,45 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
10667 | static inline void write_seqlock(seqlock_t *sl) | |
10668 | { | |
10669 | spin_lock(&sl->lock); | |
10670 | - write_seqcount_begin(&sl->seqcount); | |
10671 | + __raw_write_seqcount_begin(&sl->seqcount); | |
10672 | +} | |
10673 | + | |
10674 | +static inline int try_write_seqlock(seqlock_t *sl) | |
10675 | +{ | |
10676 | + if (spin_trylock(&sl->lock)) { | |
10677 | + __raw_write_seqcount_begin(&sl->seqcount); | |
10678 | + return 1; | |
10679 | + } | |
10680 | + return 0; | |
10681 | } | |
10682 | ||
10683 | static inline void write_sequnlock(seqlock_t *sl) | |
10684 | { | |
10685 | - write_seqcount_end(&sl->seqcount); | |
10686 | + __raw_write_seqcount_end(&sl->seqcount); | |
10687 | spin_unlock(&sl->lock); | |
10688 | } | |
10689 | ||
10690 | static inline void write_seqlock_bh(seqlock_t *sl) | |
10691 | { | |
10692 | spin_lock_bh(&sl->lock); | |
10693 | - write_seqcount_begin(&sl->seqcount); | |
10694 | + __raw_write_seqcount_begin(&sl->seqcount); | |
10695 | } | |
10696 | ||
10697 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
10698 | { | |
10699 | - write_seqcount_end(&sl->seqcount); | |
10700 | + __raw_write_seqcount_end(&sl->seqcount); | |
10701 | spin_unlock_bh(&sl->lock); | |
10702 | } | |
10703 | ||
10704 | static inline void write_seqlock_irq(seqlock_t *sl) | |
10705 | { | |
10706 | spin_lock_irq(&sl->lock); | |
10707 | - write_seqcount_begin(&sl->seqcount); | |
10708 | + __raw_write_seqcount_begin(&sl->seqcount); | |
10709 | } | |
10710 | ||
10711 | static inline void write_sequnlock_irq(seqlock_t *sl) | |
10712 | { | |
10713 | - write_seqcount_end(&sl->seqcount); | |
10714 | + __raw_write_seqcount_end(&sl->seqcount); | |
10715 | spin_unlock_irq(&sl->lock); | |
10716 | } | |
10717 | ||
10718 | @@ -484,7 +525,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
10719 | unsigned long flags; | |
10720 | ||
10721 | spin_lock_irqsave(&sl->lock, flags); | |
10722 | - write_seqcount_begin(&sl->seqcount); | |
10723 | + __raw_write_seqcount_begin(&sl->seqcount); | |
10724 | return flags; | |
10725 | } | |
10726 | ||
10727 | @@ -494,7 +535,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
10728 | static inline void | |
10729 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
10730 | { | |
10731 | - write_seqcount_end(&sl->seqcount); | |
10732 | + __raw_write_seqcount_end(&sl->seqcount); | |
10733 | spin_unlock_irqrestore(&sl->lock, flags); | |
10734 | } | |
10735 | ||
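
The seqlock.h changes keep the usual read protocol, i.e. sample the sequence count, copy the data, and retry if the count was odd or changed, but on RT the reader blocks on the writer's lock instead of spinning, because a spinning reader could otherwise starve a preempted lower-priority writer forever. The fragment below is a plain userspace seqlock reader and writer written with C11 atomics to show the retry protocol; the barriers are simplified relative to __raw_write_seqcount_begin()/end(), and the RT "block on the lock to boost the writer" step is only indicated by a comment, since userspace has no priority inheritance to model here.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;		/* odd while a write is in progress */
static int data_a, data_b;	/* protected data */

static void write_pair(int a, int b)
{
	/* A real writer also holds sl->lock to serialize against other
	 * writers; the barrier placement here is simplified. */
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* -> odd */
	data_a = a;
	data_b = b;
	atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* -> even */
}

static void read_pair(int *a, int *b)
{
	unsigned int start;

	for (;;) {
		start = atomic_load_explicit(&seq, memory_order_acquire);
		if (start & 1) {
			/* Writer in progress.  The RT read_seqbegin() would
			 * block on sl->lock here (possibly boosting the
			 * writer) instead of spinning on the count. */
			continue;
		}
		*a = data_a;
		*b = data_b;
		if (atomic_load_explicit(&seq, memory_order_acquire) == start)
			return;		/* consistent snapshot */
		/* Sequence moved on: a writer raced with us, retry. */
	}
}

int main(void)
{
	int a, b;

	write_pair(1, 2);
	read_pair(&a, &b);
	printf("%d %d\n", a, b);
	return 0;
}
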
10736 | diff --git a/include/linux/signal.h b/include/linux/signal.h | |
10737 | index b63f63eaa39c..295540fdfc72 100644 | |
10738 | --- a/include/linux/signal.h | |
10739 | +++ b/include/linux/signal.h | |
10740 | @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig) | |
10741 | } | |
10742 | ||
10743 | extern void flush_sigqueue(struct sigpending *queue); | |
10744 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
10745 | ||
10746 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
10747 | static inline int valid_signal(unsigned long sig) | |
10748 | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h | |
10749 | index 0f665cb26b50..59c38d1635c8 100644 | |
10750 | --- a/include/linux/skbuff.h | |
10751 | +++ b/include/linux/skbuff.h | |
10752 | @@ -284,6 +284,7 @@ struct sk_buff_head { | |
10753 | ||
10754 | __u32 qlen; | |
10755 | spinlock_t lock; | |
10756 | + raw_spinlock_t raw_lock; | |
10757 | }; | |
10758 | ||
10759 | struct sk_buff; | |
10760 | @@ -1565,6 +1566,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) | |
10761 | __skb_queue_head_init(list); | |
10762 | } | |
10763 | ||
10764 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) | |
10765 | +{ | |
10766 | + raw_spin_lock_init(&list->raw_lock); | |
10767 | + __skb_queue_head_init(list); | |
10768 | +} | |
10769 | + | |
10770 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
10771 | struct lock_class_key *class) | |
10772 | { | |
10773 | diff --git a/include/linux/smp.h b/include/linux/smp.h | |
10774 | index eccae4690f41..64ec52d951c3 100644 | |
10775 | --- a/include/linux/smp.h | |
10776 | +++ b/include/linux/smp.h | |
10777 | @@ -185,6 +185,9 @@ static inline void smp_init(void) { } | |
10778 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | |
10779 | #define put_cpu() preempt_enable() | |
10780 | ||
10781 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) | |
10782 | +#define put_cpu_light() migrate_enable() | |
10783 | + | |
10784 | /* | |
10785 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
10786 | * boot command line: | |
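
get_cpu_light()/put_cpu_light() mirror get_cpu()/put_cpu() but pin the task with migrate_disable() rather than disabling preemption, which is all many callers need on RT. Both pairs rely on the GCC statement-expression extension so that one macro can run a statement and still yield a value. The sketch below reproduces that macro shape in userspace (it therefore needs GCC or Clang); migrate_disable()/migrate_enable() and the CPU number are stubbed, since the real ones live in the scheduler.

#include <stdio.h>

static int current_cpu = 3;	/* stand-in for smp_processor_id() */

static void migrate_disable(void) { printf("pin to cpu\n"); }
static void migrate_enable(void)  { printf("unpin\n"); }

/* GCC statement expression: runs migrate_disable(), then the whole
 * expression evaluates to the current CPU, the same shape as
 * get_cpu_light() above. */
#define get_cpu_light()	({ migrate_disable(); current_cpu; })
#define put_cpu_light()	migrate_enable()

int main(void)
{
	int cpu = get_cpu_light();

	printf("running on cpu %d\n", cpu);
	put_cpu_light();
	return 0;
}
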
10787 | diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h | |
10788 | index 47dd0cebd204..02928fa5499d 100644 | |
10789 | --- a/include/linux/spinlock.h | |
10790 | +++ b/include/linux/spinlock.h | |
10791 | @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
10792 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
10793 | ||
10794 | /* Include rwlock functions */ | |
10795 | -#include <linux/rwlock.h> | |
10796 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10797 | +# include <linux/rwlock_rt.h> | |
10798 | +#else | |
10799 | +# include <linux/rwlock.h> | |
10800 | +#endif | |
10801 | ||
10802 | /* | |
10803 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
10804 | @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
10805 | # include <linux/spinlock_api_up.h> | |
10806 | #endif | |
10807 | ||
10808 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10809 | +# include <linux/spinlock_rt.h> | |
10810 | +#else /* PREEMPT_RT_FULL */ | |
10811 | + | |
10812 | /* | |
10813 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
10814 | */ | |
10815 | @@ -347,6 +355,12 @@ static __always_inline void spin_unlock(spinlock_t *lock) | |
10816 | raw_spin_unlock(&lock->rlock); | |
10817 | } | |
10818 | ||
10819 | +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock) | |
10820 | +{ | |
10821 | + raw_spin_unlock(&lock->rlock); | |
10822 | + return 0; | |
10823 | +} | |
10824 | + | |
10825 | static __always_inline void spin_unlock_bh(spinlock_t *lock) | |
10826 | { | |
10827 | raw_spin_unlock_bh(&lock->rlock); | |
10828 | @@ -416,4 +430,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); | |
10829 | #define atomic_dec_and_lock(atomic, lock) \ | |
10830 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
10831 | ||
10832 | +#endif /* !PREEMPT_RT_FULL */ | |
10833 | + | |
10834 | #endif /* __LINUX_SPINLOCK_H */ | |
10835 | diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h | |
10836 | index 5344268e6e62..043263f30e81 100644 | |
10837 | --- a/include/linux/spinlock_api_smp.h | |
10838 | +++ b/include/linux/spinlock_api_smp.h | |
10839 | @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) | |
10840 | return 0; | |
10841 | } | |
10842 | ||
10843 | -#include <linux/rwlock_api_smp.h> | |
10844 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
10845 | +# include <linux/rwlock_api_smp.h> | |
10846 | +#endif | |
10847 | ||
10848 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
10849 | diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h | |
10850 | new file mode 100644 | |
10851 | index 000000000000..7eb87584e843 | |
10852 | --- /dev/null | |
10853 | +++ b/include/linux/spinlock_rt.h | |
10854 | @@ -0,0 +1,165 @@ | |
10855 | +#ifndef __LINUX_SPINLOCK_RT_H | |
10856 | +#define __LINUX_SPINLOCK_RT_H | |
10857 | + | |
10858 | +#ifndef __LINUX_SPINLOCK_H | |
10859 | +#error Do not include directly. Use spinlock.h | |
10860 | +#endif | |
10861 | + | |
10862 | +#include <linux/bug.h> | |
10863 | + | |
10864 | +extern void | |
10865 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); | |
10866 | + | |
10867 | +#define spin_lock_init(slock) \ | |
10868 | +do { \ | |
10869 | + static struct lock_class_key __key; \ | |
10870 | + \ | |
10871 | + rt_mutex_init(&(slock)->lock); \ | |
10872 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
10873 | +} while (0) | |
10874 | + | |
10875 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); | |
10876 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); | |
10877 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); | |
10878 | + | |
10879 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
10880 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
10881 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
10882 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
10883 | +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock); | |
10884 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); | |
10885 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
10886 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
10887 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
10888 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
10889 | + | |
10890 | +/* | |
10891 | + * lockdep-less calls, for derived types like rwlock: | |
10892 | + * (for trylock they can use rt_mutex_trylock() directly). | |
10893 | + */ | |
10894 | +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); | |
10895 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
10896 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
10897 | +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock); | |
10898 | + | |
10899 | +#define spin_lock(lock) rt_spin_lock(lock) | |
10900 | + | |
10901 | +#define spin_lock_bh(lock) \ | |
10902 | + do { \ | |
10903 | + local_bh_disable(); \ | |
10904 | + rt_spin_lock(lock); \ | |
10905 | + } while (0) | |
10906 | + | |
10907 | +#define spin_lock_irq(lock) spin_lock(lock) | |
10908 | + | |
10909 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
10910 | + | |
10911 | +#define spin_trylock(lock) \ | |
10912 | +({ \ | |
10913 | + int __locked; \ | |
10914 | + __locked = spin_do_trylock(lock); \ | |
10915 | + __locked; \ | |
10916 | +}) | |
10917 | + | |
10918 | +#ifdef CONFIG_LOCKDEP | |
10919 | +# define spin_lock_nested(lock, subclass) \ | |
10920 | + do { \ | |
10921 | + rt_spin_lock_nested(lock, subclass); \ | |
10922 | + } while (0) | |
10923 | + | |
10924 | +#define spin_lock_bh_nested(lock, subclass) \ | |
10925 | + do { \ | |
10926 | + local_bh_disable(); \ | |
10927 | + rt_spin_lock_nested(lock, subclass); \ | |
10928 | + } while (0) | |
10929 | + | |
10930 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
10931 | + do { \ | |
10932 | + typecheck(unsigned long, flags); \ | |
10933 | + flags = 0; \ | |
10934 | + rt_spin_lock_nested(lock, subclass); \ | |
10935 | + } while (0) | |
10936 | +#else | |
10937 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
10938 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
10939 | + | |
10940 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
10941 | + do { \ | |
10942 | + typecheck(unsigned long, flags); \ | |
10943 | + flags = 0; \ | |
10944 | + spin_lock(lock); \ | |
10945 | + } while (0) | |
10946 | +#endif | |
10947 | + | |
10948 | +#define spin_lock_irqsave(lock, flags) \ | |
10949 | + do { \ | |
10950 | + typecheck(unsigned long, flags); \ | |
10951 | + flags = 0; \ | |
10952 | + spin_lock(lock); \ | |
10953 | + } while (0) | |
10954 | + | |
10955 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
10956 | +{ | |
10957 | + unsigned long flags = 0; | |
10958 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
10959 | + flags = rt_spin_lock_trace_flags(lock); | |
10960 | +#else | |
10961 | + spin_lock(lock); /* lock_local */ | |
10962 | +#endif | |
10963 | + return flags; | |
10964 | +} | |
10965 | + | |
10966 | +/* FIXME: we need rt_spin_lock_nest_lock */ | |
10967 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
10968 | + | |
10969 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
10970 | +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock) | |
10971 | + | |
10972 | +#define spin_unlock_bh(lock) \ | |
10973 | + do { \ | |
10974 | + rt_spin_unlock(lock); \ | |
10975 | + local_bh_enable(); \ | |
10976 | + } while (0) | |
10977 | + | |
10978 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
10979 | + | |
10980 | +#define spin_unlock_irqrestore(lock, flags) \ | |
10981 | + do { \ | |
10982 | + typecheck(unsigned long, flags); \ | |
10983 | + (void) flags; \ | |
10984 | + spin_unlock(lock); \ | |
10985 | + } while (0) | |
10986 | + | |
10987 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
10988 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
10989 | + | |
10990 | +#define spin_trylock_irqsave(lock, flags) \ | |
10991 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
10992 | + | |
10993 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
10994 | + | |
10995 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
10996 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
10997 | +#else | |
10998 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
10999 | +#endif | |
11000 | + | |
11001 | +static inline int spin_can_lock(spinlock_t *lock) | |
11002 | +{ | |
11003 | + return !rt_mutex_is_locked(&lock->lock); | |
11004 | +} | |
11005 | + | |
11006 | +static inline int spin_is_locked(spinlock_t *lock) | |
11007 | +{ | |
11008 | + return rt_mutex_is_locked(&lock->lock); | |
11009 | +} | |
11010 | + | |
11011 | +static inline void assert_spin_locked(spinlock_t *lock) | |
11012 | +{ | |
11013 | + BUG_ON(!spin_is_locked(lock)); | |
11014 | +} | |
11015 | + | |
11016 | +#define atomic_dec_and_lock(atomic, lock) \ | |
11017 | + atomic_dec_and_spin_lock(atomic, lock) | |
11018 | + | |
11019 | +#endif | |
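
In spinlock_rt.h the _irqsave variants stop saving interrupt state altogether: an rtmutex based "spinlock" may sleep, so interrupts stay enabled, flags is only typechecked and set to 0, and the matching unlock_irqrestore() discards it. The snippet below reproduces that typecheck-then-ignore pattern outside the kernel; typecheck() is re-created locally because it is a kernel macro, and the whole thing relies on GNU C extensions (statement expressions, __typeof__).

#include <stdio.h>
#include <pthread.h>

/* Local re-creation of the kernel's typecheck() helper: warns at compile
 * time if x is not of type 'type', and evaluates to 1. */
#define typecheck(type, x)			\
({						\
	type __dummy;				\
	__typeof__(x) __dummy2;			\
	(void)(&__dummy == &__dummy2);		\
	1;					\
})

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

/* RT-style irqsave: flags keeps its type for API compatibility but
 * carries no interrupt state; the lock may sleep, so IRQs stay on. */
#define my_lock_irqsave(flags)			\
	do {					\
		typecheck(unsigned long, flags);\
		flags = 0;			\
		pthread_mutex_lock(&lk);	\
	} while (0)

#define my_unlock_irqrestore(flags)		\
	do {					\
		typecheck(unsigned long, flags);\
		(void)(flags);			\
		pthread_mutex_unlock(&lk);	\
	} while (0)

int main(void)
{
	unsigned long flags;

	my_lock_irqsave(flags);
	printf("in critical section, flags=%lu\n", flags);
	my_unlock_irqrestore(flags);
	return 0;
}
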
11020 | diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h | |
11021 | index 73548eb13a5d..10bac715ea96 100644 | |
11022 | --- a/include/linux/spinlock_types.h | |
11023 | +++ b/include/linux/spinlock_types.h | |
11024 | @@ -9,80 +9,15 @@ | |
11025 | * Released under the General Public License (GPL). | |
11026 | */ | |
11027 | ||
11028 | -#if defined(CONFIG_SMP) | |
11029 | -# include <asm/spinlock_types.h> | |
11030 | +#include <linux/spinlock_types_raw.h> | |
11031 | + | |
11032 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
11033 | +# include <linux/spinlock_types_nort.h> | |
11034 | +# include <linux/rwlock_types.h> | |
11035 | #else | |
11036 | -# include <linux/spinlock_types_up.h> | |
11037 | +# include <linux/rtmutex.h> | |
11038 | +# include <linux/spinlock_types_rt.h> | |
11039 | +# include <linux/rwlock_types_rt.h> | |
11040 | #endif | |
11041 | ||
11042 | -#include <linux/lockdep.h> | |
11043 | - | |
11044 | -typedef struct raw_spinlock { | |
11045 | - arch_spinlock_t raw_lock; | |
11046 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
11047 | - unsigned int break_lock; | |
11048 | -#endif | |
11049 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
11050 | - unsigned int magic, owner_cpu; | |
11051 | - void *owner; | |
11052 | -#endif | |
11053 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11054 | - struct lockdep_map dep_map; | |
11055 | -#endif | |
11056 | -} raw_spinlock_t; | |
11057 | - | |
11058 | -#define SPINLOCK_MAGIC 0xdead4ead | |
11059 | - | |
11060 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
11061 | - | |
11062 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11063 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
11064 | -#else | |
11065 | -# define SPIN_DEP_MAP_INIT(lockname) | |
11066 | -#endif | |
11067 | - | |
11068 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
11069 | -# define SPIN_DEBUG_INIT(lockname) \ | |
11070 | - .magic = SPINLOCK_MAGIC, \ | |
11071 | - .owner_cpu = -1, \ | |
11072 | - .owner = SPINLOCK_OWNER_INIT, | |
11073 | -#else | |
11074 | -# define SPIN_DEBUG_INIT(lockname) | |
11075 | -#endif | |
11076 | - | |
11077 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
11078 | - { \ | |
11079 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
11080 | - SPIN_DEBUG_INIT(lockname) \ | |
11081 | - SPIN_DEP_MAP_INIT(lockname) } | |
11082 | - | |
11083 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
11084 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
11085 | - | |
11086 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
11087 | - | |
11088 | -typedef struct spinlock { | |
11089 | - union { | |
11090 | - struct raw_spinlock rlock; | |
11091 | - | |
11092 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11093 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
11094 | - struct { | |
11095 | - u8 __padding[LOCK_PADSIZE]; | |
11096 | - struct lockdep_map dep_map; | |
11097 | - }; | |
11098 | -#endif | |
11099 | - }; | |
11100 | -} spinlock_t; | |
11101 | - | |
11102 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
11103 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
11104 | - | |
11105 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
11106 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
11107 | - | |
11108 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
11109 | - | |
11110 | -#include <linux/rwlock_types.h> | |
11111 | - | |
11112 | #endif /* __LINUX_SPINLOCK_TYPES_H */ | |
11113 | diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h | |
11114 | new file mode 100644 | |
11115 | index 000000000000..f1dac1fb1d6a | |
11116 | --- /dev/null | |
11117 | +++ b/include/linux/spinlock_types_nort.h | |
11118 | @@ -0,0 +1,33 @@ | |
11119 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
11120 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
11121 | + | |
11122 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
11123 | +#error "Do not include directly. Include spinlock_types.h instead" | |
11124 | +#endif | |
11125 | + | |
11126 | +/* | |
11127 | + * The non RT version maps spinlocks to raw_spinlocks | |
11128 | + */ | |
11129 | +typedef struct spinlock { | |
11130 | + union { | |
11131 | + struct raw_spinlock rlock; | |
11132 | + | |
11133 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11134 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
11135 | + struct { | |
11136 | + u8 __padding[LOCK_PADSIZE]; | |
11137 | + struct lockdep_map dep_map; | |
11138 | + }; | |
11139 | +#endif | |
11140 | + }; | |
11141 | +} spinlock_t; | |
11142 | + | |
11143 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
11144 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
11145 | + | |
11146 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
11147 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
11148 | + | |
11149 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
11150 | + | |
11151 | +#endif | |
11152 | diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h | |
11153 | new file mode 100644 | |
11154 | index 000000000000..edffc4d53fc9 | |
11155 | --- /dev/null | |
11156 | +++ b/include/linux/spinlock_types_raw.h | |
11157 | @@ -0,0 +1,56 @@ | |
11158 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
11159 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
11160 | + | |
11161 | +#if defined(CONFIG_SMP) | |
11162 | +# include <asm/spinlock_types.h> | |
11163 | +#else | |
11164 | +# include <linux/spinlock_types_up.h> | |
11165 | +#endif | |
11166 | + | |
11167 | +#include <linux/lockdep.h> | |
11168 | + | |
11169 | +typedef struct raw_spinlock { | |
11170 | + arch_spinlock_t raw_lock; | |
11171 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
11172 | + unsigned int break_lock; | |
11173 | +#endif | |
11174 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
11175 | + unsigned int magic, owner_cpu; | |
11176 | + void *owner; | |
11177 | +#endif | |
11178 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11179 | + struct lockdep_map dep_map; | |
11180 | +#endif | |
11181 | +} raw_spinlock_t; | |
11182 | + | |
11183 | +#define SPINLOCK_MAGIC 0xdead4ead | |
11184 | + | |
11185 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
11186 | + | |
11187 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11188 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
11189 | +#else | |
11190 | +# define SPIN_DEP_MAP_INIT(lockname) | |
11191 | +#endif | |
11192 | + | |
11193 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
11194 | +# define SPIN_DEBUG_INIT(lockname) \ | |
11195 | + .magic = SPINLOCK_MAGIC, \ | |
11196 | + .owner_cpu = -1, \ | |
11197 | + .owner = SPINLOCK_OWNER_INIT, | |
11198 | +#else | |
11199 | +# define SPIN_DEBUG_INIT(lockname) | |
11200 | +#endif | |
11201 | + | |
11202 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
11203 | + { \ | |
11204 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
11205 | + SPIN_DEBUG_INIT(lockname) \ | |
11206 | + SPIN_DEP_MAP_INIT(lockname) } | |
11207 | + | |
11208 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
11209 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
11210 | + | |
11211 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
11212 | + | |
11213 | +#endif | |
11214 | diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h | |
11215 | new file mode 100644 | |
11216 | index 000000000000..3e3d8c5f7a9a | |
11217 | --- /dev/null | |
11218 | +++ b/include/linux/spinlock_types_rt.h | |
11219 | @@ -0,0 +1,48 @@ | |
11220 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
11221 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
11222 | + | |
11223 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
11224 | +#error "Do not include directly. Include spinlock_types.h instead" | |
11225 | +#endif | |
11226 | + | |
11227 | +#include <linux/cache.h> | |
11228 | + | |
11229 | +/* | |
11230 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
11231 | + */ | |
11232 | +typedef struct spinlock { | |
11233 | + struct rt_mutex lock; | |
11234 | + unsigned int break_lock; | |
11235 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
11236 | + struct lockdep_map dep_map; | |
11237 | +#endif | |
11238 | +} spinlock_t; | |
11239 | + | |
11240 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
11241 | +# define __RT_SPIN_INITIALIZER(name) \ | |
11242 | + { \ | |
11243 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
11244 | + .save_state = 1, \ | |
11245 | + .file = __FILE__, \ | |
11246 | + .line = __LINE__ , \ | |
11247 | + } | |
11248 | +#else | |
11249 | +# define __RT_SPIN_INITIALIZER(name) \ | |
11250 | + { \ | |
11251 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
11252 | + .save_state = 1, \ | |
11253 | + } | |
11254 | +#endif | |
11255 | + | |
11256 | +/* | |
11257 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
11258 | +*/ | |
11259 | + | |
11260 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
11261 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
11262 | + SPIN_DEP_MAP_INIT(name) } | |
11263 | + | |
11264 | +#define DEFINE_SPINLOCK(name) \ | |
11265 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
11266 | + | |
11267 | +#endif | |
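On PREEMPT_RT_FULL the spinlock_t defined above is backed by an rt_mutex, but lock users do not change. A minimal sketch under that assumption (the lock and counter names are hypothetical, not from this patch):

	static DEFINE_SPINLOCK(my_lock);
	static int my_count;

	static void my_inc(void)
	{
		spin_lock(&my_lock);	/* sleeping rt_mutex-based lock on RT, arch spinlock otherwise */
		my_count++;
		spin_unlock(&my_lock);
	}

Splitting the type headers is what lets such a caller compile unchanged for both configurations; only the definition behind spinlock_t and the spin_lock()/spin_unlock() mappings differ.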
11268 | diff --git a/include/linux/srcu.h b/include/linux/srcu.h | |
11269 | index dc8eb63c6568..e793d3a257da 100644 | |
11270 | --- a/include/linux/srcu.h | |
11271 | +++ b/include/linux/srcu.h | |
11272 | @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); | |
11273 | ||
11274 | void process_srcu(struct work_struct *work); | |
11275 | ||
11276 | -#define __SRCU_STRUCT_INIT(name) \ | |
11277 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
11278 | { \ | |
11279 | .completed = -300, \ | |
11280 | - .per_cpu_ref = &name##_srcu_array, \ | |
11281 | + .per_cpu_ref = &pcpu_name, \ | |
11282 | .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ | |
11283 | .running = false, \ | |
11284 | .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ | |
11285 | @@ -119,7 +119,7 @@ void process_srcu(struct work_struct *work); | |
11286 | */ | |
11287 | #define __DEFINE_SRCU(name, is_static) \ | |
11288 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | |
11289 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
11290 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) | |
11291 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
11292 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
11293 | ||
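With the extra pcpu_name argument, DEFINE_SRCU(foo) (a hypothetical name) would expand roughly as below; this is a sketch derived from the two macros in the hunk, not quoted generated code:

	static DEFINE_PER_CPU(struct srcu_struct_array, foo_srcu_array);
	struct srcu_struct foo = __SRCU_STRUCT_INIT(foo, foo_srcu_array);

Passing the per-cpu array name explicitly presumably allows call sites whose array does not follow the name##_srcu_array convention to reuse __SRCU_STRUCT_INIT().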
11294 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
11295 | index 7693e39b14fe..b36eedeb28d1 100644 | |
11296 | --- a/include/linux/suspend.h | |
11297 | +++ b/include/linux/suspend.h | |
11298 | @@ -193,6 +193,12 @@ struct platform_freeze_ops { | |
11299 | void (*end)(void); | |
11300 | }; | |
11301 | ||
11302 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | |
11303 | +extern bool pm_in_action; | |
11304 | +#else | |
11305 | +# define pm_in_action false | |
11306 | +#endif | |
11307 | + | |
11308 | #ifdef CONFIG_SUSPEND | |
11309 | /** | |
11310 | * suspend_set_ops - set platform dependent suspend operations | |
11311 | diff --git a/include/linux/swait.h b/include/linux/swait.h | |
11312 | index c1f9c62a8a50..83f004a72320 100644 | |
11313 | --- a/include/linux/swait.h | |
11314 | +++ b/include/linux/swait.h | |
11315 | @@ -87,6 +87,7 @@ static inline int swait_active(struct swait_queue_head *q) | |
11316 | extern void swake_up(struct swait_queue_head *q); | |
11317 | extern void swake_up_all(struct swait_queue_head *q); | |
11318 | extern void swake_up_locked(struct swait_queue_head *q); | |
11319 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
11320 | ||
11321 | extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
11322 | extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
11323 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
11324 | index e1d761463243..4ae9a4434ad3 100644 | |
11325 | --- a/include/linux/swap.h | |
11326 | +++ b/include/linux/swap.h | |
11327 | @@ -11,6 +11,7 @@ | |
11328 | #include <linux/fs.h> | |
11329 | #include <linux/atomic.h> | |
11330 | #include <linux/page-flags.h> | |
11331 | +#include <linux/locallock.h> | |
11332 | #include <asm/page.h> | |
11333 | ||
11334 | struct notifier_block; | |
11335 | @@ -243,7 +244,8 @@ struct swap_info_struct { | |
11336 | void *workingset_eviction(struct address_space *mapping, struct page *page); | |
11337 | bool workingset_refault(void *shadow); | |
11338 | void workingset_activation(struct page *page); | |
11339 | -extern struct list_lru workingset_shadow_nodes; | |
11340 | +extern struct list_lru __workingset_shadow_nodes; | |
11341 | +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
11342 | ||
11343 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | |
11344 | { | |
11345 | @@ -288,6 +290,7 @@ extern unsigned long nr_free_pagecache_pages(void); | |
11346 | ||
11347 | ||
11348 | /* linux/mm/swap.c */ | |
11349 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
11350 | extern void lru_cache_add(struct page *); | |
11351 | extern void lru_cache_add_anon(struct page *page); | |
11352 | extern void lru_cache_add_file(struct page *page); | |
11353 | diff --git a/include/linux/swork.h b/include/linux/swork.h | |
11354 | new file mode 100644 | |
11355 | index 000000000000..f175fa9a6016 | |
11356 | --- /dev/null | |
11357 | +++ b/include/linux/swork.h | |
11358 | @@ -0,0 +1,24 @@ | |
11359 | +#ifndef _LINUX_SWORK_H | |
11360 | +#define _LINUX_SWORK_H | |
11361 | + | |
11362 | +#include <linux/list.h> | |
11363 | + | |
11364 | +struct swork_event { | |
11365 | + struct list_head item; | |
11366 | + unsigned long flags; | |
11367 | + void (*func)(struct swork_event *); | |
11368 | +}; | |
11369 | + | |
11370 | +static inline void INIT_SWORK(struct swork_event *event, | |
11371 | + void (*func)(struct swork_event *)) | |
11372 | +{ | |
11373 | + event->flags = 0; | |
11374 | + event->func = func; | |
11375 | +} | |
11376 | + | |
11377 | +bool swork_queue(struct swork_event *sev); | |
11378 | + | |
11379 | +int swork_get(void); | |
11380 | +void swork_put(void); | |
11381 | + | |
11382 | +#endif /* _LINUX_SWORK_H */ | |
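A hedged usage sketch for the swork interface declared above; the event, handler, and setup names are hypothetical. The cgroup.c hunk later in this patch follows the same INIT_SWORK()/swork_queue()/swork_get() pattern for css->destroy_swork:

	static struct swork_event my_event;

	static void my_handler(struct swork_event *sev)
	{
		/* deferred work, run later by the swork helper thread */
	}

	static int my_setup(void)
	{
		int err = swork_get();		/* bring up / refcount the helper thread */

		if (err)
			return err;
		INIT_SWORK(&my_event, my_handler);
		return 0;
	}

	static void my_trigger(void)
	{
		swork_queue(&my_event);		/* defer my_handler() to the helper thread */
	}

swork_put() would drop the reference taken by swork_get() when the user is torn down.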
11383 | diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h | |
11384 | index 2b5b10eed74f..8bf15b1858f5 100644 | |
11385 | --- a/include/linux/thread_info.h | |
11386 | +++ b/include/linux/thread_info.h | |
11387 | @@ -103,7 +103,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) | |
11388 | #define test_thread_flag(flag) \ | |
11389 | test_ti_thread_flag(current_thread_info(), flag) | |
11390 | ||
11391 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
11392 | +#ifdef CONFIG_PREEMPT_LAZY | |
11393 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
11394 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
11395 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
11396 | +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY)) | 
11397 | + | |
11398 | +#else | |
11399 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
11400 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
11401 | +#define tif_need_resched_lazy() 0 | |
11402 | +#endif | |
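A hedged illustration of the distinction these macros draw under PREEMPT_LAZY; the polling helper is hypothetical and not part of this patch:

	static void my_poll_until_resched(void)
	{
		for (;;) {
			if (tif_need_resched_now())
				break;		/* an immediate reschedule was requested */
			if (tif_need_resched_lazy())
				break;		/* only a lazy, deferrable request is pending */
			cpu_relax();
		}
	}

tif_need_resched() covers both flags, so callers that do not care about the distinction keep using it unchanged.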
11403 | ||
11404 | #ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES | |
11405 | static inline int arch_within_stack_frames(const void * const stack, | |
11406 | diff --git a/include/linux/timer.h b/include/linux/timer.h | |
11407 | index 51d601f192d4..83cea629efe1 100644 | |
11408 | --- a/include/linux/timer.h | |
11409 | +++ b/include/linux/timer.h | |
11410 | @@ -241,7 +241,7 @@ extern void add_timer(struct timer_list *timer); | |
11411 | ||
11412 | extern int try_to_del_timer_sync(struct timer_list *timer); | |
11413 | ||
11414 | -#ifdef CONFIG_SMP | |
11415 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
11416 | extern int del_timer_sync(struct timer_list *timer); | |
11417 | #else | |
11418 | # define del_timer_sync(t) del_timer(t) | |
11419 | diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h | |
11420 | index be007610ceb0..15154b13a53b 100644 | |
11421 | --- a/include/linux/trace_events.h | |
11422 | +++ b/include/linux/trace_events.h | |
11423 | @@ -56,6 +56,9 @@ struct trace_entry { | |
11424 | unsigned char flags; | |
11425 | unsigned char preempt_count; | |
11426 | int pid; | |
11427 | + unsigned short migrate_disable; | |
11428 | + unsigned short padding; | |
11429 | + unsigned char preempt_lazy_count; | |
11430 | }; | |
11431 | ||
11432 | #define TRACE_EVENT_TYPE_MAX \ | |
11433 | diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h | |
11434 | index f30c187ed785..83bf0f798426 100644 | |
11435 | --- a/include/linux/uaccess.h | |
11436 | +++ b/include/linux/uaccess.h | |
11437 | @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void) | |
11438 | */ | |
11439 | static inline void pagefault_disable(void) | |
11440 | { | |
11441 | + migrate_disable(); | |
11442 | pagefault_disabled_inc(); | |
11443 | /* | |
11444 | * make sure to have issued the store before a pagefault | |
11445 | @@ -40,6 +41,7 @@ static inline void pagefault_enable(void) | |
11446 | */ | |
11447 | barrier(); | |
11448 | pagefault_disabled_dec(); | |
11449 | + migrate_enable(); | |
11450 | } | |
11451 | ||
11452 | /* | |
11453 | diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h | |
11454 | index 4a29c75b146e..0a294e950df8 100644 | |
11455 | --- a/include/linux/uprobes.h | |
11456 | +++ b/include/linux/uprobes.h | |
11457 | @@ -27,6 +27,7 @@ | |
11458 | #include <linux/errno.h> | |
11459 | #include <linux/rbtree.h> | |
11460 | #include <linux/types.h> | |
11461 | +#include <linux/wait.h> | |
11462 | ||
11463 | struct vm_area_struct; | |
11464 | struct mm_struct; | |
11465 | diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h | |
11466 | index 613771909b6e..e28c5a43229d 100644 | |
11467 | --- a/include/linux/vmstat.h | |
11468 | +++ b/include/linux/vmstat.h | |
11469 | @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); | |
11470 | */ | |
11471 | static inline void __count_vm_event(enum vm_event_item item) | |
11472 | { | |
11473 | + preempt_disable_rt(); | |
11474 | raw_cpu_inc(vm_event_states.event[item]); | |
11475 | + preempt_enable_rt(); | |
11476 | } | |
11477 | ||
11478 | static inline void count_vm_event(enum vm_event_item item) | |
11479 | @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) | |
11480 | ||
11481 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
11482 | { | |
11483 | + preempt_disable_rt(); | |
11484 | raw_cpu_add(vm_event_states.event[item], delta); | |
11485 | + preempt_enable_rt(); | |
11486 | } | |
11487 | ||
11488 | static inline void count_vm_events(enum vm_event_item item, long delta) | |
11489 | diff --git a/include/linux/wait.h b/include/linux/wait.h | |
11490 | index c3ff74d764fa..60222150a409 100644 | |
11491 | --- a/include/linux/wait.h | |
11492 | +++ b/include/linux/wait.h | |
11493 | @@ -8,6 +8,7 @@ | |
11494 | #include <linux/spinlock.h> | |
11495 | #include <asm/current.h> | |
11496 | #include <uapi/linux/wait.h> | |
11497 | +#include <linux/atomic.h> | |
11498 | ||
11499 | typedef struct __wait_queue wait_queue_t; | |
11500 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); | |
11501 | diff --git a/include/net/dst.h b/include/net/dst.h | |
11502 | index 6835d224d47b..55a5a9698f14 100644 | |
11503 | --- a/include/net/dst.h | |
11504 | +++ b/include/net/dst.h | |
11505 | @@ -446,7 +446,7 @@ static inline void dst_confirm(struct dst_entry *dst) | |
11506 | static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, | |
11507 | struct sk_buff *skb) | |
11508 | { | |
11509 | - const struct hh_cache *hh; | |
11510 | + struct hh_cache *hh; | |
11511 | ||
11512 | if (dst->pending_confirm) { | |
11513 | unsigned long now = jiffies; | |
11514 | diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h | |
11515 | index 231e121cc7d9..d125222b979d 100644 | |
11516 | --- a/include/net/gen_stats.h | |
11517 | +++ b/include/net/gen_stats.h | |
11518 | @@ -5,6 +5,7 @@ | |
11519 | #include <linux/socket.h> | |
11520 | #include <linux/rtnetlink.h> | |
11521 | #include <linux/pkt_sched.h> | |
11522 | +#include <net/net_seq_lock.h> | |
11523 | ||
11524 | struct gnet_stats_basic_cpu { | |
11525 | struct gnet_stats_basic_packed bstats; | |
11526 | @@ -33,11 +34,11 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, | |
11527 | spinlock_t *lock, struct gnet_dump *d, | |
11528 | int padattr); | |
11529 | ||
11530 | -int gnet_stats_copy_basic(const seqcount_t *running, | |
11531 | +int gnet_stats_copy_basic(net_seqlock_t *running, | |
11532 | struct gnet_dump *d, | |
11533 | struct gnet_stats_basic_cpu __percpu *cpu, | |
11534 | struct gnet_stats_basic_packed *b); | |
11535 | -void __gnet_stats_copy_basic(const seqcount_t *running, | |
11536 | +void __gnet_stats_copy_basic(net_seqlock_t *running, | |
11537 | struct gnet_stats_basic_packed *bstats, | |
11538 | struct gnet_stats_basic_cpu __percpu *cpu, | |
11539 | struct gnet_stats_basic_packed *b); | |
11540 | @@ -55,14 +56,14 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
11541 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
11542 | struct gnet_stats_rate_est64 *rate_est, | |
11543 | spinlock_t *stats_lock, | |
11544 | - seqcount_t *running, struct nlattr *opt); | |
11545 | + net_seqlock_t *running, struct nlattr *opt); | |
11546 | void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, | |
11547 | struct gnet_stats_rate_est64 *rate_est); | |
11548 | int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
11549 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
11550 | struct gnet_stats_rate_est64 *rate_est, | |
11551 | spinlock_t *stats_lock, | |
11552 | - seqcount_t *running, struct nlattr *opt); | |
11553 | + net_seqlock_t *running, struct nlattr *opt); | |
11554 | bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, | |
11555 | const struct gnet_stats_rate_est64 *rate_est); | |
11556 | #endif | |
11557 | diff --git a/include/net/neighbour.h b/include/net/neighbour.h | |
11558 | index 8b683841e574..bf656008f6e7 100644 | |
11559 | --- a/include/net/neighbour.h | |
11560 | +++ b/include/net/neighbour.h | |
11561 | @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) | |
11562 | } | |
11563 | #endif | |
11564 | ||
11565 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) | |
11566 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
11567 | { | |
11568 | unsigned int seq; | |
11569 | int hh_len; | |
11570 | @@ -501,7 +501,7 @@ struct neighbour_cb { | |
11571 | ||
11572 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | |
11573 | ||
11574 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | |
11575 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
11576 | const struct net_device *dev) | |
11577 | { | |
11578 | unsigned int seq; | |
11579 | diff --git a/include/net/net_seq_lock.h b/include/net/net_seq_lock.h | |
11580 | new file mode 100644 | |
11581 | index 000000000000..a7034298a82a | |
11582 | --- /dev/null | |
11583 | +++ b/include/net/net_seq_lock.h | |
11584 | @@ -0,0 +1,15 @@ | |
11585 | +#ifndef __NET_NET_SEQ_LOCK_H__ | |
11586 | +#define __NET_NET_SEQ_LOCK_H__ | |
11587 | + | |
11588 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11589 | +# define net_seqlock_t seqlock_t | |
11590 | +# define net_seq_begin(__r) read_seqbegin(__r) | |
11591 | +# define net_seq_retry(__r, __s) read_seqretry(__r, __s) | |
11592 | + | |
11593 | +#else | |
11594 | +# define net_seqlock_t seqcount_t | |
11595 | +# define net_seq_begin(__r) read_seqcount_begin(__r) | |
11596 | +# define net_seq_retry(__r, __s) read_seqcount_retry(__r, __s) | |
11597 | +#endif | |
11598 | + | |
11599 | +#endif | |
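A read-side sketch using the wrappers above (function and argument names are hypothetical): on PREEMPT_RT_BASE this compiles to a read_seqbegin()/read_seqretry() loop on a seqlock_t, otherwise to the seqcount variants.

	static void my_copy_stats(net_seqlock_t *running)
	{
		unsigned int seq;

		do {
			seq = net_seq_begin(running);
			/* copy the statistics published by the qdisc writer side */
		} while (net_seq_retry(running, seq));
	}

The matching writer side is the qdisc_run_begin()/qdisc_run_end() pair changed later in this patch.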
11600 | diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
11601 | index d061ffeb1e71..12ef433dc3b8 100644 | |
11602 | --- a/include/net/netns/ipv4.h | |
11603 | +++ b/include/net/netns/ipv4.h | |
11604 | @@ -70,6 +70,7 @@ struct netns_ipv4 { | |
11605 | ||
11606 | int sysctl_icmp_echo_ignore_all; | |
11607 | int sysctl_icmp_echo_ignore_broadcasts; | |
11608 | + int sysctl_icmp_echo_sysrq; | |
11609 | int sysctl_icmp_ignore_bogus_error_responses; | |
11610 | int sysctl_icmp_ratelimit; | |
11611 | int sysctl_icmp_ratemask; | |
11612 | diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h | |
11613 | index 909aff2db2b3..c47219d6e4bc 100644 | |
11614 | --- a/include/net/sch_generic.h | |
11615 | +++ b/include/net/sch_generic.h | |
11616 | @@ -10,6 +10,7 @@ | |
11617 | #include <linux/dynamic_queue_limits.h> | |
11618 | #include <net/gen_stats.h> | |
11619 | #include <net/rtnetlink.h> | |
11620 | +#include <net/net_seq_lock.h> | |
11621 | ||
11622 | struct Qdisc_ops; | |
11623 | struct qdisc_walker; | |
11624 | @@ -78,7 +79,7 @@ struct Qdisc { | |
11625 | struct sk_buff *gso_skb ____cacheline_aligned_in_smp; | |
11626 | struct sk_buff_head q; | |
11627 | struct gnet_stats_basic_packed bstats; | |
11628 | - seqcount_t running; | |
11629 | + net_seqlock_t running; | |
11630 | struct gnet_stats_queue qstats; | |
11631 | unsigned long state; | |
11632 | struct Qdisc *next_sched; | |
11633 | @@ -90,13 +91,22 @@ struct Qdisc { | |
11634 | spinlock_t busylock ____cacheline_aligned_in_smp; | |
11635 | }; | |
11636 | ||
11637 | -static inline bool qdisc_is_running(const struct Qdisc *qdisc) | |
11638 | +static inline bool qdisc_is_running(struct Qdisc *qdisc) | |
11639 | { | |
11640 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11641 | + return spin_is_locked(&qdisc->running.lock) ? true : false; | |
11642 | +#else | |
11643 | return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; | |
11644 | +#endif | |
11645 | } | |
11646 | ||
11647 | static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
11648 | { | |
11649 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11650 | + if (try_write_seqlock(&qdisc->running)) | |
11651 | + return true; | |
11652 | + return false; | |
11653 | +#else | |
11654 | if (qdisc_is_running(qdisc)) | |
11655 | return false; | |
11656 | /* Variant of write_seqcount_begin() telling lockdep a trylock | |
11657 | @@ -105,11 +115,16 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) | |
11658 | raw_write_seqcount_begin(&qdisc->running); | |
11659 | seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_); | |
11660 | return true; | |
11661 | +#endif | |
11662 | } | |
11663 | ||
11664 | static inline void qdisc_run_end(struct Qdisc *qdisc) | |
11665 | { | |
11666 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11667 | + write_sequnlock(&qdisc->running); | |
11668 | +#else | |
11669 | write_seqcount_end(&qdisc->running); | |
11670 | +#endif | |
11671 | } | |
11672 | ||
11673 | static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) | |
11674 | @@ -300,7 +315,7 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc) | |
11675 | return qdisc_lock(root); | |
11676 | } | |
11677 | ||
11678 | -static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
11679 | +static inline net_seqlock_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc) | |
11680 | { | |
11681 | struct Qdisc *root = qdisc_root_sleeping(qdisc); | |
11682 | ||
11683 | diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h | |
11684 | new file mode 100644 | |
11685 | index 000000000000..f7710de1b1f3 | |
11686 | --- /dev/null | |
11687 | +++ b/include/trace/events/hist.h | |
11688 | @@ -0,0 +1,73 @@ | |
11689 | +#undef TRACE_SYSTEM | |
11690 | +#define TRACE_SYSTEM hist | |
11691 | + | |
11692 | +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) | |
11693 | +#define _TRACE_HIST_H | |
11694 | + | |
11695 | +#include "latency_hist.h" | |
11696 | +#include <linux/tracepoint.h> | |
11697 | + | |
11698 | +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) | |
11699 | +#define trace_preemptirqsoff_hist(a, b) | |
11700 | +#define trace_preemptirqsoff_hist_rcuidle(a, b) | |
11701 | +#else | |
11702 | +TRACE_EVENT(preemptirqsoff_hist, | |
11703 | + | |
11704 | + TP_PROTO(int reason, int starthist), | |
11705 | + | |
11706 | + TP_ARGS(reason, starthist), | |
11707 | + | |
11708 | + TP_STRUCT__entry( | |
11709 | + __field(int, reason) | |
11710 | + __field(int, starthist) | |
11711 | + ), | |
11712 | + | |
11713 | + TP_fast_assign( | |
11714 | + __entry->reason = reason; | |
11715 | + __entry->starthist = starthist; | |
11716 | + ), | |
11717 | + | |
11718 | + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), | |
11719 | + __entry->starthist ? "start" : "stop") | |
11720 | +); | |
11721 | +#endif | |
11722 | + | |
11723 | +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
11724 | +#define trace_hrtimer_interrupt(a, b, c, d) | |
11725 | +#else | |
11726 | +TRACE_EVENT(hrtimer_interrupt, | |
11727 | + | |
11728 | + TP_PROTO(int cpu, long long offset, struct task_struct *curr, | |
11729 | + struct task_struct *task), | |
11730 | + | |
11731 | + TP_ARGS(cpu, offset, curr, task), | |
11732 | + | |
11733 | + TP_STRUCT__entry( | |
11734 | + __field(int, cpu) | |
11735 | + __field(long long, offset) | |
11736 | + __array(char, ccomm, TASK_COMM_LEN) | |
11737 | + __field(int, cprio) | |
11738 | + __array(char, tcomm, TASK_COMM_LEN) | |
11739 | + __field(int, tprio) | |
11740 | + ), | |
11741 | + | |
11742 | + TP_fast_assign( | |
11743 | + __entry->cpu = cpu; | |
11744 | + __entry->offset = offset; | |
11745 | + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); | |
11746 | + __entry->cprio = curr->prio; | |
11747 | + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", | |
11748 | + task != NULL ? TASK_COMM_LEN : 7); | |
11749 | + __entry->tprio = task != NULL ? task->prio : -1; | |
11750 | + ), | |
11751 | + | |
11752 | + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", | |
11753 | + __entry->cpu, __entry->offset, __entry->ccomm, | |
11754 | + __entry->cprio, __entry->tcomm, __entry->tprio) | |
11755 | +); | |
11756 | +#endif | |
11757 | + | |
11758 | +#endif /* _TRACE_HIST_H */ | |
11759 | + | |
11760 | +/* This part must be outside protection */ | |
11761 | +#include <trace/define_trace.h> | |
11762 | diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h | |
11763 | new file mode 100644 | |
11764 | index 000000000000..d3f2fbd560b1 | |
11765 | --- /dev/null | |
11766 | +++ b/include/trace/events/latency_hist.h | |
11767 | @@ -0,0 +1,29 @@ | |
11768 | +#ifndef _LATENCY_HIST_H | |
11769 | +#define _LATENCY_HIST_H | |
11770 | + | |
11771 | +enum hist_action { | |
11772 | + IRQS_ON, | |
11773 | + PREEMPT_ON, | |
11774 | + TRACE_STOP, | |
11775 | + IRQS_OFF, | |
11776 | + PREEMPT_OFF, | |
11777 | + TRACE_START, | |
11778 | +}; | |
11779 | + | |
11780 | +static char *actions[] = { | |
11781 | + "IRQS_ON", | |
11782 | + "PREEMPT_ON", | |
11783 | + "TRACE_STOP", | |
11784 | + "IRQS_OFF", | |
11785 | + "PREEMPT_OFF", | |
11786 | + "TRACE_START", | |
11787 | +}; | |
11788 | + | |
11789 | +static inline char *getaction(int action) | |
11790 | +{ | |
11791 | + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0])) | 
11792 | + return actions[action]; | |
11793 | + return "unknown"; | |
11794 | +} | |
11795 | + | |
11796 | +#endif /* _LATENCY_HIST_H */ | |
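For orientation, a hedged sketch of how the preemptirqsoff_hist tracepoint is meant to be called; the real call sites live in later tracing hunks of this series and are not shown here:

	/* interrupts are about to be disabled: start the latency measurement */
	trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1);

	/* interrupts are enabled again: stop the measurement */
	trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0);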
11797 | diff --git a/init/Kconfig b/init/Kconfig | |
11798 | index cac3f096050d..b6c9166d878a 100644 | |
11799 | --- a/init/Kconfig | |
11800 | +++ b/init/Kconfig | |
11801 | @@ -496,7 +496,7 @@ config TINY_RCU | |
11802 | ||
11803 | config RCU_EXPERT | |
11804 | bool "Make expert-level adjustments to RCU configuration" | |
11805 | - default n | |
11806 | + default y if PREEMPT_RT_FULL | |
11807 | help | |
11808 | This option needs to be enabled if you wish to make | |
11809 | expert-level adjustments to RCU configuration. By default, | |
11810 | @@ -613,7 +613,7 @@ config RCU_FANOUT_LEAF | |
11811 | ||
11812 | config RCU_FAST_NO_HZ | |
11813 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
11814 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
11815 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
11816 | default n | |
11817 | help | |
11818 | This option permits CPUs to enter dynticks-idle state even if | |
11819 | @@ -640,7 +640,7 @@ config TREE_RCU_TRACE | |
11820 | config RCU_BOOST | |
11821 | bool "Enable RCU priority boosting" | |
11822 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
11823 | - default n | |
11824 | + default y if PREEMPT_RT_FULL | |
11825 | help | |
11826 | This option boosts the priority of preempted RCU readers that | |
11827 | block the current preemptible RCU grace period for too long. | |
11828 | @@ -1054,6 +1054,7 @@ config CFS_BANDWIDTH | |
11829 | config RT_GROUP_SCHED | |
11830 | bool "Group scheduling for SCHED_RR/FIFO" | |
11831 | depends on CGROUP_SCHED | |
11832 | + depends on !PREEMPT_RT_FULL | |
11833 | default n | |
11834 | help | |
11835 | This feature lets you explicitly allocate real CPU bandwidth | |
11836 | @@ -1761,6 +1762,7 @@ choice | |
11837 | ||
11838 | config SLAB | |
11839 | bool "SLAB" | |
11840 | + depends on !PREEMPT_RT_FULL | |
11841 | select HAVE_HARDENED_USERCOPY_ALLOCATOR | |
11842 | help | |
11843 | The regular slab allocator that is established and known to work | |
11844 | @@ -1781,6 +1783,7 @@ config SLUB | |
11845 | config SLOB | |
11846 | depends on EXPERT | |
11847 | bool "SLOB (Simple Allocator)" | |
11848 | + depends on !PREEMPT_RT_FULL | |
11849 | help | |
11850 | SLOB replaces the stock allocator with a drastically simpler | |
11851 | allocator. SLOB is generally more space efficient but | |
11852 | @@ -1799,7 +1802,7 @@ config SLAB_FREELIST_RANDOM | |
11853 | ||
11854 | config SLUB_CPU_PARTIAL | |
11855 | default y | |
11856 | - depends on SLUB && SMP | |
11857 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
11858 | bool "SLUB per cpu partial cache" | |
11859 | help | |
11860 | Per cpu partial caches accelerate object allocation and freeing | 
11861 | diff --git a/init/Makefile b/init/Makefile | |
11862 | index 7bc47ee31c36..88cf473554e0 100644 | |
11863 | --- a/init/Makefile | |
11864 | +++ b/init/Makefile | |
11865 | @@ -33,4 +33,4 @@ $(obj)/version.o: include/generated/compile.h | |
11866 | include/generated/compile.h: FORCE | |
11867 | @$($(quiet)chk_compile.h) | |
11868 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
11869 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
11870 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
11871 | diff --git a/init/main.c b/init/main.c | |
11872 | index a8a58e2794a5..e4c979e37a91 100644 | |
11873 | --- a/init/main.c | |
11874 | +++ b/init/main.c | |
11875 | @@ -507,6 +507,7 @@ asmlinkage __visible void __init start_kernel(void) | |
11876 | setup_command_line(command_line); | |
11877 | setup_nr_cpu_ids(); | |
11878 | setup_per_cpu_areas(); | |
11879 | + softirq_early_init(); | |
11880 | boot_cpu_state_init(); | |
11881 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
11882 | ||
11883 | diff --git a/ipc/msg.c b/ipc/msg.c | |
11884 | index c6521c205cb4..996d89023552 100644 | |
11885 | --- a/ipc/msg.c | |
11886 | +++ b/ipc/msg.c | |
11887 | @@ -183,20 +183,14 @@ static void ss_wakeup(struct list_head *h, int kill) | |
11888 | } | |
11889 | } | |
11890 | ||
11891 | -static void expunge_all(struct msg_queue *msq, int res) | |
11892 | +static void expunge_all(struct msg_queue *msq, int res, | |
11893 | + struct wake_q_head *wake_q) | |
11894 | { | |
11895 | struct msg_receiver *msr, *t; | |
11896 | ||
11897 | list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { | |
11898 | - msr->r_msg = NULL; /* initialize expunge ordering */ | |
11899 | - wake_up_process(msr->r_tsk); | |
11900 | - /* | |
11901 | - * Ensure that the wakeup is visible before setting r_msg as | |
11902 | - * the receiving end depends on it: either spinning on a nil, | |
11903 | - * or dealing with -EAGAIN cases. See lockless receive part 1 | |
11904 | - * and 2 in do_msgrcv(). | |
11905 | - */ | |
11906 | - smp_wmb(); /* barrier (B) */ | |
11907 | + | |
11908 | + wake_q_add(wake_q, msr->r_tsk); | |
11909 | msr->r_msg = ERR_PTR(res); | |
11910 | } | |
11911 | } | |
11912 | @@ -213,11 +207,13 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |
11913 | { | |
11914 | struct msg_msg *msg, *t; | |
11915 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); | |
11916 | + WAKE_Q(wake_q); | |
11917 | ||
11918 | - expunge_all(msq, -EIDRM); | |
11919 | + expunge_all(msq, -EIDRM, &wake_q); | |
11920 | ss_wakeup(&msq->q_senders, 1); | |
11921 | msg_rmid(ns, msq); | |
11922 | ipc_unlock_object(&msq->q_perm); | |
11923 | + wake_up_q(&wake_q); | |
11924 | rcu_read_unlock(); | |
11925 | ||
11926 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { | |
11927 | @@ -342,6 +338,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
11928 | struct kern_ipc_perm *ipcp; | |
11929 | struct msqid64_ds uninitialized_var(msqid64); | |
11930 | struct msg_queue *msq; | |
11931 | + WAKE_Q(wake_q); | |
11932 | int err; | |
11933 | ||
11934 | if (cmd == IPC_SET) { | |
11935 | @@ -389,7 +386,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
11936 | /* sleeping receivers might be excluded by | |
11937 | * stricter permissions. | |
11938 | */ | |
11939 | - expunge_all(msq, -EAGAIN); | |
11940 | + expunge_all(msq, -EAGAIN, &wake_q); | |
11941 | /* sleeping senders might be able to send | |
11942 | * due to a larger queue size. | |
11943 | */ | |
11944 | @@ -402,6 +399,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
11945 | ||
11946 | out_unlock0: | |
11947 | ipc_unlock_object(&msq->q_perm); | |
11948 | + wake_up_q(&wake_q); | |
11949 | out_unlock1: | |
11950 | rcu_read_unlock(); | |
11951 | out_up: | |
11952 | @@ -566,7 +564,8 @@ static int testmsg(struct msg_msg *msg, long type, int mode) | |
11953 | return 0; | |
11954 | } | |
11955 | ||
11956 | -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) | |
11957 | +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, | |
11958 | + struct wake_q_head *wake_q) | |
11959 | { | |
11960 | struct msg_receiver *msr, *t; | |
11961 | ||
11962 | @@ -577,27 +576,13 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) | |
11963 | ||
11964 | list_del(&msr->r_list); | |
11965 | if (msr->r_maxsize < msg->m_ts) { | |
11966 | - /* initialize pipelined send ordering */ | |
11967 | - msr->r_msg = NULL; | |
11968 | - wake_up_process(msr->r_tsk); | |
11969 | - /* barrier (B) see barrier comment below */ | |
11970 | - smp_wmb(); | |
11971 | + wake_q_add(wake_q, msr->r_tsk); | |
11972 | msr->r_msg = ERR_PTR(-E2BIG); | |
11973 | } else { | |
11974 | - msr->r_msg = NULL; | |
11975 | msq->q_lrpid = task_pid_vnr(msr->r_tsk); | |
11976 | msq->q_rtime = get_seconds(); | |
11977 | - wake_up_process(msr->r_tsk); | |
11978 | - /* | |
11979 | - * Ensure that the wakeup is visible before | |
11980 | - * setting r_msg, as the receiving can otherwise | |
11981 | - * exit - once r_msg is set, the receiver can | |
11982 | - * continue. See lockless receive part 1 and 2 | |
11983 | - * in do_msgrcv(). Barrier (B). | |
11984 | - */ | |
11985 | - smp_wmb(); | |
11986 | + wake_q_add(wake_q, msr->r_tsk); | |
11987 | msr->r_msg = msg; | |
11988 | - | |
11989 | return 1; | |
11990 | } | |
11991 | } | |
11992 | @@ -613,6 +598,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
11993 | struct msg_msg *msg; | |
11994 | int err; | |
11995 | struct ipc_namespace *ns; | |
11996 | + WAKE_Q(wake_q); | |
11997 | ||
11998 | ns = current->nsproxy->ipc_ns; | |
11999 | ||
12000 | @@ -698,7 +684,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
12001 | msq->q_lspid = task_tgid_vnr(current); | |
12002 | msq->q_stime = get_seconds(); | |
12003 | ||
12004 | - if (!pipelined_send(msq, msg)) { | |
12005 | + if (!pipelined_send(msq, msg, &wake_q)) { | |
12006 | /* no one is waiting for this message, enqueue it */ | |
12007 | list_add_tail(&msg->m_list, &msq->q_messages); | |
12008 | msq->q_cbytes += msgsz; | |
12009 | @@ -712,6 +698,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
12010 | ||
12011 | out_unlock0: | |
12012 | ipc_unlock_object(&msq->q_perm); | |
12013 | + wake_up_q(&wake_q); | |
12014 | out_unlock1: | |
12015 | rcu_read_unlock(); | |
12016 | if (msg != NULL) | |
12017 | @@ -932,57 +919,25 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl | |
12018 | rcu_read_lock(); | |
12019 | ||
12020 | /* Lockless receive, part 2: | |
12021 | - * Wait until pipelined_send or expunge_all are outside of | |
12022 | - * wake_up_process(). There is a race with exit(), see | |
12023 | - * ipc/mqueue.c for the details. The correct serialization | |
12024 | - * ensures that a receiver cannot continue without the wakeup | |
12025 | - * being visibible _before_ setting r_msg: | |
12026 | + * The work in pipelined_send() and expunge_all(): | |
12027 | + * - Set pointer to message | |
12028 | + * - Queue the receiver task for later wakeup | |
12029 | + * - Wake up the process after the lock is dropped. | |
12030 | * | |
12031 | - * CPU 0 CPU 1 | |
12032 | - * <loop receiver> | |
12033 | - * smp_rmb(); (A) <-- pair -. <waker thread> | |
12034 | - * <load ->r_msg> | msr->r_msg = NULL; | |
12035 | - * | wake_up_process(); | |
12036 | - * <continue> `------> smp_wmb(); (B) | |
12037 | - * msr->r_msg = msg; | |
12038 | - * | |
12039 | - * Where (A) orders the message value read and where (B) orders | |
12040 | - * the write to the r_msg -- done in both pipelined_send and | |
12041 | - * expunge_all. | |
12042 | + * Should the process wake up before this wakeup (due to a | |
12043 | + * signal) it will either see the message and continue … | |
12044 | */ | |
12045 | - for (;;) { | |
12046 | - /* | |
12047 | - * Pairs with writer barrier in pipelined_send | |
12048 | - * or expunge_all. | |
12049 | - */ | |
12050 | - smp_rmb(); /* barrier (A) */ | |
12051 | - msg = (struct msg_msg *)msr_d.r_msg; | |
12052 | - if (msg) | |
12053 | - break; | |
12054 | ||
12055 | - /* | |
12056 | - * The cpu_relax() call is a compiler barrier | |
12057 | - * which forces everything in this loop to be | |
12058 | - * re-loaded. | |
12059 | - */ | |
12060 | - cpu_relax(); | |
12061 | - } | |
12062 | - | |
12063 | - /* Lockless receive, part 3: | |
12064 | - * If there is a message or an error then accept it without | |
12065 | - * locking. | |
12066 | - */ | |
12067 | + msg = (struct msg_msg *)msr_d.r_msg; | |
12068 | if (msg != ERR_PTR(-EAGAIN)) | |
12069 | goto out_unlock1; | |
12070 | ||
12071 | - /* Lockless receive, part 3: | |
12072 | - * Acquire the queue spinlock. | |
12073 | - */ | |
12074 | + /* | |
12075 | + * … or see -EAGAIN, acquire the lock to check the message | |
12076 | + * again. | |
12077 | + */ | |
12078 | ipc_lock_object(&msq->q_perm); | |
12079 | ||
12080 | - /* Lockless receive, part 4: | |
12081 | - * Repeat test after acquiring the spinlock. | |
12082 | - */ | |
12083 | msg = (struct msg_msg *)msr_d.r_msg; | |
12084 | if (msg != ERR_PTR(-EAGAIN)) | |
12085 | goto out_unlock0; | |
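The rewritten comment above describes the wake_q scheme this hunk converts ipc/msg.c to. A generic, hedged sketch of that scheme (hypothetical function and names, not the msg.c code itself):

	static void my_wake_waiter(spinlock_t *lock, struct task_struct *waiter)
	{
		WAKE_Q(wake_q);			/* on-stack wake queue */

		spin_lock(lock);
		wake_q_add(&wake_q, waiter);	/* queue the task, do not wake it yet */
		/* publish the result the waiter is waiting on while still locked */
		spin_unlock(lock);
		wake_up_q(&wake_q);		/* wake everything queued, after the lock is dropped */
	}

Performing the wakeup after the lock is dropped avoids waking a task only to have it immediately block on the still-held lock, which matters on RT where the wakee may preempt the waker.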
12086 | diff --git a/ipc/sem.c b/ipc/sem.c | |
12087 | index 5e318c5f749d..ec9203971539 100644 | |
12088 | --- a/ipc/sem.c | |
12089 | +++ b/ipc/sem.c | |
12090 | @@ -712,6 +712,13 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) | |
12091 | static void wake_up_sem_queue_prepare(struct list_head *pt, | |
12092 | struct sem_queue *q, int error) | |
12093 | { | |
12094 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12095 | + struct task_struct *p = q->sleeper; | |
12096 | + get_task_struct(p); | |
12097 | + q->status = error; | |
12098 | + wake_up_process(p); | |
12099 | + put_task_struct(p); | |
12100 | +#else | |
12101 | if (list_empty(pt)) { | |
12102 | /* | |
12103 | * Hold preempt off so that we don't get preempted and have the | |
12104 | @@ -723,6 +730,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
12105 | q->pid = error; | |
12106 | ||
12107 | list_add_tail(&q->list, pt); | |
12108 | +#endif | |
12109 | } | |
12110 | ||
12111 | /** | |
12112 | @@ -736,6 +744,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, | |
12113 | */ | |
12114 | static void wake_up_sem_queue_do(struct list_head *pt) | |
12115 | { | |
12116 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12117 | struct sem_queue *q, *t; | |
12118 | int did_something; | |
12119 | ||
12120 | @@ -748,6 +757,7 @@ static void wake_up_sem_queue_do(struct list_head *pt) | |
12121 | } | |
12122 | if (did_something) | |
12123 | preempt_enable(); | |
12124 | +#endif | |
12125 | } | |
12126 | ||
12127 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |
12128 | diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks | |
12129 | index ebdb0043203a..b9e6aa7e5aa6 100644 | |
12130 | --- a/kernel/Kconfig.locks | |
12131 | +++ b/kernel/Kconfig.locks | |
12132 | @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW | |
12133 | ||
12134 | config MUTEX_SPIN_ON_OWNER | |
12135 | def_bool y | |
12136 | - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW | |
12137 | + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
12138 | ||
12139 | config RWSEM_SPIN_ON_OWNER | |
12140 | def_bool y | |
12141 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
12142 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
12143 | ||
12144 | config LOCK_SPIN_ON_OWNER | |
12145 | def_bool y | |
12146 | diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt | |
12147 | index 3f9c97419f02..11dbe26a8279 100644 | |
12148 | --- a/kernel/Kconfig.preempt | |
12149 | +++ b/kernel/Kconfig.preempt | |
12150 | @@ -1,3 +1,16 @@ | |
12151 | +config PREEMPT | |
12152 | + bool | |
12153 | + select PREEMPT_COUNT | |
12154 | + | |
12155 | +config PREEMPT_RT_BASE | |
12156 | + bool | |
12157 | + select PREEMPT | |
12158 | + | |
12159 | +config HAVE_PREEMPT_LAZY | |
12160 | + bool | |
12161 | + | |
12162 | +config PREEMPT_LAZY | |
12163 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
12164 | ||
12165 | choice | |
12166 | prompt "Preemption Model" | |
12167 | @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY | |
12168 | ||
12169 | Select this if you are building a kernel for a desktop system. | |
12170 | ||
12171 | -config PREEMPT | |
12172 | +config PREEMPT__LL | |
12173 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
12174 | - select PREEMPT_COUNT | |
12175 | + select PREEMPT | |
12176 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
12177 | help | |
12178 | This option reduces the latency of the kernel by making | |
12179 | @@ -52,6 +65,22 @@ config PREEMPT | |
12180 | embedded system with latency requirements in the milliseconds | |
12181 | range. | |
12182 | ||
12183 | +config PREEMPT_RTB | |
12184 | + bool "Preemptible Kernel (Basic RT)" | |
12185 | + select PREEMPT_RT_BASE | |
12186 | + help | |
12187 | + This option is basically the same as (Low-Latency Desktop) but | |
12188 | + enables changes which are preliminary for the full preemptible | |
12189 | + RT kernel. | |
12190 | + | |
12191 | +config PREEMPT_RT_FULL | |
12192 | + bool "Fully Preemptible Kernel (RT)" | |
12193 | + depends on IRQ_FORCED_THREADING | |
12194 | + select PREEMPT_RT_BASE | |
12195 | + select PREEMPT_RCU | |
12196 | + help | |
12197 | + The complete set of PREEMPT_RT changes: on top of the Basic RT option, almost all kernel code, including most spinlock-protected sections, becomes preemptible. | 
12198 | + | |
12199 | endchoice | |
12200 | ||
12201 | config PREEMPT_COUNT | |
12202 | diff --git a/kernel/Makefile b/kernel/Makefile | |
12203 | index e2ec54e2b952..bff8214bf5f6 100644 | |
12204 | --- a/kernel/Makefile | |
12205 | +++ b/kernel/Makefile | |
12206 | @@ -11,6 +11,13 @@ obj-y = fork.o exec_domain.o panic.o \ | |
12207 | notifier.o ksysfs.o cred.o reboot.o \ | |
12208 | async.o range.o smpboot.o | |
12209 | ||
12210 | +# Tracing may do some dangerous __builtin_return_address() operations | |
12211 | +# We know they are dangerous, we don't need gcc telling us that. | |
12212 | +ifdef CONFIG_USING_GET_LOCK_PARENT_IP | |
12213 | +FRAME_CFLAGS := $(call cc-disable-warning,frame-address) | |
12214 | +KBUILD_CFLAGS += $(FRAME_CFLAGS) | |
12215 | +endif | |
12216 | + | |
12217 | obj-$(CONFIG_MULTIUSER) += groups.o | |
12218 | ||
12219 | ifdef CONFIG_FUNCTION_TRACER | |
12220 | diff --git a/kernel/cgroup.c b/kernel/cgroup.c | |
12221 | index d6b729beba49..11d61b2ca938 100644 | |
12222 | --- a/kernel/cgroup.c | |
12223 | +++ b/kernel/cgroup.c | |
12224 | @@ -5027,10 +5027,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) | |
12225 | queue_work(cgroup_destroy_wq, &css->destroy_work); | |
12226 | } | |
12227 | ||
12228 | -static void css_release_work_fn(struct work_struct *work) | |
12229 | +static void css_release_work_fn(struct swork_event *sev) | |
12230 | { | |
12231 | struct cgroup_subsys_state *css = | |
12232 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
12233 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
12234 | struct cgroup_subsys *ss = css->ss; | |
12235 | struct cgroup *cgrp = css->cgroup; | |
12236 | ||
12237 | @@ -5071,8 +5071,8 @@ static void css_release(struct percpu_ref *ref) | |
12238 | struct cgroup_subsys_state *css = | |
12239 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
12240 | ||
12241 | - INIT_WORK(&css->destroy_work, css_release_work_fn); | |
12242 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
12243 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
12244 | + swork_queue(&css->destroy_swork); | |
12245 | } | |
12246 | ||
12247 | static void init_and_link_css(struct cgroup_subsys_state *css, | |
12248 | @@ -5716,6 +5716,7 @@ static int __init cgroup_wq_init(void) | |
12249 | */ | |
12250 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
12251 | BUG_ON(!cgroup_destroy_wq); | |
12252 | + BUG_ON(swork_get()); | |
12253 | ||
12254 | /* | |
12255 | * Used to destroy pidlists and separate to serve as flush domain. | |
12256 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
12257 | index 341bf80f80bd..b575429a8a00 100644 | |
12258 | --- a/kernel/cpu.c | |
12259 | +++ b/kernel/cpu.c | |
12260 | @@ -152,8 +152,8 @@ static struct { | |
12261 | #endif | |
12262 | } cpu_hotplug = { | |
12263 | .active_writer = NULL, | |
12264 | - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), | |
12265 | .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), | |
12266 | + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), | |
12267 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12268 | .dep_map = {.name = "cpu_hotplug.lock" }, | |
12269 | #endif | |
12270 | @@ -166,6 +166,289 @@ static struct { | |
12271 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) | |
12272 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | |
12273 | ||
12274 | +/** | |
12275 | + * hotplug_pcp - per cpu hotplug descriptor | |
12276 | + * @unplug: set when pin_current_cpu() needs to sync tasks | |
12277 | + * @sync_tsk: the task that waits for tasks to finish pinned sections | |
12278 | + * @refcount: counter of tasks in pinned sections | |
12279 | + * @grab_lock: set when the tasks entering pinned sections should wait | |
12280 | + * @synced: notifier for @sync_tsk to tell cpu_down it's finished | |
12281 | + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) | |
12282 | + * @mutex_init: zero if the mutex hasn't been initialized yet. | |
12283 | + * | |
12284 | + * Although @unplug and @sync_tsk may point to the same task, the @unplug | |
12285 | + * is used as a flag and still exists after @sync_tsk has exited and | |
12286 | + * @sync_tsk set to NULL. | |
12287 | + */ | |
12288 | +struct hotplug_pcp { | |
12289 | + struct task_struct *unplug; | |
12290 | + struct task_struct *sync_tsk; | |
12291 | + int refcount; | |
12292 | + int grab_lock; | |
12293 | + struct completion synced; | |
12294 | + struct completion unplug_wait; | |
12295 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12296 | + /* | |
12297 | + * Note, on PREEMPT_RT, the hotplug lock must save the state of | |
12298 | + * the task, otherwise the mutex will cause the task to fail | |
12299 | + * to sleep when required. (Because it's called from migrate_disable()) | |
12300 | + * | |
12301 | + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's | |
12302 | + * state. | |
12303 | + */ | |
12304 | + spinlock_t lock; | |
12305 | +#else | |
12306 | + struct mutex mutex; | |
12307 | +#endif | |
12308 | + int mutex_init; | |
12309 | +}; | |
12310 | + | |
12311 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12312 | +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) | |
12313 | +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) | |
12314 | +#else | |
12315 | +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) | |
12316 | +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) | |
12317 | +#endif | |
12318 | + | |
12319 | +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); | |
12320 | + | |
12321 | +/** | |
12322 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
12323 | + * | |
12324 | + * Lightweight version of get_online_cpus() to prevent cpu from being | |
12325 | + * unplugged when code runs in a migration disabled region. | |
12326 | + * | |
12327 | + * Must be called with preemption disabled (preempt_count = 1)! | |
12328 | + */ | |
12329 | +void pin_current_cpu(void) | |
12330 | +{ | |
12331 | + struct hotplug_pcp *hp; | |
12332 | + int force = 0; | |
12333 | + | |
12334 | +retry: | |
12335 | + hp = this_cpu_ptr(&hotplug_pcp); | |
12336 | + | |
12337 | + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || | |
12338 | + hp->unplug == current) { | |
12339 | + hp->refcount++; | |
12340 | + return; | |
12341 | + } | |
12342 | + if (hp->grab_lock) { | |
12343 | + preempt_enable(); | |
12344 | + hotplug_lock(hp); | |
12345 | + hotplug_unlock(hp); | |
12346 | + } else { | |
12347 | + preempt_enable(); | |
12348 | + /* | |
12349 | + * Try to push this task off of this CPU. | |
12350 | + */ | |
12351 | + if (!migrate_me()) { | |
12352 | + preempt_disable(); | |
12353 | + hp = this_cpu_ptr(&hotplug_pcp); | |
12354 | + if (!hp->grab_lock) { | |
12355 | + /* | |
12356 | + * Just let it continue, it's already pinned | 
12357 | + * or about to sleep. | |
12358 | + */ | |
12359 | + force = 1; | |
12360 | + goto retry; | |
12361 | + } | |
12362 | + preempt_enable(); | |
12363 | + } | |
12364 | + } | |
12365 | + preempt_disable(); | |
12366 | + goto retry; | |
12367 | +} | |
12368 | + | |
12369 | +/** | |
12370 | + * unpin_current_cpu - Allow unplug of current cpu | |
12371 | + * | |
12372 | + * Must be called with preemption or interrupts disabled! | |
12373 | + */ | |
12374 | +void unpin_current_cpu(void) | |
12375 | +{ | |
12376 | + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); | |
12377 | + | |
12378 | + WARN_ON(hp->refcount <= 0); | |
12379 | + | |
12380 | + /* This is safe. sync_unplug_thread is pinned to this cpu */ | |
12381 | + if (!--hp->refcount && hp->unplug && hp->unplug != current) | |
12382 | + wake_up_process(hp->unplug); | |
12383 | +} | |
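A hedged sketch of the pairing described in the kerneldoc above; the helper is hypothetical, and in this series the real callers are migrate_disable()/migrate_enable():

	static void my_pinned_section(void)
	{
		preempt_disable();		/* pin_current_cpu() requires preempt_count == 1 */
		pin_current_cpu();		/* block cpu_down() of this CPU while we are pinned */

		/* ... short, non-sleeping work that must stay on this CPU ... */

		unpin_current_cpu();		/* let a pending cpu_down() continue */
		preempt_enable();
	}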
12384 | + | |
12385 | +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | |
12386 | +{ | |
12387 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12388 | + while (hp->refcount) { | |
12389 | + schedule_preempt_disabled(); | |
12390 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12391 | + } | |
12392 | +} | |
12393 | + | |
12394 | +static int sync_unplug_thread(void *data) | |
12395 | +{ | |
12396 | + struct hotplug_pcp *hp = data; | |
12397 | + | |
12398 | + wait_for_completion(&hp->unplug_wait); | |
12399 | + preempt_disable(); | |
12400 | + hp->unplug = current; | |
12401 | + wait_for_pinned_cpus(hp); | |
12402 | + | |
12403 | + /* | |
12404 | + * This thread will synchronize the cpu_down() with threads | |
12405 | + * that have pinned the CPU. When the pinned CPU count reaches | |
12406 | + * zero, we inform the cpu_down code to continue to the next step. | |
12407 | + */ | |
12408 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12409 | + preempt_enable(); | |
12410 | + complete(&hp->synced); | |
12411 | + | |
12412 | + /* | |
12413 | + * If all succeeds, the next step will need tasks to wait till | |
12414 | + * the CPU is offline before continuing. To do this, the grab_lock | |
12415 | + * is set and tasks going into pin_current_cpu() will block on the | |
12416 | + * mutex. But we still need to wait for those that are already in | |
12417 | + * pinned CPU sections. If the cpu_down() failed, the kthread_should_stop() | |
12418 | + * will kick this thread out. | |
12419 | + */ | |
12420 | + while (!hp->grab_lock && !kthread_should_stop()) { | |
12421 | + schedule(); | |
12422 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12423 | + } | |
12424 | + | |
12425 | + /* Make sure grab_lock is seen before we see a stale completion */ | |
12426 | + smp_mb(); | |
12427 | + | |
12428 | + /* | |
12429 | + * Now just before cpu_down() enters stop machine, we need to make | |
12430 | + * sure all tasks that are in pinned CPU sections are out, and new | |
12431 | + * tasks will now grab the lock, keeping them from entering pinned | |
12432 | + * CPU sections. | |
12433 | + */ | |
12434 | + if (!kthread_should_stop()) { | |
12435 | + preempt_disable(); | |
12436 | + wait_for_pinned_cpus(hp); | |
12437 | + preempt_enable(); | |
12438 | + complete(&hp->synced); | |
12439 | + } | |
12440 | + | |
12441 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12442 | + while (!kthread_should_stop()) { | |
12443 | + schedule(); | |
12444 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
12445 | + } | |
12446 | + set_current_state(TASK_RUNNING); | |
12447 | + | |
12448 | + /* | |
12449 | + * Force this thread off this CPU as it's going down and | |
12450 | + * we don't want any more work on this CPU. | |
12451 | + */ | |
12452 | + current->flags &= ~PF_NO_SETAFFINITY; | |
12453 | + set_cpus_allowed_ptr(current, cpu_present_mask); | |
12454 | + migrate_me(); | |
12455 | + return 0; | |
12456 | +} | |
12457 | + | |
12458 | +static void __cpu_unplug_sync(struct hotplug_pcp *hp) | |
12459 | +{ | |
12460 | + wake_up_process(hp->sync_tsk); | |
12461 | + wait_for_completion(&hp->synced); | |
12462 | +} | |
12463 | + | |
12464 | +static void __cpu_unplug_wait(unsigned int cpu) | |
12465 | +{ | |
12466 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
12467 | + | |
12468 | + complete(&hp->unplug_wait); | |
12469 | + wait_for_completion(&hp->synced); | |
12470 | +} | |
12471 | + | |
12472 | +/* | |
12473 | + * Start the sync_unplug_thread on the target cpu and wait for it to | |
12474 | + * complete. | |
12475 | + */ | |
12476 | +static int cpu_unplug_begin(unsigned int cpu) | |
12477 | +{ | |
12478 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
12479 | + int err; | |
12480 | + | |
12481 | + /* Protected by cpu_hotplug.lock */ | |
12482 | + if (!hp->mutex_init) { | |
12483 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12484 | + spin_lock_init(&hp->lock); | |
12485 | +#else | |
12486 | + mutex_init(&hp->mutex); | |
12487 | +#endif | |
12488 | + hp->mutex_init = 1; | |
12489 | + } | |
12490 | + | |
12491 | + /* Inform the scheduler to migrate tasks off this CPU */ | |
12492 | + tell_sched_cpu_down_begin(cpu); | |
12493 | + | |
12494 | + init_completion(&hp->synced); | |
12495 | + init_completion(&hp->unplug_wait); | |
12496 | + | |
12497 | + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); | |
12498 | + if (IS_ERR(hp->sync_tsk)) { | |
12499 | + err = PTR_ERR(hp->sync_tsk); | |
12500 | + hp->sync_tsk = NULL; | |
12501 | + return err; | |
12502 | + } | |
12503 | + kthread_bind(hp->sync_tsk, cpu); | |
12504 | + | |
12505 | + /* | |
12506 | + * Wait for tasks to get out of the pinned sections, | |
12507 | + * it's still OK if new tasks enter. Some CPU notifiers will | |
12508 | + * wait for tasks that are going to enter these sections and | |
12509 | + * we must not have them block. | |
12510 | + */ | |
12511 | + wake_up_process(hp->sync_tsk); | |
12512 | + return 0; | |
12513 | +} | |
12514 | + | |
12515 | +static void cpu_unplug_sync(unsigned int cpu) | |
12516 | +{ | |
12517 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
12518 | + | |
12519 | + init_completion(&hp->synced); | |
12520 | + /* The completion needs to be initialized before setting grab_lock */ | |
12521 | + smp_wmb(); | |
12522 | + | |
12523 | + /* Grab the mutex before setting grab_lock */ | |
12524 | + hotplug_lock(hp); | |
12525 | + hp->grab_lock = 1; | |
12526 | + | |
12527 | + /* | |
12528 | + * The CPU notifiers have been completed. | |
12529 | + * Wait for tasks to get out of pinned CPU sections and have new | |
12530 | + * tasks block until the CPU is completely down. | |
12531 | + */ | |
12532 | + __cpu_unplug_sync(hp); | |
12533 | + | |
12534 | + /* All done with the sync thread */ | |
12535 | + kthread_stop(hp->sync_tsk); | |
12536 | + hp->sync_tsk = NULL; | |
12537 | +} | |
12538 | + | |
12539 | +static void cpu_unplug_done(unsigned int cpu) | |
12540 | +{ | |
12541 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
12542 | + | |
12543 | + hp->unplug = NULL; | |
12544 | + /* Let all tasks know cpu unplug is finished before cleaning up */ | |
12545 | + smp_wmb(); | |
12546 | + | |
12547 | + if (hp->sync_tsk) | |
12548 | + kthread_stop(hp->sync_tsk); | |
12549 | + | |
12550 | + if (hp->grab_lock) { | |
12551 | + hotplug_unlock(hp); | |
12552 | + /* protected by cpu_hotplug.lock */ | |
12553 | + hp->grab_lock = 0; | |
12554 | + } | |
12555 | + tell_sched_cpu_down_done(cpu); | |
12556 | +} | |
12557 | ||
12558 | void get_online_cpus(void) | |
12559 | { | |
12560 | @@ -710,10 +993,14 @@ static int takedown_cpu(unsigned int cpu) | |
12561 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | |
12562 | int err; | |
12563 | ||
12564 | + __cpu_unplug_wait(cpu); | |
12565 | /* Park the smpboot threads */ | |
12566 | kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread); | |
12567 | smpboot_park_threads(cpu); | |
12568 | ||
12569 | + /* Notifiers are done. Don't let any more tasks pin this CPU. */ | |
12570 | + cpu_unplug_sync(cpu); | |
12571 | + | |
12572 | /* | |
12573 | * Prevent irq alloc/free while the dying cpu reorganizes the | |
12574 | * interrupt affinities. | |
12575 | @@ -799,6 +1086,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
12576 | struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); | |
12577 | int prev_state, ret = 0; | |
12578 | bool hasdied = false; | |
12579 | + int mycpu; | |
12580 | + cpumask_var_t cpumask; | |
12581 | + cpumask_var_t cpumask_org; | |
12582 | ||
12583 | if (num_online_cpus() == 1) | |
12584 | return -EBUSY; | |
12585 | @@ -806,7 +1096,34 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
12586 | if (!cpu_present(cpu)) | |
12587 | return -EINVAL; | |
12588 | ||
12589 | + /* Move the downtaker off the unplug cpu */ | |
12590 | + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) | |
12591 | + return -ENOMEM; | |
12592 | + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { | |
12593 | + free_cpumask_var(cpumask); | |
12594 | + return -ENOMEM; | |
12595 | + } | |
12596 | + | |
12597 | + cpumask_copy(cpumask_org, tsk_cpus_allowed(current)); | |
12598 | + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); | |
12599 | + set_cpus_allowed_ptr(current, cpumask); | |
12600 | + free_cpumask_var(cpumask); | |
12601 | + migrate_disable(); | |
12602 | + mycpu = smp_processor_id(); | |
12603 | + if (mycpu == cpu) { | |
12604 | + printk(KERN_ERR "Yuck! Still on unplug CPU!\n"); | |
12605 | + migrate_enable(); | |
12606 | + ret = -EBUSY; | |
12607 | + goto restore_cpus; | |
12608 | + } | |
12609 | + | |
12610 | + migrate_enable(); | |
12611 | cpu_hotplug_begin(); | |
12612 | + ret = cpu_unplug_begin(cpu); | |
12613 | + if (ret) { | |
12614 | + printk("cpu_unplug_begin(%d) failed\n", cpu); | |
12615 | + goto out_cancel; | |
12616 | + } | |
12617 | ||
12618 | cpuhp_tasks_frozen = tasks_frozen; | |
12619 | ||
12620 | @@ -845,10 +1162,15 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, | |
12621 | ||
12622 | hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; | |
12623 | out: | |
12624 | + cpu_unplug_done(cpu); | |
12625 | +out_cancel: | |
12626 | cpu_hotplug_done(); | |
12627 | /* This post dead nonsense must die */ | |
12628 | if (!ret && hasdied) | |
12629 | cpu_notify_nofail(CPU_POST_DEAD, cpu); | |
12630 | +restore_cpus: | |
12631 | + set_cpus_allowed_ptr(current, cpumask_org); | |
12632 | + free_cpumask_var(cpumask_org); | |
12633 | return ret; | |
12634 | } | |
12635 | ||
12636 | diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c | |
12637 | index fc1ef736253c..83c666537a7a 100644 | |
12638 | --- a/kernel/debug/kdb/kdb_io.c | |
12639 | +++ b/kernel/debug/kdb/kdb_io.c | |
12640 | @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
12641 | int linecount; | |
12642 | int colcount; | |
12643 | int logging, saved_loglevel = 0; | |
12644 | - int saved_trap_printk; | |
12645 | int got_printf_lock = 0; | |
12646 | int retlen = 0; | |
12647 | int fnd, len; | |
12648 | @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
12649 | unsigned long uninitialized_var(flags); | |
12650 | ||
12651 | preempt_disable(); | |
12652 | - saved_trap_printk = kdb_trap_printk; | |
12653 | - kdb_trap_printk = 0; | |
12654 | ||
12655 | /* Serialize kdb_printf if multiple cpus try to write at once. | |
12656 | * But if any cpu goes recursive in kdb, just print the output, | |
12657 | @@ -855,7 +852,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
12658 | } else { | |
12659 | __release(kdb_printf_lock); | |
12660 | } | |
12661 | - kdb_trap_printk = saved_trap_printk; | |
12662 | preempt_enable(); | |
12663 | return retlen; | |
12664 | } | |
12665 | @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...) | |
12666 | va_list ap; | |
12667 | int r; | |
12668 | ||
12669 | + kdb_trap_printk++; | |
12670 | va_start(ap, fmt); | |
12671 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
12672 | va_end(ap); | |
12673 | + kdb_trap_printk--; | |
12674 | ||
12675 | return r; | |
12676 | } | |
12677 | diff --git a/kernel/events/core.c b/kernel/events/core.c | |
12678 | index fc9bb2225291..bc2db7e1ae04 100644 | |
12679 | --- a/kernel/events/core.c | |
12680 | +++ b/kernel/events/core.c | |
12681 | @@ -1042,6 +1042,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) | |
12682 | raw_spin_lock_init(&cpuctx->hrtimer_lock); | |
12683 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
12684 | timer->function = perf_mux_hrtimer_handler; | |
12685 | + timer->irqsafe = 1; | |
12686 | } | |
12687 | ||
12688 | static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |
12689 | @@ -8215,6 +8216,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) | |
12690 | ||
12691 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
12692 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
12693 | + hwc->hrtimer.irqsafe = 1; | |
12694 | ||
12695 | /* | |
12696 | * Since hrtimers have a fixed rate, we can do a static freq->period | |
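The two hunks above mark perf's mux and software-event hrtimers as irqsafe so they keep expiring from hard interrupt context on RT. A hedged sketch of marking one's own timer the same way; the timer, callback and setup function are made up, only the irqsafe field comes from this series.

#include <linux/hrtimer.h>

static struct hrtimer my_timer;		/* hypothetical driver timer */

static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
{
	/* short, hard-irq-safe work only */
	return HRTIMER_NORESTART;
}

static void my_timer_setup(void)
{
	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	my_timer.function = my_timer_fn;
	my_timer.irqsafe = 1;	/* expire in hard irq context even on PREEMPT_RT */
}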
12697 | diff --git a/kernel/exit.c b/kernel/exit.c | |
12698 | index 091a78be3b09..170b672bbb38 100644 | |
12699 | --- a/kernel/exit.c | |
12700 | +++ b/kernel/exit.c | |
12701 | @@ -143,7 +143,7 @@ static void __exit_signal(struct task_struct *tsk) | |
12702 | * Do this under ->siglock, we can race with another thread | |
12703 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
12704 | */ | |
12705 | - flush_sigqueue(&tsk->pending); | |
12706 | + flush_task_sigqueue(tsk); | |
12707 | tsk->sighand = NULL; | |
12708 | spin_unlock(&sighand->siglock); | |
12709 | ||
12710 | diff --git a/kernel/fork.c b/kernel/fork.c | |
12711 | index beb31725f7e2..e398cb9e62fa 100644 | |
12712 | --- a/kernel/fork.c | |
12713 | +++ b/kernel/fork.c | |
12714 | @@ -251,7 +251,9 @@ static inline void put_signal_struct(struct signal_struct *sig) | |
12715 | if (atomic_dec_and_test(&sig->sigcnt)) | |
12716 | free_signal_struct(sig); | |
12717 | } | |
12718 | - | |
12719 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12720 | +static | |
12721 | +#endif | |
12722 | void __put_task_struct(struct task_struct *tsk) | |
12723 | { | |
12724 | WARN_ON(!tsk->exit_state); | |
12725 | @@ -268,7 +270,18 @@ void __put_task_struct(struct task_struct *tsk) | |
12726 | if (!profile_handoff_task(tsk)) | |
12727 | free_task(tsk); | |
12728 | } | |
12729 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12730 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
12731 | +#else | |
12732 | +void __put_task_struct_cb(struct rcu_head *rhp) | |
12733 | +{ | |
12734 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
12735 | + | |
12736 | + __put_task_struct(tsk); | |
12737 | + | |
12738 | +} | |
12739 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
12740 | +#endif | |
12741 | ||
12742 | void __init __weak arch_task_cache_init(void) { } | |
12743 | ||
12744 | @@ -702,6 +715,19 @@ void __mmdrop(struct mm_struct *mm) | |
12745 | } | |
12746 | EXPORT_SYMBOL_GPL(__mmdrop); | |
12747 | ||
12748 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12749 | +/* | |
12750 | + * RCU callback for delayed mm drop. Not strictly rcu, but we don't | |
12751 | + * want another facility to make this work. | |
12752 | + */ | |
12753 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
12754 | +{ | |
12755 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
12756 | + | |
12757 | + __mmdrop(mm); | |
12758 | +} | |
12759 | +#endif | |
12760 | + | |
12761 | static inline void __mmput(struct mm_struct *mm) | |
12762 | { | |
12763 | VM_BUG_ON(atomic_read(&mm->mm_users)); | |
12764 | @@ -1274,6 +1300,9 @@ static void rt_mutex_init_task(struct task_struct *p) | |
12765 | */ | |
12766 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
12767 | { | |
12768 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12769 | + tsk->posix_timer_list = NULL; | |
12770 | +#endif | |
12771 | tsk->cputime_expires.prof_exp = 0; | |
12772 | tsk->cputime_expires.virt_exp = 0; | |
12773 | tsk->cputime_expires.sched_exp = 0; | |
12774 | @@ -1399,6 +1428,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |
12775 | spin_lock_init(&p->alloc_lock); | |
12776 | ||
12777 | init_sigpending(&p->pending); | |
12778 | + p->sigqueue_cache = NULL; | |
12779 | ||
12780 | p->utime = p->stime = p->gtime = 0; | |
12781 | p->utimescaled = p->stimescaled = 0; | |
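On PREEMPT_RT_BASE the final free of a task_struct or mm_struct is handed to RCU via the callbacks added above. A hedged sketch of the header-side wrappers that would queue those callbacks; the wrapper names are illustrative, while put_rcu and delayed_drop are the rcu_head fields the callbacks themselves expect.

#include <linux/sched.h>
#include <linux/rcupdate.h>

static inline void put_task_struct_rt(struct task_struct *t)
{
	if (atomic_dec_and_test(&t->usage))
		call_rcu(&t->put_rcu, __put_task_struct_cb);	/* defer the free to RCU */
}

static inline void mmdrop_delayed_rt(struct mm_struct *mm)
{
	if (atomic_dec_and_test(&mm->mm_count))
		call_rcu(&mm->delayed_drop, __mmdrop_delayed);	/* defer __mmdrop() */
}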
12782 | diff --git a/kernel/futex.c b/kernel/futex.c | |
12783 | index 46cb3a301bc1..6de82b959729 100644 | |
12784 | --- a/kernel/futex.c | |
12785 | +++ b/kernel/futex.c | |
12786 | @@ -895,7 +895,9 @@ void exit_pi_state_list(struct task_struct *curr) | |
12787 | * task still owns the PI-state: | |
12788 | */ | |
12789 | if (head->next != next) { | |
12790 | + raw_spin_unlock_irq(&curr->pi_lock); | |
12791 | spin_unlock(&hb->lock); | |
12792 | + raw_spin_lock_irq(&curr->pi_lock); | |
12793 | continue; | |
12794 | } | |
12795 | ||
12796 | @@ -1290,6 +1292,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
12797 | struct futex_pi_state *pi_state = this->pi_state; | |
12798 | u32 uninitialized_var(curval), newval; | |
12799 | WAKE_Q(wake_q); | |
12800 | + WAKE_Q(wake_sleeper_q); | |
12801 | bool deboost; | |
12802 | int ret = 0; | |
12803 | ||
12804 | @@ -1356,7 +1359,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
12805 | ||
12806 | raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
12807 | ||
12808 | - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
12809 | + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, | |
12810 | + &wake_sleeper_q); | |
12811 | ||
12812 | /* | |
12813 | * First unlock HB so the waiter does not spin on it once he got woken | |
12814 | @@ -1364,8 +1368,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
12815 | * deboost first (and lose our higher priority), then the task might get | |
12816 | * scheduled away before the wake up can take place. | |
12817 | */ | |
12818 | - spin_unlock(&hb->lock); | |
12819 | + deboost |= spin_unlock_no_deboost(&hb->lock); | |
12820 | wake_up_q(&wake_q); | |
12821 | + wake_up_q_sleeper(&wake_sleeper_q); | |
12822 | if (deboost) | |
12823 | rt_mutex_adjust_prio(current); | |
12824 | ||
12825 | @@ -1915,6 +1920,16 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |
12826 | requeue_pi_wake_futex(this, &key2, hb2); | |
12827 | drop_count++; | |
12828 | continue; | |
12829 | + } else if (ret == -EAGAIN) { | |
12830 | + /* | |
12831 | + * Waiter was woken by timeout or | |
12832 | + * signal and has set pi_blocked_on to | |
12833 | + * PI_WAKEUP_INPROGRESS before we | |
12834 | + * tried to enqueue it on the rtmutex. | |
12835 | + */ | |
12836 | + this->pi_state = NULL; | |
12837 | + put_pi_state(pi_state); | |
12838 | + continue; | |
12839 | } else if (ret) { | |
12840 | /* | |
12841 | * rt_mutex_start_proxy_lock() detected a | |
12842 | @@ -2805,7 +2820,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
12843 | struct hrtimer_sleeper timeout, *to = NULL; | |
12844 | struct rt_mutex_waiter rt_waiter; | |
12845 | struct rt_mutex *pi_mutex = NULL; | |
12846 | - struct futex_hash_bucket *hb; | |
12847 | + struct futex_hash_bucket *hb, *hb2; | |
12848 | union futex_key key2 = FUTEX_KEY_INIT; | |
12849 | struct futex_q q = futex_q_init; | |
12850 | int res, ret; | |
12851 | @@ -2830,10 +2845,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
12852 | * The waiter is allocated on our stack, manipulated by the requeue | |
12853 | * code while we sleep on uaddr. | |
12854 | */ | |
12855 | - debug_rt_mutex_init_waiter(&rt_waiter); | |
12856 | - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); | |
12857 | - RB_CLEAR_NODE(&rt_waiter.tree_entry); | |
12858 | - rt_waiter.task = NULL; | |
12859 | + rt_mutex_init_waiter(&rt_waiter, false); | |
12860 | ||
12861 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); | |
12862 | if (unlikely(ret != 0)) | |
12863 | @@ -2864,20 +2876,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
12864 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | |
12865 | futex_wait_queue_me(hb, &q, to); | |
12866 | ||
12867 | - spin_lock(&hb->lock); | |
12868 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
12869 | - spin_unlock(&hb->lock); | |
12870 | - if (ret) | |
12871 | - goto out_put_keys; | |
12872 | + /* | |
12873 | + * On RT we must avoid races with requeue and trying to block | |
12874 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
12875 | + * serializing access to pi_blocked_on with pi_lock. | |
12876 | + */ | |
12877 | + raw_spin_lock_irq(¤t->pi_lock); | |
12878 | + if (current->pi_blocked_on) { | |
12879 | + /* | |
12880 | + * We have been requeued or are in the process of | |
12881 | + * being requeued. | |
12882 | + */ | |
12883 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12884 | + } else { | |
12885 | + /* | |
12886 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
12887 | + * prevents a concurrent requeue from moving us to the | |
12888 | + * uaddr2 rtmutex. After that we can safely acquire | |
12889 | + * (and possibly block on) hb->lock. | |
12890 | + */ | |
12891 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
12892 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12893 | + | |
12894 | + spin_lock(&hb->lock); | |
12895 | + | |
12896 | + /* | |
12897 | + * Clean up pi_blocked_on. We might leak it otherwise | |
12898 | + * when we succeeded with the hb->lock in the fast | |
12899 | + * path. | |
12900 | + */ | |
12901 | + raw_spin_lock_irq(¤t->pi_lock); | |
12902 | + current->pi_blocked_on = NULL; | |
12903 | + raw_spin_unlock_irq(¤t->pi_lock); | |
12904 | + | |
12905 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
12906 | + spin_unlock(&hb->lock); | |
12907 | + if (ret) | |
12908 | + goto out_put_keys; | |
12909 | + } | |
12910 | ||
12911 | /* | |
12912 | - * In order for us to be here, we know our q.key == key2, and since | |
12913 | - * we took the hb->lock above, we also know that futex_requeue() has | |
12914 | - * completed and we no longer have to concern ourselves with a wakeup | |
12915 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
12916 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
12917 | - * reference count. | |
12918 | + * In order to be here, we have either been requeued, are in | |
12919 | + * the process of being requeued, or requeue successfully | |
12920 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
12921 | + * non-null above, we may be racing with a requeue. Do not | |
12922 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
12923 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
12924 | + * reference and incremented our key2 reference count. | |
12925 | */ | |
12926 | + hb2 = hash_futex(&key2); | |
12927 | ||
12928 | /* Check if the requeue code acquired the second futex for us. */ | |
12929 | if (!q.rt_waiter) { | |
12930 | @@ -2886,14 +2933,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
12931 | * did a lock-steal - fix up the PI-state in that case. | |
12932 | */ | |
12933 | if (q.pi_state && (q.pi_state->owner != current)) { | |
12934 | - spin_lock(q.lock_ptr); | |
12935 | + spin_lock(&hb2->lock); | |
12936 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
12937 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
12938 | /* | |
12939 | * Drop the reference to the pi state which | |
12940 | * the requeue_pi() code acquired for us. | |
12941 | */ | |
12942 | put_pi_state(q.pi_state); | |
12943 | - spin_unlock(q.lock_ptr); | |
12944 | + spin_unlock(&hb2->lock); | |
12945 | } | |
12946 | } else { | |
12947 | /* | |
12948 | @@ -2906,7 +2954,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
12949 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); | |
12950 | debug_rt_mutex_free_waiter(&rt_waiter); | |
12951 | ||
12952 | - spin_lock(q.lock_ptr); | |
12953 | + spin_lock(&hb2->lock); | |
12954 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
12955 | /* | |
12956 | * Fixup the pi_state owner and possibly acquire the lock if we | |
12957 | * haven't already. | |
12958 | diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c | |
12959 | index d3f24905852c..f87aa8fdcc51 100644 | |
12960 | --- a/kernel/irq/handle.c | |
12961 | +++ b/kernel/irq/handle.c | |
12962 | @@ -181,10 +181,16 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
12963 | { | |
12964 | irqreturn_t retval; | |
12965 | unsigned int flags = 0; | |
12966 | + struct pt_regs *regs = get_irq_regs(); | |
12967 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
12968 | ||
12969 | retval = __handle_irq_event_percpu(desc, &flags); | |
12970 | ||
12971 | - add_interrupt_randomness(desc->irq_data.irq, flags); | |
12972 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12973 | + desc->random_ip = ip; | |
12974 | +#else | |
12975 | + add_interrupt_randomness(desc->irq_data.irq, flags, ip); | |
12976 | +#endif | |
12977 | ||
12978 | if (!noirqdebug) | |
12979 | note_interrupt(desc, retval); | |
12980 | diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c | |
12981 | index 9530fcd27704..fadf8f848299 100644 | |
12982 | --- a/kernel/irq/manage.c | |
12983 | +++ b/kernel/irq/manage.c | |
12984 | @@ -22,6 +22,7 @@ | |
12985 | #include "internals.h" | |
12986 | ||
12987 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
12988 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
12989 | __read_mostly bool force_irqthreads; | |
12990 | ||
12991 | static int __init setup_forced_irqthreads(char *arg) | |
12992 | @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg) | |
12993 | return 0; | |
12994 | } | |
12995 | early_param("threadirqs", setup_forced_irqthreads); | |
12996 | +# endif | |
12997 | #endif | |
12998 | ||
12999 | static void __synchronize_hardirq(struct irq_desc *desc) | |
13000 | @@ -233,7 +235,12 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, | |
13001 | ||
13002 | if (desc->affinity_notify) { | |
13003 | kref_get(&desc->affinity_notify->kref); | |
13004 | + | |
13005 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13006 | + swork_queue(&desc->affinity_notify->swork); | |
13007 | +#else | |
13008 | schedule_work(&desc->affinity_notify->work); | |
13009 | +#endif | |
13010 | } | |
13011 | irqd_set(data, IRQD_AFFINITY_SET); | |
13012 | ||
13013 | @@ -271,10 +278,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |
13014 | } | |
13015 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
13016 | ||
13017 | -static void irq_affinity_notify(struct work_struct *work) | |
13018 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
13019 | { | |
13020 | - struct irq_affinity_notify *notify = | |
13021 | - container_of(work, struct irq_affinity_notify, work); | |
13022 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
13023 | cpumask_var_t cpumask; | |
13024 | unsigned long flags; | |
13025 | @@ -296,6 +301,35 @@ static void irq_affinity_notify(struct work_struct *work) | |
13026 | kref_put(¬ify->kref, notify->release); | |
13027 | } | |
13028 | ||
13029 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13030 | +static void init_helper_thread(void) | |
13031 | +{ | |
13032 | + static int init_sworker_once; | |
13033 | + | |
13034 | + if (init_sworker_once) | |
13035 | + return; | |
13036 | + if (WARN_ON(swork_get())) | |
13037 | + return; | |
13038 | + init_sworker_once = 1; | |
13039 | +} | |
13040 | + | |
13041 | +static void irq_affinity_notify(struct swork_event *swork) | |
13042 | +{ | |
13043 | + struct irq_affinity_notify *notify = | |
13044 | + container_of(swork, struct irq_affinity_notify, swork); | |
13045 | + _irq_affinity_notify(notify); | |
13046 | +} | |
13047 | + | |
13048 | +#else | |
13049 | + | |
13050 | +static void irq_affinity_notify(struct work_struct *work) | |
13051 | +{ | |
13052 | + struct irq_affinity_notify *notify = | |
13053 | + container_of(work, struct irq_affinity_notify, work); | |
13054 | + _irq_affinity_notify(notify); | |
13055 | +} | |
13056 | +#endif | |
13057 | + | |
13058 | /** | |
13059 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
13060 | * @irq: Interrupt for which to enable/disable notification | |
13061 | @@ -324,7 +358,12 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) | |
13062 | if (notify) { | |
13063 | notify->irq = irq; | |
13064 | kref_init(¬ify->kref); | |
13065 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13066 | + INIT_SWORK(¬ify->swork, irq_affinity_notify); | |
13067 | + init_helper_thread(); | |
13068 | +#else | |
13069 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
13070 | +#endif | |
13071 | } | |
13072 | ||
13073 | raw_spin_lock_irqsave(&desc->lock, flags); | |
13074 | @@ -879,7 +918,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |
13075 | local_bh_disable(); | |
13076 | ret = action->thread_fn(action->irq, action->dev_id); | |
13077 | irq_finalize_oneshot(desc, action); | |
13078 | - local_bh_enable(); | |
13079 | + /* | |
13080 | + * Interrupts which have real time requirements can be set up | |
13081 | + * to avoid softirq processing in the thread handler. This is | |
13082 | + * safe as these interrupts do not raise soft interrupts. | |
13083 | + */ | |
13084 | + if (irq_settings_no_softirq_call(desc)) | |
13085 | + _local_bh_enable(); | |
13086 | + else | |
13087 | + local_bh_enable(); | |
13088 | return ret; | |
13089 | } | |
13090 | ||
13091 | @@ -976,6 +1023,12 @@ static int irq_thread(void *data) | |
13092 | if (action_ret == IRQ_WAKE_THREAD) | |
13093 | irq_wake_secondary(desc, action); | |
13094 | ||
13095 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13096 | + migrate_disable(); | |
13097 | + add_interrupt_randomness(action->irq, 0, | |
13098 | + desc->random_ip ^ (unsigned long) action); | |
13099 | + migrate_enable(); | |
13100 | +#endif | |
13101 | wake_threads_waitq(desc); | |
13102 | } | |
13103 | ||
13104 | @@ -1336,6 +1389,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |
13105 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
13106 | } | |
13107 | ||
13108 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) | |
13109 | + irq_settings_set_no_softirq_call(desc); | |
13110 | + | |
13111 | /* Set default affinity mask once everything is setup */ | |
13112 | setup_affinity(desc, mask); | |
13113 | ||
13114 | @@ -2061,7 +2117,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); | |
13115 | * This call sets the internal irqchip state of an interrupt, | |
13116 | * depending on the value of @which. | |
13117 | * | |
13118 | - * This function should be called with preemption disabled if the | |
13119 | + * This function should be called with migration disabled if the | |
13120 | * interrupt controller has per-cpu registers. | |
13121 | */ | |
13122 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
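The manage.c hunk above honors a new IRQF_NO_SOFTIRQ_CALL flag so a threaded handler can skip softirq processing when it re-enables bottom halves. A hedged sketch of a driver opting in; the device name, handler and wrapper are hypothetical.

#include <linux/interrupt.h>

static irqreturn_t my_rt_thread_fn(int irq, void *dev_id)
{
	/* threaded handler that never raises softirqs */
	return IRQ_HANDLED;
}

static int my_request_irq(unsigned int irq, void *dev)
{
	return request_threaded_irq(irq, NULL, my_rt_thread_fn,
				    IRQF_ONESHOT | IRQF_NO_SOFTIRQ_CALL,
				    "my-rt-device", dev);
}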
13123 | diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h | |
13124 | index 320579d89091..2df2d4445b1e 100644 | |
13125 | --- a/kernel/irq/settings.h | |
13126 | +++ b/kernel/irq/settings.h | |
13127 | @@ -16,6 +16,7 @@ enum { | |
13128 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
13129 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
13130 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
13131 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
13132 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
13133 | }; | |
13134 | ||
13135 | @@ -30,6 +31,7 @@ enum { | |
13136 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
13137 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
13138 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
13139 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
13140 | #undef IRQF_MODIFY_MASK | |
13141 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
13142 | ||
13143 | @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | |
13144 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
13145 | } | |
13146 | ||
13147 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) | |
13148 | +{ | |
13149 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
13150 | +} | |
13151 | + | |
13152 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) | |
13153 | +{ | |
13154 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
13155 | +} | |
13156 | + | |
13157 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |
13158 | { | |
13159 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
13160 | diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c | |
13161 | index 5707f97a3e6a..73f38dc7a7fb 100644 | |
13162 | --- a/kernel/irq/spurious.c | |
13163 | +++ b/kernel/irq/spurious.c | |
13164 | @@ -442,6 +442,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); | |
13165 | ||
13166 | static int __init irqfixup_setup(char *str) | |
13167 | { | |
13168 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13169 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
13170 | + return 1; | |
13171 | +#endif | |
13172 | irqfixup = 1; | |
13173 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
13174 | printk(KERN_WARNING "This may impact system performance.\n"); | |
13175 | @@ -454,6 +458,10 @@ module_param(irqfixup, int, 0644); | |
13176 | ||
13177 | static int __init irqpoll_setup(char *str) | |
13178 | { | |
13179 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13180 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
13181 | + return 1; | |
13182 | +#endif | |
13183 | irqfixup = 2; | |
13184 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
13185 | "enabled\n"); | |
13186 | diff --git a/kernel/irq_work.c b/kernel/irq_work.c | |
13187 | index bcf107ce0854..2899ba0d23d1 100644 | |
13188 | --- a/kernel/irq_work.c | |
13189 | +++ b/kernel/irq_work.c | |
13190 | @@ -17,6 +17,7 @@ | |
13191 | #include <linux/cpu.h> | |
13192 | #include <linux/notifier.h> | |
13193 | #include <linux/smp.h> | |
13194 | +#include <linux/interrupt.h> | |
13195 | #include <asm/processor.h> | |
13196 | ||
13197 | ||
13198 | @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) | |
13199 | */ | |
13200 | bool irq_work_queue_on(struct irq_work *work, int cpu) | |
13201 | { | |
13202 | + struct llist_head *list; | |
13203 | + | |
13204 | /* All work should have been flushed before going offline */ | |
13205 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
13206 | ||
13207 | @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) | |
13208 | if (!irq_work_claim(work)) | |
13209 | return false; | |
13210 | ||
13211 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
13212 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
13213 | + list = &per_cpu(lazy_list, cpu); | |
13214 | + else | |
13215 | + list = &per_cpu(raised_list, cpu); | |
13216 | + | |
13217 | + if (llist_add(&work->llnode, list)) | |
13218 | arch_send_call_function_single_ipi(cpu); | |
13219 | ||
13220 | return true; | |
13221 | @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); | |
13222 | /* Enqueue the irq work @work on the current CPU */ | |
13223 | bool irq_work_queue(struct irq_work *work) | |
13224 | { | |
13225 | + struct llist_head *list; | |
13226 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
13227 | + | |
13228 | /* Only queue if not already pending */ | |
13229 | if (!irq_work_claim(work)) | |
13230 | return false; | |
13231 | @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work) | |
13232 | /* Queue the entry and raise the IPI if needed. */ | |
13233 | preempt_disable(); | |
13234 | ||
13235 | - /* If the work is "lazy", handle it from next tick if any */ | |
13236 | - if (work->flags & IRQ_WORK_LAZY) { | |
13237 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
13238 | - tick_nohz_tick_stopped()) | |
13239 | - arch_irq_work_raise(); | |
13240 | - } else { | |
13241 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
13242 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
13243 | + | |
13244 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
13245 | + list = this_cpu_ptr(&lazy_list); | |
13246 | + else | |
13247 | + list = this_cpu_ptr(&raised_list); | |
13248 | + | |
13249 | + if (llist_add(&work->llnode, list)) { | |
13250 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
13251 | arch_irq_work_raise(); | |
13252 | } | |
13253 | ||
13254 | @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) | |
13255 | raised = this_cpu_ptr(&raised_list); | |
13256 | lazy = this_cpu_ptr(&lazy_list); | |
13257 | ||
13258 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) | |
13259 | - if (llist_empty(lazy)) | |
13260 | - return false; | |
13261 | + if (llist_empty(raised) && llist_empty(lazy)) | |
13262 | + return false; | |
13263 | ||
13264 | /* All work should have been flushed before going offline */ | |
13265 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
13266 | @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list) | |
13267 | struct irq_work *work; | |
13268 | struct llist_node *llnode; | |
13269 | ||
13270 | - BUG_ON(!irqs_disabled()); | |
13271 | + BUG_ON_NONRT(!irqs_disabled()); | |
13272 | ||
13273 | if (llist_empty(list)) | |
13274 | return; | |
13275 | @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list) | |
13276 | void irq_work_run(void) | |
13277 | { | |
13278 | irq_work_run_list(this_cpu_ptr(&raised_list)); | |
13279 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13280 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
13281 | + /* | |
13282 | + * NOTE: we raise softirq via IPI for safety, | |
13283 | + * and execute in irq_work_tick() to move the | |
13284 | + * overhead from hard to soft irq context. | |
13285 | + */ | |
13286 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
13287 | + raise_softirq(TIMER_SOFTIRQ); | |
13288 | + } else | |
13289 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13290 | } | |
13291 | EXPORT_SYMBOL_GPL(irq_work_run); | |
13292 | ||
13293 | @@ -179,8 +200,17 @@ void irq_work_tick(void) | |
13294 | ||
13295 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) | |
13296 | irq_work_run_list(raised); | |
13297 | + | |
13298 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
13299 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13300 | +} | |
13301 | + | |
13302 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
13303 | +void irq_work_tick_soft(void) | |
13304 | +{ | |
13305 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
13306 | } | |
13307 | +#endif | |
13308 | ||
13309 | /* | |
13310 | * Synchronize against the irq_work @entry, ensures the entry is not | |
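With the changes above, irq_work items default to the lazy (softirq) list on PREEMPT_RT_FULL unless they are flagged IRQ_WORK_HARD_IRQ. A hedged sketch of a work item that insists on hard interrupt context; everything except the flag name is made up.

#include <linux/irq_work.h>

static void my_irq_work_fn(struct irq_work *work)
{
	/* short, hard-irq-safe work */
}

static struct irq_work my_irq_work;

static void my_irq_work_setup(void)
{
	init_irq_work(&my_irq_work, my_irq_work_fn);
	my_irq_work.flags = IRQ_WORK_HARD_IRQ;	/* keep it on the raised (hard irq) list on RT */
}

/* later, from atomic context: irq_work_queue(&my_irq_work); */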
13311 | diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c | |
13312 | index ee1bc1bb8feb..ddef07958840 100644 | |
13313 | --- a/kernel/ksysfs.c | |
13314 | +++ b/kernel/ksysfs.c | |
13315 | @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); | |
13316 | ||
13317 | #endif /* CONFIG_KEXEC_CORE */ | |
13318 | ||
13319 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
13320 | +static ssize_t realtime_show(struct kobject *kobj, | |
13321 | + struct kobj_attribute *attr, char *buf) | |
13322 | +{ | |
13323 | + return sprintf(buf, "%d\n", 1); | |
13324 | +} | |
13325 | +KERNEL_ATTR_RO(realtime); | |
13326 | +#endif | |
13327 | + | |
13328 | /* whether file capabilities are enabled */ | |
13329 | static ssize_t fscaps_show(struct kobject *kobj, | |
13330 | struct kobj_attribute *attr, char *buf) | |
13331 | @@ -225,6 +234,9 @@ static struct attribute * kernel_attrs[] = { | |
13332 | &rcu_expedited_attr.attr, | |
13333 | &rcu_normal_attr.attr, | |
13334 | #endif | |
13335 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13336 | + &realtime_attr.attr, | |
13337 | +#endif | |
13338 | NULL | |
13339 | }; | |
13340 | ||
13341 | diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile | |
13342 | index 31322a4275cd..c6bba9299d8b 100644 | |
13343 | --- a/kernel/locking/Makefile | |
13344 | +++ b/kernel/locking/Makefile | |
13345 | @@ -2,7 +2,7 @@ | |
13346 | # and is generally not a function of system call inputs. | |
13347 | KCOV_INSTRUMENT := n | |
13348 | ||
13349 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | |
13350 | +obj-y += semaphore.o percpu-rwsem.o | |
13351 | ||
13352 | ifdef CONFIG_FUNCTION_TRACER | |
13353 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
13354 | @@ -11,7 +11,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) | |
13355 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
13356 | endif | |
13357 | ||
13358 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
13359 | +obj-y += mutex.o | |
13360 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
13361 | +obj-y += rwsem.o | |
13362 | +endif | |
13363 | obj-$(CONFIG_LOCKDEP) += lockdep.o | |
13364 | ifeq ($(CONFIG_PROC_FS),y) | |
13365 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
13366 | @@ -25,7 +29,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |
13367 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | |
13368 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
13369 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
13370 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
13371 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
13372 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
13373 | +endif | |
13374 | +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o | |
13375 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | |
13376 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
13377 | diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c | |
13378 | index 951cfcd10b4a..57e0ea72c28a 100644 | |
13379 | --- a/kernel/locking/lglock.c | |
13380 | +++ b/kernel/locking/lglock.c | |
13381 | @@ -4,6 +4,15 @@ | |
13382 | #include <linux/cpu.h> | |
13383 | #include <linux/string.h> | |
13384 | ||
13385 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13386 | +# define lg_lock_ptr arch_spinlock_t | |
13387 | +# define lg_do_lock(l) arch_spin_lock(l) | |
13388 | +# define lg_do_unlock(l) arch_spin_unlock(l) | |
13389 | +#else | |
13390 | +# define lg_lock_ptr struct rt_mutex | |
13391 | +# define lg_do_lock(l) __rt_spin_lock__no_mg(l) | |
13392 | +# define lg_do_unlock(l) __rt_spin_unlock(l) | |
13393 | +#endif | |
13394 | /* | |
13395 | * Note there is no uninit, so lglocks cannot be defined in | |
13396 | * modules (but it's fine to use them from there) | |
13397 | @@ -12,51 +21,60 @@ | |
13398 | ||
13399 | void lg_lock_init(struct lglock *lg, char *name) | |
13400 | { | |
13401 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13402 | + int i; | |
13403 | + | |
13404 | + for_each_possible_cpu(i) { | |
13405 | + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i); | |
13406 | + | |
13407 | + rt_mutex_init(lock); | |
13408 | + } | |
13409 | +#endif | |
13410 | LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); | |
13411 | } | |
13412 | EXPORT_SYMBOL(lg_lock_init); | |
13413 | ||
13414 | void lg_local_lock(struct lglock *lg) | |
13415 | { | |
13416 | - arch_spinlock_t *lock; | |
13417 | + lg_lock_ptr *lock; | |
13418 | ||
13419 | - preempt_disable(); | |
13420 | + migrate_disable(); | |
13421 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
13422 | lock = this_cpu_ptr(lg->lock); | |
13423 | - arch_spin_lock(lock); | |
13424 | + lg_do_lock(lock); | |
13425 | } | |
13426 | EXPORT_SYMBOL(lg_local_lock); | |
13427 | ||
13428 | void lg_local_unlock(struct lglock *lg) | |
13429 | { | |
13430 | - arch_spinlock_t *lock; | |
13431 | + lg_lock_ptr *lock; | |
13432 | ||
13433 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
13434 | lock = this_cpu_ptr(lg->lock); | |
13435 | - arch_spin_unlock(lock); | |
13436 | - preempt_enable(); | |
13437 | + lg_do_unlock(lock); | |
13438 | + migrate_enable(); | |
13439 | } | |
13440 | EXPORT_SYMBOL(lg_local_unlock); | |
13441 | ||
13442 | void lg_local_lock_cpu(struct lglock *lg, int cpu) | |
13443 | { | |
13444 | - arch_spinlock_t *lock; | |
13445 | + lg_lock_ptr *lock; | |
13446 | ||
13447 | - preempt_disable(); | |
13448 | + preempt_disable_nort(); | |
13449 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
13450 | lock = per_cpu_ptr(lg->lock, cpu); | |
13451 | - arch_spin_lock(lock); | |
13452 | + lg_do_lock(lock); | |
13453 | } | |
13454 | EXPORT_SYMBOL(lg_local_lock_cpu); | |
13455 | ||
13456 | void lg_local_unlock_cpu(struct lglock *lg, int cpu) | |
13457 | { | |
13458 | - arch_spinlock_t *lock; | |
13459 | + lg_lock_ptr *lock; | |
13460 | ||
13461 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
13462 | lock = per_cpu_ptr(lg->lock, cpu); | |
13463 | - arch_spin_unlock(lock); | |
13464 | - preempt_enable(); | |
13465 | + lg_do_unlock(lock); | |
13466 | + preempt_enable_nort(); | |
13467 | } | |
13468 | EXPORT_SYMBOL(lg_local_unlock_cpu); | |
13469 | ||
13470 | @@ -68,30 +86,30 @@ void lg_double_lock(struct lglock *lg, int cpu1, int cpu2) | |
13471 | if (cpu2 < cpu1) | |
13472 | swap(cpu1, cpu2); | |
13473 | ||
13474 | - preempt_disable(); | |
13475 | + preempt_disable_nort(); | |
13476 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
13477 | - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1)); | |
13478 | - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2)); | |
13479 | + lg_do_lock(per_cpu_ptr(lg->lock, cpu1)); | |
13480 | + lg_do_lock(per_cpu_ptr(lg->lock, cpu2)); | |
13481 | } | |
13482 | ||
13483 | void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2) | |
13484 | { | |
13485 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
13486 | - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1)); | |
13487 | - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2)); | |
13488 | - preempt_enable(); | |
13489 | + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1)); | |
13490 | + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2)); | |
13491 | + preempt_enable_nort(); | |
13492 | } | |
13493 | ||
13494 | void lg_global_lock(struct lglock *lg) | |
13495 | { | |
13496 | int i; | |
13497 | ||
13498 | - preempt_disable(); | |
13499 | + preempt_disable_nort(); | |
13500 | lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
13501 | for_each_possible_cpu(i) { | |
13502 | - arch_spinlock_t *lock; | |
13503 | + lg_lock_ptr *lock; | |
13504 | lock = per_cpu_ptr(lg->lock, i); | |
13505 | - arch_spin_lock(lock); | |
13506 | + lg_do_lock(lock); | |
13507 | } | |
13508 | } | |
13509 | EXPORT_SYMBOL(lg_global_lock); | |
13510 | @@ -102,10 +120,35 @@ void lg_global_unlock(struct lglock *lg) | |
13511 | ||
13512 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
13513 | for_each_possible_cpu(i) { | |
13514 | - arch_spinlock_t *lock; | |
13515 | + lg_lock_ptr *lock; | |
13516 | lock = per_cpu_ptr(lg->lock, i); | |
13517 | - arch_spin_unlock(lock); | |
13518 | + lg_do_unlock(lock); | |
13519 | } | |
13520 | - preempt_enable(); | |
13521 | + preempt_enable_nort(); | |
13522 | } | |
13523 | EXPORT_SYMBOL(lg_global_unlock); | |
13524 | + | |
13525 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13526 | +/* | |
13527 | + * HACK: If you use this, you get to keep the pieces. | |
13528 | + * Used in queue_stop_cpus_work() when stop machinery | |
13529 | + * is called from an inactive CPU, so we can't schedule. | |
13530 | + */ | |
13531 | +# define lg_do_trylock_relax(l) \ | |
13532 | + do { \ | |
13533 | + while (!__rt_spin_trylock(l)) \ | |
13534 | + cpu_relax(); \ | |
13535 | + } while (0) | |
13536 | + | |
13537 | +void lg_global_trylock_relax(struct lglock *lg) | |
13538 | +{ | |
13539 | + int i; | |
13540 | + | |
13541 | + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
13542 | + for_each_possible_cpu(i) { | |
13543 | + lg_lock_ptr *lock; | |
13544 | + lock = per_cpu_ptr(lg->lock, i); | |
13545 | + lg_do_trylock_relax(lock); | |
13546 | + } | |
13547 | +} | |
13548 | +#endif | |
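The lglock conversion above swaps the per-CPU arch spinlocks for rt_mutexes on RT without changing the lglock API. A hedged sketch of a caller, unchanged by this patch; the lock name and functions are hypothetical.

#include <linux/lglock.h>

DEFINE_STATIC_LGLOCK(my_lglock);	/* hypothetical per-CPU lock */

static void my_lglock_setup(void)
{
	lg_lock_init(&my_lglock, "my_lglock");	/* initializes the rt_mutexes on RT */
}

static void my_percpu_update(void)
{
	lg_local_lock(&my_lglock);	/* rt_mutex on RT, arch spinlock otherwise */
	/* ... touch this CPU's slice of the protected data ... */
	lg_local_unlock(&my_lglock);
}

static void my_global_scan(void)
{
	lg_global_lock(&my_lglock);	/* takes every CPU's lock */
	/* ... walk all per-CPU state ... */
	lg_global_unlock(&my_lglock);
}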
13549 | diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c | |
13550 | index 589d763a49b3..4b48c4bfb60c 100644 | |
13551 | --- a/kernel/locking/lockdep.c | |
13552 | +++ b/kernel/locking/lockdep.c | |
13553 | @@ -3686,6 +3686,7 @@ static void check_flags(unsigned long flags) | |
13554 | } | |
13555 | } | |
13556 | ||
13557 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13558 | /* | |
13559 | * We dont accurately track softirq state in e.g. | |
13560 | * hardirq contexts (such as on 4KSTACKS), so only | |
13561 | @@ -3700,6 +3701,7 @@ static void check_flags(unsigned long flags) | |
13562 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | |
13563 | } | |
13564 | } | |
13565 | +#endif | |
13566 | ||
13567 | if (!debug_locks) | |
13568 | print_irqtrace_events(current); | |
13569 | diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c | |
13570 | index f8c5af52a131..788068773e61 100644 | |
13571 | --- a/kernel/locking/locktorture.c | |
13572 | +++ b/kernel/locking/locktorture.c | |
13573 | @@ -26,7 +26,6 @@ | |
13574 | #include <linux/kthread.h> | |
13575 | #include <linux/sched/rt.h> | |
13576 | #include <linux/spinlock.h> | |
13577 | -#include <linux/rwlock.h> | |
13578 | #include <linux/mutex.h> | |
13579 | #include <linux/rwsem.h> | |
13580 | #include <linux/smp.h> | |
13581 | diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c | |
13582 | new file mode 100644 | |
13583 | index 000000000000..665754c00e1e | |
13584 | --- /dev/null | |
13585 | +++ b/kernel/locking/rt.c | |
13586 | @@ -0,0 +1,498 @@ | |
13587 | +/* | |
13588 | + * kernel/rt.c | |
13589 | + * | |
13590 | + * Real-Time Preemption Support | |
13591 | + * | |
13592 | + * started by Ingo Molnar: | |
13593 | + * | |
13594 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
13595 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
13596 | + * | |
13597 | + * historic credit for proving that Linux spinlocks can be implemented via | |
13598 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
13599 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
13600 | + * research and analysis; TimeSys, for proving that you can implement a | |
13601 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
13602 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
13603 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
13604 | + * | |
13605 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
13606 | + * but the idea of converting spinlocks to mutexes is used here too. | |
13607 | + * | |
13608 | + * lock debugging, locking tree, deadlock detection: | |
13609 | + * | |
13610 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
13611 | + * Released under the General Public License (GPL). | |
13612 | + * | |
13613 | + * Includes portions of the generic R/W semaphore implementation from: | |
13614 | + * | |
13615 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
13616 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
13617 | + * - Derived also from comments by Linus | |
13618 | + * | |
13619 | + * Pending ownership of locks and ownership stealing: | |
13620 | + * | |
13621 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
13622 | + * | |
13623 | + * (also by Steven Rostedt) | |
13624 | + * - Converted single pi_lock to individual task locks. | |
13625 | + * | |
13626 | + * By Esben Nielsen: | |
13627 | + * Doing priority inheritance with help of the scheduler. | |
13628 | + * | |
13629 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
13630 | + * - major rework based on Esben Nielsens initial patch | |
13631 | + * - replaced thread_info references by task_struct refs | |
13632 | + * - removed task->pending_owner dependency | |
13633 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
13634 | + * in the scheduler return path as discussed with Steven Rostedt | |
13635 | + * | |
13636 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
13637 | + * Steven Rostedt <rostedt@goodmis.org> | |
13638 | + * - debugged and patched Thomas Gleixner's rework. | |
13639 | + * - added back the cmpxchg to the rework. | |
13640 | + * - turned atomic require back on for SMP. | |
13641 | + */ | |
13642 | + | |
13643 | +#include <linux/spinlock.h> | |
13644 | +#include <linux/rtmutex.h> | |
13645 | +#include <linux/sched.h> | |
13646 | +#include <linux/delay.h> | |
13647 | +#include <linux/module.h> | |
13648 | +#include <linux/kallsyms.h> | |
13649 | +#include <linux/syscalls.h> | |
13650 | +#include <linux/interrupt.h> | |
13651 | +#include <linux/plist.h> | |
13652 | +#include <linux/fs.h> | |
13653 | +#include <linux/futex.h> | |
13654 | +#include <linux/hrtimer.h> | |
13655 | + | |
13656 | +#include "rtmutex_common.h" | |
13657 | + | |
13658 | +/* | |
13659 | + * struct mutex functions | |
13660 | + */ | |
13661 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
13662 | + struct lock_class_key *key) | |
13663 | +{ | |
13664 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13665 | + /* | |
13666 | + * Make sure we are not reinitializing a held lock: | |
13667 | + */ | |
13668 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
13669 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
13670 | +#endif | |
13671 | + mutex->lock.save_state = 0; | |
13672 | +} | |
13673 | +EXPORT_SYMBOL(__mutex_do_init); | |
13674 | + | |
13675 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
13676 | +{ | |
13677 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13678 | + rt_mutex_lock(&lock->lock); | |
13679 | +} | |
13680 | +EXPORT_SYMBOL(_mutex_lock); | |
13681 | + | |
13682 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
13683 | +{ | |
13684 | + int ret; | |
13685 | + | |
13686 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13687 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
13688 | + if (ret) | |
13689 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13690 | + return ret; | |
13691 | +} | |
13692 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
13693 | + | |
13694 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
13695 | +{ | |
13696 | + int ret; | |
13697 | + | |
13698 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
13699 | + ret = rt_mutex_lock_killable(&lock->lock); | |
13700 | + if (ret) | |
13701 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13702 | + return ret; | |
13703 | +} | |
13704 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
13705 | + | |
13706 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13707 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
13708 | +{ | |
13709 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
13710 | + rt_mutex_lock(&lock->lock); | |
13711 | +} | |
13712 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
13713 | + | |
13714 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
13715 | +{ | |
13716 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
13717 | + rt_mutex_lock(&lock->lock); | |
13718 | +} | |
13719 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
13720 | + | |
13721 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) | |
13722 | +{ | |
13723 | + int ret; | |
13724 | + | |
13725 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
13726 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
13727 | + if (ret) | |
13728 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13729 | + return ret; | |
13730 | +} | |
13731 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); | |
13732 | + | |
13733 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
13734 | +{ | |
13735 | + int ret; | |
13736 | + | |
13737 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
13738 | + ret = rt_mutex_lock_killable(&lock->lock); | |
13739 | + if (ret) | |
13740 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13741 | + return ret; | |
13742 | +} | |
13743 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
13744 | +#endif | |
13745 | + | |
13746 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
13747 | +{ | |
13748 | + int ret = rt_mutex_trylock(&lock->lock); | |
13749 | + | |
13750 | + if (ret) | |
13751 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
13752 | + | |
13753 | + return ret; | |
13754 | +} | |
13755 | +EXPORT_SYMBOL(_mutex_trylock); | |
13756 | + | |
13757 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
13758 | +{ | |
13759 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
13760 | + rt_mutex_unlock(&lock->lock); | |
13761 | +} | |
13762 | +EXPORT_SYMBOL(_mutex_unlock); | |
13763 | + | |
13764 | +/* | |
13765 | + * rwlock_t functions | |
13766 | + */ | |
13767 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) | |
13768 | +{ | |
13769 | + int ret; | |
13770 | + | |
13771 | + migrate_disable(); | |
13772 | + ret = rt_mutex_trylock(&rwlock->lock); | |
13773 | + if (ret) | |
13774 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
13775 | + else | |
13776 | + migrate_enable(); | |
13777 | + | |
13778 | + return ret; | |
13779 | +} | |
13780 | +EXPORT_SYMBOL(rt_write_trylock); | |
13781 | + | |
13782 | +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) | |
13783 | +{ | |
13784 | + int ret; | |
13785 | + | |
13786 | + *flags = 0; | |
13787 | + ret = rt_write_trylock(rwlock); | |
13788 | + return ret; | |
13789 | +} | |
13790 | +EXPORT_SYMBOL(rt_write_trylock_irqsave); | |
13791 | + | |
13792 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) | |
13793 | +{ | |
13794 | + struct rt_mutex *lock = &rwlock->lock; | |
13795 | + int ret = 1; | |
13796 | + | |
13797 | + /* | |
13798 | + * recursive read locks succeed when current owns the lock, | |
13799 | + * but not when read_depth == 0, which means that the lock is | |
13800 | + * write locked. | |
13801 | + */ | |
13802 | + if (rt_mutex_owner(lock) != current) { | |
13803 | + migrate_disable(); | |
13804 | + ret = rt_mutex_trylock(lock); | |
13805 | + if (ret) | |
13806 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
13807 | + else | |
13808 | + migrate_enable(); | |
13809 | + | |
13810 | + } else if (!rwlock->read_depth) { | |
13811 | + ret = 0; | |
13812 | + } | |
13813 | + | |
13814 | + if (ret) | |
13815 | + rwlock->read_depth++; | |
13816 | + | |
13817 | + return ret; | |
13818 | +} | |
13819 | +EXPORT_SYMBOL(rt_read_trylock); | |
13820 | + | |
13821 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) | |
13822 | +{ | |
13823 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
13824 | + __rt_spin_lock(&rwlock->lock); | |
13825 | +} | |
13826 | +EXPORT_SYMBOL(rt_write_lock); | |
13827 | + | |
13828 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) | |
13829 | +{ | |
13830 | + struct rt_mutex *lock = &rwlock->lock; | |
13831 | + | |
13832 | + | |
13833 | + /* | |
13834 | + * recursive read locks succeed when current owns the lock | |
13835 | + */ | |
13836 | + if (rt_mutex_owner(lock) != current) { | |
13837 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
13838 | + __rt_spin_lock(lock); | |
13839 | + } | |
13840 | + rwlock->read_depth++; | |
13841 | +} | |
13842 | + | |
13843 | +EXPORT_SYMBOL(rt_read_lock); | |
13844 | + | |
13845 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) | |
13846 | +{ | |
13847 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
13848 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
13849 | + __rt_spin_unlock(&rwlock->lock); | |
13850 | + migrate_enable(); | |
13851 | +} | |
13852 | +EXPORT_SYMBOL(rt_write_unlock); | |
13853 | + | |
13854 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) | |
13855 | +{ | |
13856 | + /* Release the lock only when read_depth is down to 0 */ | |
13857 | + if (--rwlock->read_depth == 0) { | |
13858 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
13859 | + __rt_spin_unlock(&rwlock->lock); | |
13860 | + migrate_enable(); | |
13861 | + } | |
13862 | +} | |
13863 | +EXPORT_SYMBOL(rt_read_unlock); | |
13864 | + | |
13865 | +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) | |
13866 | +{ | |
13867 | + rt_write_lock(rwlock); | |
13868 | + | |
13869 | + return 0; | |
13870 | +} | |
13871 | +EXPORT_SYMBOL(rt_write_lock_irqsave); | |
13872 | + | |
13873 | +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) | |
13874 | +{ | |
13875 | + rt_read_lock(rwlock); | |
13876 | + | |
13877 | + return 0; | |
13878 | +} | |
13879 | +EXPORT_SYMBOL(rt_read_lock_irqsave); | |
13880 | + | |
13881 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) | |
13882 | +{ | |
13883 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13884 | + /* | |
13885 | + * Make sure we are not reinitializing a held lock: | |
13886 | + */ | |
13887 | + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); | |
13888 | + lockdep_init_map(&rwlock->dep_map, name, key, 0); | |
13889 | +#endif | |
13890 | + rwlock->lock.save_state = 1; | |
13891 | + rwlock->read_depth = 0; | |
13892 | +} | |
13893 | +EXPORT_SYMBOL(__rt_rwlock_init); | |
13894 | + | |
13895 | +/* | |
13896 | + * rw_semaphores | |
13897 | + */ | |
13898 | + | |
13899 | +void rt_up_write(struct rw_semaphore *rwsem) | |
13900 | +{ | |
13901 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
13902 | + rt_mutex_unlock(&rwsem->lock); | |
13903 | +} | |
13904 | +EXPORT_SYMBOL(rt_up_write); | |
13905 | + | |
13906 | +void __rt_up_read(struct rw_semaphore *rwsem) | |
13907 | +{ | |
13908 | + if (--rwsem->read_depth == 0) | |
13909 | + rt_mutex_unlock(&rwsem->lock); | |
13910 | +} | |
13911 | + | |
13912 | +void rt_up_read(struct rw_semaphore *rwsem) | |
13913 | +{ | |
13914 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
13915 | + __rt_up_read(rwsem); | |
13916 | +} | |
13917 | +EXPORT_SYMBOL(rt_up_read); | |
13918 | + | |
13919 | +/* | |
13920 | + * downgrade a write lock into a read lock | |
13921 | + * - just wake up any readers at the front of the queue | |
13922 | + */ | |
13923 | +void rt_downgrade_write(struct rw_semaphore *rwsem) | |
13924 | +{ | |
13925 | + BUG_ON(rt_mutex_owner(&rwsem->lock) != current); | |
13926 | + rwsem->read_depth = 1; | |
13927 | +} | |
13928 | +EXPORT_SYMBOL(rt_downgrade_write); | |
13929 | + | |
13930 | +int rt_down_write_trylock(struct rw_semaphore *rwsem) | |
13931 | +{ | |
13932 | + int ret = rt_mutex_trylock(&rwsem->lock); | |
13933 | + | |
13934 | + if (ret) | |
13935 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
13936 | + return ret; | |
13937 | +} | |
13938 | +EXPORT_SYMBOL(rt_down_write_trylock); | |
13939 | + | |
13940 | +void rt_down_write(struct rw_semaphore *rwsem) | |
13941 | +{ | |
13942 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
13943 | + rt_mutex_lock(&rwsem->lock); | |
13944 | +} | |
13945 | +EXPORT_SYMBOL(rt_down_write); | |
13946 | + | |
13947 | +int rt_down_write_killable(struct rw_semaphore *rwsem) | |
13948 | +{ | |
13949 | + int ret; | |
13950 | + | |
13951 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
13952 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
13953 | + if (ret) | |
13954 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
13955 | + return ret; | |
13956 | +} | |
13957 | +EXPORT_SYMBOL(rt_down_write_killable); | |
13958 | + | |
13959 | +int rt_down_write_killable_nested(struct rw_semaphore *rwsem, int subclass) | |
13960 | +{ | |
13961 | + int ret; | |
13962 | + | |
13963 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
13964 | + ret = rt_mutex_lock_killable(&rwsem->lock); | |
13965 | + if (ret) | |
13966 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
13967 | + return ret; | |
13968 | +} | |
13969 | +EXPORT_SYMBOL(rt_down_write_killable_nested); | |
13970 | + | |
13971 | +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) | |
13972 | +{ | |
13973 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
13974 | + rt_mutex_lock(&rwsem->lock); | |
13975 | +} | |
13976 | +EXPORT_SYMBOL(rt_down_write_nested); | |
13977 | + | |
13978 | +void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
13979 | + struct lockdep_map *nest) | |
13980 | +{ | |
13981 | + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); | |
13982 | + rt_mutex_lock(&rwsem->lock); | |
13983 | +} | |
13984 | +EXPORT_SYMBOL(rt_down_write_nested_lock); | |
13985 | + | |
13986 | +int rt__down_read_trylock(struct rw_semaphore *rwsem) | |
13987 | +{ | |
13988 | + struct rt_mutex *lock = &rwsem->lock; | |
13989 | + int ret = 1; | |
13990 | + | |
13991 | + /* | |
13992 | + * recursive read locks succeed when current owns the rwsem, | |
13993 | + * but not when read_depth == 0 which means that the rwsem is | |
13994 | + * write locked. | |
13995 | + */ | |
13996 | + if (rt_mutex_owner(lock) != current) | |
13997 | + ret = rt_mutex_trylock(&rwsem->lock); | |
13998 | + else if (!rwsem->read_depth) | |
13999 | + ret = 0; | |
14000 | + | |
14001 | + if (ret) | |
14002 | + rwsem->read_depth++; | |
14003 | + return ret; | |
14004 | + | |
14005 | +} | |
14006 | + | |
14007 | +int rt_down_read_trylock(struct rw_semaphore *rwsem) | |
14008 | +{ | |
14009 | + int ret; | |
14010 | + | |
14011 | + ret = rt__down_read_trylock(rwsem); | |
14012 | + if (ret) | |
14013 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
14014 | + | |
14015 | + return ret; | |
14016 | +} | |
14017 | +EXPORT_SYMBOL(rt_down_read_trylock); | |
14018 | + | |
14019 | +void rt__down_read(struct rw_semaphore *rwsem) | |
14020 | +{ | |
14021 | + struct rt_mutex *lock = &rwsem->lock; | |
14022 | + | |
14023 | + if (rt_mutex_owner(lock) != current) | |
14024 | + rt_mutex_lock(&rwsem->lock); | |
14025 | + rwsem->read_depth++; | |
14026 | +} | |
14027 | +EXPORT_SYMBOL(rt__down_read); | |
14028 | + | |
14029 | +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) | |
14030 | +{ | |
14031 | + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
14032 | + rt__down_read(rwsem); | |
14033 | +} | |
14034 | + | |
14035 | +void rt_down_read(struct rw_semaphore *rwsem) | |
14036 | +{ | |
14037 | + __rt_down_read(rwsem, 0); | |
14038 | +} | |
14039 | +EXPORT_SYMBOL(rt_down_read); | |
14040 | + | |
14041 | +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) | |
14042 | +{ | |
14043 | + __rt_down_read(rwsem, subclass); | |
14044 | +} | |
14045 | +EXPORT_SYMBOL(rt_down_read_nested); | |
14046 | + | |
14047 | +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
14048 | + struct lock_class_key *key) | |
14049 | +{ | |
14050 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14051 | + /* | |
14052 | + * Make sure we are not reinitializing a held lock: | |
14053 | + */ | |
14054 | + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); | |
14055 | + lockdep_init_map(&rwsem->dep_map, name, key, 0); | |
14056 | +#endif | |
14057 | + rwsem->read_depth = 0; | |
14058 | + rwsem->lock.save_state = 0; | |
14059 | +} | |
14060 | +EXPORT_SYMBOL(__rt_rwsem_init); | |
14061 | + | |
14062 | +/** | |
14063 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
14064 | + * @cnt: the atomic which we are to dec | |
14065 | + * @lock: the mutex to return holding if we dec to 0 | |
14066 | + * | |
14067 | + * return true and hold lock if we dec to 0, return false otherwise | |
14068 | + */ | |
14069 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
14070 | +{ | |
14071 | + /* dec if we can't possibly hit 0 */ | |
14072 | + if (atomic_add_unless(cnt, -1, 1)) | |
14073 | + return 0; | |
14074 | + /* we might hit 0, so take the lock */ | |
14075 | + mutex_lock(lock); | |
14076 | + if (!atomic_dec_and_test(cnt)) { | |
14077 | + /* when we actually did the dec, we didn't hit 0 */ | |
14078 | + mutex_unlock(lock); | |
14079 | + return 0; | |
14080 | + } | |
14081 | + /* we hit 0, and we hold the lock */ | |
14082 | + return 1; | |
14083 | +} | |
14084 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
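
For illustration, a minimal sketch of the usual caller pattern for the atomic_dec_and_mutex_lock() helper added above; the object type, list and lock names here are hypothetical and not part of the patch:

	struct my_obj {
		atomic_t refcnt;
		struct list_head node;
	};

	static DEFINE_MUTEX(obj_list_lock);

	static void obj_put(struct my_obj *obj)
	{
		/* returns 1 only when the count reached 0, with the mutex held */
		if (atomic_dec_and_mutex_lock(&obj->refcnt, &obj_list_lock)) {
			list_del(&obj->node);
			mutex_unlock(&obj_list_lock);
			kfree(obj);
		}
	}

On PREEMPT_RT the mutex underneath is an rtmutex, but the calling convention is unchanged.
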
14085 | diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c | |
14086 | index 1ec0f48962b3..2576f7ccf8e2 100644 | |
14087 | --- a/kernel/locking/rtmutex.c | |
14088 | +++ b/kernel/locking/rtmutex.c | |
14089 | @@ -7,6 +7,11 @@ | |
14090 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
14091 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
14092 | * Copyright (C) 2006 Esben Nielsen | |
14093 | + * Adaptive Spinlocks: | |
14094 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
14095 | + * and Peter Morreale, | |
14096 | + * Adaptive Spinlocks simplification: | |
14097 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
14098 | * | |
14099 | * See Documentation/locking/rt-mutex-design.txt for details. | |
14100 | */ | |
14101 | @@ -16,6 +21,7 @@ | |
14102 | #include <linux/sched/rt.h> | |
14103 | #include <linux/sched/deadline.h> | |
14104 | #include <linux/timer.h> | |
14105 | +#include <linux/ww_mutex.h> | |
14106 | ||
14107 | #include "rtmutex_common.h" | |
14108 | ||
14109 | @@ -69,6 +75,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) | |
14110 | clear_rt_mutex_waiters(lock); | |
14111 | } | |
14112 | ||
14113 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) | |
14114 | +{ | |
14115 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
14116 | + waiter != PI_REQUEUE_INPROGRESS; | |
14117 | +} | |
14118 | + | |
14119 | /* | |
14120 | * We can speed up the acquire/release, if there's no debugging state to be | |
14121 | * set up. | |
14122 | @@ -350,6 +362,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, | |
14123 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); | |
14124 | } | |
14125 | ||
14126 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) | |
14127 | +{ | |
14128 | + if (waiter->savestate) | |
14129 | + wake_up_lock_sleeper(waiter->task); | |
14130 | + else | |
14131 | + wake_up_process(waiter->task); | |
14132 | +} | |
14133 | + | |
14134 | /* | |
14135 | * Max number of times we'll walk the boosting chain: | |
14136 | */ | |
14137 | @@ -357,7 +377,8 @@ int max_lock_depth = 1024; | |
14138 | ||
14139 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | |
14140 | { | |
14141 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
14142 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
14143 | + p->pi_blocked_on->lock : NULL; | |
14144 | } | |
14145 | ||
14146 | /* | |
14147 | @@ -493,7 +514,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14148 | * reached or the state of the chain has changed while we | |
14149 | * dropped the locks. | |
14150 | */ | |
14151 | - if (!waiter) | |
14152 | + if (!rt_mutex_real_waiter(waiter)) | |
14153 | goto out_unlock_pi; | |
14154 | ||
14155 | /* | |
14156 | @@ -655,13 +676,16 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14157 | * follow here. This is the end of the chain we are walking. | |
14158 | */ | |
14159 | if (!rt_mutex_owner(lock)) { | |
14160 | + struct rt_mutex_waiter *lock_top_waiter; | |
14161 | + | |
14162 | /* | |
14163 | * If the requeue [7] above changed the top waiter, | |
14164 | * then we need to wake the new top waiter up to try | |
14165 | * to get the lock. | |
14166 | */ | |
14167 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
14168 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
14169 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
14170 | + if (prerequeue_top_waiter != lock_top_waiter) | |
14171 | + rt_mutex_wake_waiter(lock_top_waiter); | |
14172 | raw_spin_unlock_irq(&lock->wait_lock); | |
14173 | return 0; | |
14174 | } | |
14175 | @@ -754,6 +778,25 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14176 | return ret; | |
14177 | } | |
14178 | ||
14179 | + | |
14180 | +#define STEAL_NORMAL 0 | |
14181 | +#define STEAL_LATERAL 1 | |
14182 | + | |
14183 | +/* | |
14184 | + * Note that RT tasks are excluded from lateral-steals to prevent the | |
14185 | + * introduction of an unbounded latency | |
14186 | + */ | |
14187 | +static inline int lock_is_stealable(struct task_struct *task, | |
14188 | + struct task_struct *pendowner, int mode) | |
14189 | +{ | |
14190 | + if (mode == STEAL_NORMAL || rt_task(task)) { | |
14191 | + if (task->prio >= pendowner->prio) | |
14192 | + return 0; | |
14193 | + } else if (task->prio > pendowner->prio) | |
14194 | + return 0; | |
14195 | + return 1; | |
14196 | +} | |
14197 | + | |
14198 | /* | |
14199 | * Try to take an rt-mutex | |
14200 | * | |
14201 | @@ -764,8 +807,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
14202 | * @waiter: The waiter that is queued to the lock's wait tree if the | |
14203 | * callsite called task_blocked_on_lock(), otherwise NULL | |
14204 | */ | |
14205 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14206 | - struct rt_mutex_waiter *waiter) | |
14207 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
14208 | + struct task_struct *task, | |
14209 | + struct rt_mutex_waiter *waiter, int mode) | |
14210 | { | |
14211 | /* | |
14212 | * Before testing whether we can acquire @lock, we set the | |
14213 | @@ -802,8 +846,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14214 | * If waiter is not the highest priority waiter of | |
14215 | * @lock, give up. | |
14216 | */ | |
14217 | - if (waiter != rt_mutex_top_waiter(lock)) | |
14218 | + if (waiter != rt_mutex_top_waiter(lock)) { | |
14219 | + /* XXX lock_is_stealable() ? */ | |
14220 | return 0; | |
14221 | + } | |
14222 | ||
14223 | /* | |
14224 | * We can acquire the lock. Remove the waiter from the | |
14225 | @@ -821,14 +867,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14226 | * not need to be dequeued. | |
14227 | */ | |
14228 | if (rt_mutex_has_waiters(lock)) { | |
14229 | - /* | |
14230 | - * If @task->prio is greater than or equal to | |
14231 | - * the top waiter priority (kernel view), | |
14232 | - * @task lost. | |
14233 | - */ | |
14234 | - if (task->prio >= rt_mutex_top_waiter(lock)->prio) | |
14235 | - return 0; | |
14236 | + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; | |
14237 | ||
14238 | + if (task != pown && !lock_is_stealable(task, pown, mode)) | |
14239 | + return 0; | |
14240 | /* | |
14241 | * The current top waiter stays enqueued. We | |
14242 | * don't have to change anything in the lock | |
14243 | @@ -877,6 +919,438 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14244 | return 1; | |
14245 | } | |
14246 | ||
14247 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14248 | +/* | |
14249 | + * preemptible spin_lock functions: | |
14250 | + */ | |
14251 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
14252 | + void (*slowfn)(struct rt_mutex *lock, | |
14253 | + bool mg_off), | |
14254 | + bool do_mig_dis) | |
14255 | +{ | |
14256 | + might_sleep_no_state_check(); | |
14257 | + | |
14258 | + if (do_mig_dis) | |
14259 | + migrate_disable(); | |
14260 | + | |
14261 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
14262 | + rt_mutex_deadlock_account_lock(lock, current); | |
14263 | + else | |
14264 | + slowfn(lock, do_mig_dis); | |
14265 | +} | |
14266 | + | |
14267 | +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock, | |
14268 | + int (*slowfn)(struct rt_mutex *lock)) | |
14269 | +{ | |
14270 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
14271 | + rt_mutex_deadlock_account_unlock(current); | |
14272 | + return 0; | |
14273 | + } | |
14274 | + return slowfn(lock); | |
14275 | +} | |
14276 | +#ifdef CONFIG_SMP | |
14277 | +/* | |
14278 | + * Note that owner is a speculative pointer and dereferencing relies | |
14279 | + * on rcu_read_lock() and the check against the lock owner. | |
14280 | + */ | |
14281 | +static int adaptive_wait(struct rt_mutex *lock, | |
14282 | + struct task_struct *owner) | |
14283 | +{ | |
14284 | + int res = 0; | |
14285 | + | |
14286 | + rcu_read_lock(); | |
14287 | + for (;;) { | |
14288 | + if (owner != rt_mutex_owner(lock)) | |
14289 | + break; | |
14290 | + /* | |
14291 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
14292 | + * checking the above to be valid. | |
14293 | + */ | |
14294 | + barrier(); | |
14295 | + if (!owner->on_cpu) { | |
14296 | + res = 1; | |
14297 | + break; | |
14298 | + } | |
14299 | + cpu_relax(); | |
14300 | + } | |
14301 | + rcu_read_unlock(); | |
14302 | + return res; | |
14303 | +} | |
14304 | +#else | |
14305 | +static int adaptive_wait(struct rt_mutex *lock, | |
14306 | + struct task_struct *orig_owner) | |
14307 | +{ | |
14308 | + return 1; | |
14309 | +} | |
14310 | +#endif | |
14311 | + | |
14312 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14313 | + struct rt_mutex_waiter *waiter, | |
14314 | + struct task_struct *task, | |
14315 | + enum rtmutex_chainwalk chwalk); | |
14316 | +/* | |
14317 | + * Slow path lock function spin_lock style: this variant is very | |
14318 | + * careful not to miss any non-lock wakeups. | |
14319 | + * | |
14320 | + * We store the current state under p->pi_lock in p->saved_state and | |
14321 | + * the try_to_wake_up() code handles this accordingly. | |
14322 | + */ | |
14323 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, | |
14324 | + bool mg_off) | |
14325 | +{ | |
14326 | + struct task_struct *lock_owner, *self = current; | |
14327 | + struct rt_mutex_waiter waiter, *top_waiter; | |
14328 | + unsigned long flags; | |
14329 | + int ret; | |
14330 | + | |
14331 | + rt_mutex_init_waiter(&waiter, true); | |
14332 | + | |
14333 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14334 | + | |
14335 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { | |
14336 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14337 | + return; | |
14338 | + } | |
14339 | + | |
14340 | + BUG_ON(rt_mutex_owner(lock) == self); | |
14341 | + | |
14342 | + /* | |
14343 | + * We save whatever state the task is in and we'll restore it | |
14344 | + * after acquiring the lock taking real wakeups into account | |
14345 | + * as well. We are serialized via pi_lock against wakeups. See | |
14346 | + * try_to_wake_up(). | |
14347 | + */ | |
14348 | + raw_spin_lock(&self->pi_lock); | |
14349 | + self->saved_state = self->state; | |
14350 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
14351 | + raw_spin_unlock(&self->pi_lock); | |
14352 | + | |
14353 | + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); | |
14354 | + BUG_ON(ret); | |
14355 | + | |
14356 | + for (;;) { | |
14357 | + /* Try to acquire the lock again. */ | |
14358 | + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) | |
14359 | + break; | |
14360 | + | |
14361 | + top_waiter = rt_mutex_top_waiter(lock); | |
14362 | + lock_owner = rt_mutex_owner(lock); | |
14363 | + | |
14364 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14365 | + | |
14366 | + debug_rt_mutex_print_deadlock(&waiter); | |
14367 | + | |
14368 | + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { | |
14369 | + if (mg_off) | |
14370 | + migrate_enable(); | |
14371 | + schedule(); | |
14372 | + if (mg_off) | |
14373 | + migrate_disable(); | |
14374 | + } | |
14375 | + | |
14376 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14377 | + | |
14378 | + raw_spin_lock(&self->pi_lock); | |
14379 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
14380 | + raw_spin_unlock(&self->pi_lock); | |
14381 | + } | |
14382 | + | |
14383 | + /* | |
14384 | + * Restore the task state to current->saved_state. We set it | |
14385 | + * to the original state above and the try_to_wake_up() code | |
14386 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
14387 | + * happened while we were blocked. Clear saved_state so | |
14388 | + * try_to_wake_up() does not get confused. | |
14389 | + */ | |
14390 | + raw_spin_lock(&self->pi_lock); | |
14391 | + __set_current_state_no_track(self->saved_state); | |
14392 | + self->saved_state = TASK_RUNNING; | |
14393 | + raw_spin_unlock(&self->pi_lock); | |
14394 | + | |
14395 | + /* | |
14396 | + * try_to_take_rt_mutex() sets the waiter bit | |
14397 | + * unconditionally. We might have to fix that up: | |
14398 | + */ | |
14399 | + fixup_rt_mutex_waiters(lock); | |
14400 | + | |
14401 | + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); | |
14402 | + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); | |
14403 | + | |
14404 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14405 | + | |
14406 | + debug_rt_mutex_free_waiter(&waiter); | |
14407 | +} | |
14408 | + | |
14409 | +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
14410 | + struct wake_q_head *wake_sleeper_q, | |
14411 | + struct rt_mutex *lock); | |
14412 | +/* | |
14413 | + * Slow path to release a rt_mutex spin_lock style | |
14414 | + */ | |
14415 | +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) | |
14416 | +{ | |
14417 | + unsigned long flags; | |
14418 | + WAKE_Q(wake_q); | |
14419 | + WAKE_Q(wake_sleeper_q); | |
14420 | + | |
14421 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14422 | + | |
14423 | + debug_rt_mutex_unlock(lock); | |
14424 | + | |
14425 | + rt_mutex_deadlock_account_unlock(current); | |
14426 | + | |
14427 | + if (!rt_mutex_has_waiters(lock)) { | |
14428 | + lock->owner = NULL; | |
14429 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14430 | + return 0; | |
14431 | + } | |
14432 | + | |
14433 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
14434 | + | |
14435 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14436 | + wake_up_q(&wake_q); | |
14437 | + wake_up_q_sleeper(&wake_sleeper_q); | |
14438 | + | |
14439 | + /* Undo pi boosting when necessary */ | |
14440 | + rt_mutex_adjust_prio(current); | |
14441 | + return 0; | |
14442 | +} | |
14443 | + | |
14444 | +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock) | |
14445 | +{ | |
14446 | + unsigned long flags; | |
14447 | + WAKE_Q(wake_q); | |
14448 | + WAKE_Q(wake_sleeper_q); | |
14449 | + | |
14450 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
14451 | + | |
14452 | + debug_rt_mutex_unlock(lock); | |
14453 | + | |
14454 | + rt_mutex_deadlock_account_unlock(current); | |
14455 | + | |
14456 | + if (!rt_mutex_has_waiters(lock)) { | |
14457 | + lock->owner = NULL; | |
14458 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14459 | + return 0; | |
14460 | + } | |
14461 | + | |
14462 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
14463 | + | |
14464 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14465 | + wake_up_q(&wake_q); | |
14466 | + wake_up_q_sleeper(&wake_sleeper_q); | |
14467 | + return 1; | |
14468 | +} | |
14469 | + | |
14470 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) | |
14471 | +{ | |
14472 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); | |
14473 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
14474 | +} | |
14475 | +EXPORT_SYMBOL(rt_spin_lock__no_mg); | |
14476 | + | |
14477 | +void __lockfunc rt_spin_lock(spinlock_t *lock) | |
14478 | +{ | |
14479 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
14480 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
14481 | +} | |
14482 | +EXPORT_SYMBOL(rt_spin_lock); | |
14483 | + | |
14484 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) | |
14485 | +{ | |
14486 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); | |
14487 | +} | |
14488 | +EXPORT_SYMBOL(__rt_spin_lock); | |
14489 | + | |
14490 | +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) | |
14491 | +{ | |
14492 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); | |
14493 | +} | |
14494 | +EXPORT_SYMBOL(__rt_spin_lock__no_mg); | |
14495 | + | |
14496 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14497 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
14498 | +{ | |
14499 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
14500 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
14501 | +} | |
14502 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
14503 | +#endif | |
14504 | + | |
14505 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) | |
14506 | +{ | |
14507 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
14508 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
14509 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
14510 | +} | |
14511 | +EXPORT_SYMBOL(rt_spin_unlock__no_mg); | |
14512 | + | |
14513 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) | |
14514 | +{ | |
14515 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
14516 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
14517 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
14518 | + migrate_enable(); | |
14519 | +} | |
14520 | +EXPORT_SYMBOL(rt_spin_unlock); | |
14521 | + | |
14522 | +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock) | |
14523 | +{ | |
14524 | + int ret; | |
14525 | + | |
14526 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
14527 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
14528 | + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost); | |
14529 | + migrate_enable(); | |
14530 | + return ret; | |
14531 | +} | |
14532 | + | |
14533 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) | |
14534 | +{ | |
14535 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
14536 | +} | |
14537 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
14538 | + | |
14539 | +/* | |
14540 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
14541 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
14542 | + * schedule if there's contention: | |
14543 | + */ | |
14544 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
14545 | +{ | |
14546 | + spin_lock(lock); | |
14547 | + spin_unlock(lock); | |
14548 | +} | |
14549 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
14550 | + | |
14551 | +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock) | |
14552 | +{ | |
14553 | + return rt_mutex_trylock(lock); | |
14554 | +} | |
14555 | + | |
14556 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) | |
14557 | +{ | |
14558 | + int ret; | |
14559 | + | |
14560 | + ret = rt_mutex_trylock(&lock->lock); | |
14561 | + if (ret) | |
14562 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14563 | + return ret; | |
14564 | +} | |
14565 | +EXPORT_SYMBOL(rt_spin_trylock__no_mg); | |
14566 | + | |
14567 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
14568 | +{ | |
14569 | + int ret; | |
14570 | + | |
14571 | + migrate_disable(); | |
14572 | + ret = rt_mutex_trylock(&lock->lock); | |
14573 | + if (ret) | |
14574 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14575 | + else | |
14576 | + migrate_enable(); | |
14577 | + return ret; | |
14578 | +} | |
14579 | +EXPORT_SYMBOL(rt_spin_trylock); | |
14580 | + | |
14581 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
14582 | +{ | |
14583 | + int ret; | |
14584 | + | |
14585 | + local_bh_disable(); | |
14586 | + ret = rt_mutex_trylock(&lock->lock); | |
14587 | + if (ret) { | |
14588 | + migrate_disable(); | |
14589 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14590 | + } else | |
14591 | + local_bh_enable(); | |
14592 | + return ret; | |
14593 | +} | |
14594 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
14595 | + | |
14596 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
14597 | +{ | |
14598 | + int ret; | |
14599 | + | |
14600 | + *flags = 0; | |
14601 | + ret = rt_mutex_trylock(&lock->lock); | |
14602 | + if (ret) { | |
14603 | + migrate_disable(); | |
14604 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
14605 | + } | |
14606 | + return ret; | |
14607 | +} | |
14608 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
14609 | + | |
14610 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) | |
14611 | +{ | |
14612 | + /* Subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */ | |
14613 | + if (atomic_add_unless(atomic, -1, 1)) | |
14614 | + return 0; | |
14615 | + rt_spin_lock(lock); | |
14616 | + if (atomic_dec_and_test(atomic)) | |
14617 | + return 1; | |
14618 | + rt_spin_unlock(lock); | |
14619 | + return 0; | |
14620 | +} | |
14621 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
14622 | + | |
14623 | + void | |
14624 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) | |
14625 | +{ | |
14626 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14627 | + /* | |
14628 | + * Make sure we are not reinitializing a held lock: | |
14629 | + */ | |
14630 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
14631 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
14632 | +#endif | |
14633 | +} | |
14634 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
14635 | + | |
14636 | +#endif /* PREEMPT_RT_FULL */ | |
14637 | + | |
14638 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14639 | + static inline int __sched | |
14640 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
14641 | +{ | |
14642 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
14643 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
14644 | + | |
14645 | + if (!hold_ctx) | |
14646 | + return 0; | |
14647 | + | |
14648 | + if (unlikely(ctx == hold_ctx)) | |
14649 | + return -EALREADY; | |
14650 | + | |
14651 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | |
14652 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
14653 | +#ifdef CONFIG_DEBUG_MUTEXES | |
14654 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
14655 | + ctx->contending_lock = ww; | |
14656 | +#endif | |
14657 | + return -EDEADLK; | |
14658 | + } | |
14659 | + | |
14660 | + return 0; | |
14661 | +} | |
14662 | +#else | |
14663 | + static inline int __sched | |
14664 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
14665 | +{ | |
14666 | + BUG(); | |
14667 | + return 0; | |
14668 | +} | |
14669 | + | |
14670 | +#endif | |
14671 | + | |
14672 | +static inline int | |
14673 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
14674 | + struct rt_mutex_waiter *waiter) | |
14675 | +{ | |
14676 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
14677 | +} | |
14678 | + | |
14679 | /* | |
14680 | * Task blocks on lock. | |
14681 | * | |
14682 | @@ -907,6 +1381,23 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14683 | return -EDEADLK; | |
14684 | ||
14685 | raw_spin_lock(&task->pi_lock); | |
14686 | + | |
14687 | + /* | |
14688 | + * In the case of futex requeue PI, this will be a proxy | |
14689 | + * lock. The task will wake unaware that it is enqueued on | |
14690 | + * this lock. Avoid blocking on two locks and corrupting | |
14691 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
14692 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
14693 | + * before requeue (due to a signal or timeout). Do not enqueue | |
14694 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
14695 | + */ | |
14696 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
14697 | + raw_spin_unlock(&task->pi_lock); | |
14698 | + return -EAGAIN; | |
14699 | + } | |
14700 | + | |
14701 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); | |
14702 | + | |
14703 | __rt_mutex_adjust_prio(task); | |
14704 | waiter->task = task; | |
14705 | waiter->lock = lock; | |
14706 | @@ -930,7 +1421,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14707 | rt_mutex_enqueue_pi(owner, waiter); | |
14708 | ||
14709 | __rt_mutex_adjust_prio(owner); | |
14710 | - if (owner->pi_blocked_on) | |
14711 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
14712 | chain_walk = 1; | |
14713 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
14714 | chain_walk = 1; | |
14715 | @@ -972,6 +1463,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
14716 | * Called with lock->wait_lock held and interrupts disabled. | |
14717 | */ | |
14718 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
14719 | + struct wake_q_head *wake_sleeper_q, | |
14720 | struct rt_mutex *lock) | |
14721 | { | |
14722 | struct rt_mutex_waiter *waiter; | |
14723 | @@ -1000,7 +1492,10 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
14724 | ||
14725 | raw_spin_unlock(¤t->pi_lock); | |
14726 | ||
14727 | - wake_q_add(wake_q, waiter->task); | |
14728 | + if (waiter->savestate) | |
14729 | + wake_q_add(wake_sleeper_q, waiter->task); | |
14730 | + else | |
14731 | + wake_q_add(wake_q, waiter->task); | |
14732 | } | |
14733 | ||
14734 | /* | |
14735 | @@ -1014,7 +1509,7 @@ static void remove_waiter(struct rt_mutex *lock, | |
14736 | { | |
14737 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
14738 | struct task_struct *owner = rt_mutex_owner(lock); | |
14739 | - struct rt_mutex *next_lock; | |
14740 | + struct rt_mutex *next_lock = NULL; | |
14741 | ||
14742 | raw_spin_lock(¤t->pi_lock); | |
14743 | rt_mutex_dequeue(lock, waiter); | |
14744 | @@ -1038,7 +1533,8 @@ static void remove_waiter(struct rt_mutex *lock, | |
14745 | __rt_mutex_adjust_prio(owner); | |
14746 | ||
14747 | /* Store the lock on which owner is blocked or NULL */ | |
14748 | - next_lock = task_blocked_on_lock(owner); | |
14749 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
14750 | + next_lock = task_blocked_on_lock(owner); | |
14751 | ||
14752 | raw_spin_unlock(&owner->pi_lock); | |
14753 | ||
14754 | @@ -1074,17 +1570,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |
14755 | raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14756 | ||
14757 | waiter = task->pi_blocked_on; | |
14758 | - if (!waiter || (waiter->prio == task->prio && | |
14759 | + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && | |
14760 | !dl_prio(task->prio))) { | |
14761 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14762 | return; | |
14763 | } | |
14764 | next_lock = waiter->lock; | |
14765 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14766 | ||
14767 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
14768 | get_task_struct(task); | |
14769 | ||
14770 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14771 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
14772 | next_lock, NULL, task); | |
14773 | } | |
14774 | @@ -1102,7 +1598,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) | |
14775 | static int __sched | |
14776 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14777 | struct hrtimer_sleeper *timeout, | |
14778 | - struct rt_mutex_waiter *waiter) | |
14779 | + struct rt_mutex_waiter *waiter, | |
14780 | + struct ww_acquire_ctx *ww_ctx) | |
14781 | { | |
14782 | int ret = 0; | |
14783 | ||
14784 | @@ -1125,6 +1622,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14785 | break; | |
14786 | } | |
14787 | ||
14788 | + if (ww_ctx && ww_ctx->acquired > 0) { | |
14789 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
14790 | + if (ret) | |
14791 | + break; | |
14792 | + } | |
14793 | + | |
14794 | raw_spin_unlock_irq(&lock->wait_lock); | |
14795 | ||
14796 | debug_rt_mutex_print_deadlock(waiter); | |
14797 | @@ -1159,21 +1662,96 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, | |
14798 | } | |
14799 | } | |
14800 | ||
14801 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
14802 | + struct ww_acquire_ctx *ww_ctx) | |
14803 | +{ | |
14804 | +#ifdef CONFIG_DEBUG_MUTEXES | |
14805 | + /* | |
14806 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
14807 | + * but released with a normal mutex_unlock in this call. | |
14808 | + * | |
14809 | + * This should never happen, always use ww_mutex_unlock. | |
14810 | + */ | |
14811 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
14812 | + | |
14813 | + /* | |
14814 | + * Not quite done after calling ww_acquire_done()? | |
14815 | + */ | |
14816 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
14817 | + | |
14818 | + if (ww_ctx->contending_lock) { | |
14819 | + /* | |
14820 | + * After -EDEADLK you tried to | |
14821 | + * acquire a different ww_mutex? Bad! | |
14822 | + */ | |
14823 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
14824 | + | |
14825 | + /* | |
14826 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
14827 | + * but 'forgot' to unlock everything else first? | |
14828 | + */ | |
14829 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
14830 | + ww_ctx->contending_lock = NULL; | |
14831 | + } | |
14832 | + | |
14833 | + /* | |
14834 | + * Naughty, using a different class will lead to undefined behavior! | |
14835 | + */ | |
14836 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
14837 | +#endif | |
14838 | + ww_ctx->acquired++; | |
14839 | +} | |
14840 | + | |
14841 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14842 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
14843 | + struct ww_acquire_ctx *ww_ctx) | |
14844 | +{ | |
14845 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
14846 | + struct rt_mutex_waiter *waiter, *n; | |
14847 | + | |
14848 | + /* | |
14849 | + * This branch gets optimized out for the common case, | |
14850 | + * and is only important for ww_mutex_lock. | |
14851 | + */ | |
14852 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
14853 | + ww->ctx = ww_ctx; | |
14854 | + | |
14855 | + /* | |
14856 | + * Give any possible sleeping processes the chance to wake up, | |
14857 | + * so they can recheck if they have to back off. | |
14858 | + */ | |
14859 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, | |
14860 | + tree_entry) { | |
14861 | + /* XXX debug rt mutex waiter wakeup */ | |
14862 | + | |
14863 | + BUG_ON(waiter->lock != lock); | |
14864 | + rt_mutex_wake_waiter(waiter); | |
14865 | + } | |
14866 | +} | |
14867 | + | |
14868 | +#else | |
14869 | + | |
14870 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
14871 | + struct ww_acquire_ctx *ww_ctx) | |
14872 | +{ | |
14873 | + BUG(); | |
14874 | +} | |
14875 | +#endif | |
14876 | + | |
14877 | /* | |
14878 | * Slow path lock function: | |
14879 | */ | |
14880 | static int __sched | |
14881 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14882 | struct hrtimer_sleeper *timeout, | |
14883 | - enum rtmutex_chainwalk chwalk) | |
14884 | + enum rtmutex_chainwalk chwalk, | |
14885 | + struct ww_acquire_ctx *ww_ctx) | |
14886 | { | |
14887 | struct rt_mutex_waiter waiter; | |
14888 | unsigned long flags; | |
14889 | int ret = 0; | |
14890 | ||
14891 | - debug_rt_mutex_init_waiter(&waiter); | |
14892 | - RB_CLEAR_NODE(&waiter.pi_tree_entry); | |
14893 | - RB_CLEAR_NODE(&waiter.tree_entry); | |
14894 | + rt_mutex_init_waiter(&waiter, false); | |
14895 | ||
14896 | /* | |
14897 | * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
14898 | @@ -1187,6 +1765,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14899 | ||
14900 | /* Try to acquire the lock again: */ | |
14901 | if (try_to_take_rt_mutex(lock, current, NULL)) { | |
14902 | + if (ww_ctx) | |
14903 | + ww_mutex_account_lock(lock, ww_ctx); | |
14904 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14905 | return 0; | |
14906 | } | |
14907 | @@ -1201,13 +1781,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
14908 | ||
14909 | if (likely(!ret)) | |
14910 | /* sleep on the mutex */ | |
14911 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | |
14912 | + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, | |
14913 | + ww_ctx); | |
14914 | + else if (ww_ctx) { | |
14915 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
14916 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
14917 | + BUG_ON(!ret); | |
14918 | + } | |
14919 | ||
14920 | if (unlikely(ret)) { | |
14921 | __set_current_state(TASK_RUNNING); | |
14922 | if (rt_mutex_has_waiters(lock)) | |
14923 | remove_waiter(lock, &waiter); | |
14924 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
14925 | + /* ww_mutex want to report EDEADLK/EALREADY, let them */ | |
14926 | + if (!ww_ctx) | |
14927 | + rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
14928 | + } else if (ww_ctx) { | |
14929 | + ww_mutex_account_lock(lock, ww_ctx); | |
14930 | } | |
14931 | ||
14932 | /* | |
14933 | @@ -1267,7 +1857,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) | |
14934 | * Return whether the current task needs to undo a potential priority boosting. | |
14935 | */ | |
14936 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
14937 | - struct wake_q_head *wake_q) | |
14938 | + struct wake_q_head *wake_q, | |
14939 | + struct wake_q_head *wake_sleeper_q) | |
14940 | { | |
14941 | unsigned long flags; | |
14942 | ||
14943 | @@ -1323,7 +1914,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
14944 | * | |
14945 | * Queue the next waiter for wakeup once we release the wait_lock. | |
14946 | */ | |
14947 | - mark_wakeup_next_waiter(wake_q, lock); | |
14948 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); | |
14949 | ||
14950 | raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
14951 | ||
14952 | @@ -1339,31 +1930,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
14953 | */ | |
14954 | static inline int | |
14955 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
14956 | + struct ww_acquire_ctx *ww_ctx, | |
14957 | int (*slowfn)(struct rt_mutex *lock, int state, | |
14958 | struct hrtimer_sleeper *timeout, | |
14959 | - enum rtmutex_chainwalk chwalk)) | |
14960 | + enum rtmutex_chainwalk chwalk, | |
14961 | + struct ww_acquire_ctx *ww_ctx)) | |
14962 | { | |
14963 | if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
14964 | rt_mutex_deadlock_account_lock(lock, current); | |
14965 | return 0; | |
14966 | } else | |
14967 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); | |
14968 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, | |
14969 | + ww_ctx); | |
14970 | } | |
14971 | ||
14972 | static inline int | |
14973 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
14974 | struct hrtimer_sleeper *timeout, | |
14975 | enum rtmutex_chainwalk chwalk, | |
14976 | + struct ww_acquire_ctx *ww_ctx, | |
14977 | int (*slowfn)(struct rt_mutex *lock, int state, | |
14978 | struct hrtimer_sleeper *timeout, | |
14979 | - enum rtmutex_chainwalk chwalk)) | |
14980 | + enum rtmutex_chainwalk chwalk, | |
14981 | + struct ww_acquire_ctx *ww_ctx)) | |
14982 | { | |
14983 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
14984 | likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
14985 | rt_mutex_deadlock_account_lock(lock, current); | |
14986 | return 0; | |
14987 | } else | |
14988 | - return slowfn(lock, state, timeout, chwalk); | |
14989 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); | |
14990 | } | |
14991 | ||
14992 | static inline int | |
14993 | @@ -1380,17 +1976,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, | |
14994 | static inline void | |
14995 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
14996 | bool (*slowfn)(struct rt_mutex *lock, | |
14997 | - struct wake_q_head *wqh)) | |
14998 | + struct wake_q_head *wqh, | |
14999 | + struct wake_q_head *wq_sleeper)) | |
15000 | { | |
15001 | WAKE_Q(wake_q); | |
15002 | + WAKE_Q(wake_sleeper_q); | |
15003 | ||
15004 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
15005 | rt_mutex_deadlock_account_unlock(current); | |
15006 | ||
15007 | } else { | |
15008 | - bool deboost = slowfn(lock, &wake_q); | |
15009 | + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q); | |
15010 | ||
15011 | wake_up_q(&wake_q); | |
15012 | + wake_up_q_sleeper(&wake_sleeper_q); | |
15013 | ||
15014 | /* Undo pi boosting if necessary: */ | |
15015 | if (deboost) | |
15016 | @@ -1407,7 +2006,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) | |
15017 | { | |
15018 | might_sleep(); | |
15019 | ||
15020 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); | |
15021 | + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
15022 | } | |
15023 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
15024 | ||
15025 | @@ -1424,7 +2023,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) | |
15026 | { | |
15027 | might_sleep(); | |
15028 | ||
15029 | - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); | |
15030 | + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
15031 | } | |
15032 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
15033 | ||
15034 | @@ -1437,11 +2036,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, | |
15035 | might_sleep(); | |
15036 | ||
15037 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
15038 | - RT_MUTEX_FULL_CHAINWALK, | |
15039 | + RT_MUTEX_FULL_CHAINWALK, NULL, | |
15040 | rt_mutex_slowlock); | |
15041 | } | |
15042 | ||
15043 | /** | |
15044 | + * rt_mutex_lock_killable - lock a rt_mutex killable | |
15045 | + * | |
15046 | + * @lock: the rt_mutex to be locked | |
15047 | + * @detect_deadlock: deadlock detection on/off | |
15048 | + * | |
15049 | + * Returns: | |
15050 | + * 0 on success | |
15051 | + * -EINTR when interrupted by a signal | |
15052 | + * -EDEADLK when the lock would deadlock (when deadlock detection is on) | |
15053 | + */ | |
15054 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) | |
15055 | +{ | |
15056 | + might_sleep(); | |
15057 | + | |
15058 | + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); | |
15059 | +} | |
15060 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); | |
15061 | + | |
15062 | +/** | |
15063 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | |
15064 | * the timeout structure is provided | |
15065 | * by the caller | |
15066 | @@ -1461,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) | |
15067 | ||
15068 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
15069 | RT_MUTEX_MIN_CHAINWALK, | |
15070 | + NULL, | |
15071 | rt_mutex_slowlock); | |
15072 | } | |
15073 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
15074 | @@ -1478,7 +2097,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
15075 | */ | |
15076 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | |
15077 | { | |
15078 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15079 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
15080 | +#else | |
15081 | if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) | |
15082 | +#endif | |
15083 | return 0; | |
15084 | ||
15085 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
15086 | @@ -1504,13 +2127,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); | |
15087 | * required or not. | |
15088 | */ | |
15089 | bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, | |
15090 | - struct wake_q_head *wqh) | |
15091 | + struct wake_q_head *wqh, | |
15092 | + struct wake_q_head *wq_sleeper) | |
15093 | { | |
15094 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
15095 | rt_mutex_deadlock_account_unlock(current); | |
15096 | return false; | |
15097 | } | |
15098 | - return rt_mutex_slowunlock(lock, wqh); | |
15099 | + return rt_mutex_slowunlock(lock, wqh, wq_sleeper); | |
15100 | } | |
15101 | ||
15102 | /** | |
15103 | @@ -1543,13 +2167,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); | |
15104 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) | |
15105 | { | |
15106 | lock->owner = NULL; | |
15107 | - raw_spin_lock_init(&lock->wait_lock); | |
15108 | lock->waiters = RB_ROOT; | |
15109 | lock->waiters_leftmost = NULL; | |
15110 | ||
15111 | debug_rt_mutex_init(lock, name); | |
15112 | } | |
15113 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
15114 | +EXPORT_SYMBOL(__rt_mutex_init); | |
15115 | ||
15116 | /** | |
15117 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
15118 | @@ -1564,7 +2187,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
15119 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
15120 | struct task_struct *proxy_owner) | |
15121 | { | |
15122 | - __rt_mutex_init(lock, NULL); | |
15123 | + rt_mutex_init(lock); | |
15124 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | |
15125 | rt_mutex_set_owner(lock, proxy_owner); | |
15126 | rt_mutex_deadlock_account_lock(lock, proxy_owner); | |
15127 | @@ -1612,6 +2235,35 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
15128 | return 1; | |
15129 | } | |
15130 | ||
15131 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15132 | + /* | |
15133 | + * In PREEMPT_RT there's an added race. | |
15134 | + * If the task, that we are about to requeue, times out, | |
15135 | + * it can set the PI_WAKEUP_INPROGRESS. This tells the requeue | |
15136 | + * to skip this task. But right after the task sets | |
15137 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
15138 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
15139 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
15140 | + * lock that it blocks on. We *must not* place this task | |
15141 | + * on this proxy lock in that case. | |
15142 | + * | |
15143 | + * To prevent this race, we first take the task's pi_lock | |
15144 | + * and check if it has updated its pi_blocked_on. If it has, | |
15145 | + * we assume that it woke up and we return -EAGAIN. | |
15146 | + * Otherwise, we set the task's pi_blocked_on to | |
15147 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
15148 | + * it will know that we are in the process of requeuing it. | |
15149 | + */ | |
15150 | + raw_spin_lock(&task->pi_lock); | |
15151 | + if (task->pi_blocked_on) { | |
15152 | + raw_spin_unlock(&task->pi_lock); | |
15153 | + raw_spin_unlock_irq(&lock->wait_lock); | |
15154 | + return -EAGAIN; | |
15155 | + } | |
15156 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
15157 | + raw_spin_unlock(&task->pi_lock); | |
15158 | +#endif | |
15159 | + | |
15160 | /* We enforce deadlock detection for futexes */ | |
15161 | ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
15162 | RT_MUTEX_FULL_CHAINWALK); | |
15163 | @@ -1626,7 +2278,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, | |
15164 | ret = 0; | |
15165 | } | |
15166 | ||
15167 | - if (unlikely(ret)) | |
15168 | + if (ret && rt_mutex_has_waiters(lock)) | |
15169 | remove_waiter(lock, waiter); | |
15170 | ||
15171 | raw_spin_unlock_irq(&lock->wait_lock); | |
15172 | @@ -1682,7 +2334,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
15173 | set_current_state(TASK_INTERRUPTIBLE); | |
15174 | ||
15175 | /* sleep on the mutex */ | |
15176 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
15177 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
15178 | ||
15179 | if (unlikely(ret)) | |
15180 | remove_waiter(lock, waiter); | |
15181 | @@ -1697,3 +2349,89 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
15182 | ||
15183 | return ret; | |
15184 | } | |
15185 | + | |
15186 | +static inline int | |
15187 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
15188 | +{ | |
15189 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
15190 | + unsigned tmp; | |
15191 | + | |
15192 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
15193 | + tmp = ctx->deadlock_inject_interval; | |
15194 | + if (tmp > UINT_MAX/4) | |
15195 | + tmp = UINT_MAX; | |
15196 | + else | |
15197 | + tmp = tmp*2 + tmp + tmp/2; | |
15198 | + | |
15199 | + ctx->deadlock_inject_interval = tmp; | |
15200 | + ctx->deadlock_inject_countdown = tmp; | |
15201 | + ctx->contending_lock = lock; | |
15202 | + | |
15203 | + ww_mutex_unlock(lock); | |
15204 | + | |
15205 | + return -EDEADLK; | |
15206 | + } | |
15207 | +#endif | |
15208 | + | |
15209 | + return 0; | |
15210 | +} | |
15211 | + | |
15212 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15213 | +int __sched | |
15214 | +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
15215 | +{ | |
15216 | + int ret; | |
15217 | + | |
15218 | + might_sleep(); | |
15219 | + | |
15220 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
15221 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); | |
15222 | + if (ret) | |
15223 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
15224 | + else if (!ret && ww_ctx->acquired > 1) | |
15225 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
15226 | + | |
15227 | + return ret; | |
15228 | +} | |
15229 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); | |
15230 | + | |
15231 | +int __sched | |
15232 | +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
15233 | +{ | |
15234 | + int ret; | |
15235 | + | |
15236 | + might_sleep(); | |
15237 | + | |
15238 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
15239 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); | |
15240 | + if (ret) | |
15241 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
15242 | + else if (!ret && ww_ctx->acquired > 1) | |
15243 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
15244 | + | |
15245 | + return ret; | |
15246 | +} | |
15247 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock); | |
15248 | + | |
15249 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
15250 | +{ | |
15251 | + int nest = !!lock->ctx; | |
15252 | + | |
15253 | + /* | |
15254 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
15255 | + * into 'unlocked' state: | |
15256 | + */ | |
15257 | + if (nest) { | |
15258 | +#ifdef CONFIG_DEBUG_MUTEXES | |
15259 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
15260 | +#endif | |
15261 | + if (lock->ctx->acquired > 0) | |
15262 | + lock->ctx->acquired--; | |
15263 | + lock->ctx = NULL; | |
15264 | + } | |
15265 | + | |
15266 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
15267 | + rt_mutex_unlock(&lock->base.lock); | |
15268 | +} | |
15269 | +EXPORT_SYMBOL(ww_mutex_unlock); | |
15270 | +#endif | |
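
As a reading aid for the ww_mutex paths added above, a rough sketch of the wait/wound acquire pattern their -EDEADLK/-EALREADY handling has to support; the lock class and function names are made up for the example:

	static DEFINE_WW_CLASS(my_ww_class);

	static void lock_both(struct ww_mutex *a, struct ww_mutex *b)
	{
		struct ww_acquire_ctx ctx;

		ww_acquire_init(&ctx, &my_ww_class);
	retry:
		ww_mutex_lock(a, &ctx);		/* first lock in this ctx cannot deadlock */
		if (ww_mutex_lock(b, &ctx) == -EDEADLK) {
			/* an older context owns 'b': back off and retry */
			ww_mutex_unlock(a);
			goto retry;
		}
		ww_acquire_done(&ctx);

		/* ... both locks held ... */

		ww_mutex_unlock(b);
		ww_mutex_unlock(a);
		ww_acquire_fini(&ctx);
	}

Real users normally take the contended lock with ww_mutex_lock_slow() before retrying; the plain retry above is only meant to show where the -EDEADLK produced by __mutex_lock_check_stamp() ends up.
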
15271 | diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h | |
15272 | index 4f5f83c7d2d3..289f062f26cd 100644 | |
15273 | --- a/kernel/locking/rtmutex_common.h | |
15274 | +++ b/kernel/locking/rtmutex_common.h | |
15275 | @@ -27,6 +27,7 @@ struct rt_mutex_waiter { | |
15276 | struct rb_node pi_tree_entry; | |
15277 | struct task_struct *task; | |
15278 | struct rt_mutex *lock; | |
15279 | + bool savestate; | |
15280 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
15281 | unsigned long ip; | |
15282 | struct pid *deadlock_task_pid; | |
15283 | @@ -97,6 +98,9 @@ enum rtmutex_chainwalk { | |
15284 | /* | |
15285 | * PI-futex support (proxy locking functions, etc.): | |
15286 | */ | |
15287 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
15288 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
15289 | + | |
15290 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
15291 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
15292 | struct task_struct *proxy_owner); | |
15293 | @@ -110,7 +114,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
15294 | struct rt_mutex_waiter *waiter); | |
15295 | extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); | |
15296 | extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, | |
15297 | - struct wake_q_head *wqh); | |
15298 | + struct wake_q_head *wqh, | |
15299 | + struct wake_q_head *wq_sleeper); | |
15300 | extern void rt_mutex_adjust_prio(struct task_struct *task); | |
15301 | ||
15302 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
15303 | @@ -119,4 +124,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); | |
15304 | # include "rtmutex.h" | |
15305 | #endif | |
15306 | ||
15307 | +static inline void | |
15308 | +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
15309 | +{ | |
15310 | + debug_rt_mutex_init_waiter(waiter); | |
15311 | + waiter->task = NULL; | |
15312 | + waiter->savestate = savestate; | |
15313 | + RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
15314 | + RB_CLEAR_NODE(&waiter->tree_entry); | |
15315 | +} | |
15316 | + | |
15317 | #endif | |
15318 | diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c | |
15319 | index db3ccb1dd614..909779647bd1 100644 | |
15320 | --- a/kernel/locking/spinlock.c | |
15321 | +++ b/kernel/locking/spinlock.c | |
15322 | @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | |
15323 | * __[spin|read|write]_lock_bh() | |
15324 | */ | |
15325 | BUILD_LOCK_OPS(spin, raw_spinlock); | |
15326 | + | |
15327 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15328 | BUILD_LOCK_OPS(read, rwlock); | |
15329 | BUILD_LOCK_OPS(write, rwlock); | |
15330 | +#endif | |
15331 | ||
15332 | #endif | |
15333 | ||
15334 | @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | |
15335 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
15336 | #endif | |
15337 | ||
15338 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15339 | + | |
15340 | #ifndef CONFIG_INLINE_READ_TRYLOCK | |
15341 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
15342 | { | |
15343 | @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | |
15344 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
15345 | #endif | |
15346 | ||
15347 | +#endif /* !PREEMPT_RT_FULL */ | |
15348 | + | |
15349 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15350 | ||
15351 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
15352 | diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c | |
15353 | index 0374a596cffa..94970338d518 100644 | |
15354 | --- a/kernel/locking/spinlock_debug.c | |
15355 | +++ b/kernel/locking/spinlock_debug.c | |
15356 | @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | |
15357 | ||
15358 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
15359 | ||
15360 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15361 | void __rwlock_init(rwlock_t *lock, const char *name, | |
15362 | struct lock_class_key *key) | |
15363 | { | |
15364 | @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, | |
15365 | } | |
15366 | ||
15367 | EXPORT_SYMBOL(__rwlock_init); | |
15368 | +#endif | |
15369 | ||
15370 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
15371 | { | |
15372 | @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) | |
15373 | arch_spin_unlock(&lock->raw_lock); | |
15374 | } | |
15375 | ||
15376 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15377 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
15378 | { | |
15379 | if (!debug_locks_off()) | |
15380 | @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock) | |
15381 | debug_write_unlock(lock); | |
15382 | arch_write_unlock(&lock->raw_lock); | |
15383 | } | |
15384 | + | |
15385 | +#endif | |
15386 | diff --git a/kernel/panic.c b/kernel/panic.c | |
15387 | index ca8cea1ef673..6b698115f003 100644 | |
15388 | --- a/kernel/panic.c | |
15389 | +++ b/kernel/panic.c | |
15390 | @@ -449,9 +449,11 @@ static u64 oops_id; | |
15391 | ||
15392 | static int init_oops_id(void) | |
15393 | { | |
15394 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15395 | if (!oops_id) | |
15396 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
15397 | else | |
15398 | +#endif | |
15399 | oops_id++; | |
15400 | ||
15401 | return 0; | |
15402 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c | |
15403 | index 33c79b6105c5..f53375bc77df 100644 | |
15404 | --- a/kernel/power/hibernate.c | |
15405 | +++ b/kernel/power/hibernate.c | |
15406 | @@ -286,6 +286,8 @@ static int create_image(int platform_mode) | |
15407 | ||
15408 | local_irq_disable(); | |
15409 | ||
15410 | + system_state = SYSTEM_SUSPEND; | |
15411 | + | |
15412 | error = syscore_suspend(); | |
15413 | if (error) { | |
15414 | printk(KERN_ERR "PM: Some system devices failed to power down, " | |
15415 | @@ -315,6 +317,7 @@ static int create_image(int platform_mode) | |
15416 | syscore_resume(); | |
15417 | ||
15418 | Enable_irqs: | |
15419 | + system_state = SYSTEM_RUNNING; | |
15420 | local_irq_enable(); | |
15421 | ||
15422 | Enable_cpus: | |
15423 | @@ -444,6 +447,7 @@ static int resume_target_kernel(bool platform_mode) | |
15424 | goto Enable_cpus; | |
15425 | ||
15426 | local_irq_disable(); | |
15427 | + system_state = SYSTEM_SUSPEND; | |
15428 | ||
15429 | error = syscore_suspend(); | |
15430 | if (error) | |
15431 | @@ -477,6 +481,7 @@ static int resume_target_kernel(bool platform_mode) | |
15432 | syscore_resume(); | |
15433 | ||
15434 | Enable_irqs: | |
15435 | + system_state = SYSTEM_RUNNING; | |
15436 | local_irq_enable(); | |
15437 | ||
15438 | Enable_cpus: | |
15439 | @@ -562,6 +567,7 @@ int hibernation_platform_enter(void) | |
15440 | goto Enable_cpus; | |
15441 | ||
15442 | local_irq_disable(); | |
15443 | + system_state = SYSTEM_SUSPEND; | |
15444 | syscore_suspend(); | |
15445 | if (pm_wakeup_pending()) { | |
15446 | error = -EAGAIN; | |
15447 | @@ -574,6 +580,7 @@ int hibernation_platform_enter(void) | |
15448 | ||
15449 | Power_up: | |
15450 | syscore_resume(); | |
15451 | + system_state = SYSTEM_RUNNING; | |
15452 | local_irq_enable(); | |
15453 | ||
15454 | Enable_cpus: | |
15455 | @@ -674,6 +681,10 @@ static int load_image_and_restore(void) | |
15456 | return error; | |
15457 | } | |
15458 | ||
15459 | +#ifndef CONFIG_SUSPEND | |
15460 | +bool pm_in_action; | |
15461 | +#endif | |
15462 | + | |
15463 | /** | |
15464 | * hibernate - Carry out system hibernation, including saving the image. | |
15465 | */ | |
15466 | @@ -687,6 +698,8 @@ int hibernate(void) | |
15467 | return -EPERM; | |
15468 | } | |
15469 | ||
15470 | + pm_in_action = true; | |
15471 | + | |
15472 | lock_system_sleep(); | |
15473 | /* The snapshot device should not be opened while we're running */ | |
15474 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
15475 | @@ -764,6 +777,7 @@ int hibernate(void) | |
15476 | atomic_inc(&snapshot_device_available); | |
15477 | Unlock: | |
15478 | unlock_system_sleep(); | |
15479 | + pm_in_action = false; | |
15480 | return error; | |
15481 | } | |
15482 | ||
15483 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | |
15484 | index 0acab9d7f96f..aac06aad757c 100644 | |
15485 | --- a/kernel/power/suspend.c | |
15486 | +++ b/kernel/power/suspend.c | |
15487 | @@ -361,6 +361,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
15488 | arch_suspend_disable_irqs(); | |
15489 | BUG_ON(!irqs_disabled()); | |
15490 | ||
15491 | + system_state = SYSTEM_SUSPEND; | |
15492 | + | |
15493 | error = syscore_suspend(); | |
15494 | if (!error) { | |
15495 | *wakeup = pm_wakeup_pending(); | |
15496 | @@ -377,6 +379,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
15497 | syscore_resume(); | |
15498 | } | |
15499 | ||
15500 | + system_state = SYSTEM_RUNNING; | |
15501 | + | |
15502 | arch_suspend_enable_irqs(); | |
15503 | BUG_ON(irqs_disabled()); | |
15504 | ||
15505 | @@ -519,6 +523,8 @@ static int enter_state(suspend_state_t state) | |
15506 | return error; | |
15507 | } | |
15508 | ||
15509 | +bool pm_in_action; | |
15510 | + | |
15511 | /** | |
15512 | * pm_suspend - Externally visible function for suspending the system. | |
15513 | * @state: System sleep state to enter. | |
15514 | @@ -533,6 +539,8 @@ int pm_suspend(suspend_state_t state) | |
15515 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) | |
15516 | return -EINVAL; | |
15517 | ||
15518 | + pm_in_action = true; | |
15519 | + | |
15520 | error = enter_state(state); | |
15521 | if (error) { | |
15522 | suspend_stats.fail++; | |
15523 | @@ -540,6 +548,7 @@ int pm_suspend(suspend_state_t state) | |
15524 | } else { | |
15525 | suspend_stats.success++; | |
15526 | } | |
15527 | + pm_in_action = false; | |
15528 | return error; | |
15529 | } | |
15530 | EXPORT_SYMBOL(pm_suspend); | |
15531 | diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c | |
15532 | index eea6dbc2d8cf..6f01c7ecb45e 100644 | |
15533 | --- a/kernel/printk/printk.c | |
15534 | +++ b/kernel/printk/printk.c | |
15535 | @@ -351,6 +351,65 @@ __packed __aligned(4) | |
15536 | */ | |
15537 | DEFINE_RAW_SPINLOCK(logbuf_lock); | |
15538 | ||
15539 | +#ifdef CONFIG_EARLY_PRINTK | |
15540 | +struct console *early_console; | |
15541 | + | |
15542 | +static void early_vprintk(const char *fmt, va_list ap) | |
15543 | +{ | |
15544 | + if (early_console) { | |
15545 | + char buf[512]; | |
15546 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
15547 | + | |
15548 | + early_console->write(early_console, buf, n); | |
15549 | + } | |
15550 | +} | |
15551 | + | |
15552 | +asmlinkage void early_printk(const char *fmt, ...) | |
15553 | +{ | |
15554 | + va_list ap; | |
15555 | + | |
15556 | + va_start(ap, fmt); | |
15557 | + early_vprintk(fmt, ap); | |
15558 | + va_end(ap); | |
15559 | +} | |
15560 | + | |
15561 | +/* | |
15562 | + * This is independent of any log levels - a global | |
15563 | + * kill switch that turns off all of printk. | |
15564 | + * | |
15565 | + * Used by the NMI watchdog if early-printk is enabled. | |
15566 | + */ | |
15567 | +static bool __read_mostly printk_killswitch; | |
15568 | + | |
15569 | +static int __init force_early_printk_setup(char *str) | |
15570 | +{ | |
15571 | + printk_killswitch = true; | |
15572 | + return 0; | |
15573 | +} | |
15574 | +early_param("force_early_printk", force_early_printk_setup); | |
15575 | + | |
15576 | +void printk_kill(void) | |
15577 | +{ | |
15578 | + printk_killswitch = true; | |
15579 | +} | |
15580 | + | |
15581 | +#ifdef CONFIG_PRINTK | |
15582 | +static int forced_early_printk(const char *fmt, va_list ap) | |
15583 | +{ | |
15584 | + if (!printk_killswitch) | |
15585 | + return 0; | |
15586 | + early_vprintk(fmt, ap); | |
15587 | + return 1; | |
15588 | +} | |
15589 | +#endif | |
15590 | + | |
15591 | +#else | |
15592 | +static inline int forced_early_printk(const char *fmt, va_list ap) | |
15593 | +{ | |
15594 | + return 0; | |
15595 | +} | |
15596 | +#endif | |
15597 | + | |
15598 | #ifdef CONFIG_PRINTK | |
15599 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
15600 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
15601 | @@ -1340,6 +1399,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
15602 | { | |
15603 | char *text; | |
15604 | int len = 0; | |
15605 | + int attempts = 0; | |
15606 | ||
15607 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
15608 | if (!text) | |
15609 | @@ -1351,6 +1411,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
15610 | u64 seq; | |
15611 | u32 idx; | |
15612 | enum log_flags prev; | |
15613 | + int num_msg; | |
15614 | +try_again: | |
15615 | + attempts++; | |
15616 | + if (attempts > 10) { | |
15617 | + len = -EBUSY; | |
15618 | + goto out; | |
15619 | + } | |
15620 | + num_msg = 0; | |
15621 | ||
15622 | /* | |
15623 | * Find first record that fits, including all following records, | |
15624 | @@ -1366,6 +1434,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
15625 | prev = msg->flags; | |
15626 | idx = log_next(idx); | |
15627 | seq++; | |
15628 | + num_msg++; | |
15629 | + if (num_msg > 5) { | |
15630 | + num_msg = 0; | |
15631 | + raw_spin_unlock_irq(&logbuf_lock); | |
15632 | + raw_spin_lock_irq(&logbuf_lock); | |
15633 | + if (clear_seq < log_first_seq) | |
15634 | + goto try_again; | |
15635 | + } | |
15636 | } | |
15637 | ||
15638 | /* move first record forward until length fits into the buffer */ | |
15639 | @@ -1379,6 +1455,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
15640 | prev = msg->flags; | |
15641 | idx = log_next(idx); | |
15642 | seq++; | |
15643 | + num_msg++; | |
15644 | + if (num_msg > 5) { | |
15645 | + num_msg = 0; | |
15646 | + raw_spin_unlock_irq(&logbuf_lock); | |
15647 | + raw_spin_lock_irq(&logbuf_lock); | |
15648 | + if (clear_seq < log_first_seq) | |
15649 | + goto try_again; | |
15650 | + } | |
15651 | } | |
15652 | ||
15653 | /* last message fitting into this dump */ | |
15654 | @@ -1419,6 +1503,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
15655 | clear_seq = log_next_seq; | |
15656 | clear_idx = log_next_idx; | |
15657 | } | |
15658 | +out: | |
15659 | raw_spin_unlock_irq(&logbuf_lock); | |
15660 | ||
15661 | kfree(text); | |
15662 | @@ -1572,6 +1657,12 @@ static void call_console_drivers(int level, | |
15663 | if (!console_drivers) | |
15664 | return; | |
15665 | ||
15666 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
15667 | + if (in_irq() || in_nmi()) | |
15668 | + return; | |
15669 | + } | |
15670 | + | |
15671 | + migrate_disable(); | |
15672 | for_each_console(con) { | |
15673 | if (exclusive_console && con != exclusive_console) | |
15674 | continue; | |
15675 | @@ -1587,6 +1678,7 @@ static void call_console_drivers(int level, | |
15676 | else | |
15677 | con->write(con, text, len); | |
15678 | } | |
15679 | + migrate_enable(); | |
15680 | } | |
15681 | ||
15682 | /* | |
15683 | @@ -1750,6 +1842,13 @@ asmlinkage int vprintk_emit(int facility, int level, | |
15684 | /* cpu currently holding logbuf_lock in this function */ | |
15685 | static unsigned int logbuf_cpu = UINT_MAX; | |
15686 | ||
15687 | + /* | |
15688 | + * Fall back to early_printk if a debugging subsystem has | |
15689 | + * killed printk output | |
15690 | + */ | |
15691 | + if (unlikely(forced_early_printk(fmt, args))) | |
15692 | + return 1; | |
15693 | + | |
15694 | if (level == LOGLEVEL_SCHED) { | |
15695 | level = LOGLEVEL_DEFAULT; | |
15696 | in_sched = true; | |
15697 | @@ -1894,13 +1993,23 @@ asmlinkage int vprintk_emit(int facility, int level, | |
15698 | ||
15699 | /* If called from the scheduler, we can not call up(). */ | |
15700 | if (!in_sched) { | |
15701 | + int may_trylock = 1; | |
15702 | + | |
15703 | lockdep_off(); | |
15704 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15705 | + /* | |
15706 | + * we can't take a sleeping lock with IRQs or preemption disabled | 
15707 | + * so we can't print in these contexts | |
15708 | + */ | |
15709 | + if (!(preempt_count() == 0 && !irqs_disabled())) | |
15710 | + may_trylock = 0; | |
15711 | +#endif | |
15712 | /* | |
15713 | * Try to acquire and then immediately release the console | |
15714 | * semaphore. The release will print out buffers and wake up | |
15715 | * /dev/kmsg and syslog() users. | |
15716 | */ | |
15717 | - if (console_trylock()) | |
15718 | + if (may_trylock && console_trylock()) | |
15719 | console_unlock(); | |
15720 | lockdep_on(); | |
15721 | } | |
15722 | @@ -2023,26 +2132,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); | |
15723 | ||
15724 | #endif /* CONFIG_PRINTK */ | |
15725 | ||
15726 | -#ifdef CONFIG_EARLY_PRINTK | |
15727 | -struct console *early_console; | |
15728 | - | |
15729 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
15730 | -{ | |
15731 | - va_list ap; | |
15732 | - char buf[512]; | |
15733 | - int n; | |
15734 | - | |
15735 | - if (!early_console) | |
15736 | - return; | |
15737 | - | |
15738 | - va_start(ap, fmt); | |
15739 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
15740 | - va_end(ap); | |
15741 | - | |
15742 | - early_console->write(early_console, buf, n); | |
15743 | -} | |
15744 | -#endif | |
15745 | - | |
15746 | static int __add_preferred_console(char *name, int idx, char *options, | |
15747 | char *brl_options) | |
15748 | { | |
15749 | @@ -2312,11 +2401,16 @@ static void console_cont_flush(char *text, size_t size) | |
15750 | goto out; | |
15751 | ||
15752 | len = cont_print_text(text, size); | |
15753 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15754 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
15755 | + call_console_drivers(cont.level, NULL, 0, text, len); | |
15756 | +#else | |
15757 | raw_spin_unlock(&logbuf_lock); | |
15758 | stop_critical_timings(); | |
15759 | call_console_drivers(cont.level, NULL, 0, text, len); | |
15760 | start_critical_timings(); | |
15761 | local_irq_restore(flags); | |
15762 | +#endif | |
15763 | return; | |
15764 | out: | |
15765 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
15766 | @@ -2440,13 +2534,17 @@ void console_unlock(void) | |
15767 | console_idx = log_next(console_idx); | |
15768 | console_seq++; | |
15769 | console_prev = msg->flags; | |
15770 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15771 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
15772 | + call_console_drivers(level, ext_text, ext_len, text, len); | |
15773 | +#else | |
15774 | raw_spin_unlock(&logbuf_lock); | |
15775 | ||
15776 | stop_critical_timings(); /* don't trace print latency */ | |
15777 | call_console_drivers(level, ext_text, ext_len, text, len); | |
15778 | start_critical_timings(); | |
15779 | local_irq_restore(flags); | |
15780 | - | |
15781 | +#endif | |
15782 | if (do_cond_resched) | |
15783 | cond_resched(); | |
15784 | } | |
15785 | @@ -2498,6 +2596,11 @@ void console_unblank(void) | |
15786 | { | |
15787 | struct console *c; | |
15788 | ||
15789 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
15790 | + if (in_irq() || in_nmi()) | |
15791 | + return; | |
15792 | + } | |
15793 | + | |
15794 | /* | |
15795 | * console_unblank can no longer be called in interrupt context unless | |
15796 | * oops_in_progress is set to 1.. | |
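As a hedged illustration of how the killswitch added above is meant to be used (not part of the patch; the function name is a placeholder): once printk_kill() has been called, vprintk_emit() short-circuits into early_vprintk(), so output goes straight to the registered early console without taking logbuf_lock or the console semaphore.

#include <linux/printk.h>

/* Placeholder lockup-detector report path: after printk_kill(), all further
 * printk output is diverted to the early console. */
static void demo_watchdog_report(void)
{
	printk_kill();
	printk("watchdog: CPU appears stuck, dumping state\n");
}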
15797 | diff --git a/kernel/ptrace.c b/kernel/ptrace.c | |
15798 | index 1d3b7665d0be..ce666639789d 100644 | |
15799 | --- a/kernel/ptrace.c | |
15800 | +++ b/kernel/ptrace.c | |
15801 | @@ -128,7 +128,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) | |
15802 | ||
15803 | spin_lock_irq(&task->sighand->siglock); | |
15804 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
15805 | - task->state = __TASK_TRACED; | |
15806 | + unsigned long flags; | |
15807 | + | |
15808 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
15809 | + if (task->state & __TASK_TRACED) | |
15810 | + task->state = __TASK_TRACED; | |
15811 | + else | |
15812 | + task->saved_state = __TASK_TRACED; | |
15813 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
15814 | ret = true; | |
15815 | } | |
15816 | spin_unlock_irq(&task->sighand->siglock); | |
15817 | diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c | |
15818 | index 971e2b138063..a304670fb917 100644 | |
15819 | --- a/kernel/rcu/rcutorture.c | |
15820 | +++ b/kernel/rcu/rcutorture.c | |
15821 | @@ -404,6 +404,7 @@ static struct rcu_torture_ops rcu_ops = { | |
15822 | .name = "rcu" | |
15823 | }; | |
15824 | ||
15825 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15826 | /* | |
15827 | * Definitions for rcu_bh torture testing. | |
15828 | */ | |
15829 | @@ -443,6 +444,12 @@ static struct rcu_torture_ops rcu_bh_ops = { | |
15830 | .name = "rcu_bh" | |
15831 | }; | |
15832 | ||
15833 | +#else | |
15834 | +static struct rcu_torture_ops rcu_bh_ops = { | |
15835 | + .ttype = INVALID_RCU_FLAVOR, | |
15836 | +}; | |
15837 | +#endif | |
15838 | + | |
15839 | /* | |
15840 | * Don't even think about trying any of these in real life!!! | |
15841 | * The names include "busted", and they really mean it! | 
15842 | diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c | |
15843 | index 5d80925e7fc8..2b4bc2b2c25a 100644 | |
15844 | --- a/kernel/rcu/tree.c | |
15845 | +++ b/kernel/rcu/tree.c | |
15846 | @@ -56,6 +56,11 @@ | |
15847 | #include <linux/random.h> | |
15848 | #include <linux/trace_events.h> | |
15849 | #include <linux/suspend.h> | |
15850 | +#include <linux/delay.h> | |
15851 | +#include <linux/gfp.h> | |
15852 | +#include <linux/oom.h> | |
15853 | +#include <linux/smpboot.h> | |
15854 | +#include "../time/tick-internal.h" | |
15855 | ||
15856 | #include "tree.h" | |
15857 | #include "rcu.h" | |
15858 | @@ -259,6 +264,19 @@ void rcu_sched_qs(void) | |
15859 | this_cpu_ptr(&rcu_sched_data), true); | |
15860 | } | |
15861 | ||
15862 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
15863 | +static void rcu_preempt_qs(void); | |
15864 | + | |
15865 | +void rcu_bh_qs(void) | |
15866 | +{ | |
15867 | + unsigned long flags; | |
15868 | + | |
15869 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ | |
15870 | + local_irq_save(flags); | |
15871 | + rcu_preempt_qs(); | |
15872 | + local_irq_restore(flags); | |
15873 | +} | |
15874 | +#else | |
15875 | void rcu_bh_qs(void) | |
15876 | { | |
15877 | if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { | |
15878 | @@ -268,6 +286,7 @@ void rcu_bh_qs(void) | |
15879 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); | |
15880 | } | |
15881 | } | |
15882 | +#endif | |
15883 | ||
15884 | static DEFINE_PER_CPU(int, rcu_sched_qs_mask); | |
15885 | ||
15886 | @@ -448,11 +467,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); | |
15887 | /* | |
15888 | * Return the number of RCU BH batches started thus far for debug & stats. | |
15889 | */ | |
15890 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15891 | unsigned long rcu_batches_started_bh(void) | |
15892 | { | |
15893 | return rcu_bh_state.gpnum; | |
15894 | } | |
15895 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
15896 | +#endif | |
15897 | ||
15898 | /* | |
15899 | * Return the number of RCU batches completed thus far for debug & stats. | |
15900 | @@ -472,6 +493,7 @@ unsigned long rcu_batches_completed_sched(void) | |
15901 | } | |
15902 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
15903 | ||
15904 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15905 | /* | |
15906 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
15907 | */ | |
15908 | @@ -480,6 +502,7 @@ unsigned long rcu_batches_completed_bh(void) | |
15909 | return rcu_bh_state.completed; | |
15910 | } | |
15911 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | |
15912 | +#endif | |
15913 | ||
15914 | /* | |
15915 | * Return the number of RCU expedited batches completed thus far for | |
15916 | @@ -503,6 +526,7 @@ unsigned long rcu_exp_batches_completed_sched(void) | |
15917 | } | |
15918 | EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched); | |
15919 | ||
15920 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15921 | /* | |
15922 | * Force a quiescent state. | |
15923 | */ | |
15924 | @@ -521,6 +545,13 @@ void rcu_bh_force_quiescent_state(void) | |
15925 | } | |
15926 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | |
15927 | ||
15928 | +#else | |
15929 | +void rcu_force_quiescent_state(void) | |
15930 | +{ | |
15931 | +} | |
15932 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
15933 | +#endif | |
15934 | + | |
15935 | /* | |
15936 | * Force a quiescent state for RCU-sched. | |
15937 | */ | |
15938 | @@ -571,9 +602,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | |
15939 | case RCU_FLAVOR: | |
15940 | rsp = rcu_state_p; | |
15941 | break; | |
15942 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15943 | case RCU_BH_FLAVOR: | |
15944 | rsp = &rcu_bh_state; | |
15945 | break; | |
15946 | +#endif | |
15947 | case RCU_SCHED_FLAVOR: | |
15948 | rsp = &rcu_sched_state; | |
15949 | break; | |
15950 | @@ -3013,18 +3046,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |
15951 | /* | |
15952 | * Do RCU core processing for the current CPU. | |
15953 | */ | |
15954 | -static void rcu_process_callbacks(struct softirq_action *unused) | |
15955 | +static void rcu_process_callbacks(void) | |
15956 | { | |
15957 | struct rcu_state *rsp; | |
15958 | ||
15959 | if (cpu_is_offline(smp_processor_id())) | |
15960 | return; | |
15961 | - trace_rcu_utilization(TPS("Start RCU core")); | |
15962 | for_each_rcu_flavor(rsp) | |
15963 | __rcu_process_callbacks(rsp); | |
15964 | - trace_rcu_utilization(TPS("End RCU core")); | |
15965 | } | |
15966 | ||
15967 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
15968 | /* | |
15969 | * Schedule RCU callback invocation. If the specified type of RCU | |
15970 | * does not support RCU priority boosting, just do a direct call, | |
15971 | @@ -3036,19 +3068,106 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |
15972 | { | |
15973 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) | |
15974 | return; | |
15975 | - if (likely(!rsp->boost)) { | |
15976 | - rcu_do_batch(rsp, rdp); | |
15977 | - return; | |
15978 | - } | |
15979 | - invoke_rcu_callbacks_kthread(); | |
15980 | + rcu_do_batch(rsp, rdp); | |
15981 | } | |
15982 | ||
15983 | +static void rcu_wake_cond(struct task_struct *t, int status) | |
15984 | +{ | |
15985 | + /* | |
15986 | + * If the thread is yielding, only wake it when this | |
15987 | + * is invoked from idle | |
15988 | + */ | |
15989 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
15990 | + wake_up_process(t); | |
15991 | +} | |
15992 | + | |
15993 | +/* | |
15994 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
15995 | + */ | |
15996 | static void invoke_rcu_core(void) | |
15997 | { | |
15998 | - if (cpu_online(smp_processor_id())) | |
15999 | - raise_softirq(RCU_SOFTIRQ); | |
16000 | + unsigned long flags; | |
16001 | + struct task_struct *t; | |
16002 | + | |
16003 | + if (!cpu_online(smp_processor_id())) | |
16004 | + return; | |
16005 | + local_irq_save(flags); | |
16006 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
16007 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
16008 | + if (t != NULL && current != t) | |
16009 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
16010 | + local_irq_restore(flags); | |
16011 | } | |
16012 | ||
16013 | +static void rcu_cpu_kthread_park(unsigned int cpu) | |
16014 | +{ | |
16015 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
16016 | +} | |
16017 | + | |
16018 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
16019 | +{ | |
16020 | + return __this_cpu_read(rcu_cpu_has_work); | |
16021 | +} | |
16022 | + | |
16023 | +/* | |
16024 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
16025 | + * RCU softirq used in flavors and configurations of RCU that do not | |
16026 | + * support RCU priority boosting. | |
16027 | + */ | |
16028 | +static void rcu_cpu_kthread(unsigned int cpu) | |
16029 | +{ | |
16030 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
16031 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
16032 | + int spincnt; | |
16033 | + | |
16034 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
16035 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
16036 | + local_bh_disable(); | |
16037 | + *statusp = RCU_KTHREAD_RUNNING; | |
16038 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
16039 | + local_irq_disable(); | |
16040 | + work = *workp; | |
16041 | + *workp = 0; | |
16042 | + local_irq_enable(); | |
16043 | + if (work) | |
16044 | + rcu_process_callbacks(); | |
16045 | + local_bh_enable(); | |
16046 | + if (*workp == 0) { | |
16047 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
16048 | + *statusp = RCU_KTHREAD_WAITING; | |
16049 | + return; | |
16050 | + } | |
16051 | + } | |
16052 | + *statusp = RCU_KTHREAD_YIELDING; | |
16053 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
16054 | + schedule_timeout_interruptible(2); | |
16055 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
16056 | + *statusp = RCU_KTHREAD_WAITING; | |
16057 | +} | |
16058 | + | |
16059 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
16060 | + .store = &rcu_cpu_kthread_task, | |
16061 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
16062 | + .thread_fn = rcu_cpu_kthread, | |
16063 | + .thread_comm = "rcuc/%u", | |
16064 | + .setup = rcu_cpu_kthread_setup, | |
16065 | + .park = rcu_cpu_kthread_park, | |
16066 | +}; | |
16067 | + | |
16068 | +/* | |
16069 | + * Spawn per-CPU RCU core processing kthreads. | |
16070 | + */ | |
16071 | +static int __init rcu_spawn_core_kthreads(void) | |
16072 | +{ | |
16073 | + int cpu; | |
16074 | + | |
16075 | + for_each_possible_cpu(cpu) | |
16076 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
16077 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
16078 | + return 0; | |
16079 | +} | |
16080 | +early_initcall(rcu_spawn_core_kthreads); | |
16081 | + | |
16082 | /* | |
16083 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
16084 | */ | |
16085 | @@ -3192,6 +3311,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) | |
16086 | } | |
16087 | EXPORT_SYMBOL_GPL(call_rcu_sched); | |
16088 | ||
16089 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16090 | /* | |
16091 | * Queue an RCU callback for invocation after a quicker grace period. | |
16092 | */ | |
16093 | @@ -3200,6 +3320,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) | |
16094 | __call_rcu(head, func, &rcu_bh_state, -1, 0); | |
16095 | } | |
16096 | EXPORT_SYMBOL_GPL(call_rcu_bh); | |
16097 | +#endif | |
16098 | ||
16099 | /* | |
16100 | * Queue an RCU callback for lazy invocation after a grace period. | |
16101 | @@ -3291,6 +3412,7 @@ void synchronize_sched(void) | |
16102 | } | |
16103 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
16104 | ||
16105 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16106 | /** | |
16107 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
16108 | * | |
16109 | @@ -3317,6 +3439,7 @@ void synchronize_rcu_bh(void) | |
16110 | wait_rcu_gp(call_rcu_bh); | |
16111 | } | |
16112 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
16113 | +#endif | |
16114 | ||
16115 | /** | |
16116 | * get_state_synchronize_rcu - Snapshot current RCU state | |
16117 | @@ -3695,6 +3818,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |
16118 | mutex_unlock(&rsp->barrier_mutex); | |
16119 | } | |
16120 | ||
16121 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16122 | /** | |
16123 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
16124 | */ | |
16125 | @@ -3703,6 +3827,7 @@ void rcu_barrier_bh(void) | |
16126 | _rcu_barrier(&rcu_bh_state); | |
16127 | } | |
16128 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
16129 | +#endif | |
16130 | ||
16131 | /** | |
16132 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
16133 | @@ -4196,12 +4321,13 @@ void __init rcu_init(void) | |
16134 | ||
16135 | rcu_bootup_announce(); | |
16136 | rcu_init_geometry(); | |
16137 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16138 | rcu_init_one(&rcu_bh_state); | |
16139 | +#endif | |
16140 | rcu_init_one(&rcu_sched_state); | |
16141 | if (dump_tree) | |
16142 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
16143 | __rcu_init_preempt(); | |
16144 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
16145 | ||
16146 | /* | |
16147 | * We don't need protection against CPU-hotplug here because | |
16148 | diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h | |
16149 | index f714f873bf9d..71631196e66e 100644 | |
16150 | --- a/kernel/rcu/tree.h | |
16151 | +++ b/kernel/rcu/tree.h | |
16152 | @@ -587,18 +587,18 @@ extern struct list_head rcu_struct_flavors; | |
16153 | */ | |
16154 | extern struct rcu_state rcu_sched_state; | |
16155 | ||
16156 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16157 | extern struct rcu_state rcu_bh_state; | |
16158 | +#endif | |
16159 | ||
16160 | #ifdef CONFIG_PREEMPT_RCU | |
16161 | extern struct rcu_state rcu_preempt_state; | |
16162 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
16163 | ||
16164 | -#ifdef CONFIG_RCU_BOOST | |
16165 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
16166 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
16167 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
16168 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
16169 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
16170 | ||
16171 | #ifndef RCU_TREE_NONCORE | |
16172 | ||
16173 | @@ -618,10 +618,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); | |
16174 | static void __init __rcu_init_preempt(void); | |
16175 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
16176 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
16177 | -static void invoke_rcu_callbacks_kthread(void); | |
16178 | static bool rcu_is_callbacks_kthread(void); | |
16179 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
16180 | #ifdef CONFIG_RCU_BOOST | |
16181 | -static void rcu_preempt_do_callbacks(void); | |
16182 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
16183 | struct rcu_node *rnp); | |
16184 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
16185 | diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h | |
16186 | index 0082fce402a0..e08cddadd9c7 100644 | |
16187 | --- a/kernel/rcu/tree_plugin.h | |
16188 | +++ b/kernel/rcu/tree_plugin.h | |
16189 | @@ -24,25 +24,10 @@ | |
16190 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
16191 | */ | |
16192 | ||
16193 | -#include <linux/delay.h> | |
16194 | -#include <linux/gfp.h> | |
16195 | -#include <linux/oom.h> | |
16196 | -#include <linux/smpboot.h> | |
16197 | -#include "../time/tick-internal.h" | |
16198 | - | |
16199 | #ifdef CONFIG_RCU_BOOST | |
16200 | ||
16201 | #include "../locking/rtmutex_common.h" | |
16202 | ||
16203 | -/* | |
16204 | - * Control variables for per-CPU and per-rcu_node kthreads. These | |
16205 | - * handle all flavors of RCU. | |
16206 | - */ | |
16207 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
16208 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
16209 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
16210 | -DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
16211 | - | |
16212 | #else /* #ifdef CONFIG_RCU_BOOST */ | |
16213 | ||
16214 | /* | |
16215 | @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
16216 | ||
16217 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
16218 | ||
16219 | +/* | |
16220 | + * Control variables for per-CPU and per-rcu_node kthreads. These | |
16221 | + * handle all flavors of RCU. | |
16222 | + */ | |
16223 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
16224 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
16225 | +DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
16226 | + | |
16227 | #ifdef CONFIG_RCU_NOCB_CPU | |
16228 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
16229 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
16230 | @@ -426,7 +419,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |
16231 | } | |
16232 | ||
16233 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
16234 | - if (in_irq() || in_serving_softirq()) { | |
16235 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
16236 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
16237 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
16238 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
16239 | @@ -632,15 +625,6 @@ static void rcu_preempt_check_callbacks(void) | |
16240 | t->rcu_read_unlock_special.b.need_qs = true; | |
16241 | } | |
16242 | ||
16243 | -#ifdef CONFIG_RCU_BOOST | |
16244 | - | |
16245 | -static void rcu_preempt_do_callbacks(void) | |
16246 | -{ | |
16247 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
16248 | -} | |
16249 | - | |
16250 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
16251 | - | |
16252 | /* | |
16253 | * Queue a preemptible-RCU callback for invocation after a grace period. | |
16254 | */ | |
16255 | @@ -829,6 +813,19 @@ void exit_rcu(void) | |
16256 | ||
16257 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
16258 | ||
16259 | +/* | |
16260 | + * If boosting, set rcuc kthreads to realtime priority. | |
16261 | + */ | |
16262 | +static void rcu_cpu_kthread_setup(unsigned int cpu) | |
16263 | +{ | |
16264 | +#ifdef CONFIG_RCU_BOOST | |
16265 | + struct sched_param sp; | |
16266 | + | |
16267 | + sp.sched_priority = kthread_prio; | |
16268 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
16269 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
16270 | +} | |
16271 | + | |
16272 | #ifdef CONFIG_RCU_BOOST | |
16273 | ||
16274 | #include "../locking/rtmutex_common.h" | |
16275 | @@ -860,16 +857,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |
16276 | ||
16277 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | |
16278 | ||
16279 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
16280 | -{ | |
16281 | - /* | |
16282 | - * If the thread is yielding, only wake it when this | |
16283 | - * is invoked from idle | |
16284 | - */ | |
16285 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
16286 | - wake_up_process(t); | |
16287 | -} | |
16288 | - | |
16289 | /* | |
16290 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
16291 | * or ->boost_tasks, advancing the pointer to the next task in the | |
16292 | @@ -1013,23 +1000,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
16293 | } | |
16294 | ||
16295 | /* | |
16296 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
16297 | - */ | |
16298 | -static void invoke_rcu_callbacks_kthread(void) | |
16299 | -{ | |
16300 | - unsigned long flags; | |
16301 | - | |
16302 | - local_irq_save(flags); | |
16303 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
16304 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
16305 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
16306 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
16307 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
16308 | - } | |
16309 | - local_irq_restore(flags); | |
16310 | -} | |
16311 | - | |
16312 | -/* | |
16313 | * Is the current CPU running the RCU-callbacks kthread? | |
16314 | * Caller must have preemption disabled. | |
16315 | */ | |
16316 | @@ -1083,67 +1053,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
16317 | return 0; | |
16318 | } | |
16319 | ||
16320 | -static void rcu_kthread_do_work(void) | |
16321 | -{ | |
16322 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
16323 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
16324 | - rcu_preempt_do_callbacks(); | |
16325 | -} | |
16326 | - | |
16327 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
16328 | -{ | |
16329 | - struct sched_param sp; | |
16330 | - | |
16331 | - sp.sched_priority = kthread_prio; | |
16332 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
16333 | -} | |
16334 | - | |
16335 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
16336 | -{ | |
16337 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
16338 | -} | |
16339 | - | |
16340 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
16341 | -{ | |
16342 | - return __this_cpu_read(rcu_cpu_has_work); | |
16343 | -} | |
16344 | - | |
16345 | -/* | |
16346 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
16347 | - * RCU softirq used in flavors and configurations of RCU that do not | |
16348 | - * support RCU priority boosting. | |
16349 | - */ | |
16350 | -static void rcu_cpu_kthread(unsigned int cpu) | |
16351 | -{ | |
16352 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
16353 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
16354 | - int spincnt; | |
16355 | - | |
16356 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
16357 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
16358 | - local_bh_disable(); | |
16359 | - *statusp = RCU_KTHREAD_RUNNING; | |
16360 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
16361 | - local_irq_disable(); | |
16362 | - work = *workp; | |
16363 | - *workp = 0; | |
16364 | - local_irq_enable(); | |
16365 | - if (work) | |
16366 | - rcu_kthread_do_work(); | |
16367 | - local_bh_enable(); | |
16368 | - if (*workp == 0) { | |
16369 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
16370 | - *statusp = RCU_KTHREAD_WAITING; | |
16371 | - return; | |
16372 | - } | |
16373 | - } | |
16374 | - *statusp = RCU_KTHREAD_YIELDING; | |
16375 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
16376 | - schedule_timeout_interruptible(2); | |
16377 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
16378 | - *statusp = RCU_KTHREAD_WAITING; | |
16379 | -} | |
16380 | - | |
16381 | /* | |
16382 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
16383 | * served by the rcu_node in question. The CPU hotplug lock is still | |
16384 | @@ -1174,26 +1083,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |
16385 | free_cpumask_var(cm); | |
16386 | } | |
16387 | ||
16388 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
16389 | - .store = &rcu_cpu_kthread_task, | |
16390 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
16391 | - .thread_fn = rcu_cpu_kthread, | |
16392 | - .thread_comm = "rcuc/%u", | |
16393 | - .setup = rcu_cpu_kthread_setup, | |
16394 | - .park = rcu_cpu_kthread_park, | |
16395 | -}; | |
16396 | - | |
16397 | /* | |
16398 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
16399 | */ | |
16400 | static void __init rcu_spawn_boost_kthreads(void) | |
16401 | { | |
16402 | struct rcu_node *rnp; | |
16403 | - int cpu; | |
16404 | - | |
16405 | - for_each_possible_cpu(cpu) | |
16406 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
16407 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
16408 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
16409 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
16410 | } | |
16411 | @@ -1216,11 +1111,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
16412 | raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | |
16413 | } | |
16414 | ||
16415 | -static void invoke_rcu_callbacks_kthread(void) | |
16416 | -{ | |
16417 | - WARN_ON_ONCE(1); | |
16418 | -} | |
16419 | - | |
16420 | static bool rcu_is_callbacks_kthread(void) | |
16421 | { | |
16422 | return false; | |
16423 | @@ -1244,7 +1134,7 @@ static void rcu_prepare_kthreads(int cpu) | |
16424 | ||
16425 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
16426 | ||
16427 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
16428 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
16429 | ||
16430 | /* | |
16431 | * Check to see if any future RCU-related work will need to be done | |
16432 | @@ -1261,7 +1151,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
16433 | return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) | |
16434 | ? 0 : rcu_cpu_has_callbacks(NULL); | |
16435 | } | |
16436 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ | |
16437 | ||
16438 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
16439 | /* | |
16440 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
16441 | * after it. | |
16442 | @@ -1357,6 +1249,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |
16443 | return cbs_ready; | |
16444 | } | |
16445 | ||
16446 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16447 | + | |
16448 | /* | |
16449 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
16450 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
16451 | @@ -1402,6 +1296,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
16452 | *nextevt = basemono + dj * TICK_NSEC; | |
16453 | return 0; | |
16454 | } | |
16455 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
16456 | ||
16457 | /* | |
16458 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
16459 | diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c | |
16460 | index f0d8322bc3ec..b40d3468ba4e 100644 | |
16461 | --- a/kernel/rcu/update.c | |
16462 | +++ b/kernel/rcu/update.c | |
16463 | @@ -295,6 +295,7 @@ int rcu_read_lock_held(void) | |
16464 | } | |
16465 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
16466 | ||
16467 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16468 | /** | |
16469 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
16470 | * | |
16471 | @@ -321,6 +322,7 @@ int rcu_read_lock_bh_held(void) | |
16472 | return in_softirq() || irqs_disabled(); | |
16473 | } | |
16474 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
16475 | +#endif | |
16476 | ||
16477 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
16478 | ||
16479 | diff --git a/kernel/relay.c b/kernel/relay.c | |
16480 | index d797502140b9..cf05c17ddbed 100644 | |
16481 | --- a/kernel/relay.c | |
16482 | +++ b/kernel/relay.c | |
16483 | @@ -336,6 +336,10 @@ static void wakeup_readers(unsigned long data) | |
16484 | { | |
16485 | struct rchan_buf *buf = (struct rchan_buf *)data; | |
16486 | wake_up_interruptible(&buf->read_wait); | |
16487 | + /* | |
16488 | + * Stupid polling for now: | |
16489 | + */ | |
16490 | + mod_timer(&buf->timer, jiffies + 1); | |
16491 | } | |
16492 | ||
16493 | /** | |
16494 | @@ -353,6 +357,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) | |
16495 | init_waitqueue_head(&buf->read_wait); | |
16496 | kref_init(&buf->kref); | |
16497 | setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); | |
16498 | + mod_timer(&buf->timer, jiffies + 1); | |
16499 | } else | |
16500 | del_timer_sync(&buf->timer); | |
16501 | ||
16502 | @@ -767,15 +772,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) | |
16503 | else | |
16504 | buf->early_bytes += buf->chan->subbuf_size - | |
16505 | buf->padding[old_subbuf]; | |
16506 | - smp_mb(); | |
16507 | - if (waitqueue_active(&buf->read_wait)) | |
16508 | - /* | |
16509 | - * Calling wake_up_interruptible() from here | |
16510 | - * will deadlock if we happen to be logging | |
16511 | - * from the scheduler (trying to re-grab | |
16512 | - * rq->lock), so defer it. | |
16513 | - */ | |
16514 | - mod_timer(&buf->timer, jiffies + 1); | |
16515 | } | |
16516 | ||
16517 | old = buf->data; | |
16518 | diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile | |
16519 | index 5e59b832ae2b..7337a7f60e3f 100644 | |
16520 | --- a/kernel/sched/Makefile | |
16521 | +++ b/kernel/sched/Makefile | |
16522 | @@ -17,7 +17,7 @@ endif | |
16523 | ||
16524 | obj-y += core.o loadavg.o clock.o cputime.o | |
16525 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | |
16526 | -obj-y += wait.o swait.o completion.o idle.o | |
16527 | +obj-y += wait.o swait.o swork.o completion.o idle.o | |
16528 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | |
16529 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |
16530 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
16531 | diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c | |
16532 | index 8d0f35debf35..b62cf6400fe0 100644 | |
16533 | --- a/kernel/sched/completion.c | |
16534 | +++ b/kernel/sched/completion.c | |
16535 | @@ -30,10 +30,10 @@ void complete(struct completion *x) | |
16536 | { | |
16537 | unsigned long flags; | |
16538 | ||
16539 | - spin_lock_irqsave(&x->wait.lock, flags); | |
16540 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
16541 | x->done++; | |
16542 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
16543 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
16544 | + swake_up_locked(&x->wait); | |
16545 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
16546 | } | |
16547 | EXPORT_SYMBOL(complete); | |
16548 | ||
16549 | @@ -50,10 +50,10 @@ void complete_all(struct completion *x) | |
16550 | { | |
16551 | unsigned long flags; | |
16552 | ||
16553 | - spin_lock_irqsave(&x->wait.lock, flags); | |
16554 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
16555 | x->done += UINT_MAX/2; | |
16556 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
16557 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
16558 | + swake_up_all_locked(&x->wait); | |
16559 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
16560 | } | |
16561 | EXPORT_SYMBOL(complete_all); | |
16562 | ||
16563 | @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x, | |
16564 | long (*action)(long), long timeout, int state) | |
16565 | { | |
16566 | if (!x->done) { | |
16567 | - DECLARE_WAITQUEUE(wait, current); | |
16568 | + DECLARE_SWAITQUEUE(wait); | |
16569 | ||
16570 | - __add_wait_queue_tail_exclusive(&x->wait, &wait); | |
16571 | + __prepare_to_swait(&x->wait, &wait); | |
16572 | do { | |
16573 | if (signal_pending_state(state, current)) { | |
16574 | timeout = -ERESTARTSYS; | |
16575 | break; | |
16576 | } | |
16577 | __set_current_state(state); | |
16578 | - spin_unlock_irq(&x->wait.lock); | |
16579 | + raw_spin_unlock_irq(&x->wait.lock); | |
16580 | timeout = action(timeout); | |
16581 | - spin_lock_irq(&x->wait.lock); | |
16582 | + raw_spin_lock_irq(&x->wait.lock); | |
16583 | } while (!x->done && timeout); | |
16584 | - __remove_wait_queue(&x->wait, &wait); | |
16585 | + __finish_swait(&x->wait, &wait); | |
16586 | if (!x->done) | |
16587 | return timeout; | |
16588 | } | |
16589 | @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x, | |
16590 | { | |
16591 | might_sleep(); | |
16592 | ||
16593 | - spin_lock_irq(&x->wait.lock); | |
16594 | + raw_spin_lock_irq(&x->wait.lock); | |
16595 | timeout = do_wait_for_common(x, action, timeout, state); | |
16596 | - spin_unlock_irq(&x->wait.lock); | |
16597 | + raw_spin_unlock_irq(&x->wait.lock); | |
16598 | return timeout; | |
16599 | } | |
16600 | ||
16601 | @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x) | |
16602 | if (!READ_ONCE(x->done)) | |
16603 | return 0; | |
16604 | ||
16605 | - spin_lock_irqsave(&x->wait.lock, flags); | |
16606 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
16607 | if (!x->done) | |
16608 | ret = 0; | |
16609 | else | |
16610 | x->done--; | |
16611 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
16612 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
16613 | return ret; | |
16614 | } | |
16615 | EXPORT_SYMBOL(try_wait_for_completion); | |
16616 | @@ -311,7 +311,7 @@ bool completion_done(struct completion *x) | |
16617 | * after it's acquired the lock. | |
16618 | */ | |
16619 | smp_rmb(); | |
16620 | - spin_unlock_wait(&x->wait.lock); | |
16621 | + raw_spin_unlock_wait(&x->wait.lock); | |
16622 | return true; | |
16623 | } | |
16624 | EXPORT_SYMBOL(completion_done); | |
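The completion conversion above changes only the internals; callers keep the familiar API. A minimal sketch (not part of the patch; the demo_* names are placeholders) of usage that still works when complete() is called from hard-IRQ context on PREEMPT_RT:

#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/jiffies.h>

static DECLARE_COMPLETION(demo_done);

/* Completer side: now a raw spinlock plus swake_up_locked(), safe in atomic context. */
static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	complete(&demo_done);
	return IRQ_HANDLED;
}

/* Waiter side: sleeps on the simple waitqueue embedded in the completion. */
static int demo_wait_for_hw(void)
{
	if (!wait_for_completion_timeout(&demo_done, HZ))
		return -ETIMEDOUT;
	return 0;
}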
16625 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
16626 | index 44817c640e99..55aafcff5810 100644 | |
16627 | --- a/kernel/sched/core.c | |
16628 | +++ b/kernel/sched/core.c | |
16629 | @@ -129,7 +129,11 @@ const_debug unsigned int sysctl_sched_features = | |
16630 | * Number of tasks to iterate in a single balance run. | |
16631 | * Limited because this is done with IRQs disabled. | |
16632 | */ | |
16633 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
16634 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
16635 | +#else | |
16636 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
16637 | +#endif | |
16638 | ||
16639 | /* | |
16640 | * period over which we average the RT time consumption, measured | |
16641 | @@ -345,6 +349,7 @@ static void init_rq_hrtick(struct rq *rq) | |
16642 | ||
16643 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
16644 | rq->hrtick_timer.function = hrtick; | |
16645 | + rq->hrtick_timer.irqsafe = 1; | |
16646 | } | |
16647 | #else /* CONFIG_SCHED_HRTICK */ | |
16648 | static inline void hrtick_clear(struct rq *rq) | |
16649 | @@ -449,7 +454,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |
16650 | head->lastp = &node->next; | |
16651 | } | |
16652 | ||
16653 | -void wake_up_q(struct wake_q_head *head) | |
16654 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
16655 | { | |
16656 | struct wake_q_node *node = head->first; | |
16657 | ||
16658 | @@ -466,7 +471,10 @@ void wake_up_q(struct wake_q_head *head) | |
16659 | * wake_up_process() implies a wmb() to pair with the queueing | |
16660 | * in wake_q_add() so as not to miss wakeups. | |
16661 | */ | |
16662 | - wake_up_process(task); | |
16663 | + if (sleeper) | |
16664 | + wake_up_lock_sleeper(task); | |
16665 | + else | |
16666 | + wake_up_process(task); | |
16667 | put_task_struct(task); | |
16668 | } | |
16669 | } | |
16670 | @@ -502,6 +510,38 @@ void resched_curr(struct rq *rq) | |
16671 | trace_sched_wake_idle_without_ipi(cpu); | |
16672 | } | |
16673 | ||
16674 | +#ifdef CONFIG_PREEMPT_LAZY | |
16675 | +void resched_curr_lazy(struct rq *rq) | |
16676 | +{ | |
16677 | + struct task_struct *curr = rq->curr; | |
16678 | + int cpu; | |
16679 | + | |
16680 | + if (!sched_feat(PREEMPT_LAZY)) { | |
16681 | + resched_curr(rq); | |
16682 | + return; | |
16683 | + } | |
16684 | + | |
16685 | + lockdep_assert_held(&rq->lock); | |
16686 | + | |
16687 | + if (test_tsk_need_resched(curr)) | |
16688 | + return; | |
16689 | + | |
16690 | + if (test_tsk_need_resched_lazy(curr)) | |
16691 | + return; | |
16692 | + | |
16693 | + set_tsk_need_resched_lazy(curr); | |
16694 | + | |
16695 | + cpu = cpu_of(rq); | |
16696 | + if (cpu == smp_processor_id()) | |
16697 | + return; | |
16698 | + | |
16699 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
16700 | + smp_mb(); | |
16701 | + if (!tsk_is_polling(curr)) | |
16702 | + smp_send_reschedule(cpu); | |
16703 | +} | |
16704 | +#endif | |
16705 | + | |
16706 | void resched_cpu(int cpu) | |
16707 | { | |
16708 | struct rq *rq = cpu_rq(cpu); | |
16709 | @@ -525,11 +565,14 @@ void resched_cpu(int cpu) | |
16710 | */ | |
16711 | int get_nohz_timer_target(void) | |
16712 | { | |
16713 | - int i, cpu = smp_processor_id(); | |
16714 | + int i, cpu; | |
16715 | struct sched_domain *sd; | |
16716 | ||
16717 | + preempt_disable_rt(); | |
16718 | + cpu = smp_processor_id(); | |
16719 | + | |
16720 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
16721 | - return cpu; | |
16722 | + goto preempt_en_rt; | |
16723 | ||
16724 | rcu_read_lock(); | |
16725 | for_each_domain(cpu, sd) { | |
16726 | @@ -548,6 +591,8 @@ int get_nohz_timer_target(void) | |
16727 | cpu = housekeeping_any_cpu(); | |
16728 | unlock: | |
16729 | rcu_read_unlock(); | |
16730 | +preempt_en_rt: | |
16731 | + preempt_enable_rt(); | |
16732 | return cpu; | |
16733 | } | |
16734 | /* | |
16735 | @@ -1089,6 +1134,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
16736 | ||
16737 | lockdep_assert_held(&p->pi_lock); | |
16738 | ||
16739 | + if (__migrate_disabled(p)) { | |
16740 | + cpumask_copy(&p->cpus_allowed, new_mask); | |
16741 | + return; | |
16742 | + } | |
16743 | + | |
16744 | queued = task_on_rq_queued(p); | |
16745 | running = task_current(rq, p); | |
16746 | ||
16747 | @@ -1111,6 +1161,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
16748 | enqueue_task(rq, p, ENQUEUE_RESTORE); | |
16749 | } | |
16750 | ||
16751 | +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); | |
16752 | +static DEFINE_MUTEX(sched_down_mutex); | |
16753 | +static cpumask_t sched_down_cpumask; | |
16754 | + | |
16755 | +void tell_sched_cpu_down_begin(int cpu) | |
16756 | +{ | |
16757 | + mutex_lock(&sched_down_mutex); | |
16758 | + cpumask_set_cpu(cpu, &sched_down_cpumask); | |
16759 | + mutex_unlock(&sched_down_mutex); | |
16760 | +} | |
16761 | + | |
16762 | +void tell_sched_cpu_down_done(int cpu) | |
16763 | +{ | |
16764 | + mutex_lock(&sched_down_mutex); | |
16765 | + cpumask_clear_cpu(cpu, &sched_down_cpumask); | |
16766 | + mutex_unlock(&sched_down_mutex); | |
16767 | +} | |
16768 | + | |
16769 | +/** | |
16770 | + * migrate_me - try to move the current task off this cpu | |
16771 | + * | |
16772 | + * Used by the pin_current_cpu() code to try to get tasks | |
16773 | + * to move off the current CPU as it is going down. | |
16774 | + * It will only move the task if the task isn't pinned to | |
16775 | + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) | |
16776 | + * and the task is in the RUNNING state. Otherwise moving | |
16777 | + * the task would wake it up (change its state to running) | |
16778 | + * when the task did not expect it. | |
16779 | + * | |
16780 | + * Returns 1 if it succeeded in moving the current task | |
16781 | + * 0 otherwise. | |
16782 | + */ | |
16783 | +int migrate_me(void) | |
16784 | +{ | |
16785 | + struct task_struct *p = current; | |
16786 | + struct migration_arg arg; | |
16787 | + struct cpumask *cpumask; | |
16788 | + struct cpumask *mask; | |
16789 | + unsigned int dest_cpu; | |
16790 | + struct rq_flags rf; | |
16791 | + struct rq *rq; | |
16792 | + | |
16793 | + /* | |
16794 | + * We cannot migrate tasks bound to a CPU or tasks that are | |
16795 | + * not running, because moving such a task would wake it up. | |
16796 | + */ | |
16797 | + if (p->flags & PF_NO_SETAFFINITY || p->state) | |
16798 | + return 0; | |
16799 | + | |
16800 | + mutex_lock(&sched_down_mutex); | |
16801 | + rq = task_rq_lock(p, &rf); | |
16802 | + | |
16803 | + cpumask = this_cpu_ptr(&sched_cpumasks); | |
16804 | + mask = &p->cpus_allowed; | |
16805 | + | |
16806 | + cpumask_andnot(cpumask, mask, &sched_down_cpumask); | |
16807 | + | |
16808 | + if (!cpumask_weight(cpumask)) { | |
16809 | + /* It's only on this CPU? */ | |
16810 | + task_rq_unlock(rq, p, &rf); | |
16811 | + mutex_unlock(&sched_down_mutex); | |
16812 | + return 0; | |
16813 | + } | |
16814 | + | |
16815 | + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); | |
16816 | + | |
16817 | + arg.task = p; | |
16818 | + arg.dest_cpu = dest_cpu; | |
16819 | + | |
16820 | + task_rq_unlock(rq, p, &rf); | |
16821 | + | |
16822 | + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | |
16823 | + tlb_migrate_finish(p->mm); | |
16824 | + mutex_unlock(&sched_down_mutex); | |
16825 | + | |
16826 | + return 1; | |
16827 | +} | |
16828 | + | |
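A hypothetical sketch of how these helpers pair up around CPU hot-unplug (simplified; names outside this hunk, such as pin_current_cpu(), are assumptions):

    /* hotplug control side */
    tell_sched_cpu_down_begin(cpu);         /* tasks should start avoiding this CPU */
    /* ... bring the CPU down ... */
    tell_sched_cpu_down_done(cpu);          /* CPU teardown finished                */

    /* task side, e.g. from pin_current_cpu() on the dying CPU */
    if (!migrate_me())
            pr_debug("task is pinned to the dying CPU and has to wait\n");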
16829 | /* | |
16830 | * Change a given task's CPU affinity. Migrate the thread to a | |
16831 | * proper CPU and schedule it away if the CPU it's executing on | |
16832 | @@ -1168,7 +1296,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, | |
16833 | } | |
16834 | ||
16835 | /* Can the task run on the task's current CPU? If so, we're done */ | |
16836 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
16837 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
16838 | goto out; | |
16839 | ||
16840 | dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask); | |
16841 | @@ -1355,6 +1483,18 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p) | |
16842 | return ret; | |
16843 | } | |
16844 | ||
16845 | +static bool check_task_state(struct task_struct *p, long match_state) | |
16846 | +{ | |
16847 | + bool match = false; | |
16848 | + | |
16849 | + raw_spin_lock_irq(&p->pi_lock); | |
16850 | + if (p->state == match_state || p->saved_state == match_state) | |
16851 | + match = true; | |
16852 | + raw_spin_unlock_irq(&p->pi_lock); | |
16853 | + | |
16854 | + return match; | |
16855 | +} | |
16856 | + | |
16857 | /* | |
16858 | * wait_task_inactive - wait for a thread to unschedule. | |
16859 | * | |
16860 | @@ -1399,7 +1539,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
16861 | * is actually now running somewhere else! | |
16862 | */ | |
16863 | while (task_running(rq, p)) { | |
16864 | - if (match_state && unlikely(p->state != match_state)) | |
16865 | + if (match_state && !check_task_state(p, match_state)) | |
16866 | return 0; | |
16867 | cpu_relax(); | |
16868 | } | |
16869 | @@ -1414,7 +1554,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
16870 | running = task_running(rq, p); | |
16871 | queued = task_on_rq_queued(p); | |
16872 | ncsw = 0; | |
16873 | - if (!match_state || p->state == match_state) | |
16874 | + if (!match_state || p->state == match_state || | |
16875 | + p->saved_state == match_state) | |
16876 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
16877 | task_rq_unlock(rq, p, &rf); | |
16878 | ||
16879 | @@ -1670,10 +1811,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl | |
16880 | { | |
16881 | activate_task(rq, p, en_flags); | |
16882 | p->on_rq = TASK_ON_RQ_QUEUED; | |
16883 | - | |
16884 | - /* if a worker is waking up, notify workqueue */ | |
16885 | - if (p->flags & PF_WQ_WORKER) | |
16886 | - wq_worker_waking_up(p, cpu_of(rq)); | |
16887 | } | |
16888 | ||
16889 | /* | |
16890 | @@ -2008,8 +2145,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |
16891 | */ | |
16892 | smp_mb__before_spinlock(); | |
16893 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
16894 | - if (!(p->state & state)) | |
16895 | + if (!(p->state & state)) { | |
16896 | + /* | |
16897 | + * The task might be running due to a spinlock sleeper | |
16898 | + * wakeup. Check the saved state and set it to running | |
16899 | + * if the wakeup condition is true. | |
16900 | + */ | |
16901 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
16902 | + if (p->saved_state & state) { | |
16903 | + p->saved_state = TASK_RUNNING; | |
16904 | + success = 1; | |
16905 | + } | |
16906 | + } | |
16907 | goto out; | |
16908 | + } | |
16909 | + | |
16910 | + /* | |
16911 | + * If this is a regular wakeup, then we can unconditionally | |
16912 | + * clear the saved state of a "lock sleeper". | |
16913 | + */ | |
16914 | + if (!(wake_flags & WF_LOCK_SLEEPER)) | |
16915 | + p->saved_state = TASK_RUNNING; | |
16916 | ||
16917 | trace_sched_waking(p); | |
16918 | ||
16919 | @@ -2093,53 +2249,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |
16920 | } | |
16921 | ||
16922 | /** | |
16923 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
16924 | - * @p: the thread to be awakened | |
16925 | - * | |
16926 | - * Put @p on the run-queue if it's not already there. The caller must | |
16927 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
16928 | - * the current task. | |
16929 | - */ | |
16930 | -static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie) | |
16931 | -{ | |
16932 | - struct rq *rq = task_rq(p); | |
16933 | - | |
16934 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
16935 | - WARN_ON_ONCE(p == current)) | |
16936 | - return; | |
16937 | - | |
16938 | - lockdep_assert_held(&rq->lock); | |
16939 | - | |
16940 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
16941 | - /* | |
16942 | - * This is OK, because current is on_cpu, which avoids it being | |
16943 | - * picked for load-balance and preemption/IRQs are still | |
16944 | - * disabled avoiding further scheduler activity on it and we've | |
16945 | - * not yet picked a replacement task. | |
16946 | - */ | |
16947 | - lockdep_unpin_lock(&rq->lock, cookie); | |
16948 | - raw_spin_unlock(&rq->lock); | |
16949 | - raw_spin_lock(&p->pi_lock); | |
16950 | - raw_spin_lock(&rq->lock); | |
16951 | - lockdep_repin_lock(&rq->lock, cookie); | |
16952 | - } | |
16953 | - | |
16954 | - if (!(p->state & TASK_NORMAL)) | |
16955 | - goto out; | |
16956 | - | |
16957 | - trace_sched_waking(p); | |
16958 | - | |
16959 | - if (!task_on_rq_queued(p)) | |
16960 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP); | |
16961 | - | |
16962 | - ttwu_do_wakeup(rq, p, 0, cookie); | |
16963 | - if (schedstat_enabled()) | |
16964 | - ttwu_stat(p, smp_processor_id(), 0); | |
16965 | -out: | |
16966 | - raw_spin_unlock(&p->pi_lock); | |
16967 | -} | |
16968 | - | |
16969 | -/** | |
16970 | * wake_up_process - Wake up a specific process | |
16971 | * @p: The process to be woken up. | |
16972 | * | |
16973 | @@ -2157,6 +2266,18 @@ int wake_up_process(struct task_struct *p) | |
16974 | } | |
16975 | EXPORT_SYMBOL(wake_up_process); | |
16976 | ||
16977 | +/** | |
16978 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
16979 | + * @p: The process to be woken up. | |
16980 | + * | |
16981 | + * Same as wake_up_process() above, but wake_flags=WF_LOCK_SLEEPER to indicate | |
16982 | + * the nature of the wakeup. | |
16983 | + */ | |
16984 | +int wake_up_lock_sleeper(struct task_struct *p) | |
16985 | +{ | |
16986 | + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); | |
16987 | +} | |
16988 | + | |
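A hedged sketch of the intended caller, an rtmutex-style wakeup path that must not clobber the sleeper's saved_state (the waiter structure here is hypothetical):

    struct task_struct *sleeper = waiter->task;     /* hypothetical rtmutex waiter   */

    get_task_struct(sleeper);
    wake_up_lock_sleeper(sleeper);  /* WF_LOCK_SLEEPER keeps p->saved_state intact   */
    put_task_struct(sleeper);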
16989 | int wake_up_state(struct task_struct *p, unsigned int state) | |
16990 | { | |
16991 | return try_to_wake_up(p, state, 0); | |
16992 | @@ -2433,6 +2554,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) | |
16993 | p->on_cpu = 0; | |
16994 | #endif | |
16995 | init_task_preempt_count(p); | |
16996 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
16997 | + task_thread_info(p)->preempt_lazy_count = 0; | |
16998 | +#endif | |
16999 | #ifdef CONFIG_SMP | |
17000 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
17001 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
17002 | @@ -2761,8 +2885,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |
17003 | finish_arch_post_lock_switch(); | |
17004 | ||
17005 | fire_sched_in_preempt_notifiers(current); | |
17006 | + /* | |
17007 | + * We use mmdrop_delayed() here so we don't have to do the | |
17008 | + * full __mmdrop() when we are the last user. | |
17009 | + */ | |
17010 | if (mm) | |
17011 | - mmdrop(mm); | |
17012 | + mmdrop_delayed(mm); | |
17013 | if (unlikely(prev_state == TASK_DEAD)) { | |
17014 | if (prev->sched_class->task_dead) | |
17015 | prev->sched_class->task_dead(prev); | |
17016 | @@ -3237,6 +3365,77 @@ static inline void schedule_debug(struct task_struct *prev) | |
17017 | schedstat_inc(this_rq(), sched_count); | |
17018 | } | |
17019 | ||
17020 | +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) | |
17021 | + | |
17022 | +void migrate_disable(void) | |
17023 | +{ | |
17024 | + struct task_struct *p = current; | |
17025 | + | |
17026 | + if (in_atomic() || irqs_disabled()) { | |
17027 | +#ifdef CONFIG_SCHED_DEBUG | |
17028 | + p->migrate_disable_atomic++; | |
17029 | +#endif | |
17030 | + return; | |
17031 | + } | |
17032 | + | |
17033 | +#ifdef CONFIG_SCHED_DEBUG | |
17034 | + if (unlikely(p->migrate_disable_atomic)) { | |
17035 | + tracing_off(); | |
17036 | + WARN_ON_ONCE(1); | |
17037 | + } | |
17038 | +#endif | |
17039 | + | |
17040 | + if (p->migrate_disable) { | |
17041 | + p->migrate_disable++; | |
17042 | + return; | |
17043 | + } | |
17044 | + | |
17045 | + preempt_disable(); | |
17046 | + preempt_lazy_disable(); | |
17047 | + pin_current_cpu(); | |
17048 | + p->migrate_disable = 1; | |
17049 | + preempt_enable(); | |
17050 | +} | |
17051 | +EXPORT_SYMBOL(migrate_disable); | |
17052 | + | |
17053 | +void migrate_enable(void) | |
17054 | +{ | |
17055 | + struct task_struct *p = current; | |
17056 | + | |
17057 | + if (in_atomic() || irqs_disabled()) { | |
17058 | +#ifdef CONFIG_SCHED_DEBUG | |
17059 | + p->migrate_disable_atomic--; | |
17060 | +#endif | |
17061 | + return; | |
17062 | + } | |
17063 | + | |
17064 | +#ifdef CONFIG_SCHED_DEBUG | |
17065 | + if (unlikely(p->migrate_disable_atomic)) { | |
17066 | + tracing_off(); | |
17067 | + WARN_ON_ONCE(1); | |
17068 | + } | |
17069 | +#endif | |
17070 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
17071 | + | |
17072 | + if (p->migrate_disable > 1) { | |
17073 | + p->migrate_disable--; | |
17074 | + return; | |
17075 | + } | |
17076 | + | |
17077 | + preempt_disable(); | |
17078 | + /* | |
17079 | + * Clearing migrate_disable causes tsk_cpus_allowed to | |
17080 | + * show the task's original cpu affinity. | |
17081 | + */ | |
17082 | + p->migrate_disable = 0; | |
17083 | + | |
17084 | + unpin_current_cpu(); | |
17085 | + preempt_enable(); | |
17086 | + preempt_lazy_enable(); | |
17087 | +} | |
17088 | +EXPORT_SYMBOL(migrate_enable); | |
17089 | +#endif | |
17090 | + | |
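The typical usage pattern is a short, still-preemptible section that must stay on one CPU; a minimal sketch (the per-CPU counter is invented for illustration):

    static DEFINE_PER_CPU(unsigned long, my_hits);  /* hypothetical per-CPU data     */

    migrate_disable();              /* pinned to this CPU, but still preemptible     */
    this_cpu_inc(my_hits);
    migrate_enable();               /* may migrate again from here on                */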
17091 | /* | |
17092 | * Pick up the highest-prio task: | |
17093 | */ | |
17094 | @@ -3364,19 +3563,6 @@ static void __sched notrace __schedule(bool preempt) | |
17095 | } else { | |
17096 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | |
17097 | prev->on_rq = 0; | |
17098 | - | |
17099 | - /* | |
17100 | - * If a worker went to sleep, notify and ask workqueue | |
17101 | - * whether it wants to wake up a task to maintain | |
17102 | - * concurrency. | |
17103 | - */ | |
17104 | - if (prev->flags & PF_WQ_WORKER) { | |
17105 | - struct task_struct *to_wakeup; | |
17106 | - | |
17107 | - to_wakeup = wq_worker_sleeping(prev); | |
17108 | - if (to_wakeup) | |
17109 | - try_to_wake_up_local(to_wakeup, cookie); | |
17110 | - } | |
17111 | } | |
17112 | switch_count = &prev->nvcsw; | |
17113 | } | |
17114 | @@ -3386,6 +3572,7 @@ static void __sched notrace __schedule(bool preempt) | |
17115 | ||
17116 | next = pick_next_task(rq, prev, cookie); | |
17117 | clear_tsk_need_resched(prev); | |
17118 | + clear_tsk_need_resched_lazy(prev); | |
17119 | clear_preempt_need_resched(); | |
17120 | rq->clock_skip_update = 0; | |
17121 | ||
17122 | @@ -3407,9 +3594,20 @@ STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */ | |
17123 | ||
17124 | static inline void sched_submit_work(struct task_struct *tsk) | |
17125 | { | |
17126 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
17127 | + if (!tsk->state) | |
17128 | return; | |
17129 | /* | |
17130 | + * If a worker went to sleep, notify and ask workqueue whether | |
17131 | + * it wants to wake up a task to maintain concurrency. | |
17132 | + */ | |
17133 | + if (tsk->flags & PF_WQ_WORKER) | |
17134 | + wq_worker_sleeping(tsk); | |
17135 | + | |
17136 | + | |
17137 | + if (tsk_is_pi_blocked(tsk)) | |
17138 | + return; | |
17139 | + | |
17140 | + /* | |
17141 | * If we are going to sleep and we have plugged IO queued, | |
17142 | * make sure to submit it to avoid deadlocks. | |
17143 | */ | |
17144 | @@ -3417,6 +3615,12 @@ static inline void sched_submit_work(struct task_struct *tsk) | |
17145 | blk_schedule_flush_plug(tsk); | |
17146 | } | |
17147 | ||
17148 | +static void sched_update_worker(struct task_struct *tsk) | |
17149 | +{ | |
17150 | + if (tsk->flags & PF_WQ_WORKER) | |
17151 | + wq_worker_running(tsk); | |
17152 | +} | |
17153 | + | |
17154 | asmlinkage __visible void __sched schedule(void) | |
17155 | { | |
17156 | struct task_struct *tsk = current; | |
17157 | @@ -3427,6 +3631,7 @@ asmlinkage __visible void __sched schedule(void) | |
17158 | __schedule(false); | |
17159 | sched_preempt_enable_no_resched(); | |
17160 | } while (need_resched()); | |
17161 | + sched_update_worker(tsk); | |
17162 | } | |
17163 | EXPORT_SYMBOL(schedule); | |
17164 | ||
17165 | @@ -3490,6 +3695,30 @@ static void __sched notrace preempt_schedule_common(void) | |
17166 | } while (need_resched()); | |
17167 | } | |
17168 | ||
17169 | +#ifdef CONFIG_PREEMPT_LAZY | |
17170 | +/* | |
17171 | + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since this | |
17172 | + * is set by an RT task. Otherwise we try to avoid being scheduled out as long | |
17173 | + * as the preempt_lazy_count counter is > 0. | |
17174 | + */ | |
17175 | +static __always_inline int preemptible_lazy(void) | |
17176 | +{ | |
17177 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
17178 | + return 1; | |
17179 | + if (current_thread_info()->preempt_lazy_count) | |
17180 | + return 0; | |
17181 | + return 1; | |
17182 | +} | |
17183 | + | |
17184 | +#else | |
17185 | + | |
17186 | +static inline int preemptible_lazy(void) | |
17187 | +{ | |
17188 | + return 1; | |
17189 | +} | |
17190 | + | |
17191 | +#endif | |
17192 | + | |
17193 | #ifdef CONFIG_PREEMPT | |
17194 | /* | |
17195 | * this is the entry point to schedule() from in-kernel preemption | |
17196 | @@ -3504,7 +3733,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |
17197 | */ | |
17198 | if (likely(!preemptible())) | |
17199 | return; | |
17200 | - | |
17201 | + if (!preemptible_lazy()) | |
17202 | + return; | |
17203 | preempt_schedule_common(); | |
17204 | } | |
17205 | NOKPROBE_SYMBOL(preempt_schedule); | |
17206 | @@ -3531,6 +3761,9 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
17207 | if (likely(!preemptible())) | |
17208 | return; | |
17209 | ||
17210 | + if (!preemptible_lazy()) | |
17211 | + return; | |
17212 | + | |
17213 | do { | |
17214 | /* | |
17215 | * Because the function tracer can trace preempt_count_sub() | |
17216 | @@ -3553,7 +3786,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
17217 | * an infinite recursion. | |
17218 | */ | |
17219 | prev_ctx = exception_enter(); | |
17220 | + /* | |
17221 | + * The add/subtract must not be traced by the function | |
17222 | + * tracer. But we still want to account for the | |
17223 | + * preempt off latency tracer. Since the _notrace versions | |
17224 | + * of add/subtract skip the accounting for the latency tracer, | |
17225 | + * we must force it manually. | |
17226 | + */ | |
17227 | + start_critical_timings(); | |
17228 | __schedule(true); | |
17229 | + stop_critical_timings(); | |
17230 | exception_exit(prev_ctx); | |
17231 | ||
17232 | preempt_latency_stop(1); | |
17233 | @@ -4901,6 +5143,7 @@ int __cond_resched_lock(spinlock_t *lock) | |
17234 | } | |
17235 | EXPORT_SYMBOL(__cond_resched_lock); | |
17236 | ||
17237 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17238 | int __sched __cond_resched_softirq(void) | |
17239 | { | |
17240 | BUG_ON(!in_softirq()); | |
17241 | @@ -4914,6 +5157,7 @@ int __sched __cond_resched_softirq(void) | |
17242 | return 0; | |
17243 | } | |
17244 | EXPORT_SYMBOL(__cond_resched_softirq); | |
17245 | +#endif | |
17246 | ||
17247 | /** | |
17248 | * yield - yield the current processor to other threads. | |
17249 | @@ -5283,7 +5527,9 @@ void init_idle(struct task_struct *idle, int cpu) | |
17250 | ||
17251 | /* Set the preempt count _outside_ the spinlocks! */ | |
17252 | init_idle_preempt_count(idle, cpu); | |
17253 | - | |
17254 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
17255 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
17256 | +#endif | |
17257 | /* | |
17258 | * The idle tasks have their own, simple scheduling class: | |
17259 | */ | |
17260 | @@ -5426,6 +5672,8 @@ void sched_setnuma(struct task_struct *p, int nid) | |
17261 | #endif /* CONFIG_NUMA_BALANCING */ | |
17262 | ||
17263 | #ifdef CONFIG_HOTPLUG_CPU | |
17264 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
17265 | + | |
17266 | /* | |
17267 | * Ensures that the idle task is using init_mm right before its cpu goes | |
17268 | * offline. | |
17269 | @@ -5440,7 +5688,12 @@ void idle_task_exit(void) | |
17270 | switch_mm_irqs_off(mm, &init_mm, current); | |
17271 | finish_arch_post_lock_switch(); | |
17272 | } | |
17273 | - mmdrop(mm); | |
17274 | + /* | |
17275 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
17276 | + * call mmdrop() nor mmdrop_delayed() from here. | |
17277 | + */ | |
17278 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
17279 | + | |
17280 | } | |
17281 | ||
17282 | /* | |
17283 | @@ -7315,6 +7568,10 @@ int sched_cpu_dying(unsigned int cpu) | |
17284 | update_max_interval(); | |
17285 | nohz_balance_exit_idle(cpu); | |
17286 | hrtick_clear(rq); | |
17287 | + if (per_cpu(idle_last_mm, cpu)) { | |
17288 | + mmdrop_delayed(per_cpu(idle_last_mm, cpu)); | |
17289 | + per_cpu(idle_last_mm, cpu) = NULL; | |
17290 | + } | |
17291 | return 0; | |
17292 | } | |
17293 | #endif | |
17294 | @@ -7566,7 +7823,7 @@ void __init sched_init(void) | |
17295 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
17296 | static inline int preempt_count_equals(int preempt_offset) | |
17297 | { | |
17298 | - int nested = preempt_count() + rcu_preempt_depth(); | |
17299 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
17300 | ||
17301 | return (nested == preempt_offset); | |
17302 | } | |
17303 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c | |
17304 | index 1ce8867283dc..766da04b06a0 100644 | |
17305 | --- a/kernel/sched/deadline.c | |
17306 | +++ b/kernel/sched/deadline.c | |
17307 | @@ -697,6 +697,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |
17308 | ||
17309 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
17310 | timer->function = dl_task_timer; | |
17311 | + timer->irqsafe = 1; | |
17312 | } | |
17313 | ||
17314 | static | |
17315 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c | |
17316 | index 2a0a9995256d..48a9b6f57249 100644 | |
17317 | --- a/kernel/sched/debug.c | |
17318 | +++ b/kernel/sched/debug.c | |
17319 | @@ -552,6 +552,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |
17320 | P(rt_throttled); | |
17321 | PN(rt_time); | |
17322 | PN(rt_runtime); | |
17323 | +#ifdef CONFIG_SMP | |
17324 | + P(rt_nr_migratory); | |
17325 | +#endif | |
17326 | ||
17327 | #undef PN | |
17328 | #undef P | |
17329 | @@ -947,6 +950,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |
17330 | #endif | |
17331 | P(policy); | |
17332 | P(prio); | |
17333 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17334 | + P(migrate_disable); | |
17335 | +#endif | |
17336 | + P(nr_cpus_allowed); | |
17337 | #undef PN | |
17338 | #undef __PN | |
17339 | #undef P | |
17340 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | |
17341 | index 8b3610c871f2..1145079af264 100644 | |
17342 | --- a/kernel/sched/fair.c | |
17343 | +++ b/kernel/sched/fair.c | |
17344 | @@ -3508,7 +3508,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
17345 | ideal_runtime = sched_slice(cfs_rq, curr); | |
17346 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
17347 | if (delta_exec > ideal_runtime) { | |
17348 | - resched_curr(rq_of(cfs_rq)); | |
17349 | + resched_curr_lazy(rq_of(cfs_rq)); | |
17350 | /* | |
17351 | * The current task ran long enough, ensure it doesn't get | |
17352 | * re-elected due to buddy favours. | |
17353 | @@ -3532,7 +3532,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
17354 | return; | |
17355 | ||
17356 | if (delta > ideal_runtime) | |
17357 | - resched_curr(rq_of(cfs_rq)); | |
17358 | + resched_curr_lazy(rq_of(cfs_rq)); | |
17359 | } | |
17360 | ||
17361 | static void | |
17362 | @@ -3677,7 +3677,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |
17363 | * validating it and just reschedule. | |
17364 | */ | |
17365 | if (queued) { | |
17366 | - resched_curr(rq_of(cfs_rq)); | |
17367 | + resched_curr_lazy(rq_of(cfs_rq)); | |
17368 | return; | |
17369 | } | |
17370 | /* | |
17371 | @@ -3859,7 +3859,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) | |
17372 | * hierarchy can be throttled | |
17373 | */ | |
17374 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
17375 | - resched_curr(rq_of(cfs_rq)); | |
17376 | + resched_curr_lazy(rq_of(cfs_rq)); | |
17377 | } | |
17378 | ||
17379 | static __always_inline | |
17380 | @@ -4487,7 +4487,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |
17381 | ||
17382 | if (delta < 0) { | |
17383 | if (rq->curr == p) | |
17384 | - resched_curr(rq); | |
17385 | + resched_curr_lazy(rq); | |
17386 | return; | |
17387 | } | |
17388 | hrtick_start(rq, delta); | |
17389 | @@ -5676,7 +5676,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |
17390 | return; | |
17391 | ||
17392 | preempt: | |
17393 | - resched_curr(rq); | |
17394 | + resched_curr_lazy(rq); | |
17395 | /* | |
17396 | * Only set the backward buddy when the current task is still | |
17397 | * on the rq. This can happen when a wakeup gets interleaved | |
17398 | @@ -8402,7 +8402,7 @@ static void task_fork_fair(struct task_struct *p) | |
17399 | * 'current' within the tree based on its new key value. | |
17400 | */ | |
17401 | swap(curr->vruntime, se->vruntime); | |
17402 | - resched_curr(rq); | |
17403 | + resched_curr_lazy(rq); | |
17404 | } | |
17405 | ||
17406 | se->vruntime -= cfs_rq->min_vruntime; | |
17407 | @@ -8426,7 +8426,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) | |
17408 | */ | |
17409 | if (rq->curr == p) { | |
17410 | if (p->prio > oldprio) | |
17411 | - resched_curr(rq); | |
17412 | + resched_curr_lazy(rq); | |
17413 | } else | |
17414 | check_preempt_curr(rq, p, 0); | |
17415 | } | |
17416 | diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
17417 | index 69631fa46c2f..6d28fcd08872 100644 | |
17418 | --- a/kernel/sched/features.h | |
17419 | +++ b/kernel/sched/features.h | |
17420 | @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true) | |
17421 | */ | |
17422 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
17423 | ||
17424 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17425 | +SCHED_FEAT(TTWU_QUEUE, false) | |
17426 | +# ifdef CONFIG_PREEMPT_LAZY | |
17427 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
17428 | +# endif | |
17429 | +#else | |
17430 | + | |
17431 | /* | |
17432 | * Queue remote wakeups on the target CPU and process them | |
17433 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
17434 | */ | |
17435 | SCHED_FEAT(TTWU_QUEUE, true) | |
17436 | +#endif | |
17437 | ||
17438 | #ifdef HAVE_RT_PUSH_IPI | |
17439 | /* | |
17440 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c | |
17441 | index d5690b722691..731cd0e98c15 100644 | |
17442 | --- a/kernel/sched/rt.c | |
17443 | +++ b/kernel/sched/rt.c | |
17444 | @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |
17445 | ||
17446 | hrtimer_init(&rt_b->rt_period_timer, | |
17447 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
17448 | + rt_b->rt_period_timer.irqsafe = 1; | |
17449 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
17450 | } | |
17451 | ||
17452 | @@ -101,6 +102,7 @@ void init_rt_rq(struct rt_rq *rt_rq) | |
17453 | rt_rq->push_cpu = nr_cpu_ids; | |
17454 | raw_spin_lock_init(&rt_rq->push_lock); | |
17455 | init_irq_work(&rt_rq->push_work, push_irq_work_func); | |
17456 | + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ; | |
17457 | #endif | |
17458 | #endif /* CONFIG_SMP */ | |
17459 | /* We start in dequeued state, because no RT tasks are queued */ | |
17460 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
17461 | index c64fc5114004..af58f9b3ece4 100644 | |
17462 | --- a/kernel/sched/sched.h | |
17463 | +++ b/kernel/sched/sched.h | |
17464 | @@ -1138,6 +1138,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |
17465 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | |
17466 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
17467 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
17468 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
17469 | ||
17470 | /* | |
17471 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
17472 | @@ -1316,6 +1317,15 @@ extern void init_sched_fair_class(void); | |
17473 | extern void resched_curr(struct rq *rq); | |
17474 | extern void resched_cpu(int cpu); | |
17475 | ||
17476 | +#ifdef CONFIG_PREEMPT_LAZY | |
17477 | +extern void resched_curr_lazy(struct rq *rq); | |
17478 | +#else | |
17479 | +static inline void resched_curr_lazy(struct rq *rq) | |
17480 | +{ | |
17481 | + resched_curr(rq); | |
17482 | +} | |
17483 | +#endif | |
17484 | + | |
17485 | extern struct rt_bandwidth def_rt_bandwidth; | |
17486 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
17487 | ||
17488 | diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c | |
17489 | index 82f0dff90030..ef027ff3250a 100644 | |
17490 | --- a/kernel/sched/swait.c | |
17491 | +++ b/kernel/sched/swait.c | |
17492 | @@ -1,5 +1,6 @@ | |
17493 | #include <linux/sched.h> | |
17494 | #include <linux/swait.h> | |
17495 | +#include <linux/suspend.h> | |
17496 | ||
17497 | void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
17498 | struct lock_class_key *key) | |
17499 | @@ -29,6 +30,25 @@ void swake_up_locked(struct swait_queue_head *q) | |
17500 | } | |
17501 | EXPORT_SYMBOL(swake_up_locked); | |
17502 | ||
17503 | +void swake_up_all_locked(struct swait_queue_head *q) | |
17504 | +{ | |
17505 | + struct swait_queue *curr; | |
17506 | + int wakes = 0; | |
17507 | + | |
17508 | + while (!list_empty(&q->task_list)) { | |
17509 | + | |
17510 | + curr = list_first_entry(&q->task_list, typeof(*curr), | |
17511 | + task_list); | |
17512 | + wake_up_process(curr->task); | |
17513 | + list_del_init(&curr->task_list); | |
17514 | + wakes++; | |
17515 | + } | |
17516 | + if (pm_in_action) | |
17517 | + return; | |
17518 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
17519 | +} | |
17520 | +EXPORT_SYMBOL(swake_up_all_locked); | |
17521 | + | |
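A hedged caller sketch: this variant expects q->lock to already be held, so a completion-style user would look roughly like this (the bookkeeping around the wakeup is invented for illustration):

    unsigned long flags;

    raw_spin_lock_irqsave(&q->lock, flags);
    /* ... mark the condition the waiters are sleeping on as true ... */
    swake_up_all_locked(q);         /* wake every waiter while the lock is held      */
    raw_spin_unlock_irqrestore(&q->lock, flags);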
17522 | void swake_up(struct swait_queue_head *q) | |
17523 | { | |
17524 | unsigned long flags; | |
17525 | @@ -54,6 +74,7 @@ void swake_up_all(struct swait_queue_head *q) | |
17526 | if (!swait_active(q)) | |
17527 | return; | |
17528 | ||
17529 | + WARN_ON(irqs_disabled()); | |
17530 | raw_spin_lock_irq(&q->lock); | |
17531 | list_splice_init(&q->task_list, &tmp); | |
17532 | while (!list_empty(&tmp)) { | |
17533 | diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c | |
17534 | new file mode 100644 | |
17535 | index 000000000000..1950f40ca725 | |
17536 | --- /dev/null | |
17537 | +++ b/kernel/sched/swork.c | |
17538 | @@ -0,0 +1,173 @@ | |
17539 | +/* | |
17540 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de | |
17541 | + * | |
17542 | + * Provides a PREEMPT_RT_FULL-safe framework for enqueuing callbacks | |
17543 | + * from irq context. The callbacks are executed in kthread context. | |
17544 | + */ | |
17545 | + | |
17546 | +#include <linux/swait.h> | |
17547 | +#include <linux/swork.h> | |
17548 | +#include <linux/kthread.h> | |
17549 | +#include <linux/slab.h> | |
17550 | +#include <linux/spinlock.h> | |
17551 | +#include <linux/export.h> | |
17552 | + | |
17553 | +#define SWORK_EVENT_PENDING (1 << 0) | |
17554 | + | |
17555 | +static DEFINE_MUTEX(worker_mutex); | |
17556 | +static struct sworker *glob_worker; | |
17557 | + | |
17558 | +struct sworker { | |
17559 | + struct list_head events; | |
17560 | + struct swait_queue_head wq; | |
17561 | + | |
17562 | + raw_spinlock_t lock; | |
17563 | + | |
17564 | + struct task_struct *task; | |
17565 | + int refs; | |
17566 | +}; | |
17567 | + | |
17568 | +static bool swork_readable(struct sworker *worker) | |
17569 | +{ | |
17570 | + bool r; | |
17571 | + | |
17572 | + if (kthread_should_stop()) | |
17573 | + return true; | |
17574 | + | |
17575 | + raw_spin_lock_irq(&worker->lock); | |
17576 | + r = !list_empty(&worker->events); | |
17577 | + raw_spin_unlock_irq(&worker->lock); | |
17578 | + | |
17579 | + return r; | |
17580 | +} | |
17581 | + | |
17582 | +static int swork_kthread(void *arg) | |
17583 | +{ | |
17584 | + struct sworker *worker = arg; | |
17585 | + | |
17586 | + for (;;) { | |
17587 | + swait_event_interruptible(worker->wq, | |
17588 | + swork_readable(worker)); | |
17589 | + if (kthread_should_stop()) | |
17590 | + break; | |
17591 | + | |
17592 | + raw_spin_lock_irq(&worker->lock); | |
17593 | + while (!list_empty(&worker->events)) { | |
17594 | + struct swork_event *sev; | |
17595 | + | |
17596 | + sev = list_first_entry(&worker->events, | |
17597 | + struct swork_event, item); | |
17598 | + list_del(&sev->item); | |
17599 | + raw_spin_unlock_irq(&worker->lock); | |
17600 | + | |
17601 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, | |
17602 | + &sev->flags)); | |
17603 | + sev->func(sev); | |
17604 | + raw_spin_lock_irq(&worker->lock); | |
17605 | + } | |
17606 | + raw_spin_unlock_irq(&worker->lock); | |
17607 | + } | |
17608 | + return 0; | |
17609 | +} | |
17610 | + | |
17611 | +static struct sworker *swork_create(void) | |
17612 | +{ | |
17613 | + struct sworker *worker; | |
17614 | + | |
17615 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); | |
17616 | + if (!worker) | |
17617 | + return ERR_PTR(-ENOMEM); | |
17618 | + | |
17619 | + INIT_LIST_HEAD(&worker->events); | |
17620 | + raw_spin_lock_init(&worker->lock); | |
17621 | + init_swait_queue_head(&worker->wq); | |
17622 | + | |
17623 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); | |
17624 | + if (IS_ERR(worker->task)) { | |
17625 | + kfree(worker); | |
17626 | + return ERR_PTR(-ENOMEM); | |
17627 | + } | |
17628 | + | |
17629 | + return worker; | |
17630 | +} | |
17631 | + | |
17632 | +static void swork_destroy(struct sworker *worker) | |
17633 | +{ | |
17634 | + kthread_stop(worker->task); | |
17635 | + | |
17636 | + WARN_ON(!list_empty(&worker->events)); | |
17637 | + kfree(worker); | |
17638 | +} | |
17639 | + | |
17640 | +/** | |
17641 | + * swork_queue - queue swork | |
17642 | + * | |
17643 | + * Returns %false if @work was already on a queue, %true otherwise. | |
17644 | + * | |
17645 | + * The work is queued and processed on a random CPU | |
17646 | + */ | |
17647 | +bool swork_queue(struct swork_event *sev) | |
17648 | +{ | |
17649 | + unsigned long flags; | |
17650 | + | |
17651 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) | |
17652 | + return false; | |
17653 | + | |
17654 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); | |
17655 | + list_add_tail(&sev->item, &glob_worker->events); | |
17656 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
17657 | + | |
17658 | + swake_up(&glob_worker->wq); | |
17659 | + return true; | |
17660 | +} | |
17661 | +EXPORT_SYMBOL_GPL(swork_queue); | |
17662 | + | |
17663 | +/** | |
17664 | + * swork_get - get an instance of the sworker | |
17665 | + * | |
17666 | + * Returns a negative error code if the initialization of the worker did not | |
17667 | + * succeed, %0 otherwise. | |
17668 | + * | |
17669 | + */ | |
17670 | +int swork_get(void) | |
17671 | +{ | |
17672 | + struct sworker *worker; | |
17673 | + | |
17674 | + mutex_lock(&worker_mutex); | |
17675 | + if (!glob_worker) { | |
17676 | + worker = swork_create(); | |
17677 | + if (IS_ERR(worker)) { | |
17678 | + mutex_unlock(&worker_mutex); | |
17679 | + return -ENOMEM; | |
17680 | + } | |
17681 | + | |
17682 | + glob_worker = worker; | |
17683 | + } | |
17684 | + | |
17685 | + glob_worker->refs++; | |
17686 | + mutex_unlock(&worker_mutex); | |
17687 | + | |
17688 | + return 0; | |
17689 | +} | |
17690 | +EXPORT_SYMBOL_GPL(swork_get); | |
17691 | + | |
17692 | +/** | |
17693 | + * swork_put - puts an instance of the sworker | |
17694 | + * | |
17695 | + * Will destroy the sworker thread. This function must not be called until all | |
17696 | + * queued events have been completed. | |
17697 | + */ | |
17698 | +void swork_put(void) | |
17699 | +{ | |
17700 | + mutex_lock(&worker_mutex); | |
17701 | + | |
17702 | + glob_worker->refs--; | |
17703 | + if (glob_worker->refs > 0) | |
17704 | + goto out; | |
17705 | + | |
17706 | + swork_destroy(glob_worker); | |
17707 | + glob_worker = NULL; | |
17708 | +out: | |
17709 | + mutex_unlock(&worker_mutex); | |
17710 | +} | |
17711 | +EXPORT_SYMBOL_GPL(swork_put); | |
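A minimal, hypothetical consumer of this API (all names below are invented; the event fields match the struct used above): grab the worker once, queue events from hard-irq context, and drop the reference on teardown.

    static struct swork_event my_event;

    static void my_event_fn(struct swork_event *ev)
    {
            /* runs in the "kswork" kthread, fully preemptible */
    }

    static int my_init(void)
    {
            int err = swork_get();          /* create or reuse the global worker     */

            if (err)
                    return err;
            INIT_LIST_HEAD(&my_event.item);
            my_event.flags = 0;
            my_event.func = my_event_fn;
            return 0;
    }

    /* from an interrupt handler */
    swork_queue(&my_event);

    /* on teardown, once all queued events have completed */
    swork_put();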
17712 | diff --git a/kernel/signal.c b/kernel/signal.c | |
17713 | index af21afc00d08..7ead97a43298 100644 | |
17714 | --- a/kernel/signal.c | |
17715 | +++ b/kernel/signal.c | |
17716 | @@ -14,6 +14,7 @@ | |
17717 | #include <linux/export.h> | |
17718 | #include <linux/init.h> | |
17719 | #include <linux/sched.h> | |
17720 | +#include <linux/sched/rt.h> | |
17721 | #include <linux/fs.h> | |
17722 | #include <linux/tty.h> | |
17723 | #include <linux/binfmts.h> | |
17724 | @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task) | |
17725 | return false; | |
17726 | } | |
17727 | ||
17728 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
17729 | +{ | |
17730 | + struct sigqueue *q = t->sigqueue_cache; | |
17731 | + | |
17732 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) | |
17733 | + return NULL; | |
17734 | + return q; | |
17735 | +} | |
17736 | + | |
17737 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) | |
17738 | +{ | |
17739 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) | |
17740 | + return 0; | |
17741 | + return 1; | |
17742 | +} | |
17743 | + | |
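The cache is a single cmpxchg()-guarded slot per task; the consumers further down use it roughly like this hedged sketch (allocation flags and error handling omitted):

    struct sigqueue *q = get_task_cache(current);   /* take the cached entry, if any */

    if (!q)
            q = kmem_cache_alloc(sigqueue_cachep, GFP_ATOMIC);
    /* ... fill in and deliver the signal ... */
    if (put_task_cache(current, q))                 /* slot already occupied?        */
            kmem_cache_free(sigqueue_cachep, q);    /* then free it for real         */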
17744 | /* | |
17745 | * allocate a new signal queue record | |
17746 | * - this may be called without locks if and only if t == current, otherwise an | |
17747 | * appropriate lock must be held to stop the target task from exiting | |
17748 | */ | |
17749 | static struct sigqueue * | |
17750 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
17751 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
17752 | + int override_rlimit, int fromslab) | |
17753 | { | |
17754 | struct sigqueue *q = NULL; | |
17755 | struct user_struct *user; | |
17756 | @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
17757 | if (override_rlimit || | |
17758 | atomic_read(&user->sigpending) <= | |
17759 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
17760 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
17761 | + if (!fromslab) | |
17762 | + q = get_task_cache(t); | |
17763 | + if (!q) | |
17764 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
17765 | } else { | |
17766 | print_dropped_signal(sig); | |
17767 | } | |
17768 | @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
17769 | return q; | |
17770 | } | |
17771 | ||
17772 | +static struct sigqueue * | |
17773 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
17774 | + int override_rlimit) | |
17775 | +{ | |
17776 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); | |
17777 | +} | |
17778 | + | |
17779 | static void __sigqueue_free(struct sigqueue *q) | |
17780 | { | |
17781 | if (q->flags & SIGQUEUE_PREALLOC) | |
17782 | @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q) | |
17783 | kmem_cache_free(sigqueue_cachep, q); | |
17784 | } | |
17785 | ||
17786 | +static void sigqueue_free_current(struct sigqueue *q) | |
17787 | +{ | |
17788 | + struct user_struct *up; | |
17789 | + | |
17790 | + if (q->flags & SIGQUEUE_PREALLOC) | |
17791 | + return; | |
17792 | + | |
17793 | + up = q->user; | |
17794 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
17795 | + atomic_dec(&up->sigpending); | |
17796 | + free_uid(up); | |
17797 | + } else | |
17798 | + __sigqueue_free(q); | |
17799 | +} | |
17800 | + | |
17801 | void flush_sigqueue(struct sigpending *queue) | |
17802 | { | |
17803 | struct sigqueue *q; | |
17804 | @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue) | |
17805 | } | |
17806 | ||
17807 | /* | |
17808 | + * Called from __exit_signal. Flush tsk->pending and | |
17809 | + * tsk->sigqueue_cache | |
17810 | + */ | |
17811 | +void flush_task_sigqueue(struct task_struct *tsk) | |
17812 | +{ | |
17813 | + struct sigqueue *q; | |
17814 | + | |
17815 | + flush_sigqueue(&tsk->pending); | |
17816 | + | |
17817 | + q = get_task_cache(tsk); | |
17818 | + if (q) | |
17819 | + kmem_cache_free(sigqueue_cachep, q); | |
17820 | +} | |
17821 | + | |
17822 | +/* | |
17823 | * Flush all pending signals for this kthread. | |
17824 | */ | |
17825 | void flush_signals(struct task_struct *t) | |
17826 | @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | |
17827 | still_pending: | |
17828 | list_del_init(&first->list); | |
17829 | copy_siginfo(info, &first->info); | |
17830 | - __sigqueue_free(first); | |
17831 | + sigqueue_free_current(first); | |
17832 | } else { | |
17833 | /* | |
17834 | * Ok, it wasn't in the queue. This must be | |
17835 | @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |
17836 | { | |
17837 | int signr; | |
17838 | ||
17839 | + WARN_ON_ONCE(tsk != current); | |
17840 | + | |
17841 | /* We only dequeue private signals from ourselves, we don't let | |
17842 | * signalfd steal them | |
17843 | */ | |
17844 | @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | |
17845 | * We don't want to have recursive SIGSEGV's etc, for example, | |
17846 | * that is why we also clear SIGNAL_UNKILLABLE. | |
17847 | */ | |
17848 | -int | |
17849 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
17850 | +static int | |
17851 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
17852 | { | |
17853 | unsigned long int flags; | |
17854 | int ret, blocked, ignored; | |
17855 | @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
17856 | return ret; | |
17857 | } | |
17858 | ||
17859 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
17860 | +{ | |
17861 | +/* | |
17862 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
17863 | + * since it cannot enable preemption, and the signal code's spin_locks | |
17864 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
17865 | + * send the signal on exit of the trap. | |
17866 | + */ | |
17867 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
17868 | + if (in_atomic()) { | |
17869 | + if (WARN_ON_ONCE(t != current)) | |
17870 | + return 0; | |
17871 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
17872 | + return 0; | |
17873 | + | |
17874 | + if (is_si_special(info)) { | |
17875 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
17876 | + t->forced_info.si_signo = sig; | |
17877 | + t->forced_info.si_errno = 0; | |
17878 | + t->forced_info.si_code = SI_KERNEL; | |
17879 | + t->forced_info.si_pid = 0; | |
17880 | + t->forced_info.si_uid = 0; | |
17881 | + } else { | |
17882 | + t->forced_info = *info; | |
17883 | + } | |
17884 | + | |
17885 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | |
17886 | + return 0; | |
17887 | + } | |
17888 | +#endif | |
17889 | + return do_force_sig_info(sig, info, t); | |
17890 | +} | |
17891 | + | |
17892 | /* | |
17893 | * Nuke all other threads in the group. | |
17894 | */ | |
17895 | @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
17896 | * Disable interrupts early to avoid deadlocks. | |
17897 | * See rcu_read_unlock() comment header for details. | |
17898 | */ | |
17899 | - local_irq_save(*flags); | |
17900 | + local_irq_save_nort(*flags); | |
17901 | rcu_read_lock(); | |
17902 | sighand = rcu_dereference(tsk->sighand); | |
17903 | if (unlikely(sighand == NULL)) { | |
17904 | rcu_read_unlock(); | |
17905 | - local_irq_restore(*flags); | |
17906 | + local_irq_restore_nort(*flags); | |
17907 | break; | |
17908 | } | |
17909 | /* | |
17910 | @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
17911 | } | |
17912 | spin_unlock(&sighand->siglock); | |
17913 | rcu_read_unlock(); | |
17914 | - local_irq_restore(*flags); | |
17915 | + local_irq_restore_nort(*flags); | |
17916 | } | |
17917 | ||
17918 | return sighand; | |
17919 | @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid); | |
17920 | */ | |
17921 | struct sigqueue *sigqueue_alloc(void) | |
17922 | { | |
17923 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
17924 | + /* Preallocated sigqueue objects always from the slabcache ! */ | |
17925 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
17926 | ||
17927 | if (q) | |
17928 | q->flags |= SIGQUEUE_PREALLOC; | |
17929 | @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |
17930 | if (gstop_done && ptrace_reparented(current)) | |
17931 | do_notify_parent_cldstop(current, false, why); | |
17932 | ||
17933 | - /* | |
17934 | - * Don't want to allow preemption here, because | |
17935 | - * sys_ptrace() needs this task to be inactive. | |
17936 | - * | |
17937 | - * XXX: implement read_unlock_no_resched(). | |
17938 | - */ | |
17939 | - preempt_disable(); | |
17940 | read_unlock(&tasklist_lock); | |
17941 | - preempt_enable_no_resched(); | |
17942 | freezable_schedule(); | |
17943 | } else { | |
17944 | /* | |
17945 | diff --git a/kernel/softirq.c b/kernel/softirq.c | |
17946 | index 17caf4b63342..a602b7152de7 100644 | |
17947 | --- a/kernel/softirq.c | |
17948 | +++ b/kernel/softirq.c | |
17949 | @@ -21,10 +21,12 @@ | |
17950 | #include <linux/freezer.h> | |
17951 | #include <linux/kthread.h> | |
17952 | #include <linux/rcupdate.h> | |
17953 | +#include <linux/delay.h> | |
17954 | #include <linux/ftrace.h> | |
17955 | #include <linux/smp.h> | |
17956 | #include <linux/smpboot.h> | |
17957 | #include <linux/tick.h> | |
17958 | +#include <linux/locallock.h> | |
17959 | #include <linux/irq.h> | |
17960 | ||
17961 | #define CREATE_TRACE_POINTS | |
17962 | @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat); | |
17963 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
17964 | ||
17965 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
17966 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17967 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) | |
17968 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); | |
17969 | +#endif | |
17970 | ||
17971 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
17972 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | |
17973 | "TASKLET", "SCHED", "HRTIMER", "RCU" | |
17974 | }; | |
17975 | ||
17976 | +#ifdef CONFIG_NO_HZ_COMMON | |
17977 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
17978 | + | |
17979 | +struct softirq_runner { | |
17980 | + struct task_struct *runner[NR_SOFTIRQS]; | |
17981 | +}; | |
17982 | + | |
17983 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); | |
17984 | + | |
17985 | +static inline void softirq_set_runner(unsigned int sirq) | |
17986 | +{ | |
17987 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
17988 | + | |
17989 | + sr->runner[sirq] = current; | |
17990 | +} | |
17991 | + | |
17992 | +static inline void softirq_clr_runner(unsigned int sirq) | |
17993 | +{ | |
17994 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
17995 | + | |
17996 | + sr->runner[sirq] = NULL; | |
17997 | +} | |
17998 | + | |
17999 | +/* | |
18000 | + * On preempt-rt a softirq running context might be blocked on a | |
18001 | + * lock. There might be no other runnable task on this CPU because the | |
18002 | + * lock owner runs on some other CPU. So we have to go into idle with | |
18003 | + * the pending bit set. Therefore we need to check this, otherwise we | |
18004 | + * warn about false positives, which confuses users and defeats the | |
18005 | + * whole purpose of this test. | |
18006 | + * | |
18007 | + * This code is called with interrupts disabled. | |
18008 | + */ | |
18009 | +void softirq_check_pending_idle(void) | |
18010 | +{ | |
18011 | + static int rate_limit; | |
18012 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
18013 | + u32 warnpending; | |
18014 | + int i; | |
18015 | + | |
18016 | + if (rate_limit >= 10) | |
18017 | + return; | |
18018 | + | |
18019 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
18020 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
18021 | + struct task_struct *tsk = sr->runner[i]; | |
18022 | + | |
18023 | + /* | |
18024 | + * The wakeup code in rtmutex.c wakes up the task | |
18025 | + * _before_ it sets pi_blocked_on to NULL under | |
18026 | + * tsk->pi_lock. So we need to check for both: state | |
18027 | + * and pi_blocked_on. | |
18028 | + */ | |
18029 | + if (tsk) { | |
18030 | + raw_spin_lock(&tsk->pi_lock); | |
18031 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
18032 | + /* Clear all bits pending in that task */ | |
18033 | + warnpending &= ~(tsk->softirqs_raised); | |
18034 | + warnpending &= ~(1 << i); | |
18035 | + } | |
18036 | + raw_spin_unlock(&tsk->pi_lock); | |
18037 | + } | |
18038 | + } | |
18039 | + | |
18040 | + if (warnpending) { | |
18041 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
18042 | + warnpending); | |
18043 | + rate_limit++; | |
18044 | + } | |
18045 | +} | |
18046 | +# else | |
18047 | +/* | |
18048 | + * On !PREEMPT_RT we just printk rate limited: | |
18049 | + */ | |
18050 | +void softirq_check_pending_idle(void) | |
18051 | +{ | |
18052 | + static int rate_limit; | |
18053 | + | |
18054 | + if (rate_limit < 10 && | |
18055 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
18056 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
18057 | + local_softirq_pending()); | |
18058 | + rate_limit++; | |
18059 | + } | |
18060 | +} | |
18061 | +# endif | |
18062 | + | |
18063 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
18064 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
18065 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
18066 | +#endif | |
18067 | + | |
18068 | /* | |
18069 | * we cannot loop indefinitely here to avoid userspace starvation, | |
18070 | * but we also don't want to introduce a worst case 1/HZ latency | |
18071 | @@ -77,6 +175,79 @@ static void wakeup_softirqd(void) | |
18072 | wake_up_process(tsk); | |
18073 | } | |
18074 | ||
18075 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18076 | +static void wakeup_timer_softirqd(void) | |
18077 | +{ | |
18078 | + /* Interrupts are disabled: no need to stop preemption */ | |
18079 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
18080 | + | |
18081 | + if (tsk && tsk->state != TASK_RUNNING) | |
18082 | + wake_up_process(tsk); | |
18083 | +} | |
18084 | +#endif | |
18085 | + | |
18086 | +static void handle_softirq(unsigned int vec_nr) | |
18087 | +{ | |
18088 | + struct softirq_action *h = softirq_vec + vec_nr; | |
18089 | + int prev_count; | |
18090 | + | |
18091 | + prev_count = preempt_count(); | |
18092 | + | |
18093 | + kstat_incr_softirqs_this_cpu(vec_nr); | |
18094 | + | |
18095 | + trace_softirq_entry(vec_nr); | |
18096 | + h->action(h); | |
18097 | + trace_softirq_exit(vec_nr); | |
18098 | + if (unlikely(prev_count != preempt_count())) { | |
18099 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
18100 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
18101 | + prev_count, preempt_count()); | |
18102 | + preempt_count_set(prev_count); | |
18103 | + } | |
18104 | +} | |
18105 | + | |
18106 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
18107 | +static inline int ksoftirqd_softirq_pending(void) | |
18108 | +{ | |
18109 | + return local_softirq_pending(); | |
18110 | +} | |
18111 | + | |
18112 | +static void handle_pending_softirqs(u32 pending) | |
18113 | +{ | |
18114 | + struct softirq_action *h = softirq_vec; | |
18115 | + int softirq_bit; | |
18116 | + | |
18117 | + local_irq_enable(); | |
18118 | + | |
18119 | + h = softirq_vec; | |
18120 | + | |
18121 | + while ((softirq_bit = ffs(pending))) { | |
18122 | + unsigned int vec_nr; | |
18123 | + | |
18124 | + h += softirq_bit - 1; | |
18125 | + vec_nr = h - softirq_vec; | |
18126 | + handle_softirq(vec_nr); | |
18127 | + | |
18128 | + h++; | |
18129 | + pending >>= softirq_bit; | |
18130 | + } | |
18131 | + | |
18132 | + rcu_bh_qs(); | |
18133 | + local_irq_disable(); | |
18134 | +} | |
18135 | + | |
18136 | +static void run_ksoftirqd(unsigned int cpu) | |
18137 | +{ | |
18138 | + local_irq_disable(); | |
18139 | + if (ksoftirqd_softirq_pending()) { | |
18140 | + __do_softirq(); | |
18141 | + local_irq_enable(); | |
18142 | + cond_resched_rcu_qs(); | |
18143 | + return; | |
18144 | + } | |
18145 | + local_irq_enable(); | |
18146 | +} | |
18147 | + | |
18148 | /* | |
18149 | * preempt_count and SOFTIRQ_OFFSET usage: | |
18150 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
18151 | @@ -232,10 +403,8 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) | |
18152 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | |
18153 | unsigned long old_flags = current->flags; | |
18154 | int max_restart = MAX_SOFTIRQ_RESTART; | |
18155 | - struct softirq_action *h; | |
18156 | bool in_hardirq; | |
18157 | __u32 pending; | |
18158 | - int softirq_bit; | |
18159 | ||
18160 | /* | |
18161 | * Mask out PF_MEMALLOC as current task context is borrowed for the | |
18162 | @@ -254,36 +423,7 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) | |
18163 | /* Reset the pending bitmask before enabling irqs */ | |
18164 | set_softirq_pending(0); | |
18165 | ||
18166 | - local_irq_enable(); | |
18167 | - | |
18168 | - h = softirq_vec; | |
18169 | - | |
18170 | - while ((softirq_bit = ffs(pending))) { | |
18171 | - unsigned int vec_nr; | |
18172 | - int prev_count; | |
18173 | - | |
18174 | - h += softirq_bit - 1; | |
18175 | - | |
18176 | - vec_nr = h - softirq_vec; | |
18177 | - prev_count = preempt_count(); | |
18178 | - | |
18179 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
18180 | - | |
18181 | - trace_softirq_entry(vec_nr); | |
18182 | - h->action(h); | |
18183 | - trace_softirq_exit(vec_nr); | |
18184 | - if (unlikely(prev_count != preempt_count())) { | |
18185 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
18186 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
18187 | - prev_count, preempt_count()); | |
18188 | - preempt_count_set(prev_count); | |
18189 | - } | |
18190 | - h++; | |
18191 | - pending >>= softirq_bit; | |
18192 | - } | |
18193 | - | |
18194 | - rcu_bh_qs(); | |
18195 | - local_irq_disable(); | |
18196 | + handle_pending_softirqs(pending); | |
18197 | ||
18198 | pending = local_softirq_pending(); | |
18199 | if (pending) { | |
18200 | @@ -320,6 +460,310 @@ asmlinkage __visible void do_softirq(void) | |
18201 | } | |
18202 | ||
18203 | /* | |
18204 | + * This function must run with irqs disabled! | |
18205 | + */ | |
18206 | +void raise_softirq_irqoff(unsigned int nr) | |
18207 | +{ | |
18208 | + __raise_softirq_irqoff(nr); | |
18209 | + | |
18210 | + /* | |
18211 | + * If we're in an interrupt or softirq, we're done | |
18212 | + * (this also catches softirq-disabled code). We will | |
18213 | + * actually run the softirq once we return from | |
18214 | + * the irq or softirq. | |
18215 | + * | |
18216 | + * Otherwise we wake up ksoftirqd to make sure we | |
18217 | + * schedule the softirq soon. | |
18218 | + */ | |
18219 | + if (!in_interrupt()) | |
18220 | + wakeup_softirqd(); | |
18221 | +} | |
18222 | + | |
18223 | +void __raise_softirq_irqoff(unsigned int nr) | |
18224 | +{ | |
18225 | + trace_softirq_raise(nr); | |
18226 | + or_softirq_pending(1UL << nr); | |
18227 | +} | |
18228 | + | |
18229 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } | |
18230 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
18231 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
18232 | + | |
18233 | +#else /* !PREEMPT_RT_FULL */ | |
18234 | + | |
18235 | +/* | |
18236 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
18237 | + */ | |
18238 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
18239 | + | |
18240 | +void __init softirq_early_init(void) | |
18241 | +{ | |
18242 | + int i; | |
18243 | + | |
18244 | + for (i = 0; i < NR_SOFTIRQS; i++) | |
18245 | + local_irq_lock_init(local_softirq_locks[i]); | |
18246 | +} | |
18247 | + | |
18248 | +static void lock_softirq(int which) | |
18249 | +{ | |
18250 | + local_lock(local_softirq_locks[which]); | |
18251 | +} | |
18252 | + | |
18253 | +static void unlock_softirq(int which) | |
18254 | +{ | |
18255 | + local_unlock(local_softirq_locks[which]); | |
18256 | +} | |
18257 | + | |
18258 | +static void do_single_softirq(int which) | |
18259 | +{ | |
18260 | + unsigned long old_flags = current->flags; | |
18261 | + | |
18262 | + current->flags &= ~PF_MEMALLOC; | |
18263 | + vtime_account_irq_enter(current); | |
18264 | + current->flags |= PF_IN_SOFTIRQ; | |
18265 | + lockdep_softirq_enter(); | |
18266 | + local_irq_enable(); | |
18267 | + handle_softirq(which); | |
18268 | + local_irq_disable(); | |
18269 | + lockdep_softirq_exit(); | |
18270 | + current->flags &= ~PF_IN_SOFTIRQ; | |
18271 | + vtime_account_irq_enter(current); | |
18272 | + tsk_restore_flags(current, old_flags, PF_MEMALLOC); | |
18273 | +} | |
18274 | + | |
18275 | +/* | |
18276 | + * Called with interrupts disabled. Process softirqs which were raised | |
18277 | + * in current context (or on behalf of ksoftirqd). | |
18278 | + */ | |
18279 | +static void do_current_softirqs(void) | |
18280 | +{ | |
18281 | + while (current->softirqs_raised) { | |
18282 | + int i = __ffs(current->softirqs_raised); | |
18283 | + unsigned int pending, mask = (1U << i); | |
18284 | + | |
18285 | + current->softirqs_raised &= ~mask; | |
18286 | + local_irq_enable(); | |
18287 | + | |
18288 | + /* | |
18289 | + * If the lock is contended, we boost the owner to | |
18290 | + * process the softirq or leave the critical section | |
18291 | + * now. | |
18292 | + */ | |
18293 | + lock_softirq(i); | |
18294 | + local_irq_disable(); | |
18295 | + softirq_set_runner(i); | |
18296 | + /* | |
18297 | + * Check the local_softirq_pending() bits to see | |
18298 | + * whether we still need to process this or someone | |
18299 | + * else already took care of it. | |
18300 | + */ | |
18301 | + pending = local_softirq_pending(); | |
18302 | + if (pending & mask) { | |
18303 | + set_softirq_pending(pending & ~mask); | |
18304 | + do_single_softirq(i); | |
18305 | + } | |
18306 | + softirq_clr_runner(i); | |
18307 | + WARN_ON(current->softirq_nestcnt != 1); | |
18308 | + local_irq_enable(); | |
18309 | + unlock_softirq(i); | |
18310 | + local_irq_disable(); | |
18311 | + } | |
18312 | +} | |
18313 | + | |
18314 | +void __local_bh_disable(void) | |
18315 | +{ | |
18316 | + if (++current->softirq_nestcnt == 1) | |
18317 | + migrate_disable(); | |
18318 | +} | |
18319 | +EXPORT_SYMBOL(__local_bh_disable); | |
18320 | + | |
18321 | +void __local_bh_enable(void) | |
18322 | +{ | |
18323 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
18324 | + return; | |
18325 | + | |
18326 | + local_irq_disable(); | |
18327 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
18328 | + do_current_softirqs(); | |
18329 | + local_irq_enable(); | |
18330 | + | |
18331 | + if (--current->softirq_nestcnt == 0) | |
18332 | + migrate_enable(); | |
18333 | +} | |
18334 | +EXPORT_SYMBOL(__local_bh_enable); | |
18335 | + | |
18336 | +void _local_bh_enable(void) | |
18337 | +{ | |
18338 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
18339 | + return; | |
18340 | + if (--current->softirq_nestcnt == 0) | |
18341 | + migrate_enable(); | |
18342 | +} | |
18343 | +EXPORT_SYMBOL(_local_bh_enable); | |
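The three functions above are the RT replacements for the usual bottom-half disable/enable primitives. As a reminder of the caller-side contract they must preserve, here is a minimal sketch (hypothetical names, not part of this patch) of the common pattern protecting per-CPU data against softirqs:

#include <linux/bottom_half.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, demo_counter);

static void demo_update_from_task(void)
{
	local_bh_disable();		/* keep softirqs off this CPU (RT: migrate_disable + nestcnt) */
	__this_cpu_inc(demo_counter);	/* safe against softirq users of the same data */
	local_bh_enable();		/* may run the softirqs raised meanwhile */
}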
18344 | + | |
18345 | +int in_serving_softirq(void) | |
18346 | +{ | |
18347 | + return current->flags & PF_IN_SOFTIRQ; | |
18348 | +} | |
18349 | +EXPORT_SYMBOL(in_serving_softirq); | |
18350 | + | |
18351 | +/* Called with preemption disabled */ | |
18352 | +static void run_ksoftirqd(unsigned int cpu) | |
18353 | +{ | |
18354 | + local_irq_disable(); | |
18355 | + current->softirq_nestcnt++; | |
18356 | + | |
18357 | + do_current_softirqs(); | |
18358 | + current->softirq_nestcnt--; | |
18359 | + local_irq_enable(); | |
18360 | + cond_resched_rcu_qs(); | |
18361 | +} | |
18362 | + | |
18363 | +/* | |
18364 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
18365 | + * disabled. So the cpu can't go away under us. | |
18366 | + */ | |
18367 | +void thread_do_softirq(void) | |
18368 | +{ | |
18369 | + if (!in_serving_softirq() && current->softirqs_raised) { | |
18370 | + current->softirq_nestcnt++; | |
18371 | + do_current_softirqs(); | |
18372 | + current->softirq_nestcnt--; | |
18373 | + } | |
18374 | +} | |
18375 | + | |
18376 | +static void do_raise_softirq_irqoff(unsigned int nr) | |
18377 | +{ | |
18378 | + unsigned int mask; | |
18379 | + | |
18380 | + mask = 1UL << nr; | |
18381 | + | |
18382 | + trace_softirq_raise(nr); | |
18383 | + or_softirq_pending(mask); | |
18384 | + | |
18385 | + /* | |
18386 | + * If we are not in a hard interrupt and inside a bh disabled | |
18387 | + * region, we simply raise the flag on current. local_bh_enable() | |
18388 | + * will make sure that the softirq is executed. Otherwise we | |
18389 | + * delegate it to ksoftirqd. | |
18390 | + */ | |
18391 | + if (!in_irq() && current->softirq_nestcnt) | |
18392 | + current->softirqs_raised |= mask; | |
18393 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
18394 | + return; | |
18395 | + | |
18396 | + if (mask & TIMER_SOFTIRQS) | |
18397 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
18398 | + else | |
18399 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
18400 | +} | |
18401 | + | |
18402 | +static void wakeup_proper_softirq(unsigned int nr) | |
18403 | +{ | |
18404 | + if ((1UL << nr) & TIMER_SOFTIRQS) | |
18405 | + wakeup_timer_softirqd(); | |
18406 | + else | |
18407 | + wakeup_softirqd(); | |
18408 | +} | |
18409 | + | |
18410 | + | |
18411 | +void __raise_softirq_irqoff(unsigned int nr) | |
18412 | +{ | |
18413 | + do_raise_softirq_irqoff(nr); | |
18414 | + if (!in_irq() && !current->softirq_nestcnt) | |
18415 | + wakeup_proper_softirq(nr); | |
18416 | +} | |
18417 | + | |
18418 | +/* | |
18419 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
18420 | + */ | |
18421 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
18422 | +{ | |
18423 | + unsigned int mask; | |
18424 | + | |
18425 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || | |
18426 | + !__this_cpu_read(ktimer_softirqd))) | |
18427 | + return; | |
18428 | + mask = 1UL << nr; | |
18429 | + | |
18430 | + trace_softirq_raise(nr); | |
18431 | + or_softirq_pending(mask); | |
18432 | + if (mask & TIMER_SOFTIRQS) | |
18433 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
18434 | + else | |
18435 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
18436 | + wakeup_proper_softirq(nr); | |
18437 | +} | |
18438 | + | |
18439 | +/* | |
18440 | + * This function must run with irqs disabled! | |
18441 | + */ | |
18442 | +void raise_softirq_irqoff(unsigned int nr) | |
18443 | +{ | |
18444 | + do_raise_softirq_irqoff(nr); | |
18445 | + | |
18446 | + /* | |
18447 | + * If we're in an hard interrupt we let irq return code deal | |
18448 | + * with the wakeup of ksoftirqd. | |
18449 | + */ | |
18450 | + if (in_irq()) | |
18451 | + return; | |
18452 | + /* | |
18453 | + * If we are in thread context but outside of a bh disabled | |
18454 | + * region, we need to wake ksoftirqd as well. | |
18455 | + * | |
18456 | + * CHECKME: Some of the places which do that could be wrapped | |
18457 | + * into local_bh_disable/enable pairs. Though it's unclear | |
18458 | + * whether this is worth the effort. To find those places just | |
18459 | + * raise a WARN() if the condition is met. | |
18460 | + */ | |
18461 | + if (!current->softirq_nestcnt) | |
18462 | + wakeup_proper_softirq(nr); | |
18463 | +} | |
18464 | + | |
18465 | +static inline int ksoftirqd_softirq_pending(void) | |
18466 | +{ | |
18467 | + return current->softirqs_raised; | |
18468 | +} | |
18469 | + | |
18470 | +static inline void local_bh_disable_nort(void) { } | |
18471 | +static inline void _local_bh_enable_nort(void) { } | |
18472 | + | |
18473 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
18474 | +{ | |
18475 | + /* Take over all but timer pending softirqs when starting */ | |
18476 | + local_irq_disable(); | |
18477 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
18478 | + local_irq_enable(); | |
18479 | +} | |
18480 | + | |
18481 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) | |
18482 | +{ | |
18483 | + struct sched_param param = { .sched_priority = 1 }; | |
18484 | + | |
18485 | + sched_setscheduler(current, SCHED_FIFO, ¶m); | |
18486 | + | |
18487 | + /* Take over timer pending softirqs when starting */ | |
18488 | + local_irq_disable(); | |
18489 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
18490 | + local_irq_enable(); | |
18491 | +} | |
18492 | + | |
18493 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, | |
18494 | + bool online) | |
18495 | +{ | |
18496 | + struct sched_param param = { .sched_priority = 0 }; | |
18497 | + | |
18498 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); | |
18499 | +} | |
18500 | + | |
18501 | +static int ktimer_softirqd_should_run(unsigned int cpu) | |
18502 | +{ | |
18503 | + return current->softirqs_raised; | |
18504 | +} | |
18505 | + | |
18506 | +#endif /* PREEMPT_RT_FULL */ | |
18507 | +/* | |
18508 | * Enter an interrupt context. | |
18509 | */ | |
18510 | void irq_enter(void) | |
18511 | @@ -330,9 +774,9 @@ void irq_enter(void) | |
18512 | * Prevent raise_softirq from needlessly waking up ksoftirqd | |
18513 | * here, as softirq will be serviced on return from interrupt. | |
18514 | */ | |
18515 | - local_bh_disable(); | |
18516 | + local_bh_disable_nort(); | |
18517 | tick_irq_enter(); | |
18518 | - _local_bh_enable(); | |
18519 | + _local_bh_enable_nort(); | |
18520 | } | |
18521 | ||
18522 | __irq_enter(); | |
18523 | @@ -340,6 +784,7 @@ void irq_enter(void) | |
18524 | ||
18525 | static inline void invoke_softirq(void) | |
18526 | { | |
18527 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
18528 | if (!force_irqthreads) { | |
18529 | #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK | |
18530 | /* | |
18531 | @@ -359,6 +804,18 @@ static inline void invoke_softirq(void) | |
18532 | } else { | |
18533 | wakeup_softirqd(); | |
18534 | } | |
18535 | +#else /* PREEMPT_RT_FULL */ | |
18536 | + unsigned long flags; | |
18537 | + | |
18538 | + local_irq_save(flags); | |
18539 | + if (__this_cpu_read(ksoftirqd) && | |
18540 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
18541 | + wakeup_softirqd(); | |
18542 | + if (__this_cpu_read(ktimer_softirqd) && | |
18543 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
18544 | + wakeup_timer_softirqd(); | |
18545 | + local_irq_restore(flags); | |
18546 | +#endif | |
18547 | } | |
18548 | ||
18549 | static inline void tick_irq_exit(void) | |
18550 | @@ -395,26 +852,6 @@ void irq_exit(void) | |
18551 | trace_hardirq_exit(); /* must be last! */ | |
18552 | } | |
18553 | ||
18554 | -/* | |
18555 | - * This function must run with irqs disabled! | |
18556 | - */ | |
18557 | -inline void raise_softirq_irqoff(unsigned int nr) | |
18558 | -{ | |
18559 | - __raise_softirq_irqoff(nr); | |
18560 | - | |
18561 | - /* | |
18562 | - * If we're in an interrupt or softirq, we're done | |
18563 | - * (this also catches softirq-disabled code). We will | |
18564 | - * actually run the softirq once we return from | |
18565 | - * the irq or softirq. | |
18566 | - * | |
18567 | - * Otherwise we wake up ksoftirqd to make sure we | |
18568 | - * schedule the softirq soon. | |
18569 | - */ | |
18570 | - if (!in_interrupt()) | |
18571 | - wakeup_softirqd(); | |
18572 | -} | |
18573 | - | |
18574 | void raise_softirq(unsigned int nr) | |
18575 | { | |
18576 | unsigned long flags; | |
18577 | @@ -424,12 +861,6 @@ void raise_softirq(unsigned int nr) | |
18578 | local_irq_restore(flags); | |
18579 | } | |
18580 | ||
18581 | -void __raise_softirq_irqoff(unsigned int nr) | |
18582 | -{ | |
18583 | - trace_softirq_raise(nr); | |
18584 | - or_softirq_pending(1UL << nr); | |
18585 | -} | |
18586 | - | |
18587 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
18588 | { | |
18589 | softirq_vec[nr].action = action; | |
18590 | @@ -446,15 +877,45 @@ struct tasklet_head { | |
18591 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | |
18592 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
18593 | ||
18594 | +static inline void | |
18595 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
18596 | +{ | |
18597 | + if (tasklet_trylock(t)) { | |
18598 | +again: | |
18599 | + /* We may have been preempted before tasklet_trylock | |
18600 | + * and __tasklet_action may have already run. | |
18601 | + * So double check the sched bit while the tasklet | |
18602 | + * is locked before adding it to the list. | |
18603 | + */ | |
18604 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
18605 | + t->next = NULL; | |
18606 | + *head->tail = t; | |
18607 | + head->tail = &(t->next); | |
18608 | + raise_softirq_irqoff(nr); | |
18609 | + tasklet_unlock(t); | |
18610 | + } else { | |
18611 | + /* This is subtle. If we hit the corner case above, | |
18612 | + * it is possible that we get preempted right here, | |
18613 | + * and another task has successfully called | |
18614 | + * tasklet_schedule(), then this function, and | |
18615 | + * failed on the trylock. Thus we must be sure, | |
18616 | + * before releasing the tasklet lock, that the | |
18617 | + * SCHED_BIT is clear. Otherwise the tasklet | |
18618 | + * may get its SCHED_BIT set, but not be added to the | |
18619 | + * list. | |
18620 | + */ | |
18621 | + if (!tasklet_tryunlock(t)) | |
18622 | + goto again; | |
18623 | + } | |
18624 | + } | |
18625 | +} | |
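__tasklet_common_schedule() above changes only the queueing path; the driver-facing tasklet API is unchanged. For reference, a minimal sketch of that API (hypothetical driver names, not part of the patch):

#include <linux/interrupt.h>

static void demo_tasklet_fn(unsigned long data)
{
	/* deferred work; with this patch it runs from the softirq threads on RT */
}

static DECLARE_TASKLET(demo_tasklet, demo_tasklet_fn, 0);

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	tasklet_schedule(&demo_tasklet);	/* ends up in __tasklet_schedule() above */
	return IRQ_HANDLED;
}

static void demo_teardown(void)
{
	tasklet_kill(&demo_tasklet);		/* waits for a scheduled/running tasklet */
}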
18626 | + | |
18627 | void __tasklet_schedule(struct tasklet_struct *t) | |
18628 | { | |
18629 | unsigned long flags; | |
18630 | ||
18631 | local_irq_save(flags); | |
18632 | - t->next = NULL; | |
18633 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
18634 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
18635 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
18636 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
18637 | local_irq_restore(flags); | |
18638 | } | |
18639 | EXPORT_SYMBOL(__tasklet_schedule); | |
18640 | @@ -464,10 +925,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |
18641 | unsigned long flags; | |
18642 | ||
18643 | local_irq_save(flags); | |
18644 | - t->next = NULL; | |
18645 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
18646 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
18647 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
18648 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
18649 | local_irq_restore(flags); | |
18650 | } | |
18651 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
18652 | @@ -476,82 +934,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |
18653 | { | |
18654 | BUG_ON(!irqs_disabled()); | |
18655 | ||
18656 | - t->next = __this_cpu_read(tasklet_hi_vec.head); | |
18657 | - __this_cpu_write(tasklet_hi_vec.head, t); | |
18658 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
18659 | + __tasklet_hi_schedule(t); | |
18660 | } | |
18661 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | |
18662 | ||
18663 | -static void tasklet_action(struct softirq_action *a) | |
18664 | +void tasklet_enable(struct tasklet_struct *t) | |
18665 | { | |
18666 | - struct tasklet_struct *list; | |
18667 | + if (!atomic_dec_and_test(&t->count)) | |
18668 | + return; | |
18669 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
18670 | + tasklet_schedule(t); | |
18671 | +} | |
18672 | +EXPORT_SYMBOL(tasklet_enable); | |
18673 | ||
18674 | - local_irq_disable(); | |
18675 | - list = __this_cpu_read(tasklet_vec.head); | |
18676 | - __this_cpu_write(tasklet_vec.head, NULL); | |
18677 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
18678 | - local_irq_enable(); | |
18679 | +static void __tasklet_action(struct softirq_action *a, | |
18680 | + struct tasklet_struct *list) | |
18681 | +{ | |
18682 | + int loops = 1000000; | |
18683 | ||
18684 | while (list) { | |
18685 | struct tasklet_struct *t = list; | |
18686 | ||
18687 | list = list->next; | |
18688 | ||
18689 | - if (tasklet_trylock(t)) { | |
18690 | - if (!atomic_read(&t->count)) { | |
18691 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
18692 | - &t->state)) | |
18693 | - BUG(); | |
18694 | - t->func(t->data); | |
18695 | - tasklet_unlock(t); | |
18696 | - continue; | |
18697 | - } | |
18698 | - tasklet_unlock(t); | |
18699 | + /* | |
18700 | + * Should always succeed - after a tasklet got on the | |
18701 | + * list (after getting the SCHED bit set from 0 to 1), | |
18702 | + * nothing but the tasklet softirq it got queued to can | |
18703 | + * lock it: | |
18704 | + */ | |
18705 | + if (!tasklet_trylock(t)) { | |
18706 | + WARN_ON(1); | |
18707 | + continue; | |
18708 | } | |
18709 | ||
18710 | - local_irq_disable(); | |
18711 | t->next = NULL; | |
18712 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
18713 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
18714 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
18715 | - local_irq_enable(); | |
18716 | + | |
18717 | + /* | |
18718 | + * If we cannot handle the tasklet because it's disabled, | |
18719 | + * mark it as pending. tasklet_enable() will later | |
18720 | + * re-schedule the tasklet. | |
18721 | + */ | |
18722 | + if (unlikely(atomic_read(&t->count))) { | |
18723 | +out_disabled: | |
18724 | + /* implicit unlock: */ | |
18725 | + wmb(); | |
18726 | + t->state = TASKLET_STATEF_PENDING; | |
18727 | + continue; | |
18728 | + } | |
18729 | + | |
18730 | + /* | |
18731 | + * From this point on the tasklet might be rescheduled | |
18732 | + * on another CPU, but it can only be added to another | |
18733 | + * CPU's tasklet list if we unlock the tasklet (which we | |
18734 | + * don't do yet). | |
18735 | + */ | |
18736 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
18737 | + WARN_ON(1); | |
18738 | + | |
18739 | +again: | |
18740 | + t->func(t->data); | |
18741 | + | |
18742 | + /* | |
18743 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
18744 | + * another CPU might have scheduled or disabled the tasklet. | |
18745 | + * We only allow the STATE_RUN -> 0 transition here. | |
18746 | + */ | |
18747 | + while (!tasklet_tryunlock(t)) { | |
18748 | + /* | |
18749 | + * If it got disabled meanwhile, bail out: | |
18750 | + */ | |
18751 | + if (atomic_read(&t->count)) | |
18752 | + goto out_disabled; | |
18753 | + /* | |
18754 | + * If it got scheduled meanwhile, re-execute | |
18755 | + * the tasklet function: | |
18756 | + */ | |
18757 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
18758 | + goto again; | |
18759 | + if (!--loops) { | |
18760 | + printk("hm, tasklet state: %08lx\n", t->state); | |
18761 | + WARN_ON(1); | |
18762 | + tasklet_unlock(t); | |
18763 | + break; | |
18764 | + } | |
18765 | + } | |
18766 | } | |
18767 | } | |
18768 | ||
18769 | +static void tasklet_action(struct softirq_action *a) | |
18770 | +{ | |
18771 | + struct tasklet_struct *list; | |
18772 | + | |
18773 | + local_irq_disable(); | |
18774 | + | |
18775 | + list = __this_cpu_read(tasklet_vec.head); | |
18776 | + __this_cpu_write(tasklet_vec.head, NULL); | |
18777 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
18778 | + | |
18779 | + local_irq_enable(); | |
18780 | + | |
18781 | + __tasklet_action(a, list); | |
18782 | +} | |
18783 | + | |
18784 | static void tasklet_hi_action(struct softirq_action *a) | |
18785 | { | |
18786 | struct tasklet_struct *list; | |
18787 | ||
18788 | local_irq_disable(); | |
18789 | + | |
18790 | list = __this_cpu_read(tasklet_hi_vec.head); | |
18791 | __this_cpu_write(tasklet_hi_vec.head, NULL); | |
18792 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
18793 | + | |
18794 | local_irq_enable(); | |
18795 | ||
18796 | - while (list) { | |
18797 | - struct tasklet_struct *t = list; | |
18798 | - | |
18799 | - list = list->next; | |
18800 | - | |
18801 | - if (tasklet_trylock(t)) { | |
18802 | - if (!atomic_read(&t->count)) { | |
18803 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
18804 | - &t->state)) | |
18805 | - BUG(); | |
18806 | - t->func(t->data); | |
18807 | - tasklet_unlock(t); | |
18808 | - continue; | |
18809 | - } | |
18810 | - tasklet_unlock(t); | |
18811 | - } | |
18812 | - | |
18813 | - local_irq_disable(); | |
18814 | - t->next = NULL; | |
18815 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
18816 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
18817 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
18818 | - local_irq_enable(); | |
18819 | - } | |
18820 | + __tasklet_action(a, list); | |
18821 | } | |
18822 | ||
18823 | void tasklet_init(struct tasklet_struct *t, | |
18824 | @@ -572,7 +1070,7 @@ void tasklet_kill(struct tasklet_struct *t) | |
18825 | ||
18826 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
18827 | do { | |
18828 | - yield(); | |
18829 | + msleep(1); | |
18830 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
18831 | } | |
18832 | tasklet_unlock_wait(t); | |
18833 | @@ -646,25 +1144,26 @@ void __init softirq_init(void) | |
18834 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); | |
18835 | } | |
18836 | ||
18837 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
18838 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
18839 | +{ | |
18840 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
18841 | + /* | |
18842 | + * Hack for now to avoid this busy-loop: | |
18843 | + */ | |
18844 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18845 | + msleep(1); | |
18846 | +#else | |
18847 | + barrier(); | |
18848 | +#endif | |
18849 | + } | |
18850 | +} | |
18851 | +EXPORT_SYMBOL(tasklet_unlock_wait); | |
18852 | +#endif | |
18853 | + | |
18854 | static int ksoftirqd_should_run(unsigned int cpu) | |
18855 | { | |
18856 | - return local_softirq_pending(); | |
18857 | -} | |
18858 | - | |
18859 | -static void run_ksoftirqd(unsigned int cpu) | |
18860 | -{ | |
18861 | - local_irq_disable(); | |
18862 | - if (local_softirq_pending()) { | |
18863 | - /* | |
18864 | - * We can safely run softirq on inline stack, as we are not deep | |
18865 | - * in the task stack here. | |
18866 | - */ | |
18867 | - __do_softirq(); | |
18868 | - local_irq_enable(); | |
18869 | - cond_resched_rcu_qs(); | |
18870 | - return; | |
18871 | - } | |
18872 | - local_irq_enable(); | |
18873 | + return ksoftirqd_softirq_pending(); | |
18874 | } | |
18875 | ||
18876 | #ifdef CONFIG_HOTPLUG_CPU | |
18877 | @@ -746,16 +1245,31 @@ static struct notifier_block cpu_nfb = { | |
18878 | ||
18879 | static struct smp_hotplug_thread softirq_threads = { | |
18880 | .store = &ksoftirqd, | |
18881 | + .setup = ksoftirqd_set_sched_params, | |
18882 | .thread_should_run = ksoftirqd_should_run, | |
18883 | .thread_fn = run_ksoftirqd, | |
18884 | .thread_comm = "ksoftirqd/%u", | |
18885 | }; | |
18886 | ||
18887 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18888 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
18889 | + .store = &ktimer_softirqd, | |
18890 | + .setup = ktimer_softirqd_set_sched_params, | |
18891 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
18892 | + .thread_should_run = ktimer_softirqd_should_run, | |
18893 | + .thread_fn = run_ksoftirqd, | |
18894 | + .thread_comm = "ktimersoftd/%u", | |
18895 | +}; | |
18896 | +#endif | |
18897 | + | |
18898 | static __init int spawn_ksoftirqd(void) | |
18899 | { | |
18900 | register_cpu_notifier(&cpu_nfb); | |
18901 | ||
18902 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); | |
18903 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18904 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
18905 | +#endif | |
18906 | ||
18907 | return 0; | |
18908 | } | |
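spawn_ksoftirqd() now registers two per-cpu threads on RT. A reduced sketch of the smpboot interface used here (hypothetical names, not part of the patch), showing how the .store/.thread_should_run/.thread_fn hooks fit together:

#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smpboot.h>

static DEFINE_PER_CPU(struct task_struct *, demo_thread);
static DEFINE_PER_CPU(int, demo_work_pending);

static int demo_should_run(unsigned int cpu)
{
	return __this_cpu_read(demo_work_pending);	/* thread sleeps while this is 0 */
}

static void demo_thread_fn(unsigned int cpu)
{
	__this_cpu_write(demo_work_pending, 0);
	/* per-cpu work goes here */
}

static struct smp_hotplug_thread demo_threads = {
	.store			= &demo_thread,		/* per-cpu task_struct pointer */
	.thread_should_run	= demo_should_run,
	.thread_fn		= demo_thread_fn,
	.thread_comm		= "demo/%u",
};

static int __init demo_spawn(void)
{
	return smpboot_register_percpu_thread(&demo_threads);
}
early_initcall(demo_spawn);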
18909 | diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c | |
18910 | index 4a1ca5f6da7e..3226e22b9e42 100644 | |
18911 | --- a/kernel/stop_machine.c | |
18912 | +++ b/kernel/stop_machine.c | |
18913 | @@ -37,7 +37,7 @@ struct cpu_stop_done { | |
18914 | struct cpu_stopper { | |
18915 | struct task_struct *thread; | |
18916 | ||
18917 | - spinlock_t lock; | |
18918 | + raw_spinlock_t lock; | |
18919 | bool enabled; /* is this stopper enabled? */ | |
18920 | struct list_head works; /* list of pending works */ | |
18921 | ||
18922 | @@ -83,14 +83,14 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) | |
18923 | unsigned long flags; | |
18924 | bool enabled; | |
18925 | ||
18926 | - spin_lock_irqsave(&stopper->lock, flags); | |
18927 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
18928 | enabled = stopper->enabled; | |
18929 | if (enabled) | |
18930 | __cpu_stop_queue_work(stopper, work); | |
18931 | else if (work->done) | |
18932 | cpu_stop_signal_done(work->done); | |
18933 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
18934 | ||
18935 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
18936 | return enabled; | |
18937 | } | |
18938 | ||
18939 | @@ -232,8 +232,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
18940 | int err; | |
18941 | ||
18942 | lg_double_lock(&stop_cpus_lock, cpu1, cpu2); | |
18943 | - spin_lock_irq(&stopper1->lock); | |
18944 | - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
18945 | + raw_spin_lock_irq(&stopper1->lock); | |
18946 | + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
18947 | ||
18948 | err = -ENOENT; | |
18949 | if (!stopper1->enabled || !stopper2->enabled) | |
18950 | @@ -243,8 +243,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
18951 | __cpu_stop_queue_work(stopper1, work1); | |
18952 | __cpu_stop_queue_work(stopper2, work2); | |
18953 | unlock: | |
18954 | - spin_unlock(&stopper2->lock); | |
18955 | - spin_unlock_irq(&stopper1->lock); | |
18956 | + raw_spin_unlock(&stopper2->lock); | |
18957 | + raw_spin_unlock_irq(&stopper1->lock); | |
18958 | lg_double_unlock(&stop_cpus_lock, cpu1, cpu2); | |
18959 | ||
18960 | return err; | |
18961 | @@ -321,18 +321,21 @@ static DEFINE_MUTEX(stop_cpus_mutex); | |
18962 | ||
18963 | static bool queue_stop_cpus_work(const struct cpumask *cpumask, | |
18964 | cpu_stop_fn_t fn, void *arg, | |
18965 | - struct cpu_stop_done *done) | |
18966 | + struct cpu_stop_done *done, bool inactive) | |
18967 | { | |
18968 | struct cpu_stop_work *work; | |
18969 | unsigned int cpu; | |
18970 | bool queued = false; | |
18971 | ||
18972 | /* | |
18973 | - * Disable preemption while queueing to avoid getting | |
18974 | - * preempted by a stopper which might wait for other stoppers | |
18975 | - * to enter @fn which can lead to deadlock. | |
18976 | + * Make sure that all work is queued on all cpus before | |
18977 | + * any of the cpus can execute it. | |
18978 | */ | |
18979 | - lg_global_lock(&stop_cpus_lock); | |
18980 | + if (!inactive) | |
18981 | + lg_global_lock(&stop_cpus_lock); | |
18982 | + else | |
18983 | + lg_global_trylock_relax(&stop_cpus_lock); | |
18984 | + | |
18985 | for_each_cpu(cpu, cpumask) { | |
18986 | work = &per_cpu(cpu_stopper.stop_work, cpu); | |
18987 | work->fn = fn; | |
18988 | @@ -352,7 +355,7 @@ static int __stop_cpus(const struct cpumask *cpumask, | |
18989 | struct cpu_stop_done done; | |
18990 | ||
18991 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | |
18992 | - if (!queue_stop_cpus_work(cpumask, fn, arg, &done)) | |
18993 | + if (!queue_stop_cpus_work(cpumask, fn, arg, &done, false)) | |
18994 | return -ENOENT; | |
18995 | wait_for_completion(&done.completion); | |
18996 | return done.ret; | |
18997 | @@ -433,9 +436,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |
18998 | unsigned long flags; | |
18999 | int run; | |
19000 | ||
19001 | - spin_lock_irqsave(&stopper->lock, flags); | |
19002 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
19003 | run = !list_empty(&stopper->works); | |
19004 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
19005 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
19006 | return run; | |
19007 | } | |
19008 | ||
19009 | @@ -446,13 +449,13 @@ static void cpu_stopper_thread(unsigned int cpu) | |
19010 | ||
19011 | repeat: | |
19012 | work = NULL; | |
19013 | - spin_lock_irq(&stopper->lock); | |
19014 | + raw_spin_lock_irq(&stopper->lock); | |
19015 | if (!list_empty(&stopper->works)) { | |
19016 | work = list_first_entry(&stopper->works, | |
19017 | struct cpu_stop_work, list); | |
19018 | list_del_init(&work->list); | |
19019 | } | |
19020 | - spin_unlock_irq(&stopper->lock); | |
19021 | + raw_spin_unlock_irq(&stopper->lock); | |
19022 | ||
19023 | if (work) { | |
19024 | cpu_stop_fn_t fn = work->fn; | |
19025 | @@ -460,6 +463,16 @@ static void cpu_stopper_thread(unsigned int cpu) | |
19026 | struct cpu_stop_done *done = work->done; | |
19027 | int ret; | |
19028 | ||
19029 | + /* | |
19030 | + * Wait until the stopper has finished scheduling on all | |
19031 | + * cpus. | |
19032 | + */ | |
19033 | + lg_global_lock(&stop_cpus_lock); | |
19034 | + /* | |
19035 | + * Let other cpu threads continue as well | |
19036 | + */ | |
19037 | + lg_global_unlock(&stop_cpus_lock); | |
19038 | + | |
19039 | /* cpu stop callbacks must not sleep, make in_atomic() == T */ | |
19040 | preempt_count_inc(); | |
19041 | ret = fn(arg); | |
19042 | @@ -526,10 +539,12 @@ static int __init cpu_stop_init(void) | |
19043 | for_each_possible_cpu(cpu) { | |
19044 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
19045 | ||
19046 | - spin_lock_init(&stopper->lock); | |
19047 | + raw_spin_lock_init(&stopper->lock); | |
19048 | INIT_LIST_HEAD(&stopper->works); | |
19049 | } | |
19050 | ||
19051 | + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock"); | |
19052 | + | |
19053 | BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); | |
19054 | stop_machine_unpark(raw_smp_processor_id()); | |
19055 | stop_machine_initialized = true; | |
19056 | @@ -624,7 +639,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, | |
19057 | set_state(&msdata, MULTI_STOP_PREPARE); | |
19058 | cpu_stop_init_done(&done, num_active_cpus()); | |
19059 | queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, | |
19060 | - &done); | |
19061 | + &done, true); | |
19062 | ret = multi_cpu_stop(&msdata); | |
19063 | ||
19064 | /* Busy wait for completion. */ | |
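The stopper lock conversion above follows the general RT rule: spinlock_t becomes a sleeping lock on PREEMPT_RT_FULL, so locks taken with interrupts hard-disabled (or from the stopper machinery itself) must be raw. A minimal sketch of the raw variant (illustrative, not part of the patch):

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);
static int demo_shared;

static void demo_update(void)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&demo_lock, flags);	/* really disables irqs, even on RT */
	demo_shared++;
	raw_spin_unlock_irqrestore(&demo_lock, flags);
}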
19065 | diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c | |
19066 | index 9ba7c820fc23..d85f638fd99e 100644 | |
19067 | --- a/kernel/time/hrtimer.c | |
19068 | +++ b/kernel/time/hrtimer.c | |
19069 | @@ -53,6 +53,7 @@ | |
19070 | #include <asm/uaccess.h> | |
19071 | ||
19072 | #include <trace/events/timer.h> | |
19073 | +#include <trace/events/hist.h> | |
19074 | ||
19075 | #include "tick-internal.h" | |
19076 | ||
19077 | @@ -695,6 +696,29 @@ static void hrtimer_switch_to_hres(void) | |
19078 | retrigger_next_event(NULL); | |
19079 | } | |
19080 | ||
19081 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19082 | + | |
19083 | +static struct swork_event clock_set_delay_work; | |
19084 | + | |
19085 | +static void run_clock_set_delay(struct swork_event *event) | |
19086 | +{ | |
19087 | + clock_was_set(); | |
19088 | +} | |
19089 | + | |
19090 | +void clock_was_set_delayed(void) | |
19091 | +{ | |
19092 | + swork_queue(&clock_set_delay_work); | |
19093 | +} | |
19094 | + | |
19095 | +static __init int create_clock_set_delay_thread(void) | |
19096 | +{ | |
19097 | + WARN_ON(swork_get()); | |
19098 | + INIT_SWORK(&clock_set_delay_work, run_clock_set_delay); | |
19099 | + return 0; | |
19100 | +} | |
19101 | +early_initcall(create_clock_set_delay_thread); | |
19102 | +#else /* PREEMPT_RT_FULL */ | |
19103 | + | |
19104 | static void clock_was_set_work(struct work_struct *work) | |
19105 | { | |
19106 | clock_was_set(); | |
19107 | @@ -710,6 +734,7 @@ void clock_was_set_delayed(void) | |
19108 | { | |
19109 | schedule_work(&hrtimer_work); | |
19110 | } | |
19111 | +#endif | |
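clock_was_set_delayed() can be called from contexts where schedule_work() is not usable on RT, so the patch routes it through the RT "simple work" (swork) thread; the same pattern reappears in kernel/time/ntp.c further down. A condensed sketch of that pattern (hypothetical names; swork itself is provided elsewhere in this series):

#include <linux/bug.h>
#include <linux/init.h>
#include <linux/swork.h>

static struct swork_event demo_event;

static void demo_handler(struct swork_event *event)
{
	/* runs in the swork kernel thread, fully preemptible */
}

static int __init demo_swork_init(void)
{
	WARN_ON(swork_get());			/* bring up / reference the swork thread */
	INIT_SWORK(&demo_event, demo_handler);
	return 0;
}
early_initcall(demo_swork_init);

static void demo_trigger(void)
{
	swork_queue(&demo_event);		/* safe from hard irq context */
}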
19112 | ||
19113 | #else | |
19114 | ||
19115 | @@ -719,11 +744,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
19116 | static inline void hrtimer_switch_to_hres(void) { } | |
19117 | static inline void | |
19118 | hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
19119 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
19120 | - struct hrtimer_clock_base *base) | |
19121 | -{ | |
19122 | - return 0; | |
19123 | -} | |
19124 | +static inline void hrtimer_reprogram(struct hrtimer *timer, | |
19125 | + struct hrtimer_clock_base *base) { } | |
19126 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
19127 | static inline void retrigger_next_event(void *arg) { } | |
19128 | ||
19129 | @@ -855,6 +877,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |
19130 | } | |
19131 | EXPORT_SYMBOL_GPL(hrtimer_forward); | |
19132 | ||
19133 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
19134 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
19135 | + | |
19136 | +/** | |
19137 | + * hrtimer_wait_for_timer - Wait for a running timer | |
19138 | + * | |
19139 | + * @timer: timer to wait for | |
19140 | + * | |
19141 | + * The function waits on the waitqueue of the timer base in case | |
19142 | + * the timer's callback function is currently executing. The | |
19143 | + * waitqueue is woken up after the timer callback function has | |
19144 | + * finished execution. | |
19145 | + */ | |
19146 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) | |
19147 | +{ | |
19148 | + struct hrtimer_clock_base *base = timer->base; | |
19149 | + | |
19150 | + if (base && base->cpu_base && !timer->irqsafe) | |
19151 | + wait_event(base->cpu_base->wait, | |
19152 | + !(hrtimer_callback_running(timer))); | |
19153 | +} | |
19154 | + | |
19155 | +#else | |
19156 | +# define wake_up_timer_waiters(b) do { } while (0) | |
19157 | +#endif | |
19158 | + | |
19159 | /* | |
19160 | * enqueue_hrtimer - internal function to (re)start a timer | |
19161 | * | |
19162 | @@ -896,6 +944,11 @@ static void __remove_hrtimer(struct hrtimer *timer, | |
19163 | if (!(state & HRTIMER_STATE_ENQUEUED)) | |
19164 | return; | |
19165 | ||
19166 | + if (unlikely(!list_empty(&timer->cb_entry))) { | |
19167 | + list_del_init(&timer->cb_entry); | |
19168 | + return; | |
19169 | + } | |
19170 | + | |
19171 | if (!timerqueue_del(&base->active, &timer->node)) | |
19172 | cpu_base->active_bases &= ~(1 << base->index); | |
19173 | ||
19174 | @@ -991,7 +1044,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
19175 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
19176 | ||
19177 | timer_stats_hrtimer_set_start_info(timer); | |
19178 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
19179 | + { | |
19180 | + ktime_t now = new_base->get_time(); | |
19181 | ||
19182 | + if (ktime_to_ns(tim) < ktime_to_ns(now)) | |
19183 | + timer->praecox = now; | |
19184 | + else | |
19185 | + timer->praecox = ktime_set(0, 0); | |
19186 | + } | |
19187 | +#endif | |
19188 | leftmost = enqueue_hrtimer(timer, new_base); | |
19189 | if (!leftmost) | |
19190 | goto unlock; | |
19191 | @@ -1063,7 +1125,7 @@ int hrtimer_cancel(struct hrtimer *timer) | |
19192 | ||
19193 | if (ret >= 0) | |
19194 | return ret; | |
19195 | - cpu_relax(); | |
19196 | + hrtimer_wait_for_timer(timer); | |
19197 | } | |
19198 | } | |
19199 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
19200 | @@ -1127,6 +1189,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
19201 | ||
19202 | base = hrtimer_clockid_to_base(clock_id); | |
19203 | timer->base = &cpu_base->clock_base[base]; | |
19204 | + INIT_LIST_HEAD(&timer->cb_entry); | |
19205 | timerqueue_init(&timer->node); | |
19206 | ||
19207 | #ifdef CONFIG_TIMER_STATS | |
19208 | @@ -1167,6 +1230,7 @@ bool hrtimer_active(const struct hrtimer *timer) | |
19209 | seq = raw_read_seqcount_begin(&cpu_base->seq); | |
19210 | ||
19211 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
19212 | + cpu_base->running_soft == timer || | |
19213 | cpu_base->running == timer) | |
19214 | return true; | |
19215 | ||
19216 | @@ -1265,10 +1329,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
19217 | cpu_base->running = NULL; | |
19218 | } | |
19219 | ||
19220 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
19221 | +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, | |
19222 | + struct hrtimer_clock_base *base) | |
19223 | +{ | |
19224 | + int leftmost; | |
19225 | + | |
19226 | + if (restart != HRTIMER_NORESTART && | |
19227 | + !(timer->state & HRTIMER_STATE_ENQUEUED)) { | |
19228 | + | |
19229 | + leftmost = enqueue_hrtimer(timer, base); | |
19230 | + if (!leftmost) | |
19231 | + return; | |
19232 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
19233 | + if (!hrtimer_is_hres_active(timer)) { | |
19234 | + /* | |
19235 | + * Kick to reschedule the next tick to handle the new timer | |
19236 | + * on dynticks target. | |
19237 | + */ | |
19238 | + if (base->cpu_base->nohz_active) | |
19239 | + wake_up_nohz_cpu(base->cpu_base->cpu); | |
19240 | + } else { | |
19241 | + | |
19242 | + hrtimer_reprogram(timer, base); | |
19243 | + } | |
19244 | +#endif | |
19245 | + } | |
19246 | +} | |
19247 | + | |
19248 | +/* | |
19249 | + * The changes in mainline which removed the callback modes from | |
19250 | + * hrtimer are not yet working with -rt. The non-wakeup_process() | |
19251 | + * based callbacks which involve sleeping locks need to be treated | |
19252 | + * separately. | |
19253 | + */ | |
19254 | +static void hrtimer_rt_run_pending(void) | |
19255 | +{ | |
19256 | + enum hrtimer_restart (*fn)(struct hrtimer *); | |
19257 | + struct hrtimer_cpu_base *cpu_base; | |
19258 | + struct hrtimer_clock_base *base; | |
19259 | + struct hrtimer *timer; | |
19260 | + int index, restart; | |
19261 | + | |
19262 | + local_irq_disable(); | |
19263 | + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); | |
19264 | + | |
19265 | + raw_spin_lock(&cpu_base->lock); | |
19266 | + | |
19267 | + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | |
19268 | + base = &cpu_base->clock_base[index]; | |
19269 | + | |
19270 | + while (!list_empty(&base->expired)) { | |
19271 | + timer = list_first_entry(&base->expired, | |
19272 | + struct hrtimer, cb_entry); | |
19273 | + | |
19274 | + /* | |
19275 | + * Same as the above __run_hrtimer function, | |
19276 | + * except that we run with interrupts enabled. | |
19277 | + */ | |
19278 | + debug_deactivate(timer); | |
19279 | + cpu_base->running_soft = timer; | |
19280 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
19281 | + | |
19282 | + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
19283 | + timer_stats_account_hrtimer(timer); | |
19284 | + fn = timer->function; | |
19285 | + | |
19286 | + raw_spin_unlock_irq(&cpu_base->lock); | |
19287 | + restart = fn(timer); | |
19288 | + raw_spin_lock_irq(&cpu_base->lock); | |
19289 | + | |
19290 | + hrtimer_rt_reprogram(restart, timer, base); | |
19291 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
19292 | + | |
19293 | + WARN_ON_ONCE(cpu_base->running_soft != timer); | |
19294 | + cpu_base->running_soft = NULL; | |
19295 | + } | |
19296 | + } | |
19297 | + | |
19298 | + raw_spin_unlock_irq(&cpu_base->lock); | |
19299 | + | |
19300 | + wake_up_timer_waiters(cpu_base); | |
19301 | +} | |
19302 | + | |
19303 | +static int hrtimer_rt_defer(struct hrtimer *timer) | |
19304 | +{ | |
19305 | + if (timer->irqsafe) | |
19306 | + return 0; | |
19307 | + | |
19308 | + __remove_hrtimer(timer, timer->base, timer->state, 0); | |
19309 | + list_add_tail(&timer->cb_entry, &timer->base->expired); | |
19310 | + return 1; | |
19311 | +} | |
19312 | + | |
19313 | +#else | |
19314 | + | |
19315 | +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } | |
19316 | + | |
19317 | +#endif | |
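hrtimer_rt_defer() above decides whether a timer is pushed out to the HRTIMER softirq: only timers explicitly marked irqsafe keep running from the hard interrupt. A hedged sketch of what that looks like from a timer user's side (hypothetical names; the irqsafe field is added elsewhere in this series):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
{
	/* with irqsafe set this still runs in hard irq context on RT,
	 * so it must not take sleeping locks */
	return HRTIMER_NORESTART;
}

static void demo_timer_start(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	demo_timer.function = demo_timer_fn;
	demo_timer.irqsafe = 1;			/* skip the softirq deferral above */
	hrtimer_start(&demo_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
}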
19318 | + | |
19319 | +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); | |
19320 | + | |
19321 | static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
19322 | { | |
19323 | struct hrtimer_clock_base *base = cpu_base->clock_base; | |
19324 | unsigned int active = cpu_base->active_bases; | |
19325 | + int raise = 0; | |
19326 | ||
19327 | for (; active; base++, active >>= 1) { | |
19328 | struct timerqueue_node *node; | |
19329 | @@ -1284,6 +1450,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
19330 | ||
19331 | timer = container_of(node, struct hrtimer, node); | |
19332 | ||
19333 | + trace_hrtimer_interrupt(raw_smp_processor_id(), | |
19334 | + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? | |
19335 | + timer->praecox : hrtimer_get_expires(timer), | |
19336 | + basenow)), | |
19337 | + current, | |
19338 | + timer->function == hrtimer_wakeup ? | |
19339 | + container_of(timer, struct hrtimer_sleeper, | |
19340 | + timer)->task : NULL); | |
19341 | + | |
19342 | /* | |
19343 | * The immediate goal for using the softexpires is | |
19344 | * minimizing wakeups, not running timers at the | |
19345 | @@ -1299,9 +1474,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
19346 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
19347 | break; | |
19348 | ||
19349 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
19350 | + if (!hrtimer_rt_defer(timer)) | |
19351 | + __run_hrtimer(cpu_base, base, timer, &basenow); | |
19352 | + else | |
19353 | + raise = 1; | |
19354 | } | |
19355 | } | |
19356 | + if (raise) | |
19357 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
19358 | } | |
19359 | ||
19360 | #ifdef CONFIG_HIGH_RES_TIMERS | |
19361 | @@ -1464,16 +1644,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |
19362 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |
19363 | { | |
19364 | sl->timer.function = hrtimer_wakeup; | |
19365 | + sl->timer.irqsafe = 1; | |
19366 | sl->task = task; | |
19367 | } | |
19368 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
19369 | ||
19370 | -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | |
19371 | +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, | |
19372 | + unsigned long state) | |
19373 | { | |
19374 | hrtimer_init_sleeper(t, current); | |
19375 | ||
19376 | do { | |
19377 | - set_current_state(TASK_INTERRUPTIBLE); | |
19378 | + set_current_state(state); | |
19379 | hrtimer_start_expires(&t->timer, mode); | |
19380 | ||
19381 | if (likely(t->task)) | |
19382 | @@ -1515,7 +1697,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
19383 | HRTIMER_MODE_ABS); | |
19384 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
19385 | ||
19386 | - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | |
19387 | + /* cpu_chill() does not care about restart state. */ | |
19388 | + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) | |
19389 | goto out; | |
19390 | ||
19391 | rmtp = restart->nanosleep.rmtp; | |
19392 | @@ -1532,8 +1715,10 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
19393 | return ret; | |
19394 | } | |
19395 | ||
19396 | -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
19397 | - const enum hrtimer_mode mode, const clockid_t clockid) | |
19398 | +static long | |
19399 | +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
19400 | + const enum hrtimer_mode mode, const clockid_t clockid, | |
19401 | + unsigned long state) | |
19402 | { | |
19403 | struct restart_block *restart; | |
19404 | struct hrtimer_sleeper t; | |
19405 | @@ -1546,7 +1731,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
19406 | ||
19407 | hrtimer_init_on_stack(&t.timer, clockid, mode); | |
19408 | hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); | |
19409 | - if (do_nanosleep(&t, mode)) | |
19410 | + if (do_nanosleep(&t, mode, state)) | |
19411 | goto out; | |
19412 | ||
19413 | /* Absolute timers do not update the rmtp value and restart: */ | |
19414 | @@ -1573,6 +1758,12 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
19415 | return ret; | |
19416 | } | |
19417 | ||
19418 | +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
19419 | + const enum hrtimer_mode mode, const clockid_t clockid) | |
19420 | +{ | |
19421 | + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); | |
19422 | +} | |
19423 | + | |
19424 | SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
19425 | struct timespec __user *, rmtp) | |
19426 | { | |
19427 | @@ -1587,6 +1778,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
19428 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | |
19429 | } | |
19430 | ||
19431 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19432 | +/* | |
19433 | + * Sleep for 1 ms in hope whoever holds what we want will let it go. | |
19434 | + */ | |
19435 | +void cpu_chill(void) | |
19436 | +{ | |
19437 | + struct timespec tu = { | |
19438 | + .tv_nsec = NSEC_PER_MSEC, | |
19439 | + }; | |
19440 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
19441 | + | |
19442 | + current->flags |= PF_NOFREEZE; | |
19443 | + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, | |
19444 | + TASK_UNINTERRUPTIBLE); | |
19445 | + if (!freeze_flag) | |
19446 | + current->flags &= ~PF_NOFREEZE; | |
19447 | +} | |
19448 | +EXPORT_SYMBOL(cpu_chill); | |
19449 | +#endif | |
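cpu_chill() is meant as a drop-in for cpu_relax()/yield() in retry loops elsewhere in the RT series, where spinning would keep a lower-priority lock holder from ever running. A sketch of that pattern (hypothetical caller; the declaration is assumed to live in <linux/delay.h>, as added by this series):

#include <asm/processor.h>	/* cpu_relax() */
#include <linux/bitops.h>
#include <linux/delay.h>	/* cpu_chill() declaration (assumed) */

static void demo_wait_for_bit(unsigned long *word, int bit)
{
	while (test_bit(bit, word)) {
#ifdef CONFIG_PREEMPT_RT_FULL
		cpu_chill();	/* sleep ~1ms so the current holder can make progress */
#else
		cpu_relax();	/* plain busy-wait hint */
#endif
	}
}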
19450 | + | |
19451 | /* | |
19452 | * Functions related to boot-time initialization: | |
19453 | */ | |
19454 | @@ -1598,10 +1809,14 @@ int hrtimers_prepare_cpu(unsigned int cpu) | |
19455 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | |
19456 | cpu_base->clock_base[i].cpu_base = cpu_base; | |
19457 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
19458 | + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); | |
19459 | } | |
19460 | ||
19461 | cpu_base->cpu = cpu; | |
19462 | hrtimer_init_hres(cpu_base); | |
19463 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
19464 | + init_waitqueue_head(&cpu_base->wait); | |
19465 | +#endif | |
19466 | return 0; | |
19467 | } | |
19468 | ||
19469 | @@ -1671,9 +1886,26 @@ int hrtimers_dead_cpu(unsigned int scpu) | |
19470 | ||
19471 | #endif /* CONFIG_HOTPLUG_CPU */ | |
19472 | ||
19473 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
19474 | + | |
19475 | +static void run_hrtimer_softirq(struct softirq_action *h) | |
19476 | +{ | |
19477 | + hrtimer_rt_run_pending(); | |
19478 | +} | |
19479 | + | |
19480 | +static void hrtimers_open_softirq(void) | |
19481 | +{ | |
19482 | + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | |
19483 | +} | |
19484 | + | |
19485 | +#else | |
19486 | +static void hrtimers_open_softirq(void) { } | |
19487 | +#endif | |
19488 | + | |
19489 | void __init hrtimers_init(void) | |
19490 | { | |
19491 | hrtimers_prepare_cpu(smp_processor_id()); | |
19492 | + hrtimers_open_softirq(); | |
19493 | } | |
19494 | ||
19495 | /** | |
19496 | diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c | |
19497 | index 1d5c7204ddc9..184de6751180 100644 | |
19498 | --- a/kernel/time/itimer.c | |
19499 | +++ b/kernel/time/itimer.c | |
19500 | @@ -213,6 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) | |
19501 | /* We are sharing ->siglock with it_real_fn() */ | |
19502 | if (hrtimer_try_to_cancel(timer) < 0) { | |
19503 | spin_unlock_irq(&tsk->sighand->siglock); | |
19504 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
19505 | goto again; | |
19506 | } | |
19507 | expires = timeval_to_ktime(value->it_value); | |
19508 | diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c | |
19509 | index 555e21f7b966..a5d6435fabbb 100644 | |
19510 | --- a/kernel/time/jiffies.c | |
19511 | +++ b/kernel/time/jiffies.c | |
19512 | @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = { | |
19513 | .max_cycles = 10, | |
19514 | }; | |
19515 | ||
19516 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
19517 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
19518 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
19519 | ||
19520 | #if (BITS_PER_LONG < 64) | |
19521 | u64 get_jiffies_64(void) | |
19522 | @@ -83,9 +84,9 @@ u64 get_jiffies_64(void) | |
19523 | u64 ret; | |
19524 | ||
19525 | do { | |
19526 | - seq = read_seqbegin(&jiffies_lock); | |
19527 | + seq = read_seqcount_begin(&jiffies_seq); | |
19528 | ret = jiffies_64; | |
19529 | - } while (read_seqretry(&jiffies_lock, seq)); | |
19530 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
19531 | return ret; | |
19532 | } | |
19533 | EXPORT_SYMBOL(get_jiffies_64); | |
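The reader side shown above pairs with a writer side elsewhere in the series that takes the new raw lock and wraps the update in the seqcount; roughly (illustrative, not part of this hunk):

#include <linux/jiffies.h>
#include <linux/seqlock.h>
#include <linux/spinlock.h>

extern raw_spinlock_t jiffies_lock;	/* declared in <linux/jiffies.h> by this series */
extern seqcount_t jiffies_seq;

static void demo_advance_jiffies(u64 ticks)
{
	raw_spin_lock(&jiffies_lock);		/* serialize writers */
	write_seqcount_begin(&jiffies_seq);	/* readers of get_jiffies_64() will retry */
	jiffies_64 += ticks;
	write_seqcount_end(&jiffies_seq);
	raw_spin_unlock(&jiffies_lock);
}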
19534 | diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c | |
19535 | index 6df8927c58a5..05b7391bf9bd 100644 | |
19536 | --- a/kernel/time/ntp.c | |
19537 | +++ b/kernel/time/ntp.c | |
19538 | @@ -17,6 +17,7 @@ | |
19539 | #include <linux/module.h> | |
19540 | #include <linux/rtc.h> | |
19541 | #include <linux/math64.h> | |
19542 | +#include <linux/swork.h> | |
19543 | ||
19544 | #include "ntp_internal.h" | |
19545 | #include "timekeeping_internal.h" | |
19546 | @@ -568,10 +569,35 @@ static void sync_cmos_clock(struct work_struct *work) | |
19547 | &sync_cmos_work, timespec64_to_jiffies(&next)); | |
19548 | } | |
19549 | ||
19550 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19551 | + | |
19552 | +static void run_clock_set_delay(struct swork_event *event) | |
19553 | +{ | |
19554 | + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
19555 | +} | |
19556 | + | |
19557 | +static struct swork_event ntp_cmos_swork; | |
19558 | + | |
19559 | +void ntp_notify_cmos_timer(void) | |
19560 | +{ | |
19561 | + swork_queue(&ntp_cmos_swork); | |
19562 | +} | |
19563 | + | |
19564 | +static __init int create_cmos_delay_thread(void) | |
19565 | +{ | |
19566 | + WARN_ON(swork_get()); | |
19567 | + INIT_SWORK(&ntp_cmos_swork, run_clock_set_delay); | |
19568 | + return 0; | |
19569 | +} | |
19570 | +early_initcall(create_cmos_delay_thread); | |
19571 | + | |
19572 | +#else | |
19573 | + | |
19574 | void ntp_notify_cmos_timer(void) | |
19575 | { | |
19576 | queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
19577 | } | |
19578 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
19579 | ||
19580 | #else | |
19581 | void ntp_notify_cmos_timer(void) { } | |
19582 | diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c | |
19583 | index 39008d78927a..633f4eaca9e7 100644 | |
19584 | --- a/kernel/time/posix-cpu-timers.c | |
19585 | +++ b/kernel/time/posix-cpu-timers.c | |
19586 | @@ -3,6 +3,7 @@ | |
19587 | */ | |
19588 | ||
19589 | #include <linux/sched.h> | |
19590 | +#include <linux/sched/rt.h> | |
19591 | #include <linux/posix-timers.h> | |
19592 | #include <linux/errno.h> | |
19593 | #include <linux/math64.h> | |
19594 | @@ -620,7 +621,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |
19595 | /* | |
19596 | * Disarm any old timer after extracting its expiry time. | |
19597 | */ | |
19598 | - WARN_ON_ONCE(!irqs_disabled()); | |
19599 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
19600 | ||
19601 | ret = 0; | |
19602 | old_incr = timer->it.cpu.incr; | |
19603 | @@ -1064,7 +1065,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |
19604 | /* | |
19605 | * Now re-arm for the new expiry time. | |
19606 | */ | |
19607 | - WARN_ON_ONCE(!irqs_disabled()); | |
19608 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
19609 | arm_timer(timer); | |
19610 | unlock_task_sighand(p, &flags); | |
19611 | ||
19612 | @@ -1153,13 +1154,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |
19613 | * already updated our counts. We need to check if any timers fire now. | |
19614 | * Interrupts are disabled. | |
19615 | */ | |
19616 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
19617 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
19618 | { | |
19619 | LIST_HEAD(firing); | |
19620 | struct k_itimer *timer, *next; | |
19621 | unsigned long flags; | |
19622 | ||
19623 | - WARN_ON_ONCE(!irqs_disabled()); | |
19624 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
19625 | ||
19626 | /* | |
19627 | * The fast path checks that there are no expired thread or thread | |
19628 | @@ -1213,6 +1214,190 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |
19629 | } | |
19630 | } | |
19631 | ||
19632 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
19633 | +#include <linux/kthread.h> | |
19634 | +#include <linux/cpu.h> | |
19635 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
19636 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
19637 | + | |
19638 | +static int posix_cpu_timers_thread(void *data) | |
19639 | +{ | |
19640 | + int cpu = (long)data; | |
19641 | + | |
19642 | + BUG_ON(per_cpu(posix_timer_task,cpu) != current); | |
19643 | + | |
19644 | + while (!kthread_should_stop()) { | |
19645 | + struct task_struct *tsk = NULL; | |
19646 | + struct task_struct *next = NULL; | |
19647 | + | |
19648 | + if (cpu_is_offline(cpu)) | |
19649 | + goto wait_to_die; | |
19650 | + | |
19651 | + /* grab task list */ | |
19652 | + raw_local_irq_disable(); | |
19653 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
19654 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
19655 | + raw_local_irq_enable(); | |
19656 | + | |
19657 | + /* its possible the list is empty, just return */ | |
19658 | + if (!tsk) { | |
19659 | + set_current_state(TASK_INTERRUPTIBLE); | |
19660 | + schedule(); | |
19661 | + __set_current_state(TASK_RUNNING); | |
19662 | + continue; | |
19663 | + } | |
19664 | + | |
19665 | + /* Process task list */ | |
19666 | + while (1) { | |
19667 | + /* save next */ | |
19668 | + next = tsk->posix_timer_list; | |
19669 | + | |
19670 | + /* run the task's timers, clear its list pointer and | |
19671 | + * drop the reference | |
19672 | + */ | |
19673 | + __run_posix_cpu_timers(tsk); | |
19674 | + tsk->posix_timer_list = NULL; | |
19675 | + put_task_struct(tsk); | |
19676 | + | |
19677 | + /* check if this is the last on the list */ | |
19678 | + if (next == tsk) | |
19679 | + break; | |
19680 | + tsk = next; | |
19681 | + } | |
19682 | + } | |
19683 | + return 0; | |
19684 | + | |
19685 | +wait_to_die: | |
19686 | + /* Wait for kthread_stop */ | |
19687 | + set_current_state(TASK_INTERRUPTIBLE); | |
19688 | + while (!kthread_should_stop()) { | |
19689 | + schedule(); | |
19690 | + set_current_state(TASK_INTERRUPTIBLE); | |
19691 | + } | |
19692 | + __set_current_state(TASK_RUNNING); | |
19693 | + return 0; | |
19694 | +} | |
19695 | + | |
19696 | +static inline int __fastpath_timer_check(struct task_struct *tsk) | |
19697 | +{ | |
19698 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ | |
19699 | + if (unlikely(tsk->exit_state)) | |
19700 | + return 0; | |
19701 | + | |
19702 | + if (!task_cputime_zero(&tsk->cputime_expires)) | |
19703 | + return 1; | |
19704 | + | |
19705 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
19706 | + return 1; | |
19707 | + | |
19708 | + return 0; | |
19709 | +} | |
19710 | + | |
19711 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
19712 | +{ | |
19713 | + unsigned long cpu = smp_processor_id(); | |
19714 | + struct task_struct *tasklist; | |
19715 | + | |
19716 | + BUG_ON(!irqs_disabled()); | |
19717 | + if(!per_cpu(posix_timer_task, cpu)) | |
19718 | + return; | |
19719 | + /* get per-cpu references */ | |
19720 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
19721 | + | |
19722 | + /* check to see if we're already queued */ | |
19723 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
19724 | + get_task_struct(tsk); | |
19725 | + if (tasklist) { | |
19726 | + tsk->posix_timer_list = tasklist; | |
19727 | + } else { | |
19728 | + /* | |
19729 | + * The list is terminated by a self-pointing | |
19730 | + * task_struct | |
19731 | + */ | |
19732 | + tsk->posix_timer_list = tsk; | |
19733 | + } | |
19734 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
19735 | + | |
19736 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
19737 | + } | |
19738 | +} | |
19739 | + | |
19740 | +/* | |
19741 | + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. | |
19742 | + * Here we can start up the necessary migration thread for the new CPU. | |
19743 | + */ | |
19744 | +static int posix_cpu_thread_call(struct notifier_block *nfb, | |
19745 | + unsigned long action, void *hcpu) | |
19746 | +{ | |
19747 | + int cpu = (long)hcpu; | |
19748 | + struct task_struct *p; | |
19749 | + struct sched_param param; | |
19750 | + | |
19751 | + switch (action) { | |
19752 | + case CPU_UP_PREPARE: | |
19753 | + p = kthread_create(posix_cpu_timers_thread, hcpu, | |
19754 | + "posixcputmr/%d",cpu); | |
19755 | + if (IS_ERR(p)) | |
19756 | + return NOTIFY_BAD; | |
19757 | + p->flags |= PF_NOFREEZE; | |
19758 | + kthread_bind(p, cpu); | |
19759 | + /* Must be high prio to avoid getting starved */ | |
19760 | + param.sched_priority = MAX_RT_PRIO-1; | |
19761 | + sched_setscheduler(p, SCHED_FIFO, ¶m); | |
19762 | + per_cpu(posix_timer_task,cpu) = p; | |
19763 | + break; | |
19764 | + case CPU_ONLINE: | |
19765 | + /* Strictly unnecessary, as the first user will wake it. */ | |
19766 | + wake_up_process(per_cpu(posix_timer_task,cpu)); | |
19767 | + break; | |
19768 | +#ifdef CONFIG_HOTPLUG_CPU | |
19769 | + case CPU_UP_CANCELED: | |
19770 | + /* Unbind it from the offline cpu so it can run. Fall through. */ | |
19771 | + kthread_bind(per_cpu(posix_timer_task, cpu), | |
19772 | + cpumask_any(cpu_online_mask)); | |
19773 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
19774 | + per_cpu(posix_timer_task,cpu) = NULL; | |
19775 | + break; | |
19776 | + case CPU_DEAD: | |
19777 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
19778 | + per_cpu(posix_timer_task,cpu) = NULL; | |
19779 | + break; | |
19780 | +#endif | |
19781 | + } | |
19782 | + return NOTIFY_OK; | |
19783 | +} | |
19784 | + | |
19785 | +/* Register at highest priority so that task migration (migrate_all_tasks) | |
19786 | + * happens before everything else. | |
19787 | + */ | |
19788 | +static struct notifier_block posix_cpu_thread_notifier = { | |
19789 | + .notifier_call = posix_cpu_thread_call, | |
19790 | + .priority = 10 | |
19791 | +}; | |
19792 | + | |
19793 | +static int __init posix_cpu_thread_init(void) | |
19794 | +{ | |
19795 | + void *hcpu = (void *)(long)smp_processor_id(); | |
19796 | + /* Start one for boot CPU. */ | |
19797 | + unsigned long cpu; | |
19798 | + | |
19799 | + /* init the per-cpu posix_timer_tasklets */ | |
19800 | + for_each_possible_cpu(cpu) | |
19801 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
19802 | + | |
19803 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); | |
19804 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); | |
19805 | + register_cpu_notifier(&posix_cpu_thread_notifier); | |
19806 | + return 0; | |
19807 | +} | |
19808 | +early_initcall(posix_cpu_thread_init); | |
19809 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
19810 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
19811 | +{ | |
19812 | + __run_posix_cpu_timers(tsk); | |
19813 | +} | |
19814 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
19815 | + | |
19816 | /* | |
19817 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
19818 | * The tsk->sighand->siglock must be held by the caller. | |
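The hunk above is the core of the RT handling for POSIX CPU timers: instead of expiring timers from the timer interrupt, run_posix_cpu_timers() only queues the task on a per-CPU list and wakes a SCHED_FIFO kthread ("posixcputmr/N", priority MAX_RT_PRIO-1) created by the CPU notifier. On a kernel with this patch applied you would expect one such thread per CPU; a quick check (the PIDs in the output are illustrative):

    # ps -eo pid,rtprio,comm | grep posixcputmr
       13     99 posixcputmr/0
       14     99 posixcputmr/1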
19819 | diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c | |
19820 | index f2826c35e918..464a98155a0e 100644 | |
19821 | --- a/kernel/time/posix-timers.c | |
19822 | +++ b/kernel/time/posix-timers.c | |
19823 | @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |
19824 | static struct pid *good_sigevent(sigevent_t * event) | |
19825 | { | |
19826 | struct task_struct *rtn = current->group_leader; | |
19827 | + int sig = event->sigev_signo; | |
19828 | ||
19829 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | |
19830 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | |
19831 | @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event) | |
19832 | return NULL; | |
19833 | ||
19834 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | |
19835 | - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | |
19836 | + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || | |
19837 | + sig_kernel_coredump(sig))) | |
19838 | return NULL; | |
19839 | ||
19840 | return task_pid(rtn); | |
19841 | @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) | |
19842 | return overrun; | |
19843 | } | |
19844 | ||
19845 | +/* | |
19846 | + * Protected by RCU! | |
19847 | + */ | |
19848 | +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) | |
19849 | +{ | |
19850 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19851 | + if (kc->timer_set == common_timer_set) | |
19852 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
19853 | + else | |
19854 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
19855 | + schedule_timeout(1); | |
19856 | +#endif | |
19857 | +} | |
19858 | + | |
19859 | /* Set a POSIX.1b interval timer. */ | |
19860 | /* timr->it_lock is taken. */ | |
19861 | static int | |
19862 | @@ -903,6 +919,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
19863 | if (!timr) | |
19864 | return -EINVAL; | |
19865 | ||
19866 | + rcu_read_lock(); | |
19867 | kc = clockid_to_kclock(timr->it_clock); | |
19868 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
19869 | error = -EINVAL; | |
19870 | @@ -911,9 +928,12 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, | |
19871 | ||
19872 | unlock_timer(timr, flag); | |
19873 | if (error == TIMER_RETRY) { | |
19874 | + timer_wait_for_callback(kc, timr); | |
19875 | rtn = NULL; // We already got the old time... | |
19876 | + rcu_read_unlock(); | |
19877 | goto retry; | |
19878 | } | |
19879 | + rcu_read_unlock(); | |
19880 | ||
19881 | if (old_setting && !error && | |
19882 | copy_to_user(old_setting, &old_spec, sizeof (old_spec))) | |
19883 | @@ -951,10 +971,15 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) | |
19884 | if (!timer) | |
19885 | return -EINVAL; | |
19886 | ||
19887 | + rcu_read_lock(); | |
19888 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
19889 | unlock_timer(timer, flags); | |
19890 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
19891 | + timer); | |
19892 | + rcu_read_unlock(); | |
19893 | goto retry_delete; | |
19894 | } | |
19895 | + rcu_read_unlock(); | |
19896 | ||
19897 | spin_lock(¤t->sighand->siglock); | |
19898 | list_del(&timer->list); | |
19899 | @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer) | |
19900 | retry_delete: | |
19901 | spin_lock_irqsave(&timer->it_lock, flags); | |
19902 | ||
19903 | - if (timer_delete_hook(timer) == TIMER_RETRY) { | |
19904 | + /* On RT we can race with a deletion */ | |
19905 | + if (!timer->it_signal) { | |
19906 | unlock_timer(timer, flags); | |
19907 | + return; | |
19908 | + } | |
19909 | + | |
19910 | + if (timer_delete_hook(timer) == TIMER_RETRY) { | |
19911 | + rcu_read_lock(); | |
19912 | + unlock_timer(timer, flags); | |
19913 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
19914 | + timer); | |
19915 | + rcu_read_unlock(); | |
19916 | goto retry_delete; | |
19917 | } | |
19918 | list_del(&timer->list); | |
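The good_sigevent() change above additionally rejects signals that only the kernel may deliver (sig_kernel_only()) or that force a coredump (sig_kernel_coredump()), so a timer can no longer be armed to send e.g. SIGKILL. A minimal userspace check, illustrative only and not part of the patch (link with -lrt on older glibc):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct sigevent sev = {
                    .sigev_notify = SIGEV_SIGNAL,
                    .sigev_signo  = SIGKILL,        /* now rejected by good_sigevent() */
            };
            timer_t tid;

            if (timer_create(CLOCK_MONOTONIC, &sev, &tid) == -1)
                    perror("timer_create");         /* expected: Invalid argument */
            return 0;
    }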
19919 | diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c | |
19920 | index 690b797f522e..fe8ba1619879 100644 | |
19921 | --- a/kernel/time/tick-broadcast-hrtimer.c | |
19922 | +++ b/kernel/time/tick-broadcast-hrtimer.c | |
19923 | @@ -107,5 +107,6 @@ void tick_setup_hrtimer_broadcast(void) | |
19924 | { | |
19925 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
19926 | bctimer.function = bc_handler; | |
19927 | + bctimer.irqsafe = true; | |
19928 | clockevents_register_device(&ce_broadcast_hrtimer); | |
19929 | } | |
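Setting bctimer.irqsafe marks the broadcast timer as safe to expire from hard interrupt context; on PREEMPT_RT, hrtimer callbacks are otherwise deferred to softirq context, which the broadcast handler cannot tolerate. The same treatment is applied to the tick sched_timer further down. As a sketch (the irqsafe field exists only with the RT series applied; my_timer and my_callback are placeholders):

    hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    my_timer.function = my_callback;
    my_timer.irqsafe  = 1;  /* do not defer this callback to softirq on RT */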
19930 | diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c | |
19931 | index 4fcd99e12aa0..5a47f2e98faf 100644 | |
19932 | --- a/kernel/time/tick-common.c | |
19933 | +++ b/kernel/time/tick-common.c | |
19934 | @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void) | |
19935 | static void tick_periodic(int cpu) | |
19936 | { | |
19937 | if (tick_do_timer_cpu == cpu) { | |
19938 | - write_seqlock(&jiffies_lock); | |
19939 | + raw_spin_lock(&jiffies_lock); | |
19940 | + write_seqcount_begin(&jiffies_seq); | |
19941 | ||
19942 | /* Keep track of the next tick event */ | |
19943 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
19944 | ||
19945 | do_timer(1); | |
19946 | - write_sequnlock(&jiffies_lock); | |
19947 | + write_seqcount_end(&jiffies_seq); | |
19948 | + raw_spin_unlock(&jiffies_lock); | |
19949 | update_wall_time(); | |
19950 | } | |
19951 | ||
19952 | @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |
19953 | ktime_t next; | |
19954 | ||
19955 | do { | |
19956 | - seq = read_seqbegin(&jiffies_lock); | |
19957 | + seq = read_seqcount_begin(&jiffies_seq); | |
19958 | next = tick_next_period; | |
19959 | - } while (read_seqretry(&jiffies_lock, seq)); | |
19960 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
19961 | ||
19962 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); | |
19963 | ||
19964 | diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c | |
19965 | index 2ec7c00228f3..c1b30b8c671a 100644 | |
19966 | --- a/kernel/time/tick-sched.c | |
19967 | +++ b/kernel/time/tick-sched.c | |
19968 | @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) | |
19969 | return; | |
19970 | ||
19971 | /* Reevaluate with jiffies_lock held */ | |
19972 | - write_seqlock(&jiffies_lock); | |
19973 | + raw_spin_lock(&jiffies_lock); | |
19974 | + write_seqcount_begin(&jiffies_seq); | |
19975 | ||
19976 | delta = ktime_sub(now, last_jiffies_update); | |
19977 | if (delta.tv64 >= tick_period.tv64) { | |
19978 | @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now) | |
19979 | /* Keep the tick_next_period variable up to date */ | |
19980 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
19981 | } else { | |
19982 | - write_sequnlock(&jiffies_lock); | |
19983 | + write_seqcount_end(&jiffies_seq); | |
19984 | + raw_spin_unlock(&jiffies_lock); | |
19985 | return; | |
19986 | } | |
19987 | - write_sequnlock(&jiffies_lock); | |
19988 | + write_seqcount_end(&jiffies_seq); | |
19989 | + raw_spin_unlock(&jiffies_lock); | |
19990 | update_wall_time(); | |
19991 | } | |
19992 | ||
19993 | @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void) | |
19994 | { | |
19995 | ktime_t period; | |
19996 | ||
19997 | - write_seqlock(&jiffies_lock); | |
19998 | + raw_spin_lock(&jiffies_lock); | |
19999 | + write_seqcount_begin(&jiffies_seq); | |
20000 | /* Did we start the jiffies update yet ? */ | |
20001 | if (last_jiffies_update.tv64 == 0) | |
20002 | last_jiffies_update = tick_next_period; | |
20003 | period = last_jiffies_update; | |
20004 | - write_sequnlock(&jiffies_lock); | |
20005 | + write_seqcount_end(&jiffies_seq); | |
20006 | + raw_spin_unlock(&jiffies_lock); | |
20007 | return period; | |
20008 | } | |
20009 | ||
20010 | @@ -212,6 +217,7 @@ static void nohz_full_kick_func(struct irq_work *work) | |
20011 | ||
20012 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
20013 | .func = nohz_full_kick_func, | |
20014 | + .flags = IRQ_WORK_HARD_IRQ, | |
20015 | }; | |
20016 | ||
20017 | /* | |
20018 | @@ -670,10 +676,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |
20019 | ||
20020 | /* Read jiffies and the time when jiffies were updated last */ | |
20021 | do { | |
20022 | - seq = read_seqbegin(&jiffies_lock); | |
20023 | + seq = read_seqcount_begin(&jiffies_seq); | |
20024 | basemono = last_jiffies_update.tv64; | |
20025 | basejiff = jiffies; | |
20026 | - } while (read_seqretry(&jiffies_lock, seq)); | |
20027 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
20028 | ts->last_jiffies = basejiff; | |
20029 | ||
20030 | if (rcu_needs_cpu(basemono, &next_rcu) || | |
20031 | @@ -874,14 +880,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |
20032 | return false; | |
20033 | ||
20034 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
20035 | - static int ratelimit; | |
20036 | - | |
20037 | - if (ratelimit < 10 && | |
20038 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
20039 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
20040 | - (unsigned int) local_softirq_pending()); | |
20041 | - ratelimit++; | |
20042 | - } | |
20043 | + softirq_check_pending_idle(); | |
20044 | return false; | |
20045 | } | |
20046 | ||
20047 | @@ -1190,6 +1189,7 @@ void tick_setup_sched_timer(void) | |
20048 | * Emulate tick processing via per-CPU hrtimers: | |
20049 | */ | |
20050 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
20051 | + ts->sched_timer.irqsafe = 1; | |
20052 | ts->sched_timer.function = tick_sched_timer; | |
20053 | ||
20054 | /* Get the next period (per-CPU) */ | |
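Two RT-specific details in the tick-sched hunks above: the nohz_full kick irq_work is flagged IRQ_WORK_HARD_IRQ so that, on RT, it runs from the hard interrupt path rather than being deferred to the now-threaded irq_work processing, and the softirq-pending warning is replaced by softirq_check_pending_idle(), a helper introduced elsewhere in this series. A sketch of the flag's use (my_work and my_func are placeholders):

    static void my_func(struct irq_work *work) { /* ... */ }

    static DEFINE_PER_CPU(struct irq_work, my_work) = {
            .func  = my_func,
            .flags = IRQ_WORK_HARD_IRQ,     /* on RT: run in hard IRQ context */
    };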
20055 | diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c | |
20056 | index 37dec7e3db43..fa8d90d2acc3 100644 | |
20057 | --- a/kernel/time/timekeeping.c | |
20058 | +++ b/kernel/time/timekeeping.c | |
20059 | @@ -2328,8 +2328,10 @@ EXPORT_SYMBOL(hardpps); | |
20060 | */ | |
20061 | void xtime_update(unsigned long ticks) | |
20062 | { | |
20063 | - write_seqlock(&jiffies_lock); | |
20064 | + raw_spin_lock(&jiffies_lock); | |
20065 | + write_seqcount_begin(&jiffies_seq); | |
20066 | do_timer(ticks); | |
20067 | - write_sequnlock(&jiffies_lock); | |
20068 | + write_seqcount_end(&jiffies_seq); | |
20069 | + raw_spin_unlock(&jiffies_lock); | |
20070 | update_wall_time(); | |
20071 | } | |
20072 | diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h | |
20073 | index 704f595ce83f..763a3e5121ff 100644 | |
20074 | --- a/kernel/time/timekeeping.h | |
20075 | +++ b/kernel/time/timekeeping.h | |
20076 | @@ -19,7 +19,8 @@ extern void timekeeping_resume(void); | |
20077 | extern void do_timer(unsigned long ticks); | |
20078 | extern void update_wall_time(void); | |
20079 | ||
20080 | -extern seqlock_t jiffies_lock; | |
20081 | +extern raw_spinlock_t jiffies_lock; | |
20082 | +extern seqcount_t jiffies_seq; | |
20083 | ||
20084 | #define CS_NAME_LEN 32 | |
20085 | ||
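The declarations above complete the jiffies_lock conversion seen in the earlier tick and timekeeping hunks: the old seqlock_t is split into a raw_spinlock_t that serializes writers (and remains a true spinning lock on RT) and a seqcount_t for lockless readers. The resulting idiom, with the writer side as used in tick_periodic() and a reader as in tick_setup_periodic():

    /* writer: may run with interrupts disabled / in hard IRQ context */
    raw_spin_lock(&jiffies_lock);
    write_seqcount_begin(&jiffies_seq);
    do_timer(1);
    write_seqcount_end(&jiffies_seq);
    raw_spin_unlock(&jiffies_lock);

    /* reader: never blocks, retries if it raced with a writer */
    unsigned int seq;
    ktime_t next;

    do {
            seq  = read_seqcount_begin(&jiffies_seq);
            next = tick_next_period;
    } while (read_seqcount_retry(&jiffies_seq, seq));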
20086 | diff --git a/kernel/time/timer.c b/kernel/time/timer.c | |
20087 | index 32bf6f75a8fe..ba53447a03f5 100644 | |
20088 | --- a/kernel/time/timer.c | |
20089 | +++ b/kernel/time/timer.c | |
20090 | @@ -193,8 +193,11 @@ EXPORT_SYMBOL(jiffies_64); | |
20091 | #endif | |
20092 | ||
20093 | struct timer_base { | |
20094 | - spinlock_t lock; | |
20095 | + raw_spinlock_t lock; | |
20096 | struct timer_list *running_timer; | |
20097 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20098 | + struct swait_queue_head wait_for_running_timer; | |
20099 | +#endif | |
20100 | unsigned long clk; | |
20101 | unsigned long next_expiry; | |
20102 | unsigned int cpu; | |
20103 | @@ -947,10 +950,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, | |
20104 | ||
20105 | if (!(tf & TIMER_MIGRATING)) { | |
20106 | base = get_timer_base(tf); | |
20107 | - spin_lock_irqsave(&base->lock, *flags); | |
20108 | + raw_spin_lock_irqsave(&base->lock, *flags); | |
20109 | if (timer->flags == tf) | |
20110 | return base; | |
20111 | - spin_unlock_irqrestore(&base->lock, *flags); | |
20112 | + raw_spin_unlock_irqrestore(&base->lock, *flags); | |
20113 | } | |
20114 | cpu_relax(); | |
20115 | } | |
20116 | @@ -1017,9 +1020,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |
20117 | /* See the comment in lock_timer_base() */ | |
20118 | timer->flags |= TIMER_MIGRATING; | |
20119 | ||
20120 | - spin_unlock(&base->lock); | |
20121 | + raw_spin_unlock(&base->lock); | |
20122 | base = new_base; | |
20123 | - spin_lock(&base->lock); | |
20124 | + raw_spin_lock(&base->lock); | |
20125 | WRITE_ONCE(timer->flags, | |
20126 | (timer->flags & ~TIMER_BASEMASK) | base->cpu); | |
20127 | } | |
20128 | @@ -1040,7 +1043,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) | |
20129 | } | |
20130 | ||
20131 | out_unlock: | |
20132 | - spin_unlock_irqrestore(&base->lock, flags); | |
20133 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
20134 | ||
20135 | return ret; | |
20136 | } | |
20137 | @@ -1134,19 +1137,46 @@ void add_timer_on(struct timer_list *timer, int cpu) | |
20138 | if (base != new_base) { | |
20139 | timer->flags |= TIMER_MIGRATING; | |
20140 | ||
20141 | - spin_unlock(&base->lock); | |
20142 | + raw_spin_unlock(&base->lock); | |
20143 | base = new_base; | |
20144 | - spin_lock(&base->lock); | |
20145 | + raw_spin_lock(&base->lock); | |
20146 | WRITE_ONCE(timer->flags, | |
20147 | (timer->flags & ~TIMER_BASEMASK) | cpu); | |
20148 | } | |
20149 | ||
20150 | debug_activate(timer, timer->expires); | |
20151 | internal_add_timer(base, timer); | |
20152 | - spin_unlock_irqrestore(&base->lock, flags); | |
20153 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
20154 | } | |
20155 | EXPORT_SYMBOL_GPL(add_timer_on); | |
20156 | ||
20157 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20158 | +/* | |
20159 | + * Wait for a running timer | |
20160 | + */ | |
20161 | +static void wait_for_running_timer(struct timer_list *timer) | |
20162 | +{ | |
20163 | + struct timer_base *base; | |
20164 | + u32 tf = timer->flags; | |
20165 | + | |
20166 | + if (tf & TIMER_MIGRATING) | |
20167 | + return; | |
20168 | + | |
20169 | + base = get_timer_base(tf); | |
20170 | + swait_event(base->wait_for_running_timer, | |
20171 | + base->running_timer != timer); | |
20172 | +} | |
20173 | + | |
20174 | +# define wakeup_timer_waiters(b) swake_up_all(&(b)->wait_for_running_timer) | |
20175 | +#else | |
20176 | +static inline void wait_for_running_timer(struct timer_list *timer) | |
20177 | +{ | |
20178 | + cpu_relax(); | |
20179 | +} | |
20180 | + | |
20181 | +# define wakeup_timer_waiters(b) do { } while (0) | |
20182 | +#endif | |
20183 | + | |
20184 | /** | |
20185 | * del_timer - deactive a timer. | |
20186 | * @timer: the timer to be deactivated | |
20187 | @@ -1170,7 +1200,7 @@ int del_timer(struct timer_list *timer) | |
20188 | if (timer_pending(timer)) { | |
20189 | base = lock_timer_base(timer, &flags); | |
20190 | ret = detach_if_pending(timer, base, true); | |
20191 | - spin_unlock_irqrestore(&base->lock, flags); | |
20192 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
20193 | } | |
20194 | ||
20195 | return ret; | |
20196 | @@ -1198,13 +1228,13 @@ int try_to_del_timer_sync(struct timer_list *timer) | |
20197 | timer_stats_timer_clear_start_info(timer); | |
20198 | ret = detach_if_pending(timer, base, true); | |
20199 | } | |
20200 | - spin_unlock_irqrestore(&base->lock, flags); | |
20201 | + raw_spin_unlock_irqrestore(&base->lock, flags); | |
20202 | ||
20203 | return ret; | |
20204 | } | |
20205 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
20206 | ||
20207 | -#ifdef CONFIG_SMP | |
20208 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
20209 | /** | |
20210 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
20211 | * @timer: the timer to be deactivated | |
20212 | @@ -1264,7 +1294,7 @@ int del_timer_sync(struct timer_list *timer) | |
20213 | int ret = try_to_del_timer_sync(timer); | |
20214 | if (ret >= 0) | |
20215 | return ret; | |
20216 | - cpu_relax(); | |
20217 | + wait_for_running_timer(timer); | |
20218 | } | |
20219 | } | |
20220 | EXPORT_SYMBOL(del_timer_sync); | |
20221 | @@ -1329,14 +1359,17 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) | |
20222 | fn = timer->function; | |
20223 | data = timer->data; | |
20224 | ||
20225 | - if (timer->flags & TIMER_IRQSAFE) { | |
20226 | - spin_unlock(&base->lock); | |
20227 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && | |
20228 | + timer->flags & TIMER_IRQSAFE) { | |
20229 | + raw_spin_unlock(&base->lock); | |
20230 | call_timer_fn(timer, fn, data); | |
20231 | - spin_lock(&base->lock); | |
20232 | + base->running_timer = NULL; | |
20233 | + raw_spin_lock(&base->lock); | |
20234 | } else { | |
20235 | - spin_unlock_irq(&base->lock); | |
20236 | + raw_spin_unlock_irq(&base->lock); | |
20237 | call_timer_fn(timer, fn, data); | |
20238 | - spin_lock_irq(&base->lock); | |
20239 | + base->running_timer = NULL; | |
20240 | + raw_spin_lock_irq(&base->lock); | |
20241 | } | |
20242 | } | |
20243 | } | |
20244 | @@ -1505,7 +1538,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
20245 | if (cpu_is_offline(smp_processor_id())) | |
20246 | return expires; | |
20247 | ||
20248 | - spin_lock(&base->lock); | |
20249 | + raw_spin_lock(&base->lock); | |
20250 | nextevt = __next_timer_interrupt(base); | |
20251 | is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); | |
20252 | base->next_expiry = nextevt; | |
20253 | @@ -1529,7 +1562,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
20254 | if ((expires - basem) > TICK_NSEC) | |
20255 | base->is_idle = true; | |
20256 | } | |
20257 | - spin_unlock(&base->lock); | |
20258 | + raw_spin_unlock(&base->lock); | |
20259 | ||
20260 | return cmp_next_hrtimer_event(basem, expires); | |
20261 | } | |
20262 | @@ -1594,13 +1627,13 @@ void update_process_times(int user_tick) | |
20263 | ||
20264 | /* Note: this timer irq context must be accounted for as well. */ | |
20265 | account_process_tick(p, user_tick); | |
20266 | + scheduler_tick(); | |
20267 | run_local_timers(); | |
20268 | rcu_check_callbacks(user_tick); | |
20269 | -#ifdef CONFIG_IRQ_WORK | |
20270 | +#if defined(CONFIG_IRQ_WORK) | |
20271 | if (in_irq()) | |
20272 | irq_work_tick(); | |
20273 | #endif | |
20274 | - scheduler_tick(); | |
20275 | run_posix_cpu_timers(p); | |
20276 | } | |
20277 | ||
20278 | @@ -1616,7 +1649,7 @@ static inline void __run_timers(struct timer_base *base) | |
20279 | if (!time_after_eq(jiffies, base->clk)) | |
20280 | return; | |
20281 | ||
20282 | - spin_lock_irq(&base->lock); | |
20283 | + raw_spin_lock_irq(&base->lock); | |
20284 | ||
20285 | while (time_after_eq(jiffies, base->clk)) { | |
20286 | ||
20287 | @@ -1626,8 +1659,8 @@ static inline void __run_timers(struct timer_base *base) | |
20288 | while (levels--) | |
20289 | expire_timers(base, heads + levels); | |
20290 | } | |
20291 | - base->running_timer = NULL; | |
20292 | - spin_unlock_irq(&base->lock); | |
20293 | + raw_spin_unlock_irq(&base->lock); | |
20294 | + wakeup_timer_waiters(base); | |
20295 | } | |
20296 | ||
20297 | /* | |
20298 | @@ -1637,6 +1670,8 @@ static void run_timer_softirq(struct softirq_action *h) | |
20299 | { | |
20300 | struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); | |
20301 | ||
20302 | + irq_work_tick_soft(); | |
20303 | + | |
20304 | __run_timers(base); | |
20305 | if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) | |
20306 | __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); | |
20307 | @@ -1822,16 +1857,16 @@ int timers_dead_cpu(unsigned int cpu) | |
20308 | * The caller is globally serialized and nobody else | |
20309 | * takes two locks at once, deadlock is not possible. | |
20310 | */ | |
20311 | - spin_lock_irq(&new_base->lock); | |
20312 | - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
20313 | + raw_spin_lock_irq(&new_base->lock); | |
20314 | + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); | |
20315 | ||
20316 | BUG_ON(old_base->running_timer); | |
20317 | ||
20318 | for (i = 0; i < WHEEL_SIZE; i++) | |
20319 | migrate_timer_list(new_base, old_base->vectors + i); | |
20320 | ||
20321 | - spin_unlock(&old_base->lock); | |
20322 | - spin_unlock_irq(&new_base->lock); | |
20323 | + raw_spin_unlock(&old_base->lock); | |
20324 | + raw_spin_unlock_irq(&new_base->lock); | |
20325 | put_cpu_ptr(&timer_bases); | |
20326 | } | |
20327 | return 0; | |
20328 | @@ -1847,8 +1882,11 @@ static void __init init_timer_cpu(int cpu) | |
20329 | for (i = 0; i < NR_BASES; i++) { | |
20330 | base = per_cpu_ptr(&timer_bases[i], cpu); | |
20331 | base->cpu = cpu; | |
20332 | - spin_lock_init(&base->lock); | |
20333 | + raw_spin_lock_init(&base->lock); | |
20334 | base->clk = jiffies; | |
20335 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20336 | + init_swait_queue_head(&base->wait_for_running_timer); | |
20337 | +#endif | |
20338 | } | |
20339 | } | |
20340 | ||
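With timer callbacks running in a preemptible softirq thread on RT, del_timer_sync() must not spin while a handler runs; the hunks above therefore replace the cpu_relax() busy-wait with a sleep on the per-base swait queue, which __run_timers() wakes after each batch of expirations. The resulting loop is effectively the following sketch (lockdep annotations of the real function omitted):

    int del_timer_sync(struct timer_list *timer)
    {
            for (;;) {
                    int ret = try_to_del_timer_sync(timer);

                    if (ret >= 0)
                            return ret;
                    /* swait_event() on RT, cpu_relax() otherwise */
                    wait_for_running_timer(timer);
            }
    }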
20341 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig | |
20342 | index f4b86e8ca1e7..340f14eef24a 100644 | |
20343 | --- a/kernel/trace/Kconfig | |
20344 | +++ b/kernel/trace/Kconfig | |
20345 | @@ -187,6 +187,24 @@ config IRQSOFF_TRACER | |
20346 | enabled. This option and the preempt-off timing option can be | |
20347 | used together or separately.) | |
20348 | ||
20349 | +config INTERRUPT_OFF_HIST | |
20350 | + bool "Interrupts-off Latency Histogram" | |
20351 | + depends on IRQSOFF_TRACER | |
20352 | + help | |
20353 | + This option generates continuously updated histograms (one per cpu) | |
20354 | + of the duration of time periods with interrupts disabled. The | |
20355 | + histograms are disabled by default. To enable them, write a non-zero | |
20356 | + number to | |
20357 | + | |
20358 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
20359 | + | |
20360 | + If PREEMPT_OFF_HIST is also selected, additional histograms (one | |
20361 | + per cpu) are generated that accumulate the duration of time periods | |
20362 | + when both interrupts and preemption are disabled. The histogram data | |
20363 | + will be located in the debug file system at | |
20364 | + | |
20365 | + /sys/kernel/debug/tracing/latency_hist/irqsoff | |
20366 | + | |
20367 | config PREEMPT_TRACER | |
20368 | bool "Preemption-off Latency Tracer" | |
20369 | default n | |
20370 | @@ -197,6 +215,7 @@ config PREEMPT_TRACER | |
20371 | select RING_BUFFER_ALLOW_SWAP | |
20372 | select TRACER_SNAPSHOT | |
20373 | select TRACER_SNAPSHOT_PER_CPU_SWAP | |
20374 | + select USING_GET_LOCK_PARENT_IP | |
20375 | help | |
20376 | This option measures the time spent in preemption-off critical | |
20377 | sections, with microsecond accuracy. | |
20378 | @@ -211,6 +230,24 @@ config PREEMPT_TRACER | |
20379 | enabled. This option and the irqs-off timing option can be | |
20380 | used together or separately.) | |
20381 | ||
20382 | +config PREEMPT_OFF_HIST | |
20383 | + bool "Preemption-off Latency Histogram" | |
20384 | + depends on PREEMPT_TRACER | |
20385 | + help | |
20386 | + This option generates continuously updated histograms (one per cpu) | |
20387 | + of the duration of time periods with preemption disabled. The | |
20388 | + histograms are disabled by default. To enable them, write a non-zero | |
20389 | + number to | |
20390 | + | |
20391 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
20392 | + | |
20393 | + If INTERRUPT_OFF_HIST is also selected, additional histograms (one | |
20394 | + per cpu) are generated that accumulate the duration of time periods | |
20395 | + when both interrupts and preemption are disabled. The histogram data | |
20396 | + will be located in the debug file system at | |
20397 | + | |
20398 | + /sys/kernel/debug/tracing/latency_hist/preemptoff | |
20399 | + | |
20400 | config SCHED_TRACER | |
20401 | bool "Scheduling Latency Tracer" | |
20402 | select GENERIC_TRACER | |
20403 | @@ -221,6 +258,74 @@ config SCHED_TRACER | |
20404 | This tracer tracks the latency of the highest priority task | |
20405 | to be scheduled in, starting from the point it has woken up. | |
20406 | ||
20407 | +config WAKEUP_LATENCY_HIST | |
20408 | + bool "Scheduling Latency Histogram" | |
20409 | + depends on SCHED_TRACER | |
20410 | + help | |
20411 | + This option generates continuously updated histograms (one per cpu) | |
20412 | + of the scheduling latency of the highest priority task. | |
20413 | + The histograms are disabled by default. To enable them, write a | |
20414 | + non-zero number to | |
20415 | + | |
20416 | + /sys/kernel/debug/tracing/latency_hist/enable/wakeup | |
20417 | + | |
20418 | + Two different algorithms are used, one to determine the latency of | |
20419 | + processes that exclusively use the highest priority of the system and | |
20420 | + another one to determine the latency of processes that share the | |
20421 | + highest system priority with other processes. The former is used to | |
20422 | + improve hardware and system software, the latter to optimize the | |
20423 | + priority design of a given system. The histogram data will be | |
20424 | + located in the debug file system at | |
20425 | + | |
20426 | + /sys/kernel/debug/tracing/latency_hist/wakeup | |
20427 | + | |
20428 | + and | |
20429 | + | |
20430 | + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio | |
20431 | + | |
20432 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
20433 | + Histogram are selected, additional histogram data will be collected | |
20434 | + that contain, in addition to the wakeup latency, the timer latency, in | |
20435 | + case the wakeup was triggered by an expired timer. These histograms | |
20436 | + are available in the | |
20437 | + | |
20438 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
20439 | + | |
20440 | + directory. They reflect the apparent interrupt and scheduling latency | |
20441 | + and are best suited to determine the worst-case latency of a given | |
20442 | + system. To enable these histograms, write a non-zero number to | |
20443 | + | |
20444 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
20445 | + | |
20446 | +config MISSED_TIMER_OFFSETS_HIST | |
20447 | + depends on HIGH_RES_TIMERS | |
20448 | + select GENERIC_TRACER | |
20449 | + bool "Missed Timer Offsets Histogram" | |
20450 | + help | |
20451 | + Generate a histogram of missed timer offsets in microseconds. The | |
20452 | + histograms are disabled by default. To enable them, write a non-zero | |
20453 | + number to | |
20454 | + | |
20455 | + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets | |
20456 | + | |
20457 | + The histogram data will be located in the debug file system at | |
20458 | + | |
20459 | + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets | |
20460 | + | |
20461 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
20462 | + Histogram are selected, additional histogram data will be collected | |
20463 | + that contain, in addition to the wakeup latency, the timer latency, in | |
20464 | + case the wakeup was triggered by an expired timer. These histograms | |
20465 | + are available in the | |
20466 | + | |
20467 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
20468 | + | |
20469 | + directory. They reflect the apparent interrupt and scheduling latency | |
20470 | + and are best suited to determine the worst-case latency of a given | |
20471 | + system. To enable these histograms, write a non-zero number to | |
20472 | + | |
20473 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
20474 | + | |
20475 | config ENABLE_DEFAULT_TRACERS | |
20476 | bool "Trace process context switches and events" | |
20477 | depends on !GENERIC_TRACER | |
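As the help texts above state, all histograms are disabled by default and are controlled through debugfs. A typical session on a kernel built with these options might look like the following; the per-CPU file name under the histogram directory is an assumption here, so check the directory listing on your system:

    # mount -t debugfs nodev /sys/kernel/debug   # if not already mounted
    # echo 1 > /sys/kernel/debug/tracing/latency_hist/enable/wakeup
    ... run the workload of interest ...
    # cat /sys/kernel/debug/tracing/latency_hist/wakeup/CPU0
    # echo 0 > /sys/kernel/debug/tracing/latency_hist/enable/wakeup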
20478 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile | |
20479 | index d0a1617b52b4..6bf9e9ff1fa5 100644 | |
20480 | --- a/kernel/trace/Makefile | |
20481 | +++ b/kernel/trace/Makefile | |
20482 | @@ -41,6 +41,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o | |
20483 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | |
20484 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o | |
20485 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |
20486 | +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o | |
20487 | +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o | |
20488 | +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o | |
20489 | +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o | |
20490 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | |
20491 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |
20492 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | |
20493 | diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c | |
20494 | new file mode 100644 | |
20495 | index 000000000000..7f6ee70dea41 | |
20496 | --- /dev/null | |
20497 | +++ b/kernel/trace/latency_hist.c | |
20498 | @@ -0,0 +1,1178 @@ | |
20499 | +/* | |
20500 | + * kernel/trace/latency_hist.c | |
20501 | + * | |
20502 | + * Add support for histograms of preemption-off latency, | |
20503 | + * interrupt-off latency, and wakeup latency; it depends on | |
20504 | + * Real-Time Preemption Support. | |
20505 | + * | |
20506 | + * Copyright (C) 2005 MontaVista Software, Inc. | |
20507 | + * Yi Yang <yyang@ch.mvista.com> | |
20508 | + * | |
20509 | + * Converted to work with the new latency tracer. | |
20510 | + * Copyright (C) 2008 Red Hat, Inc. | |
20511 | + * Steven Rostedt <srostedt@redhat.com> | |
20512 | + * | |
20513 | + */ | |
20514 | +#include <linux/module.h> | |
20515 | +#include <linux/debugfs.h> | |
20516 | +#include <linux/seq_file.h> | |
20517 | +#include <linux/percpu.h> | |
20518 | +#include <linux/kallsyms.h> | |
20519 | +#include <linux/uaccess.h> | |
20520 | +#include <linux/sched.h> | |
20521 | +#include <linux/sched/rt.h> | |
20522 | +#include <linux/slab.h> | |
20523 | +#include <linux/atomic.h> | |
20524 | +#include <asm/div64.h> | |
20525 | + | |
20526 | +#include "trace.h" | |
20527 | +#include <trace/events/sched.h> | |
20528 | + | |
20529 | +#define NSECS_PER_USECS 1000L | |
20530 | + | |
20531 | +#define CREATE_TRACE_POINTS | |
20532 | +#include <trace/events/hist.h> | |
20533 | + | |
20534 | +enum { | |
20535 | + IRQSOFF_LATENCY = 0, | |
20536 | + PREEMPTOFF_LATENCY, | |
20537 | + PREEMPTIRQSOFF_LATENCY, | |
20538 | + WAKEUP_LATENCY, | |
20539 | + WAKEUP_LATENCY_SHAREDPRIO, | |
20540 | + MISSED_TIMER_OFFSETS, | |
20541 | + TIMERANDWAKEUP_LATENCY, | |
20542 | + MAX_LATENCY_TYPE, | |
20543 | +}; | |
20544 | + | |
20545 | +#define MAX_ENTRY_NUM 10240 | |
20546 | + | |
20547 | +struct hist_data { | |
20548 | + atomic_t hist_mode; /* 0 log, 1 don't log */ | |
20549 | + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */ | |
20550 | + long min_lat; | |
20551 | + long max_lat; | |
20552 | + unsigned long long below_hist_bound_samples; | |
20553 | + unsigned long long above_hist_bound_samples; | |
20554 | + long long accumulate_lat; | |
20555 | + unsigned long long total_samples; | |
20556 | + unsigned long long hist_array[MAX_ENTRY_NUM]; | |
20557 | +}; | |
20558 | + | |
20559 | +struct enable_data { | |
20560 | + int latency_type; | |
20561 | + int enabled; | |
20562 | +}; | |
20563 | + | |
20564 | +static char *latency_hist_dir_root = "latency_hist"; | |
20565 | + | |
20566 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20567 | +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist); | |
20568 | +static char *irqsoff_hist_dir = "irqsoff"; | |
20569 | +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); | |
20570 | +static DEFINE_PER_CPU(int, hist_irqsoff_counting); | |
20571 | +#endif | |
20572 | + | |
20573 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20574 | +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist); | |
20575 | +static char *preemptoff_hist_dir = "preemptoff"; | |
20576 | +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); | |
20577 | +static DEFINE_PER_CPU(int, hist_preemptoff_counting); | |
20578 | +#endif | |
20579 | + | |
20580 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
20581 | +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist); | |
20582 | +static char *preemptirqsoff_hist_dir = "preemptirqsoff"; | |
20583 | +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); | |
20584 | +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting); | |
20585 | +#endif | |
20586 | + | |
20587 | +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST) | |
20588 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start); | |
20589 | +static struct enable_data preemptirqsoff_enabled_data = { | |
20590 | + .latency_type = PREEMPTIRQSOFF_LATENCY, | |
20591 | + .enabled = 0, | |
20592 | +}; | |
20593 | +#endif | |
20594 | + | |
20595 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20596 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20597 | +struct maxlatproc_data { | |
20598 | + char comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
20599 | + char current_comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
20600 | + int pid; | |
20601 | + int current_pid; | |
20602 | + int prio; | |
20603 | + int current_prio; | |
20604 | + long latency; | |
20605 | + long timeroffset; | |
20606 | + cycle_t timestamp; | |
20607 | +}; | |
20608 | +#endif | |
20609 | + | |
20610 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20611 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); | |
20612 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); | |
20613 | +static char *wakeup_latency_hist_dir = "wakeup"; | |
20614 | +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; | |
20615 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
20616 | + struct task_struct *p); | |
20617 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
20618 | + bool preempt, struct task_struct *prev, struct task_struct *next); | |
20619 | +static notrace void probe_sched_migrate_task(void *, | |
20620 | + struct task_struct *task, int cpu); | |
20621 | +static struct enable_data wakeup_latency_enabled_data = { | |
20622 | + .latency_type = WAKEUP_LATENCY, | |
20623 | + .enabled = 0, | |
20624 | +}; | |
20625 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc); | |
20626 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio); | |
20627 | +static DEFINE_PER_CPU(struct task_struct *, wakeup_task); | |
20628 | +static DEFINE_PER_CPU(int, wakeup_sharedprio); | |
20629 | +static unsigned long wakeup_pid; | |
20630 | +#endif | |
20631 | + | |
20632 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20633 | +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets); | |
20634 | +static char *missed_timer_offsets_dir = "missed_timer_offsets"; | |
20635 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
20636 | + long long offset, struct task_struct *curr, struct task_struct *task); | |
20637 | +static struct enable_data missed_timer_offsets_enabled_data = { | |
20638 | + .latency_type = MISSED_TIMER_OFFSETS, | |
20639 | + .enabled = 0, | |
20640 | +}; | |
20641 | +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); | |
20642 | +static unsigned long missed_timer_offsets_pid; | |
20643 | +#endif | |
20644 | + | |
20645 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
20646 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20647 | +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); | |
20648 | +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; | |
20649 | +static struct enable_data timerandwakeup_enabled_data = { | |
20650 | + .latency_type = TIMERANDWAKEUP_LATENCY, | |
20651 | + .enabled = 0, | |
20652 | +}; | |
20653 | +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); | |
20654 | +#endif | |
20655 | + | |
20656 | +void notrace latency_hist(int latency_type, int cpu, long latency, | |
20657 | + long timeroffset, cycle_t stop, | |
20658 | + struct task_struct *p) | |
20659 | +{ | |
20660 | + struct hist_data *my_hist; | |
20661 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20662 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20663 | + struct maxlatproc_data *mp = NULL; | |
20664 | +#endif | |
20665 | + | |
20666 | + if (!cpu_possible(cpu) || latency_type < 0 || | |
20667 | + latency_type >= MAX_LATENCY_TYPE) | |
20668 | + return; | |
20669 | + | |
20670 | + switch (latency_type) { | |
20671 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20672 | + case IRQSOFF_LATENCY: | |
20673 | + my_hist = &per_cpu(irqsoff_hist, cpu); | |
20674 | + break; | |
20675 | +#endif | |
20676 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20677 | + case PREEMPTOFF_LATENCY: | |
20678 | + my_hist = &per_cpu(preemptoff_hist, cpu); | |
20679 | + break; | |
20680 | +#endif | |
20681 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
20682 | + case PREEMPTIRQSOFF_LATENCY: | |
20683 | + my_hist = &per_cpu(preemptirqsoff_hist, cpu); | |
20684 | + break; | |
20685 | +#endif | |
20686 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20687 | + case WAKEUP_LATENCY: | |
20688 | + my_hist = &per_cpu(wakeup_latency_hist, cpu); | |
20689 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
20690 | + break; | |
20691 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
20692 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
20693 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
20694 | + break; | |
20695 | +#endif | |
20696 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20697 | + case MISSED_TIMER_OFFSETS: | |
20698 | + my_hist = &per_cpu(missed_timer_offsets, cpu); | |
20699 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
20700 | + break; | |
20701 | +#endif | |
20702 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
20703 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20704 | + case TIMERANDWAKEUP_LATENCY: | |
20705 | + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
20706 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
20707 | + break; | |
20708 | +#endif | |
20709 | + | |
20710 | + default: | |
20711 | + return; | |
20712 | + } | |
20713 | + | |
20714 | + latency += my_hist->offset; | |
20715 | + | |
20716 | + if (atomic_read(&my_hist->hist_mode) == 0) | |
20717 | + return; | |
20718 | + | |
20719 | + if (latency < 0 || latency >= MAX_ENTRY_NUM) { | |
20720 | + if (latency < 0) | |
20721 | + my_hist->below_hist_bound_samples++; | |
20722 | + else | |
20723 | + my_hist->above_hist_bound_samples++; | |
20724 | + } else | |
20725 | + my_hist->hist_array[latency]++; | |
20726 | + | |
20727 | + if (unlikely(latency > my_hist->max_lat || | |
20728 | + my_hist->min_lat == LONG_MAX)) { | |
20729 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20730 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20731 | + if (latency_type == WAKEUP_LATENCY || | |
20732 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
20733 | + latency_type == MISSED_TIMER_OFFSETS || | |
20734 | + latency_type == TIMERANDWAKEUP_LATENCY) { | |
20735 | + strncpy(mp->comm, p->comm, sizeof(mp->comm)); | |
20736 | + strncpy(mp->current_comm, current->comm, | |
20737 | + sizeof(mp->current_comm)); | |
20738 | + mp->pid = task_pid_nr(p); | |
20739 | + mp->current_pid = task_pid_nr(current); | |
20740 | + mp->prio = p->prio; | |
20741 | + mp->current_prio = current->prio; | |
20742 | + mp->latency = latency; | |
20743 | + mp->timeroffset = timeroffset; | |
20744 | + mp->timestamp = stop; | |
20745 | + } | |
20746 | +#endif | |
20747 | + my_hist->max_lat = latency; | |
20748 | + } | |
20749 | + if (unlikely(latency < my_hist->min_lat)) | |
20750 | + my_hist->min_lat = latency; | |
20751 | + my_hist->total_samples++; | |
20752 | + my_hist->accumulate_lat += latency; | |
20753 | +} | |
20754 | + | |
20755 | +static void *l_start(struct seq_file *m, loff_t *pos) | |
20756 | +{ | |
20757 | + loff_t *index_ptr = NULL; | |
20758 | + loff_t index = *pos; | |
20759 | + struct hist_data *my_hist = m->private; | |
20760 | + | |
20761 | + if (index == 0) { | |
20762 | + char minstr[32], avgstr[32], maxstr[32]; | |
20763 | + | |
20764 | + atomic_dec(&my_hist->hist_mode); | |
20765 | + | |
20766 | + if (likely(my_hist->total_samples)) { | |
20767 | + long avg = (long) div64_s64(my_hist->accumulate_lat, | |
20768 | + my_hist->total_samples); | |
20769 | + snprintf(minstr, sizeof(minstr), "%ld", | |
20770 | + my_hist->min_lat - my_hist->offset); | |
20771 | + snprintf(avgstr, sizeof(avgstr), "%ld", | |
20772 | + avg - my_hist->offset); | |
20773 | + snprintf(maxstr, sizeof(maxstr), "%ld", | |
20774 | + my_hist->max_lat - my_hist->offset); | |
20775 | + } else { | |
20776 | + strcpy(minstr, "<undef>"); | |
20777 | + strcpy(avgstr, minstr); | |
20778 | + strcpy(maxstr, minstr); | |
20779 | + } | |
20780 | + | |
20781 | + seq_printf(m, "#Minimum latency: %s microseconds\n" | |
20782 | + "#Average latency: %s microseconds\n" | |
20783 | + "#Maximum latency: %s microseconds\n" | |
20784 | + "#Total samples: %llu\n" | |
20785 | + "#There are %llu samples lower than %ld" | |
20786 | + " microseconds.\n" | |
20787 | + "#There are %llu samples greater or equal" | |
20788 | + " than %ld microseconds.\n" | |
20789 | + "#usecs\t%16s\n", | |
20790 | + minstr, avgstr, maxstr, | |
20791 | + my_hist->total_samples, | |
20792 | + my_hist->below_hist_bound_samples, | |
20793 | + -my_hist->offset, | |
20794 | + my_hist->above_hist_bound_samples, | |
20795 | + MAX_ENTRY_NUM - my_hist->offset, | |
20796 | + "samples"); | |
20797 | + } | |
20798 | + if (index < MAX_ENTRY_NUM) { | |
20799 | + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL); | |
20800 | + if (index_ptr) | |
20801 | + *index_ptr = index; | |
20802 | + } | |
20803 | + | |
20804 | + return index_ptr; | |
20805 | +} | |
20806 | + | |
20807 | +static void *l_next(struct seq_file *m, void *p, loff_t *pos) | |
20808 | +{ | |
20809 | + loff_t *index_ptr = p; | |
20810 | + struct hist_data *my_hist = m->private; | |
20811 | + | |
20812 | + if (++*pos >= MAX_ENTRY_NUM) { | |
20813 | + atomic_inc(&my_hist->hist_mode); | |
20814 | + return NULL; | |
20815 | + } | |
20816 | + *index_ptr = *pos; | |
20817 | + return index_ptr; | |
20818 | +} | |
20819 | + | |
20820 | +static void l_stop(struct seq_file *m, void *p) | |
20821 | +{ | |
20822 | + kfree(p); | |
20823 | +} | |
20824 | + | |
20825 | +static int l_show(struct seq_file *m, void *p) | |
20826 | +{ | |
20827 | + int index = *(loff_t *) p; | |
20828 | + struct hist_data *my_hist = m->private; | |
20829 | + | |
20830 | + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset, | |
20831 | + my_hist->hist_array[index]); | |
20832 | + return 0; | |
20833 | +} | |
20834 | + | |
20835 | +static const struct seq_operations latency_hist_seq_op = { | |
20836 | + .start = l_start, | |
20837 | + .next = l_next, | |
20838 | + .stop = l_stop, | |
20839 | + .show = l_show | |
20840 | +}; | |
20841 | + | |
20842 | +static int latency_hist_open(struct inode *inode, struct file *file) | |
20843 | +{ | |
20844 | + int ret; | |
20845 | + | |
20846 | + ret = seq_open(file, &latency_hist_seq_op); | |
20847 | + if (!ret) { | |
20848 | + struct seq_file *seq = file->private_data; | |
20849 | + seq->private = inode->i_private; | |
20850 | + } | |
20851 | + return ret; | |
20852 | +} | |
20853 | + | |
20854 | +static const struct file_operations latency_hist_fops = { | |
20855 | + .open = latency_hist_open, | |
20856 | + .read = seq_read, | |
20857 | + .llseek = seq_lseek, | |
20858 | + .release = seq_release, | |
20859 | +}; | |
20860 | + | |
20861 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20862 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20863 | +static void clear_maxlatprocdata(struct maxlatproc_data *mp) | |
20864 | +{ | |
20865 | + mp->comm[0] = mp->current_comm[0] = '\0'; | |
20866 | + mp->prio = mp->current_prio = mp->pid = mp->current_pid = | |
20867 | + mp->latency = mp->timeroffset = -1; | |
20868 | + mp->timestamp = 0; | |
20869 | +} | |
20870 | +#endif | |
20871 | + | |
20872 | +static void hist_reset(struct hist_data *hist) | |
20873 | +{ | |
20874 | + atomic_dec(&hist->hist_mode); | |
20875 | + | |
20876 | + memset(hist->hist_array, 0, sizeof(hist->hist_array)); | |
20877 | + hist->below_hist_bound_samples = 0ULL; | |
20878 | + hist->above_hist_bound_samples = 0ULL; | |
20879 | + hist->min_lat = LONG_MAX; | |
20880 | + hist->max_lat = LONG_MIN; | |
20881 | + hist->total_samples = 0ULL; | |
20882 | + hist->accumulate_lat = 0LL; | |
20883 | + | |
20884 | + atomic_inc(&hist->hist_mode); | |
20885 | +} | |
20886 | + | |
20887 | +static ssize_t | |
20888 | +latency_hist_reset(struct file *file, const char __user *a, | |
20889 | + size_t size, loff_t *off) | |
20890 | +{ | |
20891 | + int cpu; | |
20892 | + struct hist_data *hist = NULL; | |
20893 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20894 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20895 | + struct maxlatproc_data *mp = NULL; | |
20896 | +#endif | |
20897 | + off_t latency_type = (off_t) file->private_data; | |
20898 | + | |
20899 | + for_each_online_cpu(cpu) { | |
20900 | + | |
20901 | + switch (latency_type) { | |
20902 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
20903 | + case PREEMPTOFF_LATENCY: | |
20904 | + hist = &per_cpu(preemptoff_hist, cpu); | |
20905 | + break; | |
20906 | +#endif | |
20907 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
20908 | + case IRQSOFF_LATENCY: | |
20909 | + hist = &per_cpu(irqsoff_hist, cpu); | |
20910 | + break; | |
20911 | +#endif | |
20912 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
20913 | + case PREEMPTIRQSOFF_LATENCY: | |
20914 | + hist = &per_cpu(preemptirqsoff_hist, cpu); | |
20915 | + break; | |
20916 | +#endif | |
20917 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
20918 | + case WAKEUP_LATENCY: | |
20919 | + hist = &per_cpu(wakeup_latency_hist, cpu); | |
20920 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
20921 | + break; | |
20922 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
20923 | + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
20924 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
20925 | + break; | |
20926 | +#endif | |
20927 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
20928 | + case MISSED_TIMER_OFFSETS: | |
20929 | + hist = &per_cpu(missed_timer_offsets, cpu); | |
20930 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
20931 | + break; | |
20932 | +#endif | |
20933 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
20934 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20935 | + case TIMERANDWAKEUP_LATENCY: | |
20936 | + hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
20937 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
20938 | + break; | |
20939 | +#endif | |
20940 | + } | |
20941 | + | |
20942 | + hist_reset(hist); | |
20943 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20944 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20945 | + if (latency_type == WAKEUP_LATENCY || | |
20946 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
20947 | + latency_type == MISSED_TIMER_OFFSETS || | |
20948 | + latency_type == TIMERANDWAKEUP_LATENCY) | |
20949 | + clear_maxlatprocdata(mp); | |
20950 | +#endif | |
20951 | + } | |
20952 | + | |
20953 | + return size; | |
20954 | +} | |
20955 | + | |
20956 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20957 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20958 | +static ssize_t | |
20959 | +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
20960 | +{ | |
20961 | + char buf[64]; | |
20962 | + int r; | |
20963 | + unsigned long *this_pid = file->private_data; | |
20964 | + | |
20965 | + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid); | |
20966 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
20967 | +} | |
20968 | + | |
20969 | +static ssize_t do_pid(struct file *file, const char __user *ubuf, | |
20970 | + size_t cnt, loff_t *ppos) | |
20971 | +{ | |
20972 | + char buf[64]; | |
20973 | + unsigned long pid; | |
20974 | + unsigned long *this_pid = file->private_data; | |
20975 | + | |
20976 | + if (cnt >= sizeof(buf)) | |
20977 | + return -EINVAL; | |
20978 | + | |
20979 | + if (copy_from_user(&buf, ubuf, cnt)) | |
20980 | + return -EFAULT; | |
20981 | + | |
20982 | + buf[cnt] = '\0'; | |
20983 | + | |
20984 | + if (kstrtoul(buf, 10, &pid)) | |
20985 | + return -EINVAL; | |
20986 | + | |
20987 | + *this_pid = pid; | |
20988 | + | |
20989 | + return cnt; | |
20990 | +} | |
20991 | +#endif | |
20992 | + | |
20993 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
20994 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
20995 | +static ssize_t | |
20996 | +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
20997 | +{ | |
20998 | + int r; | |
20999 | + struct maxlatproc_data *mp = file->private_data; | |
21000 | + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8); | |
21001 | + unsigned long long t; | |
21002 | + unsigned long usecs, secs; | |
21003 | + char *buf; | |
21004 | + | |
21005 | + if (mp->pid == -1 || mp->current_pid == -1) { | |
21006 | + buf = "(none)\n"; | |
21007 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, | |
21008 | + strlen(buf)); | |
21009 | + } | |
21010 | + | |
21011 | + buf = kmalloc(strmaxlen, GFP_KERNEL); | |
21012 | + if (buf == NULL) | |
21013 | + return -ENOMEM; | |
21014 | + | |
21015 | + t = ns2usecs(mp->timestamp); | |
21016 | + usecs = do_div(t, USEC_PER_SEC); | |
21017 | + secs = (unsigned long) t; | |
21018 | + r = snprintf(buf, strmaxlen, | |
21019 | + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid, | |
21020 | + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm, | |
21021 | + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm, | |
21022 | + secs, usecs); | |
21023 | + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
21024 | + kfree(buf); | |
21025 | + return r; | |
21026 | +} | |
21027 | +#endif | |
21028 | + | |
21029 | +static ssize_t | |
21030 | +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
21031 | +{ | |
21032 | + char buf[64]; | |
21033 | + struct enable_data *ed = file->private_data; | |
21034 | + int r; | |
21035 | + | |
21036 | + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled); | |
21037 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
21038 | +} | |
21039 | + | |
21040 | +static ssize_t | |
21041 | +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) | |
21042 | +{ | |
21043 | + char buf[64]; | |
21044 | + long enable; | |
21045 | + struct enable_data *ed = file->private_data; | |
21046 | + | |
21047 | + if (cnt >= sizeof(buf)) | |
21048 | + return -EINVAL; | |
21049 | + | |
21050 | + if (copy_from_user(&buf, ubuf, cnt)) | |
21051 | + return -EFAULT; | |
21052 | + | |
21053 | + buf[cnt] = 0; | |
21054 | + | |
21055 | + if (kstrtoul(buf, 10, &enable)) | |
21056 | + return -EINVAL; | |
21057 | + | |
21058 | + if ((enable && ed->enabled) || (!enable && !ed->enabled)) | |
21059 | + return cnt; | |
21060 | + | |
21061 | + if (enable) { | |
21062 | + int ret; | |
21063 | + | |
21064 | + switch (ed->latency_type) { | |
21065 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
21066 | + case PREEMPTIRQSOFF_LATENCY: | |
21067 | + ret = register_trace_preemptirqsoff_hist( | |
21068 | + probe_preemptirqsoff_hist, NULL); | |
21069 | + if (ret) { | |
21070 | + pr_info("wakeup trace: Couldn't assign " | |
21071 | + "probe_preemptirqsoff_hist " | |
21072 | + "to trace_preemptirqsoff_hist\n"); | |
21073 | + return ret; | |
21074 | + } | |
21075 | + break; | |
21076 | +#endif | |
21077 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21078 | + case WAKEUP_LATENCY: | |
21079 | + ret = register_trace_sched_wakeup( | |
21080 | + probe_wakeup_latency_hist_start, NULL); | |
21081 | + if (ret) { | |
21082 | + pr_info("wakeup trace: Couldn't assign " | |
21083 | + "probe_wakeup_latency_hist_start " | |
21084 | + "to trace_sched_wakeup\n"); | |
21085 | + return ret; | |
21086 | + } | |
21087 | + ret = register_trace_sched_wakeup_new( | |
21088 | + probe_wakeup_latency_hist_start, NULL); | |
21089 | + if (ret) { | |
21090 | + pr_info("wakeup trace: Couldn't assign " | |
21091 | + "probe_wakeup_latency_hist_start " | |
21092 | + "to trace_sched_wakeup_new\n"); | |
21093 | + unregister_trace_sched_wakeup( | |
21094 | + probe_wakeup_latency_hist_start, NULL); | |
21095 | + return ret; | |
21096 | + } | |
21097 | + ret = register_trace_sched_switch( | |
21098 | + probe_wakeup_latency_hist_stop, NULL); | |
21099 | + if (ret) { | |
21100 | + pr_info("wakeup trace: Couldn't assign " | |
21101 | + "probe_wakeup_latency_hist_stop " | |
21102 | + "to trace_sched_switch\n"); | |
21103 | + unregister_trace_sched_wakeup( | |
21104 | + probe_wakeup_latency_hist_start, NULL); | |
21105 | + unregister_trace_sched_wakeup_new( | |
21106 | + probe_wakeup_latency_hist_start, NULL); | |
21107 | + return ret; | |
21108 | + } | |
21109 | + ret = register_trace_sched_migrate_task( | |
21110 | + probe_sched_migrate_task, NULL); | |
21111 | + if (ret) { | |
21112 | + pr_info("wakeup trace: Couldn't assign " | |
21113 | + "probe_sched_migrate_task " | |
21114 | + "to trace_sched_migrate_task\n"); | |
21115 | + unregister_trace_sched_wakeup( | |
21116 | + probe_wakeup_latency_hist_start, NULL); | |
21117 | + unregister_trace_sched_wakeup_new( | |
21118 | + probe_wakeup_latency_hist_start, NULL); | |
21119 | + unregister_trace_sched_switch( | |
21120 | + probe_wakeup_latency_hist_stop, NULL); | |
21121 | + return ret; | |
21122 | + } | |
21123 | + break; | |
21124 | +#endif | |
21125 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21126 | + case MISSED_TIMER_OFFSETS: | |
21127 | + ret = register_trace_hrtimer_interrupt( | |
21128 | + probe_hrtimer_interrupt, NULL); | |
21129 | + if (ret) { | |
21130 | + pr_info("wakeup trace: Couldn't assign " | |
21131 | + "probe_hrtimer_interrupt " | |
21132 | + "to trace_hrtimer_interrupt\n"); | |
21133 | + return ret; | |
21134 | + } | |
21135 | + break; | |
21136 | +#endif | |
21137 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
21138 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21139 | + case TIMERANDWAKEUP_LATENCY: | |
21140 | + if (!wakeup_latency_enabled_data.enabled || | |
21141 | + !missed_timer_offsets_enabled_data.enabled) | |
21142 | + return -EINVAL; | |
21143 | + break; | |
21144 | +#endif | |
21145 | + default: | |
21146 | + break; | |
21147 | + } | |
21148 | + } else { | |
21149 | + switch (ed->latency_type) { | |
21150 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
21151 | + case PREEMPTIRQSOFF_LATENCY: | |
21152 | + { | |
21153 | + int cpu; | |
21154 | + | |
21155 | + unregister_trace_preemptirqsoff_hist( | |
21156 | + probe_preemptirqsoff_hist, NULL); | |
21157 | + for_each_online_cpu(cpu) { | |
21158 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21159 | + per_cpu(hist_irqsoff_counting, | |
21160 | + cpu) = 0; | |
21161 | +#endif | |
21162 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21163 | + per_cpu(hist_preemptoff_counting, | |
21164 | + cpu) = 0; | |
21165 | +#endif | |
21166 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
21167 | + per_cpu(hist_preemptirqsoff_counting, | |
21168 | + cpu) = 0; | |
21169 | +#endif | |
21170 | + } | |
21171 | + } | |
21172 | + break; | |
21173 | +#endif | |
21174 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21175 | + case WAKEUP_LATENCY: | |
21176 | + { | |
21177 | + int cpu; | |
21178 | + | |
21179 | + unregister_trace_sched_wakeup( | |
21180 | + probe_wakeup_latency_hist_start, NULL); | |
21181 | + unregister_trace_sched_wakeup_new( | |
21182 | + probe_wakeup_latency_hist_start, NULL); | |
21183 | + unregister_trace_sched_switch( | |
21184 | + probe_wakeup_latency_hist_stop, NULL); | |
21185 | + unregister_trace_sched_migrate_task( | |
21186 | + probe_sched_migrate_task, NULL); | |
21187 | + | |
21188 | + for_each_online_cpu(cpu) { | |
21189 | + per_cpu(wakeup_task, cpu) = NULL; | |
21190 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
21191 | + } | |
21192 | + } | |
21193 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21194 | + timerandwakeup_enabled_data.enabled = 0; | |
21195 | +#endif | |
21196 | + break; | |
21197 | +#endif | |
21198 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21199 | + case MISSED_TIMER_OFFSETS: | |
21200 | + unregister_trace_hrtimer_interrupt( | |
21201 | + probe_hrtimer_interrupt, NULL); | |
21202 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21203 | + timerandwakeup_enabled_data.enabled = 0; | |
21204 | +#endif | |
21205 | + break; | |
21206 | +#endif | |
21207 | + default: | |
21208 | + break; | |
21209 | + } | |
21210 | + } | |
21211 | + ed->enabled = enable; | |
21212 | + return cnt; | |
21213 | +} | |
21214 | + | |
21215 | +static const struct file_operations latency_hist_reset_fops = { | |
21216 | + .open = tracing_open_generic, | |
21217 | + .write = latency_hist_reset, | |
21218 | +}; | |
21219 | + | |
21220 | +static const struct file_operations enable_fops = { | |
21221 | + .open = tracing_open_generic, | |
21222 | + .read = show_enable, | |
21223 | + .write = do_enable, | |
21224 | +}; | |
21225 | + | |
21226 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
21227 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21228 | +static const struct file_operations pid_fops = { | |
21229 | + .open = tracing_open_generic, | |
21230 | + .read = show_pid, | |
21231 | + .write = do_pid, | |
21232 | +}; | |
21233 | + | |
21234 | +static const struct file_operations maxlatproc_fops = { | |
21235 | + .open = tracing_open_generic, | |
21236 | + .read = show_maxlatproc, | |
21237 | +}; | |
21238 | +#endif | |
21239 | + | |
21240 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
21241 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, | |
21242 | + int starthist) | |
21243 | +{ | |
21244 | + int cpu = raw_smp_processor_id(); | |
21245 | + int time_set = 0; | |
21246 | + | |
21247 | + if (starthist) { | |
21248 | + cycle_t uninitialized_var(start); | |
21249 | + | |
21250 | + if (!preempt_count() && !irqs_disabled()) | |
21251 | + return; | |
21252 | + | |
21253 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21254 | + if ((reason == IRQS_OFF || reason == TRACE_START) && | |
21255 | + !per_cpu(hist_irqsoff_counting, cpu)) { | |
21256 | + per_cpu(hist_irqsoff_counting, cpu) = 1; | |
21257 | + start = ftrace_now(cpu); | |
21258 | + time_set++; | |
21259 | + per_cpu(hist_irqsoff_start, cpu) = start; | |
21260 | + } | |
21261 | +#endif | |
21262 | + | |
21263 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21264 | + if ((reason == PREEMPT_OFF || reason == TRACE_START) && | |
21265 | + !per_cpu(hist_preemptoff_counting, cpu)) { | |
21266 | + per_cpu(hist_preemptoff_counting, cpu) = 1; | |
21267 | + if (!(time_set++)) | |
21268 | + start = ftrace_now(cpu); | |
21269 | + per_cpu(hist_preemptoff_start, cpu) = start; | |
21270 | + } | |
21271 | +#endif | |
21272 | + | |
21273 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
21274 | + if (per_cpu(hist_irqsoff_counting, cpu) && | |
21275 | + per_cpu(hist_preemptoff_counting, cpu) && | |
21276 | + !per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
21277 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 1; | |
21278 | + if (!time_set) | |
21279 | + start = ftrace_now(cpu); | |
21280 | + per_cpu(hist_preemptirqsoff_start, cpu) = start; | |
21281 | + } | |
21282 | +#endif | |
21283 | + } else { | |
21284 | + cycle_t uninitialized_var(stop); | |
21285 | + | |
21286 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21287 | + if ((reason == IRQS_ON || reason == TRACE_STOP) && | |
21288 | + per_cpu(hist_irqsoff_counting, cpu)) { | |
21289 | + cycle_t start = per_cpu(hist_irqsoff_start, cpu); | |
21290 | + | |
21291 | + stop = ftrace_now(cpu); | |
21292 | + time_set++; | |
21293 | + if (start) { | |
21294 | + long latency = ((long) (stop - start)) / | |
21295 | + NSECS_PER_USECS; | |
21296 | + | |
21297 | + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0, | |
21298 | + stop, NULL); | |
21299 | + } | |
21300 | + per_cpu(hist_irqsoff_counting, cpu) = 0; | |
21301 | + } | |
21302 | +#endif | |
21303 | + | |
21304 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21305 | + if ((reason == PREEMPT_ON || reason == TRACE_STOP) && | |
21306 | + per_cpu(hist_preemptoff_counting, cpu)) { | |
21307 | + cycle_t start = per_cpu(hist_preemptoff_start, cpu); | |
21308 | + | |
21309 | + if (!(time_set++)) | |
21310 | + stop = ftrace_now(cpu); | |
21311 | + if (start) { | |
21312 | + long latency = ((long) (stop - start)) / | |
21313 | + NSECS_PER_USECS; | |
21314 | + | |
21315 | + latency_hist(PREEMPTOFF_LATENCY, cpu, latency, | |
21316 | + 0, stop, NULL); | |
21317 | + } | |
21318 | + per_cpu(hist_preemptoff_counting, cpu) = 0; | |
21319 | + } | |
21320 | +#endif | |
21321 | + | |
21322 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
21323 | + if ((!per_cpu(hist_irqsoff_counting, cpu) || | |
21324 | + !per_cpu(hist_preemptoff_counting, cpu)) && | |
21325 | + per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
21326 | + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu); | |
21327 | + | |
21328 | + if (!time_set) | |
21329 | + stop = ftrace_now(cpu); | |
21330 | + if (start) { | |
21331 | + long latency = ((long) (stop - start)) / | |
21332 | + NSECS_PER_USECS; | |
21333 | + | |
21334 | + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu, | |
21335 | + latency, 0, stop, NULL); | |
21336 | + } | |
21337 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 0; | |
21338 | + } | |
21339 | +#endif | |
21340 | + } | |
21341 | +} | |
21342 | +#endif | |
21343 | + | |
21344 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21345 | +static DEFINE_RAW_SPINLOCK(wakeup_lock); | |
21346 | +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task, | |
21347 | + int cpu) | |
21348 | +{ | |
21349 | + int old_cpu = task_cpu(task); | |
21350 | + | |
21351 | + if (cpu != old_cpu) { | |
21352 | + unsigned long flags; | |
21353 | + struct task_struct *cpu_wakeup_task; | |
21354 | + | |
21355 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
21356 | + | |
21357 | + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu); | |
21358 | + if (task == cpu_wakeup_task) { | |
21359 | + put_task_struct(cpu_wakeup_task); | |
21360 | + per_cpu(wakeup_task, old_cpu) = NULL; | |
21361 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task; | |
21362 | + get_task_struct(cpu_wakeup_task); | |
21363 | + } | |
21364 | + | |
21365 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
21366 | + } | |
21367 | +} | |
21368 | + | |
21369 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
21370 | + struct task_struct *p) | |
21371 | +{ | |
21372 | + unsigned long flags; | |
21373 | + struct task_struct *curr = current; | |
21374 | + int cpu = task_cpu(p); | |
21375 | + struct task_struct *cpu_wakeup_task; | |
21376 | + | |
21377 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
21378 | + | |
21379 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
21380 | + | |
21381 | + if (wakeup_pid) { | |
21382 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
21383 | + p->prio == curr->prio) | |
21384 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
21385 | + if (likely(wakeup_pid != task_pid_nr(p))) | |
21386 | + goto out; | |
21387 | + } else { | |
21388 | + if (likely(!rt_task(p)) || | |
21389 | + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) || | |
21390 | + p->prio > curr->prio) | |
21391 | + goto out; | |
21392 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
21393 | + p->prio == curr->prio) | |
21394 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
21395 | + } | |
21396 | + | |
21397 | + if (cpu_wakeup_task) | |
21398 | + put_task_struct(cpu_wakeup_task); | |
21399 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p; | |
21400 | + get_task_struct(cpu_wakeup_task); | |
21401 | + cpu_wakeup_task->preempt_timestamp_hist = | |
21402 | + ftrace_now(raw_smp_processor_id()); | |
21403 | +out: | |
21404 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
21405 | +} | |
21406 | + | |
21407 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
21408 | + bool preempt, struct task_struct *prev, struct task_struct *next) | |
21409 | +{ | |
21410 | + unsigned long flags; | |
21411 | + int cpu = task_cpu(next); | |
21412 | + long latency; | |
21413 | + cycle_t stop; | |
21414 | + struct task_struct *cpu_wakeup_task; | |
21415 | + | |
21416 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
21417 | + | |
21418 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
21419 | + | |
21420 | + if (cpu_wakeup_task == NULL) | |
21421 | + goto out; | |
21422 | + | |
21423 | + /* Already running? */ | |
21424 | + if (unlikely(current == cpu_wakeup_task)) | |
21425 | + goto out_reset; | |
21426 | + | |
21427 | + if (next != cpu_wakeup_task) { | |
21428 | + if (next->prio < cpu_wakeup_task->prio) | |
21429 | + goto out_reset; | |
21430 | + | |
21431 | + if (next->prio == cpu_wakeup_task->prio) | |
21432 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
21433 | + | |
21434 | + goto out; | |
21435 | + } | |
21436 | + | |
21437 | + if (current->prio == cpu_wakeup_task->prio) | |
21438 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
21439 | + | |
21440 | + /* | |
21441 | + * The task we are waiting for is about to be switched to. | |
21442 | + * Calculate latency and store it in histogram. | |
21443 | + */ | |
21444 | + stop = ftrace_now(raw_smp_processor_id()); | |
21445 | + | |
21446 | + latency = ((long) (stop - next->preempt_timestamp_hist)) / | |
21447 | + NSECS_PER_USECS; | |
21448 | + | |
21449 | + if (per_cpu(wakeup_sharedprio, cpu)) { | |
21450 | + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop, | |
21451 | + next); | |
21452 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
21453 | + } else { | |
21454 | + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next); | |
21455 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21456 | + if (timerandwakeup_enabled_data.enabled) { | |
21457 | + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, | |
21458 | + next->timer_offset + latency, next->timer_offset, | |
21459 | + stop, next); | |
21460 | + } | |
21461 | +#endif | |
21462 | + } | |
21463 | + | |
21464 | +out_reset: | |
21465 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21466 | + next->timer_offset = 0; | |
21467 | +#endif | |
21468 | + put_task_struct(cpu_wakeup_task); | |
21469 | + per_cpu(wakeup_task, cpu) = NULL; | |
21470 | +out: | |
21471 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
21472 | +} | |
21473 | +#endif | |
21474 | + | |
21475 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21476 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
21477 | + long long latency_ns, struct task_struct *curr, | |
21478 | + struct task_struct *task) | |
21479 | +{ | |
21480 | + if (latency_ns <= 0 && task != NULL && rt_task(task) && | |
21481 | + (task->prio < curr->prio || | |
21482 | + (task->prio == curr->prio && | |
21483 | + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) { | |
21484 | + long latency; | |
21485 | + cycle_t now; | |
21486 | + | |
21487 | + if (missed_timer_offsets_pid) { | |
21488 | + if (likely(missed_timer_offsets_pid != | |
21489 | + task_pid_nr(task))) | |
21490 | + return; | |
21491 | + } | |
21492 | + | |
21493 | + now = ftrace_now(cpu); | |
21494 | + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS); | |
21495 | + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now, | |
21496 | + task); | |
21497 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21498 | + task->timer_offset = latency; | |
21499 | +#endif | |
21500 | + } | |
21501 | +} | |
21502 | +#endif | |
21503 | + | |
21504 | +static __init int latency_hist_init(void) | |
21505 | +{ | |
21506 | + struct dentry *latency_hist_root = NULL; | |
21507 | + struct dentry *dentry; | |
21508 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21509 | + struct dentry *dentry_sharedprio; | |
21510 | +#endif | |
21511 | + struct dentry *entry; | |
21512 | + struct dentry *enable_root; | |
21513 | + int i = 0; | |
21514 | + struct hist_data *my_hist; | |
21515 | + char name[64]; | |
21516 | + char *cpufmt = "CPU%d"; | |
21517 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
21518 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21519 | + char *cpufmt_maxlatproc = "max_latency-CPU%d"; | |
21520 | + struct maxlatproc_data *mp = NULL; | |
21521 | +#endif | |
21522 | + | |
21523 | + dentry = tracing_init_dentry(); | |
21524 | + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry); | |
21525 | + enable_root = debugfs_create_dir("enable", latency_hist_root); | |
21526 | + | |
21527 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
21528 | + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root); | |
21529 | + for_each_possible_cpu(i) { | |
21530 | + sprintf(name, cpufmt, i); | |
21531 | + entry = debugfs_create_file(name, 0444, dentry, | |
21532 | + &per_cpu(irqsoff_hist, i), &latency_hist_fops); | |
21533 | + my_hist = &per_cpu(irqsoff_hist, i); | |
21534 | + atomic_set(&my_hist->hist_mode, 1); | |
21535 | + my_hist->min_lat = LONG_MAX; | |
21536 | + } | |
21537 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21538 | + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops); | |
21539 | +#endif | |
21540 | + | |
21541 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
21542 | + dentry = debugfs_create_dir(preemptoff_hist_dir, | |
21543 | + latency_hist_root); | |
21544 | + for_each_possible_cpu(i) { | |
21545 | + sprintf(name, cpufmt, i); | |
21546 | + entry = debugfs_create_file(name, 0444, dentry, | |
21547 | + &per_cpu(preemptoff_hist, i), &latency_hist_fops); | |
21548 | + my_hist = &per_cpu(preemptoff_hist, i); | |
21549 | + atomic_set(&my_hist->hist_mode, 1); | |
21550 | + my_hist->min_lat = LONG_MAX; | |
21551 | + } | |
21552 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21553 | + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops); | |
21554 | +#endif | |
21555 | + | |
21556 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
21557 | + dentry = debugfs_create_dir(preemptirqsoff_hist_dir, | |
21558 | + latency_hist_root); | |
21559 | + for_each_possible_cpu(i) { | |
21560 | + sprintf(name, cpufmt, i); | |
21561 | + entry = debugfs_create_file(name, 0444, dentry, | |
21562 | + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops); | |
21563 | + my_hist = &per_cpu(preemptirqsoff_hist, i); | |
21564 | + atomic_set(&my_hist->hist_mode, 1); | |
21565 | + my_hist->min_lat = LONG_MAX; | |
21566 | + } | |
21567 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21568 | + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops); | |
21569 | +#endif | |
21570 | + | |
21571 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
21572 | + entry = debugfs_create_file("preemptirqsoff", 0644, | |
21573 | + enable_root, (void *)&preemptirqsoff_enabled_data, | |
21574 | + &enable_fops); | |
21575 | +#endif | |
21576 | + | |
21577 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
21578 | + dentry = debugfs_create_dir(wakeup_latency_hist_dir, | |
21579 | + latency_hist_root); | |
21580 | + dentry_sharedprio = debugfs_create_dir( | |
21581 | + wakeup_latency_hist_dir_sharedprio, dentry); | |
21582 | + for_each_possible_cpu(i) { | |
21583 | + sprintf(name, cpufmt, i); | |
21584 | + | |
21585 | + entry = debugfs_create_file(name, 0444, dentry, | |
21586 | + &per_cpu(wakeup_latency_hist, i), | |
21587 | + &latency_hist_fops); | |
21588 | + my_hist = &per_cpu(wakeup_latency_hist, i); | |
21589 | + atomic_set(&my_hist->hist_mode, 1); | |
21590 | + my_hist->min_lat = LONG_MAX; | |
21591 | + | |
21592 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, | |
21593 | + &per_cpu(wakeup_latency_hist_sharedprio, i), | |
21594 | + &latency_hist_fops); | |
21595 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i); | |
21596 | + atomic_set(&my_hist->hist_mode, 1); | |
21597 | + my_hist->min_lat = LONG_MAX; | |
21598 | + | |
21599 | + sprintf(name, cpufmt_maxlatproc, i); | |
21600 | + | |
21601 | + mp = &per_cpu(wakeup_maxlatproc, i); | |
21602 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
21603 | + &maxlatproc_fops); | |
21604 | + clear_maxlatprocdata(mp); | |
21605 | + | |
21606 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i); | |
21607 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp, | |
21608 | + &maxlatproc_fops); | |
21609 | + clear_maxlatprocdata(mp); | |
21610 | + } | |
21611 | + entry = debugfs_create_file("pid", 0644, dentry, | |
21612 | + (void *)&wakeup_pid, &pid_fops); | |
21613 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21614 | + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops); | |
21615 | + entry = debugfs_create_file("reset", 0644, dentry_sharedprio, | |
21616 | + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops); | |
21617 | + entry = debugfs_create_file("wakeup", 0644, | |
21618 | + enable_root, (void *)&wakeup_latency_enabled_data, | |
21619 | + &enable_fops); | |
21620 | +#endif | |
21621 | + | |
21622 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
21623 | + dentry = debugfs_create_dir(missed_timer_offsets_dir, | |
21624 | + latency_hist_root); | |
21625 | + for_each_possible_cpu(i) { | |
21626 | + sprintf(name, cpufmt, i); | |
21627 | + entry = debugfs_create_file(name, 0444, dentry, | |
21628 | + &per_cpu(missed_timer_offsets, i), &latency_hist_fops); | |
21629 | + my_hist = &per_cpu(missed_timer_offsets, i); | |
21630 | + atomic_set(&my_hist->hist_mode, 1); | |
21631 | + my_hist->min_lat = LONG_MAX; | |
21632 | + | |
21633 | + sprintf(name, cpufmt_maxlatproc, i); | |
21634 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, i); | |
21635 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
21636 | + &maxlatproc_fops); | |
21637 | + clear_maxlatprocdata(mp); | |
21638 | + } | |
21639 | + entry = debugfs_create_file("pid", 0644, dentry, | |
21640 | + (void *)&missed_timer_offsets_pid, &pid_fops); | |
21641 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21642 | + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops); | |
21643 | + entry = debugfs_create_file("missed_timer_offsets", 0644, | |
21644 | + enable_root, (void *)&missed_timer_offsets_enabled_data, | |
21645 | + &enable_fops); | |
21646 | +#endif | |
21647 | + | |
21648 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
21649 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
21650 | + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, | |
21651 | + latency_hist_root); | |
21652 | + for_each_possible_cpu(i) { | |
21653 | + sprintf(name, cpufmt, i); | |
21654 | + entry = debugfs_create_file(name, 0444, dentry, | |
21655 | + &per_cpu(timerandwakeup_latency_hist, i), | |
21656 | + &latency_hist_fops); | |
21657 | + my_hist = &per_cpu(timerandwakeup_latency_hist, i); | |
21658 | + atomic_set(&my_hist->hist_mode, 1); | |
21659 | + my_hist->min_lat = LONG_MAX; | |
21660 | + | |
21661 | + sprintf(name, cpufmt_maxlatproc, i); | |
21662 | + mp = &per_cpu(timerandwakeup_maxlatproc, i); | |
21663 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
21664 | + &maxlatproc_fops); | |
21665 | + clear_maxlatprocdata(mp); | |
21666 | + } | |
21667 | + entry = debugfs_create_file("reset", 0644, dentry, | |
21668 | + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); | |
21669 | + entry = debugfs_create_file("timerandwakeup", 0644, | |
21670 | + enable_root, (void *)&timerandwakeup_enabled_data, | |
21671 | + &enable_fops); | |
21672 | +#endif | |
21673 | + return 0; | |
21674 | +} | |
21675 | + | |
21676 | +device_initcall(latency_hist_init); | |
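The file above is driven entirely through debugfs once latency_hist_init() has run. As a rough usage sketch in userspace C (the paths are assumptions: the root directory is created under the tracing dentry, so it should resolve to something like /sys/kernel/debug/tracing/latency_hist, and the "enable/wakeup" and "wakeup/CPU0" names follow the directory strings defined earlier in this file; the program itself is not part of the patch):

	/*
	 * Hypothetical reader, not part of the patch: turn on the wakeup
	 * histogram via the "enable" toggle handled by do_enable(), then
	 * dump the per-CPU histogram exposed through latency_hist_fops
	 * for CPU 0.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	#define HIST_ROOT "/sys/kernel/debug/tracing/latency_hist"

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd;

		fd = open(HIST_ROOT "/enable/wakeup", O_WRONLY);
		if (fd < 0)
			return 1;
		if (write(fd, "1", 1) != 1) {
			close(fd);
			return 1;
		}
		close(fd);

		fd = open(HIST_ROOT "/wakeup/CPU0", O_RDONLY);
		if (fd < 0)
			return 1;
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);	/* bucket lines as emitted by latency_hist_fops */
		close(fd);
		return 0;
	}

Writing "0" to the same enable file unregisters the probes again, via the disable branch of do_enable() shown earlier.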
21677 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | |
21678 | index 7bc56762ca35..84ffcb813263 100644 | |
21679 | --- a/kernel/trace/trace.c | |
21680 | +++ b/kernel/trace/trace.c | |
21681 | @@ -1897,6 +1897,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
21682 | struct task_struct *tsk = current; | |
21683 | ||
21684 | entry->preempt_count = pc & 0xff; | |
21685 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
21686 | entry->pid = (tsk) ? tsk->pid : 0; | |
21687 | entry->flags = | |
21688 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
21689 | @@ -1907,8 +1908,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
21690 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | | |
21691 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
21692 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | |
21693 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
21694 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
21695 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
21696 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
21697 | + | |
21698 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
21699 | } | |
21700 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | |
21701 | ||
21702 | @@ -2892,14 +2896,17 @@ get_total_entries(struct trace_buffer *buf, | |
21703 | ||
21704 | static void print_lat_help_header(struct seq_file *m) | |
21705 | { | |
21706 | - seq_puts(m, "# _------=> CPU# \n" | |
21707 | - "# / _-----=> irqs-off \n" | |
21708 | - "# | / _----=> need-resched \n" | |
21709 | - "# || / _---=> hardirq/softirq \n" | |
21710 | - "# ||| / _--=> preempt-depth \n" | |
21711 | - "# |||| / delay \n" | |
21712 | - "# cmd pid ||||| time | caller \n" | |
21713 | - "# \\ / ||||| \\ | / \n"); | |
21714 | + seq_puts(m, "# _--------=> CPU# \n" | |
21715 | + "# / _-------=> irqs-off \n" | |
21716 | + "# | / _------=> need-resched \n" | |
21717 | + "# || / _-----=> need-resched_lazy \n" | |
21718 | + "# ||| / _----=> hardirq/softirq \n" | |
21719 | + "# |||| / _---=> preempt-depth \n" | |
21720 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
21721 | + "# |||||| / _-=> migrate-disable \n" | |
21722 | + "# ||||||| / delay \n" | |
21723 | + "# cmd pid |||||||| time | caller \n" | |
21724 | + "# \\ / |||||||| \\ | / \n"); | |
21725 | } | |
21726 | ||
21727 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |
21728 | @@ -2925,11 +2932,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |
21729 | print_event_info(buf, m); | |
21730 | seq_puts(m, "# _-----=> irqs-off\n" | |
21731 | "# / _----=> need-resched\n" | |
21732 | - "# | / _---=> hardirq/softirq\n" | |
21733 | - "# || / _--=> preempt-depth\n" | |
21734 | - "# ||| / delay\n" | |
21735 | - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" | |
21736 | - "# | | | |||| | |\n"); | |
21737 | + "# |/ _-----=> need-resched_lazy\n" | |
21738 | + "# || / _---=> hardirq/softirq\n" | |
21739 | + "# ||| / _--=> preempt-depth\n" | |
21740 | + "# |||| / _-=> preempt-lazy-depth\n" | |
21741 | + "# ||||| / _-=> migrate-disable \n" | |
21742 | + "# |||||| / delay\n" | |
21743 | + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" | |
21744 | + "# | | | ||||||| | |\n"); | |
21745 | } | |
21746 | ||
21747 | void | |
21748 | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h | |
21749 | index f783df416726..6f2d0fa4fbf1 100644 | |
21750 | --- a/kernel/trace/trace.h | |
21751 | +++ b/kernel/trace/trace.h | |
21752 | @@ -123,6 +123,7 @@ struct kretprobe_trace_entry_head { | |
21753 | * NEED_RESCHED - reschedule is requested | |
21754 | * HARDIRQ - inside an interrupt handler | |
21755 | * SOFTIRQ - inside a softirq handler | |
21756 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
21757 | */ | |
21758 | enum trace_flag_type { | |
21759 | TRACE_FLAG_IRQS_OFF = 0x01, | |
21760 | @@ -132,6 +133,7 @@ enum trace_flag_type { | |
21761 | TRACE_FLAG_SOFTIRQ = 0x10, | |
21762 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
21763 | TRACE_FLAG_NMI = 0x40, | |
21764 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x80, | |
21765 | }; | |
21766 | ||
21767 | #define TRACE_BUF_SIZE 1024 | |
21768 | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c | |
21769 | index 03c0a48c3ac4..0b85d516b491 100644 | |
21770 | --- a/kernel/trace/trace_events.c | |
21771 | +++ b/kernel/trace/trace_events.c | |
21772 | @@ -187,6 +187,8 @@ static int trace_define_common_fields(void) | |
21773 | __common_field(unsigned char, flags); | |
21774 | __common_field(unsigned char, preempt_count); | |
21775 | __common_field(int, pid); | |
21776 | + __common_field(unsigned short, migrate_disable); | |
21777 | + __common_field(unsigned short, padding); | |
21778 | ||
21779 | return ret; | |
21780 | } | |
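With the two __common_field() additions above, every event record exported through the trace buffer now carries the migrate-disable depth alongside the pre-existing common members. A purely illustrative sketch of that common header follows (field order mirrors trace_define_common_fields(); the type field precedes the lines quoted in this hunk; this struct is not defined anywhere by the patch and is not a statement about the binary ABI):

	/* Illustrative only -- mirrors the __common_field() list, not a real kernel struct. */
	struct common_header_sketch {
		unsigned short	type;			/* event id, defined just above the quoted context */
		unsigned char	flags;			/* irqs-off / need-resched / ... bits */
		unsigned char	preempt_count;
		int		pid;
		unsigned short	migrate_disable;	/* added by this patch */
		unsigned short	padding;		/* added to keep the record size aligned */
	};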
21781 | diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c | |
21782 | index 03cdff84d026..940bd10b4406 100644 | |
21783 | --- a/kernel/trace/trace_irqsoff.c | |
21784 | +++ b/kernel/trace/trace_irqsoff.c | |
21785 | @@ -13,6 +13,7 @@ | |
21786 | #include <linux/uaccess.h> | |
21787 | #include <linux/module.h> | |
21788 | #include <linux/ftrace.h> | |
21789 | +#include <trace/events/hist.h> | |
21790 | ||
21791 | #include "trace.h" | |
21792 | ||
21793 | @@ -424,11 +425,13 @@ void start_critical_timings(void) | |
21794 | { | |
21795 | if (preempt_trace() || irq_trace()) | |
21796 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
21797 | + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); | |
21798 | } | |
21799 | EXPORT_SYMBOL_GPL(start_critical_timings); | |
21800 | ||
21801 | void stop_critical_timings(void) | |
21802 | { | |
21803 | + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); | |
21804 | if (preempt_trace() || irq_trace()) | |
21805 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
21806 | } | |
21807 | @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); | |
21808 | #ifdef CONFIG_PROVE_LOCKING | |
21809 | void time_hardirqs_on(unsigned long a0, unsigned long a1) | |
21810 | { | |
21811 | + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); | |
21812 | if (!preempt_trace() && irq_trace()) | |
21813 | stop_critical_timing(a0, a1); | |
21814 | } | |
21815 | @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |
21816 | { | |
21817 | if (!preempt_trace() && irq_trace()) | |
21818 | start_critical_timing(a0, a1); | |
21819 | + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); | |
21820 | } | |
21821 | ||
21822 | #else /* !CONFIG_PROVE_LOCKING */ | |
21823 | @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr) | |
21824 | */ | |
21825 | void trace_hardirqs_on(void) | |
21826 | { | |
21827 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
21828 | if (!preempt_trace() && irq_trace()) | |
21829 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
21830 | } | |
21831 | @@ -480,11 +486,13 @@ void trace_hardirqs_off(void) | |
21832 | { | |
21833 | if (!preempt_trace() && irq_trace()) | |
21834 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
21835 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
21836 | } | |
21837 | EXPORT_SYMBOL(trace_hardirqs_off); | |
21838 | ||
21839 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) | |
21840 | { | |
21841 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
21842 | if (!preempt_trace() && irq_trace()) | |
21843 | stop_critical_timing(CALLER_ADDR0, caller_addr); | |
21844 | } | |
21845 | @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) | |
21846 | { | |
21847 | if (!preempt_trace() && irq_trace()) | |
21848 | start_critical_timing(CALLER_ADDR0, caller_addr); | |
21849 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
21850 | } | |
21851 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
21852 | ||
21853 | @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
21854 | #ifdef CONFIG_PREEMPT_TRACER | |
21855 | void trace_preempt_on(unsigned long a0, unsigned long a1) | |
21856 | { | |
21857 | + trace_preemptirqsoff_hist(PREEMPT_ON, 0); | |
21858 | if (preempt_trace() && !irq_trace()) | |
21859 | stop_critical_timing(a0, a1); | |
21860 | } | |
21861 | ||
21862 | void trace_preempt_off(unsigned long a0, unsigned long a1) | |
21863 | { | |
21864 | + trace_preemptirqsoff_hist(PREEMPT_ON, 1); | |
21865 | if (preempt_trace() && !irq_trace()) | |
21866 | start_critical_timing(a0, a1); | |
21867 | } | |
21868 | diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c | |
21869 | index 0bb9cf2d53e6..455a7464772f 100644 | |
21870 | --- a/kernel/trace/trace_output.c | |
21871 | +++ b/kernel/trace/trace_output.c | |
21872 | @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
21873 | { | |
21874 | char hardsoft_irq; | |
21875 | char need_resched; | |
21876 | + char need_resched_lazy; | |
21877 | char irqs_off; | |
21878 | int hardirq; | |
21879 | int softirq; | |
21880 | @@ -416,6 +417,9 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
21881 | break; | |
21882 | } | |
21883 | ||
21884 | + need_resched_lazy = | |
21885 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
21886 | + | |
21887 | hardsoft_irq = | |
21888 | (nmi && hardirq) ? 'Z' : | |
21889 | nmi ? 'z' : | |
21890 | @@ -424,14 +428,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
21891 | softirq ? 's' : | |
21892 | '.' ; | |
21893 | ||
21894 | - trace_seq_printf(s, "%c%c%c", | |
21895 | - irqs_off, need_resched, hardsoft_irq); | |
21896 | + trace_seq_printf(s, "%c%c%c%c", | |
21897 | + irqs_off, need_resched, need_resched_lazy, | |
21898 | + hardsoft_irq); | |
21899 | ||
21900 | if (entry->preempt_count) | |
21901 | trace_seq_printf(s, "%x", entry->preempt_count); | |
21902 | else | |
21903 | trace_seq_putc(s, '.'); | |
21904 | ||
21905 | + if (entry->preempt_lazy_count) | |
21906 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
21907 | + else | |
21908 | + trace_seq_putc(s, '.'); | |
21909 | + | |
21910 | + if (entry->migrate_disable) | |
21911 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
21912 | + else | |
21913 | + trace_seq_putc(s, '.'); | |
21914 | + | |
21915 | return !trace_seq_has_overflowed(s); | |
21916 | } | |
21917 | ||
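After this change trace_print_lat_fmt() emits seven single-character columns instead of four: irqs-off, need-resched, need-resched-lazy, hardirq/softirq, preempt depth, preempt-lazy depth and migrate-disable, where the three depth columns print a hex digit or '.' when zero. A minimal sketch of how a log post-processor might label such a field (the helper is hypothetical; only the column order is taken from the function above):

	/* Hypothetical decoder for the 7-character latency field, e.g. "d..h1..". */
	#include <stdio.h>

	static const char *lat_cols[7] = {
		"irqs-off", "need-resched", "need-resched-lazy", "hardirq/softirq",
		"preempt-depth", "preempt-lazy-depth", "migrate-disable"
	};

	static void print_lat_field(const char *field)
	{
		for (int i = 0; i < 7 && field[i]; i++)
			printf("%-18s %c\n", lat_cols[i], field[i]);
	}

	int main(void)
	{
		print_lat_field("d..h1..");	/* irqs off, in hardirq, preempt depth 1 */
		return 0;
	}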
21918 | diff --git a/kernel/user.c b/kernel/user.c | |
21919 | index b069ccbfb0b0..1a2e88e98b5e 100644 | |
21920 | --- a/kernel/user.c | |
21921 | +++ b/kernel/user.c | |
21922 | @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up) | |
21923 | if (!up) | |
21924 | return; | |
21925 | ||
21926 | - local_irq_save(flags); | |
21927 | + local_irq_save_nort(flags); | |
21928 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
21929 | free_user(up, flags); | |
21930 | else | |
21931 | - local_irq_restore(flags); | |
21932 | + local_irq_restore_nort(flags); | |
21933 | } | |
21934 | ||
21935 | struct user_struct *alloc_uid(kuid_t uid) | |
21936 | diff --git a/kernel/watchdog.c b/kernel/watchdog.c | |
21937 | index 9acb29f280ec..caba62080411 100644 | |
21938 | --- a/kernel/watchdog.c | |
21939 | +++ b/kernel/watchdog.c | |
21940 | @@ -315,6 +315,8 @@ static int is_softlockup(unsigned long touch_ts) | |
21941 | ||
21942 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | |
21943 | ||
21944 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); | |
21945 | + | |
21946 | static struct perf_event_attr wd_hw_attr = { | |
21947 | .type = PERF_TYPE_HARDWARE, | |
21948 | .config = PERF_COUNT_HW_CPU_CYCLES, | |
21949 | @@ -349,6 +351,13 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
21950 | /* only print hardlockups once */ | |
21951 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
21952 | return; | |
21953 | + /* | |
21954 | + * If early-printk is enabled then make sure we do not | |
21955 | + * lock up in printk() and kill console logging: | |
21956 | + */ | |
21957 | + printk_kill(); | |
21958 | + | |
21959 | + raw_spin_lock(&watchdog_output_lock); | |
21960 | ||
21961 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
21962 | print_modules(); | |
21963 | @@ -366,6 +375,7 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
21964 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) | |
21965 | trigger_allbutself_cpu_backtrace(); | |
21966 | ||
21967 | + raw_spin_unlock(&watchdog_output_lock); | |
21968 | if (hardlockup_panic) | |
21969 | nmi_panic(regs, "Hard LOCKUP"); | |
21970 | ||
21971 | @@ -513,6 +523,7 @@ static void watchdog_enable(unsigned int cpu) | |
21972 | /* kick off the timer for the hardlockup detector */ | |
21973 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
21974 | hrtimer->function = watchdog_timer_fn; | |
21975 | + hrtimer->irqsafe = 1; | |
21976 | ||
21977 | /* Enable the perf event */ | |
21978 | watchdog_nmi_enable(cpu); | |
21979 | diff --git a/kernel/workqueue.c b/kernel/workqueue.c | |
21980 | index ef071ca73fc3..c7a62d6adb00 100644 | |
21981 | --- a/kernel/workqueue.c | |
21982 | +++ b/kernel/workqueue.c | |
21983 | @@ -48,6 +48,8 @@ | |
21984 | #include <linux/nodemask.h> | |
21985 | #include <linux/moduleparam.h> | |
21986 | #include <linux/uaccess.h> | |
21987 | +#include <linux/locallock.h> | |
21988 | +#include <linux/delay.h> | |
21989 | ||
21990 | #include "workqueue_internal.h" | |
21991 | ||
21992 | @@ -121,11 +123,16 @@ enum { | |
21993 | * cpu or grabbing pool->lock is enough for read access. If | |
21994 | * POOL_DISASSOCIATED is set, it's identical to L. | |
21995 | * | |
21996 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
21997 | + * the list manipulations against read access from | |
21998 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
21999 | + * pool->lock. | |
22000 | + * | |
22001 | * A: pool->attach_mutex protected. | |
22002 | * | |
22003 | * PL: wq_pool_mutex protected. | |
22004 | * | |
22005 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
22006 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
22007 | * | |
22008 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
22009 | * | |
22010 | @@ -134,7 +141,7 @@ enum { | |
22011 | * | |
22012 | * WQ: wq->mutex protected. | |
22013 | * | |
22014 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
22015 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
22016 | * | |
22017 | * MD: wq_mayday_lock protected. | |
22018 | */ | |
22019 | @@ -185,7 +192,7 @@ struct worker_pool { | |
22020 | atomic_t nr_running ____cacheline_aligned_in_smp; | |
22021 | ||
22022 | /* | |
22023 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
22024 | + * Destruction of pool is RCU protected to allow dereferences | |
22025 | * from get_work_pool(). | |
22026 | */ | |
22027 | struct rcu_head rcu; | |
22028 | @@ -214,7 +221,7 @@ struct pool_workqueue { | |
22029 | /* | |
22030 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
22031 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
22032 | - * itself is also sched-RCU protected so that the first pwq can be | |
22033 | + * itself is also RCU protected so that the first pwq can be | |
22034 | * determined without grabbing wq->mutex. | |
22035 | */ | |
22036 | struct work_struct unbound_release_work; | |
22037 | @@ -348,6 +355,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); | |
22038 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; | |
22039 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
22040 | ||
22041 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
22042 | + | |
22043 | static int worker_thread(void *__worker); | |
22044 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
22045 | ||
22046 | @@ -355,20 +364,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
22047 | #include <trace/events/workqueue.h> | |
22048 | ||
22049 | #define assert_rcu_or_pool_mutex() \ | |
22050 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
22051 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
22052 | !lockdep_is_held(&wq_pool_mutex), \ | |
22053 | - "sched RCU or wq_pool_mutex should be held") | |
22054 | + "RCU or wq_pool_mutex should be held") | |
22055 | ||
22056 | #define assert_rcu_or_wq_mutex(wq) \ | |
22057 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
22058 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
22059 | !lockdep_is_held(&wq->mutex), \ | |
22060 | - "sched RCU or wq->mutex should be held") | |
22061 | + "RCU or wq->mutex should be held") | |
22062 | ||
22063 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
22064 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
22065 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
22066 | !lockdep_is_held(&wq->mutex) && \ | |
22067 | !lockdep_is_held(&wq_pool_mutex), \ | |
22068 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
22069 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
22070 | ||
22071 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
22072 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
22073 | @@ -380,7 +389,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
22074 | * @pool: iteration cursor | |
22075 | * @pi: integer used for iteration | |
22076 | * | |
22077 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
22078 | + * This must be called either with wq_pool_mutex held or RCU read | |
22079 | * locked. If the pool needs to be used beyond the locking in effect, the | |
22080 | * caller is responsible for guaranteeing that the pool stays online. | |
22081 | * | |
22082 | @@ -412,7 +421,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
22083 | * @pwq: iteration cursor | |
22084 | * @wq: the target workqueue | |
22085 | * | |
22086 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
22087 | + * This must be called either with wq->mutex held or RCU read locked. | |
22088 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
22089 | * responsible for guaranteeing that the pwq stays online. | |
22090 | * | |
22091 | @@ -424,6 +433,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
22092 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ | |
22093 | else | |
22094 | ||
22095 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
22096 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
22097 | +{ | |
22098 | + preempt_disable(); | |
22099 | +} | |
22100 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
22101 | +{ | |
22102 | + preempt_enable(); | |
22103 | +} | |
22104 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
22105 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
22106 | +#else | |
22107 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
22108 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
22109 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
22110 | +{ | |
22111 | + spin_lock_irq(&pool->lock); | |
22112 | +} | |
22113 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
22114 | +{ | |
22115 | + spin_unlock_irq(&pool->lock); | |
22116 | +} | |
22117 | +#endif | |
22118 | + | |
22119 | + | |
22120 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
22121 | ||
22122 | static struct debug_obj_descr work_debug_descr; | |
22123 | @@ -548,7 +582,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |
22124 | * @wq: the target workqueue | |
22125 | * @node: the node ID | |
22126 | * | |
22127 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
22128 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
22129 | * read locked. | |
22130 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
22131 | * responsible for guaranteeing that the pwq stays online. | |
22132 | @@ -692,8 +726,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) | |
22133 | * @work: the work item of interest | |
22134 | * | |
22135 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
22136 | - * access under sched-RCU read lock. As such, this function should be | |
22137 | - * called under wq_pool_mutex or with preemption disabled. | |
22138 | + * access under RCU read lock. As such, this function should be | |
22139 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
22140 | * | |
22141 | * All fields of the returned pool are accessible as long as the above | |
22142 | * mentioned locking is in effect. If the returned pool needs to be used | |
22143 | @@ -830,50 +864,45 @@ static struct worker *first_idle_worker(struct worker_pool *pool) | |
22144 | */ | |
22145 | static void wake_up_worker(struct worker_pool *pool) | |
22146 | { | |
22147 | - struct worker *worker = first_idle_worker(pool); | |
22148 | + struct worker *worker; | |
22149 | + | |
22150 | + rt_lock_idle_list(pool); | |
22151 | + | |
22152 | + worker = first_idle_worker(pool); | |
22153 | ||
22154 | if (likely(worker)) | |
22155 | wake_up_process(worker->task); | |
22156 | + | |
22157 | + rt_unlock_idle_list(pool); | |
22158 | } | |
22159 | ||
22160 | /** | |
22161 | - * wq_worker_waking_up - a worker is waking up | |
22162 | + * wq_worker_running - a worker is running again | |
22163 | * @task: task waking up | |
22164 | - * @cpu: CPU @task is waking up to | |
22165 | * | |
22166 | - * This function is called during try_to_wake_up() when a worker is | |
22167 | - * being awoken. | |
22168 | - * | |
22169 | - * CONTEXT: | |
22170 | - * spin_lock_irq(rq->lock) | |
22171 | + * This function is called when a worker returns from schedule() | |
22172 | */ | |
22173 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
22174 | +void wq_worker_running(struct task_struct *task) | |
22175 | { | |
22176 | struct worker *worker = kthread_data(task); | |
22177 | ||
22178 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
22179 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
22180 | + if (!worker->sleeping) | |
22181 | + return; | |
22182 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
22183 | atomic_inc(&worker->pool->nr_running); | |
22184 | - } | |
22185 | + worker->sleeping = 0; | |
22186 | } | |
22187 | ||
22188 | /** | |
22189 | * wq_worker_sleeping - a worker is going to sleep | |
22190 | * @task: task going to sleep | |
22191 | * | |
22192 | - * This function is called during schedule() when a busy worker is | |
22193 | - * going to sleep. Worker on the same cpu can be woken up by | |
22194 | - * returning pointer to its task. | |
22195 | - * | |
22196 | - * CONTEXT: | |
22197 | - * spin_lock_irq(rq->lock) | |
22198 | - * | |
22199 | - * Return: | |
22200 | - * Worker task on @cpu to wake up, %NULL if none. | |
22201 | + * This function is called from schedule() when a busy worker is | |
22202 | + * going to sleep. | |
22203 | */ | |
22204 | -struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
22205 | +void wq_worker_sleeping(struct task_struct *task) | |
22206 | { | |
22207 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
22208 | + struct worker *worker = kthread_data(task); | |
22209 | struct worker_pool *pool; | |
22210 | ||
22211 | /* | |
22212 | @@ -882,29 +911,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) | |
22213 | * checking NOT_RUNNING. | |
22214 | */ | |
22215 | if (worker->flags & WORKER_NOT_RUNNING) | |
22216 | - return NULL; | |
22217 | + return; | |
22218 | ||
22219 | pool = worker->pool; | |
22220 | ||
22221 | - /* this can only happen on the local cpu */ | |
22222 | - if (WARN_ON_ONCE(pool->cpu != raw_smp_processor_id())) | |
22223 | - return NULL; | |
22224 | + if (WARN_ON_ONCE(worker->sleeping)) | |
22225 | + return; | |
22226 | + | |
22227 | + worker->sleeping = 1; | |
22228 | ||
22229 | /* | |
22230 | * The counterpart of the following dec_and_test, implied mb, | |
22231 | * worklist not empty test sequence is in insert_work(). | |
22232 | * Please read comment there. | |
22233 | - * | |
22234 | - * NOT_RUNNING is clear. This means that we're bound to and | |
22235 | - * running on the local cpu w/ rq lock held and preemption | |
22236 | - * disabled, which in turn means that none else could be | |
22237 | - * manipulating idle_list, so dereferencing idle_list without pool | |
22238 | - * lock is safe. | |
22239 | */ | |
22240 | if (atomic_dec_and_test(&pool->nr_running) && | |
22241 | - !list_empty(&pool->worklist)) | |
22242 | - to_wakeup = first_idle_worker(pool); | |
22243 | - return to_wakeup ? to_wakeup->task : NULL; | |
22244 | + !list_empty(&pool->worklist)) { | |
22245 | + sched_lock_idle_list(pool); | |
22246 | + wake_up_worker(pool); | |
22247 | + sched_unlock_idle_list(pool); | |
22248 | + } | |
22249 | } | |
22250 | ||
22251 | /** | |
22252 | @@ -1098,12 +1124,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) | |
22253 | { | |
22254 | if (pwq) { | |
22255 | /* | |
22256 | - * As both pwqs and pools are sched-RCU protected, the | |
22257 | + * As both pwqs and pools are RCU protected, the | |
22258 | * following lock operations are safe. | |
22259 | */ | |
22260 | - spin_lock_irq(&pwq->pool->lock); | |
22261 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); | |
22262 | put_pwq(pwq); | |
22263 | - spin_unlock_irq(&pwq->pool->lock); | |
22264 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
22265 | } | |
22266 | } | |
22267 | ||
22268 | @@ -1207,7 +1233,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
22269 | struct worker_pool *pool; | |
22270 | struct pool_workqueue *pwq; | |
22271 | ||
22272 | - local_irq_save(*flags); | |
22273 | + local_lock_irqsave(pendingb_lock, *flags); | |
22274 | ||
22275 | /* try to steal the timer if it exists */ | |
22276 | if (is_dwork) { | |
22277 | @@ -1226,6 +1252,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
22278 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) | |
22279 | return 0; | |
22280 | ||
22281 | + rcu_read_lock(); | |
22282 | /* | |
22283 | * The queueing is in progress, or it is already queued. Try to | |
22284 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
22285 | @@ -1264,14 +1291,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
22286 | set_work_pool_and_keep_pending(work, pool->id); | |
22287 | ||
22288 | spin_unlock(&pool->lock); | |
22289 | + rcu_read_unlock(); | |
22290 | return 1; | |
22291 | } | |
22292 | spin_unlock(&pool->lock); | |
22293 | fail: | |
22294 | - local_irq_restore(*flags); | |
22295 | + rcu_read_unlock(); | |
22296 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
22297 | if (work_is_canceling(work)) | |
22298 | return -ENOENT; | |
22299 | - cpu_relax(); | |
22300 | + cpu_chill(); | |
22301 | return -EAGAIN; | |
22302 | } | |
22303 | ||
22304 | @@ -1373,7 +1402,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
22305 | * queued or lose PENDING. Grabbing PENDING and queueing should | |
22306 | * happen with IRQ disabled. | |
22307 | */ | |
22308 | - WARN_ON_ONCE(!irqs_disabled()); | |
22309 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
22310 | ||
22311 | debug_work_activate(work); | |
22312 | ||
22313 | @@ -1381,6 +1410,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
22314 | if (unlikely(wq->flags & __WQ_DRAINING) && | |
22315 | WARN_ON_ONCE(!is_chained_work(wq))) | |
22316 | return; | |
22317 | + rcu_read_lock(); | |
22318 | retry: | |
22319 | if (req_cpu == WORK_CPU_UNBOUND) | |
22320 | cpu = wq_select_unbound_cpu(raw_smp_processor_id()); | |
22321 | @@ -1437,10 +1467,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
22322 | /* pwq determined, queue */ | |
22323 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
22324 | ||
22325 | - if (WARN_ON(!list_empty(&work->entry))) { | |
22326 | - spin_unlock(&pwq->pool->lock); | |
22327 | - return; | |
22328 | - } | |
22329 | + if (WARN_ON(!list_empty(&work->entry))) | |
22330 | + goto out; | |
22331 | ||
22332 | pwq->nr_in_flight[pwq->work_color]++; | |
22333 | work_flags = work_color_to_flags(pwq->work_color); | |
22334 | @@ -1458,7 +1486,9 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
22335 | ||
22336 | insert_work(pwq, work, worklist, work_flags); | |
22337 | ||
22338 | +out: | |
22339 | spin_unlock(&pwq->pool->lock); | |
22340 | + rcu_read_unlock(); | |
22341 | } | |
22342 | ||
22343 | /** | |
22344 | @@ -1478,14 +1508,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, | |
22345 | bool ret = false; | |
22346 | unsigned long flags; | |
22347 | ||
22348 | - local_irq_save(flags); | |
22349 | + local_lock_irqsave(pendingb_lock,flags); | |
22350 | ||
22351 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
22352 | __queue_work(cpu, wq, work); | |
22353 | ret = true; | |
22354 | } | |
22355 | ||
22356 | - local_irq_restore(flags); | |
22357 | + local_unlock_irqrestore(pendingb_lock, flags); | |
22358 | return ret; | |
22359 | } | |
22360 | EXPORT_SYMBOL(queue_work_on); | |
22361 | @@ -1552,14 +1582,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
22362 | unsigned long flags; | |
22363 | ||
22364 | /* read the comment in __queue_work() */ | |
22365 | - local_irq_save(flags); | |
22366 | + local_lock_irqsave(pendingb_lock, flags); | |
22367 | ||
22368 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
22369 | __queue_delayed_work(cpu, wq, dwork, delay); | |
22370 | ret = true; | |
22371 | } | |
22372 | ||
22373 | - local_irq_restore(flags); | |
22374 | + local_unlock_irqrestore(pendingb_lock, flags); | |
22375 | return ret; | |
22376 | } | |
22377 | EXPORT_SYMBOL(queue_delayed_work_on); | |
22378 | @@ -1594,7 +1624,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
22379 | ||
22380 | if (likely(ret >= 0)) { | |
22381 | __queue_delayed_work(cpu, wq, dwork, delay); | |
22382 | - local_irq_restore(flags); | |
22383 | + local_unlock_irqrestore(pendingb_lock, flags); | |
22384 | } | |
22385 | ||
22386 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
22387 | @@ -1627,7 +1657,9 @@ static void worker_enter_idle(struct worker *worker) | |
22388 | worker->last_active = jiffies; | |
22389 | ||
22390 | /* idle_list is LIFO */ | |
22391 | + rt_lock_idle_list(pool); | |
22392 | list_add(&worker->entry, &pool->idle_list); | |
22393 | + rt_unlock_idle_list(pool); | |
22394 | ||
22395 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
22396 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
22397 | @@ -1660,7 +1692,9 @@ static void worker_leave_idle(struct worker *worker) | |
22398 | return; | |
22399 | worker_clr_flags(worker, WORKER_IDLE); | |
22400 | pool->nr_idle--; | |
22401 | + rt_lock_idle_list(pool); | |
22402 | list_del_init(&worker->entry); | |
22403 | + rt_unlock_idle_list(pool); | |
22404 | } | |
22405 | ||
22406 | static struct worker *alloc_worker(int node) | |
22407 | @@ -1826,7 +1860,9 @@ static void destroy_worker(struct worker *worker) | |
22408 | pool->nr_workers--; | |
22409 | pool->nr_idle--; | |
22410 | ||
22411 | + rt_lock_idle_list(pool); | |
22412 | list_del_init(&worker->entry); | |
22413 | + rt_unlock_idle_list(pool); | |
22414 | worker->flags |= WORKER_DIE; | |
22415 | wake_up_process(worker->task); | |
22416 | } | |
22417 | @@ -2785,14 +2821,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
22418 | ||
22419 | might_sleep(); | |
22420 | ||
22421 | - local_irq_disable(); | |
22422 | + rcu_read_lock(); | |
22423 | pool = get_work_pool(work); | |
22424 | if (!pool) { | |
22425 | - local_irq_enable(); | |
22426 | + rcu_read_unlock(); | |
22427 | return false; | |
22428 | } | |
22429 | ||
22430 | - spin_lock(&pool->lock); | |
22431 | + spin_lock_irq(&pool->lock); | |
22432 | /* see the comment in try_to_grab_pending() with the same code */ | |
22433 | pwq = get_work_pwq(work); | |
22434 | if (pwq) { | |
22435 | @@ -2821,10 +2857,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
22436 | else | |
22437 | lock_map_acquire_read(&pwq->wq->lockdep_map); | |
22438 | lock_map_release(&pwq->wq->lockdep_map); | |
22439 | - | |
22440 | + rcu_read_unlock(); | |
22441 | return true; | |
22442 | already_gone: | |
22443 | spin_unlock_irq(&pool->lock); | |
22444 | + rcu_read_unlock(); | |
22445 | return false; | |
22446 | } | |
22447 | ||
22448 | @@ -2911,7 +2948,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) | |
22449 | ||
22450 | /* tell other tasks trying to grab @work to back off */ | |
22451 | mark_work_canceling(work); | |
22452 | - local_irq_restore(flags); | |
22453 | + local_unlock_irqrestore(pendingb_lock, flags); | |
22454 | ||
22455 | flush_work(work); | |
22456 | clear_work_data(work); | |
22457 | @@ -2966,10 +3003,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); | |
22458 | */ | |
22459 | bool flush_delayed_work(struct delayed_work *dwork) | |
22460 | { | |
22461 | - local_irq_disable(); | |
22462 | + local_lock_irq(pendingb_lock); | |
22463 | if (del_timer_sync(&dwork->timer)) | |
22464 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
22465 | - local_irq_enable(); | |
22466 | + local_unlock_irq(pendingb_lock); | |
22467 | return flush_work(&dwork->work); | |
22468 | } | |
22469 | EXPORT_SYMBOL(flush_delayed_work); | |
22470 | @@ -3004,7 +3041,7 @@ bool cancel_delayed_work(struct delayed_work *dwork) | |
22471 | ||
22472 | set_work_pool_and_clear_pending(&dwork->work, | |
22473 | get_work_pool_id(&dwork->work)); | |
22474 | - local_irq_restore(flags); | |
22475 | + local_unlock_irqrestore(pendingb_lock, flags); | |
22476 | return ret; | |
22477 | } | |
22478 | EXPORT_SYMBOL(cancel_delayed_work); | |
22479 | @@ -3233,7 +3270,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |
22480 | * put_unbound_pool - put a worker_pool | |
22481 | * @pool: worker_pool to put | |
22482 | * | |
22483 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
22484 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
22485 | * safe manner. get_unbound_pool() calls this function on its failure path | |
22486 | * and this function should be able to release pools which went through, | |
22487 | * successfully or not, init_worker_pool(). | |
22488 | @@ -3287,8 +3324,8 @@ static void put_unbound_pool(struct worker_pool *pool) | |
22489 | del_timer_sync(&pool->idle_timer); | |
22490 | del_timer_sync(&pool->mayday_timer); | |
22491 | ||
22492 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
22493 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
22494 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
22495 | + call_rcu(&pool->rcu, rcu_free_pool); | |
22496 | } | |
22497 | ||
22498 | /** | |
22499 | @@ -3395,14 +3432,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) | |
22500 | put_unbound_pool(pool); | |
22501 | mutex_unlock(&wq_pool_mutex); | |
22502 | ||
22503 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
22504 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
22505 | ||
22506 | /* | |
22507 | * If we're the last pwq going away, @wq is already dead and no one | |
22508 | * is gonna access it anymore. Schedule RCU free. | |
22509 | */ | |
22510 | if (is_last) | |
22511 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
22512 | + call_rcu(&wq->rcu, rcu_free_wq); | |
22513 | } | |
22514 | ||
22515 | /** | |
22516 | @@ -4052,7 +4089,7 @@ void destroy_workqueue(struct workqueue_struct *wq) | |
22517 | * The base ref is never dropped on per-cpu pwqs. Directly | |
22518 | * schedule RCU free. | |
22519 | */ | |
22520 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
22521 | + call_rcu(&wq->rcu, rcu_free_wq); | |
22522 | } else { | |
22523 | /* | |
22524 | * We're the sole accessor of @wq at this point. Directly | |
22525 | @@ -4145,7 +4182,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
22526 | struct pool_workqueue *pwq; | |
22527 | bool ret; | |
22528 | ||
22529 | - rcu_read_lock_sched(); | |
22530 | + rcu_read_lock(); | |
22531 | + preempt_disable(); | |
22532 | ||
22533 | if (cpu == WORK_CPU_UNBOUND) | |
22534 | cpu = smp_processor_id(); | |
22535 | @@ -4156,7 +4194,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
22536 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); | |
22537 | ||
22538 | ret = !list_empty(&pwq->delayed_works); | |
22539 | - rcu_read_unlock_sched(); | |
22540 | + preempt_enable(); | |
22541 | + rcu_read_unlock(); | |
22542 | ||
22543 | return ret; | |
22544 | } | |
22545 | @@ -4182,15 +4221,15 @@ unsigned int work_busy(struct work_struct *work) | |
22546 | if (work_pending(work)) | |
22547 | ret |= WORK_BUSY_PENDING; | |
22548 | ||
22549 | - local_irq_save(flags); | |
22550 | + rcu_read_lock(); | |
22551 | pool = get_work_pool(work); | |
22552 | if (pool) { | |
22553 | - spin_lock(&pool->lock); | |
22554 | + spin_lock_irqsave(&pool->lock, flags); | |
22555 | if (find_worker_executing_work(pool, work)) | |
22556 | ret |= WORK_BUSY_RUNNING; | |
22557 | - spin_unlock(&pool->lock); | |
22558 | + spin_unlock_irqrestore(&pool->lock, flags); | |
22559 | } | |
22560 | - local_irq_restore(flags); | |
22561 | + rcu_read_unlock(); | |
22562 | ||
22563 | return ret; | |
22564 | } | |
22565 | @@ -4379,7 +4418,7 @@ void show_workqueue_state(void) | |
22566 | unsigned long flags; | |
22567 | int pi; | |
22568 | ||
22569 | - rcu_read_lock_sched(); | |
22570 | + rcu_read_lock(); | |
22571 | ||
22572 | pr_info("Showing busy workqueues and worker pools:\n"); | |
22573 | ||
22574 | @@ -4432,7 +4471,7 @@ void show_workqueue_state(void) | |
22575 | spin_unlock_irqrestore(&pool->lock, flags); | |
22576 | } | |
22577 | ||
22578 | - rcu_read_unlock_sched(); | |
22579 | + rcu_read_unlock(); | |
22580 | } | |
22581 | ||
22582 | /* | |
22583 | @@ -4770,16 +4809,16 @@ bool freeze_workqueues_busy(void) | |
22584 | * nr_active is monotonically decreasing. It's safe | |
22585 | * to peek without lock. | |
22586 | */ | |
22587 | - rcu_read_lock_sched(); | |
22588 | + rcu_read_lock(); | |
22589 | for_each_pwq(pwq, wq) { | |
22590 | WARN_ON_ONCE(pwq->nr_active < 0); | |
22591 | if (pwq->nr_active) { | |
22592 | busy = true; | |
22593 | - rcu_read_unlock_sched(); | |
22594 | + rcu_read_unlock(); | |
22595 | goto out_unlock; | |
22596 | } | |
22597 | } | |
22598 | - rcu_read_unlock_sched(); | |
22599 | + rcu_read_unlock(); | |
22600 | } | |
22601 | out_unlock: | |
22602 | mutex_unlock(&wq_pool_mutex); | |
22603 | @@ -4969,7 +5008,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
22604 | const char *delim = ""; | |
22605 | int node, written = 0; | |
22606 | ||
22607 | - rcu_read_lock_sched(); | |
22608 | + get_online_cpus(); | |
22609 | + rcu_read_lock(); | |
22610 | for_each_node(node) { | |
22611 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
22612 | "%s%d:%d", delim, node, | |
22613 | @@ -4977,7 +5017,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
22614 | delim = " "; | |
22615 | } | |
22616 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
22617 | - rcu_read_unlock_sched(); | |
22618 | + rcu_read_unlock(); | |
22619 | + put_online_cpus(); | |
22620 | ||
22621 | return written; | |
22622 | } | |
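The workqueue.c hunks above replace the sched-RCU read sections (rcu_read_lock_sched(), call_rcu_sched()) with plain RCU, and replace the bare local_irq_disable()/local_irq_save() regions around pending-work bookkeeping with the pendingb_lock local lock. On PREEMPT_RT pool->lock is a sleeping lock, so it cannot be taken inside a preempt- or irq-disabled region; a regular RCU read section plus spin_lock_irq() gives the same lookup guarantees while staying preemptible. A minimal sketch of the converted lookup shape, with a hypothetical function name (not part of the patch):

    /* Illustrative sketch only: the shape start_flush_work() and
     * work_busy() take after the conversion above. */
    static bool example_work_lookup(struct work_struct *work)
    {
            struct worker_pool *pool;
            bool found = false;

            rcu_read_lock();                        /* was: local_irq_disable() + sched-RCU */
            pool = get_work_pool(work);
            if (pool) {
                    spin_lock_irq(&pool->lock);     /* was: spin_lock() under disabled irqs */
                    found = find_worker_executing_work(pool, work) != NULL;
                    spin_unlock_irq(&pool->lock);
            }
            rcu_read_unlock();
            return found;
    }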
22623 | diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h | |
22624 | index 8635417c587b..f000c4d6917e 100644 | |
22625 | --- a/kernel/workqueue_internal.h | |
22626 | +++ b/kernel/workqueue_internal.h | |
22627 | @@ -43,6 +43,7 @@ struct worker { | |
22628 | unsigned long last_active; /* L: last active timestamp */ | |
22629 | unsigned int flags; /* X: flags */ | |
22630 | int id; /* I: worker id */ | |
22631 | + int sleeping; /* None */ | |
22632 | ||
22633 | /* | |
22634 | * Opaque string set with work_set_desc(). Printed out with task | |
22635 | @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void) | |
22636 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | |
22637 | * sched/core.c and workqueue.c. | |
22638 | */ | |
22639 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
22640 | -struct task_struct *wq_worker_sleeping(struct task_struct *task); | |
22641 | +void wq_worker_running(struct task_struct *task); | |
22642 | +void wq_worker_sleeping(struct task_struct *task); | |
22643 | ||
22644 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
22645 | diff --git a/lib/Kconfig b/lib/Kconfig | |
22646 | index d79909dc01ec..fd2accb2f2bb 100644 | |
22647 | --- a/lib/Kconfig | |
22648 | +++ b/lib/Kconfig | |
22649 | @@ -400,6 +400,7 @@ config CHECK_SIGNATURE | |
22650 | ||
22651 | config CPUMASK_OFFSTACK | |
22652 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
22653 | + depends on !PREEMPT_RT_FULL | |
22654 | help | |
22655 | Use dynamic allocation for cpumask_var_t, instead of putting | |
22656 | them on the stack. This is a bit more expensive, but avoids | |
22657 | diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug | |
22658 | index cab7405f48d2..dbc49c48ff53 100644 | |
22659 | --- a/lib/Kconfig.debug | |
22660 | +++ b/lib/Kconfig.debug | |
22661 | @@ -977,6 +977,7 @@ config TIMER_STATS | |
22662 | config DEBUG_PREEMPT | |
22663 | bool "Debug preemptible kernel" | |
22664 | depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT | |
22665 | + select USING_GET_LOCK_PARENT_IP | |
22666 | default y | |
22667 | help | |
22668 | If you say Y here then the kernel will use a debug variant of the | |
22669 | @@ -1159,8 +1160,17 @@ config LOCK_TORTURE_TEST | |
22670 | ||
22671 | endmenu # lock debugging | |
22672 | ||
22673 | +config USING_GET_LOCK_PARENT_IP | |
22674 | + bool | |
22675 | + help | |
22676 | + Enables the use of the function get_lock_parent_ip() that | |
22677 | + will use __builtin_return_address(n) with n > 0 causing | |
22678 | + some gcc warnings. When this is selected, those warnings | |
22679 | + will be suppressed. | |
22680 | + | |
22681 | config TRACE_IRQFLAGS | |
22682 | bool | |
22683 | + select USING_GET_LOCK_PARENT_IP | |
22684 | help | |
22685 | Enables hooks to interrupt enabling and disabling for | |
22686 | either tracing or lock debugging. | |
22687 | diff --git a/lib/debugobjects.c b/lib/debugobjects.c | |
22688 | index a8e12601eb37..c76d5f0beafe 100644 | |
22689 | --- a/lib/debugobjects.c | |
22690 | +++ b/lib/debugobjects.c | |
22691 | @@ -308,7 +308,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) | |
22692 | struct debug_obj *obj; | |
22693 | unsigned long flags; | |
22694 | ||
22695 | - fill_pool(); | |
22696 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22697 | + if (preempt_count() == 0 && !irqs_disabled()) | |
22698 | +#endif | |
22699 | + fill_pool(); | |
22700 | ||
22701 | db = get_bucket((unsigned long) addr); | |
22702 | ||
22703 | diff --git a/lib/idr.c b/lib/idr.c | |
22704 | index 6098336df267..9decbe914595 100644 | |
22705 | --- a/lib/idr.c | |
22706 | +++ b/lib/idr.c | |
22707 | @@ -30,6 +30,7 @@ | |
22708 | #include <linux/idr.h> | |
22709 | #include <linux/spinlock.h> | |
22710 | #include <linux/percpu.h> | |
22711 | +#include <linux/locallock.h> | |
22712 | ||
22713 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | |
22714 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | |
22715 | @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | |
22716 | static DEFINE_PER_CPU(int, idr_preload_cnt); | |
22717 | static DEFINE_SPINLOCK(simple_ida_lock); | |
22718 | ||
22719 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22720 | +static DEFINE_LOCAL_IRQ_LOCK(idr_lock); | |
22721 | + | |
22722 | +static inline void idr_preload_lock(void) | |
22723 | +{ | |
22724 | + local_lock(idr_lock); | |
22725 | +} | |
22726 | + | |
22727 | +static inline void idr_preload_unlock(void) | |
22728 | +{ | |
22729 | + local_unlock(idr_lock); | |
22730 | +} | |
22731 | + | |
22732 | +void idr_preload_end(void) | |
22733 | +{ | |
22734 | + idr_preload_unlock(); | |
22735 | +} | |
22736 | +EXPORT_SYMBOL(idr_preload_end); | |
22737 | +#else | |
22738 | +static inline void idr_preload_lock(void) | |
22739 | +{ | |
22740 | + preempt_disable(); | |
22741 | +} | |
22742 | + | |
22743 | +static inline void idr_preload_unlock(void) | |
22744 | +{ | |
22745 | + preempt_enable(); | |
22746 | +} | |
22747 | +#endif | |
22748 | + | |
22749 | + | |
22750 | /* the maximum ID which can be allocated given idr->layers */ | |
22751 | static int idr_max(int layers) | |
22752 | { | |
22753 | @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | |
22754 | * context. See idr_preload() for details. | |
22755 | */ | |
22756 | if (!in_interrupt()) { | |
22757 | - preempt_disable(); | |
22758 | + idr_preload_lock(); | |
22759 | new = __this_cpu_read(idr_preload_head); | |
22760 | if (new) { | |
22761 | __this_cpu_write(idr_preload_head, new->ary[0]); | |
22762 | __this_cpu_dec(idr_preload_cnt); | |
22763 | new->ary[0] = NULL; | |
22764 | } | |
22765 | - preempt_enable(); | |
22766 | + idr_preload_unlock(); | |
22767 | if (new) | |
22768 | return new; | |
22769 | } | |
22770 | @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, | |
22771 | idr_mark_full(pa, id); | |
22772 | } | |
22773 | ||
22774 | - | |
22775 | /** | |
22776 | * idr_preload - preload for idr_alloc() | |
22777 | * @gfp_mask: allocation mask to use for preloading | |
22778 | @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask) | |
22779 | WARN_ON_ONCE(in_interrupt()); | |
22780 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | |
22781 | ||
22782 | - preempt_disable(); | |
22783 | + idr_preload_lock(); | |
22784 | ||
22785 | /* | |
22786 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | |
22787 | @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask) | |
22788 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | |
22789 | struct idr_layer *new; | |
22790 | ||
22791 | - preempt_enable(); | |
22792 | + idr_preload_unlock(); | |
22793 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | |
22794 | - preempt_disable(); | |
22795 | + idr_preload_lock(); | |
22796 | if (!new) | |
22797 | break; | |
22798 | ||
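The idr.c change wraps the preload fast path in idr_preload_lock()/idr_preload_unlock(): plain preempt_disable()/preempt_enable() on mainline, a per-CPU local lock on PREEMPT_RT_FULL so that the kmem_cache_zalloc() calls made while refilling the preload pool may sleep. The header-side counterpart is assumed to look roughly like the sketch below (it is not part of this hunk); on RT the exported idr_preload_end() defined above replaces the inline preempt_enable().

    /* Assumed shape of the include/linux/idr.h counterpart: */
    #ifdef CONFIG_PREEMPT_RT_FULL
    void idr_preload_end(void);
    #else
    static inline void idr_preload_end(void)
    {
            preempt_enable();
    }
    #endif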
22799 | diff --git a/lib/irq_poll.c b/lib/irq_poll.c | |
22800 | index 836f7db4e548..709d4eed1df9 100644 | |
22801 | --- a/lib/irq_poll.c | |
22802 | +++ b/lib/irq_poll.c | |
22803 | @@ -36,6 +36,7 @@ void irq_poll_sched(struct irq_poll *iop) | |
22804 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); | |
22805 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
22806 | local_irq_restore(flags); | |
22807 | + preempt_check_resched_rt(); | |
22808 | } | |
22809 | EXPORT_SYMBOL(irq_poll_sched); | |
22810 | ||
22811 | @@ -71,6 +72,7 @@ void irq_poll_complete(struct irq_poll *iop) | |
22812 | local_irq_save(flags); | |
22813 | __irq_poll_complete(iop); | |
22814 | local_irq_restore(flags); | |
22815 | + preempt_check_resched_rt(); | |
22816 | } | |
22817 | EXPORT_SYMBOL(irq_poll_complete); | |
22818 | ||
22819 | @@ -95,6 +97,7 @@ static void irq_poll_softirq(struct softirq_action *h) | |
22820 | } | |
22821 | ||
22822 | local_irq_enable(); | |
22823 | + preempt_check_resched_rt(); | |
22824 | ||
22825 | /* Even though interrupts have been re-enabled, this | |
22826 | * access is safe because interrupts can only add new | |
22827 | @@ -132,6 +135,7 @@ static void irq_poll_softirq(struct softirq_action *h) | |
22828 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
22829 | ||
22830 | local_irq_enable(); | |
22831 | + preempt_check_resched_rt(); | |
22832 | } | |
22833 | ||
22834 | /** | |
22835 | @@ -199,6 +203,7 @@ static int irq_poll_cpu_notify(struct notifier_block *self, | |
22836 | this_cpu_ptr(&blk_cpu_iopoll)); | |
22837 | __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); | |
22838 | local_irq_enable(); | |
22839 | + preempt_check_resched_rt(); | |
22840 | } | |
22841 | ||
22842 | return NOTIFY_OK; | |
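Each irq_poll path that raises IRQ_POLL_SOFTIRQ from an interrupts-off region gains a preempt_check_resched_rt() once interrupts are back on. On PREEMPT_RT softirqs run in thread context, so raising one may make a higher-priority softirq thread runnable; the helper (a no-op on non-RT kernels) lets it run immediately instead of waiting for the next scheduling point. The resulting pattern, as used in irq_poll_sched() above:

    local_irq_save(flags);
    list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
    __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);   /* wakes the softirq thread on RT */
    local_irq_restore(flags);
    preempt_check_resched_rt();                 /* no-op without PREEMPT_RT */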
22843 | diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c | |
22844 | index 872a15a2a637..b93a6103fa4d 100644 | |
22845 | --- a/lib/locking-selftest.c | |
22846 | +++ b/lib/locking-selftest.c | |
22847 | @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) | |
22848 | #include "locking-selftest-spin-hardirq.h" | |
22849 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
22850 | ||
22851 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22852 | + | |
22853 | #include "locking-selftest-rlock-hardirq.h" | |
22854 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
22855 | ||
22856 | @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) | |
22857 | #include "locking-selftest-wlock-softirq.h" | |
22858 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
22859 | ||
22860 | +#endif | |
22861 | + | |
22862 | #undef E1 | |
22863 | #undef E2 | |
22864 | ||
22865 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22866 | /* | |
22867 | * Enabling hardirqs with a softirq-safe lock held: | |
22868 | */ | |
22869 | @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
22870 | #undef E1 | |
22871 | #undef E2 | |
22872 | ||
22873 | +#endif | |
22874 | + | |
22875 | /* | |
22876 | * Enabling irqs with an irq-safe lock held: | |
22877 | */ | |
22878 | @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
22879 | #include "locking-selftest-spin-hardirq.h" | |
22880 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
22881 | ||
22882 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22883 | + | |
22884 | #include "locking-selftest-rlock-hardirq.h" | |
22885 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
22886 | ||
22887 | @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) | |
22888 | #include "locking-selftest-wlock-softirq.h" | |
22889 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
22890 | ||
22891 | +#endif | |
22892 | + | |
22893 | #undef E1 | |
22894 | #undef E2 | |
22895 | ||
22896 | @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
22897 | #include "locking-selftest-spin-hardirq.h" | |
22898 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
22899 | ||
22900 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22901 | + | |
22902 | #include "locking-selftest-rlock-hardirq.h" | |
22903 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
22904 | ||
22905 | @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) | |
22906 | #include "locking-selftest-wlock-softirq.h" | |
22907 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
22908 | ||
22909 | +#endif | |
22910 | + | |
22911 | #undef E1 | |
22912 | #undef E2 | |
22913 | #undef E3 | |
22914 | @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
22915 | #include "locking-selftest-spin-hardirq.h" | |
22916 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
22917 | ||
22918 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22919 | + | |
22920 | #include "locking-selftest-rlock-hardirq.h" | |
22921 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
22922 | ||
22923 | @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) | |
22924 | #include "locking-selftest-wlock-softirq.h" | |
22925 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
22926 | ||
22927 | +#endif | |
22928 | + | |
22929 | #undef E1 | |
22930 | #undef E2 | |
22931 | #undef E3 | |
22932 | ||
22933 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22934 | + | |
22935 | /* | |
22936 | * read-lock / write-lock irq inversion. | |
22937 | * | |
22938 | @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) | |
22939 | #undef E2 | |
22940 | #undef E3 | |
22941 | ||
22942 | +#endif | |
22943 | + | |
22944 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22945 | + | |
22946 | /* | |
22947 | * read-lock / write-lock recursion that is actually safe. | |
22948 | */ | |
22949 | @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |
22950 | #undef E2 | |
22951 | #undef E3 | |
22952 | ||
22953 | +#endif | |
22954 | + | |
22955 | /* | |
22956 | * read-lock / write-lock recursion that is unsafe. | |
22957 | */ | |
22958 | @@ -1858,6 +1885,7 @@ void locking_selftest(void) | |
22959 | ||
22960 | printk(" --------------------------------------------------------------------------\n"); | |
22961 | ||
22962 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22963 | /* | |
22964 | * irq-context testcases: | |
22965 | */ | |
22966 | @@ -1870,6 +1898,28 @@ void locking_selftest(void) | |
22967 | ||
22968 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
22969 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
22970 | +#else | |
22971 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
22972 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
22973 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
22974 | + | |
22975 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
22976 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
22977 | + | |
22978 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
22979 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
22980 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
22981 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
22982 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
22983 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
22984 | + | |
22985 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
22986 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
22987 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
22988 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
22989 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
22990 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
22991 | +#endif | |
22992 | ||
22993 | ww_tests(); | |
22994 | ||
22995 | diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c | |
22996 | index 6d40944960de..822a2c027e72 100644 | |
22997 | --- a/lib/percpu_ida.c | |
22998 | +++ b/lib/percpu_ida.c | |
22999 | @@ -26,6 +26,9 @@ | |
23000 | #include <linux/string.h> | |
23001 | #include <linux/spinlock.h> | |
23002 | #include <linux/percpu_ida.h> | |
23003 | +#include <linux/locallock.h> | |
23004 | + | |
23005 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
23006 | ||
23007 | struct percpu_ida_cpu { | |
23008 | /* | |
23009 | @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
23010 | unsigned long flags; | |
23011 | int tag; | |
23012 | ||
23013 | - local_irq_save(flags); | |
23014 | + local_lock_irqsave(irq_off_lock, flags); | |
23015 | tags = this_cpu_ptr(pool->tag_cpu); | |
23016 | ||
23017 | /* Fastpath */ | |
23018 | tag = alloc_local_tag(tags); | |
23019 | if (likely(tag >= 0)) { | |
23020 | - local_irq_restore(flags); | |
23021 | + local_unlock_irqrestore(irq_off_lock, flags); | |
23022 | return tag; | |
23023 | } | |
23024 | ||
23025 | @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
23026 | ||
23027 | if (!tags->nr_free) | |
23028 | alloc_global_tags(pool, tags); | |
23029 | + | |
23030 | if (!tags->nr_free) | |
23031 | steal_tags(pool, tags); | |
23032 | ||
23033 | @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
23034 | } | |
23035 | ||
23036 | spin_unlock(&pool->lock); | |
23037 | - local_irq_restore(flags); | |
23038 | + local_unlock_irqrestore(irq_off_lock, flags); | |
23039 | ||
23040 | if (tag >= 0 || state == TASK_RUNNING) | |
23041 | break; | |
23042 | @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
23043 | ||
23044 | schedule(); | |
23045 | ||
23046 | - local_irq_save(flags); | |
23047 | + local_lock_irqsave(irq_off_lock, flags); | |
23048 | tags = this_cpu_ptr(pool->tag_cpu); | |
23049 | } | |
23050 | if (state != TASK_RUNNING) | |
23051 | @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
23052 | ||
23053 | BUG_ON(tag >= pool->nr_tags); | |
23054 | ||
23055 | - local_irq_save(flags); | |
23056 | + local_lock_irqsave(irq_off_lock, flags); | |
23057 | tags = this_cpu_ptr(pool->tag_cpu); | |
23058 | ||
23059 | spin_lock(&tags->lock); | |
23060 | @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
23061 | spin_unlock(&pool->lock); | |
23062 | } | |
23063 | ||
23064 | - local_irq_restore(flags); | |
23065 | + local_unlock_irqrestore(irq_off_lock, flags); | |
23066 | } | |
23067 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
23068 | ||
23069 | @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
23070 | struct percpu_ida_cpu *remote; | |
23071 | unsigned cpu, i, err = 0; | |
23072 | ||
23073 | - local_irq_save(flags); | |
23074 | + local_lock_irqsave(irq_off_lock, flags); | |
23075 | for_each_possible_cpu(cpu) { | |
23076 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
23077 | spin_lock(&remote->lock); | |
23078 | @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
23079 | } | |
23080 | spin_unlock(&pool->lock); | |
23081 | out: | |
23082 | - local_irq_restore(flags); | |
23083 | + local_unlock_irqrestore(irq_off_lock, flags); | |
23084 | return err; | |
23085 | } | |
23086 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
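percpu_ida.c follows the same recipe used in several files of this series: a DEFINE_LOCAL_IRQ_LOCK() instance replaces bare local_irq_save()/local_irq_restore() around per-CPU state that other contexts can also reach. On mainline the local lock compiles down to interrupt disabling; on PREEMPT_RT it is a per-CPU sleeping lock, so the critical section stays preemptible while still serializing against other tasks on the same CPU. A minimal sketch of the idiom (the names are illustrative, not from the patch):

    static DEFINE_LOCAL_IRQ_LOCK(example_lock);

    static void example_touch_percpu_state(void)
    {
            unsigned long flags;

            local_lock_irqsave(example_lock, flags);
            /* manipulate this_cpu data that interrupt or other
             * task contexts may also access */
            local_unlock_irqrestore(example_lock, flags);
    }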
23087 | diff --git a/lib/radix-tree.c b/lib/radix-tree.c | |
23088 | index 8e6d552c40dd..881cc195d85f 100644 | |
23089 | --- a/lib/radix-tree.c | |
23090 | +++ b/lib/radix-tree.c | |
23091 | @@ -290,13 +290,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |
23092 | * succeed in getting a node here (and never reach | |
23093 | * kmem_cache_alloc) | |
23094 | */ | |
23095 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
23096 | + rtp = &get_cpu_var(radix_tree_preloads); | |
23097 | if (rtp->nr) { | |
23098 | ret = rtp->nodes; | |
23099 | rtp->nodes = ret->private_data; | |
23100 | ret->private_data = NULL; | |
23101 | rtp->nr--; | |
23102 | } | |
23103 | + put_cpu_var(radix_tree_preloads); | |
23104 | /* | |
23105 | * Update the allocation stack trace as this is more useful | |
23106 | * for debugging. | |
23107 | @@ -336,6 +337,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |
23108 | call_rcu(&node->rcu_head, radix_tree_node_rcu_free); | |
23109 | } | |
23110 | ||
23111 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23112 | /* | |
23113 | * Load up this CPU's radix_tree_node buffer with sufficient objects to | |
23114 | * ensure that the addition of a single element in the tree cannot fail. On | |
23115 | @@ -455,6 +457,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) | |
23116 | ||
23117 | return __radix_tree_preload(gfp_mask, nr_nodes); | |
23118 | } | |
23119 | +#endif | |
23120 | ||
23121 | /* | |
23122 | * The maximum index which can be stored in a radix tree | |
23123 | diff --git a/lib/scatterlist.c b/lib/scatterlist.c | |
23124 | index 004fc70fc56a..ccc46992a517 100644 | |
23125 | --- a/lib/scatterlist.c | |
23126 | +++ b/lib/scatterlist.c | |
23127 | @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) | |
23128 | flush_kernel_dcache_page(miter->page); | |
23129 | ||
23130 | if (miter->__flags & SG_MITER_ATOMIC) { | |
23131 | - WARN_ON_ONCE(preemptible()); | |
23132 | + WARN_ON_ONCE(!pagefault_disabled()); | |
23133 | kunmap_atomic(miter->addr); | |
23134 | } else | |
23135 | kunmap(miter->page); | |
23136 | @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
23137 | if (!sg_miter_skip(&miter, skip)) | |
23138 | return false; | |
23139 | ||
23140 | - local_irq_save(flags); | |
23141 | + local_irq_save_nort(flags); | |
23142 | ||
23143 | while (sg_miter_next(&miter) && offset < buflen) { | |
23144 | unsigned int len; | |
23145 | @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
23146 | ||
23147 | sg_miter_stop(&miter); | |
23148 | ||
23149 | - local_irq_restore(flags); | |
23150 | + local_irq_restore_nort(flags); | |
23151 | return offset; | |
23152 | } | |
23153 | EXPORT_SYMBOL(sg_copy_buffer); | |
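sg_copy_buffer() switches to the local_irq_save_nort()/local_irq_restore_nort() helpers, which are assumed to be defined elsewhere in the RT series roughly as below: identical to the plain versions on mainline, but on PREEMPT_RT they only record the flags and leave interrupts enabled, because the kmap/copy path may take sleeping locks there. The WARN_ON_ONCE(!pagefault_disabled()) change in sg_miter_stop() matches this: kmap_atomic() on RT disables page faults rather than preemption.

    /* Assumed definitions (not part of this hunk): */
    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     local_save_flags(flags)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif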
23154 | diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c | |
23155 | index 1afec32de6f2..11fa431046a8 100644 | |
23156 | --- a/lib/smp_processor_id.c | |
23157 | +++ b/lib/smp_processor_id.c | |
23158 | @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, | |
23159 | if (!printk_ratelimit()) | |
23160 | goto out_enable; | |
23161 | ||
23162 | - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", | |
23163 | - what1, what2, preempt_count() - 1, current->comm, current->pid); | |
23164 | + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n", | |
23165 | + what1, what2, preempt_count() - 1, __migrate_disabled(current), | |
23166 | + current->comm, current->pid); | |
23167 | ||
23168 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | |
23169 | dump_stack(); | |
23170 | diff --git a/localversion-rt b/localversion-rt | |
23171 | new file mode 100644 | |
23172 | index 000000000000..0efe7ba1930e | |
23173 | --- /dev/null | |
23174 | +++ b/localversion-rt | |
23175 | @@ -0,0 +1 @@ | |
23176 | +-rt5 | |
23177 | diff --git a/mm/Kconfig b/mm/Kconfig | |
23178 | index be0ee11fa0d9..fe2857d67973 100644 | |
23179 | --- a/mm/Kconfig | |
23180 | +++ b/mm/Kconfig | |
23181 | @@ -410,7 +410,7 @@ config NOMMU_INITIAL_TRIM_EXCESS | |
23182 | ||
23183 | config TRANSPARENT_HUGEPAGE | |
23184 | bool "Transparent Hugepage Support" | |
23185 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
23186 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
23187 | select COMPACTION | |
23188 | select RADIX_TREE_MULTIORDER | |
23189 | help | |
23190 | diff --git a/mm/backing-dev.c b/mm/backing-dev.c | |
23191 | index 8fde443f36d7..d7a863b0ec20 100644 | |
23192 | --- a/mm/backing-dev.c | |
23193 | +++ b/mm/backing-dev.c | |
23194 | @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) | |
23195 | { | |
23196 | unsigned long flags; | |
23197 | ||
23198 | - local_irq_save(flags); | |
23199 | + local_irq_save_nort(flags); | |
23200 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
23201 | - local_irq_restore(flags); | |
23202 | + local_irq_restore_nort(flags); | |
23203 | return; | |
23204 | } | |
23205 | ||
23206 | diff --git a/mm/compaction.c b/mm/compaction.c | |
23207 | index 9affb2908304..d5eb0e52e96f 100644 | |
23208 | --- a/mm/compaction.c | |
23209 | +++ b/mm/compaction.c | |
23210 | @@ -1585,10 +1585,12 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro | |
23211 | block_start_pfn(cc->migrate_pfn, cc->order); | |
23212 | ||
23213 | if (cc->last_migrated_pfn < current_block_start) { | |
23214 | - cpu = get_cpu(); | |
23215 | + cpu = get_cpu_light(); | |
23216 | + local_lock_irq(swapvec_lock); | |
23217 | lru_add_drain_cpu(cpu); | |
23218 | + local_unlock_irq(swapvec_lock); | |
23219 | drain_local_pages(zone); | |
23220 | - put_cpu(); | |
23221 | + put_cpu_light(); | |
23222 | /* No more flushing until we migrate again */ | |
23223 | cc->last_migrated_pfn = 0; | |
23224 | } | |
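compact_zone() replaces get_cpu()/put_cpu() with get_cpu_light()/put_cpu_light() and brackets lru_add_drain_cpu() with swapvec_lock, the local lock guarding the per-CPU LRU pagevecs. The *_light helpers are assumed to be provided earlier in the RT series roughly as below: they pin the task to its CPU with migrate_disable() instead of disabling preemption, so the drain remains preemptible while swapvec_lock supplies the actual exclusion.

    /* Assumed definitions (provided elsewhere in the RT series): */
    #define get_cpu_light()   ({ migrate_disable(); smp_processor_id(); })
    #define put_cpu_light()   migrate_enable()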
23225 | diff --git a/mm/filemap.c b/mm/filemap.c | |
23226 | index ced9ef6c06b0..19f6f0d77604 100644 | |
23227 | --- a/mm/filemap.c | |
23228 | +++ b/mm/filemap.c | |
23229 | @@ -159,9 +159,12 @@ static int page_cache_tree_insert(struct address_space *mapping, | |
23230 | * node->private_list is protected by | |
23231 | * mapping->tree_lock. | |
23232 | */ | |
23233 | - if (!list_empty(&node->private_list)) | |
23234 | - list_lru_del(&workingset_shadow_nodes, | |
23235 | + if (!list_empty(&node->private_list)) { | |
23236 | + local_lock(workingset_shadow_lock); | |
23237 | + list_lru_del(&__workingset_shadow_nodes, | |
23238 | &node->private_list); | |
23239 | + local_unlock(workingset_shadow_lock); | |
23240 | + } | |
23241 | } | |
23242 | return 0; | |
23243 | } | |
23244 | @@ -217,8 +220,10 @@ static void page_cache_tree_delete(struct address_space *mapping, | |
23245 | if (!dax_mapping(mapping) && !workingset_node_pages(node) && | |
23246 | list_empty(&node->private_list)) { | |
23247 | node->private_data = mapping; | |
23248 | - list_lru_add(&workingset_shadow_nodes, | |
23249 | - &node->private_list); | |
23250 | + local_lock(workingset_shadow_lock); | |
23251 | + list_lru_add(&__workingset_shadow_nodes, | |
23252 | + &node->private_list); | |
23253 | + local_unlock(workingset_shadow_lock); | |
23254 | } | |
23255 | } | |
23256 | ||
23257 | diff --git a/mm/highmem.c b/mm/highmem.c | |
23258 | index 50b4ca6787f0..77518a3b35a1 100644 | |
23259 | --- a/mm/highmem.c | |
23260 | +++ b/mm/highmem.c | |
23261 | @@ -29,10 +29,11 @@ | |
23262 | #include <linux/kgdb.h> | |
23263 | #include <asm/tlbflush.h> | |
23264 | ||
23265 | - | |
23266 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23267 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
23268 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
23269 | #endif | |
23270 | +#endif | |
23271 | ||
23272 | /* | |
23273 | * Virtual_count is not a pure "count". | |
23274 | @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) | |
23275 | unsigned long totalhigh_pages __read_mostly; | |
23276 | EXPORT_SYMBOL(totalhigh_pages); | |
23277 | ||
23278 | - | |
23279 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23280 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
23281 | +#endif | |
23282 | ||
23283 | unsigned int nr_free_highpages (void) | |
23284 | { | |
23285 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c | |
23286 | index 4be518d4e68a..724240ca2f35 100644 | |
23287 | --- a/mm/memcontrol.c | |
23288 | +++ b/mm/memcontrol.c | |
23289 | @@ -67,6 +67,7 @@ | |
23290 | #include <net/sock.h> | |
23291 | #include <net/ip.h> | |
23292 | #include "slab.h" | |
23293 | +#include <linux/locallock.h> | |
23294 | ||
23295 | #include <asm/uaccess.h> | |
23296 | ||
23297 | @@ -92,6 +93,8 @@ int do_swap_account __read_mostly; | |
23298 | #define do_swap_account 0 | |
23299 | #endif | |
23300 | ||
23301 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
23302 | + | |
23303 | /* Whether legacy memory+swap accounting is active */ | |
23304 | static bool do_memsw_account(void) | |
23305 | { | |
23306 | @@ -1724,6 +1727,7 @@ struct memcg_stock_pcp { | |
23307 | #define FLUSHING_CACHED_CHARGE 0 | |
23308 | }; | |
23309 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | |
23310 | +static DEFINE_LOCAL_IRQ_LOCK(memcg_stock_ll); | |
23311 | static DEFINE_MUTEX(percpu_charge_mutex); | |
23312 | ||
23313 | /** | |
23314 | @@ -1746,7 +1750,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
23315 | if (nr_pages > CHARGE_BATCH) | |
23316 | return ret; | |
23317 | ||
23318 | - local_irq_save(flags); | |
23319 | + local_lock_irqsave(memcg_stock_ll, flags); | |
23320 | ||
23321 | stock = this_cpu_ptr(&memcg_stock); | |
23322 | if (memcg == stock->cached && stock->nr_pages >= nr_pages) { | |
23323 | @@ -1754,7 +1758,7 @@ static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
23324 | ret = true; | |
23325 | } | |
23326 | ||
23327 | - local_irq_restore(flags); | |
23328 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
23329 | ||
23330 | return ret; | |
23331 | } | |
23332 | @@ -1781,13 +1785,13 @@ static void drain_local_stock(struct work_struct *dummy) | |
23333 | struct memcg_stock_pcp *stock; | |
23334 | unsigned long flags; | |
23335 | ||
23336 | - local_irq_save(flags); | |
23337 | + local_lock_irqsave(memcg_stock_ll, flags); | |
23338 | ||
23339 | stock = this_cpu_ptr(&memcg_stock); | |
23340 | drain_stock(stock); | |
23341 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | |
23342 | ||
23343 | - local_irq_restore(flags); | |
23344 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
23345 | } | |
23346 | ||
23347 | /* | |
23348 | @@ -1799,7 +1803,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
23349 | struct memcg_stock_pcp *stock; | |
23350 | unsigned long flags; | |
23351 | ||
23352 | - local_irq_save(flags); | |
23353 | + local_lock_irqsave(memcg_stock_ll, flags); | |
23354 | ||
23355 | stock = this_cpu_ptr(&memcg_stock); | |
23356 | if (stock->cached != memcg) { /* reset if necessary */ | |
23357 | @@ -1808,7 +1812,7 @@ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
23358 | } | |
23359 | stock->nr_pages += nr_pages; | |
23360 | ||
23361 | - local_irq_restore(flags); | |
23362 | + local_unlock_irqrestore(memcg_stock_ll, flags); | |
23363 | } | |
23364 | ||
23365 | /* | |
23366 | @@ -1824,7 +1828,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
23367 | return; | |
23368 | /* Notify other cpus that system-wide "drain" is running */ | |
23369 | get_online_cpus(); | |
23370 | - curcpu = get_cpu(); | |
23371 | + curcpu = get_cpu_light(); | |
23372 | for_each_online_cpu(cpu) { | |
23373 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
23374 | struct mem_cgroup *memcg; | |
23375 | @@ -1841,7 +1845,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
23376 | schedule_work_on(cpu, &stock->work); | |
23377 | } | |
23378 | } | |
23379 | - put_cpu(); | |
23380 | + put_cpu_light(); | |
23381 | put_online_cpus(); | |
23382 | mutex_unlock(&percpu_charge_mutex); | |
23383 | } | |
23384 | @@ -4566,12 +4570,12 @@ static int mem_cgroup_move_account(struct page *page, | |
23385 | ||
23386 | ret = 0; | |
23387 | ||
23388 | - local_irq_disable(); | |
23389 | + local_lock_irq(event_lock); | |
23390 | mem_cgroup_charge_statistics(to, page, compound, nr_pages); | |
23391 | memcg_check_events(to, page); | |
23392 | mem_cgroup_charge_statistics(from, page, compound, -nr_pages); | |
23393 | memcg_check_events(from, page); | |
23394 | - local_irq_enable(); | |
23395 | + local_unlock_irq(event_lock); | |
23396 | out_unlock: | |
23397 | unlock_page(page); | |
23398 | out: | |
23399 | @@ -5444,10 +5448,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, | |
23400 | ||
23401 | commit_charge(page, memcg, lrucare); | |
23402 | ||
23403 | - local_irq_disable(); | |
23404 | + local_lock_irq(event_lock); | |
23405 | mem_cgroup_charge_statistics(memcg, page, compound, nr_pages); | |
23406 | memcg_check_events(memcg, page); | |
23407 | - local_irq_enable(); | |
23408 | + local_unlock_irq(event_lock); | |
23409 | ||
23410 | if (do_memsw_account() && PageSwapCache(page)) { | |
23411 | swp_entry_t entry = { .val = page_private(page) }; | |
23412 | @@ -5503,14 +5507,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, | |
23413 | memcg_oom_recover(memcg); | |
23414 | } | |
23415 | ||
23416 | - local_irq_save(flags); | |
23417 | + local_lock_irqsave(event_lock, flags); | |
23418 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | |
23419 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); | |
23420 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); | |
23421 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); | |
23422 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); | |
23423 | memcg_check_events(memcg, dummy_page); | |
23424 | - local_irq_restore(flags); | |
23425 | + local_unlock_irqrestore(event_lock, flags); | |
23426 | ||
23427 | if (!mem_cgroup_is_root(memcg)) | |
23428 | css_put_many(&memcg->css, nr_pages); | |
23429 | @@ -5665,10 +5669,10 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) | |
23430 | ||
23431 | commit_charge(newpage, memcg, false); | |
23432 | ||
23433 | - local_irq_save(flags); | |
23434 | + local_lock_irqsave(event_lock, flags); | |
23435 | mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); | |
23436 | memcg_check_events(memcg, newpage); | |
23437 | - local_irq_restore(flags); | |
23438 | + local_unlock_irqrestore(event_lock, flags); | |
23439 | } | |
23440 | ||
23441 | DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key); | |
23442 | @@ -5845,6 +5849,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
23443 | { | |
23444 | struct mem_cgroup *memcg, *swap_memcg; | |
23445 | unsigned short oldid; | |
23446 | + unsigned long flags; | |
23447 | ||
23448 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
23449 | VM_BUG_ON_PAGE(page_count(page), page); | |
23450 | @@ -5885,12 +5890,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
23451 | * important here to have the interrupts disabled because it is the | |
23452 | * only synchronisation we have for udpating the per-CPU variables. | |
23453 | */ | |
23454 | + local_lock_irqsave(event_lock, flags); | |
23455 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
23456 | VM_BUG_ON(!irqs_disabled()); | |
23457 | +#endif | |
23458 | mem_cgroup_charge_statistics(memcg, page, false, -1); | |
23459 | memcg_check_events(memcg, page); | |
23460 | ||
23461 | if (!mem_cgroup_is_root(memcg)) | |
23462 | css_put(&memcg->css); | |
23463 | + local_unlock_irqrestore(event_lock, flags); | |
23464 | } | |
23465 | ||
23466 | /* | |
23467 | diff --git a/mm/mmu_context.c b/mm/mmu_context.c | |
23468 | index 6f4d27c5bb32..5cd25c745a8f 100644 | |
23469 | --- a/mm/mmu_context.c | |
23470 | +++ b/mm/mmu_context.c | |
23471 | @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm) | |
23472 | struct task_struct *tsk = current; | |
23473 | ||
23474 | task_lock(tsk); | |
23475 | + preempt_disable_rt(); | |
23476 | active_mm = tsk->active_mm; | |
23477 | if (active_mm != mm) { | |
23478 | atomic_inc(&mm->mm_count); | |
23479 | @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm) | |
23480 | } | |
23481 | tsk->mm = mm; | |
23482 | switch_mm(active_mm, mm, tsk); | |
23483 | + preempt_enable_rt(); | |
23484 | task_unlock(tsk); | |
23485 | #ifdef finish_arch_post_lock_switch | |
23486 | finish_arch_post_lock_switch(); | |
23487 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
23488 | index a2214c64ed3c..4be4d5d66f73 100644 | |
23489 | --- a/mm/page_alloc.c | |
23490 | +++ b/mm/page_alloc.c | |
23491 | @@ -61,6 +61,7 @@ | |
23492 | #include <linux/page_ext.h> | |
23493 | #include <linux/hugetlb.h> | |
23494 | #include <linux/sched/rt.h> | |
23495 | +#include <linux/locallock.h> | |
23496 | #include <linux/page_owner.h> | |
23497 | #include <linux/kthread.h> | |
23498 | #include <linux/memcontrol.h> | |
23499 | @@ -276,6 +277,18 @@ EXPORT_SYMBOL(nr_node_ids); | |
23500 | EXPORT_SYMBOL(nr_online_nodes); | |
23501 | #endif | |
23502 | ||
23503 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
23504 | + | |
23505 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
23506 | +# define cpu_lock_irqsave(cpu, flags) \ | |
23507 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
23508 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
23509 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
23510 | +#else | |
23511 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
23512 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
23513 | +#endif | |
23514 | + | |
23515 | int page_group_by_mobility_disabled __read_mostly; | |
23516 | ||
23517 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
23518 | @@ -1056,7 +1069,7 @@ static bool bulkfree_pcp_prepare(struct page *page) | |
23519 | #endif /* CONFIG_DEBUG_VM */ | |
23520 | ||
23521 | /* | |
23522 | - * Frees a number of pages from the PCP lists | |
23523 | + * Frees a number of pages which have been collected from the pcp lists. | |
23524 | * Assumes all pages on list are in same zone, and of same order. | |
23525 | * count is the number of pages to free. | |
23526 | * | |
23527 | @@ -1067,19 +1080,58 @@ static bool bulkfree_pcp_prepare(struct page *page) | |
23528 | * pinned" detection logic. | |
23529 | */ | |
23530 | static void free_pcppages_bulk(struct zone *zone, int count, | |
23531 | - struct per_cpu_pages *pcp) | |
23532 | + struct list_head *list) | |
23533 | { | |
23534 | - int migratetype = 0; | |
23535 | - int batch_free = 0; | |
23536 | unsigned long nr_scanned; | |
23537 | bool isolated_pageblocks; | |
23538 | + unsigned long flags; | |
23539 | + | |
23540 | + spin_lock_irqsave(&zone->lock, flags); | |
23541 | ||
23542 | - spin_lock(&zone->lock); | |
23543 | isolated_pageblocks = has_isolate_pageblock(zone); | |
23544 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
23545 | if (nr_scanned) | |
23546 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
23547 | ||
23548 | + while (!list_empty(list)) { | |
23549 | + struct page *page; | |
23550 | + int mt; /* migratetype of the to-be-freed page */ | |
23551 | + | |
23552 | + page = list_first_entry(list, struct page, lru); | |
23553 | + /* must delete as __free_one_page list manipulates */ | |
23554 | + list_del(&page->lru); | |
23555 | + | |
23556 | + mt = get_pcppage_migratetype(page); | |
23557 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
23558 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
23559 | + /* Pageblock could have been isolated meanwhile */ | |
23560 | + if (unlikely(isolated_pageblocks)) | |
23561 | + mt = get_pageblock_migratetype(page); | |
23562 | + | |
23563 | + if (bulkfree_pcp_prepare(page)) | |
23564 | + continue; | |
23565 | + | |
23566 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
23567 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
23568 | + count--; | |
23569 | + } | |
23570 | + WARN_ON(count != 0); | |
23571 | + spin_unlock_irqrestore(&zone->lock, flags); | |
23572 | +} | |
23573 | + | |
23574 | +/* | |
23575 | + * Moves a number of pages from the PCP lists to free list which | |
23576 | + * is freed outside of the locked region. | |
23577 | + * | |
23578 | + * Assumes all pages on list are in same zone, and of same order. | |
23579 | + * count is the number of pages to free. | |
23580 | + */ | |
23581 | +static void isolate_pcp_pages(int count, struct per_cpu_pages *src, | |
23582 | + struct list_head *dst) | |
23583 | +{ | |
23584 | + int migratetype = 0; | |
23585 | + int batch_free = 0; | |
23586 | + | |
23587 | while (count) { | |
23588 | struct page *page; | |
23589 | struct list_head *list; | |
23590 | @@ -1095,7 +1147,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
23591 | batch_free++; | |
23592 | if (++migratetype == MIGRATE_PCPTYPES) | |
23593 | migratetype = 0; | |
23594 | - list = &pcp->lists[migratetype]; | |
23595 | + list = &src->lists[migratetype]; | |
23596 | } while (list_empty(list)); | |
23597 | ||
23598 | /* This is the only non-empty list. Free them all. */ | |
23599 | @@ -1103,27 +1155,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
23600 | batch_free = count; | |
23601 | ||
23602 | do { | |
23603 | - int mt; /* migratetype of the to-be-freed page */ | |
23604 | - | |
23605 | page = list_last_entry(list, struct page, lru); | |
23606 | - /* must delete as __free_one_page list manipulates */ | |
23607 | list_del(&page->lru); | |
23608 | ||
23609 | - mt = get_pcppage_migratetype(page); | |
23610 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
23611 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
23612 | - /* Pageblock could have been isolated meanwhile */ | |
23613 | - if (unlikely(isolated_pageblocks)) | |
23614 | - mt = get_pageblock_migratetype(page); | |
23615 | - | |
23616 | - if (bulkfree_pcp_prepare(page)) | |
23617 | - continue; | |
23618 | - | |
23619 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
23620 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
23621 | + list_add(&page->lru, dst); | |
23622 | } while (--count && --batch_free && !list_empty(list)); | |
23623 | } | |
23624 | - spin_unlock(&zone->lock); | |
23625 | } | |
23626 | ||
23627 | static void free_one_page(struct zone *zone, | |
23628 | @@ -1132,7 +1169,9 @@ static void free_one_page(struct zone *zone, | |
23629 | int migratetype) | |
23630 | { | |
23631 | unsigned long nr_scanned; | |
23632 | - spin_lock(&zone->lock); | |
23633 | + unsigned long flags; | |
23634 | + | |
23635 | + spin_lock_irqsave(&zone->lock, flags); | |
23636 | nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED); | |
23637 | if (nr_scanned) | |
23638 | __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned); | |
23639 | @@ -1142,7 +1181,7 @@ static void free_one_page(struct zone *zone, | |
23640 | migratetype = get_pfnblock_migratetype(page, pfn); | |
23641 | } | |
23642 | __free_one_page(page, pfn, zone, order, migratetype); | |
23643 | - spin_unlock(&zone->lock); | |
23644 | + spin_unlock_irqrestore(&zone->lock, flags); | |
23645 | } | |
23646 | ||
23647 | static void __meminit __init_single_page(struct page *page, unsigned long pfn, | |
23648 | @@ -1228,10 +1267,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |
23649 | return; | |
23650 | ||
23651 | migratetype = get_pfnblock_migratetype(page, pfn); | |
23652 | - local_irq_save(flags); | |
23653 | + local_lock_irqsave(pa_lock, flags); | |
23654 | __count_vm_events(PGFREE, 1 << order); | |
23655 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
23656 | - local_irq_restore(flags); | |
23657 | + local_unlock_irqrestore(pa_lock, flags); | |
23658 | } | |
23659 | ||
23660 | static void __init __free_pages_boot_core(struct page *page, unsigned int order) | |
23661 | @@ -2219,16 +2258,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |
23662 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |
23663 | { | |
23664 | unsigned long flags; | |
23665 | + LIST_HEAD(dst); | |
23666 | int to_drain, batch; | |
23667 | ||
23668 | - local_irq_save(flags); | |
23669 | + local_lock_irqsave(pa_lock, flags); | |
23670 | batch = READ_ONCE(pcp->batch); | |
23671 | to_drain = min(pcp->count, batch); | |
23672 | if (to_drain > 0) { | |
23673 | - free_pcppages_bulk(zone, to_drain, pcp); | |
23674 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
23675 | pcp->count -= to_drain; | |
23676 | } | |
23677 | - local_irq_restore(flags); | |
23678 | + local_unlock_irqrestore(pa_lock, flags); | |
23679 | + free_pcppages_bulk(zone, to_drain, &dst); | |
23680 | } | |
23681 | #endif | |
23682 | ||
23683 | @@ -2244,16 +2285,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) | |
23684 | unsigned long flags; | |
23685 | struct per_cpu_pageset *pset; | |
23686 | struct per_cpu_pages *pcp; | |
23687 | + LIST_HEAD(dst); | |
23688 | + int count; | |
23689 | ||
23690 | - local_irq_save(flags); | |
23691 | + cpu_lock_irqsave(cpu, flags); | |
23692 | pset = per_cpu_ptr(zone->pageset, cpu); | |
23693 | ||
23694 | pcp = &pset->pcp; | |
23695 | - if (pcp->count) { | |
23696 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
23697 | + count = pcp->count; | |
23698 | + if (count) { | |
23699 | + isolate_pcp_pages(count, pcp, &dst); | |
23700 | pcp->count = 0; | |
23701 | } | |
23702 | - local_irq_restore(flags); | |
23703 | + cpu_unlock_irqrestore(cpu, flags); | |
23704 | + if (count) | |
23705 | + free_pcppages_bulk(zone, count, &dst); | |
23706 | } | |
23707 | ||
23708 | /* | |
23709 | @@ -2339,8 +2385,17 @@ void drain_all_pages(struct zone *zone) | |
23710 | else | |
23711 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
23712 | } | |
23713 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
23714 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | |
23715 | zone, 1); | |
23716 | +#else | |
23717 | + for_each_cpu(cpu, &cpus_with_pcps) { | |
23718 | + if (zone) | |
23719 | + drain_pages_zone(cpu, zone); | |
23720 | + else | |
23721 | + drain_pages(cpu); | |
23722 | + } | |
23723 | +#endif | |
23724 | } | |
23725 | ||
23726 | #ifdef CONFIG_HIBERNATION | |
23727 | @@ -2400,7 +2455,7 @@ void free_hot_cold_page(struct page *page, bool cold) | |
23728 | ||
23729 | migratetype = get_pfnblock_migratetype(page, pfn); | |
23730 | set_pcppage_migratetype(page, migratetype); | |
23731 | - local_irq_save(flags); | |
23732 | + local_lock_irqsave(pa_lock, flags); | |
23733 | __count_vm_event(PGFREE); | |
23734 | ||
23735 | /* | |
23736 | @@ -2426,12 +2481,17 @@ void free_hot_cold_page(struct page *page, bool cold) | |
23737 | pcp->count++; | |
23738 | if (pcp->count >= pcp->high) { | |
23739 | unsigned long batch = READ_ONCE(pcp->batch); | |
23740 | - free_pcppages_bulk(zone, batch, pcp); | |
23741 | + LIST_HEAD(dst); | |
23742 | + | |
23743 | + isolate_pcp_pages(batch, pcp, &dst); | |
23744 | pcp->count -= batch; | |
23745 | + local_unlock_irqrestore(pa_lock, flags); | |
23746 | + free_pcppages_bulk(zone, batch, &dst); | |
23747 | + return; | |
23748 | } | |
23749 | ||
23750 | out: | |
23751 | - local_irq_restore(flags); | |
23752 | + local_unlock_irqrestore(pa_lock, flags); | |
23753 | } | |
23754 | ||
23755 | /* | |
23756 | @@ -2568,7 +2628,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
23757 | struct per_cpu_pages *pcp; | |
23758 | struct list_head *list; | |
23759 | ||
23760 | - local_irq_save(flags); | |
23761 | + local_lock_irqsave(pa_lock, flags); | |
23762 | do { | |
23763 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
23764 | list = &pcp->lists[migratetype]; | |
23765 | @@ -2595,7 +2655,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
23766 | * allocate greater than order-1 page units with __GFP_NOFAIL. | |
23767 | */ | |
23768 | WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1)); | |
23769 | - spin_lock_irqsave(&zone->lock, flags); | |
23770 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
23771 | ||
23772 | do { | |
23773 | page = NULL; | |
23774 | @@ -2607,22 +2667,24 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
23775 | if (!page) | |
23776 | page = __rmqueue(zone, order, migratetype); | |
23777 | } while (page && check_new_pages(page, order)); | |
23778 | - spin_unlock(&zone->lock); | |
23779 | - if (!page) | |
23780 | + if (!page) { | |
23781 | + spin_unlock(&zone->lock); | |
23782 | goto failed; | |
23783 | + } | |
23784 | __mod_zone_freepage_state(zone, -(1 << order), | |
23785 | get_pcppage_migratetype(page)); | |
23786 | + spin_unlock(&zone->lock); | |
23787 | } | |
23788 | ||
23789 | __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); | |
23790 | zone_statistics(preferred_zone, zone, gfp_flags); | |
23791 | - local_irq_restore(flags); | |
23792 | + local_unlock_irqrestore(pa_lock, flags); | |
23793 | ||
23794 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | |
23795 | return page; | |
23796 | ||
23797 | failed: | |
23798 | - local_irq_restore(flags); | |
23799 | + local_unlock_irqrestore(pa_lock, flags); | |
23800 | return NULL; | |
23801 | } | |
23802 | ||
23803 | @@ -6528,7 +6590,9 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |
23804 | int cpu = (unsigned long)hcpu; | |
23805 | ||
23806 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | |
23807 | + local_lock_irq_on(swapvec_lock, cpu); | |
23808 | lru_add_drain_cpu(cpu); | |
23809 | + local_unlock_irq_on(swapvec_lock, cpu); | |
23810 | drain_pages(cpu); | |
23811 | ||
23812 | /* | |
23813 | @@ -6554,6 +6618,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |
23814 | void __init page_alloc_init(void) | |
23815 | { | |
23816 | hotcpu_notifier(page_alloc_cpu_notify, 0); | |
23817 | + local_irq_lock_init(pa_lock); | |
23818 | } | |
23819 | ||
23820 | /* | |
23821 | @@ -7370,7 +7435,7 @@ void zone_pcp_reset(struct zone *zone) | |
23822 | struct per_cpu_pageset *pset; | |
23823 | ||
23824 | /* avoid races with drain_pages() */ | |
23825 | - local_irq_save(flags); | |
23826 | + local_lock_irqsave(pa_lock, flags); | |
23827 | if (zone->pageset != &boot_pageset) { | |
23828 | for_each_online_cpu(cpu) { | |
23829 | pset = per_cpu_ptr(zone->pageset, cpu); | |
23830 | @@ -7379,7 +7444,7 @@ void zone_pcp_reset(struct zone *zone) | |
23831 | free_percpu(zone->pageset); | |
23832 | zone->pageset = &boot_pageset; | |
23833 | } | |
23834 | - local_irq_restore(flags); | |
23835 | + local_unlock_irqrestore(pa_lock, flags); | |
23836 | } | |
23837 | ||
23838 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
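The page_alloc.c rework splits the old free_pcppages_bulk() into two stages: isolate_pcp_pages() detaches pages from the per-CPU lists while pa_lock (the new local lock guarding the pcp lists) is held, and free_pcppages_bulk() then returns them to the buddy lists under zone->lock with its own irqsave. This keeps the time spent under the per-CPU lock short and, on PREEMPT_RT, avoids nesting the zone lock inside an interrupts-off region. Callers follow the pattern already visible in drain_zone_pages() above:

    LIST_HEAD(dst);

    local_lock_irqsave(pa_lock, flags);
    isolate_pcp_pages(to_drain, pcp, &dst);     /* detach under pa_lock */
    pcp->count -= to_drain;
    local_unlock_irqrestore(pa_lock, flags);
    free_pcppages_bulk(zone, to_drain, &dst);   /* free under zone->lock */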
23839 | diff --git a/mm/slab.h b/mm/slab.h | |
23840 | index 9653f2e2591a..b7371e026627 100644 | |
23841 | --- a/mm/slab.h | |
23842 | +++ b/mm/slab.h | |
23843 | @@ -426,7 +426,11 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | |
23844 | * The slab lists for all objects. | |
23845 | */ | |
23846 | struct kmem_cache_node { | |
23847 | +#ifdef CONFIG_SLUB | |
23848 | + raw_spinlock_t list_lock; | |
23849 | +#else | |
23850 | spinlock_t list_lock; | |
23851 | +#endif | |
23852 | ||
23853 | #ifdef CONFIG_SLAB | |
23854 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
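kmem_cache_node::list_lock becomes a raw_spinlock_t when SLUB is in use: on PREEMPT_RT an ordinary spinlock_t turns into a sleeping lock, but SLUB takes list_lock from contexts that must not sleep, and the protected partial-list manipulation is short and bounded. The slub.c hunks that follow convert every taker accordingly; the resulting pattern is sketched below (the function name is illustrative):

    static unsigned long example_count_partial(struct kmem_cache_node *n)
    {
            unsigned long flags, count = 0;
            struct page *page;

            raw_spin_lock_irqsave(&n->list_lock, flags);
            list_for_each_entry(page, &n->partial, lru)
                    count++;
            raw_spin_unlock_irqrestore(&n->list_lock, flags);
            return count;
    }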
23855 | diff --git a/mm/slub.c b/mm/slub.c | |
23856 | index 9adae58462f8..4b386747f050 100644 | |
23857 | --- a/mm/slub.c | |
23858 | +++ b/mm/slub.c | |
23859 | @@ -1145,7 +1145,7 @@ static noinline int free_debug_processing( | |
23860 | unsigned long uninitialized_var(flags); | |
23861 | int ret = 0; | |
23862 | ||
23863 | - spin_lock_irqsave(&n->list_lock, flags); | |
23864 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
23865 | slab_lock(page); | |
23866 | ||
23867 | if (s->flags & SLAB_CONSISTENCY_CHECKS) { | |
23868 | @@ -1180,7 +1180,7 @@ static noinline int free_debug_processing( | |
23869 | bulk_cnt, cnt); | |
23870 | ||
23871 | slab_unlock(page); | |
23872 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
23873 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
23874 | if (!ret) | |
23875 | slab_fix(s, "Object at 0x%p not freed", object); | |
23876 | return ret; | |
23877 | @@ -1308,6 +1308,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, | |
23878 | ||
23879 | #endif /* CONFIG_SLUB_DEBUG */ | |
23880 | ||
23881 | +struct slub_free_list { | |
23882 | + raw_spinlock_t lock; | |
23883 | + struct list_head list; | |
23884 | +}; | |
23885 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
23886 | + | |
23887 | /* | |
23888 | * Hooks for other subsystems that check memory allocations. In a typical | |
23889 | * production configuration these hooks all should produce no code at all. | |
23890 | @@ -1527,10 +1533,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
23891 | void *start, *p; | |
23892 | int idx, order; | |
23893 | bool shuffle; | |
23894 | + bool enableirqs = false; | |
23895 | ||
23896 | flags &= gfp_allowed_mask; | |
23897 | ||
23898 | if (gfpflags_allow_blocking(flags)) | |
23899 | + enableirqs = true; | |
23900 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23901 | + if (system_state == SYSTEM_RUNNING) | |
23902 | + enableirqs = true; | |
23903 | +#endif | |
23904 | + if (enableirqs) | |
23905 | local_irq_enable(); | |
23906 | ||
23907 | flags |= s->allocflags; | |
23908 | @@ -1605,7 +1618,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
23909 | page->frozen = 1; | |
23910 | ||
23911 | out: | |
23912 | - if (gfpflags_allow_blocking(flags)) | |
23913 | + if (enableirqs) | |
23914 | local_irq_disable(); | |
23915 | if (!page) | |
23916 | return NULL; | |
23917 | @@ -1664,6 +1677,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |
23918 | __free_pages(page, order); | |
23919 | } | |
23920 | ||
23921 | +static void free_delayed(struct list_head *h) | |
23922 | +{ | |
23923 | + while(!list_empty(h)) { | |
23924 | + struct page *page = list_first_entry(h, struct page, lru); | |
23925 | + | |
23926 | + list_del(&page->lru); | |
23927 | + __free_slab(page->slab_cache, page); | |
23928 | + } | |
23929 | +} | |
23930 | + | |
23931 | #define need_reserve_slab_rcu \ | |
23932 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
23933 | ||
23934 | @@ -1695,6 +1718,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) | |
23935 | } | |
23936 | ||
23937 | call_rcu(head, rcu_free_slab); | |
23938 | + } else if (irqs_disabled()) { | |
23939 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
23940 | + | |
23941 | + raw_spin_lock(&f->lock); | |
23942 | + list_add(&page->lru, &f->list); | |
23943 | + raw_spin_unlock(&f->lock); | |
23944 | } else | |
23945 | __free_slab(s, page); | |
23946 | } | |
23947 | @@ -1802,7 +1831,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
23948 | if (!n || !n->nr_partial) | |
23949 | return NULL; | |
23950 | ||
23951 | - spin_lock(&n->list_lock); | |
23952 | + raw_spin_lock(&n->list_lock); | |
23953 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
23954 | void *t; | |
23955 | ||
23956 | @@ -1827,7 +1856,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
23957 | break; | |
23958 | ||
23959 | } | |
23960 | - spin_unlock(&n->list_lock); | |
23961 | + raw_spin_unlock(&n->list_lock); | |
23962 | return object; | |
23963 | } | |
23964 | ||
23965 | @@ -2073,7 +2102,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
23966 | * that acquire_slab() will see a slab page that | |
23967 | * is frozen | |
23968 | */ | |
23969 | - spin_lock(&n->list_lock); | |
23970 | + raw_spin_lock(&n->list_lock); | |
23971 | } | |
23972 | } else { | |
23973 | m = M_FULL; | |
23974 | @@ -2084,7 +2113,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
23975 | * slabs from diagnostic functions will not see | |
23976 | * any frozen slabs. | |
23977 | */ | |
23978 | - spin_lock(&n->list_lock); | |
23979 | + raw_spin_lock(&n->list_lock); | |
23980 | } | |
23981 | } | |
23982 | ||
23983 | @@ -2119,7 +2148,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, | |
23984 | goto redo; | |
23985 | ||
23986 | if (lock) | |
23987 | - spin_unlock(&n->list_lock); | |
23988 | + raw_spin_unlock(&n->list_lock); | |
23989 | ||
23990 | if (m == M_FREE) { | |
23991 | stat(s, DEACTIVATE_EMPTY); | |
23992 | @@ -2151,10 +2180,10 @@ static void unfreeze_partials(struct kmem_cache *s, | |
23993 | n2 = get_node(s, page_to_nid(page)); | |
23994 | if (n != n2) { | |
23995 | if (n) | |
23996 | - spin_unlock(&n->list_lock); | |
23997 | + raw_spin_unlock(&n->list_lock); | |
23998 | ||
23999 | n = n2; | |
24000 | - spin_lock(&n->list_lock); | |
24001 | + raw_spin_lock(&n->list_lock); | |
24002 | } | |
24003 | ||
24004 | do { | |
24005 | @@ -2183,7 +2212,7 @@ static void unfreeze_partials(struct kmem_cache *s, | |
24006 | } | |
24007 | ||
24008 | if (n) | |
24009 | - spin_unlock(&n->list_lock); | |
24010 | + raw_spin_unlock(&n->list_lock); | |
24011 | ||
24012 | while (discard_page) { | |
24013 | page = discard_page; | |
24014 | @@ -2222,14 +2251,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |
24015 | pobjects = oldpage->pobjects; | |
24016 | pages = oldpage->pages; | |
24017 | if (drain && pobjects > s->cpu_partial) { | |
24018 | + struct slub_free_list *f; | |
24019 | unsigned long flags; | |
24020 | + LIST_HEAD(tofree); | |
24021 | /* | |
24022 | * partial array is full. Move the existing | |
24023 | * set to the per node partial list. | |
24024 | */ | |
24025 | local_irq_save(flags); | |
24026 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
24027 | + f = this_cpu_ptr(&slub_free_list); | |
24028 | + raw_spin_lock(&f->lock); | |
24029 | + list_splice_init(&f->list, &tofree); | |
24030 | + raw_spin_unlock(&f->lock); | |
24031 | local_irq_restore(flags); | |
24032 | + free_delayed(&tofree); | |
24033 | oldpage = NULL; | |
24034 | pobjects = 0; | |
24035 | pages = 0; | |
24036 | @@ -2301,7 +2337,22 @@ static bool has_cpu_slab(int cpu, void *info) | |
24037 | ||
24038 | static void flush_all(struct kmem_cache *s) | |
24039 | { | |
24040 | + LIST_HEAD(tofree); | |
24041 | + int cpu; | |
24042 | + | |
24043 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
24044 | + for_each_online_cpu(cpu) { | |
24045 | + struct slub_free_list *f; | |
24046 | + | |
24047 | + if (!has_cpu_slab(cpu, s)) | |
24048 | + continue; | |
24049 | + | |
24050 | + f = &per_cpu(slub_free_list, cpu); | |
24051 | + raw_spin_lock_irq(&f->lock); | |
24052 | + list_splice_init(&f->list, &tofree); | |
24053 | + raw_spin_unlock_irq(&f->lock); | |
24054 | + free_delayed(&tofree); | |
24055 | + } | |
24056 | } | |
24057 | ||
24058 | /* | |
24059 | @@ -2337,10 +2388,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, | |
24060 | unsigned long x = 0; | |
24061 | struct page *page; | |
24062 | ||
24063 | - spin_lock_irqsave(&n->list_lock, flags); | |
24064 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
24065 | list_for_each_entry(page, &n->partial, lru) | |
24066 | x += get_count(page); | |
24067 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24068 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24069 | return x; | |
24070 | } | |
24071 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
24072 | @@ -2478,8 +2529,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |
24073 | * already disabled (which is the case for bulk allocation). | |
24074 | */ | |
24075 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
24076 | - unsigned long addr, struct kmem_cache_cpu *c) | |
24077 | + unsigned long addr, struct kmem_cache_cpu *c, | |
24078 | + struct list_head *to_free) | |
24079 | { | |
24080 | + struct slub_free_list *f; | |
24081 | void *freelist; | |
24082 | struct page *page; | |
24083 | ||
24084 | @@ -2539,6 +2592,13 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
24085 | VM_BUG_ON(!c->page->frozen); | |
24086 | c->freelist = get_freepointer(s, freelist); | |
24087 | c->tid = next_tid(c->tid); | |
24088 | + | |
24089 | +out: | |
24090 | + f = this_cpu_ptr(&slub_free_list); | |
24091 | + raw_spin_lock(&f->lock); | |
24092 | + list_splice_init(&f->list, to_free); | |
24093 | + raw_spin_unlock(&f->lock); | |
24094 | + | |
24095 | return freelist; | |
24096 | ||
24097 | new_slab: | |
24098 | @@ -2570,7 +2630,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
24099 | deactivate_slab(s, page, get_freepointer(s, freelist)); | |
24100 | c->page = NULL; | |
24101 | c->freelist = NULL; | |
24102 | - return freelist; | |
24103 | + goto out; | |
24104 | } | |
24105 | ||
24106 | /* | |
24107 | @@ -2582,6 +2642,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
24108 | { | |
24109 | void *p; | |
24110 | unsigned long flags; | |
24111 | + LIST_HEAD(tofree); | |
24112 | ||
24113 | local_irq_save(flags); | |
24114 | #ifdef CONFIG_PREEMPT | |
24115 | @@ -2593,8 +2654,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
24116 | c = this_cpu_ptr(s->cpu_slab); | |
24117 | #endif | |
24118 | ||
24119 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
24120 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
24121 | local_irq_restore(flags); | |
24122 | + free_delayed(&tofree); | |
24123 | return p; | |
24124 | } | |
24125 | ||
24126 | @@ -2780,7 +2842,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
24127 | ||
24128 | do { | |
24129 | if (unlikely(n)) { | |
24130 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24131 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24132 | n = NULL; | |
24133 | } | |
24134 | prior = page->freelist; | |
24135 | @@ -2812,7 +2874,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
24136 | * Otherwise the list_lock will synchronize with | |
24137 | * other processors updating the list of slabs. | |
24138 | */ | |
24139 | - spin_lock_irqsave(&n->list_lock, flags); | |
24140 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
24141 | ||
24142 | } | |
24143 | } | |
24144 | @@ -2854,7 +2916,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
24145 | add_partial(n, page, DEACTIVATE_TO_TAIL); | |
24146 | stat(s, FREE_ADD_PARTIAL); | |
24147 | } | |
24148 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24149 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24150 | return; | |
24151 | ||
24152 | slab_empty: | |
24153 | @@ -2869,7 +2931,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
24154 | remove_full(s, n, page); | |
24155 | } | |
24156 | ||
24157 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24158 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24159 | stat(s, FREE_SLAB); | |
24160 | discard_slab(s, page); | |
24161 | } | |
24162 | @@ -3074,6 +3136,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
24163 | void **p) | |
24164 | { | |
24165 | struct kmem_cache_cpu *c; | |
24166 | + LIST_HEAD(to_free); | |
24167 | int i; | |
24168 | ||
24169 | /* memcg and kmem_cache debug support */ | |
24170 | @@ -3097,7 +3160,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
24171 | * of re-populating per CPU c->freelist | |
24172 | */ | |
24173 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
24174 | - _RET_IP_, c); | |
24175 | + _RET_IP_, c, &to_free); | |
24176 | if (unlikely(!p[i])) | |
24177 | goto error; | |
24178 | ||
24179 | @@ -3109,6 +3172,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
24180 | } | |
24181 | c->tid = next_tid(c->tid); | |
24182 | local_irq_enable(); | |
24183 | + free_delayed(&to_free); | |
24184 | ||
24185 | /* Clear memory outside IRQ disabled fastpath loop */ | |
24186 | if (unlikely(flags & __GFP_ZERO)) { | |
24187 | @@ -3256,7 +3320,7 @@ static void | |
24188 | init_kmem_cache_node(struct kmem_cache_node *n) | |
24189 | { | |
24190 | n->nr_partial = 0; | |
24191 | - spin_lock_init(&n->list_lock); | |
24192 | + raw_spin_lock_init(&n->list_lock); | |
24193 | INIT_LIST_HEAD(&n->partial); | |
24194 | #ifdef CONFIG_SLUB_DEBUG | |
24195 | atomic_long_set(&n->nr_slabs, 0); | |
24196 | @@ -3600,6 +3664,10 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |
24197 | const char *text) | |
24198 | { | |
24199 | #ifdef CONFIG_SLUB_DEBUG | |
24200 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24201 | + /* XXX move out of irq-off section */ | |
24202 | + slab_err(s, page, text, s->name); | |
24203 | +#else | |
24204 | void *addr = page_address(page); | |
24205 | void *p; | |
24206 | unsigned long *map = kzalloc(BITS_TO_LONGS(page->objects) * | |
24207 | @@ -3620,6 +3688,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, | |
24208 | slab_unlock(page); | |
24209 | kfree(map); | |
24210 | #endif | |
24211 | +#endif | |
24212 | } | |
24213 | ||
24214 | /* | |
24215 | @@ -3633,7 +3702,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |
24216 | struct page *page, *h; | |
24217 | ||
24218 | BUG_ON(irqs_disabled()); | |
24219 | - spin_lock_irq(&n->list_lock); | |
24220 | + raw_spin_lock_irq(&n->list_lock); | |
24221 | list_for_each_entry_safe(page, h, &n->partial, lru) { | |
24222 | if (!page->inuse) { | |
24223 | remove_partial(n, page); | |
24224 | @@ -3643,7 +3712,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) | |
24225 | "Objects remaining in %s on __kmem_cache_shutdown()"); | |
24226 | } | |
24227 | } | |
24228 | - spin_unlock_irq(&n->list_lock); | |
24229 | + raw_spin_unlock_irq(&n->list_lock); | |
24230 | ||
24231 | list_for_each_entry_safe(page, h, &discard, lru) | |
24232 | discard_slab(s, page); | |
24233 | @@ -3901,7 +3970,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
24234 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) | |
24235 | INIT_LIST_HEAD(promote + i); | |
24236 | ||
24237 | - spin_lock_irqsave(&n->list_lock, flags); | |
24238 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
24239 | ||
24240 | /* | |
24241 | * Build lists of slabs to discard or promote. | |
24242 | @@ -3932,7 +4001,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
24243 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) | |
24244 | list_splice(promote + i, &n->partial); | |
24245 | ||
24246 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24247 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24248 | ||
24249 | /* Release empty slabs */ | |
24250 | list_for_each_entry_safe(page, t, &discard, lru) | |
24251 | @@ -4108,6 +4177,12 @@ void __init kmem_cache_init(void) | |
24252 | { | |
24253 | static __initdata struct kmem_cache boot_kmem_cache, | |
24254 | boot_kmem_cache_node; | |
24255 | + int cpu; | |
24256 | + | |
24257 | + for_each_possible_cpu(cpu) { | |
24258 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
24259 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
24260 | + } | |
24261 | ||
24262 | if (debug_guardpage_minorder()) | |
24263 | slub_max_order = 0; | |
24264 | @@ -4354,7 +4429,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
24265 | struct page *page; | |
24266 | unsigned long flags; | |
24267 | ||
24268 | - spin_lock_irqsave(&n->list_lock, flags); | |
24269 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
24270 | ||
24271 | list_for_each_entry(page, &n->partial, lru) { | |
24272 | validate_slab_slab(s, page, map); | |
24273 | @@ -4376,7 +4451,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
24274 | s->name, count, atomic_long_read(&n->nr_slabs)); | |
24275 | ||
24276 | out: | |
24277 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24278 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24279 | return count; | |
24280 | } | |
24281 | ||
24282 | @@ -4564,12 +4639,12 @@ static int list_locations(struct kmem_cache *s, char *buf, | |
24283 | if (!atomic_long_read(&n->nr_slabs)) | |
24284 | continue; | |
24285 | ||
24286 | - spin_lock_irqsave(&n->list_lock, flags); | |
24287 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
24288 | list_for_each_entry(page, &n->partial, lru) | |
24289 | process_slab(&t, s, page, alloc, map); | |
24290 | list_for_each_entry(page, &n->full, lru) | |
24291 | process_slab(&t, s, page, alloc, map); | |
24292 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
24293 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
24294 | } | |
24295 | ||
24296 | for (i = 0; i < t.count; i++) { | |
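
Beyond the lock conversion, the slub.c hunks add a per-CPU slub_free_list: slabs that become free while interrupts are off are queued on it by free_slab(), and free_delayed() releases them later, once interrupts are enabled again and the (on RT, sleeping) page allocator may safely be called. A compact userspace sketch of the same defer-then-drain idea; the thread-local list stands in for the per-CPU list and all names are invented for illustration:

    #include <stdio.h>
    #include <stdlib.h>

    struct deferred {
            struct deferred *next;
            void *obj;
    };

    /* Per-thread list, standing in for the per-CPU slub_free_list. */
    static _Thread_local struct deferred *deferred_list;

    /* Called from a context where releasing memory directly is not
     * allowed (the kernel case: IRQs off, allocator may sleep on RT). */
    static void free_deferred(void *obj)
    {
            struct deferred *d = malloc(sizeof(*d));

            d->obj = obj;
            d->next = deferred_list;
            deferred_list = d;
    }

    /* Called later from a safe context, mirroring free_delayed(). */
    static void drain_deferred(void)
    {
            while (deferred_list) {
                    struct deferred *d = deferred_list;

                    deferred_list = d->next;
                    free(d->obj);           /* the real release happens here */
                    free(d);
            }
    }

    int main(void)
    {
            free_deferred(malloc(128));     /* queued, not yet released */
            free_deferred(malloc(256));
            drain_deferred();               /* released once it is safe */
            printf("deferred frees drained\n");
            return 0;
    }

In the kernel version no extra allocation is needed: the queued page's own lru list_head is used to link it into the per-CPU list.
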
24297 | diff --git a/mm/swap.c b/mm/swap.c | |
24298 | index 75c63bb2a1da..93fe549eb11e 100644 | |
24299 | --- a/mm/swap.c | |
24300 | +++ b/mm/swap.c | |
24301 | @@ -32,6 +32,7 @@ | |
24302 | #include <linux/memcontrol.h> | |
24303 | #include <linux/gfp.h> | |
24304 | #include <linux/uio.h> | |
24305 | +#include <linux/locallock.h> | |
24306 | #include <linux/hugetlb.h> | |
24307 | #include <linux/page_idle.h> | |
24308 | ||
24309 | @@ -50,6 +51,8 @@ static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs); | |
24310 | #ifdef CONFIG_SMP | |
24311 | static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs); | |
24312 | #endif | |
24313 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
24314 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
24315 | ||
24316 | /* | |
24317 | * This path almost never happens for VM activity - pages are normally | |
24318 | @@ -240,11 +243,11 @@ void rotate_reclaimable_page(struct page *page) | |
24319 | unsigned long flags; | |
24320 | ||
24321 | get_page(page); | |
24322 | - local_irq_save(flags); | |
24323 | + local_lock_irqsave(rotate_lock, flags); | |
24324 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
24325 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
24326 | pagevec_move_tail(pvec); | |
24327 | - local_irq_restore(flags); | |
24328 | + local_unlock_irqrestore(rotate_lock, flags); | |
24329 | } | |
24330 | } | |
24331 | ||
24332 | @@ -294,12 +297,13 @@ void activate_page(struct page *page) | |
24333 | { | |
24334 | page = compound_head(page); | |
24335 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
24336 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
24337 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
24338 | + activate_page_pvecs); | |
24339 | ||
24340 | get_page(page); | |
24341 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
24342 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
24343 | - put_cpu_var(activate_page_pvecs); | |
24344 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
24345 | } | |
24346 | } | |
24347 | ||
24348 | @@ -326,7 +330,7 @@ void activate_page(struct page *page) | |
24349 | ||
24350 | static void __lru_cache_activate_page(struct page *page) | |
24351 | { | |
24352 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
24353 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
24354 | int i; | |
24355 | ||
24356 | /* | |
24357 | @@ -348,7 +352,7 @@ static void __lru_cache_activate_page(struct page *page) | |
24358 | } | |
24359 | } | |
24360 | ||
24361 | - put_cpu_var(lru_add_pvec); | |
24362 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
24363 | } | |
24364 | ||
24365 | /* | |
24366 | @@ -390,12 +394,12 @@ EXPORT_SYMBOL(mark_page_accessed); | |
24367 | ||
24368 | static void __lru_cache_add(struct page *page) | |
24369 | { | |
24370 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
24371 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
24372 | ||
24373 | get_page(page); | |
24374 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
24375 | __pagevec_lru_add(pvec); | |
24376 | - put_cpu_var(lru_add_pvec); | |
24377 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
24378 | } | |
24379 | ||
24380 | /** | |
24381 | @@ -593,9 +597,15 @@ void lru_add_drain_cpu(int cpu) | |
24382 | unsigned long flags; | |
24383 | ||
24384 | /* No harm done if a racing interrupt already did this */ | |
24385 | - local_irq_save(flags); | |
24386 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24387 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
24388 | pagevec_move_tail(pvec); | |
24389 | - local_irq_restore(flags); | |
24390 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
24391 | +#else | |
24392 | + local_lock_irqsave(rotate_lock, flags); | |
24393 | + pagevec_move_tail(pvec); | |
24394 | + local_unlock_irqrestore(rotate_lock, flags); | |
24395 | +#endif | |
24396 | } | |
24397 | ||
24398 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
24399 | @@ -627,11 +637,12 @@ void deactivate_file_page(struct page *page) | |
24400 | return; | |
24401 | ||
24402 | if (likely(get_page_unless_zero(page))) { | |
24403 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
24404 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
24405 | + lru_deactivate_file_pvecs); | |
24406 | ||
24407 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
24408 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
24409 | - put_cpu_var(lru_deactivate_file_pvecs); | |
24410 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
24411 | } | |
24412 | } | |
24413 | ||
24414 | @@ -646,27 +657,31 @@ void deactivate_file_page(struct page *page) | |
24415 | void deactivate_page(struct page *page) | |
24416 | { | |
24417 | if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { | |
24418 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs); | |
24419 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
24420 | + lru_deactivate_pvecs); | |
24421 | ||
24422 | get_page(page); | |
24423 | if (!pagevec_add(pvec, page) || PageCompound(page)) | |
24424 | pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL); | |
24425 | - put_cpu_var(lru_deactivate_pvecs); | |
24426 | + put_locked_var(swapvec_lock, lru_deactivate_pvecs); | |
24427 | } | |
24428 | } | |
24429 | ||
24430 | void lru_add_drain(void) | |
24431 | { | |
24432 | - lru_add_drain_cpu(get_cpu()); | |
24433 | - put_cpu(); | |
24434 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
24435 | + local_unlock_cpu(swapvec_lock); | |
24436 | } | |
24437 | ||
24438 | -static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
24439 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
24440 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
24441 | { | |
24442 | - lru_add_drain(); | |
24443 | + local_lock_on(swapvec_lock, cpu); | |
24444 | + lru_add_drain_cpu(cpu); | |
24445 | + local_unlock_on(swapvec_lock, cpu); | |
24446 | } | |
24447 | ||
24448 | -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
24449 | +#else | |
24450 | ||
24451 | /* | |
24452 | * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM | |
24453 | @@ -686,6 +701,22 @@ static int __init lru_init(void) | |
24454 | } | |
24455 | early_initcall(lru_init); | |
24456 | ||
24457 | +static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
24458 | +{ | |
24459 | + lru_add_drain(); | |
24460 | +} | |
24461 | + | |
24462 | +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
24463 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
24464 | +{ | |
24465 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
24466 | + | |
24467 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
24468 | + queue_work_on(cpu, lru_add_drain_wq, work); | |
24469 | + cpumask_set_cpu(cpu, has_work); | |
24470 | +} | |
24471 | +#endif | |
24472 | + | |
24473 | void lru_add_drain_all(void) | |
24474 | { | |
24475 | static DEFINE_MUTEX(lock); | |
24476 | @@ -697,21 +728,18 @@ void lru_add_drain_all(void) | |
24477 | cpumask_clear(&has_work); | |
24478 | ||
24479 | for_each_online_cpu(cpu) { | |
24480 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
24481 | - | |
24482 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || | |
24483 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
24484 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
24485 | pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) || | |
24486 | - need_activate_page_drain(cpu)) { | |
24487 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
24488 | - queue_work_on(cpu, lru_add_drain_wq, work); | |
24489 | - cpumask_set_cpu(cpu, &has_work); | |
24490 | - } | |
24491 | + need_activate_page_drain(cpu)) | |
24492 | + remote_lru_add_drain(cpu, &has_work); | |
24493 | } | |
24494 | ||
24495 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
24496 | for_each_cpu(cpu, &has_work) | |
24497 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
24498 | +#endif | |
24499 | ||
24500 | put_online_cpus(); | |
24501 | mutex_unlock(&lock); | |
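
In swap.c the per-CPU pagevecs are no longer reached through get_cpu_var()/put_cpu_var(), which disable preemption, but through get_locked_var()/put_locked_var() on the swapvec_lock local lock, so the section stays preemptible on RT while still being serialized per CPU. A rough userspace analogue of that pattern, with mutex-protected slots standing in for CPUs; the slot count and all names are illustrative only:

    #include <pthread.h>
    #include <stdio.h>

    #define NR_SLOTS 4                      /* stands in for the number of CPUs */

    struct slot {
            pthread_mutex_t lock;           /* the "local lock" */
            int pagevec_len;                /* stands in for the per-CPU pagevec */
    };

    static struct slot slots[NR_SLOTS];

    static struct slot *get_locked_slot(unsigned int id)
    {
            struct slot *s = &slots[id % NR_SLOTS];

            pthread_mutex_lock(&s->lock);   /* serialize, but remain preemptible */
            return s;
    }

    static void put_locked_slot(struct slot *s)
    {
            pthread_mutex_unlock(&s->lock);
    }

    int main(void)
    {
            int i;

            for (i = 0; i < NR_SLOTS; i++)
                    pthread_mutex_init(&slots[i].lock, NULL);

            struct slot *s = get_locked_slot(0);

            s->pagevec_len++;               /* safe update of the "per-CPU" data */
            put_locked_slot(s);

            printf("slot 0 pagevec length: %d\n", slots[0].pagevec_len);
            return 0;
    }
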
24502 | diff --git a/mm/truncate.c b/mm/truncate.c | |
24503 | index a01cce450a26..4bda37604f99 100644 | |
24504 | --- a/mm/truncate.c | |
24505 | +++ b/mm/truncate.c | |
24506 | @@ -62,9 +62,12 @@ static void clear_exceptional_entry(struct address_space *mapping, | |
24507 | * protected by mapping->tree_lock. | |
24508 | */ | |
24509 | if (!workingset_node_shadows(node) && | |
24510 | - !list_empty(&node->private_list)) | |
24511 | - list_lru_del(&workingset_shadow_nodes, | |
24512 | + !list_empty(&node->private_list)) { | |
24513 | + local_lock(workingset_shadow_lock); | |
24514 | + list_lru_del(&__workingset_shadow_nodes, | |
24515 | &node->private_list); | |
24516 | + local_unlock(workingset_shadow_lock); | |
24517 | + } | |
24518 | __radix_tree_delete_node(&mapping->page_tree, node); | |
24519 | unlock: | |
24520 | spin_unlock_irq(&mapping->tree_lock); | |
24521 | diff --git a/mm/vmalloc.c b/mm/vmalloc.c | |
24522 | index 91f44e78c516..06ec393bb97d 100644 | |
24523 | --- a/mm/vmalloc.c | |
24524 | +++ b/mm/vmalloc.c | |
24525 | @@ -845,7 +845,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
24526 | struct vmap_block *vb; | |
24527 | struct vmap_area *va; | |
24528 | unsigned long vb_idx; | |
24529 | - int node, err; | |
24530 | + int node, err, cpu; | |
24531 | void *vaddr; | |
24532 | ||
24533 | node = numa_node_id(); | |
24534 | @@ -888,11 +888,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
24535 | BUG_ON(err); | |
24536 | radix_tree_preload_end(); | |
24537 | ||
24538 | - vbq = &get_cpu_var(vmap_block_queue); | |
24539 | + cpu = get_cpu_light(); | |
24540 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
24541 | spin_lock(&vbq->lock); | |
24542 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
24543 | spin_unlock(&vbq->lock); | |
24544 | - put_cpu_var(vmap_block_queue); | |
24545 | + put_cpu_light(); | |
24546 | ||
24547 | return vaddr; | |
24548 | } | |
24549 | @@ -961,6 +962,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
24550 | struct vmap_block *vb; | |
24551 | void *vaddr = NULL; | |
24552 | unsigned int order; | |
24553 | + int cpu; | |
24554 | ||
24555 | BUG_ON(offset_in_page(size)); | |
24556 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
24557 | @@ -975,7 +977,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
24558 | order = get_order(size); | |
24559 | ||
24560 | rcu_read_lock(); | |
24561 | - vbq = &get_cpu_var(vmap_block_queue); | |
24562 | + cpu = get_cpu_light(); | |
24563 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
24564 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
24565 | unsigned long pages_off; | |
24566 | ||
24567 | @@ -998,7 +1001,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
24568 | break; | |
24569 | } | |
24570 | ||
24571 | - put_cpu_var(vmap_block_queue); | |
24572 | + put_cpu_light(); | |
24573 | rcu_read_unlock(); | |
24574 | ||
24575 | /* Allocate new block if nothing was found */ | |
24576 | diff --git a/mm/vmstat.c b/mm/vmstat.c | |
24577 | index 89cec42d19ff..fb73631fb90b 100644 | |
24578 | --- a/mm/vmstat.c | |
24579 | +++ b/mm/vmstat.c | |
24580 | @@ -245,6 +245,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
24581 | long x; | |
24582 | long t; | |
24583 | ||
24584 | + preempt_disable_rt(); | |
24585 | x = delta + __this_cpu_read(*p); | |
24586 | ||
24587 | t = __this_cpu_read(pcp->stat_threshold); | |
24588 | @@ -254,6 +255,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
24589 | x = 0; | |
24590 | } | |
24591 | __this_cpu_write(*p, x); | |
24592 | + preempt_enable_rt(); | |
24593 | } | |
24594 | EXPORT_SYMBOL(__mod_zone_page_state); | |
24595 | ||
24596 | @@ -265,6 +267,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
24597 | long x; | |
24598 | long t; | |
24599 | ||
24600 | + preempt_disable_rt(); | |
24601 | x = delta + __this_cpu_read(*p); | |
24602 | ||
24603 | t = __this_cpu_read(pcp->stat_threshold); | |
24604 | @@ -274,6 +277,7 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, | |
24605 | x = 0; | |
24606 | } | |
24607 | __this_cpu_write(*p, x); | |
24608 | + preempt_enable_rt(); | |
24609 | } | |
24610 | EXPORT_SYMBOL(__mod_node_page_state); | |
24611 | ||
24612 | @@ -306,6 +310,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
24613 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
24614 | s8 v, t; | |
24615 | ||
24616 | + preempt_disable_rt(); | |
24617 | v = __this_cpu_inc_return(*p); | |
24618 | t = __this_cpu_read(pcp->stat_threshold); | |
24619 | if (unlikely(v > t)) { | |
24620 | @@ -314,6 +319,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
24621 | zone_page_state_add(v + overstep, zone, item); | |
24622 | __this_cpu_write(*p, -overstep); | |
24623 | } | |
24624 | + preempt_enable_rt(); | |
24625 | } | |
24626 | ||
24627 | void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24628 | @@ -322,6 +328,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24629 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
24630 | s8 v, t; | |
24631 | ||
24632 | + preempt_disable_rt(); | |
24633 | v = __this_cpu_inc_return(*p); | |
24634 | t = __this_cpu_read(pcp->stat_threshold); | |
24635 | if (unlikely(v > t)) { | |
24636 | @@ -330,6 +337,7 @@ void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24637 | node_page_state_add(v + overstep, pgdat, item); | |
24638 | __this_cpu_write(*p, -overstep); | |
24639 | } | |
24640 | + preempt_enable_rt(); | |
24641 | } | |
24642 | ||
24643 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
24644 | @@ -350,6 +358,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
24645 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
24646 | s8 v, t; | |
24647 | ||
24648 | + preempt_disable_rt(); | |
24649 | v = __this_cpu_dec_return(*p); | |
24650 | t = __this_cpu_read(pcp->stat_threshold); | |
24651 | if (unlikely(v < - t)) { | |
24652 | @@ -358,6 +367,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
24653 | zone_page_state_add(v - overstep, zone, item); | |
24654 | __this_cpu_write(*p, overstep); | |
24655 | } | |
24656 | + preempt_enable_rt(); | |
24657 | } | |
24658 | ||
24659 | void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24660 | @@ -366,6 +376,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24661 | s8 __percpu *p = pcp->vm_node_stat_diff + item; | |
24662 | s8 v, t; | |
24663 | ||
24664 | + preempt_disable_rt(); | |
24665 | v = __this_cpu_dec_return(*p); | |
24666 | t = __this_cpu_read(pcp->stat_threshold); | |
24667 | if (unlikely(v < - t)) { | |
24668 | @@ -374,6 +385,7 @@ void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | |
24669 | node_page_state_add(v - overstep, pgdat, item); | |
24670 | __this_cpu_write(*p, overstep); | |
24671 | } | |
24672 | + preempt_enable_rt(); | |
24673 | } | |
24674 | ||
24675 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
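
The vmstat hunks wrap each per-CPU counter update in preempt_disable_rt()/preempt_enable_rt() (no-ops on non-RT kernels): the __this_cpu_* read-modify-write sequences assume the caller already runs with preemption off, which is no longer guaranteed once the surrounding locks become sleeping locks. A small userspace demonstration of why an unprotected read-modify-write loses updates as soon as two contexts can race on the same counter, with an atomic counter for comparison; this illustrates the hazard only, the kernel's fix is simply to keep preemption disabled across the sequence:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    #define LOOPS 1000000

    static long plain_counter;              /* unprotected read-modify-write */
    static atomic_long atomic_counter;      /* race-free for comparison      */

    static void *worker(void *arg)
    {
            int i;

            (void)arg;
            for (i = 0; i < LOOPS; i++) {
                    plain_counter++;                        /* may lose updates   */
                    atomic_fetch_add(&atomic_counter, 1);   /* never loses updates */
            }
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_create(&a, NULL, worker, NULL);
            pthread_create(&b, NULL, worker, NULL);
            pthread_join(a, NULL);
            pthread_join(b, NULL);

            printf("plain:  %ld (expected %d)\n", plain_counter, 2 * LOOPS);
            printf("atomic: %ld\n", atomic_load(&atomic_counter));
            return 0;
    }
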
24676 | diff --git a/mm/workingset.c b/mm/workingset.c | |
24677 | index 617475f529f4..48674bf36fb1 100644 | |
24678 | --- a/mm/workingset.c | |
24679 | +++ b/mm/workingset.c | |
24680 | @@ -334,7 +334,8 @@ void workingset_activation(struct page *page) | |
24681 | * point where they would still be useful. | |
24682 | */ | |
24683 | ||
24684 | -struct list_lru workingset_shadow_nodes; | |
24685 | +struct list_lru __workingset_shadow_nodes; | |
24686 | +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
24687 | ||
24688 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
24689 | struct shrink_control *sc) | |
24690 | @@ -344,9 +345,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
24691 | unsigned long pages; | |
24692 | ||
24693 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
24694 | - local_irq_disable(); | |
24695 | - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | |
24696 | - local_irq_enable(); | |
24697 | + local_lock_irq(workingset_shadow_lock); | |
24698 | + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); | |
24699 | + local_unlock_irq(workingset_shadow_lock); | |
24700 | ||
24701 | if (memcg_kmem_enabled()) { | |
24702 | pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, | |
24703 | @@ -438,9 +439,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, | |
24704 | spin_unlock(&mapping->tree_lock); | |
24705 | ret = LRU_REMOVED_RETRY; | |
24706 | out: | |
24707 | - local_irq_enable(); | |
24708 | + local_unlock_irq(workingset_shadow_lock); | |
24709 | cond_resched(); | |
24710 | - local_irq_disable(); | |
24711 | + local_lock_irq(workingset_shadow_lock); | |
24712 | spin_lock(lru_lock); | |
24713 | return ret; | |
24714 | } | |
24715 | @@ -451,10 +452,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, | |
24716 | unsigned long ret; | |
24717 | ||
24718 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
24719 | - local_irq_disable(); | |
24720 | - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | |
24721 | + local_lock_irq(workingset_shadow_lock); | |
24722 | + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc, | |
24723 | shadow_lru_isolate, NULL); | |
24724 | - local_irq_enable(); | |
24725 | + local_unlock_irq(workingset_shadow_lock); | |
24726 | return ret; | |
24727 | } | |
24728 | ||
24729 | @@ -492,7 +493,7 @@ static int __init workingset_init(void) | |
24730 | pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n", | |
24731 | timestamp_bits, max_order, bucket_order); | |
24732 | ||
24733 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | |
24734 | + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key); | |
24735 | if (ret) | |
24736 | goto err; | |
24737 | ret = register_shrinker(&workingset_shadow_shrinker); | |
24738 | @@ -500,7 +501,7 @@ static int __init workingset_init(void) | |
24739 | goto err_list_lru; | |
24740 | return 0; | |
24741 | err_list_lru: | |
24742 | - list_lru_destroy(&workingset_shadow_nodes); | |
24743 | + list_lru_destroy(&__workingset_shadow_nodes); | |
24744 | err: | |
24745 | return ret; | |
24746 | } | |
24747 | diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c | |
24748 | index b0bc023d25c5..5af6426fbcbe 100644 | |
24749 | --- a/mm/zsmalloc.c | |
24750 | +++ b/mm/zsmalloc.c | |
24751 | @@ -53,6 +53,7 @@ | |
24752 | #include <linux/mount.h> | |
24753 | #include <linux/migrate.h> | |
24754 | #include <linux/pagemap.h> | |
24755 | +#include <linux/locallock.h> | |
24756 | ||
24757 | #define ZSPAGE_MAGIC 0x58 | |
24758 | ||
24759 | @@ -70,9 +71,22 @@ | |
24760 | */ | |
24761 | #define ZS_MAX_ZSPAGE_ORDER 2 | |
24762 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | |
24763 | - | |
24764 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | |
24765 | ||
24766 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24767 | + | |
24768 | +struct zsmalloc_handle { | |
24769 | + unsigned long addr; | |
24770 | + struct mutex lock; | |
24771 | +}; | |
24772 | + | |
24773 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle)) | |
24774 | + | |
24775 | +#else | |
24776 | + | |
24777 | +#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long)) | |
24778 | +#endif | |
24779 | + | |
24780 | /* | |
24781 | * Object location (<PFN>, <obj_idx>) is encoded as | |
24782 | * as single (unsigned long) handle value. | |
24783 | @@ -327,7 +341,7 @@ static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} | |
24784 | ||
24785 | static int create_cache(struct zs_pool *pool) | |
24786 | { | |
24787 | - pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | |
24788 | + pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE, | |
24789 | 0, 0, NULL); | |
24790 | if (!pool->handle_cachep) | |
24791 | return 1; | |
24792 | @@ -351,10 +365,27 @@ static void destroy_cache(struct zs_pool *pool) | |
24793 | ||
24794 | static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) | |
24795 | { | |
24796 | - return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | |
24797 | - gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
24798 | + void *p; | |
24799 | + | |
24800 | + p = kmem_cache_alloc(pool->handle_cachep, | |
24801 | + gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE)); | |
24802 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24803 | + if (p) { | |
24804 | + struct zsmalloc_handle *zh = p; | |
24805 | + | |
24806 | + mutex_init(&zh->lock); | |
24807 | + } | |
24808 | +#endif | |
24809 | + return (unsigned long)p; | |
24810 | } | |
24811 | ||
24812 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24813 | +static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle) | |
24814 | +{ | |
24815 | + return (void *)(handle &~((1 << OBJ_TAG_BITS) - 1)); | |
24816 | +} | |
24817 | +#endif | |
24818 | + | |
24819 | static void cache_free_handle(struct zs_pool *pool, unsigned long handle) | |
24820 | { | |
24821 | kmem_cache_free(pool->handle_cachep, (void *)handle); | |
24822 | @@ -373,12 +404,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) | |
24823 | ||
24824 | static void record_obj(unsigned long handle, unsigned long obj) | |
24825 | { | |
24826 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24827 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24828 | + | |
24829 | + WRITE_ONCE(zh->addr, obj); | |
24830 | +#else | |
24831 | /* | |
24832 | * lsb of @obj represents handle lock while other bits | |
24833 | * represent object value the handle is pointing so | |
24834 | * updating shouldn't do store tearing. | |
24835 | */ | |
24836 | WRITE_ONCE(*(unsigned long *)handle, obj); | |
24837 | +#endif | |
24838 | } | |
24839 | ||
24840 | /* zpool driver */ | |
24841 | @@ -467,6 +504,7 @@ MODULE_ALIAS("zpool-zsmalloc"); | |
24842 | ||
24843 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
24844 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
24845 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
24846 | ||
24847 | static bool is_zspage_isolated(struct zspage *zspage) | |
24848 | { | |
24849 | @@ -902,7 +940,13 @@ static unsigned long location_to_obj(struct page *page, unsigned int obj_idx) | |
24850 | ||
24851 | static unsigned long handle_to_obj(unsigned long handle) | |
24852 | { | |
24853 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24854 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24855 | + | |
24856 | + return zh->addr; | |
24857 | +#else | |
24858 | return *(unsigned long *)handle; | |
24859 | +#endif | |
24860 | } | |
24861 | ||
24862 | static unsigned long obj_to_head(struct page *page, void *obj) | |
24863 | @@ -916,22 +960,46 @@ static unsigned long obj_to_head(struct page *page, void *obj) | |
24864 | ||
24865 | static inline int testpin_tag(unsigned long handle) | |
24866 | { | |
24867 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24868 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24869 | + | |
24870 | + return mutex_is_locked(&zh->lock); | |
24871 | +#else | |
24872 | return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); | |
24873 | +#endif | |
24874 | } | |
24875 | ||
24876 | static inline int trypin_tag(unsigned long handle) | |
24877 | { | |
24878 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24879 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24880 | + | |
24881 | + return mutex_trylock(&zh->lock); | |
24882 | +#else | |
24883 | return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
24884 | +#endif | |
24885 | } | |
24886 | ||
24887 | static void pin_tag(unsigned long handle) | |
24888 | { | |
24889 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24890 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24891 | + | |
24892 | + return mutex_lock(&zh->lock); | |
24893 | +#else | |
24894 | bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
24895 | +#endif | |
24896 | } | |
24897 | ||
24898 | static void unpin_tag(unsigned long handle) | |
24899 | { | |
24900 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24901 | + struct zsmalloc_handle *zh = zs_get_pure_handle(handle); | |
24902 | + | |
24903 | + return mutex_unlock(&zh->lock); | |
24904 | +#else | |
24905 | bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); | |
24906 | +#endif | |
24907 | } | |
24908 | ||
24909 | static void reset_page(struct page *page) | |
24910 | @@ -1423,7 +1491,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |
24911 | class = pool->size_class[class_idx]; | |
24912 | off = (class->size * obj_idx) & ~PAGE_MASK; | |
24913 | ||
24914 | - area = &get_cpu_var(zs_map_area); | |
24915 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); | |
24916 | area->vm_mm = mm; | |
24917 | if (off + class->size <= PAGE_SIZE) { | |
24918 | /* this object is contained entirely within a page */ | |
24919 | @@ -1477,7 +1545,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |
24920 | ||
24921 | __zs_unmap_object(area, pages, off, class->size); | |
24922 | } | |
24923 | - put_cpu_var(zs_map_area); | |
24924 | + put_locked_var(zs_map_area_lock, zs_map_area); | |
24925 | ||
24926 | migrate_read_unlock(zspage); | |
24927 | unpin_tag(handle); | |
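
For RT, zsmalloc stops pinning handles with bit_spin_lock() on the handle word's low bit (a busy wait that cannot be preempted away) and instead embeds a mutex next to the object address in a zsmalloc_handle structure. A userspace sketch of that layout and of the pin/unpin helpers; the names mirror the patch but the code is simplified and is not the kernel implementation:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* RT-style handle: the object address plus a sleeping lock. */
    struct zs_handle {
            unsigned long addr;
            pthread_mutex_t lock;
    };

    static struct zs_handle *alloc_handle(void)
    {
            struct zs_handle *zh = calloc(1, sizeof(*zh));

            pthread_mutex_init(&zh->lock, NULL);
            return zh;
    }

    static void record_obj(struct zs_handle *zh, unsigned long obj)
    {
            zh->addr = obj;                 /* plain store; no flag bit to preserve */
    }

    static void pin_tag(struct zs_handle *zh)
    {
            pthread_mutex_lock(&zh->lock);  /* replaces bit_spin_lock() on the LSB */
    }

    static void unpin_tag(struct zs_handle *zh)
    {
            pthread_mutex_unlock(&zh->lock);
    }

    int main(void)
    {
            struct zs_handle *zh = alloc_handle();

            record_obj(zh, 0xdeadbeefUL);
            pin_tag(zh);
            printf("pinned object at 0x%lx\n", zh->addr);
            unpin_tag(zh);
            free(zh);
            return 0;
    }

Because the lock no longer lives in the handle word itself, record_obj() and handle_to_obj() become plain stores and loads of zh->addr in the patched kernel code above.
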
24928 | diff --git a/net/core/dev.c b/net/core/dev.c | |
24929 | index ea6312057a71..d114a4692cde 100644 | |
24930 | --- a/net/core/dev.c | |
24931 | +++ b/net/core/dev.c | |
24932 | @@ -190,6 +190,7 @@ static unsigned int napi_gen_id = NR_CPUS; | |
24933 | static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8); | |
24934 | ||
24935 | static seqcount_t devnet_rename_seq; | |
24936 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
24937 | ||
24938 | static inline void dev_base_seq_inc(struct net *net) | |
24939 | { | |
24940 | @@ -211,14 +212,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |
24941 | static inline void rps_lock(struct softnet_data *sd) | |
24942 | { | |
24943 | #ifdef CONFIG_RPS | |
24944 | - spin_lock(&sd->input_pkt_queue.lock); | |
24945 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
24946 | #endif | |
24947 | } | |
24948 | ||
24949 | static inline void rps_unlock(struct softnet_data *sd) | |
24950 | { | |
24951 | #ifdef CONFIG_RPS | |
24952 | - spin_unlock(&sd->input_pkt_queue.lock); | |
24953 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
24954 | #endif | |
24955 | } | |
24956 | ||
24957 | @@ -888,7 +889,8 @@ int netdev_get_name(struct net *net, char *name, int ifindex) | |
24958 | strcpy(name, dev->name); | |
24959 | rcu_read_unlock(); | |
24960 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
24961 | - cond_resched(); | |
24962 | + mutex_lock(&devnet_rename_mutex); | |
24963 | + mutex_unlock(&devnet_rename_mutex); | |
24964 | goto retry; | |
24965 | } | |
24966 | ||
24967 | @@ -1157,20 +1159,17 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
24968 | if (dev->flags & IFF_UP) | |
24969 | return -EBUSY; | |
24970 | ||
24971 | - write_seqcount_begin(&devnet_rename_seq); | |
24972 | + mutex_lock(&devnet_rename_mutex); | |
24973 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
24974 | ||
24975 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
24976 | - write_seqcount_end(&devnet_rename_seq); | |
24977 | - return 0; | |
24978 | - } | |
24979 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
24980 | + goto outunlock; | |
24981 | ||
24982 | memcpy(oldname, dev->name, IFNAMSIZ); | |
24983 | ||
24984 | err = dev_get_valid_name(net, dev, newname); | |
24985 | - if (err < 0) { | |
24986 | - write_seqcount_end(&devnet_rename_seq); | |
24987 | - return err; | |
24988 | - } | |
24989 | + if (err < 0) | |
24990 | + goto outunlock; | |
24991 | ||
24992 | if (oldname[0] && !strchr(oldname, '%')) | |
24993 | netdev_info(dev, "renamed from %s\n", oldname); | |
24994 | @@ -1183,11 +1182,12 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
24995 | if (ret) { | |
24996 | memcpy(dev->name, oldname, IFNAMSIZ); | |
24997 | dev->name_assign_type = old_assign_type; | |
24998 | - write_seqcount_end(&devnet_rename_seq); | |
24999 | - return ret; | |
25000 | + err = ret; | |
25001 | + goto outunlock; | |
25002 | } | |
25003 | ||
25004 | - write_seqcount_end(&devnet_rename_seq); | |
25005 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
25006 | + mutex_unlock(&devnet_rename_mutex); | |
25007 | ||
25008 | netdev_adjacent_rename_links(dev, oldname); | |
25009 | ||
25010 | @@ -1208,7 +1208,8 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
25011 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | |
25012 | if (err >= 0) { | |
25013 | err = ret; | |
25014 | - write_seqcount_begin(&devnet_rename_seq); | |
25015 | + mutex_lock(&devnet_rename_mutex); | |
25016 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
25017 | memcpy(dev->name, oldname, IFNAMSIZ); | |
25018 | memcpy(oldname, newname, IFNAMSIZ); | |
25019 | dev->name_assign_type = old_assign_type; | |
25020 | @@ -1221,6 +1222,11 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
25021 | } | |
25022 | ||
25023 | return err; | |
25024 | + | |
25025 | +outunlock: | |
25026 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
25027 | + mutex_unlock(&devnet_rename_mutex); | |
25028 | + return err; | |
25029 | } | |
25030 | ||
25031 | /** | |
25032 | @@ -2268,6 +2274,7 @@ static void __netif_reschedule(struct Qdisc *q) | |
25033 | sd->output_queue_tailp = &q->next_sched; | |
25034 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
25035 | local_irq_restore(flags); | |
25036 | + preempt_check_resched_rt(); | |
25037 | } | |
25038 | ||
25039 | void __netif_schedule(struct Qdisc *q) | |
25040 | @@ -2349,6 +2356,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) | |
25041 | __this_cpu_write(softnet_data.completion_queue, skb); | |
25042 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
25043 | local_irq_restore(flags); | |
25044 | + preempt_check_resched_rt(); | |
25045 | } | |
25046 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
25047 | ||
25048 | @@ -3082,7 +3090,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |
25049 | * This permits qdisc->running owner to get the lock more | |
25050 | * often and dequeue packets faster. | |
25051 | */ | |
25052 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25053 | + contended = true; | |
25054 | +#else | |
25055 | contended = qdisc_is_running(q); | |
25056 | +#endif | |
25057 | if (unlikely(contended)) | |
25058 | spin_lock(&q->busylock); | |
25059 | ||
25060 | @@ -3145,8 +3157,10 @@ static void skb_update_prio(struct sk_buff *skb) | |
25061 | #define skb_update_prio(skb) | |
25062 | #endif | |
25063 | ||
25064 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
25065 | DEFINE_PER_CPU(int, xmit_recursion); | |
25066 | EXPORT_SYMBOL(xmit_recursion); | |
25067 | +#endif | |
25068 | ||
25069 | /** | |
25070 | * dev_loopback_xmit - loop back @skb | |
25071 | @@ -3390,8 +3404,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
25072 | int cpu = smp_processor_id(); /* ok because BHs are off */ | |
25073 | ||
25074 | if (txq->xmit_lock_owner != cpu) { | |
25075 | - if (unlikely(__this_cpu_read(xmit_recursion) > | |
25076 | - XMIT_RECURSION_LIMIT)) | |
25077 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) | |
25078 | goto recursion_alert; | |
25079 | ||
25080 | skb = validate_xmit_skb(skb, dev); | |
25081 | @@ -3401,9 +3414,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
25082 | HARD_TX_LOCK(dev, txq, cpu); | |
25083 | ||
25084 | if (!netif_xmit_stopped(txq)) { | |
25085 | - __this_cpu_inc(xmit_recursion); | |
25086 | + xmit_rec_inc(); | |
25087 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
25088 | - __this_cpu_dec(xmit_recursion); | |
25089 | + xmit_rec_dec(); | |
25090 | if (dev_xmit_complete(rc)) { | |
25091 | HARD_TX_UNLOCK(dev, txq); | |
25092 | goto out; | |
25093 | @@ -3777,6 +3790,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, | |
25094 | rps_unlock(sd); | |
25095 | ||
25096 | local_irq_restore(flags); | |
25097 | + preempt_check_resched_rt(); | |
25098 | ||
25099 | atomic_long_inc(&skb->dev->rx_dropped); | |
25100 | kfree_skb(skb); | |
25101 | @@ -3795,7 +3809,7 @@ static int netif_rx_internal(struct sk_buff *skb) | |
25102 | struct rps_dev_flow voidflow, *rflow = &voidflow; | |
25103 | int cpu; | |
25104 | ||
25105 | - preempt_disable(); | |
25106 | + migrate_disable(); | |
25107 | rcu_read_lock(); | |
25108 | ||
25109 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
25110 | @@ -3805,13 +3819,13 @@ static int netif_rx_internal(struct sk_buff *skb) | |
25111 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | |
25112 | ||
25113 | rcu_read_unlock(); | |
25114 | - preempt_enable(); | |
25115 | + migrate_enable(); | |
25116 | } else | |
25117 | #endif | |
25118 | { | |
25119 | unsigned int qtail; | |
25120 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | |
25121 | - put_cpu(); | |
25122 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
25123 | + put_cpu_light(); | |
25124 | } | |
25125 | return ret; | |
25126 | } | |
25127 | @@ -3845,11 +3859,9 @@ int netif_rx_ni(struct sk_buff *skb) | |
25128 | ||
25129 | trace_netif_rx_ni_entry(skb); | |
25130 | ||
25131 | - preempt_disable(); | |
25132 | + local_bh_disable(); | |
25133 | err = netif_rx_internal(skb); | |
25134 | - if (local_softirq_pending()) | |
25135 | - do_softirq(); | |
25136 | - preempt_enable(); | |
25137 | + local_bh_enable(); | |
25138 | ||
25139 | return err; | |
25140 | } | |
25141 | @@ -4321,7 +4333,7 @@ static void flush_backlog(void *arg) | |
25142 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | |
25143 | if (skb->dev == dev) { | |
25144 | __skb_unlink(skb, &sd->input_pkt_queue); | |
25145 | - kfree_skb(skb); | |
25146 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
25147 | input_queue_head_incr(sd); | |
25148 | } | |
25149 | } | |
25150 | @@ -4330,10 +4342,13 @@ static void flush_backlog(void *arg) | |
25151 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | |
25152 | if (skb->dev == dev) { | |
25153 | __skb_unlink(skb, &sd->process_queue); | |
25154 | - kfree_skb(skb); | |
25155 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
25156 | input_queue_head_incr(sd); | |
25157 | } | |
25158 | } | |
25159 | + | |
25160 | + if (!skb_queue_empty(&sd->tofree_queue)) | |
25161 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
25162 | } | |
25163 | ||
25164 | static int napi_gro_complete(struct sk_buff *skb) | |
25165 | @@ -4795,6 +4810,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
25166 | sd->rps_ipi_list = NULL; | |
25167 | ||
25168 | local_irq_enable(); | |
25169 | + preempt_check_resched_rt(); | |
25170 | ||
25171 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
25172 | while (remsd) { | |
25173 | @@ -4808,6 +4824,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
25174 | } else | |
25175 | #endif | |
25176 | local_irq_enable(); | |
25177 | + preempt_check_resched_rt(); | |
25178 | } | |
25179 | ||
25180 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
25181 | @@ -4889,6 +4906,7 @@ void __napi_schedule(struct napi_struct *n) | |
25182 | local_irq_save(flags); | |
25183 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
25184 | local_irq_restore(flags); | |
25185 | + preempt_check_resched_rt(); | |
25186 | } | |
25187 | EXPORT_SYMBOL(__napi_schedule); | |
25188 | ||
25189 | @@ -5229,7 +5247,7 @@ static void net_rx_action(struct softirq_action *h) | |
25190 | list_splice_tail(&repoll, &list); | |
25191 | list_splice(&list, &sd->poll_list); | |
25192 | if (!list_empty(&sd->poll_list)) | |
25193 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
25194 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
25195 | ||
25196 | net_rps_action_and_irq_enable(sd); | |
25197 | } | |
25198 | @@ -7736,7 +7754,7 @@ EXPORT_SYMBOL(free_netdev); | |
25199 | void synchronize_net(void) | |
25200 | { | |
25201 | might_sleep(); | |
25202 | - if (rtnl_is_locked()) | |
25203 | + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
25204 | synchronize_rcu_expedited(); | |
25205 | else | |
25206 | synchronize_rcu(); | |
25207 | @@ -7977,16 +7995,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |
25208 | ||
25209 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
25210 | local_irq_enable(); | |
25211 | + preempt_check_resched_rt(); | |
25212 | ||
25213 | /* Process offline CPU's input_pkt_queue */ | |
25214 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | |
25215 | netif_rx_ni(skb); | |
25216 | input_queue_head_incr(oldsd); | |
25217 | } | |
25218 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
25219 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
25220 | netif_rx_ni(skb); | |
25221 | input_queue_head_incr(oldsd); | |
25222 | } | |
25223 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
25224 | + kfree_skb(skb); | |
25225 | + } | |
25226 | ||
25227 | return NOTIFY_OK; | |
25228 | } | |
25229 | @@ -8288,8 +8310,9 @@ static int __init net_dev_init(void) | |
25230 | for_each_possible_cpu(i) { | |
25231 | struct softnet_data *sd = &per_cpu(softnet_data, i); | |
25232 | ||
25233 | - skb_queue_head_init(&sd->input_pkt_queue); | |
25234 | - skb_queue_head_init(&sd->process_queue); | |
25235 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
25236 | + skb_queue_head_init_raw(&sd->process_queue); | |
25237 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
25238 | INIT_LIST_HEAD(&sd->poll_list); | |
25239 | sd->output_queue_tailp = &sd->output_queue; | |
25240 | #ifdef CONFIG_RPS | |
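
The dev_change_name()/netdev_get_name() change pairs devnet_rename_seq with devnet_rename_mutex. On RT a reader that hits a seqcount retry must not spin until the writer finishes (the writer could be preempted for a long time), so it takes and immediately drops the mutex, which blocks it until the rename has completed. A minimal userspace seqcount with that block-on-the-writer retry path, built from C11 atomics and a pthread mutex; it is a sketch of the idea, not the kernel's seqcount implementation (in particular the data copy is not a formally race-free C11 access):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    static atomic_uint seq;                 /* even: idle, odd: write in progress */
    static pthread_mutex_t rename_mutex = PTHREAD_MUTEX_INITIALIZER;
    static char name[16] = "eth0";

    static void write_name(const char *new_name)
    {
            pthread_mutex_lock(&rename_mutex);
            atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* begin: odd  */
            strncpy(name, new_name, sizeof(name) - 1);
            atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* end: even   */
            pthread_mutex_unlock(&rename_mutex);
    }

    static void read_name(char *out, size_t len)
    {
            unsigned int s;

            for (;;) {
                    s = atomic_load_explicit(&seq, memory_order_acquire);
                    if (!(s & 1)) {
                            strncpy(out, name, len - 1);
                            out[len - 1] = '\0';
                            if (atomic_load_explicit(&seq, memory_order_acquire) == s)
                                    return;         /* consistent snapshot */
                    }
                    /* Retry path: instead of spinning, wait for the writer by
                     * taking and dropping its mutex (the RT-friendly variant). */
                    pthread_mutex_lock(&rename_mutex);
                    pthread_mutex_unlock(&rename_mutex);
            }
    }

    int main(void)
    {
            char buf[16];

            write_name("wlan0");
            read_name(buf, sizeof(buf));
            printf("device name: %s\n", buf);
            return 0;
    }
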
25241 | diff --git a/net/core/filter.c b/net/core/filter.c | |
25242 | index cb06aceb512a..3585a8982287 100644 | |
25243 | --- a/net/core/filter.c | |
25244 | +++ b/net/core/filter.c | |
25245 | @@ -1592,7 +1592,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) | |
25246 | { | |
25247 | int ret; | |
25248 | ||
25249 | - if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { | |
25250 | + if (unlikely(xmit_rec_read() > XMIT_RECURSION_LIMIT)) { | |
25251 | net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); | |
25252 | kfree_skb(skb); | |
25253 | return -ENETDOWN; | |
25254 | @@ -1600,9 +1600,9 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) | |
25255 | ||
25256 | skb->dev = dev; | |
25257 | ||
25258 | - __this_cpu_inc(xmit_recursion); | |
25259 | + xmit_rec_inc(); | |
25260 | ret = dev_queue_xmit(skb); | |
25261 | - __this_cpu_dec(xmit_recursion); | |
25262 | + xmit_rec_dec(); | |
25263 | ||
25264 | return ret; | |
25265 | } | |
25266 | diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c | |
25267 | index cad8e791f28e..2a9364fe62a5 100644 | |
25268 | --- a/net/core/gen_estimator.c | |
25269 | +++ b/net/core/gen_estimator.c | |
25270 | @@ -84,7 +84,7 @@ struct gen_estimator | |
25271 | struct gnet_stats_basic_packed *bstats; | |
25272 | struct gnet_stats_rate_est64 *rate_est; | |
25273 | spinlock_t *stats_lock; | |
25274 | - seqcount_t *running; | |
25275 | + net_seqlock_t *running; | |
25276 | int ewma_log; | |
25277 | u32 last_packets; | |
25278 | unsigned long avpps; | |
25279 | @@ -213,7 +213,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, | |
25280 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
25281 | struct gnet_stats_rate_est64 *rate_est, | |
25282 | spinlock_t *stats_lock, | |
25283 | - seqcount_t *running, | |
25284 | + net_seqlock_t *running, | |
25285 | struct nlattr *opt) | |
25286 | { | |
25287 | struct gen_estimator *est; | |
25288 | @@ -309,7 +309,7 @@ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, | |
25289 | struct gnet_stats_basic_cpu __percpu *cpu_bstats, | |
25290 | struct gnet_stats_rate_est64 *rate_est, | |
25291 | spinlock_t *stats_lock, | |
25292 | - seqcount_t *running, struct nlattr *opt) | |
25293 | + net_seqlock_t *running, struct nlattr *opt) | |
25294 | { | |
25295 | gen_kill_estimator(bstats, rate_est); | |
25296 | return gen_new_estimator(bstats, cpu_bstats, rate_est, stats_lock, running, opt); | |
25297 | diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c | |
25298 | index 508e051304fb..bc3b17b78c94 100644 | |
25299 | --- a/net/core/gen_stats.c | |
25300 | +++ b/net/core/gen_stats.c | |
25301 | @@ -130,7 +130,7 @@ __gnet_stats_copy_basic_cpu(struct gnet_stats_basic_packed *bstats, | |
25302 | } | |
25303 | ||
25304 | void | |
25305 | -__gnet_stats_copy_basic(const seqcount_t *running, | |
25306 | +__gnet_stats_copy_basic(net_seqlock_t *running, | |
25307 | struct gnet_stats_basic_packed *bstats, | |
25308 | struct gnet_stats_basic_cpu __percpu *cpu, | |
25309 | struct gnet_stats_basic_packed *b) | |
25310 | @@ -143,10 +143,10 @@ __gnet_stats_copy_basic(const seqcount_t *running, | |
25311 | } | |
25312 | do { | |
25313 | if (running) | |
25314 | - seq = read_seqcount_begin(running); | |
25315 | + seq = net_seq_begin(running); | |
25316 | bstats->bytes = b->bytes; | |
25317 | bstats->packets = b->packets; | |
25318 | - } while (running && read_seqcount_retry(running, seq)); | |
25319 | + } while (running && net_seq_retry(running, seq)); | |
25320 | } | |
25321 | EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
25322 | ||
25323 | @@ -164,7 +164,7 @@ EXPORT_SYMBOL(__gnet_stats_copy_basic); | |
25324 | * if the room in the socket buffer was not sufficient. | |
25325 | */ | |
25326 | int | |
25327 | -gnet_stats_copy_basic(const seqcount_t *running, | |
25328 | +gnet_stats_copy_basic(net_seqlock_t *running, | |
25329 | struct gnet_dump *d, | |
25330 | struct gnet_stats_basic_cpu __percpu *cpu, | |
25331 | struct gnet_stats_basic_packed *b) | |
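The gen_estimator.c and gen_stats.c hunks above change only the type and the read-side helpers; the net_seqlock_t definition lives elsewhere in the series. A sketch of the intended shape, assuming RT wants a full seqlock_t (whose write side can nest under sleeping locks) while non-RT keeps the plain seqcount_t; names mirror the call sites above but the header itself is an assumption:

    #ifdef CONFIG_PREEMPT_RT_BASE
    typedef seqlock_t net_seqlock_t;
    static inline unsigned net_seq_begin(net_seqlock_t *s)             { return read_seqbegin(s); }
    static inline int      net_seq_retry(net_seqlock_t *s, unsigned n) { return read_seqretry(s, n); }
    #else
    typedef seqcount_t net_seqlock_t;
    static inline unsigned net_seq_begin(net_seqlock_t *s)             { return read_seqcount_begin(s); }
    static inline int      net_seq_retry(net_seqlock_t *s, unsigned n) { return read_seqcount_retry(s, n); }
    #endif

The const qualifier on the __gnet_stats_copy_basic() parameter is dropped at the same time, presumably so one signature works for both variants.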
25332 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | |
25333 | index 3864b4b68fa1..55c73ade9faa 100644 | |
25334 | --- a/net/core/skbuff.c | |
25335 | +++ b/net/core/skbuff.c | |
25336 | @@ -64,6 +64,7 @@ | |
25337 | #include <linux/errqueue.h> | |
25338 | #include <linux/prefetch.h> | |
25339 | #include <linux/if_vlan.h> | |
25340 | +#include <linux/locallock.h> | |
25341 | ||
25342 | #include <net/protocol.h> | |
25343 | #include <net/dst.h> | |
25344 | @@ -360,6 +361,8 @@ struct napi_alloc_cache { | |
25345 | ||
25346 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
25347 | static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); | |
25348 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
25349 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
25350 | ||
25351 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
25352 | { | |
25353 | @@ -367,10 +370,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
25354 | unsigned long flags; | |
25355 | void *data; | |
25356 | ||
25357 | - local_irq_save(flags); | |
25358 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
25359 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
25360 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
25361 | - local_irq_restore(flags); | |
25362 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
25363 | return data; | |
25364 | } | |
25365 | ||
25366 | @@ -389,9 +392,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |
25367 | ||
25368 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
25369 | { | |
25370 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
25371 | + struct napi_alloc_cache *nc; | |
25372 | + void *data; | |
25373 | ||
25374 | - return __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
25375 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25376 | + data = __alloc_page_frag(&nc->page, fragsz, gfp_mask); | |
25377 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25378 | + return data; | |
25379 | } | |
25380 | ||
25381 | void *napi_alloc_frag(unsigned int fragsz) | |
25382 | @@ -438,13 +445,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
25383 | if (sk_memalloc_socks()) | |
25384 | gfp_mask |= __GFP_MEMALLOC; | |
25385 | ||
25386 | - local_irq_save(flags); | |
25387 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
25388 | ||
25389 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
25390 | data = __alloc_page_frag(nc, len, gfp_mask); | |
25391 | pfmemalloc = nc->pfmemalloc; | |
25392 | ||
25393 | - local_irq_restore(flags); | |
25394 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
25395 | ||
25396 | if (unlikely(!data)) | |
25397 | return NULL; | |
25398 | @@ -485,9 +492,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |
25399 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
25400 | gfp_t gfp_mask) | |
25401 | { | |
25402 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
25403 | + struct napi_alloc_cache *nc; | |
25404 | struct sk_buff *skb; | |
25405 | void *data; | |
25406 | + bool pfmemalloc; | |
25407 | ||
25408 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
25409 | ||
25410 | @@ -505,7 +513,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
25411 | if (sk_memalloc_socks()) | |
25412 | gfp_mask |= __GFP_MEMALLOC; | |
25413 | ||
25414 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25415 | data = __alloc_page_frag(&nc->page, len, gfp_mask); | |
25416 | + pfmemalloc = nc->page.pfmemalloc; | |
25417 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25418 | if (unlikely(!data)) | |
25419 | return NULL; | |
25420 | ||
25421 | @@ -516,7 +527,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
25422 | } | |
25423 | ||
25424 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
25425 | - if (nc->page.pfmemalloc) | |
25426 | + if (pfmemalloc) | |
25427 | skb->pfmemalloc = 1; | |
25428 | skb->head_frag = 1; | |
25429 | ||
25430 | @@ -760,23 +771,26 @@ EXPORT_SYMBOL(consume_skb); | |
25431 | ||
25432 | void __kfree_skb_flush(void) | |
25433 | { | |
25434 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
25435 | + struct napi_alloc_cache *nc; | |
25436 | ||
25437 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25438 | /* flush skb_cache if containing objects */ | |
25439 | if (nc->skb_count) { | |
25440 | kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, | |
25441 | nc->skb_cache); | |
25442 | nc->skb_count = 0; | |
25443 | } | |
25444 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25445 | } | |
25446 | ||
25447 | static inline void _kfree_skb_defer(struct sk_buff *skb) | |
25448 | { | |
25449 | - struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
25450 | + struct napi_alloc_cache *nc; | |
25451 | ||
25452 | /* drop skb->head and call any destructors for packet */ | |
25453 | skb_release_all(skb); | |
25454 | ||
25455 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25456 | /* record skb to CPU local list */ | |
25457 | nc->skb_cache[nc->skb_count++] = skb; | |
25458 | ||
25459 | @@ -791,6 +805,7 @@ static inline void _kfree_skb_defer(struct sk_buff *skb) | |
25460 | nc->skb_cache); | |
25461 | nc->skb_count = 0; | |
25462 | } | |
25463 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
25464 | } | |
25465 | void __kfree_skb_defer(struct sk_buff *skb) | |
25466 | { | |
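Across skbuff.c the pattern is uniform: every place that relied on local_irq_save() plus this_cpu_ptr() to serialize access to a per-CPU cache now goes through a named local lock, which on RT is a per-CPU sleeping lock and on non-RT behaves like the old IRQ-disable. A minimal usage sketch with illustrative names (example_lock, example_cache, struct frag_cache and frag_cache_alloc() are not from the patch):

    static DEFINE_LOCAL_IRQ_LOCK(example_lock);
    static DEFINE_PER_CPU(struct frag_cache, example_cache);

    static void *example_alloc(unsigned int sz, gfp_t gfp)
    {
            struct frag_cache *c;
            unsigned long flags;
            void *data;

            /* non-RT: local_irq_save(); RT: per-CPU lock, still preemptible */
            local_lock_irqsave(example_lock, flags);
            c = this_cpu_ptr(&example_cache);
            data = frag_cache_alloc(c, sz, gfp);      /* hypothetical helper */
            local_unlock_irqrestore(example_lock, flags);
            return data;
    }

get_locked_var()/put_locked_var(), used in the NAPI paths above, combine taking the lock with fetching the per-CPU variable, which is why nc is only dereferenced between the two calls and why pfmemalloc is copied out before the lock is dropped.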
25467 | diff --git a/net/core/sock.c b/net/core/sock.c | |
25468 | index fd7b41edf1ce..e425d259a9f0 100644 | |
25469 | --- a/net/core/sock.c | |
25470 | +++ b/net/core/sock.c | |
25471 | @@ -2508,12 +2508,11 @@ void lock_sock_nested(struct sock *sk, int subclass) | |
25472 | if (sk->sk_lock.owned) | |
25473 | __lock_sock(sk); | |
25474 | sk->sk_lock.owned = 1; | |
25475 | - spin_unlock(&sk->sk_lock.slock); | |
25476 | + spin_unlock_bh(&sk->sk_lock.slock); | |
25477 | /* | |
25478 | * The sk_lock has mutex_lock() semantics here: | |
25479 | */ | |
25480 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
25481 | - local_bh_enable(); | |
25482 | } | |
25483 | EXPORT_SYMBOL(lock_sock_nested); | |
25484 | ||
25485 | diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c | |
25486 | index 38abe70e595f..443259a04862 100644 | |
25487 | --- a/net/ipv4/icmp.c | |
25488 | +++ b/net/ipv4/icmp.c | |
25489 | @@ -69,6 +69,7 @@ | |
25490 | #include <linux/jiffies.h> | |
25491 | #include <linux/kernel.h> | |
25492 | #include <linux/fcntl.h> | |
25493 | +#include <linux/sysrq.h> | |
25494 | #include <linux/socket.h> | |
25495 | #include <linux/in.h> | |
25496 | #include <linux/inet.h> | |
25497 | @@ -77,6 +78,7 @@ | |
25498 | #include <linux/string.h> | |
25499 | #include <linux/netfilter_ipv4.h> | |
25500 | #include <linux/slab.h> | |
25501 | +#include <linux/locallock.h> | |
25502 | #include <net/snmp.h> | |
25503 | #include <net/ip.h> | |
25504 | #include <net/route.h> | |
25505 | @@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; | |
25506 | * | |
25507 | * On SMP we have one ICMP socket per-cpu. | |
25508 | */ | |
25509 | +static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock); | |
25510 | + | |
25511 | static struct sock *icmp_sk(struct net *net) | |
25512 | { | |
25513 | return *this_cpu_ptr(net->ipv4.icmp_sk); | |
25514 | @@ -215,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
25515 | ||
25516 | local_bh_disable(); | |
25517 | ||
25518 | + local_lock(icmp_sk_lock); | |
25519 | sk = icmp_sk(net); | |
25520 | ||
25521 | if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { | |
25522 | /* This can happen if the output path signals a | |
25523 | * dst_link_failure() for an outgoing ICMP packet. | |
25524 | */ | |
25525 | + local_unlock(icmp_sk_lock); | |
25526 | local_bh_enable(); | |
25527 | return NULL; | |
25528 | } | |
25529 | @@ -230,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net) | |
25530 | static inline void icmp_xmit_unlock(struct sock *sk) | |
25531 | { | |
25532 | spin_unlock_bh(&sk->sk_lock.slock); | |
25533 | + local_unlock(icmp_sk_lock); | |
25534 | } | |
25535 | ||
25536 | int sysctl_icmp_msgs_per_sec __read_mostly = 1000; | |
25537 | @@ -358,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
25538 | struct sock *sk; | |
25539 | struct sk_buff *skb; | |
25540 | ||
25541 | + local_lock(icmp_sk_lock); | |
25542 | sk = icmp_sk(dev_net((*rt)->dst.dev)); | |
25543 | if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param, | |
25544 | icmp_param->data_len+icmp_param->head_len, | |
25545 | @@ -380,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, | |
25546 | skb->ip_summed = CHECKSUM_NONE; | |
25547 | ip_push_pending_frames(sk, fl4); | |
25548 | } | |
25549 | + local_unlock(icmp_sk_lock); | |
25550 | } | |
25551 | ||
25552 | /* | |
25553 | @@ -891,6 +900,30 @@ static bool icmp_redirect(struct sk_buff *skb) | |
25554 | } | |
25555 | ||
25556 | /* | |
25557 | + * 32bit and 64bit have different timestamp length, so we check for | |
25558 | + * the cookie at offset 20 and verify it is repeated at offset 50 | |
25559 | + */ | |
25560 | +#define CO_POS0 20 | |
25561 | +#define CO_POS1 50 | |
25562 | +#define CO_SIZE sizeof(int) | |
25563 | +#define ICMP_SYSRQ_SIZE 57 | |
25564 | + | |
25565 | +/* | |
25566 | + * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie | |
25567 | + * pattern and if it matches send the next byte as a trigger to sysrq. | |
25568 | + */ | |
25569 | +static void icmp_check_sysrq(struct net *net, struct sk_buff *skb) | |
25570 | +{ | |
25571 | + int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq); | |
25572 | + char *p = skb->data; | |
25573 | + | |
25574 | + if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) && | |
25575 | + !memcmp(&cookie, p + CO_POS1, CO_SIZE) && | |
25576 | + p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE]) | |
25577 | + handle_sysrq(p[CO_POS0 + CO_SIZE]); | |
25578 | +} | |
25579 | + | |
25580 | +/* | |
25581 | * Handle ICMP_ECHO ("ping") requests. | |
25582 | * | |
25583 | * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo | |
25584 | @@ -917,6 +950,11 @@ static bool icmp_echo(struct sk_buff *skb) | |
25585 | icmp_param.data_len = skb->len; | |
25586 | icmp_param.head_len = sizeof(struct icmphdr); | |
25587 | icmp_reply(&icmp_param, skb); | |
25588 | + | |
25589 | + if (skb->len == ICMP_SYSRQ_SIZE && | |
25590 | + net->ipv4.sysctl_icmp_echo_sysrq) { | |
25591 | + icmp_check_sysrq(net, skb); | |
25592 | + } | |
25593 | } | |
25594 | /* should there be an ICMP stat for ignored echos? */ | |
25595 | return true; | |
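For the echo-sysrq hook added above, the handler only inspects payload bytes: a four-byte cookie at offset 20, the same cookie again at offset 50, and the sysrq command byte directly after each copy, in a request whose data length matches ICMP_SYSRQ_SIZE (57). A userspace sketch that builds such a payload; how it is transmitted (raw socket, a suitable ping pattern, ...) is left out and depends on local tooling, and note the kernel compares against htonl() of the sysctl value, so the cookie passed in must already be in network byte order:

    #include <string.h>

    #define CO_POS0         20
    #define CO_POS1         50
    #define CO_SIZE         sizeof(int)
    #define ICMP_SYSRQ_SIZE 57

    /* buf must hold ICMP_SYSRQ_SIZE bytes of ICMP echo payload */
    static void build_sysrq_payload(char *buf, int cookie_be, char sysrq_cmd)
    {
            memset(buf, 0, ICMP_SYSRQ_SIZE);
            memcpy(buf + CO_POS0, &cookie_be, CO_SIZE);  /* cookie ...       */
            memcpy(buf + CO_POS1, &cookie_be, CO_SIZE);  /* ... repeated     */
            buf[CO_POS0 + CO_SIZE] = sysrq_cmd;          /* trigger byte     */
            buf[CO_POS1 + CO_SIZE] = sysrq_cmd;          /* must match above */
    }

The sysctl_net_ipv4.c hunk that follows wires up the icmp_echo_sysrq knob this check reads.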
25596 | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c | |
25597 | index 1cb67de106fe..332a485323f0 100644 | |
25598 | --- a/net/ipv4/sysctl_net_ipv4.c | |
25599 | +++ b/net/ipv4/sysctl_net_ipv4.c | |
25600 | @@ -681,6 +681,13 @@ static struct ctl_table ipv4_net_table[] = { | |
25601 | .proc_handler = proc_dointvec | |
25602 | }, | |
25603 | { | |
25604 | + .procname = "icmp_echo_sysrq", | |
25605 | + .data = &init_net.ipv4.sysctl_icmp_echo_sysrq, | |
25606 | + .maxlen = sizeof(int), | |
25607 | + .mode = 0644, | |
25608 | + .proc_handler = proc_dointvec | |
25609 | + }, | |
25610 | + { | |
25611 | .procname = "icmp_ignore_bogus_error_responses", | |
25612 | .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses, | |
25613 | .maxlen = sizeof(int), | |
25614 | diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c | |
25615 | index 7158d4f8dae4..0dc007fc6704 100644 | |
25616 | --- a/net/ipv4/tcp_ipv4.c | |
25617 | +++ b/net/ipv4/tcp_ipv4.c | |
25618 | @@ -62,6 +62,7 @@ | |
25619 | #include <linux/init.h> | |
25620 | #include <linux/times.h> | |
25621 | #include <linux/slab.h> | |
25622 | +#include <linux/locallock.h> | |
25623 | ||
25624 | #include <net/net_namespace.h> | |
25625 | #include <net/icmp.h> | |
25626 | @@ -565,6 +566,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) | |
25627 | } | |
25628 | EXPORT_SYMBOL(tcp_v4_send_check); | |
25629 | ||
25630 | +static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock); | |
25631 | /* | |
25632 | * This routine will send an RST to the other tcp. | |
25633 | * | |
25634 | @@ -692,6 +694,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) | |
25635 | offsetof(struct inet_timewait_sock, tw_bound_dev_if)); | |
25636 | ||
25637 | arg.tos = ip_hdr(skb)->tos; | |
25638 | + | |
25639 | + local_lock(tcp_sk_lock); | |
25640 | local_bh_disable(); | |
25641 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
25642 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
25643 | @@ -701,6 +705,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) | |
25644 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
25645 | __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); | |
25646 | local_bh_enable(); | |
25647 | + local_unlock(tcp_sk_lock); | |
25648 | ||
25649 | #ifdef CONFIG_TCP_MD5SIG | |
25650 | out: | |
25651 | @@ -776,6 +781,7 @@ static void tcp_v4_send_ack(struct net *net, | |
25652 | if (oif) | |
25653 | arg.bound_dev_if = oif; | |
25654 | arg.tos = tos; | |
25655 | + local_lock(tcp_sk_lock); | |
25656 | local_bh_disable(); | |
25657 | ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk), | |
25658 | skb, &TCP_SKB_CB(skb)->header.h4.opt, | |
25659 | @@ -784,6 +790,7 @@ static void tcp_v4_send_ack(struct net *net, | |
25660 | ||
25661 | __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); | |
25662 | local_bh_enable(); | |
25663 | + local_unlock(tcp_sk_lock); | |
25664 | } | |
25665 | ||
25666 | static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) | |
25667 | diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c | |
25668 | index 9dce3b157908..525efa5309ac 100644 | |
25669 | --- a/net/mac80211/rx.c | |
25670 | +++ b/net/mac80211/rx.c | |
25671 | @@ -4064,7 +4064,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, | |
25672 | struct ieee80211_supported_band *sband; | |
25673 | struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); | |
25674 | ||
25675 | - WARN_ON_ONCE(softirq_count() == 0); | |
25676 | + WARN_ON_ONCE_NONRT(softirq_count() == 0); | |
25677 | ||
25678 | if (WARN_ON(status->band >= NUM_NL80211_BANDS)) | |
25679 | goto drop; | |
25680 | diff --git a/net/netfilter/core.c b/net/netfilter/core.c | |
25681 | index f39276d1c2d7..10880c89d62f 100644 | |
25682 | --- a/net/netfilter/core.c | |
25683 | +++ b/net/netfilter/core.c | |
25684 | @@ -22,11 +22,17 @@ | |
25685 | #include <linux/proc_fs.h> | |
25686 | #include <linux/mutex.h> | |
25687 | #include <linux/slab.h> | |
25688 | +#include <linux/locallock.h> | |
25689 | #include <net/net_namespace.h> | |
25690 | #include <net/sock.h> | |
25691 | ||
25692 | #include "nf_internals.h" | |
25693 | ||
25694 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25695 | +DEFINE_LOCAL_IRQ_LOCK(xt_write_lock); | |
25696 | +EXPORT_PER_CPU_SYMBOL(xt_write_lock); | |
25697 | +#endif | |
25698 | + | |
25699 | static DEFINE_MUTEX(afinfo_mutex); | |
25700 | ||
25701 | const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; | |
25702 | diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c | |
25703 | index 33a4697d5539..475cb74bf825 100644 | |
25704 | --- a/net/packet/af_packet.c | |
25705 | +++ b/net/packet/af_packet.c | |
25706 | @@ -63,6 +63,7 @@ | |
25707 | #include <linux/if_packet.h> | |
25708 | #include <linux/wireless.h> | |
25709 | #include <linux/kernel.h> | |
25710 | +#include <linux/delay.h> | |
25711 | #include <linux/kmod.h> | |
25712 | #include <linux/slab.h> | |
25713 | #include <linux/vmalloc.h> | |
25714 | @@ -695,7 +696,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) | |
25715 | if (BLOCK_NUM_PKTS(pbd)) { | |
25716 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
25717 | /* Waiting for skb_copy_bits to finish... */ | |
25718 | - cpu_relax(); | |
25719 | + cpu_chill(); | |
25720 | } | |
25721 | } | |
25722 | ||
25723 | @@ -957,7 +958,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, | |
25724 | if (!(status & TP_STATUS_BLK_TMO)) { | |
25725 | while (atomic_read(&pkc->blk_fill_in_prog)) { | |
25726 | /* Waiting for skb_copy_bits to finish... */ | |
25727 | - cpu_relax(); | |
25728 | + cpu_chill(); | |
25729 | } | |
25730 | } | |
25731 | prb_close_block(pkc, pbd, po, status); | |
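Both busy-wait loops above change because on RT the writer that clears blk_fill_in_prog can itself be preempted by the spinning reader, so a pure cpu_relax() loop may never make progress. The cpu_chill() helper is defined elsewhere in the series; a hedged sketch of the idea, where the exact sleep primitive is an assumption:

    #ifdef CONFIG_PREEMPT_RT_FULL
    /* back off for roughly a tick so the preempted writer can finish */
    static inline void cpu_chill(void)
    {
            msleep(1);
    }
    #else
    # define cpu_chill()    cpu_relax()
    #endif

The same substitution appears in the rds/ib_rdma.c hunk just below for the same reason; both files also gain <linux/delay.h> because the RT variant sleeps.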
25732 | diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c | |
25733 | index 977f69886c00..f3e7a36b0396 100644 | |
25734 | --- a/net/rds/ib_rdma.c | |
25735 | +++ b/net/rds/ib_rdma.c | |
25736 | @@ -34,6 +34,7 @@ | |
25737 | #include <linux/slab.h> | |
25738 | #include <linux/rculist.h> | |
25739 | #include <linux/llist.h> | |
25740 | +#include <linux/delay.h> | |
25741 | ||
25742 | #include "rds_single_path.h" | |
25743 | #include "ib_mr.h" | |
25744 | @@ -210,7 +211,7 @@ static inline void wait_clean_list_grace(void) | |
25745 | for_each_online_cpu(cpu) { | |
25746 | flag = &per_cpu(clean_list_grace, cpu); | |
25747 | while (test_bit(CLEAN_LIST_BUSY_BIT, flag)) | |
25748 | - cpu_relax(); | |
25749 | + cpu_chill(); | |
25750 | } | |
25751 | } | |
25752 | ||
25753 | diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c | |
25754 | index 814d285ff802..d4d088e9be85 100644 | |
25755 | --- a/net/rxrpc/security.c | |
25756 | +++ b/net/rxrpc/security.c | |
25757 | @@ -19,9 +19,6 @@ | |
25758 | #include <keys/rxrpc-type.h> | |
25759 | #include "ar-internal.h" | |
25760 | ||
25761 | -static LIST_HEAD(rxrpc_security_methods); | |
25762 | -static DECLARE_RWSEM(rxrpc_security_sem); | |
25763 | - | |
25764 | static const struct rxrpc_security *rxrpc_security_types[] = { | |
25765 | [RXRPC_SECURITY_NONE] = &rxrpc_no_security, | |
25766 | #ifdef CONFIG_RXKAD | |
25767 | diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c | |
25768 | index 12ebde845523..99f3ce50c6c4 100644 | |
25769 | --- a/net/sched/sch_api.c | |
25770 | +++ b/net/sched/sch_api.c | |
25771 | @@ -975,7 +975,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |
25772 | rcu_assign_pointer(sch->stab, stab); | |
25773 | } | |
25774 | if (tca[TCA_RATE]) { | |
25775 | - seqcount_t *running; | |
25776 | + net_seqlock_t *running; | |
25777 | ||
25778 | err = -EOPNOTSUPP; | |
25779 | if (sch->flags & TCQ_F_MQROOT) | |
25780 | diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c | |
25781 | index 657c13362b19..cbab8d4d5864 100644 | |
25782 | --- a/net/sched/sch_generic.c | |
25783 | +++ b/net/sched/sch_generic.c | |
25784 | @@ -426,7 +426,11 @@ struct Qdisc noop_qdisc = { | |
25785 | .list = LIST_HEAD_INIT(noop_qdisc.list), | |
25786 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), | |
25787 | .dev_queue = &noop_netdev_queue, | |
25788 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25789 | + .running = __SEQLOCK_UNLOCKED(noop_qdisc.running), | |
25790 | +#else | |
25791 | .running = SEQCNT_ZERO(noop_qdisc.running), | |
25792 | +#endif | |
25793 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | |
25794 | }; | |
25795 | EXPORT_SYMBOL(noop_qdisc); | |
25796 | @@ -620,9 +624,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | |
25797 | lockdep_set_class(&sch->busylock, | |
25798 | dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); | |
25799 | ||
25800 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25801 | + seqlock_init(&sch->running); | |
25802 | + lockdep_set_class(&sch->running.seqcount, | |
25803 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
25804 | + lockdep_set_class(&sch->running.lock, | |
25805 | + dev->qdisc_running_key ?: &qdisc_running_key); | |
25806 | +#else | |
25807 | seqcount_init(&sch->running); | |
25808 | lockdep_set_class(&sch->running, | |
25809 | dev->qdisc_running_key ?: &qdisc_running_key); | |
25810 | +#endif | |
25811 | ||
25812 | sch->ops = ops; | |
25813 | sch->enqueue = ops->enqueue; | |
25814 | @@ -917,7 +929,7 @@ void dev_deactivate_many(struct list_head *head) | |
25815 | /* Wait for outstanding qdisc_run calls. */ | |
25816 | list_for_each_entry(dev, head, close_list) | |
25817 | while (some_qdisc_is_busy(dev)) | |
25818 | - yield(); | |
25819 | + msleep(1); | |
25820 | } | |
25821 | ||
25822 | void dev_deactivate(struct net_device *dev) | |
25823 | diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c | |
25824 | index c3f652395a80..2dd84493528e 100644 | |
25825 | --- a/net/sunrpc/svc_xprt.c | |
25826 | +++ b/net/sunrpc/svc_xprt.c | |
25827 | @@ -396,7 +396,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
25828 | goto out; | |
25829 | } | |
25830 | ||
25831 | - cpu = get_cpu(); | |
25832 | + cpu = get_cpu_light(); | |
25833 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | |
25834 | ||
25835 | atomic_long_inc(&pool->sp_stats.packets); | |
25836 | @@ -432,7 +432,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
25837 | ||
25838 | atomic_long_inc(&pool->sp_stats.threads_woken); | |
25839 | wake_up_process(rqstp->rq_task); | |
25840 | - put_cpu(); | |
25841 | + put_cpu_light(); | |
25842 | goto out; | |
25843 | } | |
25844 | rcu_read_unlock(); | |
25845 | @@ -453,7 +453,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) | |
25846 | goto redo_search; | |
25847 | } | |
25848 | rqstp = NULL; | |
25849 | - put_cpu(); | |
25850 | + put_cpu_light(); | |
25851 | out: | |
25852 | trace_svc_xprt_do_enqueue(xprt, rqstp); | |
25853 | } | |
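get_cpu() pins the task by disabling preemption until put_cpu(), which is more than svc_xprt_do_enqueue() needs: it only wants a stable CPU number to pick a pool. A sketch of what a "light" variant could look like, assuming migrate_disable()/migrate_enable() are available (as in the RT tree); the non-RT side simply falls back to the original helpers:

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define get_cpu_light()        ({ migrate_disable(); smp_processor_id(); })
    # define put_cpu_light()        migrate_enable()
    #else
    # define get_cpu_light()        get_cpu()
    # define put_cpu_light()        put_cpu()
    #endif

With migration rather than preemption disabled, the enqueue path can still be preempted by a higher-priority task without tripping atomic-context checks.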
25854 | diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h | |
25855 | index 6fdc97ef6023..523e0420d7f0 100755 | |
25856 | --- a/scripts/mkcompile_h | |
25857 | +++ b/scripts/mkcompile_h | |
25858 | @@ -4,7 +4,8 @@ TARGET=$1 | |
25859 | ARCH=$2 | |
25860 | SMP=$3 | |
25861 | PREEMPT=$4 | |
25862 | -CC=$5 | |
25863 | +RT=$5 | |
25864 | +CC=$6 | |
25865 | ||
25866 | vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; } | |
25867 | ||
25868 | @@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION" | |
25869 | CONFIG_FLAGS="" | |
25870 | if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi | |
25871 | if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi | |
25872 | +if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi | |
25873 | UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP" | |
25874 | ||
25875 | # Truncate to maximum length | |
25876 | diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c | |
25877 | index c61fd50f771f..1583de410f62 100644 | |
25878 | --- a/sound/core/pcm_native.c | |
25879 | +++ b/sound/core/pcm_native.c | |
25880 | @@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock); | |
25881 | void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream) | |
25882 | { | |
25883 | if (!substream->pcm->nonatomic) | |
25884 | - local_irq_disable(); | |
25885 | + local_irq_disable_nort(); | |
25886 | snd_pcm_stream_lock(substream); | |
25887 | } | |
25888 | EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq); | |
25889 | @@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream) | |
25890 | { | |
25891 | snd_pcm_stream_unlock(substream); | |
25892 | if (!substream->pcm->nonatomic) | |
25893 | - local_irq_enable(); | |
25894 | + local_irq_enable_nort(); | |
25895 | } | |
25896 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq); | |
25897 | ||
25898 | @@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) | |
25899 | { | |
25900 | unsigned long flags = 0; | |
25901 | if (!substream->pcm->nonatomic) | |
25902 | - local_irq_save(flags); | |
25903 | + local_irq_save_nort(flags); | |
25904 | snd_pcm_stream_lock(substream); | |
25905 | return flags; | |
25906 | } | |
25907 | @@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, | |
25908 | { | |
25909 | snd_pcm_stream_unlock(substream); | |
25910 | if (!substream->pcm->nonatomic) | |
25911 | - local_irq_restore(flags); | |
25912 | + local_irq_restore_nort(flags); | |
25913 | } | |
25914 | EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore); | |
25915 |
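The pcm_native.c hunk follows the same *_nort() convention used throughout the series: keep the IRQ-disable semantics on non-RT kernels and turn the calls into no-ops on RT, where the stream lock already provides the needed exclusion in preemptible context. A sketch of plausible definitions; the flags handling on the RT side is an assumption:

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_disable_nort()       do { } while (0)
    # define local_irq_enable_nort()        do { } while (0)
    # define local_irq_save_nort(flags)     do { local_save_flags(flags); } while (0)
    # define local_irq_restore_nort(flags)  do { (void)(flags); } while (0)
    #else
    # define local_irq_disable_nort()       local_irq_disable()
    # define local_irq_enable_nort()        local_irq_enable()
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif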