1 | diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt |
2 | new file mode 100644 | |
3 | index 000000000000..cb61516483d3 | |
4 | --- /dev/null | |
5 | +++ b/Documentation/hwlat_detector.txt | |
6 | @@ -0,0 +1,64 @@ | |
7 | +Introduction: | |
8 | +------------- | |
9 | + | |
10 | +The module hwlat_detector is a special purpose kernel module that is used to | |
11 | +detect large system latencies induced by the behavior of certain underlying | |
12 | +hardware or firmware, independent of Linux itself. The code was developed | |
13 | +originally to detect SMIs (System Management Interrupts) on x86 systems, | |
14 | +however there is nothing x86-specific about this patchset. It was | |
15 | +originally written for use by the "RT" patch since the Real Time | |
16 | +kernel is highly latency sensitive. | |
17 | + | |
18 | +SMIs are usually not serviced by the Linux kernel, which typically does not | |
19 | +even know that they are occurring. SMIs are instead set up by BIOS code | |
20 | +and are serviced by BIOS code, usually for "critical" events such as | |
21 | +management of thermal sensors and fans. Sometimes though, SMIs are used for | |
22 | +other tasks and those tasks can spend an inordinate amount of time in the | |
23 | +handler (sometimes measured in milliseconds). Obviously this is a problem if | |
24 | +you are trying to keep event service latencies down in the microsecond range. | |
25 | + | |
26 | +The hardware latency detector works by hogging all of the cpus for configurable | |
27 | +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter | |
28 | +for some period, then looking for gaps in the TSC data. Any gap indicates a | |
29 | +time when the polling was interrupted, and since the machine is stopped and | |
30 | +interrupts are turned off, the only thing that could do that would be an SMI. | |
31 | + | |
32 | +Note that the SMI detector should *NEVER* be used in a production environment. | |
33 | +It is intended to be run manually to determine if the hardware platform has a | |
34 | +problem with long system firmware service routines. | |
35 | + | |
36 | +Usage: | |
37 | +------ | |
38 | + | |
39 | +Loading the module hwlat_detector with the parameter "enabled=1" (or by | |
40 | +toggling on the "enable" entry in the "hwlat_detector" debugfs directory) is the only | |
41 | +step required to start the hwlat_detector. It is possible to redefine the | |
42 | +threshold in microseconds (us) above which latency spikes will be taken | |
43 | +into account (parameter "threshold="). | |
44 | + | |
45 | +Example: | |
46 | + | |
47 | + # modprobe hwlat_detector enabled=1 threshold=100 | |
48 | + | |
49 | +After the module is loaded, it creates a directory named "hwlat_detector" under | |
50 | +the debugfs mountpoint ("/debug/hwlat_detector" in this text). It is necessary | |
51 | +to have debugfs mounted, which might be at /sys/debug on your system. | |
52 | + | |
53 | +The /debug/hwlat_detector interface contains the following files: | |
54 | + | |
55 | +count - number of latency spikes observed since last reset | |
56 | +enable - a global enable/disable toggle (0/1), resets count | |
57 | +max - maximum hardware latency actually observed (usecs) | |
58 | +sample - a pipe from which to read current raw sample data | |
59 | + in the format <timestamp> <latency observed usecs> | |
60 | + (can be opened O_NONBLOCK for a single sample) | |
61 | +threshold - minimum latency value to be considered (usecs) | |
62 | +width - time period to sample with CPUs held (usecs) | |
63 | + must be less than the total window size (enforced) | |
64 | +window - total period of sampling, width being inside (usecs) | |
65 | + | |
66 | +By default we will set width to 500,000 and window to 1,000,000, meaning that | |
67 | +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we | |
68 | +observe any latencies that exceed the threshold (initially 100 usecs), | |
69 | +then we write to a global sample ring buffer of 8K samples, which is | |
70 | +consumed by reading from the "sample" (pipe) debugfs file interface. | |
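The interface described above can be exercised end to end from a shell. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug (this text uses the older /debug mountpoint) and that the module parameters behave as documented:

    # mount -t debugfs nodev /sys/kernel/debug
    # modprobe hwlat_detector enabled=1 threshold=100
    # cd /sys/kernel/debug/hwlat_detector
    # echo 250000 > width        # sample for 0.25s per window (must be < window)
    # echo 1000000 > window      # one 1s sampling window per period
    # cat sample                 # blocks, printing "<timestamp> <latency usecs>" per spike
    # cat count max              # spikes seen so far and worst latency observed

Remember the warning above: this hogs the CPUs while sampling and is not for production use.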
71 | diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt | |
72 | index c360f80c3473..5489dea355a2 100644 | |
73 | --- a/Documentation/kernel-parameters.txt |
74 | +++ b/Documentation/kernel-parameters.txt | |
75 | @@ -1636,6 +1636,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |
76 | ip= [IP_PNP] |
77 | See Documentation/filesystems/nfs/nfsroot.txt. | |
78 | ||
79 | + irqaffinity= [SMP] Set the default irq affinity mask | |
80 | + Format: | |
81 | + <cpu number>,...,<cpu number> | |
82 | + or | |
83 | + <cpu number>-<cpu number> | |
84 | + (must be a positive range in ascending order) | |
85 | + or a mixture | |
86 | + <cpu number>,...,<cpu number>-<cpu number> | |
87 | + | |
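For illustration, a hedged example of the mixed format (the CPU numbers are arbitrary), together with one way to confirm the result after boot:

    # appended to the kernel command line in the bootloader:
    irqaffinity=0,4,8-11

    # the resulting default IRQ affinity mask can then be read back via:
    $ cat /proc/irq/default_smp_affinity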
88 | irqfixup [HW] | |
89 | When an interrupt is not handled search all handlers | |
90 | for it. Intended to get systems with badly broken | |
91 | diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt | |
92 | index 13f5619b2203..f64d075ba647 100644 | |
93 | --- a/Documentation/sysrq.txt | |
94 | +++ b/Documentation/sysrq.txt | |
95 | @@ -59,10 +59,17 @@ On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>, | |
96 | On other - If you know of the key combos for other architectures, please | |
97 | let me know so I can add them to this section. | |
98 | ||
99 | -On all - write a character to /proc/sysrq-trigger. e.g.: | |
100 | - | |
101 | +On all - write a character to /proc/sysrq-trigger, e.g.: | |
102 | echo t > /proc/sysrq-trigger | |
103 | ||
104 | +On all - Enable network SysRq by writing a cookie to icmp_echo_sysrq, e.g. | |
105 | + echo 0x01020304 >/proc/sys/net/ipv4/icmp_echo_sysrq | |
106 | + Send an ICMP echo request with this pattern plus the particular | |
107 | + SysRq command key. Example: | |
108 | + # ping -c1 -s57 -p0102030468 | |
109 | + will trigger the SysRq-H (help) command. | |
110 | + | |
111 | + | |
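The -p pattern is simply the four cookie bytes followed by the ASCII code of the command key, so any key can be sent this way. A hedged sketch with the cookie configured above ('t' is 0x74):

    $ printf '%x\n' "'t"                      # -> 74, ASCII code of 't'
    $ ping -c1 -s57 -p0102030474 target-host  # triggers SysRq-T (dump tasks)

Here target-host is a placeholder for the machine that has icmp_echo_sysrq set.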
112 | * What are the 'command' keys? | |
113 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
114 | 'b' - Will immediately reboot the system without syncing or unmounting | |
115 | diff --git a/Documentation/trace/histograms.txt b/Documentation/trace/histograms.txt | |
116 | new file mode 100644 | |
117 | index 000000000000..6f2aeabf7faa | |
118 | --- /dev/null | |
119 | +++ b/Documentation/trace/histograms.txt | |
120 | @@ -0,0 +1,186 @@ | |
121 | + Using the Linux Kernel Latency Histograms | |
122 | + | |
123 | + | |
124 | +This document gives a short explanation of how to enable, configure and use | |
125 | +latency histograms. Latency histograms are primarily relevant in the | |
126 | +context of real-time enabled kernels (CONFIG_PREEMPT/CONFIG_PREEMPT_RT) | |
127 | +and are used in the quality management of the Linux real-time | |
128 | +capabilities. | |
129 | + | |
130 | + | |
131 | +* Purpose of latency histograms | |
132 | + | |
133 | +A latency histogram continuously accumulates the frequencies of latency | |
134 | +data. There are two types of histograms: | |
135 | +- potential sources of latencies | |
136 | +- effective latencies | |
137 | + | |
138 | + | |
139 | +* Potential sources of latencies | |
140 | + | |
141 | +Potential sources of latencies are code segments where interrupts, | |
142 | +preemption or both are disabled (aka critical sections). To create | |
143 | +histograms of potential sources of latency, the kernel stores the time | |
144 | +stamp at the start of a critical section, determines the time elapsed | |
145 | +when the end of the section is reached, and increments the frequency | |
146 | +counter of that latency value - irrespective of whether any concurrently | |
147 | +running process is affected by latency or not. | |
148 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
149 | + CONFIG_INTERRUPT_OFF_LATENCY | |
150 | + CONFIG_PREEMPT_OFF_LATENCY | |
151 | + | |
152 | + | |
153 | +* Effective latencies | |
154 | + | |
155 | +Effective latencies are those that actually occur during wakeup of a process. To | |
156 | +determine effective latencies, the kernel stores the time stamp when a | |
157 | +process is scheduled to be woken up, and determines the duration of the | |
158 | +wakeup time shortly before control is passed over to this process. Note | |
159 | +that the apparent latency in user space may be somewhat longer, since the | |
160 | +process may be interrupted after control is passed over to it but before | |
161 | +the execution in user space takes place. Simply measuring the interval | |
162 | +between enqueuing and wakeup may also not be appropriate in cases when a | |
163 | +process is scheduled as a result of a timer expiration. The timer may have | |
164 | +missed its deadline, e.g. due to disabled interrupts, but this latency | |
165 | +would not be registered. Therefore, the offsets of missed timers are | |
166 | +recorded in a separate histogram. If both wakeup latency and missed timer | |
167 | +offsets are configured and enabled, a third histogram may be enabled that | |
168 | +records the overall latency as a sum of the timer latency, if any, and the | |
169 | +wakeup latency. This histogram is called "timerandwakeup". | |
170 | +- Configuration items (in the Kernel hacking/Tracers submenu) | |
171 | + CONFIG_WAKEUP_LATENCY | |
172 | + CONFIG_MISSED_TIMER_OFFSETS | |
173 | + | |
174 | + | |
175 | +* Usage | |
176 | + | |
177 | +The interface to the administration of the latency histograms is located | |
178 | +in the debugfs file system. To mount it, either enter | |
179 | + | |
180 | +mount -t sysfs nodev /sys | |
181 | +mount -t debugfs nodev /sys/kernel/debug | |
182 | + | |
183 | +from shell command line level, or add | |
184 | + | |
185 | +nodev /sys sysfs defaults 0 0 | |
186 | +nodev /sys/kernel/debug debugfs defaults 0 0 | |
187 | + | |
188 | +to the file /etc/fstab. All latency histogram related files are then | |
189 | +available in the directory /sys/kernel/debug/tracing/latency_hist. A | |
190 | +particular histogram type is enabled by writing non-zero to the related | |
191 | +variable in the /sys/kernel/debug/tracing/latency_hist/enable directory. | |
192 | +Select "preemptirqsoff" for the histograms of potential sources of | |
193 | +latencies and "wakeup" for histograms of effective latencies etc. The | |
194 | +histogram data - one per CPU - are available in the files | |
195 | + | |
196 | +/sys/kernel/debug/tracing/latency_hist/preemptoff/CPUx | |
197 | +/sys/kernel/debug/tracing/latency_hist/irqsoff/CPUx | |
198 | +/sys/kernel/debug/tracing/latency_hist/preemptirqsoff/CPUx | |
199 | +/sys/kernel/debug/tracing/latency_hist/wakeup/CPUx | |
200 | +/sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio/CPUx | |
201 | +/sys/kernel/debug/tracing/latency_hist/missed_timer_offsets/CPUx | |
202 | +/sys/kernel/debug/tracing/latency_hist/timerandwakeup/CPUx | |
203 | + | |
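A hedged sketch tying the steps above together (histogram and file names as listed; debugfs mounted as described earlier):

    # enable wakeup latency histograms:
    echo 1 > /sys/kernel/debug/tracing/latency_hist/enable/wakeup

    # let the system run under load for a while, then inspect CPU0:
    cat /sys/kernel/debug/tracing/latency_hist/wakeup/CPU0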
204 | +The histograms are reset by writing non-zero to the file "reset" in a | |
205 | +particular latency directory. To reset all latency data, use | |
206 | + | |
207 | +#!/bin/sh | |
208 | + | |
209 | +TRACINGDIR=/sys/kernel/debug/tracing | |
210 | +HISTDIR=$TRACINGDIR/latency_hist | |
211 | + | |
212 | +if test -d $HISTDIR | |
213 | +then | |
214 | + cd $HISTDIR | |
215 | + for i in `find . | grep /reset$` | |
216 | + do | |
217 | + echo 1 >$i | |
218 | + done | |
219 | +fi | |
220 | + | |
221 | + | |
222 | +* Data format | |
223 | + | |
224 | +Latency data are stored with a resolution of one microsecond. The | |
225 | +maximum latency is 10,240 microseconds. The data are only valid if the | |
226 | +overflow register is empty. Every output line contains the latency in | |
227 | +microseconds in the first field and the number of samples in the second | |
228 | +field. To display only lines with a positive latency count, use, for | |
229 | +example, | |
230 | + | |
231 | +grep -v " 0$" /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0 | |
232 | + | |
233 | +#Minimum latency: 0 microseconds. | |
234 | +#Average latency: 0 microseconds. | |
235 | +#Maximum latency: 25 microseconds. | |
236 | +#Total samples: 3104770694 | |
237 | +#There are 0 samples greater or equal than 10240 microseconds | |
238 | +#usecs samples | |
239 | + 0 2984486876 | |
240 | + 1 49843506 | |
241 | + 2 58219047 | |
242 | + 3 5348126 | |
243 | + 4 2187960 | |
244 | + 5 3388262 | |
245 | + 6 959289 | |
246 | + 7 208294 | |
247 | + 8 40420 | |
248 | + 9 4485 | |
249 | + 10 14918 | |
250 | + 11 18340 | |
251 | + 12 25052 | |
252 | + 13 19455 | |
253 | + 14 5602 | |
254 | + 15 969 | |
255 | + 16 47 | |
256 | + 17 18 | |
257 | + 18 14 | |
258 | + 19 1 | |
259 | + 20 3 | |
260 | + 21 2 | |
261 | + 22 5 | |
262 | + 23 2 | |
263 | + 25 1 | |
264 | + | |
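Since every data line is "<latency in usecs> <sample count>", simple summaries can be computed directly from such a file. A hedged awk sketch over the format shown above:

    awk '!/^#/ { total += $2; if ($2 > 0) max = $1 }
         END { print "total:", total, "samples, max:", max, "usecs" }' \
        /sys/kernel/debug/tracing/latency_hist/preemptoff/CPU0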
265 | + | |
266 | +* Wakeup latency of a selected process | |
267 | + | |
268 | +To only collect wakeup latency data of a particular process, write the | |
269 | +PID of the requested process to | |
270 | + | |
271 | +/sys/kernel/debug/tracing/latency_hist/wakeup/pid | |
272 | + | |
273 | +PIDs are not considered if this variable is set to 0. | |
274 | + | |
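A hedged sketch of a per-process measurement (the workload is a placeholder; $$ is the measuring shell's own PID):

    HIST=/sys/kernel/debug/tracing/latency_hist
    echo $$ > $HIST/wakeup/pid     # restrict recording to this shell
    echo 1 > $HIST/wakeup/reset    # start from a clean histogram
    # ... run the workload in this shell ...
    cat $HIST/wakeup/CPU*          # wakeup latencies attributed to this PID
    echo 0 > $HIST/wakeup/pid      # back to system-wide recording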
275 | + | |
276 | +* Details of the process with the highest wakeup latency so far | |
277 | + | |
278 | +Selected data of the process that suffered from the highest wakeup | |
279 | +latency that occurred in a particular CPU are available in the file | |
280 | + | |
281 | +/sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPUx. | |
282 | + | |
283 | +In addition, other relevant system data at the time when the | |
284 | +latency occurred are given. | |
285 | + | |
286 | +The format of the data is (all in one line): | |
287 | +<PID> <Priority> <Latency> (<Timeroffset>) <Command> \ | |
288 | +<- <PID> <Priority> <Command> <Timestamp> | |
289 | + | |
290 | +The value of <Timeroffset> is only relevant in the combined timer | |
291 | +and wakeup latency recording. In the wakeup recording, it is | |
292 | +always 0; in the missed_timer_offsets recording, it is the same | |
293 | +as <Latency>. | |
294 | + | |
295 | +When retrospectively searching for the origin of a latency while | |
296 | +tracing was not enabled, it may be helpful to know the name and | |
297 | +some basic data of the task that (finally) switched to the | |
298 | +late real-time task. In addition to the victim's data, the | |
299 | +data of the possible culprit are therefore also displayed after the | |
300 | +"<-" symbol. | |
301 | + | |
302 | +Finally, the timestamp of the time when the latency occurred | |
303 | +in <seconds>.<microseconds> after the most recent system boot | |
304 | +is provided. | |
305 | + | |
306 | +These data are also reset when the wakeup histogram is reset. | |
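The fixed field order makes these files easy to post-process. A hedged awk sketch following the format string above (field splitting assumes command names without spaces):

    # victim task, its wakeup latency, and the task that switched to it, CPU0:
    awk '{ print "victim:", $5, "(pid", $1") lat:", $3, "us, previous:", $9 }' \
        /sys/kernel/debug/tracing/latency_hist/wakeup/max_latency-CPU0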
307 | diff --git a/Makefile b/Makefile | |
308 | index 98239d56924c..5ed3edefebde 100644 | |
309 | --- a/Makefile |
310 | +++ b/Makefile | |
311 | @@ -394,7 +394,7 @@ KBUILD_CPPFLAGS := -D__KERNEL__ | |
312 | KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ | |
313 | -fno-strict-aliasing -fno-common \ | |
314 | -Werror-implicit-function-declaration \ | |
315 | - -Wno-format-security \ | |
316 | + -Wno-format-security -fno-PIE \ | |
317 | -std=gnu89 | |
318 | ||
319 | KBUILD_AFLAGS_KERNEL := | |
320 | diff --git a/arch/Kconfig b/arch/Kconfig | |
321 | index 4e949e58b192..3b26d76933fb 100644 | |
322 | --- a/arch/Kconfig | |
323 | +++ b/arch/Kconfig | |
324 | @@ -9,6 +9,7 @@ config OPROFILE | |
325 | tristate "OProfile system profiling" | |
326 | depends on PROFILING | |
327 | depends on HAVE_OPROFILE | |
328 | + depends on !PREEMPT_RT_FULL | |
329 | select RING_BUFFER | |
330 | select RING_BUFFER_ALLOW_SWAP | |
331 | help | |
332 | @@ -52,6 +53,7 @@ config KPROBES | |
333 | config JUMP_LABEL | |
334 | bool "Optimize very unlikely/likely branches" | |
335 | depends on HAVE_ARCH_JUMP_LABEL | |
336 | + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) | |
337 | help | |
338 | This option enables a transparent branch optimization that | |
339 | makes certain almost-always-true or almost-always-false branch | |
340 | diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig | |
341 | index 34e1569a11ee..79c4603e9453 100644 | |
342 | --- a/arch/arm/Kconfig | |
343 | +++ b/arch/arm/Kconfig | |
344 | @@ -33,7 +33,7 @@ config ARM | |
345 | select HARDIRQS_SW_RESEND | |
346 | select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) | |
347 | select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 | |
348 | - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 | |
349 | + select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !PREEMPT_RT_BASE | |
350 | select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 | |
351 | select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) | |
352 | select HAVE_ARCH_TRACEHOOK | |
353 | @@ -68,6 +68,7 @@ config ARM | |
354 | select HAVE_PERF_EVENTS | |
355 | select HAVE_PERF_REGS | |
356 | select HAVE_PERF_USER_STACK_DUMP | |
357 | + select HAVE_PREEMPT_LAZY | |
358 | select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) | |
359 | select HAVE_REGS_AND_STACK_ACCESS_API | |
360 | select HAVE_SYSCALL_TRACEPOINTS | |
361 | diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h | |
362 | index 12ebfcc1d539..c962084605bc 100644 | |
363 | --- a/arch/arm/include/asm/switch_to.h | |
364 | +++ b/arch/arm/include/asm/switch_to.h | |
365 | @@ -3,6 +3,13 @@ | |
366 | ||
367 | #include <linux/thread_info.h> | |
368 | ||
369 | +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM | |
370 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); | |
371 | +#else | |
372 | +static inline void | |
373 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
374 | +#endif | |
375 | + | |
376 | /* | |
377 | * For v7 SMP cores running a preemptible kernel we may be pre-empted | |
378 | * during a TLB maintenance operation, so execute an inner-shareable dsb | |
379 | @@ -25,6 +32,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info | |
380 | #define switch_to(prev,next,last) \ | |
381 | do { \ | |
382 | __complete_pending_tlbi(); \ | |
383 | + switch_kmaps(prev, next); \ | |
384 | last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ | |
385 | } while (0) | |
386 | ||
387 | diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h | |
388 | index 776757d1604a..1f36a4eccc72 100644 | |
389 | --- a/arch/arm/include/asm/thread_info.h | |
390 | +++ b/arch/arm/include/asm/thread_info.h | |
391 | @@ -49,6 +49,7 @@ struct cpu_context_save { | |
392 | struct thread_info { | |
393 | unsigned long flags; /* low level flags */ | |
394 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
395 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
396 | mm_segment_t addr_limit; /* address limit */ | |
397 | struct task_struct *task; /* main task structure */ | |
398 | __u32 cpu; /* cpu */ | |
399 | @@ -142,7 +143,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
400 | #define TIF_SYSCALL_TRACE 4 /* syscall trace active */ | |
401 | #define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ | |
402 | #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ | |
403 | -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ | |
404 | +#define TIF_SECCOMP 8 /* seccomp syscall filtering active */ | |
405 | +#define TIF_NEED_RESCHED_LAZY 7 | |
406 | ||
407 | #define TIF_NOHZ 12 /* in adaptive nohz mode */ | |
408 | #define TIF_USING_IWMMXT 17 | |
409 | @@ -152,6 +154,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
410 | #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) | |
411 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
412 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
413 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
414 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
415 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
416 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
417 | @@ -167,7 +170,8 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, | |
418 | * Change these and you break ASM code in entry-common.S | |
419 | */ | |
420 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
421 | - _TIF_NOTIFY_RESUME | _TIF_UPROBE) | |
422 | + _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
423 | + _TIF_NEED_RESCHED_LAZY) | |
424 | ||
425 | #endif /* __KERNEL__ */ | |
426 | #endif /* __ASM_ARM_THREAD_INFO_H */ | |
427 | diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c | |
428 | index 871b8267d211..4dbe70de7318 100644 | |
429 | --- a/arch/arm/kernel/asm-offsets.c | |
430 | +++ b/arch/arm/kernel/asm-offsets.c | |
431 | @@ -65,6 +65,7 @@ int main(void) | |
432 | BLANK(); | |
433 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
434 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
435 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
436 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
437 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
438 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
439 | diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S | |
440 | index 3ce377f7251f..d044cea59f54 100644 | |
441 | --- a/arch/arm/kernel/entry-armv.S | |
442 | +++ b/arch/arm/kernel/entry-armv.S | |
443 | @@ -215,11 +215,18 @@ __irq_svc: | |
444 | #ifdef CONFIG_PREEMPT | |
445 | get_thread_info tsk | |
446 | ldr r8, [tsk, #TI_PREEMPT] @ get preempt count | |
447 | - ldr r0, [tsk, #TI_FLAGS] @ get flags | |
448 | teq r8, #0 @ if preempt count != 0 | |
449 | + bne 1f @ return from exception | |
450 | + ldr r0, [tsk, #TI_FLAGS] @ get flags | |
451 | + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set | |
452 | + blne svc_preempt @ preempt! | |
453 | + | |
454 | + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
455 | + teq r8, #0 @ if preempt lazy count != 0 | |
456 | movne r0, #0 @ force flags to 0 | |
457 | - tst r0, #_TIF_NEED_RESCHED | |
458 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
459 | blne svc_preempt | |
460 | +1: | |
461 | #endif | |
462 | ||
463 | svc_exit r5, irq = 1 @ return from exception | |
464 | @@ -234,8 +241,14 @@ svc_preempt: | |
465 | 1: bl preempt_schedule_irq @ irq en/disable is done inside | |
466 | ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS | |
467 | tst r0, #_TIF_NEED_RESCHED | |
468 | + bne 1b | |
469 | + tst r0, #_TIF_NEED_RESCHED_LAZY | |
470 | reteq r8 @ go again | |
471 | - b 1b | |
472 | + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count | |
473 | + teq r0, #0 @ if preempt lazy count != 0 | |
474 | + beq 1b | |
475 | + ret r8 @ go again | |
476 | + | |
477 | #endif | |
478 | ||
479 | __und_fault: | |
480 | diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S | |
481 | index 30a7228eaceb..c3bd6cbfce4b 100644 | |
482 | --- a/arch/arm/kernel/entry-common.S | |
483 | +++ b/arch/arm/kernel/entry-common.S | |
484 | @@ -36,7 +36,9 @@ ret_fast_syscall: | |
485 | UNWIND(.cantunwind ) | |
486 | disable_irq_notrace @ disable interrupts | |
487 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
488 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
489 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
490 | + bne fast_work_pending | |
491 | + tst r1, #_TIF_SECCOMP | |
492 | bne fast_work_pending | |
493 | ||
494 | /* perform architecture specific actions before user return */ | |
495 | @@ -62,8 +64,11 @@ ret_fast_syscall: | |
496 | str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 | |
497 | disable_irq_notrace @ disable interrupts | |
498 | ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing | |
499 | - tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK | |
500 | + tst r1, #((_TIF_SYSCALL_WORK | _TIF_WORK_MASK) & ~_TIF_SECCOMP) | |
501 | + bne do_slower_path | |
502 | + tst r1, #_TIF_SECCOMP | |
503 | beq no_work_pending | |
504 | +do_slower_path: | |
505 | UNWIND(.fnend ) | |
506 | ENDPROC(ret_fast_syscall) | |
507 | ||
508 | diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c | |
509 | index 4adfb46e3ee9..15f1d94b47c5 100644 | |
510 | --- a/arch/arm/kernel/process.c | |
511 | +++ b/arch/arm/kernel/process.c | |
512 | @@ -319,6 +319,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) | |
513 | } | |
514 | ||
515 | #ifdef CONFIG_MMU | |
516 | +/* | |
517 | + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not | |
518 | + * initialized by pgtable_page_ctor() then a coredump of the vector page will | |
519 | + * fail. | |
520 | + */ | |
521 | +static int __init vectors_user_mapping_init_page(void) | |
522 | +{ | |
523 | + struct page *page; | |
524 | + unsigned long addr = 0xffff0000; | |
525 | + pgd_t *pgd; | |
526 | + pud_t *pud; | |
527 | + pmd_t *pmd; | |
528 | + | |
529 | + pgd = pgd_offset_k(addr); | |
530 | + pud = pud_offset(pgd, addr); | |
531 | + pmd = pmd_offset(pud, addr); | |
532 | + page = pmd_page(*(pmd)); | |
533 | + | |
534 | + pgtable_page_ctor(page); | |
535 | + | |
536 | + return 0; | |
537 | +} | |
538 | +late_initcall(vectors_user_mapping_init_page); | |
539 | + | |
540 | #ifdef CONFIG_KUSER_HELPERS | |
541 | /* | |
542 | * The vectors page is always readable from user space for the | |
543 | diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c | |
544 | index 7b8f2141427b..96541e00b74a 100644 | |
545 | --- a/arch/arm/kernel/signal.c | |
546 | +++ b/arch/arm/kernel/signal.c | |
547 | @@ -572,7 +572,8 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) | |
548 | */ | |
549 | trace_hardirqs_off(); | |
550 | do { | |
551 | - if (likely(thread_flags & _TIF_NEED_RESCHED)) { | |
552 | + if (likely(thread_flags & (_TIF_NEED_RESCHED | | |
553 | + _TIF_NEED_RESCHED_LAZY))) { | |
554 | schedule(); | |
555 | } else { | |
556 | if (unlikely(!user_mode(regs))) | |
557 | diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c | |
558 | index b26361355dae..e5754e3b03c4 100644 | |
559 | --- a/arch/arm/kernel/smp.c | |
560 | +++ b/arch/arm/kernel/smp.c | |
561 | @@ -230,8 +230,6 @@ int __cpu_disable(void) | |
562 | flush_cache_louis(); | |
563 | local_flush_tlb_all(); | |
564 | ||
565 | - clear_tasks_mm_cpumask(cpu); | |
566 | - | |
567 | return 0; | |
568 | } | |
569 | ||
570 | @@ -247,6 +245,9 @@ void __cpu_die(unsigned int cpu) | |
571 | pr_err("CPU%u: cpu didn't die\n", cpu); | |
572 | return; | |
573 | } | |
574 | + | |
575 | + clear_tasks_mm_cpumask(cpu); | |
576 | + | |
577 | pr_notice("CPU%u: shutdown\n", cpu); | |
578 | ||
579 | /* | |
580 | diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c | |
581 | index 0bee233fef9a..314cfb232a63 100644 | |
582 | --- a/arch/arm/kernel/unwind.c | |
583 | +++ b/arch/arm/kernel/unwind.c | |
584 | @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; | |
585 | static const struct unwind_idx *__origin_unwind_idx; | |
586 | extern const struct unwind_idx __stop_unwind_idx[]; | |
587 | ||
588 | -static DEFINE_SPINLOCK(unwind_lock); | |
589 | +static DEFINE_RAW_SPINLOCK(unwind_lock); | |
590 | static LIST_HEAD(unwind_tables); | |
591 | ||
592 | /* Convert a prel31 symbol to an absolute address */ | |
593 | @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
594 | /* module unwind tables */ | |
595 | struct unwind_table *table; | |
596 | ||
597 | - spin_lock_irqsave(&unwind_lock, flags); | |
598 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
599 | list_for_each_entry(table, &unwind_tables, list) { | |
600 | if (addr >= table->begin_addr && | |
601 | addr < table->end_addr) { | |
602 | @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) | |
603 | break; | |
604 | } | |
605 | } | |
606 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
607 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
608 | } | |
609 | ||
610 | pr_debug("%s: idx = %p\n", __func__, idx); | |
611 | @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, | |
612 | tab->begin_addr = text_addr; | |
613 | tab->end_addr = text_addr + text_size; | |
614 | ||
615 | - spin_lock_irqsave(&unwind_lock, flags); | |
616 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
617 | list_add_tail(&tab->list, &unwind_tables); | |
618 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
619 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
620 | ||
621 | return tab; | |
622 | } | |
623 | @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) | |
624 | if (!tab) | |
625 | return; | |
626 | ||
627 | - spin_lock_irqsave(&unwind_lock, flags); | |
628 | + raw_spin_lock_irqsave(&unwind_lock, flags); | |
629 | list_del(&tab->list); | |
630 | - spin_unlock_irqrestore(&unwind_lock, flags); | |
631 | + raw_spin_unlock_irqrestore(&unwind_lock, flags); | |
632 | ||
633 | kfree(tab); | |
634 | } | |
635 | diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c | |
636 | index d7bef2144760..36a3e51492f7 100644 | |
637 | --- a/arch/arm/kvm/arm.c | |
638 | +++ b/arch/arm/kvm/arm.c | |
639 | @@ -496,18 +496,18 @@ static void kvm_arm_resume_guest(struct kvm *kvm) | |
640 | struct kvm_vcpu *vcpu; | |
641 | ||
642 | kvm_for_each_vcpu(i, vcpu, kvm) { | |
643 | - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); | |
644 | + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | |
645 | ||
646 | vcpu->arch.pause = false; | |
647 | - wake_up_interruptible(wq); | |
648 | + swake_up(wq); | |
649 | } | |
650 | } | |
651 | ||
652 | static void vcpu_sleep(struct kvm_vcpu *vcpu) | |
653 | { | |
654 | - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); | |
655 | + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); | |
656 | ||
657 | - wait_event_interruptible(*wq, ((!vcpu->arch.power_off) && | |
658 | + swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && | |
659 | (!vcpu->arch.pause))); | |
660 | } | |
661 | ||
662 | @@ -566,7 +566,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
663 | * involves poking the GIC, which must be done in a | |
664 | * non-preemptible context. | |
665 | */ | |
666 | - preempt_disable(); | |
667 | + migrate_disable(); | |
668 | kvm_timer_flush_hwstate(vcpu); | |
669 | kvm_vgic_flush_hwstate(vcpu); | |
670 | ||
671 | @@ -585,7 +585,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
672 | local_irq_enable(); | |
673 | kvm_timer_sync_hwstate(vcpu); | |
674 | kvm_vgic_sync_hwstate(vcpu); | |
675 | - preempt_enable(); | |
676 | + migrate_enable(); | |
677 | continue; | |
678 | } | |
679 | ||
680 | @@ -639,7 +639,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) | |
681 | ||
682 | kvm_vgic_sync_hwstate(vcpu); | |
683 | ||
684 | - preempt_enable(); | |
685 | + migrate_enable(); | |
686 | ||
687 | ret = handle_exit(vcpu, run, ret); | |
688 | } | |
689 | diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c | |
690 | index a9b3b905e661..c2b131527a64 100644 | |
691 | --- a/arch/arm/kvm/psci.c | |
692 | +++ b/arch/arm/kvm/psci.c | |
693 | @@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | |
694 | { | |
695 | struct kvm *kvm = source_vcpu->kvm; | |
696 | struct kvm_vcpu *vcpu = NULL; | |
697 | - wait_queue_head_t *wq; | |
698 | + struct swait_queue_head *wq; | |
699 | unsigned long cpu_id; | |
700 | unsigned long context_id; | |
701 | phys_addr_t target_pc; | |
702 | @@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) | |
703 | smp_mb(); /* Make sure the above is visible */ | |
704 | ||
705 | wq = kvm_arch_vcpu_wq(vcpu); | |
706 | - wake_up_interruptible(wq); | |
707 | + swake_up(wq); | |
708 | ||
709 | return PSCI_RET_SUCCESS; | |
710 | } | |
711 | diff --git a/arch/arm/mach-at91/Kconfig b/arch/arm/mach-at91/Kconfig | |
712 | index 28656c2b54a0..3f501305ca26 100644 | |
713 | --- a/arch/arm/mach-at91/Kconfig | |
714 | +++ b/arch/arm/mach-at91/Kconfig | |
715 | @@ -99,6 +99,7 @@ config HAVE_AT91_USB_CLK | |
716 | config COMMON_CLK_AT91 | |
717 | bool | |
718 | select COMMON_CLK | |
719 | + select MFD_SYSCON | |
720 | ||
721 | config HAVE_AT91_SMD | |
722 | bool | |
723 | diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c | |
724 | index c1a7c6cc00e1..63b4fa25b48a 100644 | |
725 | --- a/arch/arm/mach-at91/at91rm9200.c | |
726 | +++ b/arch/arm/mach-at91/at91rm9200.c | |
727 | @@ -12,7 +12,6 @@ | |
728 | #include <linux/of_platform.h> | |
729 | ||
730 | #include <asm/mach/arch.h> | |
731 | -#include <asm/system_misc.h> | |
732 | ||
733 | #include "generic.h" | |
734 | #include "soc.h" | |
735 | @@ -33,7 +32,6 @@ static void __init at91rm9200_dt_device_init(void) | |
736 | ||
737 | of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); | |
738 | ||
739 | - arm_pm_idle = at91rm9200_idle; | |
740 | at91rm9200_pm_init(); | |
741 | } | |
742 | ||
743 | diff --git a/arch/arm/mach-at91/at91sam9.c b/arch/arm/mach-at91/at91sam9.c | |
744 | index 7eb64f763034..cada2a6412b3 100644 | |
745 | --- a/arch/arm/mach-at91/at91sam9.c | |
746 | +++ b/arch/arm/mach-at91/at91sam9.c | |
747 | @@ -62,8 +62,6 @@ static void __init at91sam9_common_init(void) | |
748 | soc_dev = soc_device_to_device(soc); | |
749 | ||
750 | of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); | |
751 | - | |
752 | - arm_pm_idle = at91sam9_idle; | |
753 | } | |
754 | ||
755 | static void __init at91sam9_dt_device_init(void) | |
756 | diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h | |
757 | index b0fa7dc7286d..28ca57a2060f 100644 | |
758 | --- a/arch/arm/mach-at91/generic.h | |
759 | +++ b/arch/arm/mach-at91/generic.h | |
760 | @@ -11,27 +11,18 @@ | |
761 | #ifndef _AT91_GENERIC_H | |
762 | #define _AT91_GENERIC_H | |
763 | ||
764 | -#include <linux/of.h> | |
765 | -#include <linux/reboot.h> | |
766 | - | |
767 | - /* Map io */ | |
768 | -extern void __init at91_map_io(void); | |
769 | -extern void __init at91_alt_map_io(void); | |
770 | - | |
771 | -/* idle */ | |
772 | -extern void at91rm9200_idle(void); | |
773 | -extern void at91sam9_idle(void); | |
774 | - | |
775 | #ifdef CONFIG_PM | |
776 | extern void __init at91rm9200_pm_init(void); | |
777 | extern void __init at91sam9260_pm_init(void); | |
778 | extern void __init at91sam9g45_pm_init(void); | |
779 | extern void __init at91sam9x5_pm_init(void); | |
780 | +extern void __init sama5_pm_init(void); | |
781 | #else | |
782 | static inline void __init at91rm9200_pm_init(void) { } | |
783 | static inline void __init at91sam9260_pm_init(void) { } | |
784 | static inline void __init at91sam9g45_pm_init(void) { } | |
785 | static inline void __init at91sam9x5_pm_init(void) { } | |
786 | +static inline void __init sama5_pm_init(void) { } | |
787 | #endif | |
788 | ||
789 | #endif /* _AT91_GENERIC_H */ | |
790 | diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c | |
791 | index 23726fb31741..f06270198bf1 100644 | |
792 | --- a/arch/arm/mach-at91/pm.c | |
793 | +++ b/arch/arm/mach-at91/pm.c | |
794 | @@ -31,10 +31,13 @@ | |
795 | #include <asm/mach/irq.h> | |
796 | #include <asm/fncpy.h> | |
797 | #include <asm/cacheflush.h> | |
798 | +#include <asm/system_misc.h> | |
799 | ||
800 | #include "generic.h" | |
801 | #include "pm.h" | |
802 | ||
803 | +static void __iomem *pmc; | |
804 | + | |
805 | /* | |
806 | * FIXME: this is needed to communicate between the pinctrl driver and | |
807 | * the PM implementation in the machine. Possibly part of the PM | |
808 | @@ -87,7 +90,7 @@ static int at91_pm_verify_clocks(void) | |
809 | unsigned long scsr; | |
810 | int i; | |
811 | ||
812 | - scsr = at91_pmc_read(AT91_PMC_SCSR); | |
813 | + scsr = readl(pmc + AT91_PMC_SCSR); | |
814 | ||
815 | /* USB must not be using PLLB */ | |
816 | if ((scsr & at91_pm_data.uhp_udp_mask) != 0) { | |
817 | @@ -101,8 +104,7 @@ static int at91_pm_verify_clocks(void) | |
818 | ||
819 | if ((scsr & (AT91_PMC_PCK0 << i)) == 0) | |
820 | continue; | |
821 | - | |
822 | - css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS; | |
823 | + css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS; | |
824 | if (css != AT91_PMC_CSS_SLOW) { | |
825 | pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css); | |
826 | return 0; | |
827 | @@ -145,8 +147,8 @@ static void at91_pm_suspend(suspend_state_t state) | |
828 | flush_cache_all(); | |
829 | outer_disable(); | |
830 | ||
831 | - at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0], | |
832 | - at91_ramc_base[1], pm_data); | |
833 | + at91_suspend_sram_fn(pmc, at91_ramc_base[0], | |
834 | + at91_ramc_base[1], pm_data); | |
835 | ||
836 | outer_resume(); | |
837 | } | |
838 | @@ -353,6 +355,21 @@ static __init void at91_dt_ramc(void) | |
839 | at91_pm_set_standby(standby); | |
840 | } | |
841 | ||
842 | +void at91rm9200_idle(void) | |
843 | +{ | |
844 | + /* | |
845 | + * Disable the processor clock. The processor will be automatically | |
846 | + * re-enabled by an interrupt or by a reset. | |
847 | + */ | |
848 | + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR); | |
849 | +} | |
850 | + | |
851 | +void at91sam9_idle(void) | |
852 | +{ | |
853 | + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR); | |
854 | + cpu_do_idle(); | |
855 | +} | |
856 | + | |
857 | static void __init at91_pm_sram_init(void) | |
858 | { | |
859 | struct gen_pool *sram_pool; | |
860 | @@ -399,13 +416,36 @@ static void __init at91_pm_sram_init(void) | |
861 | &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz); | |
862 | } | |
863 | ||
864 | -static void __init at91_pm_init(void) | |
865 | +static const struct of_device_id atmel_pmc_ids[] __initconst = { | |
866 | + { .compatible = "atmel,at91rm9200-pmc" }, | |
867 | + { .compatible = "atmel,at91sam9260-pmc" }, | |
868 | + { .compatible = "atmel,at91sam9g45-pmc" }, | |
869 | + { .compatible = "atmel,at91sam9n12-pmc" }, | |
870 | + { .compatible = "atmel,at91sam9x5-pmc" }, | |
871 | + { .compatible = "atmel,sama5d3-pmc" }, | |
872 | + { .compatible = "atmel,sama5d2-pmc" }, | |
873 | + { /* sentinel */ }, | |
874 | +}; | |
875 | + | |
876 | +static void __init at91_pm_init(void (*pm_idle)(void)) | |
877 | { | |
878 | - at91_pm_sram_init(); | |
879 | + struct device_node *pmc_np; | |
880 | ||
881 | if (at91_cpuidle_device.dev.platform_data) | |
882 | platform_device_register(&at91_cpuidle_device); | |
883 | ||
884 | + pmc_np = of_find_matching_node(NULL, atmel_pmc_ids); | |
885 | + pmc = of_iomap(pmc_np, 0); | |
886 | + if (!pmc) { | |
887 | + pr_err("AT91: PM not supported, PMC not found\n"); | |
888 | + return; | |
889 | + } | |
890 | + | |
891 | + if (pm_idle) | |
892 | + arm_pm_idle = pm_idle; | |
893 | + | |
894 | + at91_pm_sram_init(); | |
895 | + | |
896 | if (at91_suspend_sram_fn) | |
897 | suspend_set_ops(&at91_pm_ops); | |
898 | else | |
899 | @@ -424,7 +464,7 @@ void __init at91rm9200_pm_init(void) | |
900 | at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP; | |
901 | at91_pm_data.memctrl = AT91_MEMCTRL_MC; | |
902 | ||
903 | - at91_pm_init(); | |
904 | + at91_pm_init(at91rm9200_idle); | |
905 | } | |
906 | ||
907 | void __init at91sam9260_pm_init(void) | |
908 | @@ -432,7 +472,7 @@ void __init at91sam9260_pm_init(void) | |
909 | at91_dt_ramc(); | |
910 | at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC; | |
911 | at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; | |
912 | - return at91_pm_init(); | |
913 | + at91_pm_init(at91sam9_idle); | |
914 | } | |
915 | ||
916 | void __init at91sam9g45_pm_init(void) | |
917 | @@ -440,7 +480,7 @@ void __init at91sam9g45_pm_init(void) | |
918 | at91_dt_ramc(); | |
919 | at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP; | |
920 | at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; | |
921 | - return at91_pm_init(); | |
922 | + at91_pm_init(at91sam9_idle); | |
923 | } | |
924 | ||
925 | void __init at91sam9x5_pm_init(void) | |
926 | @@ -448,5 +488,13 @@ void __init at91sam9x5_pm_init(void) | |
927 | at91_dt_ramc(); | |
928 | at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; | |
929 | at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; | |
930 | - return at91_pm_init(); | |
931 | + at91_pm_init(at91sam9_idle); | |
932 | +} | |
933 | + | |
934 | +void __init sama5_pm_init(void) | |
935 | +{ | |
936 | + at91_dt_ramc(); | |
937 | + at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; | |
938 | + at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; | |
939 | + at91_pm_init(NULL); | |
940 | } | |
941 | diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c | |
942 | index d9cf6799aec0..df8fdf1cf66d 100644 | |
943 | --- a/arch/arm/mach-at91/sama5.c | |
944 | +++ b/arch/arm/mach-at91/sama5.c | |
945 | @@ -51,7 +51,7 @@ static void __init sama5_dt_device_init(void) | |
946 | soc_dev = soc_device_to_device(soc); | |
947 | ||
948 | of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); | |
949 | - at91sam9x5_pm_init(); | |
950 | + sama5_pm_init(); | |
951 | } | |
952 | ||
953 | static const char *const sama5_dt_board_compat[] __initconst = { | |
954 | diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c | |
955 | index 98a2c0cbb833..310dce500d3e 100644 | |
956 | --- a/arch/arm/mach-exynos/platsmp.c | |
957 | +++ b/arch/arm/mach-exynos/platsmp.c | |
958 | @@ -230,7 +230,7 @@ static void __iomem *scu_base_addr(void) | |
959 | return (void __iomem *)(S5P_VA_SCU); | |
960 | } | |
961 | ||
962 | -static DEFINE_SPINLOCK(boot_lock); | |
963 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
964 | ||
965 | static void exynos_secondary_init(unsigned int cpu) | |
966 | { | |
967 | @@ -243,8 +243,8 @@ static void exynos_secondary_init(unsigned int cpu) | |
968 | /* | |
969 | * Synchronise with the boot thread. | |
970 | */ | |
971 | - spin_lock(&boot_lock); | |
972 | - spin_unlock(&boot_lock); | |
973 | + raw_spin_lock(&boot_lock); | |
974 | + raw_spin_unlock(&boot_lock); | |
975 | } | |
976 | ||
977 | int exynos_set_boot_addr(u32 core_id, unsigned long boot_addr) | |
978 | @@ -308,7 +308,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
979 | * Set synchronisation state between this boot processor | |
980 | * and the secondary one | |
981 | */ | |
982 | - spin_lock(&boot_lock); | |
983 | + raw_spin_lock(&boot_lock); | |
984 | ||
985 | /* | |
986 | * The secondary processor is waiting to be released from | |
987 | @@ -335,7 +335,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
988 | ||
989 | if (timeout == 0) { | |
990 | printk(KERN_ERR "cpu1 power enable failed"); | |
991 | - spin_unlock(&boot_lock); | |
992 | + raw_spin_unlock(&boot_lock); | |
993 | return -ETIMEDOUT; | |
994 | } | |
995 | } | |
996 | @@ -381,7 +381,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
997 | * calibrations, then wait for it to finish | |
998 | */ | |
999 | fail: | |
1000 | - spin_unlock(&boot_lock); | |
1001 | + raw_spin_unlock(&boot_lock); | |
1002 | ||
1003 | return pen_release != -1 ? ret : 0; | |
1004 | } | |
1005 | diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c | |
1006 | index b5f8f5ffda79..9753a84df9c4 100644 | |
1007 | --- a/arch/arm/mach-hisi/platmcpm.c | |
1008 | +++ b/arch/arm/mach-hisi/platmcpm.c | |
1009 | @@ -61,7 +61,7 @@ | |
1010 | ||
1011 | static void __iomem *sysctrl, *fabric; | |
1012 | static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; | |
1013 | -static DEFINE_SPINLOCK(boot_lock); | |
1014 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1015 | static u32 fabric_phys_addr; | |
1016 | /* | |
1017 | * [0]: bootwrapper physical address | |
1018 | @@ -113,7 +113,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
1019 | if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) | |
1020 | return -EINVAL; | |
1021 | ||
1022 | - spin_lock_irq(&boot_lock); | |
1023 | + raw_spin_lock_irq(&boot_lock); | |
1024 | ||
1025 | if (hip04_cpu_table[cluster][cpu]) | |
1026 | goto out; | |
1027 | @@ -147,7 +147,7 @@ static int hip04_boot_secondary(unsigned int l_cpu, struct task_struct *idle) | |
1028 | ||
1029 | out: | |
1030 | hip04_cpu_table[cluster][cpu]++; | |
1031 | - spin_unlock_irq(&boot_lock); | |
1032 | + raw_spin_unlock_irq(&boot_lock); | |
1033 | ||
1034 | return 0; | |
1035 | } | |
1036 | @@ -162,11 +162,11 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
1037 | cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); | |
1038 | cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); | |
1039 | ||
1040 | - spin_lock(&boot_lock); | |
1041 | + raw_spin_lock(&boot_lock); | |
1042 | hip04_cpu_table[cluster][cpu]--; | |
1043 | if (hip04_cpu_table[cluster][cpu] == 1) { | |
1044 | /* A power_up request went ahead of us. */ | |
1045 | - spin_unlock(&boot_lock); | |
1046 | + raw_spin_unlock(&boot_lock); | |
1047 | return; | |
1048 | } else if (hip04_cpu_table[cluster][cpu] > 1) { | |
1049 | pr_err("Cluster %d CPU%d boots multiple times\n", cluster, cpu); | |
1050 | @@ -174,7 +174,7 @@ static void hip04_cpu_die(unsigned int l_cpu) | |
1051 | } | |
1052 | ||
1053 | last_man = hip04_cluster_is_down(cluster); | |
1054 | - spin_unlock(&boot_lock); | |
1055 | + raw_spin_unlock(&boot_lock); | |
1056 | if (last_man) { | |
1057 | /* Since it's Cortex A15, disable L2 prefetching. */ | |
1058 | asm volatile( | |
1059 | @@ -203,7 +203,7 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
1060 | cpu >= HIP04_MAX_CPUS_PER_CLUSTER); | |
1061 | ||
1062 | count = TIMEOUT_MSEC / POLL_MSEC; | |
1063 | - spin_lock_irq(&boot_lock); | |
1064 | + raw_spin_lock_irq(&boot_lock); | |
1065 | for (tries = 0; tries < count; tries++) { | |
1066 | if (hip04_cpu_table[cluster][cpu]) | |
1067 | goto err; | |
1068 | @@ -211,10 +211,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
1069 | data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); | |
1070 | if (data & CORE_WFI_STATUS(cpu)) | |
1071 | break; | |
1072 | - spin_unlock_irq(&boot_lock); | |
1073 | + raw_spin_unlock_irq(&boot_lock); | |
1074 | /* Wait for clean L2 when the whole cluster is down. */ | |
1075 | msleep(POLL_MSEC); | |
1076 | - spin_lock_irq(&boot_lock); | |
1077 | + raw_spin_lock_irq(&boot_lock); | |
1078 | } | |
1079 | if (tries >= count) | |
1080 | goto err; | |
1081 | @@ -231,10 +231,10 @@ static int hip04_cpu_kill(unsigned int l_cpu) | |
1082 | goto err; | |
1083 | if (hip04_cluster_is_down(cluster)) | |
1084 | hip04_set_snoop_filter(cluster, 0); | |
1085 | - spin_unlock_irq(&boot_lock); | |
1086 | + raw_spin_unlock_irq(&boot_lock); | |
1087 | return 1; | |
1088 | err: | |
1089 | - spin_unlock_irq(&boot_lock); | |
1090 | + raw_spin_unlock_irq(&boot_lock); | |
1091 | return 0; | |
1092 | } | |
1093 | #endif | |
1094 | diff --git a/arch/arm/mach-imx/Kconfig b/arch/arm/mach-imx/Kconfig | |
1095 | index 8ceda2844c4f..08bcf8fb76f2 100644 | |
1096 | --- a/arch/arm/mach-imx/Kconfig | |
1097 | +++ b/arch/arm/mach-imx/Kconfig | |
1098 | @@ -524,7 +524,7 @@ config SOC_IMX6Q | |
1099 | bool "i.MX6 Quad/DualLite support" | |
1100 | select ARM_ERRATA_764369 if SMP | |
1101 | select HAVE_ARM_SCU if SMP | |
1102 | - select HAVE_ARM_TWD if SMP | |
1103 | + select HAVE_ARM_TWD | |
1104 | select PCI_DOMAINS if PCI | |
1105 | select PINCTRL_IMX6Q | |
1106 | select SOC_IMX6 | |
1107 | diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c | |
1108 | index 79e1f876d1c9..7e625c17f78e 100644 | |
1109 | --- a/arch/arm/mach-omap2/omap-smp.c | |
1110 | +++ b/arch/arm/mach-omap2/omap-smp.c | |
1111 | @@ -43,7 +43,7 @@ | |
1112 | /* SCU base address */ | |
1113 | static void __iomem *scu_base; | |
1114 | ||
1115 | -static DEFINE_SPINLOCK(boot_lock); | |
1116 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1117 | ||
1118 | void __iomem *omap4_get_scu_base(void) | |
1119 | { | |
1120 | @@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigned int cpu) | |
1121 | /* | |
1122 | * Synchronise with the boot thread. | |
1123 | */ | |
1124 | - spin_lock(&boot_lock); | |
1125 | - spin_unlock(&boot_lock); | |
1126 | + raw_spin_lock(&boot_lock); | |
1127 | + raw_spin_unlock(&boot_lock); | |
1128 | } | |
1129 | ||
1130 | static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1131 | @@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1132 | * Set synchronisation state between this boot processor | |
1133 | * and the secondary one | |
1134 | */ | |
1135 | - spin_lock(&boot_lock); | |
1136 | + raw_spin_lock(&boot_lock); | |
1137 | ||
1138 | /* | |
1139 | * Update the AuxCoreBoot0 with boot state for secondary core. | |
1140 | @@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1141 | * Now the secondary core is starting up let it run its | |
1142 | * calibrations, then wait for it to finish | |
1143 | */ | |
1144 | - spin_unlock(&boot_lock); | |
1145 | + raw_spin_unlock(&boot_lock); | |
1146 | ||
1147 | return 0; | |
1148 | } | |
1149 | diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c | |
1150 | index e46c91094dde..dcb3ed0c26da 100644 | |
1151 | --- a/arch/arm/mach-prima2/platsmp.c | |
1152 | +++ b/arch/arm/mach-prima2/platsmp.c | |
1153 | @@ -22,7 +22,7 @@ | |
1154 | ||
1155 | static void __iomem *clk_base; | |
1156 | ||
1157 | -static DEFINE_SPINLOCK(boot_lock); | |
1158 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1159 | ||
1160 | static void sirfsoc_secondary_init(unsigned int cpu) | |
1161 | { | |
1162 | @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) | |
1163 | /* | |
1164 | * Synchronise with the boot thread. | |
1165 | */ | |
1166 | - spin_lock(&boot_lock); | |
1167 | - spin_unlock(&boot_lock); | |
1168 | + raw_spin_lock(&boot_lock); | |
1169 | + raw_spin_unlock(&boot_lock); | |
1170 | } | |
1171 | ||
1172 | static const struct of_device_id clk_ids[] = { | |
1173 | @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1174 | /* make sure write buffer is drained */ | |
1175 | mb(); | |
1176 | ||
1177 | - spin_lock(&boot_lock); | |
1178 | + raw_spin_lock(&boot_lock); | |
1179 | ||
1180 | /* | |
1181 | * The secondary processor is waiting to be released from | |
1182 | @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1183 | * now the secondary core is starting up let it run its | |
1184 | * calibrations, then wait for it to finish | |
1185 | */ | |
1186 | - spin_unlock(&boot_lock); | |
1187 | + raw_spin_unlock(&boot_lock); | |
1188 | ||
1189 | return pen_release != -1 ? -ENOSYS : 0; | |
1190 | } | |
1191 | diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c | |
1192 | index 9b00123a315d..0a49fe1bc8cf 100644 | |
1193 | --- a/arch/arm/mach-qcom/platsmp.c | |
1194 | +++ b/arch/arm/mach-qcom/platsmp.c | |
1195 | @@ -46,7 +46,7 @@ | |
1196 | ||
1197 | extern void secondary_startup_arm(void); | |
1198 | ||
1199 | -static DEFINE_SPINLOCK(boot_lock); | |
1200 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1201 | ||
1202 | #ifdef CONFIG_HOTPLUG_CPU | |
1203 | static void qcom_cpu_die(unsigned int cpu) | |
1204 | @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) | |
1205 | /* | |
1206 | * Synchronise with the boot thread. | |
1207 | */ | |
1208 | - spin_lock(&boot_lock); | |
1209 | - spin_unlock(&boot_lock); | |
1210 | + raw_spin_lock(&boot_lock); | |
1211 | + raw_spin_unlock(&boot_lock); | |
1212 | } | |
1213 | ||
1214 | static int scss_release_secondary(unsigned int cpu) | |
1215 | @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
1216 | * set synchronisation state between this boot processor | |
1217 | * and the secondary one | |
1218 | */ | |
1219 | - spin_lock(&boot_lock); | |
1220 | + raw_spin_lock(&boot_lock); | |
1221 | ||
1222 | /* | |
1223 | * Send the secondary CPU a soft interrupt, thereby causing | |
1224 | @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) | |
1225 | * now the secondary core is starting up let it run its | |
1226 | * calibrations, then wait for it to finish | |
1227 | */ | |
1228 | - spin_unlock(&boot_lock); | |
1229 | + raw_spin_unlock(&boot_lock); | |
1230 | ||
1231 | return ret; | |
1232 | } | |
1233 | diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c | |
1234 | index fd4297713d67..b0553b2c2d53 100644 | |
1235 | --- a/arch/arm/mach-spear/platsmp.c | |
1236 | +++ b/arch/arm/mach-spear/platsmp.c | |
1237 | @@ -32,7 +32,7 @@ static void write_pen_release(int val) | |
1238 | sync_cache_w(&pen_release); | |
1239 | } | |
1240 | ||
1241 | -static DEFINE_SPINLOCK(boot_lock); | |
1242 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1243 | ||
1244 | static void __iomem *scu_base = IOMEM(VA_SCU_BASE); | |
1245 | ||
1246 | @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) | |
1247 | /* | |
1248 | * Synchronise with the boot thread. | |
1249 | */ | |
1250 | - spin_lock(&boot_lock); | |
1251 | - spin_unlock(&boot_lock); | |
1252 | + raw_spin_lock(&boot_lock); | |
1253 | + raw_spin_unlock(&boot_lock); | |
1254 | } | |
1255 | ||
1256 | static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1257 | @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1258 | * set synchronisation state between this boot processor | |
1259 | * and the secondary one | |
1260 | */ | |
1261 | - spin_lock(&boot_lock); | |
1262 | + raw_spin_lock(&boot_lock); | |
1263 | ||
1264 | /* | |
1265 | * The secondary processor is waiting to be released from | |
1266 | @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1267 | * now the secondary core is starting up let it run its | |
1268 | * calibrations, then wait for it to finish | |
1269 | */ | |
1270 | - spin_unlock(&boot_lock); | |
1271 | + raw_spin_unlock(&boot_lock); | |
1272 | ||
1273 | return pen_release != -1 ? -ENOSYS : 0; | |
1274 | } | |
1275 | diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c | |
1276 | index c4ad6eae67fa..e830b20b212f 100644 | |
1277 | --- a/arch/arm/mach-sti/platsmp.c | |
1278 | +++ b/arch/arm/mach-sti/platsmp.c | |
1279 | @@ -35,7 +35,7 @@ static void write_pen_release(int val) | |
1280 | sync_cache_w(&pen_release); | |
1281 | } | |
1282 | ||
1283 | -static DEFINE_SPINLOCK(boot_lock); | |
1284 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1285 | ||
1286 | static void sti_secondary_init(unsigned int cpu) | |
1287 | { | |
1288 | @@ -48,8 +48,8 @@ static void sti_secondary_init(unsigned int cpu) | |
1289 | /* | |
1290 | * Synchronise with the boot thread. | |
1291 | */ | |
1292 | - spin_lock(&boot_lock); | |
1293 | - spin_unlock(&boot_lock); | |
1294 | + raw_spin_lock(&boot_lock); | |
1295 | + raw_spin_unlock(&boot_lock); | |
1296 | } | |
1297 | ||
1298 | static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1299 | @@ -60,7 +60,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1300 | * set synchronisation state between this boot processor | |
1301 | * and the secondary one | |
1302 | */ | |
1303 | - spin_lock(&boot_lock); | |
1304 | + raw_spin_lock(&boot_lock); | |
1305 | ||
1306 | /* | |
1307 | * The secondary processor is waiting to be released from | |
1308 | @@ -91,7 +91,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1309 | * now the secondary core is starting up let it run its | |
1310 | * calibrations, then wait for it to finish | |
1311 | */ | |
1312 | - spin_unlock(&boot_lock); | |
1313 | + raw_spin_unlock(&boot_lock); | |
1314 | ||
1315 | return pen_release != -1 ? -ENOSYS : 0; | |
1316 | } | |
1317 | diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c | |
1318 | index daafcf121ce0..b8aa1e9ee8ee 100644 | |
1319 | --- a/arch/arm/mm/fault.c | |
1320 | +++ b/arch/arm/mm/fault.c | |
1321 | @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
1322 | if (addr < TASK_SIZE) | |
1323 | return do_page_fault(addr, fsr, regs); | |
1324 | ||
1325 | + if (interrupts_enabled(regs)) | |
1326 | + local_irq_enable(); | |
1327 | + | |
1328 | if (user_mode(regs)) | |
1329 | goto bad_area; | |
1330 | ||
1331 | @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, | |
1332 | static int | |
1333 | do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) | |
1334 | { | |
1335 | + if (interrupts_enabled(regs)) | |
1336 | + local_irq_enable(); | |
1337 | + | |
1338 | do_bad_area(addr, fsr, regs); | |
1339 | return 0; | |
1340 | } | |
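
These fault.c hunks re-enable interrupts before handling the bad area, but only when the interrupted context had them enabled: on RT the handling may take sleeping locks, which must not happen with IRQs hard-disabled. A small sketch of the guard; the pt_regs layout and helpers below are stand-ins, not the real ARM definitions:

    /* Sketch: re-enable interrupts in a fault handler only when the
     * interrupted context had them enabled. PSR_I_BIT and the helpers
     * are assumptions for illustration. */
    #include <stdbool.h>
    #include <stdio.h>

    #define PSR_I_BIT 0x80          /* IRQs masked when set (assumed) */

    struct pt_regs { unsigned long cpsr; };

    static bool interrupts_enabled(const struct pt_regs *regs)
    {
            return !(regs->cpsr & PSR_I_BIT);
    }

    static void local_irq_enable(void)
    {
            puts("irqs enabled for fault handling");
    }

    static void do_translation_fault(const struct pt_regs *regs)
    {
            if (interrupts_enabled(regs))
                    local_irq_enable();
            /* ... handle the bad area; may take sleeping locks on RT ... */
    }

    int main(void)
    {
            struct pt_regs from_task = { .cpsr = 0 };        /* IRQs were on */
            struct pt_regs from_irq  = { .cpsr = PSR_I_BIT }; /* IRQs were off */

            do_translation_fault(&from_task);
            do_translation_fault(&from_irq);
            return 0;
    }
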
1341 | diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c | |
1342 | index d02f8187b1cc..542692dbd40a 100644 | |
1343 | --- a/arch/arm/mm/highmem.c | |
1344 | +++ b/arch/arm/mm/highmem.c | |
1345 | @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) | |
1346 | return *ptep; | |
1347 | } | |
1348 | ||
1349 | +static unsigned int fixmap_idx(int type) | |
1350 | +{ | |
1351 | + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1352 | +} | |
1353 | + | |
1354 | void *kmap(struct page *page) | |
1355 | { | |
1356 | might_sleep(); | |
1357 | @@ -54,12 +59,13 @@ EXPORT_SYMBOL(kunmap); | |
1358 | ||
1359 | void *kmap_atomic(struct page *page) | |
1360 | { | |
1361 | + pte_t pte = mk_pte(page, kmap_prot); | |
1362 | unsigned int idx; | |
1363 | unsigned long vaddr; | |
1364 | void *kmap; | |
1365 | int type; | |
1366 | ||
1367 | - preempt_disable(); | |
1368 | + preempt_disable_nort(); | |
1369 | pagefault_disable(); | |
1370 | if (!PageHighMem(page)) | |
1371 | return page_address(page); | |
1372 | @@ -79,7 +85,7 @@ void *kmap_atomic(struct page *page) | |
1373 | ||
1374 | type = kmap_atomic_idx_push(); | |
1375 | ||
1376 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1377 | + idx = fixmap_idx(type); | |
1378 | vaddr = __fix_to_virt(idx); | |
1379 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1380 | /* | |
1381 | @@ -93,7 +99,10 @@ void *kmap_atomic(struct page *page) | |
1382 | * in place, so the contained TLB flush ensures the TLB is updated | |
1383 | * with the new mapping. | |
1384 | */ | |
1385 | - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); | |
1386 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1387 | + current->kmap_pte[type] = pte; | |
1388 | +#endif | |
1389 | + set_fixmap_pte(idx, pte); | |
1390 | ||
1391 | return (void *)vaddr; | |
1392 | } | |
1393 | @@ -106,44 +115,75 @@ void __kunmap_atomic(void *kvaddr) | |
1394 | ||
1395 | if (kvaddr >= (void *)FIXADDR_START) { | |
1396 | type = kmap_atomic_idx(); | |
1397 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1398 | + idx = fixmap_idx(type); | |
1399 | ||
1400 | if (cache_is_vivt()) | |
1401 | __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); | |
1402 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1403 | + current->kmap_pte[type] = __pte(0); | |
1404 | +#endif | |
1405 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1406 | BUG_ON(vaddr != __fix_to_virt(idx)); | |
1407 | - set_fixmap_pte(idx, __pte(0)); | |
1408 | #else | |
1409 | (void) idx; /* to kill a warning */ | |
1410 | #endif | |
1411 | + set_fixmap_pte(idx, __pte(0)); | |
1412 | kmap_atomic_idx_pop(); | |
1413 | } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { | |
1414 | /* this address was obtained through kmap_high_get() */ | |
1415 | kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); | |
1416 | } | |
1417 | pagefault_enable(); | |
1418 | - preempt_enable(); | |
1419 | + preempt_enable_nort(); | |
1420 | } | |
1421 | EXPORT_SYMBOL(__kunmap_atomic); | |
1422 | ||
1423 | void *kmap_atomic_pfn(unsigned long pfn) | |
1424 | { | |
1425 | + pte_t pte = pfn_pte(pfn, kmap_prot); | |
1426 | unsigned long vaddr; | |
1427 | int idx, type; | |
1428 | struct page *page = pfn_to_page(pfn); | |
1429 | ||
1430 | - preempt_disable(); | |
1431 | + preempt_disable_nort(); | |
1432 | pagefault_disable(); | |
1433 | if (!PageHighMem(page)) | |
1434 | return page_address(page); | |
1435 | ||
1436 | type = kmap_atomic_idx_push(); | |
1437 | - idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); | |
1438 | + idx = fixmap_idx(type); | |
1439 | vaddr = __fix_to_virt(idx); | |
1440 | #ifdef CONFIG_DEBUG_HIGHMEM | |
1441 | BUG_ON(!pte_none(get_fixmap_pte(vaddr))); | |
1442 | #endif | |
1443 | - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); | |
1444 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
1445 | + current->kmap_pte[type] = pte; | |
1446 | +#endif | |
1447 | + set_fixmap_pte(idx, pte); | |
1448 | ||
1449 | return (void *)vaddr; | |
1450 | } | |
1451 | +#if defined CONFIG_PREEMPT_RT_FULL | |
1452 | +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
1453 | +{ | |
1454 | + int i; | |
1455 | + | |
1456 | + /* | |
1457 | + * Clear @prev's kmap_atomic mappings | |
1458 | + */ | |
1459 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
1460 | + int idx = fixmap_idx(i); | |
1461 | + | |
1462 | + set_fixmap_pte(idx, __pte(0)); | |
1463 | + } | |
1464 | + /* | |
1465 | + * Restore @next_p's kmap_atomic mappings | |
1466 | + */ | |
1467 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
1468 | + int idx = fixmap_idx(i); | |
1469 | + | |
1470 | + if (!pte_none(next_p->kmap_pte[i])) | |
1471 | + set_fixmap_pte(idx, next_p->kmap_pte[i]); | |
1472 | + } | |
1473 | +} | |
1474 | +#endif | |
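
kmap_atomic() mappings live in per-CPU fixmap slots indexed by (cpu, type). Because the RT variant no longer disables preemption (preempt_disable_nort), a task can be switched out while holding a mapping, so the patch records the live ptes in the task and switch_kmaps() clears the outgoing task's slots and replays the incoming task's at context switch. A runnable sketch of the slot arithmetic and the replay loops; the constants and types are made up for illustration:

    /* Sketch: per-CPU, per-type fixmap slots and the switch_kmaps idea.
     * Each (cpu, type) pair maps to a distinct slot; on RT a task's
     * live kmap ptes travel with it across context switches. */
    #include <stdio.h>

    #define KM_TYPE_NR      16      /* slots per CPU (assumed value) */
    #define FIX_KMAP_BEGIN  0

    static unsigned int fixmap_idx(int type, int cpu)
    {
            return FIX_KMAP_BEGIN + type + KM_TYPE_NR * cpu;
    }

    struct task {
            int kmap_idx;                   /* number of slots in use */
            unsigned long kmap_pte[KM_TYPE_NR];
    };

    /* On an RT context switch: clear prev's slots, replay next's. */
    static void switch_kmaps(struct task *prev, struct task *next, int cpu)
    {
            int i;

            for (i = 0; i < prev->kmap_idx; i++)
                    printf("clear slot %u\n", fixmap_idx(i, cpu));
            for (i = 0; i < next->kmap_idx; i++)
                    if (next->kmap_pte[i])
                            printf("restore slot %u -> pte %#lx\n",
                                   fixmap_idx(i, cpu), next->kmap_pte[i]);
    }

    int main(void)
    {
            struct task a = { .kmap_idx = 2, .kmap_pte = { 0x1000, 0x2000 } };
            struct task b = { .kmap_idx = 1, .kmap_pte = { 0x3000 } };

            switch_kmaps(&a, &b, 0);        /* A switches out, B in */
            return 0;
    }
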
1475 | diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c | |
1476 | index 53feb90c840c..b4a8d54fc3f3 100644 | |
1477 | --- a/arch/arm/plat-versatile/platsmp.c | |
1478 | +++ b/arch/arm/plat-versatile/platsmp.c | |
1479 | @@ -30,7 +30,7 @@ static void write_pen_release(int val) | |
1480 | sync_cache_w(&pen_release); | |
1481 | } | |
1482 | ||
1483 | -static DEFINE_SPINLOCK(boot_lock); | |
1484 | +static DEFINE_RAW_SPINLOCK(boot_lock); | |
1485 | ||
1486 | void versatile_secondary_init(unsigned int cpu) | |
1487 | { | |
1488 | @@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned int cpu) | |
1489 | /* | |
1490 | * Synchronise with the boot thread. | |
1491 | */ | |
1492 | - spin_lock(&boot_lock); | |
1493 | - spin_unlock(&boot_lock); | |
1494 | + raw_spin_lock(&boot_lock); | |
1495 | + raw_spin_unlock(&boot_lock); | |
1496 | } | |
1497 | ||
1498 | int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1499 | @@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1500 | * Set synchronisation state between this boot processor | |
1501 | * and the secondary one | |
1502 | */ | |
1503 | - spin_lock(&boot_lock); | |
1504 | + raw_spin_lock(&boot_lock); | |
1505 | ||
1506 | /* | |
1507 | * This is really belt and braces; we hold unintended secondary | |
1508 | @@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) | |
1509 | * now the secondary core is starting up let it run its | |
1510 | * calibrations, then wait for it to finish | |
1511 | */ | |
1512 | - spin_unlock(&boot_lock); | |
1513 | + raw_spin_unlock(&boot_lock); | |
1514 | ||
1515 | return pen_release != -1 ? -ENOSYS : 0; | |
1516 | } | |
1517 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig | |
1518 | index 14cdc6dea493..9196cf82f7be 100644 | |
1519 | --- a/arch/arm64/Kconfig | |
1520 | +++ b/arch/arm64/Kconfig | |
1521 | @@ -76,6 +76,7 @@ config ARM64 | |
1522 | select HAVE_PERF_REGS | |
1523 | select HAVE_PERF_USER_STACK_DUMP | |
1524 | select HAVE_RCU_TABLE_FREE | |
1525 | + select HAVE_PREEMPT_LAZY | |
1526 | select HAVE_SYSCALL_TRACEPOINTS | |
1527 | select IOMMU_DMA if IOMMU_SUPPORT | |
1528 | select IRQ_DOMAIN | |
1529 | @@ -582,7 +583,7 @@ config XEN_DOM0 | |
1530 | ||
1531 | config XEN | |
1532 | bool "Xen guest support on ARM64" | |
1533 | - depends on ARM64 && OF | |
1534 | + depends on ARM64 && OF && !PREEMPT_RT_FULL | |
1535 | select SWIOTLB_XEN | |
1536 | help | |
1537 | Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. | |
1538 | diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h | |
1539 | index 90c7ff233735..5f4e89fbc290 100644 | |
1540 | --- a/arch/arm64/include/asm/thread_info.h | |
1541 | +++ b/arch/arm64/include/asm/thread_info.h | |
1542 | @@ -49,6 +49,7 @@ struct thread_info { | |
1543 | mm_segment_t addr_limit; /* address limit */ | |
1544 | struct task_struct *task; /* main task structure */ | |
1545 | int preempt_count; /* 0 => preemptable, <0 => bug */ | |
1546 | + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ | |
1547 | int cpu; /* cpu */ | |
1548 | }; | |
1549 | ||
1550 | @@ -103,6 +104,7 @@ static inline struct thread_info *current_thread_info(void) | |
1551 | #define TIF_NEED_RESCHED 1 | |
1552 | #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ | |
1553 | #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ | |
1554 | +#define TIF_NEED_RESCHED_LAZY 4 | |
1555 | #define TIF_NOHZ 7 | |
1556 | #define TIF_SYSCALL_TRACE 8 | |
1557 | #define TIF_SYSCALL_AUDIT 9 | |
1558 | @@ -118,6 +120,7 @@ static inline struct thread_info *current_thread_info(void) | |
1559 | #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) | |
1560 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | |
1561 | #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) | |
1562 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
1563 | #define _TIF_NOHZ (1 << TIF_NOHZ) | |
1564 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | |
1565 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
1566 | @@ -126,7 +129,8 @@ static inline struct thread_info *current_thread_info(void) | |
1567 | #define _TIF_32BIT (1 << TIF_32BIT) | |
1568 | ||
1569 | #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ | |
1570 | - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) | |
1571 | + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ | |
1572 | + _TIF_NEED_RESCHED_LAZY) | |
1573 | ||
1574 | #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1575 | _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ | |
1576 | diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c | |
1577 | index 087cf9a65359..d74475928399 100644 | |
1578 | --- a/arch/arm64/kernel/asm-offsets.c | |
1579 | +++ b/arch/arm64/kernel/asm-offsets.c | |
1580 | @@ -35,6 +35,7 @@ int main(void) | |
1581 | BLANK(); | |
1582 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1583 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1584 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1585 | DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); | |
1586 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1587 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1588 | diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S | |
1589 | index 5a3753d09e20..05d73c4c03f6 100644 | |
1590 | --- a/arch/arm64/kernel/entry.S | |
1591 | +++ b/arch/arm64/kernel/entry.S | |
1592 | @@ -376,11 +376,16 @@ el1_irq: | |
1593 | #ifdef CONFIG_PREEMPT | |
1594 | get_thread_info tsk | |
1595 | ldr w24, [tsk, #TI_PREEMPT] // get preempt count | |
1596 | - cbnz w24, 1f // preempt count != 0 | |
1597 | + cbnz w24, 2f // preempt count != 0 | |
1598 | ldr x0, [tsk, #TI_FLAGS] // get flags | |
1599 | - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1600 | - bl el1_preempt | |
1601 | + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? | |
1602 | + | |
1603 | + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count | |
1604 | + cbnz w24, 2f // preempt lazy count != 0 | |
1605 | + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? | |
1606 | 1: | |
1607 | + bl el1_preempt | |
1608 | +2: | |
1609 | #endif | |
1610 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1611 | bl trace_hardirqs_on | |
1612 | @@ -394,6 +399,7 @@ el1_preempt: | |
1613 | 1: bl preempt_schedule_irq // irq en/disable is done inside | |
1614 | ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS | |
1615 | tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? | |
1616 | + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? | |
1617 | ret x24 | |
1618 | #endif | |
1619 | ||
1620 | @@ -638,6 +644,7 @@ ret_fast_syscall_trace: | |
1621 | */ | |
1622 | work_pending: | |
1623 | tbnz x1, #TIF_NEED_RESCHED, work_resched | |
1624 | + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched | |
1625 | /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ | |
1626 | ldr x2, [sp, #S_PSTATE] | |
1627 | mov x0, sp // 'regs' | |
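
The el1_irq exit path now preempts in two tiers: immediately when TIF_NEED_RESCHED is set and the preempt count is zero, and lazily when only TIF_NEED_RESCHED_LAZY is set and no lazy sections are held. The same decision tree in plain C, as a readable companion to the assembly above (field names mirror struct thread_info; bit values are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    #define TIF_NEED_RESCHED        (1u << 1)
    #define TIF_NEED_RESCHED_LAZY   (1u << 4)

    struct thread_info {
            unsigned long flags;
            int preempt_count;
            int preempt_lazy_count;
    };

    static bool should_preempt_irq_exit(const struct thread_info *ti)
    {
            if (ti->preempt_count)
                    return false;           /* preemption disabled */
            if (ti->flags & TIF_NEED_RESCHED)
                    return true;            /* hard request: preempt now */
            if (ti->preempt_lazy_count)
                    return false;           /* inside a lazy section */
            return ti->flags & TIF_NEED_RESCHED_LAZY;
    }

    int main(void)
    {
            struct thread_info ti = { .flags = TIF_NEED_RESCHED_LAZY };

            printf("%d\n", should_preempt_irq_exit(&ti)); /* 1: lazy preempt */
            ti.preempt_lazy_count = 1;
            printf("%d\n", should_preempt_irq_exit(&ti)); /* 0: deferred */
            return 0;
    }
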
1628 | diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig | |
1629 | index db459612de44..bd8be6a0e745 100644 | |
1630 | --- a/arch/mips/Kconfig | |
1631 | +++ b/arch/mips/Kconfig | |
1632 | @@ -2410,7 +2410,7 @@ config CPU_R4400_WORKAROUNDS | |
1633 | # | |
1634 | config HIGHMEM | |
1635 | bool "High Memory Support" | |
1636 | - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA | |
1637 | + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL | |
1638 | ||
1639 | config CPU_SUPPORTS_HIGHMEM | |
1640 | bool | |
1641 | diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c | |
1642 | index e86b7499921a..b2a2f678c5dc 100644 | |
1643 | --- a/arch/mips/kvm/mips.c | |
1644 | +++ b/arch/mips/kvm/mips.c | |
1645 | @@ -454,8 +454,8 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |
1646 | ||
1647 | dvcpu->arch.wait = 0; | |
1648 | ||
1649 | - if (waitqueue_active(&dvcpu->wq)) | |
1650 | - wake_up_interruptible(&dvcpu->wq); | |
1651 | + if (swait_active(&dvcpu->wq)) | |
1652 | + swake_up(&dvcpu->wq); | |
1653 | ||
1654 | return 0; | |
1655 | } | |
1656 | @@ -1183,8 +1183,8 @@ static void kvm_mips_comparecount_func(unsigned long data) | |
1657 | kvm_mips_callbacks->queue_timer_int(vcpu); | |
1658 | ||
1659 | vcpu->arch.wait = 0; | |
1660 | - if (waitqueue_active(&vcpu->wq)) | |
1661 | - wake_up_interruptible(&vcpu->wq); | |
1662 | + if (swait_active(&vcpu->wq)) | |
1663 | + swake_up(&vcpu->wq); | |
1664 | } | |
1665 | ||
1666 | /* low level hrtimer wake routine */ | |
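
The waitqueue-to-swait conversions in these KVM paths swap the full-featured wait queues for simple wait queues, whose wakeup is wake-one under a raw lock and therefore runs in bounded time, which is what makes it usable from the contexts RT cares about. A userspace sketch of the wake-one property using a condition variable (an analogy, not the swait API):

    /* Sketch: wake-one semantics in the style of swake_up(), modeled
     * with a condition variable; pthread_cond_signal wakes at most one
     * waiter, the property swait relies on. */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
    static bool event;

    static void *waiter(void *arg)
    {
            pthread_mutex_lock(&lock);
            while (!event)
                    pthread_cond_wait(&cond, &lock);
            event = false;                  /* consume the event */
            pthread_mutex_unlock(&lock);
            printf("waiter %ld woken\n", (long)arg);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, waiter, (void *)1L);

            pthread_mutex_lock(&lock);
            event = true;
            pthread_cond_signal(&cond);     /* wake exactly one waiter */
            pthread_mutex_unlock(&lock);

            pthread_join(t, NULL);
            return 0;
    }
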
1667 | diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig | |
1668 | index db49e0d796b1..1d2be228661c 100644 | |
1669 | --- a/arch/powerpc/Kconfig | |
1670 | +++ b/arch/powerpc/Kconfig | |
1671 | @@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT | |
1672 | ||
1673 | config RWSEM_GENERIC_SPINLOCK | |
1674 | bool | |
1675 | + default y if PREEMPT_RT_FULL | |
1676 | ||
1677 | config RWSEM_XCHGADD_ALGORITHM | |
1678 | bool | |
1679 | - default y | |
1680 | + default y if !PREEMPT_RT_FULL | |
1681 | ||
1682 | config GENERIC_LOCKBREAK | |
1683 | bool | |
1684 | @@ -141,6 +142,7 @@ config PPC | |
1685 | select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST | |
1686 | select GENERIC_STRNCPY_FROM_USER | |
1687 | select GENERIC_STRNLEN_USER | |
1688 | + select HAVE_PREEMPT_LAZY | |
1689 | select HAVE_MOD_ARCH_SPECIFIC | |
1690 | select MODULES_USE_ELF_RELA | |
1691 | select CLONE_BACKWARDS | |
1692 | @@ -319,7 +321,7 @@ menu "Kernel options" | |
1693 | ||
1694 | config HIGHMEM | |
1695 | bool "High memory support" | |
1696 | - depends on PPC32 | |
1697 | + depends on PPC32 && !PREEMPT_RT_FULL | |
1698 | ||
1699 | source kernel/Kconfig.hz | |
1700 | source kernel/Kconfig.preempt | |
1701 | diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h | |
1702 | index cfa758c6b4f6..f8673ff84b31 100644 | |
1703 | --- a/arch/powerpc/include/asm/kvm_host.h | |
1704 | +++ b/arch/powerpc/include/asm/kvm_host.h | |
1705 | @@ -286,7 +286,7 @@ struct kvmppc_vcore { | |
1706 | struct list_head runnable_threads; | |
1707 | struct list_head preempt_list; | |
1708 | spinlock_t lock; | |
1709 | - wait_queue_head_t wq; | |
1710 | + struct swait_queue_head wq; | |
1711 | spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ | |
1712 | u64 stolen_tb; | |
1713 | u64 preempt_tb; | |
1714 | @@ -626,7 +626,7 @@ struct kvm_vcpu_arch { | |
1715 | u8 prodded; | |
1716 | u32 last_inst; | |
1717 | ||
1718 | - wait_queue_head_t *wqp; | |
1719 | + struct swait_queue_head *wqp; | |
1720 | struct kvmppc_vcore *vcore; | |
1721 | int ret; | |
1722 | int trap; | |
1723 | diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h | |
1724 | index 7efee4a3240b..40e6fa1b85b2 100644 | |
1725 | --- a/arch/powerpc/include/asm/thread_info.h | |
1726 | +++ b/arch/powerpc/include/asm/thread_info.h | |
1727 | @@ -42,6 +42,8 @@ struct thread_info { | |
1728 | int cpu; /* cpu we're on */ | |
1729 | int preempt_count; /* 0 => preemptable, | |
1730 | <0 => BUG */ | |
1731 | + int preempt_lazy_count; /* 0 => preemptable, | |
1732 | + <0 => BUG */ | |
1733 | unsigned long local_flags; /* private flags for thread */ | |
1734 | ||
1735 | /* low level flags - has atomic operations done on it */ | |
1736 | @@ -82,8 +84,7 @@ static inline struct thread_info *current_thread_info(void) | |
1737 | #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ | |
1738 | #define TIF_SIGPENDING 1 /* signal pending */ | |
1739 | #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ | |
1740 | -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling | |
1741 | - TIF_NEED_RESCHED */ | |
1742 | +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ | |
1743 | #define TIF_32BIT 4 /* 32 bit binary */ | |
1744 | #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ | |
1745 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
1746 | @@ -101,6 +102,8 @@ static inline struct thread_info *current_thread_info(void) | |
1747 | #if defined(CONFIG_PPC64) | |
1748 | #define TIF_ELF2ABI 18 /* function descriptors must die! */ | |
1749 | #endif | |
1750 | +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling | |
1751 | + TIF_NEED_RESCHED */ | |
1752 | ||
1753 | /* as above, but as bit values */ | |
1754 | #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) | |
1755 | @@ -119,14 +122,16 @@ static inline struct thread_info *current_thread_info(void) | |
1756 | #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) | |
1757 | #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) | |
1758 | #define _TIF_NOHZ (1<<TIF_NOHZ) | |
1759 | +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) | |
1760 | #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ | |
1761 | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ | |
1762 | _TIF_NOHZ) | |
1763 | ||
1764 | #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ | |
1765 | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
1766 | - _TIF_RESTORE_TM) | |
1767 | + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) | |
1768 | #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) | |
1769 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
1770 | ||
1771 | /* Bits in local_flags */ | |
1772 | /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ | |
1773 | diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c | |
1774 | index 221d584d089f..d6d0c59ef8ae 100644 | |
1775 | --- a/arch/powerpc/kernel/asm-offsets.c | |
1776 | +++ b/arch/powerpc/kernel/asm-offsets.c | |
1777 | @@ -160,6 +160,7 @@ int main(void) | |
1778 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | |
1779 | DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); | |
1780 | DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); | |
1781 | + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); | |
1782 | DEFINE(TI_TASK, offsetof(struct thread_info, task)); | |
1783 | DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); | |
1784 | ||
1785 | diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S | |
1786 | index 2405631e91a2..c21b4b42eaa0 100644 | |
1787 | --- a/arch/powerpc/kernel/entry_32.S | |
1788 | +++ b/arch/powerpc/kernel/entry_32.S | |
1789 | @@ -818,7 +818,14 @@ resume_kernel: | |
1790 | cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1791 | bne restore | |
1792 | andi. r8,r8,_TIF_NEED_RESCHED | |
1793 | + bne+ 1f | |
1794 | + lwz r0,TI_PREEMPT_LAZY(r9) | |
1795 | + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ | |
1796 | + bne restore | |
1797 | + lwz r0,TI_FLAGS(r9) | |
1798 | + andi. r0,r0,_TIF_NEED_RESCHED_LAZY | |
1799 | beq+ restore | |
1800 | +1: | |
1801 | lwz r3,_MSR(r1) | |
1802 | andi. r0,r3,MSR_EE /* interrupts off? */ | |
1803 | beq restore /* don't schedule if so */ | |
1804 | @@ -829,11 +836,11 @@ resume_kernel: | |
1805 | */ | |
1806 | bl trace_hardirqs_off | |
1807 | #endif | |
1808 | -1: bl preempt_schedule_irq | |
1809 | +2: bl preempt_schedule_irq | |
1810 | CURRENT_THREAD_INFO(r9, r1) | |
1811 | lwz r3,TI_FLAGS(r9) | |
1812 | - andi. r0,r3,_TIF_NEED_RESCHED | |
1813 | - bne- 1b | |
1814 | + andi. r0,r3,_TIF_NEED_RESCHED_MASK | |
1815 | + bne- 2b | |
1816 | #ifdef CONFIG_TRACE_IRQFLAGS | |
1817 | /* And now, to properly rebalance the above, we tell lockdep they | |
1818 | * are being turned back on, which will happen when we return | |
1819 | @@ -1154,7 +1161,7 @@ global_dbcr0: | |
1820 | #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ | |
1821 | ||
1822 | do_work: /* r10 contains MSR_KERNEL here */ | |
1823 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1824 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1825 | beq do_user_signal | |
1826 | ||
1827 | do_resched: /* r10 contains MSR_KERNEL here */ | |
1828 | @@ -1175,7 +1182,7 @@ recheck: | |
1829 | MTMSRD(r10) /* disable interrupts */ | |
1830 | CURRENT_THREAD_INFO(r9, r1) | |
1831 | lwz r9,TI_FLAGS(r9) | |
1832 | - andi. r0,r9,_TIF_NEED_RESCHED | |
1833 | + andi. r0,r9,_TIF_NEED_RESCHED_MASK | |
1834 | bne- do_resched | |
1835 | andi. r0,r9,_TIF_USER_WORK_MASK | |
1836 | beq restore_user | |
1837 | diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S | |
1838 | index edba294620db..1aae3fdb0c2a 100644 | |
1839 | --- a/arch/powerpc/kernel/entry_64.S | |
1840 | +++ b/arch/powerpc/kernel/entry_64.S | |
1841 | @@ -683,7 +683,7 @@ _GLOBAL(ret_from_except_lite) | |
1842 | #else | |
1843 | beq restore | |
1844 | #endif | |
1845 | -1: andi. r0,r4,_TIF_NEED_RESCHED | |
1846 | +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1847 | beq 2f | |
1848 | bl restore_interrupts | |
1849 | SCHEDULE_USER | |
1850 | @@ -745,10 +745,18 @@ resume_kernel: | |
1851 | ||
1852 | #ifdef CONFIG_PREEMPT | |
1853 | /* Check if we need to preempt */ | |
1854 | + lwz r8,TI_PREEMPT(r9) | |
1855 | + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ | |
1856 | + bne restore | |
1857 | andi. r0,r4,_TIF_NEED_RESCHED | |
1858 | + bne+ check_count | |
1859 | + | |
1860 | + andi. r0,r4,_TIF_NEED_RESCHED_LAZY | |
1861 | beq+ restore | |
1862 | + lwz r8,TI_PREEMPT_LAZY(r9) | |
1863 | + | |
1864 | /* Check that preempt_count() == 0 and interrupts are enabled */ | |
1865 | - lwz r8,TI_PREEMPT(r9) | |
1866 | +check_count: | |
1867 | cmpwi cr1,r8,0 | |
1868 | ld r0,SOFTE(r1) | |
1869 | cmpdi r0,0 | |
1870 | @@ -765,7 +773,7 @@ resume_kernel: | |
1871 | /* Re-test flags and eventually loop */ | |
1872 | CURRENT_THREAD_INFO(r9, r1) | |
1873 | ld r4,TI_FLAGS(r9) | |
1874 | - andi. r0,r4,_TIF_NEED_RESCHED | |
1875 | + andi. r0,r4,_TIF_NEED_RESCHED_MASK | |
1876 | bne 1b | |
1877 | ||
1878 | /* | |
1879 | diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c | |
1880 | index 290559df1e8b..070afa6da35d 100644 | |
1881 | --- a/arch/powerpc/kernel/irq.c | |
1882 | +++ b/arch/powerpc/kernel/irq.c | |
1883 | @@ -614,6 +614,7 @@ void irq_ctx_init(void) | |
1884 | } | |
1885 | } | |
1886 | ||
1887 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1888 | void do_softirq_own_stack(void) | |
1889 | { | |
1890 | struct thread_info *curtp, *irqtp; | |
1891 | @@ -631,6 +632,7 @@ void do_softirq_own_stack(void) | |
1892 | if (irqtp->flags) | |
1893 | set_bits(irqtp->flags, &curtp->flags); | |
1894 | } | |
1895 | +#endif | |
1896 | ||
1897 | irq_hw_number_t virq_to_hw(unsigned int virq) | |
1898 | { | |
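
do_softirq_own_stack() and the call_do_softirq helpers are compiled out under PREEMPT_RT because softirqs then run in ordinary task context and never need a private stack. For illustration, running a function on its own stack can be sketched in userspace with the ucontext API; glibc provides it, but this is an analogy, not the kernel mechanism:

    /* Sketch: run a function on its own stack, in the spirit of
     * do_softirq_own_stack(), via getcontext/makecontext/swapcontext. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <ucontext.h>

    static ucontext_t main_ctx, softirq_ctx;

    static void softirq_body(void)
    {
            int probe;

            printf("softirq running near %p on its own stack\n",
                   (void *)&probe);
    }

    int main(void)
    {
            int probe;

            getcontext(&softirq_ctx);
            softirq_ctx.uc_stack.ss_sp = malloc(64 * 1024);
            softirq_ctx.uc_stack.ss_size = 64 * 1024;
            softirq_ctx.uc_link = &main_ctx;        /* return here when done */
            makecontext(&softirq_ctx, softirq_body, 0);

            printf("caller near %p\n", (void *)&probe);
            swapcontext(&main_ctx, &softirq_ctx);   /* switch stacks and back */
            free(softirq_ctx.uc_stack.ss_sp);
            return 0;
    }
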
1899 | diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S | |
1900 | index ed3ab509faca..8b261416c070 100644 | |
1901 | --- a/arch/powerpc/kernel/misc_32.S | |
1902 | +++ b/arch/powerpc/kernel/misc_32.S | |
1903 | @@ -40,6 +40,7 @@ | |
1904 | * We store the saved ksp_limit in the unused part | |
1905 | * of the STACK_FRAME_OVERHEAD | |
1906 | */ | |
1907 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1908 | _GLOBAL(call_do_softirq) | |
1909 | mflr r0 | |
1910 | stw r0,4(r1) | |
1911 | @@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq) | |
1912 | stw r10,THREAD+KSP_LIMIT(r2) | |
1913 | mtlr r0 | |
1914 | blr | |
1915 | +#endif | |
1916 | ||
1917 | /* | |
1918 | * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); | |
1919 | diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S | |
1920 | index db475d41b57a..96b7ef80e05d 100644 | |
1921 | --- a/arch/powerpc/kernel/misc_64.S | |
1922 | +++ b/arch/powerpc/kernel/misc_64.S | |
1923 | @@ -30,6 +30,7 @@ | |
1924 | ||
1925 | .text | |
1926 | ||
1927 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
1928 | _GLOBAL(call_do_softirq) | |
1929 | mflr r0 | |
1930 | std r0,16(r1) | |
1931 | @@ -40,6 +41,7 @@ _GLOBAL(call_do_softirq) | |
1932 | ld r0,16(r1) | |
1933 | mtlr r0 | |
1934 | blr | |
1935 | +#endif | |
1936 | ||
1937 | _GLOBAL(call_do_irq) | |
1938 | mflr r0 | |
1939 | diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig | |
1940 | index c2024ac9d4e8..2303788da7e1 100644 | |
1941 | --- a/arch/powerpc/kvm/Kconfig | |
1942 | +++ b/arch/powerpc/kvm/Kconfig | |
1943 | @@ -172,6 +172,7 @@ config KVM_E500MC | |
1944 | config KVM_MPIC | |
1945 | bool "KVM in-kernel MPIC emulation" | |
1946 | depends on KVM && E500 | |
1947 | + depends on !PREEMPT_RT_FULL | |
1948 | select HAVE_KVM_IRQCHIP | |
1949 | select HAVE_KVM_IRQFD | |
1950 | select HAVE_KVM_IRQ_ROUTING | |
1951 | diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c | |
1952 | index a7352b59e6f9..df34a6432873 100644 | |
1953 | --- a/arch/powerpc/kvm/book3s_hv.c | |
1954 | +++ b/arch/powerpc/kvm/book3s_hv.c | |
1955 | @@ -114,11 +114,11 @@ static bool kvmppc_ipi_thread(int cpu) | |
1956 | static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) | |
1957 | { | |
1958 | int cpu; | |
1959 | - wait_queue_head_t *wqp; | |
1960 | + struct swait_queue_head *wqp; | |
1961 | ||
1962 | wqp = kvm_arch_vcpu_wq(vcpu); | |
1963 | - if (waitqueue_active(wqp)) { | |
1964 | - wake_up_interruptible(wqp); | |
1965 | + if (swait_active(wqp)) { | |
1966 | + swake_up(wqp); | |
1967 | ++vcpu->stat.halt_wakeup; | |
1968 | } | |
1969 | ||
1970 | @@ -707,8 +707,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) | |
1971 | tvcpu->arch.prodded = 1; | |
1972 | smp_mb(); | |
1973 | if (vcpu->arch.ceded) { | |
1974 | - if (waitqueue_active(&vcpu->wq)) { | |
1975 | - wake_up_interruptible(&vcpu->wq); | |
1976 | + if (swait_active(&vcpu->wq)) { | |
1977 | + swake_up(&vcpu->wq); | |
1978 | vcpu->stat.halt_wakeup++; | |
1979 | } | |
1980 | } | |
1981 | @@ -1447,7 +1447,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) | |
1982 | INIT_LIST_HEAD(&vcore->runnable_threads); | |
1983 | spin_lock_init(&vcore->lock); | |
1984 | spin_lock_init(&vcore->stoltb_lock); | |
1985 | - init_waitqueue_head(&vcore->wq); | |
1986 | + init_swait_queue_head(&vcore->wq); | |
1987 | vcore->preempt_tb = TB_NIL; | |
1988 | vcore->lpcr = kvm->arch.lpcr; | |
1989 | vcore->first_vcpuid = core * threads_per_subcore; | |
1990 | @@ -2519,10 +2519,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) | |
1991 | { | |
1992 | struct kvm_vcpu *vcpu; | |
1993 | int do_sleep = 1; | |
1994 | + DECLARE_SWAITQUEUE(wait); | |
1995 | ||
1996 | - DEFINE_WAIT(wait); | |
1997 | - | |
1998 | - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); | |
1999 | + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE); | |
2000 | ||
2001 | /* | |
2002 | * Check one last time for pending exceptions and ceded state after | |
2003 | @@ -2536,7 +2535,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) | |
2004 | } | |
2005 | ||
2006 | if (!do_sleep) { | |
2007 | - finish_wait(&vc->wq, &wait); | |
2008 | + finish_swait(&vc->wq, &wait); | |
2009 | return; | |
2010 | } | |
2011 | ||
2012 | @@ -2544,7 +2543,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) | |
2013 | trace_kvmppc_vcore_blocked(vc, 0); | |
2014 | spin_unlock(&vc->lock); | |
2015 | schedule(); | |
2016 | - finish_wait(&vc->wq, &wait); | |
2017 | + finish_swait(&vc->wq, &wait); | |
2018 | spin_lock(&vc->lock); | |
2019 | vc->vcore_state = VCORE_INACTIVE; | |
2020 | trace_kvmppc_vcore_blocked(vc, 1); | |
2021 | @@ -2600,7 +2599,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |
2022 | kvmppc_start_thread(vcpu, vc); | |
2023 | trace_kvm_guest_enter(vcpu); | |
2024 | } else if (vc->vcore_state == VCORE_SLEEPING) { | |
2025 | - wake_up(&vc->wq); | |
2026 | + swake_up(&vc->wq); | |
2027 | } | |
2028 | ||
2029 | } | |
2030 | diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c | |
2031 | index 3f175e8aedb4..c4c02f91904c 100644 | |
2032 | --- a/arch/powerpc/platforms/ps3/device-init.c | |
2033 | +++ b/arch/powerpc/platforms/ps3/device-init.c | |
2034 | @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, | |
2035 | } | |
2036 | pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); | |
2037 | ||
2038 | - res = wait_event_interruptible(dev->done.wait, | |
2039 | + res = swait_event_interruptible(dev->done.wait, | |
2040 | dev->done.done || kthread_should_stop()); | |
2041 | if (kthread_should_stop()) | |
2042 | res = -EINTR; | |
2043 | diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h | |
2044 | index e9a983f40a24..bbdc539fb3c6 100644 | |
2045 | --- a/arch/s390/include/asm/kvm_host.h | |
2046 | +++ b/arch/s390/include/asm/kvm_host.h | |
2047 | @@ -427,7 +427,7 @@ struct kvm_s390_irq_payload { | |
2048 | struct kvm_s390_local_interrupt { | |
2049 | spinlock_t lock; | |
2050 | struct kvm_s390_float_interrupt *float_int; | |
2051 | - wait_queue_head_t *wq; | |
2052 | + struct swait_queue_head *wq; | |
2053 | atomic_t *cpuflags; | |
2054 | DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); | |
2055 | struct kvm_s390_irq_payload irq; | |
2056 | diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c | |
2057 | index 6a75352f453c..cc862c486002 100644 | |
2058 | --- a/arch/s390/kvm/interrupt.c | |
2059 | +++ b/arch/s390/kvm/interrupt.c | |
2060 | @@ -868,13 +868,13 @@ no_timer: | |
2061 | ||
2062 | void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) | |
2063 | { | |
2064 | - if (waitqueue_active(&vcpu->wq)) { | |
2065 | + if (swait_active(&vcpu->wq)) { | |
2066 | /* | |
2067 | * The vcpu gave up the cpu voluntarily, mark it as a good | |
2068 | * yield-candidate. | |
2069 | */ | |
2070 | vcpu->preempted = true; | |
2071 | - wake_up_interruptible(&vcpu->wq); | |
2072 | + swake_up(&vcpu->wq); | |
2073 | vcpu->stat.halt_wakeup++; | |
2074 | } | |
2075 | } | |
2076 | diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c | |
2077 | index 6c0378c0b8b5..abd58b4dff97 100644 | |
2078 | --- a/arch/sh/kernel/irq.c | |
2079 | +++ b/arch/sh/kernel/irq.c | |
2080 | @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) | |
2081 | hardirq_ctx[cpu] = NULL; | |
2082 | } | |
2083 | ||
2084 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2085 | void do_softirq_own_stack(void) | |
2086 | { | |
2087 | struct thread_info *curctx; | |
2088 | @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) | |
2089 | "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" | |
2090 | ); | |
2091 | } | |
2092 | +#endif | |
2093 | #else | |
2094 | static inline void handle_one_irq(unsigned int irq) | |
2095 | { | |
2096 | diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig | |
2097 | index 56442d2d7bbc..8c9598f534c9 100644 | |
2098 | --- a/arch/sparc/Kconfig | |
2099 | +++ b/arch/sparc/Kconfig | |
2100 | @@ -189,12 +189,10 @@ config NR_CPUS | |
2101 | source kernel/Kconfig.hz | |
2102 | ||
2103 | config RWSEM_GENERIC_SPINLOCK | |
2104 | - bool | |
2105 | - default y if SPARC32 | |
2106 | + def_bool PREEMPT_RT_FULL | |
2107 | ||
2108 | config RWSEM_XCHGADD_ALGORITHM | |
2109 | - bool | |
2110 | - default y if SPARC64 | |
2111 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
2112 | ||
2113 | config GENERIC_HWEIGHT | |
2114 | bool | |
2115 | diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c | |
2116 | index e22416ce56ea..d359de71153a 100644 | |
2117 | --- a/arch/sparc/kernel/irq_64.c | |
2118 | +++ b/arch/sparc/kernel/irq_64.c | |
2119 | @@ -854,6 +854,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) | |
2120 | set_irq_regs(old_regs); | |
2121 | } | |
2122 | ||
2123 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2124 | void do_softirq_own_stack(void) | |
2125 | { | |
2126 | void *orig_sp, *sp = softirq_stack[smp_processor_id()]; | |
2127 | @@ -868,6 +869,7 @@ void do_softirq_own_stack(void) | |
2128 | __asm__ __volatile__("mov %0, %%sp" | |
2129 | : : "r" (orig_sp)); | |
2130 | } | |
2131 | +#endif | |
2132 | ||
2133 | #ifdef CONFIG_HOTPLUG_CPU | |
2134 | void fixup_irqs(void) | |
2135 | diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig | |
2136 | index 436639a31624..6ee1dd0deadc 100644 | |
2137 | --- a/arch/x86/Kconfig | |
2138 | +++ b/arch/x86/Kconfig | |
2139 | @@ -17,6 +17,7 @@ config X86_64 | |
2140 | ### Arch settings | |
2141 | config X86 | |
2142 | def_bool y | |
2143 | + select HAVE_PREEMPT_LAZY | |
2144 | select ACPI_LEGACY_TABLES_LOOKUP if ACPI | |
2145 | select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI | |
2146 | select ANON_INODES | |
2147 | @@ -212,8 +213,11 @@ config ARCH_MAY_HAVE_PC_FDC | |
2148 | def_bool y | |
2149 | depends on ISA_DMA_API | |
2150 | ||
2151 | +config RWSEM_GENERIC_SPINLOCK | |
2152 | + def_bool PREEMPT_RT_FULL | |
2153 | + | |
2154 | config RWSEM_XCHGADD_ALGORITHM | |
2155 | - def_bool y | |
2156 | + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL | |
2157 | ||
2158 | config GENERIC_CALIBRATE_DELAY | |
2159 | def_bool y | |
2160 | @@ -848,7 +852,7 @@ config IOMMU_HELPER | |
2161 | config MAXSMP | |
2162 | bool "Enable Maximum number of SMP Processors and NUMA Nodes" | |
2163 | depends on X86_64 && SMP && DEBUG_KERNEL | |
2164 | - select CPUMASK_OFFSTACK | |
2165 | + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL | |
2166 | ---help--- | |
2167 | Enable maximum number of CPUS and NUMA Nodes for this architecture. | |
2168 | If unsure, say N. | |
2169 | diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c | |
2170 | index 3633ad6145c5..c6d5458ee7f9 100644 | |
2171 | --- a/arch/x86/crypto/aesni-intel_glue.c | |
2172 | +++ b/arch/x86/crypto/aesni-intel_glue.c | |
2173 | @@ -383,14 +383,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |
2174 | err = blkcipher_walk_virt(desc, &walk); | |
2175 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2176 | ||
2177 | - kernel_fpu_begin(); | |
2178 | while ((nbytes = walk.nbytes)) { | |
2179 | + kernel_fpu_begin(); | |
2180 | aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
2181 | - nbytes & AES_BLOCK_MASK); | |
2182 | + nbytes & AES_BLOCK_MASK); | |
2183 | + kernel_fpu_end(); | |
2184 | nbytes &= AES_BLOCK_SIZE - 1; | |
2185 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2186 | } | |
2187 | - kernel_fpu_end(); | |
2188 | ||
2189 | return err; | |
2190 | } | |
2191 | @@ -407,14 +407,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |
2192 | err = blkcipher_walk_virt(desc, &walk); | |
2193 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2194 | ||
2195 | - kernel_fpu_begin(); | |
2196 | while ((nbytes = walk.nbytes)) { | |
2197 | + kernel_fpu_begin(); | |
2198 | aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
2199 | nbytes & AES_BLOCK_MASK); | |
2200 | + kernel_fpu_end(); | |
2201 | nbytes &= AES_BLOCK_SIZE - 1; | |
2202 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2203 | } | |
2204 | - kernel_fpu_end(); | |
2205 | ||
2206 | return err; | |
2207 | } | |
2208 | @@ -431,14 +431,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |
2209 | err = blkcipher_walk_virt(desc, &walk); | |
2210 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2211 | ||
2212 | - kernel_fpu_begin(); | |
2213 | while ((nbytes = walk.nbytes)) { | |
2214 | + kernel_fpu_begin(); | |
2215 | aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
2216 | nbytes & AES_BLOCK_MASK, walk.iv); | |
2217 | + kernel_fpu_end(); | |
2218 | nbytes &= AES_BLOCK_SIZE - 1; | |
2219 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2220 | } | |
2221 | - kernel_fpu_end(); | |
2222 | ||
2223 | return err; | |
2224 | } | |
2225 | @@ -455,14 +455,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |
2226 | err = blkcipher_walk_virt(desc, &walk); | |
2227 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2228 | ||
2229 | - kernel_fpu_begin(); | |
2230 | while ((nbytes = walk.nbytes)) { | |
2231 | + kernel_fpu_begin(); | |
2232 | aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
2233 | nbytes & AES_BLOCK_MASK, walk.iv); | |
2234 | + kernel_fpu_end(); | |
2235 | nbytes &= AES_BLOCK_SIZE - 1; | |
2236 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2237 | } | |
2238 | - kernel_fpu_end(); | |
2239 | ||
2240 | return err; | |
2241 | } | |
2242 | @@ -514,18 +514,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, | |
2243 | err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); | |
2244 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2245 | ||
2246 | - kernel_fpu_begin(); | |
2247 | while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { | |
2248 | + kernel_fpu_begin(); | |
2249 | aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, | |
2250 | nbytes & AES_BLOCK_MASK, walk.iv); | |
2251 | + kernel_fpu_end(); | |
2252 | nbytes &= AES_BLOCK_SIZE - 1; | |
2253 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2254 | } | |
2255 | if (walk.nbytes) { | |
2256 | + kernel_fpu_begin(); | |
2257 | ctr_crypt_final(ctx, &walk); | |
2258 | + kernel_fpu_end(); | |
2259 | err = blkcipher_walk_done(desc, &walk, 0); | |
2260 | } | |
2261 | - kernel_fpu_end(); | |
2262 | ||
2263 | return err; | |
2264 | } | |
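
kernel_fpu_begin() disables preemption, so wrapping the whole blkcipher walk in one begin/end pair makes the non-preemptible region grow with the request size. Moving the pair inside the loop bounds each region to a single block batch, at the cost of extra save/restore work. A toy sketch that makes the difference visible with counters; fpu_begin/fpu_end here are stand-ins, not the kernel functions:

    /* Sketch: bounding the non-preemptible region. The counter tracks
     * the longest stretch of work executed with "preemption off". */
    #include <stdio.h>

    static int preempt_off, worst;

    static void fpu_begin(void)   { preempt_off = 0; }
    static void fpu_end(void)     { if (preempt_off > worst) worst = preempt_off; }
    static void crypt_block(void) { preempt_off++; }

    int main(void)
    {
            int i, nblocks = 1000;

            /* Old shape: one region around the whole walk. */
            fpu_begin();
            for (i = 0; i < nblocks; i++)
                    crypt_block();
            fpu_end();
            printf("single region: %d blocks with preemption off\n", worst);

            /* New shape: one region per iteration. */
            worst = 0;
            for (i = 0; i < nblocks; i++) {
                    fpu_begin();
                    crypt_block();
                    fpu_end();
            }
            printf("per-block regions: %d block with preemption off\n", worst);
            return 0;
    }
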
2265 | diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c | |
2266 | index 8648158f3916..d7699130ee36 100644 | |
2267 | --- a/arch/x86/crypto/cast5_avx_glue.c | |
2268 | +++ b/arch/x86/crypto/cast5_avx_glue.c | |
2269 | @@ -59,7 +59,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) | |
2270 | static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
2271 | bool enc) | |
2272 | { | |
2273 | - bool fpu_enabled = false; | |
2274 | + bool fpu_enabled; | |
2275 | struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); | |
2276 | const unsigned int bsize = CAST5_BLOCK_SIZE; | |
2277 | unsigned int nbytes; | |
2278 | @@ -75,7 +75,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
2279 | u8 *wsrc = walk->src.virt.addr; | |
2280 | u8 *wdst = walk->dst.virt.addr; | |
2281 | ||
2282 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
2283 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
2284 | ||
2285 | /* Process multi-block batch */ | |
2286 | if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { | |
2287 | @@ -103,10 +103,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, | |
2288 | } while (nbytes >= bsize); | |
2289 | ||
2290 | done: | |
2291 | + cast5_fpu_end(fpu_enabled); | |
2292 | err = blkcipher_walk_done(desc, walk, nbytes); | |
2293 | } | |
2294 | - | |
2295 | - cast5_fpu_end(fpu_enabled); | |
2296 | return err; | |
2297 | } | |
2298 | ||
2299 | @@ -227,7 +226,7 @@ done: | |
2300 | static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
2301 | struct scatterlist *src, unsigned int nbytes) | |
2302 | { | |
2303 | - bool fpu_enabled = false; | |
2304 | + bool fpu_enabled; | |
2305 | struct blkcipher_walk walk; | |
2306 | int err; | |
2307 | ||
2308 | @@ -236,12 +235,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
2309 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2310 | ||
2311 | while ((nbytes = walk.nbytes)) { | |
2312 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
2313 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
2314 | nbytes = __cbc_decrypt(desc, &walk); | |
2315 | + cast5_fpu_end(fpu_enabled); | |
2316 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2317 | } | |
2318 | - | |
2319 | - cast5_fpu_end(fpu_enabled); | |
2320 | return err; | |
2321 | } | |
2322 | ||
2323 | @@ -311,7 +309,7 @@ done: | |
2324 | static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
2325 | struct scatterlist *src, unsigned int nbytes) | |
2326 | { | |
2327 | - bool fpu_enabled = false; | |
2328 | + bool fpu_enabled; | |
2329 | struct blkcipher_walk walk; | |
2330 | int err; | |
2331 | ||
2332 | @@ -320,13 +318,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, | |
2333 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | |
2334 | ||
2335 | while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { | |
2336 | - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); | |
2337 | + fpu_enabled = cast5_fpu_begin(false, nbytes); | |
2338 | nbytes = __ctr_crypt(desc, &walk); | |
2339 | + cast5_fpu_end(fpu_enabled); | |
2340 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2341 | } | |
2342 | ||
2343 | - cast5_fpu_end(fpu_enabled); | |
2344 | - | |
2345 | if (walk.nbytes) { | |
2346 | ctr_crypt_final(desc, &walk); | |
2347 | err = blkcipher_walk_done(desc, &walk, 0); | |
2348 | diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c | |
2349 | index 6a85598931b5..3a506ce7ed93 100644 | |
2350 | --- a/arch/x86/crypto/glue_helper.c | |
2351 | +++ b/arch/x86/crypto/glue_helper.c | |
2352 | @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
2353 | void *ctx = crypto_blkcipher_ctx(desc->tfm); | |
2354 | const unsigned int bsize = 128 / 8; | |
2355 | unsigned int nbytes, i, func_bytes; | |
2356 | - bool fpu_enabled = false; | |
2357 | + bool fpu_enabled; | |
2358 | int err; | |
2359 | ||
2360 | err = blkcipher_walk_virt(desc, walk); | |
2361 | @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
2362 | u8 *wdst = walk->dst.virt.addr; | |
2363 | ||
2364 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2365 | - desc, fpu_enabled, nbytes); | |
2366 | + desc, false, nbytes); | |
2367 | ||
2368 | for (i = 0; i < gctx->num_funcs; i++) { | |
2369 | func_bytes = bsize * gctx->funcs[i].num_blocks; | |
2370 | @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, | |
2371 | } | |
2372 | ||
2373 | done: | |
2374 | + glue_fpu_end(fpu_enabled); | |
2375 | err = blkcipher_walk_done(desc, walk, nbytes); | |
2376 | } | |
2377 | ||
2378 | - glue_fpu_end(fpu_enabled); | |
2379 | return err; | |
2380 | } | |
2381 | ||
2382 | @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
2383 | struct scatterlist *src, unsigned int nbytes) | |
2384 | { | |
2385 | const unsigned int bsize = 128 / 8; | |
2386 | - bool fpu_enabled = false; | |
2387 | + bool fpu_enabled; | |
2388 | struct blkcipher_walk walk; | |
2389 | int err; | |
2390 | ||
2391 | @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, | |
2392 | ||
2393 | while ((nbytes = walk.nbytes)) { | |
2394 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2395 | - desc, fpu_enabled, nbytes); | |
2396 | + desc, false, nbytes); | |
2397 | nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); | |
2398 | + glue_fpu_end(fpu_enabled); | |
2399 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2400 | } | |
2401 | ||
2402 | - glue_fpu_end(fpu_enabled); | |
2403 | return err; | |
2404 | } | |
2405 | EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); | |
2406 | @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
2407 | struct scatterlist *src, unsigned int nbytes) | |
2408 | { | |
2409 | const unsigned int bsize = 128 / 8; | |
2410 | - bool fpu_enabled = false; | |
2411 | + bool fpu_enabled; | |
2412 | struct blkcipher_walk walk; | |
2413 | int err; | |
2414 | ||
2415 | @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, | |
2416 | ||
2417 | while ((nbytes = walk.nbytes) >= bsize) { | |
2418 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2419 | - desc, fpu_enabled, nbytes); | |
2420 | + desc, false, nbytes); | |
2421 | nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); | |
2422 | + glue_fpu_end(fpu_enabled); | |
2423 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2424 | } | |
2425 | ||
2426 | - glue_fpu_end(fpu_enabled); | |
2427 | - | |
2428 | if (walk.nbytes) { | |
2429 | glue_ctr_crypt_final_128bit( | |
2430 | gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); | |
2431 | @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
2432 | void *tweak_ctx, void *crypt_ctx) | |
2433 | { | |
2434 | const unsigned int bsize = 128 / 8; | |
2435 | - bool fpu_enabled = false; | |
2436 | + bool fpu_enabled; | |
2437 | struct blkcipher_walk walk; | |
2438 | int err; | |
2439 | ||
2440 | @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, | |
2441 | ||
2442 | /* set minimum length to bsize, for tweak_fn */ | |
2443 | fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2444 | - desc, fpu_enabled, | |
2445 | + desc, false, | |
2446 | nbytes < bsize ? bsize : nbytes); | |
2447 | - | |
2448 | /* calculate first value of T */ | |
2449 | tweak_fn(tweak_ctx, walk.iv, walk.iv); | |
2450 | + glue_fpu_end(fpu_enabled); | |
2451 | ||
2452 | while (nbytes) { | |
2453 | + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, | |
2454 | + desc, false, nbytes); | |
2455 | nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); | |
2456 | ||
2457 | + glue_fpu_end(fpu_enabled); | |
2458 | err = blkcipher_walk_done(desc, &walk, nbytes); | |
2459 | nbytes = walk.nbytes; | |
2460 | } | |
2461 | - | |
2462 | - glue_fpu_end(fpu_enabled); | |
2463 | - | |
2464 | return err; | |
2465 | } | |
2466 | EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); | |
2467 | diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c | |
2468 | index 1a4477cedc49..75a301b6a5b6 100644 | |
2469 | --- a/arch/x86/entry/common.c | |
2470 | +++ b/arch/x86/entry/common.c | |
2471 | @@ -220,7 +220,7 @@ long syscall_trace_enter(struct pt_regs *regs) | |
2472 | ||
2473 | #define EXIT_TO_USERMODE_LOOP_FLAGS \ | |
2474 | (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ | |
2475 | - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY) | |
2476 | + _TIF_NEED_RESCHED_MASK | _TIF_USER_RETURN_NOTIFY) | |
2477 | ||
2478 | static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
2479 | { | |
2480 | @@ -236,9 +236,16 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) | |
2481 | /* We have work to do. */ | |
2482 | local_irq_enable(); | |
2483 | ||
2484 | - if (cached_flags & _TIF_NEED_RESCHED) | |
2485 | + if (cached_flags & _TIF_NEED_RESCHED_MASK) | |
2486 | schedule(); | |
2487 | ||
2488 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
2489 | + if (unlikely(current->forced_info.si_signo)) { | |
2490 | + struct task_struct *t = current; | |
2491 | + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); | |
2492 | + t->forced_info.si_signo = 0; | |
2493 | + } | |
2494 | +#endif | |
2495 | if (cached_flags & _TIF_UPROBE) | |
2496 | uprobe_notify_resume(regs); | |
2497 | ||
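
This is the delivery half of ARCH_RT_DELAYS_SIGNAL_SEND (defined in asm/signal.h below): a trap handler that must not take the signal locks, which are sleeping mutexes on RT, records the siginfo in the task, and the exit-to-usermode loop sends it once the context is fully preemptible. A self-contained sketch of the record-then-deliver shape; the types and helpers are stand-ins:

    /* Sketch: defer a forced signal from trap context to exit-to-user. */
    #include <stdio.h>

    struct siginfo { int si_signo; };

    struct task {
            struct siginfo forced_info;     /* pending signal, 0 = none */
    };

    static void force_sig_info(int signo, struct task *t)
    {
            printf("delivering signal %d\n", signo);
    }

    /* Called from trap context: just record, never lock. */
    static void defer_signal(struct task *t, int signo)
    {
            t->forced_info.si_signo = signo;
    }

    /* Called on the way back to user mode, fully preemptible. */
    static void exit_to_usermode(struct task *t)
    {
            if (t->forced_info.si_signo) {
                    force_sig_info(t->forced_info.si_signo, t);
                    t->forced_info.si_signo = 0;
            }
    }

    int main(void)
    {
            struct task t = { { 0 } };

            defer_signal(&t, 11);           /* e.g. SIGSEGV from a trap */
            exit_to_usermode(&t);
            return 0;
    }
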
2498 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
2499 | index f3b6d54e0042..2d722ee01fc2 100644 | |
2500 | --- a/arch/x86/entry/entry_32.S | |
2501 | +++ b/arch/x86/entry/entry_32.S | |
2502 | @@ -278,8 +278,24 @@ END(ret_from_exception) | |
2503 | ENTRY(resume_kernel) | |
2504 | DISABLE_INTERRUPTS(CLBR_ANY) | |
2505 | need_resched: | |
2506 | + # preempt count == 0 + NEED_RS set? | |
2507 | cmpl $0, PER_CPU_VAR(__preempt_count) | |
2508 | +#ifndef CONFIG_PREEMPT_LAZY | |
2509 | jnz restore_all | |
2510 | +#else | |
2511 | + jz test_int_off | |
2512 | + | |
2513 | + # at least preempt count == 0 ? |
2514 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2515 | + jne restore_all | |
2516 | + | |
2517 | + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? | |
2518 | + jnz restore_all | |
2519 | + | |
2520 | + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp) | |
2521 | + jz restore_all | |
2522 | +test_int_off: | |
2523 | +#endif | |
2524 | testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ? | |
2525 | jz restore_all | |
2526 | call preempt_schedule_irq | |
2527 | diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S | |
2528 | index a55697d19824..316081a2ca85 100644 | |
2529 | --- a/arch/x86/entry/entry_64.S | |
2530 | +++ b/arch/x86/entry/entry_64.S | |
2531 | @@ -579,7 +579,23 @@ retint_kernel: | |
2532 | bt $9, EFLAGS(%rsp) /* were interrupts off? */ | |
2533 | jnc 1f | |
2534 | 0: cmpl $0, PER_CPU_VAR(__preempt_count) | |
2535 | +#ifndef CONFIG_PREEMPT_LAZY | |
2536 | jnz 1f | |
2537 | +#else | |
2538 | + jz do_preempt_schedule_irq | |
2539 | + | |
2540 | + # at least preempt count == 0 ? |
2541 | + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) | |
2542 | + jnz 1f | |
2543 | + | |
2544 | + GET_THREAD_INFO(%rcx) | |
2545 | + cmpl $0, TI_preempt_lazy_count(%rcx) | |
2546 | + jnz 1f | |
2547 | + | |
2548 | + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) | |
2549 | + jnc 1f | |
2550 | +do_preempt_schedule_irq: | |
2551 | +#endif | |
2552 | call preempt_schedule_irq | |
2553 | jmp 0b | |
2554 | 1: | |
2555 | @@ -867,6 +883,7 @@ bad_gs: | |
2556 | jmp 2b | |
2557 | .previous | |
2558 | ||
2559 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2560 | /* Call softirq on interrupt stack. Interrupts are off. */ | |
2561 | ENTRY(do_softirq_own_stack) | |
2562 | pushq %rbp | |
2563 | @@ -879,6 +896,7 @@ ENTRY(do_softirq_own_stack) | |
2564 | decl PER_CPU_VAR(irq_count) | |
2565 | ret | |
2566 | END(do_softirq_own_stack) | |
2567 | +#endif | |
2568 | ||
2569 | #ifdef CONFIG_XEN | |
2570 | idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 | |
2571 | diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h | |
2572 | index 01bcde84d3e4..6f432adc55cd 100644 | |
2573 | --- a/arch/x86/include/asm/preempt.h | |
2574 | +++ b/arch/x86/include/asm/preempt.h | |
2575 | @@ -79,17 +79,46 @@ static __always_inline void __preempt_count_sub(int val) | |
2576 | * a decrement which hits zero means we have no preempt_count and should | |
2577 | * reschedule. | |
2578 | */ | |
2579 | -static __always_inline bool __preempt_count_dec_and_test(void) | |
2580 | +static __always_inline bool ____preempt_count_dec_and_test(void) | |
2581 | { | |
2582 | GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); | |
2583 | } | |
2584 | ||
2585 | +static __always_inline bool __preempt_count_dec_and_test(void) | |
2586 | +{ | |
2587 | + if (____preempt_count_dec_and_test()) | |
2588 | + return true; | |
2589 | +#ifdef CONFIG_PREEMPT_LAZY | |
2590 | + if (current_thread_info()->preempt_lazy_count) | |
2591 | + return false; | |
2592 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2593 | +#else | |
2594 | + return false; | |
2595 | +#endif | |
2596 | +} | |
2597 | + | |
2598 | /* | |
2599 | * Returns true when we need to resched and can (barring IRQ state). | |
2600 | */ | |
2601 | static __always_inline bool should_resched(int preempt_offset) | |
2602 | { | |
2603 | +#ifdef CONFIG_PREEMPT_LAZY | |
2604 | + u32 tmp; | |
2605 | + | |
2606 | + tmp = raw_cpu_read_4(__preempt_count); | |
2607 | + if (tmp == preempt_offset) | |
2608 | + return true; | |
2609 | + | |
2610 | + /* preempt count == 0 ? */ | |
2611 | + tmp &= ~PREEMPT_NEED_RESCHED; | |
2612 | + if (tmp) | |
2613 | + return false; | |
2614 | + if (current_thread_info()->preempt_lazy_count) | |
2615 | + return false; | |
2616 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
2617 | +#else | |
2618 | return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); | |
2619 | +#endif | |
2620 | } | |
2621 | ||
2622 | #ifdef CONFIG_PREEMPT | |
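
The patched __preempt_count_dec_and_test() gains a lazy fallback: when the decrement does not hit zero, a reschedule is still requested if no lazy sections are held and TIF_NEED_RESCHED_LAZY is set. A simplified C rendering of that structure; the folded PREEMPT_NEED_RESCHED sentinel bit is omitted, so this is illustrative only:

    #include <stdbool.h>
    #include <stdio.h>

    static int preempt_count = 2;           /* nested disable level */
    static int preempt_lazy_count;
    static bool need_resched_lazy = true;

    static bool preempt_count_dec_and_test(void)
    {
            if (--preempt_count == 0)
                    return true;            /* fast path: count hit zero */
            if (preempt_lazy_count)
                    return false;           /* still in a lazy section */
            return need_resched_lazy;       /* honour a lazy request */
    }

    int main(void)
    {
            /* count 2 -> 1: fast path misses, lazy request honoured */
            printf("reschedule: %d\n", preempt_count_dec_and_test());
            return 0;
    }
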
2623 | diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h | |
2624 | index 2138c9ae19ee..3f5b4ee2e2c1 100644 | |
2625 | --- a/arch/x86/include/asm/signal.h | |
2626 | +++ b/arch/x86/include/asm/signal.h | |
2627 | @@ -23,6 +23,19 @@ typedef struct { | |
2628 | unsigned long sig[_NSIG_WORDS]; | |
2629 | } sigset_t; | |
2630 | ||
2631 | +/* | |
2632 | + * Because some traps use the IST stack, we must keep preemption | |
2633 | + * disabled while calling do_trap(), but do_trap() may call | |
2634 | + * force_sig_info() which will grab the signal spin_locks for the | |
2635 | + * task, which in PREEMPT_RT_FULL are mutexes. By defining | |
2636 | + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set | |
2637 | + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the | |
2638 | + * trap. | |
2639 | + */ | |
2640 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
2641 | +#define ARCH_RT_DELAYS_SIGNAL_SEND | |
2642 | +#endif | |
2643 | + | |
2644 | #ifndef CONFIG_COMPAT | |
2645 | typedef sigset_t compat_sigset_t; | |
2646 | #endif | |
2647 | diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h | |
2648 | index 58505f01962f..02fa39652cd6 100644 | |
2649 | --- a/arch/x86/include/asm/stackprotector.h | |
2650 | +++ b/arch/x86/include/asm/stackprotector.h | |
2651 | @@ -59,7 +59,7 @@ | |
2652 | */ | |
2653 | static __always_inline void boot_init_stack_canary(void) | |
2654 | { | |
2655 | - u64 canary; | |
2656 | + u64 uninitialized_var(canary); | |
2657 | u64 tsc; | |
2658 | ||
2659 | #ifdef CONFIG_X86_64 | |
2660 | @@ -70,8 +70,15 @@ static __always_inline void boot_init_stack_canary(void) | |
2661 | * of randomness. The TSC only matters for very early init, | |
2662 | * there it already has some randomness on most systems. Later | |
2663 | * on during the bootup the random pool has true entropy too. | |
2664 | + * | |
2665 | + * For preempt-rt we need to weaken the randomness a bit, as | |
2666 | + * we can't call into the random generator from atomic context | |
2667 | + * due to locking constraints. We just leave canary | |
2668 | + * uninitialized and use the TSC based randomness on top of it. | |
2669 | */ | |
2670 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
2671 | get_random_bytes(&canary, sizeof(canary)); | |
2672 | +#endif | |
2673 | tsc = rdtsc(); | |
2674 | canary += tsc + (tsc << 32UL); | |
2675 | ||
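
On RT the canary cannot be seeded from get_random_bytes() in this atomic context, so the code falls back to TSC-derived entropy mixed into whatever happens to sit in the deliberately uninitialized variable. A userspace approximation of the mixing step, with clock_gettime() standing in for rdtsc():

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t rdtsc(void)             /* stand-in for the real TSC */
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
    }

    int main(void)
    {
            uint64_t canary = 0;    /* the RT kernel leaves this uninitialized */
            uint64_t tsc = rdtsc();

            canary += tsc + (tsc << 32);    /* same mixing as the patch */
            printf("canary: %#llx\n", (unsigned long long)canary);
            return 0;
    }
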
2676 | diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h | |
2677 | index c7b551028740..ddb63bd90e3c 100644 | |
2678 | --- a/arch/x86/include/asm/thread_info.h | |
2679 | +++ b/arch/x86/include/asm/thread_info.h | |
2680 | @@ -58,6 +58,8 @@ struct thread_info { | |
2681 | __u32 status; /* thread synchronous flags */ | |
2682 | __u32 cpu; /* current CPU */ | |
2683 | mm_segment_t addr_limit; | |
2684 | + int preempt_lazy_count; /* 0 => lazy preemptable | |
2685 | + <0 => BUG */ | |
2686 | unsigned int sig_on_uaccess_error:1; | |
2687 | unsigned int uaccess_err:1; /* uaccess failed */ | |
2688 | }; | |
2689 | @@ -95,6 +97,7 @@ struct thread_info { | |
2690 | #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ | |
2691 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | |
2692 | #define TIF_SECCOMP 8 /* secure computing */ | |
2693 | +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ | |
2694 | #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ | |
2695 | #define TIF_UPROBE 12 /* breakpointed or singlestepping */ | |
2696 | #define TIF_NOTSC 16 /* TSC is not accessible in userland */ | |
2697 | @@ -119,6 +122,7 @@ struct thread_info { | |
2698 | #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) | |
2699 | #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) | |
2700 | #define _TIF_SECCOMP (1 << TIF_SECCOMP) | |
2701 | +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) | |
2702 | #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) | |
2703 | #define _TIF_UPROBE (1 << TIF_UPROBE) | |
2704 | #define _TIF_NOTSC (1 << TIF_NOTSC) | |
2705 | @@ -152,6 +156,8 @@ struct thread_info { | |
2706 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | |
2707 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | |
2708 | ||
2709 | +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) | |
2710 | + | |
2711 | #define STACK_WARN (THREAD_SIZE/8) | |
2712 | ||
2713 | /* | |
2714 | diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h | |
2715 | index fc808b83fccb..ebb40118abf5 100644 | |
2716 | --- a/arch/x86/include/asm/uv/uv_bau.h | |
2717 | +++ b/arch/x86/include/asm/uv/uv_bau.h | |
2718 | @@ -615,9 +615,9 @@ struct bau_control { | |
2719 | cycles_t send_message; | |
2720 | cycles_t period_end; | |
2721 | cycles_t period_time; | |
2722 | - spinlock_t uvhub_lock; | |
2723 | - spinlock_t queue_lock; | |
2724 | - spinlock_t disable_lock; | |
2725 | + raw_spinlock_t uvhub_lock; | |
2726 | + raw_spinlock_t queue_lock; | |
2727 | + raw_spinlock_t disable_lock; | |
2728 | /* tunables */ | |
2729 | int max_concurr; | |
2730 | int max_concurr_const; | |
2731 | @@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v) | |
2732 | * to be lowered below the current 'v'. atomic_add_unless can only stop | |
2733 | * on equal. | |
2734 | */ | |
2735 | -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | |
2736 | +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) | |
2737 | { | |
2738 | - spin_lock(lock); | |
2739 | + raw_spin_lock(lock); | |
2740 | if (atomic_read(v) >= u) { | |
2741 | - spin_unlock(lock); | |
2742 | + raw_spin_unlock(lock); | |
2743 | return 0; | |
2744 | } | |
2745 | atomic_inc(v); | |
2746 | - spin_unlock(lock); | |
2747 | + raw_spin_unlock(lock); | |
2748 | return 1; | |
2749 | } | |
2750 | ||
2751 | diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h | |
2752 | index ea7074784cc4..01ec643ce66e 100644 | |
2753 | --- a/arch/x86/include/asm/uv/uv_hub.h | |
2754 | +++ b/arch/x86/include/asm/uv/uv_hub.h | |
2755 | @@ -492,7 +492,7 @@ struct uv_blade_info { | |
2756 | unsigned short nr_online_cpus; | |
2757 | unsigned short pnode; | |
2758 | short memory_nid; | |
2759 | - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ | |
2760 | + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ | |
2761 | unsigned long nmi_count; /* obsolete, see uv_hub_nmi */ | |
2762 | }; | |
2763 | extern struct uv_blade_info *uv_blade_info; | |
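On PREEMPT_RT a plain spinlock_t becomes a sleeping rtmutex, which is illegal in the interrupt-disabled BAU and NMI paths these locks protect; raw_spinlock_t keeps true busy-wait semantics. The conversion pattern, sketched with an illustrative lock:

    /* Sketch of the raw-lock pattern, assuming PREEMPT_RT lock semantics. */
    static DEFINE_RAW_SPINLOCK(example_lock);

    static void critical_from_atomic_context(void)
    {
            raw_spin_lock(&example_lock);   /* safe with interrupts off */
            /* ... short, bounded critical section ... */
            raw_spin_unlock(&example_lock);
    }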
2764 | diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c |
2765 | index e75907601a41..a29fc4f84fc4 100644 | |
2766 | --- a/arch/x86/kernel/acpi/boot.c | |
2767 | +++ b/arch/x86/kernel/acpi/boot.c | |
2768 | @@ -87,7 +87,9 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |
2769 | * ->ioapic_mutex | |
2770 | * ->ioapic_lock | |
2771 | */ | |
2772 | +#ifdef CONFIG_X86_IO_APIC | |
2773 | static DEFINE_MUTEX(acpi_ioapic_lock); | |
2774 | +#endif | |
2775 | ||
2776 | /* -------------------------------------------------------------------------- | |
2777 | Boot-time Configuration | |
2778 | diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c |
2779 | index fdb0fbfb1197..678c711e2a16 100644 | |
2780 | --- a/arch/x86/kernel/apic/io_apic.c | |
2781 | +++ b/arch/x86/kernel/apic/io_apic.c | |
2782 | @@ -1711,7 +1711,8 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) | |
2783 | static inline bool ioapic_irqd_mask(struct irq_data *data) | |
2784 | { | |
2785 | /* If we are moving the irq we need to mask it */ | |
2786 | - if (unlikely(irqd_is_setaffinity_pending(data))) { | |
2787 | + if (unlikely(irqd_is_setaffinity_pending(data) && | |
2788 | + !irqd_irq_inprogress(data))) { | |
2789 | mask_ioapic_irq(data); | |
2790 | return true; | |
2791 | } | |
2792 | diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c | |
2793 | index 4a139465f1d4..ad2afff02b36 100644 | |
2794 | --- a/arch/x86/kernel/apic/x2apic_uv_x.c | |
2795 | +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |
2796 | @@ -947,7 +947,7 @@ void __init uv_system_init(void) | |
2797 | uv_blade_info[blade].pnode = pnode; | |
2798 | uv_blade_info[blade].nr_possible_cpus = 0; | |
2799 | uv_blade_info[blade].nr_online_cpus = 0; | |
2800 | - spin_lock_init(&uv_blade_info[blade].nmi_lock); | |
2801 | + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock); | |
2802 | min_pnode = min(pnode, min_pnode); | |
2803 | max_pnode = max(pnode, max_pnode); | |
2804 | blade++; | |
2805 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
2806 | index 439df975bc7a..b7954ddd6a0a 100644 | |
2807 | --- a/arch/x86/kernel/asm-offsets.c | |
2808 | +++ b/arch/x86/kernel/asm-offsets.c | |
2809 | @@ -32,6 +32,7 @@ void common(void) { | |
2810 | OFFSET(TI_flags, thread_info, flags); | |
2811 | OFFSET(TI_status, thread_info, status); | |
2812 | OFFSET(TI_addr_limit, thread_info, addr_limit); | |
2813 | + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); | |
2814 | ||
2815 | BLANK(); | |
2816 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | |
2817 | @@ -89,4 +90,5 @@ void common(void) { | |
2818 | ||
2819 | BLANK(); | |
2820 | DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); | |
2821 | + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); | |
2822 | } | |
2823 | diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c | |
2824 | index 7e8a736d09db..430a4ec07811 100644 | |
2825 | --- a/arch/x86/kernel/cpu/mcheck/mce.c | |
2826 | +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |
2827 | @@ -41,6 +41,8 @@ | |
2828 | #include <linux/debugfs.h> | |
2829 | #include <linux/irq_work.h> | |
2830 | #include <linux/export.h> | |
2831 | +#include <linux/jiffies.h> | |
2832 | +#include <linux/swork.h> | |
2833 | ||
2834 | #include <asm/processor.h> | |
2835 | #include <asm/traps.h> | |
2836 | @@ -1236,7 +1238,7 @@ void mce_log_therm_throt_event(__u64 status) | |
2837 | static unsigned long check_interval = INITIAL_CHECK_INTERVAL; | |
2838 | ||
2839 | static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ | |
2840 | -static DEFINE_PER_CPU(struct timer_list, mce_timer); | |
2841 | +static DEFINE_PER_CPU(struct hrtimer, mce_timer); | |
2842 | ||
2843 | static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2844 | { | |
2845 | @@ -1245,32 +1247,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |
2846 | ||
2847 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; | |
2848 | ||
2849 | -static void __restart_timer(struct timer_list *t, unsigned long interval) | |
2850 | +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) | |
2851 | { | |
2852 | - unsigned long when = jiffies + interval; | |
2853 | - unsigned long flags; | |
2854 | - | |
2855 | - local_irq_save(flags); | |
2856 | - | |
2857 | - if (timer_pending(t)) { | |
2858 | - if (time_before(when, t->expires)) | |
2859 | - mod_timer_pinned(t, when); | |
2860 | - } else { | |
2861 | - t->expires = round_jiffies(when); | |
2862 | - add_timer_on(t, smp_processor_id()); | |
2863 | - } | |
2864 | - | |
2865 | - local_irq_restore(flags); | |
2866 | + if (!interval) | |
2867 | + return HRTIMER_NORESTART; | |
2868 | + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); | |
2869 | + return HRTIMER_RESTART; | |
2870 | } | |
2871 | ||
2872 | -static void mce_timer_fn(unsigned long data) | |
2873 | +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) | |
2874 | { | |
2875 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2876 | - int cpu = smp_processor_id(); | |
2877 | unsigned long iv; | |
2878 | ||
2879 | - WARN_ON(cpu != data); | |
2880 | - | |
2881 | iv = __this_cpu_read(mce_next_interval); | |
2882 | ||
2883 | if (mce_available(this_cpu_ptr(&cpu_info))) { | |
2884 | @@ -1293,7 +1281,7 @@ static void mce_timer_fn(unsigned long data) | |
2885 | ||
2886 | done: | |
2887 | __this_cpu_write(mce_next_interval, iv); | |
2888 | - __restart_timer(t, iv); | |
2889 | + return __restart_timer(timer, iv); | |
2890 | } | |
2891 | ||
2892 | /* | |
2893 | @@ -1301,7 +1289,7 @@ done: | |
2894 | */ | |
2895 | void mce_timer_kick(unsigned long interval) | |
2896 | { | |
2897 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
2898 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
2899 | unsigned long iv = __this_cpu_read(mce_next_interval); | |
2900 | ||
2901 | __restart_timer(t, interval); | |
2902 | @@ -1316,7 +1304,7 @@ static void mce_timer_delete_all(void) | |
2903 | int cpu; | |
2904 | ||
2905 | for_each_online_cpu(cpu) | |
2906 | - del_timer_sync(&per_cpu(mce_timer, cpu)); | |
2907 | + hrtimer_cancel(&per_cpu(mce_timer, cpu)); | |
2908 | } | |
2909 | ||
2910 | static void mce_do_trigger(struct work_struct *work) | |
2911 | @@ -1326,6 +1314,56 @@ static void mce_do_trigger(struct work_struct *work) | |
2912 | ||
2913 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2914 | ||
2915 | +static void __mce_notify_work(struct swork_event *event) | |
2916 | +{ | |
2917 | + /* Not more than two messages every minute */ | |
2918 | + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2919 | + | |
2920 | + /* wake processes polling /dev/mcelog */ | |
2921 | + wake_up_interruptible(&mce_chrdev_wait); | |
2922 | + | |
2923 | + /* | |
2924 | + * There is no risk of missing notifications because | |
2925 | + * work_pending is always cleared before the function is | |
2926 | + * executed. | |
2927 | + */ | |
2928 | + if (mce_helper[0] && !work_pending(&mce_trigger_work)) | |
2929 | + schedule_work(&mce_trigger_work); | |
2930 | + | |
2931 | + if (__ratelimit(&ratelimit)) | |
2932 | + pr_info(HW_ERR "Machine check events logged\n"); | |
2933 | +} | |
2934 | + | |
2935 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
2936 | +static bool notify_work_ready __read_mostly; | |
2937 | +static struct swork_event notify_work; | |
2938 | + | |
2939 | +static int mce_notify_work_init(void) | |
2940 | +{ | |
2941 | + int err; | |
2942 | + | |
2943 | + err = swork_get(); | |
2944 | + if (err) | |
2945 | + return err; | |
2946 | + | |
2947 | + INIT_SWORK(¬ify_work, __mce_notify_work); | |
2948 | + notify_work_ready = true; | |
2949 | + return 0; | |
2950 | +} | |
2951 | + | |
2952 | +static void mce_notify_work(void) | |
2953 | +{ | |
2954 | + if (notify_work_ready) | |
2955 | + swork_queue(¬ify_work); | |
2956 | +} | |
2957 | +#else | |
2958 | +static void mce_notify_work(void) | |
2959 | +{ | |
2960 | + __mce_notify_work(NULL); | |
2961 | +} | |
2962 | +static inline int mce_notify_work_init(void) { return 0; } | |
2963 | +#endif | |
2964 | + | |
2965 | /* | |
2966 | * Notify the user(s) about new machine check events. | |
2967 | * Can be called from interrupt context, but not from machine check/NMI | |
2968 | @@ -1333,19 +1371,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | |
2969 | */ | |
2970 | int mce_notify_irq(void) | |
2971 | { | |
2972 | - /* Not more than two messages every minute */ | |
2973 | - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | |
2974 | - | |
2975 | if (test_and_clear_bit(0, &mce_need_notify)) { | |
2976 | - /* wake processes polling /dev/mcelog */ | |
2977 | - wake_up_interruptible(&mce_chrdev_wait); | |
2978 | - | |
2979 | - if (mce_helper[0]) | |
2980 | - schedule_work(&mce_trigger_work); | |
2981 | - | |
2982 | - if (__ratelimit(&ratelimit)) | |
2983 | - pr_info(HW_ERR "Machine check events logged\n"); | |
2984 | - | |
2985 | + mce_notify_work(); | |
2986 | return 1; | |
2987 | } | |
2988 | return 0; | |
2989 | @@ -1639,7 +1666,7 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) | |
2990 | } | |
2991 | } | |
2992 | ||
2993 | -static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2994 | +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) | |
2995 | { | |
2996 | unsigned long iv = check_interval * HZ; | |
2997 | ||
2998 | @@ -1648,16 +1675,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |
2999 | ||
3000 | per_cpu(mce_next_interval, cpu) = iv; | |
3001 | ||
3002 | - t->expires = round_jiffies(jiffies + iv); | |
3003 | - add_timer_on(t, cpu); | |
3004 | + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), | |
3005 | + 0, HRTIMER_MODE_REL_PINNED); | |
3006 | } | |
3007 | ||
3008 | static void __mcheck_cpu_init_timer(void) | |
3009 | { | |
3010 | - struct timer_list *t = this_cpu_ptr(&mce_timer); | |
3011 | + struct hrtimer *t = this_cpu_ptr(&mce_timer); | |
3012 | unsigned int cpu = smp_processor_id(); | |
3013 | ||
3014 | - setup_timer(t, mce_timer_fn, cpu); | |
3015 | + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
3016 | + t->function = mce_timer_fn; | |
3017 | mce_start_timer(cpu, t); | |
3018 | } | |
3019 | ||
3020 | @@ -2376,6 +2404,8 @@ static void mce_disable_cpu(void *h) | |
3021 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | |
3022 | return; | |
3023 | ||
3024 | + hrtimer_cancel(this_cpu_ptr(&mce_timer)); | |
3025 | + | |
3026 | if (!(action & CPU_TASKS_FROZEN)) | |
3027 | cmci_clear(); | |
3028 | ||
3029 | @@ -2398,6 +2428,7 @@ static void mce_reenable_cpu(void *h) | |
3030 | if (b->init) | |
3031 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | |
3032 | } | |
3033 | + __mcheck_cpu_init_timer(); | |
3034 | } | |
3035 | ||
3036 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | |
3037 | @@ -2405,7 +2436,6 @@ static int | |
3038 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
3039 | { | |
3040 | unsigned int cpu = (unsigned long)hcpu; | |
3041 | - struct timer_list *t = &per_cpu(mce_timer, cpu); | |
3042 | ||
3043 | switch (action & ~CPU_TASKS_FROZEN) { | |
3044 | case CPU_ONLINE: | |
3045 | @@ -2425,11 +2455,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
3046 | break; | |
3047 | case CPU_DOWN_PREPARE: | |
3048 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | |
3049 | - del_timer_sync(t); | |
3050 | break; | |
3051 | case CPU_DOWN_FAILED: | |
3052 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | |
3053 | - mce_start_timer(cpu, t); | |
3054 | break; | |
3055 | } | |
3056 | ||
3057 | @@ -2468,6 +2496,10 @@ static __init int mcheck_init_device(void) | |
3058 | goto err_out; | |
3059 | } | |
3060 | ||
3061 | + err = mce_notify_work_init(); | |
3062 | + if (err) | |
3063 | + goto err_out; | |
3064 | + | |
3065 | if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { | |
3066 | err = -ENOMEM; | |
3067 | goto err_out; | |
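The per-cpu MCE poll timer becomes a pinned hrtimer whose callback re-arms itself via hrtimer_forward_now(), keeping the polling on the local CPU without the timer wheel's softirq dependency. The self-rearming pattern in isolation, with illustrative names poll_fn/poll_period_ns:

    /* Sketch of the self-rearming pinned hrtimer used above. */
    static struct hrtimer poll_timer;
    static u64 poll_period_ns = NSEC_PER_SEC;

    static enum hrtimer_restart poll_fn(struct hrtimer *timer)
    {
            /* ... periodic work ... */
            hrtimer_forward_now(timer, ns_to_ktime(poll_period_ns));
            return HRTIMER_RESTART;         /* keep running */
    }

    static void poll_start(void)
    {
            hrtimer_init(&poll_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            poll_timer.function = poll_fn;
            hrtimer_start(&poll_timer, ns_to_ktime(poll_period_ns),
                          HRTIMER_MODE_REL_PINNED);
    }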
3068 | diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |
3069 | index ed446bdcbf31..d2ac364e2118 100644 | |
3070 | --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |
3071 | +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |
3072 | @@ -117,7 +117,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \ | |
3073 | }; | |
3074 | ||
3075 | struct rapl_pmu { | |
3076 | - spinlock_t lock; | |
3077 | + raw_spinlock_t lock; | |
3078 | int n_active; /* number of active events */ | |
3079 | struct list_head active_list; | |
3080 | struct pmu *pmu; /* pointer to rapl_pmu_class */ | |
3081 | @@ -220,13 +220,13 @@ static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) | |
3082 | if (!pmu->n_active) | |
3083 | return HRTIMER_NORESTART; | |
3084 | ||
3085 | - spin_lock_irqsave(&pmu->lock, flags); | |
3086 | + raw_spin_lock_irqsave(&pmu->lock, flags); | |
3087 | ||
3088 | list_for_each_entry(event, &pmu->active_list, active_entry) { | |
3089 | rapl_event_update(event); | |
3090 | } | |
3091 | ||
3092 | - spin_unlock_irqrestore(&pmu->lock, flags); | |
3093 | + raw_spin_unlock_irqrestore(&pmu->lock, flags); | |
3094 | ||
3095 | hrtimer_forward_now(hrtimer, pmu->timer_interval); | |
3096 | ||
3097 | @@ -263,9 +263,9 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode) | |
3098 | struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); | |
3099 | unsigned long flags; | |
3100 | ||
3101 | - spin_lock_irqsave(&pmu->lock, flags); | |
3102 | + raw_spin_lock_irqsave(&pmu->lock, flags); | |
3103 | __rapl_pmu_event_start(pmu, event); | |
3104 | - spin_unlock_irqrestore(&pmu->lock, flags); | |
3105 | + raw_spin_unlock_irqrestore(&pmu->lock, flags); | |
3106 | } | |
3107 | ||
3108 | static void rapl_pmu_event_stop(struct perf_event *event, int mode) | |
3109 | @@ -274,7 +274,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) | |
3110 | struct hw_perf_event *hwc = &event->hw; | |
3111 | unsigned long flags; | |
3112 | ||
3113 | - spin_lock_irqsave(&pmu->lock, flags); | |
3114 | + raw_spin_lock_irqsave(&pmu->lock, flags); | |
3115 | ||
3116 | /* mark event as deactivated and stopped */ | |
3117 | if (!(hwc->state & PERF_HES_STOPPED)) { | |
3118 | @@ -299,7 +299,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) | |
3119 | hwc->state |= PERF_HES_UPTODATE; | |
3120 | } | |
3121 | ||
3122 | - spin_unlock_irqrestore(&pmu->lock, flags); | |
3123 | + raw_spin_unlock_irqrestore(&pmu->lock, flags); | |
3124 | } | |
3125 | ||
3126 | static int rapl_pmu_event_add(struct perf_event *event, int mode) | |
3127 | @@ -308,14 +308,14 @@ static int rapl_pmu_event_add(struct perf_event *event, int mode) | |
3128 | struct hw_perf_event *hwc = &event->hw; | |
3129 | unsigned long flags; | |
3130 | ||
3131 | - spin_lock_irqsave(&pmu->lock, flags); | |
3132 | + raw_spin_lock_irqsave(&pmu->lock, flags); | |
3133 | ||
3134 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | |
3135 | ||
3136 | if (mode & PERF_EF_START) | |
3137 | __rapl_pmu_event_start(pmu, event); | |
3138 | ||
3139 | - spin_unlock_irqrestore(&pmu->lock, flags); | |
3140 | + raw_spin_unlock_irqrestore(&pmu->lock, flags); | |
3141 | ||
3142 | return 0; | |
3143 | } | |
3144 | @@ -603,7 +603,7 @@ static int rapl_cpu_prepare(int cpu) | |
3145 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); | |
3146 | if (!pmu) | |
3147 | return -1; | |
3148 | - spin_lock_init(&pmu->lock); | |
3149 | + raw_spin_lock_init(&pmu->lock); | |
3150 | ||
3151 | INIT_LIST_HEAD(&pmu->active_list); | |
3152 | ||
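Same raw-lock conversion as in the UV code, here with the irqsave variant because the RAPL lock is also taken from an hrtimer callback. The API shape matches the sleeping lock one-for-one, which is what makes these conversions mechanical; sketched with illustrative names:

    /* Sketch: irqsave form of the raw lock, names illustrative. */
    static DEFINE_RAW_SPINLOCK(pmu_lock);

    static void pmu_update_counters(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&pmu_lock, flags);
            /* ... read/accumulate counters ... */
            raw_spin_unlock_irqrestore(&pmu_lock, flags);
    }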
3153 | diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c | |
3154 | index 464ffd69b92e..00db1aad1548 100644 | |
3155 | --- a/arch/x86/kernel/dumpstack_32.c | |
3156 | +++ b/arch/x86/kernel/dumpstack_32.c | |
3157 | @@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
3158 | unsigned long *stack, unsigned long bp, | |
3159 | const struct stacktrace_ops *ops, void *data) | |
3160 | { | |
3161 | - const unsigned cpu = get_cpu(); | |
3162 | + const unsigned cpu = get_cpu_light(); | |
3163 | int graph = 0; | |
3164 | u32 *prev_esp; | |
3165 | ||
3166 | @@ -86,7 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
3167 | break; | |
3168 | touch_nmi_watchdog(); | |
3169 | } | |
3170 | - put_cpu(); | |
3171 | + put_cpu_light(); | |
3172 | } | |
3173 | EXPORT_SYMBOL(dump_trace); | |
3174 | ||
3175 | diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c | |
3176 | index 5f1c6266eb30..c331e3fef465 100644 | |
3177 | --- a/arch/x86/kernel/dumpstack_64.c | |
3178 | +++ b/arch/x86/kernel/dumpstack_64.c | |
3179 | @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
3180 | unsigned long *stack, unsigned long bp, | |
3181 | const struct stacktrace_ops *ops, void *data) | |
3182 | { | |
3183 | - const unsigned cpu = get_cpu(); | |
3184 | + const unsigned cpu = get_cpu_light(); | |
3185 | struct thread_info *tinfo; | |
3186 | unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); | |
3187 | unsigned long dummy; | |
3188 | @@ -241,7 +241,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |
3189 | * This handles the process stack: | |
3190 | */ | |
3191 | bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); | |
3192 | - put_cpu(); | |
3193 | + put_cpu_light(); | |
3194 | } | |
3195 | EXPORT_SYMBOL(dump_trace); | |
3196 | ||
3197 | @@ -255,7 +255,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
3198 | int cpu; | |
3199 | int i; | |
3200 | ||
3201 | - preempt_disable(); | |
3202 | + migrate_disable(); | |
3203 | cpu = smp_processor_id(); | |
3204 | ||
3205 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | |
3206 | @@ -291,7 +291,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |
3207 | pr_cont(" %016lx", *stack++); | |
3208 | touch_nmi_watchdog(); | |
3209 | } | |
3210 | - preempt_enable(); | |
3211 | + migrate_enable(); | |
3212 | ||
3213 | pr_cont("\n"); | |
3214 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | |
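get_cpu_light() and migrate_disable() are RT-patch primitives defined elsewhere in this series: they pin the task to its CPU so smp_processor_id() stays stable, but unlike get_cpu()/preempt_disable() they leave the section preemptible, which matters for the potentially long stack walks here. A usage sketch under that assumption:

    /* Sketch only, assuming the RT definitions of the _light helpers. */
    static void walk_per_cpu_state(void)
    {
            const unsigned int cpu = get_cpu_light();   /* pinned, still preemptible */

            /* ... long walk over per_cpu(..., cpu) data ... */

            put_cpu_light();
    }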
3215 | diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c | |
3216 | index 38da8f29a9c8..ce71f7098f15 100644 | |
3217 | --- a/arch/x86/kernel/irq_32.c | |
3218 | +++ b/arch/x86/kernel/irq_32.c | |
3219 | @@ -128,6 +128,7 @@ void irq_ctx_init(int cpu) | |
3220 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); | |
3221 | } | |
3222 | ||
3223 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
3224 | void do_softirq_own_stack(void) | |
3225 | { | |
3226 | struct thread_info *curstk; | |
3227 | @@ -146,6 +147,7 @@ void do_softirq_own_stack(void) | |
3228 | ||
3229 | call_on_stack(__do_softirq, isp); | |
3230 | } | |
3231 | +#endif | |
3232 | ||
3233 | bool handle_irq(struct irq_desc *desc, struct pt_regs *regs) | |
3234 | { | |
3235 | diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c | |
3236 | index 47190bd399e7..807950860fb7 100644 | |
3237 | --- a/arch/x86/kernel/kvm.c | |
3238 | +++ b/arch/x86/kernel/kvm.c | |
3239 | @@ -36,6 +36,7 @@ | |
3240 | #include <linux/kprobes.h> | |
3241 | #include <linux/debugfs.h> | |
3242 | #include <linux/nmi.h> | |
3243 | +#include <linux/swait.h> | |
3244 | #include <asm/timer.h> | |
3245 | #include <asm/cpu.h> | |
3246 | #include <asm/traps.h> | |
3247 | @@ -91,14 +92,14 @@ static void kvm_io_delay(void) | |
3248 | ||
3249 | struct kvm_task_sleep_node { | |
3250 | struct hlist_node link; | |
3251 | - wait_queue_head_t wq; | |
3252 | + struct swait_queue_head wq; | |
3253 | u32 token; | |
3254 | int cpu; | |
3255 | bool halted; | |
3256 | }; | |
3257 | ||
3258 | static struct kvm_task_sleep_head { | |
3259 | - spinlock_t lock; | |
3260 | + raw_spinlock_t lock; | |
3261 | struct hlist_head list; | |
3262 | } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; | |
3263 | ||
3264 | @@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token) | |
3265 | u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); | |
3266 | struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; | |
3267 | struct kvm_task_sleep_node n, *e; | |
3268 | - DEFINE_WAIT(wait); | |
3269 | + DECLARE_SWAITQUEUE(wait); | |
3270 | ||
3271 | rcu_irq_enter(); | |
3272 | ||
3273 | - spin_lock(&b->lock); | |
3274 | + raw_spin_lock(&b->lock); | |
3275 | e = _find_apf_task(b, token); | |
3276 | if (e) { | |
3277 | /* dummy entry exist -> wake up was delivered ahead of PF */ | |
3278 | hlist_del(&e->link); | |
3279 | kfree(e); | |
3280 | - spin_unlock(&b->lock); | |
3281 | + raw_spin_unlock(&b->lock); | |
3282 | ||
3283 | rcu_irq_exit(); | |
3284 | return; | |
3285 | @@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token) | |
3286 | n.token = token; | |
3287 | n.cpu = smp_processor_id(); | |
3288 | n.halted = is_idle_task(current) || preempt_count() > 1; | |
3289 | - init_waitqueue_head(&n.wq); | |
3290 | + init_swait_queue_head(&n.wq); | |
3291 | hlist_add_head(&n.link, &b->list); | |
3292 | - spin_unlock(&b->lock); | |
3293 | + raw_spin_unlock(&b->lock); | |
3294 | ||
3295 | for (;;) { | |
3296 | if (!n.halted) | |
3297 | - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); | |
3298 | + prepare_to_swait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); | |
3299 | if (hlist_unhashed(&n.link)) | |
3300 | break; | |
3301 | ||
3302 | @@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token) | |
3303 | } | |
3304 | } | |
3305 | if (!n.halted) | |
3306 | - finish_wait(&n.wq, &wait); | |
3307 | + finish_swait(&n.wq, &wait); | |
3308 | ||
3309 | rcu_irq_exit(); | |
3310 | return; | |
3311 | @@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n) | |
3312 | hlist_del_init(&n->link); | |
3313 | if (n->halted) | |
3314 | smp_send_reschedule(n->cpu); | |
3315 | - else if (waitqueue_active(&n->wq)) | |
3316 | - wake_up(&n->wq); | |
3317 | + else if (swait_active(&n->wq)) | |
3318 | + swake_up(&n->wq); | |
3319 | } | |
3320 | ||
3321 | static void apf_task_wake_all(void) | |
3322 | @@ -189,14 +190,14 @@ static void apf_task_wake_all(void) | |
3323 | for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { | |
3324 | struct hlist_node *p, *next; | |
3325 | struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; | |
3326 | - spin_lock(&b->lock); | |
3327 | + raw_spin_lock(&b->lock); | |
3328 | hlist_for_each_safe(p, next, &b->list) { | |
3329 | struct kvm_task_sleep_node *n = | |
3330 | hlist_entry(p, typeof(*n), link); | |
3331 | if (n->cpu == smp_processor_id()) | |
3332 | apf_task_wake_one(n); | |
3333 | } | |
3334 | - spin_unlock(&b->lock); | |
3335 | + raw_spin_unlock(&b->lock); | |
3336 | } | |
3337 | } | |
3338 | ||
3339 | @@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token) | |
3340 | } | |
3341 | ||
3342 | again: | |
3343 | - spin_lock(&b->lock); | |
3344 | + raw_spin_lock(&b->lock); | |
3345 | n = _find_apf_task(b, token); | |
3346 | if (!n) { | |
3347 | /* | |
3348 | @@ -225,17 +226,17 @@ again: | |
3349 | * Allocation failed! Busy wait while other cpu | |
3350 | * handles async PF. | |
3351 | */ | |
3352 | - spin_unlock(&b->lock); | |
3353 | + raw_spin_unlock(&b->lock); | |
3354 | cpu_relax(); | |
3355 | goto again; | |
3356 | } | |
3357 | n->token = token; | |
3358 | n->cpu = smp_processor_id(); | |
3359 | - init_waitqueue_head(&n->wq); | |
3360 | + init_swait_queue_head(&n->wq); | |
3361 | hlist_add_head(&n->link, &b->list); | |
3362 | } else | |
3363 | apf_task_wake_one(n); | |
3364 | - spin_unlock(&b->lock); | |
3365 | + raw_spin_unlock(&b->lock); | |
3366 | return; | |
3367 | } | |
3368 | EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); | |
3369 | @@ -486,7 +487,7 @@ void __init kvm_guest_init(void) | |
3370 | paravirt_ops_setup(); | |
3371 | register_reboot_notifier(&kvm_pv_reboot_nb); | |
3372 | for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) | |
3373 | - spin_lock_init(&async_pf_sleepers[i].lock); | |
3374 | + raw_spin_lock_init(&async_pf_sleepers[i].lock); | |
3375 | if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) | |
3376 | x86_init.irqs.trap_init = kvm_apf_trap_init; | |
3377 | ||
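The async-PF sleepers move to simple wait queues (swait), whose internal lock is raw and whose wakeup side is cheap, so they are usable under the raw b->lock taken above. The prepare/finish and wake halves of the API, sketched with an illustrative 'ready' condition:

    /* Sketch of the swait pattern adopted above; 'ready' is illustrative. */
    static DECLARE_SWAIT_QUEUE_HEAD(demo_wq);
    static bool ready;

    static void demo_wait(void)
    {
            DECLARE_SWAITQUEUE(wait);

            for (;;) {
                    prepare_to_swait(&demo_wq, &wait, TASK_UNINTERRUPTIBLE);
                    if (READ_ONCE(ready))
                            break;
                    schedule();
            }
            finish_swait(&demo_wq, &wait);
    }

    static void demo_wake(void)
    {
            WRITE_ONCE(ready, true);
            if (swait_active(&demo_wq))
                    swake_up(&demo_wq);
    }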
3378 | diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c | |
3379 | index 697f90db0e37..424aec4a4c71 100644 | |
3380 | --- a/arch/x86/kernel/nmi.c | |
3381 | +++ b/arch/x86/kernel/nmi.c | |
3382 | @@ -231,7 +231,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) | |
3383 | #endif | |
3384 | ||
3385 | if (panic_on_unrecovered_nmi) | |
3386 | - panic("NMI: Not continuing"); | |
3387 | + nmi_panic(regs, "NMI: Not continuing"); | |
3388 | ||
3389 | pr_emerg("Dazed and confused, but trying to continue\n"); | |
3390 | ||
3391 | @@ -255,8 +255,16 @@ io_check_error(unsigned char reason, struct pt_regs *regs) | |
3392 | reason, smp_processor_id()); | |
3393 | show_regs(regs); | |
3394 | ||
3395 | - if (panic_on_io_nmi) | |
3396 | - panic("NMI IOCK error: Not continuing"); | |
3397 | + if (panic_on_io_nmi) { | |
3398 | + nmi_panic(regs, "NMI IOCK error: Not continuing"); | |
3399 | + | |
3400 | + /* | |
3401 | + * If we end up here, it means we have received an NMI while | |
3402 | + * processing panic(). Simply return without delaying and | |
3403 | + * re-enabling NMIs. | |
3404 | + */ | |
3405 | + return; | |
3406 | + } | |
3407 | ||
3408 | /* Re-enable the IOCK line, wait for a few seconds */ | |
3409 | reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; | |
3410 | @@ -297,7 +305,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | |
3411 | ||
3412 | pr_emerg("Do you have a strange power saving mode enabled?\n"); | |
3413 | if (unknown_nmi_panic || panic_on_unrecovered_nmi) | |
3414 | - panic("NMI: Not continuing"); | |
3415 | + nmi_panic(regs, "NMI: Not continuing"); | |
3416 | ||
3417 | pr_emerg("Dazed and confused, but trying to continue\n"); | |
3418 | } | |
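nmi_panic(), introduced alongside this change, lets exactly one CPU proceed into panic(): a concurrent NMI on another CPU parks in nmi_panic_self_stop(), and a recursive NMI on the panicking CPU simply returns instead of deadlocking, which is why io_check_error() can fall through after the call. Its semantics, sketched from the panic_cpu handshake:

    /* Sketch of the nmi_panic() handshake; mirrors the real macro's logic. */
    static void nmi_panic_sketch(struct pt_regs *regs, const char *msg)
    {
            int cpu = raw_smp_processor_id();
            int old = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);

            if (old == PANIC_CPU_INVALID)
                    panic("%s", msg);               /* first CPU wins */
            else if (old != cpu)
                    nmi_panic_self_stop(regs);      /* park the others */
            /* old == cpu: recursive NMI while panicking, just return */
    }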
3419 | diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c | |
3420 | index 9f950917528b..4dd4beae917a 100644 | |
3421 | --- a/arch/x86/kernel/process_32.c | |
3422 | +++ b/arch/x86/kernel/process_32.c | |
3423 | @@ -35,6 +35,7 @@ | |
3424 | #include <linux/uaccess.h> | |
3425 | #include <linux/io.h> | |
3426 | #include <linux/kdebug.h> | |
3427 | +#include <linux/highmem.h> | |
3428 | ||
3429 | #include <asm/pgtable.h> | |
3430 | #include <asm/ldt.h> | |
3431 | @@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | |
3432 | } | |
3433 | EXPORT_SYMBOL_GPL(start_thread); | |
3434 | ||
3435 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3436 | +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) | |
3437 | +{ | |
3438 | + int i; | |
3439 | + | |
3440 | + /* | |
3441 | + * Clear @prev's kmap_atomic mappings | |
3442 | + */ | |
3443 | + for (i = 0; i < prev_p->kmap_idx; i++) { | |
3444 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
3445 | + pte_t *ptep = kmap_pte - idx; | |
3446 | + | |
3447 | + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); | |
3448 | + } | |
3449 | + /* | |
3450 | + * Restore @next_p's kmap_atomic mappings | |
3451 | + */ | |
3452 | + for (i = 0; i < next_p->kmap_idx; i++) { | |
3453 | + int idx = i + KM_TYPE_NR * smp_processor_id(); | |
3454 | + | |
3455 | + if (!pte_none(next_p->kmap_pte[i])) | |
3456 | + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); | |
3457 | + } | |
3458 | +} | |
3459 | +#else | |
3460 | +static inline void | |
3461 | +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } | |
3462 | +#endif | |
3463 | + | |
3464 | ||
3465 | /* | |
3466 | * switch_to(x,y) should switch tasks from x to y. | |
3467 | @@ -286,6 +316,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |
3468 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | |
3469 | __switch_to_xtra(prev_p, next_p, tss); | |
3470 | ||
3471 | + switch_kmaps(prev_p, next_p); | |
3472 | + | |
3473 | /* | |
3474 | * Leave lazy mode, flushing any hypercalls made here. | |
3475 | * This must be done before restoring TLS segments so | |
3476 | diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c | |
3477 | index f660d63f40fe..8384207adde2 100644 | |
3478 | --- a/arch/x86/kernel/reboot.c | |
3479 | +++ b/arch/x86/kernel/reboot.c | |
3480 | @@ -726,6 +726,7 @@ static int crashing_cpu; | |
3481 | static nmi_shootdown_cb shootdown_callback; | |
3482 | ||
3483 | static atomic_t waiting_for_crash_ipi; | |
3484 | +static int crash_ipi_issued; | |
3485 | ||
3486 | static int crash_nmi_callback(unsigned int val, struct pt_regs *regs) | |
3487 | { | |
3488 | @@ -788,6 +789,9 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |
3489 | ||
3490 | smp_send_nmi_allbutself(); | |
3491 | ||
3492 | + /* Kick CPUs looping in NMI context. */ | |
3493 | + WRITE_ONCE(crash_ipi_issued, 1); | |
3494 | + | |
3495 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | |
3496 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | |
3497 | mdelay(1); | |
3498 | @@ -796,6 +800,22 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |
3499 | ||
3500 | /* Leave the nmi callback set */ | |
3501 | } | |
3502 | + | |
3503 | +/* Override the weak function in kernel/panic.c */ | |
3504 | +void nmi_panic_self_stop(struct pt_regs *regs) | |
3505 | +{ | |
3506 | + while (1) { | |
3507 | + /* | |
3508 | + * Wait for the crash dumping IPI to be issued, and then | |
3509 | + * call its callback directly. | |
3510 | + */ | |
3511 | + if (READ_ONCE(crash_ipi_issued)) | |
3512 | + crash_nmi_callback(0, regs); /* Don't return */ | |
3513 | + | |
3514 | + cpu_relax(); | |
3515 | + } | |
3516 | +} | |
3517 | + | |
3518 | #else /* !CONFIG_SMP */ | |
3519 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | |
3520 | { | |
3521 | diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c | |
3522 | index 4d30b865be30..20d9e9fb3b74 100644 | |
3523 | --- a/arch/x86/kvm/lapic.c | |
3524 | +++ b/arch/x86/kvm/lapic.c | |
3525 | @@ -1195,7 +1195,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) | |
3526 | static void apic_timer_expired(struct kvm_lapic *apic) | |
3527 | { | |
3528 | struct kvm_vcpu *vcpu = apic->vcpu; | |
3529 | - wait_queue_head_t *q = &vcpu->wq; | |
3530 | + struct swait_queue_head *q = &vcpu->wq; | |
3531 | struct kvm_timer *ktimer = &apic->lapic_timer; | |
3532 | ||
3533 | if (atomic_read(&apic->lapic_timer.pending)) | |
3534 | @@ -1204,8 +1204,8 @@ static void apic_timer_expired(struct kvm_lapic *apic) | |
3535 | atomic_inc(&apic->lapic_timer.pending); | |
3536 | kvm_set_pending_timer(vcpu); | |
3537 | ||
3538 | - if (waitqueue_active(q)) | |
3539 | - wake_up_interruptible(q); | |
3540 | + if (swait_active(q)) | |
3541 | + swake_up(q); | |
3542 | ||
3543 | if (apic_lvtt_tscdeadline(apic)) | |
3544 | ktimer->expired_tscdeadline = ktimer->tscdeadline; | |
3545 | @@ -1801,6 +1801,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |
3546 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | |
3547 | HRTIMER_MODE_ABS); | |
3548 | apic->lapic_timer.timer.function = apic_timer_fn; | |
3549 | + apic->lapic_timer.timer.irqsafe = 1; | |
3550 | ||
3551 | /* | |
3552 | * APIC is created enabled. This will prevent kvm_lapic_set_base from | |
3553 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c | |
3554 | index d7cb9577fa31..77c1bdd802df 100644 | |
3555 | --- a/arch/x86/kvm/x86.c | |
3556 | +++ b/arch/x86/kvm/x86.c | |
3557 | @@ -5792,6 +5792,13 @@ int kvm_arch_init(void *opaque) | |
3558 | goto out; | |
3559 | } | |
3560 | ||
3561 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3562 | + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | |
3563 | + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); | |
3564 | + return -EOPNOTSUPP; | |
3565 | + } | |
3566 | +#endif | |
3567 | + | |
3568 | r = kvm_mmu_module_init(); | |
3569 | if (r) | |
3570 | goto out_free_percpu; | |
3571 | diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c | |
3572 | index a6d739258137..bd24ba1c4a86 100644 | |
3573 | --- a/arch/x86/mm/highmem_32.c | |
3574 | +++ b/arch/x86/mm/highmem_32.c | |
3575 | @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); | |
3576 | */ | |
3577 | void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
3578 | { | |
3579 | + pte_t pte = mk_pte(page, prot); | |
3580 | unsigned long vaddr; | |
3581 | int idx, type; | |
3582 | ||
3583 | - preempt_disable(); | |
3584 | + preempt_disable_nort(); | |
3585 | pagefault_disable(); | |
3586 | ||
3587 | if (!PageHighMem(page)) | |
3588 | @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) | |
3589 | idx = type + KM_TYPE_NR*smp_processor_id(); | |
3590 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
3591 | BUG_ON(!pte_none(*(kmap_pte-idx))); | |
3592 | - set_pte(kmap_pte-idx, mk_pte(page, prot)); | |
3593 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3594 | + current->kmap_pte[type] = pte; | |
3595 | +#endif | |
3596 | + set_pte(kmap_pte-idx, pte); | |
3597 | arch_flush_lazy_mmu_mode(); | |
3598 | ||
3599 | return (void *)vaddr; | |
3600 | @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) | |
3601 | * is a bad idea also, in case the page changes cacheability | |
3602 | * attributes or becomes a protected page in a hypervisor. | |
3603 | */ | |
3604 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3605 | + current->kmap_pte[type] = __pte(0); | |
3606 | +#endif | |
3607 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
3608 | kmap_atomic_idx_pop(); | |
3609 | arch_flush_lazy_mmu_mode(); | |
3610 | @@ -100,7 +107,7 @@ void __kunmap_atomic(void *kvaddr) | |
3611 | #endif | |
3612 | ||
3613 | pagefault_enable(); | |
3614 | - preempt_enable(); | |
3615 | + preempt_enable_nort(); | |
3616 | } | |
3617 | EXPORT_SYMBOL(__kunmap_atomic); | |
3618 | ||
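Callers of kmap_atomic()/kunmap_atomic() are unaffected; the only RT addition is that each mapping is mirrored into current->kmap_pte[] so switch_kmaps() (in the process_32.c hunk above) can replay it after a preemption. A typical, unchanged caller for reference:

    /* Sketch: an ordinary kmap_atomic user, untouched by the RT bookkeeping. */
    static void copy_from_highpage(struct page *page, void *dst, size_t len)
    {
            void *src = kmap_atomic(page);

            memcpy(dst, src, len);
            kunmap_atomic(src);
    }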
3619 | diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c | |
3620 | index 9c0ff045fdd4..dd25dd1671b6 100644 | |
3621 | --- a/arch/x86/mm/iomap_32.c | |
3622 | +++ b/arch/x86/mm/iomap_32.c | |
3623 | @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); | |
3624 | ||
3625 | void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
3626 | { | |
3627 | + pte_t pte = pfn_pte(pfn, prot); | |
3628 | unsigned long vaddr; | |
3629 | int idx, type; | |
3630 | ||
3631 | @@ -65,7 +66,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) | |
3632 | type = kmap_atomic_idx_push(); | |
3633 | idx = type + KM_TYPE_NR * smp_processor_id(); | |
3634 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | |
3635 | - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); | |
3636 | + WARN_ON(!pte_none(*(kmap_pte - idx))); | |
3637 | + | |
3638 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3639 | + current->kmap_pte[type] = pte; | |
3640 | +#endif | |
3641 | + set_pte(kmap_pte - idx, pte); | |
3642 | arch_flush_lazy_mmu_mode(); | |
3643 | ||
3644 | return (void *)vaddr; | |
3645 | @@ -113,6 +119,9 @@ iounmap_atomic(void __iomem *kvaddr) | |
3646 | * is a bad idea also, in case the page changes cacheability | |
3647 | * attributes or becomes a protected page in a hypervisor. | |
3648 | */ | |
3649 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3650 | + current->kmap_pte[type] = __pte(0); | |
3651 | +#endif | |
3652 | kpte_clear_flush(kmap_pte-idx, vaddr); | |
3653 | kmap_atomic_idx_pop(); | |
3654 | } | |
3655 | diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c | |
3656 | index 3b6ec42718e4..7871083de089 100644 | |
3657 | --- a/arch/x86/platform/uv/tlb_uv.c | |
3658 | +++ b/arch/x86/platform/uv/tlb_uv.c | |
3659 | @@ -714,9 +714,9 @@ static void destination_plugged(struct bau_desc *bau_desc, | |
3660 | ||
3661 | quiesce_local_uvhub(hmaster); | |
3662 | ||
3663 | - spin_lock(&hmaster->queue_lock); | |
3664 | + raw_spin_lock(&hmaster->queue_lock); | |
3665 | reset_with_ipi(&bau_desc->distribution, bcp); | |
3666 | - spin_unlock(&hmaster->queue_lock); | |
3667 | + raw_spin_unlock(&hmaster->queue_lock); | |
3668 | ||
3669 | end_uvhub_quiesce(hmaster); | |
3670 | ||
3671 | @@ -736,9 +736,9 @@ static void destination_timeout(struct bau_desc *bau_desc, | |
3672 | ||
3673 | quiesce_local_uvhub(hmaster); | |
3674 | ||
3675 | - spin_lock(&hmaster->queue_lock); | |
3676 | + raw_spin_lock(&hmaster->queue_lock); | |
3677 | reset_with_ipi(&bau_desc->distribution, bcp); | |
3678 | - spin_unlock(&hmaster->queue_lock); | |
3679 | + raw_spin_unlock(&hmaster->queue_lock); | |
3680 | ||
3681 | end_uvhub_quiesce(hmaster); | |
3682 | ||
3683 | @@ -759,7 +759,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
3684 | cycles_t tm1; | |
3685 | ||
3686 | hmaster = bcp->uvhub_master; | |
3687 | - spin_lock(&hmaster->disable_lock); | |
3688 | + raw_spin_lock(&hmaster->disable_lock); | |
3689 | if (!bcp->baudisabled) { | |
3690 | stat->s_bau_disabled++; | |
3691 | tm1 = get_cycles(); | |
3692 | @@ -772,7 +772,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) | |
3693 | } | |
3694 | } | |
3695 | } | |
3696 | - spin_unlock(&hmaster->disable_lock); | |
3697 | + raw_spin_unlock(&hmaster->disable_lock); | |
3698 | } | |
3699 | ||
3700 | static void count_max_concurr(int stat, struct bau_control *bcp, | |
3701 | @@ -835,7 +835,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, | |
3702 | */ | |
3703 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | |
3704 | { | |
3705 | - spinlock_t *lock = &hmaster->uvhub_lock; | |
3706 | + raw_spinlock_t *lock = &hmaster->uvhub_lock; | |
3707 | atomic_t *v; | |
3708 | ||
3709 | v = &hmaster->active_descriptor_count; | |
3710 | @@ -968,7 +968,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
3711 | struct bau_control *hmaster; | |
3712 | ||
3713 | hmaster = bcp->uvhub_master; | |
3714 | - spin_lock(&hmaster->disable_lock); | |
3715 | + raw_spin_lock(&hmaster->disable_lock); | |
3716 | if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { | |
3717 | stat->s_bau_reenabled++; | |
3718 | for_each_present_cpu(tcpu) { | |
3719 | @@ -980,10 +980,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | |
3720 | tbcp->period_giveups = 0; | |
3721 | } | |
3722 | } | |
3723 | - spin_unlock(&hmaster->disable_lock); | |
3724 | + raw_spin_unlock(&hmaster->disable_lock); | |
3725 | return 0; | |
3726 | } | |
3727 | - spin_unlock(&hmaster->disable_lock); | |
3728 | + raw_spin_unlock(&hmaster->disable_lock); | |
3729 | return -1; | |
3730 | } | |
3731 | ||
3732 | @@ -1901,9 +1901,9 @@ static void __init init_per_cpu_tunables(void) | |
3733 | bcp->cong_reps = congested_reps; | |
3734 | bcp->disabled_period = sec_2_cycles(disabled_period); | |
3735 | bcp->giveup_limit = giveup_limit; | |
3736 | - spin_lock_init(&bcp->queue_lock); | |
3737 | - spin_lock_init(&bcp->uvhub_lock); | |
3738 | - spin_lock_init(&bcp->disable_lock); | |
3739 | + raw_spin_lock_init(&bcp->queue_lock); | |
3740 | + raw_spin_lock_init(&bcp->uvhub_lock); | |
3741 | + raw_spin_lock_init(&bcp->disable_lock); | |
3742 | } | |
3743 | } | |
3744 | ||
3745 | diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c | |
3746 | index 2b158a9fa1d7..5e0b122620cb 100644 | |
3747 | --- a/arch/x86/platform/uv/uv_time.c | |
3748 | +++ b/arch/x86/platform/uv/uv_time.c | |
3749 | @@ -57,7 +57,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | |
3750 | ||
3751 | /* There is one of these allocated per node */ | |
3752 | struct uv_rtc_timer_head { | |
3753 | - spinlock_t lock; | |
3754 | + raw_spinlock_t lock; | |
3755 | /* next cpu waiting for timer, local node relative: */ | |
3756 | int next_cpu; | |
3757 | /* number of cpus on this node: */ | |
3758 | @@ -177,7 +177,7 @@ static __init int uv_rtc_allocate_timers(void) | |
3759 | uv_rtc_deallocate_timers(); | |
3760 | return -ENOMEM; | |
3761 | } | |
3762 | - spin_lock_init(&head->lock); | |
3763 | + raw_spin_lock_init(&head->lock); | |
3764 | head->ncpus = uv_blade_nr_possible_cpus(bid); | |
3765 | head->next_cpu = -1; | |
3766 | blade_info[bid] = head; | |
3767 | @@ -231,7 +231,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
3768 | unsigned long flags; | |
3769 | int next_cpu; | |
3770 | ||
3771 | - spin_lock_irqsave(&head->lock, flags); | |
3772 | + raw_spin_lock_irqsave(&head->lock, flags); | |
3773 | ||
3774 | next_cpu = head->next_cpu; | |
3775 | *t = expires; | |
3776 | @@ -243,12 +243,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |
3777 | if (uv_setup_intr(cpu, expires)) { | |
3778 | *t = ULLONG_MAX; | |
3779 | uv_rtc_find_next_timer(head, pnode); | |
3780 | - spin_unlock_irqrestore(&head->lock, flags); | |
3781 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3782 | return -ETIME; | |
3783 | } | |
3784 | } | |
3785 | ||
3786 | - spin_unlock_irqrestore(&head->lock, flags); | |
3787 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3788 | return 0; | |
3789 | } | |
3790 | ||
3791 | @@ -267,7 +267,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
3792 | unsigned long flags; | |
3793 | int rc = 0; | |
3794 | ||
3795 | - spin_lock_irqsave(&head->lock, flags); | |
3796 | + raw_spin_lock_irqsave(&head->lock, flags); | |
3797 | ||
3798 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | |
3799 | rc = 1; | |
3800 | @@ -279,7 +279,7 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
3801 | uv_rtc_find_next_timer(head, pnode); | |
3802 | } | |
3803 | ||
3804 | - spin_unlock_irqrestore(&head->lock, flags); | |
3805 | + raw_spin_unlock_irqrestore(&head->lock, flags); | |
3806 | ||
3807 | return rc; | |
3808 | } | |
3809 | @@ -299,13 +299,18 @@ static int uv_rtc_unset_timer(int cpu, int force) | |
3810 | static cycle_t uv_read_rtc(struct clocksource *cs) | |
3811 | { | |
3812 | unsigned long offset; | |
3813 | + cycle_t cycles; | |
3814 | ||
3815 | + preempt_disable(); | |
3816 | if (uv_get_min_hub_revision_id() == 1) | |
3817 | offset = 0; | |
3818 | else | |
3819 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | |
3820 | ||
3821 | - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
3822 | + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | |
3823 | + preempt_enable(); | |
3824 | + | |
3825 | + return cycles; | |
3826 | } | |
3827 | ||
3828 | /* | |
3829 | diff --git a/block/blk-core.c b/block/blk-core.c | |
3830 | index 4fab5d610805..52d2fe2fec8f 100644 | |
3831 | --- a/block/blk-core.c | |
3832 | +++ b/block/blk-core.c | |
3833 | @@ -125,6 +125,9 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |
3834 | ||
3835 | INIT_LIST_HEAD(&rq->queuelist); | |
3836 | INIT_LIST_HEAD(&rq->timeout_list); | |
3837 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
3838 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
3839 | +#endif | |
3840 | rq->cpu = -1; | |
3841 | rq->q = q; | |
3842 | rq->__sector = (sector_t) -1; | |
3843 | @@ -233,7 +236,7 @@ EXPORT_SYMBOL(blk_start_queue_async); | |
3844 | **/ | |
3845 | void blk_start_queue(struct request_queue *q) | |
3846 | { | |
3847 | - WARN_ON(!irqs_disabled()); | |
3848 | + WARN_ON_NONRT(!irqs_disabled()); | |
3849 | ||
3850 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | |
3851 | __blk_run_queue(q); | |
3852 | @@ -659,7 +662,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp) | |
3853 | if (!gfpflags_allow_blocking(gfp)) | |
3854 | return -EBUSY; | |
3855 | ||
3856 | - ret = wait_event_interruptible(q->mq_freeze_wq, | |
3857 | + ret = swait_event_interruptible(q->mq_freeze_wq, | |
3858 | !atomic_read(&q->mq_freeze_depth) || | |
3859 | blk_queue_dying(q)); | |
3860 | if (blk_queue_dying(q)) | |
3861 | @@ -679,7 +682,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref) | |
3862 | struct request_queue *q = | |
3863 | container_of(ref, struct request_queue, q_usage_counter); | |
3864 | ||
3865 | - wake_up_all(&q->mq_freeze_wq); | |
3866 | + swake_up_all(&q->mq_freeze_wq); | |
3867 | } | |
3868 | ||
3869 | struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
3870 | @@ -741,7 +744,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |
3871 | q->bypass_depth = 1; | |
3872 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | |
3873 | ||
3874 | - init_waitqueue_head(&q->mq_freeze_wq); | |
3875 | + init_swait_queue_head(&q->mq_freeze_wq); | |
3876 | ||
3877 | /* | |
3878 | * Init percpu_ref in atomic mode so that it's faster to shutdown. | |
3879 | @@ -3200,7 +3203,7 @@ static void queue_unplugged(struct request_queue *q, unsigned int depth, | |
3880 | blk_run_queue_async(q); | |
3881 | else | |
3882 | __blk_run_queue(q); | |
3883 | - spin_unlock(q->queue_lock); | |
3884 | + spin_unlock_irq(q->queue_lock); | |
3885 | } | |
3886 | ||
3887 | static void flush_plug_callbacks(struct blk_plug *plug, bool from_schedule) | |
3888 | @@ -3248,7 +3251,6 @@ EXPORT_SYMBOL(blk_check_plugged); | |
3889 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3890 | { | |
3891 | struct request_queue *q; | |
3892 | - unsigned long flags; | |
3893 | struct request *rq; | |
3894 | LIST_HEAD(list); | |
3895 | unsigned int depth; | |
3896 | @@ -3268,11 +3270,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3897 | q = NULL; | |
3898 | depth = 0; | |
3899 | ||
3900 | - /* | |
3901 | - * Save and disable interrupts here, to avoid doing it for every | |
3902 | - * queue lock we have to take. | |
3903 | - */ | |
3904 | - local_irq_save(flags); | |
3905 | while (!list_empty(&list)) { | |
3906 | rq = list_entry_rq(list.next); | |
3907 | list_del_init(&rq->queuelist); | |
3908 | @@ -3285,7 +3282,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3909 | queue_unplugged(q, depth, from_schedule); | |
3910 | q = rq->q; | |
3911 | depth = 0; | |
3912 | - spin_lock(q->queue_lock); | |
3913 | + spin_lock_irq(q->queue_lock); | |
3914 | } | |
3915 | ||
3916 | /* | |
3917 | @@ -3312,8 +3309,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |
3918 | */ | |
3919 | if (q) | |
3920 | queue_unplugged(q, depth, from_schedule); | |
3921 | - | |
3922 | - local_irq_restore(flags); | |
3923 | } | |
3924 | ||
3925 | void blk_finish_plug(struct blk_plug *plug) | |
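The freeze/unfreeze handshake switches to the event-style swait API for the same reason as the KVM change: swake_up_all() holds only a raw lock, so it is callable where the full waitqueue would not be on RT. The wait/wake pair, sketched with an illustrative depth counter:

    /* Sketch of the event-style swait pair used for mq_freeze_wq. */
    static DECLARE_SWAIT_QUEUE_HEAD(freeze_wq);
    static atomic_t freeze_depth;

    static int wait_until_unfrozen(void)
    {
            return swait_event_interruptible(freeze_wq,
                            !atomic_read(&freeze_depth));
    }

    static void unfreeze(void)
    {
            if (atomic_dec_and_test(&freeze_depth))
                    swake_up_all(&freeze_wq);
    }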
3926 | diff --git a/block/blk-ioc.c b/block/blk-ioc.c | |
3927 | index 381cb50a673c..dc8785233d94 100644 | |
3928 | --- a/block/blk-ioc.c | |
3929 | +++ b/block/blk-ioc.c | |
3930 | @@ -7,6 +7,7 @@ | |
3931 | #include <linux/bio.h> | |
3932 | #include <linux/blkdev.h> | |
3933 | #include <linux/slab.h> | |
3934 | +#include <linux/delay.h> | |
3935 | ||
3936 | #include "blk.h" | |
3937 | ||
3938 | @@ -109,7 +110,7 @@ static void ioc_release_fn(struct work_struct *work) | |
3939 | spin_unlock(q->queue_lock); | |
3940 | } else { | |
3941 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3942 | - cpu_relax(); | |
3943 | + cpu_chill(); | |
3944 | spin_lock_irqsave_nested(&ioc->lock, flags, 1); | |
3945 | } | |
3946 | } | |
3947 | @@ -187,7 +188,7 @@ retry: | |
3948 | spin_unlock(icq->q->queue_lock); | |
3949 | } else { | |
3950 | spin_unlock_irqrestore(&ioc->lock, flags); | |
3951 | - cpu_relax(); | |
3952 | + cpu_chill(); | |
3953 | goto retry; | |
3954 | } | |
3955 | } | |
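cpu_chill() is an RT-patch helper, assumed here to sleep for roughly a tick on RT while degrading to a busy-wait hint otherwise: spinning on a trylock retry can livelock on RT when the lock holder has been preempted, so the retry loop must actually sleep. The pattern, sketched with illustrative locks:

    /* Sketch of the trylock-retry pattern with cpu_chill(), names illustrative. */
    static void lock_both(spinlock_t *outer, spinlock_t *inner)
    {
            for (;;) {
                    spin_lock(outer);
                    if (spin_trylock(inner))
                            return;         /* got both, caller unlocks */
                    spin_unlock(outer);
                    cpu_chill();            /* sleep so the holder can run */
            }
    }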
3956 | diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c | |
3957 | index 0736729d6494..3e21e31d0d7e 100644 | |
3958 | --- a/block/blk-iopoll.c | |
3959 | +++ b/block/blk-iopoll.c | |
3960 | @@ -35,6 +35,7 @@ void blk_iopoll_sched(struct blk_iopoll *iop) | |
3961 | list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); | |
3962 | __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); | |
3963 | local_irq_restore(flags); | |
3964 | + preempt_check_resched_rt(); | |
3965 | } | |
3966 | EXPORT_SYMBOL(blk_iopoll_sched); | |
3967 | ||
3968 | @@ -132,6 +133,7 @@ static void blk_iopoll_softirq(struct softirq_action *h) | |
3969 | __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); | |
3970 | ||
3971 | local_irq_enable(); | |
3972 | + preempt_check_resched_rt(); | |
3973 | } | |
3974 | ||
3975 | /** | |
3976 | @@ -201,6 +203,7 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self, | |
3977 | this_cpu_ptr(&blk_cpu_iopoll)); | |
3978 | __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); | |
3979 | local_irq_enable(); | |
3980 | + preempt_check_resched_rt(); | |
3981 | } | |
3982 | ||
3983 | return NOTIFY_OK; | |
3984 | diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c | |
3985 | index bb3ed488f7b5..628c6c13c482 100644 | |
3986 | --- a/block/blk-mq-cpu.c | |
3987 | +++ b/block/blk-mq-cpu.c | |
3988 | @@ -16,7 +16,7 @@ | |
3989 | #include "blk-mq.h" | |
3990 | ||
3991 | static LIST_HEAD(blk_mq_cpu_notify_list); | |
3992 | -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); | |
3993 | +static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); | |
3994 | ||
3995 | static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
3996 | unsigned long action, void *hcpu) | |
3997 | @@ -25,7 +25,10 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
3998 | struct blk_mq_cpu_notifier *notify; | |
3999 | int ret = NOTIFY_OK; | |
4000 | ||
4001 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
4002 | + if (action != CPU_POST_DEAD) | |
4003 | + return NOTIFY_OK; | |
4004 | + | |
4005 | + spin_lock(&blk_mq_cpu_notify_lock); | |
4006 | ||
4007 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { | |
4008 | ret = notify->notify(notify->data, action, cpu); | |
4009 | @@ -33,7 +36,7 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, | |
4010 | break; | |
4011 | } | |
4012 | ||
4013 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
4014 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
4015 | return ret; | |
4016 | } | |
4017 | ||
4018 | @@ -41,16 +44,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | |
4019 | { | |
4020 | BUG_ON(!notifier->notify); | |
4021 | ||
4022 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
4023 | + spin_lock(&blk_mq_cpu_notify_lock); | |
4024 | list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); | |
4025 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
4026 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
4027 | } | |
4028 | ||
4029 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | |
4030 | { | |
4031 | - raw_spin_lock(&blk_mq_cpu_notify_lock); | |
4032 | + spin_lock(&blk_mq_cpu_notify_lock); | |
4033 | list_del(¬ifier->list); | |
4034 | - raw_spin_unlock(&blk_mq_cpu_notify_lock); | |
4035 | + spin_unlock(&blk_mq_cpu_notify_lock); | |
4036 | } | |
4037 | ||
4038 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | |
4039 | diff --git a/block/blk-mq.c b/block/blk-mq.c | |
4040 | index c3e461ec40e4..03dfc2c91595 100644 |
4041 | --- a/block/blk-mq.c |
4042 | +++ b/block/blk-mq.c | |
4043 | @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start); | |
4044 | ||
4045 | static void blk_mq_freeze_queue_wait(struct request_queue *q) | |
4046 | { | |
4047 | - wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
4048 | + swait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter)); | |
4049 | } | |
4050 | ||
4051 | /* | |
4052 | @@ -130,7 +130,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q) | |
4053 | WARN_ON_ONCE(freeze_depth < 0); | |
4054 | if (!freeze_depth) { | |
4055 | percpu_ref_reinit(&q->q_usage_counter); | |
4056 | - wake_up_all(&q->mq_freeze_wq); | |
4057 | + swake_up_all(&q->mq_freeze_wq); | |
4058 | } | |
4059 | } | |
4060 | EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue); | |
4061 | @@ -149,7 +149,7 @@ void blk_mq_wake_waiters(struct request_queue *q) | |
4062 | * dying, we need to ensure that processes currently waiting on | |
4063 | * the queue are notified as well. | |
4064 | */ | |
4065 | - wake_up_all(&q->mq_freeze_wq); | |
4066 | + swake_up_all(&q->mq_freeze_wq); | |
4067 | } | |
4068 | ||
4069 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) | |
4070 | @@ -196,6 +196,9 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |
4071 | rq->resid_len = 0; | |
4072 | rq->sense = NULL; | |
4073 | ||
4074 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
4075 | + INIT_WORK(&rq->work, __blk_mq_complete_request_remote_work); | |
4076 | +#endif | |
4077 | INIT_LIST_HEAD(&rq->timeout_list); | |
4078 | rq->timeout = 0; | |
4079 | ||
4080 | @@ -325,6 +328,17 @@ void blk_mq_end_request(struct request *rq, int error) | |
4081 | } | |
4082 | EXPORT_SYMBOL(blk_mq_end_request); | |
4083 | ||
4084 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
4085 | + | |
4086 | +void __blk_mq_complete_request_remote_work(struct work_struct *work) | |
4087 | +{ | |
4088 | + struct request *rq = container_of(work, struct request, work); | |
4089 | + | |
4090 | + rq->q->softirq_done_fn(rq); | |
4091 | +} | |
4092 | + | |
4093 | +#else | |
4094 | + | |
4095 | static void __blk_mq_complete_request_remote(void *data) | |
4096 | { | |
4097 | struct request *rq = data; | |
4098 | @@ -332,6 +346,8 @@ static void __blk_mq_complete_request_remote(void *data) | |
4099 | rq->q->softirq_done_fn(rq); | |
4100 | } | |
4101 | ||
4102 | +#endif | |
4103 | + | |
4104 | static void blk_mq_ipi_complete_request(struct request *rq) | |
4105 | { | |
4106 | struct blk_mq_ctx *ctx = rq->mq_ctx; | |
4107 | @@ -343,19 +359,23 @@ static void blk_mq_ipi_complete_request(struct request *rq) | |
4108 | return; | |
4109 | } | |
4110 | ||
4111 | - cpu = get_cpu(); | |
4112 | + cpu = get_cpu_light(); | |
4113 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) | |
4114 | shared = cpus_share_cache(cpu, ctx->cpu); | |
4115 | ||
4116 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | |
4117 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
4118 | + schedule_work_on(ctx->cpu, &rq->work); | |
4119 | +#else | |
4120 | rq->csd.func = __blk_mq_complete_request_remote; | |
4121 | rq->csd.info = rq; | |
4122 | rq->csd.flags = 0; | |
4123 | smp_call_function_single_async(ctx->cpu, &rq->csd); | |
4124 | +#endif | |
4125 | } else { | |
4126 | rq->q->softirq_done_fn(rq); | |
4127 | } | |
4128 | - put_cpu(); | |
4129 | + put_cpu_light(); | |
4130 | } | |
4131 | ||
4132 | static void __blk_mq_complete_request(struct request *rq) | |
4133 | @@ -864,14 +884,14 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | |
4134 | return; | |
4135 | ||
4136 | if (!async) { | |
4137 | - int cpu = get_cpu(); | |
4138 | + int cpu = get_cpu_light(); | |
4139 | if (cpumask_test_cpu(cpu, hctx->cpumask)) { | |
4140 | __blk_mq_run_hw_queue(hctx); | |
4141 | - put_cpu(); | |
4142 | + put_cpu_light(); | |
4143 | return; | |
4144 | } | |
4145 | ||
4146 | - put_cpu(); | |
4147 | + put_cpu_light(); | |
4148 | } | |
4149 | ||
4150 | kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx), | |
4151 | @@ -1619,7 +1639,7 @@ static int blk_mq_hctx_notify(void *data, unsigned long action, | |
4152 | { | |
4153 | struct blk_mq_hw_ctx *hctx = data; | |
4154 | ||
4155 | - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) | |
4156 | + if (action == CPU_POST_DEAD) | |
4157 | return blk_mq_hctx_cpu_offline(hctx, cpu); | |
4158 | ||
4159 | /* | |
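Reviewer note: the three hunks above convert mq_freeze_wq from a regular wait_queue_head_t to a simple waitqueue. On PREEMPT_RT, wake_up_all() on an ordinary waitqueue takes a sleeping lock and may walk an unbounded number of custom wake callbacks; an swait head uses a raw spinlock and plain wakeups. A minimal sketch of the same pattern, with hypothetical names (my_freeze_wq, frozen):

    #include <linux/swait.h>

    static DECLARE_SWAIT_QUEUE_HEAD(my_freeze_wq);
    static bool frozen;

    static void wait_until_unfrozen(void)
    {
            /* Sleep until the condition evaluates true. */
            swait_event(my_freeze_wq, !READ_ONCE(frozen));
    }

    static void unfreeze(void)
    {
            WRITE_ONCE(frozen, false);
            /*
             * swake_up_all() holds only a raw spinlock and drops it
             * between wakeups, keeping wakeup latency bounded.
             */
            swake_up_all(&my_freeze_wq);
    }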
4160 | diff --git a/block/blk-mq.h b/block/blk-mq.h | |
4161 | index 713820b47b31..3cb6feb4fe23 100644 | |
4162 | --- a/block/blk-mq.h | |
4163 | +++ b/block/blk-mq.h | |
4164 | @@ -74,7 +74,10 @@ struct blk_align_bitmap { | |
4165 | static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, | |
4166 | unsigned int cpu) | |
4167 | { | |
4168 | - return per_cpu_ptr(q->queue_ctx, cpu); | |
4169 | + struct blk_mq_ctx *ctx; | |
4170 | + | |
4171 | + ctx = per_cpu_ptr(q->queue_ctx, cpu); | |
4172 | + return ctx; | |
4173 | } | |
4174 | ||
4175 | /* | |
4176 | @@ -85,12 +88,12 @@ static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, | |
4177 | */ | |
4178 | static inline struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q) | |
4179 | { | |
4180 | - return __blk_mq_get_ctx(q, get_cpu()); | |
4181 | + return __blk_mq_get_ctx(q, get_cpu_light()); | |
4182 | } | |
4183 | ||
4184 | static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx) | |
4185 | { | |
4186 | - put_cpu(); | |
4187 | + put_cpu_light(); | |
4188 | } | |
4189 | ||
4190 | struct blk_mq_alloc_data { | |
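The get_cpu()/put_cpu() pairs above become get_cpu_light()/put_cpu_light(). These are RT-tree primitives (not in mainline) that pin the task to its CPU via migrate_disable() while leaving preemption enabled, which is all blk_mq_get_ctx() needs: a stable CPU number to index the per-CPU context. A sketch under that assumption, with hypothetical names:

    struct my_ctx { int dummy; };
    static DEFINE_PER_CPU(struct my_ctx, my_ctx_pcpu);

    static struct my_ctx *my_get_ctx(void)
    {
            /* migrate_disable(): CPU number stays valid, task stays preemptible. */
            int cpu = get_cpu_light();

            return per_cpu_ptr(&my_ctx_pcpu, cpu);
    }

    static void my_put_ctx(void)
    {
            put_cpu_light();        /* migrate_enable() */
    }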
4191 | diff --git a/block/blk-softirq.c b/block/blk-softirq.c | |
4192 | index 53b1737e978d..81c3c0a62edf 100644 | |
4193 | --- a/block/blk-softirq.c | |
4194 | +++ b/block/blk-softirq.c | |
4195 | @@ -51,6 +51,7 @@ static void trigger_softirq(void *data) | |
4196 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
4197 | ||
4198 | local_irq_restore(flags); | |
4199 | + preempt_check_resched_rt(); | |
4200 | } | |
4201 | ||
4202 | /* | |
4203 | @@ -93,6 +94,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, | |
4204 | this_cpu_ptr(&blk_cpu_done)); | |
4205 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | |
4206 | local_irq_enable(); | |
4207 | + preempt_check_resched_rt(); | |
4208 | } | |
4209 | ||
4210 | return NOTIFY_OK; | |
4211 | @@ -150,6 +152,7 @@ do_local: | |
4212 | goto do_local; | |
4213 | ||
4214 | local_irq_restore(flags); | |
4215 | + preempt_check_resched_rt(); | |
4216 | } | |
4217 | ||
4218 | /** | |
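Each added preempt_check_resched_rt() sits right after interrupts are re-enabled in a path that raised BLOCK_SOFTIRQ. On RT, raising a softirq may wake the softirq thread while IRQs are still off, so this RT-only helper (a no-op on !RT) folds the pending reschedule as soon as it is safe to do so. The pattern, condensed into an illustrative helper:

    static void kick_block_softirq(void)
    {
            unsigned long flags;

            local_irq_save(flags);
            raise_softirq_irqoff(BLOCK_SOFTIRQ);    /* may wake ksoftirqd on RT */
            local_irq_restore(flags);
            preempt_check_resched_rt();             /* act on the deferred wakeup */
    }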
4219 | diff --git a/block/bounce.c b/block/bounce.c | |
4220 | index 1cb5dd3a5da1..2f1ec8a67cbe 100644 | |
4221 | --- a/block/bounce.c | |
4222 | +++ b/block/bounce.c | |
4223 | @@ -55,11 +55,11 @@ static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) | |
4224 | unsigned long flags; | |
4225 | unsigned char *vto; | |
4226 | ||
4227 | - local_irq_save(flags); | |
4228 | + local_irq_save_nort(flags); | |
4229 | vto = kmap_atomic(to->bv_page); | |
4230 | memcpy(vto + to->bv_offset, vfrom, to->bv_len); | |
4231 | kunmap_atomic(vto); | |
4232 | - local_irq_restore(flags); | |
4233 | + local_irq_restore_nort(flags); | |
4234 | } | |
4235 | ||
4236 | #else /* CONFIG_HIGHMEM */ | |
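local_irq_save_nort()/local_irq_restore_nort() are RT-tree macros: plain local_irq_save()/restore() on a !RT kernel, essentially no-ops on RT, where the atomic kmap section does not actually require hard interrupts off. A sketch of the pattern with an illustrative helper:

    static void copy_to_page(struct page *page, const void *src, size_t len)
    {
            unsigned long flags;
            void *dst;

            local_irq_save_nort(flags);     /* irqs off only on !RT */
            dst = kmap_atomic(page);
            memcpy(dst, src, len);
            kunmap_atomic(dst);
            local_irq_restore_nort(flags);
    }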
4237 | diff --git a/crypto/algapi.c b/crypto/algapi.c | |
4238 | index 59bf491fe3d8..f98e79c8cd77 100644 | |
4239 | --- a/crypto/algapi.c | |
4240 | +++ b/crypto/algapi.c | |
4241 | @@ -719,13 +719,13 @@ EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); | |
4242 | ||
4243 | int crypto_register_notifier(struct notifier_block *nb) | |
4244 | { | |
4245 | - return blocking_notifier_chain_register(&crypto_chain, nb); | |
4246 | + return srcu_notifier_chain_register(&crypto_chain, nb); | |
4247 | } | |
4248 | EXPORT_SYMBOL_GPL(crypto_register_notifier); | |
4249 | ||
4250 | int crypto_unregister_notifier(struct notifier_block *nb) | |
4251 | { | |
4252 | - return blocking_notifier_chain_unregister(&crypto_chain, nb); | |
4253 | + return srcu_notifier_chain_unregister(&crypto_chain, nb); | |
4254 | } | |
4255 | EXPORT_SYMBOL_GPL(crypto_unregister_notifier); | |
4256 | ||
4257 | diff --git a/crypto/api.c b/crypto/api.c | |
4258 | index bbc147cb5dec..bc1a848f02ec 100644 | |
4259 | --- a/crypto/api.c | |
4260 | +++ b/crypto/api.c | |
4261 | @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(crypto_alg_list); | |
4262 | DECLARE_RWSEM(crypto_alg_sem); | |
4263 | EXPORT_SYMBOL_GPL(crypto_alg_sem); | |
4264 | ||
4265 | -BLOCKING_NOTIFIER_HEAD(crypto_chain); | |
4266 | +SRCU_NOTIFIER_HEAD(crypto_chain); | |
4267 | EXPORT_SYMBOL_GPL(crypto_chain); | |
4268 | ||
4269 | static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); | |
4270 | @@ -236,10 +236,10 @@ int crypto_probing_notify(unsigned long val, void *v) | |
4271 | { | |
4272 | int ok; | |
4273 | ||
4274 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
4275 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
4276 | if (ok == NOTIFY_DONE) { | |
4277 | request_module("cryptomgr"); | |
4278 | - ok = blocking_notifier_call_chain(&crypto_chain, val, v); | |
4279 | + ok = srcu_notifier_call_chain(&crypto_chain, val, v); | |
4280 | } | |
4281 | ||
4282 | return ok; | |
4283 | diff --git a/crypto/internal.h b/crypto/internal.h | |
4284 | index 00e42a3ed814..2e85551e235f 100644 | |
4285 | --- a/crypto/internal.h | |
4286 | +++ b/crypto/internal.h | |
4287 | @@ -47,7 +47,7 @@ struct crypto_larval { | |
4288 | ||
4289 | extern struct list_head crypto_alg_list; | |
4290 | extern struct rw_semaphore crypto_alg_sem; | |
4291 | -extern struct blocking_notifier_head crypto_chain; | |
4292 | +extern struct srcu_notifier_head crypto_chain; | |
4293 | ||
4294 | #ifdef CONFIG_PROC_FS | |
4295 | void __init crypto_init_proc(void); | |
4296 | @@ -143,7 +143,7 @@ static inline int crypto_is_moribund(struct crypto_alg *alg) | |
4297 | ||
4298 | static inline void crypto_notify(unsigned long val, void *v) | |
4299 | { | |
4300 | - blocking_notifier_call_chain(&crypto_chain, val, v); | |
4301 | + srcu_notifier_call_chain(&crypto_chain, val, v); | |
4302 | } | |
4303 | ||
4304 | #endif /* _CRYPTO_INTERNAL_H */ | |
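The crypto notifier chain moves from a blocking chain (rwsem-based) to an SRCU chain, so callers of crypto_notify() only enter an SRCU read-side critical section instead of taking a sleeping lock. A minimal self-contained sketch of the same API, with an illustrative chain:

    #include <linux/notifier.h>

    static SRCU_NOTIFIER_HEAD(demo_chain);

    static int demo_cb(struct notifier_block *nb, unsigned long val, void *data)
    {
            return NOTIFY_OK;
    }

    static struct notifier_block demo_nb = { .notifier_call = demo_cb };

    static void demo(void)
    {
            srcu_notifier_chain_register(&demo_chain, &demo_nb);
            /* Call side: SRCU read lock only, never a rwsem. */
            srcu_notifier_call_chain(&demo_chain, 1, NULL);
            srcu_notifier_chain_unregister(&demo_chain, &demo_nb);
    }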
4305 | diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h | |
4306 | index faa97604d878..941497f31cf0 100644 | |
4307 | --- a/drivers/acpi/acpica/acglobal.h | |
4308 | +++ b/drivers/acpi/acpica/acglobal.h | |
4309 | @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); | |
4310 | * interrupt level | |
4311 | */ | |
4312 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ | |
4313 | -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
4314 | +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ | |
4315 | ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); | |
4316 | ||
4317 | /* Mutex for _OSI support */ | |
4318 | diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c | |
4319 | index 3cf77afd142c..dc32e72132f1 100644 | |
4320 | --- a/drivers/acpi/acpica/hwregs.c | |
4321 | +++ b/drivers/acpi/acpica/hwregs.c | |
4322 | @@ -269,14 +269,14 @@ acpi_status acpi_hw_clear_acpi_status(void) | |
4323 | ACPI_BITMASK_ALL_FIXED_STATUS, | |
4324 | ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); | |
4325 | ||
4326 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
4327 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
4328 | ||
4329 | /* Clear the fixed events in PM1 A/B */ | |
4330 | ||
4331 | status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, | |
4332 | ACPI_BITMASK_ALL_FIXED_STATUS); | |
4333 | ||
4334 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
4335 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
4336 | ||
4337 | if (ACPI_FAILURE(status)) { | |
4338 | goto exit; | |
4339 | diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c | |
4340 | index b2e50d8007fe..ff007084dc48 100644 | |
4341 | --- a/drivers/acpi/acpica/hwxface.c |
4342 | +++ b/drivers/acpi/acpica/hwxface.c | |
4343 | @@ -374,7 +374,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
4344 | return_ACPI_STATUS(AE_BAD_PARAMETER); | |
4345 | } | |
4346 | ||
4347 | - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); | |
4348 | + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); | |
4349 | ||
4350 | /* | |
4351 | * At this point, we know that the parent register is one of the | |
4352 | @@ -435,7 +435,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) | |
4353 | ||
4354 | unlock_and_exit: | |
4355 | ||
4356 | - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); | |
4357 | + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); | |
4358 | return_ACPI_STATUS(status); | |
4359 | } | |
4360 | ||
4361 | diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c | |
4362 | index ce406e39b669..41a75eb3ae9d 100644 | |
4363 | --- a/drivers/acpi/acpica/utmutex.c | |
4364 | +++ b/drivers/acpi/acpica/utmutex.c | |
4365 | @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) | |
4366 | return_ACPI_STATUS (status); | |
4367 | } | |
4368 | ||
4369 | - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); | |
4370 | + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); | |
4371 | if (ACPI_FAILURE (status)) { | |
4372 | return_ACPI_STATUS (status); | |
4373 | } | |
4374 | @@ -156,7 +156,7 @@ void acpi_ut_mutex_terminate(void) | |
4375 | /* Delete the spinlocks */ | |
4376 | ||
4377 | acpi_os_delete_lock(acpi_gbl_gpe_lock); | |
4378 | - acpi_os_delete_lock(acpi_gbl_hardware_lock); | |
4379 | + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); | |
4380 | acpi_os_delete_lock(acpi_gbl_reference_count_lock); | |
4381 | ||
4382 | /* Delete the reader/writer lock */ | |
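acpi_gbl_hardware_lock becomes a raw spinlock because on RT an acpi_spinlock maps to a sleeping rtmutex, while this lock protects short register accesses that may run in non-sleepable context. Generic form of the conversion (lock name illustrative):

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(hw_lock);

    static void touch_hw_register(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&hw_lock, flags); /* really spins, even on RT */
            /* short, non-sleeping hardware register access here */
            raw_spin_unlock_irqrestore(&hw_lock, flags);
    }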
4383 | diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c | |
4384 | index 7dbba387d12a..65beb7abb4e7 100644 | |
4385 | --- a/drivers/ata/libata-sff.c | |
4386 | +++ b/drivers/ata/libata-sff.c | |
4387 | @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, | |
4388 | unsigned long flags; | |
4389 | unsigned int consumed; | |
4390 | ||
4391 | - local_irq_save(flags); | |
4392 | + local_irq_save_nort(flags); | |
4393 | consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); | |
4394 | - local_irq_restore(flags); | |
4395 | + local_irq_restore_nort(flags); | |
4396 | ||
4397 | return consumed; | |
4398 | } | |
4399 | @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
4400 | unsigned long flags; | |
4401 | ||
4402 | /* FIXME: use a bounce buffer */ | |
4403 | - local_irq_save(flags); | |
4404 | + local_irq_save_nort(flags); | |
4405 | buf = kmap_atomic(page); | |
4406 | ||
4407 | /* do the actual data transfer */ | |
4408 | @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) | |
4409 | do_write); | |
4410 | ||
4411 | kunmap_atomic(buf); | |
4412 | - local_irq_restore(flags); | |
4413 | + local_irq_restore_nort(flags); | |
4414 | } else { | |
4415 | buf = page_address(page); | |
4416 | ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, | |
4417 | @@ -864,7 +864,7 @@ next_sg: | |
4418 | unsigned long flags; | |
4419 | ||
4420 | /* FIXME: use bounce buffer */ | |
4421 | - local_irq_save(flags); | |
4422 | + local_irq_save_nort(flags); | |
4423 | buf = kmap_atomic(page); | |
4424 | ||
4425 | /* do the actual data transfer */ | |
4426 | @@ -872,7 +872,7 @@ next_sg: | |
4427 | count, rw); | |
4428 | ||
4429 | kunmap_atomic(buf); | |
4430 | - local_irq_restore(flags); | |
4431 | + local_irq_restore_nort(flags); | |
4432 | } else { | |
4433 | buf = page_address(page); | |
4434 | consumed = ap->ops->sff_data_xfer(dev, buf + offset, | |
4435 | diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c | |
4436 | index 370c2f76016d..65e0b375a291 100644 | |
4437 | --- a/drivers/block/zram/zram_drv.c | |
4438 | +++ b/drivers/block/zram/zram_drv.c | |
4439 | @@ -520,6 +520,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) | |
4440 | goto out_error; | |
4441 | } | |
4442 | ||
4443 | + zram_meta_init_table_locks(meta, disksize); | |
4444 | + | |
4445 | return meta; | |
4446 | ||
4447 | out_error: | |
4448 | @@ -568,12 +570,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) | |
4449 | unsigned long handle; | |
4450 | size_t size; | |
4451 | ||
4452 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4453 | + zram_lock_table(&meta->table[index]); | |
4454 | handle = meta->table[index].handle; | |
4455 | size = zram_get_obj_size(meta, index); | |
4456 | ||
4457 | if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { | |
4458 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4459 | + zram_unlock_table(&meta->table[index]); | |
4460 | clear_page(mem); | |
4461 | return 0; | |
4462 | } | |
4463 | @@ -584,7 +586,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) | |
4464 | else | |
4465 | ret = zcomp_decompress(zram->comp, cmem, size, mem); | |
4466 | zs_unmap_object(meta->mem_pool, handle); | |
4467 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4468 | + zram_unlock_table(&meta->table[index]); | |
4469 | ||
4470 | /* Should NEVER happen. Return bio error if it does. */ | |
4471 | if (unlikely(ret)) { | |
4472 | @@ -604,14 +606,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, | |
4473 | struct zram_meta *meta = zram->meta; | |
4474 | page = bvec->bv_page; | |
4475 | ||
4476 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4477 | + zram_lock_table(&meta->table[index]); | |
4478 | if (unlikely(!meta->table[index].handle) || | |
4479 | zram_test_flag(meta, index, ZRAM_ZERO)) { | |
4480 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4481 | + zram_unlock_table(&meta->table[index]); | |
4482 | handle_zero_page(bvec); | |
4483 | return 0; | |
4484 | } | |
4485 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4486 | + zram_unlock_table(&meta->table[index]); | |
4487 | ||
4488 | if (is_partial_io(bvec)) | |
4489 | /* Use a temporary buffer to decompress the page */ | |
4490 | @@ -689,10 +691,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
4491 | if (user_mem) | |
4492 | kunmap_atomic(user_mem); | |
4493 | /* Free memory associated with this sector now. */ | |
4494 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4495 | + zram_lock_table(&meta->table[index]); | |
4496 | zram_free_page(zram, index); | |
4497 | zram_set_flag(meta, index, ZRAM_ZERO); | |
4498 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4499 | + zram_unlock_table(&meta->table[index]); | |
4500 | ||
4501 | atomic64_inc(&zram->stats.zero_pages); | |
4502 | ret = 0; | |
4503 | @@ -752,12 +754,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, | |
4504 | * Free memory associated with this sector | |
4505 | * before overwriting unused sectors. | |
4506 | */ | |
4507 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4508 | + zram_lock_table(&meta->table[index]); | |
4509 | zram_free_page(zram, index); | |
4510 | ||
4511 | meta->table[index].handle = handle; | |
4512 | zram_set_obj_size(meta, index, clen); | |
4513 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4514 | + zram_unlock_table(&meta->table[index]); | |
4515 | ||
4516 | /* Update stats */ | |
4517 | atomic64_add(clen, &zram->stats.compr_data_size); | |
4518 | @@ -800,9 +802,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, | |
4519 | } | |
4520 | ||
4521 | while (n >= PAGE_SIZE) { | |
4522 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4523 | + zram_lock_table(&meta->table[index]); | |
4524 | zram_free_page(zram, index); | |
4525 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4526 | + zram_unlock_table(&meta->table[index]); | |
4527 | atomic64_inc(&zram->stats.notify_free); | |
4528 | index++; | |
4529 | n -= PAGE_SIZE; | |
4530 | @@ -928,9 +930,9 @@ static void zram_slot_free_notify(struct block_device *bdev, | |
4531 | zram = bdev->bd_disk->private_data; | |
4532 | meta = zram->meta; | |
4533 | ||
4534 | - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); | |
4535 | + zram_lock_table(&meta->table[index]); | |
4536 | zram_free_page(zram, index); | |
4537 | - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); | |
4538 | + zram_unlock_table(&meta->table[index]); | |
4539 | atomic64_inc(&zram->stats.notify_free); | |
4540 | } | |
4541 | ||
4542 | diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h | |
4543 | index 8e92339686d7..9e3e953d680e 100644 | |
4544 | --- a/drivers/block/zram/zram_drv.h | |
4545 | +++ b/drivers/block/zram/zram_drv.h | |
4546 | @@ -72,6 +72,9 @@ enum zram_pageflags { | |
4547 | struct zram_table_entry { | |
4548 | unsigned long handle; | |
4549 | unsigned long value; | |
4550 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
4551 | + spinlock_t lock; | |
4552 | +#endif | |
4553 | }; | |
4554 | ||
4555 | struct zram_stats { | |
4556 | @@ -119,4 +122,42 @@ struct zram { | |
4557 | */ | |
4558 | bool claim; /* Protected by bdev->bd_mutex */ | |
4559 | }; | |
4560 | + | |
4561 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
4562 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
4563 | +{ | |
4564 | + bit_spin_lock(ZRAM_ACCESS, &table->value); | |
4565 | +} | |
4566 | + | |
4567 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
4568 | +{ | |
4569 | + bit_spin_unlock(ZRAM_ACCESS, &table->value); | |
4570 | +} | |
4571 | + | |
4572 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } | |
4573 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
4574 | +static inline void zram_lock_table(struct zram_table_entry *table) | |
4575 | +{ | |
4576 | + spin_lock(&table->lock); | |
4577 | + __set_bit(ZRAM_ACCESS, &table->value); | |
4578 | +} | |
4579 | + | |
4580 | +static inline void zram_unlock_table(struct zram_table_entry *table) | |
4581 | +{ | |
4582 | + __clear_bit(ZRAM_ACCESS, &table->value); | |
4583 | + spin_unlock(&table->lock); | |
4584 | +} | |
4585 | + | |
4586 | +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) | |
4587 | +{ | |
4588 | + size_t num_pages = disksize >> PAGE_SHIFT; | |
4589 | + size_t index; | |
4590 | + | |
4591 | + for (index = 0; index < num_pages; index++) { | |
4592 | + spinlock_t *lock = &meta->table[index].lock; | |
4593 | + spin_lock_init(lock); | |
4594 | + } | |
4595 | +} | |
4596 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
4597 | + | |
4598 | #endif | |
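Summary of the zram change: bit_spin_lock() busy-waits with preemption disabled, which is not acceptable on RT, so each table entry grows a real spinlock_t there; the ZRAM_ACCESS bit is still set and cleared (non-atomically, which is safe because the lock serializes it) so readers of the flag word keep a consistent view. The cost is one spinlock per page of disksize. Generic shape of the substitution (names illustrative):

    struct entry {
            unsigned long flags;
    #ifdef CONFIG_PREEMPT_RT_BASE
            spinlock_t lock;                /* sleeping "spinlock" on RT */
    #endif
    };

    #define ENTRY_LOCK_BIT  0

    static void entry_lock(struct entry *e)
    {
    #ifdef CONFIG_PREEMPT_RT_BASE
            spin_lock(&e->lock);
            __set_bit(ENTRY_LOCK_BIT, &e->flags);   /* lock serializes the bit op */
    #else
            bit_spin_lock(ENTRY_LOCK_BIT, &e->flags);
    #endif
    }

    static void entry_unlock(struct entry *e)
    {
    #ifdef CONFIG_PREEMPT_RT_BASE
            __clear_bit(ENTRY_LOCK_BIT, &e->flags);
            spin_unlock(&e->lock);
    #else
            bit_spin_unlock(ENTRY_LOCK_BIT, &e->flags);
    #endif
    }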
4599 | diff --git a/drivers/char/random.c b/drivers/char/random.c | |
4600 | index 491a4dce13fe..cf69b6b42208 100644 | |
4601 | --- a/drivers/char/random.c | |
4602 | +++ b/drivers/char/random.c | |
4603 | @@ -799,8 +799,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
4604 | } sample; | |
4605 | long delta, delta2, delta3; | |
4606 | ||
4607 | - preempt_disable(); | |
4608 | - | |
4609 | sample.jiffies = jiffies; | |
4610 | sample.cycles = random_get_entropy(); | |
4611 | sample.num = num; | |
4612 | @@ -841,7 +839,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) | |
4613 | */ | |
4614 | credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); | |
4615 | } | |
4616 | - preempt_enable(); | |
4617 | } | |
4618 | ||
4619 | void add_input_randomness(unsigned int type, unsigned int code, | |
4620 | @@ -894,28 +891,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) | |
4621 | return *(ptr + f->reg_idx++); | |
4622 | } | |
4623 | ||
4624 | -void add_interrupt_randomness(int irq, int irq_flags) | |
4625 | +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) | |
4626 | { | |
4627 | struct entropy_store *r; | |
4628 | struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); | |
4629 | - struct pt_regs *regs = get_irq_regs(); | |
4630 | unsigned long now = jiffies; | |
4631 | cycles_t cycles = random_get_entropy(); | |
4632 | __u32 c_high, j_high; | |
4633 | - __u64 ip; | |
4634 | unsigned long seed; | |
4635 | int credit = 0; | |
4636 | ||
4637 | if (cycles == 0) | |
4638 | - cycles = get_reg(fast_pool, regs); | |
4639 | + cycles = get_reg(fast_pool, NULL); | |
4640 | c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; | |
4641 | j_high = (sizeof(now) > 4) ? now >> 32 : 0; | |
4642 | fast_pool->pool[0] ^= cycles ^ j_high ^ irq; | |
4643 | fast_pool->pool[1] ^= now ^ c_high; | |
4644 | - ip = regs ? instruction_pointer(regs) : _RET_IP_; | |
4645 | + if (!ip) | |
4646 | + ip = _RET_IP_; | |
4647 | fast_pool->pool[2] ^= ip; | |
4648 | fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : | |
4649 | - get_reg(fast_pool, regs); | |
4650 | + get_reg(fast_pool, NULL); | |
4651 | ||
4652 | fast_mix(fast_pool); | |
4653 | add_interrupt_bench(cycles); | |
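add_interrupt_randomness() gains an explicit instruction-pointer argument because on RT it is no longer invoked from hard interrupt context, where get_irq_regs() is valid; the hard-irq entry path records the ip and the threaded handler passes it along later (in the RT series this value is stashed in the irq descriptor; that detail is treated as an assumption here). A hypothetical call site:

    /* Runs in the threaded handler, outside hard-irq context. */
    static void feed_irq_entropy(int irq, u64 hardirq_ip)
    {
            /* ip == 0 makes the function fall back to _RET_IP_. */
            add_interrupt_randomness(irq, 0, hardirq_ip);
    }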
4654 | diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c | |
4655 | index abc80949e1dd..4ad3298eb372 100644 | |
4656 | --- a/drivers/clk/at91/clk-generated.c | |
4657 | +++ b/drivers/clk/at91/clk-generated.c | |
4658 | @@ -15,8 +15,8 @@ | |
4659 | #include <linux/clkdev.h> | |
4660 | #include <linux/clk/at91_pmc.h> | |
4661 | #include <linux/of.h> | |
4662 | -#include <linux/of_address.h> | |
4663 | -#include <linux/io.h> | |
4664 | +#include <linux/mfd/syscon.h> | |
4665 | +#include <linux/regmap.h> | |
4666 | ||
4667 | #include "pmc.h" | |
4668 | ||
4669 | @@ -28,8 +28,9 @@ | |
4670 | ||
4671 | struct clk_generated { | |
4672 | struct clk_hw hw; | |
4673 | - struct at91_pmc *pmc; | |
4674 | + struct regmap *regmap; | |
4675 | struct clk_range range; | |
4676 | + spinlock_t *lock; | |
4677 | u32 id; | |
4678 | u32 gckdiv; | |
4679 | u8 parent_id; | |
4680 | @@ -41,49 +42,52 @@ struct clk_generated { | |
4681 | static int clk_generated_enable(struct clk_hw *hw) | |
4682 | { | |
4683 | struct clk_generated *gck = to_clk_generated(hw); | |
4684 | - struct at91_pmc *pmc = gck->pmc; | |
4685 | - u32 tmp; | |
4686 | + unsigned long flags; | |
4687 | ||
4688 | pr_debug("GCLK: %s, gckdiv = %d, parent id = %d\n", | |
4689 | __func__, gck->gckdiv, gck->parent_id); | |
4690 | ||
4691 | - pmc_lock(pmc); | |
4692 | - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4693 | - tmp = pmc_read(pmc, AT91_PMC_PCR) & | |
4694 | - ~(AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK); | |
4695 | - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_GCKCSS(gck->parent_id) | |
4696 | - | AT91_PMC_PCR_CMD | |
4697 | - | AT91_PMC_PCR_GCKDIV(gck->gckdiv) | |
4698 | - | AT91_PMC_PCR_GCKEN); | |
4699 | - pmc_unlock(pmc); | |
4700 | + spin_lock_irqsave(gck->lock, flags); | |
4701 | + regmap_write(gck->regmap, AT91_PMC_PCR, | |
4702 | + (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4703 | + regmap_update_bits(gck->regmap, AT91_PMC_PCR, | |
4704 | + AT91_PMC_PCR_GCKDIV_MASK | AT91_PMC_PCR_GCKCSS_MASK | | |
4705 | + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN, | |
4706 | + AT91_PMC_PCR_GCKCSS(gck->parent_id) | | |
4707 | + AT91_PMC_PCR_CMD | | |
4708 | + AT91_PMC_PCR_GCKDIV(gck->gckdiv) | | |
4709 | + AT91_PMC_PCR_GCKEN); | |
4710 | + spin_unlock_irqrestore(gck->lock, flags); | |
4711 | return 0; | |
4712 | } | |
4713 | ||
4714 | static void clk_generated_disable(struct clk_hw *hw) | |
4715 | { | |
4716 | struct clk_generated *gck = to_clk_generated(hw); | |
4717 | - struct at91_pmc *pmc = gck->pmc; | |
4718 | - u32 tmp; | |
4719 | - | |
4720 | - pmc_lock(pmc); | |
4721 | - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4722 | - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_GCKEN; | |
4723 | - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD); | |
4724 | - pmc_unlock(pmc); | |
4725 | + unsigned long flags; | |
4726 | + | |
4727 | + spin_lock_irqsave(gck->lock, flags); | |
4728 | + regmap_write(gck->regmap, AT91_PMC_PCR, | |
4729 | + (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4730 | + regmap_update_bits(gck->regmap, AT91_PMC_PCR, | |
4731 | + AT91_PMC_PCR_CMD | AT91_PMC_PCR_GCKEN, | |
4732 | + AT91_PMC_PCR_CMD); | |
4733 | + spin_unlock_irqrestore(gck->lock, flags); | |
4734 | } | |
4735 | ||
4736 | static int clk_generated_is_enabled(struct clk_hw *hw) | |
4737 | { | |
4738 | struct clk_generated *gck = to_clk_generated(hw); | |
4739 | - struct at91_pmc *pmc = gck->pmc; | |
4740 | - int ret; | |
4741 | + unsigned long flags; | |
4742 | + unsigned int status; | |
4743 | ||
4744 | - pmc_lock(pmc); | |
4745 | - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4746 | - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_GCKEN); | |
4747 | - pmc_unlock(pmc); | |
4748 | + spin_lock_irqsave(gck->lock, flags); | |
4749 | + regmap_write(gck->regmap, AT91_PMC_PCR, | |
4750 | + (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4751 | + regmap_read(gck->regmap, AT91_PMC_PCR, &status); | |
4752 | + spin_unlock_irqrestore(gck->lock, flags); | |
4753 | ||
4754 | - return ret; | |
4755 | + return status & AT91_PMC_PCR_GCKEN ? 1 : 0; | |
4756 | } | |
4757 | ||
4758 | static unsigned long | |
4759 | @@ -214,13 +218,14 @@ static const struct clk_ops generated_ops = { | |
4760 | */ | |
4761 | static void clk_generated_startup(struct clk_generated *gck) | |
4762 | { | |
4763 | - struct at91_pmc *pmc = gck->pmc; | |
4764 | u32 tmp; | |
4765 | + unsigned long flags; | |
4766 | ||
4767 | - pmc_lock(pmc); | |
4768 | - pmc_write(pmc, AT91_PMC_PCR, (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4769 | - tmp = pmc_read(pmc, AT91_PMC_PCR); | |
4770 | - pmc_unlock(pmc); | |
4771 | + spin_lock_irqsave(gck->lock, flags); | |
4772 | + regmap_write(gck->regmap, AT91_PMC_PCR, | |
4773 | + (gck->id & AT91_PMC_PCR_PID_MASK)); | |
4774 | + regmap_read(gck->regmap, AT91_PMC_PCR, &tmp); | |
4775 | + spin_unlock_irqrestore(gck->lock, flags); | |
4776 | ||
4777 | gck->parent_id = (tmp & AT91_PMC_PCR_GCKCSS_MASK) | |
4778 | >> AT91_PMC_PCR_GCKCSS_OFFSET; | |
4779 | @@ -229,8 +234,8 @@ static void clk_generated_startup(struct clk_generated *gck) | |
4780 | } | |
4781 | ||
4782 | static struct clk * __init | |
4783 | -at91_clk_register_generated(struct at91_pmc *pmc, const char *name, | |
4784 | - const char **parent_names, u8 num_parents, | |
4785 | +at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const char | |
4786 | + *name, const char **parent_names, u8 num_parents, | |
4787 | u8 id, const struct clk_range *range) | |
4788 | { | |
4789 | struct clk_generated *gck; | |
4790 | @@ -249,7 +254,8 @@ at91_clk_register_generated(struct at91_pmc *pmc, const char *name, | |
4791 | ||
4792 | gck->id = id; | |
4793 | gck->hw.init = &init; | |
4794 | - gck->pmc = pmc; | |
4795 | + gck->regmap = regmap; | |
4796 | + gck->lock = lock; | |
4797 | gck->range = *range; | |
4798 | ||
4799 | clk = clk_register(NULL, &gck->hw); | |
4800 | @@ -261,8 +267,7 @@ at91_clk_register_generated(struct at91_pmc *pmc, const char *name, | |
4801 | return clk; | |
4802 | } | |
4803 | ||
4804 | -void __init of_sama5d2_clk_generated_setup(struct device_node *np, | |
4805 | - struct at91_pmc *pmc) | |
4806 | +void __init of_sama5d2_clk_generated_setup(struct device_node *np) | |
4807 | { | |
4808 | int num; | |
4809 | u32 id; | |
4810 | @@ -272,6 +277,7 @@ void __init of_sama5d2_clk_generated_setup(struct device_node *np, | |
4811 | const char *parent_names[GENERATED_SOURCE_MAX]; | |
4812 | struct device_node *gcknp; | |
4813 | struct clk_range range = CLK_RANGE(0, 0); | |
4814 | + struct regmap *regmap; | |
4815 | ||
4816 | num_parents = of_clk_get_parent_count(np); | |
4817 | if (num_parents <= 0 || num_parents > GENERATED_SOURCE_MAX) | |
4818 | @@ -283,6 +289,10 @@ void __init of_sama5d2_clk_generated_setup(struct device_node *np, | |
4819 | if (!num || num > PERIPHERAL_MAX) | |
4820 | return; | |
4821 | ||
4822 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
4823 | + if (IS_ERR(regmap)) | |
4824 | + return; | |
4825 | + | |
4826 | for_each_child_of_node(np, gcknp) { | |
4827 | if (of_property_read_u32(gcknp, "reg", &id)) | |
4828 | continue; | |
4829 | @@ -296,11 +306,14 @@ void __init of_sama5d2_clk_generated_setup(struct device_node *np, | |
4830 | of_at91_get_clk_range(gcknp, "atmel,clk-output-range", | |
4831 | &range); | |
4832 | ||
4833 | - clk = at91_clk_register_generated(pmc, name, parent_names, | |
4834 | - num_parents, id, &range); | |
4835 | + clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name, | |
4836 | + parent_names, num_parents, | |
4837 | + id, &range); | |
4838 | if (IS_ERR(clk)) | |
4839 | continue; | |
4840 | ||
4841 | of_clk_add_provider(gcknp, of_clk_src_simple_get, clk); | |
4842 | } | |
4843 | } | |
4844 | +CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated", | |
4845 | + of_sama5d2_clk_generated_setup); | |
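From here on, the at91 clk drivers drop the private at91_pmc handle (and its pmc_lock/pmc_read/pmc_write helpers) in favor of a syscon regmap, registering themselves via CLK_OF_DECLARE(). AT91_PMC_PCR is an indexed register: the PID must be written first to select the peripheral, then the command bits updated, so the two regmap operations stay under one explicit spinlock. Condensed form of that sequence (helper name and lock are illustrative):

    static void pcr_update(struct regmap *regmap, spinlock_t *lock,
                           u32 id, u32 mask, u32 val)
    {
            unsigned long flags;

            spin_lock_irqsave(lock, flags);
            /* Select the peripheral: PCR is banked by the PID field. */
            regmap_write(regmap, AT91_PMC_PCR, id & AT91_PMC_PCR_PID_MASK);
            /* Read-modify-write only the requested bits. */
            regmap_update_bits(regmap, AT91_PMC_PCR, mask, val);
            spin_unlock_irqrestore(lock, flags);
    }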
4846 | diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c | |
4847 | index a165230e7eda..8e20c8a76db7 100644 | |
4848 | --- a/drivers/clk/at91/clk-h32mx.c | |
4849 | +++ b/drivers/clk/at91/clk-h32mx.c | |
4850 | @@ -15,15 +15,9 @@ | |
4851 | #include <linux/clk-provider.h> | |
4852 | #include <linux/clkdev.h> | |
4853 | #include <linux/clk/at91_pmc.h> | |
4854 | -#include <linux/delay.h> | |
4855 | #include <linux/of.h> | |
4856 | -#include <linux/of_address.h> | |
4857 | -#include <linux/of_irq.h> | |
4858 | -#include <linux/io.h> | |
4859 | -#include <linux/interrupt.h> | |
4860 | -#include <linux/irq.h> | |
4861 | -#include <linux/sched.h> | |
4862 | -#include <linux/wait.h> | |
4863 | +#include <linux/regmap.h> | |
4864 | +#include <linux/mfd/syscon.h> | |
4865 | ||
4866 | #include "pmc.h" | |
4867 | ||
4868 | @@ -31,7 +25,7 @@ | |
4869 | ||
4870 | struct clk_sama5d4_h32mx { | |
4871 | struct clk_hw hw; | |
4872 | - struct at91_pmc *pmc; | |
4873 | + struct regmap *regmap; | |
4874 | }; | |
4875 | ||
4876 | #define to_clk_sama5d4_h32mx(hw) container_of(hw, struct clk_sama5d4_h32mx, hw) | |
4877 | @@ -40,8 +34,10 @@ static unsigned long clk_sama5d4_h32mx_recalc_rate(struct clk_hw *hw, | |
4878 | unsigned long parent_rate) | |
4879 | { | |
4880 | struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw); | |
4881 | + unsigned int mckr; | |
4882 | ||
4883 | - if (pmc_read(h32mxclk->pmc, AT91_PMC_MCKR) & AT91_PMC_H32MXDIV) | |
4884 | + regmap_read(h32mxclk->regmap, AT91_PMC_MCKR, &mckr); | |
4885 | + if (mckr & AT91_PMC_H32MXDIV) | |
4886 | return parent_rate / 2; | |
4887 | ||
4888 | if (parent_rate > H32MX_MAX_FREQ) | |
4889 | @@ -70,18 +66,16 @@ static int clk_sama5d4_h32mx_set_rate(struct clk_hw *hw, unsigned long rate, | |
4890 | unsigned long parent_rate) | |
4891 | { | |
4892 | struct clk_sama5d4_h32mx *h32mxclk = to_clk_sama5d4_h32mx(hw); | |
4893 | - struct at91_pmc *pmc = h32mxclk->pmc; | |
4894 | - u32 tmp; | |
4895 | + u32 mckr = 0; | |
4896 | ||
4897 | if (parent_rate != rate && (parent_rate / 2) != rate) | |
4898 | return -EINVAL; | |
4899 | ||
4900 | - pmc_lock(pmc); | |
4901 | - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_H32MXDIV; | |
4902 | if ((parent_rate / 2) == rate) | |
4903 | - tmp |= AT91_PMC_H32MXDIV; | |
4904 | - pmc_write(pmc, AT91_PMC_MCKR, tmp); | |
4905 | - pmc_unlock(pmc); | |
4906 | + mckr = AT91_PMC_H32MXDIV; | |
4907 | + | |
4908 | + regmap_update_bits(h32mxclk->regmap, AT91_PMC_MCKR, | |
4909 | + AT91_PMC_H32MXDIV, mckr); | |
4910 | ||
4911 | return 0; | |
4912 | } | |
4913 | @@ -92,14 +86,18 @@ static const struct clk_ops h32mx_ops = { | |
4914 | .set_rate = clk_sama5d4_h32mx_set_rate, | |
4915 | }; | |
4916 | ||
4917 | -void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, | |
4918 | - struct at91_pmc *pmc) | |
4919 | +static void __init of_sama5d4_clk_h32mx_setup(struct device_node *np) | |
4920 | { | |
4921 | struct clk_sama5d4_h32mx *h32mxclk; | |
4922 | struct clk_init_data init; | |
4923 | const char *parent_name; | |
4924 | + struct regmap *regmap; | |
4925 | struct clk *clk; | |
4926 | ||
4927 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
4928 | + if (IS_ERR(regmap)) | |
4929 | + return; | |
4930 | + | |
4931 | h32mxclk = kzalloc(sizeof(*h32mxclk), GFP_KERNEL); | |
4932 | if (!h32mxclk) | |
4933 | return; | |
4934 | @@ -113,7 +111,7 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, | |
4935 | init.flags = CLK_SET_RATE_GATE; | |
4936 | ||
4937 | h32mxclk->hw.init = &init; | |
4938 | - h32mxclk->pmc = pmc; | |
4939 | + h32mxclk->regmap = regmap; | |
4940 | ||
4941 | clk = clk_register(NULL, &h32mxclk->hw); | |
4942 | if (IS_ERR(clk)) { | |
4943 | @@ -123,3 +121,5 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, | |
4944 | ||
4945 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
4946 | } | |
4947 | +CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx", | |
4948 | + of_sama5d4_clk_h32mx_setup); | |
4949 | diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c | |
4950 | index fd7247deabdc..4bfc94d6c26e 100644 | |
4951 | --- a/drivers/clk/at91/clk-main.c | |
4952 | +++ b/drivers/clk/at91/clk-main.c | |
4953 | @@ -13,13 +13,8 @@ | |
4954 | #include <linux/clk/at91_pmc.h> | |
4955 | #include <linux/delay.h> | |
4956 | #include <linux/of.h> | |
4957 | -#include <linux/of_address.h> | |
4958 | -#include <linux/of_irq.h> | |
4959 | -#include <linux/io.h> | |
4960 | -#include <linux/interrupt.h> | |
4961 | -#include <linux/irq.h> | |
4962 | -#include <linux/sched.h> | |
4963 | -#include <linux/wait.h> | |
4964 | +#include <linux/mfd/syscon.h> | |
4965 | +#include <linux/regmap.h> | |
4966 | ||
4967 | #include "pmc.h" | |
4968 | ||
4969 | @@ -34,18 +29,14 @@ | |
4970 | ||
4971 | struct clk_main_osc { | |
4972 | struct clk_hw hw; | |
4973 | - struct at91_pmc *pmc; | |
4974 | - unsigned int irq; | |
4975 | - wait_queue_head_t wait; | |
4976 | + struct regmap *regmap; | |
4977 | }; | |
4978 | ||
4979 | #define to_clk_main_osc(hw) container_of(hw, struct clk_main_osc, hw) | |
4980 | ||
4981 | struct clk_main_rc_osc { | |
4982 | struct clk_hw hw; | |
4983 | - struct at91_pmc *pmc; | |
4984 | - unsigned int irq; | |
4985 | - wait_queue_head_t wait; | |
4986 | + struct regmap *regmap; | |
4987 | unsigned long frequency; | |
4988 | unsigned long accuracy; | |
4989 | }; | |
4990 | @@ -54,51 +45,47 @@ struct clk_main_rc_osc { | |
4991 | ||
4992 | struct clk_rm9200_main { | |
4993 | struct clk_hw hw; | |
4994 | - struct at91_pmc *pmc; | |
4995 | + struct regmap *regmap; | |
4996 | }; | |
4997 | ||
4998 | #define to_clk_rm9200_main(hw) container_of(hw, struct clk_rm9200_main, hw) | |
4999 | ||
5000 | struct clk_sam9x5_main { | |
5001 | struct clk_hw hw; | |
5002 | - struct at91_pmc *pmc; | |
5003 | - unsigned int irq; | |
5004 | - wait_queue_head_t wait; | |
5005 | + struct regmap *regmap; | |
5006 | u8 parent; | |
5007 | }; | |
5008 | ||
5009 | #define to_clk_sam9x5_main(hw) container_of(hw, struct clk_sam9x5_main, hw) | |
5010 | ||
5011 | -static irqreturn_t clk_main_osc_irq_handler(int irq, void *dev_id) | |
5012 | +static inline bool clk_main_osc_ready(struct regmap *regmap) | |
5013 | { | |
5014 | - struct clk_main_osc *osc = dev_id; | |
5015 | + unsigned int status; | |
5016 | ||
5017 | - wake_up(&osc->wait); | |
5018 | - disable_irq_nosync(osc->irq); | |
5019 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5020 | ||
5021 | - return IRQ_HANDLED; | |
5022 | + return status & AT91_PMC_MOSCS; | |
5023 | } | |
5024 | ||
5025 | static int clk_main_osc_prepare(struct clk_hw *hw) | |
5026 | { | |
5027 | struct clk_main_osc *osc = to_clk_main_osc(hw); | |
5028 | - struct at91_pmc *pmc = osc->pmc; | |
5029 | + struct regmap *regmap = osc->regmap; | |
5030 | u32 tmp; | |
5031 | ||
5032 | - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK; | |
5033 | + regmap_read(regmap, AT91_CKGR_MOR, &tmp); | |
5034 | + tmp &= ~MOR_KEY_MASK; | |
5035 | + | |
5036 | if (tmp & AT91_PMC_OSCBYPASS) | |
5037 | return 0; | |
5038 | ||
5039 | if (!(tmp & AT91_PMC_MOSCEN)) { | |
5040 | tmp |= AT91_PMC_MOSCEN | AT91_PMC_KEY; | |
5041 | - pmc_write(pmc, AT91_CKGR_MOR, tmp); | |
5042 | + regmap_write(regmap, AT91_CKGR_MOR, tmp); | |
5043 | } | |
5044 | ||
5045 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS)) { | |
5046 | - enable_irq(osc->irq); | |
5047 | - wait_event(osc->wait, | |
5048 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS); | |
5049 | - } | |
5050 | + while (!clk_main_osc_ready(regmap)) | |
5051 | + cpu_relax(); | |
5052 | ||
5053 | return 0; | |
5054 | } | |
5055 | @@ -106,9 +93,10 @@ static int clk_main_osc_prepare(struct clk_hw *hw) | |
5056 | static void clk_main_osc_unprepare(struct clk_hw *hw) | |
5057 | { | |
5058 | struct clk_main_osc *osc = to_clk_main_osc(hw); | |
5059 | - struct at91_pmc *pmc = osc->pmc; | |
5060 | - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR); | |
5061 | + struct regmap *regmap = osc->regmap; | |
5062 | + u32 tmp; | |
5063 | ||
5064 | + regmap_read(regmap, AT91_CKGR_MOR, &tmp); | |
5065 | if (tmp & AT91_PMC_OSCBYPASS) | |
5066 | return; | |
5067 | ||
5068 | @@ -116,20 +104,22 @@ static void clk_main_osc_unprepare(struct clk_hw *hw) | |
5069 | return; | |
5070 | ||
5071 | tmp &= ~(AT91_PMC_KEY | AT91_PMC_MOSCEN); | |
5072 | - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY); | |
5073 | + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_KEY); | |
5074 | } | |
5075 | ||
5076 | static int clk_main_osc_is_prepared(struct clk_hw *hw) | |
5077 | { | |
5078 | struct clk_main_osc *osc = to_clk_main_osc(hw); | |
5079 | - struct at91_pmc *pmc = osc->pmc; | |
5080 | - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR); | |
5081 | + struct regmap *regmap = osc->regmap; | |
5082 | + u32 tmp, status; | |
5083 | ||
5084 | + regmap_read(regmap, AT91_CKGR_MOR, &tmp); | |
5085 | if (tmp & AT91_PMC_OSCBYPASS) | |
5086 | return 1; | |
5087 | ||
5088 | - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCS) && | |
5089 | - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN)); | |
5090 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5091 | + | |
5092 | + return (status & AT91_PMC_MOSCS) && (tmp & AT91_PMC_MOSCEN); | |
5093 | } | |
5094 | ||
5095 | static const struct clk_ops main_osc_ops = { | |
5096 | @@ -139,18 +129,16 @@ static const struct clk_ops main_osc_ops = { | |
5097 | }; | |
5098 | ||
5099 | static struct clk * __init | |
5100 | -at91_clk_register_main_osc(struct at91_pmc *pmc, | |
5101 | - unsigned int irq, | |
5102 | +at91_clk_register_main_osc(struct regmap *regmap, | |
5103 | const char *name, | |
5104 | const char *parent_name, | |
5105 | bool bypass) | |
5106 | { | |
5107 | - int ret; | |
5108 | struct clk_main_osc *osc; | |
5109 | struct clk *clk = NULL; | |
5110 | struct clk_init_data init; | |
5111 | ||
5112 | - if (!pmc || !irq || !name || !parent_name) | |
5113 | + if (!name || !parent_name) | |
5114 | return ERR_PTR(-EINVAL); | |
5115 | ||
5116 | osc = kzalloc(sizeof(*osc), GFP_KERNEL); | |
5117 | @@ -164,85 +152,70 @@ at91_clk_register_main_osc(struct at91_pmc *pmc, | |
5118 | init.flags = CLK_IGNORE_UNUSED; | |
5119 | ||
5120 | osc->hw.init = &init; | |
5121 | - osc->pmc = pmc; | |
5122 | - osc->irq = irq; | |
5123 | - | |
5124 | - init_waitqueue_head(&osc->wait); | |
5125 | - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); | |
5126 | - ret = request_irq(osc->irq, clk_main_osc_irq_handler, | |
5127 | - IRQF_TRIGGER_HIGH, name, osc); | |
5128 | - if (ret) { | |
5129 | - kfree(osc); | |
5130 | - return ERR_PTR(ret); | |
5131 | - } | |
5132 | + osc->regmap = regmap; | |
5133 | ||
5134 | if (bypass) | |
5135 | - pmc_write(pmc, AT91_CKGR_MOR, | |
5136 | - (pmc_read(pmc, AT91_CKGR_MOR) & | |
5137 | - ~(MOR_KEY_MASK | AT91_PMC_MOSCEN)) | | |
5138 | - AT91_PMC_OSCBYPASS | AT91_PMC_KEY); | |
5139 | + regmap_update_bits(regmap, | |
5140 | + AT91_CKGR_MOR, MOR_KEY_MASK | | |
5141 | + AT91_PMC_MOSCEN, | |
5142 | + AT91_PMC_OSCBYPASS | AT91_PMC_KEY); | |
5143 | ||
5144 | clk = clk_register(NULL, &osc->hw); | |
5145 | - if (IS_ERR(clk)) { | |
5146 | - free_irq(irq, osc); | |
5147 | + if (IS_ERR(clk)) | |
5148 | kfree(osc); | |
5149 | - } | |
5150 | ||
5151 | return clk; | |
5152 | } | |
5153 | ||
5154 | -void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np, | |
5155 | - struct at91_pmc *pmc) | |
5156 | +static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np) | |
5157 | { | |
5158 | struct clk *clk; | |
5159 | - unsigned int irq; | |
5160 | const char *name = np->name; | |
5161 | const char *parent_name; | |
5162 | + struct regmap *regmap; | |
5163 | bool bypass; | |
5164 | ||
5165 | of_property_read_string(np, "clock-output-names", &name); | |
5166 | bypass = of_property_read_bool(np, "atmel,osc-bypass"); | |
5167 | parent_name = of_clk_get_parent_name(np, 0); | |
5168 | ||
5169 | - irq = irq_of_parse_and_map(np, 0); | |
5170 | - if (!irq) | |
5171 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
5172 | + if (IS_ERR(regmap)) | |
5173 | return; | |
5174 | ||
5175 | - clk = at91_clk_register_main_osc(pmc, irq, name, parent_name, bypass); | |
5176 | + clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass); | |
5177 | if (IS_ERR(clk)) | |
5178 | return; | |
5179 | ||
5180 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
5181 | } | |
5182 | +CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc", | |
5183 | + of_at91rm9200_clk_main_osc_setup); | |
5184 | ||
5185 | -static irqreturn_t clk_main_rc_osc_irq_handler(int irq, void *dev_id) | |
5186 | +static bool clk_main_rc_osc_ready(struct regmap *regmap) | |
5187 | { | |
5188 | - struct clk_main_rc_osc *osc = dev_id; | |
5189 | + unsigned int status; | |
5190 | ||
5191 | - wake_up(&osc->wait); | |
5192 | - disable_irq_nosync(osc->irq); | |
5193 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5194 | ||
5195 | - return IRQ_HANDLED; | |
5196 | + return status & AT91_PMC_MOSCRCS; | |
5197 | } | |
5198 | ||
5199 | static int clk_main_rc_osc_prepare(struct clk_hw *hw) | |
5200 | { | |
5201 | struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw); | |
5202 | - struct at91_pmc *pmc = osc->pmc; | |
5203 | - u32 tmp; | |
5204 | + struct regmap *regmap = osc->regmap; | |
5205 | + unsigned int mor; | |
5206 | ||
5207 | - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK; | |
5208 | + regmap_read(regmap, AT91_CKGR_MOR, &mor); | |
5209 | ||
5210 | - if (!(tmp & AT91_PMC_MOSCRCEN)) { | |
5211 | - tmp |= AT91_PMC_MOSCRCEN | AT91_PMC_KEY; | |
5212 | - pmc_write(pmc, AT91_CKGR_MOR, tmp); | |
5213 | - } | |
5214 | + if (!(mor & AT91_PMC_MOSCRCEN)) | |
5215 | + regmap_update_bits(regmap, AT91_CKGR_MOR, | |
5216 | + MOR_KEY_MASK | AT91_PMC_MOSCRCEN, | |
5217 | + AT91_PMC_MOSCRCEN | AT91_PMC_KEY); | |
5218 | ||
5219 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS)) { | |
5220 | - enable_irq(osc->irq); | |
5221 | - wait_event(osc->wait, | |
5222 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS); | |
5223 | - } | |
5224 | + while (!clk_main_rc_osc_ready(regmap)) | |
5225 | + cpu_relax(); | |
5226 | ||
5227 | return 0; | |
5228 | } | |
5229 | @@ -250,23 +223,28 @@ static int clk_main_rc_osc_prepare(struct clk_hw *hw) | |
5230 | static void clk_main_rc_osc_unprepare(struct clk_hw *hw) | |
5231 | { | |
5232 | struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw); | |
5233 | - struct at91_pmc *pmc = osc->pmc; | |
5234 | - u32 tmp = pmc_read(pmc, AT91_CKGR_MOR); | |
5235 | + struct regmap *regmap = osc->regmap; | |
5236 | + unsigned int mor; | |
5237 | + | |
5238 | + regmap_read(regmap, AT91_CKGR_MOR, &mor); | |
5239 | ||
5240 | - if (!(tmp & AT91_PMC_MOSCRCEN)) | |
5241 | + if (!(mor & AT91_PMC_MOSCRCEN)) | |
5242 | return; | |
5243 | ||
5244 | - tmp &= ~(MOR_KEY_MASK | AT91_PMC_MOSCRCEN); | |
5245 | - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_KEY); | |
5246 | + regmap_update_bits(regmap, AT91_CKGR_MOR, | |
5247 | + MOR_KEY_MASK | AT91_PMC_MOSCRCEN, AT91_PMC_KEY); | |
5248 | } | |
5249 | ||
5250 | static int clk_main_rc_osc_is_prepared(struct clk_hw *hw) | |
5251 | { | |
5252 | struct clk_main_rc_osc *osc = to_clk_main_rc_osc(hw); | |
5253 | - struct at91_pmc *pmc = osc->pmc; | |
5254 | + struct regmap *regmap = osc->regmap; | |
5255 | + unsigned int mor, status; | |
5256 | ||
5257 | - return !!((pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCRCS) && | |
5258 | - (pmc_read(pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCRCEN)); | |
5259 | + regmap_read(regmap, AT91_CKGR_MOR, &mor); | |
5260 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5261 | + | |
5262 | + return (mor & AT91_PMC_MOSCRCEN) && (status & AT91_PMC_MOSCRCS); | |
5263 | } | |
5264 | ||
5265 | static unsigned long clk_main_rc_osc_recalc_rate(struct clk_hw *hw, | |
5266 | @@ -294,17 +272,15 @@ static const struct clk_ops main_rc_osc_ops = { | |
5267 | }; | |
5268 | ||
5269 | static struct clk * __init | |
5270 | -at91_clk_register_main_rc_osc(struct at91_pmc *pmc, | |
5271 | - unsigned int irq, | |
5272 | +at91_clk_register_main_rc_osc(struct regmap *regmap, | |
5273 | const char *name, | |
5274 | u32 frequency, u32 accuracy) | |
5275 | { | |
5276 | - int ret; | |
5277 | struct clk_main_rc_osc *osc; | |
5278 | struct clk *clk = NULL; | |
5279 | struct clk_init_data init; | |
5280 | ||
5281 | - if (!pmc || !irq || !name || !frequency) | |
5282 | + if (!name || !frequency) | |
5283 | return ERR_PTR(-EINVAL); | |
5284 | ||
5285 | osc = kzalloc(sizeof(*osc), GFP_KERNEL); | |
5286 | @@ -318,63 +294,53 @@ at91_clk_register_main_rc_osc(struct at91_pmc *pmc, | |
5287 | init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED; | |
5288 | ||
5289 | osc->hw.init = &init; | |
5290 | - osc->pmc = pmc; | |
5291 | - osc->irq = irq; | |
5292 | + osc->regmap = regmap; | |
5293 | osc->frequency = frequency; | |
5294 | osc->accuracy = accuracy; | |
5295 | ||
5296 | - init_waitqueue_head(&osc->wait); | |
5297 | - irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); | |
5298 | - ret = request_irq(osc->irq, clk_main_rc_osc_irq_handler, | |
5299 | - IRQF_TRIGGER_HIGH, name, osc); | |
5300 | - if (ret) | |
5301 | - return ERR_PTR(ret); | |
5302 | - | |
5303 | clk = clk_register(NULL, &osc->hw); | |
5304 | - if (IS_ERR(clk)) { | |
5305 | - free_irq(irq, osc); | |
5306 | + if (IS_ERR(clk)) | |
5307 | kfree(osc); | |
5308 | - } | |
5309 | ||
5310 | return clk; | |
5311 | } | |
5312 | ||
5313 | -void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np, | |
5314 | - struct at91_pmc *pmc) | |
5315 | +static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np) | |
5316 | { | |
5317 | struct clk *clk; | |
5318 | - unsigned int irq; | |
5319 | u32 frequency = 0; | |
5320 | u32 accuracy = 0; | |
5321 | const char *name = np->name; | |
5322 | + struct regmap *regmap; | |
5323 | ||
5324 | of_property_read_string(np, "clock-output-names", &name); | |
5325 | of_property_read_u32(np, "clock-frequency", &frequency); | |
5326 | of_property_read_u32(np, "clock-accuracy", &accuracy); | |
5327 | ||
5328 | - irq = irq_of_parse_and_map(np, 0); | |
5329 | - if (!irq) | |
5330 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
5331 | + if (IS_ERR(regmap)) | |
5332 | return; | |
5333 | ||
5334 | - clk = at91_clk_register_main_rc_osc(pmc, irq, name, frequency, | |
5335 | - accuracy); | |
5336 | + clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy); | |
5337 | if (IS_ERR(clk)) | |
5338 | return; | |
5339 | ||
5340 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
5341 | } | |
5342 | +CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc", | |
5343 | + of_at91sam9x5_clk_main_rc_osc_setup); | |
5344 | ||
5345 | ||
5346 | -static int clk_main_probe_frequency(struct at91_pmc *pmc) | |
5347 | +static int clk_main_probe_frequency(struct regmap *regmap) | |
5348 | { | |
5349 | unsigned long prep_time, timeout; | |
5350 | - u32 tmp; | |
5351 | + unsigned int mcfr; | |
5352 | ||
5353 | timeout = jiffies + usecs_to_jiffies(MAINFRDY_TIMEOUT); | |
5354 | do { | |
5355 | prep_time = jiffies; | |
5356 | - tmp = pmc_read(pmc, AT91_CKGR_MCFR); | |
5357 | - if (tmp & AT91_PMC_MAINRDY) | |
5358 | + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr); | |
5359 | + if (mcfr & AT91_PMC_MAINRDY) | |
5360 | return 0; | |
5361 | usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); | |
5362 | } while (time_before(prep_time, timeout)); | |
5363 | @@ -382,34 +348,37 @@ static int clk_main_probe_frequency(struct at91_pmc *pmc) | |
5364 | return -ETIMEDOUT; | |
5365 | } | |
5366 | ||
5367 | -static unsigned long clk_main_recalc_rate(struct at91_pmc *pmc, | |
5368 | +static unsigned long clk_main_recalc_rate(struct regmap *regmap, | |
5369 | unsigned long parent_rate) | |
5370 | { | |
5371 | - u32 tmp; | |
5372 | + unsigned int mcfr; | |
5373 | ||
5374 | if (parent_rate) | |
5375 | return parent_rate; | |
5376 | ||
5377 | pr_warn("Main crystal frequency not set, using approximate value\n"); | |
5378 | - tmp = pmc_read(pmc, AT91_CKGR_MCFR); | |
5379 | - if (!(tmp & AT91_PMC_MAINRDY)) | |
5380 | + regmap_read(regmap, AT91_CKGR_MCFR, &mcfr); | |
5381 | + if (!(mcfr & AT91_PMC_MAINRDY)) | |
5382 | return 0; | |
5383 | ||
5384 | - return ((tmp & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV; | |
5385 | + return ((mcfr & AT91_PMC_MAINF) * SLOW_CLOCK_FREQ) / MAINF_DIV; | |
5386 | } | |
5387 | ||
5388 | static int clk_rm9200_main_prepare(struct clk_hw *hw) | |
5389 | { | |
5390 | struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw); | |
5391 | ||
5392 | - return clk_main_probe_frequency(clkmain->pmc); | |
5393 | + return clk_main_probe_frequency(clkmain->regmap); | |
5394 | } | |
5395 | ||
5396 | static int clk_rm9200_main_is_prepared(struct clk_hw *hw) | |
5397 | { | |
5398 | struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw); | |
5399 | + unsigned int status; | |
5400 | + | |
5401 | + regmap_read(clkmain->regmap, AT91_CKGR_MCFR, &status); | |
5402 | ||
5403 | - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MCFR) & AT91_PMC_MAINRDY); | |
5404 | + return status & AT91_PMC_MAINRDY ? 1 : 0; | |
5405 | } | |
5406 | ||
5407 | static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw, | |
5408 | @@ -417,7 +386,7 @@ static unsigned long clk_rm9200_main_recalc_rate(struct clk_hw *hw, | |
5409 | { | |
5410 | struct clk_rm9200_main *clkmain = to_clk_rm9200_main(hw); | |
5411 | ||
5412 | - return clk_main_recalc_rate(clkmain->pmc, parent_rate); | |
5413 | + return clk_main_recalc_rate(clkmain->regmap, parent_rate); | |
5414 | } | |
5415 | ||
5416 | static const struct clk_ops rm9200_main_ops = { | |
5417 | @@ -427,7 +396,7 @@ static const struct clk_ops rm9200_main_ops = { | |
5418 | }; | |
5419 | ||
5420 | static struct clk * __init | |
5421 | -at91_clk_register_rm9200_main(struct at91_pmc *pmc, | |
5422 | +at91_clk_register_rm9200_main(struct regmap *regmap, | |
5423 | const char *name, | |
5424 | const char *parent_name) | |
5425 | { | |
5426 | @@ -435,7 +404,7 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc, | |
5427 | struct clk *clk = NULL; | |
5428 | struct clk_init_data init; | |
5429 | ||
5430 | - if (!pmc || !name) | |
5431 | + if (!name) | |
5432 | return ERR_PTR(-EINVAL); | |
5433 | ||
5434 | if (!parent_name) | |
5435 | @@ -452,7 +421,7 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc, | |
5436 | init.flags = 0; | |
5437 | ||
5438 | clkmain->hw.init = &init; | |
5439 | - clkmain->pmc = pmc; | |
5440 | + clkmain->regmap = regmap; | |
5441 | ||
5442 | clk = clk_register(NULL, &clkmain->hw); | |
5443 | if (IS_ERR(clk)) | |
5444 | @@ -461,52 +430,54 @@ at91_clk_register_rm9200_main(struct at91_pmc *pmc, | |
5445 | return clk; | |
5446 | } | |
5447 | ||
5448 | -void __init of_at91rm9200_clk_main_setup(struct device_node *np, | |
5449 | - struct at91_pmc *pmc) | |
5450 | +static void __init of_at91rm9200_clk_main_setup(struct device_node *np) | |
5451 | { | |
5452 | struct clk *clk; | |
5453 | const char *parent_name; | |
5454 | const char *name = np->name; | |
5455 | + struct regmap *regmap; | |
5456 | ||
5457 | parent_name = of_clk_get_parent_name(np, 0); | |
5458 | of_property_read_string(np, "clock-output-names", &name); | |
5459 | ||
5460 | - clk = at91_clk_register_rm9200_main(pmc, name, parent_name); | |
5461 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
5462 | + if (IS_ERR(regmap)) | |
5463 | + return; | |
5464 | + | |
5465 | + clk = at91_clk_register_rm9200_main(regmap, name, parent_name); | |
5466 | if (IS_ERR(clk)) | |
5467 | return; | |
5468 | ||
5469 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
5470 | } | |
5471 | +CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main", | |
5472 | + of_at91rm9200_clk_main_setup); | |
5473 | ||
5474 | -static irqreturn_t clk_sam9x5_main_irq_handler(int irq, void *dev_id) | |
5475 | +static inline bool clk_sam9x5_main_ready(struct regmap *regmap) | |
5476 | { | |
5477 | - struct clk_sam9x5_main *clkmain = dev_id; | |
5478 | + unsigned int status; | |
5479 | ||
5480 | - wake_up(&clkmain->wait); | |
5481 | - disable_irq_nosync(clkmain->irq); | |
5482 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5483 | ||
5484 | - return IRQ_HANDLED; | |
5485 | + return status & AT91_PMC_MOSCSELS ? 1 : 0; | |
5486 | } | |
5487 | ||
5488 | static int clk_sam9x5_main_prepare(struct clk_hw *hw) | |
5489 | { | |
5490 | struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw); | |
5491 | - struct at91_pmc *pmc = clkmain->pmc; | |
5492 | + struct regmap *regmap = clkmain->regmap; | |
5493 | ||
5494 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) { | |
5495 | - enable_irq(clkmain->irq); | |
5496 | - wait_event(clkmain->wait, | |
5497 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS); | |
5498 | - } | |
5499 | + while (!clk_sam9x5_main_ready(regmap)) | |
5500 | + cpu_relax(); | |
5501 | ||
5502 | - return clk_main_probe_frequency(pmc); | |
5503 | + return clk_main_probe_frequency(regmap); | |
5504 | } | |
5505 | ||
5506 | static int clk_sam9x5_main_is_prepared(struct clk_hw *hw) | |
5507 | { | |
5508 | struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw); | |
5509 | ||
5510 | - return !!(pmc_read(clkmain->pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS); | |
5511 | + return clk_sam9x5_main_ready(clkmain->regmap); | |
5512 | } | |
5513 | ||
5514 | static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw, | |
5515 | @@ -514,30 +485,28 @@ static unsigned long clk_sam9x5_main_recalc_rate(struct clk_hw *hw, | |
5516 | { | |
5517 | struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw); | |
5518 | ||
5519 | - return clk_main_recalc_rate(clkmain->pmc, parent_rate); | |
5520 | + return clk_main_recalc_rate(clkmain->regmap, parent_rate); | |
5521 | } | |
5522 | ||
5523 | static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index) | |
5524 | { | |
5525 | struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw); | |
5526 | - struct at91_pmc *pmc = clkmain->pmc; | |
5527 | - u32 tmp; | |
5528 | + struct regmap *regmap = clkmain->regmap; | |
5529 | + unsigned int tmp; | |
5530 | ||
5531 | if (index > 1) | |
5532 | return -EINVAL; | |
5533 | ||
5534 | - tmp = pmc_read(pmc, AT91_CKGR_MOR) & ~MOR_KEY_MASK; | |
5535 | + regmap_read(regmap, AT91_CKGR_MOR, &tmp); | |
5536 | + tmp &= ~MOR_KEY_MASK; | |
5537 | ||
5538 | if (index && !(tmp & AT91_PMC_MOSCSEL)) | |
5539 | - pmc_write(pmc, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL); | |
5540 | + regmap_write(regmap, AT91_CKGR_MOR, tmp | AT91_PMC_MOSCSEL); | |
5541 | else if (!index && (tmp & AT91_PMC_MOSCSEL)) | |
5542 | - pmc_write(pmc, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL); | |
5543 | + regmap_write(regmap, AT91_CKGR_MOR, tmp & ~AT91_PMC_MOSCSEL); | |
5544 | ||
5545 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS)) { | |
5546 | - enable_irq(clkmain->irq); | |
5547 | - wait_event(clkmain->wait, | |
5548 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MOSCSELS); | |
5549 | - } | |
5550 | + while (!clk_sam9x5_main_ready(regmap)) | |
5551 | + cpu_relax(); | |
5552 | ||
5553 | return 0; | |
5554 | } | |
5555 | @@ -545,8 +514,11 @@ static int clk_sam9x5_main_set_parent(struct clk_hw *hw, u8 index) | |
5556 | static u8 clk_sam9x5_main_get_parent(struct clk_hw *hw) | |
5557 | { | |
5558 | struct clk_sam9x5_main *clkmain = to_clk_sam9x5_main(hw); | |
5559 | + unsigned int status; | |
5560 | + | |
5561 | + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status); | |
5562 | ||
5563 | - return !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & AT91_PMC_MOSCEN); | |
5564 | + return status & AT91_PMC_MOSCEN ? 1 : 0; | |
5565 | } | |
5566 | ||
5567 | static const struct clk_ops sam9x5_main_ops = { | |
5568 | @@ -558,18 +530,17 @@ static const struct clk_ops sam9x5_main_ops = { | |
5569 | }; | |
5570 | ||
5571 | static struct clk * __init | |
5572 | -at91_clk_register_sam9x5_main(struct at91_pmc *pmc, | |
5573 | - unsigned int irq, | |
5574 | +at91_clk_register_sam9x5_main(struct regmap *regmap, | |
5575 | const char *name, | |
5576 | const char **parent_names, | |
5577 | int num_parents) | |
5578 | { | |
5579 | - int ret; | |
5580 | struct clk_sam9x5_main *clkmain; | |
5581 | struct clk *clk = NULL; | |
5582 | struct clk_init_data init; | |
5583 | + unsigned int status; | |
5584 | ||
5585 | - if (!pmc || !irq || !name) | |
5586 | + if (!name) | |
5587 | return ERR_PTR(-EINVAL); | |
5588 | ||
5589 | if (!parent_names || !num_parents) | |
5590 | @@ -586,51 +557,42 @@ at91_clk_register_sam9x5_main(struct at91_pmc *pmc, | |
5591 | init.flags = CLK_SET_PARENT_GATE; | |
5592 | ||
5593 | clkmain->hw.init = &init; | |
5594 | - clkmain->pmc = pmc; | |
5595 | - clkmain->irq = irq; | |
5596 | - clkmain->parent = !!(pmc_read(clkmain->pmc, AT91_CKGR_MOR) & | |
5597 | - AT91_PMC_MOSCEN); | |
5598 | - init_waitqueue_head(&clkmain->wait); | |
5599 | - irq_set_status_flags(clkmain->irq, IRQ_NOAUTOEN); | |
5600 | - ret = request_irq(clkmain->irq, clk_sam9x5_main_irq_handler, | |
5601 | - IRQF_TRIGGER_HIGH, name, clkmain); | |
5602 | - if (ret) | |
5603 | - return ERR_PTR(ret); | |
5604 | + clkmain->regmap = regmap; | |
5605 | + regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status); | |
5606 | + clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0; | |
5607 | ||
5608 | clk = clk_register(NULL, &clkmain->hw); | |
5609 | - if (IS_ERR(clk)) { | |
5610 | - free_irq(clkmain->irq, clkmain); | |
5611 | + if (IS_ERR(clk)) | |
5612 | kfree(clkmain); | |
5613 | - } | |
5614 | ||
5615 | return clk; | |
5616 | } | |
5617 | ||
5618 | -void __init of_at91sam9x5_clk_main_setup(struct device_node *np, | |
5619 | - struct at91_pmc *pmc) | |
5620 | +static void __init of_at91sam9x5_clk_main_setup(struct device_node *np) | |
5621 | { | |
5622 | struct clk *clk; | |
5623 | const char *parent_names[2]; | |
5624 | int num_parents; | |
5625 | - unsigned int irq; | |
5626 | const char *name = np->name; | |
5627 | + struct regmap *regmap; | |
5628 | ||
5629 | num_parents = of_clk_get_parent_count(np); | |
5630 | if (num_parents <= 0 || num_parents > 2) | |
5631 | return; | |
5632 | ||
5633 | of_clk_parent_fill(np, parent_names, num_parents); | |
5634 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
5635 | + if (IS_ERR(regmap)) | |
5636 | + return; | |
5637 | ||
5638 | of_property_read_string(np, "clock-output-names", &name); | |
5639 | ||
5640 | - irq = irq_of_parse_and_map(np, 0); | |
5641 | - if (!irq) | |
5642 | - return; | |
5643 | - | |
5644 | - clk = at91_clk_register_sam9x5_main(pmc, irq, name, parent_names, | |
5645 | + clk = at91_clk_register_sam9x5_main(regmap, name, parent_names, | |
5646 | num_parents); | |
5647 | if (IS_ERR(clk)) | |
5648 | return; | |
5649 | ||
5650 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
5651 | } | |
5652 | +CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main", | |
5653 | + of_at91sam9x5_clk_main_setup); | |
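A note on the recurring pattern: clk-main.c sets the template for the rest of
this patch. The interrupt-driven wait (request_irq() plus a wait queue woken
from the handler) is dropped in favour of polling the PMC status register
through the syscon regmap. A minimal sketch of the shape, with hypothetical
names (my_clk_ready, MY_READY_BIT) standing in for per-clock helpers such as
clk_sam9x5_main_ready() or the clk_master_ready() added below:

        #include <linux/clk/at91_pmc.h>        /* AT91_PMC_SR */
        #include <linux/regmap.h>

        #define MY_READY_BIT    (1 << 0)        /* placeholder status flag */

        static inline bool my_clk_ready(struct regmap *regmap)
        {
                unsigned int status;

                /* A single regmap_read() is serialized by the regmap lock. */
                regmap_read(regmap, AT91_PMC_SR, &status);

                return status & MY_READY_BIT;
        }

        /* ...and in prepare(), the former wait_event() loop becomes: */
        while (!my_clk_ready(regmap))
                cpu_relax();

Spinning instead of sleeping fits the move to CLK_OF_DECLARE(): these clocks
are now registered at of_clk_init() time, early in boot, before an interrupt
handler for the PMC could usefully be installed.
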
5654 | diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c | |
5655 | index 620ea323356b..7d4a1864ea7c 100644 | |
5656 | --- a/drivers/clk/at91/clk-master.c | |
5657 | +++ b/drivers/clk/at91/clk-master.c | |
5658 | @@ -12,13 +12,8 @@ | |
5659 | #include <linux/clkdev.h> | |
5660 | #include <linux/clk/at91_pmc.h> | |
5661 | #include <linux/of.h> | |
5662 | -#include <linux/of_address.h> | |
5663 | -#include <linux/of_irq.h> | |
5664 | -#include <linux/io.h> | |
5665 | -#include <linux/wait.h> | |
5666 | -#include <linux/sched.h> | |
5667 | -#include <linux/interrupt.h> | |
5668 | -#include <linux/irq.h> | |
5669 | +#include <linux/mfd/syscon.h> | |
5670 | +#include <linux/regmap.h> | |
5671 | ||
5672 | #include "pmc.h" | |
5673 | ||
5674 | @@ -44,32 +39,26 @@ struct clk_master_layout { | |
5675 | ||
5676 | struct clk_master { | |
5677 | struct clk_hw hw; | |
5678 | - struct at91_pmc *pmc; | |
5679 | - unsigned int irq; | |
5680 | - wait_queue_head_t wait; | |
5681 | + struct regmap *regmap; | |
5682 | const struct clk_master_layout *layout; | |
5683 | const struct clk_master_characteristics *characteristics; | |
5684 | }; | |
5685 | ||
5686 | -static irqreturn_t clk_master_irq_handler(int irq, void *dev_id) | |
5687 | +static inline bool clk_master_ready(struct regmap *regmap) | |
5688 | { | |
5689 | - struct clk_master *master = (struct clk_master *)dev_id; | |
5690 | + unsigned int status; | |
5691 | ||
5692 | - wake_up(&master->wait); | |
5693 | - disable_irq_nosync(master->irq); | |
5694 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
5695 | ||
5696 | - return IRQ_HANDLED; | |
5697 | + return status & AT91_PMC_MCKRDY ? 1 : 0; | |
5698 | } | |
5699 | + | |
5700 | static int clk_master_prepare(struct clk_hw *hw) | |
5701 | { | |
5702 | struct clk_master *master = to_clk_master(hw); | |
5703 | - struct at91_pmc *pmc = master->pmc; | |
5704 | ||
5705 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY)) { | |
5706 | - enable_irq(master->irq); | |
5707 | - wait_event(master->wait, | |
5708 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY); | |
5709 | - } | |
5710 | + while (!clk_master_ready(master->regmap)) | |
5711 | + cpu_relax(); | |
5712 | ||
5713 | return 0; | |
5714 | } | |
5715 | @@ -78,7 +67,7 @@ static int clk_master_is_prepared(struct clk_hw *hw) | |
5716 | { | |
5717 | struct clk_master *master = to_clk_master(hw); | |
5718 | ||
5719 | - return !!(pmc_read(master->pmc, AT91_PMC_SR) & AT91_PMC_MCKRDY); | |
5720 | + return clk_master_ready(master->regmap); | |
5721 | } | |
5722 | ||
5723 | static unsigned long clk_master_recalc_rate(struct clk_hw *hw, | |
5724 | @@ -88,18 +77,16 @@ static unsigned long clk_master_recalc_rate(struct clk_hw *hw, | |
5725 | u8 div; | |
5726 | unsigned long rate = parent_rate; | |
5727 | struct clk_master *master = to_clk_master(hw); | |
5728 | - struct at91_pmc *pmc = master->pmc; | |
5729 | const struct clk_master_layout *layout = master->layout; | |
5730 | const struct clk_master_characteristics *characteristics = | |
5731 | master->characteristics; | |
5732 | - u32 tmp; | |
5733 | + unsigned int mckr; | |
5734 | ||
5735 | - pmc_lock(pmc); | |
5736 | - tmp = pmc_read(pmc, AT91_PMC_MCKR) & layout->mask; | |
5737 | - pmc_unlock(pmc); | |
5738 | + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr); | |
5739 | + mckr &= layout->mask; | |
5740 | ||
5741 | - pres = (tmp >> layout->pres_shift) & MASTER_PRES_MASK; | |
5742 | - div = (tmp >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK; | |
5743 | + pres = (mckr >> layout->pres_shift) & MASTER_PRES_MASK; | |
5744 | + div = (mckr >> MASTER_DIV_SHIFT) & MASTER_DIV_MASK; | |
5745 | ||
5746 | if (characteristics->have_div3_pres && pres == MASTER_PRES_MAX) | |
5747 | rate /= 3; | |
5748 | @@ -119,9 +106,11 @@ static unsigned long clk_master_recalc_rate(struct clk_hw *hw, | |
5749 | static u8 clk_master_get_parent(struct clk_hw *hw) | |
5750 | { | |
5751 | struct clk_master *master = to_clk_master(hw); | |
5752 | - struct at91_pmc *pmc = master->pmc; | |
5753 | + unsigned int mckr; | |
5754 | ||
5755 | - return pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_CSS; | |
5756 | + regmap_read(master->regmap, AT91_PMC_MCKR, &mckr); | |
5757 | + | |
5758 | + return mckr & AT91_PMC_CSS; | |
5759 | } | |
5760 | ||
5761 | static const struct clk_ops master_ops = { | |
5762 | @@ -132,18 +121,17 @@ static const struct clk_ops master_ops = { | |
5763 | }; | |
5764 | ||
5765 | static struct clk * __init | |
5766 | -at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, | |
5767 | +at91_clk_register_master(struct regmap *regmap, | |
5768 | const char *name, int num_parents, | |
5769 | const char **parent_names, | |
5770 | const struct clk_master_layout *layout, | |
5771 | const struct clk_master_characteristics *characteristics) | |
5772 | { | |
5773 | - int ret; | |
5774 | struct clk_master *master; | |
5775 | struct clk *clk = NULL; | |
5776 | struct clk_init_data init; | |
5777 | ||
5778 | - if (!pmc || !irq || !name || !num_parents || !parent_names) | |
5779 | + if (!name || !num_parents || !parent_names) | |
5780 | return ERR_PTR(-EINVAL); | |
5781 | ||
5782 | master = kzalloc(sizeof(*master), GFP_KERNEL); | |
5783 | @@ -159,20 +147,10 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, | |
5784 | master->hw.init = &init; | |
5785 | master->layout = layout; | |
5786 | master->characteristics = characteristics; | |
5787 | - master->pmc = pmc; | |
5788 | - master->irq = irq; | |
5789 | - init_waitqueue_head(&master->wait); | |
5790 | - irq_set_status_flags(master->irq, IRQ_NOAUTOEN); | |
5791 | - ret = request_irq(master->irq, clk_master_irq_handler, | |
5792 | - IRQF_TRIGGER_HIGH, "clk-master", master); | |
5793 | - if (ret) { | |
5794 | - kfree(master); | |
5795 | - return ERR_PTR(ret); | |
5796 | - } | |
5797 | + master->regmap = regmap; | |
5798 | ||
5799 | clk = clk_register(NULL, &master->hw); | |
5800 | if (IS_ERR(clk)) { | |
5801 | - free_irq(master->irq, master); | |
5802 | kfree(master); | |
5803 | } | |
5804 | ||
5805 | @@ -217,15 +195,15 @@ out_free_characteristics: | |
5806 | } | |
5807 | ||
5808 | static void __init | |
5809 | -of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc, | |
5810 | +of_at91_clk_master_setup(struct device_node *np, | |
5811 | const struct clk_master_layout *layout) | |
5812 | { | |
5813 | struct clk *clk; | |
5814 | int num_parents; | |
5815 | - unsigned int irq; | |
5816 | const char *parent_names[MASTER_SOURCE_MAX]; | |
5817 | const char *name = np->name; | |
5818 | struct clk_master_characteristics *characteristics; | |
5819 | + struct regmap *regmap; | |
5820 | ||
5821 | num_parents = of_clk_get_parent_count(np); | |
5822 | if (num_parents <= 0 || num_parents > MASTER_SOURCE_MAX) | |
5823 | @@ -239,11 +217,11 @@ of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc, | |
5824 | if (!characteristics) | |
5825 | return; | |
5826 | ||
5827 | - irq = irq_of_parse_and_map(np, 0); | |
5828 | - if (!irq) | |
5829 | - goto out_free_characteristics; | |
5830 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
5831 | + if (IS_ERR(regmap)) | |
5832 | + return; | |
5833 | ||
5834 | - clk = at91_clk_register_master(pmc, irq, name, num_parents, | |
5835 | + clk = at91_clk_register_master(regmap, name, num_parents, | |
5836 | parent_names, layout, | |
5837 | characteristics); | |
5838 | if (IS_ERR(clk)) | |
5839 | @@ -256,14 +234,16 @@ out_free_characteristics: | |
5840 | kfree(characteristics); | |
5841 | } | |
5842 | ||
5843 | -void __init of_at91rm9200_clk_master_setup(struct device_node *np, | |
5844 | - struct at91_pmc *pmc) | |
5845 | +static void __init of_at91rm9200_clk_master_setup(struct device_node *np) | |
5846 | { | |
5847 | - of_at91_clk_master_setup(np, pmc, &at91rm9200_master_layout); | |
5848 | + of_at91_clk_master_setup(np, &at91rm9200_master_layout); | |
5849 | } | |
5850 | +CLK_OF_DECLARE(at91rm9200_clk_master, "atmel,at91rm9200-clk-master", | |
5851 | + of_at91rm9200_clk_master_setup); | |
5852 | ||
5853 | -void __init of_at91sam9x5_clk_master_setup(struct device_node *np, | |
5854 | - struct at91_pmc *pmc) | |
5855 | +static void __init of_at91sam9x5_clk_master_setup(struct device_node *np) | |
5856 | { | |
5857 | - of_at91_clk_master_setup(np, pmc, &at91sam9x5_master_layout); | |
5858 | + of_at91_clk_master_setup(np, &at91sam9x5_master_layout); | |
5859 | } | |
5860 | +CLK_OF_DECLARE(at91sam9x5_clk_master, "atmel,at91sam9x5-clk-master", | |
5861 | + of_at91sam9x5_clk_master_setup); | |
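With the at91_pmc pointer gone, the setup entry points no longer need to be
exported for pmc.c to call: each becomes static and registers itself with
CLK_OF_DECLARE(), fetching the PMC regmap from its parent node, which is now
expected to be a syscon. A hedged sketch of that registration shape
("vendor,my-clk", my_clk_setup and my_clk_register are placeholders, not
identifiers from this patch):

        #include <linux/clk-provider.h>
        #include <linux/mfd/syscon.h>
        #include <linux/of.h>
        #include <linux/regmap.h>

        static void __init my_clk_setup(struct device_node *np)
        {
                struct regmap *regmap;
                struct clk *clk;

                /* The clock node sits under the PMC node (a syscon). */
                regmap = syscon_node_to_regmap(of_get_parent(np));
                if (IS_ERR(regmap))
                        return;

                clk = my_clk_register(regmap, np->name); /* hypothetical */
                if (IS_ERR(clk))
                        return;

                of_clk_add_provider(np, of_clk_src_simple_get, clk);
        }
        CLK_OF_DECLARE(my_clk, "vendor,my-clk", my_clk_setup);
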
5862 | diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c | |
5863 | index 58f3b568e9cb..d69cd2a121b1 100644 | |
5864 | --- a/drivers/clk/at91/clk-peripheral.c | |
5865 | +++ b/drivers/clk/at91/clk-peripheral.c | |
5866 | @@ -12,11 +12,13 @@ | |
5867 | #include <linux/clkdev.h> | |
5868 | #include <linux/clk/at91_pmc.h> | |
5869 | #include <linux/of.h> | |
5870 | -#include <linux/of_address.h> | |
5871 | -#include <linux/io.h> | |
5872 | +#include <linux/mfd/syscon.h> | |
5873 | +#include <linux/regmap.h> | |
5874 | ||
5875 | #include "pmc.h" | |
5876 | ||
5877 | +DEFINE_SPINLOCK(pmc_pcr_lock); | |
5878 | + | |
5879 | #define PERIPHERAL_MAX 64 | |
5880 | ||
5881 | #define PERIPHERAL_AT91RM9200 0 | |
5882 | @@ -33,7 +35,7 @@ | |
5883 | ||
5884 | struct clk_peripheral { | |
5885 | struct clk_hw hw; | |
5886 | - struct at91_pmc *pmc; | |
5887 | + struct regmap *regmap; | |
5888 | u32 id; | |
5889 | }; | |
5890 | ||
5891 | @@ -41,8 +43,9 @@ struct clk_peripheral { | |
5892 | ||
5893 | struct clk_sam9x5_peripheral { | |
5894 | struct clk_hw hw; | |
5895 | - struct at91_pmc *pmc; | |
5896 | + struct regmap *regmap; | |
5897 | struct clk_range range; | |
5898 | + spinlock_t *lock; | |
5899 | u32 id; | |
5900 | u32 div; | |
5901 | bool auto_div; | |
5902 | @@ -54,7 +57,6 @@ struct clk_sam9x5_peripheral { | |
5903 | static int clk_peripheral_enable(struct clk_hw *hw) | |
5904 | { | |
5905 | struct clk_peripheral *periph = to_clk_peripheral(hw); | |
5906 | - struct at91_pmc *pmc = periph->pmc; | |
5907 | int offset = AT91_PMC_PCER; | |
5908 | u32 id = periph->id; | |
5909 | ||
5910 | @@ -62,14 +64,14 @@ static int clk_peripheral_enable(struct clk_hw *hw) | |
5911 | return 0; | |
5912 | if (id > PERIPHERAL_ID_MAX) | |
5913 | offset = AT91_PMC_PCER1; | |
5914 | - pmc_write(pmc, offset, PERIPHERAL_MASK(id)); | |
5915 | + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id)); | |
5916 | + | |
5917 | return 0; | |
5918 | } | |
5919 | ||
5920 | static void clk_peripheral_disable(struct clk_hw *hw) | |
5921 | { | |
5922 | struct clk_peripheral *periph = to_clk_peripheral(hw); | |
5923 | - struct at91_pmc *pmc = periph->pmc; | |
5924 | int offset = AT91_PMC_PCDR; | |
5925 | u32 id = periph->id; | |
5926 | ||
5927 | @@ -77,21 +79,23 @@ static void clk_peripheral_disable(struct clk_hw *hw) | |
5928 | return; | |
5929 | if (id > PERIPHERAL_ID_MAX) | |
5930 | offset = AT91_PMC_PCDR1; | |
5931 | - pmc_write(pmc, offset, PERIPHERAL_MASK(id)); | |
5932 | + regmap_write(periph->regmap, offset, PERIPHERAL_MASK(id)); | |
5933 | } | |
5934 | ||
5935 | static int clk_peripheral_is_enabled(struct clk_hw *hw) | |
5936 | { | |
5937 | struct clk_peripheral *periph = to_clk_peripheral(hw); | |
5938 | - struct at91_pmc *pmc = periph->pmc; | |
5939 | int offset = AT91_PMC_PCSR; | |
5940 | + unsigned int status; | |
5941 | u32 id = periph->id; | |
5942 | ||
5943 | if (id < PERIPHERAL_ID_MIN) | |
5944 | return 1; | |
5945 | if (id > PERIPHERAL_ID_MAX) | |
5946 | offset = AT91_PMC_PCSR1; | |
5947 | - return !!(pmc_read(pmc, offset) & PERIPHERAL_MASK(id)); | |
5948 | + regmap_read(periph->regmap, offset, &status); | |
5949 | + | |
5950 | + return status & PERIPHERAL_MASK(id) ? 1 : 0; | |
5951 | } | |
5952 | ||
5953 | static const struct clk_ops peripheral_ops = { | |
5954 | @@ -101,14 +105,14 @@ static const struct clk_ops peripheral_ops = { | |
5955 | }; | |
5956 | ||
5957 | static struct clk * __init | |
5958 | -at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name, | |
5959 | +at91_clk_register_peripheral(struct regmap *regmap, const char *name, | |
5960 | const char *parent_name, u32 id) | |
5961 | { | |
5962 | struct clk_peripheral *periph; | |
5963 | struct clk *clk = NULL; | |
5964 | struct clk_init_data init; | |
5965 | ||
5966 | - if (!pmc || !name || !parent_name || id > PERIPHERAL_ID_MAX) | |
5967 | + if (!name || !parent_name || id > PERIPHERAL_ID_MAX) | |
5968 | return ERR_PTR(-EINVAL); | |
5969 | ||
5970 | periph = kzalloc(sizeof(*periph), GFP_KERNEL); | |
5971 | @@ -123,7 +127,7 @@ at91_clk_register_peripheral(struct at91_pmc *pmc, const char *name, | |
5972 | ||
5973 | periph->id = id; | |
5974 | periph->hw.init = &init; | |
5975 | - periph->pmc = pmc; | |
5976 | + periph->regmap = regmap; | |
5977 | ||
5978 | clk = clk_register(NULL, &periph->hw); | |
5979 | if (IS_ERR(clk)) | |
5980 | @@ -160,53 +164,58 @@ static void clk_sam9x5_peripheral_autodiv(struct clk_sam9x5_peripheral *periph) | |
5981 | static int clk_sam9x5_peripheral_enable(struct clk_hw *hw) | |
5982 | { | |
5983 | struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); | |
5984 | - struct at91_pmc *pmc = periph->pmc; | |
5985 | - u32 tmp; | |
5986 | + unsigned long flags; | |
5987 | ||
5988 | if (periph->id < PERIPHERAL_ID_MIN) | |
5989 | return 0; | |
5990 | ||
5991 | - pmc_lock(pmc); | |
5992 | - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); | |
5993 | - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_DIV_MASK; | |
5994 | - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_DIV(periph->div) | |
5995 | - | AT91_PMC_PCR_CMD | |
5996 | - | AT91_PMC_PCR_EN); | |
5997 | - pmc_unlock(pmc); | |
5998 | + spin_lock_irqsave(periph->lock, flags); | |
5999 | + regmap_write(periph->regmap, AT91_PMC_PCR, | |
6000 | + (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6001 | + regmap_update_bits(periph->regmap, AT91_PMC_PCR, | |
6002 | + AT91_PMC_PCR_DIV_MASK | AT91_PMC_PCR_CMD | | |
6003 | + AT91_PMC_PCR_EN, | |
6004 | + AT91_PMC_PCR_DIV(periph->div) | | |
6005 | + AT91_PMC_PCR_CMD | | |
6006 | + AT91_PMC_PCR_EN); | |
6007 | + spin_unlock_irqrestore(periph->lock, flags); | |
6008 | + | |
6009 | return 0; | |
6010 | } | |
6011 | ||
6012 | static void clk_sam9x5_peripheral_disable(struct clk_hw *hw) | |
6013 | { | |
6014 | struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); | |
6015 | - struct at91_pmc *pmc = periph->pmc; | |
6016 | - u32 tmp; | |
6017 | + unsigned long flags; | |
6018 | ||
6019 | if (periph->id < PERIPHERAL_ID_MIN) | |
6020 | return; | |
6021 | ||
6022 | - pmc_lock(pmc); | |
6023 | - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6024 | - tmp = pmc_read(pmc, AT91_PMC_PCR) & ~AT91_PMC_PCR_EN; | |
6025 | - pmc_write(pmc, AT91_PMC_PCR, tmp | AT91_PMC_PCR_CMD); | |
6026 | - pmc_unlock(pmc); | |
6027 | + spin_lock_irqsave(periph->lock, flags); | |
6028 | + regmap_write(periph->regmap, AT91_PMC_PCR, | |
6029 | + (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6030 | + regmap_update_bits(periph->regmap, AT91_PMC_PCR, | |
6031 | + AT91_PMC_PCR_EN | AT91_PMC_PCR_CMD, | |
6032 | + AT91_PMC_PCR_CMD); | |
6033 | + spin_unlock_irqrestore(periph->lock, flags); | |
6034 | } | |
6035 | ||
6036 | static int clk_sam9x5_peripheral_is_enabled(struct clk_hw *hw) | |
6037 | { | |
6038 | struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); | |
6039 | - struct at91_pmc *pmc = periph->pmc; | |
6040 | - int ret; | |
6041 | + unsigned long flags; | |
6042 | + unsigned int status; | |
6043 | ||
6044 | if (periph->id < PERIPHERAL_ID_MIN) | |
6045 | return 1; | |
6046 | ||
6047 | - pmc_lock(pmc); | |
6048 | - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6049 | - ret = !!(pmc_read(pmc, AT91_PMC_PCR) & AT91_PMC_PCR_EN); | |
6050 | - pmc_unlock(pmc); | |
6051 | + spin_lock_irqsave(periph->lock, flags); | |
6052 | + regmap_write(periph->regmap, AT91_PMC_PCR, | |
6053 | + (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6054 | + regmap_read(periph->regmap, AT91_PMC_PCR, &status); | |
6055 | + spin_unlock_irqrestore(periph->lock, flags); | |
6056 | ||
6057 | - return ret; | |
6058 | + return status & AT91_PMC_PCR_EN ? 1 : 0; | |
6059 | } | |
6060 | ||
6061 | static unsigned long | |
6062 | @@ -214,19 +223,20 @@ clk_sam9x5_peripheral_recalc_rate(struct clk_hw *hw, | |
6063 | unsigned long parent_rate) | |
6064 | { | |
6065 | struct clk_sam9x5_peripheral *periph = to_clk_sam9x5_peripheral(hw); | |
6066 | - struct at91_pmc *pmc = periph->pmc; | |
6067 | - u32 tmp; | |
6068 | + unsigned long flags; | |
6069 | + unsigned int status; | |
6070 | ||
6071 | if (periph->id < PERIPHERAL_ID_MIN) | |
6072 | return parent_rate; | |
6073 | ||
6074 | - pmc_lock(pmc); | |
6075 | - pmc_write(pmc, AT91_PMC_PCR, (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6076 | - tmp = pmc_read(pmc, AT91_PMC_PCR); | |
6077 | - pmc_unlock(pmc); | |
6078 | + spin_lock_irqsave(periph->lock, flags); | |
6079 | + regmap_write(periph->regmap, AT91_PMC_PCR, | |
6080 | + (periph->id & AT91_PMC_PCR_PID_MASK)); | |
6081 | + regmap_read(periph->regmap, AT91_PMC_PCR, &status); | |
6082 | + spin_unlock_irqrestore(periph->lock, flags); | |
6083 | ||
6084 | - if (tmp & AT91_PMC_PCR_EN) { | |
6085 | - periph->div = PERIPHERAL_RSHIFT(tmp); | |
6086 | + if (status & AT91_PMC_PCR_EN) { | |
6087 | + periph->div = PERIPHERAL_RSHIFT(status); | |
6088 | periph->auto_div = false; | |
6089 | } else { | |
6090 | clk_sam9x5_peripheral_autodiv(periph); | |
6091 | @@ -318,15 +328,15 @@ static const struct clk_ops sam9x5_peripheral_ops = { | |
6092 | }; | |
6093 | ||
6094 | static struct clk * __init | |
6095 | -at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name, | |
6096 | - const char *parent_name, u32 id, | |
6097 | - const struct clk_range *range) | |
6098 | +at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock, | |
6099 | + const char *name, const char *parent_name, | |
6100 | + u32 id, const struct clk_range *range) | |
6101 | { | |
6102 | struct clk_sam9x5_peripheral *periph; | |
6103 | struct clk *clk = NULL; | |
6104 | struct clk_init_data init; | |
6105 | ||
6106 | - if (!pmc || !name || !parent_name) | |
6107 | + if (!name || !parent_name) | |
6108 | return ERR_PTR(-EINVAL); | |
6109 | ||
6110 | periph = kzalloc(sizeof(*periph), GFP_KERNEL); | |
6111 | @@ -342,7 +352,8 @@ at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name, | |
6112 | periph->id = id; | |
6113 | periph->hw.init = &init; | |
6114 | periph->div = 0; | |
6115 | - periph->pmc = pmc; | |
6116 | + periph->regmap = regmap; | |
6117 | + periph->lock = lock; | |
6118 | periph->auto_div = true; | |
6119 | periph->range = *range; | |
6120 | ||
6121 | @@ -356,7 +367,7 @@ at91_clk_register_sam9x5_peripheral(struct at91_pmc *pmc, const char *name, | |
6122 | } | |
6123 | ||
6124 | static void __init | |
6125 | -of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6126 | +of_at91_clk_periph_setup(struct device_node *np, u8 type) | |
6127 | { | |
6128 | int num; | |
6129 | u32 id; | |
6130 | @@ -364,6 +375,7 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6131 | const char *parent_name; | |
6132 | const char *name; | |
6133 | struct device_node *periphclknp; | |
6134 | + struct regmap *regmap; | |
6135 | ||
6136 | parent_name = of_clk_get_parent_name(np, 0); | |
6137 | if (!parent_name) | |
6138 | @@ -373,6 +385,10 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6139 | if (!num || num > PERIPHERAL_MAX) | |
6140 | return; | |
6141 | ||
6142 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
6143 | + if (IS_ERR(regmap)) | |
6144 | + return; | |
6145 | + | |
6146 | for_each_child_of_node(np, periphclknp) { | |
6147 | if (of_property_read_u32(periphclknp, "reg", &id)) | |
6148 | continue; | |
6149 | @@ -384,7 +400,7 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6150 | name = periphclknp->name; | |
6151 | ||
6152 | if (type == PERIPHERAL_AT91RM9200) { | |
6153 | - clk = at91_clk_register_peripheral(pmc, name, | |
6154 | + clk = at91_clk_register_peripheral(regmap, name, | |
6155 | parent_name, id); | |
6156 | } else { | |
6157 | struct clk_range range = CLK_RANGE(0, 0); | |
6158 | @@ -393,7 +409,9 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6159 | "atmel,clk-output-range", | |
6160 | &range); | |
6161 | ||
6162 | - clk = at91_clk_register_sam9x5_peripheral(pmc, name, | |
6163 | + clk = at91_clk_register_sam9x5_peripheral(regmap, | |
6164 | + &pmc_pcr_lock, | |
6165 | + name, | |
6166 | parent_name, | |
6167 | id, &range); | |
6168 | } | |
6169 | @@ -405,14 +423,16 @@ of_at91_clk_periph_setup(struct device_node *np, struct at91_pmc *pmc, u8 type) | |
6170 | } | |
6171 | } | |
6172 | ||
6173 | -void __init of_at91rm9200_clk_periph_setup(struct device_node *np, | |
6174 | - struct at91_pmc *pmc) | |
6175 | +static void __init of_at91rm9200_clk_periph_setup(struct device_node *np) | |
6176 | { | |
6177 | - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91RM9200); | |
6178 | + of_at91_clk_periph_setup(np, PERIPHERAL_AT91RM9200); | |
6179 | } | |
6180 | +CLK_OF_DECLARE(at91rm9200_clk_periph, "atmel,at91rm9200-clk-peripheral", | |
6181 | + of_at91rm9200_clk_periph_setup); | |
6182 | ||
6183 | -void __init of_at91sam9x5_clk_periph_setup(struct device_node *np, | |
6184 | - struct at91_pmc *pmc) | |
6185 | +static void __init of_at91sam9x5_clk_periph_setup(struct device_node *np) | |
6186 | { | |
6187 | - of_at91_clk_periph_setup(np, pmc, PERIPHERAL_AT91SAM9X5); | |
6188 | + of_at91_clk_periph_setup(np, PERIPHERAL_AT91SAM9X5); | |
6189 | } | |
6190 | +CLK_OF_DECLARE(at91sam9x5_clk_periph, "atmel,at91sam9x5-clk-peripheral", | |
6191 | + of_at91sam9x5_clk_periph_setup); | |
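AT91_PMC_PCR is an indirect register: the driver first writes a peripheral ID
to select a channel, then reads or updates that channel's bits. A regmap only
serializes individual accesses, so the select/access pair needs its own lock,
which is why this file introduces pmc_pcr_lock and threads a spinlock pointer
into the sam9x5 peripheral clocks. A sketch of the critical section, mirroring
clk_sam9x5_peripheral_is_enabled() above:

        unsigned long flags;
        unsigned int pcr;

        spin_lock_irqsave(periph->lock, flags);
        /* 1) select the peripheral channel... */
        regmap_write(periph->regmap, AT91_PMC_PCR,
                     periph->id & AT91_PMC_PCR_PID_MASK);
        /* 2) ...then access that channel while still holding the lock. */
        regmap_read(periph->regmap, AT91_PMC_PCR, &pcr);
        spin_unlock_irqrestore(periph->lock, flags);

Without the lock, a concurrent enable of another peripheral could re-target
the PCR between the two steps, and the read would report the wrong channel.
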
6192 | diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c | |
6193 | index 18b60f4895a6..fb2e0b56d4b7 100644 | |
6194 | --- a/drivers/clk/at91/clk-pll.c | |
6195 | +++ b/drivers/clk/at91/clk-pll.c | |
6196 | @@ -12,14 +12,8 @@ | |
6197 | #include <linux/clkdev.h> | |
6198 | #include <linux/clk/at91_pmc.h> | |
6199 | #include <linux/of.h> | |
6200 | -#include <linux/of_address.h> | |
6201 | -#include <linux/of_irq.h> | |
6202 | -#include <linux/io.h> | |
6203 | -#include <linux/kernel.h> | |
6204 | -#include <linux/wait.h> | |
6205 | -#include <linux/sched.h> | |
6206 | -#include <linux/interrupt.h> | |
6207 | -#include <linux/irq.h> | |
6208 | +#include <linux/mfd/syscon.h> | |
6209 | +#include <linux/regmap.h> | |
6210 | ||
6211 | #include "pmc.h" | |
6212 | ||
6213 | @@ -58,9 +52,7 @@ struct clk_pll_layout { | |
6214 | ||
6215 | struct clk_pll { | |
6216 | struct clk_hw hw; | |
6217 | - struct at91_pmc *pmc; | |
6218 | - unsigned int irq; | |
6219 | - wait_queue_head_t wait; | |
6220 | + struct regmap *regmap; | |
6221 | u8 id; | |
6222 | u8 div; | |
6223 | u8 range; | |
6224 | @@ -69,20 +61,19 @@ struct clk_pll { | |
6225 | const struct clk_pll_characteristics *characteristics; | |
6226 | }; | |
6227 | ||
6228 | -static irqreturn_t clk_pll_irq_handler(int irq, void *dev_id) | |
6229 | +static inline bool clk_pll_ready(struct regmap *regmap, int id) | |
6230 | { | |
6231 | - struct clk_pll *pll = (struct clk_pll *)dev_id; | |
6232 | + unsigned int status; | |
6233 | ||
6234 | - wake_up(&pll->wait); | |
6235 | - disable_irq_nosync(pll->irq); | |
6236 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
6237 | ||
6238 | - return IRQ_HANDLED; | |
6239 | + return status & PLL_STATUS_MASK(id) ? 1 : 0; | |
6240 | } | |
6241 | ||
6242 | static int clk_pll_prepare(struct clk_hw *hw) | |
6243 | { | |
6244 | struct clk_pll *pll = to_clk_pll(hw); | |
6245 | - struct at91_pmc *pmc = pll->pmc; | |
6246 | + struct regmap *regmap = pll->regmap; | |
6247 | const struct clk_pll_layout *layout = pll->layout; | |
6248 | const struct clk_pll_characteristics *characteristics = | |
6249 | pll->characteristics; | |
6250 | @@ -90,39 +81,34 @@ static int clk_pll_prepare(struct clk_hw *hw) | |
6251 | u32 mask = PLL_STATUS_MASK(id); | |
6252 | int offset = PLL_REG(id); | |
6253 | u8 out = 0; | |
6254 | - u32 pllr, icpr; | |
6255 | + unsigned int pllr; | |
6256 | + unsigned int status; | |
6257 | u8 div; | |
6258 | u16 mul; | |
6259 | ||
6260 | - pllr = pmc_read(pmc, offset); | |
6261 | + regmap_read(regmap, offset, &pllr); | |
6262 | div = PLL_DIV(pllr); | |
6263 | mul = PLL_MUL(pllr, layout); | |
6264 | ||
6265 | - if ((pmc_read(pmc, AT91_PMC_SR) & mask) && | |
6266 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
6267 | + if ((status & mask) && | |
6268 | (div == pll->div && mul == pll->mul)) | |
6269 | return 0; | |
6270 | ||
6271 | if (characteristics->out) | |
6272 | out = characteristics->out[pll->range]; | |
6273 | - if (characteristics->icpll) { | |
6274 | - icpr = pmc_read(pmc, AT91_PMC_PLLICPR) & ~PLL_ICPR_MASK(id); | |
6275 | - icpr |= (characteristics->icpll[pll->range] << | |
6276 | - PLL_ICPR_SHIFT(id)); | |
6277 | - pmc_write(pmc, AT91_PMC_PLLICPR, icpr); | |
6278 | - } | |
6279 | ||
6280 | - pllr &= ~layout->pllr_mask; | |
6281 | - pllr |= layout->pllr_mask & | |
6282 | - (pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) | | |
6283 | - (out << PLL_OUT_SHIFT) | | |
6284 | - ((pll->mul & layout->mul_mask) << layout->mul_shift)); | |
6285 | - pmc_write(pmc, offset, pllr); | |
6286 | - | |
6287 | - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) { | |
6288 | - enable_irq(pll->irq); | |
6289 | - wait_event(pll->wait, | |
6290 | - pmc_read(pmc, AT91_PMC_SR) & mask); | |
6291 | - } | |
6292 | + if (characteristics->icpll) | |
6293 | + regmap_update_bits(regmap, AT91_PMC_PLLICPR, PLL_ICPR_MASK(id), | |
6294 | + characteristics->icpll[pll->range] << PLL_ICPR_SHIFT(id)); | |
6295 | + | |
6296 | + regmap_update_bits(regmap, offset, layout->pllr_mask, | |
6297 | + pll->div | (PLL_MAX_COUNT << PLL_COUNT_SHIFT) | | |
6298 | + (out << PLL_OUT_SHIFT) | | |
6299 | + ((pll->mul & layout->mul_mask) << layout->mul_shift)); | |
6300 | + | |
6301 | + while (!clk_pll_ready(regmap, pll->id)) | |
6302 | + cpu_relax(); | |
6303 | ||
6304 | return 0; | |
6305 | } | |
6306 | @@ -130,32 +116,35 @@ static int clk_pll_prepare(struct clk_hw *hw) | |
6307 | static int clk_pll_is_prepared(struct clk_hw *hw) | |
6308 | { | |
6309 | struct clk_pll *pll = to_clk_pll(hw); | |
6310 | - struct at91_pmc *pmc = pll->pmc; | |
6311 | ||
6312 | - return !!(pmc_read(pmc, AT91_PMC_SR) & | |
6313 | - PLL_STATUS_MASK(pll->id)); | |
6314 | + return clk_pll_ready(pll->regmap, pll->id); | |
6315 | } | |
6316 | ||
6317 | static void clk_pll_unprepare(struct clk_hw *hw) | |
6318 | { | |
6319 | struct clk_pll *pll = to_clk_pll(hw); | |
6320 | - struct at91_pmc *pmc = pll->pmc; | |
6321 | - const struct clk_pll_layout *layout = pll->layout; | |
6322 | - int offset = PLL_REG(pll->id); | |
6323 | - u32 tmp = pmc_read(pmc, offset) & ~(layout->pllr_mask); | |
6324 | + unsigned int mask = pll->layout->pllr_mask; | |
6325 | ||
6326 | - pmc_write(pmc, offset, tmp); | |
6327 | + regmap_update_bits(pll->regmap, PLL_REG(pll->id), mask, ~mask); | |
6328 | } | |
6329 | ||
6330 | static unsigned long clk_pll_recalc_rate(struct clk_hw *hw, | |
6331 | unsigned long parent_rate) | |
6332 | { | |
6333 | struct clk_pll *pll = to_clk_pll(hw); | |
6334 | + unsigned int pllr; | |
6335 | + u16 mul; | |
6336 | + u8 div; | |
6337 | ||
6338 | - if (!pll->div || !pll->mul) | |
6339 | + regmap_read(pll->regmap, PLL_REG(pll->id), &pllr); | |
6340 | + | |
6341 | + div = PLL_DIV(pllr); | |
6342 | + mul = PLL_MUL(pllr, pll->layout); | |
6343 | + | |
6344 | + if (!div || !mul) | |
6345 | return 0; | |
6346 | ||
6347 | - return (parent_rate / pll->div) * (pll->mul + 1); | |
6348 | + return (parent_rate / div) * (mul + 1); | |
6349 | } | |
6350 | ||
6351 | static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate, | |
6352 | @@ -308,7 +297,7 @@ static const struct clk_ops pll_ops = { | |
6353 | }; | |
6354 | ||
6355 | static struct clk * __init | |
6356 | -at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, | |
6357 | +at91_clk_register_pll(struct regmap *regmap, const char *name, | |
6358 | const char *parent_name, u8 id, | |
6359 | const struct clk_pll_layout *layout, | |
6360 | const struct clk_pll_characteristics *characteristics) | |
6361 | @@ -316,9 +305,8 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, | |
6362 | struct clk_pll *pll; | |
6363 | struct clk *clk = NULL; | |
6364 | struct clk_init_data init; | |
6365 | - int ret; | |
6366 | int offset = PLL_REG(id); | |
6367 | - u32 tmp; | |
6368 | + unsigned int pllr; | |
6369 | ||
6370 | if (id > PLL_MAX_ID) | |
6371 | return ERR_PTR(-EINVAL); | |
6372 | @@ -337,23 +325,13 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, | |
6373 | pll->hw.init = &init; | |
6374 | pll->layout = layout; | |
6375 | pll->characteristics = characteristics; | |
6376 | - pll->pmc = pmc; | |
6377 | - pll->irq = irq; | |
6378 | - tmp = pmc_read(pmc, offset) & layout->pllr_mask; | |
6379 | - pll->div = PLL_DIV(tmp); | |
6380 | - pll->mul = PLL_MUL(tmp, layout); | |
6381 | - init_waitqueue_head(&pll->wait); | |
6382 | - irq_set_status_flags(pll->irq, IRQ_NOAUTOEN); | |
6383 | - ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH, | |
6384 | - id ? "clk-pllb" : "clk-plla", pll); | |
6385 | - if (ret) { | |
6386 | - kfree(pll); | |
6387 | - return ERR_PTR(ret); | |
6388 | - } | |
6389 | + pll->regmap = regmap; | |
6390 | + regmap_read(regmap, offset, &pllr); | |
6391 | + pll->div = PLL_DIV(pllr); | |
6392 | + pll->mul = PLL_MUL(pllr, layout); | |
6393 | ||
6394 | clk = clk_register(NULL, &pll->hw); | |
6395 | if (IS_ERR(clk)) { | |
6396 | - free_irq(pll->irq, pll); | |
6397 | kfree(pll); | |
6398 | } | |
6399 | ||
6400 | @@ -483,12 +461,12 @@ out_free_characteristics: | |
6401 | } | |
6402 | ||
6403 | static void __init | |
6404 | -of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc, | |
6405 | +of_at91_clk_pll_setup(struct device_node *np, | |
6406 | const struct clk_pll_layout *layout) | |
6407 | { | |
6408 | u32 id; | |
6409 | - unsigned int irq; | |
6410 | struct clk *clk; | |
6411 | + struct regmap *regmap; | |
6412 | const char *parent_name; | |
6413 | const char *name = np->name; | |
6414 | struct clk_pll_characteristics *characteristics; | |
6415 | @@ -500,15 +478,15 @@ of_at91_clk_pll_setup(struct device_node *np, struct at91_pmc *pmc, | |
6416 | ||
6417 | of_property_read_string(np, "clock-output-names", &name); | |
6418 | ||
6419 | - characteristics = of_at91_clk_pll_get_characteristics(np); | |
6420 | - if (!characteristics) | |
6421 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
6422 | + if (IS_ERR(regmap)) | |
6423 | return; | |
6424 | ||
6425 | - irq = irq_of_parse_and_map(np, 0); | |
6426 | - if (!irq) | |
6427 | + characteristics = of_at91_clk_pll_get_characteristics(np); | |
6428 | + if (!characteristics) | |
6429 | return; | |
6430 | ||
6431 | - clk = at91_clk_register_pll(pmc, irq, name, parent_name, id, layout, | |
6432 | + clk = at91_clk_register_pll(regmap, name, parent_name, id, layout, | |
6433 | characteristics); | |
6434 | if (IS_ERR(clk)) | |
6435 | goto out_free_characteristics; | |
6436 | @@ -520,26 +498,30 @@ out_free_characteristics: | |
6437 | kfree(characteristics); | |
6438 | } | |
6439 | ||
6440 | -void __init of_at91rm9200_clk_pll_setup(struct device_node *np, | |
6441 | - struct at91_pmc *pmc) | |
6442 | +static void __init of_at91rm9200_clk_pll_setup(struct device_node *np) | |
6443 | { | |
6444 | - of_at91_clk_pll_setup(np, pmc, &at91rm9200_pll_layout); | |
6445 | + of_at91_clk_pll_setup(np, &at91rm9200_pll_layout); | |
6446 | } | |
6447 | +CLK_OF_DECLARE(at91rm9200_clk_pll, "atmel,at91rm9200-clk-pll", | |
6448 | + of_at91rm9200_clk_pll_setup); | |
6449 | ||
6450 | -void __init of_at91sam9g45_clk_pll_setup(struct device_node *np, | |
6451 | - struct at91_pmc *pmc) | |
6452 | +static void __init of_at91sam9g45_clk_pll_setup(struct device_node *np) | |
6453 | { | |
6454 | - of_at91_clk_pll_setup(np, pmc, &at91sam9g45_pll_layout); | |
6455 | + of_at91_clk_pll_setup(np, &at91sam9g45_pll_layout); | |
6456 | } | |
6457 | +CLK_OF_DECLARE(at91sam9g45_clk_pll, "atmel,at91sam9g45-clk-pll", | |
6458 | + of_at91sam9g45_clk_pll_setup); | |
6459 | ||
6460 | -void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np, | |
6461 | - struct at91_pmc *pmc) | |
6462 | +static void __init of_at91sam9g20_clk_pllb_setup(struct device_node *np) | |
6463 | { | |
6464 | - of_at91_clk_pll_setup(np, pmc, &at91sam9g20_pllb_layout); | |
6465 | + of_at91_clk_pll_setup(np, &at91sam9g20_pllb_layout); | |
6466 | } | |
6467 | +CLK_OF_DECLARE(at91sam9g20_clk_pllb, "atmel,at91sam9g20-clk-pllb", | |
6468 | + of_at91sam9g20_clk_pllb_setup); | |
6469 | ||
6470 | -void __init of_sama5d3_clk_pll_setup(struct device_node *np, | |
6471 | - struct at91_pmc *pmc) | |
6472 | +static void __init of_sama5d3_clk_pll_setup(struct device_node *np) | |
6473 | { | |
6474 | - of_at91_clk_pll_setup(np, pmc, &sama5d3_pll_layout); | |
6475 | + of_at91_clk_pll_setup(np, &sama5d3_pll_layout); | |
6476 | } | |
6477 | +CLK_OF_DECLARE(sama5d3_clk_pll, "atmel,sama5d3-clk-pll", | |
6478 | + of_sama5d3_clk_pll_setup); | |
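Most of the churn in clk-pll.c is the mechanical replacement of
pmc_read()/pmc_write() read-modify-write sequences with regmap_update_bits(),
which performs the same masked update under the regmap's internal lock.
Roughly (a sketch, not the patch's exact fields):

        /* before: open-coded, caller must hold the PMC lock */
        tmp = pmc_read(pmc, reg) & ~mask;
        pmc_write(pmc, reg, tmp | (val & mask));

        /* after: one serialized call */
        regmap_update_bits(regmap, reg, mask, val);

One idiom worth noting: clk_pll_unprepare() now calls
regmap_update_bits(regmap, PLL_REG(id), mask, ~mask). Since only val & mask
is applied, passing ~mask clears every bit inside pllr_mask, matching the old
"read, clear, write back" sequence.
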
6479 | diff --git a/drivers/clk/at91/clk-plldiv.c b/drivers/clk/at91/clk-plldiv.c | |
6480 | index ea226562bb40..2bed26481027 100644 | |
6481 | --- a/drivers/clk/at91/clk-plldiv.c | |
6482 | +++ b/drivers/clk/at91/clk-plldiv.c | |
6483 | @@ -12,8 +12,8 @@ | |
6484 | #include <linux/clkdev.h> | |
6485 | #include <linux/clk/at91_pmc.h> | |
6486 | #include <linux/of.h> | |
6487 | -#include <linux/of_address.h> | |
6488 | -#include <linux/io.h> | |
6489 | +#include <linux/mfd/syscon.h> | |
6490 | +#include <linux/regmap.h> | |
6491 | ||
6492 | #include "pmc.h" | |
6493 | ||
6494 | @@ -21,16 +21,18 @@ | |
6495 | ||
6496 | struct clk_plldiv { | |
6497 | struct clk_hw hw; | |
6498 | - struct at91_pmc *pmc; | |
6499 | + struct regmap *regmap; | |
6500 | }; | |
6501 | ||
6502 | static unsigned long clk_plldiv_recalc_rate(struct clk_hw *hw, | |
6503 | unsigned long parent_rate) | |
6504 | { | |
6505 | struct clk_plldiv *plldiv = to_clk_plldiv(hw); | |
6506 | - struct at91_pmc *pmc = plldiv->pmc; | |
6507 | + unsigned int mckr; | |
6508 | ||
6509 | - if (pmc_read(pmc, AT91_PMC_MCKR) & AT91_PMC_PLLADIV2) | |
6510 | + regmap_read(plldiv->regmap, AT91_PMC_MCKR, &mckr); | |
6511 | + | |
6512 | + if (mckr & AT91_PMC_PLLADIV2) | |
6513 | return parent_rate / 2; | |
6514 | ||
6515 | return parent_rate; | |
6516 | @@ -57,18 +59,12 @@ static int clk_plldiv_set_rate(struct clk_hw *hw, unsigned long rate, | |
6517 | unsigned long parent_rate) | |
6518 | { | |
6519 | struct clk_plldiv *plldiv = to_clk_plldiv(hw); | |
6520 | - struct at91_pmc *pmc = plldiv->pmc; | |
6521 | - u32 tmp; | |
6522 | ||
6523 | - if (parent_rate != rate && (parent_rate / 2) != rate) | |
6524 | + if ((parent_rate != rate) && (parent_rate / 2 != rate)) | |
6525 | return -EINVAL; | |
6526 | ||
6527 | - pmc_lock(pmc); | |
6528 | - tmp = pmc_read(pmc, AT91_PMC_MCKR) & ~AT91_PMC_PLLADIV2; | |
6529 | - if ((parent_rate / 2) == rate) | |
6530 | - tmp |= AT91_PMC_PLLADIV2; | |
6531 | - pmc_write(pmc, AT91_PMC_MCKR, tmp); | |
6532 | - pmc_unlock(pmc); | |
6533 | + regmap_update_bits(plldiv->regmap, AT91_PMC_MCKR, AT91_PMC_PLLADIV2, | |
6534 | + parent_rate != rate ? AT91_PMC_PLLADIV2 : 0); | |
6535 | ||
6536 | return 0; | |
6537 | } | |
6538 | @@ -80,7 +76,7 @@ static const struct clk_ops plldiv_ops = { | |
6539 | }; | |
6540 | ||
6541 | static struct clk * __init | |
6542 | -at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name, | |
6543 | +at91_clk_register_plldiv(struct regmap *regmap, const char *name, | |
6544 | const char *parent_name) | |
6545 | { | |
6546 | struct clk_plldiv *plldiv; | |
6547 | @@ -98,7 +94,7 @@ at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name, | |
6548 | init.flags = CLK_SET_RATE_GATE; | |
6549 | ||
6550 | plldiv->hw.init = &init; | |
6551 | - plldiv->pmc = pmc; | |
6552 | + plldiv->regmap = regmap; | |
6553 | ||
6554 | clk = clk_register(NULL, &plldiv->hw); | |
6555 | ||
6556 | @@ -109,27 +105,27 @@ at91_clk_register_plldiv(struct at91_pmc *pmc, const char *name, | |
6557 | } | |
6558 | ||
6559 | static void __init | |
6560 | -of_at91_clk_plldiv_setup(struct device_node *np, struct at91_pmc *pmc) | |
6561 | +of_at91sam9x5_clk_plldiv_setup(struct device_node *np) | |
6562 | { | |
6563 | struct clk *clk; | |
6564 | const char *parent_name; | |
6565 | const char *name = np->name; | |
6566 | + struct regmap *regmap; | |
6567 | ||
6568 | parent_name = of_clk_get_parent_name(np, 0); | |
6569 | ||
6570 | of_property_read_string(np, "clock-output-names", &name); | |
6571 | ||
6572 | - clk = at91_clk_register_plldiv(pmc, name, parent_name); | |
6573 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
6574 | + if (IS_ERR(regmap)) | |
6575 | + return; | |
6576 | ||
6577 | + clk = at91_clk_register_plldiv(regmap, name, parent_name); | |
6578 | if (IS_ERR(clk)) | |
6579 | return; | |
6580 | ||
6581 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
6582 | return; | |
6583 | } | |
6584 | - | |
6585 | -void __init of_at91sam9x5_clk_plldiv_setup(struct device_node *np, | |
6586 | - struct at91_pmc *pmc) | |
6587 | -{ | |
6588 | - of_at91_clk_plldiv_setup(np, pmc); | |
6589 | -} | |
6590 | +CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv", | |
6591 | + of_at91sam9x5_clk_plldiv_setup); | |
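The rewritten clk_plldiv_set_rate() leans on the earlier -EINVAL check: once
rates other than parent_rate and parent_rate / 2 have been rejected,
"parent_rate != rate" is exactly the divide-by-two case, so AT91_PMC_PLLADIV2
is set for it and cleared otherwise. A worked example, assuming purely for
illustration an 800 MHz PLLA output:

        clk_set_rate(plladivck, 800000000);     /* PLLADIV2 cleared: /1 */
        clk_set_rate(plladivck, 400000000);     /* PLLADIV2 set:     /2 */
        clk_set_rate(plladivck, 200000000);     /* -EINVAL: no /4 tap   */

(plladivck here is a hypothetical struct clk pointer to this clock.)
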
6592 | diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c | |
6593 | index 14b270b85fec..bc0be629671b 100644 | |
6594 | --- a/drivers/clk/at91/clk-programmable.c | |
6595 | +++ b/drivers/clk/at91/clk-programmable.c | |
6596 | @@ -12,10 +12,8 @@ | |
6597 | #include <linux/clkdev.h> | |
6598 | #include <linux/clk/at91_pmc.h> | |
6599 | #include <linux/of.h> | |
6600 | -#include <linux/of_address.h> | |
6601 | -#include <linux/io.h> | |
6602 | -#include <linux/wait.h> | |
6603 | -#include <linux/sched.h> | |
6604 | +#include <linux/mfd/syscon.h> | |
6605 | +#include <linux/regmap.h> | |
6606 | ||
6607 | #include "pmc.h" | |
6608 | ||
6609 | @@ -24,6 +22,7 @@ | |
6610 | ||
6611 | #define PROG_STATUS_MASK(id) (1 << ((id) + 8)) | |
6612 | #define PROG_PRES_MASK 0x7 | |
6613 | +#define PROG_PRES(layout, pckr) ((pckr >> layout->pres_shift) & PROG_PRES_MASK) | |
6614 | #define PROG_MAX_RM9200_CSS 3 | |
6615 | ||
6616 | struct clk_programmable_layout { | |
6617 | @@ -34,7 +33,7 @@ struct clk_programmable_layout { | |
6618 | ||
6619 | struct clk_programmable { | |
6620 | struct clk_hw hw; | |
6621 | - struct at91_pmc *pmc; | |
6622 | + struct regmap *regmap; | |
6623 | u8 id; | |
6624 | const struct clk_programmable_layout *layout; | |
6625 | }; | |
6626 | @@ -44,14 +43,12 @@ struct clk_programmable { | |
6627 | static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw, | |
6628 | unsigned long parent_rate) | |
6629 | { | |
6630 | - u32 pres; | |
6631 | struct clk_programmable *prog = to_clk_programmable(hw); | |
6632 | - struct at91_pmc *pmc = prog->pmc; | |
6633 | - const struct clk_programmable_layout *layout = prog->layout; | |
6634 | + unsigned int pckr; | |
6635 | + | |
6636 | + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr); | |
6637 | ||
6638 | - pres = (pmc_read(pmc, AT91_PMC_PCKR(prog->id)) >> layout->pres_shift) & | |
6639 | - PROG_PRES_MASK; | |
6640 | - return parent_rate >> pres; | |
6641 | + return parent_rate >> PROG_PRES(prog->layout, pckr); | |
6642 | } | |
6643 | ||
6644 | static int clk_programmable_determine_rate(struct clk_hw *hw, | |
6645 | @@ -101,36 +98,36 @@ static int clk_programmable_set_parent(struct clk_hw *hw, u8 index) | |
6646 | { | |
6647 | struct clk_programmable *prog = to_clk_programmable(hw); | |
6648 | const struct clk_programmable_layout *layout = prog->layout; | |
6649 | - struct at91_pmc *pmc = prog->pmc; | |
6650 | - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & ~layout->css_mask; | |
6651 | + unsigned int mask = layout->css_mask; | |
6652 | + unsigned int pckr = 0; | |
6653 | ||
6654 | if (layout->have_slck_mck) | |
6655 | - tmp &= AT91_PMC_CSSMCK_MCK; | |
6656 | + mask |= AT91_PMC_CSSMCK_MCK; | |
6657 | ||
6658 | if (index > layout->css_mask) { | |
6659 | - if (index > PROG_MAX_RM9200_CSS && layout->have_slck_mck) { | |
6660 | - tmp |= AT91_PMC_CSSMCK_MCK; | |
6661 | - return 0; | |
6662 | - } else { | |
6663 | + if (index > PROG_MAX_RM9200_CSS && !layout->have_slck_mck) | |
6664 | return -EINVAL; | |
6665 | - } | |
6666 | + | |
6667 | + pckr |= AT91_PMC_CSSMCK_MCK; | |
6668 | } | |
6669 | ||
6670 | - pmc_write(pmc, AT91_PMC_PCKR(prog->id), tmp | index); | |
6671 | + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), mask, pckr); | |
6672 | + | |
6673 | return 0; | |
6674 | } | |
6675 | ||
6676 | static u8 clk_programmable_get_parent(struct clk_hw *hw) | |
6677 | { | |
6678 | - u32 tmp; | |
6679 | - u8 ret; | |
6680 | struct clk_programmable *prog = to_clk_programmable(hw); | |
6681 | - struct at91_pmc *pmc = prog->pmc; | |
6682 | const struct clk_programmable_layout *layout = prog->layout; | |
6683 | + unsigned int pckr; | |
6684 | + u8 ret; | |
6685 | + | |
6686 | + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr); | |
6687 | + | |
6688 | + ret = pckr & layout->css_mask; | |
6689 | ||
6690 | - tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)); | |
6691 | - ret = tmp & layout->css_mask; | |
6692 | - if (layout->have_slck_mck && (tmp & AT91_PMC_CSSMCK_MCK) && !ret) | |
6693 | + if (layout->have_slck_mck && (pckr & AT91_PMC_CSSMCK_MCK) && !ret) | |
6694 | ret = PROG_MAX_RM9200_CSS + 1; | |
6695 | ||
6696 | return ret; | |
6697 | @@ -140,26 +137,27 @@ static int clk_programmable_set_rate(struct clk_hw *hw, unsigned long rate, | |
6698 | unsigned long parent_rate) | |
6699 | { | |
6700 | struct clk_programmable *prog = to_clk_programmable(hw); | |
6701 | - struct at91_pmc *pmc = prog->pmc; | |
6702 | const struct clk_programmable_layout *layout = prog->layout; | |
6703 | unsigned long div = parent_rate / rate; | |
6704 | + unsigned int pckr; | |
6705 | int shift = 0; | |
6706 | - u32 tmp = pmc_read(pmc, AT91_PMC_PCKR(prog->id)) & | |
6707 | - ~(PROG_PRES_MASK << layout->pres_shift); | |
6708 | + | |
6709 | + regmap_read(prog->regmap, AT91_PMC_PCKR(prog->id), &pckr); | |
6710 | ||
6711 | if (!div) | |
6712 | return -EINVAL; | |
6713 | ||
6714 | shift = fls(div) - 1; | |
6715 | ||
6716 | - if (div != (1<<shift)) | |
6717 | + if (div != (1 << shift)) | |
6718 | return -EINVAL; | |
6719 | ||
6720 | if (shift >= PROG_PRES_MASK) | |
6721 | return -EINVAL; | |
6722 | ||
6723 | - pmc_write(pmc, AT91_PMC_PCKR(prog->id), | |
6724 | - tmp | (shift << layout->pres_shift)); | |
6725 | + regmap_update_bits(prog->regmap, AT91_PMC_PCKR(prog->id), | |
6726 | + PROG_PRES_MASK << layout->pres_shift, | |
6727 | + shift << layout->pres_shift); | |
6728 | ||
6729 | return 0; | |
6730 | } | |
6731 | @@ -173,7 +171,7 @@ static const struct clk_ops programmable_ops = { | |
6732 | }; | |
6733 | ||
6734 | static struct clk * __init | |
6735 | -at91_clk_register_programmable(struct at91_pmc *pmc, | |
6736 | +at91_clk_register_programmable(struct regmap *regmap, | |
6737 | const char *name, const char **parent_names, | |
6738 | u8 num_parents, u8 id, | |
6739 | const struct clk_programmable_layout *layout) | |
6740 | @@ -198,7 +196,7 @@ at91_clk_register_programmable(struct at91_pmc *pmc, | |
6741 | prog->id = id; | |
6742 | prog->layout = layout; | |
6743 | prog->hw.init = &init; | |
6744 | - prog->pmc = pmc; | |
6745 | + prog->regmap = regmap; | |
6746 | ||
6747 | clk = clk_register(NULL, &prog->hw); | |
6748 | if (IS_ERR(clk)) | |
6749 | @@ -226,7 +224,7 @@ static const struct clk_programmable_layout at91sam9x5_programmable_layout = { | |
6750 | }; | |
6751 | ||
6752 | static void __init | |
6753 | -of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc, | |
6754 | +of_at91_clk_prog_setup(struct device_node *np, | |
6755 | const struct clk_programmable_layout *layout) | |
6756 | { | |
6757 | int num; | |
6758 | @@ -236,6 +234,7 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc, | |
6759 | const char *parent_names[PROG_SOURCE_MAX]; | |
6760 | const char *name; | |
6761 | struct device_node *progclknp; | |
6762 | + struct regmap *regmap; | |
6763 | ||
6764 | num_parents = of_clk_get_parent_count(np); | |
6765 | if (num_parents <= 0 || num_parents > PROG_SOURCE_MAX) | |
6766 | @@ -247,6 +246,10 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc, | |
6767 | if (!num || num > (PROG_ID_MAX + 1)) | |
6768 | return; | |
6769 | ||
6770 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
6771 | + if (IS_ERR(regmap)) | |
6772 | + return; | |
6773 | + | |
6774 | for_each_child_of_node(np, progclknp) { | |
6775 | if (of_property_read_u32(progclknp, "reg", &id)) | |
6776 | continue; | |
6777 | @@ -254,7 +257,7 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc, | |
6778 | if (of_property_read_string(np, "clock-output-names", &name)) | |
6779 | name = progclknp->name; | |
6780 | ||
6781 | - clk = at91_clk_register_programmable(pmc, name, | |
6782 | + clk = at91_clk_register_programmable(regmap, name, | |
6783 | parent_names, num_parents, | |
6784 | id, layout); | |
6785 | if (IS_ERR(clk)) | |
6786 | @@ -265,20 +268,23 @@ of_at91_clk_prog_setup(struct device_node *np, struct at91_pmc *pmc, | |
6787 | } | |
6788 | ||
6789 | ||
6790 | -void __init of_at91rm9200_clk_prog_setup(struct device_node *np, | |
6791 | - struct at91_pmc *pmc) | |
6792 | +static void __init of_at91rm9200_clk_prog_setup(struct device_node *np) | |
6793 | { | |
6794 | - of_at91_clk_prog_setup(np, pmc, &at91rm9200_programmable_layout); | |
6795 | + of_at91_clk_prog_setup(np, &at91rm9200_programmable_layout); | |
6796 | } | |
6797 | +CLK_OF_DECLARE(at91rm9200_clk_prog, "atmel,at91rm9200-clk-programmable", | |
6798 | + of_at91rm9200_clk_prog_setup); | |
6799 | ||
6800 | -void __init of_at91sam9g45_clk_prog_setup(struct device_node *np, | |
6801 | - struct at91_pmc *pmc) | |
6802 | +static void __init of_at91sam9g45_clk_prog_setup(struct device_node *np) | |
6803 | { | |
6804 | - of_at91_clk_prog_setup(np, pmc, &at91sam9g45_programmable_layout); | |
6805 | + of_at91_clk_prog_setup(np, &at91sam9g45_programmable_layout); | |
6806 | } | |
6807 | +CLK_OF_DECLARE(at91sam9g45_clk_prog, "atmel,at91sam9g45-clk-programmable", | |
6808 | + of_at91sam9g45_clk_prog_setup); | |
6809 | ||
6810 | -void __init of_at91sam9x5_clk_prog_setup(struct device_node *np, | |
6811 | - struct at91_pmc *pmc) | |
6812 | +static void __init of_at91sam9x5_clk_prog_setup(struct device_node *np) | |
6813 | { | |
6814 | - of_at91_clk_prog_setup(np, pmc, &at91sam9x5_programmable_layout); | |
6815 | + of_at91_clk_prog_setup(np, &at91sam9x5_programmable_layout); | |
6816 | } | |
6817 | +CLK_OF_DECLARE(at91sam9x5_clk_prog, "atmel,at91sam9x5-clk-programmable", | |
6818 | + of_at91sam9x5_clk_prog_setup); | |
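The programmable-clock parent encoding deserves a note: on layouts with
have_slck_mck, the CSS field only covers parents 0..PROG_MAX_RM9200_CSS, and
the extra mck parent is signalled out of band by AT91_PMC_CSSMCK_MCK with CSS
left at zero. The decode performed by clk_programmable_get_parent() therefore
reads, taking the sam9x5 layout as an example:

        unsigned int pckr;
        u8 parent;

        regmap_read(regmap, AT91_PMC_PCKR(id), &pckr);

        parent = pckr & layout->css_mask;       /* parents 0..3 */
        if ((pckr & AT91_PMC_CSSMCK_MCK) && !parent)
                parent = PROG_MAX_RM9200_CSS + 1;       /* parent 4: mck */
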
6819 | diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c | |
6820 | index d0d5076a9b94..221c09684ba3 100644 | |
6821 | --- a/drivers/clk/at91/clk-slow.c | |
6822 | +++ b/drivers/clk/at91/clk-slow.c | |
6823 | @@ -13,17 +13,11 @@ | |
6824 | #include <linux/clk.h> | |
6825 | #include <linux/clk-provider.h> | |
6826 | #include <linux/clkdev.h> | |
6827 | -#include <linux/slab.h> | |
6828 | #include <linux/clk/at91_pmc.h> | |
6829 | #include <linux/delay.h> | |
6830 | #include <linux/of.h> | |
6831 | -#include <linux/of_address.h> | |
6832 | -#include <linux/of_irq.h> | |
6833 | -#include <linux/io.h> | |
6834 | -#include <linux/interrupt.h> | |
6835 | -#include <linux/irq.h> | |
6836 | -#include <linux/sched.h> | |
6837 | -#include <linux/wait.h> | |
6838 | +#include <linux/mfd/syscon.h> | |
6839 | +#include <linux/regmap.h> | |
6840 | ||
6841 | #include "pmc.h" | |
6842 | #include "sckc.h" | |
6843 | @@ -59,7 +53,7 @@ struct clk_slow_rc_osc { | |
6844 | ||
6845 | struct clk_sam9260_slow { | |
6846 | struct clk_hw hw; | |
6847 | - struct at91_pmc *pmc; | |
6848 | + struct regmap *regmap; | |
6849 | }; | |
6850 | ||
6851 | #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw) | |
6852 | @@ -393,8 +387,11 @@ void __init of_at91sam9x5_clk_slow_setup(struct device_node *np, | |
6853 | static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw) | |
6854 | { | |
6855 | struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw); | |
6856 | + unsigned int status; | |
6857 | ||
6858 | - return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL); | |
6859 | + regmap_read(slowck->regmap, AT91_PMC_SR, &status); | |
6860 | + | |
6861 | + return status & AT91_PMC_OSCSEL ? 1 : 0; | |
6862 | } | |
6863 | ||
6864 | static const struct clk_ops sam9260_slow_ops = { | |
6865 | @@ -402,7 +399,7 @@ static const struct clk_ops sam9260_slow_ops = { | |
6866 | }; | |
6867 | ||
6868 | static struct clk * __init | |
6869 | -at91_clk_register_sam9260_slow(struct at91_pmc *pmc, | |
6870 | +at91_clk_register_sam9260_slow(struct regmap *regmap, | |
6871 | const char *name, | |
6872 | const char **parent_names, | |
6873 | int num_parents) | |
6874 | @@ -411,7 +408,7 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc, | |
6875 | struct clk *clk = NULL; | |
6876 | struct clk_init_data init; | |
6877 | ||
6878 | - if (!pmc || !name) | |
6879 | + if (!name) | |
6880 | return ERR_PTR(-EINVAL); | |
6881 | ||
6882 | if (!parent_names || !num_parents) | |
6883 | @@ -428,7 +425,7 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc, | |
6884 | init.flags = 0; | |
6885 | ||
6886 | slowck->hw.init = &init; | |
6887 | - slowck->pmc = pmc; | |
6888 | + slowck->regmap = regmap; | |
6889 | ||
6890 | clk = clk_register(NULL, &slowck->hw); | |
6891 | if (IS_ERR(clk)) | |
6892 | @@ -439,29 +436,34 @@ at91_clk_register_sam9260_slow(struct at91_pmc *pmc, | |
6893 | return clk; | |
6894 | } | |
6895 | ||
6896 | -void __init of_at91sam9260_clk_slow_setup(struct device_node *np, | |
6897 | - struct at91_pmc *pmc) | |
6898 | +static void __init of_at91sam9260_clk_slow_setup(struct device_node *np) | |
6899 | { | |
6900 | struct clk *clk; | |
6901 | const char *parent_names[2]; | |
6902 | int num_parents; | |
6903 | const char *name = np->name; | |
6904 | + struct regmap *regmap; | |
6905 | ||
6906 | num_parents = of_clk_get_parent_count(np); | |
6907 | if (num_parents != 2) | |
6908 | return; | |
6909 | ||
6910 | of_clk_parent_fill(np, parent_names, num_parents); | |
6911 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
6912 | + if (IS_ERR(regmap)) | |
6913 | + return; | |
6914 | ||
6915 | of_property_read_string(np, "clock-output-names", &name); | |
6916 | ||
6917 | - clk = at91_clk_register_sam9260_slow(pmc, name, parent_names, | |
6918 | + clk = at91_clk_register_sam9260_slow(regmap, name, parent_names, | |
6919 | num_parents); | |
6920 | if (IS_ERR(clk)) | |
6921 | return; | |
6922 | ||
6923 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
6924 | } | |
6925 | +CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow", | |
6926 | + of_at91sam9260_clk_slow_setup); | |
6927 | ||
6928 | /* | |
6929 | * FIXME: All slow clk users are not properly claiming it (get + prepare + | |
6930 | diff --git a/drivers/clk/at91/clk-smd.c b/drivers/clk/at91/clk-smd.c | |
6931 | index a7f8501cfa05..e6948a52005a 100644 | |
6932 | --- a/drivers/clk/at91/clk-smd.c | |
6933 | +++ b/drivers/clk/at91/clk-smd.c | |
6934 | @@ -12,8 +12,8 @@ | |
6935 | #include <linux/clkdev.h> | |
6936 | #include <linux/clk/at91_pmc.h> | |
6937 | #include <linux/of.h> | |
6938 | -#include <linux/of_address.h> | |
6939 | -#include <linux/io.h> | |
6940 | +#include <linux/mfd/syscon.h> | |
6941 | +#include <linux/regmap.h> | |
6942 | ||
6943 | #include "pmc.h" | |
6944 | ||
6945 | @@ -24,7 +24,7 @@ | |
6946 | ||
6947 | struct at91sam9x5_clk_smd { | |
6948 | struct clk_hw hw; | |
6949 | - struct at91_pmc *pmc; | |
6950 | + struct regmap *regmap; | |
6951 | }; | |
6952 | ||
6953 | #define to_at91sam9x5_clk_smd(hw) \ | |
6954 | @@ -33,13 +33,13 @@ struct at91sam9x5_clk_smd { | |
6955 | static unsigned long at91sam9x5_clk_smd_recalc_rate(struct clk_hw *hw, | |
6956 | unsigned long parent_rate) | |
6957 | { | |
6958 | - u32 tmp; | |
6959 | - u8 smddiv; | |
6960 | struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw); | |
6961 | - struct at91_pmc *pmc = smd->pmc; | |
6962 | + unsigned int smdr; | |
6963 | + u8 smddiv; | |
6964 | + | |
6965 | + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr); | |
6966 | + smddiv = (smdr & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT; | |
6967 | ||
6968 | - tmp = pmc_read(pmc, AT91_PMC_SMD); | |
6969 | - smddiv = (tmp & AT91_PMC_SMD_DIV) >> SMD_DIV_SHIFT; | |
6970 | return parent_rate / (smddiv + 1); | |
6971 | } | |
6972 | ||
6973 | @@ -67,40 +67,38 @@ static long at91sam9x5_clk_smd_round_rate(struct clk_hw *hw, unsigned long rate, | |
6974 | ||
6975 | static int at91sam9x5_clk_smd_set_parent(struct clk_hw *hw, u8 index) | |
6976 | { | |
6977 | - u32 tmp; | |
6978 | struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw); | |
6979 | - struct at91_pmc *pmc = smd->pmc; | |
6980 | ||
6981 | if (index > 1) | |
6982 | return -EINVAL; | |
6983 | - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS; | |
6984 | - if (index) | |
6985 | - tmp |= AT91_PMC_SMDS; | |
6986 | - pmc_write(pmc, AT91_PMC_SMD, tmp); | |
6987 | + | |
6988 | + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMDS, | |
6989 | + index ? AT91_PMC_SMDS : 0); | |
6990 | + | |
6991 | return 0; | |
6992 | } | |
6993 | ||
6994 | static u8 at91sam9x5_clk_smd_get_parent(struct clk_hw *hw) | |
6995 | { | |
6996 | struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw); | |
6997 | - struct at91_pmc *pmc = smd->pmc; | |
6998 | + unsigned int smdr; | |
6999 | ||
7000 | - return pmc_read(pmc, AT91_PMC_SMD) & AT91_PMC_SMDS; | |
7001 | + regmap_read(smd->regmap, AT91_PMC_SMD, &smdr); | |
7002 | + | |
7003 | + return smdr & AT91_PMC_SMDS; | |
7004 | } | |
7005 | ||
7006 | static int at91sam9x5_clk_smd_set_rate(struct clk_hw *hw, unsigned long rate, | |
7007 | unsigned long parent_rate) | |
7008 | { | |
7009 | - u32 tmp; | |
7010 | struct at91sam9x5_clk_smd *smd = to_at91sam9x5_clk_smd(hw); | |
7011 | - struct at91_pmc *pmc = smd->pmc; | |
7012 | unsigned long div = parent_rate / rate; | |
7013 | ||
7014 | if (parent_rate % rate || div < 1 || div > (SMD_MAX_DIV + 1)) | |
7015 | return -EINVAL; | |
7016 | - tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMD_DIV; | |
7017 | - tmp |= (div - 1) << SMD_DIV_SHIFT; | |
7018 | - pmc_write(pmc, AT91_PMC_SMD, tmp); | |
7019 | + | |
7020 | + regmap_update_bits(smd->regmap, AT91_PMC_SMD, AT91_PMC_SMD_DIV, | |
7021 | + (div - 1) << SMD_DIV_SHIFT); | |
7022 | ||
7023 | return 0; | |
7024 | } | |
7025 | @@ -114,7 +112,7 @@ static const struct clk_ops at91sam9x5_smd_ops = { | |
7026 | }; | |
7027 | ||
7028 | static struct clk * __init | |
7029 | -at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name, | |
7030 | +at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name, | |
7031 | const char **parent_names, u8 num_parents) | |
7032 | { | |
7033 | struct at91sam9x5_clk_smd *smd; | |
7034 | @@ -132,7 +130,7 @@ at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name, | |
7035 | init.flags = CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE; | |
7036 | ||
7037 | smd->hw.init = &init; | |
7038 | - smd->pmc = pmc; | |
7039 | + smd->regmap = regmap; | |
7040 | ||
7041 | clk = clk_register(NULL, &smd->hw); | |
7042 | if (IS_ERR(clk)) | |
7043 | @@ -141,13 +139,13 @@ at91sam9x5_clk_register_smd(struct at91_pmc *pmc, const char *name, | |
7044 | return clk; | |
7045 | } | |
7046 | ||
7047 | -void __init of_at91sam9x5_clk_smd_setup(struct device_node *np, | |
7048 | - struct at91_pmc *pmc) | |
7049 | +static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np) | |
7050 | { | |
7051 | struct clk *clk; | |
7052 | int num_parents; | |
7053 | const char *parent_names[SMD_SOURCE_MAX]; | |
7054 | const char *name = np->name; | |
7055 | + struct regmap *regmap; | |
7056 | ||
7057 | num_parents = of_clk_get_parent_count(np); | |
7058 | if (num_parents <= 0 || num_parents > SMD_SOURCE_MAX) | |
7059 | @@ -157,10 +155,16 @@ void __init of_at91sam9x5_clk_smd_setup(struct device_node *np, | |
7060 | ||
7061 | of_property_read_string(np, "clock-output-names", &name); | |
7062 | ||
7063 | - clk = at91sam9x5_clk_register_smd(pmc, name, parent_names, | |
7064 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7065 | + if (IS_ERR(regmap)) | |
7066 | + return; | |
7067 | + | |
7068 | + clk = at91sam9x5_clk_register_smd(regmap, name, parent_names, | |
7069 | num_parents); | |
7070 | if (IS_ERR(clk)) | |
7071 | return; | |
7072 | ||
7073 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
7074 | } | |
7075 | +CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd", | |
7076 | + of_at91sam9x5_clk_smd_setup); | |
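The heart of these conversions is mechanical: each open-coded read-modify-write on a PMC register becomes a single regmap_update_bits() call, which performs the same sequence under regmap's internal lock. That is what lets the pmc_lock()/pmc_unlock() helpers vanish from pmc.h later in this patch. A sketch of the equivalence, using the SMD mux bit from the hunk above:

    #include <linux/clk/at91_pmc.h>
    #include <linux/regmap.h>

    static void smd_set_mux(struct regmap *regmap, unsigned int index)
    {
            /*
             * old style, raw MMIO behind the pmc spinlock:
             *
             *      tmp = pmc_read(pmc, AT91_PMC_SMD) & ~AT91_PMC_SMDS;
             *      if (index)
             *              tmp |= AT91_PMC_SMDS;
             *      pmc_write(pmc, AT91_PMC_SMD, tmp);
             *
             * new style: one masked update, serialized by regmap itself
             */
            regmap_update_bits(regmap, AT91_PMC_SMD, AT91_PMC_SMDS,
                               index ? AT91_PMC_SMDS : 0);
    }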
7077 | diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c | |
7078 | index 3f5314344286..8f35d8172909 100644 | |
7079 | --- a/drivers/clk/at91/clk-system.c | |
7080 | +++ b/drivers/clk/at91/clk-system.c | |
7081 | @@ -12,13 +12,8 @@ | |
7082 | #include <linux/clkdev.h> | |
7083 | #include <linux/clk/at91_pmc.h> | |
7084 | #include <linux/of.h> | |
7085 | -#include <linux/of_address.h> | |
7086 | -#include <linux/io.h> | |
7087 | -#include <linux/irq.h> | |
7088 | -#include <linux/of_irq.h> | |
7089 | -#include <linux/interrupt.h> | |
7090 | -#include <linux/wait.h> | |
7091 | -#include <linux/sched.h> | |
7092 | +#include <linux/mfd/syscon.h> | |
7093 | +#include <linux/regmap.h> | |
7094 | ||
7095 | #include "pmc.h" | |
7096 | ||
7097 | @@ -29,9 +24,7 @@ | |
7098 | #define to_clk_system(hw) container_of(hw, struct clk_system, hw) | |
7099 | struct clk_system { | |
7100 | struct clk_hw hw; | |
7101 | - struct at91_pmc *pmc; | |
7102 | - unsigned int irq; | |
7103 | - wait_queue_head_t wait; | |
7104 | + struct regmap *regmap; | |
7105 | u8 id; | |
7106 | }; | |
7107 | ||
7108 | @@ -39,58 +32,54 @@ static inline int is_pck(int id) | |
7109 | { | |
7110 | return (id >= 8) && (id <= 15); | |
7111 | } | |
7112 | -static irqreturn_t clk_system_irq_handler(int irq, void *dev_id) | |
7113 | + | |
7114 | +static inline bool clk_system_ready(struct regmap *regmap, int id) | |
7115 | { | |
7116 | - struct clk_system *sys = (struct clk_system *)dev_id; | |
7117 | + unsigned int status; | |
7118 | ||
7119 | - wake_up(&sys->wait); | |
7120 | - disable_irq_nosync(sys->irq); | |
7121 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
7122 | ||
7123 | - return IRQ_HANDLED; | |
7124 | + return status & (1 << id) ? 1 : 0; | |
7125 | } | |
7126 | ||
7127 | static int clk_system_prepare(struct clk_hw *hw) | |
7128 | { | |
7129 | struct clk_system *sys = to_clk_system(hw); | |
7130 | - struct at91_pmc *pmc = sys->pmc; | |
7131 | - u32 mask = 1 << sys->id; | |
7132 | ||
7133 | - pmc_write(pmc, AT91_PMC_SCER, mask); | |
7134 | + regmap_write(sys->regmap, AT91_PMC_SCER, 1 << sys->id); | |
7135 | ||
7136 | if (!is_pck(sys->id)) | |
7137 | return 0; | |
7138 | ||
7139 | - while (!(pmc_read(pmc, AT91_PMC_SR) & mask)) { | |
7140 | - if (sys->irq) { | |
7141 | - enable_irq(sys->irq); | |
7142 | - wait_event(sys->wait, | |
7143 | - pmc_read(pmc, AT91_PMC_SR) & mask); | |
7144 | - } else | |
7145 | - cpu_relax(); | |
7146 | - } | |
7147 | + while (!clk_system_ready(sys->regmap, sys->id)) | |
7148 | + cpu_relax(); | |
7149 | + | |
7150 | return 0; | |
7151 | } | |
7152 | ||
7153 | static void clk_system_unprepare(struct clk_hw *hw) | |
7154 | { | |
7155 | struct clk_system *sys = to_clk_system(hw); | |
7156 | - struct at91_pmc *pmc = sys->pmc; | |
7157 | ||
7158 | - pmc_write(pmc, AT91_PMC_SCDR, 1 << sys->id); | |
7159 | + regmap_write(sys->regmap, AT91_PMC_SCDR, 1 << sys->id); | |
7160 | } | |
7161 | ||
7162 | static int clk_system_is_prepared(struct clk_hw *hw) | |
7163 | { | |
7164 | struct clk_system *sys = to_clk_system(hw); | |
7165 | - struct at91_pmc *pmc = sys->pmc; | |
7166 | + unsigned int status; | |
7167 | + | |
7168 | + regmap_read(sys->regmap, AT91_PMC_SCSR, &status); | |
7169 | ||
7170 | - if (!(pmc_read(pmc, AT91_PMC_SCSR) & (1 << sys->id))) | |
7171 | + if (!(status & (1 << sys->id))) | |
7172 | return 0; | |
7173 | ||
7174 | if (!is_pck(sys->id)) | |
7175 | return 1; | |
7176 | ||
7177 | - return !!(pmc_read(pmc, AT91_PMC_SR) & (1 << sys->id)); | |
7178 | + regmap_read(sys->regmap, AT91_PMC_SR, &status); | |
7179 | + | |
7180 | + return status & (1 << sys->id) ? 1 : 0; | |
7181 | } | |
7182 | ||
7183 | static const struct clk_ops system_ops = { | |
7184 | @@ -100,13 +89,12 @@ static const struct clk_ops system_ops = { | |
7185 | }; | |
7186 | ||
7187 | static struct clk * __init | |
7188 | -at91_clk_register_system(struct at91_pmc *pmc, const char *name, | |
7189 | - const char *parent_name, u8 id, int irq) | |
7190 | +at91_clk_register_system(struct regmap *regmap, const char *name, | |
7191 | + const char *parent_name, u8 id) | |
7192 | { | |
7193 | struct clk_system *sys; | |
7194 | struct clk *clk = NULL; | |
7195 | struct clk_init_data init; | |
7196 | - int ret; | |
7197 | ||
7198 | if (!parent_name || id > SYSTEM_MAX_ID) | |
7199 | return ERR_PTR(-EINVAL); | |
7200 | @@ -123,44 +111,33 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name, | |
7201 | ||
7202 | sys->id = id; | |
7203 | sys->hw.init = &init; | |
7204 | - sys->pmc = pmc; | |
7205 | - sys->irq = irq; | |
7206 | - if (irq) { | |
7207 | - init_waitqueue_head(&sys->wait); | |
7208 | - irq_set_status_flags(sys->irq, IRQ_NOAUTOEN); | |
7209 | - ret = request_irq(sys->irq, clk_system_irq_handler, | |
7210 | - IRQF_TRIGGER_HIGH, name, sys); | |
7211 | - if (ret) { | |
7212 | - kfree(sys); | |
7213 | - return ERR_PTR(ret); | |
7214 | - } | |
7215 | - } | |
7216 | + sys->regmap = regmap; | |
7217 | ||
7218 | clk = clk_register(NULL, &sys->hw); | |
7219 | - if (IS_ERR(clk)) { | |
7220 | - if (irq) | |
7221 | - free_irq(sys->irq, sys); | |
7222 | + if (IS_ERR(clk)) | |
7223 | kfree(sys); | |
7224 | - } | |
7225 | ||
7226 | return clk; | |
7227 | } | |
7228 | ||
7229 | -static void __init | |
7230 | -of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc) | |
7231 | +static void __init of_at91rm9200_clk_sys_setup(struct device_node *np) | |
7232 | { | |
7233 | int num; | |
7234 | - int irq = 0; | |
7235 | u32 id; | |
7236 | struct clk *clk; | |
7237 | const char *name; | |
7238 | struct device_node *sysclknp; | |
7239 | const char *parent_name; | |
7240 | + struct regmap *regmap; | |
7241 | ||
7242 | num = of_get_child_count(np); | |
7243 | if (num > (SYSTEM_MAX_ID + 1)) | |
7244 | return; | |
7245 | ||
7246 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7247 | + if (IS_ERR(regmap)) | |
7248 | + return; | |
7249 | + | |
7250 | for_each_child_of_node(np, sysclknp) { | |
7251 | if (of_property_read_u32(sysclknp, "reg", &id)) | |
7252 | continue; | |
7253 | @@ -168,21 +145,14 @@ of_at91_clk_sys_setup(struct device_node *np, struct at91_pmc *pmc) | |
7254 | if (of_property_read_string(np, "clock-output-names", &name)) | |
7255 | name = sysclknp->name; | |
7256 | ||
7257 | - if (is_pck(id)) | |
7258 | - irq = irq_of_parse_and_map(sysclknp, 0); | |
7259 | - | |
7260 | parent_name = of_clk_get_parent_name(sysclknp, 0); | |
7261 | ||
7262 | - clk = at91_clk_register_system(pmc, name, parent_name, id, irq); | |
7263 | + clk = at91_clk_register_system(regmap, name, parent_name, id); | |
7264 | if (IS_ERR(clk)) | |
7265 | continue; | |
7266 | ||
7267 | of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk); | |
7268 | } | |
7269 | } | |
7270 | - | |
7271 | -void __init of_at91rm9200_clk_sys_setup(struct device_node *np, | |
7272 | - struct at91_pmc *pmc) | |
7273 | -{ | |
7274 | - of_at91_clk_sys_setup(np, pmc); | |
7275 | -} | |
7276 | +CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system", | |
7277 | + of_at91rm9200_clk_sys_setup); | |
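With the PMC irqchip gone (it is deleted from pmc.c further down), prepare() can no longer sleep on a PMC interrupt, so readiness is polled instead. clk_prepare() runs in sleepable context and the ready bits in AT91_PMC_SR are expected to assert within a bounded hardware startup time, so a cpu_relax() loop is a reasonable trade. A sketch of the idiom with the status helper written out; cpu_relax() is assumed available through the arch headers the driver already pulls in indirectly:

    #include <linux/clk/at91_pmc.h>
    #include <linux/regmap.h>

    static bool pck_ready(struct regmap *regmap, int id)
    {
            unsigned int status;

            regmap_read(regmap, AT91_PMC_SR, &status);

            return status & (1 << id);
    }

    static void pck_wait_ready(struct regmap *regmap, int id)
    {
            while (!pck_ready(regmap, id))
                    cpu_relax();    /* bounded hardware startup, no IRQ needed */
    }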
7278 | diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c | |
7279 | index 8ab8502778a2..650ca45892c0 100644 | |
7280 | --- a/drivers/clk/at91/clk-usb.c | |
7281 | +++ b/drivers/clk/at91/clk-usb.c | |
7282 | @@ -12,8 +12,8 @@ | |
7283 | #include <linux/clkdev.h> | |
7284 | #include <linux/clk/at91_pmc.h> | |
7285 | #include <linux/of.h> | |
7286 | -#include <linux/of_address.h> | |
7287 | -#include <linux/io.h> | |
7288 | +#include <linux/mfd/syscon.h> | |
7289 | +#include <linux/regmap.h> | |
7290 | ||
7291 | #include "pmc.h" | |
7292 | ||
7293 | @@ -27,7 +27,7 @@ | |
7294 | ||
7295 | struct at91sam9x5_clk_usb { | |
7296 | struct clk_hw hw; | |
7297 | - struct at91_pmc *pmc; | |
7298 | + struct regmap *regmap; | |
7299 | }; | |
7300 | ||
7301 | #define to_at91sam9x5_clk_usb(hw) \ | |
7302 | @@ -35,7 +35,7 @@ struct at91sam9x5_clk_usb { | |
7303 | ||
7304 | struct at91rm9200_clk_usb { | |
7305 | struct clk_hw hw; | |
7306 | - struct at91_pmc *pmc; | |
7307 | + struct regmap *regmap; | |
7308 | u32 divisors[4]; | |
7309 | }; | |
7310 | ||
7311 | @@ -45,13 +45,12 @@ struct at91rm9200_clk_usb { | |
7312 | static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw, | |
7313 | unsigned long parent_rate) | |
7314 | { | |
7315 | - u32 tmp; | |
7316 | - u8 usbdiv; | |
7317 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7318 | - struct at91_pmc *pmc = usb->pmc; | |
7319 | + unsigned int usbr; | |
7320 | + u8 usbdiv; | |
7321 | ||
7322 | - tmp = pmc_read(pmc, AT91_PMC_USB); | |
7323 | - usbdiv = (tmp & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT; | |
7324 | + regmap_read(usb->regmap, AT91_PMC_USB, &usbr); | |
7325 | + usbdiv = (usbr & AT91_PMC_OHCIUSBDIV) >> SAM9X5_USB_DIV_SHIFT; | |
7326 | ||
7327 | return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1)); | |
7328 | } | |
7329 | @@ -109,33 +108,31 @@ static int at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, | |
7330 | ||
7331 | static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index) | |
7332 | { | |
7333 | - u32 tmp; | |
7334 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7335 | - struct at91_pmc *pmc = usb->pmc; | |
7336 | ||
7337 | if (index > 1) | |
7338 | return -EINVAL; | |
7339 | - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS; | |
7340 | - if (index) | |
7341 | - tmp |= AT91_PMC_USBS; | |
7342 | - pmc_write(pmc, AT91_PMC_USB, tmp); | |
7343 | + | |
7344 | + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, | |
7345 | + index ? AT91_PMC_USBS : 0); | |
7346 | + | |
7347 | return 0; | |
7348 | } | |
7349 | ||
7350 | static u8 at91sam9x5_clk_usb_get_parent(struct clk_hw *hw) | |
7351 | { | |
7352 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7353 | - struct at91_pmc *pmc = usb->pmc; | |
7354 | + unsigned int usbr; | |
7355 | ||
7356 | - return pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS; | |
7357 | + regmap_read(usb->regmap, AT91_PMC_USB, &usbr); | |
7358 | + | |
7359 | + return usbr & AT91_PMC_USBS; | |
7360 | } | |
7361 | ||
7362 | static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate, | |
7363 | unsigned long parent_rate) | |
7364 | { | |
7365 | - u32 tmp; | |
7366 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7367 | - struct at91_pmc *pmc = usb->pmc; | |
7368 | unsigned long div; | |
7369 | ||
7370 | if (!rate) | |
7371 | @@ -145,9 +142,8 @@ static int at91sam9x5_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate, | |
7372 | if (div > SAM9X5_USB_MAX_DIV + 1 || !div) | |
7373 | return -EINVAL; | |
7374 | ||
7375 | - tmp = pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_OHCIUSBDIV; | |
7376 | - tmp |= (div - 1) << SAM9X5_USB_DIV_SHIFT; | |
7377 | - pmc_write(pmc, AT91_PMC_USB, tmp); | |
7378 | + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_OHCIUSBDIV, | |
7379 | + (div - 1) << SAM9X5_USB_DIV_SHIFT); | |
7380 | ||
7381 | return 0; | |
7382 | } | |
7383 | @@ -163,28 +159,28 @@ static const struct clk_ops at91sam9x5_usb_ops = { | |
7384 | static int at91sam9n12_clk_usb_enable(struct clk_hw *hw) | |
7385 | { | |
7386 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7387 | - struct at91_pmc *pmc = usb->pmc; | |
7388 | ||
7389 | - pmc_write(pmc, AT91_PMC_USB, | |
7390 | - pmc_read(pmc, AT91_PMC_USB) | AT91_PMC_USBS); | |
7391 | + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, | |
7392 | + AT91_PMC_USBS); | |
7393 | + | |
7394 | return 0; | |
7395 | } | |
7396 | ||
7397 | static void at91sam9n12_clk_usb_disable(struct clk_hw *hw) | |
7398 | { | |
7399 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7400 | - struct at91_pmc *pmc = usb->pmc; | |
7401 | ||
7402 | - pmc_write(pmc, AT91_PMC_USB, | |
7403 | - pmc_read(pmc, AT91_PMC_USB) & ~AT91_PMC_USBS); | |
7404 | + regmap_update_bits(usb->regmap, AT91_PMC_USB, AT91_PMC_USBS, 0); | |
7405 | } | |
7406 | ||
7407 | static int at91sam9n12_clk_usb_is_enabled(struct clk_hw *hw) | |
7408 | { | |
7409 | struct at91sam9x5_clk_usb *usb = to_at91sam9x5_clk_usb(hw); | |
7410 | - struct at91_pmc *pmc = usb->pmc; | |
7411 | + unsigned int usbr; | |
7412 | ||
7413 | - return !!(pmc_read(pmc, AT91_PMC_USB) & AT91_PMC_USBS); | |
7414 | + regmap_read(usb->regmap, AT91_PMC_USB, &usbr); | |
7415 | + | |
7416 | + return usbr & AT91_PMC_USBS; | |
7417 | } | |
7418 | ||
7419 | static const struct clk_ops at91sam9n12_usb_ops = { | |
7420 | @@ -197,7 +193,7 @@ static const struct clk_ops at91sam9n12_usb_ops = { | |
7421 | }; | |
7422 | ||
7423 | static struct clk * __init | |
7424 | -at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7425 | +at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name, | |
7426 | const char **parent_names, u8 num_parents) | |
7427 | { | |
7428 | struct at91sam9x5_clk_usb *usb; | |
7429 | @@ -216,7 +212,7 @@ at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7430 | CLK_SET_RATE_PARENT; | |
7431 | ||
7432 | usb->hw.init = &init; | |
7433 | - usb->pmc = pmc; | |
7434 | + usb->regmap = regmap; | |
7435 | ||
7436 | clk = clk_register(NULL, &usb->hw); | |
7437 | if (IS_ERR(clk)) | |
7438 | @@ -226,7 +222,7 @@ at91sam9x5_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7439 | } | |
7440 | ||
7441 | static struct clk * __init | |
7442 | -at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7443 | +at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name, | |
7444 | const char *parent_name) | |
7445 | { | |
7446 | struct at91sam9x5_clk_usb *usb; | |
7447 | @@ -244,7 +240,7 @@ at91sam9n12_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7448 | init.flags = CLK_SET_RATE_GATE | CLK_SET_RATE_PARENT; | |
7449 | ||
7450 | usb->hw.init = &init; | |
7451 | - usb->pmc = pmc; | |
7452 | + usb->regmap = regmap; | |
7453 | ||
7454 | clk = clk_register(NULL, &usb->hw); | |
7455 | if (IS_ERR(clk)) | |
7456 | @@ -257,12 +253,12 @@ static unsigned long at91rm9200_clk_usb_recalc_rate(struct clk_hw *hw, | |
7457 | unsigned long parent_rate) | |
7458 | { | |
7459 | struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw); | |
7460 | - struct at91_pmc *pmc = usb->pmc; | |
7461 | - u32 tmp; | |
7462 | + unsigned int pllbr; | |
7463 | u8 usbdiv; | |
7464 | ||
7465 | - tmp = pmc_read(pmc, AT91_CKGR_PLLBR); | |
7466 | - usbdiv = (tmp & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT; | |
7467 | + regmap_read(usb->regmap, AT91_CKGR_PLLBR, &pllbr); | |
7468 | + | |
7469 | + usbdiv = (pllbr & AT91_PMC_USBDIV) >> RM9200_USB_DIV_SHIFT; | |
7470 | if (usb->divisors[usbdiv]) | |
7471 | return parent_rate / usb->divisors[usbdiv]; | |
7472 | ||
7473 | @@ -310,10 +306,8 @@ static long at91rm9200_clk_usb_round_rate(struct clk_hw *hw, unsigned long rate, | |
7474 | static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate, | |
7475 | unsigned long parent_rate) | |
7476 | { | |
7477 | - u32 tmp; | |
7478 | int i; | |
7479 | struct at91rm9200_clk_usb *usb = to_at91rm9200_clk_usb(hw); | |
7480 | - struct at91_pmc *pmc = usb->pmc; | |
7481 | unsigned long div; | |
7482 | ||
7483 | if (!rate) | |
7484 | @@ -323,10 +317,10 @@ static int at91rm9200_clk_usb_set_rate(struct clk_hw *hw, unsigned long rate, | |
7485 | ||
7486 | for (i = 0; i < RM9200_USB_DIV_TAB_SIZE; i++) { | |
7487 | if (usb->divisors[i] == div) { | |
7488 | - tmp = pmc_read(pmc, AT91_CKGR_PLLBR) & | |
7489 | - ~AT91_PMC_USBDIV; | |
7490 | - tmp |= i << RM9200_USB_DIV_SHIFT; | |
7491 | - pmc_write(pmc, AT91_CKGR_PLLBR, tmp); | |
7492 | + regmap_update_bits(usb->regmap, AT91_CKGR_PLLBR, | |
7493 | + AT91_PMC_USBDIV, | |
7494 | + i << RM9200_USB_DIV_SHIFT); | |
7495 | + | |
7496 | return 0; | |
7497 | } | |
7498 | } | |
7499 | @@ -341,7 +335,7 @@ static const struct clk_ops at91rm9200_usb_ops = { | |
7500 | }; | |
7501 | ||
7502 | static struct clk * __init | |
7503 | -at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7504 | +at91rm9200_clk_register_usb(struct regmap *regmap, const char *name, | |
7505 | const char *parent_name, const u32 *divisors) | |
7506 | { | |
7507 | struct at91rm9200_clk_usb *usb; | |
7508 | @@ -359,7 +353,7 @@ at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7509 | init.flags = CLK_SET_RATE_PARENT; | |
7510 | ||
7511 | usb->hw.init = &init; | |
7512 | - usb->pmc = pmc; | |
7513 | + usb->regmap = regmap; | |
7514 | memcpy(usb->divisors, divisors, sizeof(usb->divisors)); | |
7515 | ||
7516 | clk = clk_register(NULL, &usb->hw); | |
7517 | @@ -369,13 +363,13 @@ at91rm9200_clk_register_usb(struct at91_pmc *pmc, const char *name, | |
7518 | return clk; | |
7519 | } | |
7520 | ||
7521 | -void __init of_at91sam9x5_clk_usb_setup(struct device_node *np, | |
7522 | - struct at91_pmc *pmc) | |
7523 | +static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np) | |
7524 | { | |
7525 | struct clk *clk; | |
7526 | int num_parents; | |
7527 | const char *parent_names[USB_SOURCE_MAX]; | |
7528 | const char *name = np->name; | |
7529 | + struct regmap *regmap; | |
7530 | ||
7531 | num_parents = of_clk_get_parent_count(np); | |
7532 | if (num_parents <= 0 || num_parents > USB_SOURCE_MAX) | |
7533 | @@ -385,19 +379,26 @@ void __init of_at91sam9x5_clk_usb_setup(struct device_node *np, | |
7534 | ||
7535 | of_property_read_string(np, "clock-output-names", &name); | |
7536 | ||
7537 | - clk = at91sam9x5_clk_register_usb(pmc, name, parent_names, num_parents); | |
7538 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7539 | + if (IS_ERR(regmap)) | |
7540 | + return; | |
7541 | + | |
7542 | + clk = at91sam9x5_clk_register_usb(regmap, name, parent_names, | |
7543 | + num_parents); | |
7544 | if (IS_ERR(clk)) | |
7545 | return; | |
7546 | ||
7547 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
7548 | } | |
7549 | +CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb", | |
7550 | + of_at91sam9x5_clk_usb_setup); | |
7551 | ||
7552 | -void __init of_at91sam9n12_clk_usb_setup(struct device_node *np, | |
7553 | - struct at91_pmc *pmc) | |
7554 | +static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np) | |
7555 | { | |
7556 | struct clk *clk; | |
7557 | const char *parent_name; | |
7558 | const char *name = np->name; | |
7559 | + struct regmap *regmap; | |
7560 | ||
7561 | parent_name = of_clk_get_parent_name(np, 0); | |
7562 | if (!parent_name) | |
7563 | @@ -405,20 +406,26 @@ void __init of_at91sam9n12_clk_usb_setup(struct device_node *np, | |
7564 | ||
7565 | of_property_read_string(np, "clock-output-names", &name); | |
7566 | ||
7567 | - clk = at91sam9n12_clk_register_usb(pmc, name, parent_name); | |
7568 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7569 | + if (IS_ERR(regmap)) | |
7570 | + return; | |
7571 | + | |
7572 | + clk = at91sam9n12_clk_register_usb(regmap, name, parent_name); | |
7573 | if (IS_ERR(clk)) | |
7574 | return; | |
7575 | ||
7576 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
7577 | } | |
7578 | +CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb", | |
7579 | + of_at91sam9n12_clk_usb_setup); | |
7580 | ||
7581 | -void __init of_at91rm9200_clk_usb_setup(struct device_node *np, | |
7582 | - struct at91_pmc *pmc) | |
7583 | +static void __init of_at91rm9200_clk_usb_setup(struct device_node *np) | |
7584 | { | |
7585 | struct clk *clk; | |
7586 | const char *parent_name; | |
7587 | const char *name = np->name; | |
7588 | u32 divisors[4] = {0, 0, 0, 0}; | |
7589 | + struct regmap *regmap; | |
7590 | ||
7591 | parent_name = of_clk_get_parent_name(np, 0); | |
7592 | if (!parent_name) | |
7593 | @@ -430,9 +437,15 @@ void __init of_at91rm9200_clk_usb_setup(struct device_node *np, | |
7594 | ||
7595 | of_property_read_string(np, "clock-output-names", &name); | |
7596 | ||
7597 | - clk = at91rm9200_clk_register_usb(pmc, name, parent_name, divisors); | |
7598 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7599 | + if (IS_ERR(regmap)) | |
7600 | + return; | |
7601 | + | |
7602 | + clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors); | |
7603 | if (IS_ERR(clk)) | |
7604 | return; | |
7605 | ||
7606 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
7607 | } | |
7608 | +CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb", | |
7609 | + of_at91rm9200_clk_usb_setup); | |
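The sam9x5 divider math is a straight round-trip. Assuming a 480 MHz UTMI PLL parent (the usual source on these SoCs, not spelled out in this hunk), a 48 MHz target gives div = 10, the value programmed into AT91_PMC_OHCIUSBDIV is div - 1 = 9, and recalc_rate() recovers 480 MHz / (9 + 1) = 48 MHz. A worked sketch of that round-trip:

    #include <linux/kernel.h>       /* DIV_ROUND_CLOSEST() */

    /* e.g. parent = 480000000, rate = 48000000: field = 9, and back to 48 MHz */
    static unsigned long ohci_div_roundtrip(unsigned long parent,
                                            unsigned long rate)
    {
            unsigned long div = DIV_ROUND_CLOSEST(parent, rate);
            unsigned int field = div - 1;   /* written to AT91_PMC_OHCIUSBDIV */

            return DIV_ROUND_CLOSEST(parent, field + 1);  /* recalc_rate() view */
    }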
7610 | diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c | |
7611 | index ca561e90a60f..61fcf399e58c 100644 | |
7612 | --- a/drivers/clk/at91/clk-utmi.c | |
7613 | +++ b/drivers/clk/at91/clk-utmi.c | |
7614 | @@ -11,14 +11,9 @@ | |
7615 | #include <linux/clk-provider.h> | |
7616 | #include <linux/clkdev.h> | |
7617 | #include <linux/clk/at91_pmc.h> | |
7618 | -#include <linux/interrupt.h> | |
7619 | -#include <linux/irq.h> | |
7620 | #include <linux/of.h> | |
7621 | -#include <linux/of_address.h> | |
7622 | -#include <linux/of_irq.h> | |
7623 | -#include <linux/io.h> | |
7624 | -#include <linux/sched.h> | |
7625 | -#include <linux/wait.h> | |
7626 | +#include <linux/mfd/syscon.h> | |
7627 | +#include <linux/regmap.h> | |
7628 | ||
7629 | #include "pmc.h" | |
7630 | ||
7631 | @@ -26,37 +21,30 @@ | |
7632 | ||
7633 | struct clk_utmi { | |
7634 | struct clk_hw hw; | |
7635 | - struct at91_pmc *pmc; | |
7636 | - unsigned int irq; | |
7637 | - wait_queue_head_t wait; | |
7638 | + struct regmap *regmap; | |
7639 | }; | |
7640 | ||
7641 | #define to_clk_utmi(hw) container_of(hw, struct clk_utmi, hw) | |
7642 | ||
7643 | -static irqreturn_t clk_utmi_irq_handler(int irq, void *dev_id) | |
7644 | +static inline bool clk_utmi_ready(struct regmap *regmap) | |
7645 | { | |
7646 | - struct clk_utmi *utmi = (struct clk_utmi *)dev_id; | |
7647 | + unsigned int status; | |
7648 | ||
7649 | - wake_up(&utmi->wait); | |
7650 | - disable_irq_nosync(utmi->irq); | |
7651 | + regmap_read(regmap, AT91_PMC_SR, &status); | |
7652 | ||
7653 | - return IRQ_HANDLED; | |
7654 | + return status & AT91_PMC_LOCKU; | |
7655 | } | |
7656 | ||
7657 | static int clk_utmi_prepare(struct clk_hw *hw) | |
7658 | { | |
7659 | struct clk_utmi *utmi = to_clk_utmi(hw); | |
7660 | - struct at91_pmc *pmc = utmi->pmc; | |
7661 | - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) | AT91_PMC_UPLLEN | | |
7662 | - AT91_PMC_UPLLCOUNT | AT91_PMC_BIASEN; | |
7663 | + unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT | | |
7664 | + AT91_PMC_BIASEN; | |
7665 | ||
7666 | - pmc_write(pmc, AT91_CKGR_UCKR, tmp); | |
7667 | + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, uckr, uckr); | |
7668 | ||
7669 | - while (!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU)) { | |
7670 | - enable_irq(utmi->irq); | |
7671 | - wait_event(utmi->wait, | |
7672 | - pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU); | |
7673 | - } | |
7674 | + while (!clk_utmi_ready(utmi->regmap)) | |
7675 | + cpu_relax(); | |
7676 | ||
7677 | return 0; | |
7678 | } | |
7679 | @@ -64,18 +52,15 @@ static int clk_utmi_prepare(struct clk_hw *hw) | |
7680 | static int clk_utmi_is_prepared(struct clk_hw *hw) | |
7681 | { | |
7682 | struct clk_utmi *utmi = to_clk_utmi(hw); | |
7683 | - struct at91_pmc *pmc = utmi->pmc; | |
7684 | ||
7685 | - return !!(pmc_read(pmc, AT91_PMC_SR) & AT91_PMC_LOCKU); | |
7686 | + return clk_utmi_ready(utmi->regmap); | |
7687 | } | |
7688 | ||
7689 | static void clk_utmi_unprepare(struct clk_hw *hw) | |
7690 | { | |
7691 | struct clk_utmi *utmi = to_clk_utmi(hw); | |
7692 | - struct at91_pmc *pmc = utmi->pmc; | |
7693 | - u32 tmp = pmc_read(pmc, AT91_CKGR_UCKR) & ~AT91_PMC_UPLLEN; | |
7694 | ||
7695 | - pmc_write(pmc, AT91_CKGR_UCKR, tmp); | |
7696 | + regmap_update_bits(utmi->regmap, AT91_CKGR_UCKR, AT91_PMC_UPLLEN, 0); | |
7697 | } | |
7698 | ||
7699 | static unsigned long clk_utmi_recalc_rate(struct clk_hw *hw, | |
7700 | @@ -93,10 +78,9 @@ static const struct clk_ops utmi_ops = { | |
7701 | }; | |
7702 | ||
7703 | static struct clk * __init | |
7704 | -at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, | |
7705 | +at91_clk_register_utmi(struct regmap *regmap, | |
7706 | const char *name, const char *parent_name) | |
7707 | { | |
7708 | - int ret; | |
7709 | struct clk_utmi *utmi; | |
7710 | struct clk *clk = NULL; | |
7711 | struct clk_init_data init; | |
7712 | @@ -112,52 +96,36 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, | |
7713 | init.flags = CLK_SET_RATE_GATE; | |
7714 | ||
7715 | utmi->hw.init = &init; | |
7716 | - utmi->pmc = pmc; | |
7717 | - utmi->irq = irq; | |
7718 | - init_waitqueue_head(&utmi->wait); | |
7719 | - irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN); | |
7720 | - ret = request_irq(utmi->irq, clk_utmi_irq_handler, | |
7721 | - IRQF_TRIGGER_HIGH, "clk-utmi", utmi); | |
7722 | - if (ret) { | |
7723 | - kfree(utmi); | |
7724 | - return ERR_PTR(ret); | |
7725 | - } | |
7726 | + utmi->regmap = regmap; | |
7727 | ||
7728 | clk = clk_register(NULL, &utmi->hw); | |
7729 | - if (IS_ERR(clk)) { | |
7730 | - free_irq(utmi->irq, utmi); | |
7731 | + if (IS_ERR(clk)) | |
7732 | kfree(utmi); | |
7733 | - } | |
7734 | ||
7735 | return clk; | |
7736 | } | |
7737 | ||
7738 | -static void __init | |
7739 | -of_at91_clk_utmi_setup(struct device_node *np, struct at91_pmc *pmc) | |
7740 | +static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np) | |
7741 | { | |
7742 | - unsigned int irq; | |
7743 | struct clk *clk; | |
7744 | const char *parent_name; | |
7745 | const char *name = np->name; | |
7746 | + struct regmap *regmap; | |
7747 | ||
7748 | parent_name = of_clk_get_parent_name(np, 0); | |
7749 | ||
7750 | of_property_read_string(np, "clock-output-names", &name); | |
7751 | ||
7752 | - irq = irq_of_parse_and_map(np, 0); | |
7753 | - if (!irq) | |
7754 | + regmap = syscon_node_to_regmap(of_get_parent(np)); | |
7755 | + if (IS_ERR(regmap)) | |
7756 | return; | |
7757 | ||
7758 | - clk = at91_clk_register_utmi(pmc, irq, name, parent_name); | |
7759 | + clk = at91_clk_register_utmi(regmap, name, parent_name); | |
7760 | if (IS_ERR(clk)) | |
7761 | return; | |
7762 | ||
7763 | of_clk_add_provider(np, of_clk_src_simple_get, clk); | |
7764 | return; | |
7765 | } | |
7766 | - | |
7767 | -void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np, | |
7768 | - struct at91_pmc *pmc) | |
7769 | -{ | |
7770 | - of_at91_clk_utmi_setup(np, pmc); | |
7771 | -} | |
7772 | +CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi", | |
7773 | + of_at91sam9x5_clk_utmi_setup); | |
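regmap_update_bits() doubles as a set and clear primitive: passing the same bits as mask and value sets them all in one locked update, and passing 0 as the value clears the masked bits. That is the entire prepare/unprepare story above, condensed into a sketch:

    #include <linux/clk/at91_pmc.h>
    #include <linux/regmap.h>
    #include <linux/types.h>

    static void utmi_pll_enable(struct regmap *regmap, bool on)
    {
            unsigned int uckr = AT91_PMC_UPLLEN | AT91_PMC_UPLLCOUNT |
                                AT91_PMC_BIASEN;

            if (on)         /* enable, startup counter and bias in one update */
                    regmap_update_bits(regmap, AT91_CKGR_UCKR, uckr, uckr);
            else            /* unprepare drops only the enable bit */
                    regmap_update_bits(regmap, AT91_CKGR_UCKR,
                                       AT91_PMC_UPLLEN, 0);
    }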
7774 | diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c | |
7775 | index 8476b570779b..526df5ba042d 100644 | |
7776 | --- a/drivers/clk/at91/pmc.c | |
7777 | +++ b/drivers/clk/at91/pmc.c | |
7778 | @@ -12,36 +12,13 @@ | |
7779 | #include <linux/clkdev.h> | |
7780 | #include <linux/clk/at91_pmc.h> | |
7781 | #include <linux/of.h> | |
7782 | -#include <linux/of_address.h> | |
7783 | -#include <linux/io.h> | |
7784 | -#include <linux/interrupt.h> | |
7785 | -#include <linux/irq.h> | |
7786 | -#include <linux/irqchip/chained_irq.h> | |
7787 | -#include <linux/irqdomain.h> | |
7788 | -#include <linux/of_irq.h> | |
7789 | +#include <linux/mfd/syscon.h> | |
7790 | +#include <linux/regmap.h> | |
7791 | ||
7792 | #include <asm/proc-fns.h> | |
7793 | ||
7794 | #include "pmc.h" | |
7795 | ||
7796 | -void __iomem *at91_pmc_base; | |
7797 | -EXPORT_SYMBOL_GPL(at91_pmc_base); | |
7798 | - | |
7799 | -void at91rm9200_idle(void) | |
7800 | -{ | |
7801 | - /* | |
7802 | - * Disable the processor clock. The processor will be automatically | |
7803 | - * re-enabled by an interrupt or by a reset. | |
7804 | - */ | |
7805 | - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK); | |
7806 | -} | |
7807 | - | |
7808 | -void at91sam9_idle(void) | |
7809 | -{ | |
7810 | - at91_pmc_write(AT91_PMC_SCDR, AT91_PMC_PCK); | |
7811 | - cpu_do_idle(); | |
7812 | -} | |
7813 | - | |
7814 | int of_at91_get_clk_range(struct device_node *np, const char *propname, | |
7815 | struct clk_range *range) | |
7816 | { | |
7817 | @@ -64,402 +41,3 @@ int of_at91_get_clk_range(struct device_node *np, const char *propname, | |
7818 | return 0; | |
7819 | } | |
7820 | EXPORT_SYMBOL_GPL(of_at91_get_clk_range); | |
7821 | - | |
7822 | -static void pmc_irq_mask(struct irq_data *d) | |
7823 | -{ | |
7824 | - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); | |
7825 | - | |
7826 | - pmc_write(pmc, AT91_PMC_IDR, 1 << d->hwirq); | |
7827 | -} | |
7828 | - | |
7829 | -static void pmc_irq_unmask(struct irq_data *d) | |
7830 | -{ | |
7831 | - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); | |
7832 | - | |
7833 | - pmc_write(pmc, AT91_PMC_IER, 1 << d->hwirq); | |
7834 | -} | |
7835 | - | |
7836 | -static int pmc_irq_set_type(struct irq_data *d, unsigned type) | |
7837 | -{ | |
7838 | - if (type != IRQ_TYPE_LEVEL_HIGH) { | |
7839 | - pr_warn("PMC: type not supported (support only IRQ_TYPE_LEVEL_HIGH type)\n"); | |
7840 | - return -EINVAL; | |
7841 | - } | |
7842 | - | |
7843 | - return 0; | |
7844 | -} | |
7845 | - | |
7846 | -static void pmc_irq_suspend(struct irq_data *d) | |
7847 | -{ | |
7848 | - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); | |
7849 | - | |
7850 | - pmc->imr = pmc_read(pmc, AT91_PMC_IMR); | |
7851 | - pmc_write(pmc, AT91_PMC_IDR, pmc->imr); | |
7852 | -} | |
7853 | - | |
7854 | -static void pmc_irq_resume(struct irq_data *d) | |
7855 | -{ | |
7856 | - struct at91_pmc *pmc = irq_data_get_irq_chip_data(d); | |
7857 | - | |
7858 | - pmc_write(pmc, AT91_PMC_IER, pmc->imr); | |
7859 | -} | |
7860 | - | |
7861 | -static struct irq_chip pmc_irq = { | |
7862 | - .name = "PMC", | |
7863 | - .irq_disable = pmc_irq_mask, | |
7864 | - .irq_mask = pmc_irq_mask, | |
7865 | - .irq_unmask = pmc_irq_unmask, | |
7866 | - .irq_set_type = pmc_irq_set_type, | |
7867 | - .irq_suspend = pmc_irq_suspend, | |
7868 | - .irq_resume = pmc_irq_resume, | |
7869 | -}; | |
7870 | - | |
7871 | -static struct lock_class_key pmc_lock_class; | |
7872 | - | |
7873 | -static int pmc_irq_map(struct irq_domain *h, unsigned int virq, | |
7874 | - irq_hw_number_t hw) | |
7875 | -{ | |
7876 | - struct at91_pmc *pmc = h->host_data; | |
7877 | - | |
7878 | - irq_set_lockdep_class(virq, &pmc_lock_class); | |
7879 | - | |
7880 | - irq_set_chip_and_handler(virq, &pmc_irq, | |
7881 | - handle_level_irq); | |
7882 | - irq_set_chip_data(virq, pmc); | |
7883 | - | |
7884 | - return 0; | |
7885 | -} | |
7886 | - | |
7887 | -static int pmc_irq_domain_xlate(struct irq_domain *d, | |
7888 | - struct device_node *ctrlr, | |
7889 | - const u32 *intspec, unsigned int intsize, | |
7890 | - irq_hw_number_t *out_hwirq, | |
7891 | - unsigned int *out_type) | |
7892 | -{ | |
7893 | - struct at91_pmc *pmc = d->host_data; | |
7894 | - const struct at91_pmc_caps *caps = pmc->caps; | |
7895 | - | |
7896 | - if (WARN_ON(intsize < 1)) | |
7897 | - return -EINVAL; | |
7898 | - | |
7899 | - *out_hwirq = intspec[0]; | |
7900 | - | |
7901 | - if (!(caps->available_irqs & (1 << *out_hwirq))) | |
7902 | - return -EINVAL; | |
7903 | - | |
7904 | - *out_type = IRQ_TYPE_LEVEL_HIGH; | |
7905 | - | |
7906 | - return 0; | |
7907 | -} | |
7908 | - | |
7909 | -static const struct irq_domain_ops pmc_irq_ops = { | |
7910 | - .map = pmc_irq_map, | |
7911 | - .xlate = pmc_irq_domain_xlate, | |
7912 | -}; | |
7913 | - | |
7914 | -static irqreturn_t pmc_irq_handler(int irq, void *data) | |
7915 | -{ | |
7916 | - struct at91_pmc *pmc = (struct at91_pmc *)data; | |
7917 | - unsigned long sr; | |
7918 | - int n; | |
7919 | - | |
7920 | - sr = pmc_read(pmc, AT91_PMC_SR) & pmc_read(pmc, AT91_PMC_IMR); | |
7921 | - if (!sr) | |
7922 | - return IRQ_NONE; | |
7923 | - | |
7924 | - for_each_set_bit(n, &sr, BITS_PER_LONG) | |
7925 | - generic_handle_irq(irq_find_mapping(pmc->irqdomain, n)); | |
7926 | - | |
7927 | - return IRQ_HANDLED; | |
7928 | -} | |
7929 | - | |
7930 | -static const struct at91_pmc_caps at91rm9200_caps = { | |
7931 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB | | |
7932 | - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY | | |
7933 | - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY | | |
7934 | - AT91_PMC_PCK3RDY, | |
7935 | -}; | |
7936 | - | |
7937 | -static const struct at91_pmc_caps at91sam9260_caps = { | |
7938 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB | | |
7939 | - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY | | |
7940 | - AT91_PMC_PCK1RDY, | |
7941 | -}; | |
7942 | - | |
7943 | -static const struct at91_pmc_caps at91sam9g45_caps = { | |
7944 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | | |
7945 | - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | | |
7946 | - AT91_PMC_PCK1RDY, | |
7947 | -}; | |
7948 | - | |
7949 | -static const struct at91_pmc_caps at91sam9n12_caps = { | |
7950 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_LOCKB | | |
7951 | - AT91_PMC_MCKRDY | AT91_PMC_PCK0RDY | | |
7952 | - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS | | |
7953 | - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV, | |
7954 | -}; | |
7955 | - | |
7956 | -static const struct at91_pmc_caps at91sam9x5_caps = { | |
7957 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | | |
7958 | - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | | |
7959 | - AT91_PMC_PCK1RDY | AT91_PMC_MOSCSELS | | |
7960 | - AT91_PMC_MOSCRCS | AT91_PMC_CFDEV, | |
7961 | -}; | |
7962 | - | |
7963 | -static const struct at91_pmc_caps sama5d2_caps = { | |
7964 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | | |
7965 | - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | | |
7966 | - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY | | |
7967 | - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS | | |
7968 | - AT91_PMC_CFDEV | AT91_PMC_GCKRDY, | |
7969 | -}; | |
7970 | - | |
7971 | -static const struct at91_pmc_caps sama5d3_caps = { | |
7972 | - .available_irqs = AT91_PMC_MOSCS | AT91_PMC_LOCKA | AT91_PMC_MCKRDY | | |
7973 | - AT91_PMC_LOCKU | AT91_PMC_PCK0RDY | | |
7974 | - AT91_PMC_PCK1RDY | AT91_PMC_PCK2RDY | | |
7975 | - AT91_PMC_MOSCSELS | AT91_PMC_MOSCRCS | | |
7976 | - AT91_PMC_CFDEV, | |
7977 | -}; | |
7978 | - | |
7979 | -static struct at91_pmc *__init at91_pmc_init(struct device_node *np, | |
7980 | - void __iomem *regbase, int virq, | |
7981 | - const struct at91_pmc_caps *caps) | |
7982 | -{ | |
7983 | - struct at91_pmc *pmc; | |
7984 | - | |
7985 | - if (!regbase || !virq || !caps) | |
7986 | - return NULL; | |
7987 | - | |
7988 | - at91_pmc_base = regbase; | |
7989 | - | |
7990 | - pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); | |
7991 | - if (!pmc) | |
7992 | - return NULL; | |
7993 | - | |
7994 | - spin_lock_init(&pmc->lock); | |
7995 | - pmc->regbase = regbase; | |
7996 | - pmc->virq = virq; | |
7997 | - pmc->caps = caps; | |
7998 | - | |
7999 | - pmc->irqdomain = irq_domain_add_linear(np, 32, &pmc_irq_ops, pmc); | |
8000 | - | |
8001 | - if (!pmc->irqdomain) | |
8002 | - goto out_free_pmc; | |
8003 | - | |
8004 | - pmc_write(pmc, AT91_PMC_IDR, 0xffffffff); | |
8005 | - if (request_irq(pmc->virq, pmc_irq_handler, | |
8006 | - IRQF_SHARED | IRQF_COND_SUSPEND, "pmc", pmc)) | |
8007 | - goto out_remove_irqdomain; | |
8008 | - | |
8009 | - return pmc; | |
8010 | - | |
8011 | -out_remove_irqdomain: | |
8012 | - irq_domain_remove(pmc->irqdomain); | |
8013 | -out_free_pmc: | |
8014 | - kfree(pmc); | |
8015 | - | |
8016 | - return NULL; | |
8017 | -} | |
8018 | - | |
8019 | -static const struct of_device_id pmc_clk_ids[] __initconst = { | |
8020 | - /* Slow oscillator */ | |
8021 | - { | |
8022 | - .compatible = "atmel,at91sam9260-clk-slow", | |
8023 | - .data = of_at91sam9260_clk_slow_setup, | |
8024 | - }, | |
8025 | - /* Main clock */ | |
8026 | - { | |
8027 | - .compatible = "atmel,at91rm9200-clk-main-osc", | |
8028 | - .data = of_at91rm9200_clk_main_osc_setup, | |
8029 | - }, | |
8030 | - { | |
8031 | - .compatible = "atmel,at91sam9x5-clk-main-rc-osc", | |
8032 | - .data = of_at91sam9x5_clk_main_rc_osc_setup, | |
8033 | - }, | |
8034 | - { | |
8035 | - .compatible = "atmel,at91rm9200-clk-main", | |
8036 | - .data = of_at91rm9200_clk_main_setup, | |
8037 | - }, | |
8038 | - { | |
8039 | - .compatible = "atmel,at91sam9x5-clk-main", | |
8040 | - .data = of_at91sam9x5_clk_main_setup, | |
8041 | - }, | |
8042 | - /* PLL clocks */ | |
8043 | - { | |
8044 | - .compatible = "atmel,at91rm9200-clk-pll", | |
8045 | - .data = of_at91rm9200_clk_pll_setup, | |
8046 | - }, | |
8047 | - { | |
8048 | - .compatible = "atmel,at91sam9g45-clk-pll", | |
8049 | - .data = of_at91sam9g45_clk_pll_setup, | |
8050 | - }, | |
8051 | - { | |
8052 | - .compatible = "atmel,at91sam9g20-clk-pllb", | |
8053 | - .data = of_at91sam9g20_clk_pllb_setup, | |
8054 | - }, | |
8055 | - { | |
8056 | - .compatible = "atmel,sama5d3-clk-pll", | |
8057 | - .data = of_sama5d3_clk_pll_setup, | |
8058 | - }, | |
8059 | - { | |
8060 | - .compatible = "atmel,at91sam9x5-clk-plldiv", | |
8061 | - .data = of_at91sam9x5_clk_plldiv_setup, | |
8062 | - }, | |
8063 | - /* Master clock */ | |
8064 | - { | |
8065 | - .compatible = "atmel,at91rm9200-clk-master", | |
8066 | - .data = of_at91rm9200_clk_master_setup, | |
8067 | - }, | |
8068 | - { | |
8069 | - .compatible = "atmel,at91sam9x5-clk-master", | |
8070 | - .data = of_at91sam9x5_clk_master_setup, | |
8071 | - }, | |
8072 | - /* System clocks */ | |
8073 | - { | |
8074 | - .compatible = "atmel,at91rm9200-clk-system", | |
8075 | - .data = of_at91rm9200_clk_sys_setup, | |
8076 | - }, | |
8077 | - /* Peripheral clocks */ | |
8078 | - { | |
8079 | - .compatible = "atmel,at91rm9200-clk-peripheral", | |
8080 | - .data = of_at91rm9200_clk_periph_setup, | |
8081 | - }, | |
8082 | - { | |
8083 | - .compatible = "atmel,at91sam9x5-clk-peripheral", | |
8084 | - .data = of_at91sam9x5_clk_periph_setup, | |
8085 | - }, | |
8086 | - /* Programmable clocks */ | |
8087 | - { | |
8088 | - .compatible = "atmel,at91rm9200-clk-programmable", | |
8089 | - .data = of_at91rm9200_clk_prog_setup, | |
8090 | - }, | |
8091 | - { | |
8092 | - .compatible = "atmel,at91sam9g45-clk-programmable", | |
8093 | - .data = of_at91sam9g45_clk_prog_setup, | |
8094 | - }, | |
8095 | - { | |
8096 | - .compatible = "atmel,at91sam9x5-clk-programmable", | |
8097 | - .data = of_at91sam9x5_clk_prog_setup, | |
8098 | - }, | |
8099 | - /* UTMI clock */ | |
8100 | -#if defined(CONFIG_HAVE_AT91_UTMI) | |
8101 | - { | |
8102 | - .compatible = "atmel,at91sam9x5-clk-utmi", | |
8103 | - .data = of_at91sam9x5_clk_utmi_setup, | |
8104 | - }, | |
8105 | -#endif | |
8106 | - /* USB clock */ | |
8107 | -#if defined(CONFIG_HAVE_AT91_USB_CLK) | |
8108 | - { | |
8109 | - .compatible = "atmel,at91rm9200-clk-usb", | |
8110 | - .data = of_at91rm9200_clk_usb_setup, | |
8111 | - }, | |
8112 | - { | |
8113 | - .compatible = "atmel,at91sam9x5-clk-usb", | |
8114 | - .data = of_at91sam9x5_clk_usb_setup, | |
8115 | - }, | |
8116 | - { | |
8117 | - .compatible = "atmel,at91sam9n12-clk-usb", | |
8118 | - .data = of_at91sam9n12_clk_usb_setup, | |
8119 | - }, | |
8120 | -#endif | |
8121 | - /* SMD clock */ | |
8122 | -#if defined(CONFIG_HAVE_AT91_SMD) | |
8123 | - { | |
8124 | - .compatible = "atmel,at91sam9x5-clk-smd", | |
8125 | - .data = of_at91sam9x5_clk_smd_setup, | |
8126 | - }, | |
8127 | -#endif | |
8128 | -#if defined(CONFIG_HAVE_AT91_H32MX) | |
8129 | - { | |
8130 | - .compatible = "atmel,sama5d4-clk-h32mx", | |
8131 | - .data = of_sama5d4_clk_h32mx_setup, | |
8132 | - }, | |
8133 | -#endif | |
8134 | -#if defined(CONFIG_HAVE_AT91_GENERATED_CLK) | |
8135 | - { | |
8136 | - .compatible = "atmel,sama5d2-clk-generated", | |
8137 | - .data = of_sama5d2_clk_generated_setup, | |
8138 | - }, | |
8139 | -#endif | |
8140 | - { /*sentinel*/ } | |
8141 | -}; | |
8142 | - | |
8143 | -static void __init of_at91_pmc_setup(struct device_node *np, | |
8144 | - const struct at91_pmc_caps *caps) | |
8145 | -{ | |
8146 | - struct at91_pmc *pmc; | |
8147 | - struct device_node *childnp; | |
8148 | - void (*clk_setup)(struct device_node *, struct at91_pmc *); | |
8149 | - const struct of_device_id *clk_id; | |
8150 | - void __iomem *regbase = of_iomap(np, 0); | |
8151 | - int virq; | |
8152 | - | |
8153 | - if (!regbase) | |
8154 | - return; | |
8155 | - | |
8156 | - virq = irq_of_parse_and_map(np, 0); | |
8157 | - if (!virq) | |
8158 | - return; | |
8159 | - | |
8160 | - pmc = at91_pmc_init(np, regbase, virq, caps); | |
8161 | - if (!pmc) | |
8162 | - return; | |
8163 | - for_each_child_of_node(np, childnp) { | |
8164 | - clk_id = of_match_node(pmc_clk_ids, childnp); | |
8165 | - if (!clk_id) | |
8166 | - continue; | |
8167 | - clk_setup = clk_id->data; | |
8168 | - clk_setup(childnp, pmc); | |
8169 | - } | |
8170 | -} | |
8171 | - | |
8172 | -static void __init of_at91rm9200_pmc_setup(struct device_node *np) | |
8173 | -{ | |
8174 | - of_at91_pmc_setup(np, &at91rm9200_caps); | |
8175 | -} | |
8176 | -CLK_OF_DECLARE(at91rm9200_clk_pmc, "atmel,at91rm9200-pmc", | |
8177 | - of_at91rm9200_pmc_setup); | |
8178 | - | |
8179 | -static void __init of_at91sam9260_pmc_setup(struct device_node *np) | |
8180 | -{ | |
8181 | - of_at91_pmc_setup(np, &at91sam9260_caps); | |
8182 | -} | |
8183 | -CLK_OF_DECLARE(at91sam9260_clk_pmc, "atmel,at91sam9260-pmc", | |
8184 | - of_at91sam9260_pmc_setup); | |
8185 | - | |
8186 | -static void __init of_at91sam9g45_pmc_setup(struct device_node *np) | |
8187 | -{ | |
8188 | - of_at91_pmc_setup(np, &at91sam9g45_caps); | |
8189 | -} | |
8190 | -CLK_OF_DECLARE(at91sam9g45_clk_pmc, "atmel,at91sam9g45-pmc", | |
8191 | - of_at91sam9g45_pmc_setup); | |
8192 | - | |
8193 | -static void __init of_at91sam9n12_pmc_setup(struct device_node *np) | |
8194 | -{ | |
8195 | - of_at91_pmc_setup(np, &at91sam9n12_caps); | |
8196 | -} | |
8197 | -CLK_OF_DECLARE(at91sam9n12_clk_pmc, "atmel,at91sam9n12-pmc", | |
8198 | - of_at91sam9n12_pmc_setup); | |
8199 | - | |
8200 | -static void __init of_at91sam9x5_pmc_setup(struct device_node *np) | |
8201 | -{ | |
8202 | - of_at91_pmc_setup(np, &at91sam9x5_caps); | |
8203 | -} | |
8204 | -CLK_OF_DECLARE(at91sam9x5_clk_pmc, "atmel,at91sam9x5-pmc", | |
8205 | - of_at91sam9x5_pmc_setup); | |
8206 | - | |
8207 | -static void __init of_sama5d2_pmc_setup(struct device_node *np) | |
8208 | -{ | |
8209 | - of_at91_pmc_setup(np, &sama5d2_caps); | |
8210 | -} | |
8211 | -CLK_OF_DECLARE(sama5d2_clk_pmc, "atmel,sama5d2-pmc", | |
8212 | - of_sama5d2_pmc_setup); | |
8213 | - | |
8214 | -static void __init of_sama5d3_pmc_setup(struct device_node *np) | |
8215 | -{ | |
8216 | - of_at91_pmc_setup(np, &sama5d3_caps); | |
8217 | -} | |
8218 | -CLK_OF_DECLARE(sama5d3_clk_pmc, "atmel,sama5d3-pmc", | |
8219 | - of_sama5d3_pmc_setup); | |
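All of the dispatch machinery deleted here (the PMC irqchip, the per-SoC caps and the pmc_clk_ids[] table) is replaced by the CLK_OF_DECLARE() lines added to the individual clock drivers. The macro drops an of_device_id entry into a dedicated linker section that of_clk_init(NULL) walks at early boot. Roughly what one entry boils down to; the exact macro expansion differs slightly between kernel versions:

    /* roughly what CLK_OF_DECLARE(at91rm9200_clk_sys, ...) emits */
    static const struct of_device_id __of_table_at91rm9200_clk_sys
            __used __section(__clk_of_table) = {
            .compatible = "atmel,at91rm9200-clk-system",
            .data = of_at91rm9200_clk_sys_setup,
    };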
8220 | diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h | |
8221 | index f65739272779..5771fff0ee3f 100644 | |
8222 | --- a/drivers/clk/at91/pmc.h | |
8223 | +++ b/drivers/clk/at91/pmc.h | |
8224 | @@ -14,8 +14,11 @@ | |
8225 | ||
8226 | #include <linux/io.h> | |
8227 | #include <linux/irqdomain.h> | |
8228 | +#include <linux/regmap.h> | |
8229 | #include <linux/spinlock.h> | |
8230 | ||
8231 | +extern spinlock_t pmc_pcr_lock; | |
8232 | + | |
8233 | struct clk_range { | |
8234 | unsigned long min; | |
8235 | unsigned long max; | |
8236 | @@ -23,102 +26,7 @@ struct clk_range { | |
8237 | ||
8238 | #define CLK_RANGE(MIN, MAX) {.min = MIN, .max = MAX,} | |
8239 | ||
8240 | -struct at91_pmc_caps { | |
8241 | - u32 available_irqs; | |
8242 | -}; | |
8243 | - | |
8244 | -struct at91_pmc { | |
8245 | - void __iomem *regbase; | |
8246 | - int virq; | |
8247 | - spinlock_t lock; | |
8248 | - const struct at91_pmc_caps *caps; | |
8249 | - struct irq_domain *irqdomain; | |
8250 | - u32 imr; | |
8251 | -}; | |
8252 | - | |
8253 | -static inline void pmc_lock(struct at91_pmc *pmc) | |
8254 | -{ | |
8255 | - spin_lock(&pmc->lock); | |
8256 | -} | |
8257 | - | |
8258 | -static inline void pmc_unlock(struct at91_pmc *pmc) | |
8259 | -{ | |
8260 | - spin_unlock(&pmc->lock); | |
8261 | -} | |
8262 | - | |
8263 | -static inline u32 pmc_read(struct at91_pmc *pmc, int offset) | |
8264 | -{ | |
8265 | - return readl(pmc->regbase + offset); | |
8266 | -} | |
8267 | - | |
8268 | -static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value) | |
8269 | -{ | |
8270 | - writel(value, pmc->regbase + offset); | |
8271 | -} | |
8272 | - | |
8273 | int of_at91_get_clk_range(struct device_node *np, const char *propname, | |
8274 | struct clk_range *range); | |
8275 | ||
8276 | -void of_at91sam9260_clk_slow_setup(struct device_node *np, | |
8277 | - struct at91_pmc *pmc); | |
8278 | - | |
8279 | -void of_at91rm9200_clk_main_osc_setup(struct device_node *np, | |
8280 | - struct at91_pmc *pmc); | |
8281 | -void of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np, | |
8282 | - struct at91_pmc *pmc); | |
8283 | -void of_at91rm9200_clk_main_setup(struct device_node *np, | |
8284 | - struct at91_pmc *pmc); | |
8285 | -void of_at91sam9x5_clk_main_setup(struct device_node *np, | |
8286 | - struct at91_pmc *pmc); | |
8287 | - | |
8288 | -void of_at91rm9200_clk_pll_setup(struct device_node *np, | |
8289 | - struct at91_pmc *pmc); | |
8290 | -void of_at91sam9g45_clk_pll_setup(struct device_node *np, | |
8291 | - struct at91_pmc *pmc); | |
8292 | -void of_at91sam9g20_clk_pllb_setup(struct device_node *np, | |
8293 | - struct at91_pmc *pmc); | |
8294 | -void of_sama5d3_clk_pll_setup(struct device_node *np, | |
8295 | - struct at91_pmc *pmc); | |
8296 | -void of_at91sam9x5_clk_plldiv_setup(struct device_node *np, | |
8297 | - struct at91_pmc *pmc); | |
8298 | - | |
8299 | -void of_at91rm9200_clk_master_setup(struct device_node *np, | |
8300 | - struct at91_pmc *pmc); | |
8301 | -void of_at91sam9x5_clk_master_setup(struct device_node *np, | |
8302 | - struct at91_pmc *pmc); | |
8303 | - | |
8304 | -void of_at91rm9200_clk_sys_setup(struct device_node *np, | |
8305 | - struct at91_pmc *pmc); | |
8306 | - | |
8307 | -void of_at91rm9200_clk_periph_setup(struct device_node *np, | |
8308 | - struct at91_pmc *pmc); | |
8309 | -void of_at91sam9x5_clk_periph_setup(struct device_node *np, | |
8310 | - struct at91_pmc *pmc); | |
8311 | - | |
8312 | -void of_at91rm9200_clk_prog_setup(struct device_node *np, | |
8313 | - struct at91_pmc *pmc); | |
8314 | -void of_at91sam9g45_clk_prog_setup(struct device_node *np, | |
8315 | - struct at91_pmc *pmc); | |
8316 | -void of_at91sam9x5_clk_prog_setup(struct device_node *np, | |
8317 | - struct at91_pmc *pmc); | |
8318 | - | |
8319 | -void of_at91sam9x5_clk_utmi_setup(struct device_node *np, | |
8320 | - struct at91_pmc *pmc); | |
8321 | - | |
8322 | -void of_at91rm9200_clk_usb_setup(struct device_node *np, | |
8323 | - struct at91_pmc *pmc); | |
8324 | -void of_at91sam9x5_clk_usb_setup(struct device_node *np, | |
8325 | - struct at91_pmc *pmc); | |
8326 | -void of_at91sam9n12_clk_usb_setup(struct device_node *np, | |
8327 | - struct at91_pmc *pmc); | |
8328 | - | |
8329 | -void of_at91sam9x5_clk_smd_setup(struct device_node *np, | |
8330 | - struct at91_pmc *pmc); | |
8331 | - | |
8332 | -void of_sama5d4_clk_h32mx_setup(struct device_node *np, | |
8333 | - struct at91_pmc *pmc); | |
8334 | - | |
8335 | -void of_sama5d2_clk_generated_setup(struct device_node *np, | |
8336 | - struct at91_pmc *pmc); | |
8337 | - | |
8338 | #endif /* __PMC_H_ */ | |
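After the purge, pmc.h is down to struct clk_range, the exported PCR lock and of_at91_get_clk_range(), which reads a two-cell <min max> property (in Hz) into a struct clk_range. A usage sketch; "atmel,clk-output-range" is the property name the at91 clock bindings conventionally use for output ranges:

    #include <linux/of.h>

    static int example_read_range(struct device_node *np,
                                  struct clk_range *range)
    {
            /* fills range->min/range->max from a <min max> pair, in Hz */
            return of_at91_get_clk_range(np, "atmel,clk-output-range", range);
    }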
8339 | diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c | |
8340 | index 4da2af9694a2..5b6f57f500b8 100644 | |
8341 | --- a/drivers/clocksource/tcb_clksrc.c | |
8342 | +++ b/drivers/clocksource/tcb_clksrc.c | |
8343 | @@ -23,8 +23,7 @@ | |
8344 | * this 32 bit free-running counter. the second channel is not used. | |
8345 | * | |
8346 | * - The third channel may be used to provide a 16-bit clockevent | |
8347 | - * source, used in either periodic or oneshot mode. This runs | |
8348 | - * at 32 KiHZ, and can handle delays of up to two seconds. | |
8349 | + * source, used in either periodic or oneshot mode. | |
8350 | * | |
8351 | * A boot clocksource and clockevent source are also currently needed, | |
8352 | * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so | |
8353 | @@ -74,6 +73,8 @@ static struct clocksource clksrc = { | |
8354 | struct tc_clkevt_device { | |
8355 | struct clock_event_device clkevt; | |
8356 | struct clk *clk; | |
8357 | + bool clk_enabled; | |
8358 | + u32 freq; | |
8359 | void __iomem *regs; | |
8360 | }; | |
8361 | ||
8362 | @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) | |
8363 | return container_of(clkevt, struct tc_clkevt_device, clkevt); | |
8364 | } | |
8365 | ||
8366 | -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, | |
8367 | - * because using one of the divided clocks would usually mean the | |
8368 | - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). | |
8369 | - * | |
8370 | - * A divided clock could be good for high resolution timers, since | |
8371 | - * 30.5 usec resolution can seem "low". | |
8372 | - */ | |
8373 | static u32 timer_clock; | |
8374 | ||
8375 | +static void tc_clk_disable(struct clock_event_device *d) | |
8376 | +{ | |
8377 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
8378 | + | |
8379 | + clk_disable(tcd->clk); | |
8380 | + tcd->clk_enabled = false; | |
8381 | +} | |
8382 | + | |
8383 | +static void tc_clk_enable(struct clock_event_device *d) | |
8384 | +{ | |
8385 | + struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
8386 | + | |
8387 | + if (tcd->clk_enabled) | |
8388 | + return; | |
8389 | + clk_enable(tcd->clk); | |
8390 | + tcd->clk_enabled = true; | |
8391 | +} | |
8392 | + | |
8393 | static int tc_shutdown(struct clock_event_device *d) | |
8394 | { | |
8395 | struct tc_clkevt_device *tcd = to_tc_clkevt(d); | |
8396 | @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) | |
8397 | ||
8398 | __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); | |
8399 | __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); | |
8400 | + return 0; | |
8401 | +} | |
8402 | + | |
8403 | +static int tc_shutdown_clk_off(struct clock_event_device *d) | |
8404 | +{ | |
8405 | + tc_shutdown(d); | |
8406 | if (!clockevent_state_detached(d)) | |
8407 | - clk_disable(tcd->clk); | |
8408 | + tc_clk_disable(d); | |
8409 | ||
8410 | return 0; | |
8411 | } | |
8412 | @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) | |
8413 | if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) | |
8414 | tc_shutdown(d); | |
8415 | ||
8416 | - clk_enable(tcd->clk); | |
8417 | + tc_clk_enable(d); | |
8418 | ||
8419 | - /* slow clock, count up to RC, then irq and stop */ | |
8420 | + /* count up to RC, then irq and stop */ | |
8421 | __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | | |
8422 | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); | |
8423 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
8424 | @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) | |
8425 | /* By not making the gentime core emulate periodic mode on top | |
8426 | * of oneshot, we get lower overhead and improved accuracy. | |
8427 | */ | |
8428 | - clk_enable(tcd->clk); | |
8429 | + tc_clk_enable(d); | |
8430 | ||
8431 | - /* slow clock, count up to RC, then irq and restart */ | |
8432 | + /* count up to RC, then irq and restart */ | |
8433 | __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, | |
8434 | regs + ATMEL_TC_REG(2, CMR)); | |
8435 | - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
8436 | + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); | |
8437 | ||
8438 | /* Enable clock and interrupts on RC compare */ | |
8439 | __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); | |
8440 | @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { | |
8441 | .features = CLOCK_EVT_FEAT_PERIODIC | | |
8442 | CLOCK_EVT_FEAT_ONESHOT, | |
8443 | /* Should be lower than at91rm9200's system timer */ | |
8444 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
8445 | .rating = 125, | |
8446 | +#else | |
8447 | + .rating = 200, | |
8448 | +#endif | |
8449 | .set_next_event = tc_next_event, | |
8450 | - .set_state_shutdown = tc_shutdown, | |
8451 | + .set_state_shutdown = tc_shutdown_clk_off, | |
8452 | .set_state_periodic = tc_set_periodic, | |
8453 | .set_state_oneshot = tc_set_oneshot, | |
8454 | }, | |
8455 | @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) | |
8456 | return IRQ_NONE; | |
8457 | } | |
8458 | ||
8459 | -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
8460 | +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) | |
8461 | { | |
8462 | + unsigned divisor = atmel_tc_divisors[divisor_idx]; | |
8463 | int ret; | |
8464 | struct clk *t2_clk = tc->clk[2]; | |
8465 | int irq = tc->irq[2]; | |
8466 | @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
8467 | clkevt.regs = tc->regs; | |
8468 | clkevt.clk = t2_clk; | |
8469 | ||
8470 | - timer_clock = clk32k_divisor_idx; | |
8471 | + timer_clock = divisor_idx; | |
8472 | + if (!divisor) | |
8473 | + clkevt.freq = 32768; | |
8474 | + else | |
8475 | + clkevt.freq = clk_get_rate(t2_clk) / divisor; | |
8476 | ||
8477 | clkevt.clkevt.cpumask = cpumask_of(0); | |
8478 | ||
8479 | @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) | |
8480 | return ret; | |
8481 | } | |
8482 | ||
8483 | - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); | |
8484 | + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); | |
8485 | ||
8486 | return ret; | |
8487 | } | |
8488 | @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) | |
8489 | goto err_disable_t1; | |
8490 | ||
8491 | /* channel 2: periodic and oneshot timer support */ | |
8492 | +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
8493 | ret = setup_clkevents(tc, clk32k_divisor_idx); | |
8494 | +#else | |
8495 | + ret = setup_clkevents(tc, best_divisor_idx); | |
8496 | +#endif | |
8497 | if (ret) | |
8498 | goto err_unregister_clksrc; | |
8499 | ||
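
A note on the arithmetic above: with ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK disabled (the PREEMPT_RT_FULL default), the clockevent frequency comes from the divided master clock rather than the fixed 32768 Hz slow clock, and the periodic reload value is rounded to the nearest count. Below is a small standalone sketch of that computation; the divisor table mirrors atmel_tclib's atmel_tc_divisors, but the MCK rate and index are made-up example inputs (the driver obtains the real rate via clk_get_rate()):

    /* Sketch of the rate selection in setup_clkevents() above; values are
     * illustrative, not taken from a real board. */
    #include <stdio.h>

    #define HZ 100u                           /* assumed tick rate */

    static const unsigned divisors[5] = { 2, 8, 32, 128, 0 };  /* 0 = 32 KiHz slow clock */

    static unsigned clkevt_freq(unsigned long mck, int divisor_idx)
    {
        unsigned divisor = divisors[divisor_idx];

        return divisor ? mck / divisor : 32768;   /* mirrors the !divisor test */
    }

    int main(void)
    {
        unsigned freq = clkevt_freq(133000000ul, 3);  /* MCK/128 on a 133 MHz MCK */
        unsigned rc = (freq + HZ / 2) / HZ;           /* periodic reload, as in tc_set_periodic() */

        printf("freq=%u Hz, periodic RC=%u\n", freq, rc);
        return 0;
    }
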
8500 | diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c | |
8501 | index d911c5dca8f1..7a40f7e88468 100644 | |
8502 | --- a/drivers/clocksource/timer-atmel-pit.c | |
8503 | +++ b/drivers/clocksource/timer-atmel-pit.c | |
8504 | @@ -46,6 +46,7 @@ struct pit_data { | |
8505 | u32 cycle; | |
8506 | u32 cnt; | |
8507 | unsigned int irq; | |
8508 | + bool irq_requested; | |
8509 | struct clk *mck; | |
8510 | }; | |
8511 | ||
8512 | @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) | |
8513 | ||
8514 | /* disable irq, leaving the clocksource active */ | |
8515 | pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); | |
8516 | + if (data->irq_requested) { | |
8517 | + free_irq(data->irq, data); | |
8518 | + data->irq_requested = false; | |
8519 | + } | |
8520 | return 0; | |
8521 | } | |
8522 | ||
8523 | +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); | |
8524 | /* | |
8525 | * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) | |
8526 | */ | |
8527 | static int pit_clkevt_set_periodic(struct clock_event_device *dev) | |
8528 | { | |
8529 | struct pit_data *data = clkevt_to_pit_data(dev); | |
8530 | + int ret; | |
8531 | + | |
8532 | + ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
8533 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8534 | + "at91_tick", data); | |
8535 | + if (ret) | |
8536 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
8537 | + | |
8538 | + data->irq_requested = true; | |
8539 | ||
8540 | /* update clocksource counter */ | |
8541 | data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); | |
8542 | @@ -181,7 +196,6 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data) | |
8543 | { | |
8544 | unsigned long pit_rate; | |
8545 | unsigned bits; | |
8546 | - int ret; | |
8547 | ||
8548 | /* | |
8549 | * Use our actual MCK to figure out how many MCK/16 ticks per | |
8550 | @@ -206,13 +220,6 @@ static void __init at91sam926x_pit_common_init(struct pit_data *data) | |
8551 | data->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; | |
8552 | clocksource_register_hz(&data->clksrc, pit_rate); | |
8553 | ||
8554 | - /* Set up irq handler */ | |
8555 | - ret = request_irq(data->irq, at91sam926x_pit_interrupt, | |
8556 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8557 | - "at91_tick", data); | |
8558 | - if (ret) | |
8559 | - panic(pr_fmt("Unable to setup IRQ\n")); | |
8560 | - | |
8561 | /* Set up and register clockevents */ | |
8562 | data->clkevt.name = "pit"; | |
8563 | data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; | |
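
The PIT hunks above restructure interrupt setup: the "at91_tick" handler is now requested when the clockevent enters periodic mode and freed again on shutdown, with irq_requested remembering the state so shutdown is safe to call at any time. A hedged sketch of the shape; tick_irq, tick_interrupt and the hardware-programming comments are placeholders, not the driver's real code:

    #include <linux/interrupt.h>
    #include <linux/clockchips.h>

    static unsigned int tick_irq;          /* placeholder: parsed from DT in the driver */
    static bool tick_irq_requested;

    static irqreturn_t tick_interrupt(int irq, void *dev_id)
    {
        /* ... acknowledge the timer and call the event handler ... */
        return IRQ_HANDLED;
    }

    static int tick_set_periodic(struct clock_event_device *dev)
    {
        int ret;

        ret = request_irq(tick_irq, tick_interrupt,
                          IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL,
                          "at91_tick", dev);
        if (ret)
            return ret;
        tick_irq_requested = true;

        /* ... program the timer for periodic operation ... */
        return 0;
    }

    static int tick_shutdown(struct clock_event_device *dev)
    {
        /* ... stop the timer hardware first ... */
        if (tick_irq_requested) {
            free_irq(tick_irq, dev);
            tick_irq_requested = false;
        }
        return 0;
    }
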
8564 | diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c | |
8565 | index 29d21d68df5a..103d0fd70cc4 100644 | |
8566 | --- a/drivers/clocksource/timer-atmel-st.c | |
8567 | +++ b/drivers/clocksource/timer-atmel-st.c | |
8568 | @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) | |
8569 | last_crtr = read_CRTR(); | |
8570 | } | |
8571 | ||
8572 | +static int atmel_st_irq; | |
8573 | + | |
8574 | static int clkevt32k_shutdown(struct clock_event_device *evt) | |
8575 | { | |
8576 | clkdev32k_disable_and_flush_irq(); | |
8577 | irqmask = 0; | |
8578 | regmap_write(regmap_st, AT91_ST_IER, irqmask); | |
8579 | + free_irq(atmel_st_irq, regmap_st); | |
8580 | return 0; | |
8581 | } | |
8582 | ||
8583 | static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
8584 | { | |
8585 | + int ret; | |
8586 | + | |
8587 | clkdev32k_disable_and_flush_irq(); | |
8588 | ||
8589 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
8590 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8591 | + "at91_tick", regmap_st); | |
8592 | + if (ret) | |
8593 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
8594 | + | |
8595 | /* | |
8596 | * ALM for oneshot irqs, set by next_event() | |
8597 | * before 32 seconds have passed. | |
8598 | @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) | |
8599 | ||
8600 | static int clkevt32k_set_periodic(struct clock_event_device *dev) | |
8601 | { | |
8602 | + int ret; | |
8603 | + | |
8604 | clkdev32k_disable_and_flush_irq(); | |
8605 | ||
8606 | + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, | |
8607 | + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8608 | + "at91_tick", regmap_st); | |
8609 | + if (ret) | |
8610 | + panic(pr_fmt("Unable to setup IRQ\n")); | |
8611 | + | |
8612 | /* PIT for periodic irqs; fixed rate of 1/HZ */ | |
8613 | irqmask = AT91_ST_PITS; | |
8614 | regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); | |
8615 | @@ -198,7 +217,7 @@ static void __init atmel_st_timer_init(struct device_node *node) | |
8616 | { | |
8617 | struct clk *sclk; | |
8618 | unsigned int sclk_rate, val; | |
8619 | - int irq, ret; | |
8620 | + int ret; | |
8621 | ||
8622 | regmap_st = syscon_node_to_regmap(node); | |
8623 | if (IS_ERR(regmap_st)) | |
8624 | @@ -210,17 +229,10 @@ static void __init atmel_st_timer_init(struct device_node *node) | |
8625 | regmap_read(regmap_st, AT91_ST_SR, &val); | |
8626 | ||
8627 | /* Get the interrupts property */ | |
8628 | - irq = irq_of_parse_and_map(node, 0); | |
8629 | - if (!irq) | |
8630 | + atmel_st_irq = irq_of_parse_and_map(node, 0); | |
8631 | + if (!atmel_st_irq) | |
8632 | panic(pr_fmt("Unable to get IRQ from DT\n")); | |
8633 | ||
8634 | - /* Make IRQs happen for the system timer */ | |
8635 | - ret = request_irq(irq, at91rm9200_timer_interrupt, | |
8636 | - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, | |
8637 | - "at91_tick", regmap_st); | |
8638 | - if (ret) | |
8639 | - panic(pr_fmt("Unable to setup IRQ\n")); | |
8640 | - | |
8641 | sclk = of_clk_get(node, 0); | |
8642 | if (IS_ERR(sclk)) | |
8643 | panic(pr_fmt("Unable to get slow clock\n")); | |
8644 | diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 | |
8645 | index c59bdcb83217..8f23161d80be 100644 | |
8646 | --- a/drivers/cpufreq/Kconfig.x86 | |
8647 | +++ b/drivers/cpufreq/Kconfig.x86 | |
8648 | @@ -123,7 +123,7 @@ config X86_POWERNOW_K7_ACPI | |
8649 | ||
8650 | config X86_POWERNOW_K8 | |
8651 | tristate "AMD Opteron/Athlon64 PowerNow!" | |
8652 | - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ | |
8653 | + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE | |
8654 | help | |
8655 | This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. | |
8656 | Support for K10 and newer processors is now in acpi-cpufreq. | |
8657 | diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c | |
8658 | index 344058f8501a..d5657d50ac40 100644 | |
8659 | --- a/drivers/cpuidle/coupled.c | |
8660 | +++ b/drivers/cpuidle/coupled.c | |
8661 | @@ -119,7 +119,6 @@ struct cpuidle_coupled { | |
8662 | ||
8663 | #define CPUIDLE_COUPLED_NOT_IDLE (-1) | |
8664 | ||
8665 | -static DEFINE_MUTEX(cpuidle_coupled_lock); | |
8666 | static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); | |
8667 | ||
8668 | /* | |
8669 | diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
8670 | index 6ed7d63a0688..9da7482ad256 100644 | |
8671 | --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
8672 | +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c | |
8673 | @@ -1264,7 +1264,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, | |
8674 | if (ret) | |
8675 | return ret; | |
8676 | ||
8677 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
8678 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); | |
8679 | +#endif | |
8680 | ||
8681 | i915_gem_execbuffer_move_to_active(vmas, params->request); | |
8682 | i915_gem_execbuffer_retire_commands(params); | |
8683 | diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
8684 | index c0a96f1ee18e..deb1e207fa3c 100644 | |
8685 | --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
8686 | +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c | |
8687 | @@ -39,7 +39,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) | |
8688 | if (!mutex_is_locked(mutex)) | |
8689 | return false; | |
8690 | ||
8691 | -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) | |
8692 | +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) | |
8693 | return mutex->owner == task; | |
8694 | #else | |
8695 | /* Since UP may be pre-empted, we cannot assume that we own the lock */ | |
8696 | diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c | |
8697 | index 0f42a2782afc..80a1db09a379 100644 | |
8698 | --- a/drivers/gpu/drm/i915/i915_irq.c | |
8699 | +++ b/drivers/gpu/drm/i915/i915_irq.c | |
8700 | @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
8701 | spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); | |
8702 | ||
8703 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
8704 | + preempt_disable_rt(); | |
8705 | ||
8706 | /* Get optional system timestamp before query. */ | |
8707 | if (stime) | |
8708 | @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
8709 | *etime = ktime_get(); | |
8710 | ||
8711 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
8712 | + preempt_enable_rt(); | |
8713 | ||
8714 | spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); | |
8715 | ||
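
The scanout-position hunk above fills in the placeholder that the mainline comment already announces. On PREEMPT_RT, spin_lock_irqsave() on uncore.lock becomes a sleeping lock that leaves both interrupts and preemption enabled, so the stime/etime sampling window needs an explicit preemption fence. The helpers are expected to be defined along these lines in the RT series (a sketch, not verified against a specific revision); they compile away on mainline:

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define preempt_disable_rt()    preempt_disable()
    # define preempt_enable_rt()     preempt_enable()
    #else
    # define preempt_disable_rt()    do { } while (0)
    # define preempt_enable_rt()     do { } while (0)
    #endif
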
8716 | diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c | |
8717 | index 909d1d71d130..8688709b4ffa 100644 | |
8718 | --- a/drivers/gpu/drm/i915/intel_display.c | |
8719 | +++ b/drivers/gpu/drm/i915/intel_display.c | |
8720 | @@ -11400,7 +11400,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe) | |
8721 | struct intel_crtc *intel_crtc = to_intel_crtc(crtc); | |
8722 | struct intel_unpin_work *work; | |
8723 | ||
8724 | - WARN_ON(!in_interrupt()); | |
8725 | + WARN_ON_NONRT(!in_interrupt()); | |
8726 | ||
8727 | if (crtc == NULL) | |
8728 | return; | |
8729 | diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c | |
8730 | index 2cc6aa072f4c..b79d33f14868 100644 | |
8731 | --- a/drivers/gpu/drm/i915/intel_sprite.c | |
8732 | +++ b/drivers/gpu/drm/i915/intel_sprite.c | |
8733 | @@ -38,6 +38,7 @@ | |
8734 | #include "intel_drv.h" | |
8735 | #include <drm/i915_drm.h> | |
8736 | #include "i915_drv.h" | |
8737 | +#include <linux/locallock.h> | |
8738 | ||
8739 | static bool | |
8740 | format_is_yuv(uint32_t format) | |
8741 | @@ -64,6 +65,8 @@ static int usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, | |
8742 | 1000 * adjusted_mode->crtc_htotal); | |
8743 | } | |
8744 | ||
8745 | +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); | |
8746 | + | |
8747 | /** | |
8748 | * intel_pipe_update_start() - start update of a set of display registers | |
8749 | * @crtc: the crtc of which the registers are going to be updated | |
8750 | @@ -96,7 +99,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
8751 | min = vblank_start - usecs_to_scanlines(adjusted_mode, 100); | |
8752 | max = vblank_start - 1; | |
8753 | ||
8754 | - local_irq_disable(); | |
8755 | + local_lock_irq(pipe_update_lock); | |
8756 | ||
8757 | if (min <= 0 || max <= 0) | |
8758 | return; | |
8759 | @@ -126,11 +129,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) | |
8760 | break; | |
8761 | } | |
8762 | ||
8763 | - local_irq_enable(); | |
8764 | + local_unlock_irq(pipe_update_lock); | |
8765 | ||
8766 | timeout = schedule_timeout(timeout); | |
8767 | ||
8768 | - local_irq_disable(); | |
8769 | + local_lock_irq(pipe_update_lock); | |
8770 | } | |
8771 | ||
8772 | finish_wait(wq, &wait); | |
8773 | @@ -164,7 +167,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc) | |
8774 | ||
8775 | trace_i915_pipe_update_end(crtc, end_vbl_count, scanline_end); | |
8776 | ||
8777 | - local_irq_enable(); | |
8778 | + local_unlock_irq(pipe_update_lock); | |
8779 | ||
8780 | if (crtc->debug.start_vbl_count && | |
8781 | crtc->debug.start_vbl_count != end_vbl_count) { | |
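
pipe_update_lock above replaces a bare local_irq_disable() window. DEFINE_LOCAL_IRQ_LOCK() comes from the RT series' <linux/locallock.h>: on a non-RT build local_lock_irq() maps straight to local_irq_disable(), preserving mainline behavior, while on RT it acquires a per-CPU sleeping lock so the vblank-evasion wait loop stays preemptible. The usage shape, with an illustrative name:

    #include <linux/locallock.h>        /* provided by the RT series */

    static DEFINE_LOCAL_IRQ_LOCK(update_lock);

    static void update_critical_section(void)
    {
        local_lock_irq(update_lock);    /* non-RT: local_irq_disable() */
        /* ... work that must not be disturbed on this CPU ... */
        local_unlock_irq(update_lock);  /* non-RT: local_irq_enable() */
    }
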
8782 | diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c | |
8783 | index 3645b223aa37..642854b2ed2c 100644 | |
8784 | --- a/drivers/gpu/drm/radeon/radeon_display.c | |
8785 | +++ b/drivers/gpu/drm/radeon/radeon_display.c | |
8786 | @@ -1862,6 +1862,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
8787 | struct radeon_device *rdev = dev->dev_private; | |
8788 | ||
8789 | /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ | |
8790 | + preempt_disable_rt(); | |
8791 | ||
8792 | /* Get optional system timestamp before query. */ | |
8793 | if (stime) | |
8794 | @@ -1954,6 +1955,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, | |
8795 | *etime = ktime_get(); | |
8796 | ||
8797 | /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ | |
8798 | + preempt_enable_rt(); | |
8799 | ||
8800 | /* Decode into vertical and horizontal scanout position. */ | |
8801 | *vpos = position & 0x1fff; | |
8802 | diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c | |
8803 | index 509ed9731630..b2498b270f2c 100644 | |
8804 | --- a/drivers/hv/vmbus_drv.c | |
8805 | +++ b/drivers/hv/vmbus_drv.c | |
8806 | @@ -820,7 +820,7 @@ static void vmbus_isr(void) | |
8807 | tasklet_schedule(&msg_dpc); | |
8808 | } | |
8809 | ||
8810 | - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); | |
8811 | + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, 0); | |
8812 | } | |
8813 | ||
8814 | ||
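
The extra 0 passed above matches the RT series' reworked entropy hook, which moves the actual mixing out of hard-interrupt context. As far as we can tell, the new third parameter is the interrupted instruction pointer, with callers that have none passing 0; the assumed prototype:

    /* assumed RT-series prototype (mainline takes only two arguments): */
    void add_interrupt_randomness(int irq, int irq_flags, __u64 ip);
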
8815 | diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c | |
8816 | index 08d26ba61ed3..46b89dd42b10 100644 | |
8817 | --- a/drivers/i2c/busses/i2c-omap.c | |
8818 | +++ b/drivers/i2c/busses/i2c-omap.c | |
8819 | @@ -995,15 +995,12 @@ omap_i2c_isr(int irq, void *dev_id) | |
8820 | u16 mask; | |
8821 | u16 stat; | |
8822 | ||
8823 | - spin_lock(&omap->lock); | |
8824 | - mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG); | |
8825 | stat = omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG); | |
8826 | + mask = omap_i2c_read_reg(omap, OMAP_I2C_IE_REG); | |
8827 | ||
8828 | if (stat & mask) | |
8829 | ret = IRQ_WAKE_THREAD; | |
8830 | ||
8831 | - spin_unlock(&omap->lock); | |
8832 | - | |
8833 | return ret; | |
8834 | } | |
8835 | ||
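
The omap_i2c_isr() change is safe because this is only the primary half of a threaded interrupt: it just inspects the status and enable registers and either disowns the (shared) interrupt or wakes the handler thread, where the real work and locking live. Taking a spinlock here would be a problem on RT, where spinlocks sleep. A generic sketch of that split; struct my_dev and the register offsets are hypothetical:

    #include <linux/interrupt.h>
    #include <linux/spinlock.h>
    #include <linux/io.h>

    struct my_dev {
        spinlock_t lock;
        void __iomem *base;
    };

    static irqreturn_t my_isr(int irq, void *dev_id)        /* hard-irq half */
    {
        struct my_dev *dev = dev_id;

        /* hypothetical STAT (0x08) and IE (0x04) offsets */
        if (!(readl(dev->base + 0x08) & readl(dev->base + 0x04)))
            return IRQ_NONE;        /* not ours (shared line) */
        return IRQ_WAKE_THREAD;     /* defer to the thread */
    }

    static irqreturn_t my_isr_thread(int irq, void *dev_id) /* threaded half */
    {
        struct my_dev *dev = dev_id;

        spin_lock_irq(&dev->lock);
        /* ... acknowledge and process the event ... */
        spin_unlock_irq(&dev->lock);
        return IRQ_HANDLED;
    }

    /* registration:
     * request_threaded_irq(irq, my_isr, my_isr_thread, 0, "my_dev", dev);
     */
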
8836 | diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c | |
8837 | index 36f76e28a0bf..394f142f90c7 100644 | |
8838 | --- a/drivers/ide/alim15x3.c | |
8839 | +++ b/drivers/ide/alim15x3.c | |
8840 | @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) | |
8841 | ||
8842 | isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); | |
8843 | ||
8844 | - local_irq_save(flags); | |
8845 | + local_irq_save_nort(flags); | |
8846 | ||
8847 | if (m5229_revision < 0xC2) { | |
8848 | /* | |
8849 | @@ -325,7 +325,7 @@ out: | |
8850 | } | |
8851 | pci_dev_put(north); | |
8852 | pci_dev_put(isa_dev); | |
8853 | - local_irq_restore(flags); | |
8854 | + local_irq_restore_nort(flags); | |
8855 | return 0; | |
8856 | } | |
8857 | ||
8858 | diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c | |
8859 | index 696b6c1ec940..0d0a96629b73 100644 | |
8860 | --- a/drivers/ide/hpt366.c | |
8861 | +++ b/drivers/ide/hpt366.c | |
8862 | @@ -1241,7 +1241,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
8863 | ||
8864 | dma_old = inb(base + 2); | |
8865 | ||
8866 | - local_irq_save(flags); | |
8867 | + local_irq_save_nort(flags); | |
8868 | ||
8869 | dma_new = dma_old; | |
8870 | pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); | |
8871 | @@ -1252,7 +1252,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, | |
8872 | if (dma_new != dma_old) | |
8873 | outb(dma_new, base + 2); | |
8874 | ||
8875 | - local_irq_restore(flags); | |
8876 | + local_irq_restore_nort(flags); | |
8877 | ||
8878 | printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", | |
8879 | hwif->name, base, base + 7); | |
8880 | diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c | |
8881 | index 19763977568c..4169433faab5 100644 | |
8882 | --- a/drivers/ide/ide-io-std.c | |
8883 | +++ b/drivers/ide/ide-io-std.c | |
8884 | @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
8885 | unsigned long uninitialized_var(flags); | |
8886 | ||
8887 | if ((io_32bit & 2) && !mmio) { | |
8888 | - local_irq_save(flags); | |
8889 | + local_irq_save_nort(flags); | |
8890 | ata_vlb_sync(io_ports->nsect_addr); | |
8891 | } | |
8892 | ||
8893 | @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
8894 | insl(data_addr, buf, words); | |
8895 | ||
8896 | if ((io_32bit & 2) && !mmio) | |
8897 | - local_irq_restore(flags); | |
8898 | + local_irq_restore_nort(flags); | |
8899 | ||
8900 | if (((len + 1) & 3) < 2) | |
8901 | return; | |
8902 | @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
8903 | unsigned long uninitialized_var(flags); | |
8904 | ||
8905 | if ((io_32bit & 2) && !mmio) { | |
8906 | - local_irq_save(flags); | |
8907 | + local_irq_save_nort(flags); | |
8908 | ata_vlb_sync(io_ports->nsect_addr); | |
8909 | } | |
8910 | ||
8911 | @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, | |
8912 | outsl(data_addr, buf, words); | |
8913 | ||
8914 | if ((io_32bit & 2) && !mmio) | |
8915 | - local_irq_restore(flags); | |
8916 | + local_irq_restore_nort(flags); | |
8917 | ||
8918 | if (((len + 1) & 3) < 2) | |
8919 | return; | |
8920 | diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c | |
8921 | index 669ea1e45795..e12e43e62245 100644 | |
8922 | --- a/drivers/ide/ide-io.c | |
8923 | +++ b/drivers/ide/ide-io.c | |
8924 | @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) | |
8925 | /* disable_irq_nosync ?? */ | |
8926 | disable_irq(hwif->irq); | |
8927 | /* local CPU only, as if we were handling an interrupt */ | |
8928 | - local_irq_disable(); | |
8929 | + local_irq_disable_nort(); | |
8930 | if (hwif->polling) { | |
8931 | startstop = handler(drive); | |
8932 | } else if (drive_is_ready(drive)) { | |
8933 | diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c | |
8934 | index 376f2dc410c5..f014dd1b73dc 100644 | |
8935 | --- a/drivers/ide/ide-iops.c | |
8936 | +++ b/drivers/ide/ide-iops.c | |
8937 | @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, | |
8938 | if ((stat & ATA_BUSY) == 0) | |
8939 | break; | |
8940 | ||
8941 | - local_irq_restore(flags); | |
8942 | + local_irq_restore_nort(flags); | |
8943 | *rstat = stat; | |
8944 | return -EBUSY; | |
8945 | } | |
8946 | } | |
8947 | - local_irq_restore(flags); | |
8948 | + local_irq_restore_nort(flags); | |
8949 | } | |
8950 | /* | |
8951 | * Allow status to settle, then read it again. | |
8952 | diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c | |
8953 | index 0b63facd1d87..4ceba37afc0c 100644 | |
8954 | --- a/drivers/ide/ide-probe.c | |
8955 | +++ b/drivers/ide/ide-probe.c | |
8956 | @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) | |
8957 | int bswap = 1; | |
8958 | ||
8959 | /* local CPU only; some systems need this */ | |
8960 | - local_irq_save(flags); | |
8961 | + local_irq_save_nort(flags); | |
8962 | /* read 512 bytes of id info */ | |
8963 | hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); | |
8964 | - local_irq_restore(flags); | |
8965 | + local_irq_restore_nort(flags); | |
8966 | ||
8967 | drive->dev_flags |= IDE_DFLAG_ID_READ; | |
8968 | #ifdef DEBUG | |
8969 | diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c | |
8970 | index a716693417a3..be0568c722d6 100644 | |
8971 | --- a/drivers/ide/ide-taskfile.c | |
8972 | +++ b/drivers/ide/ide-taskfile.c | |
8973 | @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
8974 | ||
8975 | page_is_high = PageHighMem(page); | |
8976 | if (page_is_high) | |
8977 | - local_irq_save(flags); | |
8978 | + local_irq_save_nort(flags); | |
8979 | ||
8980 | buf = kmap_atomic(page) + offset; | |
8981 | ||
8982 | @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, | |
8983 | kunmap_atomic(buf); | |
8984 | ||
8985 | if (page_is_high) | |
8986 | - local_irq_restore(flags); | |
8987 | + local_irq_restore_nort(flags); | |
8988 | ||
8989 | len -= nr_bytes; | |
8990 | } | |
8991 | @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, | |
8992 | } | |
8993 | ||
8994 | if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) | |
8995 | - local_irq_disable(); | |
8996 | + local_irq_disable_nort(); | |
8997 | ||
8998 | ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); | |
8999 | ||
9000 | diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
9001 | index 87799de90a1d..66cdd37f8605 100644 | |
9002 | --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
9003 | +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c | |
9004 | @@ -857,7 +857,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
9005 | ||
9006 | ipoib_dbg_mcast(priv, "restarting multicast task\n"); | |
9007 | ||
9008 | - local_irq_save(flags); | |
9009 | + local_irq_save_nort(flags); | |
9010 | netif_addr_lock(dev); | |
9011 | spin_lock(&priv->lock); | |
9012 | ||
9013 | @@ -939,7 +939,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) | |
9014 | ||
9015 | spin_unlock(&priv->lock); | |
9016 | netif_addr_unlock(dev); | |
9017 | - local_irq_restore(flags); | |
9018 | + local_irq_restore_nort(flags); | |
9019 | ||
9020 | /* | |
9021 | * make sure the in-flight joins have finished before we attempt | |
9022 | diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c | |
9023 | index 4a2a9e370be7..e970d9afd179 100644 | |
9024 | --- a/drivers/input/gameport/gameport.c | |
9025 | +++ b/drivers/input/gameport/gameport.c | |
9026 | @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) | |
9027 | tx = ~0; | |
9028 | ||
9029 | for (i = 0; i < 50; i++) { | |
9030 | - local_irq_save(flags); | |
9031 | + local_irq_save_nort(flags); | |
9032 | t1 = ktime_get_ns(); | |
9033 | for (t = 0; t < 50; t++) | |
9034 | gameport_read(gameport); | |
9035 | t2 = ktime_get_ns(); | |
9036 | t3 = ktime_get_ns(); | |
9037 | - local_irq_restore(flags); | |
9038 | + local_irq_restore_nort(flags); | |
9039 | udelay(i * 10); | |
9040 | t = (t2 - t1) - (t3 - t2); | |
9041 | if (t < tx) | |
9042 | @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
9043 | tx = 1 << 30; | |
9044 | ||
9045 | for(i = 0; i < 50; i++) { | |
9046 | - local_irq_save(flags); | |
9047 | + local_irq_save_nort(flags); | |
9048 | GET_TIME(t1); | |
9049 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
9050 | GET_TIME(t2); | |
9051 | GET_TIME(t3); | |
9052 | - local_irq_restore(flags); | |
9053 | + local_irq_restore_nort(flags); | |
9054 | udelay(i * 10); | |
9055 | if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; | |
9056 | } | |
9057 | @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) | |
9058 | tx = 1 << 30; | |
9059 | ||
9060 | for(i = 0; i < 50; i++) { | |
9061 | - local_irq_save(flags); | |
9062 | + local_irq_save_nort(flags); | |
9063 | t1 = rdtsc(); | |
9064 | for (t = 0; t < 50; t++) gameport_read(gameport); | |
9065 | t2 = rdtsc(); | |
9066 | - local_irq_restore(flags); | |
9067 | + local_irq_restore_nort(flags); | |
9068 | udelay(i * 10); | |
9069 | if (t2 - t1 < tx) tx = t2 - t1; | |
9070 | } | |
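
All of the IDE and gameport hunks above lean on one pair of primitives. The _nort variants behave exactly like local_irq_save()/local_irq_restore() on a mainline build; on PREEMPT_RT they reduce to flag bookkeeping only, so these long PIO and calibration loops no longer run with hard interrupts off. Sketched from memory of the RT series' irqflags.h changes:

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_disable_nort()       do { } while (0)
    # define local_irq_enable_nort()        do { } while (0)
    # define local_irq_save_nort(flags)     local_save_flags(flags)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_disable_nort()       local_irq_disable()
    # define local_irq_enable_nort()        local_irq_enable()
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif
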
9071 | diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c | |
9072 | index 0397985a2601..bc0e7d2c5cff 100644 | |
9073 | --- a/drivers/iommu/amd_iommu.c | |
9074 | +++ b/drivers/iommu/amd_iommu.c | |
9075 | @@ -2019,10 +2019,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, | |
9076 | int ret; | |
9077 | ||
9078 | /* | |
9079 | - * Must be called with IRQs disabled. Warn here to detect early | |
9080 | - * when its not. | |
9081 | + * Must be called with IRQs disabled on a non RT kernel. Warn here to | |
9082 | + * detect early when its not. | |
9083 | */ | |
9084 | - WARN_ON(!irqs_disabled()); | |
9085 | + WARN_ON_NONRT(!irqs_disabled()); | |
9086 | ||
9087 | /* lock domain */ | |
9088 | spin_lock(&domain->lock); | |
9089 | @@ -2185,10 +2185,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) | |
9090 | struct protection_domain *domain; | |
9091 | ||
9092 | /* | |
9093 | - * Must be called with IRQs disabled. Warn here to detect early | |
9094 | - * when its not. | |
9095 | + * Must be called with IRQs disabled on a non-RT kernel. Warn here to | |
9096 | + * detect early when it's not. | |
9097 | */ | |
9098 | - WARN_ON(!irqs_disabled()); | |
9099 | + WARN_ON_NONRT(!irqs_disabled()); | |
9100 | ||
9101 | if (WARN_ON(!dev_data->domain)) | |
9102 | return; | |
9103 | diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig | |
9104 | index 5bda6a9b56bb..d6286584c807 100644 | |
9105 | --- a/drivers/leds/trigger/Kconfig | |
9106 | +++ b/drivers/leds/trigger/Kconfig | |
9107 | @@ -61,7 +61,7 @@ config LEDS_TRIGGER_BACKLIGHT | |
9108 | ||
9109 | config LEDS_TRIGGER_CPU | |
9110 | bool "LED CPU Trigger" | |
9111 | - depends on LEDS_TRIGGERS | |
9112 | + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE | |
9113 | help | |
9114 | This allows LEDs to be controlled by active CPUs. This shows | |
9115 | the active CPUs across an array of LEDs so you can see which | |
9116 | diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig | |
9117 | index 4d200883c505..98b64ed5cb81 100644 | |
9118 | --- a/drivers/md/bcache/Kconfig | |
9119 | +++ b/drivers/md/bcache/Kconfig | |
9120 | @@ -1,6 +1,7 @@ | |
9121 | ||
9122 | config BCACHE | |
9123 | tristate "Block device as cache" | |
9124 | + depends on !PREEMPT_RT_FULL | |
9125 | ---help--- | |
9126 | Allows a block device to be used as cache for other devices; uses | |
9127 | a btree for indexing and the layout is optimized for SSDs. | |
9128 | diff --git a/drivers/md/dm.c b/drivers/md/dm.c | |
cb95d48a | 9129 | index 84aa8b1d0480..b7f070e3698e 100644 |
9130 | --- a/drivers/md/dm.c |
9131 | +++ b/drivers/md/dm.c | |
9132 | @@ -2127,7 +2127,7 @@ static void dm_request_fn(struct request_queue *q) | |
9133 | /* Establish tio->ti before queuing work (map_tio_request) */ | |
9134 | tio->ti = ti; | |
9135 | queue_kthread_work(&md->kworker, &tio->work); | |
9136 | - BUG_ON(!irqs_disabled()); | |
9137 | + BUG_ON_NONRT(!irqs_disabled()); | |
9138 | } | |
9139 | ||
9140 | goto out; | |
9141 | diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c | |
9142 | index 10ce885445f6..76f71791361c 100644 | |
9143 | --- a/drivers/md/raid5.c | |
9144 | +++ b/drivers/md/raid5.c | |
9145 | @@ -1920,8 +1920,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
9146 | struct raid5_percpu *percpu; | |
9147 | unsigned long cpu; | |
9148 | ||
9149 | - cpu = get_cpu(); | |
9150 | + cpu = get_cpu_light(); | |
9151 | percpu = per_cpu_ptr(conf->percpu, cpu); | |
9152 | + spin_lock(&percpu->lock); | |
9153 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { | |
9154 | ops_run_biofill(sh); | |
9155 | overlap_clear++; | |
9156 | @@ -1977,7 +1978,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) | |
9157 | if (test_and_clear_bit(R5_Overlap, &dev->flags)) | |
9158 | wake_up(&sh->raid_conf->wait_for_overlap); | |
9159 | } | |
9160 | - put_cpu(); | |
9161 | + spin_unlock(&percpu->lock); | |
9162 | + put_cpu_light(); | |
9163 | } | |
9164 | ||
9165 | static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) | |
9166 | @@ -6414,6 +6416,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) | |
9167 | __func__, cpu); | |
9168 | break; | |
9169 | } | |
9170 | + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); | |
9171 | } | |
9172 | put_online_cpus(); | |
9173 | ||
9174 | diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h | |
9175 | index 517d4b68a1be..efe91887ecd7 100644 | |
9176 | --- a/drivers/md/raid5.h | |
9177 | +++ b/drivers/md/raid5.h | |
9178 | @@ -504,6 +504,7 @@ struct r5conf { | |
9179 | int recovery_disabled; | |
9180 | /* per cpu variables */ | |
9181 | struct raid5_percpu { | |
9182 | + spinlock_t lock; /* Protection for -RT */ | |
9183 | struct page *spare_page; /* Used when checking P/Q in raid6 */ | |
9184 | struct flex_array *scribble; /* space for constructing buffer | |
9185 | * lists and performing address | |
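
The raid5 change trades get_cpu(), which would keep preemption off across the whole parity/xor run, for get_cpu_light(), which on RT only pins the task to its CPU; exclusive use of the per-CPU scratch state is then enforced by the new percpu->lock rather than by disabled preemption. The general pattern, with illustrative names:

    #include <linux/percpu.h>
    #include <linux/spinlock.h>

    /* Illustrative pattern, not the raid5 code itself. */
    struct scratch {
        spinlock_t lock;    /* protects the buffers below; spin_lock_init() per CPU at setup */
        /* ... per-CPU working buffers ... */
    };

    static DEFINE_PER_CPU(struct scratch, scratch);

    static void run_ops(void)
    {
        struct scratch *s;
        int cpu;

        cpu = get_cpu_light();          /* RT: migration off, preemption stays on */
        s = per_cpu_ptr(&scratch, cpu);
        spin_lock(&s->lock);            /* serialize users of this CPU's scratch */
        /* ... compute using the scratch buffers ... */
        spin_unlock(&s->lock);
        put_cpu_light();
    }
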
9186 | diff --git a/drivers/media/platform/vsp1/vsp1_video.c b/drivers/media/platform/vsp1/vsp1_video.c | |
9187 | index 5ce88e1f5d71..b4f8cd74ecb8 100644 | |
9188 | --- a/drivers/media/platform/vsp1/vsp1_video.c | |
9189 | +++ b/drivers/media/platform/vsp1/vsp1_video.c | |
9190 | @@ -520,7 +520,7 @@ static bool vsp1_pipeline_stopped(struct vsp1_pipeline *pipe) | |
9191 | bool stopped; | |
9192 | ||
9193 | spin_lock_irqsave(&pipe->irqlock, flags); | |
9194 | - stopped = pipe->state == VSP1_PIPELINE_STOPPED, | |
9195 | + stopped = pipe->state == VSP1_PIPELINE_STOPPED; | |
9196 | spin_unlock_irqrestore(&pipe->irqlock, flags); | |
9197 | ||
9198 | return stopped; | |
9199 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig | |
9200 | index 4bf7d50b1bc7..6f7e99ad6e29 100644 | |
9201 | --- a/drivers/misc/Kconfig | |
9202 | +++ b/drivers/misc/Kconfig | |
9203 | @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI | |
9204 | config ATMEL_TCLIB | |
9205 | bool "Atmel AT32/AT91 Timer/Counter Library" | |
9206 | depends on (AVR32 || ARCH_AT91) | |
9207 | + default y if PREEMPT_RT_FULL | |
9208 | help | |
9209 | Select this if you want a library to allocate the Timer/Counter | |
9210 | blocks found on many Atmel processors. This facilitates using | |
9211 | @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC | |
9212 | are combined to make a single 32-bit timer. | |
9213 | ||
9214 | When GENERIC_CLOCKEVENTS is defined, the third timer channel | |
9215 | - may be used as a clock event device supporting oneshot mode | |
9216 | - (delays of up to two seconds) based on the 32 KiHz clock. | |
9217 | + may be used as a clock event device supporting oneshot mode. | |
9218 | ||
9219 | config ATMEL_TCB_CLKSRC_BLOCK | |
9220 | int | |
9221 | @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK | |
9222 | TC can be used for other purposes, such as PWM generation and | |
9223 | interval timing. | |
9224 | ||
9225 | +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK | |
9226 | + bool "TC Block use 32 KiHz clock" | |
9227 | + depends on ATMEL_TCB_CLKSRC | |
9228 | + default y if !PREEMPT_RT_FULL | |
9229 | + help | |
9230 | + Select this to use the 32 KiHz base clock rate as the TC block | |
9231 | + clock source for clock events. | |
9232 | + | |
9233 | + | |
9234 | config DUMMY_IRQ | |
9235 | tristate "Dummy IRQ handler" | |
9236 | default n | |
9237 | @@ -113,6 +122,35 @@ config IBM_ASM | |
9238 | for information on the specific driver level and support statement | |
9239 | for your IBM server. | |
9240 | ||
9241 | +config HWLAT_DETECTOR | |
9242 | + tristate "Testing module to detect hardware-induced latencies" | |
9243 | + depends on DEBUG_FS | |
9244 | + depends on RING_BUFFER | |
9245 | + default m | |
9246 | + ---help--- | |
9247 | + A simple hardware latency detector. Use this module to detect | |
9248 | + large latencies introduced by the behavior of the underlying | |
9249 | + system firmware external to Linux. We do this by periodically | |
9250 | + using stop_machine to grab all available CPUs and looking | |
9251 | + for unexplained gaps in the CPU timestamp counter(s). By | |
9252 | + default, the module is not enabled until the "enable" file | |
9253 | + within the "hwlat_detector" debugfs directory is toggled. | |
9254 | + | |
9255 | + This module is often used to detect SMI (System Management | |
9256 | + Interrupts) on x86 systems, though it is not x86 specific. To | |
9257 | + this end, we default to using a sample window of 1 second, | |
9258 | + during which we will sample for 0.5 seconds. If an SMI or | |
9259 | + similar event occurs during that time, it is recorded | |
9260 | + into an 8K-sample global ring buffer until retrieved. | |
9261 | + | |
9262 | + WARNING: This software should never be enabled (it can be built | |
9263 | + but should not be turned on after it is loaded) in a production | |
9264 | + environment where high latencies are a concern since the | |
9265 | + sampling mechanism actually introduces latencies for | |
9266 | + regular tasks while the CPU(s) are being held. | |
9267 | + | |
9268 | + If unsure, say N | |
9269 | + | |
9270 | config PHANTOM | |
9271 | tristate "Sensable PHANToM (PCI)" | |
9272 | depends on PCI | |
9273 | diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile | |
9274 | index 537d7f3b78da..ec4aecba0656 100644 | |
9275 | --- a/drivers/misc/Makefile | |
9276 | +++ b/drivers/misc/Makefile | |
9277 | @@ -39,6 +39,7 @@ obj-$(CONFIG_C2PORT) += c2port/ | |
9278 | obj-$(CONFIG_HMC6352) += hmc6352.o | |
9279 | obj-y += eeprom/ | |
9280 | obj-y += cb710/ | |
9281 | +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o | |
9282 | obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o | |
9283 | obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o | |
9284 | obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o | |
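
Before the new file itself, a practical aside: once the module is loaded and enabled, latency records can be drained from the "sample" file it creates under debugfs. A minimal userspace reader, assuming the conventional /sys/kernel/debug mount point:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[256];
        ssize_t n;
        int fd = open("/sys/kernel/debug/hwlat_detector/sample", O_RDONLY);

        if (fd < 0) {
            perror("open sample");
            return 1;
        }
        /* each successful read returns one formatted sample */
        while ((n = read(fd, buf, sizeof(buf) - 1)) > 0) {
            buf[n] = '\0';
            fputs(buf, stdout);
        }
        close(fd);
        return 0;
    }
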
9285 | diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c | |
9286 | new file mode 100644 | |
9287 | index 000000000000..52f5ad5fd9c0 | |
9288 | --- /dev/null | |
9289 | +++ b/drivers/misc/hwlat_detector.c | |
9290 | @@ -0,0 +1,1240 @@ | |
9291 | +/* | |
9292 | + * hwlat_detector.c - A simple Hardware Latency detector. | |
9293 | + * | |
9294 | + * Use this module to detect large system latencies induced by the behavior of | |
9295 | + * certain underlying system hardware or firmware, independent of Linux itself. | |
9296 | + * The code was developed originally to detect the presence of SMIs on Intel | |
9297 | + * and AMD systems, although there is no dependency upon x86 herein. | |
9298 | + * | |
9299 | + * The classical example usage of this module is in detecting the presence of | |
9300 | + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a | |
9301 | + * somewhat special form of hardware interrupt spawned from earlier CPU debug | |
9302 | + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge | |
9303 | + * LPC (or other device) to generate a special interrupt under certain | |
9304 | + * circumstances, for example, upon expiration of a special SMI timer device, | |
9305 | + * due to certain external thermal readings, on certain I/O address accesses, | |
9306 | + * and other situations. An SMI hits a special CPU pin, triggers a special | |
9307 | + * SMI mode (complete with special memory map), and the OS is unaware. | |
9308 | + * | |
9309 | + * Although certain hardware-induced latencies are necessary (for example, | |
9310 | + * a modern system often requires an SMI handler for correct thermal control | |
9311 | + * and remote management) they can wreak havoc upon any OS-level | |
9312 | + * low-latency performance guarantees, especially when the OS is not even made | |
9313 | + * aware of the presence of these interrupts. For this reason, we need a | |
9314 | + * somewhat brute force mechanism to detect these interrupts. In this case, | |
9315 | + * we do it by hogging all of the CPU(s) for configurable timer intervals, | |
9316 | + * sampling the built-in CPU timer, looking for discontiguous readings. | |
9317 | + * | |
9318 | + * WARNING: This implementation necessarily introduces latencies. Therefore, | |
9319 | + * you should NEVER use this module in a production environment | |
9320 | + * requiring any kind of low-latency performance guarantee(s). | |
9321 | + * | |
9322 | + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> | |
9323 | + * | |
9324 | + * Includes useful feedback from Clark Williams <clark@redhat.com> | |
9325 | + * | |
9326 | + * This file is licensed under the terms of the GNU General Public | |
9327 | + * License version 2. This program is licensed "as is" without any | |
9328 | + * warranty of any kind, whether express or implied. | |
9329 | + */ | |
9330 | + | |
9331 | +#include <linux/module.h> | |
9332 | +#include <linux/init.h> | |
9333 | +#include <linux/ring_buffer.h> | |
9334 | +#include <linux/time.h> | |
9335 | +#include <linux/hrtimer.h> | |
9336 | +#include <linux/kthread.h> | |
9337 | +#include <linux/debugfs.h> | |
9338 | +#include <linux/seq_file.h> | |
9339 | +#include <linux/uaccess.h> | |
9340 | +#include <linux/version.h> | |
9341 | +#include <linux/delay.h> | |
9342 | +#include <linux/slab.h> | |
9343 | +#include <linux/trace_clock.h> | |
9344 | + | |
9345 | +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ | |
9346 | +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ | |
9347 | +#define U64STR_SIZE 22 /* 20 digits max */ | |
9348 | + | |
9349 | +#define VERSION "1.0.0" | |
9350 | +#define BANNER "hwlat_detector: " | |
9351 | +#define DRVNAME "hwlat_detector" | |
9352 | +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ | |
9353 | +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ | |
9354 | +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ | |
9355 | + | |
9356 | +/* Module metadata */ | |
9357 | + | |
9358 | +MODULE_LICENSE("GPL"); | |
9359 | +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); | |
9360 | +MODULE_DESCRIPTION("A simple hardware latency detector"); | |
9361 | +MODULE_VERSION(VERSION); | |
9362 | + | |
9363 | +/* Module parameters */ | |
9364 | + | |
9365 | +static int debug; | |
9366 | +static int enabled; | |
9367 | +static int threshold; | |
9368 | + | |
9369 | +module_param(debug, int, 0); /* enable debug */ | |
9370 | +module_param(enabled, int, 0); /* enable detector */ | |
9371 | +module_param(threshold, int, 0); /* latency threshold */ | |
9372 | + | |
9373 | +/* Buffering and sampling */ | |
9374 | + | |
9375 | +static struct ring_buffer *ring_buffer; /* sample buffer */ | |
9376 | +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ | |
9377 | +static unsigned long buf_size = BUF_SIZE_DEFAULT; | |
9378 | +static struct task_struct *kthread; /* sampling thread */ | |
9379 | + | |
9380 | +/* DebugFS filesystem entries */ | |
9381 | + | |
9382 | +static struct dentry *debug_dir; /* debugfs directory */ | |
9383 | +static struct dentry *debug_max; /* maximum TSC delta */ | |
9384 | +static struct dentry *debug_count; /* total detect count */ | |
9385 | +static struct dentry *debug_sample_width; /* sample width us */ | |
9386 | +static struct dentry *debug_sample_window; /* sample window us */ | |
9387 | +static struct dentry *debug_sample; /* raw samples us */ | |
9388 | +static struct dentry *debug_threshold; /* threshold us */ | |
9389 | +static struct dentry *debug_enable; /* enable/disable */ | |
9390 | + | |
9391 | +/* Individual samples and global state */ | |
9392 | + | |
9393 | +struct sample; /* latency sample */ | |
9394 | +struct data; /* Global state */ | |
9395 | + | |
9396 | +/* Sampling functions */ | |
9397 | +static int __buffer_add_sample(struct sample *sample); | |
9398 | +static struct sample *buffer_get_sample(struct sample *sample); | |
9399 | + | |
9400 | +/* Threading and state */ | |
9401 | +static int kthread_fn(void *unused); | |
9402 | +static int start_kthread(void); | |
9403 | +static int stop_kthread(void); | |
9404 | +static void __reset_stats(void); | |
9405 | +static int init_stats(void); | |
9406 | + | |
9407 | +/* Debugfs interface */ | |
9408 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, | |
9409 | + size_t cnt, loff_t *ppos, const u64 *entry); | |
9410 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, | |
9411 | + size_t cnt, loff_t *ppos, u64 *entry); | |
9412 | +static int debug_sample_fopen(struct inode *inode, struct file *filp); | |
9413 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, | |
9414 | + size_t cnt, loff_t *ppos); | |
9415 | +static int debug_sample_release(struct inode *inode, struct file *filp); | |
9416 | +static int debug_enable_fopen(struct inode *inode, struct file *filp); | |
9417 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, | |
9418 | + size_t cnt, loff_t *ppos); | |
9419 | +static ssize_t debug_enable_fwrite(struct file *file, | |
9420 | + const char __user *user_buffer, | |
9421 | + size_t user_size, loff_t *offset); | |
9422 | + | |
9423 | +/* Initialization functions */ | |
9424 | +static int init_debugfs(void); | |
9425 | +static void free_debugfs(void); | |
9426 | +static int detector_init(void); | |
9427 | +static void detector_exit(void); | |
9428 | + | |
9429 | +/* Individual latency samples are stored here when detected and packed into | |
9430 | + * the ring_buffer circular buffer, where they are overwritten when | |
9431 | + * more than buf_size/sizeof(sample) samples are received. */ | |
9432 | +struct sample { | |
9433 | + u64 seqnum; /* unique sequence */ | |
9434 | + u64 duration; /* ktime delta */ | |
9435 | + u64 outer_duration; /* ktime delta (outer loop) */ | |
9436 | + struct timespec timestamp; /* wall time */ | |
9437 | + unsigned long lost; | |
9438 | +}; | |
9439 | + | |
9440 | +/* keep the global state somewhere. */ | |
9441 | +static struct data { | |
9442 | + | |
9443 | + struct mutex lock; /* protect changes */ | |
9444 | + | |
9445 | + u64 count; /* total since reset */ | |
9446 | + u64 max_sample; /* max hardware latency */ | |
9447 | + u64 threshold; /* sample threshold level */ | |
9448 | + | |
9449 | + u64 sample_window; /* total sampling window (on+off) */ | |
9450 | + u64 sample_width; /* active sampling portion of window */ | |
9451 | + | |
9452 | + atomic_t sample_open; /* whether the sample file is open */ | |
9453 | + | |
9454 | + wait_queue_head_t wq; /* waitqueue for new sample values */ | |
9455 | + | |
9456 | +} data; | |
9457 | + | |
9458 | +/** | |
9459 | + * __buffer_add_sample - add a new latency sample recording to the ring buffer | |
9460 | + * @sample: The new latency sample value | |
9461 | + * | |
9462 | + * This receives a new latency sample and records it in a global ring buffer. | |
9463 | + * No additional locking is used in this case. | |
9464 | + */ | |
9465 | +static int __buffer_add_sample(struct sample *sample) | |
9466 | +{ | |
9467 | + return ring_buffer_write(ring_buffer, | |
9468 | + sizeof(struct sample), sample); | |
9469 | +} | |
9470 | + | |
9471 | +/** | |
9472 | + * buffer_get_sample - remove a hardware latency sample from the ring buffer | |
9473 | + * @sample: Pre-allocated storage for the sample | |
9474 | + * | |
9475 | + * This retrieves a hardware latency sample from the global circular buffer | |
9476 | + */ | |
9477 | +static struct sample *buffer_get_sample(struct sample *sample) | |
9478 | +{ | |
9479 | + struct ring_buffer_event *e = NULL; | |
9480 | + struct sample *s = NULL; | |
9481 | + unsigned int cpu = 0; | |
9482 | + | |
9483 | + if (!sample) | |
9484 | + return NULL; | |
9485 | + | |
9486 | + mutex_lock(&ring_buffer_mutex); | |
9487 | + for_each_online_cpu(cpu) { | |
9488 | + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); | |
9489 | + if (e) | |
9490 | + break; | |
9491 | + } | |
9492 | + | |
9493 | + if (e) { | |
9494 | + s = ring_buffer_event_data(e); | |
9495 | + memcpy(sample, s, sizeof(struct sample)); | |
9496 | + } else | |
9497 | + sample = NULL; | |
9498 | + mutex_unlock(&ring_buffer_mutex); | |
9499 | + | |
9500 | + return sample; | |
9501 | +} | |
9502 | + | |
9503 | +#ifndef CONFIG_TRACING | |
9504 | +#define time_type ktime_t | |
9505 | +#define time_get() ktime_get() | |
9506 | +#define time_to_us(x) ktime_to_us(x) | |
9507 | +#define time_sub(a, b) ktime_sub(a, b) | |
9508 | +#define init_time(a, b) (a).tv64 = b | |
9509 | +#define time_u64(a) ((a).tv64) | |
9510 | +#else | |
9511 | +#define time_type u64 | |
9512 | +#define time_get() trace_clock_local() | |
9513 | +#define time_to_us(x) div_u64(x, 1000) | |
9514 | +#define time_sub(a, b) ((a) - (b)) | |
9515 | +#define init_time(a, b) (a = b) | |
9516 | +#define time_u64(a) a | |
9517 | +#endif | |
9518 | +/** | |
9519 | + * get_sample - sample the CPU TSC and look for likely hardware latencies | |
9520 | + * | |
9521 | + * Used to repeatedly capture the CPU TSC (or similar), looking for potential | |
9522 | + * hardware-induced latency. Called with interrupts disabled and with | |
9523 | + * data.lock held. | |
9524 | + */ | |
9525 | +static int get_sample(void) | |
9526 | +{ | |
9527 | + time_type start, t1, t2, last_t2; | |
9528 | + s64 diff, total = 0; | |
9529 | + u64 sample = 0; | |
9530 | + u64 outer_sample = 0; | |
9531 | + int ret = -1; | |
9532 | + | |
9533 | + init_time(last_t2, 0); | |
9534 | + start = time_get(); /* start timestamp */ | |
9535 | + | |
9536 | + do { | |
9537 | + | |
9538 | + t1 = time_get(); /* we'll look for a discontinuity */ | |
9539 | + t2 = time_get(); | |
9540 | + | |
9541 | + if (time_u64(last_t2)) { | |
9542 | + /* Check the delta from outer loop (t2 to next t1) */ | |
9543 | + diff = time_to_us(time_sub(t1, last_t2)); | |
9544 | + /* This shouldn't happen */ | |
9545 | + if (diff < 0) { | |
9546 | + pr_err(BANNER "time running backwards\n"); | |
9547 | + goto out; | |
9548 | + } | |
9549 | + if (diff > outer_sample) | |
9550 | + outer_sample = diff; | |
9551 | + } | |
9552 | + last_t2 = t2; | |
9553 | + | |
9554 | + total = time_to_us(time_sub(t2, start)); /* sample width */ | |
9555 | + | |
9556 | + /* This checks the inner loop (t1 to t2) */ | |
9557 | + diff = time_to_us(time_sub(t2, t1)); /* current diff */ | |
9558 | + | |
9559 | + /* This shouldn't happen */ | |
9560 | + if (diff < 0) { | |
9561 | + pr_err(BANNER "time running backwards\n"); | |
9562 | + goto out; | |
9563 | + } | |
9564 | + | |
9565 | + if (diff > sample) | |
9566 | + sample = diff; /* only want highest value */ | |
9567 | + | |
9568 | + } while (total <= data.sample_width); | |
9569 | + | |
9570 | + ret = 0; | |
9571 | + | |
9572 | + /* If we exceed the threshold value, we have found a hardware latency */ | |
9573 | + if (sample > data.threshold || outer_sample > data.threshold) { | |
9574 | + struct sample s; | |
9575 | + | |
9576 | + ret = 1; | |
9577 | + | |
9578 | + data.count++; | |
9579 | + s.seqnum = data.count; | |
9580 | + s.duration = sample; | |
9581 | + s.outer_duration = outer_sample; | |
9582 | + s.timestamp = CURRENT_TIME; | |
9583 | + __buffer_add_sample(&s); | |
9584 | + | |
9585 | + /* Keep a running maximum ever recorded hardware latency */ | |
9586 | + if (sample > data.max_sample) | |
9587 | + data.max_sample = sample; | |
9588 | + } | |
9589 | + | |
9590 | +out: | |
9591 | + return ret; | |
9592 | +} | |
9593 | + | |
9594 | +/* | |
9595 | + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread | |
9596 | + * @unused: A required part of the kthread API. | |
9597 | + * | |
9598 | + * Used to periodically sample the CPU TSC via a call to get_sample. We | |
9599 | + * disable interrupts, which does (intentionally) introduce latency since we | |
9600 | + * need to ensure nothing else might be running (and thus preempting us). | |
9601 | + * Obviously this should never be used in production environments. | |
9602 | + * | |
9603 | + * Currently this runs on whichever CPU it was scheduled on, but most | |
9604 | + * real-world hardware latency situations occur across several CPUs, | |
9605 | + * so we might later generalize this if we find there actually are | |
9606 | + * systems with alternate SMI delivery or other hardware latencies. | |
9607 | + */ | |
9608 | +static int kthread_fn(void *unused) | |
9609 | +{ | |
9610 | + int ret; | |
9611 | + u64 interval; | |
9612 | + | |
9613 | + while (!kthread_should_stop()) { | |
9614 | + | |
9615 | + mutex_lock(&data.lock); | |
9616 | + | |
9617 | + local_irq_disable(); | |
9618 | + ret = get_sample(); | |
9619 | + local_irq_enable(); | |
9620 | + | |
9621 | + if (ret > 0) | |
9622 | + wake_up(&data.wq); /* wake up reader(s) */ | |
9623 | + | |
9624 | + interval = data.sample_window - data.sample_width; | |
9625 | + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ | |
9626 | + | |
9627 | + mutex_unlock(&data.lock); | |
9628 | + | |
9629 | + if (msleep_interruptible(interval)) | |
9630 | + break; | |
9631 | + } | |
9632 | + | |
9633 | + return 0; | |
9634 | +} | |
9635 | + | |
9636 | +/** | |
9637 | + * start_kthread - Kick off the hardware latency sampling/detector kthread | |
9638 | + * | |
9639 | + * This starts a kernel thread that will sit and sample the CPU timestamp | |
9640 | + * counter (TSC or similar) and look for potential hardware latencies. | |
9641 | + */ | |
9642 | +static int start_kthread(void) | |
9643 | +{ | |
9644 | + kthread = kthread_run(kthread_fn, NULL, | |
9645 | + DRVNAME); | |
9646 | + if (IS_ERR(kthread)) { | |
9647 | + pr_err(BANNER "could not start sampling thread\n"); | |
9648 | + enabled = 0; | |
9649 | + return -ENOMEM; | |
9650 | + } | |
9651 | + | |
9652 | + return 0; | |
9653 | +} | |
9654 | + | |
9655 | +/** | |
9656 | + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop | |
9657 | + * | |
9658 | + * This kicks the running hardware latency sampling/detector kernel thread and | |
9659 | + * tells it to stop sampling now. Use this on unload and at system shutdown. | |
9660 | + */ | |
9661 | +static int stop_kthread(void) | |
9662 | +{ | |
9663 | + int ret; | |
9664 | + | |
9665 | + ret = kthread_stop(kthread); | |
9666 | + | |
9667 | + return ret; | |
9668 | +} | |
9669 | + | |
9670 | +/** | |
9671 | + * __reset_stats - Reset statistics for the hardware latency detector | |
9672 | + * | |
9673 | + * We use data to store various statistics and global state. We call this | |
9674 | + * function in order to reset those when "enable" is toggled on or off, and | |
9675 | + * also at initialization. Should be called with data.lock held. | |
9676 | + */ | |
9677 | +static void __reset_stats(void) | |
9678 | +{ | |
9679 | + data.count = 0; | |
9680 | + data.max_sample = 0; | |
9681 | + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ | |
9682 | +} | |
9683 | + | |
9684 | +/** | |
9685 | + * init_stats - Setup global state statistics for the hardware latency detector | |
9686 | + * | |
9687 | + * We use data to store various statistics and global state. We also use | |
9688 | + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware | |
9689 | + * induced system latencies. This function initializes these structures and | |
9690 | + * allocates the global ring buffer also. | |
9691 | + */ | |
9692 | +static int init_stats(void) | |
9693 | +{ | |
9694 | + int ret = -ENOMEM; | |
9695 | + | |
9696 | + mutex_init(&data.lock); | |
9697 | + init_waitqueue_head(&data.wq); | |
9698 | + atomic_set(&data.sample_open, 0); | |
9699 | + | |
9700 | + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); | |
9701 | + | |
9702 | + if (WARN(!ring_buffer, KERN_ERR BANNER | |
9703 | + "failed to allocate ring buffer!\n")) | |
9704 | + goto out; | |
9705 | + | |
9706 | + __reset_stats(); | |
9707 | + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ | |
9708 | + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ | |
9709 | + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ | |
9710 | + | |
9711 | + ret = 0; | |
9712 | + | |
9713 | +out: | |
9714 | + return ret; | |
9715 | + | |
9716 | +} | |
9717 | + | |
9718 | +/* | |
9719 | + * simple_data_read - Wrapper read function for global state debugfs entries | |
9720 | + * @filp: The active open file structure for the debugfs "file" | |
9721 | + * @ubuf: The userspace provided buffer to read value into | |
9722 | + * @cnt: The maximum number of bytes to read | |
9723 | + * @ppos: The current "file" position | |
9724 | + * @entry: The entry to read from | |
9725 | + * | |
9726 | + * This function provides a generic read implementation for the global state | |
9727 | + * "data" structure debugfs filesystem entries. It would be nice to use | |
9728 | + * simple_attr_read directly, but we need to make sure that the data.lock | |
9729 | + * is held during the actual read. | |
9730 | + */ | |
9731 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, | |
9732 | + size_t cnt, loff_t *ppos, const u64 *entry) | |
9733 | +{ | |
9734 | + char buf[U64STR_SIZE]; | |
9735 | + u64 val = 0; | |
9736 | + int len = 0; | |
9737 | + | |
9738 | + memset(buf, 0, sizeof(buf)); | |
9739 | + | |
9740 | + if (!entry) | |
9741 | + return -EFAULT; | |
9742 | + | |
9743 | + mutex_lock(&data.lock); | |
9744 | + val = *entry; | |
9745 | + mutex_unlock(&data.lock); | |
9746 | + | |
9747 | + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); | |
9748 | + | |
9749 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); | |
9750 | + | |
9751 | +} | |
9752 | + | |
9753 | +/* | |
9754 | + * simple_data_write - Wrapper write function for global state debugfs entries | |
9755 | + * @filp: The active open file structure for the debugfs "file" | |
9756 | + * @ubuf: The userspace provided buffer to write value from | |
9757 | + * @cnt: The maximum number of bytes to write | |
9758 | + * @ppos: The current "file" position | |
9759 | + * @entry: The entry to write to | |
9760 | + * | |
9761 | + * This function provides a generic write implementation for the global state | |
9762 | + * "data" structure debugfs filesystem entries. It would be nice to use | |
9763 | + * simple_attr_write directly, but we need to make sure that the data.lock | |
9764 | + * is held during the actual write. | |
9765 | + */ | |
9766 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, | |
9767 | + size_t cnt, loff_t *ppos, u64 *entry) | |
9768 | +{ | |
9769 | + char buf[U64STR_SIZE]; | |
9770 | + int csize = min(cnt, sizeof(buf)); | |
9771 | + u64 val = 0; | |
9772 | + int err = 0; | |
9773 | + | |
9774 | + memset(buf, '\0', sizeof(buf)); | |
9775 | + if (copy_from_user(buf, ubuf, csize)) | |
9776 | + return -EFAULT; | |
9777 | + | |
9778 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
9779 | + err = kstrtoull(buf, 10, &val); | |
9780 | + if (err) | |
9781 | + return -EINVAL; | |
9782 | + | |
9783 | + mutex_lock(&data.lock); | |
9784 | + *entry = val; | |
9785 | + mutex_unlock(&data.lock); | |
9786 | + | |
9787 | + return csize; | |
9788 | +} | |
9789 | + | |
9790 | +/** | |
9791 | + * debug_count_fopen - Open function for "count" debugfs entry | |
9792 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
9793 | + * @filp: The active open file structure for the debugfs "file" | |
9794 | + * | |
9795 | + * This function provides an open implementation for the "count" debugfs | |
9796 | + * interface to the hardware latency detector. | |
9797 | + */ | |
9798 | +static int debug_count_fopen(struct inode *inode, struct file *filp) | |
9799 | +{ | |
9800 | + return 0; | |
9801 | +} | |
9802 | + | |
9803 | +/** | |
9804 | + * debug_count_fread - Read function for "count" debugfs entry | |
9805 | + * @filp: The active open file structure for the debugfs "file" | |
9806 | + * @ubuf: The userspace provided buffer to read value into | |
9807 | + * @cnt: The maximum number of bytes to read | |
9808 | + * @ppos: The current "file" position | |
9809 | + * | |
9810 | + * This function provides a read implementation for the "count" debugfs | |
9811 | + * interface to the hardware latency detector. Can be used to read the | |
9812 | + * number of latency readings exceeding the configured threshold since | |
9813 | + * the detector was last reset (e.g. by writing a zero into "count"). | |
9814 | + */ | |
9815 | +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, | |
9816 | + size_t cnt, loff_t *ppos) | |
9817 | +{ | |
9818 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); | |
9819 | +} | |
9820 | + | |
9821 | +/** | |
9822 | + * debug_count_fwrite - Write function for "count" debugfs entry | |
9823 | + * @filp: The active open file structure for the debugfs "file" | |
9824 | + * @ubuf: The user buffer that contains the value to write | |
9825 | + * @cnt: The maximum number of bytes to write to "file" | |
9826 | + * @ppos: The current position in the debugfs "file" | |
9827 | + * | |
9828 | + * This function provides a write implementation for the "count" debugfs | |
9829 | + * interface to the hardware latency detector. Can be used to write a | |
9830 | + * desired value, especially to zero the total count. | |
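| + * | |
| + * For example, a shell sketch of zeroing the count (this assumes | |
| + * debugfs is mounted at /debug): | |
| + * | |
| + *   # echo 0 > /debug/hwlat_detector/count | |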
9831 | + */ | |
9832 | +static ssize_t debug_count_fwrite(struct file *filp, | |
9833 | + const char __user *ubuf, | |
9834 | + size_t cnt, | |
9835 | + loff_t *ppos) | |
9836 | +{ | |
9837 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); | |
9838 | +} | |
9839 | + | |
9840 | +/** | |
9841 | + * debug_enable_fopen - Dummy open function for "enable" debugfs interface | |
9842 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
9843 | + * @filp: The active open file structure for the debugfs "file" | |
9844 | + * | |
9845 | + * This function provides an open implementation for the "enable" debugfs | |
9846 | + * interface to the hardware latency detector. | |
9847 | + */ | |
9848 | +static int debug_enable_fopen(struct inode *inode, struct file *filp) | |
9849 | +{ | |
9850 | + return 0; | |
9851 | +} | |
9852 | + | |
9853 | +/** | |
9854 | + * debug_enable_fread - Read function for "enable" debugfs interface | |
9855 | + * @filp: The active open file structure for the debugfs "file" | |
9856 | + * @ubuf: The userspace provided buffer to read value into | |
9857 | + * @cnt: The maximum number of bytes to read | |
9858 | + * @ppos: The current "file" position | |
9859 | + * | |
9860 | + * This function provides a read implementation for the "enable" debugfs | |
9861 | + * interface to the hardware latency detector. Can be used to determine | |
9862 | + * whether the detector is currently enabled ("0\n" or "1\n" returned). | |
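| + * | |
| + * For example (assuming debugfs is mounted at /debug): | |
| + * | |
| + *   # cat /debug/hwlat_detector/enable | |
| + *   1 | |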
9863 | + */ | |
9864 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, | |
9865 | + size_t cnt, loff_t *ppos) | |
9866 | +{ | |
9867 | + char buf[4]; | |
9868 | + | |
9869 | + if ((cnt < sizeof(buf)) || (*ppos)) | |
9870 | + return 0; | |
9871 | + | |
9872 | + buf[0] = enabled ? '1' : '0'; | |
9873 | + buf[1] = '\n'; | |
9874 | + buf[2] = '\0'; | |
9875 | + if (copy_to_user(ubuf, buf, strlen(buf))) | |
9876 | + return -EFAULT; | |
9877 | + return *ppos = strlen(buf); | |
9878 | +} | |
9879 | + | |
9880 | +/** | |
9881 | + * debug_enable_fwrite - Write function for "enable" debugfs interface | |
9882 | + * @filp: The active open file structure for the debugfs "file" | |
9883 | + * @ubuf: The user buffer that contains the value to write | |
9884 | + * @cnt: The maximum number of bytes to write to "file" | |
9885 | + * @ppos: The current position in the debugfs "file" | |
9886 | + * | |
9887 | + * This function provides a write implementation for the "enable" debugfs | |
9888 | + * interface to the hardware latency detector. Can be used to enable or | |
9889 | + * disable the detector, which will have the side-effect of possibly | |
9890 | + * also resetting the global stats and kicking off the measuring | |
9891 | + * kthread (on an enable) or the converse (upon a disable). | |
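| + * | |
| + * For example, a shell sketch (assuming debugfs is mounted at /debug): | |
| + * | |
| + *   # echo 1 > /debug/hwlat_detector/enable    (start sampling) | |
| + *   # echo 0 > /debug/hwlat_detector/enable    (stop sampling) | |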
9892 | + */ | |
9893 | +static ssize_t debug_enable_fwrite(struct file *filp, | |
9894 | + const char __user *ubuf, | |
9895 | + size_t cnt, | |
9896 | + loff_t *ppos) | |
9897 | +{ | |
9898 | + char buf[4]; | |
9899 | + int csize = min(cnt, sizeof(buf)); | |
9900 | + long val = 0; | |
9901 | + int err = 0; | |
9902 | + | |
9903 | + memset(buf, '\0', sizeof(buf)); | |
9904 | + if (copy_from_user(buf, ubuf, csize)) | |
9905 | + return -EFAULT; | |
9906 | + | |
9907 | + buf[sizeof(buf)-1] = '\0'; /* just in case */ | |
9908 | + err = kstrtoul(buf, 10, &val); | |
9909 | + if (err) | |
9910 | + return -EINVAL; | |
9911 | + | |
9912 | + if (val) { | |
9913 | + if (enabled) | |
9914 | + goto out; | |
9915 | + enabled = 1; | |
9916 | + __reset_stats(); | |
9917 | + if (start_kthread()) | |
9918 | + return -EFAULT; | |
9919 | + } else { | |
9920 | + if (!enabled) | |
9921 | + goto out; | |
9922 | + enabled = 0; | |
9923 | + err = stop_kthread(); | |
9924 | + if (err) { | |
9925 | + pr_err(BANNER "cannot stop kthread\n"); | |
9926 | + return -EFAULT; | |
9927 | + } | |
9928 | + wake_up(&data.wq); /* reader(s) should return */ | |
9929 | + } | |
9930 | +out: | |
9931 | + return csize; | |
9932 | +} | |
9933 | + | |
9934 | +/** | |
9935 | + * debug_max_fopen - Open function for "max" debugfs entry | |
9936 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
9937 | + * @filp: The active open file structure for the debugfs "file" | |
9938 | + * | |
9939 | + * This function provides an open implementation for the "max" debugfs | |
9940 | + * interface to the hardware latency detector. | |
9941 | + */ | |
9942 | +static int debug_max_fopen(struct inode *inode, struct file *filp) | |
9943 | +{ | |
9944 | + return 0; | |
9945 | +} | |
9946 | + | |
9947 | +/** | |
9948 | + * debug_max_fread - Read function for "max" debugfs entry | |
9949 | + * @filp: The active open file structure for the debugfs "file" | |
9950 | + * @ubuf: The userspace provided buffer to read value into | |
9951 | + * @cnt: The maximum number of bytes to read | |
9952 | + * @ppos: The current "file" position | |
9953 | + * | |
9954 | + * This function provides a read implementation for the "max" debugfs | |
9955 | + * interface to the hardware latency detector. Can be used to determine | |
9956 | + * the maximum latency value observed since it was last reset. | |
9957 | + */ | |
9958 | +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, | |
9959 | + size_t cnt, loff_t *ppos) | |
9960 | +{ | |
9961 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); | |
9962 | +} | |
9963 | + | |
9964 | +/** | |
9965 | + * debug_max_fwrite - Write function for "max" debugfs entry | |
9966 | + * @filp: The active open file structure for the debugfs "file" | |
9967 | + * @ubuf: The user buffer that contains the value to write | |
9968 | + * @cnt: The maximum number of bytes to write to "file" | |
9969 | + * @ppos: The current position in the debugfs "file" | |
9970 | + * | |
9971 | + * This function provides a write implementation for the "max" debugfs | |
9972 | + * interface to the hardware latency detector. Can be used to reset the | |
9973 | + * maximum or set it to some other desired value - if, then, subsequent | |
9974 | + * measurements exceed this value, the maximum will be updated. | |
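| + * | |
| + * For example, resetting the maximum (assuming debugfs at /debug): | |
| + * | |
| + *   # echo 0 > /debug/hwlat_detector/max | |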
9975 | + */ | |
9976 | +static ssize_t debug_max_fwrite(struct file *filp, | |
9977 | + const char __user *ubuf, | |
9978 | + size_t cnt, | |
9979 | + loff_t *ppos) | |
9980 | +{ | |
9981 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); | |
9982 | +} | |
9983 | + | |
9985 | +/** | |
9986 | + * debug_sample_fopen - An open function for "sample" debugfs interface | |
9987 | + * @inode: The in-kernel inode representation of this debugfs "file" | |
9988 | + * @filp: The active open file structure for the debugfs "file" | |
9989 | + * | |
9990 | + * This function handles opening the "sample" file within the hardware | |
9991 | + * latency detector debugfs directory interface. This file is used to read | |
9992 | + * raw samples from the global ring_buffer and allows the user to see a | |
9993 | + * running latency history. Can be opened blocking or non-blocking, | |
9994 | + * which determines whether reads block for new samples or not. | |
9995 | + * Implements simple locking to prevent multiple simultaneous use. | |
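| + * | |
| + * An illustrative userspace sketch of a non-blocking open: | |
| + * | |
| + *   int fd = open("/debug/hwlat_detector/sample", | |
| + *                 O_RDONLY | O_NONBLOCK); | |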
9996 | + */ | |
9997 | +static int debug_sample_fopen(struct inode *inode, struct file *filp) | |
9998 | +{ | |
9999 | + if (!atomic_add_unless(&data.sample_open, 1, 1)) | |
10000 | + return -EBUSY; | |
10001 | + else | |
10002 | + return 0; | |
10003 | +} | |
10004 | + | |
10005 | +/** | |
10006 | + * debug_sample_fread - A read function for "sample" debugfs interface | |
10007 | + * @filp: The active open file structure for the debugfs "file" | |
10008 | + * @ubuf: The user buffer that will contain the samples read | |
10009 | + * @cnt: The maximum bytes to read from the debugfs "file" | |
10010 | + * @ppos: The current position in the debugfs "file" | |
10011 | + * | |
10012 | + * This function handles reading from the "sample" file within the hardware | |
10013 | + * latency detector debugfs directory interface. This file is used to read | |
10014 | + * raw samples from the global ring_buffer and allows the user to see a | |
10015 | + * running latency history. By default this will block pending a new | |
10016 | + * value written into the sample buffer, unless one or more values | |
10017 | + * are already waiting in the buffer, or the sample file was | |
10018 | + * previously opened in non-blocking mode. | |
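| + * | |
| + * Each sample is emitted in the format built below, e.g. with | |
| + * illustrative values: | |
| + * | |
| + *   1400364326.0000000241   18   20 | |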
10019 | + */ | |
10020 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, | |
10021 | + size_t cnt, loff_t *ppos) | |
10022 | +{ | |
10023 | + int len = 0; | |
10024 | + char buf[64]; | |
10025 | + struct sample *sample = NULL; | |
10026 | + | |
10027 | + if (!enabled) | |
10028 | + return 0; | |
10029 | + | |
10030 | + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); | |
10031 | + if (!sample) | |
10032 | + return -ENOMEM; | |
10033 | + | |
10034 | + while (!buffer_get_sample(sample)) { | |
10035 | + | |
10036 | + DEFINE_WAIT(wait); | |
10037 | + | |
10038 | + if (filp->f_flags & O_NONBLOCK) { | |
10039 | + len = -EAGAIN; | |
10040 | + goto out; | |
10041 | + } | |
10042 | + | |
10043 | + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); | |
10044 | + schedule(); | |
10045 | + finish_wait(&data.wq, &wait); | |
10046 | + | |
10047 | + if (signal_pending(current)) { | |
10048 | + len = -EINTR; | |
10049 | + goto out; | |
10050 | + } | |
10051 | + | |
10052 | + if (!enabled) { /* enable was toggled */ | |
10053 | + len = 0; | |
10054 | + goto out; | |
10055 | + } | |
10056 | + } | |
10057 | + | |
10058 | + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", | |
10059 | + sample->timestamp.tv_sec, | |
10060 | + sample->timestamp.tv_nsec, | |
10061 | + sample->duration, | |
10062 | + sample->outer_duration); | |
10063 | + | |
10065 | + /* handling partial reads is more trouble than it's worth */ | |
10066 | + if (len > cnt) | |
10067 | + goto out; | |
10068 | + | |
10069 | + if (copy_to_user(ubuf, buf, len)) | |
10070 | + len = -EFAULT; | |
10071 | + | |
10072 | +out: | |
10073 | + kfree(sample); | |
10074 | + return len; | |
10075 | +} | |
10076 | + | |
10077 | +/** | |
10078 | + * debug_sample_release - Release function for "sample" debugfs interface | |
10079 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
10080 | + * @filp: The active open file structure for the debugfs "file" | |
10081 | + * | |
10082 | + * This function completes the close of the debugfs interface "sample" file. | |
10083 | + * Frees the sample_open "lock" so that other users may open the interface. | |
10084 | + */ | |
10085 | +static int debug_sample_release(struct inode *inode, struct file *filp) | |
10086 | +{ | |
10087 | + atomic_dec(&data.sample_open); | |
10088 | + | |
10089 | + return 0; | |
10090 | +} | |
10091 | + | |
10092 | +/** | |
10093 | + * debug_threshold_fopen - Open function for "threshold" debugfs entry | |
10094 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
10095 | + * @filp: The active open file structure for the debugfs "file" | |
10096 | + * | |
10097 | + * This function provides an open implementation for the "threshold" debugfs | |
10098 | + * interface to the hardware latency detector. | |
10099 | + */ | |
10100 | +static int debug_threshold_fopen(struct inode *inode, struct file *filp) | |
10101 | +{ | |
10102 | + return 0; | |
10103 | +} | |
10104 | + | |
10105 | +/** | |
10106 | + * debug_threshold_fread - Read function for "threshold" debugfs entry | |
10107 | + * @filp: The active open file structure for the debugfs "file" | |
10108 | + * @ubuf: The userspace provided buffer to read value into | |
10109 | + * @cnt: The maximum number of bytes to read | |
10110 | + * @ppos: The current "file" position | |
10111 | + * | |
10112 | + * This function provides a read implementation for the "threshold" debugfs | |
10113 | + * interface to the hardware latency detector. It can be used to determine | |
10114 | + * the current threshold level at which a latency will be recorded in the | |
10115 | + * global ring buffer, typically on the order of 10us. | |
10116 | + */ | |
10117 | +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, | |
10118 | + size_t cnt, loff_t *ppos) | |
10119 | +{ | |
10120 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); | |
10121 | +} | |
10122 | + | |
10123 | +/** | |
10124 | + * debug_threshold_fwrite - Write function for "threshold" debugfs entry | |
10125 | + * @filp: The active open file structure for the debugfs "file" | |
10126 | + * @ubuf: The user buffer that contains the value to write | |
10127 | + * @cnt: The maximum number of bytes to write to "file" | |
10128 | + * @ppos: The current position in the debugfs "file" | |
10129 | + * | |
10130 | + * This function provides a write implementation for the "threshold" debugfs | |
10131 | + * interface to the hardware latency detector. It can be used to configure | |
10132 | + * the threshold level at which any subsequently detected latencies will | |
10133 | + * be recorded into the global ring buffer. | |
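| + * | |
| + * For example, to record only latencies above 100 usecs (assuming | |
| + * debugfs is mounted at /debug): | |
| + * | |
| + *   # echo 100 > /debug/hwlat_detector/threshold | |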
10134 | + */ | |
10135 | +static ssize_t debug_threshold_fwrite(struct file *filp, | |
10136 | + const char __user *ubuf, | |
10137 | + size_t cnt, | |
10138 | + loff_t *ppos) | |
10139 | +{ | |
10140 | + int ret; | |
10141 | + | |
10142 | + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); | |
10143 | + | |
10144 | + if (enabled) | |
10145 | + wake_up_process(kthread); | |
10146 | + | |
10147 | + return ret; | |
10148 | +} | |
10149 | + | |
10150 | +/** | |
10151 | + * debug_width_fopen - Open function for "width" debugfs entry | |
10152 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
10153 | + * @filp: The active open file structure for the debugfs "file" | |
10154 | + * | |
10155 | + * This function provides an open implementation for the "width" debugfs | |
10156 | + * interface to the hardware latency detector. | |
10157 | + */ | |
10158 | +static int debug_width_fopen(struct inode *inode, struct file *filp) | |
10159 | +{ | |
10160 | + return 0; | |
10161 | +} | |
10162 | + | |
10163 | +/** | |
10164 | + * debug_width_fread - Read function for "width" debugfs entry | |
10165 | + * @filp: The active open file structure for the debugfs "file" | |
10166 | + * @ubuf: The userspace provided buffer to read value into | |
10167 | + * @cnt: The maximum number of bytes to read | |
10168 | + * @ppos: The current "file" position | |
10169 | + * | |
10170 | + * This function provides a read implementation for the "width" debugfs | |
10171 | + * interface to the hardware latency detector. It can be used to determine | |
10172 | + * for how many us of the total window we will actively sample for | |
10173 | + * hardware-induced latency periods. Obviously, it is not possible to | |
10174 | + * sample constantly and still have the system respond to a sample | |
10175 | + * reader without the system appearing to have gone out to lunch. | |
10176 | + */ | |
10177 | +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, | |
10178 | + size_t cnt, loff_t *ppos) | |
10179 | +{ | |
10180 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); | |
10181 | +} | |
10182 | + | |
10183 | +/** | |
10184 | + * debug_width_fwrite - Write function for "width" debugfs entry | |
10185 | + * @filp: The active open file structure for the debugfs "file" | |
10186 | + * @ubuf: The user buffer that contains the value to write | |
10187 | + * @cnt: The maximum number of bytes to write to "file" | |
10188 | + * @ppos: The current position in the debugfs "file" | |
10189 | + * | |
10190 | + * This function provides a write implementation for the "width" debugfs | |
10191 | + * interface to the hardware latency detector. It can be used to configure | |
10192 | + * for how many us of the total window we will actively sample for | |
10193 | + * hardware-induced latency periods. Obviously, it is not possible to | |
10194 | + * sample constantly and still have the system respond to a sample | |
10195 | + * reader without the system appearing to have gone out to lunch. It | |
10196 | + * is enforced that the width is less than the total window size. | |
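| + * | |
| + * For example, with illustrative values (debugfs assumed at /debug), | |
| + * sampling for half of each one-second window: | |
| + * | |
| + *   # echo 500000 > /debug/hwlat_detector/width | |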
10197 | + */ | |
10198 | +static ssize_t debug_width_fwrite(struct file *filp, | |
10199 | + const char __user *ubuf, | |
10200 | + size_t cnt, | |
10201 | + loff_t *ppos) | |
10202 | +{ | |
10203 | + char buf[U64STR_SIZE]; | |
10204 | + int csize = min(cnt, sizeof(buf)); | |
10205 | + u64 val = 0; | |
10206 | + int err = 0; | |
10207 | + | |
10208 | + memset(buf, '\0', sizeof(buf)); | |
10209 | + if (copy_from_user(buf, ubuf, csize)) | |
10210 | + return -EFAULT; | |
10211 | + | |
10212 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
10213 | + err = kstrtoull(buf, 10, &val); | |
10214 | + if (err) | |
10215 | + return -EINVAL; | |
10216 | + | |
10217 | + mutex_lock(&data.lock); | |
10218 | + if (val < data.sample_window) | |
10219 | + data.sample_width = val; | |
10220 | + else { | |
10221 | + mutex_unlock(&data.lock); | |
10222 | + return -EINVAL; | |
10223 | + } | |
10224 | + mutex_unlock(&data.lock); | |
10225 | + | |
10226 | + if (enabled) | |
10227 | + wake_up_process(kthread); | |
10228 | + | |
10229 | + return csize; | |
10230 | +} | |
10231 | + | |
10232 | +/** | |
10233 | + * debug_window_fopen - Open function for "window" debugfs entry | |
10234 | + * @inode: The in-kernel inode representation of the debugfs "file" | |
10235 | + * @filp: The active open file structure for the debugfs "file" | |
10236 | + * | |
10237 | + * This function provides an open implementation for the "window" debugfs | |
10238 | + * interface to the hardware latency detector. The window is the total time | |
10239 | + * in us that will be considered one sample period. Conceptually, windows | |
10240 | + * occur back-to-back and contain a sample width period during which | |
10241 | + * actual sampling occurs. | |
10242 | + */ | |
10243 | +static int debug_window_fopen(struct inode *inode, struct file *filp) | |
10244 | +{ | |
10245 | + return 0; | |
10246 | +} | |
10247 | + | |
10248 | +/** | |
10249 | + * debug_window_fread - Read function for "window" debugfs entry | |
10250 | + * @filp: The active open file structure for the debugfs "file" | |
10251 | + * @ubuf: The userspace provided buffer to read value into | |
10252 | + * @cnt: The maximum number of bytes to read | |
10253 | + * @ppos: The current "file" position | |
10254 | + * | |
10255 | + * This function provides a read implementation for the "window" debugfs | |
10256 | + * interface to the hardware latency detector. The window is the total time | |
10257 | + * in us that will be considered one sample period. Conceptually, windows | |
10258 | + * occur back-to-back and contain a sample width period during which | |
10259 | + * actual sampling occurs. Can be used to read the total window size. | |
10260 | + */ | |
10261 | +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, | |
10262 | + size_t cnt, loff_t *ppos) | |
10263 | +{ | |
10264 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); | |
10265 | +} | |
10266 | + | |
10267 | +/** | |
10268 | + * debug_window_fwrite - Write function for "window" debugfs entry | |
10269 | + * @filp: The active open file structure for the debugfs "file" | |
10270 | + * @ubuf: The user buffer that contains the value to write | |
10271 | + * @cnt: The maximum number of bytes to write to "file" | |
10272 | + * @ppos: The current position in the debugfs "file" | |
10273 | + * | |
10274 | + * This function provides a write implementation for the "window" debugfs | |
10275 | + * interface to the hardware latency detector. The window is the total time | |
10276 | + * in us that will be considered one sample period. Conceptually, windows | |
10277 | + * occur back-to-back and contain a sample width period during which | |
10278 | + * actual sampling occurs. Can be used to write a new total window size. It | |
10279 | + * is enforced that any value written must be greater than the sample width | |
10280 | + * size, or an error results. | |
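| + * | |
| + * For example, an illustrative one-second total window, which must | |
| + * stay larger than the current width (debugfs assumed at /debug): | |
| + * | |
| + *   # echo 1000000 > /debug/hwlat_detector/window | |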
10281 | + */ | |
10282 | +static ssize_t debug_window_fwrite(struct file *filp, | |
10283 | + const char __user *ubuf, | |
10284 | + size_t cnt, | |
10285 | + loff_t *ppos) | |
10286 | +{ | |
10287 | + char buf[U64STR_SIZE]; | |
10288 | + int csize = min(cnt, sizeof(buf)); | |
10289 | + u64 val = 0; | |
10290 | + int err = 0; | |
10291 | + | |
10292 | + memset(buf, '\0', sizeof(buf)); | |
10293 | + if (copy_from_user(buf, ubuf, csize)) | |
10294 | + return -EFAULT; | |
10295 | + | |
10296 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ | |
10297 | + err = kstrtoull(buf, 10, &val); | |
10298 | + if (err) | |
10299 | + return -EINVAL; | |
10300 | + | |
10301 | + mutex_lock(&data.lock); | |
10302 | + if (data.sample_width < val) | |
10303 | + data.sample_window = val; | |
10304 | + else { | |
10305 | + mutex_unlock(&data.lock); | |
10306 | + return -EINVAL; | |
10307 | + } | |
10308 | + mutex_unlock(&data.lock); | |
10309 | + | |
10310 | + return csize; | |
10311 | +} | |
10312 | + | |
10313 | +/* | |
10314 | + * Function pointers for the "count" debugfs file operations | |
10315 | + */ | |
10316 | +static const struct file_operations count_fops = { | |
10317 | + .open = debug_count_fopen, | |
10318 | + .read = debug_count_fread, | |
10319 | + .write = debug_count_fwrite, | |
10320 | + .owner = THIS_MODULE, | |
10321 | +}; | |
10322 | + | |
10323 | +/* | |
10324 | + * Function pointers for the "enable" debugfs file operations | |
10325 | + */ | |
10326 | +static const struct file_operations enable_fops = { | |
10327 | + .open = debug_enable_fopen, | |
10328 | + .read = debug_enable_fread, | |
10329 | + .write = debug_enable_fwrite, | |
10330 | + .owner = THIS_MODULE, | |
10331 | +}; | |
10332 | + | |
10333 | +/* | |
10334 | + * Function pointers for the "max" debugfs file operations | |
10335 | + */ | |
10336 | +static const struct file_operations max_fops = { | |
10337 | + .open = debug_max_fopen, | |
10338 | + .read = debug_max_fread, | |
10339 | + .write = debug_max_fwrite, | |
10340 | + .owner = THIS_MODULE, | |
10341 | +}; | |
10342 | + | |
10343 | +/* | |
10344 | + * Function pointers for the "sample" debugfs file operations | |
10345 | + */ | |
10346 | +static const struct file_operations sample_fops = { | |
10347 | + .open = debug_sample_fopen, | |
10348 | + .read = debug_sample_fread, | |
10349 | + .release = debug_sample_release, | |
10350 | + .owner = THIS_MODULE, | |
10351 | +}; | |
10352 | + | |
10353 | +/* | |
10354 | + * Function pointers for the "threshold" debugfs file operations | |
10355 | + */ | |
10356 | +static const struct file_operations threshold_fops = { | |
10357 | + .open = debug_threshold_fopen, | |
10358 | + .read = debug_threshold_fread, | |
10359 | + .write = debug_threshold_fwrite, | |
10360 | + .owner = THIS_MODULE, | |
10361 | +}; | |
10362 | + | |
10363 | +/* | |
10364 | + * Function pointers for the "width" debugfs file operations | |
10365 | + */ | |
10366 | +static const struct file_operations width_fops = { | |
10367 | + .open = debug_width_fopen, | |
10368 | + .read = debug_width_fread, | |
10369 | + .write = debug_width_fwrite, | |
10370 | + .owner = THIS_MODULE, | |
10371 | +}; | |
10372 | + | |
10373 | +/* | |
10374 | + * Function pointers for the "window" debugfs file operations | |
10375 | + */ | |
10376 | +static const struct file_operations window_fops = { | |
10377 | + .open = debug_window_fopen, | |
10378 | + .read = debug_window_fread, | |
10379 | + .write = debug_window_fwrite, | |
10380 | + .owner = THIS_MODULE, | |
10381 | +}; | |
10382 | + | |
10383 | +/** | |
10384 | + * init_debugfs - A function to initialize the debugfs interface files | |
10385 | + * | |
10386 | + * This function creates entries in debugfs for "hwlat_detector", including | |
10387 | + * files to read values from the detector, current samples, and the | |
10388 | + * maximum sample that has been captured since the hardware latency | |
10389 | + * detector was started. | |
10390 | + */ | |
10391 | +static int init_debugfs(void) | |
10392 | +{ | |
10393 | + int ret = -ENOMEM; | |
10394 | + | |
10395 | + debug_dir = debugfs_create_dir(DRVNAME, NULL); | |
10396 | + if (!debug_dir) | |
10397 | + goto err_debug_dir; | |
10398 | + | |
10399 | + debug_sample = debugfs_create_file("sample", 0444, | |
10400 | + debug_dir, NULL, | |
10401 | + &sample_fops); | |
10402 | + if (!debug_sample) | |
10403 | + goto err_sample; | |
10404 | + | |
10405 | + debug_count = debugfs_create_file("count", 0444, | |
10406 | + debug_dir, NULL, | |
10407 | + &count_fops); | |
10408 | + if (!debug_count) | |
10409 | + goto err_count; | |
10410 | + | |
10411 | + debug_max = debugfs_create_file("max", 0444, | |
10412 | + debug_dir, NULL, | |
10413 | + &max_fops); | |
10414 | + if (!debug_max) | |
10415 | + goto err_max; | |
10416 | + | |
10417 | + debug_sample_window = debugfs_create_file("window", 0644, | |
10418 | + debug_dir, NULL, | |
10419 | + &window_fops); | |
10420 | + if (!debug_sample_window) | |
10421 | + goto err_window; | |
10422 | + | |
10423 | + debug_sample_width = debugfs_create_file("width", 0644, | |
10424 | + debug_dir, NULL, | |
10425 | + &width_fops); | |
10426 | + if (!debug_sample_width) | |
10427 | + goto err_width; | |
10428 | + | |
10429 | + debug_threshold = debugfs_create_file("threshold", 0644, | |
10430 | + debug_dir, NULL, | |
10431 | + &threshold_fops); | |
10432 | + if (!debug_threshold) | |
10433 | + goto err_threshold; | |
10434 | + | |
10435 | + debug_enable = debugfs_create_file("enable", 0644, | |
10436 | + debug_dir, &enabled, | |
10437 | + &enable_fops); | |
10438 | + if (!debug_enable) | |
10439 | + goto err_enable; | |
10440 | + | |
10442 | + ret = 0; | |
10443 | + goto out; | |
10445 | + | |
10446 | +err_enable: | |
10447 | + debugfs_remove(debug_threshold); | |
10448 | +err_threshold: | |
10449 | + debugfs_remove(debug_sample_width); | |
10450 | +err_width: | |
10451 | + debugfs_remove(debug_sample_window); | |
10452 | +err_window: | |
10453 | + debugfs_remove(debug_max); | |
10454 | +err_max: | |
10455 | + debugfs_remove(debug_count); | |
10456 | +err_count: | |
10457 | + debugfs_remove(debug_sample); | |
10458 | +err_sample: | |
10459 | + debugfs_remove(debug_dir); | |
10460 | +err_debug_dir: | |
10461 | +out: | |
10462 | + return ret; | |
10463 | +} | |
10464 | + | |
10465 | +/** | |
10466 | + * free_debugfs - A function to cleanup the debugfs file interface | |
10467 | + */ | |
10468 | +static void free_debugfs(void) | |
10469 | +{ | |
10470 | + /* could also use a debugfs_remove_recursive */ | |
10471 | + debugfs_remove(debug_enable); | |
10472 | + debugfs_remove(debug_threshold); | |
10473 | + debugfs_remove(debug_sample_width); | |
10474 | + debugfs_remove(debug_sample_window); | |
10475 | + debugfs_remove(debug_max); | |
10476 | + debugfs_remove(debug_count); | |
10477 | + debugfs_remove(debug_sample); | |
10478 | + debugfs_remove(debug_dir); | |
10479 | +} | |
10480 | + | |
10481 | +/** | |
10482 | + * detector_init - Standard module initialization code | |
10483 | + */ | |
10484 | +static int detector_init(void) | |
10485 | +{ | |
10486 | + int ret = -ENOMEM; | |
10487 | + | |
10488 | + pr_info(BANNER "version %s\n", VERSION); | |
10489 | + | |
10490 | + ret = init_stats(); | |
10491 | + if (ret) | |
10492 | + goto out; | |
10493 | + | |
10494 | + ret = init_debugfs(); | |
10495 | + if (ret) | |
10496 | + goto err_stats; | |
10497 | + | |
10498 | + if (enabled) | |
10499 | + ret = start_kthread(); | |
10500 | + | |
10501 | + goto out; | |
10502 | + | |
10503 | +err_stats: | |
10504 | + ring_buffer_free(ring_buffer); | |
10505 | +out: | |
10506 | + return ret; | |
10508 | +} | |
10509 | + | |
10510 | +/** | |
10511 | + * detector_exit - Standard module cleanup code | |
10512 | + */ | |
10513 | +static void detector_exit(void) | |
10514 | +{ | |
10515 | + int err; | |
10516 | + | |
10517 | + if (enabled) { | |
10518 | + enabled = 0; | |
10519 | + err = stop_kthread(); | |
10520 | + if (err) | |
10521 | + pr_err(BANNER "cannot stop kthread\n"); | |
10522 | + } | |
10523 | + | |
10524 | + free_debugfs(); | |
10525 | + ring_buffer_free(ring_buffer); /* free up the ring buffer */ | |
10527 | +} | |
10528 | + | |
10529 | +module_init(detector_init); | |
10530 | +module_exit(detector_exit); | |
10531 | diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c | |
10532 | index acece3299756..58ea04a03fa9 100644 | |
10533 | --- a/drivers/mmc/host/mmci.c | |
10534 | +++ b/drivers/mmc/host/mmci.c | |
10535 | @@ -1155,15 +1155,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
10536 | struct sg_mapping_iter *sg_miter = &host->sg_miter; | |
10537 | struct variant_data *variant = host->variant; | |
10538 | void __iomem *base = host->base; | |
10539 | - unsigned long flags; | |
10540 | u32 status; | |
10541 | ||
10542 | status = readl(base + MMCISTATUS); | |
10543 | ||
10544 | dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); | |
10545 | ||
10546 | - local_irq_save(flags); | |
10547 | - | |
10548 | do { | |
10549 | unsigned int remain, len; | |
10550 | char *buffer; | |
10551 | @@ -1203,8 +1200,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) | |
10552 | ||
10553 | sg_miter_stop(sg_miter); | |
10554 | ||
10555 | - local_irq_restore(flags); | |
10556 | - | |
10557 | /* | |
10558 | * If we have less than the fifo 'half-full' threshold to transfer, | |
10559 | * trigger a PIO interrupt as soon as any data is available. | |
10560 | diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c | |
10561 | index 2839af00f20c..4348b9c850d3 100644 | |
10562 | --- a/drivers/net/ethernet/3com/3c59x.c | |
10563 | +++ b/drivers/net/ethernet/3com/3c59x.c | |
10564 | @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) | |
10565 | { | |
10566 | struct vortex_private *vp = netdev_priv(dev); | |
10567 | unsigned long flags; | |
10568 | - local_irq_save(flags); | |
10569 | + local_irq_save_nort(flags); | |
10570 | (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); | |
10571 | - local_irq_restore(flags); | |
10572 | + local_irq_restore_nort(flags); | |
10573 | } | |
10574 | #endif | |
10575 | ||
10576 | @@ -1916,12 +1916,12 @@ static void vortex_tx_timeout(struct net_device *dev) | |
10577 | * Block interrupts because vortex_interrupt does a bare spin_lock() | |
10578 | */ | |
10579 | unsigned long flags; | |
10580 | - local_irq_save(flags); | |
10581 | + local_irq_save_nort(flags); | |
10582 | if (vp->full_bus_master_tx) | |
10583 | boomerang_interrupt(dev->irq, dev); | |
10584 | else | |
10585 | vortex_interrupt(dev->irq, dev); | |
10586 | - local_irq_restore(flags); | |
10587 | + local_irq_restore_nort(flags); | |
10588 | } | |
10589 | } | |
10590 | ||
10591 | diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c | |
10592 | index 8b5988e210d5..cf9928ccdd7e 100644 | |
10593 | --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c | |
10594 | +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c | |
10595 | @@ -2221,11 +2221,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, | |
10596 | } | |
10597 | ||
10598 | tpd_req = atl1c_cal_tpd_req(skb); | |
10599 | - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) { | |
10600 | - if (netif_msg_pktdata(adapter)) | |
10601 | - dev_info(&adapter->pdev->dev, "tx locked\n"); | |
10602 | - return NETDEV_TX_LOCKED; | |
10603 | - } | |
10604 | + spin_lock_irqsave(&adapter->tx_lock, flags); | |
10605 | ||
10606 | if (atl1c_tpd_avail(adapter, type) < tpd_req) { | |
10607 | /* no enough descriptor, just stop queue */ | |
10608 | diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c | |
10609 | index 59a03a193e83..734f7a7ad2c3 100644 | |
10610 | --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c | |
10611 | +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c | |
10612 | @@ -1880,8 +1880,7 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, | |
10613 | return NETDEV_TX_OK; | |
10614 | } | |
10615 | tpd_req = atl1e_cal_tdp_req(skb); | |
10616 | - if (!spin_trylock_irqsave(&adapter->tx_lock, flags)) | |
10617 | - return NETDEV_TX_LOCKED; | |
10618 | + spin_lock_irqsave(&adapter->tx_lock, flags); | |
10619 | ||
10620 | if (atl1e_tpd_avail(adapter) < tpd_req) { | |
10621 | /* no enough descriptor, just stop queue */ | |
10622 | diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c | |
10623 | index 526ea74e82d9..86f467a2c485 100644 | |
10624 | --- a/drivers/net/ethernet/chelsio/cxgb/sge.c | |
10625 | +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c | |
10626 | @@ -1664,8 +1664,7 @@ static int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, | |
10627 | struct cmdQ *q = &sge->cmdQ[qid]; | |
10628 | unsigned int credits, pidx, genbit, count, use_sched_skb = 0; | |
10629 | ||
10630 | - if (!spin_trylock(&q->lock)) | |
10631 | - return NETDEV_TX_LOCKED; | |
10632 | + spin_lock(&q->lock); | |
10633 | ||
10634 | reclaim_completed_tx(sge, q); | |
10635 | ||
10636 | diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c | |
10637 | index 9ba975853ec6..813cfa698160 100644 | |
10638 | --- a/drivers/net/ethernet/neterion/s2io.c | |
10639 | +++ b/drivers/net/ethernet/neterion/s2io.c | |
10640 | @@ -4084,12 +4084,7 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) | |
10641 | [skb->priority & (MAX_TX_FIFOS - 1)]; | |
10642 | fifo = &mac_control->fifos[queue]; | |
10643 | ||
10644 | - if (do_spin_lock) | |
10645 | - spin_lock_irqsave(&fifo->tx_lock, flags); | |
10646 | - else { | |
10647 | - if (unlikely(!spin_trylock_irqsave(&fifo->tx_lock, flags))) | |
10648 | - return NETDEV_TX_LOCKED; | |
10649 | - } | |
10650 | + spin_lock_irqsave(&fifo->tx_lock, flags); | |
10651 | ||
10652 | if (sp->config.multiq) { | |
10653 | if (__netif_subqueue_stopped(dev, fifo->fifo_no)) { | |
10654 | diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | |
10655 | index 3b98b263bad0..ca4add749410 100644 | |
10656 | --- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | |
10657 | +++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | |
10658 | @@ -2137,10 +2137,8 @@ static int pch_gbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) | |
10659 | struct pch_gbe_tx_ring *tx_ring = adapter->tx_ring; | |
10660 | unsigned long flags; | |
10661 | ||
10662 | - if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) { | |
10663 | - /* Collision - tell upper layer to requeue */ | |
10664 | - return NETDEV_TX_LOCKED; | |
10665 | - } | |
10666 | + spin_lock_irqsave(&tx_ring->tx_lock, flags); | |
10667 | + | |
10668 | if (unlikely(!PCH_GBE_DESC_UNUSED(tx_ring))) { | |
10669 | netif_stop_queue(netdev); | |
10670 | spin_unlock_irqrestore(&tx_ring->tx_lock, flags); | |
10671 | diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c | |
10672 | index ef668d300800..d987d571fdd6 100644 | |
10673 | --- a/drivers/net/ethernet/realtek/8139too.c | |
10674 | +++ b/drivers/net/ethernet/realtek/8139too.c | |
10675 | @@ -2229,7 +2229,7 @@ static void rtl8139_poll_controller(struct net_device *dev) | |
10676 | struct rtl8139_private *tp = netdev_priv(dev); | |
10677 | const int irq = tp->pci_dev->irq; | |
10678 | ||
10679 | - disable_irq(irq); | |
10680 | + disable_irq_nosync(irq); | |
10681 | rtl8139_interrupt(irq, dev); | |
10682 | enable_irq(irq); | |
10683 | } | |
10684 | diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c | |
10685 | index 14c9d1baa85c..e1a5305418a8 100644 | |
10686 | --- a/drivers/net/ethernet/tehuti/tehuti.c | |
10687 | +++ b/drivers/net/ethernet/tehuti/tehuti.c | |
10688 | @@ -1629,13 +1629,8 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, | |
10689 | unsigned long flags; | |
10690 | ||
10691 | ENTER; | |
10692 | - local_irq_save(flags); | |
10693 | - if (!spin_trylock(&priv->tx_lock)) { | |
10694 | - local_irq_restore(flags); | |
10695 | - DBG("%s[%s]: TX locked, returning NETDEV_TX_LOCKED\n", | |
10696 | - BDX_DRV_NAME, ndev->name); | |
10697 | - return NETDEV_TX_LOCKED; | |
10698 | - } | |
10699 | + | |
10700 | + spin_lock_irqsave(&priv->tx_lock, flags); | |
10701 | ||
10702 | /* build tx descriptor */ | |
10703 | BDX_ASSERT(f->m.wptr >= f->m.memsz); /* started with valid wptr */ | |
10704 | diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c | |
10705 | index e7034c55e796..2e4ee0f912bf 100644 | |
10706 | --- a/drivers/net/rionet.c | |
10707 | +++ b/drivers/net/rionet.c | |
10708 | @@ -174,11 +174,7 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev) | |
10709 | unsigned long flags; | |
10710 | int add_num = 1; | |
10711 | ||
10712 | - local_irq_save(flags); | |
10713 | - if (!spin_trylock(&rnet->tx_lock)) { | |
10714 | - local_irq_restore(flags); | |
10715 | - return NETDEV_TX_LOCKED; | |
10716 | - } | |
10717 | + spin_lock_irqsave(&rnet->tx_lock, flags); | |
10718 | ||
10719 | if (is_multicast_ether_addr(eth->h_dest)) | |
10720 | add_num = nets[rnet->mport->id].nact; | |
10721 | diff --git a/drivers/net/wireless/orinoco/orinoco_usb.c b/drivers/net/wireless/orinoco/orinoco_usb.c | |
10722 | index f2cd513d54b2..6c0f4c9638a2 100644 | |
10723 | --- a/drivers/net/wireless/orinoco/orinoco_usb.c | |
10724 | +++ b/drivers/net/wireless/orinoco/orinoco_usb.c | |
10725 | @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, | |
10726 | while (!ctx->done.done && msecs--) | |
10727 | udelay(1000); | |
10728 | } else { | |
10729 | - wait_event_interruptible(ctx->done.wait, | |
10730 | + swait_event_interruptible(ctx->done.wait, | |
10731 | ctx->done.done); | |
10732 | } | |
10733 | break; | |
10734 | diff --git a/drivers/pci/access.c b/drivers/pci/access.c | |
10735 | index 59ac36fe7c42..7a45a20af78a 100644 | |
10736 | --- a/drivers/pci/access.c | |
10737 | +++ b/drivers/pci/access.c | |
10738 | @@ -561,7 +561,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) | |
10739 | WARN_ON(!dev->block_cfg_access); | |
10740 | ||
10741 | dev->block_cfg_access = 0; | |
10742 | - wake_up_all(&pci_cfg_wait); | |
10743 | + wake_up_all_locked(&pci_cfg_wait); | |
10744 | raw_spin_unlock_irqrestore(&pci_lock, flags); | |
10745 | } | |
10746 | EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); | |
10747 | diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c | |
10748 | index f4424063b860..cbbbebd86c6e 100644 | |
10749 | --- a/drivers/scsi/fcoe/fcoe.c | |
10750 | +++ b/drivers/scsi/fcoe/fcoe.c | |
10751 | @@ -1286,7 +1286,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu) | |
10752 | struct sk_buff *skb; | |
10753 | #ifdef CONFIG_SMP | |
10754 | struct fcoe_percpu_s *p0; | |
10755 | - unsigned targ_cpu = get_cpu(); | |
10756 | + unsigned targ_cpu = get_cpu_light(); | |
10757 | #endif /* CONFIG_SMP */ | |
10758 | ||
10759 | FCOE_DBG("Destroying receive thread for CPU %d\n", cpu); | |
10760 | @@ -1342,7 +1342,7 @@ static void fcoe_percpu_thread_destroy(unsigned int cpu) | |
10761 | kfree_skb(skb); | |
10762 | spin_unlock_bh(&p->fcoe_rx_list.lock); | |
10763 | } | |
10764 | - put_cpu(); | |
10765 | + put_cpu_light(); | |
10766 | #else | |
10767 | /* | |
10768 | * This a non-SMP scenario where the singular Rx thread is | |
10769 | @@ -1566,11 +1566,11 @@ err2: | |
10770 | static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) | |
10771 | { | |
10772 | struct fcoe_percpu_s *fps; | |
10773 | - int rc; | |
10774 | + int rc, cpu = get_cpu_light(); | |
10775 | ||
10776 | - fps = &get_cpu_var(fcoe_percpu); | |
10777 | + fps = &per_cpu(fcoe_percpu, cpu); | |
10778 | rc = fcoe_get_paged_crc_eof(skb, tlen, fps); | |
10779 | - put_cpu_var(fcoe_percpu); | |
10780 | + put_cpu_light(); | |
10781 | ||
10782 | return rc; | |
10783 | } | |
10784 | @@ -1766,11 +1766,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, | |
10785 | return 0; | |
10786 | } | |
10787 | ||
10788 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
10789 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
10790 | stats->InvalidCRCCount++; | |
10791 | if (stats->InvalidCRCCount < 5) | |
10792 | printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); | |
10793 | - put_cpu(); | |
10794 | + put_cpu_light(); | |
10795 | return -EINVAL; | |
10796 | } | |
10797 | ||
10798 | @@ -1814,7 +1814,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
10799 | */ | |
10800 | hp = (struct fcoe_hdr *) skb_network_header(skb); | |
10801 | ||
10802 | - stats = per_cpu_ptr(lport->stats, get_cpu()); | |
10803 | + stats = per_cpu_ptr(lport->stats, get_cpu_light()); | |
10804 | if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { | |
10805 | if (stats->ErrorFrames < 5) | |
10806 | printk(KERN_WARNING "fcoe: FCoE version " | |
10807 | @@ -1846,13 +1846,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) | |
10808 | goto drop; | |
10809 | ||
10810 | if (!fcoe_filter_frames(lport, fp)) { | |
10811 | - put_cpu(); | |
10812 | + put_cpu_light(); | |
10813 | fc_exch_recv(lport, fp); | |
10814 | return; | |
10815 | } | |
10816 | drop: | |
10817 | stats->ErrorFrames++; | |
10818 | - put_cpu(); | |
10819 | + put_cpu_light(); | |
10820 | kfree_skb(skb); | |
10821 | } | |
10822 | ||
10823 | diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c | |
10824 | index 34a1b1f333b4..d91131210695 100644 | |
10825 | --- a/drivers/scsi/fcoe/fcoe_ctlr.c | |
10826 | +++ b/drivers/scsi/fcoe/fcoe_ctlr.c | |
10827 | @@ -831,7 +831,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
10828 | ||
10829 | INIT_LIST_HEAD(&del_list); | |
10830 | ||
10831 | - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); | |
10832 | + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); | |
10833 | ||
10834 | list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { | |
10835 | deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; | |
10836 | @@ -867,7 +867,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) | |
10837 | sel_time = fcf->time; | |
10838 | } | |
10839 | } | |
10840 | - put_cpu(); | |
10841 | + put_cpu_light(); | |
10842 | ||
10843 | list_for_each_entry_safe(fcf, next, &del_list, list) { | |
10844 | /* Removes fcf from current list */ | |
10845 | diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c | |
10846 | index 30f9ef0c0d4f..6c686bc01a82 100644 | |
10847 | --- a/drivers/scsi/libfc/fc_exch.c | |
10848 | +++ b/drivers/scsi/libfc/fc_exch.c | |
10849 | @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, | |
10850 | } | |
10851 | memset(ep, 0, sizeof(*ep)); | |
10852 | ||
10853 | - cpu = get_cpu(); | |
10854 | + cpu = get_cpu_light(); | |
10855 | pool = per_cpu_ptr(mp->pool, cpu); | |
10856 | spin_lock_bh(&pool->lock); | |
10857 | - put_cpu(); | |
10858 | + put_cpu_light(); | |
10859 | ||
10860 | /* peek cache of free slot */ | |
10861 | if (pool->left != FC_XID_UNKNOWN) { | |
10862 | diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c | |
10863 | index 9c706d8c1441..d968ffc79c08 100644 | |
10864 | --- a/drivers/scsi/libsas/sas_ata.c | |
10865 | +++ b/drivers/scsi/libsas/sas_ata.c | |
10866 | @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
10867 | /* TODO: audit callers to ensure they are ready for qc_issue to | |
10868 | * unconditionally re-enable interrupts | |
10869 | */ | |
10870 | - local_irq_save(flags); | |
10871 | + local_irq_save_nort(flags); | |
10872 | spin_unlock(ap->lock); | |
10873 | ||
10874 | /* If the device fell off, no sense in issuing commands */ | |
10875 | @@ -255,7 +255,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) | |
10876 | ||
10877 | out: | |
10878 | spin_lock(ap->lock); | |
10879 | - local_irq_restore(flags); | |
10880 | + local_irq_restore_nort(flags); | |
10881 | return ret; | |
10882 | } | |
10883 | ||
10884 | diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h | |
10885 | index fee9eb7c8a60..b42d4adc42dc 100644 | |
10886 | --- a/drivers/scsi/qla2xxx/qla_inline.h | |
10887 | +++ b/drivers/scsi/qla2xxx/qla_inline.h | |
10888 | @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) | |
10889 | { | |
10890 | unsigned long flags; | |
10891 | struct qla_hw_data *ha = rsp->hw; | |
10892 | - local_irq_save(flags); | |
10893 | + local_irq_save_nort(flags); | |
10894 | if (IS_P3P_TYPE(ha)) | |
10895 | qla82xx_poll(0, rsp); | |
10896 | else | |
10897 | ha->isp_ops->intr_handler(0, rsp); | |
10898 | - local_irq_restore(flags); | |
10899 | + local_irq_restore_nort(flags); | |
10900 | } | |
10901 | ||
10902 | static inline uint8_t * | |
10903 | diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c | |
10904 | index 7fc919f7da4d..e03fa17b8670 100644 | |
10905 | --- a/drivers/thermal/x86_pkg_temp_thermal.c | |
10906 | +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |
10907 | @@ -29,6 +29,7 @@ | |
10908 | #include <linux/pm.h> | |
10909 | #include <linux/thermal.h> | |
10910 | #include <linux/debugfs.h> | |
10911 | +#include <linux/swork.h> | |
10912 | #include <asm/cpu_device_id.h> | |
10913 | #include <asm/mce.h> | |
10914 | ||
10915 | @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) | |
10916 | } | |
10917 | } | |
10918 | ||
10919 | -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
10920 | +static void platform_thermal_notify_work(struct swork_event *event) | |
10921 | { | |
10922 | unsigned long flags; | |
10923 | int cpu = smp_processor_id(); | |
10924 | @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
10925 | pkg_work_scheduled[phy_id]) { | |
10926 | disable_pkg_thres_interrupt(); | |
10927 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
10928 | - return -EINVAL; | |
10929 | + return; | |
10930 | } | |
10931 | pkg_work_scheduled[phy_id] = 1; | |
10932 | spin_unlock_irqrestore(&pkg_work_lock, flags); | |
10933 | @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
10934 | schedule_delayed_work_on(cpu, | |
10935 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | |
10936 | msecs_to_jiffies(notify_delay_ms)); | |
10937 | +} | |
10938 | + | |
10939 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
10940 | +static struct swork_event notify_work; | |
10941 | + | |
10942 | +static int thermal_notify_work_init(void) | |
10943 | +{ | |
10944 | + int err; | |
10945 | + | |
10946 | + err = swork_get(); | |
10947 | + if (err) | |
10948 | + return err; | |
10949 | + | |
10950 | + INIT_SWORK(¬ify_work, platform_thermal_notify_work); | |
10951 | return 0; | |
10952 | } | |
10953 | ||
10954 | +static void thermal_notify_work_cleanup(void) | |
10955 | +{ | |
10956 | + swork_put(); | |
10957 | +} | |
10958 | + | |
10959 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
10960 | +{ | |
10961 | + swork_queue(¬ify_work); | |
10962 | + return 0; | |
10963 | +} | |
10964 | + | |
10965 | +#else /* !CONFIG_PREEMPT_RT_FULL */ | |
10966 | + | |
10967 | +static int thermal_notify_work_init(void) { return 0; } | |
10968 | + | |
10969 | +static void thermal_notify_work_cleanup(void) { } | |
10970 | + | |
10971 | +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | |
10972 | +{ | |
10973 | + platform_thermal_notify_work(NULL); | |
10974 | + | |
10975 | + return 0; | |
10976 | +} | |
10977 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
10978 | + | |
10979 | static int find_siblings_cpu(int cpu) | |
10980 | { | |
10981 | int i; | |
10982 | @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) | |
10983 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | |
10984 | return -ENODEV; | |
10985 | ||
10986 | + if (thermal_notify_work_init()) | |
10987 | + return -ENODEV; | |
10988 | + | |
10989 | spin_lock_init(&pkg_work_lock); | |
10990 | platform_thermal_package_notify = | |
10991 | pkg_temp_thermal_platform_thermal_notify; | |
10992 | @@ -608,7 +651,7 @@ err_ret: | |
10993 | kfree(pkg_work_scheduled); | |
10994 | platform_thermal_package_notify = NULL; | |
10995 | platform_thermal_package_rate_control = NULL; | |
10996 | - | |
10997 | + thermal_notify_work_cleanup(); | |
10998 | return -ENODEV; | |
10999 | } | |
11000 | ||
11001 | @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) | |
11002 | mutex_unlock(&phy_dev_list_mutex); | |
11003 | platform_thermal_package_notify = NULL; | |
11004 | platform_thermal_package_rate_control = NULL; | |
11005 | + thermal_notify_work_cleanup(); | |
11006 | for_each_online_cpu(i) | |
11007 | cancel_delayed_work_sync( | |
11008 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | |
11009 | diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c | |
11010 | index 39126460c1f5..af7701ca4d48 100644 | |
11011 | --- a/drivers/tty/serial/8250/8250_core.c | |
11012 | +++ b/drivers/tty/serial/8250/8250_core.c | |
11013 | @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; | |
11014 | ||
11015 | static unsigned int skip_txen_test; /* force skip of txen test at init time */ | |
11016 | ||
11017 | -#define PASS_LIMIT 512 | |
11018 | +/* | |
11019 | + * On -rt we can legitimately have more delays, so don't | |
11020 | + * drop work spuriously and spam the syslog: | |
11022 | + */ | |
11023 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11024 | +# define PASS_LIMIT 1000000 | |
11025 | +#else | |
11026 | +# define PASS_LIMIT 512 | |
11027 | +#endif | |
11028 | ||
11029 | #include <asm/serial.h> | |
11030 | /* | |
11031 | diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c | |
11032 | index 56ccbcefdd85..a0b9e854672c 100644 | |
11033 | --- a/drivers/tty/serial/8250/8250_port.c | |
11034 | +++ b/drivers/tty/serial/8250/8250_port.c | |
11035 | @@ -35,6 +35,7 @@ | |
11036 | #include <linux/nmi.h> | |
11037 | #include <linux/mutex.h> | |
11038 | #include <linux/slab.h> | |
11039 | +#include <linux/kdb.h> | |
11040 | #include <linux/uaccess.h> | |
11041 | #include <linux/pm_runtime.h> | |
11042 | ||
11043 | @@ -2843,9 +2844,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, | |
11044 | ||
11045 | serial8250_rpm_get(up); | |
11046 | ||
11047 | - if (port->sysrq) | |
11048 | + if (port->sysrq || oops_in_progress) | |
11049 | locked = 0; | |
11050 | - else if (oops_in_progress) | |
11051 | + else if (in_kdb_printk()) | |
11052 | locked = spin_trylock_irqsave(&port->lock, flags); | |
11053 | else | |
11054 | spin_lock_irqsave(&port->lock, flags); | |
11055 | diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c | |
11056 | index 899a77187bde..3ff6363b3751 100644 | |
11057 | --- a/drivers/tty/serial/amba-pl011.c | |
11058 | +++ b/drivers/tty/serial/amba-pl011.c | |
11059 | @@ -2067,13 +2067,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
11060 | ||
11061 | clk_enable(uap->clk); | |
11062 | ||
11063 | - local_irq_save(flags); | |
11064 | + /* | |
11065 | + * local_irq_save(flags); | |
11066 | + * | |
11067 | + * This local_irq_save() is nonsense. If we come in via sysrq | |
11068 | + * handling then interrupts are already disabled. Aside of | |
11069 | + * that the port.sysrq check is racy on SMP regardless. | |
11070 | + */ | |
11071 | if (uap->port.sysrq) | |
11072 | locked = 0; | |
11073 | else if (oops_in_progress) | |
11074 | - locked = spin_trylock(&uap->port.lock); | |
11075 | + locked = spin_trylock_irqsave(&uap->port.lock, flags); | |
11076 | else | |
11077 | - spin_lock(&uap->port.lock); | |
11078 | + spin_lock_irqsave(&uap->port.lock, flags); | |
11079 | ||
11080 | /* | |
11081 | * First save the CR then disable the interrupts | |
11082 | @@ -2098,8 +2104,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) | |
11083 | writew(old_cr, uap->port.membase + UART011_CR); | |
11084 | ||
11085 | if (locked) | |
11086 | - spin_unlock(&uap->port.lock); | |
11087 | - local_irq_restore(flags); | |
11088 | + spin_unlock_irqrestore(&uap->port.lock, flags); | |
11089 | ||
11090 | clk_disable(uap->clk); | |
11091 | } | |
11092 | diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c | |
11093 | index 24280d9a05e9..9745fb8b7abb 100644 | |
11094 | --- a/drivers/tty/serial/omap-serial.c | |
11095 | +++ b/drivers/tty/serial/omap-serial.c | |
11096 | @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, | |
11097 | ||
11098 | pm_runtime_get_sync(up->dev); | |
11099 | ||
11100 | - local_irq_save(flags); | |
11101 | - if (up->port.sysrq) | |
11102 | - locked = 0; | |
11103 | - else if (oops_in_progress) | |
11104 | - locked = spin_trylock(&up->port.lock); | |
11105 | + if (up->port.sysrq || oops_in_progress) | |
11106 | + locked = spin_trylock_irqsave(&up->port.lock, flags); | |
11107 | else | |
11108 | - spin_lock(&up->port.lock); | |
11109 | + spin_lock_irqsave(&up->port.lock, flags); | |
11110 | ||
11111 | /* | |
11112 | * First save the IER then disable the interrupts | |
11113 | @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, | |
11114 | pm_runtime_mark_last_busy(up->dev); | |
11115 | pm_runtime_put_autosuspend(up->dev); | |
11116 | if (locked) | |
11117 | - spin_unlock(&up->port.lock); | |
11118 | - local_irq_restore(flags); | |
11119 | + spin_unlock_irqrestore(&up->port.lock, flags); | |
11120 | } | |
11121 | ||
11122 | static int __init | |
11123 | diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c | |
11124 | index edb5305b9d4d..7d5ee8a13ac6 100644 | |
11125 | --- a/drivers/tty/serial/sc16is7xx.c | |
11126 | +++ b/drivers/tty/serial/sc16is7xx.c | |
11127 | @@ -1230,7 +1230,7 @@ static int sc16is7xx_probe(struct device *dev, | |
11128 | ||
11129 | /* Setup interrupt */ | |
11130 | ret = devm_request_irq(dev, irq, sc16is7xx_irq, | |
11131 | - IRQF_ONESHOT | flags, dev_name(dev), s); | |
11132 | + flags, dev_name(dev), s); | |
11133 | if (!ret) | |
11134 | return 0; | |
11135 | ||
11136 | diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c | |
11137 | index f44ce09367bc..5fc9a14721bd 100644 | |
11138 | --- a/drivers/usb/core/hcd.c | |
11139 | +++ b/drivers/usb/core/hcd.c | |
11140 | @@ -1735,9 +1735,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) | |
11141 | * and no one may trigger the above deadlock situation when | |
11142 | * running complete() in tasklet. | |
11143 | */ | |
11144 | - local_irq_save(flags); | |
11145 | + local_irq_save_nort(flags); | |
11146 | urb->complete(urb); | |
11147 | - local_irq_restore(flags); | |
11148 | + local_irq_restore_nort(flags); | |
11149 | ||
11150 | usb_anchor_resume_wakeups(anchor); | |
11151 | atomic_dec(&urb->use_count); | |
11152 | diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c | |
11153 | index 803c503a2e3d..8dd2720aab64 100644 | |
11154 | --- a/drivers/usb/gadget/function/f_fs.c | |
11155 | +++ b/drivers/usb/gadget/function/f_fs.c | |
11156 | @@ -1404,7 +1404,7 @@ static void ffs_data_put(struct ffs_data *ffs) | |
11157 | pr_info("%s(): freeing\n", __func__); | |
11158 | ffs_data_clear(ffs); | |
11159 | BUG_ON(waitqueue_active(&ffs->ev.waitq) || | |
11160 | - waitqueue_active(&ffs->ep0req_completion.wait)); | |
11161 | + swait_active(&ffs->ep0req_completion.wait)); | |
11162 | kfree(ffs->dev_name); | |
11163 | kfree(ffs); | |
11164 | } | |
11165 | diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c | |
11166 | index e57f48f9528f..7544a54056e4 100644 | |
11167 | --- a/drivers/usb/gadget/legacy/inode.c | |
11168 | +++ b/drivers/usb/gadget/legacy/inode.c | |
11169 | @@ -345,7 +345,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
11170 | spin_unlock_irq (&epdata->dev->lock); | |
11171 | ||
11172 | if (likely (value == 0)) { | |
11173 | - value = wait_event_interruptible (done.wait, done.done); | |
11174 | + value = swait_event_interruptible (done.wait, done.done); | |
11175 | if (value != 0) { | |
11176 | spin_lock_irq (&epdata->dev->lock); | |
11177 | if (likely (epdata->ep != NULL)) { | |
11178 | @@ -354,7 +354,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) | |
11179 | usb_ep_dequeue (epdata->ep, epdata->req); | |
11180 | spin_unlock_irq (&epdata->dev->lock); | |
11181 | ||
11182 | - wait_event (done.wait, done.done); | |
11183 | + swait_event (done.wait, done.done); | |
11184 | if (epdata->status == -ECONNRESET) | |
11185 | epdata->status = -EINTR; | |
11186 | } else { | |
11187 | diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c | |
11188 | index f92f5aff0dd5..f9bba26e3655 100644 | |
11189 | --- a/drivers/usb/gadget/udc/atmel_usba_udc.c | |
11190 | +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c | |
11191 | @@ -17,7 +17,9 @@ | |
11192 | #include <linux/device.h> | |
11193 | #include <linux/dma-mapping.h> | |
11194 | #include <linux/list.h> | |
11195 | +#include <linux/mfd/syscon.h> | |
11196 | #include <linux/platform_device.h> | |
11197 | +#include <linux/regmap.h> | |
11198 | #include <linux/usb/ch9.h> | |
11199 | #include <linux/usb/gadget.h> | |
11200 | #include <linux/usb/atmel_usba_udc.h> | |
11201 | @@ -1888,20 +1890,15 @@ static int atmel_usba_stop(struct usb_gadget *gadget) | |
11202 | #ifdef CONFIG_OF | |
11203 | static void at91sam9rl_toggle_bias(struct usba_udc *udc, int is_on) | |
11204 | { | |
11205 | - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR); | |
11206 | - | |
11207 | - if (is_on) | |
11208 | - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN); | |
11209 | - else | |
11210 | - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN)); | |
11211 | + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, | |
11212 | + is_on ? AT91_PMC_BIASEN : 0); | |
11213 | } | |
11214 | ||
11215 | static void at91sam9g45_pulse_bias(struct usba_udc *udc) | |
11216 | { | |
11217 | - unsigned int uckr = at91_pmc_read(AT91_CKGR_UCKR); | |
11218 | - | |
11219 | - at91_pmc_write(AT91_CKGR_UCKR, uckr & ~(AT91_PMC_BIASEN)); | |
11220 | - at91_pmc_write(AT91_CKGR_UCKR, uckr | AT91_PMC_BIASEN); | |
11221 | + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, 0); | |
11222 | + regmap_update_bits(udc->pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN, | |
11223 | + AT91_PMC_BIASEN); | |
11224 | } | |
11225 | ||
11226 | static const struct usba_udc_errata at91sam9rl_errata = { | |
11227 | @@ -1938,6 +1935,9 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, | |
11228 | return ERR_PTR(-EINVAL); | |
11229 | ||
11230 | udc->errata = match->data; | |
11231 | + udc->pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc"); | |
11232 | + if (udc->errata && IS_ERR(udc->pmc)) | |
11233 | + return ERR_CAST(udc->pmc); | |
11234 | ||
11235 | udc->num_ep = 0; | |
11236 | ||
11237 | diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.h b/drivers/usb/gadget/udc/atmel_usba_udc.h | |
11238 | index ea448a344767..3e1c9d589dfa 100644 | |
11239 | --- a/drivers/usb/gadget/udc/atmel_usba_udc.h | |
11240 | +++ b/drivers/usb/gadget/udc/atmel_usba_udc.h | |
11241 | @@ -354,6 +354,8 @@ struct usba_udc { | |
11242 | struct dentry *debugfs_root; | |
11243 | struct dentry *debugfs_regs; | |
11244 | #endif | |
11245 | + | |
11246 | + struct regmap *pmc; | |
11247 | }; | |
11248 | ||
11249 | static inline struct usba_ep *to_usba_ep(struct usb_ep *ep) | |
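With the at91_pmc_read()/at91_pmc_write() accessors gone (see the at91_pmc.h hunk later in this section), the UDC reaches the PMC through a syscon regmap looked up at probe time, and regmap_update_bits() replaces the open-coded read-modify-write, with locking supplied by the regmap core. The lookup-and-toggle pattern, sketched with the same compatible string as the hunk:

    struct regmap *pmc;

    pmc = syscon_regmap_lookup_by_compatible("atmel,at91sam9g45-pmc");
    if (IS_ERR(pmc))
            return PTR_ERR(pmc);

    /* set or clear AT91_PMC_BIASEN in AT91_CKGR_UCKR in one atomic RMW */
    regmap_update_bits(pmc, AT91_CKGR_UCKR, AT91_PMC_BIASEN,
                       is_on ? AT91_PMC_BIASEN : 0);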
11250 | diff --git a/fs/aio.c b/fs/aio.c | |
11251 | index fe4f49212b99..c3194afdc3df 100644 | |
11252 | --- a/fs/aio.c |
11253 | +++ b/fs/aio.c | |
11254 | @@ -40,6 +40,7 @@ | |
11255 | #include <linux/ramfs.h> | |
11256 | #include <linux/percpu-refcount.h> | |
11257 | #include <linux/mount.h> | |
11258 | +#include <linux/swork.h> | |
11259 | ||
11260 | #include <asm/kmap_types.h> | |
11261 | #include <asm/uaccess.h> | |
11262 | @@ -115,7 +116,7 @@ struct kioctx { | |
11263 | struct page **ring_pages; | |
11264 | long nr_pages; | |
11265 | ||
11266 | - struct work_struct free_work; | |
11267 | + struct swork_event free_work; | |
11268 | ||
11269 | /* | |
11270 | * signals when all in-flight requests are done | |
11271 | @@ -258,6 +259,7 @@ static int __init aio_setup(void) | |
11272 | .mount = aio_mount, |
11273 | .kill_sb = kill_anon_super, | |
11274 | }; | |
11275 | + BUG_ON(swork_get()); | |
11276 | aio_mnt = kern_mount(&aio_fs); | |
11277 | if (IS_ERR(aio_mnt)) | |
11278 | panic("Failed to create aio fs mount."); | |
11279 | @@ -573,9 +575,9 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) | |
11280 | return cancel(&kiocb->common); |
11281 | } | |
11282 | ||
11283 | -static void free_ioctx(struct work_struct *work) | |
11284 | +static void free_ioctx(struct swork_event *sev) | |
11285 | { | |
11286 | - struct kioctx *ctx = container_of(work, struct kioctx, free_work); | |
11287 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
11288 | ||
11289 | pr_debug("freeing %p\n", ctx); | |
11290 | ||
11291 | @@ -594,8 +596,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
11292 | if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) |
11293 | complete(&ctx->rq_wait->comp); | |
11294 | ||
11295 | - INIT_WORK(&ctx->free_work, free_ioctx); | |
11296 | - schedule_work(&ctx->free_work); | |
11297 | + INIT_SWORK(&ctx->free_work, free_ioctx); | |
11298 | + swork_queue(&ctx->free_work); | |
11299 | } | |
11300 | ||
11301 | /* | |
11302 | @@ -603,9 +605,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) | |
11303 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - |
11304 | * now it's safe to cancel any that need to be. | |
11305 | */ | |
11306 | -static void free_ioctx_users(struct percpu_ref *ref) | |
11307 | +static void free_ioctx_users_work(struct swork_event *sev) | |
11308 | { | |
11309 | - struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
11310 | + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); | |
11311 | struct aio_kiocb *req; | |
11312 | ||
11313 | spin_lock_irq(&ctx->ctx_lock); | |
11314 | @@ -624,6 +626,14 @@ static void free_ioctx_users(struct percpu_ref *ref) | |
11315 | percpu_ref_put(&ctx->reqs); |
11316 | } | |
11317 | ||
11318 | +static void free_ioctx_users(struct percpu_ref *ref) | |
11319 | +{ | |
11320 | + struct kioctx *ctx = container_of(ref, struct kioctx, users); | |
11321 | + | |
11322 | + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); | |
11323 | + swork_queue(&ctx->free_work); | |
11324 | +} | |
11325 | + | |
11326 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |
11327 | { | |
11328 | unsigned i, new_nr; | |
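free_ioctx_users() is a percpu_ref release callback and may fire from hard-irq context, yet it takes ctx->ctx_lock, which is a sleeping lock on RT. The hunk therefore splits it: the ref callback only queues a simple-work (swork) item, and the locking runs later in the swork thread's process context (the swork_get() call in aio_setup() brings that thread up). The deferral shape, with hypothetical my_* names:

    static void my_release_work(struct swork_event *sev)
    {
            struct kioctx *ctx = container_of(sev, struct kioctx, free_work);

            spin_lock_irq(&ctx->ctx_lock);   /* process context now */
            /* ... cancel pending requests ... */
            spin_unlock_irq(&ctx->ctx_lock);
    }

    static void my_release(struct percpu_ref *ref)
    {
            struct kioctx *ctx = container_of(ref, struct kioctx, users);

            INIT_SWORK(&ctx->free_work, my_release_work);
            swork_queue(&ctx->free_work);    /* runs in the swork thread */
    }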
11329 | diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h | |
11330 | index 502d3892d8a4..05af8d3e6e88 100644 | |
11331 | --- a/fs/autofs4/autofs_i.h | |
11332 | +++ b/fs/autofs4/autofs_i.h | |
11333 | @@ -34,6 +34,7 @@ | |
11334 | #include <linux/sched.h> | |
11335 | #include <linux/mount.h> | |
11336 | #include <linux/namei.h> | |
11337 | +#include <linux/delay.h> | |
11338 | #include <asm/current.h> | |
11339 | #include <asm/uaccess.h> | |
11340 | ||
11341 | diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c | |
11342 | index 7a5a598a2d94..d08bcdc30566 100644 | |
11343 | --- a/fs/autofs4/expire.c | |
11344 | +++ b/fs/autofs4/expire.c | |
11345 | @@ -150,7 +150,7 @@ again: | |
11346 | parent = p->d_parent; | |
11347 | if (!spin_trylock(&parent->d_lock)) { | |
11348 | spin_unlock(&p->d_lock); | |
11349 | - cpu_relax(); | |
11350 | + cpu_chill(); | |
11351 | goto relock; | |
11352 | } | |
11353 | spin_unlock(&p->d_lock); | |
11354 | diff --git a/fs/buffer.c b/fs/buffer.c | |
11355 | index 4f4cd959da7c..72b27e17b907 100644 | |
11356 | --- a/fs/buffer.c | |
11357 | +++ b/fs/buffer.c | |
11358 | @@ -305,8 +305,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11359 | * decide that the page is now completely done. | |
11360 | */ | |
11361 | first = page_buffers(page); | |
11362 | - local_irq_save(flags); | |
11363 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
11364 | + flags = bh_uptodate_lock_irqsave(first); | |
11365 | clear_buffer_async_read(bh); | |
11366 | unlock_buffer(bh); | |
11367 | tmp = bh; | |
11368 | @@ -319,8 +318,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11369 | } | |
11370 | tmp = tmp->b_this_page; | |
11371 | } while (tmp != bh); | |
11372 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11373 | - local_irq_restore(flags); | |
11374 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11375 | ||
11376 | /* | |
11377 | * If none of the buffers had errors and they are all | |
11378 | @@ -332,9 +330,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11379 | return; | |
11380 | ||
11381 | still_busy: | |
11382 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11383 | - local_irq_restore(flags); | |
11384 | - return; | |
11385 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11386 | } | |
11387 | ||
11388 | /* | |
11389 | @@ -362,8 +358,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
11390 | } | |
11391 | ||
11392 | first = page_buffers(page); | |
11393 | - local_irq_save(flags); | |
11394 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
11395 | + flags = bh_uptodate_lock_irqsave(first); | |
11396 | ||
11397 | clear_buffer_async_write(bh); | |
11398 | unlock_buffer(bh); | |
11399 | @@ -375,15 +370,12 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |
11400 | } | |
11401 | tmp = tmp->b_this_page; | |
11402 | } | |
11403 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11404 | - local_irq_restore(flags); | |
11405 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11406 | end_page_writeback(page); | |
11407 | return; | |
11408 | ||
11409 | still_busy: | |
11410 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11411 | - local_irq_restore(flags); | |
11412 | - return; | |
11413 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11414 | } | |
11415 | EXPORT_SYMBOL(end_buffer_async_write); | |
11416 | ||
11417 | @@ -3325,6 +3317,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) | |
11418 | struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); | |
11419 | if (ret) { | |
11420 | INIT_LIST_HEAD(&ret->b_assoc_buffers); | |
11421 | + buffer_head_init_locks(ret); | |
11422 | preempt_disable(); | |
11423 | __this_cpu_inc(bh_accounting.nr); | |
11424 | recalc_bh_state(); | |
11425 | diff --git a/fs/dcache.c b/fs/dcache.c | |
11426 | index 71b6056ad35d..e80471cbfc19 100644 | |
11427 | --- a/fs/dcache.c | |
11428 | +++ b/fs/dcache.c | |
11429 | @@ -19,6 +19,7 @@ | |
11430 | #include <linux/mm.h> | |
11431 | #include <linux/fs.h> | |
11432 | #include <linux/fsnotify.h> | |
11433 | +#include <linux/delay.h> | |
11434 | #include <linux/slab.h> | |
11435 | #include <linux/init.h> | |
11436 | #include <linux/hash.h> | |
11437 | @@ -747,6 +748,8 @@ static inline bool fast_dput(struct dentry *dentry) | |
11438 | */ | |
11439 | void dput(struct dentry *dentry) | |
11440 | { | |
11441 | + struct dentry *parent; | |
11442 | + | |
11443 | if (unlikely(!dentry)) | |
11444 | return; | |
11445 | ||
11446 | @@ -783,9 +786,18 @@ repeat: | |
11447 | return; | |
11448 | ||
11449 | kill_it: | |
11450 | - dentry = dentry_kill(dentry); | |
11451 | - if (dentry) { | |
11452 | - cond_resched(); | |
11453 | + parent = dentry_kill(dentry); | |
11454 | + if (parent) { | |
11455 | + int r; | |
11456 | + | |
11457 | + if (parent == dentry) { | |
11458 | + /* the task with the highest priority won't schedule */ | |
11459 | + r = cond_resched(); | |
11460 | + if (!r) | |
11461 | + cpu_chill(); | |
11462 | + } else { | |
11463 | + dentry = parent; | |
11464 | + } | |
11465 | goto repeat; | |
11466 | } | |
11467 | } | |
11468 | @@ -2394,7 +2406,7 @@ again: | |
11469 | if (dentry->d_lockref.count == 1) { | |
11470 | if (!spin_trylock(&inode->i_lock)) { | |
11471 | spin_unlock(&dentry->d_lock); | |
11472 | - cpu_relax(); | |
11473 | + cpu_chill(); | |
11474 | goto again; | |
11475 | } | |
11476 | dentry->d_flags &= ~DCACHE_CANT_MOUNT; | |
11477 | diff --git a/fs/eventpoll.c b/fs/eventpoll.c | |
11478 | index 1e009cad8d5c..d0c12504d3b4 100644 | |
11479 | --- a/fs/eventpoll.c | |
11480 | +++ b/fs/eventpoll.c | |
11481 | @@ -505,12 +505,12 @@ static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests) | |
11482 | */ | |
11483 | static void ep_poll_safewake(wait_queue_head_t *wq) | |
11484 | { | |
11485 | - int this_cpu = get_cpu(); | |
11486 | + int this_cpu = get_cpu_light(); | |
11487 | ||
11488 | ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, | |
11489 | ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); | |
11490 | ||
11491 | - put_cpu(); | |
11492 | + put_cpu_light(); | |
11493 | } | |
11494 | ||
11495 | static void ep_remove_wait_queue(struct eppoll_entry *pwq) | |
11496 | diff --git a/fs/exec.c b/fs/exec.c | |
11497 | index b06623a9347f..e7760b7b692c 100644 | |
11498 | --- a/fs/exec.c | |
11499 | +++ b/fs/exec.c | |
11500 | @@ -865,12 +865,14 @@ static int exec_mmap(struct mm_struct *mm) | |
11501 | } | |
11502 | } | |
11503 | task_lock(tsk); | |
11504 | + preempt_disable_rt(); | |
11505 | active_mm = tsk->active_mm; | |
11506 | tsk->mm = mm; | |
11507 | tsk->active_mm = mm; | |
11508 | activate_mm(active_mm, mm); | |
11509 | tsk->mm->vmacache_seqnum = 0; | |
11510 | vmacache_flush(tsk); | |
11511 | + preempt_enable_rt(); | |
11512 | task_unlock(tsk); | |
11513 | if (old_mm) { | |
11514 | up_read(&old_mm->mmap_sem); | |
11515 | diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h | |
11516 | index 9db5500d63d9..5951c495d124 100644 | |
11517 | --- a/fs/f2fs/f2fs.h | |
11518 | +++ b/fs/f2fs/f2fs.h | |
11519 | @@ -24,7 +24,6 @@ | |
11520 | ||
11521 | #ifdef CONFIG_F2FS_CHECK_FS | |
11522 | #define f2fs_bug_on(sbi, condition) BUG_ON(condition) | |
11523 | -#define f2fs_down_write(x, y) down_write_nest_lock(x, y) | |
11524 | #else | |
11525 | #define f2fs_bug_on(sbi, condition) \ | |
11526 | do { \ | |
11527 | @@ -33,7 +32,6 @@ | |
11528 | set_sbi_flag(sbi, SBI_NEED_FSCK); \ | |
11529 | } \ | |
11530 | } while (0) | |
11531 | -#define f2fs_down_write(x, y) down_write(x) | |
11532 | #endif | |
11533 | ||
11534 | /* | |
11535 | @@ -959,7 +957,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) | |
11536 | ||
11537 | static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) | |
11538 | { | |
11539 | - f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex); | |
11540 | + down_write(&sbi->cp_rwsem); | |
11541 | } | |
11542 | ||
11543 | static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) | |
11544 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c | |
11545 | index 684996c8a3a4..6e18a06aaabe 100644 | |
11546 | --- a/fs/jbd2/checkpoint.c | |
11547 | +++ b/fs/jbd2/checkpoint.c | |
11548 | @@ -116,6 +116,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |
11549 | nblocks = jbd2_space_needed(journal); | |
11550 | while (jbd2_log_space_left(journal) < nblocks) { | |
11551 | write_unlock(&journal->j_state_lock); | |
11552 | + if (current->plug) | |
11553 | + io_schedule(); | |
11554 | mutex_lock(&journal->j_checkpoint_mutex); | |
11555 | ||
11556 | /* | |
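The added io_schedule() flushes the calling task's block plug before blocking on j_checkpoint_mutex: bios batched behind current->plug would otherwise sit unsubmitted while the task sleeps, and checkpoint progress may depend on exactly that I/O. For reference, the plugging API this relies on (a generic sketch, not from this patch):

    struct blk_plug plug;

    blk_start_plug(&plug);
    /* submit a batch of bios; they queue behind current->plug */
    blk_finish_plug(&plug);   /* explicit flush; blocking in the scheduler flushes too */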
11557 | diff --git a/fs/namespace.c b/fs/namespace.c | |
11558 | index 5be02a0635be..1f3725bbd04b 100644 | |
11559 | --- a/fs/namespace.c | |
11560 | +++ b/fs/namespace.c | |
11561 | @@ -14,6 +14,7 @@ | |
11562 | #include <linux/mnt_namespace.h> | |
11563 | #include <linux/user_namespace.h> | |
11564 | #include <linux/namei.h> | |
11565 | +#include <linux/delay.h> | |
11566 | #include <linux/security.h> | |
11567 | #include <linux/idr.h> | |
11568 | #include <linux/init.h> /* init_rootfs */ | |
11569 | @@ -353,8 +354,11 @@ int __mnt_want_write(struct vfsmount *m) | |
11570 | * incremented count after it has set MNT_WRITE_HOLD. | |
11571 | */ | |
11572 | smp_mb(); | |
11573 | - while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) | |
11574 | - cpu_relax(); | |
11575 | + while (ACCESS_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { | |
11576 | + preempt_enable(); | |
11577 | + cpu_chill(); | |
11578 | + preempt_disable(); | |
11579 | + } | |
11580 | /* | |
11581 | * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will | |
11582 | * be set to match its requirements. So we must not load that until | |
11583 | diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c | |
11584 | index 7521e11db728..f0de4b6b8bf3 100644 | |
11585 | --- a/fs/ntfs/aops.c | |
11586 | +++ b/fs/ntfs/aops.c | |
11587 | @@ -107,8 +107,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11588 | "0x%llx.", (unsigned long long)bh->b_blocknr); | |
11589 | } | |
11590 | first = page_buffers(page); | |
11591 | - local_irq_save(flags); | |
11592 | - bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | |
11593 | + flags = bh_uptodate_lock_irqsave(first); | |
11594 | clear_buffer_async_read(bh); | |
11595 | unlock_buffer(bh); | |
11596 | tmp = bh; | |
11597 | @@ -123,8 +122,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11598 | } | |
11599 | tmp = tmp->b_this_page; | |
11600 | } while (tmp != bh); | |
11601 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11602 | - local_irq_restore(flags); | |
11603 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11604 | /* | |
11605 | * If none of the buffers had errors then we can set the page uptodate, | |
11606 | * but we first have to perform the post read mst fixups, if the | |
11607 | @@ -145,13 +143,13 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11608 | recs = PAGE_CACHE_SIZE / rec_size; | |
11609 | /* Should have been verified before we got here... */ | |
11610 | BUG_ON(!recs); | |
11611 | - local_irq_save(flags); | |
11612 | + local_irq_save_nort(flags); | |
11613 | kaddr = kmap_atomic(page); | |
11614 | for (i = 0; i < recs; i++) | |
11615 | post_read_mst_fixup((NTFS_RECORD*)(kaddr + | |
11616 | i * rec_size), rec_size); | |
11617 | kunmap_atomic(kaddr); | |
11618 | - local_irq_restore(flags); | |
11619 | + local_irq_restore_nort(flags); | |
11620 | flush_dcache_page(page); | |
11621 | if (likely(page_uptodate && !PageError(page))) | |
11622 | SetPageUptodate(page); | |
11623 | @@ -159,9 +157,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | |
11624 | unlock_page(page); | |
11625 | return; | |
11626 | still_busy: | |
11627 | - bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | |
11628 | - local_irq_restore(flags); | |
11629 | - return; | |
11630 | + bh_uptodate_unlock_irqrestore(first, flags); | |
11631 | } | |
11632 | ||
11633 | /** | |
11634 | diff --git a/fs/timerfd.c b/fs/timerfd.c | |
11635 | index 053818dd6c18..c4bc14fe0085 100644 | |
11636 | --- a/fs/timerfd.c | |
11637 | +++ b/fs/timerfd.c | |
11638 | @@ -450,7 +450,10 @@ static int do_timerfd_settime(int ufd, int flags, | |
11639 | break; | |
11640 | } | |
11641 | spin_unlock_irq(&ctx->wqh.lock); | |
11642 | - cpu_relax(); | |
11643 | + if (isalarm(ctx)) | |
11644 | + hrtimer_wait_for_timer(&ctx->t.alarm.timer); | |
11645 | + else | |
11646 | + hrtimer_wait_for_timer(&ctx->t.tmr); | |
11647 | } | |
11648 | ||
11649 | /* | |
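Spinning with cpu_relax() until a running hrtimer callback completes can livelock on RT when the spinner outranks the softirq thread executing the callback, so the loop now sleeps via hrtimer_wait_for_timer() (declared in the hrtimer.h hunk below; on !RT it still falls back to cpu_relax()). The cancel-retry loop it sits in, sketched:

    for (;;) {
            spin_lock_irq(&ctx->wqh.lock);
            if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
                    break;                        /* inactive or dequeued */
            spin_unlock_irq(&ctx->wqh.lock);
            hrtimer_wait_for_timer(&ctx->t.tmr);  /* callback running: sleep */
    }
    /* ... rearm, then spin_unlock_irq(&ctx->wqh.lock) ... */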
11650 | diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h | |
11651 | index 323e5daece54..cc5fbd534fd4 100644 | |
11652 | --- a/include/acpi/platform/aclinux.h | |
11653 | +++ b/include/acpi/platform/aclinux.h | |
11654 | @@ -127,6 +127,7 @@ | |
11655 | ||
11656 | #define acpi_cache_t struct kmem_cache | |
11657 | #define acpi_spinlock spinlock_t * | |
11658 | +#define acpi_raw_spinlock raw_spinlock_t * | |
11659 | #define acpi_cpu_flags unsigned long | |
11660 | ||
11661 | /* Use native linux version of acpi_os_allocate_zeroed */ | |
11662 | @@ -145,6 +146,20 @@ | |
11663 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_thread_id | |
11664 | #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_create_lock | |
11665 | ||
11666 | +#define acpi_os_create_raw_lock(__handle) \ | |
11667 | +({ \ | |
11668 | + raw_spinlock_t *lock = ACPI_ALLOCATE(sizeof(*lock)); \ | |
11669 | + \ | |
11670 | + if (lock) { \ | |
11671 | + *(__handle) = lock; \ | |
11672 | + raw_spin_lock_init(*(__handle)); \ | |
11673 | + } \ | |
11674 | + lock ? AE_OK : AE_NO_MEMORY; \ | |
11675 | + }) | |
11676 | + | |
11677 | +#define acpi_os_delete_raw_lock(__handle) kfree(__handle) | |
11678 | + | |
11679 | + | |
11680 | /* | |
11681 | * OSL interfaces used by debugger/disassembler | |
11682 | */ | |
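acpi_spinlock becomes a sleeping lock once RT converts spinlock_t, but a few OSL locks are taken in contexts that cannot sleep (the hardware lock in the low-level idle path, for instance). The new acpi_raw_spinlock type and its create/delete macros expose a raw_spinlock_t behind the usual handle interface. Usage sketch, with a hypothetical handle name:

    acpi_raw_spinlock lock;            /* i.e. raw_spinlock_t * */

    if (ACPI_FAILURE(acpi_os_create_raw_lock(&lock)))
            return AE_NO_MEMORY;

    raw_spin_lock(lock);
    /* hard-atomic critical section, even on PREEMPT_RT */
    raw_spin_unlock(lock);

    acpi_os_delete_raw_lock(lock);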
11683 | diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h | |
11684 | index 630dd2372238..850e4d993a88 100644 | |
11685 | --- a/include/asm-generic/bug.h | |
11686 | +++ b/include/asm-generic/bug.h | |
11687 | @@ -206,6 +206,20 @@ extern void warn_slowpath_null(const char *file, const int line); | |
11688 | # define WARN_ON_SMP(x) ({0;}) | |
11689 | #endif | |
11690 | ||
11691 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11692 | +# define BUG_ON_RT(c) BUG_ON(c) | |
11693 | +# define BUG_ON_NONRT(c) do { } while (0) | |
11694 | +# define WARN_ON_RT(condition) WARN_ON(condition) | |
11695 | +# define WARN_ON_NONRT(condition) do { } while (0) | |
11696 | +# define WARN_ON_ONCE_NONRT(condition) do { } while (0) | |
11697 | +#else | |
11698 | +# define BUG_ON_RT(c) do { } while (0) | |
11699 | +# define BUG_ON_NONRT(c) BUG_ON(c) | |
11700 | +# define WARN_ON_RT(condition) do { } while (0) | |
11701 | +# define WARN_ON_NONRT(condition) WARN_ON(condition) | |
11702 | +# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) | |
11703 | +#endif | |
11704 | + | |
11705 | #endif /* __ASSEMBLY__ */ | |
11706 | ||
11707 | #endif | |
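These wrappers compile an assertion in only for the kernel flavour where it holds: the *_NONRT variants express invariants that RT deliberately breaks (e.g. "interrupts are disabled here"), the *_RT variants the opposite. Illustrative use, not taken from this hunk:

    /* true on mainline, intentionally false once the lock sleeps on RT */
    WARN_ON_NONRT(!irqs_disabled());

    /* checked only on RT kernels */
    WARN_ON_RT(in_atomic());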
11708 | diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h | |
11709 | index 5d8ffa3e6f8c..c1cde3577551 100644 | |
11710 | --- a/include/asm-generic/preempt.h | |
11711 | +++ b/include/asm-generic/preempt.h | |
11712 | @@ -7,10 +7,10 @@ | |
11713 | ||
11714 | static __always_inline int preempt_count(void) | |
11715 | { | |
11716 | - return current_thread_info()->preempt_count; | |
11717 | + return READ_ONCE(current_thread_info()->preempt_count); | |
11718 | } | |
11719 | ||
11720 | -static __always_inline int *preempt_count_ptr(void) | |
11721 | +static __always_inline volatile int *preempt_count_ptr(void) | |
11722 | { | |
11723 | return ¤t_thread_info()->preempt_count; | |
11724 | } | |
11725 | diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h | |
11726 | index daf17d70aeca..463df8954255 100644 | |
11727 | --- a/include/linux/blk-mq.h | |
11728 | +++ b/include/linux/blk-mq.h | |
11729 | @@ -212,6 +212,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) | |
11730 | ||
11731 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); | |
11732 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); | |
11733 | +void __blk_mq_complete_request_remote_work(struct work_struct *work); | |
11734 | ||
11735 | int blk_mq_request_started(struct request *rq); | |
11736 | void blk_mq_start_request(struct request *rq); | |
11737 | diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h | |
11738 | index fe14382f9664..a82143ad6702 100644 | |
11739 | --- a/include/linux/blkdev.h | |
11740 | +++ b/include/linux/blkdev.h | |
11741 | @@ -89,6 +89,7 @@ struct request { | |
11742 | struct list_head queuelist; | |
11743 | union { | |
11744 | struct call_single_data csd; | |
11745 | + struct work_struct work; | |
11746 | unsigned long fifo_time; | |
11747 | }; | |
11748 | ||
11749 | @@ -455,7 +456,7 @@ struct request_queue { | |
11750 | struct throtl_data *td; | |
11751 | #endif | |
11752 | struct rcu_head rcu_head; | |
11753 | - wait_queue_head_t mq_freeze_wq; | |
11754 | + struct swait_queue_head mq_freeze_wq; | |
11755 | struct percpu_ref q_usage_counter; | |
11756 | struct list_head all_q_node; | |
11757 | ||
11758 | diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h | |
11759 | index 8fdcb783197d..d07dbeec7bc1 100644 | |
11760 | --- a/include/linux/bottom_half.h | |
11761 | +++ b/include/linux/bottom_half.h | |
11762 | @@ -3,6 +3,39 @@ | |
11763 | ||
11764 | #include <linux/preempt.h> | |
11765 | ||
11766 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11767 | + | |
11768 | +extern void __local_bh_disable(void); | |
11769 | +extern void _local_bh_enable(void); | |
11770 | +extern void __local_bh_enable(void); | |
11771 | + | |
11772 | +static inline void local_bh_disable(void) | |
11773 | +{ | |
11774 | + __local_bh_disable(); | |
11775 | +} | |
11776 | + | |
11777 | +static inline void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
11778 | +{ | |
11779 | + __local_bh_disable(); | |
11780 | +} | |
11781 | + | |
11782 | +static inline void local_bh_enable(void) | |
11783 | +{ | |
11784 | + __local_bh_enable(); | |
11785 | +} | |
11786 | + | |
11787 | +static inline void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) | |
11788 | +{ | |
11789 | + __local_bh_enable(); | |
11790 | +} | |
11791 | + | |
11792 | +static inline void local_bh_enable_ip(unsigned long ip) | |
11793 | +{ | |
11794 | + __local_bh_enable(); | |
11795 | +} | |
11796 | + | |
11797 | +#else | |
11798 | + | |
11799 | #ifdef CONFIG_TRACE_IRQFLAGS | |
11800 | extern void __local_bh_disable_ip(unsigned long ip, unsigned int cnt); | |
11801 | #else | |
11802 | @@ -30,5 +63,6 @@ static inline void local_bh_enable(void) | |
11803 | { | |
11804 | __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET); | |
11805 | } | |
11806 | +#endif | |
11807 | ||
11808 | #endif /* _LINUX_BH_H */ | |
11809 | diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h | |
11810 | index 89d9aa9e79bf..4a201008b02d 100644 | |
11811 | --- a/include/linux/buffer_head.h | |
11812 | +++ b/include/linux/buffer_head.h | |
11813 | @@ -75,8 +75,50 @@ struct buffer_head { | |
11814 | struct address_space *b_assoc_map; /* mapping this buffer is | |
11815 | associated with */ | |
11816 | atomic_t b_count; /* users using this buffer_head */ | |
11817 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11818 | + spinlock_t b_uptodate_lock; | |
11819 | +#if IS_ENABLED(CONFIG_JBD2) | |
11820 | + spinlock_t b_state_lock; | |
11821 | + spinlock_t b_journal_head_lock; | |
11822 | +#endif | |
11823 | +#endif | |
11824 | }; | |
11825 | ||
11826 | +static inline unsigned long bh_uptodate_lock_irqsave(struct buffer_head *bh) | |
11827 | +{ | |
11828 | + unsigned long flags; | |
11829 | + | |
11830 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11831 | + local_irq_save(flags); | |
11832 | + bit_spin_lock(BH_Uptodate_Lock, &bh->b_state); | |
11833 | +#else | |
11834 | + spin_lock_irqsave(&bh->b_uptodate_lock, flags); | |
11835 | +#endif | |
11836 | + return flags; | |
11837 | +} | |
11838 | + | |
11839 | +static inline void | |
11840 | +bh_uptodate_unlock_irqrestore(struct buffer_head *bh, unsigned long flags) | |
11841 | +{ | |
11842 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
11843 | + bit_spin_unlock(BH_Uptodate_Lock, &bh->b_state); | |
11844 | + local_irq_restore(flags); | |
11845 | +#else | |
11846 | + spin_unlock_irqrestore(&bh->b_uptodate_lock, flags); | |
11847 | +#endif | |
11848 | +} | |
11849 | + | |
11850 | +static inline void buffer_head_init_locks(struct buffer_head *bh) | |
11851 | +{ | |
11852 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
11853 | + spin_lock_init(&bh->b_uptodate_lock); | |
11854 | +#if IS_ENABLED(CONFIG_JBD2) | |
11855 | + spin_lock_init(&bh->b_state_lock); | |
11856 | + spin_lock_init(&bh->b_journal_head_lock); | |
11857 | +#endif | |
11858 | +#endif | |
11859 | +} | |
11860 | + | |
11861 | /* | |
11862 | * macro tricks to expand the set_buffer_foo(), clear_buffer_foo() | |
11863 | * and buffer_foo() functions. | |
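BH_Uptodate_Lock is a bit spinlock inside b_state, taken with interrupts disabled; that is fine on mainline, but a bit spinlock cannot be substituted by a sleeping lock in place, so RT grows the buffer_head instead. The helpers above hide the split: bit_spin_lock() under local_irq_save() on !RT, a real per-buffer spinlock (sleeping, priority-inheriting) on RT. Every end-I/O handler converted earlier (fs/buffer.c, fs/ntfs/aops.c) reduces to:

    unsigned long flags;
    struct buffer_head *first = page_buffers(page);

    flags = bh_uptodate_lock_irqsave(first);
    /* ... walk the page's buffer ring, clear the async bits ... */
    bh_uptodate_unlock_irqrestore(first, flags);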
11864 | diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h | |
11865 | index 8da263299754..0cc474291e08 100644 | |
11866 | --- a/include/linux/cgroup-defs.h | |
11867 | +++ b/include/linux/cgroup-defs.h | |
11868 | @@ -16,6 +16,7 @@ | |
11869 | #include <linux/percpu-refcount.h> | |
11870 | #include <linux/percpu-rwsem.h> | |
11871 | #include <linux/workqueue.h> | |
11872 | +#include <linux/swork.h> | |
11873 | ||
11874 | #ifdef CONFIG_CGROUPS | |
11875 | ||
11876 | @@ -142,6 +143,7 @@ struct cgroup_subsys_state { | |
11877 | /* percpu_ref killing and RCU release */ | |
11878 | struct rcu_head rcu_head; | |
11879 | struct work_struct destroy_work; | |
11880 | + struct swork_event destroy_swork; | |
11881 | }; | |
11882 | ||
11883 | /* | |
11884 | diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h | |
11885 | index 1e6932222e11..17f413bbbedf 100644 | |
11886 | --- a/include/linux/clk/at91_pmc.h | |
11887 | +++ b/include/linux/clk/at91_pmc.h | |
11888 | @@ -16,18 +16,6 @@ | |
11889 | #ifndef AT91_PMC_H | |
11890 | #define AT91_PMC_H | |
11891 | ||
11892 | -#ifndef __ASSEMBLY__ | |
11893 | -extern void __iomem *at91_pmc_base; | |
11894 | - | |
11895 | -#define at91_pmc_read(field) \ | |
11896 | - readl_relaxed(at91_pmc_base + field) | |
11897 | - | |
11898 | -#define at91_pmc_write(field, value) \ | |
11899 | - writel_relaxed(value, at91_pmc_base + field) | |
11900 | -#else | |
11901 | -.extern at91_pmc_base | |
11902 | -#endif | |
11903 | - | |
11904 | #define AT91_PMC_SCER 0x00 /* System Clock Enable Register */ | |
11905 | #define AT91_PMC_SCDR 0x04 /* System Clock Disable Register */ | |
11906 | ||
11907 | diff --git a/include/linux/completion.h b/include/linux/completion.h | |
11908 | index 5d5aaae3af43..3bca1590e29f 100644 | |
11909 | --- a/include/linux/completion.h | |
11910 | +++ b/include/linux/completion.h | |
11911 | @@ -7,8 +7,7 @@ | |
11912 | * Atomic wait-for-completion handler data structures. | |
11913 | * See kernel/sched/completion.c for details. | |
11914 | */ | |
11915 | - | |
11916 | -#include <linux/wait.h> | |
11917 | +#include <linux/swait.h> | |
11918 | ||
11919 | /* | |
11920 | * struct completion - structure used to maintain state for a "completion" | |
11921 | @@ -24,11 +23,11 @@ | |
11922 | */ | |
11923 | struct completion { | |
11924 | unsigned int done; | |
11925 | - wait_queue_head_t wait; | |
11926 | + struct swait_queue_head wait; | |
11927 | }; | |
11928 | ||
11929 | #define COMPLETION_INITIALIZER(work) \ | |
11930 | - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
11931 | + { 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) } | |
11932 | ||
11933 | #define COMPLETION_INITIALIZER_ONSTACK(work) \ | |
11934 | ({ init_completion(&work); work; }) | |
11935 | @@ -73,7 +72,7 @@ struct completion { | |
11936 | static inline void init_completion(struct completion *x) | |
11937 | { | |
11938 | x->done = 0; | |
11939 | - init_waitqueue_head(&x->wait); | |
11940 | + init_swait_queue_head(&x->wait); | |
11941 | } | |
11942 | ||
11943 | /** | |
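Completions now sleep on a simple wait queue: swait uses a raw lock and supports no custom wake callbacks, which keeps complete() callable from hard-irq context on RT. The public completion API is unchanged; only code that open-coded waits on x->wait must move to the swait primitives, which is exactly what the usb gadget hunks earlier in this section do. For ordinary users nothing changes:

    static DECLARE_COMPLETION(done);   /* swait queue underneath now */

    /* waiter */
    wait_for_completion(&done);

    /* completer; safe from IRQ context on RT as well */
    complete(&done);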
11944 | diff --git a/include/linux/cpu.h b/include/linux/cpu.h | |
11945 | index d2ca8c38f9c4..94041d803d0b 100644 | |
11946 | --- a/include/linux/cpu.h | |
11947 | +++ b/include/linux/cpu.h | |
11948 | @@ -231,6 +231,8 @@ extern void get_online_cpus(void); | |
11949 | extern void put_online_cpus(void); | |
11950 | extern void cpu_hotplug_disable(void); | |
11951 | extern void cpu_hotplug_enable(void); | |
11952 | +extern void pin_current_cpu(void); | |
11953 | +extern void unpin_current_cpu(void); | |
11954 | #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) | |
11955 | #define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) | |
11956 | #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) | |
11957 | @@ -248,6 +250,8 @@ static inline void cpu_hotplug_done(void) {} | |
11958 | #define put_online_cpus() do { } while (0) | |
11959 | #define cpu_hotplug_disable() do { } while (0) | |
11960 | #define cpu_hotplug_enable() do { } while (0) | |
11961 | +static inline void pin_current_cpu(void) { } | |
11962 | +static inline void unpin_current_cpu(void) { } | |
11963 | #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
11964 | #define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) | |
11965 | /* These aren't inline functions due to a GCC bug. */ | |
11966 | diff --git a/include/linux/delay.h b/include/linux/delay.h | |
11967 | index a6ecb34cf547..37caab306336 100644 | |
11968 | --- a/include/linux/delay.h | |
11969 | +++ b/include/linux/delay.h | |
11970 | @@ -52,4 +52,10 @@ static inline void ssleep(unsigned int seconds) | |
11971 | msleep(seconds * 1000); | |
11972 | } | |
11973 | ||
11974 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
11975 | +extern void cpu_chill(void); | |
11976 | +#else | |
11977 | +# define cpu_chill() cpu_relax() | |
11978 | +#endif | |
11979 | + | |
11980 | #endif /* defined(_LINUX_DELAY_H) */ | |
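cpu_chill() is the RT replacement for trylock-retry spins: on RT it sleeps briefly (roughly a millisecond in this series) so the lock holder, possibly a lower-priority task on the same CPU, can actually run and release the lock; on !RT it still compiles to cpu_relax(). The callers converted above (autofs4, dcache, namespace) all share one shape:

    again:
            spin_lock(&dentry->d_lock);
            if (!spin_trylock(&inode->i_lock)) {
                    spin_unlock(&dentry->d_lock);
                    cpu_chill();        /* sleep on RT, spin-hint on !RT */
                    goto again;
            }
            /* both locks held */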
11981 | diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h | |
11982 | index 60048c50404e..f2cd67624f18 100644 | |
11983 | --- a/include/linux/ftrace.h | |
11984 | +++ b/include/linux/ftrace.h | |
11985 | @@ -694,6 +694,18 @@ static inline void __ftrace_enabled_restore(int enabled) | |
11986 | #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) | |
11987 | #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) | |
11988 | ||
11989 | +static inline unsigned long get_lock_parent_ip(void) | |
11990 | +{ | |
11991 | + unsigned long addr = CALLER_ADDR0; | |
11992 | + | |
11993 | + if (!in_lock_functions(addr)) | |
11994 | + return addr; | |
11995 | + addr = CALLER_ADDR1; | |
11996 | + if (!in_lock_functions(addr)) | |
11997 | + return addr; | |
11998 | + return CALLER_ADDR2; | |
11999 | +} | |
12000 | + | |
12001 | #ifdef CONFIG_IRQSOFF_TRACER | |
12002 | extern void time_hardirqs_on(unsigned long a0, unsigned long a1); | |
12003 | extern void time_hardirqs_off(unsigned long a0, unsigned long a1); | |
12004 | diff --git a/include/linux/highmem.h b/include/linux/highmem.h | |
12005 | index bb3f3297062a..a117a33ef72c 100644 | |
12006 | --- a/include/linux/highmem.h | |
12007 | +++ b/include/linux/highmem.h | |
12008 | @@ -7,6 +7,7 @@ | |
12009 | #include <linux/mm.h> | |
12010 | #include <linux/uaccess.h> | |
12011 | #include <linux/hardirq.h> | |
12012 | +#include <linux/sched.h> | |
12013 | ||
12014 | #include <asm/cacheflush.h> | |
12015 | ||
12016 | @@ -65,7 +66,7 @@ static inline void kunmap(struct page *page) | |
12017 | ||
12018 | static inline void *kmap_atomic(struct page *page) | |
12019 | { | |
12020 | - preempt_disable(); | |
12021 | + preempt_disable_nort(); | |
12022 | pagefault_disable(); | |
12023 | return page_address(page); | |
12024 | } | |
12025 | @@ -74,7 +75,7 @@ static inline void *kmap_atomic(struct page *page) | |
12026 | static inline void __kunmap_atomic(void *addr) | |
12027 | { | |
12028 | pagefault_enable(); | |
12029 | - preempt_enable(); | |
12030 | + preempt_enable_nort(); | |
12031 | } | |
12032 | ||
12033 | #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) | |
12034 | @@ -86,32 +87,51 @@ static inline void __kunmap_atomic(void *addr) | |
12035 | ||
12036 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
12037 | ||
12038 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12039 | DECLARE_PER_CPU(int, __kmap_atomic_idx); | |
12040 | +#endif | |
12041 | ||
12042 | static inline int kmap_atomic_idx_push(void) | |
12043 | { | |
12044 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12045 | int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1; | |
12046 | ||
12047 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
12048 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
12049 | WARN_ON_ONCE(in_irq() && !irqs_disabled()); | |
12050 | BUG_ON(idx >= KM_TYPE_NR); | |
12051 | -#endif | |
12052 | +# endif | |
12053 | return idx; | |
12054 | +#else | |
12055 | + current->kmap_idx++; | |
12056 | + BUG_ON(current->kmap_idx > KM_TYPE_NR); | |
12057 | + return current->kmap_idx - 1; | |
12058 | +#endif | |
12059 | } | |
12060 | ||
12061 | static inline int kmap_atomic_idx(void) | |
12062 | { | |
12063 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12064 | return __this_cpu_read(__kmap_atomic_idx) - 1; | |
12065 | +#else | |
12066 | + return current->kmap_idx - 1; | |
12067 | +#endif | |
12068 | } | |
12069 | ||
12070 | static inline void kmap_atomic_idx_pop(void) | |
12071 | { | |
12072 | -#ifdef CONFIG_DEBUG_HIGHMEM | |
12073 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12074 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
12075 | int idx = __this_cpu_dec_return(__kmap_atomic_idx); | |
12076 | ||
12077 | BUG_ON(idx < 0); | |
12078 | -#else | |
12079 | +# else | |
12080 | __this_cpu_dec(__kmap_atomic_idx); | |
12081 | +# endif | |
12082 | +#else | |
12083 | + current->kmap_idx--; | |
12084 | +# ifdef CONFIG_DEBUG_HIGHMEM | |
12085 | + BUG_ON(current->kmap_idx < 0); | |
12086 | +# endif | |
12087 | #endif | |
12088 | } | |
12089 | ||
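Since kmap_atomic() no longer disables preemption on RT (preempt_disable_nort() above), a per-CPU kmap stack index would be corrupted as soon as a mapping task is preempted. The RT branches therefore keep the nesting depth per task, in current->kmap_idx, so every task owns its own atomic-kmap stack. The push/pop pairing is unchanged for callers:

    int idx = kmap_atomic_idx_push();   /* per-task depth on RT */
    /* ... use the fixmap slot for this nesting level ... */
    kmap_atomic_idx_pop();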
12090 | diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h | |
12091 | index 2ead22dd74a0..8fbcdfa5dc77 100644 | |
12092 | --- a/include/linux/hrtimer.h | |
12093 | +++ b/include/linux/hrtimer.h | |
12094 | @@ -87,6 +87,9 @@ enum hrtimer_restart { | |
12095 | * @function: timer expiry callback function | |
12096 | * @base: pointer to the timer base (per cpu and per clock) | |
12097 | * @state: state information (See bit values above) | |
12098 | + * @cb_entry: list entry to defer timers from hardirq context | |
12099 | + * @irqsafe: timer can run in hardirq context | |
12100 | + * @praecox: timer expiry time if expired at the time of programming | |
12101 | * @is_rel: Set if the timer was armed relative | |
12102 | * @start_pid: timer statistics field to store the pid of the task which | |
12103 | * started the timer | |
12104 | @@ -103,6 +106,11 @@ struct hrtimer { | |
12105 | enum hrtimer_restart (*function)(struct hrtimer *); | |
12106 | struct hrtimer_clock_base *base; | |
12107 | u8 state; | |
12108 | + struct list_head cb_entry; | |
12109 | + int irqsafe; | |
12110 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
12111 | + ktime_t praecox; | |
12112 | +#endif | |
12113 | u8 is_rel; | |
12114 | #ifdef CONFIG_TIMER_STATS | |
12115 | int start_pid; | |
12116 | @@ -123,11 +131,7 @@ struct hrtimer_sleeper { | |
12117 | struct task_struct *task; | |
12118 | }; | |
12119 | ||
12120 | -#ifdef CONFIG_64BIT | |
12121 | # define HRTIMER_CLOCK_BASE_ALIGN 64 | |
12122 | -#else | |
12123 | -# define HRTIMER_CLOCK_BASE_ALIGN 32 | |
12124 | -#endif | |
12125 | ||
12126 | /** | |
12127 | * struct hrtimer_clock_base - the timer base for a specific clock | |
12128 | @@ -136,6 +140,7 @@ struct hrtimer_sleeper { | |
12129 | * timer to a base on another cpu. | |
12130 | * @clockid: clock id for per_cpu support | |
12131 | * @active: red black tree root node for the active timers | |
12132 | + * @expired: list head for deferred timers. | |
12133 | * @get_time: function to retrieve the current time of the clock | |
12134 | * @offset: offset of this clock to the monotonic base | |
12135 | */ | |
12136 | @@ -144,6 +149,7 @@ struct hrtimer_clock_base { | |
12137 | int index; | |
12138 | clockid_t clockid; | |
12139 | struct timerqueue_head active; | |
12140 | + struct list_head expired; | |
12141 | ktime_t (*get_time)(void); | |
12142 | ktime_t offset; | |
12143 | } __attribute__((__aligned__(HRTIMER_CLOCK_BASE_ALIGN))); | |
12144 | @@ -187,6 +193,7 @@ struct hrtimer_cpu_base { | |
12145 | raw_spinlock_t lock; | |
12146 | seqcount_t seq; | |
12147 | struct hrtimer *running; | |
12148 | + struct hrtimer *running_soft; | |
12149 | unsigned int cpu; | |
12150 | unsigned int active_bases; | |
12151 | unsigned int clock_was_set_seq; | |
12152 | @@ -203,6 +210,9 @@ struct hrtimer_cpu_base { | |
12153 | unsigned int nr_hangs; | |
12154 | unsigned int max_hang_time; | |
12155 | #endif | |
12156 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12157 | + wait_queue_head_t wait; | |
12158 | +#endif | |
12159 | struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; | |
12160 | } ____cacheline_aligned; | |
12161 | ||
12162 | @@ -412,6 +422,13 @@ static inline void hrtimer_restart(struct hrtimer *timer) | |
12163 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | |
12164 | } | |
12165 | ||
12166 | +/* Softirq preemption could deadlock timer removal */ | |
12167 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12168 | + extern void hrtimer_wait_for_timer(const struct hrtimer *timer); | |
12169 | +#else | |
12170 | +# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) | |
12171 | +#endif | |
12172 | + | |
12173 | /* Query timers: */ | |
12174 | extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust); | |
12175 | ||
12176 | @@ -436,7 +453,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer) | |
12177 | * Helper function to check, whether the timer is running the callback | |
12178 | * function | |
12179 | */ | |
12180 | -static inline int hrtimer_callback_running(struct hrtimer *timer) | |
12181 | +static inline int hrtimer_callback_running(const struct hrtimer *timer) | |
12182 | { | |
12183 | return timer->base->cpu_base->running == timer; | |
12184 | } | |
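The new cb_entry/expired lists and the irqsafe flag implement RT's deferred expiry: timers not marked irqsafe are moved onto the base's expired list from the hard interrupt and their callbacks run later in softirq context, while irqsafe ones keep firing directly from the hard interrupt; the per-base wait queue is what hrtimer_wait_for_timer() sleeps on. Opting a timer into hard-irq expiry, as done at various points in this series (callback name hypothetical):

    struct hrtimer t;

    hrtimer_init(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
    t.function = my_callback;
    t.irqsafe = 1;                      /* run in hard-irq context even on RT */
    hrtimer_start(&t, ms_to_ktime(10), HRTIMER_MODE_REL);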
12185 | diff --git a/include/linux/idr.h b/include/linux/idr.h | |
12186 | index 013fd9bc4cb6..f62be0aec911 100644 | |
12187 | --- a/include/linux/idr.h | |
12188 | +++ b/include/linux/idr.h | |
12189 | @@ -95,10 +95,14 @@ bool idr_is_empty(struct idr *idp); | |
12190 | * Each idr_preload() should be matched with an invocation of this | |
12191 | * function. See idr_preload() for details. | |
12192 | */ | |
12193 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12194 | +void idr_preload_end(void); | |
12195 | +#else | |
12196 | static inline void idr_preload_end(void) | |
12197 | { | |
12198 | preempt_enable(); | |
12199 | } | |
12200 | +#endif | |
12201 | ||
12202 | /** | |
12203 | * idr_find - return pointer for given id | |
12204 | diff --git a/include/linux/init_task.h b/include/linux/init_task.h | |
12205 | index 1c1ff7e4faa4..60fadde71a44 100644 | |
12206 | --- a/include/linux/init_task.h | |
12207 | +++ b/include/linux/init_task.h | |
12208 | @@ -148,9 +148,15 @@ extern struct task_group root_task_group; | |
12209 | # define INIT_PERF_EVENTS(tsk) | |
12210 | #endif | |
12211 | ||
12212 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12213 | +# define INIT_TIMER_LIST .posix_timer_list = NULL, | |
12214 | +#else | |
12215 | +# define INIT_TIMER_LIST | |
12216 | +#endif | |
12217 | + | |
12218 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
12219 | # define INIT_VTIME(tsk) \ | |
12220 | - .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ | |
12221 | + .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \ | |
12222 | .vtime_snap = 0, \ | |
12223 | .vtime_snap_whence = VTIME_SYS, | |
12224 | #else | |
12225 | @@ -239,6 +245,7 @@ extern struct task_group root_task_group; | |
12226 | .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ | |
12227 | .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ | |
12228 | .timer_slack_ns = 50000, /* 50 usec default slack */ \ | |
12229 | + INIT_TIMER_LIST \ | |
12230 | .pids = { \ | |
12231 | [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ | |
12232 | [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ | |
12233 | diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h | |
12234 | index ad16809c8596..655cee096aed 100644 | |
12235 | --- a/include/linux/interrupt.h | |
12236 | +++ b/include/linux/interrupt.h | |
12237 | @@ -61,6 +61,7 @@ | |
12238 | * interrupt handler after suspending interrupts. For system | |
12239 | * wakeup devices users need to implement wakeup detection in | |
12240 | * their interrupt handlers. | |
12241 | + * IRQF_NO_SOFTIRQ_CALL - Do not process softirqs in the irq thread context (RT) | |
12242 | */ | |
12243 | #define IRQF_SHARED 0x00000080 | |
12244 | #define IRQF_PROBE_SHARED 0x00000100 | |
12245 | @@ -74,6 +75,7 @@ | |
12246 | #define IRQF_NO_THREAD 0x00010000 | |
12247 | #define IRQF_EARLY_RESUME 0x00020000 | |
12248 | #define IRQF_COND_SUSPEND 0x00040000 | |
12249 | +#define IRQF_NO_SOFTIRQ_CALL 0x00080000 | |
12250 | ||
12251 | #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) | |
12252 | ||
12253 | @@ -186,7 +188,7 @@ extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); | |
12254 | #ifdef CONFIG_LOCKDEP | |
12255 | # define local_irq_enable_in_hardirq() do { } while (0) | |
12256 | #else | |
12257 | -# define local_irq_enable_in_hardirq() local_irq_enable() | |
12258 | +# define local_irq_enable_in_hardirq() local_irq_enable_nort() | |
12259 | #endif | |
12260 | ||
12261 | extern void disable_irq_nosync(unsigned int irq); | |
12262 | @@ -206,6 +208,7 @@ extern void resume_device_irqs(void); | |
12263 | * @irq: Interrupt to which notification applies | |
12264 | * @kref: Reference count, for internal use | |
12265 | * @work: Work item, for internal use | |
12266 | + * @list: List item for deferred callbacks | |
12267 | * @notify: Function to be called on change. This will be | |
12268 | * called in process context. | |
12269 | * @release: Function to be called on release. This will be | |
12270 | @@ -217,6 +220,7 @@ struct irq_affinity_notify { | |
12271 | unsigned int irq; | |
12272 | struct kref kref; | |
12273 | struct work_struct work; | |
12274 | + struct list_head list; | |
12275 | void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask); | |
12276 | void (*release)(struct kref *ref); | |
12277 | }; | |
12278 | @@ -379,9 +383,13 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
12279 | bool state); | |
12280 | ||
12281 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
12282 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
12283 | extern bool force_irqthreads; | |
12284 | +# else | |
12285 | +# define force_irqthreads (true) | |
12286 | +# endif | |
12287 | #else | |
12288 | -#define force_irqthreads (0) | |
12289 | +#define force_irqthreads (false) | |
12290 | #endif | |
12291 | ||
12292 | #ifndef __ARCH_SET_SOFTIRQ_PENDING | |
12293 | @@ -438,9 +446,10 @@ struct softirq_action | |
12294 | void (*action)(struct softirq_action *); | |
12295 | }; | |
12296 | ||
12297 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12298 | asmlinkage void do_softirq(void); | |
12299 | asmlinkage void __do_softirq(void); | |
12300 | - | |
12301 | +static inline void thread_do_softirq(void) { do_softirq(); } | |
12302 | #ifdef __ARCH_HAS_DO_SOFTIRQ | |
12303 | void do_softirq_own_stack(void); | |
12304 | #else | |
12305 | @@ -449,13 +458,25 @@ static inline void do_softirq_own_stack(void) | |
12306 | __do_softirq(); | |
12307 | } | |
12308 | #endif | |
12309 | +#else | |
12310 | +extern void thread_do_softirq(void); | |
12311 | +#endif | |
12312 | ||
12313 | extern void open_softirq(int nr, void (*action)(struct softirq_action *)); | |
12314 | extern void softirq_init(void); | |
12315 | extern void __raise_softirq_irqoff(unsigned int nr); | |
12316 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12317 | +extern void __raise_softirq_irqoff_ksoft(unsigned int nr); | |
12318 | +#else | |
12319 | +static inline void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
12320 | +{ | |
12321 | + __raise_softirq_irqoff(nr); | |
12322 | +} | |
12323 | +#endif | |
12324 | ||
12325 | extern void raise_softirq_irqoff(unsigned int nr); | |
12326 | extern void raise_softirq(unsigned int nr); | |
12327 | +extern void softirq_check_pending_idle(void); | |
12328 | ||
12329 | DECLARE_PER_CPU(struct task_struct *, ksoftirqd); | |
12330 | ||
12331 | @@ -477,8 +498,9 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) | |
12332 | to be executed on some cpu at least once after this. | |
12333 | * If the tasklet is already scheduled, but its execution is still not | |
12334 | started, it will be executed only once. | |
12335 | - * If this tasklet is already running on another CPU (or schedule is called | |
12336 | - from tasklet itself), it is rescheduled for later. | |
12337 | + * If this tasklet is already running on another CPU, it is rescheduled | |
12338 | + for later. | |
12339 | + * Schedule must not be called from the tasklet itself (a lockup occurs) | |
12340 | * Tasklet is strictly serialized wrt itself, but not | |
12341 | wrt another tasklets. If client needs some intertask synchronization, | |
12342 | he makes it with spinlocks. | |
12343 | @@ -503,27 +525,36 @@ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data } | |
12344 | enum | |
12345 | { | |
12346 | TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ | |
12347 | - TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ | |
12348 | + TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ | |
12349 | + TASKLET_STATE_PENDING /* Tasklet is pending */ | |
12350 | }; | |
12351 | ||
12352 | -#ifdef CONFIG_SMP | |
12353 | +#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) | |
12354 | +#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) | |
12355 | +#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) | |
12356 | + | |
12357 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
12358 | static inline int tasklet_trylock(struct tasklet_struct *t) | |
12359 | { | |
12360 | return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); | |
12361 | } | |
12362 | ||
12363 | +static inline int tasklet_tryunlock(struct tasklet_struct *t) | |
12364 | +{ | |
12365 | + return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; | |
12366 | +} | |
12367 | + | |
12368 | static inline void tasklet_unlock(struct tasklet_struct *t) | |
12369 | { | |
12370 | smp_mb__before_atomic(); | |
12371 | clear_bit(TASKLET_STATE_RUN, &(t)->state); | |
12372 | } | |
12373 | ||
12374 | -static inline void tasklet_unlock_wait(struct tasklet_struct *t) | |
12375 | -{ | |
12376 | - while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } | |
12377 | -} | |
12378 | +extern void tasklet_unlock_wait(struct tasklet_struct *t); | |
12379 | + | |
12380 | #else | |
12381 | #define tasklet_trylock(t) 1 | |
12382 | +#define tasklet_tryunlock(t) 1 | |
12383 | #define tasklet_unlock_wait(t) do { } while (0) | |
12384 | #define tasklet_unlock(t) do { } while (0) | |
12385 | #endif | |
12386 | @@ -572,12 +603,7 @@ static inline void tasklet_disable(struct tasklet_struct *t) | |
12387 | smp_mb(); | |
12388 | } | |
12389 | ||
12390 | -static inline void tasklet_enable(struct tasklet_struct *t) | |
12391 | -{ | |
12392 | - smp_mb__before_atomic(); | |
12393 | - atomic_dec(&t->count); | |
12394 | -} | |
12395 | - | |
12396 | +extern void tasklet_enable(struct tasklet_struct *t); | |
12397 | extern void tasklet_kill(struct tasklet_struct *t); | |
12398 | extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); | |
12399 | extern void tasklet_init(struct tasklet_struct *t, | |
12400 | @@ -608,6 +634,12 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) | |
12401 | tasklet_kill(&ttimer->tasklet); | |
12402 | } | |
12403 | ||
12404 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12405 | +extern void softirq_early_init(void); | |
12406 | +#else | |
12407 | +static inline void softirq_early_init(void) { } | |
12408 | +#endif | |
12409 | + | |
12410 | /* | |
12411 | * Autoprobing for irqs: | |
12412 | * | |
12413 | diff --git a/include/linux/irq.h b/include/linux/irq.h | |
12414 | index f7cade00c525..dac9e11ba037 100644 | |
12415 | --- a/include/linux/irq.h | |
12416 | +++ b/include/linux/irq.h | |
12417 | @@ -72,6 +72,7 @@ enum irqchip_irq_state; | |
12418 | * IRQ_IS_POLLED - Always polled by another interrupt. Exclude | |
12419 | * it from the spurious interrupt detection | |
12420 | * mechanism and from core side polling. | |
12421 | + * IRQ_NO_SOFTIRQ_CALL - No softirq processing in the irq thread context (RT) | |
12422 | * IRQ_DISABLE_UNLAZY - Disable lazy irq disable | |
12423 | */ | |
12424 | enum { | |
12425 | @@ -99,13 +100,14 @@ enum { | |
12426 | IRQ_PER_CPU_DEVID = (1 << 17), | |
12427 | IRQ_IS_POLLED = (1 << 18), | |
12428 | IRQ_DISABLE_UNLAZY = (1 << 19), | |
12429 | + IRQ_NO_SOFTIRQ_CALL = (1 << 20), | |
12430 | }; | |
12431 | ||
12432 | #define IRQF_MODIFY_MASK \ | |
12433 | (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ | |
12434 | IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ | |
12435 | IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ | |
12436 | - IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) | |
12437 | + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY | IRQ_NO_SOFTIRQ_CALL) | |
12438 | ||
12439 | #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) | |
12440 | ||
12441 | diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h | |
12442 | index 47b9ebd4a74f..2543aab05daa 100644 | |
12443 | --- a/include/linux/irq_work.h | |
12444 | +++ b/include/linux/irq_work.h | |
12445 | @@ -16,6 +16,7 @@ | |
12446 | #define IRQ_WORK_BUSY 2UL | |
12447 | #define IRQ_WORK_FLAGS 3UL | |
12448 | #define IRQ_WORK_LAZY 4UL /* Doesn't want IPI, wait for tick */ | |
12449 | +#define IRQ_WORK_HARD_IRQ 8UL /* Run hard IRQ context, even on RT */ | |
12450 | ||
12451 | struct irq_work { | |
12452 | unsigned long flags; | |
12453 | @@ -51,4 +52,10 @@ static inline bool irq_work_needs_cpu(void) { return false; } | |
12454 | static inline void irq_work_run(void) { } | |
12455 | #endif | |
12456 | ||
12457 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
12458 | +void irq_work_tick_soft(void); | |
12459 | +#else | |
12460 | +static inline void irq_work_tick_soft(void) { } | |
12461 | +#endif | |
12462 | + | |
12463 | #endif /* _LINUX_IRQ_WORK_H */ | |
12464 | diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h | |
12465 | index a587a33363c7..ad57402a242d 100644 | |
12466 | --- a/include/linux/irqdesc.h | |
12467 | +++ b/include/linux/irqdesc.h | |
12468 | @@ -61,6 +61,7 @@ struct irq_desc { | |
12469 | unsigned int irqs_unhandled; | |
12470 | atomic_t threads_handled; | |
12471 | int threads_handled_last; | |
12472 | + u64 random_ip; | |
12473 | raw_spinlock_t lock; | |
12474 | struct cpumask *percpu_enabled; | |
12475 | #ifdef CONFIG_SMP | |
12476 | diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h | |
12477 | index 5dd1272d1ab2..9b77034f7c5e 100644 | |
12478 | --- a/include/linux/irqflags.h | |
12479 | +++ b/include/linux/irqflags.h | |
12480 | @@ -25,8 +25,6 @@ | |
12481 | # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) | |
12482 | # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) | |
12483 | # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) | |
12484 | -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
12485 | -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
12486 | # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, | |
12487 | #else | |
12488 | # define trace_hardirqs_on() do { } while (0) | |
12489 | @@ -39,9 +37,15 @@ | |
12490 | # define trace_softirqs_enabled(p) 0 | |
12491 | # define trace_hardirq_enter() do { } while (0) | |
12492 | # define trace_hardirq_exit() do { } while (0) | |
12493 | +# define INIT_TRACE_IRQFLAGS | |
12494 | +#endif | |
12495 | + | |
12496 | +#if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PREEMPT_RT_FULL) | |
12497 | +# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) | |
12498 | +# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) | |
12499 | +#else | |
12500 | # define lockdep_softirq_enter() do { } while (0) | |
12501 | # define lockdep_softirq_exit() do { } while (0) | |
12502 | -# define INIT_TRACE_IRQFLAGS | |
12503 | #endif | |
12504 | ||
12505 | #if defined(CONFIG_IRQSOFF_TRACER) || \ | |
12506 | @@ -148,4 +152,23 @@ | |
12507 | ||
12508 | #define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) | |
12509 | ||
12510 | +/* | |
12511 | + * local_irq* variants depending on RT/!RT | |
12512 | + */ | |
12513 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12514 | +# define local_irq_disable_nort() do { } while (0) | |
12515 | +# define local_irq_enable_nort() do { } while (0) | |
12516 | +# define local_irq_save_nort(flags) local_save_flags(flags) | |
12517 | +# define local_irq_restore_nort(flags) (void)(flags) | |
12518 | +# define local_irq_disable_rt() local_irq_disable() | |
12519 | +# define local_irq_enable_rt() local_irq_enable() | |
12520 | +#else | |
12521 | +# define local_irq_disable_nort() local_irq_disable() | |
12522 | +# define local_irq_enable_nort() local_irq_enable() | |
12523 | +# define local_irq_save_nort(flags) local_irq_save(flags) | |
12524 | +# define local_irq_restore_nort(flags) local_irq_restore(flags) | |
12525 | +# define local_irq_disable_rt() do { } while (0) | |
12526 | +# define local_irq_enable_rt() do { } while (0) | |
12527 | +#endif | |
12528 | + | |
12529 | #endif | |
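Note: the *_nort/*_rt variants let shared code say "interrupts off only where it still means something": on RT such sections are typically serialized by a sleeping lock already, so the nort forms degrade to flags bookkeeping. A minimal sketch, assuming the data is otherwise protected on RT:

    unsigned long flags;

    local_irq_save_nort(flags);    /* hard IRQs off on !RT; only saves flags on RT */
    /* short critical section */
    local_irq_restore_nort(flags);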
12530 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h | |
12531 | index 65407f6c9120..eb5aabe4e18c 100644 | |
12532 | --- a/include/linux/jbd2.h | |
12533 | +++ b/include/linux/jbd2.h | |
12534 | @@ -352,32 +352,56 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) | |
12535 | ||
12536 | static inline void jbd_lock_bh_state(struct buffer_head *bh) | |
12537 | { | |
12538 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12539 | bit_spin_lock(BH_State, &bh->b_state); | |
12540 | +#else | |
12541 | + spin_lock(&bh->b_state_lock); | |
12542 | +#endif | |
12543 | } | |
12544 | ||
12545 | static inline int jbd_trylock_bh_state(struct buffer_head *bh) | |
12546 | { | |
12547 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12548 | return bit_spin_trylock(BH_State, &bh->b_state); | |
12549 | +#else | |
12550 | + return spin_trylock(&bh->b_state_lock); | |
12551 | +#endif | |
12552 | } | |
12553 | ||
12554 | static inline int jbd_is_locked_bh_state(struct buffer_head *bh) | |
12555 | { | |
12556 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12557 | return bit_spin_is_locked(BH_State, &bh->b_state); | |
12558 | +#else | |
12559 | + return spin_is_locked(&bh->b_state_lock); | |
12560 | +#endif | |
12561 | } | |
12562 | ||
12563 | static inline void jbd_unlock_bh_state(struct buffer_head *bh) | |
12564 | { | |
12565 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12566 | bit_spin_unlock(BH_State, &bh->b_state); | |
12567 | +#else | |
12568 | + spin_unlock(&bh->b_state_lock); | |
12569 | +#endif | |
12570 | } | |
12571 | ||
12572 | static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) | |
12573 | { | |
12574 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12575 | bit_spin_lock(BH_JournalHead, &bh->b_state); | |
12576 | +#else | |
12577 | + spin_lock(&bh->b_journal_head_lock); | |
12578 | +#endif | |
12579 | } | |
12580 | ||
12581 | static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) | |
12582 | { | |
12583 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12584 | bit_spin_unlock(BH_JournalHead, &bh->b_state); | |
12585 | +#else | |
12586 | + spin_unlock(&bh->b_journal_head_lock); | |
12587 | +#endif | |
12588 | } | |
12589 | ||
12590 | #define J_ASSERT(assert) BUG_ON(!(assert)) | |
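Note: only the backing primitive changes here (bit spinlock on !RT, sleeping spinlock on RT; b_state_lock and b_journal_head_lock are added to struct buffer_head elsewhere in this series). Call sites keep the same shape:

    jbd_lock_bh_state(bh);
    /* examine or update the buffer's journal state */
    jbd_unlock_bh_state(bh);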
12591 | diff --git a/include/linux/kdb.h b/include/linux/kdb.h | |
12592 | index a19bcf9e762e..897495386446 100644 | |
12593 | --- a/include/linux/kdb.h | |
12594 | +++ b/include/linux/kdb.h | |
12595 | @@ -167,6 +167,7 @@ extern __printf(2, 0) int vkdb_printf(enum kdb_msgsrc src, const char *fmt, | |
12596 | extern __printf(1, 2) int kdb_printf(const char *, ...); | |
12597 | typedef __printf(1, 2) int (*kdb_printf_t)(const char *, ...); | |
12598 | ||
12599 | +#define in_kdb_printk() (kdb_trap_printk) | |
12600 | extern void kdb_init(int level); | |
12601 | ||
12602 | /* Access to kdb specific polling devices */ | |
12603 | @@ -201,6 +202,7 @@ extern int kdb_register_flags(char *, kdb_func_t, char *, char *, | |
12604 | extern int kdb_unregister(char *); | |
12605 | #else /* ! CONFIG_KGDB_KDB */ | |
12606 | static inline __printf(1, 2) int kdb_printf(const char *fmt, ...) { return 0; } | |
12607 | +#define in_kdb_printk() (0) | |
12608 | static inline void kdb_init(int level) {} | |
12609 | static inline int kdb_register(char *cmd, kdb_func_t func, char *usage, | |
12610 | char *help, short minlen) { return 0; } | |
12611 | diff --git a/include/linux/kernel.h b/include/linux/kernel.h | |
12612 | index 50220cab738c..d68f639f7330 100644 | |
12612 | index 50220cab738c..d68f639f7330 100644 | |
12613 | --- a/include/linux/kernel.h |
12614 | +++ b/include/linux/kernel.h | |
12615 | @@ -188,6 +188,9 @@ extern int _cond_resched(void); | |
12616 | */ | |
12617 | # define might_sleep() \ | |
12618 | do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
12619 | + | |
12620 | +# define might_sleep_no_state_check() \ | |
12621 | + do { ___might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) | |
12622 | # define sched_annotate_sleep() (current->task_state_change = 0) | |
12623 | #else | |
12624 | static inline void ___might_sleep(const char *file, int line, | |
12625 | @@ -195,6 +198,7 @@ extern int _cond_resched(void); | |
12626 | static inline void __might_sleep(const char *file, int line, | |
12627 | int preempt_offset) { } | |
12628 | # define might_sleep() do { might_resched(); } while (0) | |
12629 | +# define might_sleep_no_state_check() do { might_resched(); } while (0) | |
12630 | # define sched_annotate_sleep() do { } while (0) | |
12631 | #endif | |
12632 | ||
12633 | @@ -255,6 +259,7 @@ extern long (*panic_blink)(int state); | |
12634 | __printf(1, 2) | |
12635 | void panic(const char *fmt, ...) | |
12636 | __noreturn __cold; | |
12637 | +void nmi_panic(struct pt_regs *regs, const char *msg); | |
12638 | extern void oops_enter(void); | |
12639 | extern void oops_exit(void); | |
12640 | void print_oops_end_marker(void); | |
12641 | @@ -448,6 +453,14 @@ extern int sysctl_panic_on_stackoverflow; | |
12642 | extern bool crash_kexec_post_notifiers; |
12643 | ||
12644 | /* | |
12645 | + * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It | |
12646 | + * holds a CPU number which is executing panic() currently. A value of | |
12647 | + * PANIC_CPU_INVALID means no CPU has entered panic() or crash_kexec(). | |
12648 | + */ | |
12649 | +extern atomic_t panic_cpu; | |
12650 | +#define PANIC_CPU_INVALID -1 | |
12651 | + | |
12652 | +/* | |
12653 | * Only to be used by arch init code. If the user over-wrote the default | |
12654 | * CONFIG_PANIC_TIMEOUT, honor it. | |
12655 | */ | |
12656 | @@ -475,6 +488,7 @@ extern enum system_states { | |
12657 | SYSTEM_HALT, |
12658 | SYSTEM_POWER_OFF, | |
12659 | SYSTEM_RESTART, | |
12660 | + SYSTEM_SUSPEND, | |
12661 | } system_state; | |
12662 | ||
12663 | #define TAINT_PROPRIETARY_MODULE 0 | |
12664 | diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h | |
12665 | index c923350ca20a..c690acc6900e 100644 | |
12666 | --- a/include/linux/kvm_host.h | |
12667 | +++ b/include/linux/kvm_host.h | |
12668 | @@ -25,6 +25,7 @@ | |
12669 | #include <linux/irqflags.h> | |
12670 | #include <linux/context_tracking.h> | |
12671 | #include <linux/irqbypass.h> | |
12672 | +#include <linux/swait.h> | |
12673 | #include <asm/signal.h> | |
12674 | ||
12675 | #include <linux/kvm.h> | |
12676 | @@ -243,7 +244,7 @@ struct kvm_vcpu { | |
12677 | int fpu_active; | |
12678 | int guest_fpu_loaded, guest_xcr0_loaded; | |
12679 | unsigned char fpu_counter; | |
12680 | - wait_queue_head_t wq; | |
12681 | + struct swait_queue_head wq; | |
12682 | struct pid *pid; | |
12683 | int sigset_active; | |
12684 | sigset_t sigset; | |
12685 | @@ -794,7 +795,7 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) | |
12686 | } | |
12687 | #endif | |
12688 | ||
12689 | -static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) | |
12690 | +static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) | |
12691 | { | |
12692 | #ifdef __KVM_HAVE_ARCH_WQP | |
12693 | return vcpu->arch.wqp; | |
12694 | diff --git a/include/linux/lglock.h b/include/linux/lglock.h | |
12695 | index c92ebd100d9b..6f035f635d0e 100644 | |
12696 | --- a/include/linux/lglock.h | |
12697 | +++ b/include/linux/lglock.h | |
12698 | @@ -34,13 +34,30 @@ | |
12699 | #endif | |
12700 | ||
12701 | struct lglock { | |
12702 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12703 | + struct rt_mutex __percpu *lock; | |
12704 | +#else | |
12705 | arch_spinlock_t __percpu *lock; | |
12706 | +#endif | |
12707 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
12708 | struct lock_class_key lock_key; | |
12709 | struct lockdep_map lock_dep_map; | |
12710 | #endif | |
12711 | }; | |
12712 | ||
12713 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12714 | +# define DEFINE_LGLOCK(name) \ | |
12715 | + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ | |
12716 | + = __RT_MUTEX_INITIALIZER( name ## _lock); \ | |
12717 | + struct lglock name = { .lock = &name ## _lock } | |
12718 | + | |
12719 | +# define DEFINE_STATIC_LGLOCK(name) \ | |
12720 | + static DEFINE_PER_CPU(struct rt_mutex, name ## _lock) \ | |
12721 | + = __RT_MUTEX_INITIALIZER( name ## _lock); \ | |
12722 | + static struct lglock name = { .lock = &name ## _lock } | |
12723 | + | |
12724 | +#else | |
12725 | + | |
12726 | #define DEFINE_LGLOCK(name) \ | |
12727 | static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ | |
12728 | = __ARCH_SPIN_LOCK_UNLOCKED; \ | |
12729 | @@ -50,6 +67,7 @@ struct lglock { | |
12730 | static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ | |
12731 | = __ARCH_SPIN_LOCK_UNLOCKED; \ | |
12732 | static struct lglock name = { .lock = &name ## _lock } | |
12733 | +#endif | |
12734 | ||
12735 | void lg_lock_init(struct lglock *lg, char *name); | |
12736 | ||
12737 | @@ -64,6 +82,12 @@ void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2); | |
12738 | void lg_global_lock(struct lglock *lg); | |
12739 | void lg_global_unlock(struct lglock *lg); | |
12740 | ||
12741 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
12742 | +#define lg_global_trylock_relax(name) lg_global_lock(name) | |
12743 | +#else | |
12744 | +void lg_global_trylock_relax(struct lglock *lg); | |
12745 | +#endif | |
12746 | + | |
12747 | #else | |
12748 | /* When !CONFIG_SMP, map lglock to spinlock */ | |
12749 | #define lglock spinlock | |
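Note: the definition macros now pick the backing type (rt_mutex per CPU on RT, arch spinlock otherwise) without touching users. Sketch of the usual pattern, with an illustrative lock name:

    DEFINE_STATIC_LGLOCK(my_lglock);

    lg_local_lock(&my_lglock);      /* this CPU's lock only */
    /* ... per-CPU portion of the data ... */
    lg_local_unlock(&my_lglock);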
12750 | diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h | |
12751 | index 8132214e8efd..89ffaa7bd342 100644 | |
12752 | --- a/include/linux/list_bl.h | |
12753 | +++ b/include/linux/list_bl.h | |
12754 | @@ -2,6 +2,7 @@ | |
12755 | #define _LINUX_LIST_BL_H | |
12756 | ||
12757 | #include <linux/list.h> | |
12758 | +#include <linux/spinlock.h> | |
12759 | #include <linux/bit_spinlock.h> | |
12760 | ||
12761 | /* | |
12762 | @@ -32,13 +33,24 @@ | |
12763 | ||
12764 | struct hlist_bl_head { | |
12765 | struct hlist_bl_node *first; | |
12766 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12767 | + raw_spinlock_t lock; | |
12768 | +#endif | |
12769 | }; | |
12770 | ||
12771 | struct hlist_bl_node { | |
12772 | struct hlist_bl_node *next, **pprev; | |
12773 | }; | |
12774 | -#define INIT_HLIST_BL_HEAD(ptr) \ | |
12775 | - ((ptr)->first = NULL) | |
12776 | + | |
12777 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12778 | +#define INIT_HLIST_BL_HEAD(h) \ | |
12779 | +do { \ | |
12780 | + (h)->first = NULL; \ | |
12781 | + raw_spin_lock_init(&(h)->lock); \ | |
12782 | +} while (0) | |
12783 | +#else | |
12784 | +#define INIT_HLIST_BL_HEAD(h) (h)->first = NULL | |
12785 | +#endif | |
12786 | ||
12787 | static inline void INIT_HLIST_BL_NODE(struct hlist_bl_node *h) | |
12788 | { | |
12789 | @@ -118,12 +130,26 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) | |
12790 | ||
12791 | static inline void hlist_bl_lock(struct hlist_bl_head *b) | |
12792 | { | |
12793 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12794 | bit_spin_lock(0, (unsigned long *)b); | |
12795 | +#else | |
12796 | + raw_spin_lock(&b->lock); | |
12797 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
12798 | + __set_bit(0, (unsigned long *)b); | |
12799 | +#endif | |
12800 | +#endif | |
12801 | } | |
12802 | ||
12803 | static inline void hlist_bl_unlock(struct hlist_bl_head *b) | |
12804 | { | |
12805 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
12806 | __bit_spin_unlock(0, (unsigned long *)b); | |
12807 | +#else | |
12808 | +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) | |
12809 | + __clear_bit(0, (unsigned long *)b); | |
12810 | +#endif | |
12811 | + raw_spin_unlock(&b->lock); | |
12812 | +#endif | |
12813 | } | |
12814 | ||
12815 | static inline bool hlist_bl_is_locked(struct hlist_bl_head *b) | |
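Note: on RT the list head grows a real raw spinlock; bit 0 is still mirrored under SMP/DEBUG_SPINLOCK so hlist_bl_is_locked() and the lockless traversal conventions keep working. Callers stay as before:

    struct hlist_bl_head head;

    INIT_HLIST_BL_HEAD(&head);      /* also initializes head.lock on RT */
    hlist_bl_lock(&head);
    /* add or remove hlist_bl_node entries */
    hlist_bl_unlock(&head);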
12816 | diff --git a/include/linux/locallock.h b/include/linux/locallock.h | |
12817 | new file mode 100644 | |
12818 | index 000000000000..e572a3971631 | |
12819 | --- /dev/null | |
12820 | +++ b/include/linux/locallock.h | |
12821 | @@ -0,0 +1,276 @@ | |
12822 | +#ifndef _LINUX_LOCALLOCK_H | |
12823 | +#define _LINUX_LOCALLOCK_H | |
12824 | + | |
12825 | +#include <linux/percpu.h> | |
12826 | +#include <linux/spinlock.h> | |
12827 | + | |
12828 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
12829 | + | |
12830 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
12831 | +# define LL_WARN(cond) WARN_ON(cond) | |
12832 | +#else | |
12833 | +# define LL_WARN(cond) do { } while (0) | |
12834 | +#endif | |
12835 | + | |
12836 | +/* | |
12837 | + * per cpu lock based substitute for local_irq_*() | |
12838 | + */ | |
12839 | +struct local_irq_lock { | |
12840 | + spinlock_t lock; | |
12841 | + struct task_struct *owner; | |
12842 | + int nestcnt; | |
12843 | + unsigned long flags; | |
12844 | +}; | |
12845 | + | |
12846 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) \ | |
12847 | + DEFINE_PER_CPU(struct local_irq_lock, lvar) = { \ | |
12848 | + .lock = __SPIN_LOCK_UNLOCKED((lvar).lock) } | |
12849 | + | |
12850 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) \ | |
12851 | + DECLARE_PER_CPU(struct local_irq_lock, lvar) | |
12852 | + | |
12853 | +#define local_irq_lock_init(lvar) \ | |
12854 | + do { \ | |
12855 | + int __cpu; \ | |
12856 | + for_each_possible_cpu(__cpu) \ | |
12857 | + spin_lock_init(&per_cpu(lvar, __cpu).lock); \ | |
12858 | + } while (0) | |
12859 | + | |
12860 | +/* | |
12861 | + * spin_lock|trylock|unlock_local flavour that does not migrate disable | |
12862 | + * used for __local_lock|trylock|unlock where get_local_var/put_local_var | |
12863 | + * already takes care of the migrate_disable/enable | |
12864 | + * for CONFIG_PREEMPT_BASE map to the normal spin_* calls. | |
12865 | + */ | |
12866 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
12867 | +# define spin_lock_local(lock) rt_spin_lock__no_mg(lock) | |
12868 | +# define spin_trylock_local(lock) rt_spin_trylock__no_mg(lock) | |
12869 | +# define spin_unlock_local(lock) rt_spin_unlock__no_mg(lock) | |
12870 | +#else | |
12871 | +# define spin_lock_local(lock) spin_lock(lock) | |
12872 | +# define spin_trylock_local(lock) spin_trylock(lock) | |
12873 | +# define spin_unlock_local(lock) spin_unlock(lock) | |
12874 | +#endif | |
12875 | + | |
12876 | +static inline void __local_lock(struct local_irq_lock *lv) | |
12877 | +{ | |
12878 | + if (lv->owner != current) { | |
12879 | + spin_lock_local(&lv->lock); | |
12880 | + LL_WARN(lv->owner); | |
12881 | + LL_WARN(lv->nestcnt); | |
12882 | + lv->owner = current; | |
12883 | + } | |
12884 | + lv->nestcnt++; | |
12885 | +} | |
12886 | + | |
12887 | +#define local_lock(lvar) \ | |
12888 | + do { __local_lock(&get_local_var(lvar)); } while (0) | |
12889 | + | |
12890 | +#define local_lock_on(lvar, cpu) \ | |
12891 | + do { __local_lock(&per_cpu(lvar, cpu)); } while (0) | |
12892 | + | |
12893 | +static inline int __local_trylock(struct local_irq_lock *lv) | |
12894 | +{ | |
12895 | + if (lv->owner != current && spin_trylock_local(&lv->lock)) { | |
12896 | + LL_WARN(lv->owner); | |
12897 | + LL_WARN(lv->nestcnt); | |
12898 | + lv->owner = current; | |
12899 | + lv->nestcnt = 1; | |
12900 | + return 1; | |
12901 | + } | |
12902 | + return 0; | |
12903 | +} | |
12904 | + | |
12905 | +#define local_trylock(lvar) \ | |
12906 | + ({ \ | |
12907 | + int __locked; \ | |
12908 | + __locked = __local_trylock(&get_local_var(lvar)); \ | |
12909 | + if (!__locked) \ | |
12910 | + put_local_var(lvar); \ | |
12911 | + __locked; \ | |
12912 | + }) | |
12913 | + | |
12914 | +static inline void __local_unlock(struct local_irq_lock *lv) | |
12915 | +{ | |
12916 | + LL_WARN(lv->nestcnt == 0); | |
12917 | + LL_WARN(lv->owner != current); | |
12918 | + if (--lv->nestcnt) | |
12919 | + return; | |
12920 | + | |
12921 | + lv->owner = NULL; | |
12922 | + spin_unlock_local(&lv->lock); | |
12923 | +} | |
12924 | + | |
12925 | +#define local_unlock(lvar) \ | |
12926 | + do { \ | |
12927 | + __local_unlock(this_cpu_ptr(&lvar)); \ | |
12928 | + put_local_var(lvar); \ | |
12929 | + } while (0) | |
12930 | + | |
12931 | +#define local_unlock_on(lvar, cpu) \ | |
12932 | + do { __local_unlock(&per_cpu(lvar, cpu)); } while (0) | |
12933 | + | |
12934 | +static inline void __local_lock_irq(struct local_irq_lock *lv) | |
12935 | +{ | |
12936 | + spin_lock_irqsave(&lv->lock, lv->flags); | |
12937 | + LL_WARN(lv->owner); | |
12938 | + LL_WARN(lv->nestcnt); | |
12939 | + lv->owner = current; | |
12940 | + lv->nestcnt = 1; | |
12941 | +} | |
12942 | + | |
12943 | +#define local_lock_irq(lvar) \ | |
12944 | + do { __local_lock_irq(&get_local_var(lvar)); } while (0) | |
12945 | + | |
12946 | +#define local_lock_irq_on(lvar, cpu) \ | |
12947 | + do { __local_lock_irq(&per_cpu(lvar, cpu)); } while (0) | |
12948 | + | |
12949 | +static inline void __local_unlock_irq(struct local_irq_lock *lv) | |
12950 | +{ | |
12951 | + LL_WARN(!lv->nestcnt); | |
12952 | + LL_WARN(lv->owner != current); | |
12953 | + lv->owner = NULL; | |
12954 | + lv->nestcnt = 0; | |
12955 | + spin_unlock_irq(&lv->lock); | |
12956 | +} | |
12957 | + | |
12958 | +#define local_unlock_irq(lvar) \ | |
12959 | + do { \ | |
12960 | + __local_unlock_irq(this_cpu_ptr(&lvar)); \ | |
12961 | + put_local_var(lvar); \ | |
12962 | + } while (0) | |
12963 | + | |
12964 | +#define local_unlock_irq_on(lvar, cpu) \ | |
12965 | + do { \ | |
12966 | + __local_unlock_irq(&per_cpu(lvar, cpu)); \ | |
12967 | + } while (0) | |
12968 | + | |
12969 | +static inline int __local_lock_irqsave(struct local_irq_lock *lv) | |
12970 | +{ | |
12971 | + if (lv->owner != current) { | |
12972 | + __local_lock_irq(lv); | |
12973 | + return 0; | |
12974 | + } else { | |
12975 | + lv->nestcnt++; | |
12976 | + return 1; | |
12977 | + } | |
12978 | +} | |
12979 | + | |
12980 | +#define local_lock_irqsave(lvar, _flags) \ | |
12981 | + do { \ | |
12982 | + if (__local_lock_irqsave(&get_local_var(lvar))) \ | |
12983 | + put_local_var(lvar); \ | |
12984 | + _flags = __this_cpu_read(lvar.flags); \ | |
12985 | + } while (0) | |
12986 | + | |
12987 | +#define local_lock_irqsave_on(lvar, _flags, cpu) \ | |
12988 | + do { \ | |
12989 | + __local_lock_irqsave(&per_cpu(lvar, cpu)); \ | |
12990 | + _flags = per_cpu(lvar, cpu).flags; \ | |
12991 | + } while (0) | |
12992 | + | |
12993 | +static inline int __local_unlock_irqrestore(struct local_irq_lock *lv, | |
12994 | + unsigned long flags) | |
12995 | +{ | |
12996 | + LL_WARN(!lv->nestcnt); | |
12997 | + LL_WARN(lv->owner != current); | |
12998 | + if (--lv->nestcnt) | |
12999 | + return 0; | |
13000 | + | |
13001 | + lv->owner = NULL; | |
13002 | + spin_unlock_irqrestore(&lv->lock, lv->flags); | |
13003 | + return 1; | |
13004 | +} | |
13005 | + | |
13006 | +#define local_unlock_irqrestore(lvar, flags) \ | |
13007 | + do { \ | |
13008 | + if (__local_unlock_irqrestore(this_cpu_ptr(&lvar), flags)) \ | |
13009 | + put_local_var(lvar); \ | |
13010 | + } while (0) | |
13011 | + | |
13012 | +#define local_unlock_irqrestore_on(lvar, flags, cpu) \ | |
13013 | + do { \ | |
13014 | + __local_unlock_irqrestore(&per_cpu(lvar, cpu), flags); \ | |
13015 | + } while (0) | |
13016 | + | |
13017 | +#define local_spin_trylock_irq(lvar, lock) \ | |
13018 | + ({ \ | |
13019 | + int __locked; \ | |
13020 | + local_lock_irq(lvar); \ | |
13021 | + __locked = spin_trylock(lock); \ | |
13022 | + if (!__locked) \ | |
13023 | + local_unlock_irq(lvar); \ | |
13024 | + __locked; \ | |
13025 | + }) | |
13026 | + | |
13027 | +#define local_spin_lock_irq(lvar, lock) \ | |
13028 | + do { \ | |
13029 | + local_lock_irq(lvar); \ | |
13030 | + spin_lock(lock); \ | |
13031 | + } while (0) | |
13032 | + | |
13033 | +#define local_spin_unlock_irq(lvar, lock) \ | |
13034 | + do { \ | |
13035 | + spin_unlock(lock); \ | |
13036 | + local_unlock_irq(lvar); \ | |
13037 | + } while (0) | |
13038 | + | |
13039 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
13040 | + do { \ | |
13041 | + local_lock_irqsave(lvar, flags); \ | |
13042 | + spin_lock(lock); \ | |
13043 | + } while (0) | |
13044 | + | |
13045 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
13046 | + do { \ | |
13047 | + spin_unlock(lock); \ | |
13048 | + local_unlock_irqrestore(lvar, flags); \ | |
13049 | + } while (0) | |
13050 | + | |
13051 | +#define get_locked_var(lvar, var) \ | |
13052 | + (*({ \ | |
13053 | + local_lock(lvar); \ | |
13054 | + this_cpu_ptr(&var); \ | |
13055 | + })) | |
13056 | + | |
13057 | +#define put_locked_var(lvar, var) local_unlock(lvar); | |
13058 | + | |
13059 | +#define local_lock_cpu(lvar) \ | |
13060 | + ({ \ | |
13061 | + local_lock(lvar); \ | |
13062 | + smp_processor_id(); \ | |
13063 | + }) | |
13064 | + | |
13065 | +#define local_unlock_cpu(lvar) local_unlock(lvar) | |
13066 | + | |
13067 | +#else /* PREEMPT_RT_BASE */ | |
13068 | + | |
13069 | +#define DEFINE_LOCAL_IRQ_LOCK(lvar) __typeof__(const int) lvar | |
13070 | +#define DECLARE_LOCAL_IRQ_LOCK(lvar) extern __typeof__(const int) lvar | |
13071 | + | |
13072 | +static inline void local_irq_lock_init(int lvar) { } | |
13073 | + | |
13074 | +#define local_lock(lvar) preempt_disable() | |
13075 | +#define local_unlock(lvar) preempt_enable() | |
13076 | +#define local_lock_irq(lvar) local_irq_disable() | |
13077 | +#define local_unlock_irq(lvar) local_irq_enable() | |
13078 | +#define local_lock_irqsave(lvar, flags) local_irq_save(flags) | |
13079 | +#define local_unlock_irqrestore(lvar, flags) local_irq_restore(flags) | |
13080 | + | |
13081 | +#define local_spin_trylock_irq(lvar, lock) spin_trylock_irq(lock) | |
13082 | +#define local_spin_lock_irq(lvar, lock) spin_lock_irq(lock) | |
13083 | +#define local_spin_unlock_irq(lvar, lock) spin_unlock_irq(lock) | |
13084 | +#define local_spin_lock_irqsave(lvar, lock, flags) \ | |
13085 | + spin_lock_irqsave(lock, flags) | |
13086 | +#define local_spin_unlock_irqrestore(lvar, lock, flags) \ | |
13087 | + spin_unlock_irqrestore(lock, flags) | |
13088 | + | |
13089 | +#define get_locked_var(lvar, var) get_cpu_var(var) | |
13090 | +#define put_locked_var(lvar, var) put_cpu_var(var) | |
13091 | + | |
13092 | +#define local_lock_cpu(lvar) get_cpu() | |
13093 | +#define local_unlock_cpu(lvar) put_cpu() | |
13094 | + | |
13095 | +#endif | |
13096 | + | |
13097 | +#endif | |
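Note: locallock.h is the RT substitute for the common local_irq_save()-around-per-CPU-data idiom: on RT it is a per-CPU sleeping spinlock with owner and nesting tracking, on !RT the macros collapse to the classic preempt/irq toggles. A minimal usage sketch with an illustrative per-CPU structure (a real user also runs local_irq_lock_init() once during init):

    #include <linux/locallock.h>
    #include <linux/percpu.h>

    struct my_stats { unsigned long count; };       /* illustrative */
    static DEFINE_PER_CPU(struct my_stats, my_stats);
    static DEFINE_LOCAL_IRQ_LOCK(my_stats_lock);

    static void my_stats_inc(void)
    {
            unsigned long flags;

            local_lock_irqsave(my_stats_lock, flags);
            this_cpu_ptr(&my_stats)->count++;
            local_unlock_irqrestore(my_stats_lock, flags);
    }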
13098 | diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h | |
13099 | index f8d1492a114f..b238ebfbb4d6 100644 | |
13100 | --- a/include/linux/mm_types.h | |
13101 | +++ b/include/linux/mm_types.h | |
13102 | @@ -11,6 +11,7 @@ | |
13103 | #include <linux/completion.h> | |
13104 | #include <linux/cpumask.h> | |
13105 | #include <linux/uprobes.h> | |
13106 | +#include <linux/rcupdate.h> | |
13107 | #include <linux/page-flags-layout.h> | |
13108 | #include <asm/page.h> | |
13109 | #include <asm/mmu.h> | |
13110 | @@ -504,6 +505,9 @@ struct mm_struct { | |
13111 | bool tlb_flush_pending; | |
13112 | #endif | |
13113 | struct uprobes_state uprobes_state; | |
13114 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13115 | + struct rcu_head delayed_drop; | |
13116 | +#endif | |
13117 | #ifdef CONFIG_X86_INTEL_MPX | |
13118 | /* address of the bounds directory */ | |
13119 | void __user *bd_addr; | |
13120 | diff --git a/include/linux/mutex.h b/include/linux/mutex.h | |
13121 | index 2cb7531e7d7a..b3fdfc820216 100644 | |
13122 | --- a/include/linux/mutex.h | |
13123 | +++ b/include/linux/mutex.h | |
13124 | @@ -19,6 +19,17 @@ | |
13125 | #include <asm/processor.h> | |
13126 | #include <linux/osq_lock.h> | |
13127 | ||
13128 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13129 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
13130 | + , .dep_map = { .name = #lockname } | |
13131 | +#else | |
13132 | +# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
13133 | +#endif | |
13134 | + | |
13135 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13136 | +# include <linux/mutex_rt.h> | |
13137 | +#else | |
13138 | + | |
13139 | /* | |
13140 | * Simple, straightforward mutexes with strict semantics: | |
13141 | * | |
13142 | @@ -99,13 +110,6 @@ do { \ | |
13143 | static inline void mutex_destroy(struct mutex *lock) {} | |
13144 | #endif | |
13145 | ||
13146 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13147 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ | |
13148 | - , .dep_map = { .name = #lockname } | |
13149 | -#else | |
13150 | -# define __DEP_MAP_MUTEX_INITIALIZER(lockname) | |
13151 | -#endif | |
13152 | - | |
13153 | #define __MUTEX_INITIALIZER(lockname) \ | |
13154 | { .count = ATOMIC_INIT(1) \ | |
13155 | , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ | |
13156 | @@ -173,6 +177,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); | |
13157 | extern int mutex_trylock(struct mutex *lock); | |
13158 | extern void mutex_unlock(struct mutex *lock); | |
13159 | ||
13160 | +#endif /* !PREEMPT_RT_FULL */ | |
13161 | + | |
13162 | extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); | |
13163 | ||
13164 | #endif /* __LINUX_MUTEX_H */ | |
13165 | diff --git a/include/linux/mutex_rt.h b/include/linux/mutex_rt.h | |
13166 | new file mode 100644 | |
13167 | index 000000000000..c38a44b14da5 | |
13168 | --- /dev/null | |
13169 | +++ b/include/linux/mutex_rt.h | |
13170 | @@ -0,0 +1,84 @@ | |
13171 | +#ifndef __LINUX_MUTEX_RT_H | |
13172 | +#define __LINUX_MUTEX_RT_H | |
13173 | + | |
13174 | +#ifndef __LINUX_MUTEX_H | |
13175 | +#error "Please include mutex.h" | |
13176 | +#endif | |
13177 | + | |
13178 | +#include <linux/rtmutex.h> | |
13179 | + | |
13180 | +/* FIXME: Just for __lockfunc */ | |
13181 | +#include <linux/spinlock.h> | |
13182 | + | |
13183 | +struct mutex { | |
13184 | + struct rt_mutex lock; | |
13185 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13186 | + struct lockdep_map dep_map; | |
13187 | +#endif | |
13188 | +}; | |
13189 | + | |
13190 | +#define __MUTEX_INITIALIZER(mutexname) \ | |
13191 | + { \ | |
13192 | + .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ | |
13193 | + __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ | |
13194 | + } | |
13195 | + | |
13196 | +#define DEFINE_MUTEX(mutexname) \ | |
13197 | + struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) | |
13198 | + | |
13199 | +extern void __mutex_do_init(struct mutex *lock, const char *name, struct lock_class_key *key); | |
13200 | +extern void __lockfunc _mutex_lock(struct mutex *lock); | |
13201 | +extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); | |
13202 | +extern int __lockfunc _mutex_lock_killable(struct mutex *lock); | |
13203 | +extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); | |
13204 | +extern void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest_lock); | |
13205 | +extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); | |
13206 | +extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); | |
13207 | +extern int __lockfunc _mutex_trylock(struct mutex *lock); | |
13208 | +extern void __lockfunc _mutex_unlock(struct mutex *lock); | |
13209 | + | |
13210 | +#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) | |
13211 | +#define mutex_lock(l) _mutex_lock(l) | |
13212 | +#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) | |
13213 | +#define mutex_lock_killable(l) _mutex_lock_killable(l) | |
13214 | +#define mutex_trylock(l) _mutex_trylock(l) | |
13215 | +#define mutex_unlock(l) _mutex_unlock(l) | |
13216 | +#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) | |
13217 | + | |
13218 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
13219 | +# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) | |
13220 | +# define mutex_lock_interruptible_nested(l, s) \ | |
13221 | + _mutex_lock_interruptible_nested(l, s) | |
13222 | +# define mutex_lock_killable_nested(l, s) \ | |
13223 | + _mutex_lock_killable_nested(l, s) | |
13224 | + | |
13225 | +# define mutex_lock_nest_lock(lock, nest_lock) \ | |
13226 | +do { \ | |
13227 | + typecheck(struct lockdep_map *, &(nest_lock)->dep_map); \ | |
13228 | + _mutex_lock_nest_lock(lock, &(nest_lock)->dep_map); \ | |
13229 | +} while (0) | |
13230 | + | |
13231 | +#else | |
13232 | +# define mutex_lock_nested(l, s) _mutex_lock(l) | |
13233 | +# define mutex_lock_interruptible_nested(l, s) \ | |
13234 | + _mutex_lock_interruptible(l) | |
13235 | +# define mutex_lock_killable_nested(l, s) \ | |
13236 | + _mutex_lock_killable(l) | |
13237 | +# define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) | |
13238 | +#endif | |
13239 | + | |
13240 | +# define mutex_init(mutex) \ | |
13241 | +do { \ | |
13242 | + static struct lock_class_key __key; \ | |
13243 | + \ | |
13244 | + rt_mutex_init(&(mutex)->lock); \ | |
13245 | + __mutex_do_init((mutex), #mutex, &__key); \ | |
13246 | +} while (0) | |
13247 | + | |
13248 | +# define __mutex_init(mutex, name, key) \ | |
13249 | +do { \ | |
13250 | + rt_mutex_init(&(mutex)->lock); \ | |
13251 | + __mutex_do_init((mutex), name, key); \ | |
13252 | +} while (0) | |
13253 | + | |
13254 | +#endif | |
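Note: with PREEMPT_RT_FULL every struct mutex wraps an rt_mutex and the generic API is remapped above it, so existing users compile and behave the same, now with priority inheritance:

    static DEFINE_MUTEX(conf_lock);         /* rt_mutex-backed on RT; name illustrative */

    mutex_lock(&conf_lock);
    /* ... */
    mutex_unlock(&conf_lock);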
13255 | diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h | |
13256 | index 12b4d54a8ffa..a2e7d1816b4c 100644 | |
13257 | --- a/include/linux/netdevice.h |
13258 | +++ b/include/linux/netdevice.h | |
13259 | @@ -2248,11 +2248,20 @@ void netdev_freemem(struct net_device *dev); | |
13260 | void synchronize_net(void); | |
13261 | int init_dummy_netdev(struct net_device *dev); | |
13262 | ||
13263 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13264 | +static inline int dev_recursion_level(void) | |
13265 | +{ | |
13266 | + return current->xmit_recursion; | |
13267 | +} | |
13268 | + | |
13269 | +#else | |
13270 | + | |
13271 | DECLARE_PER_CPU(int, xmit_recursion); | |
13272 | static inline int dev_recursion_level(void) | |
13273 | { | |
13274 | return this_cpu_read(xmit_recursion); | |
13275 | } | |
13276 | +#endif | |
13277 | ||
13278 | struct net_device *dev_get_by_index(struct net *net, int ifindex); | |
13279 | struct net_device *__dev_get_by_index(struct net *net, int ifindex); | |
13280 | @@ -2563,6 +2572,7 @@ struct softnet_data { | |
13281 | unsigned int dropped; | |
13282 | struct sk_buff_head input_pkt_queue; | |
13283 | struct napi_struct backlog; | |
13284 | + struct sk_buff_head tofree_queue; | |
13285 | ||
13286 | }; | |
13287 | ||
13288 | diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h | |
13289 | index 04078e8a4803..a61c9609e32f 100644 | |
13290 | --- a/include/linux/netfilter/x_tables.h | |
13291 | +++ b/include/linux/netfilter/x_tables.h | |
13292 | @@ -4,6 +4,7 @@ | |
13293 | ||
13294 | #include <linux/netdevice.h> | |
13295 | #include <linux/static_key.h> | |
13296 | +#include <linux/locallock.h> | |
13297 | #include <uapi/linux/netfilter/x_tables.h> | |
13298 | ||
13299 | /** | |
13300 | @@ -289,6 +290,8 @@ void xt_free_table_info(struct xt_table_info *info); | |
13301 | */ | |
13302 | DECLARE_PER_CPU(seqcount_t, xt_recseq); | |
13303 | ||
13304 | +DECLARE_LOCAL_IRQ_LOCK(xt_write_lock); | |
13305 | + | |
13306 | /* xt_tee_enabled - true if x_tables needs to handle reentrancy | |
13307 | * | |
13308 | * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. | |
13309 | @@ -309,6 +312,9 @@ static inline unsigned int xt_write_recseq_begin(void) | |
13310 | { | |
13311 | unsigned int addend; | |
13312 | ||
13313 | + /* RT protection */ | |
13314 | + local_lock(xt_write_lock); | |
13315 | + | |
13316 | /* | |
13317 | * Low order bit of sequence is set if we already | |
13318 | * called xt_write_recseq_begin(). | |
13319 | @@ -339,6 +345,7 @@ static inline void xt_write_recseq_end(unsigned int addend) | |
13320 | /* this is kind of a write_seqcount_end(), but addend is 0 or 1 */ | |
13321 | smp_wmb(); | |
13322 | __this_cpu_add(xt_recseq.sequence, addend); | |
13323 | + local_unlock(xt_write_lock); | |
13324 | } | |
13325 | ||
13326 | /* | |
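Note: on RT, per-CPU exclusion by softirq-disable is gone, so the local lock serializes writers around the xt_recseq sequence counter. The packet-path pattern itself is unchanged:

    unsigned int addend;

    addend = xt_write_recseq_begin();       /* takes xt_write_lock on RT */
    /* walk the ruleset, update counters */
    xt_write_recseq_end(addend);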
13327 | diff --git a/include/linux/notifier.h b/include/linux/notifier.h | |
13328 | index d14a4c362465..2e4414a0c1c4 100644 | |
13329 | --- a/include/linux/notifier.h | |
13330 | +++ b/include/linux/notifier.h | |
13331 | @@ -6,7 +6,7 @@ | |
13332 | * | |
13333 | * Alan Cox <Alan.Cox@linux.org> | |
13334 | */ | |
13335 | - | |
13336 | + | |
13337 | #ifndef _LINUX_NOTIFIER_H | |
13338 | #define _LINUX_NOTIFIER_H | |
13339 | #include <linux/errno.h> | |
13340 | @@ -42,9 +42,7 @@ | |
13341 | * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. | |
13342 | * As compensation, srcu_notifier_chain_unregister() is rather expensive. | |
13343 | * SRCU notifier chains should be used when the chain will be called very | |
13344 | - * often but notifier_blocks will seldom be removed. Also, SRCU notifier | |
13345 | - * chains are slightly more difficult to use because they require special | |
13346 | - * runtime initialization. | |
13347 | + * often but notifier_blocks will seldom be removed. | |
13348 | */ | |
13349 | ||
13350 | typedef int (*notifier_fn_t)(struct notifier_block *nb, | |
13351 | @@ -88,7 +86,7 @@ struct srcu_notifier_head { | |
13352 | (name)->head = NULL; \ | |
13353 | } while (0) | |
13354 | ||
13355 | -/* srcu_notifier_heads must be initialized and cleaned up dynamically */ | |
13356 | +/* srcu_notifier_heads must be cleaned up dynamically */ | |
13357 | extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
13358 | #define srcu_cleanup_notifier_head(name) \ | |
13359 | cleanup_srcu_struct(&(name)->srcu); | |
13360 | @@ -101,7 +99,13 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
13361 | .head = NULL } | |
13362 | #define RAW_NOTIFIER_INIT(name) { \ | |
13363 | .head = NULL } | |
13364 | -/* srcu_notifier_heads cannot be initialized statically */ | |
13365 | + | |
13366 | +#define SRCU_NOTIFIER_INIT(name, pcpu) \ | |
13367 | + { \ | |
13368 | + .mutex = __MUTEX_INITIALIZER(name.mutex), \ | |
13369 | + .head = NULL, \ | |
13370 | + .srcu = __SRCU_STRUCT_INIT(name.srcu, pcpu), \ | |
13371 | + } | |
13372 | ||
13373 | #define ATOMIC_NOTIFIER_HEAD(name) \ | |
13374 | struct atomic_notifier_head name = \ | |
13375 | @@ -113,6 +117,18 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh); | |
13376 | struct raw_notifier_head name = \ | |
13377 | RAW_NOTIFIER_INIT(name) | |
13378 | ||
13379 | +#define _SRCU_NOTIFIER_HEAD(name, mod) \ | |
13380 | + static DEFINE_PER_CPU(struct srcu_struct_array, \ | |
13381 | + name##_head_srcu_array); \ | |
13382 | + mod struct srcu_notifier_head name = \ | |
13383 | + SRCU_NOTIFIER_INIT(name, name##_head_srcu_array) | |
13384 | + | |
13385 | +#define SRCU_NOTIFIER_HEAD(name) \ | |
13386 | + _SRCU_NOTIFIER_HEAD(name, ) | |
13387 | + | |
13388 | +#define SRCU_NOTIFIER_HEAD_STATIC(name) \ | |
13389 | + _SRCU_NOTIFIER_HEAD(name, static) | |
13390 | + | |
13391 | #ifdef __KERNEL__ | |
13392 | ||
13393 | extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, | |
13394 | @@ -182,12 +198,12 @@ static inline int notifier_to_errno(int ret) | |
13395 | ||
13396 | /* | |
13397 | * Declared notifiers so far. I can imagine quite a few more chains | |
13398 | - * over time (eg laptop power reset chains, reboot chain (to clean | |
13399 | + * over time (eg laptop power reset chains, reboot chain (to clean | |
13400 | * device units up), device [un]mount chain, module load/unload chain, | |
13401 | - * low memory chain, screenblank chain (for plug in modular screenblankers) | |
13402 | + * low memory chain, screenblank chain (for plug in modular screenblankers) | |
13403 | * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... | |
13404 | */ | |
13405 | - | |
13406 | + | |
13407 | /* CPU notfiers are defined in include/linux/cpu.h. */ | |
13408 | ||
13409 | /* netdevice notifiers are defined in include/linux/netdevice.h */ | |
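Note: SRCU_NOTIFIER_HEAD()/SRCU_NOTIFIER_HEAD_STATIC() define the per-CPU srcu array alongside the head, so srcu notifier chains no longer need runtime srcu_init_notifier_head(). Sketch, with illustrative names (my_nb is a notifier_block defined elsewhere):

    SRCU_NOTIFIER_HEAD_STATIC(my_chain);    /* 'static' is supplied by the macro */

    srcu_notifier_chain_register(&my_chain, &my_nb);
    srcu_notifier_call_chain(&my_chain, 0, NULL);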
13410 | diff --git a/include/linux/percpu.h b/include/linux/percpu.h | |
13411 | index caebf2a758dc..53a60a51c758 100644 | |
13412 | --- a/include/linux/percpu.h | |
13413 | +++ b/include/linux/percpu.h | |
13414 | @@ -24,6 +24,35 @@ | |
13415 | PERCPU_MODULE_RESERVE) | |
13416 | #endif | |
13417 | ||
13418 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13419 | + | |
13420 | +#define get_local_var(var) (*({ \ | |
13421 | + migrate_disable(); \ | |
13422 | + this_cpu_ptr(&var); })) | |
13423 | + | |
13424 | +#define put_local_var(var) do { \ | |
13425 | + (void)&(var); \ | |
13426 | + migrate_enable(); \ | |
13427 | +} while (0) | |
13428 | + | |
13429 | +# define get_local_ptr(var) ({ \ | |
13430 | + migrate_disable(); \ | |
13431 | + this_cpu_ptr(var); }) | |
13432 | + | |
13433 | +# define put_local_ptr(var) do { \ | |
13434 | + (void)(var); \ | |
13435 | + migrate_enable(); \ | |
13436 | +} while (0) | |
13437 | + | |
13438 | +#else | |
13439 | + | |
13440 | +#define get_local_var(var) get_cpu_var(var) | |
13441 | +#define put_local_var(var) put_cpu_var(var) | |
13442 | +#define get_local_ptr(var) get_cpu_ptr(var) | |
13443 | +#define put_local_ptr(var) put_cpu_ptr(var) | |
13444 | + | |
13445 | +#endif | |
13446 | + | |
13447 | /* minimum unit size, also is the maximum supported allocation size */ | |
13448 | #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) | |
13449 | ||
13450 | diff --git a/include/linux/pid.h b/include/linux/pid.h | |
13451 | index 23705a53abba..2cc64b779f03 100644 | |
13452 | --- a/include/linux/pid.h | |
13453 | +++ b/include/linux/pid.h | |
13454 | @@ -2,6 +2,7 @@ | |
13455 | #define _LINUX_PID_H | |
13456 | ||
13457 | #include <linux/rcupdate.h> | |
13458 | +#include <linux/atomic.h> | |
13459 | ||
13460 | enum pid_type | |
13461 | { | |
13462 | diff --git a/include/linux/preempt.h b/include/linux/preempt.h | |
13463 | index 75e4e30677f1..1cfb1cb72354 100644 | |
13464 | --- a/include/linux/preempt.h | |
13465 | +++ b/include/linux/preempt.h | |
13466 | @@ -50,7 +50,11 @@ | |
13467 | #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) | |
13468 | #define NMI_OFFSET (1UL << NMI_SHIFT) | |
13469 | ||
13470 | -#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
13471 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13472 | +# define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) | |
13473 | +#else | |
13474 | +# define SOFTIRQ_DISABLE_OFFSET (0) | |
13475 | +#endif | |
13476 | ||
13477 | /* We use the MSB mostly because its available */ | |
13478 | #define PREEMPT_NEED_RESCHED 0x80000000 | |
13479 | @@ -59,9 +63,15 @@ | |
13480 | #include <asm/preempt.h> | |
13481 | ||
13482 | #define hardirq_count() (preempt_count() & HARDIRQ_MASK) | |
13483 | -#define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
13484 | #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ | |
13485 | | NMI_MASK)) | |
13486 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13487 | +# define softirq_count() (preempt_count() & SOFTIRQ_MASK) | |
13488 | +# define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
13489 | +#else | |
13490 | +# define softirq_count() (0UL) | |
13491 | +extern int in_serving_softirq(void); | |
13492 | +#endif | |
13493 | ||
13494 | /* | |
13495 | * Are we doing bottom half or hardware interrupt processing? | |
13496 | @@ -72,7 +82,6 @@ | |
13497 | #define in_irq() (hardirq_count()) | |
13498 | #define in_softirq() (softirq_count()) | |
13499 | #define in_interrupt() (irq_count()) | |
13500 | -#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) | |
13501 | ||
13502 | /* | |
13503 | * Are we in NMI context? | |
13504 | @@ -91,7 +100,11 @@ | |
13505 | /* | |
13506 | * The preempt_count offset after spin_lock() | |
13507 | */ | |
13508 | +#if !defined(CONFIG_PREEMPT_RT_FULL) | |
13509 | #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET | |
13510 | +#else | |
13511 | +#define PREEMPT_LOCK_OFFSET 0 | |
13512 | +#endif | |
13513 | ||
13514 | /* | |
13515 | * The preempt_count offset needed for things like: | |
13516 | @@ -140,6 +153,20 @@ extern void preempt_count_sub(int val); | |
13517 | #define preempt_count_inc() preempt_count_add(1) | |
13518 | #define preempt_count_dec() preempt_count_sub(1) | |
13519 | ||
13520 | +#ifdef CONFIG_PREEMPT_LAZY | |
13521 | +#define add_preempt_lazy_count(val) do { preempt_lazy_count() += (val); } while (0) | |
13522 | +#define sub_preempt_lazy_count(val) do { preempt_lazy_count() -= (val); } while (0) | |
13523 | +#define inc_preempt_lazy_count() add_preempt_lazy_count(1) | |
13524 | +#define dec_preempt_lazy_count() sub_preempt_lazy_count(1) | |
13525 | +#define preempt_lazy_count() (current_thread_info()->preempt_lazy_count) | |
13526 | +#else | |
13527 | +#define add_preempt_lazy_count(val) do { } while (0) | |
13528 | +#define sub_preempt_lazy_count(val) do { } while (0) | |
13529 | +#define inc_preempt_lazy_count() do { } while (0) | |
13530 | +#define dec_preempt_lazy_count() do { } while (0) | |
13531 | +#define preempt_lazy_count() (0) | |
13532 | +#endif | |
13533 | + | |
13534 | #ifdef CONFIG_PREEMPT_COUNT | |
13535 | ||
13536 | #define preempt_disable() \ | |
13537 | @@ -148,13 +175,25 @@ do { \ | |
13538 | barrier(); \ | |
13539 | } while (0) | |
13540 | ||
13541 | +#define preempt_lazy_disable() \ | |
13542 | +do { \ | |
13543 | + inc_preempt_lazy_count(); \ | |
13544 | + barrier(); \ | |
13545 | +} while (0) | |
13546 | + | |
13547 | #define sched_preempt_enable_no_resched() \ | |
13548 | do { \ | |
13549 | barrier(); \ | |
13550 | preempt_count_dec(); \ | |
13551 | } while (0) | |
13552 | ||
13553 | -#define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
13554 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
13555 | +# define preempt_enable_no_resched() sched_preempt_enable_no_resched() | |
13556 | +# define preempt_check_resched_rt() preempt_check_resched() | |
13557 | +#else | |
13558 | +# define preempt_enable_no_resched() preempt_enable() | |
13559 | +# define preempt_check_resched_rt() barrier(); | |
13560 | +#endif | |
13561 | ||
13562 | #define preemptible() (preempt_count() == 0 && !irqs_disabled()) | |
13563 | ||
13564 | @@ -179,6 +218,13 @@ do { \ | |
13565 | __preempt_schedule(); \ | |
13566 | } while (0) | |
13567 | ||
13568 | +#define preempt_lazy_enable() \ | |
13569 | +do { \ | |
13570 | + dec_preempt_lazy_count(); \ | |
13571 | + barrier(); \ | |
13572 | + preempt_check_resched(); \ | |
13573 | +} while (0) | |
13574 | + | |
13575 | #else /* !CONFIG_PREEMPT */ | |
13576 | #define preempt_enable() \ | |
13577 | do { \ | |
13578 | @@ -224,6 +270,7 @@ do { \ | |
13579 | #define preempt_disable_notrace() barrier() | |
13580 | #define preempt_enable_no_resched_notrace() barrier() | |
13581 | #define preempt_enable_notrace() barrier() | |
13582 | +#define preempt_check_resched_rt() barrier() | |
13583 | #define preemptible() 0 | |
13584 | ||
13585 | #endif /* CONFIG_PREEMPT_COUNT */ | |
13586 | @@ -244,10 +291,31 @@ do { \ | |
13587 | } while (0) | |
13588 | #define preempt_fold_need_resched() \ | |
13589 | do { \ | |
13590 | - if (tif_need_resched()) \ | |
13591 | + if (tif_need_resched_now()) \ | |
13592 | set_preempt_need_resched(); \ | |
13593 | } while (0) | |
13594 | ||
13595 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13596 | +# define preempt_disable_rt() preempt_disable() | |
13597 | +# define preempt_enable_rt() preempt_enable() | |
13598 | +# define preempt_disable_nort() barrier() | |
13599 | +# define preempt_enable_nort() barrier() | |
13600 | +# ifdef CONFIG_SMP | |
13601 | + extern void migrate_disable(void); | |
13602 | + extern void migrate_enable(void); | |
13603 | +# else /* CONFIG_SMP */ | |
13604 | +# define migrate_disable() barrier() | |
13605 | +# define migrate_enable() barrier() | |
13606 | +# endif /* CONFIG_SMP */ | |
13607 | +#else | |
13608 | +# define preempt_disable_rt() barrier() | |
13609 | +# define preempt_enable_rt() barrier() | |
13610 | +# define preempt_disable_nort() preempt_disable() | |
13611 | +# define preempt_enable_nort() preempt_enable() | |
13612 | +# define migrate_disable() preempt_disable() | |
13613 | +# define migrate_enable() preempt_enable() | |
13614 | +#endif | |
13615 | + | |
13616 | #ifdef CONFIG_PREEMPT_NOTIFIERS | |
13617 | ||
13618 | struct preempt_notifier; | |
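Note: migrate_disable()/migrate_enable() pin a task to its current CPU while keeping it preemptible on RT (on !RT they fall back to preempt_disable()/preempt_enable()); the _rt/_nort pairs mirror the irqflags ones earlier in this patch. Sketch, with an illustrative per-CPU variable:

    migrate_disable();      /* stay on this CPU, still preemptible on RT */
    this_cpu_inc(my_stat);
    migrate_enable();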
13619 | diff --git a/include/linux/printk.h b/include/linux/printk.h | |
13620 | index 9729565c25ff..9cdca696b718 100644 | |
13621 | --- a/include/linux/printk.h | |
13622 | +++ b/include/linux/printk.h | |
13623 | @@ -117,9 +117,11 @@ int no_printk(const char *fmt, ...) | |
13624 | #ifdef CONFIG_EARLY_PRINTK | |
13625 | extern asmlinkage __printf(1, 2) | |
13626 | void early_printk(const char *fmt, ...); | |
13627 | +extern void printk_kill(void); | |
13628 | #else | |
13629 | static inline __printf(1, 2) __cold | |
13630 | void early_printk(const char *s, ...) { } | |
13631 | +static inline void printk_kill(void) { } | |
13632 | #endif | |
13633 | ||
13634 | typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); | |
13635 | diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h | |
13636 | index 5d5174b59802..8ddbd6e15a3c 100644 | |
13637 | --- a/include/linux/radix-tree.h | |
13638 | +++ b/include/linux/radix-tree.h | |
13639 | @@ -277,8 +277,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, | |
13640 | unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root, | |
13641 | void ***results, unsigned long *indices, | |
13642 | unsigned long first_index, unsigned int max_items); | |
13643 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13644 | int radix_tree_preload(gfp_t gfp_mask); | |
13645 | int radix_tree_maybe_preload(gfp_t gfp_mask); | |
13646 | +#else | |
13647 | +static inline int radix_tree_preload(gfp_t gm) { return 0; } | |
13648 | +static inline int radix_tree_maybe_preload(gfp_t gfp_mask) { return 0; } | |
13649 | +#endif | |
13650 | void radix_tree_init(void); | |
13651 | void *radix_tree_tag_set(struct radix_tree_root *root, | |
13652 | unsigned long index, unsigned int tag); | |
13653 | @@ -303,7 +308,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item); | |
13654 | ||
13655 | static inline void radix_tree_preload_end(void) | |
13656 | { | |
13657 | - preempt_enable(); | |
13658 | + preempt_enable_nort(); | |
13659 | } | |
13660 | ||
13661 | /** | |
13662 | diff --git a/include/linux/random.h b/include/linux/random.h | |
13663 | index a75840c1aa71..1a804361670c 100644 | |
13664 | --- a/include/linux/random.h | |
13665 | +++ b/include/linux/random.h | |
13666 | @@ -20,7 +20,7 @@ struct random_ready_callback { | |
13667 | extern void add_device_randomness(const void *, unsigned int); | |
13668 | extern void add_input_randomness(unsigned int type, unsigned int code, | |
13669 | unsigned int value); | |
13670 | -extern void add_interrupt_randomness(int irq, int irq_flags); | |
13671 | +extern void add_interrupt_randomness(int irq, int irq_flags, __u64 ip); | |
13672 | ||
13673 | extern void get_random_bytes(void *buf, int nbytes); | |
13674 | extern int add_random_ready_callback(struct random_ready_callback *rdy); | |
13675 | diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h | |
13676 | index a5aa7ae671f4..24ddffd25492 100644 | |
13677 | --- a/include/linux/rbtree.h | |
13678 | +++ b/include/linux/rbtree.h | |
13679 | @@ -31,7 +31,6 @@ | |
13680 | ||
13681 | #include <linux/kernel.h> | |
13682 | #include <linux/stddef.h> | |
13683 | -#include <linux/rcupdate.h> | |
13684 | ||
13685 | struct rb_node { | |
13686 | unsigned long __rb_parent_color; | |
13687 | @@ -86,14 +85,8 @@ static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, | |
13688 | *rb_link = node; | |
13689 | } | |
13690 | ||
13691 | -static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, | |
13692 | - struct rb_node **rb_link) | |
13693 | -{ | |
13694 | - node->__rb_parent_color = (unsigned long)parent; | |
13695 | - node->rb_left = node->rb_right = NULL; | |
13696 | - | |
13697 | - rcu_assign_pointer(*rb_link, node); | |
13698 | -} | |
13699 | +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, | |
13700 | + struct rb_node **rb_link); | |
13701 | ||
13702 | #define rb_entry_safe(ptr, type, member) \ | |
13703 | ({ typeof(ptr) ____ptr = (ptr); \ | |
13704 | diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h | |
13705 | index a0189ba67fde..c2f5f955163d 100644 | |
13706 | --- a/include/linux/rcupdate.h | |
13707 | +++ b/include/linux/rcupdate.h | |
13708 | @@ -169,6 +169,9 @@ void call_rcu(struct rcu_head *head, | |
13709 | ||
13710 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
13711 | ||
13712 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13713 | +#define call_rcu_bh call_rcu | |
13714 | +#else | |
13715 | /** | |
13716 | * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. | |
13717 | * @head: structure to be used for queueing the RCU updates. | |
13718 | @@ -192,6 +195,7 @@ void call_rcu(struct rcu_head *head, | |
13719 | */ | |
13720 | void call_rcu_bh(struct rcu_head *head, | |
13721 | rcu_callback_t func); | |
13722 | +#endif | |
13723 | ||
13724 | /** | |
13725 | * call_rcu_sched() - Queue an RCU for invocation after sched grace period. | |
13726 | @@ -292,6 +296,11 @@ void synchronize_rcu(void); | |
13727 | * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. | |
13728 | */ | |
13729 | #define rcu_preempt_depth() (current->rcu_read_lock_nesting) | |
13730 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13731 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
13732 | +#else | |
13733 | +static inline int sched_rcu_preempt_depth(void) { return 0; } | |
13734 | +#endif | |
13735 | ||
13736 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | |
13737 | ||
13738 | @@ -317,6 +326,8 @@ static inline int rcu_preempt_depth(void) | |
13739 | return 0; | |
13740 | } | |
13741 | ||
13742 | +#define sched_rcu_preempt_depth() rcu_preempt_depth() | |
13743 | + | |
13744 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
13745 | ||
13746 | /* Internal to kernel */ | |
13747 | @@ -489,7 +500,14 @@ extern struct lockdep_map rcu_callback_map; | |
13748 | int debug_lockdep_rcu_enabled(void); | |
13749 | ||
13750 | int rcu_read_lock_held(void); | |
13751 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13752 | +static inline int rcu_read_lock_bh_held(void) | |
13753 | +{ | |
13754 | + return rcu_read_lock_held(); | |
13755 | +} | |
13756 | +#else | |
13757 | int rcu_read_lock_bh_held(void); | |
13758 | +#endif | |
13759 | ||
13760 | /** | |
13761 | * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? | |
13762 | @@ -937,10 +955,14 @@ static inline void rcu_read_unlock(void) | |
13763 | static inline void rcu_read_lock_bh(void) | |
13764 | { | |
13765 | local_bh_disable(); | |
13766 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13767 | + rcu_read_lock(); | |
13768 | +#else | |
13769 | __acquire(RCU_BH); | |
13770 | rcu_lock_acquire(&rcu_bh_lock_map); | |
13771 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
13772 | "rcu_read_lock_bh() used illegally while idle"); | |
13773 | +#endif | |
13774 | } | |
13775 | ||
13776 | /* | |
13777 | @@ -950,10 +972,14 @@ static inline void rcu_read_lock_bh(void) | |
13778 | */ | |
13779 | static inline void rcu_read_unlock_bh(void) | |
13780 | { | |
13781 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13782 | + rcu_read_unlock(); | |
13783 | +#else | |
13784 | RCU_LOCKDEP_WARN(!rcu_is_watching(), | |
13785 | "rcu_read_unlock_bh() used illegally while idle"); | |
13786 | rcu_lock_release(&rcu_bh_lock_map); | |
13787 | __release(RCU_BH); | |
13788 | +#endif | |
13789 | local_bh_enable(); | |
13790 | } | |
13791 | ||
13792 | diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h | |
13793 | index 60d15a080d7c..436c9e62bfc6 100644 | |
13794 | --- a/include/linux/rcutree.h | |
13795 | +++ b/include/linux/rcutree.h | |
13796 | @@ -44,7 +44,11 @@ static inline void rcu_virt_note_context_switch(int cpu) | |
13797 | rcu_note_context_switch(); | |
13798 | } | |
13799 | ||
13800 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13801 | +# define synchronize_rcu_bh synchronize_rcu | |
13802 | +#else | |
13803 | void synchronize_rcu_bh(void); | |
13804 | +#endif | |
13805 | void synchronize_sched_expedited(void); | |
13806 | void synchronize_rcu_expedited(void); | |
13807 | ||
13808 | @@ -72,7 +76,11 @@ static inline void synchronize_rcu_bh_expedited(void) | |
13809 | } | |
13810 | ||
13811 | void rcu_barrier(void); | |
13812 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
13813 | +# define rcu_barrier_bh rcu_barrier | |
13814 | +#else | |
13815 | void rcu_barrier_bh(void); | |
13816 | +#endif | |
13817 | void rcu_barrier_sched(void); | |
13818 | unsigned long get_state_synchronize_rcu(void); | |
13819 | void cond_synchronize_rcu(unsigned long oldstate); | |
13820 | @@ -85,12 +93,10 @@ unsigned long rcu_batches_started(void); | |
13821 | unsigned long rcu_batches_started_bh(void); | |
13822 | unsigned long rcu_batches_started_sched(void); | |
13823 | unsigned long rcu_batches_completed(void); | |
13824 | -unsigned long rcu_batches_completed_bh(void); | |
13825 | unsigned long rcu_batches_completed_sched(void); | |
13826 | void show_rcu_gp_kthreads(void); | |
13827 | ||
13828 | void rcu_force_quiescent_state(void); | |
13829 | -void rcu_bh_force_quiescent_state(void); | |
13830 | void rcu_sched_force_quiescent_state(void); | |
13831 | ||
13832 | void rcu_idle_enter(void); | |
13833 | @@ -105,6 +111,14 @@ extern int rcu_scheduler_active __read_mostly; | |
13834 | ||
13835 | bool rcu_is_watching(void); | |
13836 | ||
13837 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
13838 | +void rcu_bh_force_quiescent_state(void); | |
13839 | +unsigned long rcu_batches_completed_bh(void); | |
13840 | +#else | |
13841 | +# define rcu_bh_force_quiescent_state rcu_force_quiescent_state | |
13842 | +# define rcu_batches_completed_bh rcu_batches_completed | |
13843 | +#endif | |
13844 | + | |
13845 | void rcu_all_qs(void); | |
13846 | ||
13847 | #endif /* __LINUX_RCUTREE_H */ | |
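Note: on PREEMPT_RT_FULL the bh flavour of RCU collapses into the normal one (call_rcu_bh, synchronize_rcu_bh, rcu_barrier_bh and friends become aliases); this is sound because rcu_read_lock_bh() also enters a normal RCU read-side section on RT, as in the rcupdate.h hunk above. Readers and updaters keep their usual shape:

    rcu_read_lock_bh();
    p = rcu_dereference_bh(gp);     /* gp: illustrative protected pointer */
    /* ... */
    rcu_read_unlock_bh();

    synchronize_rcu_bh();           /* same as synchronize_rcu() on RT */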
13848 | diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h | |
13849 | index 1abba5ce2a2f..30211c627511 100644 | |
13850 | --- a/include/linux/rtmutex.h | |
13851 | +++ b/include/linux/rtmutex.h | |
13852 | @@ -13,11 +13,15 @@ | |
13853 | #define __LINUX_RT_MUTEX_H | |
13854 | ||
13855 | #include <linux/linkage.h> | |
13856 | +#include <linux/spinlock_types_raw.h> | |
13857 | #include <linux/rbtree.h> | |
13858 | -#include <linux/spinlock_types.h> | |
13859 | ||
13860 | extern int max_lock_depth; /* for sysctl */ | |
13861 | ||
13862 | +#ifdef CONFIG_DEBUG_MUTEXES | |
13863 | +#include <linux/debug_locks.h> | |
13864 | +#endif | |
13865 | + | |
13866 | /** | |
13867 | * The rt_mutex structure | |
13868 | * | |
13869 | @@ -31,8 +35,8 @@ struct rt_mutex { | |
13870 | struct rb_root waiters; | |
13871 | struct rb_node *waiters_leftmost; | |
13872 | struct task_struct *owner; | |
13873 | -#ifdef CONFIG_DEBUG_RT_MUTEXES | |
13874 | int save_state; | |
13875 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
13876 | const char *name, *file; | |
13877 | int line; | |
13878 | void *magic; | |
13879 | @@ -55,22 +59,33 @@ struct hrtimer_sleeper; | |
13880 | # define rt_mutex_debug_check_no_locks_held(task) do { } while (0) | |
13881 | #endif | |
13882 | ||
13883 | +# define rt_mutex_init(mutex) \ | |
13884 | + do { \ | |
13885 | + raw_spin_lock_init(&(mutex)->wait_lock); \ | |
13886 | + __rt_mutex_init(mutex, #mutex); \ | |
13887 | + } while (0) | |
13888 | + | |
13889 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
13890 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) \ | |
13891 | , .name = #mutexname, .file = __FILE__, .line = __LINE__ | |
13892 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, __func__) | |
13893 | extern void rt_mutex_debug_task_free(struct task_struct *tsk); | |
13894 | #else | |
13895 | # define __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
13896 | -# define rt_mutex_init(mutex) __rt_mutex_init(mutex, NULL) | |
13897 | # define rt_mutex_debug_task_free(t) do { } while (0) | |
13898 | #endif | |
13899 | ||
13900 | -#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
13901 | - { .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
13902 | +#define __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
13903 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ | |
13904 | , .waiters = RB_ROOT \ | |
13905 | , .owner = NULL \ | |
13906 | - __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} | |
13907 | + __DEBUG_RT_MUTEX_INITIALIZER(mutexname) | |
13908 | + | |
13909 | +#define __RT_MUTEX_INITIALIZER(mutexname) \ | |
13910 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) } | |
13911 | + | |
13912 | +#define __RT_MUTEX_INITIALIZER_SAVE_STATE(mutexname) \ | |
13913 | + { __RT_MUTEX_INITIALIZER_PLAIN(mutexname) \ | |
13914 | + , .save_state = 1 } | |
13915 | ||
13916 | #define DEFINE_RT_MUTEX(mutexname) \ | |
13917 | struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname) | |
13918 | @@ -91,6 +106,7 @@ extern void rt_mutex_destroy(struct rt_mutex *lock); | |
13919 | ||
13920 | extern void rt_mutex_lock(struct rt_mutex *lock); | |
13921 | extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); | |
13922 | +extern int rt_mutex_lock_killable(struct rt_mutex *lock); | |
13923 | extern int rt_mutex_timed_lock(struct rt_mutex *lock, | |
13924 | struct hrtimer_sleeper *timeout); | |
13925 | ||
13926 | diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h | |
13927 | new file mode 100644 | |
13928 | index 000000000000..49ed2d45d3be | |
13929 | --- /dev/null | |
13930 | +++ b/include/linux/rwlock_rt.h | |
13931 | @@ -0,0 +1,99 @@ | |
13932 | +#ifndef __LINUX_RWLOCK_RT_H | |
13933 | +#define __LINUX_RWLOCK_RT_H | |
13934 | + | |
13935 | +#ifndef __LINUX_SPINLOCK_H | |
13936 | +#error Do not include directly. Use spinlock.h | |
13937 | +#endif | |
13938 | + | |
13939 | +#define rwlock_init(rwl) \ | |
13940 | +do { \ | |
13941 | + static struct lock_class_key __key; \ | |
13942 | + \ | |
13943 | + rt_mutex_init(&(rwl)->lock); \ | |
13944 | + __rt_rwlock_init(rwl, #rwl, &__key); \ | |
13945 | +} while (0) | |
13946 | + | |
13947 | +extern void __lockfunc rt_write_lock(rwlock_t *rwlock); | |
13948 | +extern void __lockfunc rt_read_lock(rwlock_t *rwlock); | |
13949 | +extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); | |
13950 | +extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags); |
13951 | +extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); | |
13952 | +extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); | |
13953 | +extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); | |
13954 | +extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); | |
13955 | +extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); | |
13956 | +extern void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); | |
13957 | + | |
13958 | +#define read_trylock(lock) __cond_lock(lock, rt_read_trylock(lock)) | |
13959 | +#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) | |
13960 | + | |
13961 | +#define write_trylock_irqsave(lock, flags) \ | |
13962 | + __cond_lock(lock, rt_write_trylock_irqsave(lock, &flags)) | |
13963 | + | |
13964 | +#define read_lock_irqsave(lock, flags) \ | |
13965 | + do { \ | |
13966 | + typecheck(unsigned long, flags); \ | |
13967 | + flags = rt_read_lock_irqsave(lock); \ | |
13968 | + } while (0) | |
13969 | + | |
13970 | +#define write_lock_irqsave(lock, flags) \ | |
13971 | + do { \ | |
13972 | + typecheck(unsigned long, flags); \ | |
13973 | + flags = rt_write_lock_irqsave(lock); \ | |
13974 | + } while (0) | |
13975 | + | |
13976 | +#define read_lock(lock) rt_read_lock(lock) | |
13977 | + | |
13978 | +#define read_lock_bh(lock) \ | |
13979 | + do { \ | |
13980 | + local_bh_disable(); \ | |
13981 | + rt_read_lock(lock); \ | |
13982 | + } while (0) | |
13983 | + | |
13984 | +#define read_lock_irq(lock) read_lock(lock) | |
13985 | + | |
13986 | +#define write_lock(lock) rt_write_lock(lock) | |
13987 | + | |
13988 | +#define write_lock_bh(lock) \ | |
13989 | + do { \ | |
13990 | + local_bh_disable(); \ | |
13991 | + rt_write_lock(lock); \ | |
13992 | + } while (0) | |
13993 | + | |
13994 | +#define write_lock_irq(lock) write_lock(lock) | |
13995 | + | |
13996 | +#define read_unlock(lock) rt_read_unlock(lock) | |
13997 | + | |
13998 | +#define read_unlock_bh(lock) \ | |
13999 | + do { \ | |
14000 | + rt_read_unlock(lock); \ | |
14001 | + local_bh_enable(); \ | |
14002 | + } while (0) | |
14003 | + | |
14004 | +#define read_unlock_irq(lock) read_unlock(lock) | |
14005 | + | |
14006 | +#define write_unlock(lock) rt_write_unlock(lock) | |
14007 | + | |
14008 | +#define write_unlock_bh(lock) \ | |
14009 | + do { \ | |
14010 | + rt_write_unlock(lock); \ | |
14011 | + local_bh_enable(); \ | |
14012 | + } while (0) | |
14013 | + | |
14014 | +#define write_unlock_irq(lock) write_unlock(lock) | |
14015 | + | |
14016 | +#define read_unlock_irqrestore(lock, flags) \ | |
14017 | + do { \ | |
14018 | + typecheck(unsigned long, flags); \ | |
14019 | + (void) flags; \ | |
14020 | + rt_read_unlock(lock); \ | |
14021 | + } while (0) | |
14022 | + | |
14023 | +#define write_unlock_irqrestore(lock, flags) \ | |
14024 | + do { \ | |
14025 | + typecheck(unsigned long, flags); \ | |
14026 | + (void) flags; \ | |
14027 | + rt_write_unlock(lock); \ | |
14028 | + } while (0) | |
14029 | + | |
14030 | +#endif | |
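With this header an rwlock_t is backed by a sleeping rtmutex, so the _irqsave variants above do not actually disable interrupts; the flags argument only exists to satisfy typecheck(). A sketch under that assumption (my_rwlock is an illustrative name):

    static rwlock_t my_rwlock;

    static void rwlock_demo_init(void)
    {
            rwlock_init(&my_rwlock);        /* rt_mutex_init() + lockdep key */
    }

    static void reader_side(void)
    {
            unsigned long flags;

            read_lock_irqsave(&my_rwlock, flags);
            /* read-side section is preemptible; IRQs stay enabled on RT */
            read_unlock_irqrestore(&my_rwlock, flags);
    }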
14031 | diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h | |
14032 | index cc0072e93e36..d0da966ad7a0 100644 | |
14033 | --- a/include/linux/rwlock_types.h | |
14034 | +++ b/include/linux/rwlock_types.h | |
14035 | @@ -1,6 +1,10 @@ | |
14036 | #ifndef __LINUX_RWLOCK_TYPES_H | |
14037 | #define __LINUX_RWLOCK_TYPES_H | |
14038 | ||
14039 | +#if !defined(__LINUX_SPINLOCK_TYPES_H) | |
14040 | +# error "Do not include directly, include spinlock_types.h" | |
14041 | +#endif | |
14042 | + | |
14043 | /* | |
14044 | * include/linux/rwlock_types.h - generic rwlock type definitions | |
14045 | * and initializers | |
14046 | @@ -43,6 +47,7 @@ typedef struct { | |
14047 | RW_DEP_MAP_INIT(lockname) } | |
14048 | #endif | |
14049 | ||
14050 | -#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) | |
14051 | +#define DEFINE_RWLOCK(name) \ | |
14052 | + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) | |
14053 | ||
14054 | #endif /* __LINUX_RWLOCK_TYPES_H */ | |
14055 | diff --git a/include/linux/rwlock_types_rt.h b/include/linux/rwlock_types_rt.h | |
14056 | new file mode 100644 | |
14057 | index 000000000000..b13832119591 | |
14058 | --- /dev/null | |
14059 | +++ b/include/linux/rwlock_types_rt.h | |
14060 | @@ -0,0 +1,33 @@ | |
14061 | +#ifndef __LINUX_RWLOCK_TYPES_RT_H | |
14062 | +#define __LINUX_RWLOCK_TYPES_RT_H | |
14063 | + | |
14064 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
14065 | +#error "Do not include directly. Include spinlock_types.h instead" | |
14066 | +#endif | |
14067 | + | |
14068 | +/* | |
14069 | + * rwlocks - rtmutex which allows single reader recursion | |
14070 | + */ | |
14071 | +typedef struct { | |
14072 | + struct rt_mutex lock; | |
14073 | + int read_depth; | |
14074 | + unsigned int break_lock; | |
14075 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14076 | + struct lockdep_map dep_map; | |
14077 | +#endif | |
14078 | +} rwlock_t; | |
14079 | + | |
14080 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14081 | +# define RW_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
14082 | +#else | |
14083 | +# define RW_DEP_MAP_INIT(lockname) | |
14084 | +#endif | |
14085 | + | |
14086 | +#define __RW_LOCK_UNLOCKED(name) \ | |
14087 | + { .lock = __RT_MUTEX_INITIALIZER_SAVE_STATE(name.lock), \ | |
14088 | + RW_DEP_MAP_INIT(name) } | |
14089 | + | |
14090 | +#define DEFINE_RWLOCK(name) \ | |
14091 | + rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) | |
14092 | + | |
14093 | +#endif | |
14094 | diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h | |
14095 | index 8f498cdde280..2b2148431f14 100644 | |
14096 | --- a/include/linux/rwsem.h | |
14097 | +++ b/include/linux/rwsem.h | |
14098 | @@ -18,6 +18,10 @@ | |
14099 | #include <linux/osq_lock.h> | |
14100 | #endif | |
14101 | ||
14102 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14103 | +#include <linux/rwsem_rt.h> | |
14104 | +#else /* PREEMPT_RT_FULL */ | |
14105 | + | |
14106 | struct rw_semaphore; | |
14107 | ||
14108 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | |
14109 | @@ -177,4 +181,6 @@ extern void up_read_non_owner(struct rw_semaphore *sem); | |
14110 | # define up_read_non_owner(sem) up_read(sem) | |
14111 | #endif | |
14112 | ||
14113 | +#endif /* !PREEMPT_RT_FULL */ | |
14114 | + | |
14115 | #endif /* _LINUX_RWSEM_H */ | |
14116 | diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h | |
14117 | new file mode 100644 | |
14118 | index 000000000000..f97860b2e2a4 | |
14119 | --- /dev/null | |
14120 | +++ b/include/linux/rwsem_rt.h | |
14121 | @@ -0,0 +1,152 @@ | |
14122 | +#ifndef _LINUX_RWSEM_RT_H | |
14123 | +#define _LINUX_RWSEM_RT_H | |
14124 | + | |
14125 | +#ifndef _LINUX_RWSEM_H | |
14126 | +#error "Include rwsem.h" | |
14127 | +#endif | |
14128 | + | |
14129 | +/* | |
14130 | + * RW-semaphores are an rtmutex plus a reader-depth count. |
14131 | + * | |
14132 | + * Note that the semantics are different from the usual | |
14133 | + * Linux rw-sems: in PREEMPT_RT mode we do not allow |
14134 | + * multiple readers to hold the lock at once, we only allow | |
14135 | + * a read-lock owner to read-lock recursively. This is | |
14136 | + * better for latency, makes the implementation inherently | |
14137 | + * fair and makes it simpler as well. | |
14138 | + */ | |
14139 | + | |
14140 | +#include <linux/rtmutex.h> | |
14141 | + | |
14142 | +struct rw_semaphore { | |
14143 | + struct rt_mutex lock; | |
14144 | + int read_depth; | |
14145 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14146 | + struct lockdep_map dep_map; | |
14147 | +#endif | |
14148 | +}; | |
14149 | + | |
14150 | +#define __RWSEM_INITIALIZER(name) \ | |
14151 | + { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ | |
14152 | + RW_DEP_MAP_INIT(name) } | |
14153 | + | |
14154 | +#define DECLARE_RWSEM(lockname) \ | |
14155 | + struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) | |
14156 | + | |
14157 | +extern void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
14158 | + struct lock_class_key *key); | |
14159 | + | |
14160 | +#define __rt_init_rwsem(sem, name, key) \ | |
14161 | + do { \ | |
14162 | + rt_mutex_init(&(sem)->lock); \ | |
14163 | + __rt_rwsem_init((sem), (name), (key));\ | |
14164 | + } while (0) | |
14165 | + | |
14166 | +#define __init_rwsem(sem, name, key) __rt_init_rwsem(sem, name, key) | |
14167 | + | |
14168 | +# define rt_init_rwsem(sem) \ | |
14169 | +do { \ | |
14170 | + static struct lock_class_key __key; \ | |
14171 | + \ | |
14172 | + __rt_init_rwsem((sem), #sem, &__key); \ | |
14173 | +} while (0) | |
14174 | + | |
14175 | +extern void rt_down_write(struct rw_semaphore *rwsem); | |
14176 | +extern void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); | |
14177 | +extern void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); | |
14178 | +extern void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
14179 | + struct lockdep_map *nest); | |
14180 | +extern void rt__down_read(struct rw_semaphore *rwsem); | |
14181 | +extern void rt_down_read(struct rw_semaphore *rwsem); | |
14182 | +extern int rt_down_write_trylock(struct rw_semaphore *rwsem); | |
14183 | +extern int rt__down_read_trylock(struct rw_semaphore *rwsem); | |
14184 | +extern int rt_down_read_trylock(struct rw_semaphore *rwsem); | |
14185 | +extern void __rt_up_read(struct rw_semaphore *rwsem); | |
14186 | +extern void rt_up_read(struct rw_semaphore *rwsem); | |
14187 | +extern void rt_up_write(struct rw_semaphore *rwsem); | |
14188 | +extern void rt_downgrade_write(struct rw_semaphore *rwsem); | |
14189 | + | |
14190 | +#define init_rwsem(sem) rt_init_rwsem(sem) | |
14191 | +#define rwsem_is_locked(s) rt_mutex_is_locked(&(s)->lock) | |
14192 | + | |
14193 | +static inline int rwsem_is_contended(struct rw_semaphore *sem) | |
14194 | +{ | |
14195 | + /* rt_mutex_has_waiters() */ | |
14196 | + return !RB_EMPTY_ROOT(&sem->lock.waiters); | |
14197 | +} | |
14198 | + | |
14199 | +static inline void __down_read(struct rw_semaphore *sem) | |
14200 | +{ | |
14201 | + rt__down_read(sem); | |
14202 | +} | |
14203 | + | |
14204 | +static inline void down_read(struct rw_semaphore *sem) | |
14205 | +{ | |
14206 | + rt_down_read(sem); | |
14207 | +} | |
14208 | + | |
14209 | +static inline int __down_read_trylock(struct rw_semaphore *sem) | |
14210 | +{ | |
14211 | + return rt__down_read_trylock(sem); | |
14212 | +} | |
14213 | + | |
14214 | +static inline int down_read_trylock(struct rw_semaphore *sem) | |
14215 | +{ | |
14216 | + return rt_down_read_trylock(sem); | |
14217 | +} | |
14218 | + | |
14219 | +static inline void down_write(struct rw_semaphore *sem) | |
14220 | +{ | |
14221 | + rt_down_write(sem); | |
14222 | +} | |
14223 | + | |
14224 | +static inline int down_write_trylock(struct rw_semaphore *sem) | |
14225 | +{ | |
14226 | + return rt_down_write_trylock(sem); | |
14227 | +} | |
14228 | + | |
14229 | +static inline void __up_read(struct rw_semaphore *sem) | |
14230 | +{ | |
14231 | + __rt_up_read(sem); | |
14232 | +} | |
14233 | + | |
14234 | +static inline void up_read(struct rw_semaphore *sem) | |
14235 | +{ | |
14236 | + rt_up_read(sem); | |
14237 | +} | |
14238 | + | |
14239 | +static inline void up_write(struct rw_semaphore *sem) | |
14240 | +{ | |
14241 | + rt_up_write(sem); | |
14242 | +} | |
14243 | + | |
14244 | +static inline void downgrade_write(struct rw_semaphore *sem) | |
14245 | +{ | |
14246 | + rt_downgrade_write(sem); | |
14247 | +} | |
14248 | + | |
14249 | +static inline void down_read_nested(struct rw_semaphore *sem, int subclass) | |
14250 | +{ | |
14251 | + return rt_down_read_nested(sem, subclass); | |
14252 | +} | |
14253 | + | |
14254 | +static inline void down_write_nested(struct rw_semaphore *sem, int subclass) | |
14255 | +{ | |
14256 | + rt_down_write_nested(sem, subclass); | |
14257 | +} | |
14258 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
14259 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
14260 | + struct rw_semaphore *nest_lock) | |
14261 | +{ | |
14262 | + rt_down_write_nested_lock(sem, &nest_lock->dep_map); | |
14263 | +} | |
14264 | + | |
14265 | +#else | |
14266 | + | |
14267 | +static inline void down_write_nest_lock(struct rw_semaphore *sem, | |
14268 | + struct rw_semaphore *nest_lock) | |
14269 | +{ | |
14270 | + rt_down_write_nested_lock(sem, NULL); | |
14271 | +} | |
14272 | +#endif | |
14273 | +#endif | |
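The single-owner recursion described in the comment above means a task that already read-holds the semaphore may take it again; the second acquisition only bumps read_depth. A minimal sketch (my_sem is an illustrative name):

    static DECLARE_RWSEM(my_sem);

    static void nested_reader(void)
    {
            down_read(&my_sem);
            down_read(&my_sem);     /* recursive read by the owner, no deadlock */
            up_read(&my_sem);
            up_read(&my_sem);
    }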
14274 | diff --git a/include/linux/sched.h b/include/linux/sched.h | |
14275 | index 1c0193baea2a..0570d8e022ec 100644 | |
14276 | --- a/include/linux/sched.h | |
14277 | +++ b/include/linux/sched.h | |
14278 | @@ -26,6 +26,7 @@ struct sched_param { | |
14279 | #include <linux/nodemask.h> | |
14280 | #include <linux/mm_types.h> | |
14281 | #include <linux/preempt.h> | |
14282 | +#include <asm/kmap_types.h> | |
14283 | ||
14284 | #include <asm/page.h> | |
14285 | #include <asm/ptrace.h> | |
14286 | @@ -182,8 +183,6 @@ extern void update_cpu_load_nohz(void); | |
14287 | static inline void update_cpu_load_nohz(void) { } | |
14288 | #endif | |
14289 | ||
14290 | -extern unsigned long get_parent_ip(unsigned long addr); | |
14291 | - | |
14292 | extern void dump_cpu_task(int cpu); | |
14293 | ||
14294 | struct seq_file; | |
14295 | @@ -242,10 +241,7 @@ extern char ___assert_task_state[1 - 2*!!( | |
14296 | TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ | |
14297 | __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) | |
14298 | ||
14299 | -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) | |
14300 | #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) | |
14301 | -#define task_is_stopped_or_traced(task) \ | |
14302 | - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) | |
14303 | #define task_contributes_to_load(task) \ | |
14304 | ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ | |
14305 | (task->flags & PF_FROZEN) == 0 && \ | |
14306 | @@ -311,6 +307,11 @@ extern char ___assert_task_state[1 - 2*!!( | |
14307 | ||
14308 | #endif | |
14309 | ||
14310 | +#define __set_current_state_no_track(state_value) \ | |
14311 | + do { current->state = (state_value); } while (0) | |
14312 | +#define set_current_state_no_track(state_value) \ | |
14313 | + set_mb(current->state, (state_value)) | |
14314 | + | |
14315 | /* Task command name length */ | |
14316 | #define TASK_COMM_LEN 16 | |
14317 | ||
14318 | @@ -970,8 +971,18 @@ struct wake_q_head { | |
14319 | struct wake_q_head name = { WAKE_Q_TAIL, &name.first } | |
14320 | ||
14321 | extern void wake_q_add(struct wake_q_head *head, | |
14322 | - struct task_struct *task); | |
14323 | -extern void wake_up_q(struct wake_q_head *head); | |
14324 | + struct task_struct *task); | |
14325 | +extern void __wake_up_q(struct wake_q_head *head, bool sleeper); | |
14326 | + | |
14327 | +static inline void wake_up_q(struct wake_q_head *head) | |
14328 | +{ | |
14329 | + __wake_up_q(head, false); | |
14330 | +} | |
14331 | + | |
14332 | +static inline void wake_up_q_sleeper(struct wake_q_head *head) | |
14333 | +{ | |
14334 | + __wake_up_q(head, true); | |
14335 | +} | |
14336 | ||
14337 | /* | |
14338 | * sched-domains (multiprocessor balancing) declarations: | |
14339 | @@ -1379,6 +1390,7 @@ struct tlbflush_unmap_batch { | |
14340 | ||
14341 | struct task_struct { | |
14342 | volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ | |
14343 | + volatile long saved_state; /* saved state for "spinlock sleepers" */ | |
14344 | void *stack; | |
14345 | atomic_t usage; | |
14346 | unsigned int flags; /* per process flags, defined below */ | |
14347 | @@ -1415,6 +1427,12 @@ struct task_struct { | |
14348 | #endif | |
14349 | ||
14350 | unsigned int policy; | |
14351 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14352 | + int migrate_disable; | |
14353 | +# ifdef CONFIG_SCHED_DEBUG | |
14354 | + int migrate_disable_atomic; | |
14355 | +# endif | |
14356 | +#endif | |
14357 | int nr_cpus_allowed; | |
14358 | cpumask_t cpus_allowed; | |
14359 | ||
14360 | @@ -1522,11 +1540,14 @@ struct task_struct { | |
14361 | cputime_t gtime; | |
14362 | struct prev_cputime prev_cputime; | |
14363 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
14364 | - seqlock_t vtime_seqlock; | |
14365 | + seqcount_t vtime_seqcount; | |
14366 | unsigned long long vtime_snap; | |
14367 | enum { | |
14368 | - VTIME_SLEEPING = 0, | |
14369 | + /* Task is sleeping or running in a CPU with VTIME inactive */ | |
14370 | + VTIME_INACTIVE = 0, | |
14371 | + /* Task runs in userspace in a CPU with VTIME active */ | |
14372 | VTIME_USER, | |
14373 | + /* Task runs in kernelspace in a CPU with VTIME active */ | |
14374 | VTIME_SYS, | |
14375 | } vtime_snap_whence; | |
14376 | #endif | |
14377 | @@ -1538,6 +1559,9 @@ struct task_struct { | |
14378 | ||
14379 | struct task_cputime cputime_expires; | |
14380 | struct list_head cpu_timers[3]; | |
14381 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14382 | + struct task_struct *posix_timer_list; | |
14383 | +#endif | |
14384 | ||
14385 | /* process credentials */ | |
14386 | const struct cred __rcu *real_cred; /* objective and real subjective task | |
14387 | @@ -1568,10 +1592,15 @@ struct task_struct { | |
14388 | /* signal handlers */ | |
14389 | struct signal_struct *signal; | |
14390 | struct sighand_struct *sighand; | |
14391 | + struct sigqueue *sigqueue_cache; | |
14392 | ||
14393 | sigset_t blocked, real_blocked; | |
14394 | sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ | |
14395 | struct sigpending pending; | |
14396 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14397 | + /* TODO: move me into ->restart_block ? */ | |
14398 | + struct siginfo forced_info; | |
14399 | +#endif | |
14400 | ||
14401 | unsigned long sas_ss_sp; | |
14402 | size_t sas_ss_size; | |
14403 | @@ -1795,6 +1824,12 @@ struct task_struct { | |
14404 | unsigned long trace; | |
14405 | /* bitmask and counter of trace recursion */ | |
14406 | unsigned long trace_recursion; | |
14407 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
14408 | + u64 preempt_timestamp_hist; | |
14409 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
14410 | + long timer_offset; | |
14411 | +#endif | |
14412 | +#endif | |
14413 | #endif /* CONFIG_TRACING */ | |
14414 | #ifdef CONFIG_MEMCG | |
14415 | struct mem_cgroup *memcg_in_oom; | |
14416 | @@ -1811,9 +1846,23 @@ struct task_struct { | |
14417 | unsigned int sequential_io; | |
14418 | unsigned int sequential_io_avg; | |
14419 | #endif | |
14420 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14421 | + struct rcu_head put_rcu; | |
14422 | + int softirq_nestcnt; | |
14423 | + unsigned int softirqs_raised; | |
14424 | +#endif | |
14425 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14426 | +# if defined CONFIG_HIGHMEM || defined CONFIG_X86_32 | |
14427 | + int kmap_idx; | |
14428 | + pte_t kmap_pte[KM_TYPE_NR]; | |
14429 | +# endif | |
14430 | +#endif | |
14431 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
14432 | unsigned long task_state_change; | |
14433 | #endif | |
14434 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14435 | + int xmit_recursion; | |
14436 | +#endif | |
14437 | int pagefault_disabled; | |
14438 | /* CPU-specific state of this task */ | |
14439 | struct thread_struct thread; | |
14440 | @@ -1831,9 +1880,6 @@ extern int arch_task_struct_size __read_mostly; | |
14441 | # define arch_task_struct_size (sizeof(struct task_struct)) | |
14442 | #endif | |
14443 | ||
14444 | -/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
14445 | -#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) | |
14446 | - | |
14447 | #define TNF_MIGRATED 0x01 | |
14448 | #define TNF_NO_GROUP 0x02 | |
14449 | #define TNF_SHARED 0x04 | |
14450 | @@ -2023,6 +2069,15 @@ extern struct pid *cad_pid; | |
14451 | extern void free_task(struct task_struct *tsk); | |
14452 | #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) | |
14453 | ||
14454 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14455 | +extern void __put_task_struct_cb(struct rcu_head *rhp); | |
14456 | + | |
14457 | +static inline void put_task_struct(struct task_struct *t) | |
14458 | +{ | |
14459 | + if (atomic_dec_and_test(&t->usage)) | |
14460 | + call_rcu(&t->put_rcu, __put_task_struct_cb); | |
14461 | +} | |
14462 | +#else | |
14463 | extern void __put_task_struct(struct task_struct *t); | |
14464 | ||
14465 | static inline void put_task_struct(struct task_struct *t) | |
14466 | @@ -2030,6 +2085,7 @@ static inline void put_task_struct(struct task_struct *t) | |
14467 | if (atomic_dec_and_test(&t->usage)) | |
14468 | __put_task_struct(t); | |
14469 | } | |
14470 | +#endif | |
14471 | ||
14472 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
14473 | extern void task_cputime(struct task_struct *t, | |
14474 | @@ -2068,6 +2124,7 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, | |
14475 | /* | |
14476 | * Per process flags | |
14477 | */ | |
14478 | +#define PF_IN_SOFTIRQ 0x00000001 /* Task is serving softirq */ | |
14479 | #define PF_EXITING 0x00000004 /* getting shut down */ | |
14480 | #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ | |
14481 | #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ | |
14482 | @@ -2232,6 +2289,10 @@ extern void do_set_cpus_allowed(struct task_struct *p, | |
14483 | ||
14484 | extern int set_cpus_allowed_ptr(struct task_struct *p, | |
14485 | const struct cpumask *new_mask); | |
14486 | +int migrate_me(void); | |
14487 | +void tell_sched_cpu_down_begin(int cpu); | |
14488 | +void tell_sched_cpu_down_done(int cpu); | |
14489 | + | |
14490 | #else | |
14491 | static inline void do_set_cpus_allowed(struct task_struct *p, | |
14492 | const struct cpumask *new_mask) | |
14493 | @@ -2244,6 +2305,9 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, | |
14494 | return -EINVAL; | |
14495 | return 0; | |
14496 | } | |
14497 | +static inline int migrate_me(void) { return 0; } | |
14498 | +static inline void tell_sched_cpu_down_begin(int cpu) { } | |
14499 | +static inline void tell_sched_cpu_down_done(int cpu) { } | |
14500 | #endif | |
14501 | ||
14502 | #ifdef CONFIG_NO_HZ_COMMON | |
14503 | @@ -2453,6 +2517,7 @@ extern void xtime_update(unsigned long ticks); | |
14504 | ||
14505 | extern int wake_up_state(struct task_struct *tsk, unsigned int state); | |
14506 | extern int wake_up_process(struct task_struct *tsk); | |
14507 | +extern int wake_up_lock_sleeper(struct task_struct *tsk); |
14508 | extern void wake_up_new_task(struct task_struct *tsk); | |
14509 | #ifdef CONFIG_SMP | |
14510 | extern void kick_process(struct task_struct *tsk); | |
14511 | @@ -2576,12 +2641,24 @@ extern struct mm_struct * mm_alloc(void); | |
14512 | ||
14513 | /* mmdrop drops the mm and the page tables */ | |
14514 | extern void __mmdrop(struct mm_struct *); | |
14515 | + | |
14516 | static inline void mmdrop(struct mm_struct * mm) | |
14517 | { | |
14518 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) | |
14519 | __mmdrop(mm); | |
14520 | } | |
14521 | ||
14522 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
14523 | +extern void __mmdrop_delayed(struct rcu_head *rhp); | |
14524 | +static inline void mmdrop_delayed(struct mm_struct *mm) | |
14525 | +{ | |
14526 | + if (atomic_dec_and_test(&mm->mm_count)) | |
14527 | + call_rcu(&mm->delayed_drop, __mmdrop_delayed); | |
14528 | +} | |
14529 | +#else | |
14530 | +# define mmdrop_delayed(mm) mmdrop(mm) | |
14531 | +#endif | |
14532 | + | |
14533 | /* mmput gets rid of the mappings and all user-space */ | |
14534 | extern void mmput(struct mm_struct *); | |
14535 | /* Grab a reference to a task's mm, if it is not already going away */ | |
14536 | @@ -2891,6 +2968,43 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) | |
14537 | return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); | |
14538 | } | |
14539 | ||
14540 | +#ifdef CONFIG_PREEMPT_LAZY | |
14541 | +static inline void set_tsk_need_resched_lazy(struct task_struct *tsk) | |
14542 | +{ | |
14543 | + set_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
14544 | +} | |
14545 | + | |
14546 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) | |
14547 | +{ | |
14548 | + clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY); | |
14549 | +} | |
14550 | + | |
14551 | +static inline int test_tsk_need_resched_lazy(struct task_struct *tsk) | |
14552 | +{ | |
14553 | + return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED_LAZY)); | |
14554 | +} | |
14555 | + | |
14556 | +static inline int need_resched_lazy(void) | |
14557 | +{ | |
14558 | + return test_thread_flag(TIF_NEED_RESCHED_LAZY); | |
14559 | +} | |
14560 | + | |
14561 | +static inline int need_resched_now(void) | |
14562 | +{ | |
14563 | + return test_thread_flag(TIF_NEED_RESCHED); | |
14564 | +} | |
14565 | + | |
14566 | +#else | |
14567 | +static inline void clear_tsk_need_resched_lazy(struct task_struct *tsk) { } | |
14568 | +static inline int need_resched_lazy(void) { return 0; } | |
14569 | + | |
14570 | +static inline int need_resched_now(void) | |
14571 | +{ | |
14572 | + return test_thread_flag(TIF_NEED_RESCHED); | |
14573 | +} | |
14574 | + | |
14575 | +#endif | |
14576 | + | |
14577 | static inline int restart_syscall(void) | |
14578 | { | |
14579 | set_tsk_thread_flag(current, TIF_SIGPENDING); | |
14580 | @@ -2922,6 +3036,51 @@ static inline int signal_pending_state(long state, struct task_struct *p) | |
14581 | return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p); | |
14582 | } | |
14583 | ||
14584 | +static inline bool __task_is_stopped_or_traced(struct task_struct *task) | |
14585 | +{ | |
14586 | + if (task->state & (__TASK_STOPPED | __TASK_TRACED)) | |
14587 | + return true; | |
14588 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14589 | + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED)) | |
14590 | + return true; | |
14591 | +#endif | |
14592 | + return false; | |
14593 | +} | |
14594 | + | |
14595 | +static inline bool task_is_stopped_or_traced(struct task_struct *task) | |
14596 | +{ | |
14597 | + bool traced_stopped; | |
14598 | + | |
14599 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14600 | + unsigned long flags; | |
14601 | + | |
14602 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
14603 | + traced_stopped = __task_is_stopped_or_traced(task); | |
14604 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
14605 | +#else | |
14606 | + traced_stopped = __task_is_stopped_or_traced(task); | |
14607 | +#endif | |
14608 | + return traced_stopped; | |
14609 | +} | |
14610 | + | |
14611 | +static inline bool task_is_traced(struct task_struct *task) | |
14612 | +{ | |
14613 | + bool traced = false; | |
14614 | + | |
14615 | + if (task->state & __TASK_TRACED) | |
14616 | + return true; | |
14617 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14618 | + /* in case the task is sleeping on tasklist_lock */ | |
14619 | + raw_spin_lock_irq(&task->pi_lock); | |
14620 | + if (task->state & __TASK_TRACED) | |
14621 | + traced = true; | |
14622 | + else if (task->saved_state & __TASK_TRACED) | |
14623 | + traced = true; | |
14624 | + raw_spin_unlock_irq(&task->pi_lock); | |
14625 | +#endif | |
14626 | + return traced; | |
14627 | +} | |
14628 | + | |
14629 | /* | |
14630 | * cond_resched() and cond_resched_lock(): latency reduction via | |
14631 | * explicit rescheduling in places that are safe. The return | |
14632 | @@ -2943,12 +3102,16 @@ extern int __cond_resched_lock(spinlock_t *lock); | |
14633 | __cond_resched_lock(lock); \ | |
14634 | }) | |
14635 | ||
14636 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14637 | extern int __cond_resched_softirq(void); | |
14638 | ||
14639 | #define cond_resched_softirq() ({ \ | |
14640 | ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \ | |
14641 | __cond_resched_softirq(); \ | |
14642 | }) | |
14643 | +#else | |
14644 | +# define cond_resched_softirq() cond_resched() | |
14645 | +#endif | |
14646 | ||
14647 | static inline void cond_resched_rcu(void) | |
14648 | { | |
14649 | @@ -3110,6 +3273,31 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) | |
14650 | ||
14651 | #endif /* CONFIG_SMP */ | |
14652 | ||
14653 | +static inline int __migrate_disabled(struct task_struct *p) | |
14654 | +{ | |
14655 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14656 | + return p->migrate_disable; | |
14657 | +#else | |
14658 | + return 0; | |
14659 | +#endif | |
14660 | +} | |
14661 | + | |
14662 | +/* Future-safe accessor for struct task_struct's cpus_allowed. */ | |
14663 | +static inline const struct cpumask *tsk_cpus_allowed(struct task_struct *p) | |
14664 | +{ | |
14665 | + if (__migrate_disabled(p)) | |
14666 | + return cpumask_of(task_cpu(p)); | |
14667 | + | |
14668 | + return &p->cpus_allowed; | |
14669 | +} | |
14670 | + | |
14671 | +static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |
14672 | +{ | |
14673 | + if (__migrate_disabled(p)) | |
14674 | + return 1; | |
14675 | + return p->nr_cpus_allowed; | |
14676 | +} | |
14677 | + | |
14678 | extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); | |
14679 | extern long sched_getaffinity(pid_t pid, struct cpumask *mask); | |
14680 | ||
14681 | diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h | |
14682 | index e0582106ef4f..b14f4d2368aa 100644 | |
14683 | --- a/include/linux/seqlock.h | |
14684 | +++ b/include/linux/seqlock.h | |
14685 | @@ -220,20 +220,30 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) | |
14686 | return __read_seqcount_retry(s, start); | |
14687 | } | |
14688 | ||
14689 | - | |
14690 | - | |
14691 | -static inline void raw_write_seqcount_begin(seqcount_t *s) | |
14692 | +static inline void __raw_write_seqcount_begin(seqcount_t *s) | |
14693 | { | |
14694 | s->sequence++; | |
14695 | smp_wmb(); | |
14696 | } | |
14697 | ||
14698 | -static inline void raw_write_seqcount_end(seqcount_t *s) | |
14699 | +static inline void raw_write_seqcount_begin(seqcount_t *s) | |
14700 | +{ | |
14701 | + preempt_disable_rt(); | |
14702 | + __raw_write_seqcount_begin(s); | |
14703 | +} | |
14704 | + | |
14705 | +static inline void __raw_write_seqcount_end(seqcount_t *s) | |
14706 | { | |
14707 | smp_wmb(); | |
14708 | s->sequence++; | |
14709 | } | |
14710 | ||
14711 | +static inline void raw_write_seqcount_end(seqcount_t *s) | |
14712 | +{ | |
14713 | + __raw_write_seqcount_end(s); | |
14714 | + preempt_enable_rt(); | |
14715 | +} | |
14716 | + | |
14717 | /** | |
14718 | * raw_write_seqcount_barrier - do a seq write barrier | |
14719 | * @s: pointer to seqcount_t | |
14720 | @@ -425,10 +435,32 @@ typedef struct { | |
14721 | /* | |
14722 | * Read side functions for starting and finalizing a read side section. | |
14723 | */ | |
14724 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14725 | static inline unsigned read_seqbegin(const seqlock_t *sl) | |
14726 | { | |
14727 | return read_seqcount_begin(&sl->seqcount); | |
14728 | } | |
14729 | +#else | |
14730 | +/* | |
14731 | + * Starvation safe read side for RT | |
14732 | + */ | |
14733 | +static inline unsigned read_seqbegin(seqlock_t *sl) | |
14734 | +{ | |
14735 | + unsigned ret; | |
14736 | + | |
14737 | +repeat: | |
14738 | + ret = ACCESS_ONCE(sl->seqcount.sequence); | |
14739 | + if (unlikely(ret & 1)) { | |
14740 | + /* | |
14741 | + * Take the lock and let the writer proceed (i.e. possibly |
14742 | + * boost it), otherwise we could loop here forever. | |
14743 | + */ | |
14744 | + spin_unlock_wait(&sl->lock); | |
14745 | + goto repeat; | |
14746 | + } | |
14747 | + return ret; | |
14748 | +} | |
14749 | +#endif | |
14750 | ||
14751 | static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
14752 | { | |
14753 | @@ -443,36 +475,36 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) | |
14754 | static inline void write_seqlock(seqlock_t *sl) | |
14755 | { | |
14756 | spin_lock(&sl->lock); | |
14757 | - write_seqcount_begin(&sl->seqcount); | |
14758 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14759 | } | |
14760 | ||
14761 | static inline void write_sequnlock(seqlock_t *sl) | |
14762 | { | |
14763 | - write_seqcount_end(&sl->seqcount); | |
14764 | + __raw_write_seqcount_end(&sl->seqcount); | |
14765 | spin_unlock(&sl->lock); | |
14766 | } | |
14767 | ||
14768 | static inline void write_seqlock_bh(seqlock_t *sl) | |
14769 | { | |
14770 | spin_lock_bh(&sl->lock); | |
14771 | - write_seqcount_begin(&sl->seqcount); | |
14772 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14773 | } | |
14774 | ||
14775 | static inline void write_sequnlock_bh(seqlock_t *sl) | |
14776 | { | |
14777 | - write_seqcount_end(&sl->seqcount); | |
14778 | + __raw_write_seqcount_end(&sl->seqcount); | |
14779 | spin_unlock_bh(&sl->lock); | |
14780 | } | |
14781 | ||
14782 | static inline void write_seqlock_irq(seqlock_t *sl) | |
14783 | { | |
14784 | spin_lock_irq(&sl->lock); | |
14785 | - write_seqcount_begin(&sl->seqcount); | |
14786 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14787 | } | |
14788 | ||
14789 | static inline void write_sequnlock_irq(seqlock_t *sl) | |
14790 | { | |
14791 | - write_seqcount_end(&sl->seqcount); | |
14792 | + __raw_write_seqcount_end(&sl->seqcount); | |
14793 | spin_unlock_irq(&sl->lock); | |
14794 | } | |
14795 | ||
14796 | @@ -481,7 +513,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
14797 | unsigned long flags; | |
14798 | ||
14799 | spin_lock_irqsave(&sl->lock, flags); | |
14800 | - write_seqcount_begin(&sl->seqcount); | |
14801 | + __raw_write_seqcount_begin(&sl->seqcount); | |
14802 | return flags; | |
14803 | } | |
14804 | ||
14805 | @@ -491,7 +523,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) | |
14806 | static inline void | |
14807 | write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) | |
14808 | { | |
14809 | - write_seqcount_end(&sl->seqcount); | |
14810 | + __raw_write_seqcount_end(&sl->seqcount); | |
14811 | spin_unlock_irqrestore(&sl->lock, flags); | |
14812 | } | |
14813 | ||
14814 | diff --git a/include/linux/signal.h b/include/linux/signal.h | |
14815 | index d80259afb9e5..ddd1e6866a54 100644 | |
14816 | --- a/include/linux/signal.h | |
14817 | +++ b/include/linux/signal.h | |
14818 | @@ -233,6 +233,7 @@ static inline void init_sigpending(struct sigpending *sig) | |
14819 | } | |
14820 | ||
14821 | extern void flush_sigqueue(struct sigpending *queue); | |
14822 | +extern void flush_task_sigqueue(struct task_struct *tsk); | |
14823 | ||
14824 | /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ | |
14825 | static inline int valid_signal(unsigned long sig) | |
14826 | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h | |
14827 | index d443d9ab0236..2d1c7f9b7fd0 100644 | |
14828 | --- a/include/linux/skbuff.h | |
14829 | +++ b/include/linux/skbuff.h | |
14830 | @@ -203,6 +203,7 @@ struct sk_buff_head { | |
14831 | ||
14832 | __u32 qlen; | |
14833 | spinlock_t lock; | |
14834 | + raw_spinlock_t raw_lock; | |
14835 | }; | |
14836 | ||
14837 | struct sk_buff; | |
14838 | @@ -1465,6 +1466,12 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) | |
14839 | __skb_queue_head_init(list); | |
14840 | } | |
14841 | ||
14842 | +static inline void skb_queue_head_init_raw(struct sk_buff_head *list) | |
14843 | +{ | |
14844 | + raw_spin_lock_init(&list->raw_lock); | |
14845 | + __skb_queue_head_init(list); | |
14846 | +} | |
14847 | + | |
14848 | static inline void skb_queue_head_init_class(struct sk_buff_head *list, | |
14849 | struct lock_class_key *class) | |
14850 | { | |
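A queue that must be manipulated from genuinely atomic context on RT would be set up with the raw variant added above and protected by list->raw_lock at its use sites; my_queue is an illustrative name:

    static struct sk_buff_head my_queue;

    static void queue_setup(void)
    {
            skb_queue_head_init_raw(&my_queue);  /* inits ->raw_lock, not ->lock */
    }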
14851 | diff --git a/include/linux/smp.h b/include/linux/smp.h | |
14852 | index c4414074bd88..e6ab36aeaaab 100644 | |
14853 | --- a/include/linux/smp.h | |
14854 | +++ b/include/linux/smp.h | |
14855 | @@ -185,6 +185,9 @@ static inline void smp_init(void) { } | |
14856 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | |
14857 | #define put_cpu() preempt_enable() | |
14858 | ||
14859 | +#define get_cpu_light() ({ migrate_disable(); smp_processor_id(); }) | |
14860 | +#define put_cpu_light() migrate_enable() | |
14861 | + | |
14862 | /* | |
14863 | * Callback to arch code if there's nosmp or maxcpus=0 on the | |
14864 | * boot command line: | |
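get_cpu_light() pins the task to its current CPU via migrate_disable() instead of disabling preemption, so sleeping locks remain legal inside the section. A hypothetical caller, assuming the RT definition of migrate_disable():

    static void pinned_section(void)
    {
            int cpu = get_cpu_light();   /* migrate_disable(), still preemptible */

            /* may take sleeping (RT) locks here; stays on @cpu throughout */
            pr_debug("pinned on cpu %d\n", cpu);
            put_cpu_light();
    }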
14865 | diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h | |
cb95d48a | 14866 | index 47dd0cebd204..02928fa5499d 100644 |
b4de310e JK | 14867 | --- a/include/linux/spinlock.h |
14868 | +++ b/include/linux/spinlock.h | |
14869 | @@ -271,7 +271,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
14870 | #define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) | |
14871 | ||
14872 | /* Include rwlock functions */ | |
14873 | -#include <linux/rwlock.h> | |
14874 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14875 | +# include <linux/rwlock_rt.h> | |
14876 | +#else | |
14877 | +# include <linux/rwlock.h> | |
14878 | +#endif | |
14879 | ||
14880 | /* | |
14881 | * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: | |
14882 | @@ -282,6 +286,10 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) | |
14883 | # include <linux/spinlock_api_up.h> | |
14884 | #endif | |
14885 | ||
14886 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
14887 | +# include <linux/spinlock_rt.h> | |
14888 | +#else /* PREEMPT_RT_FULL */ | |
14889 | + | |
14890 | /* | |
14891 | * Map the spin_lock functions to the raw variants for PREEMPT_RT=n | |
14892 | */ | |
cb95d48a JK | 14893 | @@ -347,6 +355,12 @@ static __always_inline void spin_unlock(spinlock_t *lock) |
14894 | raw_spin_unlock(&lock->rlock); | |
14895 | } | |
14896 | ||
14897 | +static __always_inline int spin_unlock_no_deboost(spinlock_t *lock) | |
14898 | +{ | |
14899 | + raw_spin_unlock(&lock->rlock); | |
14900 | + return 0; | |
14901 | +} | |
14902 | + | |
14903 | static __always_inline void spin_unlock_bh(spinlock_t *lock) | |
14904 | { | |
14905 | raw_spin_unlock_bh(&lock->rlock); | |
14906 | @@ -416,4 +430,6 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); | |
b4de310e JK | 14907 | #define atomic_dec_and_lock(atomic, lock) \ |
14908 | __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) | |
14909 | ||
14910 | +#endif /* !PREEMPT_RT_FULL */ | |
14911 | + | |
14912 | #endif /* __LINUX_SPINLOCK_H */ | |
14913 | diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h | |
14914 | index 5344268e6e62..043263f30e81 100644 | |
14915 | --- a/include/linux/spinlock_api_smp.h | |
14916 | +++ b/include/linux/spinlock_api_smp.h | |
14917 | @@ -189,6 +189,8 @@ static inline int __raw_spin_trylock_bh(raw_spinlock_t *lock) | |
14918 | return 0; | |
14919 | } | |
14920 | ||
14921 | -#include <linux/rwlock_api_smp.h> | |
14922 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
14923 | +# include <linux/rwlock_api_smp.h> | |
14924 | +#endif | |
14925 | ||
14926 | #endif /* __LINUX_SPINLOCK_API_SMP_H */ | |
14927 | diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h | |
14928 | new file mode 100644 | |
cb95d48a | 14929 | index 000000000000..7eb87584e843 |
b4de310e JK | 14930 | --- /dev/null |
14931 | +++ b/include/linux/spinlock_rt.h | |
cb95d48a | 14932 | @@ -0,0 +1,165 @@ |
b4de310e JK | 14933 | +#ifndef __LINUX_SPINLOCK_RT_H |
14934 | +#define __LINUX_SPINLOCK_RT_H | |
14935 | + | |
14936 | +#ifndef __LINUX_SPINLOCK_H | |
14937 | +#error Do not include directly. Use spinlock.h | |
14938 | +#endif | |
14939 | + | |
14940 | +#include <linux/bug.h> | |
14941 | + | |
14942 | +extern void | |
14943 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); | |
14944 | + | |
14945 | +#define spin_lock_init(slock) \ | |
14946 | +do { \ | |
14947 | + static struct lock_class_key __key; \ | |
14948 | + \ | |
14949 | + rt_mutex_init(&(slock)->lock); \ | |
14950 | + __rt_spin_lock_init(slock, #slock, &__key); \ | |
14951 | +} while (0) | |
14952 | + | |
14953 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock); | |
14954 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock); | |
14955 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock); | |
14956 | + | |
14957 | +extern void __lockfunc rt_spin_lock(spinlock_t *lock); | |
14958 | +extern unsigned long __lockfunc rt_spin_lock_trace_flags(spinlock_t *lock); | |
14959 | +extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); | |
14960 | +extern void __lockfunc rt_spin_unlock(spinlock_t *lock); | |
cb95d48a | 14961 | +extern int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock); |
b4de310e JK | 14962 | +extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); |
14963 | +extern int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); | |
14964 | +extern int __lockfunc rt_spin_trylock_bh(spinlock_t *lock); | |
14965 | +extern int __lockfunc rt_spin_trylock(spinlock_t *lock); | |
14966 | +extern int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock); | |
14967 | + | |
14968 | +/* | |
14969 | + * lockdep-less calls, for derived types like rwlock: | |
14970 | + * (for trylock they can use rt_mutex_trylock() directly. | |
14971 | + */ | |
14972 | +extern void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock); | |
14973 | +extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); | |
14974 | +extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); | |
14975 | +extern int __lockfunc __rt_spin_trylock(struct rt_mutex *lock); | |
14976 | + | |
14977 | +#define spin_lock(lock) rt_spin_lock(lock) | |
14978 | + | |
14979 | +#define spin_lock_bh(lock) \ | |
14980 | + do { \ | |
14981 | + local_bh_disable(); \ | |
14982 | + rt_spin_lock(lock); \ | |
14983 | + } while (0) | |
14984 | + | |
14985 | +#define spin_lock_irq(lock) spin_lock(lock) | |
14986 | + | |
14987 | +#define spin_do_trylock(lock) __cond_lock(lock, rt_spin_trylock(lock)) | |
14988 | + | |
14989 | +#define spin_trylock(lock) \ | |
14990 | +({ \ | |
14991 | + int __locked; \ | |
14992 | + __locked = spin_do_trylock(lock); \ | |
14993 | + __locked; \ | |
14994 | +}) | |
14995 | + | |
14996 | +#ifdef CONFIG_LOCKDEP | |
14997 | +# define spin_lock_nested(lock, subclass) \ | |
14998 | + do { \ | |
14999 | + rt_spin_lock_nested(lock, subclass); \ | |
15000 | + } while (0) | |
15001 | + | |
15002 | +#define spin_lock_bh_nested(lock, subclass) \ | |
15003 | + do { \ | |
15004 | + local_bh_disable(); \ | |
15005 | + rt_spin_lock_nested(lock, subclass); \ | |
15006 | + } while (0) | |
15007 | + | |
15008 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
15009 | + do { \ | |
15010 | + typecheck(unsigned long, flags); \ | |
15011 | + flags = 0; \ | |
15012 | + rt_spin_lock_nested(lock, subclass); \ | |
15013 | + } while (0) | |
15014 | +#else | |
15015 | +# define spin_lock_nested(lock, subclass) spin_lock(lock) | |
15016 | +# define spin_lock_bh_nested(lock, subclass) spin_lock_bh(lock) | |
15017 | + | |
15018 | +# define spin_lock_irqsave_nested(lock, flags, subclass) \ | |
15019 | + do { \ | |
15020 | + typecheck(unsigned long, flags); \ | |
15021 | + flags = 0; \ | |
15022 | + spin_lock(lock); \ | |
15023 | + } while (0) | |
15024 | +#endif | |
15025 | + | |
15026 | +#define spin_lock_irqsave(lock, flags) \ | |
15027 | + do { \ | |
15028 | + typecheck(unsigned long, flags); \ | |
15029 | + flags = 0; \ | |
15030 | + spin_lock(lock); \ | |
15031 | + } while (0) | |
15032 | + | |
15033 | +static inline unsigned long spin_lock_trace_flags(spinlock_t *lock) | |
15034 | +{ | |
15035 | + unsigned long flags = 0; | |
15036 | +#ifdef CONFIG_TRACE_IRQFLAGS | |
15037 | + flags = rt_spin_lock_trace_flags(lock); | |
15038 | +#else | |
15039 | + spin_lock(lock); /* lock_local */ | |
15040 | +#endif | |
15041 | + return flags; | |
15042 | +} | |
15043 | + | |
15044 | +/* FIXME: we need rt_spin_lock_nest_lock */ | |
15045 | +#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) | |
15046 | + | |
15047 | +#define spin_unlock(lock) rt_spin_unlock(lock) | |
cb95d48a | 15048 | +#define spin_unlock_no_deboost(lock) rt_spin_unlock_no_deboost(lock) |
b4de310e JK | 15049 | + |
15050 | +#define spin_unlock_bh(lock) \ | |
15051 | + do { \ | |
15052 | + rt_spin_unlock(lock); \ | |
15053 | + local_bh_enable(); \ | |
15054 | + } while (0) | |
15055 | + | |
15056 | +#define spin_unlock_irq(lock) spin_unlock(lock) | |
15057 | + | |
15058 | +#define spin_unlock_irqrestore(lock, flags) \ | |
15059 | + do { \ | |
15060 | + typecheck(unsigned long, flags); \ | |
15061 | + (void) flags; \ | |
15062 | + spin_unlock(lock); \ | |
15063 | + } while (0) | |
15064 | + | |
15065 | +#define spin_trylock_bh(lock) __cond_lock(lock, rt_spin_trylock_bh(lock)) | |
15066 | +#define spin_trylock_irq(lock) spin_trylock(lock) | |
15067 | + | |
15068 | +#define spin_trylock_irqsave(lock, flags) \ | |
15069 | + rt_spin_trylock_irqsave(lock, &(flags)) | |
15070 | + | |
15071 | +#define spin_unlock_wait(lock) rt_spin_unlock_wait(lock) | |
15072 | + | |
15073 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
15074 | +# define spin_is_contended(lock) ((lock)->break_lock) | |
15075 | +#else | |
15076 | +# define spin_is_contended(lock) (((void)(lock), 0)) | |
15077 | +#endif | |
15078 | + | |
15079 | +static inline int spin_can_lock(spinlock_t *lock) | |
15080 | +{ | |
15081 | + return !rt_mutex_is_locked(&lock->lock); | |
15082 | +} | |
15083 | + | |
15084 | +static inline int spin_is_locked(spinlock_t *lock) | |
15085 | +{ | |
15086 | + return rt_mutex_is_locked(&lock->lock); | |
15087 | +} | |
15088 | + | |
15089 | +static inline void assert_spin_locked(spinlock_t *lock) | |
15090 | +{ | |
15091 | + BUG_ON(!spin_is_locked(lock)); | |
15092 | +} | |
15093 | + | |
15094 | +#define atomic_dec_and_lock(atomic, lock) \ | |
15095 | + atomic_dec_and_spin_lock(atomic, lock) | |
15096 | + | |
15097 | +#endif | |
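Under this mapping a spinlock_t sleeps and the _irqsave forms leave interrupts enabled; flags is set to 0 purely to satisfy typecheck(). A sketch under those RT semantics (my_lock is an illustrative name):

    static DEFINE_SPINLOCK(my_lock);

    static void critical_section(void)
    {
            unsigned long flags;

            spin_lock_irqsave(&my_lock, flags);
            /* preemptible critical section on PREEMPT_RT_FULL */
            spin_unlock_irqrestore(&my_lock, flags);
    }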
15098 | diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h | |
15099 | index 73548eb13a5d..10bac715ea96 100644 | |
15100 | --- a/include/linux/spinlock_types.h | |
15101 | +++ b/include/linux/spinlock_types.h | |
15102 | @@ -9,80 +9,15 @@ | |
15103 | * Released under the General Public License (GPL). | |
15104 | */ | |
15105 | ||
15106 | -#if defined(CONFIG_SMP) | |
15107 | -# include <asm/spinlock_types.h> | |
15108 | -#else | |
15109 | -# include <linux/spinlock_types_up.h> | |
15110 | -#endif | |
15111 | - | |
15112 | -#include <linux/lockdep.h> | |
15113 | - | |
15114 | -typedef struct raw_spinlock { | |
15115 | - arch_spinlock_t raw_lock; | |
15116 | -#ifdef CONFIG_GENERIC_LOCKBREAK | |
15117 | - unsigned int break_lock; | |
15118 | -#endif | |
15119 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
15120 | - unsigned int magic, owner_cpu; | |
15121 | - void *owner; | |
15122 | -#endif | |
15123 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15124 | - struct lockdep_map dep_map; | |
15125 | -#endif | |
15126 | -} raw_spinlock_t; | |
15127 | - | |
15128 | -#define SPINLOCK_MAGIC 0xdead4ead | |
15129 | - | |
15130 | -#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
15131 | - | |
15132 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15133 | -# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
15134 | -#else | |
15135 | -# define SPIN_DEP_MAP_INIT(lockname) | |
15136 | -#endif | |
15137 | +#include <linux/spinlock_types_raw.h> | |
15138 | ||
15139 | -#ifdef CONFIG_DEBUG_SPINLOCK | |
15140 | -# define SPIN_DEBUG_INIT(lockname) \ | |
15141 | - .magic = SPINLOCK_MAGIC, \ | |
15142 | - .owner_cpu = -1, \ | |
15143 | - .owner = SPINLOCK_OWNER_INIT, | |
15144 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
15145 | +# include <linux/spinlock_types_nort.h> | |
15146 | +# include <linux/rwlock_types.h> | |
15147 | #else | |
15148 | -# define SPIN_DEBUG_INIT(lockname) | |
15149 | +# include <linux/rtmutex.h> | |
15150 | +# include <linux/spinlock_types_rt.h> | |
15151 | +# include <linux/rwlock_types_rt.h> | |
15152 | #endif | |
15153 | ||
15154 | -#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
15155 | - { \ | |
15156 | - .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
15157 | - SPIN_DEBUG_INIT(lockname) \ | |
15158 | - SPIN_DEP_MAP_INIT(lockname) } | |
15159 | - | |
15160 | -#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
15161 | - (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
15162 | - | |
15163 | -#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
15164 | - | |
15165 | -typedef struct spinlock { | |
15166 | - union { | |
15167 | - struct raw_spinlock rlock; | |
15168 | - | |
15169 | -#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15170 | -# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
15171 | - struct { | |
15172 | - u8 __padding[LOCK_PADSIZE]; | |
15173 | - struct lockdep_map dep_map; | |
15174 | - }; | |
15175 | -#endif | |
15176 | - }; | |
15177 | -} spinlock_t; | |
15178 | - | |
15179 | -#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
15180 | - { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
15181 | - | |
15182 | -#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
15183 | - (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
15184 | - | |
15185 | -#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
15186 | - | |
15187 | -#include <linux/rwlock_types.h> | |
15188 | - | |
15189 | #endif /* __LINUX_SPINLOCK_TYPES_H */ | |
15190 | diff --git a/include/linux/spinlock_types_nort.h b/include/linux/spinlock_types_nort.h | |
15191 | new file mode 100644 | |
15192 | index 000000000000..f1dac1fb1d6a | |
15193 | --- /dev/null | |
15194 | +++ b/include/linux/spinlock_types_nort.h | |
15195 | @@ -0,0 +1,33 @@ | |
15196 | +#ifndef __LINUX_SPINLOCK_TYPES_NORT_H | |
15197 | +#define __LINUX_SPINLOCK_TYPES_NORT_H | |
15198 | + | |
15199 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
15200 | +#error "Do not include directly. Include spinlock_types.h instead" | |
15201 | +#endif | |
15202 | + | |
15203 | +/* | |
15204 | + * The non RT version maps spinlocks to raw_spinlocks | |
15205 | + */ | |
15206 | +typedef struct spinlock { | |
15207 | + union { | |
15208 | + struct raw_spinlock rlock; | |
15209 | + | |
15210 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15211 | +# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map)) | |
15212 | + struct { | |
15213 | + u8 __padding[LOCK_PADSIZE]; | |
15214 | + struct lockdep_map dep_map; | |
15215 | + }; | |
15216 | +#endif | |
15217 | + }; | |
15218 | +} spinlock_t; | |
15219 | + | |
15220 | +#define __SPIN_LOCK_INITIALIZER(lockname) \ | |
15221 | + { { .rlock = __RAW_SPIN_LOCK_INITIALIZER(lockname) } } | |
15222 | + | |
15223 | +#define __SPIN_LOCK_UNLOCKED(lockname) \ | |
15224 | + (spinlock_t ) __SPIN_LOCK_INITIALIZER(lockname) | |
15225 | + | |
15226 | +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) | |
15227 | + | |
15228 | +#endif | |
15229 | diff --git a/include/linux/spinlock_types_raw.h b/include/linux/spinlock_types_raw.h | |
15230 | new file mode 100644 | |
15231 | index 000000000000..edffc4d53fc9 | |
15232 | --- /dev/null | |
15233 | +++ b/include/linux/spinlock_types_raw.h | |
15234 | @@ -0,0 +1,56 @@ | |
15235 | +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H | |
15236 | +#define __LINUX_SPINLOCK_TYPES_RAW_H | |
15237 | + | |
15238 | +#if defined(CONFIG_SMP) | |
15239 | +# include <asm/spinlock_types.h> | |
15240 | +#else | |
15241 | +# include <linux/spinlock_types_up.h> | |
15242 | +#endif | |
15243 | + | |
15244 | +#include <linux/lockdep.h> | |
15245 | + | |
15246 | +typedef struct raw_spinlock { | |
15247 | + arch_spinlock_t raw_lock; | |
15248 | +#ifdef CONFIG_GENERIC_LOCKBREAK | |
15249 | + unsigned int break_lock; | |
15250 | +#endif | |
15251 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
15252 | + unsigned int magic, owner_cpu; | |
15253 | + void *owner; | |
15254 | +#endif | |
15255 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15256 | + struct lockdep_map dep_map; | |
15257 | +#endif | |
15258 | +} raw_spinlock_t; | |
15259 | + | |
15260 | +#define SPINLOCK_MAGIC 0xdead4ead | |
15261 | + | |
15262 | +#define SPINLOCK_OWNER_INIT ((void *)-1L) | |
15263 | + | |
15264 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15265 | +# define SPIN_DEP_MAP_INIT(lockname) .dep_map = { .name = #lockname } | |
15266 | +#else | |
15267 | +# define SPIN_DEP_MAP_INIT(lockname) | |
15268 | +#endif | |
15269 | + | |
15270 | +#ifdef CONFIG_DEBUG_SPINLOCK | |
15271 | +# define SPIN_DEBUG_INIT(lockname) \ | |
15272 | + .magic = SPINLOCK_MAGIC, \ | |
15273 | + .owner_cpu = -1, \ | |
15274 | + .owner = SPINLOCK_OWNER_INIT, | |
15275 | +#else | |
15276 | +# define SPIN_DEBUG_INIT(lockname) | |
15277 | +#endif | |
15278 | + | |
15279 | +#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \ | |
15280 | + { \ | |
15281 | + .raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ | |
15282 | + SPIN_DEBUG_INIT(lockname) \ | |
15283 | + SPIN_DEP_MAP_INIT(lockname) } | |
15284 | + | |
15285 | +#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \ | |
15286 | + (raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname) | |
15287 | + | |
15288 | +#define DEFINE_RAW_SPINLOCK(x) raw_spinlock_t x = __RAW_SPIN_LOCK_UNLOCKED(x) | |
15289 | + | |
15290 | +#endif | |
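raw_spinlock_t keeps its spinning, interrupt-disabling semantics on both configurations, which is why RT code that truly must not sleep declares one explicitly. A minimal sketch (my_raw_lock is an illustrative name):

    static DEFINE_RAW_SPINLOCK(my_raw_lock);

    static void atomic_section(void)
    {
            unsigned long flags;

            raw_spin_lock_irqsave(&my_raw_lock, flags);
            /* genuinely atomic, even on PREEMPT_RT_FULL */
            raw_spin_unlock_irqrestore(&my_raw_lock, flags);
    }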
15291 | diff --git a/include/linux/spinlock_types_rt.h b/include/linux/spinlock_types_rt.h | |
15292 | new file mode 100644 | |
15293 | index 000000000000..9fd431967abc | |
15294 | --- /dev/null | |
15295 | +++ b/include/linux/spinlock_types_rt.h | |
15296 | @@ -0,0 +1,51 @@ | |
15297 | +#ifndef __LINUX_SPINLOCK_TYPES_RT_H | |
15298 | +#define __LINUX_SPINLOCK_TYPES_RT_H | |
15299 | + | |
15300 | +#ifndef __LINUX_SPINLOCK_TYPES_H | |
15301 | +#error "Do not include directly. Include spinlock_types.h instead" | |
15302 | +#endif | |
15303 | + | |
15304 | +#include <linux/cache.h> | |
15305 | + | |
15306 | +/* | |
15307 | + * PREEMPT_RT: spinlocks - an RT mutex plus lock-break field: | |
15308 | + */ | |
15309 | +typedef struct spinlock { | |
15310 | + struct rt_mutex lock; | |
15311 | + unsigned int break_lock; | |
15312 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
15313 | + struct lockdep_map dep_map; | |
15314 | +#endif | |
15315 | +} spinlock_t; | |
15316 | + | |
15317 | +#ifdef CONFIG_DEBUG_RT_MUTEXES | |
15318 | +# define __RT_SPIN_INITIALIZER(name) \ | |
15319 | + { \ | |
15320 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
15321 | + .save_state = 1, \ | |
15322 | + .file = __FILE__, \ | |
15323 | + .line = __LINE__, \ |
15324 | + } | |
15325 | +#else | |
15326 | +# define __RT_SPIN_INITIALIZER(name) \ | |
15327 | + { \ | |
15328 | + .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ | |
15329 | + .save_state = 1, \ | |
15330 | + } | |
15331 | +#endif | |
15332 | + | |
15333 | +/* | |
15334 | +.wait_list = PLIST_HEAD_INIT_RAW((name).lock.wait_list, (name).lock.wait_lock) | |
15335 | +*/ | |
15336 | + | |
15337 | +#define __SPIN_LOCK_UNLOCKED(name) \ | |
15338 | + { .lock = __RT_SPIN_INITIALIZER(name.lock), \ | |
15339 | + SPIN_DEP_MAP_INIT(name) } | |
15340 | + | |
15341 | +#define __DEFINE_SPINLOCK(name) \ | |
15342 | + spinlock_t name = __SPIN_LOCK_UNLOCKED(name) | |
15343 | + | |
15344 | +#define DEFINE_SPINLOCK(name) \ | |
15345 | + spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name) | |
15346 | + | |
15347 | +#endif | |
15348 | diff --git a/include/linux/srcu.h b/include/linux/srcu.h | |
15349 | index f5f80c5643ac..ec1a8f01563c 100644 | |
15350 | --- a/include/linux/srcu.h | |
15351 | +++ b/include/linux/srcu.h | |
15352 | @@ -84,10 +84,10 @@ int init_srcu_struct(struct srcu_struct *sp); | |
15353 | ||
15354 | void process_srcu(struct work_struct *work); | |
15355 | ||
15356 | -#define __SRCU_STRUCT_INIT(name) \ | |
15357 | +#define __SRCU_STRUCT_INIT(name, pcpu_name) \ | |
15358 | { \ | |
15359 | .completed = -300, \ | |
15360 | - .per_cpu_ref = &name##_srcu_array, \ | |
15361 | + .per_cpu_ref = &pcpu_name, \ | |
15362 | .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ | |
15363 | .running = false, \ | |
15364 | .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ | |
15365 | @@ -104,7 +104,7 @@ void process_srcu(struct work_struct *work); | |
15366 | */ | |
15367 | #define __DEFINE_SRCU(name, is_static) \ | |
15368 | static DEFINE_PER_CPU(struct srcu_struct_array, name##_srcu_array);\ | |
15369 | - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | |
15370 | + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name##_srcu_array) | |
15371 | #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) | |
15372 | #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) | |
15373 | ||
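The extra pcpu_name argument lets __SRCU_STRUCT_INIT reference the per-CPU array by its real name instead of re-pasting it from name. Definition and read-side usage are unchanged; a brief sketch (the reader function is hypothetical):

    DEFINE_STATIC_SRCU(hyp_srcu);	/* expands the per-CPU array plus the struct */

    static void hyp_read_side(void)
    {
    	int idx;

    	idx = srcu_read_lock(&hyp_srcu);
    	/* ... dereference SRCU-protected data ... */
    	srcu_read_unlock(&hyp_srcu, idx);
    }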
15374 | diff --git a/include/linux/suspend.h b/include/linux/suspend.h | |
15375 | index 8b6ec7ef0854..9b77d4cc929f 100644 | |
15376 | --- a/include/linux/suspend.h | |
15377 | +++ b/include/linux/suspend.h | |
15378 | @@ -194,6 +194,12 @@ struct platform_freeze_ops { | |
15379 | void (*end)(void); | |
15380 | }; | |
15381 | ||
15382 | +#if defined(CONFIG_SUSPEND) || defined(CONFIG_HIBERNATION) | |
15383 | +extern bool pm_in_action; | |
15384 | +#else | |
15385 | +# define pm_in_action false | |
15386 | +#endif | |
15387 | + | |
15388 | #ifdef CONFIG_SUSPEND | |
15389 | /** | |
15390 | * suspend_set_ops - set platform dependent suspend operations | |
15391 | diff --git a/include/linux/swait.h b/include/linux/swait.h | |
15392 | new file mode 100644 | |
15393 | index 000000000000..83f004a72320 | |
15394 | --- /dev/null | |
15395 | +++ b/include/linux/swait.h | |
15396 | @@ -0,0 +1,173 @@ | |
15397 | +#ifndef _LINUX_SWAIT_H | |
15398 | +#define _LINUX_SWAIT_H | |
15399 | + | |
15400 | +#include <linux/list.h> | |
15401 | +#include <linux/stddef.h> | |
15402 | +#include <linux/spinlock.h> | |
15403 | +#include <asm/current.h> | |
15404 | + | |
15405 | +/* | |
15406 | + * Simple wait queues | |
15407 | + * | |
15408 | + * While these are very similar to the other/complex wait queues (wait.h) the | |
15409 | + * most important difference is that the simple waitqueue allows for | |
15410 | + * deterministic behaviour -- IOW it has strictly bounded IRQ and lock hold | |
15411 | + * times. | |
15412 | + * | |
15413 | + * In order to make this so, we had to drop a fair number of features of the | |
15414 | + * other waitqueue code; notably: | |
15415 | + * | |
15416 | + * - mixing INTERRUPTIBLE and UNINTERRUPTIBLE sleeps on the same waitqueue; | |
15417 | + * all wakeups are TASK_NORMAL in order to avoid O(n) lookups for the right | |
15418 | + * sleeper state. | |
15419 | + * | |
15420 | + * - the exclusive mode; because this requires preserving the list order | |
15421 | + * and this is hard. | |
15422 | + * | |
15423 | + * - custom wake functions; because you cannot give any guarantees about | |
15424 | + * random code. | |
15425 | + * | |
15426 | + * As a side effect of this; the data structures are slimmer. | |
15427 | + * | |
15428 | + * One would recommend using this wait queue where possible. | |
15429 | + */ | |
15430 | + | |
15431 | +struct task_struct; | |
15432 | + | |
15433 | +struct swait_queue_head { | |
15434 | + raw_spinlock_t lock; | |
15435 | + struct list_head task_list; | |
15436 | +}; | |
15437 | + | |
15438 | +struct swait_queue { | |
15439 | + struct task_struct *task; | |
15440 | + struct list_head task_list; | |
15441 | +}; | |
15442 | + | |
15443 | +#define __SWAITQUEUE_INITIALIZER(name) { \ | |
15444 | + .task = current, \ | |
15445 | + .task_list = LIST_HEAD_INIT((name).task_list), \ | |
15446 | +} | |
15447 | + | |
15448 | +#define DECLARE_SWAITQUEUE(name) \ | |
15449 | + struct swait_queue name = __SWAITQUEUE_INITIALIZER(name) | |
15450 | + | |
15451 | +#define __SWAIT_QUEUE_HEAD_INITIALIZER(name) { \ | |
15452 | + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ | |
15453 | + .task_list = LIST_HEAD_INIT((name).task_list), \ | |
15454 | +} | |
15455 | + | |
15456 | +#define DECLARE_SWAIT_QUEUE_HEAD(name) \ | |
15457 | + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INITIALIZER(name) | |
15458 | + | |
15459 | +extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
15460 | + struct lock_class_key *key); | |
15461 | + | |
15462 | +#define init_swait_queue_head(q) \ | |
15463 | + do { \ | |
15464 | + static struct lock_class_key __key; \ | |
15465 | + __init_swait_queue_head((q), #q, &__key); \ | |
15466 | + } while (0) | |
15467 | + | |
15468 | +#ifdef CONFIG_LOCKDEP | |
15469 | +# define __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ | |
15470 | + ({ init_swait_queue_head(&name); name; }) | |
15471 | +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ | |
15472 | + struct swait_queue_head name = __SWAIT_QUEUE_HEAD_INIT_ONSTACK(name) | |
15473 | +#else | |
15474 | +# define DECLARE_SWAIT_QUEUE_HEAD_ONSTACK(name) \ | |
15475 | + DECLARE_SWAIT_QUEUE_HEAD(name) | |
15476 | +#endif | |
15477 | + | |
15478 | +static inline int swait_active(struct swait_queue_head *q) | |
15479 | +{ | |
15480 | + return !list_empty(&q->task_list); | |
15481 | +} | |
15482 | + | |
15483 | +extern void swake_up(struct swait_queue_head *q); | |
15484 | +extern void swake_up_all(struct swait_queue_head *q); | |
15485 | +extern void swake_up_locked(struct swait_queue_head *q); | |
15486 | +extern void swake_up_all_locked(struct swait_queue_head *q); | |
15487 | + | |
15488 | +extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
15489 | +extern void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
15490 | +extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state); | |
15491 | + | |
15492 | +extern void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
15493 | +extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait); | |
15494 | + | |
15495 | +/* as per ___wait_event() but for swait, therefore "exclusive == 0" */ | |
15496 | +#define ___swait_event(wq, condition, state, ret, cmd) \ | |
15497 | +({ \ | |
15498 | + struct swait_queue __wait; \ | |
15499 | + long __ret = ret; \ | |
15500 | + \ | |
15501 | + INIT_LIST_HEAD(&__wait.task_list); \ | |
15502 | + for (;;) { \ | |
15503 | + long __int = prepare_to_swait_event(&wq, &__wait, state);\ | |
15504 | + \ | |
15505 | + if (condition) \ | |
15506 | + break; \ | |
15507 | + \ | |
15508 | + if (___wait_is_interruptible(state) && __int) { \ | |
15509 | + __ret = __int; \ | |
15510 | + break; \ | |
15511 | + } \ | |
15512 | + \ | |
15513 | + cmd; \ | |
15514 | + } \ | |
15515 | + finish_swait(&wq, &__wait); \ | |
15516 | + __ret; \ | |
15517 | +}) | |
15518 | + | |
15519 | +#define __swait_event(wq, condition) \ | |
15520 | + (void)___swait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, \ | |
15521 | + schedule()) | |
15522 | + | |
15523 | +#define swait_event(wq, condition) \ | |
15524 | +do { \ | |
15525 | + if (condition) \ | |
15526 | + break; \ | |
15527 | + __swait_event(wq, condition); \ | |
15528 | +} while (0) | |
15529 | + | |
15530 | +#define __swait_event_timeout(wq, condition, timeout) \ | |
15531 | + ___swait_event(wq, ___wait_cond_timeout(condition), \ | |
15532 | + TASK_UNINTERRUPTIBLE, timeout, \ | |
15533 | + __ret = schedule_timeout(__ret)) | |
15534 | + | |
15535 | +#define swait_event_timeout(wq, condition, timeout) \ | |
15536 | +({ \ | |
15537 | + long __ret = timeout; \ | |
15538 | + if (!___wait_cond_timeout(condition)) \ | |
15539 | + __ret = __swait_event_timeout(wq, condition, timeout); \ | |
15540 | + __ret; \ | |
15541 | +}) | |
15542 | + | |
15543 | +#define __swait_event_interruptible(wq, condition) \ | |
15544 | + ___swait_event(wq, condition, TASK_INTERRUPTIBLE, 0, \ | |
15545 | + schedule()) | |
15546 | + | |
15547 | +#define swait_event_interruptible(wq, condition) \ | |
15548 | +({ \ | |
15549 | + int __ret = 0; \ | |
15550 | + if (!(condition)) \ | |
15551 | + __ret = __swait_event_interruptible(wq, condition); \ | |
15552 | + __ret; \ | |
15553 | +}) | |
15554 | + | |
15555 | +#define __swait_event_interruptible_timeout(wq, condition, timeout) \ | |
15556 | + ___swait_event(wq, ___wait_cond_timeout(condition), \ | |
15557 | + TASK_INTERRUPTIBLE, timeout, \ | |
15558 | + __ret = schedule_timeout(__ret)) | |
15559 | + | |
15560 | +#define swait_event_interruptible_timeout(wq, condition, timeout) \ | |
15561 | +({ \ | |
15562 | + long __ret = timeout; \ | |
15563 | + if (!___wait_cond_timeout(condition)) \ | |
15564 | + __ret = __swait_event_interruptible_timeout(wq, \ | |
15565 | + condition, timeout); \ | |
15566 | + __ret; \ | |
15567 | +}) | |
15568 | + | |
15569 | +#endif /* _LINUX_SWAIT_H */ | |
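A typical producer/consumer pairing with the simple wait queue, as a sketch (the flag and both functions are hypothetical):

    static DECLARE_SWAIT_QUEUE_HEAD(hyp_wq);
    static bool hyp_event;

    static int hyp_wait_for_event(void)
    {
    	/* sleeps TASK_INTERRUPTIBLE until hyp_event becomes true */
    	if (swait_event_interruptible(hyp_wq, hyp_event))
    		return -ERESTARTSYS;	/* woken by a signal instead */
    	return 0;
    }

    static void hyp_post_event(void)
    {
    	hyp_event = true;
    	swake_up(&hyp_wq);	/* wakes one waiter; bounded lock hold time */
    }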
15570 | diff --git a/include/linux/swap.h b/include/linux/swap.h | |
cb95d48a | 15571 | index d8ca2eaa3a8b..19e038054914 100644 |
15572 | --- a/include/linux/swap.h |
15573 | +++ b/include/linux/swap.h | |
15574 | @@ -11,6 +11,7 @@ | |
15575 | #include <linux/fs.h> | |
15576 | #include <linux/atomic.h> | |
15577 | #include <linux/page-flags.h> | |
15578 | +#include <linux/locallock.h> | |
15579 | #include <asm/page.h> | |
15580 | ||
15581 | struct notifier_block; | |
15582 | @@ -252,7 +253,8 @@ struct swap_info_struct { | |
15583 | void *workingset_eviction(struct address_space *mapping, struct page *page); | |
15584 | bool workingset_refault(void *shadow); | |
15585 | void workingset_activation(struct page *page); | |
15586 | -extern struct list_lru workingset_shadow_nodes; | |
15587 | +extern struct list_lru __workingset_shadow_nodes; | |
15588 | +DECLARE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
15589 | ||
15590 | static inline unsigned int workingset_node_pages(struct radix_tree_node *node) | |
15591 | { | |
cb95d48a | 15592 | @@ -298,6 +300,7 @@ extern unsigned long nr_free_pagecache_pages(void); |
15593 | |
15594 | ||
15595 | /* linux/mm/swap.c */ | |
15596 | +DECLARE_LOCAL_IRQ_LOCK(swapvec_lock); | |
15597 | extern void lru_cache_add(struct page *); | |
15598 | extern void lru_cache_add_anon(struct page *page); | |
15599 | extern void lru_cache_add_file(struct page *page); | |
15600 | diff --git a/include/linux/swork.h b/include/linux/swork.h | |
15601 | new file mode 100644 | |
15602 | index 000000000000..f175fa9a6016 | |
15603 | --- /dev/null | |
15604 | +++ b/include/linux/swork.h | |
15605 | @@ -0,0 +1,24 @@ | |
15606 | +#ifndef _LINUX_SWORK_H | |
15607 | +#define _LINUX_SWORK_H | |
15608 | + | |
15609 | +#include <linux/list.h> | |
15610 | + | |
15611 | +struct swork_event { | |
15612 | + struct list_head item; | |
15613 | + unsigned long flags; | |
15614 | + void (*func)(struct swork_event *); | |
15615 | +}; | |
15616 | + | |
15617 | +static inline void INIT_SWORK(struct swork_event *event, | |
15618 | + void (*func)(struct swork_event *)) | |
15619 | +{ | |
15620 | + event->flags = 0; | |
15621 | + event->func = func; | |
15622 | +} | |
15623 | + | |
15624 | +bool swork_queue(struct swork_event *sev); | |
15625 | + | |
15626 | +int swork_get(void); | |
15627 | +void swork_put(void); | |
15628 | + | |
15629 | +#endif /* _LINUX_SWORK_H */ | |
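The swork ("simple work") interface defers callbacks to a dedicated kernel thread; users take a reference with swork_get() before queueing, mirroring how kernel/cgroup.c uses it later in this patch. A hedged usage sketch (the handler and trigger are hypothetical):

    static struct swork_event hyp_ev;

    static void hyp_handler(struct swork_event *sev)
    {
    	/* runs in the swork thread, i.e. in sleepable process context */
    }

    static int hyp_setup(void)
    {
    	int err = swork_get();	/* pin the worker thread */
    	if (err)
    		return err;
    	INIT_SWORK(&hyp_ev, hyp_handler);
    	return 0;
    }

    static void hyp_trigger(void)
    {
    	swork_queue(&hyp_ev);	/* handler runs asynchronously */
    }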
15630 | diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h | |
15631 | index ff307b548ed3..be9f9dc6a4e1 100644 | |
15632 | --- a/include/linux/thread_info.h | |
15633 | +++ b/include/linux/thread_info.h | |
15634 | @@ -102,7 +102,17 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag) | |
15635 | #define test_thread_flag(flag) \ | |
15636 | test_ti_thread_flag(current_thread_info(), flag) | |
15637 | ||
15638 | -#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
15639 | +#ifdef CONFIG_PREEMPT_LAZY | |
15640 | +#define tif_need_resched() (test_thread_flag(TIF_NEED_RESCHED) || \ | |
15641 | + test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
15642 | +#define tif_need_resched_now() (test_thread_flag(TIF_NEED_RESCHED)) | |
15643 | +#define tif_need_resched_lazy() (test_thread_flag(TIF_NEED_RESCHED_LAZY)) | |
15644 | + | |
15645 | +#else | |
15646 | +#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED) | |
15647 | +#define tif_need_resched_now() test_thread_flag(TIF_NEED_RESCHED) | |
15648 | +#define tif_need_resched_lazy() 0 | |
15649 | +#endif | |
15650 | ||
15651 | #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK | |
15652 | /* | |
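With PREEMPT_LAZY, tif_need_resched() now reports either the immediate or the lazy flag, so existing "should I yield" checks keep working, while the _now/_lazy variants let callers distinguish the two. A sketch of a typical bounded-work loop (the work itself is hypothetical):

    static void hyp_do_chunks(void)
    {
    	while (!tif_need_resched()) {	/* true for NEED_RESCHED or NEED_RESCHED_LAZY */
    		/* ... one bounded chunk of work ... */
    		cpu_relax();
    	}
    	cond_resched();			/* let the scheduler run something else */
    }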
15653 | diff --git a/include/linux/timer.h b/include/linux/timer.h | |
15654 | index 61aa61dc410c..299d2b78591f 100644 | |
15655 | --- a/include/linux/timer.h | |
15656 | +++ b/include/linux/timer.h | |
15657 | @@ -225,7 +225,7 @@ extern void add_timer(struct timer_list *timer); | |
15658 | ||
15659 | extern int try_to_del_timer_sync(struct timer_list *timer); | |
15660 | ||
15661 | -#ifdef CONFIG_SMP | |
15662 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
15663 | extern int del_timer_sync(struct timer_list *timer); | |
15664 | #else | |
15665 | # define del_timer_sync(t) del_timer(t) | |
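On PREEMPT_RT a timer callback can be preempted even on uniprocessor kernels, so del_timer_sync() must genuinely wait for a running handler instead of aliasing to del_timer(). Teardown code keeps its familiar shape; a sketch with a hypothetical timer:

    static struct timer_list hyp_timer;

    static void hyp_teardown(void)
    {
    	/* with PREEMPT_RT_FULL this waits for a concurrently running
    	 * hyp_timer callback, even on UP */
    	del_timer_sync(&hyp_timer);
    }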
15666 | diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h | |
15667 | index 925730bc9fc1..a591f414da6f 100644 | |
15668 | --- a/include/linux/trace_events.h | |
15669 | +++ b/include/linux/trace_events.h | |
15670 | @@ -66,6 +66,9 @@ struct trace_entry { | |
15671 | unsigned char flags; | |
15672 | unsigned char preempt_count; | |
15673 | int pid; | |
15674 | + unsigned short migrate_disable; | |
15675 | + unsigned short padding; | |
15676 | + unsigned char preempt_lazy_count; | |
15677 | }; | |
15678 | ||
15679 | #define TRACE_EVENT_TYPE_MAX \ | |
15680 | diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h | |
15681 | index 558129af828a..cf5c472bbc79 100644 | |
15682 | --- a/include/linux/uaccess.h | |
15683 | +++ b/include/linux/uaccess.h | |
15684 | @@ -24,6 +24,7 @@ static __always_inline void pagefault_disabled_dec(void) | |
15685 | */ | |
15686 | static inline void pagefault_disable(void) | |
15687 | { | |
15688 | + migrate_disable(); | |
15689 | pagefault_disabled_inc(); | |
15690 | /* | |
15691 | * make sure to have issued the store before a pagefault | |
15692 | @@ -40,6 +41,7 @@ static inline void pagefault_enable(void) | |
15693 | */ | |
15694 | barrier(); | |
15695 | pagefault_disabled_dec(); | |
15696 | + migrate_enable(); | |
15697 | } | |
15698 | ||
15699 | /* | |
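Pairing migrate_disable() with pagefault_disable() keeps the task on its CPU across the whole pagefault-disabled region without disabling preemption, which is what RT needs since such regions may still block. Callers are unchanged; a sketch with a hypothetical helper:

    static int hyp_peek_user(const int __user *uaddr, int *val)
    {
    	int ret;

    	pagefault_disable();		/* now also disables migration */
    	ret = __get_user(*val, uaddr);	/* -EFAULT if the page is not resident */
    	pagefault_enable();		/* re-enables migration */

    	return ret;
    }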
15700 | diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h | |
15701 | index 4a29c75b146e..0a294e950df8 100644 | |
15702 | --- a/include/linux/uprobes.h | |
15703 | +++ b/include/linux/uprobes.h | |
15704 | @@ -27,6 +27,7 @@ | |
15705 | #include <linux/errno.h> | |
15706 | #include <linux/rbtree.h> | |
15707 | #include <linux/types.h> | |
15708 | +#include <linux/wait.h> | |
15709 | ||
15710 | struct vm_area_struct; | |
15711 | struct mm_struct; | |
15712 | diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h | |
15713 | index 3e5d9075960f..7eaa847cd5a5 100644 | |
15714 | --- a/include/linux/vmstat.h | |
15715 | +++ b/include/linux/vmstat.h | |
15716 | @@ -33,7 +33,9 @@ DECLARE_PER_CPU(struct vm_event_state, vm_event_states); | |
15717 | */ | |
15718 | static inline void __count_vm_event(enum vm_event_item item) | |
15719 | { | |
15720 | + preempt_disable_rt(); | |
15721 | raw_cpu_inc(vm_event_states.event[item]); | |
15722 | + preempt_enable_rt(); | |
15723 | } | |
15724 | ||
15725 | static inline void count_vm_event(enum vm_event_item item) | |
15726 | @@ -43,7 +45,9 @@ static inline void count_vm_event(enum vm_event_item item) | |
15727 | ||
15728 | static inline void __count_vm_events(enum vm_event_item item, long delta) | |
15729 | { | |
15730 | + preempt_disable_rt(); | |
15731 | raw_cpu_add(vm_event_states.event[item], delta); | |
15732 | + preempt_enable_rt(); | |
15733 | } | |
15734 | ||
15735 | static inline void count_vm_events(enum vm_event_item item, long delta) | |
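preempt_disable_rt()/preempt_enable_rt() compile away on non-RT kernels; on RT they restore the implicit "this per-CPU update cannot be preempted" assumption behind raw_cpu_inc() and raw_cpu_add(). Counter users need no change, e.g. a hypothetical fast path:

    static void hyp_account_fault(void)
    {
    	count_vm_event(PGFAULT);	/* safe on both RT and non-RT */
    }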
15736 | diff --git a/include/linux/wait.h b/include/linux/wait.h | |
15737 | index 513b36f04dfd..981c8a840f96 100644 | |
15738 | --- a/include/linux/wait.h | |
15739 | +++ b/include/linux/wait.h | |
15740 | @@ -8,6 +8,7 @@ | |
15741 | #include <linux/spinlock.h> | |
15742 | #include <asm/current.h> | |
15743 | #include <uapi/linux/wait.h> | |
15744 | +#include <linux/atomic.h> | |
15745 | ||
15746 | typedef struct __wait_queue wait_queue_t; | |
15747 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); | |
15748 | diff --git a/include/net/dst.h b/include/net/dst.h | |
15749 | index c7329dcd90cc..35c3dba16728 100644 | |
15750 | --- a/include/net/dst.h | |
15751 | +++ b/include/net/dst.h | |
15752 | @@ -437,7 +437,7 @@ static inline void dst_confirm(struct dst_entry *dst) | |
15753 | static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, | |
15754 | struct sk_buff *skb) | |
15755 | { | |
15756 | - const struct hh_cache *hh; | |
15757 | + struct hh_cache *hh; | |
15758 | ||
15759 | if (dst->pending_confirm) { | |
15760 | unsigned long now = jiffies; | |
15761 | diff --git a/include/net/neighbour.h b/include/net/neighbour.h | |
15762 | index 8b683841e574..bf656008f6e7 100644 | |
15763 | --- a/include/net/neighbour.h | |
15764 | +++ b/include/net/neighbour.h | |
15765 | @@ -446,7 +446,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) | |
15766 | } | |
15767 | #endif | |
15768 | ||
15769 | -static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb) | |
15770 | +static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb) | |
15771 | { | |
15772 | unsigned int seq; | |
15773 | int hh_len; | |
15774 | @@ -501,7 +501,7 @@ struct neighbour_cb { | |
15775 | ||
15776 | #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) | |
15777 | ||
15778 | -static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, | |
15779 | +static inline void neigh_ha_snapshot(char *dst, struct neighbour *n, | |
15780 | const struct net_device *dev) | |
15781 | { | |
15782 | unsigned int seq; | |
15783 | diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h | |
15784 | index c68926b4899c..dd0751e76065 100644 | |
15785 | --- a/include/net/netns/ipv4.h | |
15786 | +++ b/include/net/netns/ipv4.h | |
15787 | @@ -70,6 +70,7 @@ struct netns_ipv4 { | |
15788 | ||
15789 | int sysctl_icmp_echo_ignore_all; | |
15790 | int sysctl_icmp_echo_ignore_broadcasts; | |
15791 | + int sysctl_icmp_echo_sysrq; | |
15792 | int sysctl_icmp_ignore_bogus_error_responses; | |
15793 | int sysctl_icmp_ratelimit; | |
15794 | int sysctl_icmp_ratemask; | |
15795 | diff --git a/include/trace/events/hist.h b/include/trace/events/hist.h | |
15796 | new file mode 100644 | |
15797 | index 000000000000..f7710de1b1f3 | |
15798 | --- /dev/null | |
15799 | +++ b/include/trace/events/hist.h | |
15800 | @@ -0,0 +1,73 @@ | |
15801 | +#undef TRACE_SYSTEM | |
15802 | +#define TRACE_SYSTEM hist | |
15803 | + | |
15804 | +#if !defined(_TRACE_HIST_H) || defined(TRACE_HEADER_MULTI_READ) | |
15805 | +#define _TRACE_HIST_H | |
15806 | + | |
15807 | +#include "latency_hist.h" | |
15808 | +#include <linux/tracepoint.h> | |
15809 | + | |
15810 | +#if !defined(CONFIG_PREEMPT_OFF_HIST) && !defined(CONFIG_INTERRUPT_OFF_HIST) | |
15811 | +#define trace_preemptirqsoff_hist(a, b) | |
15812 | +#define trace_preemptirqsoff_hist_rcuidle(a, b) | |
15813 | +#else | |
15814 | +TRACE_EVENT(preemptirqsoff_hist, | |
15815 | + | |
15816 | + TP_PROTO(int reason, int starthist), | |
15817 | + | |
15818 | + TP_ARGS(reason, starthist), | |
15819 | + | |
15820 | + TP_STRUCT__entry( | |
15821 | + __field(int, reason) | |
15822 | + __field(int, starthist) | |
15823 | + ), | |
15824 | + | |
15825 | + TP_fast_assign( | |
15826 | + __entry->reason = reason; | |
15827 | + __entry->starthist = starthist; | |
15828 | + ), | |
15829 | + | |
15830 | + TP_printk("reason=%s starthist=%s", getaction(__entry->reason), | |
15831 | + __entry->starthist ? "start" : "stop") | |
15832 | +); | |
15833 | +#endif | |
15834 | + | |
15835 | +#ifndef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
15836 | +#define trace_hrtimer_interrupt(a, b, c, d) | |
15837 | +#else | |
15838 | +TRACE_EVENT(hrtimer_interrupt, | |
15839 | + | |
15840 | + TP_PROTO(int cpu, long long offset, struct task_struct *curr, | |
15841 | + struct task_struct *task), | |
15842 | + | |
15843 | + TP_ARGS(cpu, offset, curr, task), | |
15844 | + | |
15845 | + TP_STRUCT__entry( | |
15846 | + __field(int, cpu) | |
15847 | + __field(long long, offset) | |
15848 | + __array(char, ccomm, TASK_COMM_LEN) | |
15849 | + __field(int, cprio) | |
15850 | + __array(char, tcomm, TASK_COMM_LEN) | |
15851 | + __field(int, tprio) | |
15852 | + ), | |
15853 | + | |
15854 | + TP_fast_assign( | |
15855 | + __entry->cpu = cpu; | |
15856 | + __entry->offset = offset; | |
15857 | + memcpy(__entry->ccomm, curr->comm, TASK_COMM_LEN); | |
15858 | + __entry->cprio = curr->prio; | |
15859 | + memcpy(__entry->tcomm, task != NULL ? task->comm : "<none>", | |
15860 | + task != NULL ? TASK_COMM_LEN : 7); | |
15861 | + __entry->tprio = task != NULL ? task->prio : -1; | |
15862 | + ), | |
15863 | + | |
15864 | + TP_printk("cpu=%d offset=%lld curr=%s[%d] thread=%s[%d]", | |
15865 | + __entry->cpu, __entry->offset, __entry->ccomm, | |
15866 | + __entry->cprio, __entry->tcomm, __entry->tprio) | |
15867 | +); | |
15868 | +#endif | |
15869 | + | |
15870 | +#endif /* _TRACE_HIST_H */ | |
15871 | + | |
15872 | +/* This part must be outside protection */ | |
15873 | +#include <trace/define_trace.h> | |
15874 | diff --git a/include/trace/events/latency_hist.h b/include/trace/events/latency_hist.h | |
15875 | new file mode 100644 | |
15876 | index 000000000000..d3f2fbd560b1 | |
15877 | --- /dev/null | |
15878 | +++ b/include/trace/events/latency_hist.h | |
15879 | @@ -0,0 +1,29 @@ | |
15880 | +#ifndef _LATENCY_HIST_H | |
15881 | +#define _LATENCY_HIST_H | |
15882 | + | |
15883 | +enum hist_action { | |
15884 | + IRQS_ON, | |
15885 | + PREEMPT_ON, | |
15886 | + TRACE_STOP, | |
15887 | + IRQS_OFF, | |
15888 | + PREEMPT_OFF, | |
15889 | + TRACE_START, | |
15890 | +}; | |
15891 | + | |
15892 | +static char *actions[] = { | |
15893 | + "IRQS_ON", | |
15894 | + "PREEMPT_ON", | |
15895 | + "TRACE_STOP", | |
15896 | + "IRQS_OFF", | |
15897 | + "PREEMPT_OFF", | |
15898 | + "TRACE_START", | |
15899 | +}; | |
15900 | + | |
15901 | +static inline char *getaction(int action) | |
15902 | +{ | |
15903 | + if (action >= 0 && action < sizeof(actions)/sizeof(actions[0])) | |
15904 | + return actions[action]; | |
15905 | + return "unknown"; | |
15906 | +} | |
15907 | + | |
15908 | +#endif /* _LATENCY_HIST_H */ | |
15909 | diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h | |
15910 | index fff846b512e6..73614ce1d204 100644 | |
15911 | --- a/include/trace/events/writeback.h | |
15912 | +++ b/include/trace/events/writeback.h | |
15913 | @@ -134,58 +134,28 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, | |
15914 | #ifdef CREATE_TRACE_POINTS | |
15915 | #ifdef CONFIG_CGROUP_WRITEBACK | |
15916 | ||
15917 | -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) | |
15918 | +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb) | |
15919 | { | |
15920 | - return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1; | |
15921 | + return wb->memcg_css->cgroup->kn->ino; | |
15922 | } | |
15923 | ||
15924 | -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) | |
15925 | -{ | |
15926 | - struct cgroup *cgrp = wb->memcg_css->cgroup; | |
15927 | - char *path; | |
15928 | - | |
15929 | - path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1); | |
15930 | - WARN_ON_ONCE(path != buf); | |
15931 | -} | |
15932 | - | |
15933 | -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) | |
15934 | -{ | |
15935 | - if (wbc->wb) | |
15936 | - return __trace_wb_cgroup_size(wbc->wb); | |
15937 | - else | |
15938 | - return 2; | |
15939 | -} | |
15940 | - | |
15941 | -static inline void __trace_wbc_assign_cgroup(char *buf, | |
15942 | - struct writeback_control *wbc) | |
15943 | +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc) | |
15944 | { | |
15945 | if (wbc->wb) | |
15946 | - __trace_wb_assign_cgroup(buf, wbc->wb); | |
15947 | + return __trace_wb_assign_cgroup(wbc->wb); | |
15948 | else | |
15949 | - strcpy(buf, "/"); | |
15950 | + return -1U; | |
15951 | } | |
15952 | - | |
15953 | #else /* CONFIG_CGROUP_WRITEBACK */ | |
15954 | ||
15955 | -static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb) | |
15956 | -{ | |
15957 | - return 2; | |
15958 | -} | |
15959 | - | |
15960 | -static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb) | |
15961 | -{ | |
15962 | - strcpy(buf, "/"); | |
15963 | -} | |
15964 | - | |
15965 | -static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc) | |
15966 | +static inline unsigned int __trace_wb_assign_cgroup(struct bdi_writeback *wb) | |
15967 | { | |
15968 | - return 2; | |
15969 | + return -1U; | |
15970 | } | |
15971 | ||
15972 | -static inline void __trace_wbc_assign_cgroup(char *buf, | |
15973 | - struct writeback_control *wbc) | |
15974 | +static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *wbc) | |
15975 | { | |
15976 | - strcpy(buf, "/"); | |
15977 | + return -1U; | |
15978 | } | |
15979 | ||
15980 | #endif /* CONFIG_CGROUP_WRITEBACK */ | |
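Reporting the kernfs inode number instead of formatting a path keeps these tracepoints fast and fixed-size; a trace consumer can map cgroup_ino back to a cgroup by comparing it against st_ino of directories under the cgroupfs mount. A hypothetical userspace check, as a sketch:

    #include <sys/stat.h>

    /* returns 1 if 'path' names the cgroup a trace record referred to */
    static int hyp_match_cgroup(const char *path, unsigned int cgroup_ino)
    {
    	struct stat st;

    	if (stat(path, &st))
    		return 0;
    	return (unsigned int)st.st_ino == cgroup_ino;
    }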
15981 | @@ -201,7 +171,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, | |
15982 | __array(char, name, 32) | |
15983 | __field(unsigned long, ino) | |
15984 | __field(int, sync_mode) | |
15985 | - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) | |
15986 | + __field(unsigned int, cgroup_ino) | |
15987 | ), | |
15988 | ||
15989 | TP_fast_assign( | |
15990 | @@ -209,14 +179,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template, | |
15991 | dev_name(inode_to_bdi(inode)->dev), 32); | |
15992 | __entry->ino = inode->i_ino; | |
15993 | __entry->sync_mode = wbc->sync_mode; | |
15994 | - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); | |
15995 | + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); | |
15996 | ), | |
15997 | ||
15998 | - TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s", | |
15999 | + TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup_ino=%u", | |
16000 | __entry->name, | |
16001 | __entry->ino, | |
16002 | __entry->sync_mode, | |
16003 | - __get_str(cgroup) | |
16004 | + __entry->cgroup_ino | |
16005 | ) | |
16006 | ); | |
16007 | ||
16008 | @@ -246,7 +216,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |
16009 | __field(int, range_cyclic) | |
16010 | __field(int, for_background) | |
16011 | __field(int, reason) | |
16012 | - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) | |
16013 | + __field(unsigned int, cgroup_ino) | |
16014 | ), | |
16015 | TP_fast_assign( | |
16016 | strncpy(__entry->name, | |
16017 | @@ -258,10 +228,10 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |
16018 | __entry->range_cyclic = work->range_cyclic; | |
16019 | __entry->for_background = work->for_background; | |
16020 | __entry->reason = work->reason; | |
16021 | - __trace_wb_assign_cgroup(__get_str(cgroup), wb); | |
16022 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); | |
16023 | ), | |
16024 | TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " | |
16025 | - "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s", | |
16026 | + "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup_ino=%u", | |
16027 | __entry->name, | |
16028 | MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), | |
16029 | __entry->nr_pages, | |
16030 | @@ -270,7 +240,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |
16031 | __entry->range_cyclic, | |
16032 | __entry->for_background, | |
16033 | __print_symbolic(__entry->reason, WB_WORK_REASON), | |
16034 | - __get_str(cgroup) | |
16035 | + __entry->cgroup_ino | |
16036 | ) | |
16037 | ); | |
16038 | #define DEFINE_WRITEBACK_WORK_EVENT(name) \ | |
16039 | @@ -300,15 +270,15 @@ DECLARE_EVENT_CLASS(writeback_class, | |
16040 | TP_ARGS(wb), | |
16041 | TP_STRUCT__entry( | |
16042 | __array(char, name, 32) | |
16043 | - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) | |
16044 | + __field(unsigned int, cgroup_ino) | |
16045 | ), | |
16046 | TP_fast_assign( | |
16047 | strncpy(__entry->name, dev_name(wb->bdi->dev), 32); | |
16048 | - __trace_wb_assign_cgroup(__get_str(cgroup), wb); | |
16049 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); | |
16050 | ), | |
16051 | - TP_printk("bdi %s: cgroup=%s", | |
16052 | + TP_printk("bdi %s: cgroup_ino=%u", | |
16053 | __entry->name, | |
16054 | - __get_str(cgroup) | |
16055 | + __entry->cgroup_ino | |
16056 | ) | |
16057 | ); | |
16058 | #define DEFINE_WRITEBACK_EVENT(name) \ | |
16059 | @@ -347,7 +317,7 @@ DECLARE_EVENT_CLASS(wbc_class, | |
16060 | __field(int, range_cyclic) | |
16061 | __field(long, range_start) | |
16062 | __field(long, range_end) | |
16063 | - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) | |
16064 | + __field(unsigned int, cgroup_ino) | |
16065 | ), | |
16066 | ||
16067 | TP_fast_assign( | |
16068 | @@ -361,12 +331,12 @@ DECLARE_EVENT_CLASS(wbc_class, | |
16069 | __entry->range_cyclic = wbc->range_cyclic; | |
16070 | __entry->range_start = (long)wbc->range_start; | |
16071 | __entry->range_end = (long)wbc->range_end; | |
16072 | - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); | |
16073 | + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); | |
16074 | ), | |
16075 | ||
16076 | TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d " | |
16077 | "bgrd=%d reclm=%d cyclic=%d " | |
16078 | - "start=0x%lx end=0x%lx cgroup=%s", | |
16079 | + "start=0x%lx end=0x%lx cgroup_ino=%u", | |
16080 | __entry->name, | |
16081 | __entry->nr_to_write, | |
16082 | __entry->pages_skipped, | |
16083 | @@ -377,7 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class, | |
16084 | __entry->range_cyclic, | |
16085 | __entry->range_start, | |
16086 | __entry->range_end, | |
16087 | - __get_str(cgroup) | |
16088 | + __entry->cgroup_ino | |
16089 | ) | |
16090 | ) | |
16091 | ||
16092 | @@ -398,7 +368,7 @@ TRACE_EVENT(writeback_queue_io, | |
16093 | __field(long, age) | |
16094 | __field(int, moved) | |
16095 | __field(int, reason) | |
16096 | - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) | |
16097 | + __field(unsigned int, cgroup_ino) | |
16098 | ), | |
16099 | TP_fast_assign( | |
16100 | unsigned long *older_than_this = work->older_than_this; | |
16101 | @@ -408,15 +378,15 @@ TRACE_EVENT(writeback_queue_io, | |
16102 | (jiffies - *older_than_this) * 1000 / HZ : -1; | |
16103 | __entry->moved = moved; | |
16104 | __entry->reason = work->reason; | |
16105 | - __trace_wb_assign_cgroup(__get_str(cgroup), wb); | |
16106 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); | |
16107 | ), | |
16108 | - TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s", | |
16109 | + TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%u", | |
16110 | __entry->name, | |
16111 | __entry->older, /* older_than_this in jiffies */ | |
16112 | __entry->age, /* older_than_this in relative milliseconds */ | |
16113 | __entry->moved, | |
16114 | __print_symbolic(__entry->reason, WB_WORK_REASON), | |
16115 | - __get_str(cgroup) | |
16116 | + __entry->cgroup_ino | |
16117 | ) | |
16118 | ); | |
16119 | ||
16120 | @@ -484,7 +454,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, | |
16121 | __field(unsigned long, dirty_ratelimit) | |
16122 | __field(unsigned long, task_ratelimit) | |
16123 | __field(unsigned long, balanced_dirty_ratelimit) | |
16124 | - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) | |
16125 | + __field(unsigned int, cgroup_ino) | |
16126 | ), | |
16127 | ||
16128 | TP_fast_assign( | |
16129 | @@ -496,13 +466,13 @@ TRACE_EVENT(bdi_dirty_ratelimit, | |
16130 | __entry->task_ratelimit = KBps(task_ratelimit); | |
16131 | __entry->balanced_dirty_ratelimit = | |
16132 | KBps(wb->balanced_dirty_ratelimit); | |
16133 | - __trace_wb_assign_cgroup(__get_str(cgroup), wb); | |
16134 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); | |
16135 | ), | |
16136 | ||
16137 | TP_printk("bdi %s: " | |
16138 | "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " | |
16139 | "dirty_ratelimit=%lu task_ratelimit=%lu " | |
16140 | - "balanced_dirty_ratelimit=%lu cgroup=%s", | |
16141 | + "balanced_dirty_ratelimit=%lu cgroup_ino=%u", | |
16142 | __entry->bdi, | |
16143 | __entry->write_bw, /* write bandwidth */ | |
16144 | __entry->avg_write_bw, /* avg write bandwidth */ | |
16145 | @@ -510,7 +480,7 @@ TRACE_EVENT(bdi_dirty_ratelimit, | |
16146 | __entry->dirty_ratelimit, /* base ratelimit */ | |
16147 | __entry->task_ratelimit, /* ratelimit with position control */ | |
16148 | __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */ | |
16149 | - __get_str(cgroup) | |
16150 | + __entry->cgroup_ino | |
16151 | ) | |
16152 | ); | |
16153 | ||
16154 | @@ -548,7 +518,7 @@ TRACE_EVENT(balance_dirty_pages, | |
16155 | __field( long, pause) | |
16156 | __field(unsigned long, period) | |
16157 | __field( long, think) | |
16158 | - __dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb)) | |
16159 | + __field(unsigned int, cgroup_ino) | |
16160 | ), | |
16161 | ||
16162 | TP_fast_assign( | |
16163 | @@ -571,7 +541,7 @@ TRACE_EVENT(balance_dirty_pages, | |
16164 | __entry->period = period * 1000 / HZ; | |
16165 | __entry->pause = pause * 1000 / HZ; | |
16166 | __entry->paused = (jiffies - start_time) * 1000 / HZ; | |
16167 | - __trace_wb_assign_cgroup(__get_str(cgroup), wb); | |
16168 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); | |
16169 | ), | |
16170 | ||
16171 | ||
16172 | @@ -580,7 +550,7 @@ TRACE_EVENT(balance_dirty_pages, | |
16173 | "bdi_setpoint=%lu bdi_dirty=%lu " | |
16174 | "dirty_ratelimit=%lu task_ratelimit=%lu " | |
16175 | "dirtied=%u dirtied_pause=%u " | |
16176 | - "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s", | |
16177 | + "paused=%lu pause=%ld period=%lu think=%ld cgroup_ino=%u", | |
16178 | __entry->bdi, | |
16179 | __entry->limit, | |
16180 | __entry->setpoint, | |
16181 | @@ -595,7 +565,7 @@ TRACE_EVENT(balance_dirty_pages, | |
16182 | __entry->pause, /* ms */ | |
16183 | __entry->period, /* ms */ | |
16184 | __entry->think, /* ms */ | |
16185 | - __get_str(cgroup) | |
16186 | + __entry->cgroup_ino | |
16187 | ) | |
16188 | ); | |
16189 | ||
16190 | @@ -609,8 +579,7 @@ TRACE_EVENT(writeback_sb_inodes_requeue, | |
16191 | __field(unsigned long, ino) | |
16192 | __field(unsigned long, state) | |
16193 | __field(unsigned long, dirtied_when) | |
16194 | - __dynamic_array(char, cgroup, | |
16195 | - __trace_wb_cgroup_size(inode_to_wb(inode))) | |
16196 | + __field(unsigned int, cgroup_ino) | |
16197 | ), | |
16198 | ||
16199 | TP_fast_assign( | |
16200 | @@ -619,16 +588,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue, | |
16201 | __entry->ino = inode->i_ino; | |
16202 | __entry->state = inode->i_state; | |
16203 | __entry->dirtied_when = inode->dirtied_when; | |
16204 | - __trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode)); | |
16205 | + __entry->cgroup_ino = __trace_wb_assign_cgroup(inode_to_wb(inode)); | |
16206 | ), | |
16207 | ||
16208 | - TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s", | |
16209 | + TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup_ino=%u", | |
16210 | __entry->name, | |
16211 | __entry->ino, | |
16212 | show_inode_state(__entry->state), | |
16213 | __entry->dirtied_when, | |
16214 | (jiffies - __entry->dirtied_when) / HZ, | |
16215 | - __get_str(cgroup) | |
16216 | + __entry->cgroup_ino | |
16217 | ) | |
16218 | ); | |
16219 | ||
16220 | @@ -684,7 +653,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, | |
16221 | __field(unsigned long, writeback_index) | |
16222 | __field(long, nr_to_write) | |
16223 | __field(unsigned long, wrote) | |
16224 | - __dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc)) | |
16225 | + __field(unsigned int, cgroup_ino) | |
16226 | ), | |
16227 | ||
16228 | TP_fast_assign( | |
16229 | @@ -696,11 +665,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, | |
16230 | __entry->writeback_index = inode->i_mapping->writeback_index; | |
16231 | __entry->nr_to_write = nr_to_write; | |
16232 | __entry->wrote = nr_to_write - wbc->nr_to_write; | |
16233 | - __trace_wbc_assign_cgroup(__get_str(cgroup), wbc); | |
16234 | + __entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc); | |
16235 | ), | |
16236 | ||
16237 | TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu " | |
16238 | - "index=%lu to_write=%ld wrote=%lu cgroup=%s", | |
16239 | + "index=%lu to_write=%ld wrote=%lu cgroup_ino=%u", | |
16240 | __entry->name, | |
16241 | __entry->ino, | |
16242 | show_inode_state(__entry->state), | |
16243 | @@ -709,7 +678,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, | |
16244 | __entry->writeback_index, | |
16245 | __entry->nr_to_write, | |
16246 | __entry->wrote, | |
16247 | - __get_str(cgroup) | |
16248 | + __entry->cgroup_ino | |
16249 | ) | |
16250 | ); | |
16251 | ||
16252 | diff --git a/init/Kconfig b/init/Kconfig | |
16253 | index 235c7a2c0d20..a7c81c0911da 100644 | |
16254 | --- a/init/Kconfig | |
16255 | +++ b/init/Kconfig | |
16256 | @@ -498,7 +498,7 @@ config TINY_RCU | |
16257 | ||
16258 | config RCU_EXPERT | |
16259 | bool "Make expert-level adjustments to RCU configuration" | |
16260 | - default n | |
16261 | + default y if PREEMPT_RT_FULL | |
16262 | help | |
16263 | This option needs to be enabled if you wish to make | |
16264 | expert-level adjustments to RCU configuration. By default, | |
16265 | @@ -614,7 +614,7 @@ config RCU_FANOUT_LEAF | |
16266 | ||
16267 | config RCU_FAST_NO_HZ | |
16268 | bool "Accelerate last non-dyntick-idle CPU's grace periods" | |
16269 | - depends on NO_HZ_COMMON && SMP && RCU_EXPERT | |
16270 | + depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL | |
16271 | default n | |
16272 | help | |
16273 | This option permits CPUs to enter dynticks-idle state even if | |
16274 | @@ -641,7 +641,7 @@ config TREE_RCU_TRACE | |
16275 | config RCU_BOOST | |
16276 | bool "Enable RCU priority boosting" | |
16277 | depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT | |
16278 | - default n | |
16279 | + default y if PREEMPT_RT_FULL | |
16280 | help | |
16281 | This option boosts the priority of preempted RCU readers that | |
16282 | block the current preemptible RCU grace period for too long. | |
16283 | @@ -1106,6 +1106,7 @@ config CFS_BANDWIDTH | |
16284 | config RT_GROUP_SCHED | |
16285 | bool "Group scheduling for SCHED_RR/FIFO" | |
16286 | depends on CGROUP_SCHED | |
16287 | + depends on !PREEMPT_RT_FULL | |
16288 | default n | |
16289 | help | |
16290 | This feature lets you explicitly allocate real CPU bandwidth | |
16291 | @@ -1719,6 +1720,7 @@ choice | |
16292 | ||
16293 | config SLAB | |
16294 | bool "SLAB" | |
16295 | + depends on !PREEMPT_RT_FULL | |
16296 | help | |
16297 | The regular slab allocator that is established and known to work | |
16298 | well in all environments. It organizes cache hot objects in | |
16299 | @@ -1737,6 +1739,7 @@ config SLUB | |
16300 | config SLOB | |
16301 | depends on EXPERT | |
16302 | bool "SLOB (Simple Allocator)" | |
16303 | + depends on !PREEMPT_RT_FULL | |
16304 | help | |
16305 | SLOB replaces the stock allocator with a drastically simpler | |
16306 | allocator. SLOB is generally more space efficient but | |
16307 | @@ -1746,7 +1749,7 @@ endchoice | |
16308 | ||
16309 | config SLUB_CPU_PARTIAL | |
16310 | default y | |
16311 | - depends on SLUB && SMP | |
16312 | + depends on SLUB && SMP && !PREEMPT_RT_FULL | |
16313 | bool "SLUB per cpu partial cache" | |
16314 | help | |
16315 | Per cpu partial caches accelerate object allocation and freeing | |
16316 | diff --git a/init/Makefile b/init/Makefile | |
16317 | index 7bc47ee31c36..88cf473554e0 100644 | |
16318 | --- a/init/Makefile | |
16319 | +++ b/init/Makefile | |
16320 | @@ -33,4 +33,4 @@ silent_chk_compile.h = : | |
16321 | include/generated/compile.h: FORCE | |
16322 | @$($(quiet)chk_compile.h) | |
16323 | $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \ | |
16324 | - "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)" | |
16325 | + "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CONFIG_PREEMPT_RT_FULL)" "$(CC) $(KBUILD_CFLAGS)" | |
16326 | diff --git a/init/main.c b/init/main.c | |
16327 | index 9e64d7097f1a..4a76e629c137 100644 | |
16328 | --- a/init/main.c | |
16329 | +++ b/init/main.c | |
16330 | @@ -530,6 +530,7 @@ asmlinkage __visible void __init start_kernel(void) | |
16331 | setup_command_line(command_line); | |
16332 | setup_nr_cpu_ids(); | |
16333 | setup_per_cpu_areas(); | |
16334 | + softirq_early_init(); | |
16335 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | |
16336 | ||
16337 | build_all_zonelists(NULL, NULL); | |
16338 | diff --git a/ipc/msg.c b/ipc/msg.c | |
16339 | index c6521c205cb4..996d89023552 100644 | |
16340 | --- a/ipc/msg.c | |
16341 | +++ b/ipc/msg.c | |
16342 | @@ -183,20 +183,14 @@ static void ss_wakeup(struct list_head *h, int kill) | |
16343 | } | |
16344 | } | |
16345 | ||
16346 | -static void expunge_all(struct msg_queue *msq, int res) | |
16347 | +static void expunge_all(struct msg_queue *msq, int res, | |
16348 | + struct wake_q_head *wake_q) | |
16349 | { | |
16350 | struct msg_receiver *msr, *t; | |
16351 | ||
16352 | list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { | |
16353 | - msr->r_msg = NULL; /* initialize expunge ordering */ | |
16354 | - wake_up_process(msr->r_tsk); | |
16355 | - /* | |
16356 | - * Ensure that the wakeup is visible before setting r_msg as | |
16357 | - * the receiving end depends on it: either spinning on a nil, | |
16358 | - * or dealing with -EAGAIN cases. See lockless receive part 1 | |
16359 | - * and 2 in do_msgrcv(). | |
16360 | - */ | |
16361 | - smp_wmb(); /* barrier (B) */ | |
16362 | + | |
16363 | + wake_q_add(wake_q, msr->r_tsk); | |
16364 | msr->r_msg = ERR_PTR(res); | |
16365 | } | |
16366 | } | |
16367 | @@ -213,11 +207,13 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |
16368 | { | |
16369 | struct msg_msg *msg, *t; | |
16370 | struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); | |
16371 | + WAKE_Q(wake_q); | |
16372 | ||
16373 | - expunge_all(msq, -EIDRM); | |
16374 | + expunge_all(msq, -EIDRM, &wake_q); | |
16375 | ss_wakeup(&msq->q_senders, 1); | |
16376 | msg_rmid(ns, msq); | |
16377 | ipc_unlock_object(&msq->q_perm); | |
16378 | + wake_up_q(&wake_q); | |
16379 | rcu_read_unlock(); | |
16380 | ||
16381 | list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) { | |
16382 | @@ -342,6 +338,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
16383 | struct kern_ipc_perm *ipcp; | |
16384 | struct msqid64_ds uninitialized_var(msqid64); | |
16385 | struct msg_queue *msq; | |
16386 | + WAKE_Q(wake_q); | |
16387 | int err; | |
16388 | ||
16389 | if (cmd == IPC_SET) { | |
16390 | @@ -389,7 +386,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
16391 | /* sleeping receivers might be excluded by | |
16392 | * stricter permissions. | |
16393 | */ | |
16394 | - expunge_all(msq, -EAGAIN); | |
16395 | + expunge_all(msq, -EAGAIN, &wake_q); | |
16396 | /* sleeping senders might be able to send | |
16397 | * due to a larger queue size. | |
16398 | */ | |
16399 | @@ -402,6 +399,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, | |
16400 | ||
16401 | out_unlock0: | |
16402 | ipc_unlock_object(&msq->q_perm); | |
16403 | + wake_up_q(&wake_q); | |
16404 | out_unlock1: | |
16405 | rcu_read_unlock(); | |
16406 | out_up: | |
16407 | @@ -566,7 +564,8 @@ static int testmsg(struct msg_msg *msg, long type, int mode) | |
16408 | return 0; | |
16409 | } | |
16410 | ||
16411 | -static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) | |
16412 | +static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, | |
16413 | + struct wake_q_head *wake_q) | |
16414 | { | |
16415 | struct msg_receiver *msr, *t; | |
16416 | ||
16417 | @@ -577,27 +576,13 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg) | |
16418 | ||
16419 | list_del(&msr->r_list); | |
16420 | if (msr->r_maxsize < msg->m_ts) { | |
16421 | - /* initialize pipelined send ordering */ | |
16422 | - msr->r_msg = NULL; | |
16423 | - wake_up_process(msr->r_tsk); | |
16424 | - /* barrier (B) see barrier comment below */ | |
16425 | - smp_wmb(); | |
16426 | + wake_q_add(wake_q, msr->r_tsk); | |
16427 | msr->r_msg = ERR_PTR(-E2BIG); | |
16428 | } else { | |
16429 | - msr->r_msg = NULL; | |
16430 | msq->q_lrpid = task_pid_vnr(msr->r_tsk); | |
16431 | msq->q_rtime = get_seconds(); | |
16432 | - wake_up_process(msr->r_tsk); | |
16433 | - /* | |
16434 | - * Ensure that the wakeup is visible before | |
16435 | - * setting r_msg, as the receiving can otherwise | |
16436 | - * exit - once r_msg is set, the receiver can | |
16437 | - * continue. See lockless receive part 1 and 2 | |
16438 | - * in do_msgrcv(). Barrier (B). | |
16439 | - */ | |
16440 | - smp_wmb(); | |
16441 | + wake_q_add(wake_q, msr->r_tsk); | |
16442 | msr->r_msg = msg; | |
16443 | - | |
16444 | return 1; | |
16445 | } | |
16446 | } | |
16447 | @@ -613,6 +598,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
16448 | struct msg_msg *msg; | |
16449 | int err; | |
16450 | struct ipc_namespace *ns; | |
16451 | + WAKE_Q(wake_q); | |
16452 | ||
16453 | ns = current->nsproxy->ipc_ns; | |
16454 | ||
16455 | @@ -698,7 +684,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
16456 | msq->q_lspid = task_tgid_vnr(current); | |
16457 | msq->q_stime = get_seconds(); | |
16458 | ||
16459 | - if (!pipelined_send(msq, msg)) { | |
16460 | + if (!pipelined_send(msq, msg, &wake_q)) { | |
16461 | /* no one is waiting for this message, enqueue it */ | |
16462 | list_add_tail(&msg->m_list, &msq->q_messages); | |
16463 | msq->q_cbytes += msgsz; | |
16464 | @@ -712,6 +698,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |
16465 | ||
16466 | out_unlock0: | |
16467 | ipc_unlock_object(&msq->q_perm); | |
16468 | + wake_up_q(&wake_q); | |
16469 | out_unlock1: | |
16470 | rcu_read_unlock(); | |
16471 | if (msg != NULL) | |
16472 | @@ -932,57 +919,25 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl | |
16473 | rcu_read_lock(); | |
16474 | ||
16475 | /* Lockless receive, part 2: | |
16476 | - * Wait until pipelined_send or expunge_all are outside of | |
16477 | - * wake_up_process(). There is a race with exit(), see | |
16478 | - * ipc/mqueue.c for the details. The correct serialization | |
16479 | - * ensures that a receiver cannot continue without the wakeup | |
16480 | - * being visibible _before_ setting r_msg: | |
16481 | + * The work in pipelined_send() and expunge_all(): | |
16482 | + * - Set pointer to message | |
16483 | + * - Queue the receiver task for later wakeup | |
16484 | + * - Wake up the process after the lock is dropped. | |
16485 | * | |
16486 | - * CPU 0 CPU 1 | |
16487 | - * <loop receiver> | |
16488 | - * smp_rmb(); (A) <-- pair -. <waker thread> | |
16489 | - * <load ->r_msg> | msr->r_msg = NULL; | |
16490 | - * | wake_up_process(); | |
16491 | - * <continue> `------> smp_wmb(); (B) | |
16492 | - * msr->r_msg = msg; | |
16493 | - * | |
16494 | - * Where (A) orders the message value read and where (B) orders | |
16495 | - * the write to the r_msg -- done in both pipelined_send and | |
16496 | - * expunge_all. | |
16497 | + * Should the process wake up before this wakeup (due to a | |
16498 | + * signal) it will either see the message and continue … | |
16499 | */ | |
16500 | - for (;;) { | |
16501 | - /* | |
16502 | - * Pairs with writer barrier in pipelined_send | |
16503 | - * or expunge_all. | |
16504 | - */ | |
16505 | - smp_rmb(); /* barrier (A) */ | |
16506 | - msg = (struct msg_msg *)msr_d.r_msg; | |
16507 | - if (msg) | |
16508 | - break; | |
16509 | ||
16510 | - /* | |
16511 | - * The cpu_relax() call is a compiler barrier | |
16512 | - * which forces everything in this loop to be | |
16513 | - * re-loaded. | |
16514 | - */ | |
16515 | - cpu_relax(); | |
16516 | - } | |
16517 | - | |
16518 | - /* Lockless receive, part 3: | |
16519 | - * If there is a message or an error then accept it without | |
16520 | - * locking. | |
16521 | - */ | |
16522 | + msg = (struct msg_msg *)msr_d.r_msg; | |
16523 | if (msg != ERR_PTR(-EAGAIN)) | |
16524 | goto out_unlock1; | |
16525 | ||
16526 | - /* Lockless receive, part 3: | |
16527 | - * Acquire the queue spinlock. | |
16528 | - */ | |
16529 | + /* | |
16530 | + * … or see -EAGAIN, acquire the lock to check the message | |
16531 | + * again. | |
16532 | + */ | |
16533 | ipc_lock_object(&msq->q_perm); | |
16534 | ||
16535 | - /* Lockless receive, part 4: | |
16536 | - * Repeat test after acquiring the spinlock. | |
16537 | - */ | |
16538 | msg = (struct msg_msg *)msr_d.r_msg; | |
16539 | if (msg != ERR_PTR(-EAGAIN)) | |
16540 | goto out_unlock0; | |
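The conversion above is the wake_q pattern: wakeups are recorded under the lock and issued only after it is dropped, avoiding waking a task that would immediately block on the same lock (and, on RT, avoiding wake_up_process() under a sleeping lock). The generic shape, as a sketch with hypothetical waiter bookkeeping:

    struct hyp_waiter {
    	struct list_head list;
    	struct task_struct *task;
    };

    static void hyp_wake_all(spinlock_t *lock, struct list_head *waiters)
    {
    	struct hyp_waiter *w, *tmp;
    	WAKE_Q(wake_q);			/* on-stack wake queue */

    	spin_lock(lock);
    	list_for_each_entry_safe(w, tmp, waiters, list) {
    		list_del(&w->list);
    		wake_q_add(&wake_q, w->task);	/* record, do not wake yet */
    	}
    	spin_unlock(lock);

    	wake_up_q(&wake_q);		/* real wakeups, lock already dropped */
    }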
16541 | diff --git a/ipc/sem.c b/ipc/sem.c | |
cb95d48a | 16542 | index 9862c3d1c26d..ef34d7376697 100644 |
16543 | --- a/ipc/sem.c |
16544 | +++ b/ipc/sem.c | |
cb95d48a | 16545 | @@ -708,6 +708,13 @@ undo: |
16546 | static void wake_up_sem_queue_prepare(struct list_head *pt, |
16547 | struct sem_queue *q, int error) | |
16548 | { | |
16549 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
16550 | + struct task_struct *p = q->sleeper; | |
16551 | + get_task_struct(p); | |
16552 | + q->status = error; | |
16553 | + wake_up_process(p); | |
16554 | + put_task_struct(p); | |
16555 | +#else | |
16556 | if (list_empty(pt)) { | |
16557 | /* | |
16558 | * Hold preempt off so that we don't get preempted and have the | |
cb95d48a | 16559 | @@ -719,6 +726,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, |
16560 | q->pid = error; |
16561 | ||
16562 | list_add_tail(&q->list, pt); | |
16563 | +#endif | |
16564 | } | |
16565 | ||
16566 | /** | |
cb95d48a | 16567 | @@ -732,6 +740,7 @@ static void wake_up_sem_queue_prepare(struct list_head *pt, |
16568 | */ |
16569 | static void wake_up_sem_queue_do(struct list_head *pt) | |
16570 | { | |
16571 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
16572 | struct sem_queue *q, *t; | |
16573 | int did_something; | |
16574 | ||
cb95d48a | 16575 | @@ -744,6 +753,7 @@ static void wake_up_sem_queue_do(struct list_head *pt) |
16576 | } |
16577 | if (did_something) | |
16578 | preempt_enable(); | |
16579 | +#endif | |
16580 | } | |
16581 | ||
16582 | static void unlink_queue(struct sem_array *sma, struct sem_queue *q) | |
16583 | diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks | |
16584 | index ebdb0043203a..b9e6aa7e5aa6 100644 | |
16585 | --- a/kernel/Kconfig.locks | |
16586 | +++ b/kernel/Kconfig.locks | |
16587 | @@ -225,11 +225,11 @@ config ARCH_SUPPORTS_ATOMIC_RMW | |
16588 | ||
16589 | config MUTEX_SPIN_ON_OWNER | |
16590 | def_bool y | |
16591 | - depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW | |
16592 | + depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
16593 | ||
16594 | config RWSEM_SPIN_ON_OWNER | |
16595 | def_bool y | |
16596 | - depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | |
16597 | + depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW && !PREEMPT_RT_FULL | |
16598 | ||
16599 | config LOCK_SPIN_ON_OWNER | |
16600 | def_bool y | |
16601 | diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt | |
16602 | index 3f9c97419f02..11dbe26a8279 100644 | |
16603 | --- a/kernel/Kconfig.preempt | |
16604 | +++ b/kernel/Kconfig.preempt | |
16605 | @@ -1,3 +1,16 @@ | |
16606 | +config PREEMPT | |
16607 | + bool | |
16608 | + select PREEMPT_COUNT | |
16609 | + | |
16610 | +config PREEMPT_RT_BASE | |
16611 | + bool | |
16612 | + select PREEMPT | |
16613 | + | |
16614 | +config HAVE_PREEMPT_LAZY | |
16615 | + bool | |
16616 | + | |
16617 | +config PREEMPT_LAZY | |
16618 | + def_bool y if HAVE_PREEMPT_LAZY && PREEMPT_RT_FULL | |
16619 | ||
16620 | choice | |
16621 | prompt "Preemption Model" | |
16622 | @@ -33,9 +46,9 @@ config PREEMPT_VOLUNTARY | |
16623 | ||
16624 | Select this if you are building a kernel for a desktop system. | |
16625 | ||
16626 | -config PREEMPT | |
16627 | +config PREEMPT__LL | |
16628 | bool "Preemptible Kernel (Low-Latency Desktop)" | |
16629 | - select PREEMPT_COUNT | |
16630 | + select PREEMPT | |
16631 | select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK | |
16632 | help | |
16633 | This option reduces the latency of the kernel by making | |
16634 | @@ -52,6 +65,22 @@ config PREEMPT | |
16635 | embedded system with latency requirements in the milliseconds | |
16636 | range. | |
16637 | ||
16638 | +config PREEMPT_RTB | |
16639 | + bool "Preemptible Kernel (Basic RT)" | |
16640 | + select PREEMPT_RT_BASE | |
16641 | + help | |
16642 | + This option is basically the same as (Low-Latency Desktop) but | |
16643 | + enables changes which are preliminary for the full preemptible | |
16644 | + RT kernel. | |
16645 | + | |
16646 | +config PREEMPT_RT_FULL | |
16647 | + bool "Fully Preemptible Kernel (RT)" | |
16648 | + depends on IRQ_FORCED_THREADING | |
16649 | + select PREEMPT_RT_BASE | |
16650 | + select PREEMPT_RCU | |
16651 | + help | |
16652 | + Enable the complete set of PREEMPT_RT features for a fully preemptible kernel. | |
16653 | + | |
16654 | endchoice | |
16655 | ||
16656 | config PREEMPT_COUNT | |
16657 | diff --git a/kernel/cgroup.c b/kernel/cgroup.c | |
16658 | index a3424f28aaf4..69434d231e21 100644 | |
16659 | --- a/kernel/cgroup.c | |
16660 | +++ b/kernel/cgroup.c | |
16661 | @@ -4737,10 +4737,10 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) | |
16662 | queue_work(cgroup_destroy_wq, &css->destroy_work); | |
16663 | } | |
16664 | ||
16665 | -static void css_release_work_fn(struct work_struct *work) | |
16666 | +static void css_release_work_fn(struct swork_event *sev) | |
16667 | { | |
16668 | struct cgroup_subsys_state *css = | |
16669 | - container_of(work, struct cgroup_subsys_state, destroy_work); | |
16670 | + container_of(sev, struct cgroup_subsys_state, destroy_swork); | |
16671 | struct cgroup_subsys *ss = css->ss; | |
16672 | struct cgroup *cgrp = css->cgroup; | |
16673 | ||
16674 | @@ -4779,8 +4779,8 @@ static void css_release(struct percpu_ref *ref) | |
16675 | struct cgroup_subsys_state *css = | |
16676 | container_of(ref, struct cgroup_subsys_state, refcnt); | |
16677 | ||
16678 | - INIT_WORK(&css->destroy_work, css_release_work_fn); | |
16679 | - queue_work(cgroup_destroy_wq, &css->destroy_work); | |
16680 | + INIT_SWORK(&css->destroy_swork, css_release_work_fn); | |
16681 | + swork_queue(&css->destroy_swork); | |
16682 | } | |
16683 | ||
16684 | static void init_and_link_css(struct cgroup_subsys_state *css, | |
16685 | @@ -5397,6 +5397,7 @@ static int __init cgroup_wq_init(void) | |
16686 | */ | |
16687 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | |
16688 | BUG_ON(!cgroup_destroy_wq); | |
16689 | + BUG_ON(swork_get()); | |
16690 | ||
16691 | /* | |
16692 | * Used to destroy pidlists and separate to serve as flush domain. | |
16693 | diff --git a/kernel/cpu.c b/kernel/cpu.c | |
16694 | index 85ff5e26e23b..8edd3c716092 100644 | |
16695 | --- a/kernel/cpu.c | |
16696 | +++ b/kernel/cpu.c | |
16697 | @@ -75,8 +75,8 @@ static struct { | |
16698 | #endif | |
16699 | } cpu_hotplug = { | |
16700 | .active_writer = NULL, | |
16701 | - .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), | |
16702 | .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), | |
16703 | + .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq), | |
16704 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
16705 | .dep_map = {.name = "cpu_hotplug.lock" }, | |
16706 | #endif | |
16707 | @@ -89,6 +89,289 @@ static struct { | |
16708 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) | |
16709 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | |
16710 | ||
16711 | +/** | |
16712 | + * hotplug_pcp - per cpu hotplug descriptor | |
16713 | + * @unplug: set when pin_current_cpu() needs to sync tasks | |
16714 | + * @sync_tsk: the task that waits for tasks to finish pinned sections | |
16715 | + * @refcount: counter of tasks in pinned sections | |
16716 | + * @grab_lock: set when the tasks entering pinned sections should wait | |
16717 | + * @synced: notifier for @sync_tsk to tell cpu_down it's finished | |
16718 | + * @mutex: the mutex to make tasks wait (used when @grab_lock is true) | |
16719 | + * @mutex_init: zero if the mutex hasn't been initialized yet. | |
16720 | + * | |
16721 | + * Although @unplug and @sync_tsk may point to the same task, @unplug | |
16722 | + * is used as a flag and remains set after @sync_tsk has exited and | |
16723 | + * been set to NULL. | |
16724 | + */ | |
16725 | +struct hotplug_pcp { | |
16726 | + struct task_struct *unplug; | |
16727 | + struct task_struct *sync_tsk; | |
16728 | + int refcount; | |
16729 | + int grab_lock; | |
16730 | + struct completion synced; | |
16731 | + struct completion unplug_wait; | |
16732 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16733 | + /* | |
16734 | + * Note, on PREEMPT_RT, the hotplug lock must save the state of | |
16735 | + * the task, otherwise the mutex will cause the task to fail | |
16736 | + * to sleep when required. (Because it's called from migrate_disable()) | |
16737 | + * | |
16738 | + * The spinlock_t on PREEMPT_RT is a mutex that saves the task's | |
16739 | + * state. | |
16740 | + */ | |
16741 | + spinlock_t lock; | |
16742 | +#else | |
16743 | + struct mutex mutex; | |
16744 | +#endif | |
16745 | + int mutex_init; | |
16746 | +}; | |
16747 | + | |
16748 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16749 | +# define hotplug_lock(hp) rt_spin_lock__no_mg(&(hp)->lock) | |
16750 | +# define hotplug_unlock(hp) rt_spin_unlock__no_mg(&(hp)->lock) | |
16751 | +#else | |
16752 | +# define hotplug_lock(hp) mutex_lock(&(hp)->mutex) | |
16753 | +# define hotplug_unlock(hp) mutex_unlock(&(hp)->mutex) | |
16754 | +#endif | |
16755 | + | |
16756 | +static DEFINE_PER_CPU(struct hotplug_pcp, hotplug_pcp); | |
16757 | + | |
16758 | +/** | |
16759 | + * pin_current_cpu - Prevent the current cpu from being unplugged | |
16760 | + * | |
16761 | + * Lightweight version of get_online_cpus() to prevent cpu from being | |
16762 | + * unplugged when code runs in a migration disabled region. | |
16763 | + * | |
16764 | + * Must be called with preemption disabled (preempt_count = 1)! | |
16765 | + */ | |
16766 | +void pin_current_cpu(void) | |
16767 | +{ | |
16768 | + struct hotplug_pcp *hp; | |
16769 | + int force = 0; | |
16770 | + | |
16771 | +retry: | |
16772 | + hp = this_cpu_ptr(&hotplug_pcp); | |
16773 | + | |
16774 | + if (!hp->unplug || hp->refcount || force || preempt_count() > 1 || | |
16775 | + hp->unplug == current) { | |
16776 | + hp->refcount++; | |
16777 | + return; | |
16778 | + } | |
16779 | + if (hp->grab_lock) { | |
16780 | + preempt_enable(); | |
16781 | + hotplug_lock(hp); | |
16782 | + hotplug_unlock(hp); | |
16783 | + } else { | |
16784 | + preempt_enable(); | |
16785 | + /* | |
16786 | + * Try to push this task off of this CPU. | |
16787 | + */ | |
16788 | + if (!migrate_me()) { | |
16789 | + preempt_disable(); | |
16790 | + hp = this_cpu_ptr(&hotplug_pcp); | |
16791 | + if (!hp->grab_lock) { | |
16792 | + /* | |
16793 | + * Just let it continue; it's already pinned | |
16794 | + * or about to sleep. | |
16795 | + */ | |
16796 | + force = 1; | |
16797 | + goto retry; | |
16798 | + } | |
16799 | + preempt_enable(); | |
16800 | + } | |
16801 | + } | |
16802 | + preempt_disable(); | |
16803 | + goto retry; | |
16804 | +} | |
16805 | + | |
16806 | +/** | |
16807 | + * unpin_current_cpu - Allow unplug of current cpu | |
16808 | + * | |
16809 | + * Must be called with preemption or interrupts disabled! | |
16810 | + */ | |
16811 | +void unpin_current_cpu(void) | |
16812 | +{ | |
16813 | + struct hotplug_pcp *hp = this_cpu_ptr(&hotplug_pcp); | |
16814 | + | |
16815 | + WARN_ON(hp->refcount <= 0); | |
16816 | + | |
16817 | + /* This is safe. sync_unplug_thread is pinned to this cpu */ | |
16818 | + if (!--hp->refcount && hp->unplug && hp->unplug != current) | |
16819 | + wake_up_process(hp->unplug); | |
16820 | +} | |
16821 | + | |
16822 | +static void wait_for_pinned_cpus(struct hotplug_pcp *hp) | |
16823 | +{ | |
16824 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16825 | + while (hp->refcount) { | |
16826 | + schedule_preempt_disabled(); | |
16827 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16828 | + } | |
16829 | +} | |
16830 | + | |
16831 | +static int sync_unplug_thread(void *data) | |
16832 | +{ | |
16833 | + struct hotplug_pcp *hp = data; | |
16834 | + | |
16835 | + wait_for_completion(&hp->unplug_wait); | |
16836 | + preempt_disable(); | |
16837 | + hp->unplug = current; | |
16838 | + wait_for_pinned_cpus(hp); | |
16839 | + | |
16840 | + /* | |
16841 | + * This thread will synchronize the cpu_down() with threads | |
16842 | + * that have pinned the CPU. When the pinned CPU count reaches | |
16843 | + * zero, we inform the cpu_down code to continue to the next step. | |
16844 | + */ | |
16845 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16846 | + preempt_enable(); | |
16847 | + complete(&hp->synced); | |
16848 | + | |
16849 | + /* | |
16850 | + * If all succeeds, the next step will need tasks to wait till | |
16851 | + * the CPU is offline before continuing. To do this, the grab_lock | |
16852 | + * is set and tasks going into pin_current_cpu() will block on the | |
16853 | + * mutex. But we still need to wait for those that are already in | |
16854 | + * pinned CPU sections. If cpu_down() fails, kthread_should_stop() | |
16855 | + * will kick this thread out. | |
16856 | + */ | |
16857 | + while (!hp->grab_lock && !kthread_should_stop()) { | |
16858 | + schedule(); | |
16859 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16860 | + } | |
16861 | + | |
16862 | + /* Make sure grab_lock is seen before we see a stale completion */ | |
16863 | + smp_mb(); | |
16864 | + | |
16865 | + /* | |
16866 | + * Now just before cpu_down() enters stop machine, we need to make | |
16867 | + * sure all tasks that are in pinned CPU sections are out, and new | |
16868 | + * tasks will now grab the lock, keeping them from entering pinned | |
16869 | + * CPU sections. | |
16870 | + */ | |
16871 | + if (!kthread_should_stop()) { | |
16872 | + preempt_disable(); | |
16873 | + wait_for_pinned_cpus(hp); | |
16874 | + preempt_enable(); | |
16875 | + complete(&hp->synced); | |
16876 | + } | |
16877 | + | |
16878 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16879 | + while (!kthread_should_stop()) { | |
16880 | + schedule(); | |
16881 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
16882 | + } | |
16883 | + set_current_state(TASK_RUNNING); | |
16884 | + | |
16885 | + /* | |
16886 | + * Force this thread off this CPU as it's going down and | |
16887 | + * we don't want any more work on this CPU. | |
16888 | + */ | |
16889 | + current->flags &= ~PF_NO_SETAFFINITY; | |
16890 | + set_cpus_allowed_ptr(current, cpu_present_mask); | |
16891 | + migrate_me(); | |
16892 | + return 0; | |
16893 | +} | |
16894 | + | |
16895 | +static void __cpu_unplug_sync(struct hotplug_pcp *hp) | |
16896 | +{ | |
16897 | + wake_up_process(hp->sync_tsk); | |
16898 | + wait_for_completion(&hp->synced); | |
16899 | +} | |
16900 | + | |
16901 | +static void __cpu_unplug_wait(unsigned int cpu) | |
16902 | +{ | |
16903 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
16904 | + | |
16905 | + complete(&hp->unplug_wait); | |
16906 | + wait_for_completion(&hp->synced); | |
16907 | +} | |
16908 | + | |
16909 | +/* | |
16910 | + * Start the sync_unplug_thread on the target cpu and wait for it to | |
16911 | + * complete. | |
16912 | + */ | |
16913 | +static int cpu_unplug_begin(unsigned int cpu) | |
16914 | +{ | |
16915 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
16916 | + int err; | |
16917 | + | |
16918 | + /* Protected by cpu_hotplug.lock */ | |
16919 | + if (!hp->mutex_init) { | |
16920 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
16921 | + spin_lock_init(&hp->lock); | |
16922 | +#else | |
16923 | + mutex_init(&hp->mutex); | |
16924 | +#endif | |
16925 | + hp->mutex_init = 1; | |
16926 | + } | |
16927 | + | |
16928 | + /* Inform the scheduler to migrate tasks off this CPU */ | |
16929 | + tell_sched_cpu_down_begin(cpu); | |
16930 | + | |
16931 | + init_completion(&hp->synced); | |
16932 | + init_completion(&hp->unplug_wait); | |
16933 | + | |
16934 | + hp->sync_tsk = kthread_create(sync_unplug_thread, hp, "sync_unplug/%d", cpu); | |
16935 | + if (IS_ERR(hp->sync_tsk)) { | |
16936 | + err = PTR_ERR(hp->sync_tsk); | |
16937 | + hp->sync_tsk = NULL; | |
16938 | + return err; | |
16939 | + } | |
16940 | + kthread_bind(hp->sync_tsk, cpu); | |
16941 | + | |
16942 | + /* | |
16943 | + * Wait for tasks to get out of the pinned sections; | |
16944 | + * it's still OK if new tasks enter. Some CPU notifiers will | |
16945 | + * wait for tasks that are going to enter these sections and | |
16946 | + * we must not have them block. | |
16947 | + */ | |
16948 | + wake_up_process(hp->sync_tsk); | |
16949 | + return 0; | |
16950 | +} | |
16951 | + | |
16952 | +static void cpu_unplug_sync(unsigned int cpu) | |
16953 | +{ | |
16954 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
16955 | + | |
16956 | + init_completion(&hp->synced); | |
16957 | + /* The completion needs to be initialized before setting grab_lock */ | |
16958 | + smp_wmb(); | |
16959 | + | |
16960 | + /* Grab the mutex before setting grab_lock */ | |
16961 | + hotplug_lock(hp); | |
16962 | + hp->grab_lock = 1; | |
16963 | + | |
16964 | + /* | |
16965 | + * The CPU notifiers have been completed. | |
16966 | + * Wait for tasks to get out of pinned CPU sections and have new | |
16967 | + * tasks block until the CPU is completely down. | |
16968 | + */ | |
16969 | + __cpu_unplug_sync(hp); | |
16970 | + | |
16971 | + /* All done with the sync thread */ | |
16972 | + kthread_stop(hp->sync_tsk); | |
16973 | + hp->sync_tsk = NULL; | |
16974 | +} | |
16975 | + | |
16976 | +static void cpu_unplug_done(unsigned int cpu) | |
16977 | +{ | |
16978 | + struct hotplug_pcp *hp = &per_cpu(hotplug_pcp, cpu); | |
16979 | + | |
16980 | + hp->unplug = NULL; | |
16981 | + /* Let all tasks know cpu unplug is finished before cleaning up */ | |
16982 | + smp_wmb(); | |
16983 | + | |
16984 | + if (hp->sync_tsk) | |
16985 | + kthread_stop(hp->sync_tsk); | |
16986 | + | |
16987 | + if (hp->grab_lock) { | |
16988 | + hotplug_unlock(hp); | |
16989 | + /* protected by cpu_hotplug.lock */ | |
16990 | + hp->grab_lock = 0; | |
16991 | + } | |
16992 | + tell_sched_cpu_down_done(cpu); | |
16993 | +} | |
16994 | ||
16995 | void get_online_cpus(void) | |
16996 | { | |
16997 | @@ -338,13 +621,15 @@ static int take_cpu_down(void *_param) | |
16998 | /* Requires cpu_add_remove_lock to be held */ | |
16999 | static int _cpu_down(unsigned int cpu, int tasks_frozen) | |
17000 | { | |
17001 | - int err, nr_calls = 0; | |
17002 | + int mycpu, err, nr_calls = 0; | |
17003 | void *hcpu = (void *)(long)cpu; | |
17004 | unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; | |
17005 | struct take_cpu_down_param tcd_param = { | |
17006 | .mod = mod, | |
17007 | .hcpu = hcpu, | |
17008 | }; | |
17009 | + cpumask_var_t cpumask; | |
17010 | + cpumask_var_t cpumask_org; | |
17011 | ||
17012 | if (num_online_cpus() == 1) | |
17013 | return -EBUSY; | |
17014 | @@ -352,7 +637,34 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |
17015 | if (!cpu_online(cpu)) | |
17016 | return -EINVAL; | |
17017 | ||
17018 | + /* Move the downtaker off the unplug cpu */ | |
17019 | + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL)) | |
17020 | + return -ENOMEM; | |
17021 | + if (!alloc_cpumask_var(&cpumask_org, GFP_KERNEL)) { | |
17022 | + free_cpumask_var(cpumask); | |
17023 | + return -ENOMEM; | |
17024 | + } | |
17025 | + | |
17026 | + cpumask_copy(cpumask_org, tsk_cpus_allowed(current)); | |
17027 | + cpumask_andnot(cpumask, cpu_online_mask, cpumask_of(cpu)); | |
17028 | + set_cpus_allowed_ptr(current, cpumask); | |
17029 | + free_cpumask_var(cpumask); | |
17030 | + migrate_disable(); | |
17031 | + mycpu = smp_processor_id(); | |
17032 | + if (mycpu == cpu) { | |
17033 | + printk(KERN_ERR "Yuck! Still on unplug CPU\n!"); | |
17034 | + migrate_enable(); | |
17035 | + err = -EBUSY; | |
17036 | + goto restore_cpus; | |
17037 | + } | |
17038 | + migrate_enable(); | |
17039 | + | |
17040 | cpu_hotplug_begin(); | |
17041 | + err = cpu_unplug_begin(cpu); | |
17042 | + if (err) { | |
17043 | + printk("cpu_unplug_begin(%d) failed\n", cpu); | |
17044 | + goto out_cancel; | |
17045 | + } | |
17046 | ||
17047 | err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); | |
17048 | if (err) { | |
17049 | @@ -378,8 +690,12 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |
17050 | else | |
17051 | synchronize_rcu(); | |
17052 | ||
17053 | + __cpu_unplug_wait(cpu); | |
17054 | smpboot_park_threads(cpu); | |
17055 | ||
17056 | + /* Notifiers are done. Don't let any more tasks pin this CPU. */ | |
17057 | + cpu_unplug_sync(cpu); | |
17058 | + | |
17059 | /* | |
17060 | * Prevent irq alloc/free while the dying cpu reorganizes the | |
17061 | * interrupt affinities. | |
17062 | @@ -424,9 +740,14 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) | |
17063 | check_for_tasks(cpu); | |
17064 | ||
17065 | out_release: | |
17066 | + cpu_unplug_done(cpu); | |
17067 | +out_cancel: | |
17068 | cpu_hotplug_done(); | |
17069 | if (!err) | |
17070 | cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); | |
17071 | +restore_cpus: | |
17072 | + set_cpus_allowed_ptr(current, cpumask_org); | |
17073 | + free_cpumask_var(cpumask_org); | |
17074 | return err; | |
17075 | } | |
17076 | ||
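
pin_current_cpu() and unpin_current_cpu() are the hooks that the RT migrate_disable()/migrate_enable() path uses so that a migration-disabled task cannot have its CPU unplugged underneath it. The calling pattern, roughly (a simplified sketch; the real callers live in the scheduler part of this series and additionally mark the task unmigratable between the two calls, which is what actually keeps it on this CPU):

    static void example_migrate_disabled_region(void)
    {
            preempt_disable();
            pin_current_cpu();      /* this CPU can no longer be unplugged */
            /* migrate_disable() marks the task unmigratable here */
            preempt_enable();

            /* ... preemptible work that must stay on this CPU ... */

            preempt_disable();
            unpin_current_cpu();    /* may wake the sync_unplug/N thread */
            /* migrate_enable() clears the unmigratable mark here */
            preempt_enable();
    }
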
17077 | diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c | |
17078 | index fc1ef736253c..83c666537a7a 100644 | |
17079 | --- a/kernel/debug/kdb/kdb_io.c | |
17080 | +++ b/kernel/debug/kdb/kdb_io.c | |
17081 | @@ -554,7 +554,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
17082 | int linecount; | |
17083 | int colcount; | |
17084 | int logging, saved_loglevel = 0; | |
17085 | - int saved_trap_printk; | |
17086 | int got_printf_lock = 0; | |
17087 | int retlen = 0; | |
17088 | int fnd, len; | |
17089 | @@ -565,8 +564,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap) | |
17090 | unsigned long uninitialized_var(flags); | |
17091 | ||
17092 | preempt_disable(); | |
17093 | - saved_trap_printk = kdb_trap_printk; | |
17094 | - kdb_trap_printk = 0; | |
17095 | ||
17096 | /* Serialize kdb_printf if multiple cpus try to write at once. | |
17097 | * But if any cpu goes recursive in kdb, just print the output, | |
17098 | @@ -855,7 +852,6 @@ kdb_print_out: | |
17099 | } else { | |
17100 | __release(kdb_printf_lock); | |
17101 | } | |
17102 | - kdb_trap_printk = saved_trap_printk; | |
17103 | preempt_enable(); | |
17104 | return retlen; | |
17105 | } | |
17106 | @@ -865,9 +861,11 @@ int kdb_printf(const char *fmt, ...) | |
17107 | va_list ap; | |
17108 | int r; | |
17109 | ||
17110 | + kdb_trap_printk++; | |
17111 | va_start(ap, fmt); | |
17112 | r = vkdb_printf(KDB_MSGSRC_INTERNAL, fmt, ap); | |
17113 | va_end(ap); | |
17114 | + kdb_trap_printk--; | |
17115 | ||
17116 | return r; | |
17117 | } | |
17118 | diff --git a/kernel/events/core.c b/kernel/events/core.c | |
2bb96ace | 17119 | index bc6371b0e4fb..388de1dc27d9 100644 |
b4de310e JK |
17120 | --- a/kernel/events/core.c |
17121 | +++ b/kernel/events/core.c | |
17122 | @@ -802,6 +802,7 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu) | |
17123 | raw_spin_lock_init(&cpuctx->hrtimer_lock); | |
17124 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); | |
17125 | timer->function = perf_mux_hrtimer_handler; | |
17126 | + timer->irqsafe = 1; | |
17127 | } | |
17128 | ||
17129 | static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx) | |
2bb96ace | 17130 | @@ -7240,6 +7241,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event) |
b4de310e JK |
17131 | |
17132 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
17133 | hwc->hrtimer.function = perf_swevent_hrtimer; | |
17134 | + hwc->hrtimer.irqsafe = 1; | |
17135 | ||
17136 | /* | |
17137 | * Since hrtimers have a fixed rate, we can do a static freq->period | |
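
On PREEMPT_RT_FULL, hrtimer callbacks are normally pushed out of hard interrupt context; setting the irqsafe field (introduced by this series) keeps a timer executing from the hard interrupt, which the perf multiplexing and software-event timers above require. The setup pattern is the one used in both hunks; a sketch with placeholder names (my_timer, my_timer_fn, and the 10ms period are invented for illustration):

    static struct hrtimer my_timer;

    static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
    {
            /* runs in hard irq context even on RT: no sleeping locks here */
            return HRTIMER_NORESTART;
    }

    static void my_timer_setup(void)
    {
            hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
            my_timer.function = my_timer_fn;
            my_timer.irqsafe = 1;   /* field added by this series */
            hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
    }
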
17138 | diff --git a/kernel/exit.c b/kernel/exit.c | |
17139 | index ffba5df4abd5..e199407f8831 100644 | |
17140 | --- a/kernel/exit.c | |
17141 | +++ b/kernel/exit.c | |
17142 | @@ -144,7 +144,7 @@ static void __exit_signal(struct task_struct *tsk) | |
17143 | * Do this under ->siglock, we can race with another thread | |
17144 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | |
17145 | */ | |
17146 | - flush_sigqueue(&tsk->pending); | |
17147 | + flush_task_sigqueue(tsk); | |
17148 | tsk->sighand = NULL; | |
17149 | spin_unlock(&sighand->siglock); | |
17150 | ||
17151 | diff --git a/kernel/fork.c b/kernel/fork.c | |
2bb96ace | 17152 | index 7161ebe67cbb..3b880312b385 100644 |
b4de310e JK |
17153 | --- a/kernel/fork.c |
17154 | +++ b/kernel/fork.c | |
17155 | @@ -108,7 +108,7 @@ int max_threads; /* tunable limit on nr_threads */ | |
17156 | ||
17157 | DEFINE_PER_CPU(unsigned long, process_counts) = 0; | |
17158 | ||
17159 | -__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ | |
17160 | +DEFINE_RWLOCK(tasklist_lock); /* outer */ | |
17161 | ||
17162 | #ifdef CONFIG_PROVE_RCU | |
17163 | int lockdep_tasklist_lock_is_held(void) | |
17164 | @@ -244,7 +244,9 @@ static inline void put_signal_struct(struct signal_struct *sig) | |
17165 | if (atomic_dec_and_test(&sig->sigcnt)) | |
17166 | free_signal_struct(sig); | |
17167 | } | |
17168 | - | |
17169 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17170 | +static | |
17171 | +#endif | |
17172 | void __put_task_struct(struct task_struct *tsk) | |
17173 | { | |
17174 | WARN_ON(!tsk->exit_state); | |
17175 | @@ -261,7 +263,18 @@ void __put_task_struct(struct task_struct *tsk) | |
17176 | if (!profile_handoff_task(tsk)) | |
17177 | free_task(tsk); | |
17178 | } | |
17179 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
17180 | EXPORT_SYMBOL_GPL(__put_task_struct); | |
17181 | +#else | |
17182 | +void __put_task_struct_cb(struct rcu_head *rhp) | |
17183 | +{ | |
17184 | + struct task_struct *tsk = container_of(rhp, struct task_struct, put_rcu); | |
17185 | + | |
17186 | + __put_task_struct(tsk); | |
17187 | + | |
17188 | +} | |
17189 | +EXPORT_SYMBOL_GPL(__put_task_struct_cb); | |
17190 | +#endif | |
17191 | ||
17192 | void __init __weak arch_task_cache_init(void) { } | |
17193 | ||
17194 | @@ -689,6 +702,19 @@ void __mmdrop(struct mm_struct *mm) | |
17195 | } | |
17196 | EXPORT_SYMBOL_GPL(__mmdrop); | |
17197 | ||
17198 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17199 | +/* | |
17200 | + * RCU callback for delayed mm drop. Not strictly RCU, but we don't | |
17201 | + * want another facility to make this work. | |
17202 | + */ | |
17203 | +void __mmdrop_delayed(struct rcu_head *rhp) | |
17204 | +{ | |
17205 | + struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop); | |
17206 | + | |
17207 | + __mmdrop(mm); | |
17208 | +} | |
17209 | +#endif | |
17210 | + | |
17211 | /* | |
17212 | * Decrement the use count and release all resources for an mm. | |
17213 | */ | |
2bb96ace | 17214 | @@ -1239,6 +1265,9 @@ static void rt_mutex_init_task(struct task_struct *p) |
b4de310e JK |
17215 | */ |
17216 | static void posix_cpu_timers_init(struct task_struct *tsk) | |
17217 | { | |
17218 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17219 | + tsk->posix_timer_list = NULL; | |
17220 | +#endif | |
17221 | tsk->cputime_expires.prof_exp = 0; | |
17222 | tsk->cputime_expires.virt_exp = 0; | |
17223 | tsk->cputime_expires.sched_exp = 0; | |
2bb96ace | 17224 | @@ -1364,15 +1393,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, |
b4de310e JK |
17225 | spin_lock_init(&p->alloc_lock); |
17226 | ||
17227 | init_sigpending(&p->pending); | |
17228 | + p->sigqueue_cache = NULL; | |
17229 | ||
17230 | p->utime = p->stime = p->gtime = 0; | |
17231 | p->utimescaled = p->stimescaled = 0; | |
17232 | prev_cputime_init(&p->prev_cputime); | |
17233 | ||
17234 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | |
17235 | - seqlock_init(&p->vtime_seqlock); | |
17236 | + seqcount_init(&p->vtime_seqcount); | |
17237 | p->vtime_snap = 0; | |
17238 | - p->vtime_snap_whence = VTIME_SLEEPING; | |
17239 | + p->vtime_snap_whence = VTIME_INACTIVE; | |
17240 | #endif | |
17241 | ||
17242 | #if defined(SPLIT_RSS_COUNTING) | |
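
Under PREEMPT_RT_BASE the final __put_task_struct() is reached through an RCU callback so the teardown (which may take sleeping locks on RT) happens in a safe context. The exported __put_task_struct_cb() above is the callback half; the caller half presumably lives in the include/linux/sched.h hunk of this series and would look roughly like the sketch below (put_rcu is assumed to be the rcu_head this series adds to task_struct):

    /* hypothetical caller side, mirroring the exported callback above */
    static inline void put_task_struct(struct task_struct *t)
    {
            if (atomic_dec_and_test(&t->usage))
                    call_rcu(&t->put_rcu, __put_task_struct_cb);
    }

The same trick is applied to mm teardown: __mmdrop_delayed() is the RCU callback for the delayed_drop rcu_head, so the final __mmdrop() also runs in a context where RT's sleeping locks are permitted.
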
17243 | diff --git a/kernel/futex.c b/kernel/futex.c | |
cb95d48a | 17244 | index 9d8163afd87c..059623427b99 100644 |
b4de310e JK |
17245 | --- a/kernel/futex.c |
17246 | +++ b/kernel/futex.c | |
17247 | @@ -815,7 +815,9 @@ void exit_pi_state_list(struct task_struct *curr) | |
17248 | * task still owns the PI-state: | |
17249 | */ | |
17250 | if (head->next != next) { | |
17251 | + raw_spin_unlock_irq(&curr->pi_lock); | |
17252 | spin_unlock(&hb->lock); | |
17253 | + raw_spin_lock_irq(&curr->pi_lock); | |
17254 | continue; | |
17255 | } | |
17256 | ||
17257 | @@ -1210,6 +1212,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
17258 | struct futex_pi_state *pi_state = this->pi_state; | |
17259 | u32 uninitialized_var(curval), newval; | |
17260 | WAKE_Q(wake_q); | |
17261 | + WAKE_Q(wake_sleeper_q); | |
17262 | bool deboost; | |
17263 | int ret = 0; | |
17264 | ||
17265 | @@ -1223,7 +1226,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
17266 | if (pi_state->owner != current) | |
17267 | return -EINVAL; | |
17268 | ||
17269 | - raw_spin_lock(&pi_state->pi_mutex.wait_lock); | |
17270 | + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); | |
17271 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | |
17272 | ||
17273 | /* | |
17274 | @@ -1259,24 +1262,25 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, | |
17275 | ret = -EINVAL; | |
17276 | } | |
17277 | if (ret) { | |
17278 | - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); | |
17279 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
17280 | return ret; | |
17281 | } | |
17282 | ||
17283 | - raw_spin_lock_irq(&pi_state->owner->pi_lock); | |
17284 | + raw_spin_lock(&pi_state->owner->pi_lock); | |
17285 | WARN_ON(list_empty(&pi_state->list)); | |
17286 | list_del_init(&pi_state->list); | |
17287 | - raw_spin_unlock_irq(&pi_state->owner->pi_lock); | |
17288 | + raw_spin_unlock(&pi_state->owner->pi_lock); | |
17289 | ||
17290 | - raw_spin_lock_irq(&new_owner->pi_lock); | |
17291 | + raw_spin_lock(&new_owner->pi_lock); | |
17292 | WARN_ON(!list_empty(&pi_state->list)); | |
17293 | list_add(&pi_state->list, &new_owner->pi_state_list); | |
17294 | pi_state->owner = new_owner; | |
17295 | - raw_spin_unlock_irq(&new_owner->pi_lock); | |
17296 | + raw_spin_unlock(&new_owner->pi_lock); | |
17297 | ||
17298 | - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); | |
17299 | + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); | |
17300 | ||
17301 | - deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q); | |
17302 | + deboost = rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q, | |
17303 | + &wake_sleeper_q); | |
17304 | ||
17305 | /* | |
17306 | * First unlock HB so the waiter does not spin on it once he got woken | |
cb95d48a JK |
17307 | @@ -1284,8 +1288,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, |
17308 | * deboost first (and lose our higher priority), then the task might get | |
17309 | * scheduled away before the wake up can take place. | |
b4de310e | 17310 | */ |
cb95d48a JK |
17311 | - spin_unlock(&hb->lock); |
17312 | + deboost |= spin_unlock_no_deboost(&hb->lock); | |
b4de310e JK |
17313 | wake_up_q(&wake_q); |
17314 | + wake_up_q_sleeper(&wake_sleeper_q); | |
17315 | if (deboost) | |
17316 | rt_mutex_adjust_prio(current); | |
17317 | ||
17318 | @@ -1822,6 +1827,16 @@ retry_private: | |
17319 | requeue_pi_wake_futex(this, &key2, hb2); | |
17320 | drop_count++; | |
17321 | continue; | |
17322 | + } else if (ret == -EAGAIN) { | |
17323 | + /* | |
17324 | + * Waiter was woken by timeout or | |
17325 | + * signal and has set pi_blocked_on to | |
17326 | + * PI_WAKEUP_INPROGRESS before we | |
17327 | + * tried to enqueue it on the rtmutex. | |
17328 | + */ | |
17329 | + this->pi_state = NULL; | |
17330 | + free_pi_state(pi_state); | |
17331 | + continue; | |
17332 | } else if (ret) { | |
17333 | /* -EDEADLK */ | |
17334 | this->pi_state = NULL; | |
17335 | @@ -2139,11 +2154,11 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked) | |
17336 | * we returned due to timeout or signal without taking the | |
17337 | * rt_mutex. Too late. | |
17338 | */ | |
17339 | - raw_spin_lock(&q->pi_state->pi_mutex.wait_lock); | |
17340 | + raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock); | |
17341 | owner = rt_mutex_owner(&q->pi_state->pi_mutex); | |
17342 | if (!owner) | |
17343 | owner = rt_mutex_next_owner(&q->pi_state->pi_mutex); | |
17344 | - raw_spin_unlock(&q->pi_state->pi_mutex.wait_lock); | |
17345 | + raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock); | |
17346 | ret = fixup_pi_state_owner(uaddr, q, owner); | |
17347 | goto out; | |
17348 | } | |
17349 | @@ -2691,7 +2706,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
17350 | struct hrtimer_sleeper timeout, *to = NULL; | |
17351 | struct rt_mutex_waiter rt_waiter; | |
17352 | struct rt_mutex *pi_mutex = NULL; | |
17353 | - struct futex_hash_bucket *hb; | |
17354 | + struct futex_hash_bucket *hb, *hb2; | |
17355 | union futex_key key2 = FUTEX_KEY_INIT; | |
17356 | struct futex_q q = futex_q_init; | |
17357 | int res, ret; | |
17358 | @@ -2716,10 +2731,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
17359 | * The waiter is allocated on our stack, manipulated by the requeue | |
17360 | * code while we sleep on uaddr. | |
17361 | */ | |
17362 | - debug_rt_mutex_init_waiter(&rt_waiter); | |
17363 | - RB_CLEAR_NODE(&rt_waiter.pi_tree_entry); | |
17364 | - RB_CLEAR_NODE(&rt_waiter.tree_entry); | |
17365 | - rt_waiter.task = NULL; | |
17366 | + rt_mutex_init_waiter(&rt_waiter, false); | |
17367 | ||
17368 | ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, VERIFY_WRITE); | |
17369 | if (unlikely(ret != 0)) | |
17370 | @@ -2750,20 +2762,55 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
17371 | /* Queue the futex_q, drop the hb lock, wait for wakeup. */ | |
17372 | futex_wait_queue_me(hb, &q, to); | |
17373 | ||
17374 | - spin_lock(&hb->lock); | |
17375 | - ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
17376 | - spin_unlock(&hb->lock); | |
17377 | - if (ret) | |
17378 | - goto out_put_keys; | |
17379 | + /* | |
17380 | + * On RT we must avoid races with requeue and trying to block | |
17381 | + * on two mutexes (hb->lock and uaddr2's rtmutex) by | |
17382 | + * serializing access to pi_blocked_on with pi_lock. | |
17383 | + */ | |
17384 | + raw_spin_lock_irq(¤t->pi_lock); | |
17385 | + if (current->pi_blocked_on) { | |
17386 | + /* | |
17387 | + * We have been requeued or are in the process of | |
17388 | + * being requeued. | |
17389 | + */ | |
17390 | + raw_spin_unlock_irq(¤t->pi_lock); | |
17391 | + } else { | |
17392 | + /* | |
17393 | + * Setting pi_blocked_on to PI_WAKEUP_INPROGRESS | |
17394 | + * prevents a concurrent requeue from moving us to the | |
17395 | + * uaddr2 rtmutex. After that we can safely acquire | |
17396 | + * (and possibly block on) hb->lock. | |
17397 | + */ | |
17398 | + current->pi_blocked_on = PI_WAKEUP_INPROGRESS; | |
17399 | + raw_spin_unlock_irq(¤t->pi_lock); | |
17400 | + | |
17401 | + spin_lock(&hb->lock); | |
17402 | + | |
17403 | + /* | |
17404 | + * Clean up pi_blocked_on. We might leak it otherwise | |
17405 | + * when we succeeded with the hb->lock in the fast | |
17406 | + * path. | |
17407 | + */ | |
17408 | + raw_spin_lock_irq(¤t->pi_lock); | |
17409 | + current->pi_blocked_on = NULL; | |
17410 | + raw_spin_unlock_irq(¤t->pi_lock); | |
17411 | + | |
17412 | + ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); | |
17413 | + spin_unlock(&hb->lock); | |
17414 | + if (ret) | |
17415 | + goto out_put_keys; | |
17416 | + } | |
17417 | ||
17418 | /* | |
17419 | - * In order for us to be here, we know our q.key == key2, and since | |
17420 | - * we took the hb->lock above, we also know that futex_requeue() has | |
17421 | - * completed and we no longer have to concern ourselves with a wakeup | |
17422 | - * race with the atomic proxy lock acquisition by the requeue code. The | |
17423 | - * futex_requeue dropped our key1 reference and incremented our key2 | |
17424 | - * reference count. | |
17425 | + * In order to be here, we have either been requeued, are in | |
17426 | + * the process of being requeued, or requeue successfully | |
17427 | + * acquired uaddr2 on our behalf. If pi_blocked_on was | |
17428 | + * non-null above, we may be racing with a requeue. Do not | |
17429 | + * rely on q->lock_ptr to be hb2->lock until after blocking on | |
17430 | + * hb->lock or hb2->lock. The futex_requeue dropped our key1 | |
17431 | + * reference and incremented our key2 reference count. | |
17432 | */ | |
17433 | + hb2 = hash_futex(&key2); | |
17434 | ||
17435 | /* Check if the requeue code acquired the second futex for us. */ | |
17436 | if (!q.rt_waiter) { | |
17437 | @@ -2772,14 +2819,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
17438 | * did a lock-steal - fix up the PI-state in that case. | |
17439 | */ | |
17440 | if (q.pi_state && (q.pi_state->owner != current)) { | |
17441 | - spin_lock(q.lock_ptr); | |
17442 | + spin_lock(&hb2->lock); | |
17443 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
17444 | ret = fixup_pi_state_owner(uaddr2, &q, current); | |
17445 | /* | |
17446 | * Drop the reference to the pi state which | |
17447 | * the requeue_pi() code acquired for us. | |
17448 | */ | |
17449 | free_pi_state(q.pi_state); | |
17450 | - spin_unlock(q.lock_ptr); | |
17451 | + spin_unlock(&hb2->lock); | |
17452 | } | |
17453 | } else { | |
17454 | /* | |
17455 | @@ -2792,7 +2840,8 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, | |
17456 | ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); | |
17457 | debug_rt_mutex_free_waiter(&rt_waiter); | |
17458 | ||
17459 | - spin_lock(q.lock_ptr); | |
17460 | + spin_lock(&hb2->lock); | |
17461 | + BUG_ON(&hb2->lock != q.lock_ptr); | |
17462 | /* | |
17463 | * Fixup the pi_state owner and possibly acquire the lock if we | |
17464 | * haven't already. | |
17465 | diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c | |
17466 | index 57bff7857e87..6c65c9252991 100644 | |
17467 | --- a/kernel/irq/handle.c | |
17468 | +++ b/kernel/irq/handle.c | |
17469 | @@ -134,6 +134,8 @@ void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) | |
17470 | ||
17471 | irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
17472 | { | |
17473 | + struct pt_regs *regs = get_irq_regs(); | |
17474 | + u64 ip = regs ? instruction_pointer(regs) : 0; | |
17475 | irqreturn_t retval = IRQ_NONE; | |
17476 | unsigned int flags = 0, irq = desc->irq_data.irq; | |
17477 | struct irqaction *action = desc->action; | |
17478 | @@ -176,7 +178,11 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) | |
17479 | action = action->next; | |
17480 | } | |
17481 | ||
17482 | - add_interrupt_randomness(irq, flags); | |
17483 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17484 | + desc->random_ip = ip; | |
17485 | +#else | |
17486 | + add_interrupt_randomness(irq, flags, ip); | |
17487 | +#endif | |
17488 | ||
17489 | if (!noirqdebug) | |
17490 | note_interrupt(desc, retval); | |
17491 | diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c | |
17492 | index 239e2ae2c947..0b73349a42d5 100644 | |
17493 | --- a/kernel/irq/irqdesc.c | |
17494 | +++ b/kernel/irq/irqdesc.c | |
17495 | @@ -24,10 +24,27 @@ | |
17496 | static struct lock_class_key irq_desc_lock_class; | |
17497 | ||
17498 | #if defined(CONFIG_SMP) | |
17499 | +static int __init irq_affinity_setup(char *str) | |
17500 | +{ | |
17501 | + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | |
17502 | + cpulist_parse(str, irq_default_affinity); | |
17503 | + /* | |
17504 | + * Set at least the boot cpu. We don't want to end up with | |
17505 | + * bug reports caused by random command line masks | |
17506 | + */ | |
17507 | + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); | |
17508 | + return 1; | |
17509 | +} | |
17510 | +__setup("irqaffinity=", irq_affinity_setup); | |
17511 | + | |
17512 | static void __init init_irq_default_affinity(void) | |
17513 | { | |
17514 | - alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | |
17515 | - cpumask_setall(irq_default_affinity); | |
17516 | +#ifdef CONFIG_CPUMASK_OFFSTACK | |
17517 | + if (!irq_default_affinity) | |
17518 | + zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT); | |
17519 | +#endif | |
17520 | + if (cpumask_empty(irq_default_affinity)) | |
17521 | + cpumask_setall(irq_default_affinity); | |
17522 | } | |
17523 | #else | |
17524 | static void __init init_irq_default_affinity(void) | |
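
The new irqaffinity= early parameter seeds irq_default_affinity from the command line instead of the all-CPUs default, and the boot CPU is always added so a bad mask cannot leave the default empty. Typical use on an RT system is to keep device interrupts off the CPUs reserved for real-time threads, e.g. on the kernel command line:

    irqaffinity=0-1

With the line above, newly requested IRQs default to CPUs 0-1, leaving the remaining CPUs free for RT work (individual IRQs can still be retargeted afterwards via /proc/irq/*/smp_affinity).
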
17525 | diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c | |
17526 | index 6ead200370da..8e89554aa345 100644 | |
17527 | --- a/kernel/irq/manage.c | |
17528 | +++ b/kernel/irq/manage.c | |
17529 | @@ -22,6 +22,7 @@ | |
17530 | #include "internals.h" | |
17531 | ||
17532 | #ifdef CONFIG_IRQ_FORCED_THREADING | |
17533 | +# ifndef CONFIG_PREEMPT_RT_BASE | |
17534 | __read_mostly bool force_irqthreads; | |
17535 | ||
17536 | static int __init setup_forced_irqthreads(char *arg) | |
17537 | @@ -30,6 +31,7 @@ static int __init setup_forced_irqthreads(char *arg) | |
17538 | return 0; | |
17539 | } | |
17540 | early_param("threadirqs", setup_forced_irqthreads); | |
17541 | +# endif | |
17542 | #endif | |
17543 | ||
17544 | static void __synchronize_hardirq(struct irq_desc *desc) | |
17545 | @@ -181,6 +183,62 @@ static inline void | |
17546 | irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } | |
17547 | #endif | |
17548 | ||
17549 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17550 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify); | |
17551 | +static struct task_struct *set_affinity_helper; | |
17552 | +static LIST_HEAD(affinity_list); | |
17553 | +static DEFINE_RAW_SPINLOCK(affinity_list_lock); | |
17554 | + | |
17555 | +static int set_affinity_thread(void *unused) | |
17556 | +{ | |
17557 | + while (1) { | |
17558 | + struct irq_affinity_notify *notify; | |
17559 | + int empty; | |
17560 | + | |
17561 | + set_current_state(TASK_INTERRUPTIBLE); | |
17562 | + | |
17563 | + raw_spin_lock_irq(&affinity_list_lock); | |
17564 | + empty = list_empty(&affinity_list); | |
17565 | + raw_spin_unlock_irq(&affinity_list_lock); | |
17566 | + | |
17567 | + if (empty) | |
17568 | + schedule(); | |
17569 | + if (kthread_should_stop()) | |
17570 | + break; | |
17571 | + set_current_state(TASK_RUNNING); | |
17572 | +try_next: | |
17573 | + notify = NULL; | |
17574 | + | |
17575 | + raw_spin_lock_irq(&affinity_list_lock); | |
17576 | + if (!list_empty(&affinity_list)) { | |
17577 | + notify = list_first_entry(&affinity_list, | |
17578 | + struct irq_affinity_notify, list); | |
17579 | + list_del_init(¬ify->list); | |
17580 | + } | |
17581 | + raw_spin_unlock_irq(&affinity_list_lock); | |
17582 | + | |
17583 | + if (!notify) | |
17584 | + continue; | |
17585 | + _irq_affinity_notify(notify); | |
17586 | + goto try_next; | |
17587 | + } | |
17588 | + return 0; | |
17589 | +} | |
17590 | + | |
17591 | +static void init_helper_thread(void) | |
17592 | +{ | |
17593 | + if (set_affinity_helper) | |
17594 | + return; | |
17595 | + set_affinity_helper = kthread_run(set_affinity_thread, NULL, | |
17596 | + "affinity-cb"); | |
17597 | + WARN_ON(IS_ERR(set_affinity_helper)); | |
17598 | +} | |
17599 | +#else | |
17600 | + | |
17601 | +static inline void init_helper_thread(void) { } | |
17602 | + | |
17603 | +#endif | |
17604 | + | |
17605 | int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |
17606 | bool force) | |
17607 | { | |
17608 | @@ -220,7 +278,17 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, | |
17609 | ||
17610 | if (desc->affinity_notify) { | |
17611 | kref_get(&desc->affinity_notify->kref); | |
17612 | + | |
17613 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17614 | + raw_spin_lock(&affinity_list_lock); | |
17615 | + if (list_empty(&desc->affinity_notify->list)) | |
17616 | + list_add_tail(&affinity_list, | |
17617 | + &desc->affinity_notify->list); | |
17618 | + raw_spin_unlock(&affinity_list_lock); | |
17619 | + wake_up_process(set_affinity_helper); | |
17620 | +#else | |
17621 | schedule_work(&desc->affinity_notify->work); | |
17622 | +#endif | |
17623 | } | |
17624 | irqd_set(data, IRQD_AFFINITY_SET); | |
17625 | ||
17626 | @@ -258,10 +326,8 @@ int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m) | |
17627 | } | |
17628 | EXPORT_SYMBOL_GPL(irq_set_affinity_hint); | |
17629 | ||
17630 | -static void irq_affinity_notify(struct work_struct *work) | |
17631 | +static void _irq_affinity_notify(struct irq_affinity_notify *notify) | |
17632 | { | |
17633 | - struct irq_affinity_notify *notify = | |
17634 | - container_of(work, struct irq_affinity_notify, work); | |
17635 | struct irq_desc *desc = irq_to_desc(notify->irq); | |
17636 | cpumask_var_t cpumask; | |
17637 | unsigned long flags; | |
17638 | @@ -283,6 +349,13 @@ out: | |
17639 | kref_put(¬ify->kref, notify->release); | |
17640 | } | |
17641 | ||
17642 | +static void irq_affinity_notify(struct work_struct *work) | |
17643 | +{ | |
17644 | + struct irq_affinity_notify *notify = | |
17645 | + container_of(work, struct irq_affinity_notify, work); | |
17646 | + _irq_affinity_notify(notify); | |
17647 | +} | |
17648 | + | |
17649 | /** | |
17650 | * irq_set_affinity_notifier - control notification of IRQ affinity changes | |
17651 | * @irq: Interrupt for which to enable/disable notification | |
17652 | @@ -312,6 +385,8 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) | |
17653 | notify->irq = irq; | |
17654 | kref_init(¬ify->kref); | |
17655 | INIT_WORK(¬ify->work, irq_affinity_notify); | |
17656 | + INIT_LIST_HEAD(¬ify->list); | |
17657 | + init_helper_thread(); | |
17658 | } | |
17659 | ||
17660 | raw_spin_lock_irqsave(&desc->lock, flags); | |
17661 | @@ -865,7 +940,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) | |
17662 | local_bh_disable(); | |
17663 | ret = action->thread_fn(action->irq, action->dev_id); | |
17664 | irq_finalize_oneshot(desc, action); | |
17665 | - local_bh_enable(); | |
17666 | + /* | |
17667 | + * Interrupts which have real time requirements can be set up | |
17668 | + * to avoid softirq processing in the thread handler. This is | |
17669 | + * safe as these interrupts do not raise soft interrupts. | |
17670 | + */ | |
17671 | + if (irq_settings_no_softirq_call(desc)) | |
17672 | + _local_bh_enable(); | |
17673 | + else | |
17674 | + local_bh_enable(); | |
17675 | return ret; | |
17676 | } | |
17677 | ||
17678 | @@ -962,6 +1045,12 @@ static int irq_thread(void *data) | |
17679 | if (action_ret == IRQ_WAKE_THREAD) | |
17680 | irq_wake_secondary(desc, action); | |
17681 | ||
17682 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17683 | + migrate_disable(); | |
17684 | + add_interrupt_randomness(action->irq, 0, | |
17685 | + desc->random_ip ^ (unsigned long) action); | |
17686 | + migrate_enable(); | |
17687 | +#endif | |
17688 | wake_threads_waitq(desc); | |
17689 | } | |
17690 | ||
17691 | @@ -1315,6 +1404,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |
17692 | irqd_set(&desc->irq_data, IRQD_NO_BALANCING); | |
17693 | } | |
17694 | ||
17695 | + if (new->flags & IRQF_NO_SOFTIRQ_CALL) | |
17696 | + irq_settings_set_no_softirq_call(desc); | |
17697 | + | |
17698 | /* Set default affinity mask once everything is setup */ | |
17699 | setup_affinity(desc, mask); | |
17700 | ||
17701 | @@ -1968,7 +2060,7 @@ EXPORT_SYMBOL_GPL(irq_get_irqchip_state); | |
17702 | * This call sets the internal irqchip state of an interrupt, | |
17703 | * depending on the value of @which. | |
17704 | * | |
17705 | - * This function should be called with preemption disabled if the | |
17706 | + * This function should be called with migration disabled if the | |
17707 | * interrupt controller has per-cpu registers. | |
17708 | */ | |
17709 | int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which, | |
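
IRQF_NO_SOFTIRQ_CALL (added by this series) lets latency-critical threaded handlers skip softirq processing when they re-enable bottom halves: irq_forced_thread_fn() then uses _local_bh_enable() instead of local_bh_enable(). This is only safe for handlers that never raise softirqs themselves. A hedged sketch of a driver opting in (the irq number, device name, and both handlers are placeholders):

    static irqreturn_t my_hard_handler(int irq, void *dev_id)
    {
            return IRQ_WAKE_THREAD;
    }

    static irqreturn_t my_thread_fn(int irq, void *dev_id)
    {
            /* latency-critical; must not raise softirqs itself */
            return IRQ_HANDLED;
    }

    static int my_probe_irq(unsigned int irq, void *dev)
    {
            return request_threaded_irq(irq, my_hard_handler, my_thread_fn,
                                        IRQF_NO_SOFTIRQ_CALL, "my-dev", dev);
    }
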
17710 | diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h | |
17711 | index 320579d89091..2df2d4445b1e 100644 | |
17712 | --- a/kernel/irq/settings.h | |
17713 | +++ b/kernel/irq/settings.h | |
17714 | @@ -16,6 +16,7 @@ enum { | |
17715 | _IRQ_PER_CPU_DEVID = IRQ_PER_CPU_DEVID, | |
17716 | _IRQ_IS_POLLED = IRQ_IS_POLLED, | |
17717 | _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY, | |
17718 | + _IRQ_NO_SOFTIRQ_CALL = IRQ_NO_SOFTIRQ_CALL, | |
17719 | _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK, | |
17720 | }; | |
17721 | ||
17722 | @@ -30,6 +31,7 @@ enum { | |
17723 | #define IRQ_PER_CPU_DEVID GOT_YOU_MORON | |
17724 | #define IRQ_IS_POLLED GOT_YOU_MORON | |
17725 | #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON | |
17726 | +#define IRQ_NO_SOFTIRQ_CALL GOT_YOU_MORON | |
17727 | #undef IRQF_MODIFY_MASK | |
17728 | #define IRQF_MODIFY_MASK GOT_YOU_MORON | |
17729 | ||
17730 | @@ -40,6 +42,16 @@ irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set) | |
17731 | desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK); | |
17732 | } | |
17733 | ||
17734 | +static inline bool irq_settings_no_softirq_call(struct irq_desc *desc) | |
17735 | +{ | |
17736 | + return desc->status_use_accessors & _IRQ_NO_SOFTIRQ_CALL; | |
17737 | +} | |
17738 | + | |
17739 | +static inline void irq_settings_set_no_softirq_call(struct irq_desc *desc) | |
17740 | +{ | |
17741 | + desc->status_use_accessors |= _IRQ_NO_SOFTIRQ_CALL; | |
17742 | +} | |
17743 | + | |
17744 | static inline bool irq_settings_is_per_cpu(struct irq_desc *desc) | |
17745 | { | |
17746 | return desc->status_use_accessors & _IRQ_PER_CPU; | |
17747 | diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c | |
17748 | index 32144175458d..ed26f2554972 100644 | |
17749 | --- a/kernel/irq/spurious.c | |
17750 | +++ b/kernel/irq/spurious.c | |
17751 | @@ -444,6 +444,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); | |
17752 | ||
17753 | static int __init irqfixup_setup(char *str) | |
17754 | { | |
17755 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17756 | + pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
17757 | + return 1; | |
17758 | +#endif | |
17759 | irqfixup = 1; | |
17760 | printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); | |
17761 | printk(KERN_WARNING "This may impact system performance.\n"); | |
17762 | @@ -456,6 +460,10 @@ module_param(irqfixup, int, 0644); | |
17763 | ||
17764 | static int __init irqpoll_setup(char *str) | |
17765 | { | |
17766 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
17767 | + pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT_BASE\n"); | |
17768 | + return 1; | |
17769 | +#endif | |
17770 | irqfixup = 2; | |
17771 | printk(KERN_WARNING "Misrouted IRQ fixup and polling support " | |
17772 | "enabled\n"); | |
17773 | diff --git a/kernel/irq_work.c b/kernel/irq_work.c | |
17774 | index bcf107ce0854..2899ba0d23d1 100644 | |
17775 | --- a/kernel/irq_work.c | |
17776 | +++ b/kernel/irq_work.c | |
17777 | @@ -17,6 +17,7 @@ | |
17778 | #include <linux/cpu.h> | |
17779 | #include <linux/notifier.h> | |
17780 | #include <linux/smp.h> | |
17781 | +#include <linux/interrupt.h> | |
17782 | #include <asm/processor.h> | |
17783 | ||
17784 | ||
17785 | @@ -65,6 +66,8 @@ void __weak arch_irq_work_raise(void) | |
17786 | */ | |
17787 | bool irq_work_queue_on(struct irq_work *work, int cpu) | |
17788 | { | |
17789 | + struct llist_head *list; | |
17790 | + | |
17791 | /* All work should have been flushed before going offline */ | |
17792 | WARN_ON_ONCE(cpu_is_offline(cpu)); | |
17793 | ||
17794 | @@ -75,7 +78,12 @@ bool irq_work_queue_on(struct irq_work *work, int cpu) | |
17795 | if (!irq_work_claim(work)) | |
17796 | return false; | |
17797 | ||
17798 | - if (llist_add(&work->llnode, &per_cpu(raised_list, cpu))) | |
17799 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL) && !(work->flags & IRQ_WORK_HARD_IRQ)) | |
17800 | + list = &per_cpu(lazy_list, cpu); | |
17801 | + else | |
17802 | + list = &per_cpu(raised_list, cpu); | |
17803 | + | |
17804 | + if (llist_add(&work->llnode, list)) | |
17805 | arch_send_call_function_single_ipi(cpu); | |
17806 | ||
17807 | return true; | |
17808 | @@ -86,6 +94,9 @@ EXPORT_SYMBOL_GPL(irq_work_queue_on); | |
17809 | /* Enqueue the irq work @work on the current CPU */ | |
17810 | bool irq_work_queue(struct irq_work *work) | |
17811 | { | |
17812 | + struct llist_head *list; | |
17813 | + bool lazy_work, realtime = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); | |
17814 | + | |
17815 | /* Only queue if not already pending */ | |
17816 | if (!irq_work_claim(work)) | |
17817 | return false; | |
17818 | @@ -93,13 +104,15 @@ bool irq_work_queue(struct irq_work *work) | |
17819 | /* Queue the entry and raise the IPI if needed. */ | |
17820 | preempt_disable(); | |
17821 | ||
17822 | - /* If the work is "lazy", handle it from next tick if any */ | |
17823 | - if (work->flags & IRQ_WORK_LAZY) { | |
17824 | - if (llist_add(&work->llnode, this_cpu_ptr(&lazy_list)) && | |
17825 | - tick_nohz_tick_stopped()) | |
17826 | - arch_irq_work_raise(); | |
17827 | - } else { | |
17828 | - if (llist_add(&work->llnode, this_cpu_ptr(&raised_list))) | |
17829 | + lazy_work = work->flags & IRQ_WORK_LAZY; | |
17830 | + | |
17831 | + if (lazy_work || (realtime && !(work->flags & IRQ_WORK_HARD_IRQ))) | |
17832 | + list = this_cpu_ptr(&lazy_list); | |
17833 | + else | |
17834 | + list = this_cpu_ptr(&raised_list); | |
17835 | + | |
17836 | + if (llist_add(&work->llnode, list)) { | |
17837 | + if (!lazy_work || tick_nohz_tick_stopped()) | |
17838 | arch_irq_work_raise(); | |
17839 | } | |
17840 | ||
17841 | @@ -116,9 +129,8 @@ bool irq_work_needs_cpu(void) | |
17842 | raised = this_cpu_ptr(&raised_list); | |
17843 | lazy = this_cpu_ptr(&lazy_list); | |
17844 | ||
17845 | - if (llist_empty(raised) || arch_irq_work_has_interrupt()) | |
17846 | - if (llist_empty(lazy)) | |
17847 | - return false; | |
17848 | + if (llist_empty(raised) && llist_empty(lazy)) | |
17849 | + return false; | |
17850 | ||
17851 | /* All work should have been flushed before going offline */ | |
17852 | WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | |
17853 | @@ -132,7 +144,7 @@ static void irq_work_run_list(struct llist_head *list) | |
17854 | struct irq_work *work; | |
17855 | struct llist_node *llnode; | |
17856 | ||
17857 | - BUG_ON(!irqs_disabled()); | |
17858 | + BUG_ON_NONRT(!irqs_disabled()); | |
17859 | ||
17860 | if (llist_empty(list)) | |
17861 | return; | |
17862 | @@ -169,7 +181,16 @@ static void irq_work_run_list(struct llist_head *list) | |
17863 | void irq_work_run(void) | |
17864 | { | |
17865 | irq_work_run_list(this_cpu_ptr(&raised_list)); | |
17866 | - irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17867 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) { | |
17868 | + /* | |
17869 | + * NOTE: we raise softirq via IPI for safety, | |
17870 | + * and execute in irq_work_tick() to move the | |
17871 | + * overhead from hard to soft irq context. | |
17872 | + */ | |
17873 | + if (!llist_empty(this_cpu_ptr(&lazy_list))) | |
17874 | + raise_softirq(TIMER_SOFTIRQ); | |
17875 | + } else | |
17876 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17877 | } | |
17878 | EXPORT_SYMBOL_GPL(irq_work_run); | |
17879 | ||
17880 | @@ -179,8 +200,17 @@ void irq_work_tick(void) | |
17881 | ||
17882 | if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) | |
17883 | irq_work_run_list(raised); | |
17884 | + | |
17885 | + if (!IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
17886 | + irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17887 | +} | |
17888 | + | |
17889 | +#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) | |
17890 | +void irq_work_tick_soft(void) | |
17891 | +{ | |
17892 | irq_work_run_list(this_cpu_ptr(&lazy_list)); | |
17893 | } | |
17894 | +#endif | |
17895 | ||
17896 | /* | |
17897 | * Synchronize against the irq_work @entry, ensures the entry is not | |
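
With these changes, on PREEMPT_RT_FULL only work flagged IRQ_WORK_HARD_IRQ stays on raised_list and runs from the IPI; everything else is routed to lazy_list and executed from irq_work_tick_soft() in TIMER_SOFTIRQ context. A sketch of work that must keep running in hard interrupt context (my_work and my_work_fn are placeholders; IRQ_WORK_HARD_IRQ is the flag used by this series):

    static void my_work_fn(struct irq_work *work)
    {
            /* hard irq context on all configs: keep it short, no sleeping */
    }

    static struct irq_work my_work = {
            .flags = IRQ_WORK_HARD_IRQ,
            .func  = my_work_fn,
    };

    static void my_poke(void)
    {
            irq_work_queue(&my_work);       /* stays on raised_list on RT */
    }
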
17898 | diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c | |
17899 | index e83b26464061..c0e08d1cf33e 100644 | |
17900 | --- a/kernel/ksysfs.c | |
17901 | +++ b/kernel/ksysfs.c | |
17902 | @@ -136,6 +136,15 @@ KERNEL_ATTR_RO(vmcoreinfo); | |
17903 | ||
17904 | #endif /* CONFIG_KEXEC_CORE */ | |
17905 | ||
17906 | +#if defined(CONFIG_PREEMPT_RT_FULL) | |
17907 | +static ssize_t realtime_show(struct kobject *kobj, | |
17908 | + struct kobj_attribute *attr, char *buf) | |
17909 | +{ | |
17910 | + return sprintf(buf, "%d\n", 1); | |
17911 | +} | |
17912 | +KERNEL_ATTR_RO(realtime); | |
17913 | +#endif | |
17914 | + | |
17915 | /* whether file capabilities are enabled */ | |
17916 | static ssize_t fscaps_show(struct kobject *kobj, | |
17917 | struct kobj_attribute *attr, char *buf) | |
17918 | @@ -203,6 +212,9 @@ static struct attribute * kernel_attrs[] = { | |
17919 | &vmcoreinfo_attr.attr, | |
17920 | #endif | |
17921 | &rcu_expedited_attr.attr, | |
17922 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17923 | + &realtime_attr.attr, | |
17924 | +#endif | |
17925 | NULL | |
17926 | }; | |
17927 | ||
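
With PREEMPT_RT_FULL enabled, the kernel now advertises itself through a read-only sysfs attribute, so userspace and test scripts can detect an RT kernel:

    # cat /sys/kernel/realtime
    1

The attribute is only registered on PREEMPT_RT_FULL kernels, so its absence rather than its value is what distinguishes a non-RT kernel.
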
17928 | diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile | |
17929 | index 8e96f6cc2a4a..447b03082d88 100644 | |
17930 | --- a/kernel/locking/Makefile | |
17931 | +++ b/kernel/locking/Makefile | |
17932 | @@ -1,5 +1,5 @@ | |
17933 | ||
17934 | -obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | |
17935 | +obj-y += semaphore.o percpu-rwsem.o | |
17936 | ||
17937 | ifdef CONFIG_FUNCTION_TRACER | |
17938 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | |
17939 | @@ -8,7 +8,11 @@ CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) | |
17940 | CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) | |
17941 | endif | |
17942 | ||
17943 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
17944 | +obj-y += mutex.o | |
17945 | obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o | |
17946 | +obj-y += rwsem.o | |
17947 | +endif | |
17948 | obj-$(CONFIG_LOCKDEP) += lockdep.o | |
17949 | ifeq ($(CONFIG_PROC_FS),y) | |
17950 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | |
17951 | @@ -22,7 +26,10 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |
17952 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | |
17953 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | |
17954 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |
17955 | +ifneq ($(CONFIG_PREEMPT_RT_FULL),y) | |
17956 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | |
17957 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | |
17958 | +endif | |
17959 | +obj-$(CONFIG_PREEMPT_RT_FULL) += rt.o | |
17960 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | |
17961 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | |
17962 | diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c | |
17963 | index 951cfcd10b4a..57e0ea72c28a 100644 | |
17964 | --- a/kernel/locking/lglock.c | |
17965 | +++ b/kernel/locking/lglock.c | |
17966 | @@ -4,6 +4,15 @@ | |
17967 | #include <linux/cpu.h> | |
17968 | #include <linux/string.h> | |
17969 | ||
17970 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
17971 | +# define lg_lock_ptr arch_spinlock_t | |
17972 | +# define lg_do_lock(l) arch_spin_lock(l) | |
17973 | +# define lg_do_unlock(l) arch_spin_unlock(l) | |
17974 | +#else | |
17975 | +# define lg_lock_ptr struct rt_mutex | |
17976 | +# define lg_do_lock(l) __rt_spin_lock__no_mg(l) | |
17977 | +# define lg_do_unlock(l) __rt_spin_unlock(l) | |
17978 | +#endif | |
17979 | /* | |
17980 | * Note there is no uninit, so lglocks cannot be defined in | |
17981 | * modules (but it's fine to use them from there) | |
17982 | @@ -12,51 +21,60 @@ | |
17983 | ||
17984 | void lg_lock_init(struct lglock *lg, char *name) | |
17985 | { | |
17986 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
17987 | + int i; | |
17988 | + | |
17989 | + for_each_possible_cpu(i) { | |
17990 | + struct rt_mutex *lock = per_cpu_ptr(lg->lock, i); | |
17991 | + | |
17992 | + rt_mutex_init(lock); | |
17993 | + } | |
17994 | +#endif | |
17995 | LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); | |
17996 | } | |
17997 | EXPORT_SYMBOL(lg_lock_init); | |
17998 | ||
17999 | void lg_local_lock(struct lglock *lg) | |
18000 | { | |
18001 | - arch_spinlock_t *lock; | |
18002 | + lg_lock_ptr *lock; | |
18003 | ||
18004 | - preempt_disable(); | |
18005 | + migrate_disable(); | |
18006 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
18007 | lock = this_cpu_ptr(lg->lock); | |
18008 | - arch_spin_lock(lock); | |
18009 | + lg_do_lock(lock); | |
18010 | } | |
18011 | EXPORT_SYMBOL(lg_local_lock); | |
18012 | ||
18013 | void lg_local_unlock(struct lglock *lg) | |
18014 | { | |
18015 | - arch_spinlock_t *lock; | |
18016 | + lg_lock_ptr *lock; | |
18017 | ||
18018 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
18019 | lock = this_cpu_ptr(lg->lock); | |
18020 | - arch_spin_unlock(lock); | |
18021 | - preempt_enable(); | |
18022 | + lg_do_unlock(lock); | |
18023 | + migrate_enable(); | |
18024 | } | |
18025 | EXPORT_SYMBOL(lg_local_unlock); | |
18026 | ||
18027 | void lg_local_lock_cpu(struct lglock *lg, int cpu) | |
18028 | { | |
18029 | - arch_spinlock_t *lock; | |
18030 | + lg_lock_ptr *lock; | |
18031 | ||
18032 | - preempt_disable(); | |
18033 | + preempt_disable_nort(); | |
18034 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
18035 | lock = per_cpu_ptr(lg->lock, cpu); | |
18036 | - arch_spin_lock(lock); | |
18037 | + lg_do_lock(lock); | |
18038 | } | |
18039 | EXPORT_SYMBOL(lg_local_lock_cpu); | |
18040 | ||
18041 | void lg_local_unlock_cpu(struct lglock *lg, int cpu) | |
18042 | { | |
18043 | - arch_spinlock_t *lock; | |
18044 | + lg_lock_ptr *lock; | |
18045 | ||
18046 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
18047 | lock = per_cpu_ptr(lg->lock, cpu); | |
18048 | - arch_spin_unlock(lock); | |
18049 | - preempt_enable(); | |
18050 | + lg_do_unlock(lock); | |
18051 | + preempt_enable_nort(); | |
18052 | } | |
18053 | EXPORT_SYMBOL(lg_local_unlock_cpu); | |
18054 | ||
18055 | @@ -68,30 +86,30 @@ void lg_double_lock(struct lglock *lg, int cpu1, int cpu2) | |
18056 | if (cpu2 < cpu1) | |
18057 | swap(cpu1, cpu2); | |
18058 | ||
18059 | - preempt_disable(); | |
18060 | + preempt_disable_nort(); | |
18061 | lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
18062 | - arch_spin_lock(per_cpu_ptr(lg->lock, cpu1)); | |
18063 | - arch_spin_lock(per_cpu_ptr(lg->lock, cpu2)); | |
18064 | + lg_do_lock(per_cpu_ptr(lg->lock, cpu1)); | |
18065 | + lg_do_lock(per_cpu_ptr(lg->lock, cpu2)); | |
18066 | } | |
18067 | ||
18068 | void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2) | |
18069 | { | |
18070 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
18071 | - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1)); | |
18072 | - arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2)); | |
18073 | - preempt_enable(); | |
18074 | + lg_do_unlock(per_cpu_ptr(lg->lock, cpu1)); | |
18075 | + lg_do_unlock(per_cpu_ptr(lg->lock, cpu2)); | |
18076 | + preempt_enable_nort(); | |
18077 | } | |
18078 | ||
18079 | void lg_global_lock(struct lglock *lg) | |
18080 | { | |
18081 | int i; | |
18082 | ||
18083 | - preempt_disable(); | |
18084 | + preempt_disable_nort(); | |
18085 | lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
18086 | for_each_possible_cpu(i) { | |
18087 | - arch_spinlock_t *lock; | |
18088 | + lg_lock_ptr *lock; | |
18089 | lock = per_cpu_ptr(lg->lock, i); | |
18090 | - arch_spin_lock(lock); | |
18091 | + lg_do_lock(lock); | |
18092 | } | |
18093 | } | |
18094 | EXPORT_SYMBOL(lg_global_lock); | |
18095 | @@ -102,10 +120,35 @@ void lg_global_unlock(struct lglock *lg) | |
18096 | ||
18097 | lock_release(&lg->lock_dep_map, 1, _RET_IP_); | |
18098 | for_each_possible_cpu(i) { | |
18099 | - arch_spinlock_t *lock; | |
18100 | + lg_lock_ptr *lock; | |
18101 | lock = per_cpu_ptr(lg->lock, i); | |
18102 | - arch_spin_unlock(lock); | |
18103 | + lg_do_unlock(lock); | |
18104 | } | |
18105 | - preempt_enable(); | |
18106 | + preempt_enable_nort(); | |
18107 | } | |
18108 | EXPORT_SYMBOL(lg_global_unlock); | |
18109 | + | |
18110 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18111 | +/* | |
18112 | + * HACK: If you use this, you get to keep the pieces. | |
18113 | + * Used in queue_stop_cpus_work() when stop machinery | |
18114 | + * is called from inactive CPU, so we can't schedule. | |
18115 | + */ | |
18116 | +# define lg_do_trylock_relax(l) \ | |
18117 | + do { \ | |
18118 | + while (!__rt_spin_trylock(l)) \ | |
18119 | + cpu_relax(); \ | |
18120 | + } while (0) | |
18121 | + | |
18122 | +void lg_global_trylock_relax(struct lglock *lg) | |
18123 | +{ | |
18124 | + int i; | |
18125 | + | |
18126 | + lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_); | |
18127 | + for_each_possible_cpu(i) { | |
18128 | + lg_lock_ptr *lock; | |
18129 | + lock = per_cpu_ptr(lg->lock, i); | |
18130 | + lg_do_trylock_relax(lock); | |
18131 | + } | |
18132 | +} | |
18133 | +#endif | |
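The trylock-relax helper above deserves a note: on PREEMPT_RT the per-CPU locks behind an lglock are sleeping rt_mutexes, yet queue_stop_cpus_work() can run on a CPU that is not yet active, where blocking is forbidden, so the loop busy-polls the lock instead of sleeping. A minimal sketch of the same pattern, using the __rt_spin_trylock() primitive this series provides (demo_trylock_relax and demo_lock are illustrative):

	/*
	 * Sketch only: acquire a sleeping lock without ever blocking.
	 * Safe where schedule() is not allowed, at the price of burning
	 * the CPU until the current owner lets go.
	 */
	static void demo_trylock_relax(struct rt_mutex *demo_lock)
	{
		while (!__rt_spin_trylock(demo_lock))	/* never sleeps */
			cpu_relax();			/* yield pipeline resources */
	}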
18134 | diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c | |
18135 | index 60ace56618f6..e98ee958a353 100644 | |
18136 | --- a/kernel/locking/lockdep.c | |
18137 | +++ b/kernel/locking/lockdep.c | |
18138 | @@ -3525,6 +3525,7 @@ static void check_flags(unsigned long flags) | |
18139 | } | |
18140 | } | |
18141 | ||
18142 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
18143 | /* | |
18144 | * We dont accurately track softirq state in e.g. | |
18145 | * hardirq contexts (such as on 4KSTACKS), so only | |
18146 | @@ -3539,6 +3540,7 @@ static void check_flags(unsigned long flags) | |
18147 | DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled); | |
18148 | } | |
18149 | } | |
18150 | +#endif | |
18151 | ||
18152 | if (!debug_locks) | |
18153 | print_irqtrace_events(current); | |
18154 | diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c | |
18155 | index 8ef1919d63b2..291fc19e28e0 100644 | |
18156 | --- a/kernel/locking/locktorture.c | |
18157 | +++ b/kernel/locking/locktorture.c | |
18158 | @@ -26,7 +26,6 @@ | |
18159 | #include <linux/kthread.h> | |
18160 | #include <linux/sched/rt.h> | |
18161 | #include <linux/spinlock.h> | |
18162 | -#include <linux/rwlock.h> | |
18163 | #include <linux/mutex.h> | |
18164 | #include <linux/rwsem.h> | |
18165 | #include <linux/smp.h> | |
18166 | diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c | |
18167 | new file mode 100644 | |
18168 | index 000000000000..d4ab61c1848b | |
18169 | --- /dev/null | |
18170 | +++ b/kernel/locking/rt.c | |
18171 | @@ -0,0 +1,474 @@ | |
18172 | +/* | |
18173 | + * kernel/rt.c | |
18174 | + * | |
18175 | + * Real-Time Preemption Support | |
18176 | + * | |
18177 | + * started by Ingo Molnar: | |
18178 | + * | |
18179 | + * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | |
18180 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
18181 | + * | |
18182 | + * historic credit for proving that Linux spinlocks can be implemented via | |
18183 | + * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow | |
18184 | + * and others) who prototyped it on 2.4 and did lots of comparative | |
18185 | + * research and analysis; TimeSys, for proving that you can implement a | |
18186 | + * fully preemptible kernel via the use of IRQ threading and mutexes; | |
18187 | + * Bill Huey for persuasively arguing on lkml that the mutex model is the | |
18188 | + * right one; and to MontaVista, who ported pmutexes to 2.6. | |
18189 | + * | |
18190 | + * This code is a from-scratch implementation and is not based on pmutexes, | |
18191 | + * but the idea of converting spinlocks to mutexes is used here too. | |
18192 | + * | |
18193 | + * lock debugging, locking tree, deadlock detection: | |
18194 | + * | |
18195 | + * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey | |
18196 | + * Released under the General Public License (GPL). | |
18197 | + * | |
18198 | + * Includes portions of the generic R/W semaphore implementation from: | |
18199 | + * | |
18200 | + * Copyright (c) 2001 David Howells (dhowells@redhat.com). | |
18201 | + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | |
18202 | + * - Derived also from comments by Linus | |
18203 | + * | |
18204 | + * Pending ownership of locks and ownership stealing: | |
18205 | + * | |
18206 | + * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt | |
18207 | + * | |
18208 | + * (also by Steven Rostedt) | |
18209 | + * - Converted single pi_lock to individual task locks. | |
18210 | + * | |
18211 | + * By Esben Nielsen: | |
18212 | + * Doing priority inheritance with help of the scheduler. | |
18213 | + * | |
18214 | + * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
18215 | + * - major rework based on Esben Nielsens initial patch | |
18216 | + * - replaced thread_info references by task_struct refs | |
18217 | + * - removed task->pending_owner dependency | |
18218 | + * - BKL drop/reacquire for semaphore style locks to avoid deadlocks | |
18219 | + * in the scheduler return path as discussed with Steven Rostedt | |
18220 | + * | |
18221 | + * Copyright (C) 2006, Kihon Technologies Inc. | |
18222 | + * Steven Rostedt <rostedt@goodmis.org> | |
18223 | + * - debugged and patched Thomas Gleixner's rework. | |
18224 | + * - added back the cmpxchg to the rework. | |
18225 | + * - turned atomic require back on for SMP. | |
18226 | + */ | |
18227 | + | |
18228 | +#include <linux/spinlock.h> | |
18229 | +#include <linux/rtmutex.h> | |
18230 | +#include <linux/sched.h> | |
18231 | +#include <linux/delay.h> | |
18232 | +#include <linux/module.h> | |
18233 | +#include <linux/kallsyms.h> | |
18234 | +#include <linux/syscalls.h> | |
18235 | +#include <linux/interrupt.h> | |
18236 | +#include <linux/plist.h> | |
18237 | +#include <linux/fs.h> | |
18238 | +#include <linux/futex.h> | |
18239 | +#include <linux/hrtimer.h> | |
18240 | + | |
18241 | +#include "rtmutex_common.h" | |
18242 | + | |
18243 | +/* | |
18244 | + * struct mutex functions | |
18245 | + */ | |
18246 | +void __mutex_do_init(struct mutex *mutex, const char *name, | |
18247 | + struct lock_class_key *key) | |
18248 | +{ | |
18249 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
18250 | + /* | |
18251 | + * Make sure we are not reinitializing a held lock: | |
18252 | + */ | |
18253 | + debug_check_no_locks_freed((void *)mutex, sizeof(*mutex)); | |
18254 | + lockdep_init_map(&mutex->dep_map, name, key, 0); | |
18255 | +#endif | |
18256 | + mutex->lock.save_state = 0; | |
18257 | +} | |
18258 | +EXPORT_SYMBOL(__mutex_do_init); | |
18259 | + | |
18260 | +void __lockfunc _mutex_lock(struct mutex *lock) | |
18261 | +{ | |
18262 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18263 | + rt_mutex_lock(&lock->lock); | |
18264 | +} | |
18265 | +EXPORT_SYMBOL(_mutex_lock); | |
18266 | + | |
18267 | +int __lockfunc _mutex_lock_interruptible(struct mutex *lock) | |
18268 | +{ | |
18269 | + int ret; | |
18270 | + | |
18271 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18272 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
18273 | + if (ret) | |
18274 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18275 | + return ret; | |
18276 | +} | |
18277 | +EXPORT_SYMBOL(_mutex_lock_interruptible); | |
18278 | + | |
18279 | +int __lockfunc _mutex_lock_killable(struct mutex *lock) | |
18280 | +{ | |
18281 | + int ret; | |
18282 | + | |
18283 | + mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
18284 | + ret = rt_mutex_lock_killable(&lock->lock); | |
18285 | + if (ret) | |
18286 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18287 | + return ret; | |
18288 | +} | |
18289 | +EXPORT_SYMBOL(_mutex_lock_killable); | |
18290 | + | |
18291 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
18292 | +void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) | |
18293 | +{ | |
18294 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
18295 | + rt_mutex_lock(&lock->lock); | |
18296 | +} | |
18297 | +EXPORT_SYMBOL(_mutex_lock_nested); | |
18298 | + | |
18299 | +void __lockfunc _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) | |
18300 | +{ | |
18301 | + mutex_acquire_nest(&lock->dep_map, 0, 0, nest, _RET_IP_); | |
18302 | + rt_mutex_lock(&lock->lock); | |
18303 | +} | |
18304 | +EXPORT_SYMBOL(_mutex_lock_nest_lock); | |
18305 | + | |
18306 | +int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) | |
18307 | +{ | |
18308 | + int ret; | |
18309 | + | |
18310 | + mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); | |
18311 | + ret = rt_mutex_lock_interruptible(&lock->lock); | |
18312 | + if (ret) | |
18313 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18314 | + return ret; | |
18315 | +} | |
18316 | +EXPORT_SYMBOL(_mutex_lock_interruptible_nested); | |
18317 | + | |
18318 | +int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) | |
18319 | +{ | |
18320 | + int ret; | |
18321 | + | |
18322 | + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
18323 | + ret = rt_mutex_lock_killable(&lock->lock); | |
18324 | + if (ret) | |
18325 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18326 | + return ret; | |
18327 | +} | |
18328 | +EXPORT_SYMBOL(_mutex_lock_killable_nested); | |
18329 | +#endif | |
18330 | + | |
18331 | +int __lockfunc _mutex_trylock(struct mutex *lock) | |
18332 | +{ | |
18333 | + int ret = rt_mutex_trylock(&lock->lock); | |
18334 | + | |
18335 | + if (ret) | |
18336 | + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
18337 | + | |
18338 | + return ret; | |
18339 | +} | |
18340 | +EXPORT_SYMBOL(_mutex_trylock); | |
18341 | + | |
18342 | +void __lockfunc _mutex_unlock(struct mutex *lock) | |
18343 | +{ | |
18344 | + mutex_release(&lock->dep_map, 1, _RET_IP_); | |
18345 | + rt_mutex_unlock(&lock->lock); | |
18346 | +} | |
18347 | +EXPORT_SYMBOL(_mutex_unlock); | |
18348 | + | |
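These _mutex_* wrappers are what lets PREEMPT_RT_FULL leave ordinary mutex users untouched: the RT variants of the mutex headers redirect mutex_lock() and friends here, where the lockdep annotation is applied and the operation falls through to the rt_mutex that now backs struct mutex. From the caller's point of view nothing changes; a hedged sketch (demo_lock and demo() are illustrative):

	static DEFINE_MUTEX(demo_lock);		/* illustrative lock */

	static void demo(void)
	{
		/* On RT this resolves to _mutex_lock() -> rt_mutex_lock() */
		mutex_lock(&demo_lock);
		/* critical section: may sleep; contention PI-boosts the owner */
		mutex_unlock(&demo_lock);	/* -> rt_mutex_unlock() */
	}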
18349 | +/* | |
18350 | + * rwlock_t functions | |
18351 | + */ | |
18352 | +int __lockfunc rt_write_trylock(rwlock_t *rwlock) | |
18353 | +{ | |
18354 | + int ret; | |
18355 | + | |
18356 | + migrate_disable(); | |
18357 | + ret = rt_mutex_trylock(&rwlock->lock); | |
18358 | + if (ret) | |
18359 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
18360 | + else | |
18361 | + migrate_enable(); | |
18362 | + | |
18363 | + return ret; | |
18364 | +} | |
18365 | +EXPORT_SYMBOL(rt_write_trylock); | |
18366 | + | |
18367 | +int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) | |
18368 | +{ | |
18369 | + int ret; | |
18370 | + | |
18371 | + *flags = 0; | |
18372 | + ret = rt_write_trylock(rwlock); | |
18373 | + return ret; | |
18374 | +} | |
18375 | +EXPORT_SYMBOL(rt_write_trylock_irqsave); | |
18376 | + | |
18377 | +int __lockfunc rt_read_trylock(rwlock_t *rwlock) | |
18378 | +{ | |
18379 | + struct rt_mutex *lock = &rwlock->lock; | |
18380 | + int ret = 1; | |
18381 | + | |
18382 | + /* | |
18383 | + * recursive read locks succeed when current owns the lock, | |
18384 | + * but not when read_depth == 0 which means that the lock is | |
18385 | + * write locked. | |
18386 | + */ | |
18387 | + if (rt_mutex_owner(lock) != current) { | |
18388 | + migrate_disable(); | |
18389 | + ret = rt_mutex_trylock(lock); | |
18390 | + if (ret) | |
18391 | + rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); | |
18392 | + else | |
18393 | + migrate_enable(); | |
18394 | + | |
18395 | + } else if (!rwlock->read_depth) { | |
18396 | + ret = 0; | |
18397 | + } | |
18398 | + | |
18399 | + if (ret) | |
18400 | + rwlock->read_depth++; | |
18401 | + | |
18402 | + return ret; | |
18403 | +} | |
18404 | +EXPORT_SYMBOL(rt_read_trylock); | |
18405 | + | |
18406 | +void __lockfunc rt_write_lock(rwlock_t *rwlock) | |
18407 | +{ | |
18408 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
18409 | + __rt_spin_lock(&rwlock->lock); | |
18410 | +} | |
18411 | +EXPORT_SYMBOL(rt_write_lock); | |
18412 | + | |
18413 | +void __lockfunc rt_read_lock(rwlock_t *rwlock) | |
18414 | +{ | |
18415 | + struct rt_mutex *lock = &rwlock->lock; | |
18416 | + | |
18417 | + | |
18418 | + /* | |
18419 | + * recursive read locks succeed when current owns the lock | |
18420 | + */ | |
18421 | + if (rt_mutex_owner(lock) != current) { | |
18422 | + rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); | |
18423 | + __rt_spin_lock(lock); | |
18424 | + } | |
18425 | + rwlock->read_depth++; | |
18426 | +} | |
18427 | + | |
18428 | +EXPORT_SYMBOL(rt_read_lock); | |
18429 | + | |
18430 | +void __lockfunc rt_write_unlock(rwlock_t *rwlock) | |
18431 | +{ | |
18432 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
18433 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
18434 | + __rt_spin_unlock(&rwlock->lock); | |
18435 | + migrate_enable(); | |
18436 | +} | |
18437 | +EXPORT_SYMBOL(rt_write_unlock); | |
18438 | + | |
18439 | +void __lockfunc rt_read_unlock(rwlock_t *rwlock) | |
18440 | +{ | |
18441 | + /* Release the lock only when read_depth is down to 0 */ | |
18442 | + if (--rwlock->read_depth == 0) { | |
18443 | + rwlock_release(&rwlock->dep_map, 1, _RET_IP_); | |
18444 | + __rt_spin_unlock(&rwlock->lock); | |
18445 | + migrate_enable(); | |
18446 | + } | |
18447 | +} | |
18448 | +EXPORT_SYMBOL(rt_read_unlock); | |
18449 | + | |
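Note what the read_depth bookkeeping above implies: on RT an rwlock_t is backed by a single rt_mutex, so there is never more than one reader (or writer) task inside the lock, but the owning task may re-acquire the read side recursively. A sketch of the resulting semantics (demo_rwlock and demo_reader are illustrative):

	static DEFINE_RWLOCK(demo_rwlock);	/* illustrative lock */

	static void demo_reader(void)
	{
		read_lock(&demo_rwlock);	/* depth 0 -> 1, takes the rt_mutex */
		read_lock(&demo_rwlock);	/* same owner: depth 1 -> 2, no mutex op */
		read_unlock(&demo_rwlock);	/* depth 2 -> 1, lock still held */
		read_unlock(&demo_rwlock);	/* depth 1 -> 0, rt_mutex released */
	}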
18450 | +unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) | |
18451 | +{ | |
18452 | + rt_write_lock(rwlock); | |
18453 | + | |
18454 | + return 0; | |
18455 | +} | |
18456 | +EXPORT_SYMBOL(rt_write_lock_irqsave); | |
18457 | + | |
18458 | +unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) | |
18459 | +{ | |
18460 | + rt_read_lock(rwlock); | |
18461 | + | |
18462 | + return 0; | |
18463 | +} | |
18464 | +EXPORT_SYMBOL(rt_read_lock_irqsave); | |
18465 | + | |
18466 | +void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) | |
18467 | +{ | |
18468 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
18469 | + /* | |
18470 | + * Make sure we are not reinitializing a held lock: | |
18471 | + */ | |
18472 | + debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); | |
18473 | + lockdep_init_map(&rwlock->dep_map, name, key, 0); | |
18474 | +#endif | |
18475 | + rwlock->lock.save_state = 1; | |
18476 | + rwlock->read_depth = 0; | |
18477 | +} | |
18478 | +EXPORT_SYMBOL(__rt_rwlock_init); | |
18479 | + | |
18480 | +/* | |
18481 | + * rw_semaphores | |
18482 | + */ | |
18483 | + | |
18484 | +void rt_up_write(struct rw_semaphore *rwsem) | |
18485 | +{ | |
18486 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
18487 | + rt_mutex_unlock(&rwsem->lock); | |
18488 | +} | |
18489 | +EXPORT_SYMBOL(rt_up_write); | |
18490 | + | |
18491 | +void __rt_up_read(struct rw_semaphore *rwsem) | |
18492 | +{ | |
18493 | + if (--rwsem->read_depth == 0) | |
18494 | + rt_mutex_unlock(&rwsem->lock); | |
18495 | +} | |
18496 | + | |
18497 | +void rt_up_read(struct rw_semaphore *rwsem) | |
18498 | +{ | |
18499 | + rwsem_release(&rwsem->dep_map, 1, _RET_IP_); | |
18500 | + __rt_up_read(rwsem); | |
18501 | +} | |
18502 | +EXPORT_SYMBOL(rt_up_read); | |
18503 | + | |
18504 | +/* | |
18505 | + * downgrade a write lock into a read lock | |
18506 | + * - just wake up any readers at the front of the queue | |
18507 | + */ | |
18508 | +void rt_downgrade_write(struct rw_semaphore *rwsem) | |
18509 | +{ | |
18510 | + BUG_ON(rt_mutex_owner(&rwsem->lock) != current); | |
18511 | + rwsem->read_depth = 1; | |
18512 | +} | |
18513 | +EXPORT_SYMBOL(rt_downgrade_write); | |
18514 | + | |
18515 | +int rt_down_write_trylock(struct rw_semaphore *rwsem) | |
18516 | +{ | |
18517 | + int ret = rt_mutex_trylock(&rwsem->lock); | |
18518 | + | |
18519 | + if (ret) | |
18520 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
18521 | + return ret; | |
18522 | +} | |
18523 | +EXPORT_SYMBOL(rt_down_write_trylock); | |
18524 | + | |
18525 | +void rt_down_write(struct rw_semaphore *rwsem) | |
18526 | +{ | |
18527 | + rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); | |
18528 | + rt_mutex_lock(&rwsem->lock); | |
18529 | +} | |
18530 | +EXPORT_SYMBOL(rt_down_write); | |
18531 | + | |
18532 | +void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) | |
18533 | +{ | |
18534 | + rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
18535 | + rt_mutex_lock(&rwsem->lock); | |
18536 | +} | |
18537 | +EXPORT_SYMBOL(rt_down_write_nested); | |
18538 | + | |
18539 | +void rt_down_write_nested_lock(struct rw_semaphore *rwsem, | |
18540 | + struct lockdep_map *nest) | |
18541 | +{ | |
18542 | + rwsem_acquire_nest(&rwsem->dep_map, 0, 0, nest, _RET_IP_); | |
18543 | + rt_mutex_lock(&rwsem->lock); | |
18544 | +} | |
18545 | +EXPORT_SYMBOL(rt_down_write_nested_lock); | |
18546 | + | |
18547 | +int rt__down_read_trylock(struct rw_semaphore *rwsem) | |
18548 | +{ | |
18549 | + struct rt_mutex *lock = &rwsem->lock; | |
18550 | + int ret = 1; | |
18551 | + | |
18552 | + /* | |
18553 | + * recursive read locks succeed when current owns the rwsem, | |
18554 | + * but not when read_depth == 0 which means that the rwsem is | |
18555 | + * write locked. | |
18556 | + */ | |
18557 | + if (rt_mutex_owner(lock) != current) | |
18558 | + ret = rt_mutex_trylock(&rwsem->lock); | |
18559 | + else if (!rwsem->read_depth) | |
18560 | + ret = 0; | |
18561 | + | |
18562 | + if (ret) | |
18563 | + rwsem->read_depth++; | |
18564 | + return ret; | |
18565 | + | |
18566 | +} | |
18567 | + | |
18568 | +int rt_down_read_trylock(struct rw_semaphore *rwsem) | |
18569 | +{ | |
18570 | + int ret; | |
18571 | + | |
18572 | + ret = rt__down_read_trylock(rwsem); | |
18573 | + if (ret) | |
18574 | + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); | |
18575 | + | |
18576 | + return ret; | |
18577 | +} | |
18578 | +EXPORT_SYMBOL(rt_down_read_trylock); | |
18579 | + | |
18580 | +void rt__down_read(struct rw_semaphore *rwsem) | |
18581 | +{ | |
18582 | + struct rt_mutex *lock = &rwsem->lock; | |
18583 | + | |
18584 | + if (rt_mutex_owner(lock) != current) | |
18585 | + rt_mutex_lock(&rwsem->lock); | |
18586 | + rwsem->read_depth++; | |
18587 | +} | |
18588 | +EXPORT_SYMBOL(rt__down_read); | |
18589 | + | |
18590 | +static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) | |
18591 | +{ | |
18592 | + rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); | |
18593 | + rt__down_read(rwsem); | |
18594 | +} | |
18595 | + | |
18596 | +void rt_down_read(struct rw_semaphore *rwsem) | |
18597 | +{ | |
18598 | + __rt_down_read(rwsem, 0); | |
18599 | +} | |
18600 | +EXPORT_SYMBOL(rt_down_read); | |
18601 | + | |
18602 | +void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) | |
18603 | +{ | |
18604 | + __rt_down_read(rwsem, subclass); | |
18605 | +} | |
18606 | +EXPORT_SYMBOL(rt_down_read_nested); | |
18607 | + | |
18608 | +void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, | |
18609 | + struct lock_class_key *key) | |
18610 | +{ | |
18611 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
18612 | + /* | |
18613 | + * Make sure we are not reinitializing a held lock: | |
18614 | + */ | |
18615 | + debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); | |
18616 | + lockdep_init_map(&rwsem->dep_map, name, key, 0); | |
18617 | +#endif | |
18618 | + rwsem->read_depth = 0; | |
18619 | + rwsem->lock.save_state = 0; | |
18620 | +} | |
18621 | +EXPORT_SYMBOL(__rt_rwsem_init); | |
18622 | + | |
18623 | +/** | |
18624 | + * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 | |
18625 | + * @cnt: the atomic which we are to dec | |
18626 | + * @lock: the mutex to return holding if we dec to 0 | |
18627 | + * | |
18628 | + * return true and hold lock if we dec to 0, return false otherwise | |
18629 | + */ | |
18630 | +int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) | |
18631 | +{ | |
18632 | + /* dec if we can't possibly hit 0 */ | |
18633 | + if (atomic_add_unless(cnt, -1, 1)) | |
18634 | + return 0; | |
18635 | + /* we might hit 0, so take the lock */ | |
18636 | + mutex_lock(lock); | |
18637 | + if (!atomic_dec_and_test(cnt)) { | |
18638 | + /* when we actually did the dec, we didn't hit 0 */ | |
18639 | + mutex_unlock(lock); | |
18640 | + return 0; | |
18641 | + } | |
18642 | + /* we hit 0, and we hold the lock */ | |
18643 | + return 1; | |
18644 | +} | |
18645 | +EXPORT_SYMBOL(atomic_dec_and_mutex_lock); | |
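The usual caller of atomic_dec_and_mutex_lock() is a refcounted object whose teardown must run under the lock exactly once; the atomic_add_unless() fast path above keeps the mutex out of the picture as long as the count cannot reach zero. A sketch of that pattern (the struct and function names are illustrative):

	struct demo_obj {			/* illustrative object */
		atomic_t	refcnt;
		struct mutex	lock;
	};

	static void demo_put(struct demo_obj *obj)
	{
		/* Returns 1 only for the final reference, with obj->lock held */
		if (atomic_dec_and_mutex_lock(&obj->refcnt, &obj->lock)) {
			/* ... tear the object down under obj->lock ... */
			mutex_unlock(&obj->lock);
		}
	}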
18646 | diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c | |
cb95d48a | 18647 | index 8251e75dd9c0..6759a798c927 100644 |
18648 | --- a/kernel/locking/rtmutex.c |
18649 | +++ b/kernel/locking/rtmutex.c | |
18650 | @@ -7,6 +7,11 @@ | |
18651 | * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> | |
18652 | * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt | |
18653 | * Copyright (C) 2006 Esben Nielsen | |
18654 | + * Adaptive Spinlocks: | |
18655 | + * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, | |
18656 | + * and Peter Morreale, | |
18657 | + * Adaptive Spinlocks simplification: | |
18658 | + * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com> | |
18659 | * | |
18660 | * See Documentation/locking/rt-mutex-design.txt for details. | |
18661 | */ | |
18662 | @@ -16,6 +21,7 @@ | |
18663 | #include <linux/sched/rt.h> | |
18664 | #include <linux/sched/deadline.h> | |
18665 | #include <linux/timer.h> | |
18666 | +#include <linux/ww_mutex.h> | |
18667 | ||
18668 | #include "rtmutex_common.h" | |
18669 | ||
18670 | @@ -69,6 +75,12 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) | |
18671 | clear_rt_mutex_waiters(lock); | |
18672 | } | |
18673 | ||
18674 | +static int rt_mutex_real_waiter(struct rt_mutex_waiter *waiter) | |
18675 | +{ | |
18676 | + return waiter && waiter != PI_WAKEUP_INPROGRESS && | |
18677 | + waiter != PI_REQUEUE_INPROGRESS; | |
18678 | +} | |
18679 | + | |
18680 | /* | |
18681 | * We can speed up the acquire/release, if there's no debugging state to be | |
18682 | * set up. | |
18683 | @@ -99,13 +111,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | |
18684 | * 2) Drop lock->wait_lock | |
18685 | * 3) Try to unlock the lock with cmpxchg | |
18686 | */ | |
18687 | -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) | |
18688 | +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, | |
18689 | + unsigned long flags) | |
18690 | __releases(lock->wait_lock) | |
18691 | { | |
18692 | struct task_struct *owner = rt_mutex_owner(lock); | |
18693 | ||
18694 | clear_rt_mutex_waiters(lock); | |
18695 | - raw_spin_unlock(&lock->wait_lock); | |
18696 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
18697 | /* | |
18698 | * If a new waiter comes in between the unlock and the cmpxchg | |
18699 | * we have two situations: | |
18700 | @@ -147,11 +160,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) | |
18701 | /* | |
18702 | * Simple slow path only version: lock->owner is protected by lock->wait_lock. | |
18703 | */ | |
18704 | -static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) | |
18705 | +static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock, | |
18706 | + unsigned long flags) | |
18707 | __releases(lock->wait_lock) | |
18708 | { | |
18709 | lock->owner = NULL; | |
18710 | - raw_spin_unlock(&lock->wait_lock); | |
18711 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
18712 | return true; | |
18713 | } | |
18714 | #endif | |
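This conversion sets the pattern for the rest of the file: interrupts are disabled exactly once, at the outermost wait_lock, and every pi_lock nested inside it switches to the plain raw_spin_lock()/raw_spin_unlock() variants because interrupts are already off. Schematically (a sketch, not a literal excerpt):

	raw_spin_lock_irq(&lock->wait_lock);	/* or _irqsave(..., flags) */
	raw_spin_lock(&task->pi_lock);		/* nested: irqs already off */
	/* ... adjust waiter and PI state ... */
	raw_spin_unlock(&task->pi_lock);
	raw_spin_unlock_irq(&lock->wait_lock);	/* or _irqrestore(..., flags) */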
18715 | @@ -348,6 +362,14 @@ static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, | |
18716 | return debug_rt_mutex_detect_deadlock(waiter, chwalk); | |
18717 | } | |
18718 | ||
18719 | +static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) | |
18720 | +{ | |
18721 | + if (waiter->savestate) | |
18722 | + wake_up_lock_sleeper(waiter->task); | |
18723 | + else | |
18724 | + wake_up_process(waiter->task); | |
18725 | +} | |
18726 | + | |
18727 | /* | |
18728 | * Max number of times we'll walk the boosting chain: | |
18729 | */ | |
18730 | @@ -355,7 +377,8 @@ int max_lock_depth = 1024; | |
18731 | ||
18732 | static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) | |
18733 | { | |
18734 | - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; | |
18735 | + return rt_mutex_real_waiter(p->pi_blocked_on) ? | |
18736 | + p->pi_blocked_on->lock : NULL; | |
18737 | } | |
18738 | ||
18739 | /* | |
18740 | @@ -433,7 +456,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18741 | int ret = 0, depth = 0; | |
18742 | struct rt_mutex *lock; | |
18743 | bool detect_deadlock; | |
18744 | - unsigned long flags; | |
18745 | bool requeue = true; | |
18746 | ||
18747 | detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); | |
18748 | @@ -476,7 +498,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18749 | /* | |
18750 | * [1] Task cannot go away as we did a get_task() before ! | |
18751 | */ | |
18752 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
18753 | + raw_spin_lock_irq(&task->pi_lock); | |
18754 | ||
18755 | /* | |
18756 | * [2] Get the waiter on which @task is blocked on. | |
18757 | @@ -492,7 +514,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18758 | * reached or the state of the chain has changed while we | |
18759 | * dropped the locks. | |
18760 | */ | |
18761 | - if (!waiter) | |
18762 | + if (!rt_mutex_real_waiter(waiter)) | |
18763 | goto out_unlock_pi; | |
18764 | ||
18765 | /* | |
18766 | @@ -560,7 +582,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18767 | * operations. | |
18768 | */ | |
18769 | if (!raw_spin_trylock(&lock->wait_lock)) { | |
18770 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18771 | + raw_spin_unlock_irq(&task->pi_lock); | |
18772 | cpu_relax(); | |
18773 | goto retry; | |
18774 | } | |
18775 | @@ -591,7 +613,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18776 | /* | |
18777 | * No requeue[7] here. Just release @task [8] | |
18778 | */ | |
18779 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18780 | + raw_spin_unlock(&task->pi_lock); | |
18781 | put_task_struct(task); | |
18782 | ||
18783 | /* | |
18784 | @@ -599,14 +621,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18785 | * If there is no owner of the lock, end of chain. | |
18786 | */ | |
18787 | if (!rt_mutex_owner(lock)) { | |
18788 | - raw_spin_unlock(&lock->wait_lock); | |
18789 | + raw_spin_unlock_irq(&lock->wait_lock); | |
18790 | return 0; | |
18791 | } | |
18792 | ||
18793 | /* [10] Grab the next task, i.e. owner of @lock */ | |
18794 | task = rt_mutex_owner(lock); | |
18795 | get_task_struct(task); | |
18796 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
18797 | + raw_spin_lock(&task->pi_lock); | |
18798 | ||
18799 | /* | |
18800 | * No requeue [11] here. We just do deadlock detection. | |
18801 | @@ -621,8 +643,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18802 | top_waiter = rt_mutex_top_waiter(lock); | |
18803 | ||
18804 | /* [13] Drop locks */ | |
18805 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18806 | - raw_spin_unlock(&lock->wait_lock); | |
18807 | + raw_spin_unlock(&task->pi_lock); | |
18808 | + raw_spin_unlock_irq(&lock->wait_lock); | |
18809 | ||
18810 | /* If owner is not blocked, end of chain. */ | |
18811 | if (!next_lock) | |
18812 | @@ -643,7 +665,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18813 | rt_mutex_enqueue(lock, waiter); | |
18814 | ||
18815 | /* [8] Release the task */ | |
18816 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18817 | + raw_spin_unlock(&task->pi_lock); | |
18818 | put_task_struct(task); | |
18819 | ||
18820 | /* | |
18821 | @@ -654,21 +676,24 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18822 | * follow here. This is the end of the chain we are walking. | |
18823 | */ | |
18824 | if (!rt_mutex_owner(lock)) { | |
18825 | + struct rt_mutex_waiter *lock_top_waiter; | |
18826 | + | |
18827 | /* | |
18828 | * If the requeue [7] above changed the top waiter, | |
18829 | * then we need to wake the new top waiter up to try | |
18830 | * to get the lock. | |
18831 | */ | |
18832 | - if (prerequeue_top_waiter != rt_mutex_top_waiter(lock)) | |
18833 | - wake_up_process(rt_mutex_top_waiter(lock)->task); | |
18834 | - raw_spin_unlock(&lock->wait_lock); | |
18835 | + lock_top_waiter = rt_mutex_top_waiter(lock); | |
18836 | + if (prerequeue_top_waiter != lock_top_waiter) | |
18837 | + rt_mutex_wake_waiter(lock_top_waiter); | |
18838 | + raw_spin_unlock_irq(&lock->wait_lock); | |
18839 | return 0; | |
18840 | } | |
18841 | ||
18842 | /* [10] Grab the next task, i.e. the owner of @lock */ | |
18843 | task = rt_mutex_owner(lock); | |
18844 | get_task_struct(task); | |
18845 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
18846 | + raw_spin_lock(&task->pi_lock); | |
18847 | ||
18848 | /* [11] requeue the pi waiters if necessary */ | |
18849 | if (waiter == rt_mutex_top_waiter(lock)) { | |
18850 | @@ -722,8 +747,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18851 | top_waiter = rt_mutex_top_waiter(lock); | |
18852 | ||
18853 | /* [13] Drop the locks */ | |
18854 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18855 | - raw_spin_unlock(&lock->wait_lock); | |
18856 | + raw_spin_unlock(&task->pi_lock); | |
18857 | + raw_spin_unlock_irq(&lock->wait_lock); | |
18858 | ||
18859 | /* | |
18860 | * Make the actual exit decisions [12], based on the stored | |
18861 | @@ -746,28 +771,46 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, | |
18862 | goto again; | |
18863 | ||
18864 | out_unlock_pi: | |
18865 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18866 | + raw_spin_unlock_irq(&task->pi_lock); | |
18867 | out_put_task: | |
18868 | put_task_struct(task); | |
18869 | ||
18870 | return ret; | |
18871 | } | |
18872 | ||
18873 | + | |
18874 | +#define STEAL_NORMAL 0 | |
18875 | +#define STEAL_LATERAL 1 | |
18876 | + | |
18877 | +/* | |
18878 | + * Note that RT tasks are excluded from lateral-steals to prevent the | |
18879 | + * introduction of an unbounded latency | |
18880 | + */ | |
18881 | +static inline int lock_is_stealable(struct task_struct *task, | |
18882 | + struct task_struct *pendowner, int mode) | |
18883 | +{ | |
18884 | + if (mode == STEAL_NORMAL || rt_task(task)) { | |
18885 | + if (task->prio >= pendowner->prio) | |
18886 | + return 0; | |
18887 | + } else if (task->prio > pendowner->prio) | |
18888 | + return 0; | |
18889 | + return 1; | |
18890 | +} | |
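A worked example of lock_is_stealable(), keeping in mind that in the kernel's prio view a lower number means higher priority and that STEAL_LATERAL only relaxes the check for non-RT tasks (the numbers are illustrative):

	/*
	 * task->prio == 120, pendowner->prio == 120, both SCHED_OTHER:
	 *   STEAL_NORMAL : 120 >= 120            -> not stealable
	 *   STEAL_LATERAL: 120 >  120 is false   -> stealable (equal prio wins)
	 *
	 * rt_task(task), task->prio == 40, pendowner->prio == 40:
	 *   RT tasks always use the strict test  -> not stealable,
	 *   so lateral steals cannot add unbounded latency for RT tasks.
	 */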
18891 | + | |
18892 | /* | |
18893 | * Try to take an rt-mutex | |
18894 | * | |
18895 | - * Must be called with lock->wait_lock held. | |
18896 | + * Must be called with lock->wait_lock held and interrupts disabled | |
18897 | * | |
18898 | * @lock: The lock to be acquired. | |
18899 | * @task: The task which wants to acquire the lock | |
18900 | * @waiter: The waiter that is queued to the lock's wait tree if the | |
18901 | * callsite called task_blocked_on_lock(), otherwise NULL | |
18902 | */ | |
18903 | -static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18904 | - struct rt_mutex_waiter *waiter) | |
18905 | +static int __try_to_take_rt_mutex(struct rt_mutex *lock, | |
18906 | + struct task_struct *task, | |
18907 | + struct rt_mutex_waiter *waiter, int mode) | |
18908 | { | |
18909 | - unsigned long flags; | |
18910 | - | |
18911 | /* | |
18912 | * Before testing whether we can acquire @lock, we set the | |
18913 | * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all | |
18914 | @@ -803,8 +846,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18915 | * If waiter is not the highest priority waiter of | |
18916 | * @lock, give up. | |
18917 | */ | |
18918 | - if (waiter != rt_mutex_top_waiter(lock)) | |
18919 | + if (waiter != rt_mutex_top_waiter(lock)) { | |
18920 | + /* XXX lock_is_stealable() ? */ | |
18921 | return 0; | |
18922 | + } | |
18923 | ||
18924 | /* | |
18925 | * We can acquire the lock. Remove the waiter from the | |
18926 | @@ -822,14 +867,10 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18927 | * not need to be dequeued. | |
18928 | */ | |
18929 | if (rt_mutex_has_waiters(lock)) { | |
18930 | - /* | |
18931 | - * If @task->prio is greater than or equal to | |
18932 | - * the top waiter priority (kernel view), | |
18933 | - * @task lost. | |
18934 | - */ | |
18935 | - if (task->prio >= rt_mutex_top_waiter(lock)->prio) | |
18936 | - return 0; | |
18937 | + struct task_struct *pown = rt_mutex_top_waiter(lock)->task; | |
18938 | ||
18939 | + if (task != pown && !lock_is_stealable(task, pown, mode)) | |
18940 | + return 0; | |
18941 | /* | |
18942 | * The current top waiter stays enqueued. We | |
18943 | * don't have to change anything in the lock | |
18944 | @@ -852,7 +893,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18945 | * case, but conditionals are more expensive than a redundant | |
18946 | * store. | |
18947 | */ | |
18948 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
18949 | + raw_spin_lock(&task->pi_lock); | |
18950 | task->pi_blocked_on = NULL; | |
18951 | /* | |
18952 | * Finish the lock acquisition. @task is the new owner. If | |
18953 | @@ -861,7 +902,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
18954 | */ | |
18955 | if (rt_mutex_has_waiters(lock)) | |
18956 | rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); | |
18957 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
18958 | + raw_spin_unlock(&task->pi_lock); | |
18959 | ||
18960 | takeit: | |
18961 | /* We got the lock. */ | |
cb95d48a | 18962 | @@ -878,12 +919,444 @@ takeit: |
18963 | return 1; |
18964 | } | |
18965 | ||
18966 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
18967 | +/* | |
18968 | + * preemptible spin_lock functions: | |
18969 | + */ | |
18970 | +static inline void rt_spin_lock_fastlock(struct rt_mutex *lock, | |
18971 | + void (*slowfn)(struct rt_mutex *lock, | |
18972 | + bool mg_off), | |
18973 | + bool do_mig_dis) | |
18974 | +{ | |
18975 | + might_sleep_no_state_check(); | |
18976 | + | |
18977 | + if (do_mig_dis) | |
18978 | + migrate_disable(); | |
18979 | + | |
18980 | + if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) | |
18981 | + rt_mutex_deadlock_account_lock(lock, current); | |
18982 | + else | |
18983 | + slowfn(lock, do_mig_dis); | |
18984 | +} | |
18985 | + | |
18986 | +static inline int rt_spin_lock_fastunlock(struct rt_mutex *lock, |
18987 | + int (*slowfn)(struct rt_mutex *lock)) | |
b4de310e | 18988 | +{ |
cb95d48a | 18989 | + if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { |
b4de310e | 18990 | + rt_mutex_deadlock_account_unlock(current); |
18991 | + return 0; |
18992 | + } | |
18993 | + return slowfn(lock); | |
18994 | +} |
18995 | +#ifdef CONFIG_SMP | |
18996 | +/* | |
18997 | + * Note that owner is a speculative pointer and dereferencing relies | |
18998 | + * on rcu_read_lock() and the check against the lock owner. | |
18999 | + */ | |
19000 | +static int adaptive_wait(struct rt_mutex *lock, | |
19001 | + struct task_struct *owner) | |
19002 | +{ | |
19003 | + int res = 0; | |
19004 | + | |
19005 | + rcu_read_lock(); | |
19006 | + for (;;) { | |
19007 | + if (owner != rt_mutex_owner(lock)) | |
19008 | + break; | |
19009 | + /* | |
19010 | + * Ensure that owner->on_cpu is dereferenced _after_ | |
19011 | + * checking the above to be valid. | |
19012 | + */ | |
19013 | + barrier(); | |
19014 | + if (!owner->on_cpu) { | |
19015 | + res = 1; | |
19016 | + break; | |
19017 | + } | |
19018 | + cpu_relax(); | |
19019 | + } | |
19020 | + rcu_read_unlock(); | |
19021 | + return res; | |
19022 | +} | |
19023 | +#else | |
19024 | +static int adaptive_wait(struct rt_mutex *lock, | |
19025 | + struct task_struct *orig_owner) | |
19026 | +{ | |
19027 | + return 1; | |
19028 | +} | |
19029 | +#endif | |
19030 | + | |
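adaptive_wait() gives the spinlock slow path below its spin-or-sleep decision: while the same owner is still executing on another CPU a release is likely imminent, so briefly spinning is cheaper than a schedule()/wakeup round trip; once the owner is preempted (!owner->on_cpu), or on UP, the waiter must really sleep. Roughly, from the caller's side (a sketch of the logic in rt_spin_lock_slowlock() below):

	if (adaptive_wait(lock, lock_owner))
		schedule();	/* owner is off-CPU: block until woken */
	/* else: the owner changed, loop and retry the trylock */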
19031 | +static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
19032 | + struct rt_mutex_waiter *waiter, | |
19033 | + struct task_struct *task, | |
19034 | + enum rtmutex_chainwalk chwalk); | |
19035 | +/* | |
19036 | + * Slow path lock function spin_lock style: this variant is very | |
19037 | + * careful not to miss any non-lock wakeups. | |
19038 | + * | |
19039 | + * We store the current state under p->pi_lock in p->saved_state and | |
19040 | + * the try_to_wake_up() code handles this accordingly. | |
19041 | + */ | |
19042 | +static void noinline __sched rt_spin_lock_slowlock(struct rt_mutex *lock, | |
19043 | + bool mg_off) | |
19044 | +{ | |
19045 | + struct task_struct *lock_owner, *self = current; | |
19046 | + struct rt_mutex_waiter waiter, *top_waiter; | |
19047 | + unsigned long flags; | |
19048 | + int ret; | |
19049 | + | |
19050 | + rt_mutex_init_waiter(&waiter, true); | |
19051 | + | |
19052 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19053 | + | |
19054 | + if (__try_to_take_rt_mutex(lock, self, NULL, STEAL_LATERAL)) { | |
19055 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19056 | + return; | |
19057 | + } | |
19058 | + | |
19059 | + BUG_ON(rt_mutex_owner(lock) == self); | |
19060 | + | |
19061 | + /* | |
19062 | + * We save whatever state the task is in and we'll restore it | |
19063 | + * after acquiring the lock taking real wakeups into account | |
19064 | + * as well. We are serialized via pi_lock against wakeups. See | |
19065 | + * try_to_wake_up(). | |
19066 | + */ | |
19067 | + raw_spin_lock(&self->pi_lock); | |
19068 | + self->saved_state = self->state; | |
19069 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
19070 | + raw_spin_unlock(&self->pi_lock); | |
19071 | + | |
19072 | + ret = task_blocks_on_rt_mutex(lock, &waiter, self, RT_MUTEX_MIN_CHAINWALK); | |
19073 | + BUG_ON(ret); | |
19074 | + | |
19075 | + for (;;) { | |
19076 | + /* Try to acquire the lock again. */ | |
19077 | + if (__try_to_take_rt_mutex(lock, self, &waiter, STEAL_LATERAL)) | |
19078 | + break; | |
19079 | + | |
19080 | + top_waiter = rt_mutex_top_waiter(lock); | |
19081 | + lock_owner = rt_mutex_owner(lock); | |
19082 | + | |
19083 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19084 | + | |
19085 | + debug_rt_mutex_print_deadlock(&waiter); | |
19086 | + | |
19087 | + if (top_waiter != &waiter || adaptive_wait(lock, lock_owner)) { | |
19088 | + if (mg_off) | |
19089 | + migrate_enable(); | |
19090 | + schedule(); | |
19091 | + if (mg_off) | |
19092 | + migrate_disable(); | |
19093 | + } | |
19094 | + | |
19095 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19096 | + | |
19097 | + raw_spin_lock(&self->pi_lock); | |
19098 | + __set_current_state_no_track(TASK_UNINTERRUPTIBLE); | |
19099 | + raw_spin_unlock(&self->pi_lock); | |
19100 | + } | |
19101 | + | |
19102 | + /* | |
19103 | + * Restore the task state to current->saved_state. We set it | |
19104 | + * to the original state above and the try_to_wake_up() code | |
19105 | + * has possibly updated it when a real (non-rtmutex) wakeup | |
19106 | + * happened while we were blocked. Clear saved_state so | |
19107 | + * try_to_wake_up() does not get confused. | |
19108 | + */ | |
19109 | + raw_spin_lock(&self->pi_lock); | |
19110 | + __set_current_state_no_track(self->saved_state); | |
19111 | + self->saved_state = TASK_RUNNING; | |
19112 | + raw_spin_unlock(&self->pi_lock); | |
19113 | + | |
19114 | + /* | |
19115 | + * try_to_take_rt_mutex() sets the waiter bit | |
19116 | + * unconditionally. We might have to fix that up: | |
19117 | + */ | |
19118 | + fixup_rt_mutex_waiters(lock); | |
19119 | + | |
19120 | + BUG_ON(rt_mutex_has_waiters(lock) && &waiter == rt_mutex_top_waiter(lock)); | |
19121 | + BUG_ON(!RB_EMPTY_NODE(&waiter.tree_entry)); | |
19122 | + | |
19123 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19124 | + | |
19125 | + debug_rt_mutex_free_waiter(&waiter); | |
19126 | +} | |
19127 | + | |
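The saved_state handling above is the heart of the sleeping spinlock: spin_lock() callers may already have set a sleep state (say TASK_INTERRUPTIBLE before a waitqueue wait), so their real state is parked while the lock is acquired in TASK_UNINTERRUPTIBLE, and try_to_wake_up() folds any genuine wakeup into saved_state in the meantime. In outline (simplified; the locking and the no-track helpers are omitted):

	/* under self->pi_lock */
	self->saved_state = self->state;	/* park the caller's state */
	self->state = TASK_UNINTERRUPTIBLE;	/* block only for the lock */
	/* ... schedule() until the lock is ours ... */
	self->state = self->saved_state;	/* real wakeups were folded in */
	self->saved_state = TASK_RUNNING;	/* keep try_to_wake_up() sane */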
19128 | +static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
19129 | + struct wake_q_head *wake_sleeper_q, | |
19130 | + struct rt_mutex *lock); | |
19131 | +/* | |
19132 | + * Slow path to release a rt_mutex spin_lock style | |
19133 | + */ | |
cb95d48a | 19134 | +static int noinline __sched rt_spin_lock_slowunlock(struct rt_mutex *lock) |
19135 | +{ |
19136 | + unsigned long flags; | |
19137 | + WAKE_Q(wake_q); | |
19138 | + WAKE_Q(wake_sleeper_q); | |
19139 | + | |
19140 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19141 | + | |
19142 | + debug_rt_mutex_unlock(lock); | |
19143 | + | |
19144 | + rt_mutex_deadlock_account_unlock(current); | |
19145 | + | |
19146 | + if (!rt_mutex_has_waiters(lock)) { | |
19147 | + lock->owner = NULL; | |
19148 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
cb95d48a | 19149 | + return 0; |
19150 | + } |
19151 | + | |
19152 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
19153 | + | |
19154 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19155 | + wake_up_q(&wake_q); | |
19156 | + wake_up_q_sleeper(&wake_sleeper_q); | |
19157 | + | |
19158 | + /* Undo pi boosting when necessary */ | |
19159 | + rt_mutex_adjust_prio(current); | |
19160 | + return 0; |
19161 | +} | |
19162 | + | |
19163 | +static int noinline __sched rt_spin_lock_slowunlock_no_deboost(struct rt_mutex *lock) | |
19164 | +{ | |
19165 | + unsigned long flags; | |
19166 | + WAKE_Q(wake_q); | |
19167 | + WAKE_Q(wake_sleeper_q); | |
19168 | + | |
19169 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19170 | + | |
19171 | + debug_rt_mutex_unlock(lock); | |
19172 | + | |
19173 | + rt_mutex_deadlock_account_unlock(current); | |
19174 | + | |
19175 | + if (!rt_mutex_has_waiters(lock)) { | |
19176 | + lock->owner = NULL; | |
19177 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19178 | + return 0; | |
19179 | + } | |
19180 | + | |
19181 | + mark_wakeup_next_waiter(&wake_q, &wake_sleeper_q, lock); | |
19182 | + | |
19183 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19184 | + wake_up_q(&wake_q); | |
19185 | + wake_up_q_sleeper(&wake_sleeper_q); | |
19186 | + return 1; | |
19187 | +} |
19188 | + | |
19189 | +void __lockfunc rt_spin_lock__no_mg(spinlock_t *lock) | |
19190 | +{ | |
19191 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, false); | |
19192 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
19193 | +} | |
19194 | +EXPORT_SYMBOL(rt_spin_lock__no_mg); | |
19195 | + | |
19196 | +void __lockfunc rt_spin_lock(spinlock_t *lock) | |
19197 | +{ | |
19198 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
19199 | + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); | |
19200 | +} | |
19201 | +EXPORT_SYMBOL(rt_spin_lock); | |
19202 | + | |
19203 | +void __lockfunc __rt_spin_lock(struct rt_mutex *lock) | |
19204 | +{ | |
19205 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, true); | |
19206 | +} | |
19207 | +EXPORT_SYMBOL(__rt_spin_lock); | |
19208 | + | |
19209 | +void __lockfunc __rt_spin_lock__no_mg(struct rt_mutex *lock) | |
19210 | +{ | |
19211 | + rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock, false); | |
19212 | +} | |
19213 | +EXPORT_SYMBOL(__rt_spin_lock__no_mg); | |
19214 | + | |
19215 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
19216 | +void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) | |
19217 | +{ | |
19218 | + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); | |
19219 | + rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock, true); | |
19220 | +} | |
19221 | +EXPORT_SYMBOL(rt_spin_lock_nested); | |
19222 | +#endif | |
19223 | + | |
19224 | +void __lockfunc rt_spin_unlock__no_mg(spinlock_t *lock) | |
19225 | +{ | |
19226 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
19227 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
19228 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
19229 | +} | |
19230 | +EXPORT_SYMBOL(rt_spin_unlock__no_mg); | |
19231 | + | |
19232 | +void __lockfunc rt_spin_unlock(spinlock_t *lock) | |
19233 | +{ | |
19234 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
19235 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
19236 | + rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); | |
19237 | + migrate_enable(); | |
19238 | +} | |
19239 | +EXPORT_SYMBOL(rt_spin_unlock); | |
19240 | + | |
19241 | +int __lockfunc rt_spin_unlock_no_deboost(spinlock_t *lock) |
19242 | +{ | |
19243 | + int ret; | |
19244 | + | |
19245 | + /* NOTE: we always pass in '1' for nested, for simplicity */ | |
19246 | + spin_release(&lock->dep_map, 1, _RET_IP_); | |
19247 | + ret = rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock_no_deboost); | |
19248 | + migrate_enable(); | |
19249 | + return ret; | |
19250 | +} | |
19251 | + | |
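rt_spin_unlock_no_deboost() exists for a caller that cannot afford the priority deboost at unlock time (for instance because it still holds other locks); the return value records whether a deboost is still owed. One plausible caller-side shape, sketched under that assumption:

	/*
	 * ret == 0: no waiters were pending, nothing left to do.
	 * ret == 1: waiters were woken but the PI boost on current was
	 *           deliberately left in place; the caller must undo it
	 *           once doing so is safe.
	 */
	if (rt_spin_unlock_no_deboost(lock)) {
		/* ... release whatever forbade the deboost ... */
		/* then undo the boost (deferred deboost) */
	}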
19252 | +void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) |
19253 | +{ | |
19254 | + rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); | |
19255 | +} | |
19256 | +EXPORT_SYMBOL(__rt_spin_unlock); | |
19257 | + | |
19258 | +/* | |
19259 | + * Wait for the lock to get unlocked: instead of polling for an unlock | |
19260 | + * (like raw spinlocks do), we lock and unlock, to force the kernel to | |
19261 | + * schedule if there's contention: | |
19262 | + */ | |
19263 | +void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) | |
19264 | +{ | |
19265 | + spin_lock(lock); | |
19266 | + spin_unlock(lock); | |
19267 | +} | |
19268 | +EXPORT_SYMBOL(rt_spin_unlock_wait); | |
19269 | + | |
19270 | +int __lockfunc __rt_spin_trylock(struct rt_mutex *lock) | |
19271 | +{ | |
19272 | + return rt_mutex_trylock(lock); | |
19273 | +} | |
19274 | + | |
19275 | +int __lockfunc rt_spin_trylock__no_mg(spinlock_t *lock) | |
19276 | +{ | |
19277 | + int ret; | |
19278 | + | |
19279 | + ret = rt_mutex_trylock(&lock->lock); | |
19280 | + if (ret) | |
19281 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
19282 | + return ret; | |
19283 | +} | |
19284 | +EXPORT_SYMBOL(rt_spin_trylock__no_mg); | |
19285 | + | |
19286 | +int __lockfunc rt_spin_trylock(spinlock_t *lock) | |
19287 | +{ | |
19288 | + int ret; | |
19289 | + | |
19290 | + migrate_disable(); | |
19291 | + ret = rt_mutex_trylock(&lock->lock); | |
19292 | + if (ret) | |
19293 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
19294 | + else | |
19295 | + migrate_enable(); | |
19296 | + return ret; | |
19297 | +} | |
19298 | +EXPORT_SYMBOL(rt_spin_trylock); | |
19299 | + | |
19300 | +int __lockfunc rt_spin_trylock_bh(spinlock_t *lock) | |
19301 | +{ | |
19302 | + int ret; | |
19303 | + | |
19304 | + local_bh_disable(); | |
19305 | + ret = rt_mutex_trylock(&lock->lock); | |
19306 | + if (ret) { | |
19307 | + migrate_disable(); | |
19308 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
19309 | + } else | |
19310 | + local_bh_enable(); | |
19311 | + return ret; | |
19312 | +} | |
19313 | +EXPORT_SYMBOL(rt_spin_trylock_bh); | |
19314 | + | |
19315 | +int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) | |
19316 | +{ | |
19317 | + int ret; | |
19318 | + | |
19319 | + *flags = 0; | |
19320 | + ret = rt_mutex_trylock(&lock->lock); | |
19321 | + if (ret) { | |
19322 | + migrate_disable(); | |
19323 | + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); | |
19324 | + } | |
19325 | + return ret; | |
19326 | +} | |
19327 | +EXPORT_SYMBOL(rt_spin_trylock_irqsave); | |
19328 | + | |
19329 | +int atomic_dec_and_spin_lock(atomic_t *atomic, spinlock_t *lock) | |
19330 | +{ | |
19331 | + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ | |
19332 | + if (atomic_add_unless(atomic, -1, 1)) | |
19333 | + return 0; | |
19334 | + rt_spin_lock(lock); | |
19335 | + if (atomic_dec_and_test(atomic)) | |
19336 | + return 1; | |
19337 | + rt_spin_unlock(lock); | |
19338 | + return 0; | |
19339 | +} | |
19340 | +EXPORT_SYMBOL(atomic_dec_and_spin_lock); | |
19341 | + | |
19342 | + void | |
19343 | +__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) | |
19344 | +{ | |
19345 | +#ifdef CONFIG_DEBUG_LOCK_ALLOC | |
19346 | + /* | |
19347 | + * Make sure we are not reinitializing a held lock: | |
19348 | + */ | |
19349 | + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); | |
19350 | + lockdep_init_map(&lock->dep_map, name, key, 0); | |
19351 | +#endif | |
19352 | +} | |
19353 | +EXPORT_SYMBOL(__rt_spin_lock_init); | |
19354 | + | |
19355 | +#endif /* PREEMPT_RT_FULL */ | |
19356 | + | |
19357 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19358 | + static inline int __sched | |
19359 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
19360 | +{ | |
19361 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
19362 | + struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx); | |
19363 | + | |
19364 | + if (!hold_ctx) | |
19365 | + return 0; | |
19366 | + | |
19367 | + if (unlikely(ctx == hold_ctx)) | |
19368 | + return -EALREADY; | |
19369 | + | |
19370 | + if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | |
19371 | + (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | |
19372 | +#ifdef CONFIG_DEBUG_MUTEXES | |
19373 | + DEBUG_LOCKS_WARN_ON(ctx->contending_lock); | |
19374 | + ctx->contending_lock = ww; | |
19375 | +#endif | |
19376 | + return -EDEADLK; | |
19377 | + } | |
19378 | + | |
19379 | + return 0; | |
19380 | +} | |
19381 | +#else | |
19382 | + static inline int __sched | |
19383 | +__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx) | |
19384 | +{ | |
19385 | + BUG(); | |
19386 | + return 0; | |
19387 | +} | |
19388 | + | |
19389 | +#endif | |
19390 | + | |
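The stamp test in __mutex_lock_check_stamp() implements wait/die style ordering for ww_mutexes: stamps are ages, older (smaller) contexts win, the unsigned subtraction against LONG_MAX handles counter wraparound, and equal stamps from distinct contexts fall back to pointer order. Worked through with illustrative numbers:

	/*
	 * ctx->stamp = 103, hold_ctx->stamp = 101:
	 *   103 - 101 = 2 <= LONG_MAX and the stamps differ
	 *   -> we are the younger context: back off with -EDEADLK
	 *
	 * ctx->stamp = 101, hold_ctx->stamp = 103:
	 *   101 - 103 wraps to a huge value > LONG_MAX
	 *   -> we are older: keep waiting (return 0)
	 *
	 * ctx == hold_ctx: the same context holds it already -> -EALREADY
	 */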
19391 | +static inline int | |
19392 | +try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, | |
19393 | + struct rt_mutex_waiter *waiter) | |
19394 | +{ | |
19395 | + return __try_to_take_rt_mutex(lock, task, waiter, STEAL_NORMAL); | |
19396 | +} | |
19397 | + | |
19398 | /* | |
19399 | * Task blocks on lock. | |
19400 | * | |
19401 | * Prepare waiter and propagate pi chain | |
19402 | * | |
19403 | - * This must be called with lock->wait_lock held. | |
19404 | + * This must be called with lock->wait_lock held and interrupts disabled | |
19405 | */ | |
19406 | static int task_blocks_on_rt_mutex(struct rt_mutex *lock, | |
19407 | struct rt_mutex_waiter *waiter, | |
cb95d48a | 19408 | @@ -894,7 +1367,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19409 | struct rt_mutex_waiter *top_waiter = waiter; |
19410 | struct rt_mutex *next_lock; | |
19411 | int chain_walk = 0, res; | |
19412 | - unsigned long flags; | |
19413 | ||
19414 | /* | |
19415 | * Early deadlock detection. We really don't want the task to | |
cb95d48a | 19416 | @@ -908,7 +1380,24 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19417 | if (owner == task) |
19418 | return -EDEADLK; | |
19419 | ||
19420 | - raw_spin_lock_irqsave(&task->pi_lock, flags); | |
19421 | + raw_spin_lock(&task->pi_lock); | |
19422 | + | |
19423 | + /* | |
19424 | + * In the case of futex requeue PI, this will be a proxy | |
19425 | + * lock. The task will wake unaware that it is enqueueed on | |
19426 | + * this lock. Avoid blocking on two locks and corrupting | |
19427 | + * pi_blocked_on via the PI_WAKEUP_INPROGRESS | |
19428 | + * flag. futex_wait_requeue_pi() sets this when it wakes up | |
19429 | + * before requeue (due to a signal or timeout). Do not enqueue | |
19430 | + * the task if PI_WAKEUP_INPROGRESS is set. | |
19431 | + */ | |
19432 | + if (task != current && task->pi_blocked_on == PI_WAKEUP_INPROGRESS) { | |
19433 | + raw_spin_unlock(&task->pi_lock); | |
19434 | + return -EAGAIN; | |
19435 | + } | |
19436 | + | |
19437 | + BUG_ON(rt_mutex_real_waiter(task->pi_blocked_on)); | |
19438 | + | |
19439 | __rt_mutex_adjust_prio(task); | |
19440 | waiter->task = task; | |
19441 | waiter->lock = lock; | |
cb95d48a | 19442 | @@ -921,18 +1410,18 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19443 | |
19444 | task->pi_blocked_on = waiter; | |
19445 | ||
19446 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
19447 | + raw_spin_unlock(&task->pi_lock); | |
19448 | ||
19449 | if (!owner) | |
19450 | return 0; | |
19451 | ||
19452 | - raw_spin_lock_irqsave(&owner->pi_lock, flags); | |
19453 | + raw_spin_lock(&owner->pi_lock); | |
19454 | if (waiter == rt_mutex_top_waiter(lock)) { | |
19455 | rt_mutex_dequeue_pi(owner, top_waiter); | |
19456 | rt_mutex_enqueue_pi(owner, waiter); | |
19457 | ||
19458 | __rt_mutex_adjust_prio(owner); | |
19459 | - if (owner->pi_blocked_on) | |
19460 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
19461 | chain_walk = 1; | |
19462 | } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { | |
19463 | chain_walk = 1; | |
cb95d48a | 19464 | @@ -941,7 +1430,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19465 | /* Store the lock on which owner is blocked or NULL */ |
19466 | next_lock = task_blocked_on_lock(owner); | |
19467 | ||
19468 | - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | |
19469 | + raw_spin_unlock(&owner->pi_lock); | |
19470 | /* | |
19471 | * Even if full deadlock detection is on, if the owner is not | |
19472 | * blocked itself, we can avoid finding this out in the chain | |
cb95d48a | 19473 | @@ -957,12 +1446,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19474 | */ |
19475 | get_task_struct(owner); | |
19476 | ||
19477 | - raw_spin_unlock(&lock->wait_lock); | |
19478 | + raw_spin_unlock_irq(&lock->wait_lock); | |
19479 | ||
19480 | res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, | |
19481 | next_lock, waiter, task); | |
19482 | ||
19483 | - raw_spin_lock(&lock->wait_lock); | |
19484 | + raw_spin_lock_irq(&lock->wait_lock); | |
19485 | ||
19486 | return res; | |
19487 | } | |
cb95d48a | 19488 | @@ -971,15 +1460,15 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, |
19489 | * Remove the top waiter from the current tasks pi waiter tree and |
19490 | * queue it up. | |
19491 | * | |
19492 | - * Called with lock->wait_lock held. | |
19493 | + * Called with lock->wait_lock held and interrupts disabled. | |
19494 | */ | |
19495 | static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, | |
19496 | + struct wake_q_head *wake_sleeper_q, | |
19497 | struct rt_mutex *lock) | |
19498 | { | |
19499 | struct rt_mutex_waiter *waiter; | |
19500 | - unsigned long flags; | |
19501 | ||
19502 | - raw_spin_lock_irqsave(¤t->pi_lock, flags); | |
19503 | + raw_spin_lock(¤t->pi_lock); | |
19504 | ||
19505 | waiter = rt_mutex_top_waiter(lock); | |
19506 | ||
cb95d48a | 19507 | @@ -1001,15 +1490,18 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q, |
19508 | */ |
19509 | lock->owner = (void *) RT_MUTEX_HAS_WAITERS; | |
19510 | ||
19511 | - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | |
19512 | + raw_spin_unlock(¤t->pi_lock); | |
19513 | ||
19514 | - wake_q_add(wake_q, waiter->task); | |
19515 | + if (waiter->savestate) | |
19516 | + wake_q_add(wake_sleeper_q, waiter->task); | |
19517 | + else | |
19518 | + wake_q_add(wake_q, waiter->task); | |
19519 | } | |
19520 | ||
19521 | /* | |
19522 | * Remove a waiter from a lock and give up | |
19523 | * | |
19524 | - * Must be called with lock->wait_lock held and | |
19525 | + * Must be called with lock->wait_lock held and interrupts disabled. It must | |
19526 | * have just failed to try_to_take_rt_mutex(). | |
19527 | */ | |
19528 | static void remove_waiter(struct rt_mutex *lock, | |
cb95d48a | 19529 | @@ -1017,13 +1509,12 @@ static void remove_waiter(struct rt_mutex *lock, |
19530 | { |
19531 | bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); | |
19532 | struct task_struct *owner = rt_mutex_owner(lock); | |
19533 | - struct rt_mutex *next_lock; | |
19534 | - unsigned long flags; | |
19535 | + struct rt_mutex *next_lock = NULL; | |
19536 | ||
19537 | - raw_spin_lock_irqsave(¤t->pi_lock, flags); | |
19538 | + raw_spin_lock(¤t->pi_lock); | |
19539 | rt_mutex_dequeue(lock, waiter); | |
19540 | current->pi_blocked_on = NULL; | |
19541 | - raw_spin_unlock_irqrestore(¤t->pi_lock, flags); | |
19542 | + raw_spin_unlock(¤t->pi_lock); | |
19543 | ||
19544 | /* | |
19545 | * Only update priority if the waiter was the highest priority | |
cb95d48a | 19546 | @@ -1032,7 +1523,7 @@ static void remove_waiter(struct rt_mutex *lock, |
19547 | if (!owner || !is_top_waiter) |
19548 | return; | |
19549 | ||
19550 | - raw_spin_lock_irqsave(&owner->pi_lock, flags); | |
19551 | + raw_spin_lock(&owner->pi_lock); | |
19552 | ||
19553 | rt_mutex_dequeue_pi(owner, waiter); | |
19554 | ||
cb95d48a | 19555 | @@ -1042,9 +1533,10 @@ static void remove_waiter(struct rt_mutex *lock, |
b4de310e JK |
19556 | __rt_mutex_adjust_prio(owner); |
19557 | ||
19558 | /* Store the lock on which owner is blocked or NULL */ | |
19559 | - next_lock = task_blocked_on_lock(owner); | |
19560 | + if (rt_mutex_real_waiter(owner->pi_blocked_on)) | |
19561 | + next_lock = task_blocked_on_lock(owner); | |
19562 | ||
19563 | - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); | |
19564 | + raw_spin_unlock(&owner->pi_lock); | |
19565 | ||
19566 | /* | |
19567 | * Don't walk the chain, if the owner task is not blocked | |
cb95d48a | 19568 | @@ -1056,12 +1548,12 @@ static void remove_waiter(struct rt_mutex *lock, |
b4de310e JK |
19569 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ |
19570 | get_task_struct(owner); | |
19571 | ||
19572 | - raw_spin_unlock(&lock->wait_lock); | |
19573 | + raw_spin_unlock_irq(&lock->wait_lock); | |
19574 | ||
19575 | rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, | |
19576 | next_lock, NULL, current); | |
19577 | ||
19578 | - raw_spin_lock(&lock->wait_lock); | |
19579 | + raw_spin_lock_irq(&lock->wait_lock); | |
19580 | } | |
19581 | ||
19582 | /* | |
cb95d48a | 19583 | @@ -1078,17 +1570,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) |
b4de310e JK |
19584 | raw_spin_lock_irqsave(&task->pi_lock, flags); |
19585 | ||
19586 | waiter = task->pi_blocked_on; | |
19587 | - if (!waiter || (waiter->prio == task->prio && | |
19588 | + if (!rt_mutex_real_waiter(waiter) || (waiter->prio == task->prio && | |
19589 | !dl_prio(task->prio))) { | |
19590 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
19591 | return; | |
19592 | } | |
19593 | next_lock = waiter->lock; | |
19594 | - raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
19595 | ||
19596 | /* gets dropped in rt_mutex_adjust_prio_chain()! */ | |
19597 | get_task_struct(task); | |
19598 | ||
19599 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
19600 | rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, | |
19601 | next_lock, NULL, task); | |
19602 | } | |
cb95d48a | 19603 | @@ -1097,16 +1589,17 @@ void rt_mutex_adjust_pi(struct task_struct *task) |
b4de310e JK |
19604 | * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop |
19605 | * @lock: the rt_mutex to take | |
19606 | * @state: the state the task should block in (TASK_INTERRUPTIBLE | |
19607 | - * or TASK_UNINTERRUPTIBLE) | |
19608 | + * or TASK_UNINTERRUPTIBLE) | |
19609 | * @timeout: the pre-initialized and started timer, or NULL for none | |
19610 | * @waiter: the pre-initialized rt_mutex_waiter | |
19611 | * | |
19612 | - * lock->wait_lock must be held by the caller. | |
19613 | + * Must be called with lock->wait_lock held and interrupts disabled | |
19614 | */ | |
19615 | static int __sched | |
19616 | __rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
19617 | struct hrtimer_sleeper *timeout, | |
19618 | - struct rt_mutex_waiter *waiter) | |
19619 | + struct rt_mutex_waiter *waiter, | |
19620 | + struct ww_acquire_ctx *ww_ctx) | |
19621 | { | |
19622 | int ret = 0; | |
19623 | ||
cb95d48a | 19624 | @@ -1129,13 +1622,19 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, |
b4de310e JK |
19625 | break; |
19626 | } | |
19627 | ||
19628 | - raw_spin_unlock(&lock->wait_lock); | |
19629 | + if (ww_ctx && ww_ctx->acquired > 0) { | |
19630 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
19631 | + if (ret) | |
19632 | + break; | |
19633 | + } | |
19634 | + | |
19635 | + raw_spin_unlock_irq(&lock->wait_lock); | |
19636 | ||
19637 | debug_rt_mutex_print_deadlock(waiter); | |
19638 | ||
19639 | schedule(); | |
19640 | ||
19641 | - raw_spin_lock(&lock->wait_lock); | |
19642 | + raw_spin_lock_irq(&lock->wait_lock); | |
19643 | set_current_state(state); | |
19644 | } | |
19645 | ||
cb95d48a | 19646 | @@ -1163,26 +1662,112 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock, |
b4de310e JK |
19647 | } |
19648 | } | |
19649 | ||
19650 | +static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww, | |
19651 | + struct ww_acquire_ctx *ww_ctx) | |
19652 | +{ | |
19653 | +#ifdef CONFIG_DEBUG_MUTEXES | |
19654 | + /* | |
19655 | + * If this WARN_ON triggers, you used ww_mutex_lock to acquire, | |
19656 | + * but released with a normal mutex_unlock in this call. | |
19657 | + * | |
19658 | + * This should never happen, always use ww_mutex_unlock. | |
19659 | + */ | |
19660 | + DEBUG_LOCKS_WARN_ON(ww->ctx); | |
19661 | + | |
19662 | + /* | |
19663 | + * Not quite done after calling ww_acquire_done() ? | |
19664 | + */ | |
19665 | + DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire); | |
19666 | + | |
19667 | + if (ww_ctx->contending_lock) { | |
19668 | + /* | |
19669 | + * After -EDEADLK you tried to | |
19670 | + * acquire a different ww_mutex? Bad! | |
19671 | + */ | |
19672 | + DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww); | |
19673 | + | |
19674 | + /* | |
19675 | + * You called ww_mutex_lock after receiving -EDEADLK, | |
19676 | + * but 'forgot' to unlock everything else first? | |
19677 | + */ | |
19678 | + DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0); | |
19679 | + ww_ctx->contending_lock = NULL; | |
19680 | + } | |
19681 | + | |
19682 | + /* | |
19683 | + * Naughty, using a different class will lead to undefined behavior! | |
19684 | + */ | |
19685 | + DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class); | |
19686 | +#endif | |
19687 | + ww_ctx->acquired++; | |
19688 | +} | |
19689 | + | |
19690 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19691 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
19692 | + struct ww_acquire_ctx *ww_ctx) | |
19693 | +{ | |
19694 | + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock); | |
19695 | + struct rt_mutex_waiter *waiter, *n; | |
19696 | + | |
19697 | + /* | |
19698 | + * This branch gets optimized out for the common case, | |
19699 | + * and is only important for ww_mutex_lock. | |
19700 | + */ | |
19701 | + ww_mutex_lock_acquired(ww, ww_ctx); | |
19702 | + ww->ctx = ww_ctx; | |
19703 | + | |
19704 | + /* | |
19705 | + * Give any possible sleeping processes the chance to wake up, | |
19706 | + * so they can recheck if they have to back off. | |
19707 | + */ | |
19708 | + rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters, | |
19709 | + tree_entry) { | |
19710 | + /* XXX debug rt mutex waiter wakeup */ | |
19711 | + | |
19712 | + BUG_ON(waiter->lock != lock); | |
19713 | + rt_mutex_wake_waiter(waiter); | |
19714 | + } | |
19715 | +} | |
19716 | + | |
19717 | +#else | |
19718 | + | |
19719 | +static void ww_mutex_account_lock(struct rt_mutex *lock, | |
19720 | + struct ww_acquire_ctx *ww_ctx) | |
19721 | +{ | |
19722 | + BUG(); | |
19723 | +} | |
19724 | +#endif | |
19725 | + | |
19726 | /* | |
19727 | * Slow path lock function: | |
19728 | */ | |
19729 | static int __sched | |
19730 | rt_mutex_slowlock(struct rt_mutex *lock, int state, | |
19731 | struct hrtimer_sleeper *timeout, | |
19732 | - enum rtmutex_chainwalk chwalk) | |
19733 | + enum rtmutex_chainwalk chwalk, | |
19734 | + struct ww_acquire_ctx *ww_ctx) | |
19735 | { | |
19736 | struct rt_mutex_waiter waiter; | |
19737 | + unsigned long flags; | |
19738 | int ret = 0; | |
19739 | ||
19740 | - debug_rt_mutex_init_waiter(&waiter); | |
19741 | - RB_CLEAR_NODE(&waiter.pi_tree_entry); | |
19742 | - RB_CLEAR_NODE(&waiter.tree_entry); | |
19743 | + rt_mutex_init_waiter(&waiter, false); | |
19744 | ||
19745 | - raw_spin_lock(&lock->wait_lock); | |
19746 | + /* | |
19747 | + * Technically we could use raw_spin_[un]lock_irq() here, but this can | |
19748 | + * be called in early boot if the cmpxchg() fast path is disabled | |
19749 | + * (debug, no architecture support). In this case we will acquire the | |
19750 | + * rtmutex with lock->wait_lock held. But we cannot unconditionally | |
19751 | + * enable interrupts in that early boot case. So we need to use the | |
19752 | + * irqsave/restore variants. | |
19753 | + */ | |
19754 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19755 | ||
19756 | /* Try to acquire the lock again: */ | |
19757 | if (try_to_take_rt_mutex(lock, current, NULL)) { | |
19758 | - raw_spin_unlock(&lock->wait_lock); | |
19759 | + if (ww_ctx) | |
19760 | + ww_mutex_account_lock(lock, ww_ctx); | |
19761 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19762 | return 0; | |
19763 | } | |
19764 | ||
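
The comment above the irqsave conversion is the key design note: raw_spin_unlock_irq() unconditionally re-enables interrupts, while the irqsave/irqrestore pair puts back whatever interrupt state the caller had, which matters when the slow path runs in early boot with interrupts still off. A toy model of the two behaviors, with a plain flag standing in for the CPU interrupt-enable state (all names here are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    static bool irqs_enabled;   /* stand-in for the CPU interrupt flag */

    static void lock_irq(void)   { irqs_enabled = false; }
    static void unlock_irq(void) { irqs_enabled = true; } /* always enables */

    static void lock_irqsave(bool *flags)
    {
            *flags = irqs_enabled;    /* remember caller state */
            irqs_enabled = false;
    }

    static void unlock_irqrestore(bool flags)
    {
            irqs_enabled = flags;     /* put caller state back */
    }

    int main(void)
    {
            bool flags;

            irqs_enabled = false;     /* early boot: interrupts still off */
            lock_irq();
            unlock_irq();
            printf("_irq pair:     enabled=%d (wrongly turned on)\n",
                   irqs_enabled);

            irqs_enabled = false;
            lock_irqsave(&flags);
            unlock_irqrestore(flags);
            printf("_irqsave pair: enabled=%d (state preserved)\n",
                   irqs_enabled);
            return 0;
    }
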
cb95d48a | 19765 | @@ -1196,13 +1781,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
b4de310e JK |
19766 | |
19767 | if (likely(!ret)) | |
19768 | /* sleep on the mutex */ | |
19769 | - ret = __rt_mutex_slowlock(lock, state, timeout, &waiter); | |
19770 | + ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, | |
19771 | + ww_ctx); | |
19772 | + else if (ww_ctx) { | |
19773 | + /* ww_mutex received EDEADLK, let it become EALREADY */ | |
19774 | + ret = __mutex_lock_check_stamp(lock, ww_ctx); | |
19775 | + BUG_ON(!ret); | |
19776 | + } | |
19777 | ||
19778 | if (unlikely(ret)) { | |
19779 | __set_current_state(TASK_RUNNING); | |
19780 | if (rt_mutex_has_waiters(lock)) | |
19781 | remove_waiter(lock, &waiter); | |
19782 | - rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
19783 | + /* ww_mutexes want to report EDEADLK/EALREADY, let them */ | |
19784 | + if (!ww_ctx) | |
19785 | + rt_mutex_handle_deadlock(ret, chwalk, &waiter); | |
19786 | + } else if (ww_ctx) { | |
19787 | + ww_mutex_account_lock(lock, ww_ctx); | |
19788 | } | |
19789 | ||
19790 | /* | |
cb95d48a | 19791 | @@ -1211,7 +1806,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
b4de310e JK |
19792 | */ |
19793 | fixup_rt_mutex_waiters(lock); | |
19794 | ||
19795 | - raw_spin_unlock(&lock->wait_lock); | |
19796 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19797 | ||
19798 | /* Remove pending timer: */ | |
19799 | if (unlikely(timeout)) | |
cb95d48a | 19800 | @@ -1227,6 +1822,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, |
b4de310e JK |
19801 | */ |
19802 | static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) | |
19803 | { | |
19804 | + unsigned long flags; | |
19805 | int ret; | |
19806 | ||
19807 | /* | |
cb95d48a | 19808 | @@ -1238,10 +1834,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) |
b4de310e JK |
19809 | return 0; |
19810 | ||
19811 | /* | |
19812 | - * The mutex has currently no owner. Lock the wait lock and | |
19813 | - * try to acquire the lock. | |
19814 | + * The mutex has currently no owner. Lock the wait lock and try to | |
19815 | + * acquire the lock. We use irqsave here to support early boot calls. | |
19816 | */ | |
19817 | - raw_spin_lock(&lock->wait_lock); | |
19818 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19819 | ||
19820 | ret = try_to_take_rt_mutex(lock, current, NULL); | |
19821 | ||
cb95d48a | 19822 | @@ -1251,7 +1847,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) |
b4de310e JK |
19823 | */ |
19824 | fixup_rt_mutex_waiters(lock); | |
19825 | ||
19826 | - raw_spin_unlock(&lock->wait_lock); | |
19827 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19828 | ||
19829 | return ret; | |
19830 | } | |
cb95d48a | 19831 | @@ -1261,9 +1857,13 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) |
b4de310e JK |
19832 | * Return whether the current task needs to undo a potential priority boosting. |
19833 | */ | |
19834 | static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, | |
19835 | - struct wake_q_head *wake_q) | |
19836 | + struct wake_q_head *wake_q, | |
19837 | + struct wake_q_head *wake_sleeper_q) | |
19838 | { | |
19839 | - raw_spin_lock(&lock->wait_lock); | |
19840 | + unsigned long flags; | |
19841 | + | |
19842 | + /* irqsave required to support early boot calls */ | |
19843 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19844 | ||
19845 | debug_rt_mutex_unlock(lock); | |
19846 | ||
cb95d48a | 19847 | @@ -1302,10 +1902,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
b4de310e JK |
19848 | */ |
19849 | while (!rt_mutex_has_waiters(lock)) { | |
19850 | /* Drops lock->wait_lock ! */ | |
19851 | - if (unlock_rt_mutex_safe(lock) == true) | |
19852 | + if (unlock_rt_mutex_safe(lock, flags) == true) | |
19853 | return false; | |
19854 | /* Relock the rtmutex and try again */ | |
19855 | - raw_spin_lock(&lock->wait_lock); | |
19856 | + raw_spin_lock_irqsave(&lock->wait_lock, flags); | |
19857 | } | |
19858 | ||
19859 | /* | |
cb95d48a | 19860 | @@ -1314,9 +1914,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
b4de310e JK |
19861 | * |
19862 | * Queue the next waiter for wakeup once we release the wait_lock. | |
19863 | */ | |
19864 | - mark_wakeup_next_waiter(wake_q, lock); | |
19865 | + mark_wakeup_next_waiter(wake_q, wake_sleeper_q, lock); | |
19866 | ||
19867 | - raw_spin_unlock(&lock->wait_lock); | |
19868 | + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); | |
19869 | ||
19870 | /* check PI boosting */ | |
19871 | return true; | |
cb95d48a | 19872 | @@ -1330,31 +1930,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock, |
b4de310e JK |
19873 | */ |
19874 | static inline int | |
19875 | rt_mutex_fastlock(struct rt_mutex *lock, int state, | |
19876 | + struct ww_acquire_ctx *ww_ctx, | |
19877 | int (*slowfn)(struct rt_mutex *lock, int state, | |
19878 | struct hrtimer_sleeper *timeout, | |
19879 | - enum rtmutex_chainwalk chwalk)) | |
19880 | + enum rtmutex_chainwalk chwalk, | |
19881 | + struct ww_acquire_ctx *ww_ctx)) | |
19882 | { | |
19883 | if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
19884 | rt_mutex_deadlock_account_lock(lock, current); | |
19885 | return 0; | |
19886 | } else | |
19887 | - return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK); | |
19888 | + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, | |
19889 | + ww_ctx); | |
19890 | } | |
19891 | ||
19892 | static inline int | |
19893 | rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, | |
19894 | struct hrtimer_sleeper *timeout, | |
19895 | enum rtmutex_chainwalk chwalk, | |
19896 | + struct ww_acquire_ctx *ww_ctx, | |
19897 | int (*slowfn)(struct rt_mutex *lock, int state, | |
19898 | struct hrtimer_sleeper *timeout, | |
19899 | - enum rtmutex_chainwalk chwalk)) | |
19900 | + enum rtmutex_chainwalk chwalk, | |
19901 | + struct ww_acquire_ctx *ww_ctx)) | |
19902 | { | |
19903 | if (chwalk == RT_MUTEX_MIN_CHAINWALK && | |
19904 | likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) { | |
19905 | rt_mutex_deadlock_account_lock(lock, current); | |
19906 | return 0; | |
19907 | } else | |
19908 | - return slowfn(lock, state, timeout, chwalk); | |
19909 | + return slowfn(lock, state, timeout, chwalk, ww_ctx); | |
19910 | } | |
19911 | ||
19912 | static inline int | |
cb95d48a | 19913 | @@ -1371,17 +1976,20 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, |
b4de310e JK |
19914 | static inline void |
19915 | rt_mutex_fastunlock(struct rt_mutex *lock, | |
19916 | bool (*slowfn)(struct rt_mutex *lock, | |
19917 | - struct wake_q_head *wqh)) | |
19918 | + struct wake_q_head *wqh, | |
19919 | + struct wake_q_head *wq_sleeper)) | |
19920 | { | |
19921 | WAKE_Q(wake_q); | |
19922 | + WAKE_Q(wake_sleeper_q); | |
19923 | ||
19924 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
19925 | rt_mutex_deadlock_account_unlock(current); | |
19926 | ||
19927 | } else { | |
19928 | - bool deboost = slowfn(lock, &wake_q); | |
19929 | + bool deboost = slowfn(lock, &wake_q, &wake_sleeper_q); | |
19930 | ||
19931 | wake_up_q(&wake_q); | |
19932 | + wake_up_q_sleeper(&wake_sleeper_q); | |
19933 | ||
19934 | /* Undo pi boosting if necessary: */ | |
19935 | if (deboost) | |
cb95d48a | 19936 | @@ -1398,7 +2006,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) |
b4de310e JK |
19937 | { |
19938 | might_sleep(); | |
19939 | ||
19940 | - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock); | |
19941 | + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
19942 | } | |
19943 | EXPORT_SYMBOL_GPL(rt_mutex_lock); | |
19944 | ||
cb95d48a | 19945 | @@ -1415,7 +2023,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) |
b4de310e JK |
19946 | { |
19947 | might_sleep(); | |
19948 | ||
19949 | - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock); | |
19950 | + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); | |
19951 | } | |
19952 | EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); | |
19953 | ||
cb95d48a | 19954 | @@ -1428,11 +2036,30 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock, |
b4de310e JK |
19955 | might_sleep(); |
19956 | ||
19957 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
19958 | - RT_MUTEX_FULL_CHAINWALK, | |
19959 | + RT_MUTEX_FULL_CHAINWALK, NULL, | |
19960 | rt_mutex_slowlock); | |
19961 | } | |
19962 | ||
19963 | /** | |
19964 | + * rt_mutex_lock_killable - lock a rt_mutex killable | |
19965 | + * | |
19966 | + * @lock: the rt_mutex to be locked | |
19968 | + * | |
19969 | + * Returns: | |
19970 | + * 0 on success | |
19971 | + * -EINTR when interrupted by a signal | |
19972 | + * -EDEADLK when the lock would deadlock (when deadlock detection is on) | |
19973 | + */ | |
19974 | +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) | |
19975 | +{ | |
19976 | + might_sleep(); | |
19977 | + | |
19978 | + return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock); | |
19979 | +} | |
19980 | +EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); | |
19981 | + | |
19982 | +/** | |
19983 | * rt_mutex_timed_lock - lock a rt_mutex interruptible | |
19984 | * the timeout structure is provided | |
19985 | * by the caller | |
cb95d48a | 19986 | @@ -1452,6 +2079,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) |
b4de310e JK |
19987 | |
19988 | return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, | |
19989 | RT_MUTEX_MIN_CHAINWALK, | |
19990 | + NULL, | |
19991 | rt_mutex_slowlock); | |
19992 | } | |
19993 | EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); | |
cb95d48a | 19994 | @@ -1469,7 +2097,11 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); |
b4de310e JK |
19995 | */ |
19996 | int __sched rt_mutex_trylock(struct rt_mutex *lock) | |
19997 | { | |
19998 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
19999 | + if (WARN_ON_ONCE(in_irq() || in_nmi())) | |
20000 | +#else | |
20001 | if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq())) | |
20002 | +#endif | |
20003 | return 0; | |
20004 | ||
20005 | return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); | |
cb95d48a | 20006 | @@ -1495,13 +2127,14 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock); |
b4de310e JK |
20007 | * required or not. |
20008 | */ | |
20009 | bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock, | |
20010 | - struct wake_q_head *wqh) | |
20011 | + struct wake_q_head *wqh, | |
20012 | + struct wake_q_head *wq_sleeper) | |
20013 | { | |
20014 | if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) { | |
20015 | rt_mutex_deadlock_account_unlock(current); | |
20016 | return false; | |
20017 | } | |
20018 | - return rt_mutex_slowunlock(lock, wqh); | |
20019 | + return rt_mutex_slowunlock(lock, wqh, wq_sleeper); | |
20020 | } | |
20021 | ||
20022 | /** | |
cb95d48a | 20023 | @@ -1534,13 +2167,12 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy); |
b4de310e JK |
20024 | void __rt_mutex_init(struct rt_mutex *lock, const char *name) |
20025 | { | |
20026 | lock->owner = NULL; | |
20027 | - raw_spin_lock_init(&lock->wait_lock); | |
20028 | lock->waiters = RB_ROOT; | |
20029 | lock->waiters_leftmost = NULL; | |
20030 | ||
20031 | debug_rt_mutex_init(lock, name); | |
20032 | } | |
20033 | -EXPORT_SYMBOL_GPL(__rt_mutex_init); | |
20034 | +EXPORT_SYMBOL(__rt_mutex_init); | |
20035 | ||
20036 | /** | |
20037 | * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a | |
cb95d48a | 20038 | @@ -1555,7 +2187,7 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init); |
b4de310e JK |
20039 | void rt_mutex_init_proxy_locked(struct rt_mutex *lock, |
20040 | struct task_struct *proxy_owner) | |
20041 | { | |
20042 | - __rt_mutex_init(lock, NULL); | |
20043 | + rt_mutex_init(lock); | |
20044 | debug_rt_mutex_proxy_lock(lock, proxy_owner); | |
20045 | rt_mutex_set_owner(lock, proxy_owner); | |
20046 | rt_mutex_deadlock_account_lock(lock, proxy_owner); | |
cb95d48a | 20047 | @@ -1596,13 +2228,42 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
b4de310e JK |
20048 | { |
20049 | int ret; | |
20050 | ||
20051 | - raw_spin_lock(&lock->wait_lock); | |
20052 | + raw_spin_lock_irq(&lock->wait_lock); | |
20053 | ||
20054 | if (try_to_take_rt_mutex(lock, task, NULL)) { | |
20055 | - raw_spin_unlock(&lock->wait_lock); | |
20056 | + raw_spin_unlock_irq(&lock->wait_lock); | |
20057 | return 1; | |
20058 | } | |
20059 | ||
20060 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20061 | + /* | |
20062 | + * In PREEMPT_RT there's an added race. | |
20063 | + * If the task that we are about to requeue times out, | |
20064 | + * it can set PI_WAKEUP_INPROGRESS. This tells the requeue | |
20065 | + * to skip this task. But right after the task sets | |
20066 | + * its pi_blocked_on to PI_WAKEUP_INPROGRESS it can then | |
20067 | + * block on the spin_lock(&hb->lock), which in RT is an rtmutex. | |
20068 | + * This will replace the PI_WAKEUP_INPROGRESS with the actual | |
20069 | + * lock that it blocks on. We *must not* place this task | |
20070 | + * on this proxy lock in that case. | |
20071 | + * | |
20072 | + * To prevent this race, we first take the task's pi_lock | |
20073 | + * and check if it has updated its pi_blocked_on. If it has, | |
20074 | + * we assume that it woke up and we return -EAGAIN. | |
20075 | + * Otherwise, we set the task's pi_blocked_on to | |
20076 | + * PI_REQUEUE_INPROGRESS, so that if the task is waking up | |
20077 | + * it will know that we are in the process of requeuing it. | |
20078 | + */ | |
20079 | + raw_spin_lock(&task->pi_lock); | |
20080 | + if (task->pi_blocked_on) { | |
20081 | + raw_spin_unlock(&task->pi_lock); | |
20082 | + raw_spin_unlock_irq(&lock->wait_lock); | |
20083 | + return -EAGAIN; | |
20084 | + } | |
20085 | + task->pi_blocked_on = PI_REQUEUE_INPROGRESS; | |
20086 | + raw_spin_unlock(&task->pi_lock); | |
20087 | +#endif | |
20088 | + | |
20089 | /* We enforce deadlock detection for futexes */ | |
20090 | ret = task_blocks_on_rt_mutex(lock, waiter, task, | |
20091 | RT_MUTEX_FULL_CHAINWALK); | |
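
The guard added above is a small handshake: take the task's pi_lock, return -EAGAIN if pi_blocked_on is already set (the task blocked or woke up first), otherwise publish a sentinel so the other side knows a requeue is in flight. A compressed userspace sketch of the same pattern, with a pthread mutex standing in for pi_lock and all names hypothetical:

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>

    #define REQUEUE_INPROGRESS ((void *)2)   /* like PI_REQUEUE_INPROGRESS */

    struct task {
            pthread_mutex_t pi_lock;
            void *pi_blocked_on;      /* NULL, a real waiter, or a sentinel */
    };

    /* Return 0 if we won the race and marked the task, -EAGAIN otherwise. */
    static int start_requeue(struct task *t)
    {
            int ret = 0;

            pthread_mutex_lock(&t->pi_lock);
            if (t->pi_blocked_on)             /* someone got there first */
                    ret = -EAGAIN;
            else
                    t->pi_blocked_on = REQUEUE_INPROGRESS;
            pthread_mutex_unlock(&t->pi_lock);
            return ret;
    }

    int main(void)
    {
            struct task t = { PTHREAD_MUTEX_INITIALIZER, NULL };

            printf("first attempt:  %d\n", start_requeue(&t)); /* 0 */
            printf("second attempt: %d\n", start_requeue(&t)); /* -EAGAIN */
            return 0;
    }
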
cb95d48a | 20092 | @@ -1617,10 +2278,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, |
b4de310e JK |
20093 | ret = 0; |
20094 | } | |
20095 | ||
20096 | - if (unlikely(ret)) | |
20097 | + if (ret && rt_mutex_has_waiters(lock)) | |
20098 | remove_waiter(lock, waiter); | |
20099 | ||
20100 | - raw_spin_unlock(&lock->wait_lock); | |
20101 | + raw_spin_unlock_irq(&lock->wait_lock); | |
20102 | ||
20103 | debug_rt_mutex_print_deadlock(waiter); | |
20104 | ||
cb95d48a | 20105 | @@ -1668,12 +2329,12 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
b4de310e JK |
20106 | { |
20107 | int ret; | |
20108 | ||
20109 | - raw_spin_lock(&lock->wait_lock); | |
20110 | + raw_spin_lock_irq(&lock->wait_lock); | |
20111 | ||
20112 | set_current_state(TASK_INTERRUPTIBLE); | |
20113 | ||
20114 | /* sleep on the mutex */ | |
20115 | - ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter); | |
20116 | + ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL); | |
20117 | ||
20118 | if (unlikely(ret)) | |
20119 | remove_waiter(lock, waiter); | |
cb95d48a | 20120 | @@ -1684,7 +2345,93 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, |
b4de310e JK |
20121 | */ |
20122 | fixup_rt_mutex_waiters(lock); | |
20123 | ||
20124 | - raw_spin_unlock(&lock->wait_lock); | |
20125 | + raw_spin_unlock_irq(&lock->wait_lock); | |
20126 | ||
20127 | return ret; | |
20128 | } | |
20129 | + | |
20130 | +static inline int | |
20131 | +ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) | |
20132 | +{ | |
20133 | +#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH | |
20134 | + unsigned tmp; | |
20135 | + | |
20136 | + if (ctx->deadlock_inject_countdown-- == 0) { | |
20137 | + tmp = ctx->deadlock_inject_interval; | |
20138 | + if (tmp > UINT_MAX/4) | |
20139 | + tmp = UINT_MAX; | |
20140 | + else | |
20141 | + tmp = tmp*2 + tmp + tmp/2; | |
20142 | + | |
20143 | + ctx->deadlock_inject_interval = tmp; | |
20144 | + ctx->deadlock_inject_countdown = tmp; | |
20145 | + ctx->contending_lock = lock; | |
20146 | + | |
20147 | + ww_mutex_unlock(lock); | |
20148 | + | |
20149 | + return -EDEADLK; | |
20150 | + } | |
20151 | +#endif | |
20152 | + | |
20153 | + return 0; | |
20154 | +} | |
20155 | + | |
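
The interval arithmetic above grows the injection period by roughly 3.5x per firing (tmp*2 + tmp + tmp/2), with the UINT_MAX/4 test making the multiply overflow-safe. A standalone check of the schedule it produces:

    #include <limits.h>
    #include <stdio.h>

    /* Same growth rule as ww_mutex_deadlock_injection() above. */
    static unsigned int next_interval(unsigned int tmp)
    {
            if (tmp > UINT_MAX / 4)
                    return UINT_MAX;
            return tmp * 2 + tmp + tmp / 2;
    }

    int main(void)
    {
            unsigned int interval = 1;
            int i;

            for (i = 0; i < 8; i++) {
                    printf("injection %d fires after %u acquisitions\n",
                           i, interval);
                    interval = next_interval(interval);
            }
            /* prints 1, 3, 10, 35, 122, 427, 1494, 5229 */
            return 0;
    }

The quick back-off keeps deliberately injected -EDEADLK returns frequent early in a test run, but rare enough not to dominate a long-running one.
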
20156 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20157 | +int __sched | |
20158 | +__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
20159 | +{ | |
20160 | + int ret; | |
20161 | + | |
20162 | + might_sleep(); | |
20163 | + | |
20164 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
20165 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); | |
20166 | + if (ret) | |
20167 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
20168 | + else if (!ret && ww_ctx->acquired > 1) | |
20169 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
20170 | + | |
20171 | + return ret; | |
20172 | +} | |
20173 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible); | |
20174 | + | |
20175 | +int __sched | |
20176 | +__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) | |
20177 | +{ | |
20178 | + int ret; | |
20179 | + | |
20180 | + might_sleep(); | |
20181 | + | |
20182 | + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); | |
20183 | + ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); | |
20184 | + if (ret) | |
20185 | + mutex_release(&lock->base.dep_map, 1, _RET_IP_); | |
20186 | + else if (!ret && ww_ctx->acquired > 1) | |
20187 | + return ww_mutex_deadlock_injection(lock, ww_ctx); | |
20188 | + | |
20189 | + return ret; | |
20190 | +} | |
20191 | +EXPORT_SYMBOL_GPL(__ww_mutex_lock); | |
20192 | + | |
20193 | +void __sched ww_mutex_unlock(struct ww_mutex *lock) | |
20194 | +{ | |
20195 | + int nest = !!lock->ctx; | |
20196 | + | |
20197 | + /* | |
20198 | + * The unlocking fastpath is the 0->1 transition from 'locked' | |
20199 | + * into 'unlocked' state: | |
20200 | + */ | |
20201 | + if (nest) { | |
20202 | +#ifdef CONFIG_DEBUG_MUTEXES | |
20203 | + DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); | |
20204 | +#endif | |
20205 | + if (lock->ctx->acquired > 0) | |
20206 | + lock->ctx->acquired--; | |
20207 | + lock->ctx = NULL; | |
20208 | + } | |
20209 | + | |
20210 | + mutex_release(&lock->base.dep_map, nest, _RET_IP_); | |
20211 | + rt_mutex_unlock(&lock->base.lock); | |
20212 | +} | |
20213 | +EXPORT_SYMBOL(ww_mutex_unlock); | |
20214 | +#endif | |
20215 | diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h | |
20216 | index 4f5f83c7d2d3..289f062f26cd 100644 | |
20217 | --- a/kernel/locking/rtmutex_common.h | |
20218 | +++ b/kernel/locking/rtmutex_common.h | |
20219 | @@ -27,6 +27,7 @@ struct rt_mutex_waiter { | |
20220 | struct rb_node pi_tree_entry; | |
20221 | struct task_struct *task; | |
20222 | struct rt_mutex *lock; | |
20223 | + bool savestate; | |
20224 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
20225 | unsigned long ip; | |
20226 | struct pid *deadlock_task_pid; | |
20227 | @@ -97,6 +98,9 @@ enum rtmutex_chainwalk { | |
20228 | /* | |
20229 | * PI-futex support (proxy locking functions, etc.): | |
20230 | */ | |
20231 | +#define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) | |
20232 | +#define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2) | |
20233 | + | |
20234 | extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); | |
20235 | extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, | |
20236 | struct task_struct *proxy_owner); | |
20237 | @@ -110,7 +114,8 @@ extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, | |
20238 | struct rt_mutex_waiter *waiter); | |
20239 | extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); | |
20240 | extern bool rt_mutex_futex_unlock(struct rt_mutex *lock, | |
20241 | - struct wake_q_head *wqh); | |
20242 | + struct wake_q_head *wqh, | |
20243 | + struct wake_q_head *wq_sleeper); | |
20244 | extern void rt_mutex_adjust_prio(struct task_struct *task); | |
20245 | ||
20246 | #ifdef CONFIG_DEBUG_RT_MUTEXES | |
20247 | @@ -119,4 +124,14 @@ extern void rt_mutex_adjust_prio(struct task_struct *task); | |
20248 | # include "rtmutex.h" | |
20249 | #endif | |
20250 | ||
20251 | +static inline void | |
20252 | +rt_mutex_init_waiter(struct rt_mutex_waiter *waiter, bool savestate) | |
20253 | +{ | |
20254 | + debug_rt_mutex_init_waiter(waiter); | |
20255 | + waiter->task = NULL; | |
20256 | + waiter->savestate = savestate; | |
20257 | + RB_CLEAR_NODE(&waiter->pi_tree_entry); | |
20258 | + RB_CLEAR_NODE(&waiter->tree_entry); | |
20259 | +} | |
20260 | + | |
20261 | #endif | |
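
Because PI_WAKEUP_INPROGRESS and PI_REQUEUE_INPROGRESS are small integer constants cast to pointers, any code walking pi_blocked_on has to filter them out before dereferencing. The rt_mutex_real_waiter() helper used earlier in this patch is defined elsewhere in the RT series; the sketch below is an assumed reconstruction of the obvious test, kept standalone so it compiles and runs:

    #include <stdbool.h>
    #include <stdio.h>

    struct rt_mutex_waiter { int dummy; };

    #define PI_WAKEUP_INPROGRESS  ((struct rt_mutex_waiter *) 1)
    #define PI_REQUEUE_INPROGRESS ((struct rt_mutex_waiter *) 2)

    /* Only a non-NULL, non-sentinel value is a dereferenceable waiter. */
    static bool rt_mutex_real_waiter(struct rt_mutex_waiter *waiter)
    {
            return waiter && waiter != PI_WAKEUP_INPROGRESS &&
                   waiter != PI_REQUEUE_INPROGRESS;
    }

    int main(void)
    {
            struct rt_mutex_waiter w;

            printf("%d %d %d %d\n",
                   rt_mutex_real_waiter(NULL),                  /* 0 */
                   rt_mutex_real_waiter(PI_WAKEUP_INPROGRESS),  /* 0 */
                   rt_mutex_real_waiter(PI_REQUEUE_INPROGRESS), /* 0 */
                   rt_mutex_real_waiter(&w));                   /* 1 */
            return 0;
    }
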
20262 | diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c | |
20263 | index db3ccb1dd614..909779647bd1 100644 | |
20264 | --- a/kernel/locking/spinlock.c | |
20265 | +++ b/kernel/locking/spinlock.c | |
20266 | @@ -124,8 +124,11 @@ void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ | |
20267 | * __[spin|read|write]_lock_bh() | |
20268 | */ | |
20269 | BUILD_LOCK_OPS(spin, raw_spinlock); | |
20270 | + | |
20271 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20272 | BUILD_LOCK_OPS(read, rwlock); | |
20273 | BUILD_LOCK_OPS(write, rwlock); | |
20274 | +#endif | |
20275 | ||
20276 | #endif | |
20277 | ||
20278 | @@ -209,6 +212,8 @@ void __lockfunc _raw_spin_unlock_bh(raw_spinlock_t *lock) | |
20279 | EXPORT_SYMBOL(_raw_spin_unlock_bh); | |
20280 | #endif | |
20281 | ||
20282 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20283 | + | |
20284 | #ifndef CONFIG_INLINE_READ_TRYLOCK | |
20285 | int __lockfunc _raw_read_trylock(rwlock_t *lock) | |
20286 | { | |
20287 | @@ -353,6 +358,8 @@ void __lockfunc _raw_write_unlock_bh(rwlock_t *lock) | |
20288 | EXPORT_SYMBOL(_raw_write_unlock_bh); | |
20289 | #endif | |
20290 | ||
20291 | +#endif /* !PREEMPT_RT_FULL */ | |
20292 | + | |
20293 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | |
20294 | ||
20295 | void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) | |
20296 | diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c | |
20297 | index 0374a596cffa..94970338d518 100644 | |
20298 | --- a/kernel/locking/spinlock_debug.c | |
20299 | +++ b/kernel/locking/spinlock_debug.c | |
20300 | @@ -31,6 +31,7 @@ void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, | |
20301 | ||
20302 | EXPORT_SYMBOL(__raw_spin_lock_init); | |
20303 | ||
20304 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20305 | void __rwlock_init(rwlock_t *lock, const char *name, | |
20306 | struct lock_class_key *key) | |
20307 | { | |
20308 | @@ -48,6 +49,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, | |
20309 | } | |
20310 | ||
20311 | EXPORT_SYMBOL(__rwlock_init); | |
20312 | +#endif | |
20313 | ||
20314 | static void spin_dump(raw_spinlock_t *lock, const char *msg) | |
20315 | { | |
20316 | @@ -159,6 +161,7 @@ void do_raw_spin_unlock(raw_spinlock_t *lock) | |
20317 | arch_spin_unlock(&lock->raw_lock); | |
20318 | } | |
20319 | ||
20320 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20321 | static void rwlock_bug(rwlock_t *lock, const char *msg) | |
20322 | { | |
20323 | if (!debug_locks_off()) | |
20324 | @@ -300,3 +303,5 @@ void do_raw_write_unlock(rwlock_t *lock) | |
20325 | debug_write_unlock(lock); | |
20326 | arch_write_unlock(&lock->raw_lock); | |
20327 | } | |
20328 | + | |
20329 | +#endif | |
20330 | diff --git a/kernel/panic.c b/kernel/panic.c | |
20331 | index 41e2b54f36b5..3535f802953a 100644 | |
20332 | --- a/kernel/panic.c | |
20333 | +++ b/kernel/panic.c | |
20334 | @@ -61,6 +61,37 @@ void __weak panic_smp_self_stop(void) | |
20335 | cpu_relax(); | |
20336 | } | |
20337 | ||
20338 | +/* | |
20339 | + * Stop ourselves in NMI context if another CPU has already panicked. Arch code | |
20340 | + * may override this to prepare for crash dumping, e.g. save regs info. | |
20341 | + */ | |
20342 | +void __weak nmi_panic_self_stop(struct pt_regs *regs) | |
20343 | +{ | |
20344 | + panic_smp_self_stop(); | |
20345 | +} | |
20346 | + | |
20347 | +atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID); | |
20348 | + | |
20349 | +/* | |
20350 | + * A variant of panic() called from NMI context. We return if we've already | |
20351 | + * panicked on this CPU. If another CPU already panicked, loop in | |
20352 | + * nmi_panic_self_stop() which can provide architecture dependent code such | |
20353 | + * as saving register state for crash dump. | |
20354 | + */ | |
20355 | +void nmi_panic(struct pt_regs *regs, const char *msg) | |
20356 | +{ | |
20357 | + int old_cpu, cpu; | |
20358 | + | |
20359 | + cpu = raw_smp_processor_id(); | |
20360 | + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu); | |
20361 | + | |
20362 | + if (old_cpu == PANIC_CPU_INVALID) | |
20363 | + panic("%s", msg); | |
20364 | + else if (old_cpu != cpu) | |
20365 | + nmi_panic_self_stop(regs); | |
20366 | +} | |
20367 | +EXPORT_SYMBOL(nmi_panic); | |
20368 | + | |
20369 | /** | |
20370 | * panic - halt the system | |
20371 | * @fmt: The text string to print | |
20372 | @@ -71,17 +102,17 @@ void __weak panic_smp_self_stop(void) | |
20373 | */ | |
20374 | void panic(const char *fmt, ...) | |
20375 | { | |
20376 | - static DEFINE_SPINLOCK(panic_lock); | |
20377 | static char buf[1024]; | |
20378 | va_list args; | |
20379 | long i, i_next = 0; | |
20380 | int state = 0; | |
20381 | + int old_cpu, this_cpu; | |
20382 | ||
20383 | /* | |
20384 | * Disable local interrupts. This will prevent panic_smp_self_stop | |
20385 | * from deadlocking the first cpu that invokes the panic, since | |
20386 | * there is nothing to prevent an interrupt handler (that runs | |
20387 | - * after the panic_lock is acquired) from invoking panic again. | |
20388 | + * after setting panic_cpu) from invoking panic() again. | |
20389 | */ | |
20390 | local_irq_disable(); | |
20391 | ||
20392 | @@ -94,8 +125,16 @@ void panic(const char *fmt, ...) | |
20393 | * multiple parallel invocations of panic, all other CPUs either | |
20394 | * stop themself or will wait until they are stopped by the 1st CPU | |
20395 | * with smp_send_stop(). | |
20396 | + * | |
20397 | + * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which | |
20398 | + * comes here, so go ahead. | |
20399 | + * `old_cpu == this_cpu' means we came from nmi_panic() which sets | |
20400 | + * panic_cpu to this CPU. In this case, this is also the 1st CPU. | |
20401 | */ | |
20402 | - if (!spin_trylock(&panic_lock)) | |
20403 | + this_cpu = raw_smp_processor_id(); | |
20404 | + old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); | |
20405 | + | |
20406 | + if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu) | |
20407 | panic_smp_self_stop(); | |
20408 | ||
20409 | console_verbose(); | |
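
Both panic() and nmi_panic() now elect the panicking CPU with one compare-and-swap on panic_cpu: the first caller installs its CPU id, a repeat call from the same CPU falls through, and every other CPU parks itself. A userspace sketch of the election with C11 atomics; the function name and return convention are illustrative only:

    #include <stdatomic.h>
    #include <stdio.h>

    #define PANIC_CPU_INVALID -1

    static atomic_int panic_cpu = PANIC_CPU_INVALID;

    /* Return 1 if this caller should carry out the panic, 0 if it parks. */
    static int panic_election(int this_cpu)
    {
            int old_cpu = PANIC_CPU_INVALID;

            /* First caller swaps INVALID -> this_cpu; losers read the winner. */
            atomic_compare_exchange_strong(&panic_cpu, &old_cpu, this_cpu);

            return old_cpu == PANIC_CPU_INVALID || old_cpu == this_cpu;
    }

    int main(void)
    {
            printf("cpu 3, first call: %d\n", panic_election(3)); /* 1: wins  */
            printf("cpu 3, reentry:    %d\n", panic_election(3)); /* 1: same  */
            printf("cpu 1, late call:  %d\n", panic_election(1)); /* 0: parks */
            return 0;
    }

The old_cpu == this_cpu leg is what lets panic() survive re-entry from nmi_panic() on the same CPU, something the old non-recursive spin_trylock(&panic_lock) could not express.
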
20410 | @@ -400,9 +439,11 @@ static u64 oops_id; | |
20411 | ||
20412 | static int init_oops_id(void) | |
20413 | { | |
20414 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20415 | if (!oops_id) | |
20416 | get_random_bytes(&oops_id, sizeof(oops_id)); | |
20417 | else | |
20418 | +#endif | |
20419 | oops_id++; | |
20420 | ||
20421 | return 0; | |
20422 | diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c | |
20423 | index 3124cebaec31..c1b981521dd0 100644 | |
20424 | --- a/kernel/power/hibernate.c | |
20425 | +++ b/kernel/power/hibernate.c | |
20426 | @@ -285,6 +285,8 @@ static int create_image(int platform_mode) | |
20427 | ||
20428 | local_irq_disable(); | |
20429 | ||
20430 | + system_state = SYSTEM_SUSPEND; | |
20431 | + | |
20432 | error = syscore_suspend(); | |
20433 | if (error) { | |
20434 | printk(KERN_ERR "PM: Some system devices failed to power down, " | |
20435 | @@ -314,6 +316,7 @@ static int create_image(int platform_mode) | |
20436 | syscore_resume(); | |
20437 | ||
20438 | Enable_irqs: | |
20439 | + system_state = SYSTEM_RUNNING; | |
20440 | local_irq_enable(); | |
20441 | ||
20442 | Enable_cpus: | |
20443 | @@ -438,6 +441,7 @@ static int resume_target_kernel(bool platform_mode) | |
20444 | goto Enable_cpus; | |
20445 | ||
20446 | local_irq_disable(); | |
20447 | + system_state = SYSTEM_SUSPEND; | |
20448 | ||
20449 | error = syscore_suspend(); | |
20450 | if (error) | |
20451 | @@ -471,6 +475,7 @@ static int resume_target_kernel(bool platform_mode) | |
20452 | syscore_resume(); | |
20453 | ||
20454 | Enable_irqs: | |
20455 | + system_state = SYSTEM_RUNNING; | |
20456 | local_irq_enable(); | |
20457 | ||
20458 | Enable_cpus: | |
20459 | @@ -556,6 +561,7 @@ int hibernation_platform_enter(void) | |
20460 | goto Enable_cpus; | |
20461 | ||
20462 | local_irq_disable(); | |
20463 | + system_state = SYSTEM_SUSPEND; | |
20464 | syscore_suspend(); | |
20465 | if (pm_wakeup_pending()) { | |
20466 | error = -EAGAIN; | |
20467 | @@ -568,6 +574,7 @@ int hibernation_platform_enter(void) | |
20468 | ||
20469 | Power_up: | |
20470 | syscore_resume(); | |
20471 | + system_state = SYSTEM_RUNNING; | |
20472 | local_irq_enable(); | |
20473 | ||
20474 | Enable_cpus: | |
20475 | @@ -642,6 +649,10 @@ static void power_down(void) | |
20476 | cpu_relax(); | |
20477 | } | |
20478 | ||
20479 | +#ifndef CONFIG_SUSPEND | |
20480 | +bool pm_in_action; | |
20481 | +#endif | |
20482 | + | |
20483 | /** | |
20484 | * hibernate - Carry out system hibernation, including saving the image. | |
20485 | */ | |
20486 | @@ -654,6 +665,8 @@ int hibernate(void) | |
20487 | return -EPERM; | |
20488 | } | |
20489 | ||
20490 | + pm_in_action = true; | |
20491 | + | |
20492 | lock_system_sleep(); | |
20493 | /* The snapshot device should not be opened while we're running */ | |
20494 | if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { | |
20495 | @@ -719,6 +732,7 @@ int hibernate(void) | |
20496 | atomic_inc(&snapshot_device_available); | |
20497 | Unlock: | |
20498 | unlock_system_sleep(); | |
20499 | + pm_in_action = false; | |
20500 | return error; | |
20501 | } | |
20502 | ||
20503 | diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c | |
20504 | index f9fe133c13e2..393bc342c586 100644 | |
20505 | --- a/kernel/power/suspend.c | |
20506 | +++ b/kernel/power/suspend.c | |
20507 | @@ -359,6 +359,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
20508 | arch_suspend_disable_irqs(); | |
20509 | BUG_ON(!irqs_disabled()); | |
20510 | ||
20511 | + system_state = SYSTEM_SUSPEND; | |
20512 | + | |
20513 | error = syscore_suspend(); | |
20514 | if (!error) { | |
20515 | *wakeup = pm_wakeup_pending(); | |
20516 | @@ -375,6 +377,8 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) | |
20517 | syscore_resume(); | |
20518 | } | |
20519 | ||
20520 | + system_state = SYSTEM_RUNNING; | |
20521 | + | |
20522 | arch_suspend_enable_irqs(); | |
20523 | BUG_ON(irqs_disabled()); | |
20524 | ||
20525 | @@ -518,6 +522,8 @@ static int enter_state(suspend_state_t state) | |
20526 | return error; | |
20527 | } | |
20528 | ||
20529 | +bool pm_in_action; | |
20530 | + | |
20531 | /** | |
20532 | * pm_suspend - Externally visible function for suspending the system. | |
20533 | * @state: System sleep state to enter. | |
20534 | @@ -532,6 +538,8 @@ int pm_suspend(suspend_state_t state) | |
20535 | if (state <= PM_SUSPEND_ON || state >= PM_SUSPEND_MAX) | |
20536 | return -EINVAL; | |
20537 | ||
20538 | + pm_in_action = true; | |
20539 | + | |
20540 | error = enter_state(state); | |
20541 | if (error) { | |
20542 | suspend_stats.fail++; | |
20543 | @@ -539,6 +547,7 @@ int pm_suspend(suspend_state_t state) | |
20544 | } else { | |
20545 | suspend_stats.success++; | |
20546 | } | |
20547 | + pm_in_action = false; | |
20548 | return error; | |
20549 | } | |
20550 | EXPORT_SYMBOL(pm_suspend); | |
20551 | diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c | |
20552 | index c048e34b177f..c747bdfa199e 100644 | |
20553 | --- a/kernel/printk/printk.c | |
20554 | +++ b/kernel/printk/printk.c | |
20555 | @@ -241,6 +241,65 @@ struct printk_log { | |
20556 | */ | |
20557 | static DEFINE_RAW_SPINLOCK(logbuf_lock); | |
20558 | ||
20559 | +#ifdef CONFIG_EARLY_PRINTK | |
20560 | +struct console *early_console; | |
20561 | + | |
20562 | +static void early_vprintk(const char *fmt, va_list ap) | |
20563 | +{ | |
20564 | + if (early_console) { | |
20565 | + char buf[512]; | |
20566 | + int n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
20567 | + | |
20568 | + early_console->write(early_console, buf, n); | |
20569 | + } | |
20570 | +} | |
20571 | + | |
20572 | +asmlinkage void early_printk(const char *fmt, ...) | |
20573 | +{ | |
20574 | + va_list ap; | |
20575 | + | |
20576 | + va_start(ap, fmt); | |
20577 | + early_vprintk(fmt, ap); | |
20578 | + va_end(ap); | |
20579 | +} | |
20580 | + | |
20581 | +/* | |
20582 | + * This is independent of any log levels - a global | |
20583 | + * kill switch that turns off all of printk. | |
20584 | + * | |
20585 | + * Used by the NMI watchdog if early-printk is enabled. | |
20586 | + */ | |
20587 | +static bool __read_mostly printk_killswitch; | |
20588 | + | |
20589 | +static int __init force_early_printk_setup(char *str) | |
20590 | +{ | |
20591 | + printk_killswitch = true; | |
20592 | + return 0; | |
20593 | +} | |
20594 | +early_param("force_early_printk", force_early_printk_setup); | |
20595 | + | |
20596 | +void printk_kill(void) | |
20597 | +{ | |
20598 | + printk_killswitch = true; | |
20599 | +} | |
20600 | + | |
20601 | +#ifdef CONFIG_PRINTK | |
20602 | +static int forced_early_printk(const char *fmt, va_list ap) | |
20603 | +{ | |
20604 | + if (!printk_killswitch) | |
20605 | + return 0; | |
20606 | + early_vprintk(fmt, ap); | |
20607 | + return 1; | |
20608 | +} | |
20609 | +#endif | |
20610 | + | |
20611 | +#else | |
20612 | +static inline int forced_early_printk(const char *fmt, va_list ap) | |
20613 | +{ | |
20614 | + return 0; | |
20615 | +} | |
20616 | +#endif | |
20617 | + | |
20618 | #ifdef CONFIG_PRINTK | |
20619 | DECLARE_WAIT_QUEUE_HEAD(log_wait); | |
20620 | /* the next printk record to read by syslog(READ) or /proc/kmsg */ | |
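
early_vprintk() is deliberately minimal: format into a small on-stack buffer and hand the bytes straight to the early console's write hook, with no log buffer and no locking, which is why it stays usable after a debugging subsystem has killed regular printk. A userspace miniature of the same shape (stderr plays the early console; note the kernel's vscnprintf() already caps its return value, while userspace vsnprintf() reports the would-be length and needs clamping):

    #include <stdarg.h>
    #include <stdio.h>
    #include <unistd.h>

    struct console {
            void (*write)(const char *buf, int len);
    };

    static void stderr_write(const char *buf, int len)
    {
            write(2, buf, len);
    }

    static struct console early = { stderr_write };
    static struct console *early_console = &early;  /* NULL before setup */

    static void early_vprintk(const char *fmt, va_list ap)
    {
            if (early_console) {
                    char buf[512];
                    int n = vsnprintf(buf, sizeof(buf), fmt, ap);

                    if (n < 0)
                            return;
                    if (n > (int)sizeof(buf) - 1)
                            n = (int)sizeof(buf) - 1;  /* clamp, see above */
                    early_console->write(buf, n);
            }
    }

    static void early_printk(const char *fmt, ...)
    {
            va_list ap;

            va_start(ap, fmt);
            early_vprintk(fmt, ap);
            va_end(ap);
    }

    int main(void)
    {
            early_printk("early: booting cpu %d\n", 0);
            return 0;
    }
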
20621 | @@ -1203,6 +1262,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
20622 | { | |
20623 | char *text; | |
20624 | int len = 0; | |
20625 | + int attempts = 0; | |
20626 | ||
20627 | text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); | |
20628 | if (!text) | |
20629 | @@ -1214,7 +1274,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
20630 | u64 seq; | |
20631 | u32 idx; | |
20632 | enum log_flags prev; | |
20633 | - | |
20634 | + int num_msg; | |
20635 | +try_again: | |
20636 | + attempts++; | |
20637 | + if (attempts > 10) { | |
20638 | + len = -EBUSY; | |
20639 | + goto out; | |
20640 | + } | |
20641 | + num_msg = 0; | |
20642 | if (clear_seq < log_first_seq) { | |
20643 | /* messages are gone, move to first available one */ | |
20644 | clear_seq = log_first_seq; | |
20645 | @@ -1235,6 +1302,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
20646 | prev = msg->flags; | |
20647 | idx = log_next(idx); | |
20648 | seq++; | |
20649 | + num_msg++; | |
20650 | + if (num_msg > 5) { | |
20651 | + num_msg = 0; | |
20652 | + raw_spin_unlock_irq(&logbuf_lock); | |
20653 | + raw_spin_lock_irq(&logbuf_lock); | |
20654 | + if (clear_seq < log_first_seq) | |
20655 | + goto try_again; | |
20656 | + } | |
20657 | } | |
20658 | ||
20659 | /* move first record forward until length fits into the buffer */ | |
20660 | @@ -1248,6 +1323,14 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
20661 | prev = msg->flags; | |
20662 | idx = log_next(idx); | |
20663 | seq++; | |
20664 | + num_msg++; | |
20665 | + if (num_msg > 5) { | |
20666 | + num_msg = 0; | |
20667 | + raw_spin_unlock_irq(&logbuf_lock); | |
20668 | + raw_spin_lock_irq(&logbuf_lock); | |
20669 | + if (clear_seq < log_first_seq) | |
20670 | + goto try_again; | |
20671 | + } | |
20672 | } | |
20673 | ||
20674 | /* last message fitting into this dump */ | |
20675 | @@ -1288,6 +1371,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |
20676 | clear_seq = log_next_seq; | |
20677 | clear_idx = log_next_idx; | |
20678 | } | |
20679 | +out: | |
20680 | raw_spin_unlock_irq(&logbuf_lock); | |
20681 | ||
20682 | kfree(text); | |
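
The change to syslog_print_all() is a classic lock-break: every five records the loop drops logbuf_lock and immediately retakes it so that higher-priority work can slip in, then revalidates its cursor because the ring may have been overwritten in the gap, restarting if it fell behind and giving up after ten restarts. A userspace skeleton of the pattern, with a pthread mutex in place of the raw spinlock and hypothetical names throughout:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t buf_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long first_seq;  /* oldest record still in the ring */

    /* Walk records [seq, end) with bounded lock hold times. */
    static int walk_records(unsigned long seq, unsigned long end)
    {
            int attempts = 0, num_msg;

            pthread_mutex_lock(&buf_lock);
    try_again:
            if (++attempts > 10) {            /* writers keep lapping us */
                    pthread_mutex_unlock(&buf_lock);
                    return -1;
            }
            num_msg = 0;
            if (seq < first_seq)
                    seq = first_seq;          /* records gone, skip ahead */

            while (seq < end) {
                    /* ... consume record 'seq' ... */
                    seq++;
                    if (++num_msg > 5) {
                            num_msg = 0;
                            pthread_mutex_unlock(&buf_lock); /* lock break */
                            pthread_mutex_lock(&buf_lock);
                            if (seq < first_seq)  /* overwritten meanwhile? */
                                    goto try_again;
                    }
            }
            pthread_mutex_unlock(&buf_lock);
            return 0;
    }

    int main(void)
    {
            printf("walk: %d\n", walk_records(0, 23));  /* 0: completed */
            return 0;
    }
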
20683 | @@ -1443,6 +1527,12 @@ static void call_console_drivers(int level, | |
20684 | if (!console_drivers) | |
20685 | return; | |
20686 | ||
20687 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
20688 | + if (in_irq() || in_nmi()) | |
20689 | + return; | |
20690 | + } | |
20691 | + | |
20692 | + migrate_disable(); | |
20693 | for_each_console(con) { | |
20694 | if (exclusive_console && con != exclusive_console) | |
20695 | continue; | |
20696 | @@ -1458,6 +1548,7 @@ static void call_console_drivers(int level, | |
20697 | else | |
20698 | con->write(con, text, len); | |
20699 | } | |
20700 | + migrate_enable(); | |
20701 | } | |
20702 | ||
20703 | /* | |
20704 | @@ -1518,6 +1609,15 @@ static inline int can_use_console(unsigned int cpu) | |
20705 | static int console_trylock_for_printk(void) | |
20706 | { | |
20707 | unsigned int cpu = smp_processor_id(); | |
20708 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20709 | + int lock = !early_boot_irqs_disabled && (preempt_count() == 0) && | |
20710 | + !irqs_disabled(); | |
20711 | +#else | |
20712 | + int lock = 1; | |
20713 | +#endif | |
20714 | + | |
20715 | + if (!lock) | |
20716 | + return 0; | |
20717 | ||
20718 | if (!console_trylock()) | |
20719 | return 0; | |
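
On PREEMPT_RT_FULL the console semaphore is a sleeping lock, so vprintk_emit() may only try to take it from a context that is actually allowed to sleep; the lock expression above encodes that gate. A standalone rendering of the predicate, with plain variables standing in for the kernel's context probes:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for early_boot_irqs_disabled, preempt_count(),
     * and irqs_disabled(). */
    static bool early_boot_irqs_off;
    static int  preempt_cnt;
    static bool irqs_off;

    /* May this context block on a sleeping lock? */
    static bool may_take_console_sem(void)
    {
            return !early_boot_irqs_off && preempt_cnt == 0 && !irqs_off;
    }

    int main(void)
    {
            printf("task context:  %d\n", may_take_console_sem()); /* 1 */

            irqs_off = true;
            printf("irqs disabled: %d\n", may_take_console_sem()); /* 0 */
            return 0;
    }

When the gate says no, the message stays in the log buffer and is flushed by the next printer that runs in a sleepable context.
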
20720 | @@ -1672,6 +1772,13 @@ asmlinkage int vprintk_emit(int facility, int level, | |
20721 | /* cpu currently holding logbuf_lock in this function */ | |
20722 | static unsigned int logbuf_cpu = UINT_MAX; | |
20723 | ||
20724 | + /* | |
20725 | + * Fall back to early_printk if a debugging subsystem has | |
20726 | + * killed printk output | |
20727 | + */ | |
20728 | + if (unlikely(forced_early_printk(fmt, args))) | |
20729 | + return 1; | |
20730 | + | |
20731 | if (level == LOGLEVEL_SCHED) { | |
20732 | level = LOGLEVEL_DEFAULT; | |
20733 | in_sched = true; | |
20734 | @@ -1813,8 +1920,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |
20735 | * console_sem which would prevent anyone from printing to | |
20736 | * console | |
20737 | */ | |
20738 | - preempt_disable(); | |
20739 | - | |
20740 | + migrate_disable(); | |
20741 | /* | |
20742 | * Try to acquire and then immediately release the console | |
20743 | * semaphore. The release will print out buffers and wake up | |
20744 | @@ -1822,7 +1928,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |
20745 | */ | |
20746 | if (console_trylock_for_printk()) | |
20747 | console_unlock(); | |
20748 | - preempt_enable(); | |
20749 | + migrate_enable(); | |
20750 | lockdep_on(); | |
20751 | } | |
20752 | ||
20753 | @@ -1961,26 +2067,6 @@ DEFINE_PER_CPU(printk_func_t, printk_func); | |
20754 | ||
20755 | #endif /* CONFIG_PRINTK */ | |
20756 | ||
20757 | -#ifdef CONFIG_EARLY_PRINTK | |
20758 | -struct console *early_console; | |
20759 | - | |
20760 | -asmlinkage __visible void early_printk(const char *fmt, ...) | |
20761 | -{ | |
20762 | - va_list ap; | |
20763 | - char buf[512]; | |
20764 | - int n; | |
20765 | - | |
20766 | - if (!early_console) | |
20767 | - return; | |
20768 | - | |
20769 | - va_start(ap, fmt); | |
20770 | - n = vscnprintf(buf, sizeof(buf), fmt, ap); | |
20771 | - va_end(ap); | |
20772 | - | |
20773 | - early_console->write(early_console, buf, n); | |
20774 | -} | |
20775 | -#endif | |
20776 | - | |
20777 | static int __add_preferred_console(char *name, int idx, char *options, | |
20778 | char *brl_options) | |
20779 | { | |
20780 | @@ -2202,11 +2288,16 @@ static void console_cont_flush(char *text, size_t size) | |
20781 | goto out; | |
20782 | ||
20783 | len = cont_print_text(text, size); | |
20784 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20785 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
20786 | + call_console_drivers(cont.level, NULL, 0, text, len); | |
20787 | +#else | |
20788 | raw_spin_unlock(&logbuf_lock); | |
20789 | stop_critical_timings(); | |
20790 | call_console_drivers(cont.level, NULL, 0, text, len); | |
20791 | start_critical_timings(); | |
20792 | local_irq_restore(flags); | |
20793 | +#endif | |
20794 | return; | |
20795 | out: | |
20796 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
20797 | @@ -2316,13 +2407,17 @@ skip: | |
20798 | console_idx = log_next(console_idx); | |
20799 | console_seq++; | |
20800 | console_prev = msg->flags; | |
20801 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20802 | + raw_spin_unlock_irqrestore(&logbuf_lock, flags); | |
20803 | + call_console_drivers(level, ext_text, ext_len, text, len); | |
20804 | +#else | |
20805 | raw_spin_unlock(&logbuf_lock); | |
20806 | ||
20807 | stop_critical_timings(); /* don't trace print latency */ | |
20808 | call_console_drivers(level, ext_text, ext_len, text, len); | |
20809 | start_critical_timings(); | |
20810 | local_irq_restore(flags); | |
20811 | - | |
20812 | +#endif | |
20813 | if (do_cond_resched) | |
20814 | cond_resched(); | |
20815 | } | |
20816 | @@ -2374,6 +2469,11 @@ void console_unblank(void) | |
20817 | { | |
20818 | struct console *c; | |
20819 | ||
20820 | + if (IS_ENABLED(CONFIG_PREEMPT_RT_BASE)) { | |
20821 | + if (in_irq() || in_nmi()) | |
20822 | + return; | |
20823 | + } | |
20824 | + | |
20825 | /* | |
20826 | * console_unblank can no longer be called in interrupt context unless | |
20827 | * oops_in_progress is set to 1.. | |
20828 | diff --git a/kernel/ptrace.c b/kernel/ptrace.c | |
20829 | index 3189e51db7e8..1004af706be7 100644 | |
20830 | --- a/kernel/ptrace.c | |
20831 | +++ b/kernel/ptrace.c | |
20832 | @@ -129,7 +129,14 @@ static bool ptrace_freeze_traced(struct task_struct *task) | |
20833 | ||
20834 | spin_lock_irq(&task->sighand->siglock); | |
20835 | if (task_is_traced(task) && !__fatal_signal_pending(task)) { | |
20836 | - task->state = __TASK_TRACED; | |
20837 | + unsigned long flags; | |
20838 | + | |
20839 | + raw_spin_lock_irqsave(&task->pi_lock, flags); | |
20840 | + if (task->state & __TASK_TRACED) | |
20841 | + task->state = __TASK_TRACED; | |
20842 | + else | |
20843 | + task->saved_state = __TASK_TRACED; | |
20844 | + raw_spin_unlock_irqrestore(&task->pi_lock, flags); | |
20845 | ret = true; | |
20846 | } | |
20847 | spin_unlock_irq(&task->sighand->siglock); | |
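
Under RT a traced task may currently be sleeping on a sleeping spinlock, in which case its real state was moved aside into task->saved_state and task->state reflects the lock sleep; the fix therefore writes __TASK_TRACED into whichever field actually holds the traced state, under pi_lock so the scheduler cannot swap the two underneath it. A toy model of the two-field state (real locking and the full state-bitmask handling omitted):

    #include <stdio.h>

    #define TASK_UNINTERRUPTIBLE 0x2   /* e.g. sleeping on an RT spinlock */
    #define __TASK_TRACED        0x8

    struct task {
            unsigned int state;        /* what the scheduler sees now */
            unsigned int saved_state;  /* parked state while on an RT lock */
    };

    /* Freeze the tracee: update the field that holds the TRACED bit. */
    static void freeze_traced(struct task *t)
    {
            if (t->state & __TASK_TRACED)
                    t->state = __TASK_TRACED;
            else
                    t->saved_state = __TASK_TRACED;
    }

    int main(void)
    {
            /* Tracee preempted by an RT lock: TRACED sits in saved_state. */
            struct task t = { TASK_UNINTERRUPTIBLE, __TASK_TRACED };

            freeze_traced(&t);
            printf("state=%#x saved_state=%#x\n", t.state, t.saved_state);
            return 0;
    }
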
20848 | diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c | |
20849 | index d89328e260df..5bb3364a6284 100644 | |
20850 | --- a/kernel/rcu/rcutorture.c | |
20851 | +++ b/kernel/rcu/rcutorture.c | |
20852 | @@ -390,6 +390,7 @@ static struct rcu_torture_ops rcu_ops = { | |
20853 | .name = "rcu" | |
20854 | }; | |
20855 | ||
20856 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20857 | /* | |
20858 | * Definitions for rcu_bh torture testing. | |
20859 | */ | |
20860 | @@ -429,6 +430,12 @@ static struct rcu_torture_ops rcu_bh_ops = { | |
20861 | .name = "rcu_bh" | |
20862 | }; | |
20863 | ||
20864 | +#else | |
20865 | +static struct rcu_torture_ops rcu_bh_ops = { | |
20866 | + .ttype = INVALID_RCU_FLAVOR, | |
20867 | +}; | |
20868 | +#endif | |
20869 | + | |
20870 | /* | |
20871 | * Don't even think about trying any of these in real life!!! | |
20872 | * The names include "busted", and they really mean it! | |
20873 | diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c | |
20874 | index f07343b54fe5..d862a203fce0 100644 | |
20875 | --- a/kernel/rcu/tree.c | |
20876 | +++ b/kernel/rcu/tree.c | |
20877 | @@ -56,6 +56,11 @@ | |
20878 | #include <linux/random.h> | |
20879 | #include <linux/trace_events.h> | |
20880 | #include <linux/suspend.h> | |
20881 | +#include <linux/delay.h> | |
20882 | +#include <linux/gfp.h> | |
20883 | +#include <linux/oom.h> | |
20884 | +#include <linux/smpboot.h> | |
20885 | +#include "../time/tick-internal.h" | |
20886 | ||
20887 | #include "tree.h" | |
20888 | #include "rcu.h" | |
20889 | @@ -266,6 +271,19 @@ void rcu_sched_qs(void) | |
20890 | } | |
20891 | } | |
20892 | ||
20893 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
20894 | +static void rcu_preempt_qs(void); | |
20895 | + | |
20896 | +void rcu_bh_qs(void) | |
20897 | +{ | |
20898 | + unsigned long flags; | |
20899 | + | |
20900 | + /* Callers to this function, rcu_preempt_qs(), must disable irqs. */ | |
20901 | + local_irq_save(flags); | |
20902 | + rcu_preempt_qs(); | |
20903 | + local_irq_restore(flags); | |
20904 | +} | |
20905 | +#else | |
20906 | void rcu_bh_qs(void) | |
20907 | { | |
20908 | if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { | |
20909 | @@ -275,6 +293,7 @@ void rcu_bh_qs(void) | |
20910 | __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false); | |
20911 | } | |
20912 | } | |
20913 | +#endif | |
20914 | ||
20915 | static DEFINE_PER_CPU(int, rcu_sched_qs_mask); | |
20916 | ||
20917 | @@ -435,11 +454,13 @@ EXPORT_SYMBOL_GPL(rcu_batches_started_sched); | |
20918 | /* | |
20919 | * Return the number of RCU BH batches started thus far for debug & stats. | |
20920 | */ | |
20921 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20922 | unsigned long rcu_batches_started_bh(void) | |
20923 | { | |
20924 | return rcu_bh_state.gpnum; | |
20925 | } | |
20926 | EXPORT_SYMBOL_GPL(rcu_batches_started_bh); | |
20927 | +#endif | |
20928 | ||
20929 | /* | |
20930 | * Return the number of RCU batches completed thus far for debug & stats. | |
20931 | @@ -459,6 +480,7 @@ unsigned long rcu_batches_completed_sched(void) | |
20932 | } | |
20933 | EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); | |
20934 | ||
20935 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20936 | /* | |
20937 | * Return the number of RCU BH batches completed thus far for debug & stats. | |
20938 | */ | |
20939 | @@ -486,6 +508,13 @@ void rcu_bh_force_quiescent_state(void) | |
20940 | } | |
20941 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | |
20942 | ||
20943 | +#else | |
20944 | +void rcu_force_quiescent_state(void) | |
20945 | +{ | |
20946 | +} | |
20947 | +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |
20948 | +#endif | |
20949 | + | |
20950 | /* | |
20951 | * Force a quiescent state for RCU-sched. | |
20952 | */ | |
20953 | @@ -536,9 +565,11 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, | |
20954 | case RCU_FLAVOR: | |
20955 | rsp = rcu_state_p; | |
20956 | break; | |
20957 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
20958 | case RCU_BH_FLAVOR: | |
20959 | rsp = &rcu_bh_state; | |
20960 | break; | |
20961 | +#endif | |
20962 | case RCU_SCHED_FLAVOR: | |
20963 | rsp = &rcu_sched_state; | |
20964 | break; | |
20965 | @@ -1590,7 +1621,6 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
20966 | int needmore; | |
20967 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | |
20968 | ||
20969 | - rcu_nocb_gp_cleanup(rsp, rnp); | |
20970 | rnp->need_future_gp[c & 0x1] = 0; | |
20971 | needmore = rnp->need_future_gp[(c + 1) & 0x1]; | |
20972 | trace_rcu_future_gp(rnp, rdp, c, | |
20973 | @@ -1611,7 +1641,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp) | |
20974 | !READ_ONCE(rsp->gp_flags) || | |
20975 | !rsp->gp_kthread) | |
20976 | return; | |
20977 | - wake_up(&rsp->gp_wq); | |
20978 | + swake_up(&rsp->gp_wq); | |
20979 | } | |
20980 | ||
20981 | /* | |
20982 | @@ -1991,6 +2021,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |
20983 | int nocb = 0; | |
20984 | struct rcu_data *rdp; | |
20985 | struct rcu_node *rnp = rcu_get_root(rsp); | |
20986 | + struct swait_queue_head *sq; | |
20987 | ||
20988 | WRITE_ONCE(rsp->gp_activity, jiffies); | |
20989 | raw_spin_lock_irq(&rnp->lock); | |
20990 | @@ -2029,7 +2060,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) | |
20991 | needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; | |
20992 | /* smp_mb() provided by prior unlock-lock pair. */ | |
20993 | nocb += rcu_future_gp_cleanup(rsp, rnp); | |
20994 | + sq = rcu_nocb_gp_get(rnp); | |
20995 | raw_spin_unlock_irq(&rnp->lock); | |
20996 | + rcu_nocb_gp_cleanup(sq); | |
20997 | cond_resched_rcu_qs(); | |
20998 | WRITE_ONCE(rsp->gp_activity, jiffies); | |
20999 | rcu_gp_slow(rsp, gp_cleanup_delay); | |
21000 | @@ -2076,7 +2109,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |
21001 | READ_ONCE(rsp->gpnum), | |
21002 | TPS("reqwait")); | |
21003 | rsp->gp_state = RCU_GP_WAIT_GPS; | |
21004 | - wait_event_interruptible(rsp->gp_wq, | |
21005 | + swait_event_interruptible(rsp->gp_wq, | |
21006 | READ_ONCE(rsp->gp_flags) & | |
21007 | RCU_GP_FLAG_INIT); | |
21008 | rsp->gp_state = RCU_GP_DONE_GPS; | |
21009 | @@ -2106,7 +2139,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |
21010 | READ_ONCE(rsp->gpnum), | |
21011 | TPS("fqswait")); | |
21012 | rsp->gp_state = RCU_GP_WAIT_FQS; | |
21013 | - ret = wait_event_interruptible_timeout(rsp->gp_wq, | |
21014 | + ret = swait_event_interruptible_timeout(rsp->gp_wq, | |
21015 | rcu_gp_fqs_check_wake(rsp, &gf), j); | |
21016 | rsp->gp_state = RCU_GP_DOING_FQS; | |
21017 | /* Locking provides needed memory barriers. */ | |
21018 | @@ -2230,7 +2263,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) | |
21019 | WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); | |
21020 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); | |
21021 | raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); | |
21022 | - rcu_gp_kthread_wake(rsp); | |
21023 | + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ | |
21024 | } | |
21025 | ||
21026 | /* | |
21027 | @@ -2891,7 +2924,7 @@ static void force_quiescent_state(struct rcu_state *rsp) | |
21028 | } | |
21029 | WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); | |
21030 | raw_spin_unlock_irqrestore(&rnp_old->lock, flags); | |
21031 | - rcu_gp_kthread_wake(rsp); | |
21032 | + swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */ | |
21033 | } | |
21034 | ||
21035 | /* | |
21036 | @@ -2934,18 +2967,17 @@ __rcu_process_callbacks(struct rcu_state *rsp) | |
21037 | /* | |
21038 | * Do RCU core processing for the current CPU. | |
21039 | */ | |
21040 | -static void rcu_process_callbacks(struct softirq_action *unused) | |
21041 | +static void rcu_process_callbacks(void) | |
21042 | { | |
21043 | struct rcu_state *rsp; | |
21044 | ||
21045 | if (cpu_is_offline(smp_processor_id())) | |
21046 | return; | |
21047 | - trace_rcu_utilization(TPS("Start RCU core")); | |
21048 | for_each_rcu_flavor(rsp) | |
21049 | __rcu_process_callbacks(rsp); | |
21050 | - trace_rcu_utilization(TPS("End RCU core")); | |
21051 | } | |
21052 | ||
21053 | +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
21054 | /* | |
21055 | * Schedule RCU callback invocation. If the specified type of RCU | |
21056 | * does not support RCU priority boosting, just do a direct call, | |
21057 | @@ -2957,18 +2989,105 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |
21058 | { | |
21059 | if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) | |
21060 | return; | |
21061 | - if (likely(!rsp->boost)) { | |
21062 | - rcu_do_batch(rsp, rdp); | |
21063 | - return; | |
21064 | - } | |
21065 | - invoke_rcu_callbacks_kthread(); | |
21066 | + rcu_do_batch(rsp, rdp); | |
21067 | } | |
21068 | ||
21069 | +static void rcu_wake_cond(struct task_struct *t, int status) | |
21070 | +{ | |
21071 | + /* | |
21072 | + * If the thread is yielding, only wake it when this | |
21073 | + * is invoked from idle | |
21074 | + */ | |
21075 | + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current))) | |
21076 | + wake_up_process(t); | |
21077 | +} | |
21078 | + | |
21079 | +/* | |
21080 | + * Wake up this CPU's rcuc kthread to do RCU core processing. | |
21081 | + */ | |
21082 | static void invoke_rcu_core(void) | |
21083 | { | |
21084 | - if (cpu_online(smp_processor_id())) | |
21085 | - raise_softirq(RCU_SOFTIRQ); | |
21086 | + unsigned long flags; | |
21087 | + struct task_struct *t; | |
21088 | + | |
21089 | + if (!cpu_online(smp_processor_id())) | |
21090 | + return; | |
21091 | + local_irq_save(flags); | |
21092 | + __this_cpu_write(rcu_cpu_has_work, 1); | |
21093 | + t = __this_cpu_read(rcu_cpu_kthread_task); | |
21094 | + if (t != NULL && current != t) | |
21095 | + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status)); | |
21096 | + local_irq_restore(flags); | |
21097 | +} | |
21098 | + | |
21099 | +static void rcu_cpu_kthread_park(unsigned int cpu) | |
21100 | +{ | |
21101 | + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
21102 | +} | |
21103 | + | |
21104 | +static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
21105 | +{ | |
21106 | + return __this_cpu_read(rcu_cpu_has_work); | |
21107 | +} | |
21108 | + | |
21109 | +/* | |
21110 | + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
21111 | + * RCU softirq used in flavors and configurations of RCU that do not | |
21112 | + * support RCU priority boosting. | |
21113 | + */ | |
21114 | +static void rcu_cpu_kthread(unsigned int cpu) | |
21115 | +{ | |
21116 | + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
21117 | + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
21118 | + int spincnt; | |
21119 | + | |
21120 | + for (spincnt = 0; spincnt < 10; spincnt++) { | |
21121 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
21122 | + local_bh_disable(); | |
21123 | + *statusp = RCU_KTHREAD_RUNNING; | |
21124 | + this_cpu_inc(rcu_cpu_kthread_loops); | |
21125 | + local_irq_disable(); | |
21126 | + work = *workp; | |
21127 | + *workp = 0; | |
21128 | + local_irq_enable(); | |
21129 | + if (work) | |
21130 | + rcu_process_callbacks(); | |
21131 | + local_bh_enable(); | |
21132 | + if (*workp == 0) { | |
21133 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
21134 | + *statusp = RCU_KTHREAD_WAITING; | |
21135 | + return; | |
21136 | + } | |
21137 | + } | |
21138 | + *statusp = RCU_KTHREAD_YIELDING; | |
21139 | + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
21140 | + schedule_timeout_interruptible(2); | |
21141 | + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
21142 | + *statusp = RCU_KTHREAD_WAITING; | |
21143 | +} | |
21144 | + | |
21145 | +static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
21146 | + .store = &rcu_cpu_kthread_task, | |
21147 | + .thread_should_run = rcu_cpu_kthread_should_run, | |
21148 | + .thread_fn = rcu_cpu_kthread, | |
21149 | + .thread_comm = "rcuc/%u", | |
21150 | + .setup = rcu_cpu_kthread_setup, | |
21151 | + .park = rcu_cpu_kthread_park, | |
21152 | +}; | |
21153 | + | |
21154 | +/* | |
21155 | + * Spawn per-CPU RCU core processing kthreads. | |
21156 | + */ | |
21157 | +static int __init rcu_spawn_core_kthreads(void) | |
21158 | +{ | |
21159 | + int cpu; | |
21160 | + | |
21161 | + for_each_possible_cpu(cpu) | |
21162 | + per_cpu(rcu_cpu_has_work, cpu) = 0; | |
21163 | + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
21164 | + return 0; | |
21165 | } | |
21166 | +early_initcall(rcu_spawn_core_kthreads); | |
21167 | ||
21168 | /* | |
21169 | * Handle any core-RCU processing required by a call_rcu() invocation. | |
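
The hunk above replaces RCU_SOFTIRQ processing with per-CPU "rcuc/%u" smpboot kthreads, so callback invocation becomes preemptible and can be given an explicit scheduling policy. On a kernel carrying this change the threads should be visible and tunable from user space, for example (the PIDs below are made up):

    # ps -e -o pid,comm | grep rcuc
       14 rcuc/0
       21 rcuc/1
    # chrt -f -p 1 14    # run CPU 0's rcuc kthread at SCHED_FIFO priority 1
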
21170 | @@ -3114,6 +3233,7 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) | |
21171 | } | |
21172 | EXPORT_SYMBOL_GPL(call_rcu_sched); | |
21173 | ||
21174 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21175 | /* | |
21176 | * Queue an RCU callback for invocation after a quicker grace period. | |
21177 | */ | |
21178 | @@ -3122,6 +3242,7 @@ void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) | |
21179 | __call_rcu(head, func, &rcu_bh_state, -1, 0); | |
21180 | } | |
21181 | EXPORT_SYMBOL_GPL(call_rcu_bh); | |
21182 | +#endif | |
21183 | ||
21184 | /* | |
21185 | * Queue an RCU callback for lazy invocation after a grace period. | |
21186 | @@ -3213,6 +3334,7 @@ void synchronize_sched(void) | |
21187 | } | |
21188 | EXPORT_SYMBOL_GPL(synchronize_sched); | |
21189 | ||
21190 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21191 | /** | |
21192 | * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. | |
21193 | * | |
21194 | @@ -3239,6 +3361,7 @@ void synchronize_rcu_bh(void) | |
21195 | wait_rcu_gp(call_rcu_bh); | |
21196 | } | |
21197 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | |
21198 | +#endif | |
21199 | ||
21200 | /** | |
21201 | * get_state_synchronize_rcu - Snapshot current RCU state | |
21202 | @@ -3524,7 +3647,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | |
21203 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | |
21204 | if (wake) { | |
21205 | smp_mb(); /* EGP done before wake_up(). */ | |
21206 | - wake_up(&rsp->expedited_wq); | |
21207 | + swake_up(&rsp->expedited_wq); | |
21208 | } | |
21209 | break; | |
21210 | } | |
21211 | @@ -3781,7 +3904,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) | |
21212 | jiffies_start = jiffies; | |
21213 | ||
21214 | for (;;) { | |
21215 | - ret = wait_event_interruptible_timeout( | |
21216 | + ret = swait_event_timeout( | |
21217 | rsp->expedited_wq, | |
21218 | sync_rcu_preempt_exp_done(rnp_root), | |
21219 | jiffies_stall); | |
21220 | @@ -3789,7 +3912,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp) | |
21221 | return; | |
21222 | if (ret < 0) { | |
21223 | /* Hit a signal, disable CPU stall warnings. */ | |
21224 | - wait_event(rsp->expedited_wq, | |
21225 | + swait_event(rsp->expedited_wq, | |
21226 | sync_rcu_preempt_exp_done(rnp_root)); | |
21227 | return; | |
21228 | } | |
21229 | @@ -4101,6 +4224,7 @@ static void _rcu_barrier(struct rcu_state *rsp) | |
21230 | mutex_unlock(&rsp->barrier_mutex); | |
21231 | } | |
21232 | ||
21233 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21234 | /** | |
21235 | * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete. | |
21236 | */ | |
21237 | @@ -4109,6 +4233,7 @@ void rcu_barrier_bh(void) | |
21238 | _rcu_barrier(&rcu_bh_state); | |
21239 | } | |
21240 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | |
21241 | +#endif | |
21242 | ||
21243 | /** | |
21244 | * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks. | |
21245 | @@ -4455,8 +4580,8 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |
21246 | } | |
21247 | } | |
21248 | ||
21249 | - init_waitqueue_head(&rsp->gp_wq); | |
21250 | - init_waitqueue_head(&rsp->expedited_wq); | |
21251 | + init_swait_queue_head(&rsp->gp_wq); | |
21252 | + init_swait_queue_head(&rsp->expedited_wq); | |
21253 | rnp = rsp->level[rcu_num_lvls - 1]; | |
21254 | for_each_possible_cpu(i) { | |
21255 | while (i > rnp->grphi) | |
21256 | @@ -4576,12 +4701,13 @@ void __init rcu_init(void) | |
21257 | ||
21258 | rcu_bootup_announce(); | |
21259 | rcu_init_geometry(); | |
21260 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21261 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | |
21262 | +#endif | |
21263 | rcu_init_one(&rcu_sched_state, &rcu_sched_data); | |
21264 | if (dump_tree) | |
21265 | rcu_dump_rcu_node_tree(&rcu_sched_state); | |
21266 | __rcu_init_preempt(); | |
21267 | - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | |
21268 | ||
21269 | /* | |
21270 | * We don't need protection against CPU-hotplug here because | |
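
The wait-queue conversions in this file all follow one pattern: wait_queue_head_t becomes struct swait_queue_head, and the wait_event*()/wake_up*() calls become their swait counterparts. Simple wait queues use a raw spinlock and do bounded work per wakeup, which keeps them usable from contexts that remain truly atomic on PREEMPT_RT. A minimal sketch of the API, assuming the swait primitives added elsewhere in this patch set:

    static DECLARE_SWAIT_QUEUE_HEAD(my_wq);
    static bool done;

    /* waiter side */
    swait_event_interruptible(my_wq, READ_ONCE(done));

    /* waker side; safe even from hard interrupt context */
    WRITE_ONCE(done, true);
    swake_up(&my_wq);
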
21271 | diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h | |
21272 | index 9fb4e238d4dc..c75834d8de24 100644 | |
21273 | --- a/kernel/rcu/tree.h | |
21274 | +++ b/kernel/rcu/tree.h | |
21275 | @@ -27,6 +27,7 @@ | |
21276 | #include <linux/threads.h> | |
21277 | #include <linux/cpumask.h> | |
21278 | #include <linux/seqlock.h> | |
21279 | +#include <linux/swait.h> | |
21280 | #include <linux/stop_machine.h> | |
21281 | ||
21282 | /* | |
21283 | @@ -241,7 +242,7 @@ struct rcu_node { | |
21284 | /* Refused to boost: not sure why, though. */ | |
21285 | /* This can happen due to race conditions. */ | |
21286 | #ifdef CONFIG_RCU_NOCB_CPU | |
21287 | - wait_queue_head_t nocb_gp_wq[2]; | |
21288 | + struct swait_queue_head nocb_gp_wq[2]; | |
21289 | /* Place for rcu_nocb_kthread() to wait GP. */ | |
21290 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | |
21291 | int need_future_gp[2]; | |
21292 | @@ -393,7 +394,7 @@ struct rcu_data { | |
21293 | atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */ | |
21294 | struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */ | |
21295 | struct rcu_head **nocb_follower_tail; | |
21296 | - wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ | |
21297 | + struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */ | |
21298 | struct task_struct *nocb_kthread; | |
21299 | int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ | |
21300 | ||
21301 | @@ -472,7 +473,7 @@ struct rcu_state { | |
21302 | unsigned long gpnum; /* Current gp number. */ | |
21303 | unsigned long completed; /* # of last completed gp. */ | |
21304 | struct task_struct *gp_kthread; /* Task for grace periods. */ | |
21305 | - wait_queue_head_t gp_wq; /* Where GP task waits. */ | |
21306 | + struct swait_queue_head gp_wq; /* Where GP task waits. */ | |
21307 | short gp_flags; /* Commands for GP task. */ | |
21308 | short gp_state; /* GP kthread sleep state. */ | |
21309 | ||
21310 | @@ -504,7 +505,7 @@ struct rcu_state { | |
21311 | atomic_long_t expedited_workdone3; /* # done by others #3. */ | |
21312 | atomic_long_t expedited_normal; /* # fallbacks to normal. */ | |
21313 | atomic_t expedited_need_qs; /* # CPUs left to check in. */ | |
21314 | - wait_queue_head_t expedited_wq; /* Wait for check-ins. */ | |
21315 | + struct swait_queue_head expedited_wq; /* Wait for check-ins. */ | |
21316 | int ncpus_snap; /* # CPUs seen last time. */ | |
21317 | ||
21318 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | |
21319 | @@ -556,18 +557,18 @@ extern struct list_head rcu_struct_flavors; | |
21320 | */ | |
21321 | extern struct rcu_state rcu_sched_state; | |
21322 | ||
21323 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21324 | extern struct rcu_state rcu_bh_state; | |
21325 | +#endif | |
21326 | ||
21327 | #ifdef CONFIG_PREEMPT_RCU | |
21328 | extern struct rcu_state rcu_preempt_state; | |
21329 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ | |
21330 | ||
21331 | -#ifdef CONFIG_RCU_BOOST | |
21332 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
21333 | DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu); | |
21334 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
21335 | DECLARE_PER_CPU(char, rcu_cpu_has_work); | |
21336 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
21337 | ||
21338 | #ifndef RCU_TREE_NONCORE | |
21339 | ||
21340 | @@ -587,10 +588,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func); | |
21341 | static void __init __rcu_init_preempt(void); | |
21342 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | |
21343 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | |
21344 | -static void invoke_rcu_callbacks_kthread(void); | |
21345 | static bool rcu_is_callbacks_kthread(void); | |
21346 | +static void rcu_cpu_kthread_setup(unsigned int cpu); | |
21347 | #ifdef CONFIG_RCU_BOOST | |
21348 | -static void rcu_preempt_do_callbacks(void); | |
21349 | static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
21350 | struct rcu_node *rnp); | |
21351 | #endif /* #ifdef CONFIG_RCU_BOOST */ | |
21352 | @@ -607,7 +607,8 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp); | |
21353 | static void increment_cpu_stall_ticks(void); | |
21354 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); | |
21355 | static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); | |
21356 | -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); | |
21357 | +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); | |
21358 | +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); | |
21359 | static void rcu_init_one_nocb(struct rcu_node *rnp); | |
21360 | static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, | |
21361 | bool lazy, unsigned long flags); | |
21362 | diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h | |
21363 | index 630c19772630..8e119cf647ba 100644 | |
21364 | --- a/kernel/rcu/tree_plugin.h | |
21365 | +++ b/kernel/rcu/tree_plugin.h | |
21366 | @@ -24,25 +24,10 @@ | |
21367 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | |
21368 | */ | |
21369 | ||
21370 | -#include <linux/delay.h> | |
21371 | -#include <linux/gfp.h> | |
21372 | -#include <linux/oom.h> | |
21373 | -#include <linux/smpboot.h> | |
21374 | -#include "../time/tick-internal.h" | |
21375 | - | |
21376 | #ifdef CONFIG_RCU_BOOST | |
21377 | ||
21378 | #include "../locking/rtmutex_common.h" | |
21379 | ||
21380 | -/* | |
21381 | - * Control variables for per-CPU and per-rcu_node kthreads. These | |
21382 | - * handle all flavors of RCU. | |
21383 | - */ | |
21384 | -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | |
21385 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
21386 | -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
21387 | -DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
21388 | - | |
21389 | #else /* #ifdef CONFIG_RCU_BOOST */ | |
21390 | ||
21391 | /* | |
21392 | @@ -55,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
21393 | ||
21394 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
21395 | ||
21396 | +/* | |
21397 | + * Control variables for per-CPU and per-rcu_node kthreads. These | |
21398 | + * handle all flavors of RCU. | |
21399 | + */ | |
21400 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | |
21401 | +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | |
21402 | +DEFINE_PER_CPU(char, rcu_cpu_has_work); | |
21403 | + | |
21404 | #ifdef CONFIG_RCU_NOCB_CPU | |
21405 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | |
21406 | static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ | |
21407 | @@ -432,7 +425,7 @@ void rcu_read_unlock_special(struct task_struct *t) | |
21408 | } | |
21409 | ||
21410 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | |
21411 | - if (in_irq() || in_serving_softirq()) { | |
21412 | + if (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET)) { | |
21413 | lockdep_rcu_suspicious(__FILE__, __LINE__, | |
21414 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | |
21415 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n", | |
21416 | @@ -645,15 +638,6 @@ static void rcu_preempt_check_callbacks(void) | |
21417 | t->rcu_read_unlock_special.b.need_qs = true; | |
21418 | } | |
21419 | ||
21420 | -#ifdef CONFIG_RCU_BOOST | |
21421 | - | |
21422 | -static void rcu_preempt_do_callbacks(void) | |
21423 | -{ | |
21424 | - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); | |
21425 | -} | |
21426 | - | |
21427 | -#endif /* #ifdef CONFIG_RCU_BOOST */ | |
21428 | - | |
21429 | /* | |
21430 | * Queue a preemptible-RCU callback for invocation after a grace period. | |
21431 | */ | |
21432 | @@ -930,6 +914,19 @@ void exit_rcu(void) | |
21433 | ||
21434 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | |
21435 | ||
21436 | +/* | |
21437 | + * If boosting, set rcuc kthreads to realtime priority. | |
21438 | + */ | |
21439 | +static void rcu_cpu_kthread_setup(unsigned int cpu) | |
21440 | +{ | |
21441 | +#ifdef CONFIG_RCU_BOOST | |
21442 | + struct sched_param sp; | |
21443 | + | |
21444 | + sp.sched_priority = kthread_prio; | |
21445 | + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
21446 | +#endif /* #ifdef CONFIG_RCU_BOOST */ | |
21447 | +} | |
21448 | + | |
21449 | #ifdef CONFIG_RCU_BOOST | |
21450 | ||
21451 | #include "../locking/rtmutex_common.h" | |
21452 | @@ -961,16 +958,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp) | |
21453 | ||
21454 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | |
21455 | ||
21456 | -static void rcu_wake_cond(struct task_struct *t, int status) | |
21457 | -{ | |
21458 | - /* | |
21459 | - * If the thread is yielding, only wake it when this | |
21460 | - * is invoked from idle | |
21461 | - */ | |
21462 | - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current)) | |
21463 | - wake_up_process(t); | |
21464 | -} | |
21465 | - | |
21466 | /* | |
21467 | * Carry out RCU priority boosting on the task indicated by ->exp_tasks | |
21468 | * or ->boost_tasks, advancing the pointer to the next task in the | |
21469 | @@ -1115,23 +1102,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
21470 | } | |
21471 | ||
21472 | /* | |
21473 | - * Wake up the per-CPU kthread to invoke RCU callbacks. | |
21474 | - */ | |
21475 | -static void invoke_rcu_callbacks_kthread(void) | |
21476 | -{ | |
21477 | - unsigned long flags; | |
21478 | - | |
21479 | - local_irq_save(flags); | |
21480 | - __this_cpu_write(rcu_cpu_has_work, 1); | |
21481 | - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && | |
21482 | - current != __this_cpu_read(rcu_cpu_kthread_task)) { | |
21483 | - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), | |
21484 | - __this_cpu_read(rcu_cpu_kthread_status)); | |
21485 | - } | |
21486 | - local_irq_restore(flags); | |
21487 | -} | |
21488 | - | |
21489 | -/* | |
21490 | * Is the current CPU running the RCU-callbacks kthread? | |
21491 | * Caller must have preemption disabled. | |
21492 | */ | |
21493 | @@ -1186,67 +1156,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |
21494 | return 0; | |
21495 | } | |
21496 | ||
21497 | -static void rcu_kthread_do_work(void) | |
21498 | -{ | |
21499 | - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); | |
21500 | - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); | |
21501 | - rcu_preempt_do_callbacks(); | |
21502 | -} | |
21503 | - | |
21504 | -static void rcu_cpu_kthread_setup(unsigned int cpu) | |
21505 | -{ | |
21506 | - struct sched_param sp; | |
21507 | - | |
21508 | - sp.sched_priority = kthread_prio; | |
21509 | - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | |
21510 | -} | |
21511 | - | |
21512 | -static void rcu_cpu_kthread_park(unsigned int cpu) | |
21513 | -{ | |
21514 | - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; | |
21515 | -} | |
21516 | - | |
21517 | -static int rcu_cpu_kthread_should_run(unsigned int cpu) | |
21518 | -{ | |
21519 | - return __this_cpu_read(rcu_cpu_has_work); | |
21520 | -} | |
21521 | - | |
21522 | -/* | |
21523 | - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the | |
21524 | - * RCU softirq used in flavors and configurations of RCU that do not | |
21525 | - * support RCU priority boosting. | |
21526 | - */ | |
21527 | -static void rcu_cpu_kthread(unsigned int cpu) | |
21528 | -{ | |
21529 | - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); | |
21530 | - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); | |
21531 | - int spincnt; | |
21532 | - | |
21533 | - for (spincnt = 0; spincnt < 10; spincnt++) { | |
21534 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); | |
21535 | - local_bh_disable(); | |
21536 | - *statusp = RCU_KTHREAD_RUNNING; | |
21537 | - this_cpu_inc(rcu_cpu_kthread_loops); | |
21538 | - local_irq_disable(); | |
21539 | - work = *workp; | |
21540 | - *workp = 0; | |
21541 | - local_irq_enable(); | |
21542 | - if (work) | |
21543 | - rcu_kthread_do_work(); | |
21544 | - local_bh_enable(); | |
21545 | - if (*workp == 0) { | |
21546 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); | |
21547 | - *statusp = RCU_KTHREAD_WAITING; | |
21548 | - return; | |
21549 | - } | |
21550 | - } | |
21551 | - *statusp = RCU_KTHREAD_YIELDING; | |
21552 | - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield")); | |
21553 | - schedule_timeout_interruptible(2); | |
21554 | - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); | |
21555 | - *statusp = RCU_KTHREAD_WAITING; | |
21556 | -} | |
21557 | - | |
21558 | /* | |
21559 | * Set the per-rcu_node kthread's affinity to cover all CPUs that are | |
21560 | * served by the rcu_node in question. The CPU hotplug lock is still | |
21561 | @@ -1276,26 +1185,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | |
21562 | free_cpumask_var(cm); | |
21563 | } | |
21564 | ||
21565 | -static struct smp_hotplug_thread rcu_cpu_thread_spec = { | |
21566 | - .store = &rcu_cpu_kthread_task, | |
21567 | - .thread_should_run = rcu_cpu_kthread_should_run, | |
21568 | - .thread_fn = rcu_cpu_kthread, | |
21569 | - .thread_comm = "rcuc/%u", | |
21570 | - .setup = rcu_cpu_kthread_setup, | |
21571 | - .park = rcu_cpu_kthread_park, | |
21572 | -}; | |
21573 | - | |
21574 | /* | |
21575 | * Spawn boost kthreads -- called as soon as the scheduler is running. | |
21576 | */ | |
21577 | static void __init rcu_spawn_boost_kthreads(void) | |
21578 | { | |
21579 | struct rcu_node *rnp; | |
21580 | - int cpu; | |
21581 | - | |
21582 | - for_each_possible_cpu(cpu) | |
21583 | - per_cpu(rcu_cpu_has_work, cpu) = 0; | |
21584 | - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); | |
21585 | rcu_for_each_leaf_node(rcu_state_p, rnp) | |
21586 | (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); | |
21587 | } | |
21588 | @@ -1318,11 +1213,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) | |
21589 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | |
21590 | } | |
21591 | ||
21592 | -static void invoke_rcu_callbacks_kthread(void) | |
21593 | -{ | |
21594 | - WARN_ON_ONCE(1); | |
21595 | -} | |
21596 | - | |
21597 | static bool rcu_is_callbacks_kthread(void) | |
21598 | { | |
21599 | return false; | |
21600 | @@ -1346,7 +1236,7 @@ static void rcu_prepare_kthreads(int cpu) | |
21601 | ||
21602 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | |
21603 | ||
21604 | -#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
21605 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) | |
21606 | ||
21607 | /* | |
21608 | * Check to see if any future RCU-related work will need to be done | |
21609 | @@ -1363,7 +1253,9 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
21610 | return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) | |
21611 | ? 0 : rcu_cpu_has_callbacks(NULL); | |
21612 | } | |
21613 | +#endif /* !defined(CONFIG_RCU_FAST_NO_HZ) || defined(CONFIG_PREEMPT_RT_FULL) */ | |
21614 | ||
21615 | +#if !defined(CONFIG_RCU_FAST_NO_HZ) | |
21616 | /* | |
21617 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | |
21618 | * after it. | |
21619 | @@ -1459,6 +1351,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |
21620 | return cbs_ready; | |
21621 | } | |
21622 | ||
21623 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21624 | + | |
21625 | /* | |
21626 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready | |
21627 | * to invoke. If the CPU has callbacks, try to advance them. Tell the | |
21628 | @@ -1504,6 +1398,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) | |
21629 | *nextevt = basemono + dj * TICK_NSEC; | |
21630 | return 0; | |
21631 | } | |
21632 | +#endif /* #ifndef CONFIG_PREEMPT_RT_FULL */ | |
21633 | ||
21634 | /* | |
21635 | * Prepare a CPU for idle from an RCU perspective. The first major task | |
21636 | @@ -1822,9 +1717,9 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll); | |
21637 | * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended | |
21638 | * grace period. | |
21639 | */ | |
21640 | -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
21641 | +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) | |
21642 | { | |
21643 | - wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]); | |
21644 | + swake_up_all(sq); | |
21645 | } | |
21646 | ||
21647 | /* | |
21648 | @@ -1840,10 +1735,15 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) | |
21649 | rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq; | |
21650 | } | |
21651 | ||
21652 | +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) | |
21653 | +{ | |
21654 | + return &rnp->nocb_gp_wq[rnp->completed & 0x1]; | |
21655 | +} | |
21656 | + | |
21657 | static void rcu_init_one_nocb(struct rcu_node *rnp) | |
21658 | { | |
21659 | - init_waitqueue_head(&rnp->nocb_gp_wq[0]); | |
21660 | - init_waitqueue_head(&rnp->nocb_gp_wq[1]); | |
21661 | + init_swait_queue_head(&rnp->nocb_gp_wq[0]); | |
21662 | + init_swait_queue_head(&rnp->nocb_gp_wq[1]); | |
21663 | } | |
21664 | ||
21665 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | |
21666 | @@ -1868,7 +1768,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) | |
21667 | if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { | |
21668 | /* Prior smp_mb__after_atomic() orders against prior enqueue. */ | |
21669 | WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); | |
21670 | - wake_up(&rdp_leader->nocb_wq); | |
21671 | + swake_up(&rdp_leader->nocb_wq); | |
21672 | } | |
21673 | } | |
21674 | ||
21675 | @@ -2081,7 +1981,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) | |
21676 | */ | |
21677 | trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait")); | |
21678 | for (;;) { | |
21679 | - wait_event_interruptible( | |
21680 | + swait_event_interruptible( | |
21681 | rnp->nocb_gp_wq[c & 0x1], | |
21682 | (d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c))); | |
21683 | if (likely(d)) | |
21684 | @@ -2109,7 +2009,7 @@ wait_again: | |
21685 | /* Wait for callbacks to appear. */ | |
21686 | if (!rcu_nocb_poll) { | |
21687 | trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep"); | |
21688 | - wait_event_interruptible(my_rdp->nocb_wq, | |
21689 | + swait_event_interruptible(my_rdp->nocb_wq, | |
21690 | !READ_ONCE(my_rdp->nocb_leader_sleep)); | |
21691 | /* Memory barrier handled by smp_mb() calls below and repoll. */ | |
21692 | } else if (firsttime) { | |
21693 | @@ -2184,7 +2084,7 @@ wait_again: | |
21694 | * List was empty, wake up the follower. | |
21695 | * Memory barriers supplied by atomic_long_add(). | |
21696 | */ | |
21697 | - wake_up(&rdp->nocb_wq); | |
21698 | + swake_up(&rdp->nocb_wq); | |
21699 | } | |
21700 | } | |
21701 | ||
21702 | @@ -2205,7 +2105,7 @@ static void nocb_follower_wait(struct rcu_data *rdp) | |
21703 | if (!rcu_nocb_poll) { | |
21704 | trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | |
21705 | "FollowerSleep"); | |
21706 | - wait_event_interruptible(rdp->nocb_wq, | |
21707 | + swait_event_interruptible(rdp->nocb_wq, | |
21708 | READ_ONCE(rdp->nocb_follower_head)); | |
21709 | } else if (firsttime) { | |
21710 | /* Don't drown trace log with "Poll"! */ | |
21711 | @@ -2364,7 +2264,7 @@ void __init rcu_init_nohz(void) | |
21712 | static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) | |
21713 | { | |
21714 | rdp->nocb_tail = &rdp->nocb_head; | |
21715 | - init_waitqueue_head(&rdp->nocb_wq); | |
21716 | + init_swait_queue_head(&rdp->nocb_wq); | |
21717 | rdp->nocb_follower_tail = &rdp->nocb_follower_head; | |
21718 | } | |
21719 | ||
21720 | @@ -2514,7 +2414,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | |
21721 | return false; | |
21722 | } | |
21723 | ||
21724 | -static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | |
21725 | +static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq) | |
21726 | { | |
21727 | } | |
21728 | ||
21729 | @@ -2522,6 +2422,11 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq) | |
21730 | { | |
21731 | } | |
21732 | ||
21733 | +static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) | |
21734 | +{ | |
21735 | + return NULL; | |
21736 | +} | |
21737 | + | |
21738 | static void rcu_init_one_nocb(struct rcu_node *rnp) | |
21739 | { | |
21740 | } | |
21741 | diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c | |
21742 | index 5f748c5a40f0..9a3904603ff6 100644 | |
21743 | --- a/kernel/rcu/update.c | |
21744 | +++ b/kernel/rcu/update.c | |
21745 | @@ -276,6 +276,7 @@ int rcu_read_lock_held(void) | |
21746 | } | |
21747 | EXPORT_SYMBOL_GPL(rcu_read_lock_held); | |
21748 | ||
21749 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21750 | /** | |
21751 | * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? | |
21752 | * | |
21753 | @@ -302,6 +303,7 @@ int rcu_read_lock_bh_held(void) | |
21754 | return in_softirq() || irqs_disabled(); | |
21755 | } | |
21756 | EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |
21757 | +#endif | |
21758 | ||
21759 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | |
21760 | ||
21761 | diff --git a/kernel/relay.c b/kernel/relay.c | |
21762 | index 0b4570cfacae..60684be39f22 100644 | |
21763 | --- a/kernel/relay.c | |
21764 | +++ b/kernel/relay.c | |
21765 | @@ -336,6 +336,10 @@ static void wakeup_readers(unsigned long data) | |
21766 | { | |
21767 | struct rchan_buf *buf = (struct rchan_buf *)data; | |
21768 | wake_up_interruptible(&buf->read_wait); | |
21769 | + /* | |
21770 | + * Stupid polling for now: | |
21771 | + */ | |
21772 | + mod_timer(&buf->timer, jiffies + 1); | |
21773 | } | |
21774 | ||
21775 | /** | |
21776 | @@ -353,6 +357,7 @@ static void __relay_reset(struct rchan_buf *buf, unsigned int init) | |
21777 | init_waitqueue_head(&buf->read_wait); | |
21778 | kref_init(&buf->kref); | |
21779 | setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); | |
21780 | + mod_timer(&buf->timer, jiffies + 1); | |
21781 | } else | |
21782 | del_timer_sync(&buf->timer); | |
21783 | ||
21784 | @@ -736,15 +741,6 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) | |
21785 | else | |
21786 | buf->early_bytes += buf->chan->subbuf_size - | |
21787 | buf->padding[old_subbuf]; | |
21788 | - smp_mb(); | |
21789 | - if (waitqueue_active(&buf->read_wait)) | |
21790 | - /* | |
21791 | - * Calling wake_up_interruptible() from here | |
21792 | - * will deadlock if we happen to be logging | |
21793 | - * from the scheduler (trying to re-grab | |
21794 | - * rq->lock), so defer it. | |
21795 | - */ | |
21796 | - mod_timer(&buf->timer, jiffies + 1); | |
21797 | } | |
21798 | ||
21799 | old = buf->data; | |
21800 | diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile | |
21801 | index 67687973ce80..01b9994b367a 100644 | |
21802 | --- a/kernel/sched/Makefile | |
21803 | +++ b/kernel/sched/Makefile | |
21804 | @@ -13,7 +13,7 @@ endif | |
21805 | ||
21806 | obj-y += core.o loadavg.o clock.o cputime.o | |
21807 | obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o | |
21808 | -obj-y += wait.o completion.o idle.o | |
21809 | +obj-y += wait.o swait.o swork.o completion.o idle.o | |
21810 | obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o | |
21811 | obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o | |
21812 | obj-$(CONFIG_SCHEDSTATS) += stats.o | |
21813 | diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c | |
21814 | index 8d0f35debf35..b62cf6400fe0 100644 | |
21815 | --- a/kernel/sched/completion.c | |
21816 | +++ b/kernel/sched/completion.c | |
21817 | @@ -30,10 +30,10 @@ void complete(struct completion *x) | |
21818 | { | |
21819 | unsigned long flags; | |
21820 | ||
21821 | - spin_lock_irqsave(&x->wait.lock, flags); | |
21822 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21823 | x->done++; | |
21824 | - __wake_up_locked(&x->wait, TASK_NORMAL, 1); | |
21825 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21826 | + swake_up_locked(&x->wait); | |
21827 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21828 | } | |
21829 | EXPORT_SYMBOL(complete); | |
21830 | ||
21831 | @@ -50,10 +50,10 @@ void complete_all(struct completion *x) | |
21832 | { | |
21833 | unsigned long flags; | |
21834 | ||
21835 | - spin_lock_irqsave(&x->wait.lock, flags); | |
21836 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21837 | x->done += UINT_MAX/2; | |
21838 | - __wake_up_locked(&x->wait, TASK_NORMAL, 0); | |
21839 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21840 | + swake_up_all_locked(&x->wait); | |
21841 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21842 | } | |
21843 | EXPORT_SYMBOL(complete_all); | |
21844 | ||
21845 | @@ -62,20 +62,20 @@ do_wait_for_common(struct completion *x, | |
21846 | long (*action)(long), long timeout, int state) | |
21847 | { | |
21848 | if (!x->done) { | |
21849 | - DECLARE_WAITQUEUE(wait, current); | |
21850 | + DECLARE_SWAITQUEUE(wait); | |
21851 | ||
21852 | - __add_wait_queue_tail_exclusive(&x->wait, &wait); | |
21853 | + __prepare_to_swait(&x->wait, &wait); | |
21854 | do { | |
21855 | if (signal_pending_state(state, current)) { | |
21856 | timeout = -ERESTARTSYS; | |
21857 | break; | |
21858 | } | |
21859 | __set_current_state(state); | |
21860 | - spin_unlock_irq(&x->wait.lock); | |
21861 | + raw_spin_unlock_irq(&x->wait.lock); | |
21862 | timeout = action(timeout); | |
21863 | - spin_lock_irq(&x->wait.lock); | |
21864 | + raw_spin_lock_irq(&x->wait.lock); | |
21865 | } while (!x->done && timeout); | |
21866 | - __remove_wait_queue(&x->wait, &wait); | |
21867 | + __finish_swait(&x->wait, &wait); | |
21868 | if (!x->done) | |
21869 | return timeout; | |
21870 | } | |
21871 | @@ -89,9 +89,9 @@ __wait_for_common(struct completion *x, | |
21872 | { | |
21873 | might_sleep(); | |
21874 | ||
21875 | - spin_lock_irq(&x->wait.lock); | |
21876 | + raw_spin_lock_irq(&x->wait.lock); | |
21877 | timeout = do_wait_for_common(x, action, timeout, state); | |
21878 | - spin_unlock_irq(&x->wait.lock); | |
21879 | + raw_spin_unlock_irq(&x->wait.lock); | |
21880 | return timeout; | |
21881 | } | |
21882 | ||
21883 | @@ -277,12 +277,12 @@ bool try_wait_for_completion(struct completion *x) | |
21884 | if (!READ_ONCE(x->done)) | |
21885 | return 0; | |
21886 | ||
21887 | - spin_lock_irqsave(&x->wait.lock, flags); | |
21888 | + raw_spin_lock_irqsave(&x->wait.lock, flags); | |
21889 | if (!x->done) | |
21890 | ret = 0; | |
21891 | else | |
21892 | x->done--; | |
21893 | - spin_unlock_irqrestore(&x->wait.lock, flags); | |
21894 | + raw_spin_unlock_irqrestore(&x->wait.lock, flags); | |
21895 | return ret; | |
21896 | } | |
21897 | EXPORT_SYMBOL(try_wait_for_completion); | |
21898 | @@ -311,7 +311,7 @@ bool completion_done(struct completion *x) | |
21899 | * after it's acquired the lock. | |
21900 | */ | |
21901 | smp_rmb(); | |
21902 | - spin_unlock_wait(&x->wait.lock); | |
21903 | + raw_spin_unlock_wait(&x->wait.lock); | |
21904 | return true; | |
21905 | } | |
21906 | EXPORT_SYMBOL(completion_done); | |
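
Backing struct completion with a simple wait queue means complete() now takes a raw spinlock and wakes a single waiter, so it stays safe from atomic context on RT, while complete_all() drains the whole queue via swake_up_all_locked(). The caller-visible API is unchanged; the usual pattern still reads:

    static DECLARE_COMPLETION(setup_done);

    /* waiter */
    wait_for_completion(&setup_done);

    /* waker, once the work is finished */
    complete(&setup_done);
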
21907 | diff --git a/kernel/sched/core.c b/kernel/sched/core.c | |
21908 | index 20253dbc8610..e9b8d518202e 100644 | |
21909 | --- a/kernel/sched/core.c | |
21910 | +++ b/kernel/sched/core.c | |
21911 | @@ -260,7 +260,11 @@ late_initcall(sched_init_debug); | |
21912 | * Number of tasks to iterate in a single balance run. | |
21913 | * Limited because this is done with IRQs disabled. | |
21914 | */ | |
21915 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
21916 | const_debug unsigned int sysctl_sched_nr_migrate = 32; | |
21917 | +#else | |
21918 | +const_debug unsigned int sysctl_sched_nr_migrate = 8; | |
21919 | +#endif | |
21920 | ||
21921 | /* | |
21922 | * period over which we average the RT time consumption, measured | |
21923 | @@ -438,6 +442,7 @@ static void init_rq_hrtick(struct rq *rq) | |
21924 | ||
21925 | hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
21926 | rq->hrtick_timer.function = hrtick; | |
21927 | + rq->hrtick_timer.irqsafe = 1; | |
21928 | } | |
21929 | #else /* CONFIG_SCHED_HRTICK */ | |
21930 | static inline void hrtick_clear(struct rq *rq) | |
21931 | @@ -542,7 +547,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) | |
21932 | head->lastp = &node->next; | |
21933 | } | |
21934 | ||
21935 | -void wake_up_q(struct wake_q_head *head) | |
21936 | +void __wake_up_q(struct wake_q_head *head, bool sleeper) | |
21937 | { | |
21938 | struct wake_q_node *node = head->first; | |
21939 | ||
21940 | @@ -559,7 +564,10 @@ void wake_up_q(struct wake_q_head *head) | |
21941 | * wake_up_process() implies a wmb() to pair with the queueing | |
21942 | * in wake_q_add() so as not to miss wakeups. | |
21943 | */ | |
21944 | - wake_up_process(task); | |
21945 | + if (sleeper) | |
21946 | + wake_up_lock_sleeper(task); | |
21947 | + else | |
21948 | + wake_up_process(task); | |
21949 | put_task_struct(task); | |
21950 | } | |
21951 | } | |
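
wake_up_q() gains a "sleeper" flag so that waiters blocked on RT's sleeping spinlocks can be woken with the WF_LOCK_SLEEPER semantics added further down in this file. The header side of this patch (not shown here) presumably wraps the new helper along these lines:

    static inline void wake_up_q(struct wake_q_head *head)
    {
            __wake_up_q(head, false);
    }

    static inline void wake_up_q_sleeper(struct wake_q_head *head)
    {
            __wake_up_q(head, true);
    }
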
21952 | @@ -595,6 +603,38 @@ void resched_curr(struct rq *rq) | |
21953 | trace_sched_wake_idle_without_ipi(cpu); | |
21954 | } | |
21955 | ||
21956 | +#ifdef CONFIG_PREEMPT_LAZY | |
21957 | +void resched_curr_lazy(struct rq *rq) | |
21958 | +{ | |
21959 | + struct task_struct *curr = rq->curr; | |
21960 | + int cpu; | |
21961 | + | |
21962 | + if (!sched_feat(PREEMPT_LAZY)) { | |
21963 | + resched_curr(rq); | |
21964 | + return; | |
21965 | + } | |
21966 | + | |
21967 | + lockdep_assert_held(&rq->lock); | |
21968 | + | |
21969 | + if (test_tsk_need_resched(curr)) | |
21970 | + return; | |
21971 | + | |
21972 | + if (test_tsk_need_resched_lazy(curr)) | |
21973 | + return; | |
21974 | + | |
21975 | + set_tsk_need_resched_lazy(curr); | |
21976 | + | |
21977 | + cpu = cpu_of(rq); | |
21978 | + if (cpu == smp_processor_id()) | |
21979 | + return; | |
21980 | + | |
21981 | + /* NEED_RESCHED_LAZY must be visible before we test polling */ | |
21982 | + smp_mb(); | |
21983 | + if (!tsk_is_polling(curr)) | |
21984 | + smp_send_reschedule(cpu); | |
21985 | +} | |
21986 | +#endif | |
21987 | + | |
21988 | void resched_cpu(int cpu) | |
21989 | { | |
21990 | struct rq *rq = cpu_rq(cpu); | |
21991 | @@ -618,11 +658,14 @@ void resched_cpu(int cpu) | |
21992 | */ | |
21993 | int get_nohz_timer_target(void) | |
21994 | { | |
21995 | - int i, cpu = smp_processor_id(); | |
21996 | + int i, cpu; | |
21997 | struct sched_domain *sd; | |
21998 | ||
21999 | + preempt_disable_rt(); | |
22000 | + cpu = smp_processor_id(); | |
22001 | + | |
22002 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) | |
22003 | - return cpu; | |
22004 | + goto preempt_en_rt; | |
22005 | ||
22006 | rcu_read_lock(); | |
22007 | for_each_domain(cpu, sd) { | |
22008 | @@ -641,6 +684,8 @@ int get_nohz_timer_target(void) | |
22009 | cpu = housekeeping_any_cpu(); | |
22010 | unlock: | |
22011 | rcu_read_unlock(); | |
22012 | +preempt_en_rt: | |
22013 | + preempt_enable_rt(); | |
22014 | return cpu; | |
22015 | } | |
22016 | /* | |
22017 | @@ -1174,6 +1219,11 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
22018 | ||
22019 | lockdep_assert_held(&p->pi_lock); | |
22020 | ||
22021 | + if (__migrate_disabled(p)) { | |
22022 | + cpumask_copy(&p->cpus_allowed, new_mask); | |
22023 | + return; | |
22024 | + } | |
22025 | + | |
22026 | queued = task_on_rq_queued(p); | |
22027 | running = task_current(rq, p); | |
22028 | ||
22029 | @@ -1196,6 +1246,84 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) | |
22030 | enqueue_task(rq, p, ENQUEUE_RESTORE); | |
22031 | } | |
22032 | ||
22033 | +static DEFINE_PER_CPU(struct cpumask, sched_cpumasks); | |
22034 | +static DEFINE_MUTEX(sched_down_mutex); | |
22035 | +static cpumask_t sched_down_cpumask; | |
22036 | + | |
22037 | +void tell_sched_cpu_down_begin(int cpu) | |
22038 | +{ | |
22039 | + mutex_lock(&sched_down_mutex); | |
22040 | + cpumask_set_cpu(cpu, &sched_down_cpumask); | |
22041 | + mutex_unlock(&sched_down_mutex); | |
22042 | +} | |
22043 | + | |
22044 | +void tell_sched_cpu_down_done(int cpu) | |
22045 | +{ | |
22046 | + mutex_lock(&sched_down_mutex); | |
22047 | + cpumask_clear_cpu(cpu, &sched_down_cpumask); | |
22048 | + mutex_unlock(&sched_down_mutex); | |
22049 | +} | |
22050 | + | |
22051 | +/** | |
22052 | + * migrate_me - try to move the current task off this cpu | |
22053 | + * | |
22054 | + * Used by the pin_current_cpu() code to try to get tasks | |
22055 | + * to move off the current CPU as it is going down. | |
22056 | + * It will only move the task if the task isn't pinned to | |
22057 | + * the CPU (with migrate_disable, affinity or NO_SETAFFINITY) | |
22058 | + * and the task is in a RUNNING state; otherwise moving the | |
22059 | + * task would wake it up (change its state to running) when | |
22060 | + * the task does not expect it. | |
22061 | + * | |
22062 | + * Returns 1 if it succeeded in moving the current task | |
22063 | + * 0 otherwise. | |
22064 | + */ | |
22065 | +int migrate_me(void) | |
22066 | +{ | |
22067 | + struct task_struct *p = current; | |
22068 | + struct migration_arg arg; | |
22069 | + struct cpumask *cpumask; | |
22070 | + struct cpumask *mask; | |
22071 | + unsigned long flags; | |
22072 | + unsigned int dest_cpu; | |
22073 | + struct rq *rq; | |
22074 | + | |
22075 | + /* | |
22076 | + * We cannot migrate tasks bound to a CPU or tasks that are | |
22077 | + * not running; moving such a task would wake it up. | |
22078 | + */ | |
22079 | + if (p->flags & PF_NO_SETAFFINITY || p->state) | |
22080 | + return 0; | |
22081 | + | |
22082 | + mutex_lock(&sched_down_mutex); | |
22083 | + rq = task_rq_lock(p, &flags); | |
22084 | + | |
22085 | + cpumask = this_cpu_ptr(&sched_cpumasks); | |
22086 | + mask = &p->cpus_allowed; | |
22087 | + | |
22088 | + cpumask_andnot(cpumask, mask, &sched_down_cpumask); | |
22089 | + | |
22090 | + if (!cpumask_weight(cpumask)) { | |
22091 | + /* It's only on this CPU? */ | |
22092 | + task_rq_unlock(rq, p, &flags); | |
22093 | + mutex_unlock(&sched_down_mutex); | |
22094 | + return 0; | |
22095 | + } | |
22096 | + | |
22097 | + dest_cpu = cpumask_any_and(cpu_active_mask, cpumask); | |
22098 | + | |
22099 | + arg.task = p; | |
22100 | + arg.dest_cpu = dest_cpu; | |
22101 | + | |
22102 | + task_rq_unlock(rq, p, &flags); | |
22103 | + | |
22104 | + stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | |
22105 | + tlb_migrate_finish(p->mm); | |
22106 | + mutex_unlock(&sched_down_mutex); | |
22107 | + | |
22108 | + return 1; | |
22109 | +} | |
22110 | + | |
22111 | /* | |
22112 | * Change a given task's CPU affinity. Migrate the thread to a | |
22113 | * proper CPU and schedule it away if the CPU it's executing on | |
22114 | @@ -1235,7 +1363,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, | |
22115 | do_set_cpus_allowed(p, new_mask); | |
22116 | ||
22117 | /* Can the task run on the task's current CPU? If so, we're done */ | |
22118 | - if (cpumask_test_cpu(task_cpu(p), new_mask)) | |
22119 | + if (cpumask_test_cpu(task_cpu(p), new_mask) || __migrate_disabled(p)) | |
22120 | goto out; | |
22121 | ||
22122 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | |
22123 | @@ -1411,6 +1539,18 @@ out: | |
22124 | return ret; | |
22125 | } | |
22126 | ||
22127 | +static bool check_task_state(struct task_struct *p, long match_state) | |
22128 | +{ | |
22129 | + bool match = false; | |
22130 | + | |
22131 | + raw_spin_lock_irq(&p->pi_lock); | |
22132 | + if (p->state == match_state || p->saved_state == match_state) | |
22133 | + match = true; | |
22134 | + raw_spin_unlock_irq(&p->pi_lock); | |
22135 | + | |
22136 | + return match; | |
22137 | +} | |
22138 | + | |
22139 | /* | |
22140 | * wait_task_inactive - wait for a thread to unschedule. | |
22141 | * | |
22142 | @@ -1455,7 +1595,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
22143 | * is actually now running somewhere else! | |
22144 | */ | |
22145 | while (task_running(rq, p)) { | |
22146 | - if (match_state && unlikely(p->state != match_state)) | |
22147 | + if (match_state && !check_task_state(p, match_state)) | |
22148 | return 0; | |
22149 | cpu_relax(); | |
22150 | } | |
22151 | @@ -1470,7 +1610,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |
22152 | running = task_running(rq, p); | |
22153 | queued = task_on_rq_queued(p); | |
22154 | ncsw = 0; | |
22155 | - if (!match_state || p->state == match_state) | |
22156 | + if (!match_state || p->state == match_state || | |
22157 | + p->saved_state == match_state) | |
22158 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | |
22159 | task_rq_unlock(rq, p, &flags); | |
22160 | ||
22161 | @@ -1627,7 +1768,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) | |
22162 | { | |
22163 | lockdep_assert_held(&p->pi_lock); | |
22164 | ||
22165 | - if (p->nr_cpus_allowed > 1) | |
22166 | + if (tsk_nr_cpus_allowed(p) > 1) | |
22167 | cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); | |
22168 | ||
22169 | /* | |
22170 | @@ -1707,10 +1848,6 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl | |
22171 | { | |
22172 | activate_task(rq, p, en_flags); | |
22173 | p->on_rq = TASK_ON_RQ_QUEUED; | |
22174 | - | |
22175 | - /* if a worker is waking up, notify workqueue */ | |
22176 | - if (p->flags & PF_WQ_WORKER) | |
22177 | - wq_worker_waking_up(p, cpu_of(rq)); | |
22178 | } | |
22179 | ||
22180 | /* | |
22181 | @@ -1937,8 +2074,27 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |
22182 | */ | |
22183 | smp_mb__before_spinlock(); | |
22184 | raw_spin_lock_irqsave(&p->pi_lock, flags); | |
22185 | - if (!(p->state & state)) | |
22186 | + if (!(p->state & state)) { | |
22187 | + /* | |
22188 | + * The task might be running due to a spinlock sleeper | |
22189 | + * wakeup. Check the saved state and set it to running | |
22190 | + * if the wakeup condition is true. | |
22191 | + */ | |
22192 | + if (!(wake_flags & WF_LOCK_SLEEPER)) { | |
22193 | + if (p->saved_state & state) { | |
22194 | + p->saved_state = TASK_RUNNING; | |
22195 | + success = 1; | |
22196 | + } | |
22197 | + } | |
22198 | goto out; | |
22199 | + } | |
22200 | + | |
22201 | + /* | |
22202 | + * If this is a regular wakeup, then we can unconditionally | |
22203 | + * clear the saved state of a "lock sleeper". | |
22204 | + */ | |
22205 | + if (!(wake_flags & WF_LOCK_SLEEPER)) | |
22206 | + p->saved_state = TASK_RUNNING; | |
22207 | ||
22208 | trace_sched_waking(p); | |
22209 | ||
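
Background for the hunk above: on RT, spinlocks can sleep, and a task that blocks on one while already in TASK_INTERRUPTIBLE would have its scheduler state overwritten. The pre-block state is stashed in p->saved_state by the RT locking code (not shown here), and try_to_wake_up() now honors it: a regular wakeup that matches saved_state flips it to TASK_RUNNING so the wakeup is not lost, while lock wakeups (WF_LOCK_SLEEPER) leave it alone. An illustrative sequence, task T versus waker W:

    /*
     * T: set_current_state(TASK_INTERRUPTIBLE);
     * T: spin_lock(&l);        // contended: saved_state <- TASK_INTERRUPTIBLE
     * W: wake_up_process(T);   // T's state doesn't match, but saved_state
     *                          // does: saved_state <- TASK_RUNNING
     * T: ...lock released, T woken via wake_up_lock_sleeper()...
     * T: // the wait loop's next schedule() sees TASK_RUNNING,
     *    // so T does not go back to sleep and miss W's wakeup
     */
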
22210 | @@ -2030,52 +2186,6 @@ out: | |
22211 | } | |
22212 | ||
22213 | /** | |
22214 | - * try_to_wake_up_local - try to wake up a local task with rq lock held | |
22215 | - * @p: the thread to be awakened | |
22216 | - * | |
22217 | - * Put @p on the run-queue if it's not already there. The caller must | |
22218 | - * ensure that this_rq() is locked, @p is bound to this_rq() and not | |
22219 | - * the current task. | |
22220 | - */ | |
22221 | -static void try_to_wake_up_local(struct task_struct *p) | |
22222 | -{ | |
22223 | - struct rq *rq = task_rq(p); | |
22224 | - | |
22225 | - if (WARN_ON_ONCE(rq != this_rq()) || | |
22226 | - WARN_ON_ONCE(p == current)) | |
22227 | - return; | |
22228 | - | |
22229 | - lockdep_assert_held(&rq->lock); | |
22230 | - | |
22231 | - if (!raw_spin_trylock(&p->pi_lock)) { | |
22232 | - /* | |
22233 | - * This is OK, because current is on_cpu, which avoids it being | |
22234 | - * picked for load-balance and preemption/IRQs are still | |
22235 | - * disabled avoiding further scheduler activity on it and we've | |
22236 | - * not yet picked a replacement task. | |
22237 | - */ | |
22238 | - lockdep_unpin_lock(&rq->lock); | |
22239 | - raw_spin_unlock(&rq->lock); | |
22240 | - raw_spin_lock(&p->pi_lock); | |
22241 | - raw_spin_lock(&rq->lock); | |
22242 | - lockdep_pin_lock(&rq->lock); | |
22243 | - } | |
22244 | - | |
22245 | - if (!(p->state & TASK_NORMAL)) | |
22246 | - goto out; | |
22247 | - | |
22248 | - trace_sched_waking(p); | |
22249 | - | |
22250 | - if (!task_on_rq_queued(p)) | |
22251 | - ttwu_activate(rq, p, ENQUEUE_WAKEUP); | |
22252 | - | |
22253 | - ttwu_do_wakeup(rq, p, 0); | |
22254 | - ttwu_stat(p, smp_processor_id(), 0); | |
22255 | -out: | |
22256 | - raw_spin_unlock(&p->pi_lock); | |
22257 | -} | |
22258 | - | |
22259 | -/** | |
22260 | * wake_up_process - Wake up a specific process | |
22261 | * @p: The process to be woken up. | |
22262 | * | |
22263 | @@ -2093,6 +2203,18 @@ int wake_up_process(struct task_struct *p) | |
22264 | } | |
22265 | EXPORT_SYMBOL(wake_up_process); | |
22266 | ||
22267 | +/** | |
22268 | + * wake_up_lock_sleeper - Wake up a specific process blocked on a "sleeping lock" | |
22269 | + * @p: The process to be woken up. | |
22270 | + * | |
22271 | + * Same as wake_up_process() above, but with wake_flags=WF_LOCK_SLEEPER | |
22272 | + * to indicate the nature of the wakeup. | |
22273 | + */ | |
22274 | +int wake_up_lock_sleeper(struct task_struct *p) | |
22275 | +{ | |
22276 | + return try_to_wake_up(p, TASK_ALL, WF_LOCK_SLEEPER); | |
22277 | +} | |
22278 | + | |
22279 | int wake_up_state(struct task_struct *p, unsigned int state) | |
22280 | { | |
22281 | return try_to_wake_up(p, state, 0); | |
22282 | @@ -2279,6 +2401,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) | |
22283 | p->on_cpu = 0; | |
22284 | #endif | |
22285 | init_task_preempt_count(p); | |
22286 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
22287 | + task_thread_info(p)->preempt_lazy_count = 0; | |
22288 | +#endif | |
22289 | #ifdef CONFIG_SMP | |
22290 | plist_node_init(&p->pushable_tasks, MAX_PRIO); | |
22291 | RB_CLEAR_NODE(&p->pushable_dl_tasks); | |
22292 | @@ -2603,8 +2728,12 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |
22293 | finish_arch_post_lock_switch(); | |
22294 | ||
22295 | fire_sched_in_preempt_notifiers(current); | |
22296 | + /* | |
22297 | + * We use mmdrop_delayed() here so we don't have to do the | |
22298 | + * full __mmdrop() when we are the last user. | |
22299 | + */ | |
22300 | if (mm) | |
22301 | - mmdrop(mm); | |
22302 | + mmdrop_delayed(mm); | |
22303 | if (unlikely(prev_state == TASK_DEAD)) { | |
22304 | if (prev->sched_class->task_dead) | |
22305 | prev->sched_class->task_dead(prev); | |
22306 | @@ -2935,16 +3064,6 @@ u64 scheduler_tick_max_deferment(void) | |
22307 | } | |
22308 | #endif | |
22309 | ||
22310 | -notrace unsigned long get_parent_ip(unsigned long addr) | |
22311 | -{ | |
22312 | - if (in_lock_functions(addr)) { | |
22313 | - addr = CALLER_ADDR2; | |
22314 | - if (in_lock_functions(addr)) | |
22315 | - addr = CALLER_ADDR3; | |
22316 | - } | |
22317 | - return addr; | |
22318 | -} | |
22319 | - | |
22320 | #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ | |
22321 | defined(CONFIG_PREEMPT_TRACER)) | |
22322 | ||
22323 | @@ -2966,7 +3085,7 @@ void preempt_count_add(int val) | |
22324 | PREEMPT_MASK - 10); | |
22325 | #endif | |
22326 | if (preempt_count() == val) { | |
22327 | - unsigned long ip = get_parent_ip(CALLER_ADDR1); | |
22328 | + unsigned long ip = get_lock_parent_ip(); | |
22329 | #ifdef CONFIG_DEBUG_PREEMPT | |
22330 | current->preempt_disable_ip = ip; | |
22331 | #endif | |
22332 | @@ -2993,7 +3112,7 @@ void preempt_count_sub(int val) | |
22333 | #endif | |
22334 | ||
22335 | if (preempt_count() == val) | |
22336 | - trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | |
22337 | + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); | |
22338 | __preempt_count_sub(val); | |
22339 | } | |
22340 | EXPORT_SYMBOL(preempt_count_sub); | |
22341 | @@ -3048,6 +3167,77 @@ static inline void schedule_debug(struct task_struct *prev) | |
22342 | schedstat_inc(this_rq(), sched_count); | |
22343 | } | |
22344 | ||
22345 | +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP) | |
22346 | + | |
22347 | +void migrate_disable(void) | |
22348 | +{ | |
22349 | + struct task_struct *p = current; | |
22350 | + | |
22351 | + if (in_atomic() || irqs_disabled()) { | |
22352 | +#ifdef CONFIG_SCHED_DEBUG | |
22353 | + p->migrate_disable_atomic++; | |
22354 | +#endif | |
22355 | + return; | |
22356 | + } | |
22357 | + | |
22358 | +#ifdef CONFIG_SCHED_DEBUG | |
22359 | + if (unlikely(p->migrate_disable_atomic)) { | |
22360 | + tracing_off(); | |
22361 | + WARN_ON_ONCE(1); | |
22362 | + } | |
22363 | +#endif | |
22364 | + | |
22365 | + if (p->migrate_disable) { | |
22366 | + p->migrate_disable++; | |
22367 | + return; | |
22368 | + } | |
22369 | + | |
22370 | + preempt_disable(); | |
22371 | + preempt_lazy_disable(); | |
22372 | + pin_current_cpu(); | |
22373 | + p->migrate_disable = 1; | |
22374 | + preempt_enable(); | |
22375 | +} | |
22376 | +EXPORT_SYMBOL(migrate_disable); | |
22377 | + | |
22378 | +void migrate_enable(void) | |
22379 | +{ | |
22380 | + struct task_struct *p = current; | |
22381 | + | |
22382 | + if (in_atomic() || irqs_disabled()) { | |
22383 | +#ifdef CONFIG_SCHED_DEBUG | |
22384 | + p->migrate_disable_atomic--; | |
22385 | +#endif | |
22386 | + return; | |
22387 | + } | |
22388 | + | |
22389 | +#ifdef CONFIG_SCHED_DEBUG | |
22390 | + if (unlikely(p->migrate_disable_atomic)) { | |
22391 | + tracing_off(); | |
22392 | + WARN_ON_ONCE(1); | |
22393 | + } | |
22394 | +#endif | |
22395 | + WARN_ON_ONCE(p->migrate_disable <= 0); | |
22396 | + | |
22397 | + if (p->migrate_disable > 1) { | |
22398 | + p->migrate_disable--; | |
22399 | + return; | |
22400 | + } | |
22401 | + | |
22402 | + preempt_disable(); | |
22403 | + /* | |
22404 | + * Clearing migrate_disable causes tsk_cpus_allowed to | |
22405 | + * show the task's original cpu affinity. | |
22406 | + */ | |
22407 | + p->migrate_disable = 0; | |
22408 | + | |
22409 | + unpin_current_cpu(); | |
22410 | + preempt_enable(); | |
22411 | + preempt_lazy_enable(); | |
22412 | +} | |
22413 | +EXPORT_SYMBOL(migrate_enable); | |
22414 | +#endif | |
22415 | + | |
22416 | /* | |
22417 | * Pick up the highest-prio task: | |
22418 | */ | |
22419 | @@ -3172,19 +3362,6 @@ static void __sched notrace __schedule(bool preempt) | |
22420 | } else { | |
22421 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | |
22422 | prev->on_rq = 0; | |
22423 | - | |
22424 | - /* | |
22425 | - * If a worker went to sleep, notify and ask workqueue | |
22426 | - * whether it wants to wake up a task to maintain | |
22427 | - * concurrency. | |
22428 | - */ | |
22429 | - if (prev->flags & PF_WQ_WORKER) { | |
22430 | - struct task_struct *to_wakeup; | |
22431 | - | |
22432 | - to_wakeup = wq_worker_sleeping(prev, cpu); | |
22433 | - if (to_wakeup) | |
22434 | - try_to_wake_up_local(to_wakeup); | |
22435 | - } | |
22436 | } | |
22437 | switch_count = &prev->nvcsw; | |
22438 | } | |
22439 | @@ -3194,6 +3371,7 @@ static void __sched notrace __schedule(bool preempt) | |
22440 | ||
22441 | next = pick_next_task(rq, prev); | |
22442 | clear_tsk_need_resched(prev); | |
22443 | + clear_tsk_need_resched_lazy(prev); | |
22444 | clear_preempt_need_resched(); | |
22445 | rq->clock_skip_update = 0; | |
22446 | ||
22447 | @@ -3215,9 +3393,20 @@ static void __sched notrace __schedule(bool preempt) | |
22448 | ||
22449 | static inline void sched_submit_work(struct task_struct *tsk) | |
22450 | { | |
22451 | - if (!tsk->state || tsk_is_pi_blocked(tsk)) | |
22452 | + if (!tsk->state) | |
22453 | return; | |
22454 | /* | |
22455 | + * If a worker went to sleep, notify and ask workqueue whether | |
22456 | + * it wants to wake up a task to maintain concurrency. | |
22457 | + */ | |
22458 | + if (tsk->flags & PF_WQ_WORKER) | |
22459 | + wq_worker_sleeping(tsk); | |
22460 | + | |
22461 | + | |
22462 | + if (tsk_is_pi_blocked(tsk)) | |
22463 | + return; | |
22464 | + | |
22465 | + /* | |
22466 | * If we are going to sleep and we have plugged IO queued, | |
22467 | * make sure to submit it to avoid deadlocks. | |
22468 | */ | |
22469 | @@ -3225,6 +3414,12 @@ static inline void sched_submit_work(struct task_struct *tsk) | |
22470 | blk_schedule_flush_plug(tsk); | |
22471 | } | |
22472 | ||
22473 | +static void sched_update_worker(struct task_struct *tsk) | |
22474 | +{ | |
22475 | + if (tsk->flags & PF_WQ_WORKER) | |
22476 | + wq_worker_running(tsk); | |
22477 | +} | |
22478 | + | |
22479 | asmlinkage __visible void __sched schedule(void) | |
22480 | { | |
22481 | struct task_struct *tsk = current; | |
22482 | @@ -3235,6 +3430,7 @@ asmlinkage __visible void __sched schedule(void) | |
22483 | __schedule(false); | |
22484 | sched_preempt_enable_no_resched(); | |
22485 | } while (need_resched()); | |
22486 | + sched_update_worker(tsk); | |
22487 | } | |
22488 | EXPORT_SYMBOL(schedule); | |
22489 | ||
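Taken together, the hunks above move the workqueue notification out of __schedule() and bracket the scheduler entry instead: sched_submit_work() calls wq_worker_sleeping() before blocking, and the new sched_update_worker() calls wq_worker_running() once the task runs again. A stand-alone sketch of the resulting call order (stub bodies, single task assumed; not the real kernel functions):

    #include <stdio.h>

    /* Stubs standing in for the kernel functions touched by these hunks. */
    static void wq_worker_sleeping(void) { printf("notify workqueue: sleeping\n"); }
    static void wq_worker_running(void)  { printf("notify workqueue: running\n"); }
    static void __schedule(void)         { printf("context switch\n"); }

    static int task_is_wq_worker = 1;   /* models tsk->flags & PF_WQ_WORKER */

    static void schedule(void)
    {
        if (task_is_wq_worker)          /* sched_submit_work() */
            wq_worker_sleeping();
        __schedule();
        if (task_is_wq_worker)          /* sched_update_worker() */
            wq_worker_running();
    }

    int main(void)
    {
        schedule();
        return 0;
    }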
22490 | @@ -3283,6 +3479,30 @@ static void __sched notrace preempt_schedule_common(void) | |
22491 | } while (need_resched()); | |
22492 | } | |
22493 | ||
22494 | +#ifdef CONFIG_PREEMPT_LAZY | |
22495 | +/* | |
22496 | + * If TIF_NEED_RESCHED is set then we allow being scheduled away, since it is | |
22497 | + * set by an RT task. Otherwise we try to avoid being scheduled out as long as | |
22498 | + * the preempt_lazy_count counter is > 0. | |
22499 | + */ | |
22500 | +static __always_inline int preemptible_lazy(void) | |
22501 | +{ | |
22502 | + if (test_thread_flag(TIF_NEED_RESCHED)) | |
22503 | + return 1; | |
22504 | + if (current_thread_info()->preempt_lazy_count) | |
22505 | + return 0; | |
22506 | + return 1; | |
22507 | +} | |
22508 | + | |
22509 | +#else | |
22510 | + | |
22511 | +static inline int preemptible_lazy(void) | |
22512 | +{ | |
22513 | + return 1; | |
22514 | +} | |
22515 | + | |
22516 | +#endif | |
22517 | + | |
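preemptible_lazy() implements the policy spelled out in the comment: a hard TIF_NEED_RESCHED request, set on behalf of an RT task, always allows preemption, while a nonzero preempt_lazy_count defers it. A stand-alone model of that decision (struct and field names hypothetical):

    #include <stdio.h>

    /* Hypothetical flags modelling TIF_NEED_RESCHED and preempt_lazy_count. */
    struct thread_model {
        int need_resched;       /* hard request, e.g. on behalf of an RT task */
        int preempt_lazy_count; /* >0: defer preemption of SCHED_OTHER tasks */
    };

    static int preemptible_lazy(const struct thread_model *t)
    {
        if (t->need_resched)        /* RT demand always wins */
            return 1;
        if (t->preempt_lazy_count)  /* inside a lazy section: hold off */
            return 0;
        return 1;
    }

    int main(void)
    {
        struct thread_model t = { .need_resched = 0, .preempt_lazy_count = 1 };

        printf("lazy section, no RT demand -> %d\n", preemptible_lazy(&t));
        t.need_resched = 1;
        printf("lazy section, RT demand    -> %d\n", preemptible_lazy(&t));
        return 0;
    }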
22518 | #ifdef CONFIG_PREEMPT | |
22519 | /* | |
22520 | * this is the entry point to schedule() from in-kernel preemption | |
22521 | @@ -3297,6 +3517,8 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |
22522 | */ | |
22523 | if (likely(!preemptible())) | |
22524 | return; | |
22525 | + if (!preemptible_lazy()) | |
22526 | + return; | |
22527 | ||
22528 | preempt_schedule_common(); | |
22529 | } | |
22530 | @@ -3323,6 +3545,8 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
22531 | ||
22532 | if (likely(!preemptible())) | |
22533 | return; | |
22534 | + if (!preemptible_lazy()) | |
22535 | + return; | |
22536 | ||
22537 | do { | |
22538 | preempt_disable_notrace(); | |
22539 | @@ -3332,7 +3556,16 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) | |
22540 | * an infinite recursion. | |
22541 | */ | |
22542 | prev_ctx = exception_enter(); | |
22543 | + /* | |
22544 | + * The add/subtract must not be traced by the function | |
22545 | + * tracer. But we still want to account for the | |
22546 | + * preempt-off latency tracer. Since the _notrace versions | |
22547 | + * of add/subtract skip the accounting for the latency tracer, | |
22548 | + * we must force it manually. | |
22549 | + */ | |
22550 | + start_critical_timings(); | |
22551 | __schedule(true); | |
22552 | + stop_critical_timings(); | |
22553 | exception_exit(prev_ctx); | |
22554 | ||
22555 | preempt_enable_no_resched_notrace(); | |
22556 | @@ -4676,6 +4909,7 @@ int __cond_resched_lock(spinlock_t *lock) | |
22557 | } | |
22558 | EXPORT_SYMBOL(__cond_resched_lock); | |
22559 | ||
22560 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
22561 | int __sched __cond_resched_softirq(void) | |
22562 | { | |
22563 | BUG_ON(!in_softirq()); | |
22564 | @@ -4689,6 +4923,7 @@ int __sched __cond_resched_softirq(void) | |
22565 | return 0; | |
22566 | } | |
22567 | EXPORT_SYMBOL(__cond_resched_softirq); | |
22568 | +#endif | |
22569 | ||
22570 | /** | |
22571 | * yield - yield the current processor to other threads. | |
22572 | @@ -5055,7 +5290,9 @@ void init_idle(struct task_struct *idle, int cpu) | |
22573 | ||
22574 | /* Set the preempt count _outside_ the spinlocks! */ | |
22575 | init_idle_preempt_count(idle, cpu); | |
22576 | - | |
22577 | +#ifdef CONFIG_HAVE_PREEMPT_LAZY | |
22578 | + task_thread_info(idle)->preempt_lazy_count = 0; | |
22579 | +#endif | |
22580 | /* | |
22581 | * The idle tasks have their own, simple scheduling class: | |
22582 | */ | |
22583 | @@ -5196,6 +5433,8 @@ void sched_setnuma(struct task_struct *p, int nid) | |
22584 | #endif /* CONFIG_NUMA_BALANCING */ | |
22585 | ||
22586 | #ifdef CONFIG_HOTPLUG_CPU | |
22587 | +static DEFINE_PER_CPU(struct mm_struct *, idle_last_mm); | |
22588 | + | |
22589 | /* | |
22590 | * Ensures that the idle task is using init_mm right before its cpu goes | |
22591 | * offline. | |
22592 | @@ -5210,7 +5449,11 @@ void idle_task_exit(void) | |
22593 | switch_mm(mm, &init_mm, current); | |
22594 | finish_arch_post_lock_switch(); | |
22595 | } | |
22596 | - mmdrop(mm); | |
22597 | + /* | |
22598 | + * Defer the cleanup to an alive cpu. On RT we can neither | |
22599 | + * call mmdrop() nor mmdrop_delayed() from here. | |
22600 | + */ | |
22601 | + per_cpu(idle_last_mm, smp_processor_id()) = mm; | |
22602 | } | |
22603 | ||
22604 | /* | |
22605 | @@ -5583,6 +5826,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |
22606 | ||
22607 | case CPU_DEAD: | |
22608 | calc_load_migrate(rq); | |
22609 | + if (per_cpu(idle_last_mm, cpu)) { | |
22610 | + mmdrop(per_cpu(idle_last_mm, cpu)); | |
22611 | + per_cpu(idle_last_mm, cpu) = NULL; | |
22612 | + } | |
22613 | break; | |
22614 | #endif | |
22615 | } | |
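The two hunks above form one deferred-free pattern: idle_task_exit() cannot drop the mm on the dying CPU, so it parks it in the per-CPU idle_last_mm slot, and the CPU_DEAD notifier, running on a live CPU, performs the mmdrop(). A stand-alone model of the park-then-free handoff (all names hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    /* Stand-alone model of the deferred-free pattern used for idle_last_mm:
     * the dying context parks the object, a later safe context frees it. */
    static void *parked_obj;    /* models per_cpu(idle_last_mm, cpu) */

    static void dying_cpu_path(void *obj)
    {
        /* Cannot free here (unsafe context on RT): park the object. */
        parked_obj = obj;
    }

    static void cpu_dead_notifier(void)
    {
        if (parked_obj) {       /* models the CPU_DEAD hook */
            free(parked_obj);
            parked_obj = NULL;
            printf("deferred free done\n");
        }
    }

    int main(void)
    {
        dying_cpu_path(malloc(64));
        cpu_dead_notifier();
        return 0;
    }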
22616 | @@ -7566,7 +7813,7 @@ void __init sched_init(void) | |
22617 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP | |
22618 | static inline int preempt_count_equals(int preempt_offset) | |
22619 | { | |
22620 | - int nested = preempt_count() + rcu_preempt_depth(); | |
22621 | + int nested = preempt_count() + sched_rcu_preempt_depth(); | |
22622 | ||
22623 | return (nested == preempt_offset); | |
22624 | } | |
22625 | diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c | |
22626 | index 5a75b08cfd85..5be58820465c 100644 | |
22627 | --- a/kernel/sched/cpudeadline.c | |
22628 | +++ b/kernel/sched/cpudeadline.c | |
22629 | @@ -103,10 +103,10 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | |
22630 | const struct sched_dl_entity *dl_se = &p->dl; | |
22631 | ||
22632 | if (later_mask && | |
22633 | - cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) { | |
22634 | + cpumask_and(later_mask, cp->free_cpus, tsk_cpus_allowed(p))) { | |
22635 | best_cpu = cpumask_any(later_mask); | |
22636 | goto out; | |
22637 | - } else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) && | |
22638 | + } else if (cpumask_test_cpu(cpudl_maximum(cp), tsk_cpus_allowed(p)) && | |
22639 | dl_time_before(dl_se->deadline, cp->elements[0].dl)) { | |
22640 | best_cpu = cpudl_maximum(cp); | |
22641 | if (later_mask) | |
22642 | diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c | |
22643 | index 981fcd7dc394..11e9705bf937 100644 | |
22644 | --- a/kernel/sched/cpupri.c | |
22645 | +++ b/kernel/sched/cpupri.c | |
22646 | @@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |
22647 | if (skip) | |
22648 | continue; | |
22649 | ||
22650 | - if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | |
22651 | + if (cpumask_any_and(tsk_cpus_allowed(p), vec->mask) >= nr_cpu_ids) | |
22652 | continue; | |
22653 | ||
22654 | if (lowest_mask) { | |
22655 | - cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | |
22656 | + cpumask_and(lowest_mask, tsk_cpus_allowed(p), vec->mask); | |
22657 | ||
22658 | /* | |
22659 | * We have to ensure that we have at least one bit | |
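These call sites are switched from direct p->cpus_allowed / p->nr_cpus_allowed accesses to the tsk_cpus_allowed()/tsk_nr_cpus_allowed() wrappers, so that a migrate-disabled task can report a single-CPU affinity without every caller being patched individually. A stand-alone model of that accessor indirection (struct and masks hypothetical):

    #include <stdio.h>

    /* Hypothetical model of the accessor-wrapper pattern: callers go through
     * tsk_cpus_allowed()/tsk_nr_cpus_allowed() so migrate_disable() can
     * redirect them without touching every call site. */
    struct task_model {
        unsigned long cpus_allowed;  /* bitmask of permitted CPUs */
        int nr_cpus_allowed;
        int migrate_disable;         /* set: pinned to current CPU */
        int cpu;
    };

    static unsigned long tsk_cpus_allowed(const struct task_model *p)
    {
        if (p->migrate_disable)
            return 1UL << p->cpu;    /* pinned: affinity is one CPU */
        return p->cpus_allowed;
    }

    static int tsk_nr_cpus_allowed(const struct task_model *p)
    {
        return p->migrate_disable ? 1 : p->nr_cpus_allowed;
    }

    int main(void)
    {
        struct task_model p = { 0xf, 4, 0, 2 };

        printf("mask=%#lx nr=%d\n", tsk_cpus_allowed(&p), tsk_nr_cpus_allowed(&p));
        p.migrate_disable = 1;
        printf("mask=%#lx nr=%d\n", tsk_cpus_allowed(&p), tsk_nr_cpus_allowed(&p));
        return 0;
    }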
22660 | diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c | |
22661 | index a1aecbedf5b1..558b98af241d 100644 | |
22662 | --- a/kernel/sched/cputime.c | |
22663 | +++ b/kernel/sched/cputime.c | |
22664 | @@ -685,7 +685,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) | |
22665 | { | |
22666 | unsigned long long delta = vtime_delta(tsk); | |
22667 | ||
22668 | - WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); | |
22669 | + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); | |
22670 | tsk->vtime_snap += delta; | |
22671 | ||
22672 | /* CHECKME: always safe to convert nsecs to cputime? */ | |
22673 | @@ -701,37 +701,37 @@ static void __vtime_account_system(struct task_struct *tsk) | |
22674 | ||
22675 | void vtime_account_system(struct task_struct *tsk) | |
22676 | { | |
22677 | - write_seqlock(&tsk->vtime_seqlock); | |
22678 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22679 | __vtime_account_system(tsk); | |
22680 | - write_sequnlock(&tsk->vtime_seqlock); | |
22681 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22682 | } | |
22683 | ||
22684 | void vtime_gen_account_irq_exit(struct task_struct *tsk) | |
22685 | { | |
22686 | - write_seqlock(&tsk->vtime_seqlock); | |
22687 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22688 | __vtime_account_system(tsk); | |
22689 | if (context_tracking_in_user()) | |
22690 | tsk->vtime_snap_whence = VTIME_USER; | |
22691 | - write_sequnlock(&tsk->vtime_seqlock); | |
22692 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22693 | } | |
22694 | ||
22695 | void vtime_account_user(struct task_struct *tsk) | |
22696 | { | |
22697 | cputime_t delta_cpu; | |
22698 | ||
22699 | - write_seqlock(&tsk->vtime_seqlock); | |
22700 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22701 | delta_cpu = get_vtime_delta(tsk); | |
22702 | tsk->vtime_snap_whence = VTIME_SYS; | |
22703 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | |
22704 | - write_sequnlock(&tsk->vtime_seqlock); | |
22705 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22706 | } | |
22707 | ||
22708 | void vtime_user_enter(struct task_struct *tsk) | |
22709 | { | |
22710 | - write_seqlock(&tsk->vtime_seqlock); | |
22711 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22712 | __vtime_account_system(tsk); | |
22713 | tsk->vtime_snap_whence = VTIME_USER; | |
22714 | - write_sequnlock(&tsk->vtime_seqlock); | |
22715 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22716 | } | |
22717 | ||
22718 | void vtime_guest_enter(struct task_struct *tsk) | |
22719 | @@ -743,19 +743,19 @@ void vtime_guest_enter(struct task_struct *tsk) | |
22720 | * synchronization against the reader (task_gtime()) | |
22721 | * that can thus safely catch up with a tickless delta. | |
22722 | */ | |
22723 | - write_seqlock(&tsk->vtime_seqlock); | |
22724 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22725 | __vtime_account_system(tsk); | |
22726 | current->flags |= PF_VCPU; | |
22727 | - write_sequnlock(&tsk->vtime_seqlock); | |
22728 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22729 | } | |
22730 | EXPORT_SYMBOL_GPL(vtime_guest_enter); | |
22731 | ||
22732 | void vtime_guest_exit(struct task_struct *tsk) | |
22733 | { | |
22734 | - write_seqlock(&tsk->vtime_seqlock); | |
22735 | + write_seqcount_begin(&tsk->vtime_seqcount); | |
22736 | __vtime_account_system(tsk); | |
22737 | current->flags &= ~PF_VCPU; | |
22738 | - write_sequnlock(&tsk->vtime_seqlock); | |
22739 | + write_seqcount_end(&tsk->vtime_seqcount); | |
22740 | } | |
22741 | EXPORT_SYMBOL_GPL(vtime_guest_exit); | |
22742 | ||
22743 | @@ -768,24 +768,26 @@ void vtime_account_idle(struct task_struct *tsk) | |
22744 | ||
22745 | void arch_vtime_task_switch(struct task_struct *prev) | |
22746 | { | |
22747 | - write_seqlock(&prev->vtime_seqlock); | |
22748 | - prev->vtime_snap_whence = VTIME_SLEEPING; | |
22749 | - write_sequnlock(&prev->vtime_seqlock); | |
22750 | + write_seqcount_begin(&prev->vtime_seqcount); | |
22751 | + prev->vtime_snap_whence = VTIME_INACTIVE; | |
22752 | + write_seqcount_end(&prev->vtime_seqcount); | |
22753 | ||
22754 | - write_seqlock(¤t->vtime_seqlock); | |
22755 | + write_seqcount_begin(¤t->vtime_seqcount); | |
22756 | current->vtime_snap_whence = VTIME_SYS; | |
22757 | current->vtime_snap = sched_clock_cpu(smp_processor_id()); | |
22758 | - write_sequnlock(¤t->vtime_seqlock); | |
22759 | + write_seqcount_end(¤t->vtime_seqcount); | |
22760 | } | |
22761 | ||
22762 | void vtime_init_idle(struct task_struct *t, int cpu) | |
22763 | { | |
22764 | unsigned long flags; | |
22765 | ||
22766 | - write_seqlock_irqsave(&t->vtime_seqlock, flags); | |
22767 | + local_irq_save(flags); | |
22768 | + write_seqcount_begin(&t->vtime_seqcount); | |
22769 | t->vtime_snap_whence = VTIME_SYS; | |
22770 | t->vtime_snap = sched_clock_cpu(cpu); | |
22771 | - write_sequnlock_irqrestore(&t->vtime_seqlock, flags); | |
22772 | + write_seqcount_end(&t->vtime_seqcount); | |
22773 | + local_irq_restore(flags); | |
22774 | } | |
22775 | ||
22776 | cputime_t task_gtime(struct task_struct *t) | |
22777 | @@ -797,13 +799,13 @@ cputime_t task_gtime(struct task_struct *t) | |
22778 | return t->gtime; | |
22779 | ||
22780 | do { | |
22781 | - seq = read_seqbegin(&t->vtime_seqlock); | |
22782 | + seq = read_seqcount_begin(&t->vtime_seqcount); | |
22783 | ||
22784 | gtime = t->gtime; | |
22785 | if (t->flags & PF_VCPU) | |
22786 | gtime += vtime_delta(t); | |
22787 | ||
22788 | - } while (read_seqretry(&t->vtime_seqlock, seq)); | |
22789 | + } while (read_seqcount_retry(&t->vtime_seqcount, seq)); | |
22790 | ||
22791 | return gtime; | |
22792 | } | |
22793 | @@ -826,7 +828,7 @@ fetch_task_cputime(struct task_struct *t, | |
22794 | *udelta = 0; | |
22795 | *sdelta = 0; | |
22796 | ||
22797 | - seq = read_seqbegin(&t->vtime_seqlock); | |
22798 | + seq = read_seqcount_begin(&t->vtime_seqcount); | |
22799 | ||
22800 | if (u_dst) | |
22801 | *u_dst = *u_src; | |
22802 | @@ -834,7 +836,7 @@ fetch_task_cputime(struct task_struct *t, | |
22803 | *s_dst = *s_src; | |
22804 | ||
22805 | /* Task is sleeping, nothing to add */ | |
22806 | - if (t->vtime_snap_whence == VTIME_SLEEPING || | |
22807 | + if (t->vtime_snap_whence == VTIME_INACTIVE || | |
22808 | is_idle_task(t)) | |
22809 | continue; | |
22810 | ||
22811 | @@ -850,7 +852,7 @@ fetch_task_cputime(struct task_struct *t, | |
22812 | if (t->vtime_snap_whence == VTIME_SYS) | |
22813 | *sdelta = delta; | |
22814 | } | |
22815 | - } while (read_seqretry(&t->vtime_seqlock, seq)); | |
22816 | + } while (read_seqcount_retry(&t->vtime_seqcount, seq)); | |
22817 | } | |
22818 | ||
22819 | ||
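The conversion above replaces the vtime seqlock with a raw seqcount: writers bump the sequence around each update, and readers retry until they observe an even, unchanged sequence. A stand-alone model of that read-retry discipline (memory barriers omitted for brevity; the real read_seqcount_begin()/read_seqcount_retry() include them):

    #include <stdio.h>

    /* Minimal single-writer seqcount model: even = stable, odd = in update. */
    static volatile unsigned seq;
    static unsigned long long value;

    static void writer_update(unsigned long long v)
    {
        seq++;                      /* becomes odd: readers will retry */
        value = v;
        seq++;                      /* even again: snapshot is consistent */
    }

    static unsigned long long reader_snapshot(void)
    {
        unsigned s;
        unsigned long long v;

        do {
            while ((s = seq) & 1)   /* writer in progress: spin */
                ;
            v = value;
        } while (seq != s);         /* retry if a writer slipped in */
        return v;
    }

    int main(void)
    {
        writer_update(42);
        printf("%llu\n", reader_snapshot());
        return 0;
    }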
22820 | diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c | |
22821 | index 8b0a15e285f9..7a72e69fcf65 100644 | |
22822 | --- a/kernel/sched/deadline.c | |
22823 | +++ b/kernel/sched/deadline.c | |
22824 | @@ -134,7 +134,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
22825 | { | |
22826 | struct task_struct *p = dl_task_of(dl_se); | |
22827 | ||
22828 | - if (p->nr_cpus_allowed > 1) | |
22829 | + if (tsk_nr_cpus_allowed(p) > 1) | |
22830 | dl_rq->dl_nr_migratory++; | |
22831 | ||
22832 | update_dl_migration(dl_rq); | |
22833 | @@ -144,7 +144,7 @@ static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) | |
22834 | { | |
22835 | struct task_struct *p = dl_task_of(dl_se); | |
22836 | ||
22837 | - if (p->nr_cpus_allowed > 1) | |
22838 | + if (tsk_nr_cpus_allowed(p) > 1) | |
22839 | dl_rq->dl_nr_migratory--; | |
22840 | ||
22841 | update_dl_migration(dl_rq); | |
22842 | @@ -697,6 +697,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |
22843 | ||
22844 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
22845 | timer->function = dl_task_timer; | |
22846 | + timer->irqsafe = 1; | |
22847 | } | |
22848 | ||
22849 | static | |
22850 | @@ -989,7 +990,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |
22851 | ||
22852 | enqueue_dl_entity(&p->dl, pi_se, flags); | |
22853 | ||
22854 | - if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | |
22855 | + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) | |
22856 | enqueue_pushable_dl_task(rq, p); | |
22857 | } | |
22858 | ||
22859 | @@ -1067,9 +1068,9 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |
22860 | * try to make it stay here, it might be important. | |
22861 | */ | |
22862 | if (unlikely(dl_task(curr)) && | |
22863 | - (curr->nr_cpus_allowed < 2 || | |
22864 | + (tsk_nr_cpus_allowed(curr) < 2 || | |
22865 | !dl_entity_preempt(&p->dl, &curr->dl)) && | |
22866 | - (p->nr_cpus_allowed > 1)) { | |
22867 | + (tsk_nr_cpus_allowed(p) > 1)) { | |
22868 | int target = find_later_rq(p); | |
22869 | ||
22870 | if (target != -1 && | |
22871 | @@ -1090,7 +1091,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |
22872 | * Current can't be migrated, useless to reschedule, | |
22873 | * let's hope p can move out. | |
22874 | */ | |
22875 | - if (rq->curr->nr_cpus_allowed == 1 || | |
22876 | + if (tsk_nr_cpus_allowed(rq->curr) == 1 || | |
22877 | cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1) | |
22878 | return; | |
22879 | ||
22880 | @@ -1098,7 +1099,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) | |
22881 | * p is migratable, so let's not schedule it and | |
22882 | * see if it is pushed or pulled somewhere else. | |
22883 | */ | |
22884 | - if (p->nr_cpus_allowed != 1 && | |
22885 | + if (tsk_nr_cpus_allowed(p) != 1 && | |
22886 | cpudl_find(&rq->rd->cpudl, p, NULL) != -1) | |
22887 | return; | |
22888 | ||
22889 | @@ -1212,7 +1213,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p) | |
22890 | { | |
22891 | update_curr_dl(rq); | |
22892 | ||
22893 | - if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1) | |
22894 | + if (on_dl_rq(&p->dl) && tsk_nr_cpus_allowed(p) > 1) | |
22895 | enqueue_pushable_dl_task(rq, p); | |
22896 | } | |
22897 | ||
22898 | @@ -1335,7 +1336,7 @@ static int find_later_rq(struct task_struct *task) | |
22899 | if (unlikely(!later_mask)) | |
22900 | return -1; | |
22901 | ||
22902 | - if (task->nr_cpus_allowed == 1) | |
22903 | + if (tsk_nr_cpus_allowed(task) == 1) | |
22904 | return -1; | |
22905 | ||
22906 | /* | |
22907 | @@ -1441,7 +1442,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq) | |
22908 | if (double_lock_balance(rq, later_rq)) { | |
22909 | if (unlikely(task_rq(task) != rq || | |
22910 | !cpumask_test_cpu(later_rq->cpu, | |
22911 | - &task->cpus_allowed) || | |
22912 | + tsk_cpus_allowed(task)) || | |
22913 | task_running(rq, task) || | |
22914 | !task_on_rq_queued(task))) { | |
22915 | double_unlock_balance(rq, later_rq); | |
22916 | @@ -1480,7 +1481,7 @@ static struct task_struct *pick_next_pushable_dl_task(struct rq *rq) | |
22917 | ||
22918 | BUG_ON(rq->cpu != task_cpu(p)); | |
22919 | BUG_ON(task_current(rq, p)); | |
22920 | - BUG_ON(p->nr_cpus_allowed <= 1); | |
22921 | + BUG_ON(tsk_nr_cpus_allowed(p) <= 1); | |
22922 | ||
22923 | BUG_ON(!task_on_rq_queued(p)); | |
22924 | BUG_ON(!dl_task(p)); | |
22925 | @@ -1519,7 +1520,7 @@ retry: | |
22926 | */ | |
22927 | if (dl_task(rq->curr) && | |
22928 | dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) && | |
22929 | - rq->curr->nr_cpus_allowed > 1) { | |
22930 | + tsk_nr_cpus_allowed(rq->curr) > 1) { | |
22931 | resched_curr(rq); | |
22932 | return 0; | |
22933 | } | |
22934 | @@ -1666,9 +1667,9 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) | |
22935 | { | |
22936 | if (!task_running(rq, p) && | |
22937 | !test_tsk_need_resched(rq->curr) && | |
22938 | - p->nr_cpus_allowed > 1 && | |
22939 | + tsk_nr_cpus_allowed(p) > 1 && | |
22940 | dl_task(rq->curr) && | |
22941 | - (rq->curr->nr_cpus_allowed < 2 || | |
22942 | + (tsk_nr_cpus_allowed(rq->curr) < 2 || | |
22943 | !dl_entity_preempt(&p->dl, &rq->curr->dl))) { | |
22944 | push_dl_tasks(rq); | |
22945 | } | |
22946 | @@ -1769,7 +1770,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) | |
22947 | { | |
22948 | if (task_on_rq_queued(p) && rq->curr != p) { | |
22949 | #ifdef CONFIG_SMP | |
22950 | - if (p->nr_cpus_allowed > 1 && rq->dl.overloaded) | |
22951 | + if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded) | |
22952 | queue_push_tasks(rq); | |
22953 | #else | |
22954 | if (dl_task(rq->curr)) | |
22955 | diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c | |
22956 | index 641511771ae6..a2d69b883623 100644 | |
22957 | --- a/kernel/sched/debug.c | |
22958 | +++ b/kernel/sched/debug.c | |
22959 | @@ -251,6 +251,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |
22960 | P(rt_throttled); | |
22961 | PN(rt_time); | |
22962 | PN(rt_runtime); | |
22963 | +#ifdef CONFIG_SMP | |
22964 | + P(rt_nr_migratory); | |
22965 | +#endif | |
22966 | ||
22967 | #undef PN | |
22968 | #undef P | |
22969 | @@ -635,6 +638,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |
22970 | #endif | |
22971 | P(policy); | |
22972 | P(prio); | |
22973 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
22974 | + P(migrate_disable); | |
22975 | +#endif | |
22976 | + P(nr_cpus_allowed); | |
22977 | #undef PN | |
22978 | #undef __PN | |
22979 | #undef P | |
22980 | diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c | |
22981 | index 8f258f437ac2..cf0a1adba6c6 100644 | |
22982 | --- a/kernel/sched/fair.c | |
22983 | +++ b/kernel/sched/fair.c | |
22984 | @@ -3166,7 +3166,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
22985 | ideal_runtime = sched_slice(cfs_rq, curr); | |
22986 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | |
22987 | if (delta_exec > ideal_runtime) { | |
22988 | - resched_curr(rq_of(cfs_rq)); | |
22989 | + resched_curr_lazy(rq_of(cfs_rq)); | |
22990 | /* | |
22991 | * The current task ran long enough, ensure it doesn't get | |
22992 | * re-elected due to buddy favours. | |
22993 | @@ -3190,7 +3190,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |
22994 | return; | |
22995 | ||
22996 | if (delta > ideal_runtime) | |
22997 | - resched_curr(rq_of(cfs_rq)); | |
22998 | + resched_curr_lazy(rq_of(cfs_rq)); | |
22999 | } | |
23000 | ||
23001 | static void | |
23002 | @@ -3330,7 +3330,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) | |
23003 | * validating it and just reschedule. | |
23004 | */ | |
23005 | if (queued) { | |
23006 | - resched_curr(rq_of(cfs_rq)); | |
23007 | + resched_curr_lazy(rq_of(cfs_rq)); | |
23008 | return; | |
23009 | } | |
23010 | /* | |
23011 | @@ -3512,7 +3512,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) | |
23012 | * hierarchy can be throttled | |
23013 | */ | |
23014 | if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) | |
23015 | - resched_curr(rq_of(cfs_rq)); | |
23016 | + resched_curr_lazy(rq_of(cfs_rq)); | |
23017 | } | |
23018 | ||
23019 | static __always_inline | |
23020 | @@ -4124,7 +4124,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p) | |
23021 | ||
23022 | if (delta < 0) { | |
23023 | if (rq->curr == p) | |
23024 | - resched_curr(rq); | |
23025 | + resched_curr_lazy(rq); | |
23026 | return; | |
23027 | } | |
23028 | hrtick_start(rq, delta); | |
23029 | @@ -5213,7 +5213,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |
23030 | return; | |
23031 | ||
23032 | preempt: | |
23033 | - resched_curr(rq); | |
23034 | + resched_curr_lazy(rq); | |
23035 | /* | |
23036 | * Only set the backward buddy when the current task is still | |
23037 | * on the rq. This can happen when a wakeup gets interleaved | |
23038 | @@ -7964,7 +7964,7 @@ static void task_fork_fair(struct task_struct *p) | |
23039 | * 'current' within the tree based on its new key value. | |
23040 | */ | |
23041 | swap(curr->vruntime, se->vruntime); | |
23042 | - resched_curr(rq); | |
23043 | + resched_curr_lazy(rq); | |
23044 | } | |
23045 | ||
23046 | se->vruntime -= cfs_rq->min_vruntime; | |
23047 | @@ -7989,7 +7989,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) | |
23048 | */ | |
23049 | if (rq->curr == p) { | |
23050 | if (p->prio > oldprio) | |
23051 | - resched_curr(rq); | |
23052 | + resched_curr_lazy(rq); | |
23053 | } else | |
23054 | check_preempt_curr(rq, p, 0); | |
23055 | } | |
23056 | diff --git a/kernel/sched/features.h b/kernel/sched/features.h | |
23057 | index 69631fa46c2f..6d28fcd08872 100644 | |
23058 | --- a/kernel/sched/features.h | |
23059 | +++ b/kernel/sched/features.h | |
23060 | @@ -45,11 +45,19 @@ SCHED_FEAT(LB_BIAS, true) | |
23061 | */ | |
23062 | SCHED_FEAT(NONTASK_CAPACITY, true) | |
23063 | ||
23064 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23065 | +SCHED_FEAT(TTWU_QUEUE, false) | |
23066 | +# ifdef CONFIG_PREEMPT_LAZY | |
23067 | +SCHED_FEAT(PREEMPT_LAZY, true) | |
23068 | +# endif | |
23069 | +#else | |
23070 | + | |
23071 | /* | |
23072 | * Queue remote wakeups on the target CPU and process them | |
23073 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | |
23074 | */ | |
23075 | SCHED_FEAT(TTWU_QUEUE, true) | |
23076 | +#endif | |
23077 | ||
23078 | #ifdef HAVE_RT_PUSH_IPI | |
23079 | /* | |
23080 | diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c | |
23081 | index 8ec86abe0ea1..8cf360d309ec 100644 | |
23082 | --- a/kernel/sched/rt.c | |
23083 | +++ b/kernel/sched/rt.c | |
23084 | @@ -47,6 +47,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |
23085 | ||
23086 | hrtimer_init(&rt_b->rt_period_timer, | |
23087 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
23088 | + rt_b->rt_period_timer.irqsafe = 1; | |
23089 | rt_b->rt_period_timer.function = sched_rt_period_timer; | |
23090 | } | |
23091 | ||
23092 | @@ -93,6 +94,7 @@ void init_rt_rq(struct rt_rq *rt_rq) | |
23093 | rt_rq->push_cpu = nr_cpu_ids; | |
23094 | raw_spin_lock_init(&rt_rq->push_lock); | |
23095 | init_irq_work(&rt_rq->push_work, push_irq_work_func); | |
23096 | + rt_rq->push_work.flags |= IRQ_WORK_HARD_IRQ; | |
23097 | #endif | |
23098 | #endif /* CONFIG_SMP */ | |
23099 | /* We start in dequeued state, because no RT tasks are queued */ | |
23100 | @@ -326,7 +328,7 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |
23101 | rt_rq = &rq_of_rt_rq(rt_rq)->rt; | |
23102 | ||
23103 | rt_rq->rt_nr_total++; | |
23104 | - if (p->nr_cpus_allowed > 1) | |
23105 | + if (tsk_nr_cpus_allowed(p) > 1) | |
23106 | rt_rq->rt_nr_migratory++; | |
23107 | ||
23108 | update_rt_migration(rt_rq); | |
23109 | @@ -343,7 +345,7 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |
23110 | rt_rq = &rq_of_rt_rq(rt_rq)->rt; | |
23111 | ||
23112 | rt_rq->rt_nr_total--; | |
23113 | - if (p->nr_cpus_allowed > 1) | |
23114 | + if (tsk_nr_cpus_allowed(p) > 1) | |
23115 | rt_rq->rt_nr_migratory--; | |
23116 | ||
23117 | update_rt_migration(rt_rq); | |
23118 | @@ -1262,7 +1264,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) | |
23119 | ||
23120 | enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); | |
23121 | ||
23122 | - if (!task_current(rq, p) && p->nr_cpus_allowed > 1) | |
23123 | + if (!task_current(rq, p) && tsk_nr_cpus_allowed(p) > 1) | |
23124 | enqueue_pushable_task(rq, p); | |
23125 | } | |
23126 | ||
23127 | @@ -1351,7 +1353,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) | |
23128 | * will have to sort it out. | |
23129 | */ | |
23130 | if (curr && unlikely(rt_task(curr)) && | |
23131 | - (curr->nr_cpus_allowed < 2 || | |
23132 | + (tsk_nr_cpus_allowed(curr) < 2 || | |
23133 | curr->prio <= p->prio)) { | |
23134 | int target = find_lowest_rq(p); | |
23135 | ||
23136 | @@ -1375,7 +1377,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |
23137 | * Current can't be migrated, useless to reschedule, | |
23138 | * let's hope p can move out. | |
23139 | */ | |
23140 | - if (rq->curr->nr_cpus_allowed == 1 || | |
23141 | + if (tsk_nr_cpus_allowed(rq->curr) == 1 || | |
23142 | !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) | |
23143 | return; | |
23144 | ||
23145 | @@ -1383,7 +1385,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |
23146 | * p is migratable, so let's not schedule it and | |
23147 | * see if it is pushed or pulled somewhere else. | |
23148 | */ | |
23149 | - if (p->nr_cpus_allowed != 1 | |
23150 | + if (tsk_nr_cpus_allowed(p) != 1 | |
23151 | && cpupri_find(&rq->rd->cpupri, p, NULL)) | |
23152 | return; | |
23153 | ||
23154 | @@ -1517,7 +1519,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |
23155 | * The previous task needs to be made eligible for pushing | |
23156 | * if it is still active | |
23157 | */ | |
23158 | - if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) | |
23159 | + if (on_rt_rq(&p->rt) && tsk_nr_cpus_allowed(p) > 1) | |
23160 | enqueue_pushable_task(rq, p); | |
23161 | } | |
23162 | ||
23163 | @@ -1567,7 +1569,7 @@ static int find_lowest_rq(struct task_struct *task) | |
23164 | if (unlikely(!lowest_mask)) | |
23165 | return -1; | |
23166 | ||
23167 | - if (task->nr_cpus_allowed == 1) | |
23168 | + if (tsk_nr_cpus_allowed(task) == 1) | |
23169 | return -1; /* No other targets possible */ | |
23170 | ||
23171 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) | |
23172 | @@ -1699,7 +1701,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) | |
23173 | ||
23174 | BUG_ON(rq->cpu != task_cpu(p)); | |
23175 | BUG_ON(task_current(rq, p)); | |
23176 | - BUG_ON(p->nr_cpus_allowed <= 1); | |
23177 | + BUG_ON(tsk_nr_cpus_allowed(p) <= 1); | |
23178 | ||
23179 | BUG_ON(!task_on_rq_queued(p)); | |
23180 | BUG_ON(!rt_task(p)); | |
23181 | @@ -2059,9 +2061,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) | |
23182 | { | |
23183 | if (!task_running(rq, p) && | |
23184 | !test_tsk_need_resched(rq->curr) && | |
23185 | - p->nr_cpus_allowed > 1 && | |
23186 | + tsk_nr_cpus_allowed(p) > 1 && | |
23187 | (dl_task(rq->curr) || rt_task(rq->curr)) && | |
23188 | - (rq->curr->nr_cpus_allowed < 2 || | |
23189 | + (tsk_nr_cpus_allowed(rq->curr) < 2 || | |
23190 | rq->curr->prio <= p->prio)) | |
23191 | push_rt_tasks(rq); | |
23192 | } | |
23193 | @@ -2134,7 +2136,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |
23194 | */ | |
23195 | if (task_on_rq_queued(p) && rq->curr != p) { | |
23196 | #ifdef CONFIG_SMP | |
23197 | - if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) | |
23198 | + if (tsk_nr_cpus_allowed(p) > 1 && rq->rt.overloaded) | |
23199 | queue_push_tasks(rq); | |
23200 | #else | |
23201 | if (p->prio < rq->curr->prio) | |
23202 | diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h | |
23203 | index 0517abd7dd73..a8a9b156ea15 100644 | |
23204 | --- a/kernel/sched/sched.h | |
23205 | +++ b/kernel/sched/sched.h | |
23206 | @@ -1100,6 +1100,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |
23207 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | |
23208 | #define WF_FORK 0x02 /* child wakeup after fork */ | |
23209 | #define WF_MIGRATED 0x4 /* internal use, task got migrated */ | |
23210 | +#define WF_LOCK_SLEEPER 0x08 /* wakeup spinlock "sleeper" */ | |
23211 | ||
23212 | /* | |
23213 | * To aid in avoiding the subversion of "niceness" due to uneven distribution | |
23214 | @@ -1299,6 +1300,15 @@ extern void init_sched_fair_class(void); | |
23215 | extern void resched_curr(struct rq *rq); | |
23216 | extern void resched_cpu(int cpu); | |
23217 | ||
23218 | +#ifdef CONFIG_PREEMPT_LAZY | |
23219 | +extern void resched_curr_lazy(struct rq *rq); | |
23220 | +#else | |
23221 | +static inline void resched_curr_lazy(struct rq *rq) | |
23222 | +{ | |
23223 | + resched_curr(rq); | |
23224 | +} | |
23225 | +#endif | |
23226 | + | |
23227 | extern struct rt_bandwidth def_rt_bandwidth; | |
23228 | extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); | |
23229 | ||
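The sched.h hunk above makes resched_curr_lazy() fall back to the eager resched_curr() when CONFIG_PREEMPT_LAZY is off, which keeps the fair.c call sites converted earlier correct on non-RT builds. A stand-alone sketch of the same compile-time fallback shape (stub bodies, not the kernel implementations):

    #include <stdio.h>

    static void resched_curr(void) { printf("set TIF_NEED_RESCHED\n"); }

    #ifdef CONFIG_PREEMPT_LAZY
    static void resched_curr_lazy(void) { printf("set TIF_NEED_RESCHED_LAZY\n"); }
    #else
    /* Without lazy preemption the lazy variant is just the eager one. */
    static void resched_curr_lazy(void) { resched_curr(); }
    #endif

    int main(void)
    {
        resched_curr_lazy();
        return 0;
    }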
23230 | diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c | |
23231 | new file mode 100644 | |
23232 | index 000000000000..205fe36868f9 | |
23233 | --- /dev/null | |
23234 | +++ b/kernel/sched/swait.c | |
23235 | @@ -0,0 +1,143 @@ | |
23236 | +#include <linux/sched.h> | |
23237 | +#include <linux/swait.h> | |
23238 | +#include <linux/suspend.h> | |
23239 | + | |
23240 | +void __init_swait_queue_head(struct swait_queue_head *q, const char *name, | |
23241 | + struct lock_class_key *key) | |
23242 | +{ | |
23243 | + raw_spin_lock_init(&q->lock); | |
23244 | + lockdep_set_class_and_name(&q->lock, key, name); | |
23245 | + INIT_LIST_HEAD(&q->task_list); | |
23246 | +} | |
23247 | +EXPORT_SYMBOL(__init_swait_queue_head); | |
23248 | + | |
23249 | +/* | |
23250 | + * The thing about the wake_up_state() return value: I think we can ignore it. | |
23251 | + * | |
23252 | + * If for some reason it would return 0, that means the previously waiting | |
23253 | + * task is already running, so it will observe condition true (or has already). | |
23254 | + */ | |
23255 | +void swake_up_locked(struct swait_queue_head *q) | |
23256 | +{ | |
23257 | + struct swait_queue *curr; | |
23258 | + | |
23259 | + if (list_empty(&q->task_list)) | |
23260 | + return; | |
23261 | + | |
23262 | + curr = list_first_entry(&q->task_list, typeof(*curr), task_list); | |
23263 | + wake_up_process(curr->task); | |
23264 | + list_del_init(&curr->task_list); | |
23265 | +} | |
23266 | +EXPORT_SYMBOL(swake_up_locked); | |
23267 | + | |
23268 | +void swake_up_all_locked(struct swait_queue_head *q) | |
23269 | +{ | |
23270 | + struct swait_queue *curr; | |
23271 | + int wakes = 0; | |
23272 | + | |
23273 | + while (!list_empty(&q->task_list)) { | |
23274 | + | |
23275 | + curr = list_first_entry(&q->task_list, typeof(*curr), | |
23276 | + task_list); | |
23277 | + wake_up_process(curr->task); | |
23278 | + list_del_init(&curr->task_list); | |
23279 | + wakes++; | |
23280 | + } | |
23281 | + if (pm_in_action) | |
23282 | + return; | |
23283 | + WARN(wakes > 2, "complete_all() with %d waiters\n", wakes); | |
23284 | +} | |
23285 | +EXPORT_SYMBOL(swake_up_all_locked); | |
23286 | + | |
23287 | +void swake_up(struct swait_queue_head *q) | |
23288 | +{ | |
23289 | + unsigned long flags; | |
23290 | + | |
23291 | + if (!swait_active(q)) | |
23292 | + return; | |
23293 | + | |
23294 | + raw_spin_lock_irqsave(&q->lock, flags); | |
23295 | + swake_up_locked(q); | |
23296 | + raw_spin_unlock_irqrestore(&q->lock, flags); | |
23297 | +} | |
23298 | +EXPORT_SYMBOL(swake_up); | |
23299 | + | |
23300 | +/* | |
23301 | + * Does not allow usage with IRQs disabled, since we must be able to | |
23302 | + * release IRQs to guarantee bounded hold time. | |
23303 | + */ | |
23304 | +void swake_up_all(struct swait_queue_head *q) | |
23305 | +{ | |
23306 | + struct swait_queue *curr; | |
23307 | + LIST_HEAD(tmp); | |
23308 | + | |
23309 | + if (!swait_active(q)) | |
23310 | + return; | |
23311 | + | |
23312 | + raw_spin_lock_irq(&q->lock); | |
23313 | + list_splice_init(&q->task_list, &tmp); | |
23314 | + while (!list_empty(&tmp)) { | |
23315 | + curr = list_first_entry(&tmp, typeof(*curr), task_list); | |
23316 | + | |
23317 | + wake_up_state(curr->task, TASK_NORMAL); | |
23318 | + list_del_init(&curr->task_list); | |
23319 | + | |
23320 | + if (list_empty(&tmp)) | |
23321 | + break; | |
23322 | + | |
23323 | + raw_spin_unlock_irq(&q->lock); | |
23324 | + raw_spin_lock_irq(&q->lock); | |
23325 | + } | |
23326 | + raw_spin_unlock_irq(&q->lock); | |
23327 | +} | |
23328 | +EXPORT_SYMBOL(swake_up_all); | |
23329 | + | |
23330 | +void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait) | |
23331 | +{ | |
23332 | + wait->task = current; | |
23333 | + if (list_empty(&wait->task_list)) | |
23334 | + list_add(&wait->task_list, &q->task_list); | |
23335 | +} | |
23336 | + | |
23337 | +void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state) | |
23338 | +{ | |
23339 | + unsigned long flags; | |
23340 | + | |
23341 | + raw_spin_lock_irqsave(&q->lock, flags); | |
23342 | + __prepare_to_swait(q, wait); | |
23343 | + set_current_state(state); | |
23344 | + raw_spin_unlock_irqrestore(&q->lock, flags); | |
23345 | +} | |
23346 | +EXPORT_SYMBOL(prepare_to_swait); | |
23347 | + | |
23348 | +long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state) | |
23349 | +{ | |
23350 | + if (signal_pending_state(state, current)) | |
23351 | + return -ERESTARTSYS; | |
23352 | + | |
23353 | + prepare_to_swait(q, wait, state); | |
23354 | + | |
23355 | + return 0; | |
23356 | +} | |
23357 | +EXPORT_SYMBOL(prepare_to_swait_event); | |
23358 | + | |
23359 | +void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait) | |
23360 | +{ | |
23361 | + __set_current_state(TASK_RUNNING); | |
23362 | + if (!list_empty(&wait->task_list)) | |
23363 | + list_del_init(&wait->task_list); | |
23364 | +} | |
23365 | + | |
23366 | +void finish_swait(struct swait_queue_head *q, struct swait_queue *wait) | |
23367 | +{ | |
23368 | + unsigned long flags; | |
23369 | + | |
23370 | + __set_current_state(TASK_RUNNING); | |
23371 | + | |
23372 | + if (!list_empty_careful(&wait->task_list)) { | |
23373 | + raw_spin_lock_irqsave(&q->lock, flags); | |
23374 | + list_del_init(&wait->task_list); | |
23375 | + raw_spin_unlock_irqrestore(&q->lock, flags); | |
23376 | + } | |
23377 | +} | |
23378 | +EXPORT_SYMBOL(finish_swait); | |
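Note the queueing discipline in the code above: __prepare_to_swait() inserts with list_add(), i.e. at the list head, and swake_up_locked() wakes the first entry, so a single wakeup goes to the most recently queued waiter. A stand-alone model of exactly that behavior (list plumbing simplified):

    #include <stdio.h>

    /* Stand-alone model of the swait discipline shown above: waiters are
     * added at the list head and swake_up() wakes the first entry. */
    struct swaiter { const char *name; struct swaiter *next; };

    static struct swaiter *head;

    static void model_prepare_to_swait(struct swaiter *w)
    {
        w->next = head;     /* list_add(): insert at the head */
        head = w;
    }

    static void model_swake_up(void)
    {
        struct swaiter *w = head;

        if (!w)
            return;         /* nothing queued: swait_active() is false */
        head = w->next;     /* wake exactly one waiter */
        printf("wake %s\n", w->name);
    }

    int main(void)
    {
        struct swaiter a = { "A" }, b = { "B" };

        model_prepare_to_swait(&a);
        model_prepare_to_swait(&b);
        model_swake_up();   /* wakes B, the most recent waiter */
        model_swake_up();   /* wakes A */
        return 0;
    }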
23379 | diff --git a/kernel/sched/swork.c b/kernel/sched/swork.c | |
23380 | new file mode 100644 | |
23381 | index 000000000000..1950f40ca725 | |
23382 | --- /dev/null | |
23383 | +++ b/kernel/sched/swork.c | |
23384 | @@ -0,0 +1,173 @@ | |
23385 | +/* | |
23386 | + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de | |
23387 | + * | |
23388 | + * Provides a framework for enqueuing callbacks from irq context | |
23389 | + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. | |
23390 | + */ | |
23391 | + | |
23392 | +#include <linux/swait.h> | |
23393 | +#include <linux/swork.h> | |
23394 | +#include <linux/kthread.h> | |
23395 | +#include <linux/slab.h> | |
23396 | +#include <linux/spinlock.h> | |
23397 | +#include <linux/export.h> | |
23398 | + | |
23399 | +#define SWORK_EVENT_PENDING (1 << 0) | |
23400 | + | |
23401 | +static DEFINE_MUTEX(worker_mutex); | |
23402 | +static struct sworker *glob_worker; | |
23403 | + | |
23404 | +struct sworker { | |
23405 | + struct list_head events; | |
23406 | + struct swait_queue_head wq; | |
23407 | + | |
23408 | + raw_spinlock_t lock; | |
23409 | + | |
23410 | + struct task_struct *task; | |
23411 | + int refs; | |
23412 | +}; | |
23413 | + | |
23414 | +static bool swork_readable(struct sworker *worker) | |
23415 | +{ | |
23416 | + bool r; | |
23417 | + | |
23418 | + if (kthread_should_stop()) | |
23419 | + return true; | |
23420 | + | |
23421 | + raw_spin_lock_irq(&worker->lock); | |
23422 | + r = !list_empty(&worker->events); | |
23423 | + raw_spin_unlock_irq(&worker->lock); | |
23424 | + | |
23425 | + return r; | |
23426 | +} | |
23427 | + | |
23428 | +static int swork_kthread(void *arg) | |
23429 | +{ | |
23430 | + struct sworker *worker = arg; | |
23431 | + | |
23432 | + for (;;) { | |
23433 | + swait_event_interruptible(worker->wq, | |
23434 | + swork_readable(worker)); | |
23435 | + if (kthread_should_stop()) | |
23436 | + break; | |
23437 | + | |
23438 | + raw_spin_lock_irq(&worker->lock); | |
23439 | + while (!list_empty(&worker->events)) { | |
23440 | + struct swork_event *sev; | |
23441 | + | |
23442 | + sev = list_first_entry(&worker->events, | |
23443 | + struct swork_event, item); | |
23444 | + list_del(&sev->item); | |
23445 | + raw_spin_unlock_irq(&worker->lock); | |
23446 | + | |
23447 | + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, | |
23448 | + &sev->flags)); | |
23449 | + sev->func(sev); | |
23450 | + raw_spin_lock_irq(&worker->lock); | |
23451 | + } | |
23452 | + raw_spin_unlock_irq(&worker->lock); | |
23453 | + } | |
23454 | + return 0; | |
23455 | +} | |
23456 | + | |
23457 | +static struct sworker *swork_create(void) | |
23458 | +{ | |
23459 | + struct sworker *worker; | |
23460 | + | |
23461 | + worker = kzalloc(sizeof(*worker), GFP_KERNEL); | |
23462 | + if (!worker) | |
23463 | + return ERR_PTR(-ENOMEM); | |
23464 | + | |
23465 | + INIT_LIST_HEAD(&worker->events); | |
23466 | + raw_spin_lock_init(&worker->lock); | |
23467 | + init_swait_queue_head(&worker->wq); | |
23468 | + | |
23469 | + worker->task = kthread_run(swork_kthread, worker, "kswork"); | |
23470 | + if (IS_ERR(worker->task)) { | |
23471 | + kfree(worker); | |
23472 | + return ERR_PTR(-ENOMEM); | |
23473 | + } | |
23474 | + | |
23475 | + return worker; | |
23476 | +} | |
23477 | + | |
23478 | +static void swork_destroy(struct sworker *worker) | |
23479 | +{ | |
23480 | + kthread_stop(worker->task); | |
23481 | + | |
23482 | + WARN_ON(!list_empty(&worker->events)); | |
23483 | + kfree(worker); | |
23484 | +} | |
23485 | + | |
23486 | +/** | |
23487 | + * swork_queue - queue swork | |
23488 | + * | |
23489 | + * Returns %false if @work was already on a queue, %true otherwise. | |
23490 | + * | |
23491 | + * The work is queued and processed on a random CPU. | |
23492 | + */ | |
23493 | +bool swork_queue(struct swork_event *sev) | |
23494 | +{ | |
23495 | + unsigned long flags; | |
23496 | + | |
23497 | + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) | |
23498 | + return false; | |
23499 | + | |
23500 | + raw_spin_lock_irqsave(&glob_worker->lock, flags); | |
23501 | + list_add_tail(&sev->item, &glob_worker->events); | |
23502 | + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); | |
23503 | + | |
23504 | + swake_up(&glob_worker->wq); | |
23505 | + return true; | |
23506 | +} | |
23507 | +EXPORT_SYMBOL_GPL(swork_queue); | |
23508 | + | |
23509 | +/** | |
23510 | + * swork_get - get an instance of the sworker | |
23511 | + * | |
23512 | + * Returns a negative error code if the initialization of the worker | |
23513 | + * failed, %0 otherwise. | |
23514 | + * | |
23515 | + */ | |
23516 | +int swork_get(void) | |
23517 | +{ | |
23518 | + struct sworker *worker; | |
23519 | + | |
23520 | + mutex_lock(&worker_mutex); | |
23521 | + if (!glob_worker) { | |
23522 | + worker = swork_create(); | |
23523 | + if (IS_ERR(worker)) { | |
23524 | + mutex_unlock(&worker_mutex); | |
23525 | + return -ENOMEM; | |
23526 | + } | |
23527 | + | |
23528 | + glob_worker = worker; | |
23529 | + } | |
23530 | + | |
23531 | + glob_worker->refs++; | |
23532 | + mutex_unlock(&worker_mutex); | |
23533 | + | |
23534 | + return 0; | |
23535 | +} | |
23536 | +EXPORT_SYMBOL_GPL(swork_get); | |
23537 | + | |
23538 | +/** | |
23539 | + * swork_put - puts an instance of the sworker | |
23540 | + * | |
23541 | + * Will destroy the sworker thread. This function must not be called until all | |
23542 | + * queued events have been completed. | |
23543 | + */ | |
23544 | +void swork_put(void) | |
23545 | +{ | |
23546 | + mutex_lock(&worker_mutex); | |
23547 | + | |
23548 | + glob_worker->refs--; | |
23549 | + if (glob_worker->refs > 0) | |
23550 | + goto out; | |
23551 | + | |
23552 | + swork_destroy(glob_worker); | |
23553 | + glob_worker = NULL; | |
23554 | +out: | |
23555 | + mutex_unlock(&worker_mutex); | |
23556 | +} | |
23557 | +EXPORT_SYMBOL_GPL(swork_put); | |
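The SWORK_EVENT_PENDING bit is the whole queueing protocol above: swork_queue() refuses an event whose bit is already set, and the worker clears the bit just before invoking the callback, so re-queueing becomes possible from inside the handler. A stand-alone, single-threaded model of that protocol (the real code uses atomic test_and_set_bit()/test_and_clear_bit()):

    #include <stdio.h>

    /* Stand-alone model of the swork pending-bit protocol: a second queue
     * attempt while the event is still pending is refused. */
    struct event_model {
        int pending;                    /* models SWORK_EVENT_PENDING */
        void (*func)(struct event_model *);
    };

    static int model_swork_queue(struct event_model *ev)
    {
        if (ev->pending)
            return 0;                   /* already queued: drop */
        ev->pending = 1;
        printf("queued\n");             /* real code: list_add_tail + swake_up */
        return 1;
    }

    static void model_worker_run(struct event_model *ev)
    {
        ev->pending = 0;                /* cleared before the callback runs */
        ev->func(ev);
    }

    static void hello(struct event_model *ev) { (void)ev; printf("callback\n"); }

    int main(void)
    {
        struct event_model ev = { 0, hello };

        model_swork_queue(&ev);
        model_swork_queue(&ev);         /* refused: still pending */
        model_worker_run(&ev);
        return 0;
    }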
23558 | diff --git a/kernel/signal.c b/kernel/signal.c | |
23559 | index f3f1f7a972fd..bc2c990f3f63 100644 | |
23560 | --- a/kernel/signal.c | |
23561 | +++ b/kernel/signal.c | |
23562 | @@ -14,6 +14,7 @@ | |
23563 | #include <linux/export.h> | |
23564 | #include <linux/init.h> | |
23565 | #include <linux/sched.h> | |
23566 | +#include <linux/sched/rt.h> | |
23567 | #include <linux/fs.h> | |
23568 | #include <linux/tty.h> | |
23569 | #include <linux/binfmts.h> | |
23570 | @@ -352,13 +353,30 @@ static bool task_participate_group_stop(struct task_struct *task) | |
23571 | return false; | |
23572 | } | |
23573 | ||
23574 | +static inline struct sigqueue *get_task_cache(struct task_struct *t) | |
23575 | +{ | |
23576 | + struct sigqueue *q = t->sigqueue_cache; | |
23577 | + | |
23578 | + if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) | |
23579 | + return NULL; | |
23580 | + return q; | |
23581 | +} | |
23582 | + | |
23583 | +static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) | |
23584 | +{ | |
23585 | + if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) | |
23586 | + return 0; | |
23587 | + return 1; | |
23588 | +} | |
23589 | + | |
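get_task_cache()/put_task_cache() form a one-slot cache: the slot is claimed or refilled with a single cmpxchg(), so concurrent callers cannot both take, or both fill, it. A stand-alone, single-threaded model of the slot logic (the real code relies on cmpxchg() for atomicity):

    #include <stdio.h>

    /* Stand-alone model of the one-slot sigqueue cache. */
    static void *slot;      /* models t->sigqueue_cache */

    static void *cache_get(void)
    {
        void *q = slot;

        /* real code: cmpxchg(&t->sigqueue_cache, q, NULL) != q -> NULL */
        if (!q)
            return NULL;
        slot = NULL;
        return q;
    }

    static int cache_put(void *q)
    {
        /* real code: cmpxchg(&t->sigqueue_cache, NULL, q) == NULL -> 0 */
        if (slot)
            return 1;       /* slot occupied: caller must free q itself */
        slot = q;
        return 0;
    }

    int main(void)
    {
        int obj;

        printf("put: %d\n", cache_put(&obj));   /* 0: cached */
        printf("put: %d\n", cache_put(&obj));   /* 1: already full */
        printf("get: %p\n", cache_get());       /* returns the cached object */
        printf("get: %p\n", cache_get());       /* NULL: empty */
        return 0;
    }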
23590 | /* | |
23591 | * allocate a new signal queue record | |
23592 | * - this may be called without locks if and only if t == current, otherwise an | |
23593 | * appropriate lock must be held to stop the target task from exiting | |
23594 | */ | |
23595 | static struct sigqueue * | |
23596 | -__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) | |
23597 | +__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags, | |
23598 | + int override_rlimit, int fromslab) | |
23599 | { | |
23600 | struct sigqueue *q = NULL; | |
23601 | struct user_struct *user; | |
23602 | @@ -375,7 +393,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
23603 | if (override_rlimit || | |
23604 | atomic_read(&user->sigpending) <= | |
23605 | task_rlimit(t, RLIMIT_SIGPENDING)) { | |
23606 | - q = kmem_cache_alloc(sigqueue_cachep, flags); | |
23607 | + if (!fromslab) | |
23608 | + q = get_task_cache(t); | |
23609 | + if (!q) | |
23610 | + q = kmem_cache_alloc(sigqueue_cachep, flags); | |
23611 | } else { | |
23612 | print_dropped_signal(sig); | |
23613 | } | |
23614 | @@ -392,6 +413,13 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi | |
23615 | return q; | |
23616 | } | |
23617 | ||
23618 | +static struct sigqueue * | |
23619 | +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, | |
23620 | + int override_rlimit) | |
23621 | +{ | |
23622 | + return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0); | |
23623 | +} | |
23624 | + | |
23625 | static void __sigqueue_free(struct sigqueue *q) | |
23626 | { | |
23627 | if (q->flags & SIGQUEUE_PREALLOC) | |
23628 | @@ -401,6 +429,21 @@ static void __sigqueue_free(struct sigqueue *q) | |
23629 | kmem_cache_free(sigqueue_cachep, q); | |
23630 | } | |
23631 | ||
23632 | +static void sigqueue_free_current(struct sigqueue *q) | |
23633 | +{ | |
23634 | + struct user_struct *up; | |
23635 | + | |
23636 | + if (q->flags & SIGQUEUE_PREALLOC) | |
23637 | + return; | |
23638 | + | |
23639 | + up = q->user; | |
23640 | + if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { | |
23641 | + atomic_dec(&up->sigpending); | |
23642 | + free_uid(up); | |
23643 | + } else | |
23644 | + __sigqueue_free(q); | |
23645 | +} | |
23646 | + | |
23647 | void flush_sigqueue(struct sigpending *queue) | |
23648 | { | |
23649 | struct sigqueue *q; | |
23650 | @@ -414,6 +457,21 @@ void flush_sigqueue(struct sigpending *queue) | |
23651 | } | |
23652 | ||
23653 | /* | |
23654 | + * Called from __exit_signal. Flush tsk->pending and | |
23655 | + * tsk->sigqueue_cache | |
23656 | + */ | |
23657 | +void flush_task_sigqueue(struct task_struct *tsk) | |
23658 | +{ | |
23659 | + struct sigqueue *q; | |
23660 | + | |
23661 | + flush_sigqueue(&tsk->pending); | |
23662 | + | |
23663 | + q = get_task_cache(tsk); | |
23664 | + if (q) | |
23665 | + kmem_cache_free(sigqueue_cachep, q); | |
23666 | +} | |
23667 | + | |
23668 | +/* | |
23669 | * Flush all pending signals for this kthread. | |
23670 | */ | |
23671 | void flush_signals(struct task_struct *t) | |
23672 | @@ -525,7 +583,7 @@ static void collect_signal(int sig, struct sigpending *list, siginfo_t *info) | |
23673 | still_pending: | |
23674 | list_del_init(&first->list); | |
23675 | copy_siginfo(info, &first->info); | |
23676 | - __sigqueue_free(first); | |
23677 | + sigqueue_free_current(first); | |
23678 | } else { | |
23679 | /* | |
23680 | * Ok, it wasn't in the queue. This must be | |
23681 | @@ -560,6 +618,8 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |
23682 | { | |
23683 | int signr; | |
23684 | ||
23685 | + WARN_ON_ONCE(tsk != current); | |
23686 | + | |
23687 | /* We only dequeue private signals from ourselves, we don't let | |
23688 | * signalfd steal them | |
23689 | */ | |
23690 | @@ -1156,8 +1216,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p, | |
23691 | * We don't want to have recursive SIGSEGV's etc, for example, | |
23692 | * that is why we also clear SIGNAL_UNKILLABLE. | |
23693 | */ | |
23694 | -int | |
23695 | -force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23696 | +static int | |
23697 | +do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23698 | { | |
23699 | unsigned long int flags; | |
23700 | int ret, blocked, ignored; | |
23701 | @@ -1182,6 +1242,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23702 | return ret; | |
23703 | } | |
23704 | ||
23705 | +int force_sig_info(int sig, struct siginfo *info, struct task_struct *t) | |
23706 | +{ | |
23707 | +/* | |
23708 | + * On some archs, PREEMPT_RT has to delay sending a signal from a trap | |
23709 | + * since it cannot enable preemption, and the signal code's spin_locks | |
23710 | + * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will | |
23711 | + * send the signal on exit of the trap. | |
23712 | + */ | |
23713 | +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND | |
23714 | + if (in_atomic()) { | |
23715 | + if (WARN_ON_ONCE(t != current)) | |
23716 | + return 0; | |
23717 | + if (WARN_ON_ONCE(t->forced_info.si_signo)) | |
23718 | + return 0; | |
23719 | + | |
23720 | + if (is_si_special(info)) { | |
23721 | + WARN_ON_ONCE(info != SEND_SIG_PRIV); | |
23722 | + t->forced_info.si_signo = sig; | |
23723 | + t->forced_info.si_errno = 0; | |
23724 | + t->forced_info.si_code = SI_KERNEL; | |
23725 | + t->forced_info.si_pid = 0; | |
23726 | + t->forced_info.si_uid = 0; | |
23727 | + } else { | |
23728 | + t->forced_info = *info; | |
23729 | + } | |
23730 | + | |
23731 | + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | |
23732 | + return 0; | |
23733 | + } | |
23734 | +#endif | |
23735 | + return do_force_sig_info(sig, info, t); | |
23736 | +} | |
23737 | + | |
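The ARCH_RT_DELAYS_SIGNAL_SEND branch parks the siginfo in t->forced_info and sets TIF_NOTIFY_RESUME, deferring delivery to the trap-exit path where the signal code's sleeping locks may safely be taken. A stand-alone model of the park-and-deliver-on-exit pattern (all names hypothetical):

    #include <stdio.h>

    /* Stand-alone model of the deferred-signal pattern: in atomic context
     * the signal is parked and a resume flag set; the exit-to-user path
     * performs the actual delivery. */
    static int in_atomic_ctx;
    static int parked_sig;      /* models t->forced_info.si_signo */
    static int notify_resume;   /* models TIF_NOTIFY_RESUME */

    static void send_now(int sig) { printf("deliver signal %d\n", sig); }

    static void model_force_sig(int sig)
    {
        if (in_atomic_ctx) {
            parked_sig = sig;       /* park it ... */
            notify_resume = 1;      /* ... and ask for a callback on exit */
            return;
        }
        send_now(sig);
    }

    static void model_exit_to_user(void)
    {
        if (notify_resume && parked_sig) {
            send_now(parked_sig);   /* now safe: preemptible context */
            parked_sig = notify_resume = 0;
        }
    }

    int main(void)
    {
        in_atomic_ctx = 1;
        model_force_sig(11);        /* parked, not delivered */
        in_atomic_ctx = 0;
        model_exit_to_user();       /* delivered here */
        return 0;
    }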
23738 | /* | |
23739 | * Nuke all other threads in the group. | |
23740 | */ | |
23741 | @@ -1216,12 +1309,12 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
23742 | * Disable interrupts early to avoid deadlocks. | |
23743 | * See rcu_read_unlock() comment header for details. | |
23744 | */ | |
23745 | - local_irq_save(*flags); | |
23746 | + local_irq_save_nort(*flags); | |
23747 | rcu_read_lock(); | |
23748 | sighand = rcu_dereference(tsk->sighand); | |
23749 | if (unlikely(sighand == NULL)) { | |
23750 | rcu_read_unlock(); | |
23751 | - local_irq_restore(*flags); | |
23752 | + local_irq_restore_nort(*flags); | |
23753 | break; | |
23754 | } | |
23755 | /* | |
23756 | @@ -1242,7 +1335,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |
23757 | } | |
23758 | spin_unlock(&sighand->siglock); | |
23759 | rcu_read_unlock(); | |
23760 | - local_irq_restore(*flags); | |
23761 | + local_irq_restore_nort(*flags); | |
23762 | } | |
23763 | ||
23764 | return sighand; | |
23765 | @@ -1485,7 +1578,8 @@ EXPORT_SYMBOL(kill_pid); | |
23766 | */ | |
23767 | struct sigqueue *sigqueue_alloc(void) | |
23768 | { | |
23769 | - struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); | |
23770 | + /* Preallocated sigqueue objects always come from the slab cache! */ | |
23771 | + struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1); | |
23772 | ||
23773 | if (q) | |
23774 | q->flags |= SIGQUEUE_PREALLOC; | |
23775 | @@ -1846,15 +1940,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) | |
23776 | if (gstop_done && ptrace_reparented(current)) | |
23777 | do_notify_parent_cldstop(current, false, why); | |
23778 | ||
23779 | - /* | |
23780 | - * Don't want to allow preemption here, because | |
23781 | - * sys_ptrace() needs this task to be inactive. | |
23782 | - * | |
23783 | - * XXX: implement read_unlock_no_resched(). | |
23784 | - */ | |
23785 | - preempt_disable(); | |
23786 | read_unlock(&tasklist_lock); | |
23787 | - preempt_enable_no_resched(); | |
23788 | freezable_schedule(); | |
23789 | } else { | |
23790 | /* | |
23791 | diff --git a/kernel/softirq.c b/kernel/softirq.c | |
23792 | index 479e4436f787..cb9c1d5dee10 100644 | |
23793 | --- a/kernel/softirq.c | |
23794 | +++ b/kernel/softirq.c | |
23795 | @@ -21,10 +21,12 @@ | |
23796 | #include <linux/freezer.h> | |
23797 | #include <linux/kthread.h> | |
23798 | #include <linux/rcupdate.h> | |
23799 | +#include <linux/delay.h> | |
23800 | #include <linux/ftrace.h> | |
23801 | #include <linux/smp.h> | |
23802 | #include <linux/smpboot.h> | |
23803 | #include <linux/tick.h> | |
23804 | +#include <linux/locallock.h> | |
23805 | #include <linux/irq.h> | |
23806 | ||
23807 | #define CREATE_TRACE_POINTS | |
23808 | @@ -56,12 +58,108 @@ EXPORT_SYMBOL(irq_stat); | |
23809 | static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; | |
23810 | ||
23811 | DEFINE_PER_CPU(struct task_struct *, ksoftirqd); | |
23812 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23813 | +#define TIMER_SOFTIRQS ((1 << TIMER_SOFTIRQ) | (1 << HRTIMER_SOFTIRQ)) | |
23814 | +DEFINE_PER_CPU(struct task_struct *, ktimer_softirqd); | |
23815 | +#endif | |
23816 | ||
23817 | const char * const softirq_to_name[NR_SOFTIRQS] = { | |
23818 | "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", | |
23819 | "TASKLET", "SCHED", "HRTIMER", "RCU" | |
23820 | }; | |
23821 | ||
23822 | +#ifdef CONFIG_NO_HZ_COMMON | |
23823 | +# ifdef CONFIG_PREEMPT_RT_FULL | |
23824 | + | |
23825 | +struct softirq_runner { | |
23826 | + struct task_struct *runner[NR_SOFTIRQS]; | |
23827 | +}; | |
23828 | + | |
23829 | +static DEFINE_PER_CPU(struct softirq_runner, softirq_runners); | |
23830 | + | |
23831 | +static inline void softirq_set_runner(unsigned int sirq) | |
23832 | +{ | |
23833 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
23834 | + | |
23835 | + sr->runner[sirq] = current; | |
23836 | +} | |
23837 | + | |
23838 | +static inline void softirq_clr_runner(unsigned int sirq) | |
23839 | +{ | |
23840 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
23841 | + | |
23842 | + sr->runner[sirq] = NULL; | |
23843 | +} | |
23844 | + | |
23845 | +/* | |
23846 | + * On preempt-rt a softirq running context might be blocked on a | |
23847 | + * lock. There might be no other runnable task on this CPU because the | |
23848 | + * lock owner runs on some other CPU. So we have to go into idle with | |
23849 | + * the pending bit set. Therefore we need to check this; otherwise we | |
23850 | + * warn about false positives, which confuses users and defeats the | |
23851 | + * whole purpose of this test. | |
23852 | + * | |
23853 | + * This code is called with interrupts disabled. | |
23854 | + */ | |
23855 | +void softirq_check_pending_idle(void) | |
23856 | +{ | |
23857 | + static int rate_limit; | |
23858 | + struct softirq_runner *sr = this_cpu_ptr(&softirq_runners); | |
23859 | + u32 warnpending; | |
23860 | + int i; | |
23861 | + | |
23862 | + if (rate_limit >= 10) | |
23863 | + return; | |
23864 | + | |
23865 | + warnpending = local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK; | |
23866 | + for (i = 0; i < NR_SOFTIRQS; i++) { | |
23867 | + struct task_struct *tsk = sr->runner[i]; | |
23868 | + | |
23869 | + /* | |
23870 | + * The wakeup code in rtmutex.c wakes up the task | |
23871 | + * _before_ it sets pi_blocked_on to NULL under | |
23872 | + * tsk->pi_lock. So we need to check for both: state | |
23873 | + * and pi_blocked_on. | |
23874 | + */ | |
23875 | + if (tsk) { | |
23876 | + raw_spin_lock(&tsk->pi_lock); | |
23877 | + if (tsk->pi_blocked_on || tsk->state == TASK_RUNNING) { | |
23878 | + /* Clear all bits pending in that task */ | |
23879 | + warnpending &= ~(tsk->softirqs_raised); | |
23880 | + warnpending &= ~(1 << i); | |
23881 | + } | |
23882 | + raw_spin_unlock(&tsk->pi_lock); | |
23883 | + } | |
23884 | + } | |
23885 | + | |
23886 | + if (warnpending) { | |
23887 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
23888 | + warnpending); | |
23889 | + rate_limit++; | |
23890 | + } | |
23891 | +} | |
23892 | +# else | |
23893 | +/* | |
23894 | + * On !PREEMPT_RT we just printk, rate limited: | |
23895 | + */ | |
23896 | +void softirq_check_pending_idle(void) | |
23897 | +{ | |
23898 | + static int rate_limit; | |
23899 | + | |
23900 | + if (rate_limit < 10 && | |
23901 | + (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
23902 | + printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", | |
23903 | + local_softirq_pending()); | |
23904 | + rate_limit++; | |
23905 | + } | |
23906 | +} | |
23907 | +# endif | |
23908 | + | |
23909 | +#else /* !CONFIG_NO_HZ_COMMON */ | |
23910 | +static inline void softirq_set_runner(unsigned int sirq) { } | |
23911 | +static inline void softirq_clr_runner(unsigned int sirq) { } | |
23912 | +#endif | |
23913 | + | |
23914 | /* | |
23915 | * we cannot loop indefinitely here to avoid userspace starvation, | |
23916 | * but we also don't want to introduce a worst case 1/HZ latency | |
23917 | @@ -77,6 +175,79 @@ static void wakeup_softirqd(void) | |
23918 | wake_up_process(tsk); | |
23919 | } | |
23920 | ||
23921 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
23922 | +static void wakeup_timer_softirqd(void) | |
23923 | +{ | |
23924 | + /* Interrupts are disabled: no need to stop preemption */ | |
23925 | + struct task_struct *tsk = __this_cpu_read(ktimer_softirqd); | |
23926 | + | |
23927 | + if (tsk && tsk->state != TASK_RUNNING) | |
23928 | + wake_up_process(tsk); | |
23929 | +} | |
23930 | +#endif | |
23931 | + | |
23932 | +static void handle_softirq(unsigned int vec_nr) | |
23933 | +{ | |
23934 | + struct softirq_action *h = softirq_vec + vec_nr; | |
23935 | + int prev_count; | |
23936 | + | |
23937 | + prev_count = preempt_count(); | |
23938 | + | |
23939 | + kstat_incr_softirqs_this_cpu(vec_nr); | |
23940 | + | |
23941 | + trace_softirq_entry(vec_nr); | |
23942 | + h->action(h); | |
23943 | + trace_softirq_exit(vec_nr); | |
23944 | + if (unlikely(prev_count != preempt_count())) { | |
23945 | + pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
23946 | + vec_nr, softirq_to_name[vec_nr], h->action, | |
23947 | + prev_count, preempt_count()); | |
23948 | + preempt_count_set(prev_count); | |
23949 | + } | |
23950 | +} | |
23951 | + | |
23952 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
23953 | +static inline int ksoftirqd_softirq_pending(void) | |
23954 | +{ | |
23955 | + return local_softirq_pending(); | |
23956 | +} | |
23957 | + | |
23958 | +static void handle_pending_softirqs(u32 pending) | |
23959 | +{ | |
23960 | + struct softirq_action *h = softirq_vec; | |
23961 | + int softirq_bit; | |
23962 | + | |
23963 | + local_irq_enable(); | |
23964 | + | |
23965 | + h = softirq_vec; | |
23966 | + | |
23967 | + while ((softirq_bit = ffs(pending))) { | |
23968 | + unsigned int vec_nr; | |
23969 | + | |
23970 | + h += softirq_bit - 1; | |
23971 | + vec_nr = h - softirq_vec; | |
23972 | + handle_softirq(vec_nr); | |
23973 | + | |
23974 | + h++; | |
23975 | + pending >>= softirq_bit; | |
23976 | + } | |
23977 | + | |
23978 | + rcu_bh_qs(); | |
23979 | + local_irq_disable(); | |
23980 | +} | |
23981 | + | |
23982 | +static void run_ksoftirqd(unsigned int cpu) | |
23983 | +{ | |
23984 | + local_irq_disable(); | |
23985 | + if (ksoftirqd_softirq_pending()) { | |
23986 | + __do_softirq(); | |
23987 | + local_irq_enable(); | |
23988 | + cond_resched_rcu_qs(); | |
23989 | + return; | |
23990 | + } | |
23991 | + local_irq_enable(); | |
23992 | +} | |
23993 | + | |
23994 | /* | |
23995 | * preempt_count and SOFTIRQ_OFFSET usage: | |
23996 | * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving | |
23997 | @@ -116,9 +287,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) | |
23998 | ||
23999 | if (preempt_count() == cnt) { | |
24000 | #ifdef CONFIG_DEBUG_PREEMPT | |
24001 | - current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1); | |
24002 | + current->preempt_disable_ip = get_lock_parent_ip(); | |
24003 | #endif | |
24004 | - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); | |
24005 | + trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip()); | |
24006 | } | |
24007 | } | |
24008 | EXPORT_SYMBOL(__local_bh_disable_ip); | |
24009 | @@ -232,10 +403,8 @@ asmlinkage __visible void __do_softirq(void) | |
24010 | unsigned long end = jiffies + MAX_SOFTIRQ_TIME; | |
24011 | unsigned long old_flags = current->flags; | |
24012 | int max_restart = MAX_SOFTIRQ_RESTART; | |
24013 | - struct softirq_action *h; | |
24014 | bool in_hardirq; | |
24015 | __u32 pending; | |
24016 | - int softirq_bit; | |
24017 | ||
24018 | /* | |
24019 | * Mask out PF_MEMALLOC as current task context is borrowed for the | |
24020 | @@ -254,36 +423,7 @@ restart: | |
24021 | /* Reset the pending bitmask before enabling irqs */ | |
24022 | set_softirq_pending(0); | |
24023 | ||
24024 | - local_irq_enable(); | |
24025 | - | |
24026 | - h = softirq_vec; | |
24027 | - | |
24028 | - while ((softirq_bit = ffs(pending))) { | |
24029 | - unsigned int vec_nr; | |
24030 | - int prev_count; | |
24031 | - | |
24032 | - h += softirq_bit - 1; | |
24033 | - | |
24034 | - vec_nr = h - softirq_vec; | |
24035 | - prev_count = preempt_count(); | |
24036 | - | |
24037 | - kstat_incr_softirqs_this_cpu(vec_nr); | |
24038 | - | |
24039 | - trace_softirq_entry(vec_nr); | |
24040 | - h->action(h); | |
24041 | - trace_softirq_exit(vec_nr); | |
24042 | - if (unlikely(prev_count != preempt_count())) { | |
24043 | - pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n", | |
24044 | - vec_nr, softirq_to_name[vec_nr], h->action, | |
24045 | - prev_count, preempt_count()); | |
24046 | - preempt_count_set(prev_count); | |
24047 | - } | |
24048 | - h++; | |
24049 | - pending >>= softirq_bit; | |
24050 | - } | |
24051 | - | |
24052 | - rcu_bh_qs(); | |
24053 | - local_irq_disable(); | |
24054 | + handle_pending_softirqs(pending); | |
24055 | ||
24056 | pending = local_softirq_pending(); | |
24057 | if (pending) { | |
24058 | @@ -320,6 +460,310 @@ asmlinkage __visible void do_softirq(void) | |
24059 | } | |
24060 | ||
24061 | /* | |
24062 | + * This function must run with irqs disabled! | |
24063 | + */ | |
24064 | +void raise_softirq_irqoff(unsigned int nr) | |
24065 | +{ | |
24066 | + __raise_softirq_irqoff(nr); | |
24067 | + | |
24068 | + /* | |
24069 | + * If we're in an interrupt or softirq, we're done | |
24070 | + * (this also catches softirq-disabled code). We will | |
24071 | + * actually run the softirq once we return from | |
24072 | + * the irq or softirq. | |
24073 | + * | |
24074 | + * Otherwise we wake up ksoftirqd to make sure we | |
24075 | + * schedule the softirq soon. | |
24076 | + */ | |
24077 | + if (!in_interrupt()) | |
24078 | + wakeup_softirqd(); | |
24079 | +} | |
24080 | + | |
24081 | +void __raise_softirq_irqoff(unsigned int nr) | |
24082 | +{ | |
24083 | + trace_softirq_raise(nr); | |
24084 | + or_softirq_pending(1UL << nr); | |
24085 | +} | |
24086 | + | |
24087 | +static inline void local_bh_disable_nort(void) { local_bh_disable(); } | |
24088 | +static inline void _local_bh_enable_nort(void) { _local_bh_enable(); } | |
24089 | +static void ksoftirqd_set_sched_params(unsigned int cpu) { } | |
24090 | + | |
24091 | +#else /* !PREEMPT_RT_FULL */ | |
24092 | + | |
24093 | +/* | |
24094 | + * On RT we serialize softirq execution with a cpu local lock per softirq | |
24095 | + */ | |
24096 | +static DEFINE_PER_CPU(struct local_irq_lock [NR_SOFTIRQS], local_softirq_locks); | |
24097 | + | |
24098 | +void __init softirq_early_init(void) | |
24099 | +{ | |
24100 | + int i; | |
24101 | + | |
24102 | + for (i = 0; i < NR_SOFTIRQS; i++) | |
24103 | + local_irq_lock_init(local_softirq_locks[i]); | |
24104 | +} | |
24105 | + | |
24106 | +static void lock_softirq(int which) | |
24107 | +{ | |
24108 | + local_lock(local_softirq_locks[which]); | |
24109 | +} | |
24110 | + | |
24111 | +static void unlock_softirq(int which) | |
24112 | +{ | |
24113 | + local_unlock(local_softirq_locks[which]); | |
24114 | +} | |
24115 | + | |
24116 | +static void do_single_softirq(int which) | |
24117 | +{ | |
24118 | + unsigned long old_flags = current->flags; | |
24119 | + | |
24120 | + current->flags &= ~PF_MEMALLOC; | |
24121 | + vtime_account_irq_enter(current); | |
24122 | + current->flags |= PF_IN_SOFTIRQ; | |
24123 | + lockdep_softirq_enter(); | |
24124 | + local_irq_enable(); | |
24125 | + handle_softirq(which); | |
24126 | + local_irq_disable(); | |
24127 | + lockdep_softirq_exit(); | |
24128 | + current->flags &= ~PF_IN_SOFTIRQ; | |
24129 | + vtime_account_irq_enter(current); | |
24130 | + tsk_restore_flags(current, old_flags, PF_MEMALLOC); | |
24131 | +} | |
24132 | + | |
24133 | +/* | |
24134 | + * Called with interrupts disabled. Process softirqs which were raised | |
24135 | + * in current context (or on behalf of ksoftirqd). | |
24136 | + */ | |
24137 | +static void do_current_softirqs(void) | |
24138 | +{ | |
24139 | + while (current->softirqs_raised) { | |
24140 | + int i = __ffs(current->softirqs_raised); | |
24141 | + unsigned int pending, mask = (1U << i); | |
24142 | + | |
24143 | + current->softirqs_raised &= ~mask; | |
24144 | + local_irq_enable(); | |
24145 | + | |
24146 | + /* | |
24147 | + * If the lock is contended, we boost the owner to | |
24148 | + * process the softirq or leave the critical section | |
24149 | + * now. | |
24150 | + */ | |
24151 | + lock_softirq(i); | |
24152 | + local_irq_disable(); | |
24153 | + softirq_set_runner(i); | |
24154 | + /* | |
24155 | + * Check with the local_softirq_pending() bits | |
24156 | + * whether we still need to process this or if someone | |
24157 | + * else took care of it. | |
24158 | + */ | |
24159 | + pending = local_softirq_pending(); | |
24160 | + if (pending & mask) { | |
24161 | + set_softirq_pending(pending & ~mask); | |
24162 | + do_single_softirq(i); | |
24163 | + } | |
24164 | + softirq_clr_runner(i); | |
24165 | + WARN_ON(current->softirq_nestcnt != 1); | |
24166 | + local_irq_enable(); | |
24167 | + unlock_softirq(i); | |
24168 | + local_irq_disable(); | |
24169 | + } | |
24170 | +} | |
24171 | + | |
24172 | +void __local_bh_disable(void) | |
24173 | +{ | |
24174 | + if (++current->softirq_nestcnt == 1) | |
24175 | + migrate_disable(); | |
24176 | +} | |
24177 | +EXPORT_SYMBOL(__local_bh_disable); | |
24178 | + | |
24179 | +void __local_bh_enable(void) | |
24180 | +{ | |
24181 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
24182 | + return; | |
24183 | + | |
24184 | + local_irq_disable(); | |
24185 | + if (current->softirq_nestcnt == 1 && current->softirqs_raised) | |
24186 | + do_current_softirqs(); | |
24187 | + local_irq_enable(); | |
24188 | + | |
24189 | + if (--current->softirq_nestcnt == 0) | |
24190 | + migrate_enable(); | |
24191 | +} | |
24192 | +EXPORT_SYMBOL(__local_bh_enable); | |
24193 | + | |
24194 | +void _local_bh_enable(void) | |
24195 | +{ | |
24196 | + if (WARN_ON(current->softirq_nestcnt == 0)) | |
24197 | + return; | |
24198 | + if (--current->softirq_nestcnt == 0) | |
24199 | + migrate_enable(); | |
24200 | +} | |
24201 | +EXPORT_SYMBOL(_local_bh_enable); | |
24202 | + | |
24203 | +int in_serving_softirq(void) | |
24204 | +{ | |
24205 | + return current->flags & PF_IN_SOFTIRQ; | |
24206 | +} | |
24207 | +EXPORT_SYMBOL(in_serving_softirq); | |
24208 | + | |
24209 | +/* Called with preemption disabled */ | |
24210 | +static void run_ksoftirqd(unsigned int cpu) | |
24211 | +{ | |
24212 | + local_irq_disable(); | |
24213 | + current->softirq_nestcnt++; | |
24214 | + | |
24215 | + do_current_softirqs(); | |
24216 | + current->softirq_nestcnt--; | |
24217 | + local_irq_enable(); | |
24218 | + cond_resched_rcu_qs(); | |
24219 | +} | |
24220 | + | |
24221 | +/* | |
24222 | + * Called from netif_rx_ni(). Preemption enabled, but migration | |
24223 | + * disabled. So the cpu can't go away under us. | |
24224 | + */ | |
24225 | +void thread_do_softirq(void) | |
24226 | +{ | |
24227 | + if (!in_serving_softirq() && current->softirqs_raised) { | |
24228 | + current->softirq_nestcnt++; | |
24229 | + do_current_softirqs(); | |
24230 | + current->softirq_nestcnt--; | |
24231 | + } | |
24232 | +} | |
24233 | + | |
24234 | +static void do_raise_softirq_irqoff(unsigned int nr) | |
24235 | +{ | |
24236 | + unsigned int mask; | |
24237 | + | |
24238 | + mask = 1UL << nr; | |
24239 | + | |
24240 | + trace_softirq_raise(nr); | |
24241 | + or_softirq_pending(mask); | |
24242 | + | |
24243 | + /* | |
24244 | + * If we are not in a hard interrupt and inside a bh disabled | |
24245 | + * region, we simply raise the flag on current. local_bh_enable() | |
24246 | + * will make sure that the softirq is executed. Otherwise we | |
24247 | + * delegate it to ksoftirqd. | |
24248 | + */ | |
24249 | + if (!in_irq() && current->softirq_nestcnt) | |
24250 | + current->softirqs_raised |= mask; | |
24251 | + else if (!__this_cpu_read(ksoftirqd) || !__this_cpu_read(ktimer_softirqd)) | |
24252 | + return; | |
24253 | + | |
24254 | + if (mask & TIMER_SOFTIRQS) | |
24255 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
24256 | + else | |
24257 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
24258 | +} | |
24259 | + | |
24260 | +static void wakeup_proper_softirq(unsigned int nr) | |
24261 | +{ | |
24262 | + if ((1UL << nr) & TIMER_SOFTIRQS) | |
24263 | + wakeup_timer_softirqd(); | |
24264 | + else | |
24265 | + wakeup_softirqd(); | |
24266 | +} | |
24267 | + | |
24269 | +void __raise_softirq_irqoff(unsigned int nr) | |
24270 | +{ | |
24271 | + do_raise_softirq_irqoff(nr); | |
24272 | + if (!in_irq() && !current->softirq_nestcnt) | |
24273 | + wakeup_proper_softirq(nr); | |
24274 | +} | |
24275 | + | |
24276 | +/* | |
24277 | + * Same as __raise_softirq_irqoff() but will process them in ksoftirqd | |
24278 | + */ | |
24279 | +void __raise_softirq_irqoff_ksoft(unsigned int nr) | |
24280 | +{ | |
24281 | + unsigned int mask; | |
24282 | + | |
24283 | + if (WARN_ON_ONCE(!__this_cpu_read(ksoftirqd) || | |
24284 | + !__this_cpu_read(ktimer_softirqd))) | |
24285 | + return; | |
24286 | + mask = 1UL << nr; | |
24287 | + | |
24288 | + trace_softirq_raise(nr); | |
24289 | + or_softirq_pending(mask); | |
24290 | + if (mask & TIMER_SOFTIRQS) | |
24291 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised |= mask; | |
24292 | + else | |
24293 | + __this_cpu_read(ksoftirqd)->softirqs_raised |= mask; | |
24294 | + wakeup_proper_softirq(nr); | |
24295 | +} | |
24296 | + | |
24297 | +/* | |
24298 | + * This function must run with irqs disabled! | |
24299 | + */ | |
24300 | +void raise_softirq_irqoff(unsigned int nr) | |
24301 | +{ | |
24302 | + do_raise_softirq_irqoff(nr); | |
24303 | + | |
24304 | + /* | |
24305 | + * If we're in a hard interrupt we let the irq return code deal | |
24306 | + * with the wakeup of ksoftirqd. | |
24307 | + */ | |
24308 | + if (in_irq()) | |
24309 | + return; | |
24310 | + /* | |
24311 | + * If we are in thread context but outside of a bh disabled | |
24312 | + * region, we need to wake ksoftirqd as well. | |
24313 | + * | |
24314 | + * CHECKME: Some of the places which do that could be wrapped | |
24315 | + * into local_bh_disable/enable pairs. Though it's unclear | |
24316 | + * whether this is worth the effort. To find those places just | |
24317 | + * raise a WARN() if the condition is met. | |
24318 | + */ | |
24319 | + if (!current->softirq_nestcnt) | |
24320 | + wakeup_proper_softirq(nr); | |
24321 | +} | |
24322 | + | |
24323 | +static inline int ksoftirqd_softirq_pending(void) | |
24324 | +{ | |
24325 | + return current->softirqs_raised; | |
24326 | +} | |
24327 | + | |
24328 | +static inline void local_bh_disable_nort(void) { } | |
24329 | +static inline void _local_bh_enable_nort(void) { } | |
24330 | + | |
24331 | +static inline void ksoftirqd_set_sched_params(unsigned int cpu) | |
24332 | +{ | |
24333 | + /* Take over all but timer pending softirqs when starting */ | |
24334 | + local_irq_disable(); | |
24335 | + current->softirqs_raised = local_softirq_pending() & ~TIMER_SOFTIRQS; | |
24336 | + local_irq_enable(); | |
24337 | +} | |
24338 | + | |
24339 | +static inline void ktimer_softirqd_set_sched_params(unsigned int cpu) | |
24340 | +{ | |
24341 | + struct sched_param param = { .sched_priority = 1 }; | |
24342 | + | |
24343 | + sched_setscheduler(current, SCHED_FIFO, ¶m); | |
24344 | + | |
24345 | + /* Take over timer pending softirqs when starting */ | |
24346 | + local_irq_disable(); | |
24347 | + current->softirqs_raised = local_softirq_pending() & TIMER_SOFTIRQS; | |
24348 | + local_irq_enable(); | |
24349 | +} | |
24350 | + | |
24351 | +static inline void ktimer_softirqd_clr_sched_params(unsigned int cpu, | |
24352 | + bool online) | |
24353 | +{ | |
24354 | + struct sched_param param = { .sched_priority = 0 }; | |
24355 | + | |
24356 | + sched_setscheduler(current, SCHED_NORMAL, ¶m); | |
24357 | +} | |
24358 | + | |
24359 | +static int ktimer_softirqd_should_run(unsigned int cpu) | |
24360 | +{ | |
24361 | + return current->softirqs_raised; | |
24362 | +} | |
24363 | + | |
24364 | +#endif /* PREEMPT_RT_FULL */ | |
24365 | +/* | |
24366 | * Enter an interrupt context. | |
24367 | */ | |
24368 | void irq_enter(void) | |
24369 | @@ -330,9 +774,9 @@ void irq_enter(void) | |
24370 | * Prevent raise_softirq from needlessly waking up ksoftirqd | |
24371 | * here, as softirq will be serviced on return from interrupt. | |
24372 | */ | |
24373 | - local_bh_disable(); | |
24374 | + local_bh_disable_nort(); | |
24375 | tick_irq_enter(); | |
24376 | - _local_bh_enable(); | |
24377 | + _local_bh_enable_nort(); | |
24378 | } | |
24379 | ||
24380 | __irq_enter(); | |
24381 | @@ -340,6 +784,7 @@ void irq_enter(void) | |
24382 | ||
24383 | static inline void invoke_softirq(void) | |
24384 | { | |
24385 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
24386 | if (!force_irqthreads) { | |
24387 | #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK | |
24388 | /* | |
24389 | @@ -359,6 +804,18 @@ static inline void invoke_softirq(void) | |
24390 | } else { | |
24391 | wakeup_softirqd(); | |
24392 | } | |
24393 | +#else /* PREEMPT_RT_FULL */ | |
24394 | + unsigned long flags; | |
24395 | + | |
24396 | + local_irq_save(flags); | |
24397 | + if (__this_cpu_read(ksoftirqd) && | |
24398 | + __this_cpu_read(ksoftirqd)->softirqs_raised) | |
24399 | + wakeup_softirqd(); | |
24400 | + if (__this_cpu_read(ktimer_softirqd) && | |
24401 | + __this_cpu_read(ktimer_softirqd)->softirqs_raised) | |
24402 | + wakeup_timer_softirqd(); | |
24403 | + local_irq_restore(flags); | |
24404 | +#endif | |
24405 | } | |
24406 | ||
24407 | static inline void tick_irq_exit(void) | |
24408 | @@ -395,26 +852,6 @@ void irq_exit(void) | |
24409 | trace_hardirq_exit(); /* must be last! */ | |
24410 | } | |
24411 | ||
24412 | -/* | |
24413 | - * This function must run with irqs disabled! | |
24414 | - */ | |
24415 | -inline void raise_softirq_irqoff(unsigned int nr) | |
24416 | -{ | |
24417 | - __raise_softirq_irqoff(nr); | |
24418 | - | |
24419 | - /* | |
24420 | - * If we're in an interrupt or softirq, we're done | |
24421 | - * (this also catches softirq-disabled code). We will | |
24422 | - * actually run the softirq once we return from | |
24423 | - * the irq or softirq. | |
24424 | - * | |
24425 | - * Otherwise we wake up ksoftirqd to make sure we | |
24426 | - * schedule the softirq soon. | |
24427 | - */ | |
24428 | - if (!in_interrupt()) | |
24429 | - wakeup_softirqd(); | |
24430 | -} | |
24431 | - | |
24432 | void raise_softirq(unsigned int nr) | |
24433 | { | |
24434 | unsigned long flags; | |
24435 | @@ -424,12 +861,6 @@ void raise_softirq(unsigned int nr) | |
24436 | local_irq_restore(flags); | |
24437 | } | |
24438 | ||
24439 | -void __raise_softirq_irqoff(unsigned int nr) | |
24440 | -{ | |
24441 | - trace_softirq_raise(nr); | |
24442 | - or_softirq_pending(1UL << nr); | |
24443 | -} | |
24444 | - | |
24445 | void open_softirq(int nr, void (*action)(struct softirq_action *)) | |
24446 | { | |
24447 | softirq_vec[nr].action = action; | |
24448 | @@ -446,15 +877,45 @@ struct tasklet_head { | |
24449 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); | |
24450 | static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); | |
24451 | ||
24452 | +static inline void | |
24453 | +__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) | |
24454 | +{ | |
24455 | + if (tasklet_trylock(t)) { | |
24456 | +again: | |
24457 | + /* We may have been preempted before tasklet_trylock | |
24458 | + * and __tasklet_action may have already run. | |
24459 | + * So double check the sched bit while the tasklet | |
24460 | + * is locked before adding it to the list. | |
24461 | + */ | |
24462 | + if (test_bit(TASKLET_STATE_SCHED, &t->state)) { | |
24463 | + t->next = NULL; | |
24464 | + *head->tail = t; | |
24465 | + head->tail = &(t->next); | |
24466 | + raise_softirq_irqoff(nr); | |
24467 | + tasklet_unlock(t); | |
24468 | + } else { | |
24469 | + /* This is subtle. If we hit the corner case above, | |
24470 | + * it is possible that we get preempted right here, | |
24471 | + * and another task has successfully called | |
24472 | + * tasklet_schedule(), entered this function, and | |
24473 | + * failed the trylock. Thus we must be sure, | |
24474 | + * before releasing the tasklet lock, that the | |
24475 | + * SCHED bit is clear. Otherwise the tasklet | |
24476 | + * may get its SCHED bit set but never be added to | |
24477 | + * the list. | |
24478 | + */ | |
24479 | + if (!tasklet_tryunlock(t)) | |
24480 | + goto again; | |
24481 | + } | |
24482 | + } | |
24483 | +} | |
24484 | + | |
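
The scheduling path above leans on the RUN-bit locking protocol of RT tasklets: tasklet_tryunlock() only permits the exact RUN -> 0 transition, so a concurrent tasklet_schedule() that sets the SCHED bit makes the unlock fail and forces the "again" re-check. A hedged sketch of that protocol with C11 atomics standing in for the kernel's bitops and cmpxchg (the toy_* names are illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define STATE_SCHED (1u << 0)   /* queued on a CPU's tasklet list */
    #define STATE_RUN   (1u << 1)   /* callback running; acts as a lock bit */

    struct toy_tasklet { _Atomic unsigned int state; };

    /* Take the RUN bit; fails if the callback already runs elsewhere. */
    static bool toy_trylock(struct toy_tasklet *t)
    {
        return !(atomic_fetch_or(&t->state, STATE_RUN) & STATE_RUN);
    }

    /* Release RUN only via the exact RUN -> 0 transition: if SCHED was
     * set meanwhile, the compare-exchange fails and the caller must
     * re-check the state before it may drop the lock. */
    static bool toy_tryunlock(struct toy_tasklet *t)
    {
        unsigned int expected = STATE_RUN;
        return atomic_compare_exchange_strong(&t->state, &expected, 0);
    }
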
24485 | void __tasklet_schedule(struct tasklet_struct *t) | |
24486 | { | |
24487 | unsigned long flags; | |
24488 | ||
24489 | local_irq_save(flags); | |
24490 | - t->next = NULL; | |
24491 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
24492 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
24493 | - raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
24494 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ); | |
24495 | local_irq_restore(flags); | |
24496 | } | |
24497 | EXPORT_SYMBOL(__tasklet_schedule); | |
24498 | @@ -464,10 +925,7 @@ void __tasklet_hi_schedule(struct tasklet_struct *t) | |
24499 | unsigned long flags; | |
24500 | ||
24501 | local_irq_save(flags); | |
24502 | - t->next = NULL; | |
24503 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
24504 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
24505 | - raise_softirq_irqoff(HI_SOFTIRQ); | |
24506 | + __tasklet_common_schedule(t, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ); | |
24507 | local_irq_restore(flags); | |
24508 | } | |
24509 | EXPORT_SYMBOL(__tasklet_hi_schedule); | |
24510 | @@ -476,82 +934,122 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t) | |
24511 | { | |
24512 | BUG_ON(!irqs_disabled()); | |
24513 | ||
24514 | - t->next = __this_cpu_read(tasklet_hi_vec.head); | |
24515 | - __this_cpu_write(tasklet_hi_vec.head, t); | |
24516 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
24517 | + __tasklet_hi_schedule(t); | |
24518 | } | |
24519 | EXPORT_SYMBOL(__tasklet_hi_schedule_first); | |
24520 | ||
24521 | -static void tasklet_action(struct softirq_action *a) | |
24522 | +void tasklet_enable(struct tasklet_struct *t) | |
24523 | { | |
24524 | - struct tasklet_struct *list; | |
24525 | + if (!atomic_dec_and_test(&t->count)) | |
24526 | + return; | |
24527 | + if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) | |
24528 | + tasklet_schedule(t); | |
24529 | +} | |
24530 | +EXPORT_SYMBOL(tasklet_enable); | |
24531 | ||
24532 | - local_irq_disable(); | |
24533 | - list = __this_cpu_read(tasklet_vec.head); | |
24534 | - __this_cpu_write(tasklet_vec.head, NULL); | |
24535 | - __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
24536 | - local_irq_enable(); | |
24537 | +static void __tasklet_action(struct softirq_action *a, | |
24538 | + struct tasklet_struct *list) | |
24539 | +{ | |
24540 | + int loops = 1000000; | |
24541 | ||
24542 | while (list) { | |
24543 | struct tasklet_struct *t = list; | |
24544 | ||
24545 | list = list->next; | |
24546 | ||
24547 | - if (tasklet_trylock(t)) { | |
24548 | - if (!atomic_read(&t->count)) { | |
24549 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
24550 | - &t->state)) | |
24551 | - BUG(); | |
24552 | - t->func(t->data); | |
24553 | - tasklet_unlock(t); | |
24554 | - continue; | |
24555 | - } | |
24556 | - tasklet_unlock(t); | |
24557 | + /* | |
24558 | + * Should always succeed - after a tasklet got on the | |
24559 | + * list (after getting the SCHED bit set from 0 to 1), | |
24560 | + * nothing but the tasklet softirq it got queued to can | |
24561 | + * lock it: | |
24562 | + */ | |
24563 | + if (!tasklet_trylock(t)) { | |
24564 | + WARN_ON(1); | |
24565 | + continue; | |
24566 | } | |
24567 | ||
24568 | - local_irq_disable(); | |
24569 | t->next = NULL; | |
24570 | - *__this_cpu_read(tasklet_vec.tail) = t; | |
24571 | - __this_cpu_write(tasklet_vec.tail, &(t->next)); | |
24572 | - __raise_softirq_irqoff(TASKLET_SOFTIRQ); | |
24573 | - local_irq_enable(); | |
24574 | + | |
24575 | + /* | |
24576 | + * If we cannot handle the tasklet because it's disabled, | |
24577 | + * mark it as pending. tasklet_enable() will later | |
24578 | + * re-schedule the tasklet. | |
24579 | + */ | |
24580 | + if (unlikely(atomic_read(&t->count))) { | |
24581 | +out_disabled: | |
24582 | + /* implicit unlock: */ | |
24583 | + wmb(); | |
24584 | + t->state = TASKLET_STATEF_PENDING; | |
24585 | + continue; | |
24586 | + } | |
24587 | + | |
24588 | + /* | |
24589 | + * After this point the tasklet might be rescheduled | |
24590 | + * on another CPU, but it can only be added to another | |
24591 | + * CPU's tasklet list if we unlock the tasklet (which we | |
24592 | + * don't do yet). | |
24593 | + */ | |
24594 | + if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
24595 | + WARN_ON(1); | |
24596 | + | |
24597 | +again: | |
24598 | + t->func(t->data); | |
24599 | + | |
24600 | + /* | |
24601 | + * Try to unlock the tasklet. We must use cmpxchg, because | |
24602 | + * another CPU might have scheduled or disabled the tasklet. | |
24603 | + * We only allow the STATE_RUN -> 0 transition here. | |
24604 | + */ | |
24605 | + while (!tasklet_tryunlock(t)) { | |
24606 | + /* | |
24607 | + * If it got disabled meanwhile, bail out: | |
24608 | + */ | |
24609 | + if (atomic_read(&t->count)) | |
24610 | + goto out_disabled; | |
24611 | + /* | |
24612 | + * If it got scheduled meanwhile, re-execute | |
24613 | + * the tasklet function: | |
24614 | + */ | |
24615 | + if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) | |
24616 | + goto again; | |
24617 | + if (!--loops) { | |
24618 | + printk("hm, tasklet state: %08lx\n", t->state); | |
24619 | + WARN_ON(1); | |
24620 | + tasklet_unlock(t); | |
24621 | + break; | |
24622 | + } | |
24623 | + } | |
24624 | } | |
24625 | } | |
24626 | ||
24627 | +static void tasklet_action(struct softirq_action *a) | |
24628 | +{ | |
24629 | + struct tasklet_struct *list; | |
24630 | + | |
24631 | + local_irq_disable(); | |
24632 | + | |
24633 | + list = __this_cpu_read(tasklet_vec.head); | |
24634 | + __this_cpu_write(tasklet_vec.head, NULL); | |
24635 | + __this_cpu_write(tasklet_vec.tail, this_cpu_ptr(&tasklet_vec.head)); | |
24636 | + | |
24637 | + local_irq_enable(); | |
24638 | + | |
24639 | + __tasklet_action(a, list); | |
24640 | +} | |
24641 | + | |
24642 | static void tasklet_hi_action(struct softirq_action *a) | |
24643 | { | |
24644 | struct tasklet_struct *list; | |
24645 | ||
24646 | local_irq_disable(); | |
24647 | + | |
24648 | list = __this_cpu_read(tasklet_hi_vec.head); | |
24649 | __this_cpu_write(tasklet_hi_vec.head, NULL); | |
24650 | __this_cpu_write(tasklet_hi_vec.tail, this_cpu_ptr(&tasklet_hi_vec.head)); | |
24651 | - local_irq_enable(); | |
24652 | ||
24653 | - while (list) { | |
24654 | - struct tasklet_struct *t = list; | |
24655 | - | |
24656 | - list = list->next; | |
24657 | - | |
24658 | - if (tasklet_trylock(t)) { | |
24659 | - if (!atomic_read(&t->count)) { | |
24660 | - if (!test_and_clear_bit(TASKLET_STATE_SCHED, | |
24661 | - &t->state)) | |
24662 | - BUG(); | |
24663 | - t->func(t->data); | |
24664 | - tasklet_unlock(t); | |
24665 | - continue; | |
24666 | - } | |
24667 | - tasklet_unlock(t); | |
24668 | - } | |
24669 | + local_irq_enable(); | |
24670 | ||
24671 | - local_irq_disable(); | |
24672 | - t->next = NULL; | |
24673 | - *__this_cpu_read(tasklet_hi_vec.tail) = t; | |
24674 | - __this_cpu_write(tasklet_hi_vec.tail, &(t->next)); | |
24675 | - __raise_softirq_irqoff(HI_SOFTIRQ); | |
24676 | - local_irq_enable(); | |
24677 | - } | |
24678 | + __tasklet_action(a, list); | |
24679 | } | |
24680 | ||
24681 | void tasklet_init(struct tasklet_struct *t, | |
24682 | @@ -572,7 +1070,7 @@ void tasklet_kill(struct tasklet_struct *t) | |
24683 | ||
24684 | while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { | |
24685 | do { | |
24686 | - yield(); | |
24687 | + msleep(1); | |
24688 | } while (test_bit(TASKLET_STATE_SCHED, &t->state)); | |
24689 | } | |
24690 | tasklet_unlock_wait(t); | |
24691 | @@ -646,25 +1144,26 @@ void __init softirq_init(void) | |
24692 | open_softirq(HI_SOFTIRQ, tasklet_hi_action); | |
24693 | } | |
24694 | ||
24695 | -static int ksoftirqd_should_run(unsigned int cpu) | |
24696 | -{ | |
24697 | - return local_softirq_pending(); | |
24698 | -} | |
24699 | - | |
24700 | -static void run_ksoftirqd(unsigned int cpu) | |
24701 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
24702 | +void tasklet_unlock_wait(struct tasklet_struct *t) | |
24703 | { | |
24704 | - local_irq_disable(); | |
24705 | - if (local_softirq_pending()) { | |
24706 | + while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { | |
24707 | /* | |
24708 | - * We can safely run softirq on inline stack, as we are not deep | |
24709 | - * in the task stack here. | |
24710 | + * Hack for now to avoid this busy-loop: | |
24711 | */ | |
24712 | - __do_softirq(); | |
24713 | - local_irq_enable(); | |
24714 | - cond_resched_rcu_qs(); | |
24715 | - return; | |
24716 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24717 | + msleep(1); | |
24718 | +#else | |
24719 | + barrier(); | |
24720 | +#endif | |
24721 | } | |
24722 | - local_irq_enable(); | |
24723 | +} | |
24724 | +EXPORT_SYMBOL(tasklet_unlock_wait); | |
24725 | +#endif | |
24726 | + | |
24727 | +static int ksoftirqd_should_run(unsigned int cpu) | |
24728 | +{ | |
24729 | + return ksoftirqd_softirq_pending(); | |
24730 | } | |
24731 | ||
24732 | #ifdef CONFIG_HOTPLUG_CPU | |
24733 | @@ -746,16 +1245,31 @@ static struct notifier_block cpu_nfb = { | |
24734 | ||
24735 | static struct smp_hotplug_thread softirq_threads = { | |
24736 | .store = &ksoftirqd, | |
24737 | + .setup = ksoftirqd_set_sched_params, | |
24738 | .thread_should_run = ksoftirqd_should_run, | |
24739 | .thread_fn = run_ksoftirqd, | |
24740 | .thread_comm = "ksoftirqd/%u", | |
24741 | }; | |
24742 | ||
24743 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24744 | +static struct smp_hotplug_thread softirq_timer_threads = { | |
24745 | + .store = &ktimer_softirqd, | |
24746 | + .setup = ktimer_softirqd_set_sched_params, | |
24747 | + .cleanup = ktimer_softirqd_clr_sched_params, | |
24748 | + .thread_should_run = ktimer_softirqd_should_run, | |
24749 | + .thread_fn = run_ksoftirqd, | |
24750 | + .thread_comm = "ktimersoftd/%u", | |
24751 | +}; | |
24752 | +#endif | |
24753 | + | |
24754 | static __init int spawn_ksoftirqd(void) | |
24755 | { | |
24756 | register_cpu_notifier(&cpu_nfb); | |
24757 | ||
24758 | BUG_ON(smpboot_register_percpu_thread(&softirq_threads)); | |
24759 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24760 | + BUG_ON(smpboot_register_percpu_thread(&softirq_timer_threads)); | |
24761 | +#endif | |
24762 | ||
24763 | return 0; | |
24764 | } | |
24765 | diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c | |
24766 | index a3bbaee77c58..f84d3b45cda7 100644 | |
24767 | --- a/kernel/stop_machine.c | |
24768 | +++ b/kernel/stop_machine.c | |
24769 | @@ -37,7 +37,7 @@ struct cpu_stop_done { | |
24770 | struct cpu_stopper { | |
24771 | struct task_struct *thread; | |
24772 | ||
24773 | - spinlock_t lock; | |
24774 | + raw_spinlock_t lock; | |
24775 | bool enabled; /* is this stopper enabled? */ | |
24776 | struct list_head works; /* list of pending works */ | |
24777 | ||
24778 | @@ -86,12 +86,12 @@ static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) | |
24779 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
24780 | unsigned long flags; | |
24781 | ||
24782 | - spin_lock_irqsave(&stopper->lock, flags); | |
24783 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
24784 | if (stopper->enabled) | |
24785 | __cpu_stop_queue_work(stopper, work); | |
24786 | else | |
24787 | cpu_stop_signal_done(work->done, false); | |
24788 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
24789 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
24790 | } | |
24791 | ||
24792 | /** | |
24793 | @@ -224,8 +224,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
24794 | int err; | |
24795 | ||
24796 | lg_double_lock(&stop_cpus_lock, cpu1, cpu2); | |
24797 | - spin_lock_irq(&stopper1->lock); | |
24798 | - spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
24799 | + raw_spin_lock_irq(&stopper1->lock); | |
24800 | + raw_spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING); | |
24801 | ||
24802 | err = -ENOENT; | |
24803 | if (!stopper1->enabled || !stopper2->enabled) | |
24804 | @@ -235,8 +235,8 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, | |
24805 | __cpu_stop_queue_work(stopper1, work1); | |
24806 | __cpu_stop_queue_work(stopper2, work2); | |
24807 | unlock: | |
24808 | - spin_unlock(&stopper2->lock); | |
24809 | - spin_unlock_irq(&stopper1->lock); | |
24810 | + raw_spin_unlock(&stopper2->lock); | |
24811 | + raw_spin_unlock_irq(&stopper1->lock); | |
24812 | lg_double_unlock(&stop_cpus_lock, cpu1, cpu2); | |
24813 | ||
24814 | return err; | |
24815 | @@ -258,7 +258,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |
24816 | struct cpu_stop_work work1, work2; | |
24817 | struct multi_stop_data msdata; | |
24818 | ||
24819 | - preempt_disable(); | |
24820 | + preempt_disable_nort(); | |
24821 | msdata = (struct multi_stop_data){ | |
24822 | .fn = fn, | |
24823 | .data = arg, | |
24824 | @@ -278,11 +278,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * | |
24825 | if (cpu1 > cpu2) | |
24826 | swap(cpu1, cpu2); | |
24827 | if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) { | |
24828 | - preempt_enable(); | |
24829 | + preempt_enable_nort(); | |
24830 | return -ENOENT; | |
24831 | } | |
24832 | ||
24833 | - preempt_enable(); | |
24834 | + preempt_enable_nort(); | |
24835 | ||
24836 | wait_for_completion(&done.completion); | |
24837 | ||
24838 | @@ -315,17 +315,20 @@ static DEFINE_MUTEX(stop_cpus_mutex); | |
24839 | ||
24840 | static void queue_stop_cpus_work(const struct cpumask *cpumask, | |
24841 | cpu_stop_fn_t fn, void *arg, | |
24842 | - struct cpu_stop_done *done) | |
24843 | + struct cpu_stop_done *done, bool inactive) | |
24844 | { | |
24845 | struct cpu_stop_work *work; | |
24846 | unsigned int cpu; | |
24847 | ||
24848 | /* | |
24849 | - * Disable preemption while queueing to avoid getting | |
24850 | - * preempted by a stopper which might wait for other stoppers | |
24851 | - * to enter @fn which can lead to deadlock. | |
24852 | + * Make sure that all work is queued on all cpus before | |
24853 | + * any of the cpus can execute it. | |
24854 | */ | |
24855 | - lg_global_lock(&stop_cpus_lock); | |
24856 | + if (!inactive) | |
24857 | + lg_global_lock(&stop_cpus_lock); | |
24858 | + else | |
24859 | + lg_global_trylock_relax(&stop_cpus_lock); | |
24860 | + | |
24861 | for_each_cpu(cpu, cpumask) { | |
24862 | work = &per_cpu(cpu_stopper.stop_work, cpu); | |
24863 | work->fn = fn; | |
24864 | @@ -342,7 +345,7 @@ static int __stop_cpus(const struct cpumask *cpumask, | |
24865 | struct cpu_stop_done done; | |
24866 | ||
24867 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | |
24868 | - queue_stop_cpus_work(cpumask, fn, arg, &done); | |
24869 | + queue_stop_cpus_work(cpumask, fn, arg, &done, false); | |
24870 | wait_for_completion(&done.completion); | |
24871 | return done.executed ? done.ret : -ENOENT; | |
24872 | } | |
24873 | @@ -422,9 +425,9 @@ static int cpu_stop_should_run(unsigned int cpu) | |
24874 | unsigned long flags; | |
24875 | int run; | |
24876 | ||
24877 | - spin_lock_irqsave(&stopper->lock, flags); | |
24878 | + raw_spin_lock_irqsave(&stopper->lock, flags); | |
24879 | run = !list_empty(&stopper->works); | |
24880 | - spin_unlock_irqrestore(&stopper->lock, flags); | |
24881 | + raw_spin_unlock_irqrestore(&stopper->lock, flags); | |
24882 | return run; | |
24883 | } | |
24884 | ||
24885 | @@ -436,13 +439,13 @@ static void cpu_stopper_thread(unsigned int cpu) | |
24886 | ||
24887 | repeat: | |
24888 | work = NULL; | |
24889 | - spin_lock_irq(&stopper->lock); | |
24890 | + raw_spin_lock_irq(&stopper->lock); | |
24891 | if (!list_empty(&stopper->works)) { | |
24892 | work = list_first_entry(&stopper->works, | |
24893 | struct cpu_stop_work, list); | |
24894 | list_del_init(&work->list); | |
24895 | } | |
24896 | - spin_unlock_irq(&stopper->lock); | |
24897 | + raw_spin_unlock_irq(&stopper->lock); | |
24898 | ||
24899 | if (work) { | |
24900 | cpu_stop_fn_t fn = work->fn; | |
24901 | @@ -450,6 +453,16 @@ repeat: | |
24902 | struct cpu_stop_done *done = work->done; | |
24903 | char ksym_buf[KSYM_NAME_LEN] __maybe_unused; | |
24904 | ||
24905 | + /* | |
24906 | + * Wait until the stopper has finished scheduling on all | |
24907 | + * cpus. | |
24908 | + */ | |
24909 | + lg_global_lock(&stop_cpus_lock); | |
24910 | + /* | |
24911 | + * Let other cpu threads continue as well | |
24912 | + */ | |
24913 | + lg_global_unlock(&stop_cpus_lock); | |
24914 | + | |
24915 | /* cpu stop callbacks are not allowed to sleep */ | |
24916 | preempt_disable(); | |
24917 | ||
24918 | @@ -520,10 +533,12 @@ static int __init cpu_stop_init(void) | |
24919 | for_each_possible_cpu(cpu) { | |
24920 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | |
24921 | ||
24922 | - spin_lock_init(&stopper->lock); | |
24923 | + raw_spin_lock_init(&stopper->lock); | |
24924 | INIT_LIST_HEAD(&stopper->works); | |
24925 | } | |
24926 | ||
24927 | + lg_lock_init(&stop_cpus_lock, "stop_cpus_lock"); | |
24928 | + | |
24929 | BUG_ON(smpboot_register_percpu_thread(&cpu_stop_threads)); | |
24930 | stop_machine_unpark(raw_smp_processor_id()); | |
24931 | stop_machine_initialized = true; | |
24932 | @@ -620,7 +635,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, | |
24933 | set_state(&msdata, MULTI_STOP_PREPARE); | |
24934 | cpu_stop_init_done(&done, num_active_cpus()); | |
24935 | queue_stop_cpus_work(cpu_active_mask, multi_cpu_stop, &msdata, | |
24936 | - &done); | |
24937 | + &done, true); | |
24938 | ret = multi_cpu_stop(&msdata); | |
24939 | ||
24940 | /* Busy wait for completion. */ | |
24941 | diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c | |
24942 | index 17f7bcff1e02..ba3d60144838 100644 | |
24943 | --- a/kernel/time/hrtimer.c | |
24944 | +++ b/kernel/time/hrtimer.c | |
24945 | @@ -48,11 +48,13 @@ | |
24946 | #include <linux/sched/rt.h> | |
24947 | #include <linux/sched/deadline.h> | |
24948 | #include <linux/timer.h> | |
24949 | +#include <linux/kthread.h> | |
24950 | #include <linux/freezer.h> | |
24951 | ||
24952 | #include <asm/uaccess.h> | |
24953 | ||
24954 | #include <trace/events/timer.h> | |
24955 | +#include <trace/events/hist.h> | |
24956 | ||
24957 | #include "tick-internal.h" | |
24958 | ||
24959 | @@ -717,6 +719,44 @@ static void clock_was_set_work(struct work_struct *work) | |
24960 | ||
24961 | static DECLARE_WORK(hrtimer_work, clock_was_set_work); | |
24962 | ||
24963 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
24964 | +/* | |
24965 | + * RT cannot call schedule_work() from hard interrupt context. | |
24966 | + * Use a thread to do the real work instead. | |
24967 | + */ | |
24968 | +static struct task_struct *clock_set_delay_thread; | |
24969 | +static bool do_clock_set_delay; | |
24970 | + | |
24971 | +static int run_clock_set_delay(void *ignore) | |
24972 | +{ | |
24973 | + while (!kthread_should_stop()) { | |
24974 | + set_current_state(TASK_INTERRUPTIBLE); | |
24975 | + if (do_clock_set_delay) { | |
24976 | + do_clock_set_delay = false; | |
24977 | + schedule_work(&hrtimer_work); | |
24978 | + } | |
24979 | + schedule(); | |
24980 | + } | |
24981 | + __set_current_state(TASK_RUNNING); | |
24982 | + return 0; | |
24983 | +} | |
24984 | + | |
24985 | +void clock_was_set_delayed(void) | |
24986 | +{ | |
24987 | + do_clock_set_delay = true; | |
24988 | + /* Make the flag visible before waking up the thread */ | |
24989 | + smp_wmb(); | |
24990 | + wake_up_process(clock_set_delay_thread); | |
24991 | +} | |
24992 | + | |
24993 | +static __init int create_clock_set_delay_thread(void) | |
24994 | +{ | |
24995 | + clock_set_delay_thread = kthread_run(run_clock_set_delay, NULL, "kclksetdelayd"); | |
24996 | + BUG_ON(!clock_set_delay_thread); | |
24997 | + return 0; | |
24998 | +} | |
24999 | +early_initcall(create_clock_set_delay_thread); | |
25000 | +#else /* PREEMPT_RT_FULL */ | |
25001 | /* | |
25002 | * Called from timekeeping and resume code to reprogram the hrtimer | |
25003 | * interrupt device on all cpus. | |
25004 | @@ -725,6 +765,7 @@ void clock_was_set_delayed(void) | |
25005 | { | |
25006 | schedule_work(&hrtimer_work); | |
25007 | } | |
25008 | +#endif | |
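
The kthread handoff above is the classic flag-plus-wakeup pattern: the producer publishes the flag with smp_wmb() before wake_up_process(), and the consumer's set_current_state() supplies the matching barrier before the flag is re-read. A hedged user-space analogue with C11 atomics (names illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool do_delayed_work;

    void producer_signal(void)       /* plays clock_was_set_delayed() */
    {
        /* Release: flag is visible before the (elided) thread wakeup. */
        atomic_store_explicit(&do_delayed_work, true, memory_order_release);
        /* wake_up_process(clock_set_delay_thread) goes here in the kernel */
    }

    bool consumer_take(void)         /* plays run_clock_set_delay()'s check */
    {
        /* Acquire pairs with the release store above; clears the flag. */
        return atomic_exchange_explicit(&do_delayed_work, false,
                                        memory_order_acq_rel);
    }
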
25009 | ||
25010 | #else | |
25011 | ||
25012 | @@ -734,11 +775,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; } | |
25013 | static inline void hrtimer_switch_to_hres(void) { } | |
25014 | static inline void | |
25015 | hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } | |
25016 | -static inline int hrtimer_reprogram(struct hrtimer *timer, | |
25017 | - struct hrtimer_clock_base *base) | |
25018 | -{ | |
25019 | - return 0; | |
25020 | -} | |
25021 | +static inline void hrtimer_reprogram(struct hrtimer *timer, | |
25022 | + struct hrtimer_clock_base *base) { } | |
25023 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | |
25024 | static inline void retrigger_next_event(void *arg) { } | |
25025 | ||
25026 | @@ -870,6 +908,32 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) | |
25027 | } | |
25028 | EXPORT_SYMBOL_GPL(hrtimer_forward); | |
25029 | ||
25030 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25031 | +# define wake_up_timer_waiters(b) wake_up(&(b)->wait) | |
25032 | + | |
25033 | +/** | |
25034 | + * hrtimer_wait_for_timer - Wait for a running timer | |
25035 | + * | |
25036 | + * @timer: timer to wait for | |
25037 | + * | |
25038 | + * The function waits on the waitqueue of the timer base in case | |
25039 | + * the timer's callback function is currently executing. The | |
25040 | + * waitqueue is woken up after the timer callback function has | |
25041 | + * finished execution. | |
25042 | + */ | |
25043 | +void hrtimer_wait_for_timer(const struct hrtimer *timer) | |
25044 | +{ | |
25045 | + struct hrtimer_clock_base *base = timer->base; | |
25046 | + | |
25047 | + if (base && base->cpu_base && !timer->irqsafe) | |
25048 | + wait_event(base->cpu_base->wait, | |
25049 | + !(hrtimer_callback_running(timer))); | |
25050 | +} | |
25051 | + | |
25052 | +#else | |
25053 | +# define wake_up_timer_waiters(b) do { } while (0) | |
25054 | +#endif | |
25055 | + | |
25056 | /* | |
25057 | * enqueue_hrtimer - internal function to (re)start a timer | |
25058 | * | |
25059 | @@ -911,6 +975,11 @@ static void __remove_hrtimer(struct hrtimer *timer, | |
25060 | if (!(state & HRTIMER_STATE_ENQUEUED)) | |
25061 | return; | |
25062 | ||
25063 | + if (unlikely(!list_empty(&timer->cb_entry))) { | |
25064 | + list_del_init(&timer->cb_entry); | |
25065 | + return; | |
25066 | + } | |
25067 | + | |
25068 | if (!timerqueue_del(&base->active, &timer->node)) | |
25069 | cpu_base->active_bases &= ~(1 << base->index); | |
25070 | ||
25071 | @@ -1006,7 +1075,16 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, | |
25072 | new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); | |
25073 | ||
25074 | timer_stats_hrtimer_set_start_info(timer); | |
25075 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
25076 | + { | |
25077 | + ktime_t now = new_base->get_time(); | |
25078 | ||
25079 | + if (ktime_to_ns(tim) < ktime_to_ns(now)) | |
25080 | + timer->praecox = now; | |
25081 | + else | |
25082 | + timer->praecox = ktime_set(0, 0); | |
25083 | + } | |
25084 | +#endif | |
25085 | leftmost = enqueue_hrtimer(timer, new_base); | |
25086 | if (!leftmost) | |
25087 | goto unlock; | |
25088 | @@ -1078,7 +1156,7 @@ int hrtimer_cancel(struct hrtimer *timer) | |
25089 | ||
25090 | if (ret >= 0) | |
25091 | return ret; | |
25092 | - cpu_relax(); | |
25093 | + hrtimer_wait_for_timer(timer); | |
25094 | } | |
25095 | } | |
25096 | EXPORT_SYMBOL_GPL(hrtimer_cancel); | |
25097 | @@ -1142,6 +1220,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |
25098 | ||
25099 | base = hrtimer_clockid_to_base(clock_id); | |
25100 | timer->base = &cpu_base->clock_base[base]; | |
25101 | + INIT_LIST_HEAD(&timer->cb_entry); | |
25102 | timerqueue_init(&timer->node); | |
25103 | ||
25104 | #ifdef CONFIG_TIMER_STATS | |
25105 | @@ -1182,6 +1261,7 @@ bool hrtimer_active(const struct hrtimer *timer) | |
25106 | seq = raw_read_seqcount_begin(&cpu_base->seq); | |
25107 | ||
25108 | if (timer->state != HRTIMER_STATE_INACTIVE || | |
25109 | + cpu_base->running_soft == timer || | |
25110 | cpu_base->running == timer) | |
25111 | return true; | |
25112 | ||
25113 | @@ -1280,10 +1360,112 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, | |
25114 | cpu_base->running = NULL; | |
25115 | } | |
25116 | ||
25117 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25118 | +static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, | |
25119 | + struct hrtimer_clock_base *base) | |
25120 | +{ | |
25121 | + int leftmost; | |
25122 | + | |
25123 | + if (restart != HRTIMER_NORESTART && | |
25124 | + !(timer->state & HRTIMER_STATE_ENQUEUED)) { | |
25125 | + | |
25126 | + leftmost = enqueue_hrtimer(timer, base); | |
25127 | + if (!leftmost) | |
25128 | + return; | |
25129 | +#ifdef CONFIG_HIGH_RES_TIMERS | |
25130 | + if (!hrtimer_is_hres_active(timer)) { | |
25131 | + /* | |
25132 | + * Kick to reschedule the next tick to handle the new timer | |
25133 | + * on dynticks target. | |
25134 | + */ | |
25135 | + if (base->cpu_base->nohz_active) | |
25136 | + wake_up_nohz_cpu(base->cpu_base->cpu); | |
25137 | + } else { | |
25138 | + | |
25139 | + hrtimer_reprogram(timer, base); | |
25140 | + } | |
25141 | +#endif | |
25142 | + } | |
25143 | +} | |
25144 | + | |
25145 | +/* | |
25146 | + * The changes in mainline which removed the callback modes from | |
25147 | + * hrtimer are not yet working with -rt. The non-wakeup_process() | |
25148 | + * based callbacks which involve sleeping locks need to be treated | |
25149 | + * separately. | |
25150 | + */ | |
25151 | +static void hrtimer_rt_run_pending(void) | |
25152 | +{ | |
25153 | + enum hrtimer_restart (*fn)(struct hrtimer *); | |
25154 | + struct hrtimer_cpu_base *cpu_base; | |
25155 | + struct hrtimer_clock_base *base; | |
25156 | + struct hrtimer *timer; | |
25157 | + int index, restart; | |
25158 | + | |
25159 | + local_irq_disable(); | |
25160 | + cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); | |
25161 | + | |
25162 | + raw_spin_lock(&cpu_base->lock); | |
25163 | + | |
25164 | + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { | |
25165 | + base = &cpu_base->clock_base[index]; | |
25166 | + | |
25167 | + while (!list_empty(&base->expired)) { | |
25168 | + timer = list_first_entry(&base->expired, | |
25169 | + struct hrtimer, cb_entry); | |
25170 | + | |
25171 | + /* | |
25172 | + * Same as the above __run_hrtimer function, | |
25173 | + * except that we run with interrupts enabled. | |
25174 | + */ | |
25175 | + debug_deactivate(timer); | |
25176 | + cpu_base->running_soft = timer; | |
25177 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
25178 | + | |
25179 | + __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); | |
25180 | + timer_stats_account_hrtimer(timer); | |
25181 | + fn = timer->function; | |
25182 | + | |
25183 | + raw_spin_unlock_irq(&cpu_base->lock); | |
25184 | + restart = fn(timer); | |
25185 | + raw_spin_lock_irq(&cpu_base->lock); | |
25186 | + | |
25187 | + hrtimer_rt_reprogram(restart, timer, base); | |
25188 | + raw_write_seqcount_barrier(&cpu_base->seq); | |
25189 | + | |
25190 | + WARN_ON_ONCE(cpu_base->running_soft != timer); | |
25191 | + cpu_base->running_soft = NULL; | |
25192 | + } | |
25193 | + } | |
25194 | + | |
25195 | + raw_spin_unlock_irq(&cpu_base->lock); | |
25196 | + | |
25197 | + wake_up_timer_waiters(cpu_base); | |
25198 | +} | |
25199 | + | |
25200 | +static int hrtimer_rt_defer(struct hrtimer *timer) | |
25201 | +{ | |
25202 | + if (timer->irqsafe) | |
25203 | + return 0; | |
25204 | + | |
25205 | + __remove_hrtimer(timer, timer->base, timer->state, 0); | |
25206 | + list_add_tail(&timer->cb_entry, &timer->base->expired); | |
25207 | + return 1; | |
25208 | +} | |
25209 | + | |
25210 | +#else | |
25211 | + | |
25212 | +static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } | |
25213 | + | |
25214 | +#endif | |
25215 | + | |
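
With hrtimer_rt_defer() in place, only timers explicitly marked irqsafe still run from hard interrupt context on RT; everything else is queued on base->expired and handled by the HRTIMER softirq. A hedged sketch of how a callback that takes no sleeping locks would opt in, using the irqsafe field this patch adds (the my_* names are hypothetical):

    static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
    {
        /* Must not take sleeping locks: may run in hard irq context. */
        return HRTIMER_NORESTART;
    }

    static struct hrtimer my_timer;

    static void my_timer_setup(void)
    {
        hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        my_timer.function = my_timer_fn;
        my_timer.irqsafe = 1;   /* skip the softirq deferral on RT */
    }
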
25216 | +static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer); | |
25217 | + | |
25218 | static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
25219 | { | |
25220 | struct hrtimer_clock_base *base = cpu_base->clock_base; | |
25221 | unsigned int active = cpu_base->active_bases; | |
25222 | + int raise = 0; | |
25223 | ||
25224 | for (; active; base++, active >>= 1) { | |
25225 | struct timerqueue_node *node; | |
25226 | @@ -1299,6 +1481,15 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
25227 | ||
25228 | timer = container_of(node, struct hrtimer, node); | |
25229 | ||
25230 | + trace_hrtimer_interrupt(raw_smp_processor_id(), | |
25231 | + ktime_to_ns(ktime_sub(ktime_to_ns(timer->praecox) ? | |
25232 | + timer->praecox : hrtimer_get_expires(timer), | |
25233 | + basenow)), | |
25234 | + current, | |
25235 | + timer->function == hrtimer_wakeup ? | |
25236 | + container_of(timer, struct hrtimer_sleeper, | |
25237 | + timer)->task : NULL); | |
25238 | + | |
25239 | /* | |
25240 | * The immediate goal for using the softexpires is | |
25241 | * minimizing wakeups, not running timers at the | |
25242 | @@ -1314,9 +1505,14 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now) | |
25243 | if (basenow.tv64 < hrtimer_get_softexpires_tv64(timer)) | |
25244 | break; | |
25245 | ||
25246 | - __run_hrtimer(cpu_base, base, timer, &basenow); | |
25247 | + if (!hrtimer_rt_defer(timer)) | |
25248 | + __run_hrtimer(cpu_base, base, timer, &basenow); | |
25249 | + else | |
25250 | + raise = 1; | |
25251 | } | |
25252 | } | |
25253 | + if (raise) | |
25254 | + raise_softirq_irqoff(HRTIMER_SOFTIRQ); | |
25255 | } | |
25256 | ||
25257 | #ifdef CONFIG_HIGH_RES_TIMERS | |
25258 | @@ -1479,16 +1675,18 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) | |
25259 | void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) | |
25260 | { | |
25261 | sl->timer.function = hrtimer_wakeup; | |
25262 | + sl->timer.irqsafe = 1; | |
25263 | sl->task = task; | |
25264 | } | |
25265 | EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); | |
25266 | ||
25267 | -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) | |
25268 | +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode, | |
25269 | + unsigned long state) | |
25270 | { | |
25271 | hrtimer_init_sleeper(t, current); | |
25272 | ||
25273 | do { | |
25274 | - set_current_state(TASK_INTERRUPTIBLE); | |
25275 | + set_current_state(state); | |
25276 | hrtimer_start_expires(&t->timer, mode); | |
25277 | ||
25278 | if (likely(t->task)) | |
25279 | @@ -1530,7 +1728,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart) | |
25280 | HRTIMER_MODE_ABS); | |
25281 | hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); | |
25282 | ||
25283 | - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) | |
25284 | + /* cpu_chill() does not care about restart state. */ | |
25285 | + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE)) | |
25286 | goto out; | |
25287 | ||
25288 | rmtp = restart->nanosleep.rmtp; | |
25289 | @@ -1547,8 +1746,10 @@ out: | |
25290 | return ret; | |
25291 | } | |
25292 | ||
25293 | -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
25294 | - const enum hrtimer_mode mode, const clockid_t clockid) | |
25295 | +static long | |
25296 | +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
25297 | + const enum hrtimer_mode mode, const clockid_t clockid, | |
25298 | + unsigned long state) | |
25299 | { | |
25300 | struct restart_block *restart; | |
25301 | struct hrtimer_sleeper t; | |
25302 | @@ -1561,7 +1762,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
25303 | ||
25304 | hrtimer_init_on_stack(&t.timer, clockid, mode); | |
25305 | hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); | |
25306 | - if (do_nanosleep(&t, mode)) | |
25307 | + if (do_nanosleep(&t, mode, state)) | |
25308 | goto out; | |
25309 | ||
25310 | /* Absolute timers do not update the rmtp value and restart: */ | |
25311 | @@ -1588,6 +1789,12 @@ out: | |
25312 | return ret; | |
25313 | } | |
25314 | ||
25315 | +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, | |
25316 | + const enum hrtimer_mode mode, const clockid_t clockid) | |
25317 | +{ | |
25318 | + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE); | |
25319 | +} | |
25320 | + | |
25321 | SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
25322 | struct timespec __user *, rmtp) | |
25323 | { | |
25324 | @@ -1602,6 +1809,26 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, | |
25325 | return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); | |
25326 | } | |
25327 | ||
25328 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25329 | +/* | |
25330 | + * Sleep for 1 ms in the hope that whoever holds what we want will let it go. | |
25331 | + */ | |
25332 | +void cpu_chill(void) | |
25333 | +{ | |
25334 | + struct timespec tu = { | |
25335 | + .tv_nsec = NSEC_PER_MSEC, | |
25336 | + }; | |
25337 | + unsigned int freeze_flag = current->flags & PF_NOFREEZE; | |
25338 | + | |
25339 | + current->flags |= PF_NOFREEZE; | |
25340 | + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC, | |
25341 | + TASK_UNINTERRUPTIBLE); | |
25342 | + if (!freeze_flag) | |
25343 | + current->flags &= ~PF_NOFREEZE; | |
25344 | +} | |
25345 | +EXPORT_SYMBOL(cpu_chill); | |
25346 | +#endif | |
25347 | + | |
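
cpu_chill() exists to replace cpu_relax() in retry loops on RT, where spinning in a high-priority task can starve the (possibly preempted) holder of whatever the loop is waiting for; sleeping for 1 ms gives the holder a chance to run. A hedged kernel-style sketch of a typical call site (try_grab() is a hypothetical trylock; assumes an RT tree that provides cpu_chill()):

    #include <linux/delay.h>    /* cpu_chill() on RT, cpu_relax() otherwise */

    static void wait_for_resource(void)
    {
        while (!try_grab()) {   /* hypothetical trylock that can fail */
    #ifdef CONFIG_PREEMPT_RT_FULL
            cpu_chill();        /* sleep 1 ms; lets the holder make progress */
    #else
            cpu_relax();        /* a short busy-wait is acceptable on !RT */
    #endif
        }
    }
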
25348 | /* | |
25349 | * Functions related to boot-time initialization: | |
25350 | */ | |
25351 | @@ -1613,10 +1840,14 @@ static void init_hrtimers_cpu(int cpu) | |
25352 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | |
25353 | cpu_base->clock_base[i].cpu_base = cpu_base; | |
25354 | timerqueue_init_head(&cpu_base->clock_base[i].active); | |
25355 | + INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); | |
25356 | } | |
25357 | ||
25358 | cpu_base->cpu = cpu; | |
25359 | hrtimer_init_hres(cpu_base); | |
25360 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25361 | + init_waitqueue_head(&cpu_base->wait); | |
25362 | +#endif | |
25363 | } | |
25364 | ||
25365 | #ifdef CONFIG_HOTPLUG_CPU | |
25366 | @@ -1714,11 +1945,21 @@ static struct notifier_block hrtimers_nb = { | |
25367 | .notifier_call = hrtimer_cpu_notify, | |
25368 | }; | |
25369 | ||
25370 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25371 | +static void run_hrtimer_softirq(struct softirq_action *h) | |
25372 | +{ | |
25373 | + hrtimer_rt_run_pending(); | |
25374 | +} | |
25375 | +#endif | |
25376 | + | |
25377 | void __init hrtimers_init(void) | |
25378 | { | |
25379 | hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, | |
25380 | (void *)(long)smp_processor_id()); | |
25381 | register_cpu_notifier(&hrtimers_nb); | |
25382 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25383 | + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); | |
25384 | +#endif | |
25385 | } | |
25386 | ||
25387 | /** | |
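
A minimal sketch of the retry loop cpu_chill() is meant for — not part of the patch: on PREEMPT_RT, busy-waiting with cpu_relax() can livelock against a preempted lock holder, so the waiter sleeps for a millisecond instead. Here try_take_resource() and struct my_resource are hypothetical stand-ins, and the declaration of cpu_chill() lives in another hunk of this series.

	/* Hypothetical retry loop; cpu_chill() sleeps 1 ms uninterruptibly
	 * (and non-freezably) instead of spinning. */
	static void wait_for_resource(struct my_resource *res)
	{
		while (!try_take_resource(res)) {
	#ifdef CONFIG_PREEMPT_RT_FULL
			cpu_chill();	/* give the holder a chance to run */
	#else
			cpu_relax();
	#endif
		}
	}
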
25388 | diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c | |
25389 | index 1d5c7204ddc9..184de6751180 100644 | |
25390 | --- a/kernel/time/itimer.c | |
25391 | +++ b/kernel/time/itimer.c | |
25392 | @@ -213,6 +213,7 @@ again: | |
25393 | /* We are sharing ->siglock with it_real_fn() */ | |
25394 | if (hrtimer_try_to_cancel(timer) < 0) { | |
25395 | spin_unlock_irq(&tsk->sighand->siglock); | |
25396 | + hrtimer_wait_for_timer(&tsk->signal->real_timer); | |
25397 | goto again; | |
25398 | } | |
25399 | expires = timeval_to_ktime(value->it_value); | |
25400 | diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c | |
25401 | index 347fecf86a3f..2ede47408a3e 100644 | |
25402 | --- a/kernel/time/jiffies.c | |
25403 | +++ b/kernel/time/jiffies.c | |
25404 | @@ -74,7 +74,8 @@ static struct clocksource clocksource_jiffies = { | |
25405 | .max_cycles = 10, | |
25406 | }; | |
25407 | ||
25408 | -__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock); | |
25409 | +__cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); | |
25410 | +__cacheline_aligned_in_smp seqcount_t jiffies_seq; | |
25411 | ||
25412 | #if (BITS_PER_LONG < 64) | |
25413 | u64 get_jiffies_64(void) | |
25414 | @@ -83,9 +84,9 @@ u64 get_jiffies_64(void) | |
25415 | u64 ret; | |
25416 | ||
25417 | do { | |
25418 | - seq = read_seqbegin(&jiffies_lock); | |
25419 | + seq = read_seqcount_begin(&jiffies_seq); | |
25420 | ret = jiffies_64; | |
25421 | - } while (read_seqretry(&jiffies_lock, seq)); | |
25422 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
25423 | return ret; | |
25424 | } | |
25425 | EXPORT_SYMBOL(get_jiffies_64); | |
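
The split replaces one seqlock_t with a raw spinlock for writers plus a bare seqcount for readers: on RT a seqlock_t's embedded spinlock_t becomes a sleeping lock, which the tick path (hard interrupt context) cannot take. As an illustration using only the names from this hunk, the write side pairs up as below; these helpers are not in the patch — the later tick-common.c and timekeeping.c hunks open-code the same sequence.

	static inline void jiffies_update_lock(void)
	{
		raw_spin_lock(&jiffies_lock);	    /* serialize writers, even on RT */
		write_seqcount_begin(&jiffies_seq); /* make lockless readers retry */
	}

	static inline void jiffies_update_unlock(void)
	{
		write_seqcount_end(&jiffies_seq);
		raw_spin_unlock(&jiffies_lock);
	}
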
25426 | diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c | |
25427 | index ab861771e37f..0f6868fd2de6 100644 | |
25428 | --- a/kernel/time/ntp.c | |
25429 | +++ b/kernel/time/ntp.c | |
25430 | @@ -10,6 +10,7 @@ | |
25431 | #include <linux/workqueue.h> | |
25432 | #include <linux/hrtimer.h> | |
25433 | #include <linux/jiffies.h> | |
25434 | +#include <linux/kthread.h> | |
25435 | #include <linux/math64.h> | |
25436 | #include <linux/timex.h> | |
25437 | #include <linux/time.h> | |
25438 | @@ -562,10 +563,52 @@ static void sync_cmos_clock(struct work_struct *work) | |
25439 | &sync_cmos_work, timespec64_to_jiffies(&next)); | |
25440 | } | |
25441 | ||
25442 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25443 | +/* | |
25444 | + * RT cannot call schedule_delayed_work() from hard interrupt context, | |
25445 | + * so a dedicated thread has to queue the real work. | |
25446 | + */ | |
25447 | +static struct task_struct *cmos_delay_thread; | |
25448 | +static bool do_cmos_delay; | |
25449 | + | |
25450 | +static int run_cmos_delay(void *ignore) | |
25451 | +{ | |
25452 | + while (!kthread_should_stop()) { | |
25453 | + set_current_state(TASK_INTERRUPTIBLE); | |
25454 | + if (do_cmos_delay) { | |
25455 | + do_cmos_delay = false; | |
25456 | + queue_delayed_work(system_power_efficient_wq, | |
25457 | + &sync_cmos_work, 0); | |
25458 | + } | |
25459 | + schedule(); | |
25460 | + } | |
25461 | + __set_current_state(TASK_RUNNING); | |
25462 | + return 0; | |
25463 | +} | |
25464 | + | |
25465 | +void ntp_notify_cmos_timer(void) | |
25466 | +{ | |
25467 | + do_cmos_delay = true; | |
25468 | + /* Make visible before waking up process */ | |
25469 | + smp_wmb(); | |
25470 | + wake_up_process(cmos_delay_thread); | |
25471 | +} | |
25472 | + | |
25473 | +static __init int create_cmos_delay_thread(void) | |
25474 | +{ | |
25475 | + cmos_delay_thread = kthread_run(run_cmos_delay, NULL, "kcmosdelayd"); | |
25476 | + BUG_ON(!cmos_delay_thread); | |
25477 | + return 0; | |
25478 | +} | |
25479 | +early_initcall(create_cmos_delay_thread); | |
25480 | + | |
25481 | +#else | |
25482 | + | |
25483 | void ntp_notify_cmos_timer(void) | |
25484 | { | |
25485 | queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); | |
25486 | } | |
25487 | +#endif /* CONFIG_PREEMPT_RT_FULL */ | |
25488 | ||
25489 | #else | |
25490 | void ntp_notify_cmos_timer(void) { } | |
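
The flag-plus-kthread shape above is the standard lost-wakeup-safe deferral pattern; a condensed sketch with hypothetical names (work_pending_flag, worker, do_deferred_work()) follows. Marking the task sleeping before testing the flag guarantees that a wakeup arriving between the test and schedule() is not lost.

	static bool work_pending_flag;		/* hypothetical */
	static struct task_struct *worker;	/* hypothetical */

	static int worker_fn(void *unused)
	{
		while (!kthread_should_stop()) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (work_pending_flag) {
				__set_current_state(TASK_RUNNING);
				work_pending_flag = false;
				do_deferred_work();	/* hypothetical */
				continue;
			}
			schedule();
		}
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	static void kick_worker(void)
	{
		work_pending_flag = true;
		smp_wmb();		/* flag visible before the wakeup */
		wake_up_process(worker);
	}
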
25491 | diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c | |
25492 | index 80016b329d94..b7342b6e6a5a 100644 | |
25493 | --- a/kernel/time/posix-cpu-timers.c | |
25494 | +++ b/kernel/time/posix-cpu-timers.c | |
25495 | @@ -3,6 +3,7 @@ | |
25496 | */ | |
25497 | ||
25498 | #include <linux/sched.h> | |
25499 | +#include <linux/sched/rt.h> | |
25500 | #include <linux/posix-timers.h> | |
25501 | #include <linux/errno.h> | |
25502 | #include <linux/math64.h> | |
25503 | @@ -650,7 +651,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, | |
25504 | /* | |
25505 | * Disarm any old timer after extracting its expiry time. | |
25506 | */ | |
25507 | - WARN_ON_ONCE(!irqs_disabled()); | |
25508 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
25509 | ||
25510 | ret = 0; | |
25511 | old_incr = timer->it.cpu.incr; | |
25512 | @@ -1092,7 +1093,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) | |
25513 | /* | |
25514 | * Now re-arm for the new expiry time. | |
25515 | */ | |
25516 | - WARN_ON_ONCE(!irqs_disabled()); | |
25517 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
25518 | arm_timer(timer); | |
25519 | unlock_task_sighand(p, &flags); | |
25520 | ||
25521 | @@ -1183,13 +1184,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk) | |
25522 | * already updated our counts. We need to check if any timers fire now. | |
25523 | * Interrupts are disabled. | |
25524 | */ | |
25525 | -void run_posix_cpu_timers(struct task_struct *tsk) | |
25526 | +static void __run_posix_cpu_timers(struct task_struct *tsk) | |
25527 | { | |
25528 | LIST_HEAD(firing); | |
25529 | struct k_itimer *timer, *next; | |
25530 | unsigned long flags; | |
25531 | ||
25532 | - WARN_ON_ONCE(!irqs_disabled()); | |
25533 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
25534 | ||
25535 | /* | |
25536 | * The fast path checks that there are no expired thread or thread | |
25537 | @@ -1243,6 +1244,190 @@ void run_posix_cpu_timers(struct task_struct *tsk) | |
25538 | } | |
25539 | } | |
25540 | ||
25541 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
25542 | +#include <linux/kthread.h> | |
25543 | +#include <linux/cpu.h> | |
25544 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_task); | |
25545 | +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); | |
25546 | + | |
25547 | +static int posix_cpu_timers_thread(void *data) | |
25548 | +{ | |
25549 | + int cpu = (long)data; | |
25550 | + | |
25551 | + BUG_ON(per_cpu(posix_timer_task,cpu) != current); | |
25552 | + | |
25553 | + while (!kthread_should_stop()) { | |
25554 | + struct task_struct *tsk = NULL; | |
25555 | + struct task_struct *next = NULL; | |
25556 | + | |
25557 | + if (cpu_is_offline(cpu)) | |
25558 | + goto wait_to_die; | |
25559 | + | |
25560 | + /* grab task list */ | |
25561 | + raw_local_irq_disable(); | |
25562 | + tsk = per_cpu(posix_timer_tasklist, cpu); | |
25563 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
25564 | + raw_local_irq_enable(); | |
25565 | + | |
25566 | + /* it's possible the list is empty; sleep until woken */ | |
25567 | + if (!tsk) { | |
25568 | + set_current_state(TASK_INTERRUPTIBLE); | |
25569 | + schedule(); | |
25570 | + __set_current_state(TASK_RUNNING); | |
25571 | + continue; | |
25572 | + } | |
25573 | + | |
25574 | + /* Process task list */ | |
25575 | + while (1) { | |
25576 | + /* save next */ | |
25577 | + next = tsk->posix_timer_list; | |
25578 | + | |
25579 | + /* run the task's timers, clear its list pointer | |
25580 | + * and drop the reference | |
25581 | + */ | |
25582 | + __run_posix_cpu_timers(tsk); | |
25583 | + tsk->posix_timer_list = NULL; | |
25584 | + put_task_struct(tsk); | |
25585 | + | |
25586 | + /* check if this is the last on the list */ | |
25587 | + if (next == tsk) | |
25588 | + break; | |
25589 | + tsk = next; | |
25590 | + } | |
25591 | + } | |
25592 | + return 0; | |
25593 | + | |
25594 | +wait_to_die: | |
25595 | + /* Wait for kthread_stop */ | |
25596 | + set_current_state(TASK_INTERRUPTIBLE); | |
25597 | + while (!kthread_should_stop()) { | |
25598 | + schedule(); | |
25599 | + set_current_state(TASK_INTERRUPTIBLE); | |
25600 | + } | |
25601 | + __set_current_state(TASK_RUNNING); | |
25602 | + return 0; | |
25603 | +} | |
25604 | + | |
25605 | +static inline int __fastpath_timer_check(struct task_struct *tsk) | |
25606 | +{ | |
25607 | + /* tsk == current, ensure it is safe to use ->signal/sighand */ | |
25608 | + if (unlikely(tsk->exit_state)) | |
25609 | + return 0; | |
25610 | + | |
25611 | + if (!task_cputime_zero(&tsk->cputime_expires)) | |
25612 | + return 1; | |
25613 | + | |
25614 | + if (!task_cputime_zero(&tsk->signal->cputime_expires)) | |
25615 | + return 1; | |
25616 | + | |
25617 | + return 0; | |
25618 | +} | |
25619 | + | |
25620 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
25621 | +{ | |
25622 | + unsigned long cpu = smp_processor_id(); | |
25623 | + struct task_struct *tasklist; | |
25624 | + | |
25625 | + BUG_ON(!irqs_disabled()); | |
25626 | + if(!per_cpu(posix_timer_task, cpu)) | |
25627 | + return; | |
25628 | + /* get per-cpu references */ | |
25629 | + tasklist = per_cpu(posix_timer_tasklist, cpu); | |
25630 | + | |
25631 | + /* check to see if we're already queued */ | |
25632 | + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { | |
25633 | + get_task_struct(tsk); | |
25634 | + if (tasklist) { | |
25635 | + tsk->posix_timer_list = tasklist; | |
25636 | + } else { | |
25637 | + /* | |
25638 | + * The list is terminated by a self-pointing | |
25639 | + * task_struct | |
25640 | + */ | |
25641 | + tsk->posix_timer_list = tsk; | |
25642 | + } | |
25643 | + per_cpu(posix_timer_tasklist, cpu) = tsk; | |
25644 | + | |
25645 | + wake_up_process(per_cpu(posix_timer_task, cpu)); | |
25646 | + } | |
25647 | +} | |
25648 | + | |
25649 | +/* | |
25650 | + * posix_cpu_thread_call - callback that gets triggered when a CPU is added. | |
25651 | + * Here we can start up the necessary timer thread for the new CPU. | |
25652 | + */ | |
25653 | +static int posix_cpu_thread_call(struct notifier_block *nfb, | |
25654 | + unsigned long action, void *hcpu) | |
25655 | +{ | |
25656 | + int cpu = (long)hcpu; | |
25657 | + struct task_struct *p; | |
25658 | + struct sched_param param; | |
25659 | + | |
25660 | + switch (action) { | |
25661 | + case CPU_UP_PREPARE: | |
25662 | + p = kthread_create(posix_cpu_timers_thread, hcpu, | |
25663 | + "posixcputmr/%d",cpu); | |
25664 | + if (IS_ERR(p)) | |
25665 | + return NOTIFY_BAD; | |
25666 | + p->flags |= PF_NOFREEZE; | |
25667 | + kthread_bind(p, cpu); | |
25668 | + /* Must be high prio to avoid getting starved */ | |
25669 | + param.sched_priority = MAX_RT_PRIO-1; | |
25670 | + sched_setscheduler(p, SCHED_FIFO, ¶m); | |
25671 | + per_cpu(posix_timer_task,cpu) = p; | |
25672 | + break; | |
25673 | + case CPU_ONLINE: | |
25674 | + /* Strictly unnecessary, as the first user will wake it. */ | |
25675 | + wake_up_process(per_cpu(posix_timer_task,cpu)); | |
25676 | + break; | |
25677 | +#ifdef CONFIG_HOTPLUG_CPU | |
25678 | + case CPU_UP_CANCELED: | |
25679 | + /* Unbind it from offline cpu so it can run. Fall thru. */ | |
25680 | + kthread_bind(per_cpu(posix_timer_task, cpu), | |
25681 | + cpumask_any(cpu_online_mask)); | |
25682 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
25683 | + per_cpu(posix_timer_task,cpu) = NULL; | |
25684 | + break; | |
25685 | + case CPU_DEAD: | |
25686 | + kthread_stop(per_cpu(posix_timer_task,cpu)); | |
25687 | + per_cpu(posix_timer_task,cpu) = NULL; | |
25688 | + break; | |
25689 | +#endif | |
25690 | + } | |
25691 | + return NOTIFY_OK; | |
25692 | +} | |
25693 | + | |
25694 | +/* Register at highest priority so that task migration (migrate_all_tasks) | |
25695 | + * happens before everything else. | |
25696 | + */ | |
25697 | +static struct notifier_block posix_cpu_thread_notifier = { | |
25698 | + .notifier_call = posix_cpu_thread_call, | |
25699 | + .priority = 10 | |
25700 | +}; | |
25701 | + | |
25702 | +static int __init posix_cpu_thread_init(void) | |
25703 | +{ | |
25704 | + void *hcpu = (void *)(long)smp_processor_id(); | |
25705 | + /* Start one for boot CPU. */ | |
25706 | + unsigned long cpu; | |
25707 | + | |
25708 | + /* init the per-cpu posix_timer_tasklets */ | |
25709 | + for_each_possible_cpu(cpu) | |
25710 | + per_cpu(posix_timer_tasklist, cpu) = NULL; | |
25711 | + | |
25712 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu); | |
25713 | + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu); | |
25714 | + register_cpu_notifier(&posix_cpu_thread_notifier); | |
25715 | + return 0; | |
25716 | +} | |
25717 | +early_initcall(posix_cpu_thread_init); | |
25718 | +#else /* CONFIG_PREEMPT_RT_BASE */ | |
25719 | +void run_posix_cpu_timers(struct task_struct *tsk) | |
25720 | +{ | |
25721 | + __run_posix_cpu_timers(tsk); | |
25722 | +} | |
25723 | +#endif /* CONFIG_PREEMPT_RT_BASE */ | |
25724 | + | |
25725 | /* | |
25726 | * Set one of the process-wide special case CPU timers or RLIMIT_CPU. | |
25727 | * The tsk->sighand->siglock must be held by the caller. | |
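
The per-CPU work list above is threaded through task_struct and terminated by a node pointing at itself rather than NULL, so NULL in ->posix_timer_list can unambiguously mean "not queued". A sketch of walking such a list — process_one() is a hypothetical stand-in; ->posix_timer_list is the field this series adds to task_struct:

	static void walk_self_terminated_list(struct task_struct *tsk)
	{
		while (tsk) {
			struct task_struct *next = tsk->posix_timer_list;

			process_one(tsk);		/* hypothetical */
			tsk->posix_timer_list = NULL;	/* mark dequeued */
			put_task_struct(tsk);		/* drop the enqueue ref */

			if (next == tsk)		/* self-pointer == tail */
				break;
			tsk = next;
		}
	}
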
25728 | diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c | |
25729 | index f2826c35e918..464a98155a0e 100644 | |
25730 | --- a/kernel/time/posix-timers.c | |
25731 | +++ b/kernel/time/posix-timers.c | |
25732 | @@ -506,6 +506,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) | |
25733 | static struct pid *good_sigevent(sigevent_t * event) | |
25734 | { | |
25735 | struct task_struct *rtn = current->group_leader; | |
25736 | + int sig = event->sigev_signo; | |
25737 | ||
25738 | if ((event->sigev_notify & SIGEV_THREAD_ID ) && | |
25739 | (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || | |
25740 | @@ -514,7 +515,8 @@ static struct pid *good_sigevent(sigevent_t * event) | |
25741 | return NULL; | |
25742 | ||
25743 | if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && | |
25744 | - ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) | |
25745 | + (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || | |
25746 | + sig_kernel_coredump(sig))) | |
25747 | return NULL; | |
25748 | ||
25749 | return task_pid(rtn); | |
25750 | @@ -826,6 +828,20 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) | |
25751 | return overrun; | |
25752 | } | |
25753 | ||
25754 | +/* | |
25755 | + * Protected by RCU! | |
25756 | + */ | |
25757 | +static void timer_wait_for_callback(struct k_clock *kc, struct k_itimer *timr) | |
25758 | +{ | |
25759 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
25760 | + if (kc->timer_set == common_timer_set) | |
25761 | + hrtimer_wait_for_timer(&timr->it.real.timer); | |
25762 | + else | |
25763 | + /* FIXME: Whacky hack for posix-cpu-timers */ | |
25764 | + schedule_timeout(1); | |
25765 | +#endif | |
25766 | +} | |
25767 | + | |
25768 | /* Set a POSIX.1b interval timer. */ | |
25769 | /* timr->it_lock is taken. */ | |
25770 | static int | |
25771 | @@ -903,6 +919,7 @@ retry: | |
25772 | if (!timr) | |
25773 | return -EINVAL; | |
25774 | ||
25775 | + rcu_read_lock(); | |
25776 | kc = clockid_to_kclock(timr->it_clock); | |
25777 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) | |
25778 | error = -EINVAL; | |
25779 | @@ -911,9 +928,12 @@ retry: | |
25780 | ||
25781 | unlock_timer(timr, flag); | |
25782 | if (error == TIMER_RETRY) { | |
25783 | + timer_wait_for_callback(kc, timr); | |
25784 | rtn = NULL; // We already got the old time... | |
25785 | + rcu_read_unlock(); | |
25786 | goto retry; | |
25787 | } | |
25788 | + rcu_read_unlock(); | |
25789 | ||
25790 | if (old_setting && !error && | |
25791 | copy_to_user(old_setting, &old_spec, sizeof (old_spec))) | |
25792 | @@ -951,10 +971,15 @@ retry_delete: | |
25793 | if (!timer) | |
25794 | return -EINVAL; | |
25795 | ||
25796 | + rcu_read_lock(); | |
25797 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
25798 | unlock_timer(timer, flags); | |
25799 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
25800 | + timer); | |
25801 | + rcu_read_unlock(); | |
25802 | goto retry_delete; | |
25803 | } | |
25804 | + rcu_read_unlock(); | |
25805 | ||
25806 | spin_lock(¤t->sighand->siglock); | |
25807 | list_del(&timer->list); | |
25808 | @@ -980,8 +1005,18 @@ static void itimer_delete(struct k_itimer *timer) | |
25809 | retry_delete: | |
25810 | spin_lock_irqsave(&timer->it_lock, flags); | |
25811 | ||
25812 | + /* On RT we can race with a deletion */ | |
25813 | + if (!timer->it_signal) { | |
25814 | + unlock_timer(timer, flags); | |
25815 | + return; | |
25816 | + } | |
25817 | + | |
25818 | if (timer_delete_hook(timer) == TIMER_RETRY) { | |
25819 | + rcu_read_lock(); | |
25820 | unlock_timer(timer, flags); | |
25821 | + timer_wait_for_callback(clockid_to_kclock(timer->it_clock), | |
25822 | + timer); | |
25823 | + rcu_read_unlock(); | |
25824 | goto retry_delete; | |
25825 | } | |
25826 | list_del(&timer->list); | |
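
On RT the hrtimer callback runs from the hrtimer softirq thread, so TIMER_RETRY can now mean "the handler is still running": the deleter must drop the timer lock, wait for the callback, and retry. Holding rcu_read_lock() across the unlocked window keeps the k_itimer from being freed under the waiter. A condensed skeleton of that retry, with simplified locking and a hypothetical timer_callback_running() check:

	static void rt_safe_timer_update(struct k_clock *kc, struct k_itimer *timr)
	{
		for (;;) {
			rcu_read_lock();
			spin_lock_irq(&timr->it_lock);
			if (!timer_callback_running(timr)) {	/* hypothetical */
				/* ... perform the update under the lock ... */
				spin_unlock_irq(&timr->it_lock);
				rcu_read_unlock();
				return;
			}
			spin_unlock_irq(&timr->it_lock);
			timer_wait_for_callback(kc, timr);	/* may sleep */
			rcu_read_unlock();
		}
	}
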
25827 | diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c | |
25828 | index 53d7184da0be..1b4ac3361c3f 100644 | |
25829 | --- a/kernel/time/tick-broadcast-hrtimer.c | |
25830 | +++ b/kernel/time/tick-broadcast-hrtimer.c | |
25831 | @@ -106,5 +106,6 @@ void tick_setup_hrtimer_broadcast(void) | |
25832 | { | |
25833 | hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
25834 | bctimer.function = bc_handler; | |
25835 | + bctimer.irqsafe = true; | |
25836 | clockevents_register_device(&ce_broadcast_hrtimer); | |
25837 | } | |
25838 | diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c | |
25839 | index 4fcd99e12aa0..5a47f2e98faf 100644 | |
25840 | --- a/kernel/time/tick-common.c | |
25841 | +++ b/kernel/time/tick-common.c | |
25842 | @@ -79,13 +79,15 @@ int tick_is_oneshot_available(void) | |
25843 | static void tick_periodic(int cpu) | |
25844 | { | |
25845 | if (tick_do_timer_cpu == cpu) { | |
25846 | - write_seqlock(&jiffies_lock); | |
25847 | + raw_spin_lock(&jiffies_lock); | |
25848 | + write_seqcount_begin(&jiffies_seq); | |
25849 | ||
25850 | /* Keep track of the next tick event */ | |
25851 | tick_next_period = ktime_add(tick_next_period, tick_period); | |
25852 | ||
25853 | do_timer(1); | |
25854 | - write_sequnlock(&jiffies_lock); | |
25855 | + write_seqcount_end(&jiffies_seq); | |
25856 | + raw_spin_unlock(&jiffies_lock); | |
25857 | update_wall_time(); | |
25858 | } | |
25859 | ||
25860 | @@ -157,9 +159,9 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) | |
25861 | ktime_t next; | |
25862 | ||
25863 | do { | |
25864 | - seq = read_seqbegin(&jiffies_lock); | |
25865 | + seq = read_seqcount_begin(&jiffies_seq); | |
25866 | next = tick_next_period; | |
25867 | - } while (read_seqretry(&jiffies_lock, seq)); | |
25868 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
25869 | ||
25870 | clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); | |
25871 | ||
25872 | diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c | |
25873 | index 22c57e191a23..d536824cbd36 100644 | |
25874 | --- a/kernel/time/tick-sched.c | |
25875 | +++ b/kernel/time/tick-sched.c | |
25876 | @@ -62,7 +62,8 @@ static void tick_do_update_jiffies64(ktime_t now) | |
25877 | return; | |
25878 | ||
25879 | /* Reevaluate with jiffies_lock held */ | |
25880 | - write_seqlock(&jiffies_lock); | |
25881 | + raw_spin_lock(&jiffies_lock); | |
25882 | + write_seqcount_begin(&jiffies_seq); | |
25883 | ||
25884 | delta = ktime_sub(now, last_jiffies_update); | |
25885 | if (delta.tv64 >= tick_period.tv64) { | |
25886 | @@ -85,10 +86,12 @@ static void tick_do_update_jiffies64(ktime_t now) | |
25887 | /* Keep the tick_next_period variable up to date */ | |
25888 | tick_next_period = ktime_add(last_jiffies_update, tick_period); | |
25889 | } else { | |
25890 | - write_sequnlock(&jiffies_lock); | |
25891 | + write_seqcount_end(&jiffies_seq); | |
25892 | + raw_spin_unlock(&jiffies_lock); | |
25893 | return; | |
25894 | } | |
25895 | - write_sequnlock(&jiffies_lock); | |
25896 | + write_seqcount_end(&jiffies_seq); | |
25897 | + raw_spin_unlock(&jiffies_lock); | |
25898 | update_wall_time(); | |
25899 | } | |
25900 | ||
25901 | @@ -99,12 +102,14 @@ static ktime_t tick_init_jiffy_update(void) | |
25902 | { | |
25903 | ktime_t period; | |
25904 | ||
25905 | - write_seqlock(&jiffies_lock); | |
25906 | + raw_spin_lock(&jiffies_lock); | |
25907 | + write_seqcount_begin(&jiffies_seq); | |
25908 | /* Did we start the jiffies update yet ? */ | |
25909 | if (last_jiffies_update.tv64 == 0) | |
25910 | last_jiffies_update = tick_next_period; | |
25911 | period = last_jiffies_update; | |
25912 | - write_sequnlock(&jiffies_lock); | |
25913 | + write_seqcount_end(&jiffies_seq); | |
25914 | + raw_spin_unlock(&jiffies_lock); | |
25915 | return period; | |
25916 | } | |
25917 | ||
25918 | @@ -176,6 +181,11 @@ static bool can_stop_full_tick(void) | |
25919 | return false; | |
25920 | } | |
25921 | ||
25922 | + if (!arch_irq_work_has_interrupt()) { | |
25923 | + trace_tick_stop(0, "missing irq work interrupt\n"); | |
25924 | + return false; | |
25925 | + } | |
25926 | + | |
25927 | /* sched_clock_tick() needs us? */ | |
25928 | #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK | |
25929 | /* | |
25930 | @@ -204,6 +214,7 @@ static void nohz_full_kick_work_func(struct irq_work *work) | |
25931 | ||
25932 | static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { | |
25933 | .func = nohz_full_kick_work_func, | |
25934 | + .flags = IRQ_WORK_HARD_IRQ, | |
25935 | }; | |
25936 | ||
25937 | /* | |
25938 | @@ -578,10 +589,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |
25939 | ||
25940 | /* Read jiffies and the time when jiffies were updated last */ | |
25941 | do { | |
25942 | - seq = read_seqbegin(&jiffies_lock); | |
25943 | + seq = read_seqcount_begin(&jiffies_seq); | |
25944 | basemono = last_jiffies_update.tv64; | |
25945 | basejiff = jiffies; | |
25946 | - } while (read_seqretry(&jiffies_lock, seq)); | |
25947 | + } while (read_seqcount_retry(&jiffies_seq, seq)); | |
25948 | ts->last_jiffies = basejiff; | |
25949 | ||
25950 | if (rcu_needs_cpu(basemono, &next_rcu) || | |
25951 | @@ -753,14 +764,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |
25952 | return false; | |
25953 | ||
25954 | if (unlikely(local_softirq_pending() && cpu_online(cpu))) { | |
25955 | - static int ratelimit; | |
25956 | - | |
25957 | - if (ratelimit < 10 && | |
25958 | - (local_softirq_pending() & SOFTIRQ_STOP_IDLE_MASK)) { | |
25959 | - pr_warn("NOHZ: local_softirq_pending %02x\n", | |
25960 | - (unsigned int) local_softirq_pending()); | |
25961 | - ratelimit++; | |
25962 | - } | |
25963 | + softirq_check_pending_idle(); | |
25964 | return false; | |
25965 | } | |
25966 | ||
25967 | @@ -1100,6 +1104,7 @@ void tick_setup_sched_timer(void) | |
25968 | * Emulate tick processing via per-CPU hrtimers: | |
25969 | */ | |
25970 | hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | |
25971 | + ts->sched_timer.irqsafe = 1; | |
25972 | ts->sched_timer.function = tick_sched_timer; | |
25973 | ||
25974 | /* Get the next period (per cpu) */ | |
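
On RT most irq_work is batched and run from the timer softirq (see irq_work_tick_soft() in the timer.c hunk further down); work that must still execute from the hard interrupt, such as the nohz-full kick above, carries the IRQ_WORK_HARD_IRQ flag. A hypothetical user, assuming only the flag introduced by this series:

	static void my_kick_fn(struct irq_work *work)
	{
		/* runs in hard interrupt context even on PREEMPT_RT */
	}

	static struct irq_work my_kick = {
		.func	= my_kick_fn,
		.flags	= IRQ_WORK_HARD_IRQ,	/* do not defer to the softirq */
	};

	/* queued from interrupt-safe code via irq_work_queue(&my_kick); */
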
25975 | diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c | |
25976 | index 445601c580d6..8744b0d87479 100644 | |
25977 | --- a/kernel/time/timekeeping.c | |
25978 | +++ b/kernel/time/timekeeping.c | |
25979 | @@ -2070,8 +2070,10 @@ EXPORT_SYMBOL(hardpps); | |
25980 | */ |
25981 | void xtime_update(unsigned long ticks) | |
25982 | { | |
25983 | - write_seqlock(&jiffies_lock); | |
25984 | + raw_spin_lock(&jiffies_lock); | |
25985 | + write_seqcount_begin(&jiffies_seq); | |
25986 | do_timer(ticks); | |
25987 | - write_sequnlock(&jiffies_lock); | |
25988 | + write_seqcount_end(&jiffies_seq); | |
25989 | + raw_spin_unlock(&jiffies_lock); | |
25990 | update_wall_time(); | |
25991 | } | |
25992 | diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h | |
25993 | index 704f595ce83f..763a3e5121ff 100644 | |
25994 | --- a/kernel/time/timekeeping.h | |
25995 | +++ b/kernel/time/timekeeping.h | |
25996 | @@ -19,7 +19,8 @@ extern void timekeeping_resume(void); | |
25997 | extern void do_timer(unsigned long ticks); | |
25998 | extern void update_wall_time(void); | |
25999 | ||
26000 | -extern seqlock_t jiffies_lock; | |
26001 | +extern raw_spinlock_t jiffies_lock; | |
26002 | +extern seqcount_t jiffies_seq; | |
26003 | ||
26004 | #define CS_NAME_LEN 32 | |
26005 | ||
26006 | diff --git a/kernel/time/timer.c b/kernel/time/timer.c | |
26007 | index bbc5d1114583..603699ff9411 100644 | |
26008 | --- a/kernel/time/timer.c | |
26009 | +++ b/kernel/time/timer.c | |
26010 | @@ -80,6 +80,9 @@ struct tvec_root { | |
26011 | struct tvec_base { | |
26012 | spinlock_t lock; | |
26013 | struct timer_list *running_timer; | |
26014 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26015 | + wait_queue_head_t wait_for_running_timer; | |
26016 | +#endif | |
26017 | unsigned long timer_jiffies; | |
26018 | unsigned long next_timer; | |
26019 | unsigned long active_timers; | |
26020 | @@ -777,6 +780,39 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, | |
26021 | cpu_relax(); | |
26022 | } | |
26023 | } | |
26024 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26025 | +static inline struct tvec_base *switch_timer_base(struct timer_list *timer, | |
26026 | + struct tvec_base *old, | |
26027 | + struct tvec_base *new) | |
26028 | +{ | |
26029 | + /* | |
26030 | + * We cannot mark the timer TIMER_MIGRATING as below: if preempted | |
26031 | + * after doing so, the preempter would spin in lock_timer_base() forever. | |
26032 | + */ | |
26033 | + if (spin_trylock(&new->lock)) { | |
26034 | + WRITE_ONCE(timer->flags, | |
26035 | + (timer->flags & ~TIMER_BASEMASK) | new->cpu); | |
26036 | + spin_unlock(&old->lock); | |
26037 | + return new; | |
26038 | + } | |
26039 | + return old; | |
26040 | +} | |
26041 | + | |
26042 | +#else | |
26043 | +static inline struct tvec_base *switch_timer_base(struct timer_list *timer, | |
26044 | + struct tvec_base *old, | |
26045 | + struct tvec_base *new) | |
26046 | +{ | |
26047 | + /* See the comment in lock_timer_base() */ | |
26048 | + timer->flags |= TIMER_MIGRATING; | |
26049 | + | |
26050 | + spin_unlock(&old->lock); | |
26051 | + spin_lock(&new->lock); | |
26052 | + WRITE_ONCE(timer->flags, | |
26053 | + (timer->flags & ~TIMER_BASEMASK) | new->cpu); | |
26054 | + return new; | |
26055 | +} | |
26056 | +#endif | |
26057 | ||
26058 | static inline int | |
26059 | __mod_timer(struct timer_list *timer, unsigned long expires, | |
26060 | @@ -807,16 +843,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, | |
26061 | * handler yet has not finished. This also guarantees that | |
26062 | * the timer is serialized wrt itself. | |
26063 | */ | |
26064 | - if (likely(base->running_timer != timer)) { | |
26065 | - /* See the comment in lock_timer_base() */ | |
26066 | - timer->flags |= TIMER_MIGRATING; | |
26067 | - | |
26068 | - spin_unlock(&base->lock); | |
26069 | - base = new_base; | |
26070 | - spin_lock(&base->lock); | |
26071 | - WRITE_ONCE(timer->flags, | |
26072 | - (timer->flags & ~TIMER_BASEMASK) | base->cpu); | |
26073 | - } | |
26074 | + if (likely(base->running_timer != timer)) | |
26075 | + base = switch_timer_base(timer, base, new_base); | |
26076 | } | |
26077 | ||
26078 | timer->expires = expires; | |
26079 | @@ -1006,6 +1034,33 @@ void add_timer_on(struct timer_list *timer, int cpu) | |
26080 | } | |
26081 | EXPORT_SYMBOL_GPL(add_timer_on); | |
26082 | ||
26083 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26084 | +/* | |
26085 | + * Wait for a running timer | |
26086 | + */ | |
26087 | +static void wait_for_running_timer(struct timer_list *timer) | |
26088 | +{ | |
26089 | + struct tvec_base *base; | |
26090 | + u32 tf = timer->flags; | |
26091 | + | |
26092 | + if (tf & TIMER_MIGRATING) | |
26093 | + return; | |
26094 | + | |
26095 | + base = per_cpu_ptr(&tvec_bases, tf & TIMER_CPUMASK); | |
26096 | + wait_event(base->wait_for_running_timer, | |
26097 | + base->running_timer != timer); | |
26098 | +} | |
26099 | + | |
26100 | +# define wakeup_timer_waiters(b) wake_up_all(&(b)->wait_for_running_timer) | |
26101 | +#else | |
26102 | +static inline void wait_for_running_timer(struct timer_list *timer) | |
26103 | +{ | |
26104 | + cpu_relax(); | |
26105 | +} | |
26106 | + | |
26107 | +# define wakeup_timer_waiters(b) do { } while (0) | |
26108 | +#endif | |
26109 | + | |
26110 | /** | |
26111 | * del_timer - deactivate a timer. | |
26112 | * @timer: the timer to be deactivated | |
26113 | @@ -1063,7 +1118,7 @@ int try_to_del_timer_sync(struct timer_list *timer) | |
26114 | } | |
26115 | EXPORT_SYMBOL(try_to_del_timer_sync); | |
26116 | ||
26117 | -#ifdef CONFIG_SMP | |
26118 | +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL) | |
26119 | /** | |
26120 | * del_timer_sync - deactivate a timer and wait for the handler to finish. | |
26121 | * @timer: the timer to be deactivated | |
26122 | @@ -1123,7 +1178,7 @@ int del_timer_sync(struct timer_list *timer) | |
26123 | int ret = try_to_del_timer_sync(timer); | |
26124 | if (ret >= 0) | |
26125 | return ret; | |
26126 | - cpu_relax(); | |
26127 | + wait_for_running_timer(timer); | |
26128 | } | |
26129 | } | |
26130 | EXPORT_SYMBOL(del_timer_sync); | |
26131 | @@ -1248,16 +1303,18 @@ static inline void __run_timers(struct tvec_base *base) | |
26132 | if (irqsafe) { | |
26133 | spin_unlock(&base->lock); | |
26134 | call_timer_fn(timer, fn, data); | |
26135 | + base->running_timer = NULL; | |
26136 | spin_lock(&base->lock); | |
26137 | } else { | |
26138 | spin_unlock_irq(&base->lock); | |
26139 | call_timer_fn(timer, fn, data); | |
26140 | + base->running_timer = NULL; | |
26141 | spin_lock_irq(&base->lock); | |
26142 | } | |
26143 | } | |
26144 | } | |
26145 | - base->running_timer = NULL; | |
26146 | spin_unlock_irq(&base->lock); | |
26147 | + wakeup_timer_waiters(base); | |
26148 | } | |
26149 | ||
26150 | #ifdef CONFIG_NO_HZ_COMMON | |
26151 | @@ -1390,6 +1447,14 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) | |
26152 | if (cpu_is_offline(smp_processor_id())) | |
26153 | return expires; | |
26154 | ||
26155 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26156 | + /* | |
26157 | + * On PREEMPT_RT we cannot sleep here. As a result we can't take | |
26158 | + * the base lock to check when the next timer is pending, so | |
26159 | + * we assume the next timer expires at the next jiffy. | |
26160 | + */ | |
26161 | + return basem + TICK_NSEC; | |
26162 | +#endif | |
26163 | spin_lock(&base->lock); | |
26164 | if (base->active_timers) { | |
26165 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | |
26166 | @@ -1416,13 +1481,13 @@ void update_process_times(int user_tick) | |
26167 | ||
26168 | /* Note: this timer irq context must be accounted for as well. */ | |
26169 | account_process_tick(p, user_tick); | |
26170 | + scheduler_tick(); | |
26171 | run_local_timers(); | |
26172 | rcu_check_callbacks(user_tick); | |
26173 | -#ifdef CONFIG_IRQ_WORK | |
26174 | +#if defined(CONFIG_IRQ_WORK) | |
26175 | if (in_irq()) | |
26176 | irq_work_tick(); | |
26177 | #endif | |
26178 | - scheduler_tick(); | |
26179 | run_posix_cpu_timers(p); | |
26180 | } | |
26181 | ||
26182 | @@ -1433,6 +1498,8 @@ static void run_timer_softirq(struct softirq_action *h) | |
26183 | { | |
26184 | struct tvec_base *base = this_cpu_ptr(&tvec_bases); | |
26185 | ||
26186 | + irq_work_tick_soft(); | |
26187 | + | |
26188 | if (time_after_eq(jiffies, base->timer_jiffies)) | |
26189 | __run_timers(base); | |
26190 | } | |
26191 | @@ -1589,7 +1656,7 @@ static void migrate_timers(int cpu) | |
26192 | ||
26193 | BUG_ON(cpu_online(cpu)); | |
26194 | old_base = per_cpu_ptr(&tvec_bases, cpu); | |
26195 | - new_base = get_cpu_ptr(&tvec_bases); | |
26196 | + new_base = get_local_ptr(&tvec_bases); | |
26197 | /* | |
26198 | * The caller is globally serialized and nobody else | |
26199 | * takes two locks at once, deadlock is not possible. | |
26200 | @@ -1613,7 +1680,7 @@ static void migrate_timers(int cpu) | |
26201 | ||
26202 | spin_unlock(&old_base->lock); | |
26203 | spin_unlock_irq(&new_base->lock); | |
26204 | - put_cpu_ptr(&tvec_bases); | |
26205 | + put_local_ptr(&tvec_bases); | |
26206 | } | |
26207 | ||
26208 | static int timer_cpu_notify(struct notifier_block *self, | |
26209 | @@ -1645,6 +1712,9 @@ static void __init init_timer_cpu(int cpu) | |
26210 | ||
26211 | base->cpu = cpu; | |
26212 | spin_lock_init(&base->lock); | |
26213 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
26214 | + init_waitqueue_head(&base->wait_for_running_timer); | |
26215 | +#endif | |
26216 | ||
26217 | base->timer_jiffies = jiffies; | |
26218 | base->next_timer = base->timer_jiffies; | |
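
With the waitqueue in place, del_timer_sync() no longer busy-waits: on RT, spinning with cpu_relax() against a timer softirq that runs in a (possibly lower-priority) thread can livelock, so the caller now sleeps until the handler clears ->running_timer. Its shape after this change, condensed from the hunks above:

	int del_timer_sync_sketch(struct timer_list *timer)
	{
		for (;;) {
			int ret = try_to_del_timer_sync(timer);

			if (ret >= 0)
				return ret;
			wait_for_running_timer(timer);	/* sleeps on RT */
		}
	}
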
26219 | diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig | |
26220 | index e45db6b0d878..364ccd0eb57b 100644 | |
26221 | --- a/kernel/trace/Kconfig | |
26222 | +++ b/kernel/trace/Kconfig | |
26223 | @@ -187,6 +187,24 @@ config IRQSOFF_TRACER | |
26224 | enabled. This option and the preempt-off timing option can be | |
26225 | used together or separately.) | |
26226 | ||
26227 | +config INTERRUPT_OFF_HIST | |
26228 | + bool "Interrupts-off Latency Histogram" | |
26229 | + depends on IRQSOFF_TRACER | |
26230 | + help | |
26231 | + This option generates continuously updated histograms (one per cpu) | |
26232 | + of the duration of time periods with interrupts disabled. The | |
26233 | + histograms are disabled by default. To enable them, write a non-zero | |
26234 | + number to | |
26235 | + | |
26236 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
26237 | + | |
26238 | + If PREEMPT_OFF_HIST is also selected, additional histograms (one | |
26239 | + per cpu) are generated that accumulate the duration of time periods | |
26240 | + when both interrupts and preemption are disabled. The histogram data | |
26241 | + will be located in the debug file system at | |
26242 | + | |
26243 | + /sys/kernel/debug/tracing/latency_hist/irqsoff | |
26244 | + | |
26245 | config PREEMPT_TRACER | |
26246 | bool "Preemption-off Latency Tracer" | |
26247 | default n | |
26248 | @@ -211,6 +229,24 @@ config PREEMPT_TRACER | |
26249 | enabled. This option and the irqs-off timing option can be | |
26250 | used together or separately.) | |
26251 | ||
26252 | +config PREEMPT_OFF_HIST | |
26253 | + bool "Preemption-off Latency Histogram" | |
26254 | + depends on PREEMPT_TRACER | |
26255 | + help | |
26256 | + This option generates continuously updated histograms (one per cpu) | |
26257 | + of the duration of time periods with preemption disabled. The | |
26258 | + histograms are disabled by default. To enable them, write a non-zero | |
26259 | + number to | |
26260 | + | |
26261 | + /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff | |
26262 | + | |
26263 | + If INTERRUPT_OFF_HIST is also selected, additional histograms (one | |
26264 | + per cpu) are generated that accumulate the duration of time periods | |
26265 | + when both interrupts and preemption are disabled. The histogram data | |
26266 | + will be located in the debug file system at | |
26267 | + | |
26268 | + /sys/kernel/debug/tracing/latency_hist/preemptoff | |
26269 | + | |
26270 | config SCHED_TRACER | |
26271 | bool "Scheduling Latency Tracer" | |
26272 | select GENERIC_TRACER | |
26273 | @@ -221,6 +257,74 @@ config SCHED_TRACER | |
26274 | This tracer tracks the latency of the highest priority task | |
26275 | to be scheduled in, starting from the point it has woken up. | |
26276 | ||
26277 | +config WAKEUP_LATENCY_HIST | |
26278 | + bool "Scheduling Latency Histogram" | |
26279 | + depends on SCHED_TRACER | |
26280 | + help | |
26281 | + This option generates continuously updated histograms (one per cpu) | |
26282 | + of the scheduling latency of the highest priority task. | |
26283 | + The histograms are disabled by default. To enable them, write a | |
26284 | + non-zero number to | |
26285 | + | |
26286 | + /sys/kernel/debug/tracing/latency_hist/enable/wakeup | |
26287 | + | |
26288 | + Two different algorithms are used, one to determine the latency of | |
26289 | + processes that exclusively use the highest priority of the system and | |
26290 | + another one to determine the latency of processes that share the | |
26291 | + highest system priority with other processes. The former is used to | |
26292 | + improve hardware and system software, the latter to optimize the | |
26293 | + priority design of a given system. The histogram data will be | |
26294 | + located in the debug file system at | |
26295 | + | |
26296 | + /sys/kernel/debug/tracing/latency_hist/wakeup | |
26297 | + | |
26298 | + and | |
26299 | + | |
26300 | + /sys/kernel/debug/tracing/latency_hist/wakeup/sharedprio | |
26301 | + | |
26302 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
26303 | + Histogram are selected, additional histogram data will be collected | |
26304 | + that contain, in addition to the wakeup latency, the timer latency, in | |
26305 | + case the wakeup was triggered by an expired timer. These histograms | |
26306 | + are available in the | |
26307 | + | |
26308 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
26309 | + | |
26310 | + directory. They reflect the apparent interrupt and scheduling latency | |
26311 | + and are best suited to determining the worst-case latency of a given | |
26312 | + system. To enable these histograms, write a non-zero number to | |
26313 | + | |
26314 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
26315 | + | |
26316 | +config MISSED_TIMER_OFFSETS_HIST | |
26317 | + depends on HIGH_RES_TIMERS | |
26318 | + select GENERIC_TRACER | |
26319 | + bool "Missed Timer Offsets Histogram" | |
26320 | + help | |
26321 | + Generate a histogram of missed timer offsets in microseconds. The | |
26322 | + histograms are disabled by default. To enable them, write a non-zero | |
26323 | + number to | |
26324 | + | |
26325 | + /sys/kernel/debug/tracing/latency_hist/enable/missed_timer_offsets | |
26326 | + | |
26327 | + The histogram data will be located in the debug file system at | |
26328 | + | |
26329 | + /sys/kernel/debug/tracing/latency_hist/missed_timer_offsets | |
26330 | + | |
26331 | + If both Scheduling Latency Histogram and Missed Timer Offsets | |
26332 | + Histogram are selected, additional histogram data will be collected | |
26333 | + that contain, in addition to the wakeup latency, the timer latency, in | |
26334 | + case the wakeup was triggered by an expired timer. These histograms | |
26335 | + are available in the | |
26336 | + | |
26337 | + /sys/kernel/debug/tracing/latency_hist/timerandwakeup | |
26338 | + | |
26339 | + directory. They reflect the apparent interrupt and scheduling latency | |
26340 | + and are best suited to determining the worst-case latency of a given | |
26341 | + system. To enable these histograms, write a non-zero number to | |
26342 | + | |
26343 | + /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup | |
26344 | + | |
26345 | config ENABLE_DEFAULT_TRACERS | |
26346 | bool "Trace process context switches and events" | |
26347 | depends on !GENERIC_TRACER | |
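
Taken together, the help texts above describe one workflow: build with the desired *_HIST options, then toggle each histogram at run time through debugfs. Assuming debugfs is mounted at /sys/kernel/debug and a per-CPU file layout (the "CPU0" name below is an assumption, not stated in the help texts):

	# echo 1 > /sys/kernel/debug/tracing/latency_hist/enable/preemptirqsoff
	# cat /sys/kernel/debug/tracing/latency_hist/irqsoff/CPU0
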
26348 | diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile | |
26349 | index 05ea5167e6bb..bc08c67301ae 100644 | |
26350 | --- a/kernel/trace/Makefile | |
26351 | +++ b/kernel/trace/Makefile | |
26352 | @@ -40,6 +40,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o | |
26353 | obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | |
26354 | obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o | |
26355 | obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o | |
26356 | +obj-$(CONFIG_INTERRUPT_OFF_HIST) += latency_hist.o | |
26357 | +obj-$(CONFIG_PREEMPT_OFF_HIST) += latency_hist.o | |
26358 | +obj-$(CONFIG_WAKEUP_LATENCY_HIST) += latency_hist.o | |
26359 | +obj-$(CONFIG_MISSED_TIMER_OFFSETS_HIST) += latency_hist.o | |
26360 | obj-$(CONFIG_NOP_TRACER) += trace_nop.o | |
26361 | obj-$(CONFIG_STACK_TRACER) += trace_stack.o | |
26362 | obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o | |
26363 | diff --git a/kernel/trace/latency_hist.c b/kernel/trace/latency_hist.c | |
26364 | new file mode 100644 | |
26365 | index 000000000000..7f6ee70dea41 | |
26366 | --- /dev/null | |
26367 | +++ b/kernel/trace/latency_hist.c | |
26368 | @@ -0,0 +1,1178 @@ | |
26369 | +/* | |
26370 | + * kernel/trace/latency_hist.c | |
26371 | + * | |
26372 | + * Add support for histograms of preemption-off latency and | |
26373 | + * interrupt-off latency and wakeup latency, it depends on | |
26374 | + * Real-Time Preemption Support. | |
26375 | + * | |
26376 | + * Copyright (C) 2005 MontaVista Software, Inc. | |
26377 | + * Yi Yang <yyang@ch.mvista.com> | |
26378 | + * | |
26379 | + * Converted to work with the new latency tracer. | |
26380 | + * Copyright (C) 2008 Red Hat, Inc. | |
26381 | + * Steven Rostedt <srostedt@redhat.com> | |
26382 | + * | |
26383 | + */ | |
26384 | +#include <linux/module.h> | |
26385 | +#include <linux/debugfs.h> | |
26386 | +#include <linux/seq_file.h> | |
26387 | +#include <linux/percpu.h> | |
26388 | +#include <linux/kallsyms.h> | |
26389 | +#include <linux/uaccess.h> | |
26390 | +#include <linux/sched.h> | |
26391 | +#include <linux/sched/rt.h> | |
26392 | +#include <linux/slab.h> | |
26393 | +#include <linux/atomic.h> | |
26394 | +#include <asm/div64.h> | |
26395 | + | |
26396 | +#include "trace.h" | |
26397 | +#include <trace/events/sched.h> | |
26398 | + | |
26399 | +#define NSECS_PER_USECS 1000L | |
26400 | + | |
26401 | +#define CREATE_TRACE_POINTS | |
26402 | +#include <trace/events/hist.h> | |
26403 | + | |
26404 | +enum { | |
26405 | + IRQSOFF_LATENCY = 0, | |
26406 | + PREEMPTOFF_LATENCY, | |
26407 | + PREEMPTIRQSOFF_LATENCY, | |
26408 | + WAKEUP_LATENCY, | |
26409 | + WAKEUP_LATENCY_SHAREDPRIO, | |
26410 | + MISSED_TIMER_OFFSETS, | |
26411 | + TIMERANDWAKEUP_LATENCY, | |
26412 | + MAX_LATENCY_TYPE, | |
26413 | +}; | |
26414 | + | |
26415 | +#define MAX_ENTRY_NUM 10240 | |
26416 | + | |
26417 | +struct hist_data { | |
26418 | + atomic_t hist_mode; /* 0 log, 1 don't log */ | |
26419 | + long offset; /* set it to MAX_ENTRY_NUM/2 for a bipolar scale */ | |
26420 | + long min_lat; | |
26421 | + long max_lat; | |
26422 | + unsigned long long below_hist_bound_samples; | |
26423 | + unsigned long long above_hist_bound_samples; | |
26424 | + long long accumulate_lat; | |
26425 | + unsigned long long total_samples; | |
26426 | + unsigned long long hist_array[MAX_ENTRY_NUM]; | |
26427 | +}; | |
26428 | + | |
26429 | +struct enable_data { | |
26430 | + int latency_type; | |
26431 | + int enabled; | |
26432 | +}; | |
26433 | + | |
26434 | +static char *latency_hist_dir_root = "latency_hist"; | |
26435 | + | |
26436 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
26437 | +static DEFINE_PER_CPU(struct hist_data, irqsoff_hist); | |
26438 | +static char *irqsoff_hist_dir = "irqsoff"; | |
26439 | +static DEFINE_PER_CPU(cycles_t, hist_irqsoff_start); | |
26440 | +static DEFINE_PER_CPU(int, hist_irqsoff_counting); | |
26441 | +#endif | |
26442 | + | |
26443 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
26444 | +static DEFINE_PER_CPU(struct hist_data, preemptoff_hist); | |
26445 | +static char *preemptoff_hist_dir = "preemptoff"; | |
26446 | +static DEFINE_PER_CPU(cycles_t, hist_preemptoff_start); | |
26447 | +static DEFINE_PER_CPU(int, hist_preemptoff_counting); | |
26448 | +#endif | |
26449 | + | |
26450 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
26451 | +static DEFINE_PER_CPU(struct hist_data, preemptirqsoff_hist); | |
26452 | +static char *preemptirqsoff_hist_dir = "preemptirqsoff"; | |
26453 | +static DEFINE_PER_CPU(cycles_t, hist_preemptirqsoff_start); | |
26454 | +static DEFINE_PER_CPU(int, hist_preemptirqsoff_counting); | |
26455 | +#endif | |
26456 | + | |
26457 | +#if defined(CONFIG_PREEMPT_OFF_HIST) || defined(CONFIG_INTERRUPT_OFF_HIST) | |
26458 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, int start); | |
26459 | +static struct enable_data preemptirqsoff_enabled_data = { | |
26460 | + .latency_type = PREEMPTIRQSOFF_LATENCY, | |
26461 | + .enabled = 0, | |
26462 | +}; | |
26463 | +#endif | |
26464 | + | |
26465 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26466 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26467 | +struct maxlatproc_data { | |
26468 | + char comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
26469 | + char current_comm[FIELD_SIZEOF(struct task_struct, comm)]; | |
26470 | + int pid; | |
26471 | + int current_pid; | |
26472 | + int prio; | |
26473 | + int current_prio; | |
26474 | + long latency; | |
26475 | + long timeroffset; | |
26476 | + cycle_t timestamp; | |
26477 | +}; | |
26478 | +#endif | |
26479 | + | |
26480 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
26481 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist); | |
26482 | +static DEFINE_PER_CPU(struct hist_data, wakeup_latency_hist_sharedprio); | |
26483 | +static char *wakeup_latency_hist_dir = "wakeup"; | |
26484 | +static char *wakeup_latency_hist_dir_sharedprio = "sharedprio"; | |
26485 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
26486 | + struct task_struct *p); | |
26487 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
26488 | + bool preempt, struct task_struct *prev, struct task_struct *next); | |
26489 | +static notrace void probe_sched_migrate_task(void *, | |
26490 | + struct task_struct *task, int cpu); | |
26491 | +static struct enable_data wakeup_latency_enabled_data = { | |
26492 | + .latency_type = WAKEUP_LATENCY, | |
26493 | + .enabled = 0, | |
26494 | +}; | |
26495 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc); | |
26496 | +static DEFINE_PER_CPU(struct maxlatproc_data, wakeup_maxlatproc_sharedprio); | |
26497 | +static DEFINE_PER_CPU(struct task_struct *, wakeup_task); | |
26498 | +static DEFINE_PER_CPU(int, wakeup_sharedprio); | |
26499 | +static unsigned long wakeup_pid; | |
26500 | +#endif | |
26501 | + | |
26502 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
26503 | +static DEFINE_PER_CPU(struct hist_data, missed_timer_offsets); | |
26504 | +static char *missed_timer_offsets_dir = "missed_timer_offsets"; | |
26505 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
26506 | + long long offset, struct task_struct *curr, struct task_struct *task); | |
26507 | +static struct enable_data missed_timer_offsets_enabled_data = { | |
26508 | + .latency_type = MISSED_TIMER_OFFSETS, | |
26509 | + .enabled = 0, | |
26510 | +}; | |
26511 | +static DEFINE_PER_CPU(struct maxlatproc_data, missed_timer_offsets_maxlatproc); | |
26512 | +static unsigned long missed_timer_offsets_pid; | |
26513 | +#endif | |
26514 | + | |
26515 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
26516 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26517 | +static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist); | |
26518 | +static char *timerandwakeup_latency_hist_dir = "timerandwakeup"; | |
26519 | +static struct enable_data timerandwakeup_enabled_data = { | |
26520 | + .latency_type = TIMERANDWAKEUP_LATENCY, | |
26521 | + .enabled = 0, | |
26522 | +}; | |
26523 | +static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc); | |
26524 | +#endif | |
26525 | + | |
26526 | +void notrace latency_hist(int latency_type, int cpu, long latency, | |
26527 | + long timeroffset, cycle_t stop, | |
26528 | + struct task_struct *p) | |
26529 | +{ | |
26530 | + struct hist_data *my_hist; | |
26531 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26532 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26533 | + struct maxlatproc_data *mp = NULL; | |
26534 | +#endif | |
26535 | + | |
26536 | + if (!cpu_possible(cpu) || latency_type < 0 || | |
26537 | + latency_type >= MAX_LATENCY_TYPE) | |
26538 | + return; | |
26539 | + | |
26540 | + switch (latency_type) { | |
26541 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
26542 | + case IRQSOFF_LATENCY: | |
26543 | + my_hist = &per_cpu(irqsoff_hist, cpu); | |
26544 | + break; | |
26545 | +#endif | |
26546 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
26547 | + case PREEMPTOFF_LATENCY: | |
26548 | + my_hist = &per_cpu(preemptoff_hist, cpu); | |
26549 | + break; | |
26550 | +#endif | |
26551 | +#if defined(CONFIG_PREEMPT_OFF_HIST) && defined(CONFIG_INTERRUPT_OFF_HIST) | |
26552 | + case PREEMPTIRQSOFF_LATENCY: | |
26553 | + my_hist = &per_cpu(preemptirqsoff_hist, cpu); | |
26554 | + break; | |
26555 | +#endif | |
26556 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
26557 | + case WAKEUP_LATENCY: | |
26558 | + my_hist = &per_cpu(wakeup_latency_hist, cpu); | |
26559 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
26560 | + break; | |
26561 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
26562 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
26563 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
26564 | + break; | |
26565 | +#endif | |
26566 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
26567 | + case MISSED_TIMER_OFFSETS: | |
26568 | + my_hist = &per_cpu(missed_timer_offsets, cpu); | |
26569 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
26570 | + break; | |
26571 | +#endif | |
26572 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
26573 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26574 | + case TIMERANDWAKEUP_LATENCY: | |
26575 | + my_hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
26576 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
26577 | + break; | |
26578 | +#endif | |
26579 | + | |
26580 | + default: | |
26581 | + return; | |
26582 | + } | |
26583 | + | |
26584 | + latency += my_hist->offset; | |
26585 | + | |
26586 | + if (atomic_read(&my_hist->hist_mode) == 0) | |
26587 | + return; | |
26588 | + | |
26589 | + if (latency < 0 || latency >= MAX_ENTRY_NUM) { | |
26590 | + if (latency < 0) | |
26591 | + my_hist->below_hist_bound_samples++; | |
26592 | + else | |
26593 | + my_hist->above_hist_bound_samples++; | |
26594 | + } else | |
26595 | + my_hist->hist_array[latency]++; | |
26596 | + | |
26597 | + if (unlikely(latency > my_hist->max_lat || | |
26598 | + my_hist->min_lat == LONG_MAX)) { | |
26599 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26600 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26601 | + if (latency_type == WAKEUP_LATENCY || | |
26602 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
26603 | + latency_type == MISSED_TIMER_OFFSETS || | |
26604 | + latency_type == TIMERANDWAKEUP_LATENCY) { | |
26605 | + strncpy(mp->comm, p->comm, sizeof(mp->comm)); | |
26606 | + strncpy(mp->current_comm, current->comm, | |
26607 | + sizeof(mp->current_comm)); | |
26608 | + mp->pid = task_pid_nr(p); | |
26609 | + mp->current_pid = task_pid_nr(current); | |
26610 | + mp->prio = p->prio; | |
26611 | + mp->current_prio = current->prio; | |
26612 | + mp->latency = latency; | |
26613 | + mp->timeroffset = timeroffset; | |
26614 | + mp->timestamp = stop; | |
26615 | + } | |
26616 | +#endif | |
26617 | + my_hist->max_lat = latency; | |
26618 | + } | |
26619 | + if (unlikely(latency < my_hist->min_lat)) | |
26620 | + my_hist->min_lat = latency; | |
26621 | + my_hist->total_samples++; | |
26622 | + my_hist->accumulate_lat += latency; | |
26623 | +} | |
26624 | + | |
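
The offset field is what lets one accounting routine serve both unipolar latencies and bipolar timer offsets: with offset = MAX_ENTRY_NUM/2 the array covers [-offset, MAX_ENTRY_NUM - offset) microseconds, and out-of-range samples only bump the underflow/overflow counters. A minimal restatement of that bucket mapping, not part of the patch:

	static inline void account_sample(struct hist_data *h, long lat_us)
	{
		long idx = lat_us + h->offset;	/* shift into [0, MAX_ENTRY_NUM) */

		if (idx < 0)
			h->below_hist_bound_samples++;
		else if (idx >= MAX_ENTRY_NUM)
			h->above_hist_bound_samples++;
		else
			h->hist_array[idx]++;
	}
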
26625 | +static void *l_start(struct seq_file *m, loff_t *pos) | |
26626 | +{ | |
26627 | + loff_t *index_ptr = NULL; | |
26628 | + loff_t index = *pos; | |
26629 | + struct hist_data *my_hist = m->private; | |
26630 | + | |
26631 | + if (index == 0) { | |
26632 | + char minstr[32], avgstr[32], maxstr[32]; | |
26633 | + | |
26634 | + atomic_dec(&my_hist->hist_mode); | |
26635 | + | |
26636 | + if (likely(my_hist->total_samples)) { | |
26637 | + long avg = (long) div64_s64(my_hist->accumulate_lat, | |
26638 | + my_hist->total_samples); | |
26639 | + snprintf(minstr, sizeof(minstr), "%ld", | |
26640 | + my_hist->min_lat - my_hist->offset); | |
26641 | + snprintf(avgstr, sizeof(avgstr), "%ld", | |
26642 | + avg - my_hist->offset); | |
26643 | + snprintf(maxstr, sizeof(maxstr), "%ld", | |
26644 | + my_hist->max_lat - my_hist->offset); | |
26645 | + } else { | |
26646 | + strcpy(minstr, "<undef>"); | |
26647 | + strcpy(avgstr, minstr); | |
26648 | + strcpy(maxstr, minstr); | |
26649 | + } | |
26650 | + | |
26651 | + seq_printf(m, "#Minimum latency: %s microseconds\n" | |
26652 | + "#Average latency: %s microseconds\n" | |
26653 | + "#Maximum latency: %s microseconds\n" | |
26654 | + "#Total samples: %llu\n" | |
26655 | + "#There are %llu samples lower than %ld" | |
26656 | + " microseconds.\n" | |
26657 | + "#There are %llu samples greater or equal" | |
26658 | + " than %ld microseconds.\n" | |
26659 | + "#usecs\t%16s\n", | |
26660 | + minstr, avgstr, maxstr, | |
26661 | + my_hist->total_samples, | |
26662 | + my_hist->below_hist_bound_samples, | |
26663 | + -my_hist->offset, | |
26664 | + my_hist->above_hist_bound_samples, | |
26665 | + MAX_ENTRY_NUM - my_hist->offset, | |
26666 | + "samples"); | |
26667 | + } | |
26668 | + if (index < MAX_ENTRY_NUM) { | |
26669 | + index_ptr = kmalloc(sizeof(loff_t), GFP_KERNEL); | |
26670 | + if (index_ptr) | |
26671 | + *index_ptr = index; | |
26672 | + } | |
26673 | + | |
26674 | + return index_ptr; | |
26675 | +} | |
26676 | + | |
26677 | +static void *l_next(struct seq_file *m, void *p, loff_t *pos) | |
26678 | +{ | |
26679 | + loff_t *index_ptr = p; | |
26680 | + struct hist_data *my_hist = m->private; | |
26681 | + | |
26682 | + if (++*pos >= MAX_ENTRY_NUM) { | |
26683 | + atomic_inc(&my_hist->hist_mode); | |
26684 | + return NULL; | |
26685 | + } | |
26686 | + *index_ptr = *pos; | |
26687 | + return index_ptr; | |
26688 | +} | |
26689 | + | |
26690 | +static void l_stop(struct seq_file *m, void *p) | |
26691 | +{ | |
26692 | + kfree(p); | |
26693 | +} | |
26694 | + | |
26695 | +static int l_show(struct seq_file *m, void *p) | |
26696 | +{ | |
26697 | + int index = *(loff_t *) p; | |
26698 | + struct hist_data *my_hist = m->private; | |
26699 | + | |
26700 | + seq_printf(m, "%6ld\t%16llu\n", index - my_hist->offset, | |
26701 | + my_hist->hist_array[index]); | |
26702 | + return 0; | |
26703 | +} | |
26704 | + | |
26705 | +static const struct seq_operations latency_hist_seq_op = { | |
26706 | + .start = l_start, | |
26707 | + .next = l_next, | |
26708 | + .stop = l_stop, | |
26709 | + .show = l_show | |
26710 | +}; | |
26711 | + | |
26712 | +static int latency_hist_open(struct inode *inode, struct file *file) | |
26713 | +{ | |
26714 | + int ret; | |
26715 | + | |
26716 | + ret = seq_open(file, &latency_hist_seq_op); | |
26717 | + if (!ret) { | |
26718 | + struct seq_file *seq = file->private_data; | |
26719 | + seq->private = inode->i_private; | |
26720 | + } | |
26721 | + return ret; | |
26722 | +} | |
26723 | + | |
26724 | +static const struct file_operations latency_hist_fops = { | |
26725 | + .open = latency_hist_open, | |
26726 | + .read = seq_read, | |
26727 | + .llseek = seq_lseek, | |
26728 | + .release = seq_release, | |
26729 | +}; | |
26730 | + | |
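
These are the standard seq_file hooks: l_start() emits the summary header at position 0 and hands out a kmalloc'd cursor, l_next() advances it, and l_stop() frees it. Wiring a histogram to debugfs then takes a single call; the file name, mode, and parent directory below are assumptions for illustration:

	static void example_register_hist_file(struct dentry *parent)
	{
		debugfs_create_file("CPU0", 0444, parent,
				    &per_cpu(irqsoff_hist, 0),
				    &latency_hist_fops);
	}
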
26731 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26732 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26733 | +static void clear_maxlatprocdata(struct maxlatproc_data *mp) | |
26734 | +{ | |
26735 | + mp->comm[0] = mp->current_comm[0] = '\0'; | |
26736 | + mp->prio = mp->current_prio = mp->pid = mp->current_pid = | |
26737 | + mp->latency = mp->timeroffset = -1; | |
26738 | + mp->timestamp = 0; | |
26739 | +} | |
26740 | +#endif | |
26741 | + | |
26742 | +static void hist_reset(struct hist_data *hist) | |
26743 | +{ | |
26744 | + atomic_dec(&hist->hist_mode); | |
26745 | + | |
26746 | + memset(hist->hist_array, 0, sizeof(hist->hist_array)); | |
26747 | + hist->below_hist_bound_samples = 0ULL; | |
26748 | + hist->above_hist_bound_samples = 0ULL; | |
26749 | + hist->min_lat = LONG_MAX; | |
26750 | + hist->max_lat = LONG_MIN; | |
26751 | + hist->total_samples = 0ULL; | |
26752 | + hist->accumulate_lat = 0LL; | |
26753 | + | |
26754 | + atomic_inc(&hist->hist_mode); | |
26755 | +} | |
26756 | + | |
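Note the bracketing in hist_reset(): hist_mode drops to zero for the duration of the wipe and comes back up afterwards, while l_next() above re-raises it once a full read pass completes, which suggests l_start() lowers it on entry (that part of the iterator is outside this excerpt). A hedged sketch of that pause/resume gate, using C11 atomics in place of the kernel's atomic_t:

    #include <stdatomic.h>
    #include <stdio.h>
    #include <string.h>

    static atomic_int hist_mode = 1;        /* >0: readable, 0: reset running */
    static unsigned long long hist_array[16];

    static void demo_hist_reset(void)
    {
            atomic_fetch_sub(&hist_mode, 1);        /* readers back off */
            memset(hist_array, 0, sizeof(hist_array));
            atomic_fetch_add(&hist_mode, 1);        /* reopen for readers */
    }

    int main(void)
    {
            if (atomic_load(&hist_mode) > 0)        /* roughly what a reader tests */
                    puts("histogram readable");
            demo_hist_reset();
            return 0;
    }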
26757 | +static ssize_t | |
26758 | +latency_hist_reset(struct file *file, const char __user *a, | |
26759 | + size_t size, loff_t *off) | |
26760 | +{ | |
26761 | + int cpu; | |
26762 | + struct hist_data *hist = NULL; | |
26763 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26764 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26765 | + struct maxlatproc_data *mp = NULL; | |
26766 | +#endif | |
26767 | + off_t latency_type = (off_t) file->private_data; | |
26768 | + | |
26769 | + for_each_online_cpu(cpu) { | |
26770 | + | |
26771 | + switch (latency_type) { | |
26772 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
26773 | + case PREEMPTOFF_LATENCY: | |
26774 | + hist = &per_cpu(preemptoff_hist, cpu); | |
26775 | + break; | |
26776 | +#endif | |
26777 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
26778 | + case IRQSOFF_LATENCY: | |
26779 | + hist = &per_cpu(irqsoff_hist, cpu); | |
26780 | + break; | |
26781 | +#endif | |
26782 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
26783 | + case PREEMPTIRQSOFF_LATENCY: | |
26784 | + hist = &per_cpu(preemptirqsoff_hist, cpu); | |
26785 | + break; | |
26786 | +#endif | |
26787 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
26788 | + case WAKEUP_LATENCY: | |
26789 | + hist = &per_cpu(wakeup_latency_hist, cpu); | |
26790 | + mp = &per_cpu(wakeup_maxlatproc, cpu); | |
26791 | + break; | |
26792 | + case WAKEUP_LATENCY_SHAREDPRIO: | |
26793 | + hist = &per_cpu(wakeup_latency_hist_sharedprio, cpu); | |
26794 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu); | |
26795 | + break; | |
26796 | +#endif | |
26797 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
26798 | + case MISSED_TIMER_OFFSETS: | |
26799 | + hist = &per_cpu(missed_timer_offsets, cpu); | |
26800 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu); | |
26801 | + break; | |
26802 | +#endif | |
26803 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
26804 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26805 | + case TIMERANDWAKEUP_LATENCY: | |
26806 | + hist = &per_cpu(timerandwakeup_latency_hist, cpu); | |
26807 | + mp = &per_cpu(timerandwakeup_maxlatproc, cpu); | |
26808 | + break; | |
26809 | +#endif | |
26810 | + } | |
26811 | + | |
26812 | + hist_reset(hist); | |
26813 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26814 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26815 | + if (latency_type == WAKEUP_LATENCY || | |
26816 | + latency_type == WAKEUP_LATENCY_SHAREDPRIO || | |
26817 | + latency_type == MISSED_TIMER_OFFSETS || | |
26818 | + latency_type == TIMERANDWAKEUP_LATENCY) | |
26819 | + clear_maxlatprocdata(mp); | |
26820 | +#endif | |
26821 | + } | |
26822 | + | |
26823 | + return size; | |
26824 | +} | |
26825 | + | |
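latency_hist_reset() never inspects the written bytes: any write to a "reset" file clears every online CPU's histogram (and the max_latency record, where one exists) for the latency type stashed in file->private_data. A hedged userspace example, path assumed as before:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/sys/kernel/debug/latency_hist/wakeup/reset",
                          O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            write(fd, "1", 1);      /* the write itself is the reset */
            close(fd);
            return 0;
    }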
26826 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26827 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26828 | +static ssize_t | |
26829 | +show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
26830 | +{ | |
26831 | + char buf[64]; | |
26832 | + int r; | |
26833 | + unsigned long *this_pid = file->private_data; | |
26834 | + | |
26835 | + r = snprintf(buf, sizeof(buf), "%lu\n", *this_pid); | |
26836 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
26837 | +} | |
26838 | + | |
26839 | +static ssize_t do_pid(struct file *file, const char __user *ubuf, | |
26840 | + size_t cnt, loff_t *ppos) | |
26841 | +{ | |
26842 | + char buf[64]; | |
26843 | + unsigned long pid; | |
26844 | + unsigned long *this_pid = file->private_data; | |
26845 | + | |
26846 | + if (cnt >= sizeof(buf)) | |
26847 | + return -EINVAL; | |
26848 | + | |
26849 | + if (copy_from_user(&buf, ubuf, cnt)) | |
26850 | + return -EFAULT; | |
26851 | + | |
26852 | + buf[cnt] = '\0'; | |
26853 | + | |
26854 | + if (kstrtoul(buf, 10, &pid)) | |
26855 | + return -EINVAL; | |
26856 | + | |
26857 | + *this_pid = pid; | |
26858 | + | |
26859 | + return cnt; | |
26860 | +} | |
26861 | +#endif | |
26862 | + | |
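show_pid()/do_pid() back a per-directory "pid" file; a non-zero value makes probe_wakeup_latency_hist_start() (below) record only wakeups of that task, and probe_hrtimer_interrupt() applies the same filter to missed timer offsets. A hedged sketch that points the wakeup filter at the calling process:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[32];
            int fd = open("/sys/kernel/debug/latency_hist/wakeup/pid",
                          O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            snprintf(buf, sizeof(buf), "%d\n", getpid());
            write(fd, buf, strlen(buf));    /* parsed by do_pid() via kstrtoul() */
            close(fd);
            return 0;
    }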
26863 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
26864 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
26865 | +static ssize_t | |
26866 | +show_maxlatproc(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
26867 | +{ | |
26868 | + int r; | |
26869 | + struct maxlatproc_data *mp = file->private_data; | |
26870 | + int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8); | |
26871 | + unsigned long long t; | |
26872 | + unsigned long usecs, secs; | |
26873 | + char *buf; | |
26874 | + | |
26875 | + if (mp->pid == -1 || mp->current_pid == -1) { | |
26876 | + buf = "(none)\n"; | |
26877 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, | |
26878 | + strlen(buf)); | |
26879 | + } | |
26880 | + | |
26881 | + buf = kmalloc(strmaxlen, GFP_KERNEL); | |
26882 | + if (buf == NULL) | |
26883 | + return -ENOMEM; | |
26884 | + | |
26885 | + t = ns2usecs(mp->timestamp); | |
26886 | + usecs = do_div(t, USEC_PER_SEC); | |
26887 | + secs = (unsigned long) t; | |
26888 | + r = snprintf(buf, strmaxlen, | |
26889 | + "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid, | |
26890 | + MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm, | |
26891 | + mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm, | |
26892 | + secs, usecs); | |
26893 | + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
26894 | + kfree(buf); | |
26895 | + return r; | |
26896 | +} | |
26897 | +#endif | |
26898 | + | |
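show_maxlatproc() prints one record per CPU in the fixed form "<pid> <prio> <latency> (<timeroffset>) <comm> <- <waker pid> <waker prio> <waker comm> <sec>.<usec>", with priorities already flipped from kernel to userspace RT numbering via MAX_RT_PRIO-1 - prio. A hedged parser, assuming comm strings contain no whitespace (the usec field is zero-padded to six digits on output):

    #include <stdio.h>

    int main(void)
    {
            /* Example record, not taken from a real system. */
            const char *line =
                    "1723 80 145 (57) cyclictest <- 0 98 swapper 12.000321";
            int pid, prio, wpid, wprio;
            long lat, toff;
            char comm[32], wcomm[32];
            unsigned long sec, usec;

            if (sscanf(line, "%d %d %ld (%ld) %31s <- %d %d %31s %lu.%lu",
                       &pid, &prio, &lat, &toff, comm, &wpid, &wprio, wcomm,
                       &sec, &usec) == 10)
                    printf("%s (pid %d): %ld us at %lu.%06lu\n",
                           comm, pid, lat, sec, usec);
            return 0;
    }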
26899 | +static ssize_t | |
26900 | +show_enable(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos) | |
26901 | +{ | |
26902 | + char buf[64]; | |
26903 | + struct enable_data *ed = file->private_data; | |
26904 | + int r; | |
26905 | + | |
26906 | + r = snprintf(buf, sizeof(buf), "%d\n", ed->enabled); | |
26907 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); | |
26908 | +} | |
26909 | + | |
26910 | +static ssize_t | |
26911 | +do_enable(struct file *file, const char __user *ubuf, size_t cnt, loff_t *ppos) | |
26912 | +{ | |
26913 | + char buf[64]; | |
26914 | + unsigned long enable; | |
26915 | + struct enable_data *ed = file->private_data; | |
26916 | + | |
26917 | + if (cnt >= sizeof(buf)) | |
26918 | + return -EINVAL; | |
26919 | + | |
26920 | + if (copy_from_user(&buf, ubuf, cnt)) | |
26921 | + return -EFAULT; | |
26922 | + | |
26923 | + buf[cnt] = 0; | |
26924 | + | |
26925 | + if (kstrtoul(buf, 10, &enable)) | |
26926 | + return -EINVAL; | |
26927 | + | |
26928 | + if ((enable && ed->enabled) || (!enable && !ed->enabled)) | |
26929 | + return cnt; | |
26930 | + | |
26931 | + if (enable) { | |
26932 | + int ret; | |
26933 | + | |
26934 | + switch (ed->latency_type) { | |
26935 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
26936 | + case PREEMPTIRQSOFF_LATENCY: | |
26937 | + ret = register_trace_preemptirqsoff_hist( | |
26938 | + probe_preemptirqsoff_hist, NULL); | |
26939 | + if (ret) { | |
26940 | + pr_info("wakeup trace: Couldn't assign " | |
26941 | + "probe_preemptirqsoff_hist " | |
26942 | + "to trace_preemptirqsoff_hist\n"); | |
26943 | + return ret; | |
26944 | + } | |
26945 | + break; | |
26946 | +#endif | |
26947 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
26948 | + case WAKEUP_LATENCY: | |
26949 | + ret = register_trace_sched_wakeup( | |
26950 | + probe_wakeup_latency_hist_start, NULL); | |
26951 | + if (ret) { | |
26952 | + pr_info("wakeup trace: Couldn't assign " | |
26953 | + "probe_wakeup_latency_hist_start " | |
26954 | + "to trace_sched_wakeup\n"); | |
26955 | + return ret; | |
26956 | + } | |
26957 | + ret = register_trace_sched_wakeup_new( | |
26958 | + probe_wakeup_latency_hist_start, NULL); | |
26959 | + if (ret) { | |
26960 | + pr_info("wakeup trace: Couldn't assign " | |
26961 | + "probe_wakeup_latency_hist_start " | |
26962 | + "to trace_sched_wakeup_new\n"); | |
26963 | + unregister_trace_sched_wakeup( | |
26964 | + probe_wakeup_latency_hist_start, NULL); | |
26965 | + return ret; | |
26966 | + } | |
26967 | + ret = register_trace_sched_switch( | |
26968 | + probe_wakeup_latency_hist_stop, NULL); | |
26969 | + if (ret) { | |
26970 | + pr_info("wakeup trace: Couldn't assign " | |
26971 | + "probe_wakeup_latency_hist_stop " | |
26972 | + "to trace_sched_switch\n"); | |
26973 | + unregister_trace_sched_wakeup( | |
26974 | + probe_wakeup_latency_hist_start, NULL); | |
26975 | + unregister_trace_sched_wakeup_new( | |
26976 | + probe_wakeup_latency_hist_start, NULL); | |
26977 | + return ret; | |
26978 | + } | |
26979 | + ret = register_trace_sched_migrate_task( | |
26980 | + probe_sched_migrate_task, NULL); | |
26981 | + if (ret) { | |
26982 | + pr_info("wakeup trace: Couldn't assign " | |
26983 | + "probe_sched_migrate_task " | |
26984 | + "to trace_sched_migrate_task\n"); | |
26985 | + unregister_trace_sched_wakeup( | |
26986 | + probe_wakeup_latency_hist_start, NULL); | |
26987 | + unregister_trace_sched_wakeup_new( | |
26988 | + probe_wakeup_latency_hist_start, NULL); | |
26989 | + unregister_trace_sched_switch( | |
26990 | + probe_wakeup_latency_hist_stop, NULL); | |
26991 | + return ret; | |
26992 | + } | |
26993 | + break; | |
26994 | +#endif | |
26995 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
26996 | + case MISSED_TIMER_OFFSETS: | |
26997 | + ret = register_trace_hrtimer_interrupt( | |
26998 | + probe_hrtimer_interrupt, NULL); | |
26999 | + if (ret) { | |
27000 | + pr_info("wakeup trace: Couldn't assign " | |
27001 | + "probe_hrtimer_interrupt " | |
27002 | + "to trace_hrtimer_interrupt\n"); | |
27003 | + return ret; | |
27004 | + } | |
27005 | + break; | |
27006 | +#endif | |
27007 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
27008 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
27009 | + case TIMERANDWAKEUP_LATENCY: | |
27010 | + if (!wakeup_latency_enabled_data.enabled || | |
27011 | + !missed_timer_offsets_enabled_data.enabled) | |
27012 | + return -EINVAL; | |
27013 | + break; | |
27014 | +#endif | |
27015 | + default: | |
27016 | + break; | |
27017 | + } | |
27018 | + } else { | |
27019 | + switch (ed->latency_type) { | |
27020 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
27021 | + case PREEMPTIRQSOFF_LATENCY: | |
27022 | + { | |
27023 | + int cpu; | |
27024 | + | |
27025 | + unregister_trace_preemptirqsoff_hist( | |
27026 | + probe_preemptirqsoff_hist, NULL); | |
27027 | + for_each_online_cpu(cpu) { | |
27028 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
27029 | + per_cpu(hist_irqsoff_counting, | |
27030 | + cpu) = 0; | |
27031 | +#endif | |
27032 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
27033 | + per_cpu(hist_preemptoff_counting, | |
27034 | + cpu) = 0; | |
27035 | +#endif | |
27036 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
27037 | + per_cpu(hist_preemptirqsoff_counting, | |
27038 | + cpu) = 0; | |
27039 | +#endif | |
27040 | + } | |
27041 | + } | |
27042 | + break; | |
27043 | +#endif | |
27044 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27045 | + case WAKEUP_LATENCY: | |
27046 | + { | |
27047 | + int cpu; | |
27048 | + | |
27049 | + unregister_trace_sched_wakeup( | |
27050 | + probe_wakeup_latency_hist_start, NULL); | |
27051 | + unregister_trace_sched_wakeup_new( | |
27052 | + probe_wakeup_latency_hist_start, NULL); | |
27053 | + unregister_trace_sched_switch( | |
27054 | + probe_wakeup_latency_hist_stop, NULL); | |
27055 | + unregister_trace_sched_migrate_task( | |
27056 | + probe_sched_migrate_task, NULL); | |
27057 | + | |
27058 | + for_each_online_cpu(cpu) { | |
27059 | + per_cpu(wakeup_task, cpu) = NULL; | |
27060 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
27061 | + } | |
27062 | + } | |
27063 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27064 | + timerandwakeup_enabled_data.enabled = 0; | |
27065 | +#endif | |
27066 | + break; | |
27067 | +#endif | |
27068 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27069 | + case MISSED_TIMER_OFFSETS: | |
27070 | + unregister_trace_hrtimer_interrupt( | |
27071 | + probe_hrtimer_interrupt, NULL); | |
27072 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27073 | + timerandwakeup_enabled_data.enabled = 0; | |
27074 | +#endif | |
27075 | + break; | |
27076 | +#endif | |
27077 | + default: | |
27078 | + break; | |
27079 | + } | |
27080 | + } | |
27081 | + ed->enabled = enable; | |
27082 | + return cnt; | |
27083 | +} | |
27084 | + | |
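do_enable() registers a histogram's tracepoint probes only on the 0->1 transition and tears them back down on 1->0; note that TIMERANDWAKEUP_LATENCY refuses to switch on unless both feeder histograms are already enabled, and is force-disabled whenever either of them goes away. A hedged helper that honours that ordering, paths assumed:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    static int enable(const char *name)
    {
            char path[128];
            int fd;

            snprintf(path, sizeof(path),
                     "/sys/kernel/debug/latency_hist/enable/%s", name);
            fd = open(path, O_WRONLY);
            if (fd < 0)
                    return -1;
            write(fd, "1", 1);
            close(fd);
            return 0;
    }

    int main(void)
    {
            /* Feeders first, combined histogram last (see do_enable()). */
            if (enable("wakeup") || enable("missed_timer_offsets") ||
                enable("timerandwakeup")) {
                    perror("enable");
                    return 1;
            }
            return 0;
    }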
27085 | +static const struct file_operations latency_hist_reset_fops = { | |
27086 | + .open = tracing_open_generic, | |
27087 | + .write = latency_hist_reset, | |
27088 | +}; | |
27089 | + | |
27090 | +static const struct file_operations enable_fops = { | |
27091 | + .open = tracing_open_generic, | |
27092 | + .read = show_enable, | |
27093 | + .write = do_enable, | |
27094 | +}; | |
27095 | + | |
27096 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
27097 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
27098 | +static const struct file_operations pid_fops = { | |
27099 | + .open = tracing_open_generic, | |
27100 | + .read = show_pid, | |
27101 | + .write = do_pid, | |
27102 | +}; | |
27103 | + | |
27104 | +static const struct file_operations maxlatproc_fops = { | |
27105 | + .open = tracing_open_generic, | |
27106 | + .read = show_maxlatproc, | |
27107 | +}; | |
27108 | +#endif | |
27109 | + | |
27110 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
27111 | +static notrace void probe_preemptirqsoff_hist(void *v, int reason, | |
27112 | + int starthist) | |
27113 | +{ | |
27114 | + int cpu = raw_smp_processor_id(); | |
27115 | + int time_set = 0; | |
27116 | + | |
27117 | + if (starthist) { | |
27118 | + cycle_t uninitialized_var(start); | |
27119 | + | |
27120 | + if (!preempt_count() && !irqs_disabled()) | |
27121 | + return; | |
27122 | + | |
27123 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
27124 | + if ((reason == IRQS_OFF || reason == TRACE_START) && | |
27125 | + !per_cpu(hist_irqsoff_counting, cpu)) { | |
27126 | + per_cpu(hist_irqsoff_counting, cpu) = 1; | |
27127 | + start = ftrace_now(cpu); | |
27128 | + time_set++; | |
27129 | + per_cpu(hist_irqsoff_start, cpu) = start; | |
27130 | + } | |
27131 | +#endif | |
27132 | + | |
27133 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
27134 | + if ((reason == PREEMPT_OFF || reason == TRACE_START) && | |
27135 | + !per_cpu(hist_preemptoff_counting, cpu)) { | |
27136 | + per_cpu(hist_preemptoff_counting, cpu) = 1; | |
27137 | + if (!(time_set++)) | |
27138 | + start = ftrace_now(cpu); | |
27139 | + per_cpu(hist_preemptoff_start, cpu) = start; | |
27140 | + } | |
27141 | +#endif | |
27142 | + | |
27143 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
27144 | + if (per_cpu(hist_irqsoff_counting, cpu) && | |
27145 | + per_cpu(hist_preemptoff_counting, cpu) && | |
27146 | + !per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
27147 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 1; | |
27148 | + if (!time_set) | |
27149 | + start = ftrace_now(cpu); | |
27150 | + per_cpu(hist_preemptirqsoff_start, cpu) = start; | |
27151 | + } | |
27152 | +#endif | |
27153 | + } else { | |
27154 | + cycle_t uninitialized_var(stop); | |
27155 | + | |
27156 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
27157 | + if ((reason == IRQS_ON || reason == TRACE_STOP) && | |
27158 | + per_cpu(hist_irqsoff_counting, cpu)) { | |
27159 | + cycle_t start = per_cpu(hist_irqsoff_start, cpu); | |
27160 | + | |
27161 | + stop = ftrace_now(cpu); | |
27162 | + time_set++; | |
27163 | + if (start) { | |
27164 | + long latency = ((long) (stop - start)) / | |
27165 | + NSECS_PER_USECS; | |
27166 | + | |
27167 | + latency_hist(IRQSOFF_LATENCY, cpu, latency, 0, | |
27168 | + stop, NULL); | |
27169 | + } | |
27170 | + per_cpu(hist_irqsoff_counting, cpu) = 0; | |
27171 | + } | |
27172 | +#endif | |
27173 | + | |
27174 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
27175 | + if ((reason == PREEMPT_ON || reason == TRACE_STOP) && | |
27176 | + per_cpu(hist_preemptoff_counting, cpu)) { | |
27177 | + cycle_t start = per_cpu(hist_preemptoff_start, cpu); | |
27178 | + | |
27179 | + if (!(time_set++)) | |
27180 | + stop = ftrace_now(cpu); | |
27181 | + if (start) { | |
27182 | + long latency = ((long) (stop - start)) / | |
27183 | + NSECS_PER_USECS; | |
27184 | + | |
27185 | + latency_hist(PREEMPTOFF_LATENCY, cpu, latency, | |
27186 | + 0, stop, NULL); | |
27187 | + } | |
27188 | + per_cpu(hist_preemptoff_counting, cpu) = 0; | |
27189 | + } | |
27190 | +#endif | |
27191 | + | |
27192 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
27193 | + if ((!per_cpu(hist_irqsoff_counting, cpu) || | |
27194 | + !per_cpu(hist_preemptoff_counting, cpu)) && | |
27195 | + per_cpu(hist_preemptirqsoff_counting, cpu)) { | |
27196 | + cycle_t start = per_cpu(hist_preemptirqsoff_start, cpu); | |
27197 | + | |
27198 | + if (!time_set) | |
27199 | + stop = ftrace_now(cpu); | |
27200 | + if (start) { | |
27201 | + long latency = ((long) (stop - start)) / | |
27202 | + NSECS_PER_USECS; | |
27203 | + | |
27204 | + latency_hist(PREEMPTIRQSOFF_LATENCY, cpu, | |
27205 | + latency, 0, stop, NULL); | |
27206 | + } | |
27207 | + per_cpu(hist_preemptirqsoff_counting, cpu) = 0; | |
27208 | + } | |
27209 | +#endif | |
27210 | + } | |
27211 | +} | |
27212 | +#endif | |
27213 | + | |
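probe_preemptirqsoff_hist() is careful to read the clock at most once per event: the time_set counter lets the irqsoff, preemptoff and combined histograms share a single ftrace_now() value, with "if (!(time_set++))" taking the timestamp only for the first consumer that needs it. The same pattern in a hedged userspace form, clock_gettime() standing in for ftrace_now():

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static uint64_t now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    int main(void)
    {
            int time_set = 0;
            uint64_t stamp = 0;
            int want_irqsoff = 1, want_preemptoff = 1;

            /* First consumer pays for the clock read, the rest reuse it. */
            if (want_irqsoff && !(time_set++))
                    stamp = now_ns();
            if (want_preemptoff && !(time_set++))
                    stamp = now_ns();

            printf("%d consumer(s), one clock read, stamp=%llu ns\n",
                   time_set, (unsigned long long)stamp);
            return 0;
    }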
27214 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27215 | +static DEFINE_RAW_SPINLOCK(wakeup_lock); | |
27216 | +static notrace void probe_sched_migrate_task(void *v, struct task_struct *task, | |
27217 | + int cpu) | |
27218 | +{ | |
27219 | + int old_cpu = task_cpu(task); | |
27220 | + | |
27221 | + if (cpu != old_cpu) { | |
27222 | + unsigned long flags; | |
27223 | + struct task_struct *cpu_wakeup_task; | |
27224 | + | |
27225 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
27226 | + | |
27227 | + cpu_wakeup_task = per_cpu(wakeup_task, old_cpu); | |
27228 | + if (task == cpu_wakeup_task) { | |
27229 | + put_task_struct(cpu_wakeup_task); | |
27230 | + per_cpu(wakeup_task, old_cpu) = NULL; | |
27231 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task; | |
27232 | + get_task_struct(cpu_wakeup_task); | |
27233 | + } | |
27234 | + | |
27235 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
27236 | + } | |
27237 | +} | |
27238 | + | |
27239 | +static notrace void probe_wakeup_latency_hist_start(void *v, | |
27240 | + struct task_struct *p) | |
27241 | +{ | |
27242 | + unsigned long flags; | |
27243 | + struct task_struct *curr = current; | |
27244 | + int cpu = task_cpu(p); | |
27245 | + struct task_struct *cpu_wakeup_task; | |
27246 | + | |
27247 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
27248 | + | |
27249 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
27250 | + | |
27251 | + if (wakeup_pid) { | |
27252 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
27253 | + p->prio == curr->prio) | |
27254 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
27255 | + if (likely(wakeup_pid != task_pid_nr(p))) | |
27256 | + goto out; | |
27257 | + } else { | |
27258 | + if (likely(!rt_task(p)) || | |
27259 | + (cpu_wakeup_task && p->prio > cpu_wakeup_task->prio) || | |
27260 | + p->prio > curr->prio) | |
27261 | + goto out; | |
27262 | + if ((cpu_wakeup_task && p->prio == cpu_wakeup_task->prio) || | |
27263 | + p->prio == curr->prio) | |
27264 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
27265 | + } | |
27266 | + | |
27267 | + if (cpu_wakeup_task) | |
27268 | + put_task_struct(cpu_wakeup_task); | |
27269 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu) = p; | |
27270 | + get_task_struct(cpu_wakeup_task); | |
27271 | + cpu_wakeup_task->preempt_timestamp_hist = | |
27272 | + ftrace_now(raw_smp_processor_id()); | |
27273 | +out: | |
27274 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
27275 | +} | |
27276 | + | |
27277 | +static notrace void probe_wakeup_latency_hist_stop(void *v, | |
27278 | + bool preempt, struct task_struct *prev, struct task_struct *next) | |
27279 | +{ | |
27280 | + unsigned long flags; | |
27281 | + int cpu = task_cpu(next); | |
27282 | + long latency; | |
27283 | + cycle_t stop; | |
27284 | + struct task_struct *cpu_wakeup_task; | |
27285 | + | |
27286 | + raw_spin_lock_irqsave(&wakeup_lock, flags); | |
27287 | + | |
27288 | + cpu_wakeup_task = per_cpu(wakeup_task, cpu); | |
27289 | + | |
27290 | + if (cpu_wakeup_task == NULL) | |
27291 | + goto out; | |
27292 | + | |
27293 | + /* Already running? */ | |
27294 | + if (unlikely(current == cpu_wakeup_task)) | |
27295 | + goto out_reset; | |
27296 | + | |
27297 | + if (next != cpu_wakeup_task) { | |
27298 | + if (next->prio < cpu_wakeup_task->prio) | |
27299 | + goto out_reset; | |
27300 | + | |
27301 | + if (next->prio == cpu_wakeup_task->prio) | |
27302 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
27303 | + | |
27304 | + goto out; | |
27305 | + } | |
27306 | + | |
27307 | + if (current->prio == cpu_wakeup_task->prio) | |
27308 | + per_cpu(wakeup_sharedprio, cpu) = 1; | |
27309 | + | |
27310 | + /* | |
27311 | + * The task we are waiting for is about to be switched to. | |
27312 | + * Calculate latency and store it in histogram. | |
27313 | + */ | |
27314 | + stop = ftrace_now(raw_smp_processor_id()); | |
27315 | + | |
27316 | + latency = ((long) (stop - next->preempt_timestamp_hist)) / | |
27317 | + NSECS_PER_USECS; | |
27318 | + | |
27319 | + if (per_cpu(wakeup_sharedprio, cpu)) { | |
27320 | + latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop, | |
27321 | + next); | |
27322 | + per_cpu(wakeup_sharedprio, cpu) = 0; | |
27323 | + } else { | |
27324 | + latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next); | |
27325 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27326 | + if (timerandwakeup_enabled_data.enabled) { | |
27327 | + latency_hist(TIMERANDWAKEUP_LATENCY, cpu, | |
27328 | + next->timer_offset + latency, next->timer_offset, | |
27329 | + stop, next); | |
27330 | + } | |
27331 | +#endif | |
27332 | + } | |
27333 | + | |
27334 | +out_reset: | |
27335 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27336 | + next->timer_offset = 0; | |
27337 | +#endif | |
27338 | + put_task_struct(cpu_wakeup_task); | |
27339 | + per_cpu(wakeup_task, cpu) = NULL; | |
27340 | +out: | |
27341 | + raw_spin_unlock_irqrestore(&wakeup_lock, flags); | |
27342 | +} | |
27343 | +#endif | |
27344 | + | |
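The two probes above bracket each measurement: probe_wakeup_latency_hist_start() timestamps the highest-priority RT wakeup pending per CPU, and probe_wakeup_latency_hist_stop() turns the wakeup-to-switch-in delta into a sample (filed under WAKEUP_LATENCY_SHAREDPRIO when an equal-priority task makes the number ambiguous). A hedged load generator whose periodic wakeups these probes would record -- in effect a stripped-down cyclictest loop; needs root and CONFIG_WAKEUP_LATENCY_HIST:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct sched_param sp = { .sched_priority = 80 };
            struct timespec t;
            int i;

            if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
                    perror("sched_setscheduler");
                    return 1;
            }
            clock_gettime(CLOCK_MONOTONIC, &t);
            for (i = 0; i < 1000; i++) {
                    t.tv_nsec += 1000000;           /* wake every 1 ms */
                    while (t.tv_nsec >= 1000000000) {
                            t.tv_nsec -= 1000000000;
                            t.tv_sec++;
                    }
                    clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &t, NULL);
            }
            return 0;
    }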
27345 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27346 | +static notrace void probe_hrtimer_interrupt(void *v, int cpu, | |
27347 | + long long latency_ns, struct task_struct *curr, | |
27348 | + struct task_struct *task) | |
27349 | +{ | |
27350 | + if (latency_ns <= 0 && task != NULL && rt_task(task) && | |
27351 | + (task->prio < curr->prio || | |
27352 | + (task->prio == curr->prio && | |
27353 | + !cpumask_test_cpu(cpu, &task->cpus_allowed)))) { | |
27354 | + long latency; | |
27355 | + cycle_t now; | |
27356 | + | |
27357 | + if (missed_timer_offsets_pid) { | |
27358 | + if (likely(missed_timer_offsets_pid != | |
27359 | + task_pid_nr(task))) | |
27360 | + return; | |
27361 | + } | |
27362 | + | |
27363 | + now = ftrace_now(cpu); | |
27364 | + latency = (long) div_s64(-latency_ns, NSECS_PER_USECS); | |
27365 | + latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now, | |
27366 | + task); | |
27367 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27368 | + task->timer_offset = latency; | |
27369 | +#endif | |
27370 | + } | |
27371 | +} | |
27372 | +#endif | |
27373 | + | |
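probe_hrtimer_interrupt() only records timers that expired late -- latency_ns is non-positive by the tracepoint convention assumed here -- and only when the woken task is an RT task that should have displaced the current one (higher priority, or equal priority but not allowed to run on this CPU). The negated nanosecond value is scaled to microseconds with div_s64(); the sign handling in a hedged userspace form:

    #include <inttypes.h>
    #include <stdio.h>

    #define NSECS_PER_USECS 1000LL

    int main(void)
    {
            /* Non-positive per the assumed convention: this timer fired
             * 2.5 ms after its programmed expiry. */
            int64_t latency_ns = -2500000;
            long latency = (long)(-latency_ns / NSECS_PER_USECS);

            printf("missed timer offset: %ld us\n", latency);
            return 0;
    }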
27374 | +static __init int latency_hist_init(void) | |
27375 | +{ | |
27376 | + struct dentry *latency_hist_root = NULL; | |
27377 | + struct dentry *dentry; | |
27378 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27379 | + struct dentry *dentry_sharedprio; | |
27380 | +#endif | |
27381 | + struct dentry *entry; | |
27382 | + struct dentry *enable_root; | |
27383 | + int i = 0; | |
27384 | + struct hist_data *my_hist; | |
27385 | + char name[64]; | |
27386 | + char *cpufmt = "CPU%d"; | |
27387 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) || \ | |
27388 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
27389 | + char *cpufmt_maxlatproc = "max_latency-CPU%d"; | |
27390 | + struct maxlatproc_data *mp = NULL; | |
27391 | +#endif | |
27392 | + | |
27393 | + dentry = tracing_init_dentry(); | |
27394 | + latency_hist_root = debugfs_create_dir(latency_hist_dir_root, dentry); | |
27395 | + enable_root = debugfs_create_dir("enable", latency_hist_root); | |
27396 | + | |
27397 | +#ifdef CONFIG_INTERRUPT_OFF_HIST | |
27398 | + dentry = debugfs_create_dir(irqsoff_hist_dir, latency_hist_root); | |
27399 | + for_each_possible_cpu(i) { | |
27400 | + sprintf(name, cpufmt, i); | |
27401 | + entry = debugfs_create_file(name, 0444, dentry, | |
27402 | + &per_cpu(irqsoff_hist, i), &latency_hist_fops); | |
27403 | + my_hist = &per_cpu(irqsoff_hist, i); | |
27404 | + atomic_set(&my_hist->hist_mode, 1); | |
27405 | + my_hist->min_lat = LONG_MAX; | |
27406 | + } | |
27407 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27408 | + (void *)IRQSOFF_LATENCY, &latency_hist_reset_fops); | |
27409 | +#endif | |
27410 | + | |
27411 | +#ifdef CONFIG_PREEMPT_OFF_HIST | |
27412 | + dentry = debugfs_create_dir(preemptoff_hist_dir, | |
27413 | + latency_hist_root); | |
27414 | + for_each_possible_cpu(i) { | |
27415 | + sprintf(name, cpufmt, i); | |
27416 | + entry = debugfs_create_file(name, 0444, dentry, | |
27417 | + &per_cpu(preemptoff_hist, i), &latency_hist_fops); | |
27418 | + my_hist = &per_cpu(preemptoff_hist, i); | |
27419 | + atomic_set(&my_hist->hist_mode, 1); | |
27420 | + my_hist->min_lat = LONG_MAX; | |
27421 | + } | |
27422 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27423 | + (void *)PREEMPTOFF_LATENCY, &latency_hist_reset_fops); | |
27424 | +#endif | |
27425 | + | |
27426 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) && defined(CONFIG_PREEMPT_OFF_HIST) | |
27427 | + dentry = debugfs_create_dir(preemptirqsoff_hist_dir, | |
27428 | + latency_hist_root); | |
27429 | + for_each_possible_cpu(i) { | |
27430 | + sprintf(name, cpufmt, i); | |
27431 | + entry = debugfs_create_file(name, 0444, dentry, | |
27432 | + &per_cpu(preemptirqsoff_hist, i), &latency_hist_fops); | |
27433 | + my_hist = &per_cpu(preemptirqsoff_hist, i); | |
27434 | + atomic_set(&my_hist->hist_mode, 1); | |
27435 | + my_hist->min_lat = LONG_MAX; | |
27436 | + } | |
27437 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27438 | + (void *)PREEMPTIRQSOFF_LATENCY, &latency_hist_reset_fops); | |
27439 | +#endif | |
27440 | + | |
27441 | +#if defined(CONFIG_INTERRUPT_OFF_HIST) || defined(CONFIG_PREEMPT_OFF_HIST) | |
27442 | + entry = debugfs_create_file("preemptirqsoff", 0644, | |
27443 | + enable_root, (void *)&preemptirqsoff_enabled_data, | |
27444 | + &enable_fops); | |
27445 | +#endif | |
27446 | + | |
27447 | +#ifdef CONFIG_WAKEUP_LATENCY_HIST | |
27448 | + dentry = debugfs_create_dir(wakeup_latency_hist_dir, | |
27449 | + latency_hist_root); | |
27450 | + dentry_sharedprio = debugfs_create_dir( | |
27451 | + wakeup_latency_hist_dir_sharedprio, dentry); | |
27452 | + for_each_possible_cpu(i) { | |
27453 | + sprintf(name, cpufmt, i); | |
27454 | + | |
27455 | + entry = debugfs_create_file(name, 0444, dentry, | |
27456 | + &per_cpu(wakeup_latency_hist, i), | |
27457 | + &latency_hist_fops); | |
27458 | + my_hist = &per_cpu(wakeup_latency_hist, i); | |
27459 | + atomic_set(&my_hist->hist_mode, 1); | |
27460 | + my_hist->min_lat = LONG_MAX; | |
27461 | + | |
27462 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, | |
27463 | + &per_cpu(wakeup_latency_hist_sharedprio, i), | |
27464 | + &latency_hist_fops); | |
27465 | + my_hist = &per_cpu(wakeup_latency_hist_sharedprio, i); | |
27466 | + atomic_set(&my_hist->hist_mode, 1); | |
27467 | + my_hist->min_lat = LONG_MAX; | |
27468 | + | |
27469 | + sprintf(name, cpufmt_maxlatproc, i); | |
27470 | + | |
27471 | + mp = &per_cpu(wakeup_maxlatproc, i); | |
27472 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
27473 | + &maxlatproc_fops); | |
27474 | + clear_maxlatprocdata(mp); | |
27475 | + | |
27476 | + mp = &per_cpu(wakeup_maxlatproc_sharedprio, i); | |
27477 | + entry = debugfs_create_file(name, 0444, dentry_sharedprio, mp, | |
27478 | + &maxlatproc_fops); | |
27479 | + clear_maxlatprocdata(mp); | |
27480 | + } | |
27481 | + entry = debugfs_create_file("pid", 0644, dentry, | |
27482 | + (void *)&wakeup_pid, &pid_fops); | |
27483 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27484 | + (void *)WAKEUP_LATENCY, &latency_hist_reset_fops); | |
27485 | + entry = debugfs_create_file("reset", 0644, dentry_sharedprio, | |
27486 | + (void *)WAKEUP_LATENCY_SHAREDPRIO, &latency_hist_reset_fops); | |
27487 | + entry = debugfs_create_file("wakeup", 0644, | |
27488 | + enable_root, (void *)&wakeup_latency_enabled_data, | |
27489 | + &enable_fops); | |
27490 | +#endif | |
27491 | + | |
27492 | +#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST | |
27493 | + dentry = debugfs_create_dir(missed_timer_offsets_dir, | |
27494 | + latency_hist_root); | |
27495 | + for_each_possible_cpu(i) { | |
27496 | + sprintf(name, cpufmt, i); | |
27497 | + entry = debugfs_create_file(name, 0444, dentry, | |
27498 | + &per_cpu(missed_timer_offsets, i), &latency_hist_fops); | |
27499 | + my_hist = &per_cpu(missed_timer_offsets, i); | |
27500 | + atomic_set(&my_hist->hist_mode, 1); | |
27501 | + my_hist->min_lat = LONG_MAX; | |
27502 | + | |
27503 | + sprintf(name, cpufmt_maxlatproc, i); | |
27504 | + mp = &per_cpu(missed_timer_offsets_maxlatproc, i); | |
27505 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
27506 | + &maxlatproc_fops); | |
27507 | + clear_maxlatprocdata(mp); | |
27508 | + } | |
27509 | + entry = debugfs_create_file("pid", 0644, dentry, | |
27510 | + (void *)&missed_timer_offsets_pid, &pid_fops); | |
27511 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27512 | + (void *)MISSED_TIMER_OFFSETS, &latency_hist_reset_fops); | |
27513 | + entry = debugfs_create_file("missed_timer_offsets", 0644, | |
27514 | + enable_root, (void *)&missed_timer_offsets_enabled_data, | |
27515 | + &enable_fops); | |
27516 | +#endif | |
27517 | + | |
27518 | +#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \ | |
27519 | + defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) | |
27520 | + dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir, | |
27521 | + latency_hist_root); | |
27522 | + for_each_possible_cpu(i) { | |
27523 | + sprintf(name, cpufmt, i); | |
27524 | + entry = debugfs_create_file(name, 0444, dentry, | |
27525 | + &per_cpu(timerandwakeup_latency_hist, i), | |
27526 | + &latency_hist_fops); | |
27527 | + my_hist = &per_cpu(timerandwakeup_latency_hist, i); | |
27528 | + atomic_set(&my_hist->hist_mode, 1); | |
27529 | + my_hist->min_lat = LONG_MAX; | |
27530 | + | |
27531 | + sprintf(name, cpufmt_maxlatproc, i); | |
27532 | + mp = &per_cpu(timerandwakeup_maxlatproc, i); | |
27533 | + entry = debugfs_create_file(name, 0444, dentry, mp, | |
27534 | + &maxlatproc_fops); | |
27535 | + clear_maxlatprocdata(mp); | |
27536 | + } | |
27537 | + entry = debugfs_create_file("reset", 0644, dentry, | |
27538 | + (void *)TIMERANDWAKEUP_LATENCY, &latency_hist_reset_fops); | |
27539 | + entry = debugfs_create_file("timerandwakeup", 0644, | |
27540 | + enable_root, (void *)&timerandwakeup_enabled_data, | |
27541 | + &enable_fops); | |
27542 | +#endif | |
27543 | + return 0; | |
27544 | +} | |
27545 | + | |
27546 | +device_initcall(latency_hist_init); | |
27547 | diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c | |
cb95d48a | 27548 | index 059233abcfcf..cad1a28bfbe2 100644 |
27549 | --- a/kernel/trace/trace.c |
27550 | +++ b/kernel/trace/trace.c | |
27551 | @@ -1652,6 +1652,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
27552 | struct task_struct *tsk = current; | |
27553 | ||
27554 | entry->preempt_count = pc & 0xff; | |
27555 | + entry->preempt_lazy_count = preempt_lazy_count(); | |
27556 | entry->pid = (tsk) ? tsk->pid : 0; | |
27557 | entry->flags = | |
27558 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT | |
27559 | @@ -1661,8 +1662,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, | |
27560 | #endif | |
27561 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | | |
27562 | ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | | |
27563 | - (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
27564 | + (tif_need_resched_now() ? TRACE_FLAG_NEED_RESCHED : 0) | | |
27565 | + (need_resched_lazy() ? TRACE_FLAG_NEED_RESCHED_LAZY : 0) | | |
27566 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); | |
27567 | + | |
27568 | + entry->migrate_disable = (tsk) ? __migrate_disabled(tsk) & 0xFF : 0; | |
27569 | } | |
27570 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); | |
27571 | ||
27572 | @@ -2555,14 +2559,17 @@ get_total_entries(struct trace_buffer *buf, | |
27573 | ||
27574 | static void print_lat_help_header(struct seq_file *m) | |
27575 | { | |
27576 | - seq_puts(m, "# _------=> CPU# \n" | |
27577 | - "# / _-----=> irqs-off \n" | |
27578 | - "# | / _----=> need-resched \n" | |
27579 | - "# || / _---=> hardirq/softirq \n" | |
27580 | - "# ||| / _--=> preempt-depth \n" | |
27581 | - "# |||| / delay \n" | |
27582 | - "# cmd pid ||||| time | caller \n" | |
27583 | - "# \\ / ||||| \\ | / \n"); | |
27584 | + seq_puts(m, "# _--------=> CPU# \n" |
27585 | + "# / _-------=> irqs-off \n" | |
27586 | + "# | / _------=> need-resched \n" | |
27587 | + "# || / _-----=> need-resched_lazy \n" | |
27588 | + "# ||| / _----=> hardirq/softirq \n" | |
27589 | + "# |||| / _---=> preempt-depth \n" | |
27590 | + "# ||||| / _--=> preempt-lazy-depth\n" | |
27591 | + "# |||||| / _-=> migrate-disable \n" | |
27592 | + "# ||||||| / delay \n" | |
27593 | + "# cmd pid |||||||| time | caller \n" | |
27594 | + "# \\ / |||||||| \\ | / \n"); | |
27595 | } |
27596 | ||
27597 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |
27598 | @@ -2588,11 +2595,14 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file | |
27599 | print_event_info(buf, m); | |
27600 | seq_puts(m, "# _-----=> irqs-off\n" | |
27601 | "# / _----=> need-resched\n" | |
27602 | - "# | / _---=> hardirq/softirq\n" | |
27603 | - "# || / _--=> preempt-depth\n" | |
27604 | - "# ||| / delay\n" | |
27605 | - "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" | |
27606 | - "# | | | |||| | |\n"); | |
27607 | + "# |/ _-----=> need-resched_lazy\n" | |
27608 | + "# || / _---=> hardirq/softirq\n" | |
27609 | + "# ||| / _--=> preempt-depth\n" | |
27610 | + "# |||| / _-=> preempt-lazy-depth\n" |
27611 | + "# ||||| / _-=> migrate-disable \n" | |
27612 | + "# |||||| / delay\n" | |
27613 | + "# TASK-PID CPU# ||||||| TIMESTAMP FUNCTION\n" | |
27614 | + "# | | | ||||||| | |\n"); | |
27615 | } |
27616 | ||
27617 | void | |
27618 | diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h | |
27619 | index 919d9d07686f..3bf86ece683c 100644 | |
27620 | --- a/kernel/trace/trace.h | |
27621 | +++ b/kernel/trace/trace.h | |
27622 | @@ -117,6 +117,7 @@ struct kretprobe_trace_entry_head { | |
27623 | * NEED_RESCHED - reschedule is requested | |
27624 | * HARDIRQ - inside an interrupt handler | |
27625 | * SOFTIRQ - inside a softirq handler | |
27626 | + * NEED_RESCHED_LAZY - lazy reschedule is requested | |
27627 | */ | |
27628 | enum trace_flag_type { | |
27629 | TRACE_FLAG_IRQS_OFF = 0x01, | |
27630 | @@ -125,6 +126,7 @@ enum trace_flag_type { | |
27631 | TRACE_FLAG_HARDIRQ = 0x08, | |
27632 | TRACE_FLAG_SOFTIRQ = 0x10, | |
27633 | TRACE_FLAG_PREEMPT_RESCHED = 0x20, | |
27634 | + TRACE_FLAG_NEED_RESCHED_LAZY = 0x40, | |
27635 | }; | |
27636 | ||
27637 | #define TRACE_BUF_SIZE 1024 | |
27638 | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c | |
27639 | index 996f0fd34312..5bd79b347398 100644 | |
27640 | --- a/kernel/trace/trace_events.c | |
27641 | +++ b/kernel/trace/trace_events.c | |
27642 | @@ -188,6 +188,8 @@ static int trace_define_common_fields(void) | |
27643 | __common_field(unsigned char, flags); | |
27644 | __common_field(unsigned char, preempt_count); | |
27645 | __common_field(int, pid); | |
27646 | + __common_field(unsigned short, migrate_disable); | |
27647 | + __common_field(unsigned short, padding); | |
27648 | ||
27649 | return ret; | |
27650 | } | |
27651 | @@ -244,6 +246,14 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer, | |
27652 | ||
27653 | local_save_flags(fbuffer->flags); | |
27654 | fbuffer->pc = preempt_count(); | |
27655 | + /* | |
27656 | + * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables | |
27657 | + * preemption (adding one to the preempt_count). Since we are | |
27658 | + * interested in the preempt_count at the time the tracepoint was | |
27659 | + * hit, we need to subtract one to offset the increment. | |
27660 | + */ | |
27661 | + if (IS_ENABLED(CONFIG_PREEMPT)) | |
27662 | + fbuffer->pc--; | |
27663 | fbuffer->trace_file = trace_file; | |
27664 | ||
27665 | fbuffer->event = | |
27666 | diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c | |
27667 | index be3222b7d72e..553e71254ad6 100644 | |
27668 | --- a/kernel/trace/trace_irqsoff.c | |
27669 | +++ b/kernel/trace/trace_irqsoff.c | |
27670 | @@ -13,6 +13,7 @@ | |
27671 | #include <linux/uaccess.h> | |
27672 | #include <linux/module.h> | |
27673 | #include <linux/ftrace.h> | |
27674 | +#include <trace/events/hist.h> | |
27675 | ||
27676 | #include "trace.h" | |
27677 | ||
27678 | @@ -424,11 +425,13 @@ void start_critical_timings(void) | |
27679 | { | |
27680 | if (preempt_trace() || irq_trace()) | |
27681 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
27682 | + trace_preemptirqsoff_hist_rcuidle(TRACE_START, 1); | |
27683 | } | |
27684 | EXPORT_SYMBOL_GPL(start_critical_timings); | |
27685 | ||
27686 | void stop_critical_timings(void) | |
27687 | { | |
27688 | + trace_preemptirqsoff_hist_rcuidle(TRACE_STOP, 0); | |
27689 | if (preempt_trace() || irq_trace()) | |
27690 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
27691 | } | |
27692 | @@ -438,6 +441,7 @@ EXPORT_SYMBOL_GPL(stop_critical_timings); | |
27693 | #ifdef CONFIG_PROVE_LOCKING | |
27694 | void time_hardirqs_on(unsigned long a0, unsigned long a1) | |
27695 | { | |
27696 | + trace_preemptirqsoff_hist_rcuidle(IRQS_ON, 0); | |
27697 | if (!preempt_trace() && irq_trace()) | |
27698 | stop_critical_timing(a0, a1); | |
27699 | } | |
27700 | @@ -446,6 +450,7 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |
27701 | { | |
27702 | if (!preempt_trace() && irq_trace()) | |
27703 | start_critical_timing(a0, a1); | |
27704 | + trace_preemptirqsoff_hist_rcuidle(IRQS_OFF, 1); | |
27705 | } | |
27706 | ||
27707 | #else /* !CONFIG_PROVE_LOCKING */ | |
27708 | @@ -471,6 +476,7 @@ inline void print_irqtrace_events(struct task_struct *curr) | |
27709 | */ | |
27710 | void trace_hardirqs_on(void) | |
27711 | { | |
27712 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
27713 | if (!preempt_trace() && irq_trace()) | |
27714 | stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
27715 | } | |
27716 | @@ -480,11 +486,13 @@ void trace_hardirqs_off(void) | |
27717 | { | |
27718 | if (!preempt_trace() && irq_trace()) | |
27719 | start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); | |
27720 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
27721 | } | |
27722 | EXPORT_SYMBOL(trace_hardirqs_off); | |
27723 | ||
27724 | __visible void trace_hardirqs_on_caller(unsigned long caller_addr) | |
27725 | { | |
27726 | + trace_preemptirqsoff_hist(IRQS_ON, 0); | |
27727 | if (!preempt_trace() && irq_trace()) | |
27728 | stop_critical_timing(CALLER_ADDR0, caller_addr); | |
27729 | } | |
27730 | @@ -494,6 +502,7 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr) | |
27731 | { | |
27732 | if (!preempt_trace() && irq_trace()) | |
27733 | start_critical_timing(CALLER_ADDR0, caller_addr); | |
27734 | + trace_preemptirqsoff_hist(IRQS_OFF, 1); | |
27735 | } | |
27736 | EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
27737 | ||
27738 | @@ -503,12 +512,14 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller); | |
27739 | #ifdef CONFIG_PREEMPT_TRACER | |
27740 | void trace_preempt_on(unsigned long a0, unsigned long a1) | |
27741 | { | |
27742 | + trace_preemptirqsoff_hist(PREEMPT_ON, 0); | |
27743 | if (preempt_trace() && !irq_trace()) | |
27744 | stop_critical_timing(a0, a1); | |
27745 | } | |
27746 | ||
27747 | void trace_preempt_off(unsigned long a0, unsigned long a1) | |
27748 | { | |
27749 | + trace_preemptirqsoff_hist(PREEMPT_OFF, 1); | |
27750 | if (preempt_trace() && !irq_trace()) | |
27751 | start_critical_timing(a0, a1); | |
27752 | } | |
27753 | diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c | |
27754 | index 282982195e09..9f19d839a756 100644 | |
27755 | --- a/kernel/trace/trace_output.c | |
27756 | +++ b/kernel/trace/trace_output.c | |
27757 | @@ -386,6 +386,7 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
27758 | { | |
27759 | char hardsoft_irq; | |
27760 | char need_resched; | |
27761 | + char need_resched_lazy; | |
27762 | char irqs_off; | |
27763 | int hardirq; | |
27764 | int softirq; | |
27765 | @@ -413,6 +414,8 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
27766 | need_resched = '.'; | |
27767 | break; | |
27768 | } | |
27769 | + need_resched_lazy = | |
27770 | + (entry->flags & TRACE_FLAG_NEED_RESCHED_LAZY) ? 'L' : '.'; | |
27771 | ||
27772 | hardsoft_irq = | |
27773 | (hardirq && softirq) ? 'H' : | |
27774 | @@ -420,14 +423,25 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |
27775 | softirq ? 's' : | |
27776 | '.'; | |
27777 | ||
27778 | - trace_seq_printf(s, "%c%c%c", | |
27779 | - irqs_off, need_resched, hardsoft_irq); | |
27780 | + trace_seq_printf(s, "%c%c%c%c", | |
27781 | + irqs_off, need_resched, need_resched_lazy, | |
27782 | + hardsoft_irq); | |
27783 | ||
27784 | if (entry->preempt_count) | |
27785 | trace_seq_printf(s, "%x", entry->preempt_count); | |
27786 | else | |
27787 | trace_seq_putc(s, '.'); | |
27788 | ||
27789 | + if (entry->preempt_lazy_count) | |
27790 | + trace_seq_printf(s, "%x", entry->preempt_lazy_count); | |
27791 | + else | |
27792 | + trace_seq_putc(s, '.'); | |
27793 | + | |
27794 | + if (entry->migrate_disable) | |
27795 | + trace_seq_printf(s, "%x", entry->migrate_disable); | |
27796 | + else | |
27797 | + trace_seq_putc(s, '.'); | |
27798 | + | |
27799 | return !trace_seq_has_overflowed(s); | |
27800 | } | |
27801 | ||
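After the trace_output.c hunks above, the per-event latency field carries seven columns instead of four: irqs-off, need-resched, the new need-resched-lazy ('L' or '.'), hardirq/softirq, then the preempt-depth, preempt-lazy-depth and migrate-disable counts (each printed in hex, or '.' when zero). A hedged decoder for the common case of single-digit counts:

    #include <stdio.h>

    int main(void)
    {
            /* Example field, not taken from a real trace; multi-digit
             * hex counts would widen the last three columns. */
            const char *f = "d..h1.1";

            printf("irqs-off:           %c\n", f[0]);
            printf("need-resched:       %c\n", f[1]);
            printf("need-resched-lazy:  %c\n", f[2]);
            printf("hardirq/softirq:    %c\n", f[3]);
            printf("preempt-depth:      %c\n", f[4]);
            printf("preempt-lazy-depth: %c\n", f[5]);
            printf("migrate-disable:    %c\n", f[6]);
            return 0;
    }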
27802 | diff --git a/kernel/user.c b/kernel/user.c | |
27803 | index b069ccbfb0b0..1a2e88e98b5e 100644 | |
27804 | --- a/kernel/user.c | |
27805 | +++ b/kernel/user.c | |
27806 | @@ -161,11 +161,11 @@ void free_uid(struct user_struct *up) | |
27807 | if (!up) | |
27808 | return; | |
27809 | ||
27810 | - local_irq_save(flags); | |
27811 | + local_irq_save_nort(flags); | |
27812 | if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) | |
27813 | free_user(up, flags); | |
27814 | else | |
27815 | - local_irq_restore(flags); | |
27816 | + local_irq_restore_nort(flags); | |
27817 | } | |
27818 | ||
27819 | struct user_struct *alloc_uid(kuid_t uid) | |
27820 | diff --git a/kernel/watchdog.c b/kernel/watchdog.c | |
27821 | index 198137b1cadc..47d143740774 100644 | |
27822 | --- a/kernel/watchdog.c | |
27823 | +++ b/kernel/watchdog.c | |
27824 | @@ -299,6 +299,8 @@ static int is_softlockup(unsigned long touch_ts) | |
27825 | ||
27826 | #ifdef CONFIG_HARDLOCKUP_DETECTOR | |
27827 | ||
27828 | +static DEFINE_RAW_SPINLOCK(watchdog_output_lock); | |
27829 | + | |
27830 | static struct perf_event_attr wd_hw_attr = { | |
27831 | .type = PERF_TYPE_HARDWARE, | |
27832 | .config = PERF_COUNT_HW_CPU_CYCLES, | |
27833 | @@ -333,6 +335,13 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
27834 | /* only print hardlockups once */ | |
27835 | if (__this_cpu_read(hard_watchdog_warn) == true) | |
27836 | return; | |
27837 | + /* | |
27838 | + * If early-printk is enabled then make sure we do not | |
27839 | + * lock up in printk() and kill console logging: | |
27840 | + */ | |
27841 | + printk_kill(); | |
27842 | + | |
27843 | + raw_spin_lock(&watchdog_output_lock); | |
27844 | ||
27845 | pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); | |
27846 | print_modules(); | |
27847 | @@ -350,8 +359,9 @@ static void watchdog_overflow_callback(struct perf_event *event, | |
27848 | !test_and_set_bit(0, &hardlockup_allcpu_dumped)) | |
27849 | trigger_allbutself_cpu_backtrace(); | |
27850 | ||
27851 | + raw_spin_unlock(&watchdog_output_lock); | |
27852 | if (hardlockup_panic) | |
27853 | - panic("Hard LOCKUP"); | |
27854 | + nmi_panic(regs, "Hard LOCKUP"); | |
27855 | ||
27856 | __this_cpu_write(hard_watchdog_warn, true); | |
27857 | return; | |
27858 | @@ -497,6 +507,7 @@ static void watchdog_enable(unsigned int cpu) | |
27859 | /* kick off the timer for the hardlockup detector */ | |
27860 | hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | |
27861 | hrtimer->function = watchdog_timer_fn; | |
27862 | + hrtimer->irqsafe = 1; | |
27863 | ||
27864 | /* Enable the perf event */ | |
27865 | watchdog_nmi_enable(cpu); | |
27866 | diff --git a/kernel/workqueue.c b/kernel/workqueue.c | |
27867 | index 2c2f971f3e75..965d5f65e847 100644 | |
27868 | --- a/kernel/workqueue.c | |
27869 | +++ b/kernel/workqueue.c | |
27870 | @@ -48,6 +48,8 @@ | |
27871 | #include <linux/nodemask.h> | |
27872 | #include <linux/moduleparam.h> | |
27873 | #include <linux/uaccess.h> | |
27874 | +#include <linux/locallock.h> | |
27875 | +#include <linux/delay.h> | |
27876 | ||
27877 | #include "workqueue_internal.h" | |
27878 | ||
27879 | @@ -121,11 +123,16 @@ enum { | |
27880 | * cpu or grabbing pool->lock is enough for read access. If | |
27881 | * POOL_DISASSOCIATED is set, it's identical to L. | |
27882 | * | |
27883 | + * On RT we need the extra protection via rt_lock_idle_list() for | |
27884 | + * the list manipulations against read access from | |
27885 | + * wq_worker_sleeping(). All other places are nicely serialized via | |
27886 | + * pool->lock. | |
27887 | + * | |
27888 | * A: pool->attach_mutex protected. | |
27889 | * | |
27890 | * PL: wq_pool_mutex protected. | |
27891 | * | |
27892 | - * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. | |
27893 | + * PR: wq_pool_mutex protected for writes. RCU protected for reads. | |
27894 | * | |
27895 | * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads. | |
27896 | * | |
27897 | @@ -134,7 +141,7 @@ enum { | |
27898 | * | |
27899 | * WQ: wq->mutex protected. | |
27900 | * | |
27901 | - * WR: wq->mutex protected for writes. Sched-RCU protected for reads. | |
27902 | + * WR: wq->mutex protected for writes. RCU protected for reads. | |
27903 | * | |
27904 | * MD: wq_mayday_lock protected. | |
27905 | */ | |
27906 | @@ -183,7 +190,7 @@ struct worker_pool { | |
27907 | atomic_t nr_running ____cacheline_aligned_in_smp; | |
27908 | ||
27909 | /* | |
27910 | - * Destruction of pool is sched-RCU protected to allow dereferences | |
27911 | + * Destruction of pool is RCU protected to allow dereferences | |
27912 | * from get_work_pool(). | |
27913 | */ | |
27914 | struct rcu_head rcu; | |
27915 | @@ -212,7 +219,7 @@ struct pool_workqueue { | |
27916 | /* | |
27917 | * Release of unbound pwq is punted to system_wq. See put_pwq() | |
27918 | * and pwq_unbound_release_workfn() for details. pool_workqueue | |
27919 | - * itself is also sched-RCU protected so that the first pwq can be | |
27920 | + * itself is also RCU protected so that the first pwq can be | |
27921 | * determined without grabbing wq->mutex. | |
27922 | */ | |
27923 | struct work_struct unbound_release_work; | |
27924 | @@ -331,6 +338,8 @@ EXPORT_SYMBOL_GPL(system_power_efficient_wq); | |
27925 | struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly; | |
27926 | EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); | |
27927 | ||
27928 | +static DEFINE_LOCAL_IRQ_LOCK(pendingb_lock); | |
27929 | + | |
27930 | static int worker_thread(void *__worker); | |
27931 | static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
27932 | ||
27933 | @@ -338,20 +347,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
27934 | #include <trace/events/workqueue.h> | |
27935 | ||
27936 | #define assert_rcu_or_pool_mutex() \ | |
27937 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
27938 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
27939 | !lockdep_is_held(&wq_pool_mutex), \ | |
27940 | - "sched RCU or wq_pool_mutex should be held") | |
27941 | + "RCU or wq_pool_mutex should be held") | |
27942 | ||
27943 | #define assert_rcu_or_wq_mutex(wq) \ | |
27944 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
27945 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
27946 | !lockdep_is_held(&wq->mutex), \ | |
27947 | - "sched RCU or wq->mutex should be held") | |
27948 | + "RCU or wq->mutex should be held") | |
27949 | ||
27950 | #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ | |
27951 | - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | |
27952 | + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | |
27953 | !lockdep_is_held(&wq->mutex) && \ | |
27954 | !lockdep_is_held(&wq_pool_mutex), \ | |
27955 | - "sched RCU, wq->mutex or wq_pool_mutex should be held") | |
27956 | + "RCU, wq->mutex or wq_pool_mutex should be held") | |
27957 | ||
27958 | #define for_each_cpu_worker_pool(pool, cpu) \ | |
27959 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | |
27960 | @@ -363,7 +372,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
27961 | * @pool: iteration cursor | |
27962 | * @pi: integer used for iteration | |
27963 | * | |
27964 | - * This must be called either with wq_pool_mutex held or sched RCU read | |
27965 | + * This must be called either with wq_pool_mutex held or RCU read | |
27966 | * locked. If the pool needs to be used beyond the locking in effect, the | |
27967 | * caller is responsible for guaranteeing that the pool stays online. | |
27968 | * | |
27969 | @@ -395,7 +404,7 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
27970 | * @pwq: iteration cursor | |
27971 | * @wq: the target workqueue | |
27972 | * | |
27973 | - * This must be called either with wq->mutex held or sched RCU read locked. | |
27974 | + * This must be called either with wq->mutex held or RCU read locked. | |
27975 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
27976 | * responsible for guaranteeing that the pwq stays online. | |
27977 | * | |
27978 | @@ -407,6 +416,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); | |
27979 | if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \ | |
27980 | else | |
27981 | ||
27982 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
27983 | +static inline void rt_lock_idle_list(struct worker_pool *pool) | |
27984 | +{ | |
27985 | + preempt_disable(); | |
27986 | +} | |
27987 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) | |
27988 | +{ | |
27989 | + preempt_enable(); | |
27990 | +} | |
27991 | +static inline void sched_lock_idle_list(struct worker_pool *pool) { } | |
27992 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) { } | |
27993 | +#else | |
27994 | +static inline void rt_lock_idle_list(struct worker_pool *pool) { } | |
27995 | +static inline void rt_unlock_idle_list(struct worker_pool *pool) { } | |
27996 | +static inline void sched_lock_idle_list(struct worker_pool *pool) | |
27997 | +{ | |
27998 | + spin_lock_irq(&pool->lock); | |
27999 | +} | |
28000 | +static inline void sched_unlock_idle_list(struct worker_pool *pool) | |
28001 | +{ | |
28002 | + spin_unlock_irq(&pool->lock); | |
28003 | +} | |
28004 | +#endif | |
28005 | + | |
28006 | + | |
28007 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | |
28008 | ||
28009 | static struct debug_obj_descr work_debug_descr; | |
28010 | @@ -557,7 +591,7 @@ static int worker_pool_assign_id(struct worker_pool *pool) | |
28011 | * @wq: the target workqueue | |
28012 | * @node: the node ID | |
28013 | * | |
28014 | - * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU | |
28015 | + * This must be called with any of wq_pool_mutex, wq->mutex or RCU | |
28016 | * read locked. | |
28017 | * If the pwq needs to be used beyond the locking in effect, the caller is | |
28018 | * responsible for guaranteeing that the pwq stays online. | |
28019 | @@ -701,8 +735,8 @@ static struct pool_workqueue *get_work_pwq(struct work_struct *work) | |
28020 | * @work: the work item of interest | |
28021 | * | |
28022 | * Pools are created and destroyed under wq_pool_mutex, and allows read | |
28023 | - * access under sched-RCU read lock. As such, this function should be | |
28024 | - * called under wq_pool_mutex or with preemption disabled. | |
28025 | + * access under RCU read lock. As such, this function should be | |
28026 | + * called under wq_pool_mutex or inside of a rcu_read_lock() region. | |
28027 | * | |
28028 | * All fields of the returned pool are accessible as long as the above | |
28029 | * mentioned locking is in effect. If the returned pool needs to be used | |
28030 | @@ -839,51 +873,44 @@ static struct worker *first_idle_worker(struct worker_pool *pool) | |
28031 | */ | |
28032 | static void wake_up_worker(struct worker_pool *pool) | |
28033 | { | |
28034 | - struct worker *worker = first_idle_worker(pool); | |
28035 | + struct worker *worker; | |
28036 | + | |
28037 | + rt_lock_idle_list(pool); | |
28038 | + | |
28039 | + worker = first_idle_worker(pool); | |
28040 | ||
28041 | if (likely(worker)) | |
28042 | wake_up_process(worker->task); | |
28043 | + | |
28044 | + rt_unlock_idle_list(pool); | |
28045 | } | |
28046 | ||
28047 | /** | |
28048 | - * wq_worker_waking_up - a worker is waking up | |
28049 | - * @task: task waking up | |
28050 | - * @cpu: CPU @task is waking up to | |
28051 | + * wq_worker_running - a worker is running again | |
28052 | + * @task: task returning from sleep | |
28053 | * | |
28054 | - * This function is called during try_to_wake_up() when a worker is | |
28055 | - * being awoken. | |
28056 | - * | |
28057 | - * CONTEXT: | |
28058 | - * spin_lock_irq(rq->lock) | |
28059 | + * This function is called when a worker returns from schedule() | |
28060 | */ | |
28061 | -void wq_worker_waking_up(struct task_struct *task, int cpu) | |
28062 | +void wq_worker_running(struct task_struct *task) | |
28063 | { | |
28064 | struct worker *worker = kthread_data(task); | |
28065 | ||
28066 | - if (!(worker->flags & WORKER_NOT_RUNNING)) { | |
28067 | - WARN_ON_ONCE(worker->pool->cpu != cpu); | |
28068 | + if (!worker->sleeping) | |
28069 | + return; | |
28070 | + if (!(worker->flags & WORKER_NOT_RUNNING)) | |
28071 | atomic_inc(&worker->pool->nr_running); | |
28072 | - } | |
28073 | + worker->sleeping = 0; | |
28074 | } | |
28075 | ||
28076 | /** | |
28077 | * wq_worker_sleeping - a worker is going to sleep | |
28078 | * @task: task going to sleep | |
28079 | - * @cpu: CPU in question, must be the current CPU number | |
28080 | - * | |
28081 | - * This function is called during schedule() when a busy worker is | |
28082 | - * going to sleep. Worker on the same cpu can be woken up by | |
28083 | - * returning pointer to its task. | |
28084 | - * | |
28085 | - * CONTEXT: | |
28086 | - * spin_lock_irq(rq->lock) | |
28087 | - * | |
28088 | - * Return: | |
28089 | - * Worker task on @cpu to wake up, %NULL if none. | |
28090 | + * This function is called from schedule() when a busy worker is | |
28091 | + * going to sleep. | |
28092 | */ | |
28093 | -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) | |
28094 | +void wq_worker_sleeping(struct task_struct *task) | |
28095 | { | |
28096 | - struct worker *worker = kthread_data(task), *to_wakeup = NULL; | |
28097 | + struct worker *worker = kthread_data(task); | |
28098 | struct worker_pool *pool; | |
28099 | ||
28100 | /* | |
28101 | @@ -892,29 +919,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) | |
28102 | * checking NOT_RUNNING. | |
28103 | */ | |
28104 | if (worker->flags & WORKER_NOT_RUNNING) | |
28105 | - return NULL; | |
28106 | + return; | |
28107 | ||
28108 | pool = worker->pool; | |
28109 | ||
28110 | - /* this can only happen on the local cpu */ | |
28111 | - if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu)) | |
28112 | - return NULL; | |
28113 | + if (WARN_ON_ONCE(worker->sleeping)) | |
28114 | + return; | |
28115 | + | |
28116 | + worker->sleeping = 1; | |
28117 | ||
28118 | /* | |
28119 | * The counterpart of the following dec_and_test, implied mb, | |
28120 | * worklist not empty test sequence is in insert_work(). | |
28121 | * Please read comment there. | |
28122 | - * | |
28123 | - * NOT_RUNNING is clear. This means that we're bound to and | |
28124 | - * running on the local cpu w/ rq lock held and preemption | |
28125 | - * disabled, which in turn means that none else could be | |
28126 | - * manipulating idle_list, so dereferencing idle_list without pool | |
28127 | - * lock is safe. | |
28128 | */ | |
28129 | if (atomic_dec_and_test(&pool->nr_running) && | |
28130 | - !list_empty(&pool->worklist)) | |
28131 | - to_wakeup = first_idle_worker(pool); | |
28132 | - return to_wakeup ? to_wakeup->task : NULL; | |
28133 | + !list_empty(&pool->worklist)) { | |
28134 | + sched_lock_idle_list(pool); | |
28135 | + wake_up_worker(pool); | |
28136 | + sched_unlock_idle_list(pool); | |
28137 | + } | |
28138 | } | |
28139 | ||
28140 | /** | |
28141 | @@ -1108,12 +1132,12 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq) | |
28142 | { | |
28143 | if (pwq) { | |
28144 | /* | |
28145 | - * As both pwqs and pools are sched-RCU protected, the | |
28146 | + * As both pwqs and pools are RCU protected, the | |
28147 | * following lock operations are safe. | |
28148 | */ | |
28149 | - spin_lock_irq(&pwq->pool->lock); | |
28150 | + local_spin_lock_irq(pendingb_lock, &pwq->pool->lock); | |
28151 | put_pwq(pwq); | |
28152 | - spin_unlock_irq(&pwq->pool->lock); | |
28153 | + local_spin_unlock_irq(pendingb_lock, &pwq->pool->lock); | |
28154 | } | |
28155 | } | |
28156 | ||
28157 | @@ -1215,7 +1239,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
28158 | struct worker_pool *pool; | |
28159 | struct pool_workqueue *pwq; | |
28160 | ||
28161 | - local_irq_save(*flags); | |
28162 | + local_lock_irqsave(pendingb_lock, *flags); | |
28163 | ||
28164 | /* try to steal the timer if it exists */ | |
28165 | if (is_dwork) { | |
28166 | @@ -1234,6 +1258,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
28167 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) | |
28168 | return 0; | |
28169 | ||
28170 | + rcu_read_lock(); | |
28171 | /* | |
28172 | * The queueing is in progress, or it is already queued. Try to | |
28173 | * steal it from ->worklist without clearing WORK_STRUCT_PENDING. | |
28174 | @@ -1272,14 +1297,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, | |
28175 | set_work_pool_and_keep_pending(work, pool->id); | |
28176 | ||
28177 | spin_unlock(&pool->lock); | |
28178 | + rcu_read_unlock(); | |
28179 | return 1; | |
28180 | } | |
28181 | spin_unlock(&pool->lock); | |
28182 | fail: | |
28183 | - local_irq_restore(*flags); | |
28184 | + rcu_read_unlock(); | |
28185 | + local_unlock_irqrestore(pendingb_lock, *flags); | |
28186 | if (work_is_canceling(work)) | |
28187 | return -ENOENT; | |
28188 | - cpu_relax(); | |
28189 | + cpu_chill(); | |
28190 | return -EAGAIN; | |
28191 | } | |
28192 | ||
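Two RT primitives carry this hunk. A local lock (declared elsewhere in this
series with DEFINE_LOCAL_IRQ_LOCK(pendingb_lock)) compiles down to plain IRQ
toggling on mainline but becomes a per-CPU spinlock on RT, so the section
stays preemptible; a simplified sketch of the mapping, with details elided:

    #ifdef CONFIG_PREEMPT_RT_BASE
    /* per-CPU spinlock: serializes pendingb_lock users on this CPU
     * without disabling interrupts or preemption */
    # define local_lock_irqsave(lvar, flags) \
            do { (void)(flags); local_lock(lvar); } while (0)
    #else
    # define local_lock_irqsave(lvar, flags)        local_irq_save(flags)
    #endif

cpu_chill() replaces cpu_relax() for the same reason: on RT the task holding
PENDING is preemptible, so a busy-wait retry loop could starve it; in this
series cpu_chill() is a short sleep rather than a spin.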
28193 | @@ -1348,7 +1375,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
28194 | * queued or lose PENDING. Grabbing PENDING and queueing should | |
28195 | * happen with IRQ disabled. | |
28196 | */ | |
28197 | - WARN_ON_ONCE(!irqs_disabled()); | |
28198 | + WARN_ON_ONCE_NONRT(!irqs_disabled()); | |
28199 | ||
28200 | debug_work_activate(work); | |
28201 | ||
28202 | @@ -1356,6 +1383,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |
28203 | if (unlikely(wq->flags & __WQ_DRAINING) && | |
28204 | WARN_ON_ONCE(!is_chained_work(wq))) | |
28205 | return; | |
28206 | + | |
28207 | + rcu_read_lock(); | |
28208 | retry: | |
28209 | if (req_cpu == WORK_CPU_UNBOUND) | |
28210 | cpu = raw_smp_processor_id(); | |
28211 | @@ -1412,10 +1441,8 @@ retry: | |
28212 | /* pwq determined, queue */ | |
28213 | trace_workqueue_queue_work(req_cpu, pwq, work); | |
28214 | ||
28215 | - if (WARN_ON(!list_empty(&work->entry))) { | |
28216 | - spin_unlock(&pwq->pool->lock); | |
28217 | - return; | |
28218 | - } | |
28219 | + if (WARN_ON(!list_empty(&work->entry))) | |
28220 | + goto out; | |
28221 | ||
28222 | pwq->nr_in_flight[pwq->work_color]++; | |
28223 | work_flags = work_color_to_flags(pwq->work_color); | |
28224 | @@ -1431,7 +1458,9 @@ retry: | |
28225 | ||
28226 | insert_work(pwq, work, worklist, work_flags); | |
28227 | ||
28228 | +out: | |
28229 | spin_unlock(&pwq->pool->lock); | |
28230 | + rcu_read_unlock(); | |
28231 | } | |
28232 | ||
28233 | /** | |
28234 | @@ -1451,14 +1480,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, | |
28235 | bool ret = false; | |
28236 | unsigned long flags; | |
28237 | ||
28238 | - local_irq_save(flags); | |
28239 | +	local_lock_irqsave(pendingb_lock, flags); | 
28240 | ||
28241 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
28242 | __queue_work(cpu, wq, work); | |
28243 | ret = true; | |
28244 | } | |
28245 | ||
28246 | - local_irq_restore(flags); | |
28247 | + local_unlock_irqrestore(pendingb_lock, flags); | |
28248 | return ret; | |
28249 | } | |
28250 | EXPORT_SYMBOL(queue_work_on); | |
28251 | @@ -1525,14 +1554,14 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
28252 | unsigned long flags; | |
28253 | ||
28254 | /* read the comment in __queue_work() */ | |
28255 | - local_irq_save(flags); | |
28256 | + local_lock_irqsave(pendingb_lock, flags); | |
28257 | ||
28258 | if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { | |
28259 | __queue_delayed_work(cpu, wq, dwork, delay); | |
28260 | ret = true; | |
28261 | } | |
28262 | ||
28263 | - local_irq_restore(flags); | |
28264 | + local_unlock_irqrestore(pendingb_lock, flags); | |
28265 | return ret; | |
28266 | } | |
28267 | EXPORT_SYMBOL(queue_delayed_work_on); | |
28268 | @@ -1567,7 +1596,7 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, | |
28269 | ||
28270 | if (likely(ret >= 0)) { | |
28271 | __queue_delayed_work(cpu, wq, dwork, delay); | |
28272 | - local_irq_restore(flags); | |
28273 | + local_unlock_irqrestore(pendingb_lock, flags); | |
28274 | } | |
28275 | ||
28276 | /* -ENOENT from try_to_grab_pending() becomes %true */ | |
28277 | @@ -1600,7 +1629,9 @@ static void worker_enter_idle(struct worker *worker) | |
28278 | worker->last_active = jiffies; | |
28279 | ||
28280 | /* idle_list is LIFO */ | |
28281 | + rt_lock_idle_list(pool); | |
28282 | list_add(&worker->entry, &pool->idle_list); | |
28283 | + rt_unlock_idle_list(pool); | |
28284 | ||
28285 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) | |
28286 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); | |
28287 | @@ -1633,7 +1664,9 @@ static void worker_leave_idle(struct worker *worker) | |
28288 | return; | |
28289 | worker_clr_flags(worker, WORKER_IDLE); | |
28290 | pool->nr_idle--; | |
28291 | + rt_lock_idle_list(pool); | |
28292 | list_del_init(&worker->entry); | |
28293 | + rt_unlock_idle_list(pool); | |
28294 | } | |
28295 | ||
28296 | static struct worker *alloc_worker(int node) | |
28297 | @@ -1799,7 +1832,9 @@ static void destroy_worker(struct worker *worker) | |
28298 | pool->nr_workers--; | |
28299 | pool->nr_idle--; | |
28300 | ||
28301 | + rt_lock_idle_list(pool); | |
28302 | list_del_init(&worker->entry); | |
28303 | + rt_unlock_idle_list(pool); | |
28304 | worker->flags |= WORKER_DIE; | |
28305 | wake_up_process(worker->task); | |
28306 | } | |
28307 | @@ -2716,14 +2751,14 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
28308 | ||
28309 | might_sleep(); | |
28310 | ||
28311 | - local_irq_disable(); | |
28312 | + rcu_read_lock(); | |
28313 | pool = get_work_pool(work); | |
28314 | if (!pool) { | |
28315 | - local_irq_enable(); | |
28316 | + rcu_read_unlock(); | |
28317 | return false; | |
28318 | } | |
28319 | ||
28320 | - spin_lock(&pool->lock); | |
28321 | + spin_lock_irq(&pool->lock); | |
28322 | /* see the comment in try_to_grab_pending() with the same code */ | |
28323 | pwq = get_work_pwq(work); | |
28324 | if (pwq) { | |
28325 | @@ -2750,10 +2785,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) | |
28326 | else | |
28327 | lock_map_acquire_read(&pwq->wq->lockdep_map); | |
28328 | lock_map_release(&pwq->wq->lockdep_map); | |
28329 | - | |
28330 | + rcu_read_unlock(); | |
28331 | return true; | |
28332 | already_gone: | |
28333 | spin_unlock_irq(&pool->lock); | |
28334 | + rcu_read_unlock(); | |
28335 | return false; | |
28336 | } | |
28337 | ||
28338 | @@ -2840,7 +2876,7 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) | |
28339 | ||
28340 | /* tell other tasks trying to grab @work to back off */ | |
28341 | mark_work_canceling(work); | |
28342 | - local_irq_restore(flags); | |
28343 | + local_unlock_irqrestore(pendingb_lock, flags); | |
28344 | ||
28345 | flush_work(work); | |
28346 | clear_work_data(work); | |
28347 | @@ -2895,10 +2931,10 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); | |
28348 | */ | |
28349 | bool flush_delayed_work(struct delayed_work *dwork) | |
28350 | { | |
28351 | - local_irq_disable(); | |
28352 | + local_lock_irq(pendingb_lock); | |
28353 | if (del_timer_sync(&dwork->timer)) | |
28354 | __queue_work(dwork->cpu, dwork->wq, &dwork->work); | |
28355 | - local_irq_enable(); | |
28356 | + local_unlock_irq(pendingb_lock); | |
28357 | return flush_work(&dwork->work); | |
28358 | } | |
28359 | EXPORT_SYMBOL(flush_delayed_work); | |
28360 | @@ -2933,7 +2969,7 @@ bool cancel_delayed_work(struct delayed_work *dwork) | |
28361 | ||
28362 | set_work_pool_and_clear_pending(&dwork->work, | |
28363 | get_work_pool_id(&dwork->work)); | |
28364 | - local_irq_restore(flags); | |
28365 | + local_unlock_irqrestore(pendingb_lock, flags); | |
28366 | return ret; | |
28367 | } | |
28368 | EXPORT_SYMBOL(cancel_delayed_work); | |
28369 | @@ -3161,7 +3197,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |
28370 | * put_unbound_pool - put a worker_pool | |
28371 | * @pool: worker_pool to put | |
28372 | * | |
28373 | - * Put @pool. If its refcnt reaches zero, it gets destroyed in sched-RCU | |
28374 | + * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU | |
28375 | * safe manner. get_unbound_pool() calls this function on its failure path | |
28376 | * and this function should be able to release pools which went through, | |
28377 | * successfully or not, init_worker_pool(). | |
28378 | @@ -3215,8 +3251,8 @@ static void put_unbound_pool(struct worker_pool *pool) | |
28379 | del_timer_sync(&pool->idle_timer); | |
28380 | del_timer_sync(&pool->mayday_timer); | |
28381 | ||
28382 | - /* sched-RCU protected to allow dereferences from get_work_pool() */ | |
28383 | - call_rcu_sched(&pool->rcu, rcu_free_pool); | |
28384 | + /* RCU protected to allow dereferences from get_work_pool() */ | |
28385 | + call_rcu(&pool->rcu, rcu_free_pool); | |
28386 | } | |
28387 | ||
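The call_rcu_sched() to call_rcu() conversion pairs with the rcu_read_lock()
readers added throughout this patch: once RT removes the irq-disabled
regions, preempt/irq-off sections no longer imply a sched-RCU read side, so
plain RCU has to keep the pools alive instead. A reader then follows the
shape below; start_flush_work() and work_busy() in this patch do essentially
this, the helper name here being purely illustrative:

    static bool example_work_is_running(struct work_struct *work)
    {
            struct worker_pool *pool;
            bool running = false;

            rcu_read_lock();                /* keeps the pool's memory valid */
            pool = get_work_pool(work);
            if (pool) {
                    spin_lock_irq(&pool->lock);
                    running = find_worker_executing_work(pool, work) != NULL;
                    spin_unlock_irq(&pool->lock);
            }
            rcu_read_unlock();              /* pool may be freed after this */
            return running;
    }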
28388 | /** | |
28389 | @@ -3323,14 +3359,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) | |
28390 | put_unbound_pool(pool); | |
28391 | mutex_unlock(&wq_pool_mutex); | |
28392 | ||
28393 | - call_rcu_sched(&pwq->rcu, rcu_free_pwq); | |
28394 | + call_rcu(&pwq->rcu, rcu_free_pwq); | |
28395 | ||
28396 | /* | |
28397 | * If we're the last pwq going away, @wq is already dead and no one | |
28398 | * is gonna access it anymore. Schedule RCU free. | |
28399 | */ | |
28400 | if (is_last) | |
28401 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
28402 | + call_rcu(&wq->rcu, rcu_free_wq); | |
28403 | } | |
28404 | ||
28405 | /** | |
28406 | @@ -3983,7 +4019,7 @@ void destroy_workqueue(struct workqueue_struct *wq) | |
28407 | * The base ref is never dropped on per-cpu pwqs. Directly | |
28408 | * schedule RCU free. | |
28409 | */ | |
28410 | - call_rcu_sched(&wq->rcu, rcu_free_wq); | |
28411 | + call_rcu(&wq->rcu, rcu_free_wq); | |
28412 | } else { | |
28413 | /* | |
28414 | * We're the sole accessor of @wq at this point. Directly | |
28415 | @@ -4076,7 +4112,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
28416 | struct pool_workqueue *pwq; | |
28417 | bool ret; | |
28418 | ||
28419 | - rcu_read_lock_sched(); | |
28420 | + rcu_read_lock(); | |
28421 | + preempt_disable(); | |
28422 | ||
28423 | if (cpu == WORK_CPU_UNBOUND) | |
28424 | cpu = smp_processor_id(); | |
28425 | @@ -4087,7 +4124,8 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq) | |
28426 | pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); | |
28427 | ||
28428 | ret = !list_empty(&pwq->delayed_works); | |
28429 | - rcu_read_unlock_sched(); | |
28430 | + preempt_enable(); | |
28431 | + rcu_read_unlock(); | |
28432 | ||
28433 | return ret; | |
28434 | } | |
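rcu_read_lock_sched() used to provide two guarantees at once: an RCU-sched
read side and a stable CPU number. Plain rcu_read_lock() only replaces the
first, hence the explicit preempt_disable() for the per-CPU lookup. Spelled
out for the per-CPU workqueue case (illustrative helper, unbound case
omitted):

    static bool example_congested_here(struct workqueue_struct *wq)
    {
            struct pool_workqueue *pwq;
            bool ret;

            rcu_read_lock();        /* pwq/pool lifetime across the lookup */
            preempt_disable();      /* keep smp_processor_id() stable */
            pwq = per_cpu_ptr(wq->cpu_pwqs, smp_processor_id());
            ret = !list_empty(&pwq->delayed_works);
            preempt_enable();
            rcu_read_unlock();
            return ret;
    }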
28435 | @@ -4113,15 +4151,15 @@ unsigned int work_busy(struct work_struct *work) | |
28436 | if (work_pending(work)) | |
28437 | ret |= WORK_BUSY_PENDING; | |
28438 | ||
28439 | - local_irq_save(flags); | |
28440 | + rcu_read_lock(); | |
28441 | pool = get_work_pool(work); | |
28442 | if (pool) { | |
28443 | - spin_lock(&pool->lock); | |
28444 | + spin_lock_irqsave(&pool->lock, flags); | |
28445 | if (find_worker_executing_work(pool, work)) | |
28446 | ret |= WORK_BUSY_RUNNING; | |
28447 | - spin_unlock(&pool->lock); | |
28448 | + spin_unlock_irqrestore(&pool->lock, flags); | |
28449 | } | |
28450 | - local_irq_restore(flags); | |
28451 | + rcu_read_unlock(); | |
28452 | ||
28453 | return ret; | |
28454 | } | |
28455 | @@ -4310,7 +4348,7 @@ void show_workqueue_state(void) | |
28456 | unsigned long flags; | |
28457 | int pi; | |
28458 | ||
28459 | - rcu_read_lock_sched(); | |
28460 | + rcu_read_lock(); | |
28461 | ||
28462 | pr_info("Showing busy workqueues and worker pools:\n"); | |
28463 | ||
28464 | @@ -4361,7 +4399,7 @@ void show_workqueue_state(void) | |
28465 | spin_unlock_irqrestore(&pool->lock, flags); | |
28466 | } | |
28467 | ||
28468 | - rcu_read_unlock_sched(); | |
28469 | + rcu_read_unlock(); | |
28470 | } | |
28471 | ||
28472 | /* | |
28473 | @@ -4722,16 +4760,16 @@ bool freeze_workqueues_busy(void) | |
28474 | * nr_active is monotonically decreasing. It's safe | |
28475 | * to peek without lock. | |
28476 | */ | |
28477 | - rcu_read_lock_sched(); | |
28478 | + rcu_read_lock(); | |
28479 | for_each_pwq(pwq, wq) { | |
28480 | WARN_ON_ONCE(pwq->nr_active < 0); | |
28481 | if (pwq->nr_active) { | |
28482 | busy = true; | |
28483 | - rcu_read_unlock_sched(); | |
28484 | + rcu_read_unlock(); | |
28485 | goto out_unlock; | |
28486 | } | |
28487 | } | |
28488 | - rcu_read_unlock_sched(); | |
28489 | + rcu_read_unlock(); | |
28490 | } | |
28491 | out_unlock: | |
28492 | mutex_unlock(&wq_pool_mutex); | |
28493 | @@ -4921,7 +4959,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
28494 | const char *delim = ""; | |
28495 | int node, written = 0; | |
28496 | ||
28497 | - rcu_read_lock_sched(); | |
28498 | + get_online_cpus(); | |
28499 | + rcu_read_lock(); | |
28500 | for_each_node(node) { | |
28501 | written += scnprintf(buf + written, PAGE_SIZE - written, | |
28502 | "%s%d:%d", delim, node, | |
28503 | @@ -4929,7 +4968,8 @@ static ssize_t wq_pool_ids_show(struct device *dev, | |
28504 | delim = " "; | |
28505 | } | |
28506 | written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); | |
28507 | - rcu_read_unlock_sched(); | |
28508 | + rcu_read_unlock(); | |
28509 | + put_online_cpus(); | |
28510 | ||
28511 | return written; | |
28512 | } | |
28513 | diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h | |
28514 | index 45215870ac6c..f000c4d6917e 100644 | |
28515 | --- a/kernel/workqueue_internal.h | |
28516 | +++ b/kernel/workqueue_internal.h | |
28517 | @@ -43,6 +43,7 @@ struct worker { | |
28518 | unsigned long last_active; /* L: last active timestamp */ | |
28519 | unsigned int flags; /* X: flags */ | |
28520 | int id; /* I: worker id */ | |
28521 | + int sleeping; /* None */ | |
28522 | ||
28523 | /* | |
28524 | * Opaque string set with work_set_desc(). Printed out with task | |
28525 | @@ -68,7 +69,7 @@ static inline struct worker *current_wq_worker(void) | |
28526 | * Scheduler hooks for concurrency managed workqueue. Only to be used from | |
28527 | * sched/core.c and workqueue.c. | |
28528 | */ | |
28529 | -void wq_worker_waking_up(struct task_struct *task, int cpu); | |
28530 | -struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu); | |
28531 | +void wq_worker_running(struct task_struct *task); | |
28532 | +void wq_worker_sleeping(struct task_struct *task); | |
28533 | ||
28534 | #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ | |
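The renamed hooks are wired into the scheduler by a separate hunk of this
series that is not part of this excerpt, so the placement below is an
assumed, simplified sketch. The protocol it illustrates is the one visible
above: a busy worker reports sleeping before __schedule() switches away, and
reports running once it is back on a CPU.

    asmlinkage __visible void __sched schedule(void)
    {
            struct task_struct *tsk = current;

            /* going to sleep: may wake an idle worker to keep concurrency */
            if (tsk->state && (tsk->flags & PF_WQ_WORKER))
                    wq_worker_sleeping(tsk);
            do {
                    __schedule();
            } while (need_resched());
            /* back on a CPU: restore the nr_running accounting */
            if (tsk->flags & PF_WQ_WORKER)
                    wq_worker_running(tsk);
    }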
28535 | diff --git a/lib/Kconfig b/lib/Kconfig | |
28536 | index 1a48744253d7..f75de578cca8 100644 | |
28537 | --- a/lib/Kconfig | |
28538 | +++ b/lib/Kconfig | |
28539 | @@ -397,6 +397,7 @@ config CHECK_SIGNATURE | |
28540 | ||
28541 | config CPUMASK_OFFSTACK | |
28542 | bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS | |
28543 | + depends on !PREEMPT_RT_FULL | |
28544 | help | |
28545 | Use dynamic allocation for cpumask_var_t, instead of putting | |
28546 | them on the stack. This is a bit more expensive, but avoids | |
28547 | diff --git a/lib/debugobjects.c b/lib/debugobjects.c | |
28548 | index 547f7f923dbc..8fcdbc2fc6d0 100644 | |
28549 | --- a/lib/debugobjects.c | |
28550 | +++ b/lib/debugobjects.c | |
28551 | @@ -309,7 +309,10 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) | |
28552 | struct debug_obj *obj; | |
28553 | unsigned long flags; | |
28554 | ||
28555 | - fill_pool(); | |
28556 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
28557 | + if (preempt_count() == 0 && !irqs_disabled()) | |
28558 | +#endif | |
28559 | + fill_pool(); | |
28560 | ||
28561 | db = get_bucket((unsigned long) addr); | |
28562 | ||
28563 | diff --git a/lib/idr.c b/lib/idr.c | |
28564 | index 6098336df267..9decbe914595 100644 | |
28565 | --- a/lib/idr.c | |
28566 | +++ b/lib/idr.c | |
28567 | @@ -30,6 +30,7 @@ | |
28568 | #include <linux/idr.h> | |
28569 | #include <linux/spinlock.h> | |
28570 | #include <linux/percpu.h> | |
28571 | +#include <linux/locallock.h> | |
28572 | ||
28573 | #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) | |
28574 | #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) | |
28575 | @@ -45,6 +46,37 @@ static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head); | |
28576 | static DEFINE_PER_CPU(int, idr_preload_cnt); | |
28577 | static DEFINE_SPINLOCK(simple_ida_lock); | |
28578 | ||
28579 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
28580 | +static DEFINE_LOCAL_IRQ_LOCK(idr_lock); | |
28581 | + | |
28582 | +static inline void idr_preload_lock(void) | |
28583 | +{ | |
28584 | + local_lock(idr_lock); | |
28585 | +} | |
28586 | + | |
28587 | +static inline void idr_preload_unlock(void) | |
28588 | +{ | |
28589 | + local_unlock(idr_lock); | |
28590 | +} | |
28591 | + | |
28592 | +void idr_preload_end(void) | |
28593 | +{ | |
28594 | + idr_preload_unlock(); | |
28595 | +} | |
28596 | +EXPORT_SYMBOL(idr_preload_end); | |
28597 | +#else | |
28598 | +static inline void idr_preload_lock(void) | |
28599 | +{ | |
28600 | + preempt_disable(); | |
28601 | +} | |
28602 | + | |
28603 | +static inline void idr_preload_unlock(void) | |
28604 | +{ | |
28605 | + preempt_enable(); | |
28606 | +} | |
28607 | +#endif | |
28608 | + | |
28609 | + | |
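Callers of the preload API are unchanged by this: the canonical pattern from
the idr_preload() kerneldoc still applies, with my_lock/my_idr below purely
illustrative. The only visible difference is that idr_preload_end() becomes
a real function on RT (the local lock variable is private to lib/idr.c),
whereas mainline keeps it as an inline preempt_enable() in the header.

    idr_preload(GFP_KERNEL);        /* RT: takes idr_lock, stays preemptible */
    spin_lock(&my_lock);
    id = idr_alloc(&my_idr, ptr, 0, 0, GFP_NOWAIT);
    spin_unlock(&my_lock);
    idr_preload_end();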
28610 | /* the maximum ID which can be allocated given idr->layers */ | |
28611 | static int idr_max(int layers) | |
28612 | { | |
28613 | @@ -115,14 +147,14 @@ static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr) | |
28614 | * context. See idr_preload() for details. | |
28615 | */ | |
28616 | if (!in_interrupt()) { | |
28617 | - preempt_disable(); | |
28618 | + idr_preload_lock(); | |
28619 | new = __this_cpu_read(idr_preload_head); | |
28620 | if (new) { | |
28621 | __this_cpu_write(idr_preload_head, new->ary[0]); | |
28622 | __this_cpu_dec(idr_preload_cnt); | |
28623 | new->ary[0] = NULL; | |
28624 | } | |
28625 | - preempt_enable(); | |
28626 | + idr_preload_unlock(); | |
28627 | if (new) | |
28628 | return new; | |
28629 | } | |
28630 | @@ -366,7 +398,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, | |
28631 | idr_mark_full(pa, id); | |
28632 | } | |
28633 | ||
28634 | - | |
28635 | /** | |
28636 | * idr_preload - preload for idr_alloc() | |
28637 | * @gfp_mask: allocation mask to use for preloading | |
28638 | @@ -401,7 +432,7 @@ void idr_preload(gfp_t gfp_mask) | |
28639 | WARN_ON_ONCE(in_interrupt()); | |
28640 | might_sleep_if(gfpflags_allow_blocking(gfp_mask)); | |
28641 | ||
28642 | - preempt_disable(); | |
28643 | + idr_preload_lock(); | |
28644 | ||
28645 | /* | |
28646 | * idr_alloc() is likely to succeed w/o full idr_layer buffer and | |
28647 | @@ -413,9 +444,9 @@ void idr_preload(gfp_t gfp_mask) | |
28648 | while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) { | |
28649 | struct idr_layer *new; | |
28650 | ||
28651 | - preempt_enable(); | |
28652 | + idr_preload_unlock(); | |
28653 | new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); | |
28654 | - preempt_disable(); | |
28655 | + idr_preload_lock(); | |
28656 | if (!new) | |
28657 | break; | |
28658 | ||
28659 | diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c | |
28660 | index 872a15a2a637..b93a6103fa4d 100644 | |
28661 | --- a/lib/locking-selftest.c | |
28662 | +++ b/lib/locking-selftest.c | |
28663 | @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) | |
28664 | #include "locking-selftest-spin-hardirq.h" | |
28665 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) | |
28666 | ||
28667 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28668 | + | |
28669 | #include "locking-selftest-rlock-hardirq.h" | |
28670 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) | |
28671 | ||
28672 | @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) | |
28673 | #include "locking-selftest-wlock-softirq.h" | |
28674 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) | |
28675 | ||
28676 | +#endif | |
28677 | + | |
28678 | #undef E1 | |
28679 | #undef E2 | |
28680 | ||
28681 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28682 | /* | |
28683 | * Enabling hardirqs with a softirq-safe lock held: | |
28684 | */ | |
28685 | @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
28686 | #undef E1 | |
28687 | #undef E2 | |
28688 | ||
28689 | +#endif | |
28690 | + | |
28691 | /* | |
28692 | * Enabling irqs with an irq-safe lock held: | |
28693 | */ | |
28694 | @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) | |
28695 | #include "locking-selftest-spin-hardirq.h" | |
28696 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) | |
28697 | ||
28698 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28699 | + | |
28700 | #include "locking-selftest-rlock-hardirq.h" | |
28701 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) | |
28702 | ||
28703 | @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) | |
28704 | #include "locking-selftest-wlock-softirq.h" | |
28705 | GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
28706 | ||
28707 | +#endif | |
28708 | + | |
28709 | #undef E1 | |
28710 | #undef E2 | |
28711 | ||
28712 | @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) | |
28713 | #include "locking-selftest-spin-hardirq.h" | |
28714 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) | |
28715 | ||
28716 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28717 | + | |
28718 | #include "locking-selftest-rlock-hardirq.h" | |
28719 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) | |
28720 | ||
28721 | @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) | |
28722 | #include "locking-selftest-wlock-softirq.h" | |
28723 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
28724 | ||
28725 | +#endif | |
28726 | + | |
28727 | #undef E1 | |
28728 | #undef E2 | |
28729 | #undef E3 | |
28730 | @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) | |
28731 | #include "locking-selftest-spin-hardirq.h" | |
28732 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) | |
28733 | ||
28734 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28735 | + | |
28736 | #include "locking-selftest-rlock-hardirq.h" | |
28737 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) | |
28738 | ||
28739 | @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) | |
28740 | #include "locking-selftest-wlock-softirq.h" | |
28741 | GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) | |
28742 | ||
28743 | +#endif | |
28744 | + | |
28745 | #undef E1 | |
28746 | #undef E2 | |
28747 | #undef E3 | |
28748 | ||
28749 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28750 | + | |
28751 | /* | |
28752 | * read-lock / write-lock irq inversion. | |
28753 | * | |
28754 | @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) | |
28755 | #undef E2 | |
28756 | #undef E3 | |
28757 | ||
28758 | +#endif | |
28759 | + | |
28760 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28761 | + | |
28762 | /* | |
28763 | * read-lock / write-lock recursion that is actually safe. | |
28764 | */ | |
28765 | @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) | |
28766 | #undef E2 | |
28767 | #undef E3 | |
28768 | ||
28769 | +#endif | |
28770 | + | |
28771 | /* | |
28772 | * read-lock / write-lock recursion that is unsafe. | |
28773 | */ | |
28774 | @@ -1858,6 +1885,7 @@ void locking_selftest(void) | |
28775 | ||
28776 | printk(" --------------------------------------------------------------------------\n"); | |
28777 | ||
28778 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28779 | /* | |
28780 | * irq-context testcases: | |
28781 | */ | |
28782 | @@ -1870,6 +1898,28 @@ void locking_selftest(void) | |
28783 | ||
28784 | DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); | |
28785 | // DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); | |
28786 | +#else | |
28787 | + /* On -rt, we only do hardirq context test for raw spinlock */ | |
28788 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 12); | |
28789 | + DO_TESTCASE_1B("hard-irqs-on + irq-safe-A", irqsafe1_hard_spin, 21); | |
28790 | + | |
28791 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 12); | |
28792 | + DO_TESTCASE_1B("hard-safe-A + irqs-on", irqsafe2B_hard_spin, 21); | |
28793 | + | |
28794 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 123); | |
28795 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 132); | |
28796 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 213); | |
28797 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 231); | |
28798 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 312); | |
28799 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #1", irqsafe3_hard_spin, 321); | |
28800 | + | |
28801 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 123); | |
28802 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 132); | |
28803 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 213); | |
28804 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 231); | |
28805 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 312); | |
28806 | + DO_TESTCASE_1B("hard-safe-A + unsafe-B #2", irqsafe4_hard_spin, 321); | |
28807 | +#endif | |
28808 | ||
28809 | ww_tests(); | |
28810 | ||
28811 | diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c | |
28812 | index 6d40944960de..822a2c027e72 100644 | |
28813 | --- a/lib/percpu_ida.c | |
28814 | +++ b/lib/percpu_ida.c | |
28815 | @@ -26,6 +26,9 @@ | |
28816 | #include <linux/string.h> | |
28817 | #include <linux/spinlock.h> | |
28818 | #include <linux/percpu_ida.h> | |
28819 | +#include <linux/locallock.h> | |
28820 | + | |
28821 | +static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock); | |
28822 | ||
28823 | struct percpu_ida_cpu { | |
28824 | /* | |
28825 | @@ -148,13 +151,13 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
28826 | unsigned long flags; | |
28827 | int tag; | |
28828 | ||
28829 | - local_irq_save(flags); | |
28830 | + local_lock_irqsave(irq_off_lock, flags); | |
28831 | tags = this_cpu_ptr(pool->tag_cpu); | |
28832 | ||
28833 | /* Fastpath */ | |
28834 | tag = alloc_local_tag(tags); | |
28835 | if (likely(tag >= 0)) { | |
28836 | - local_irq_restore(flags); | |
28837 | + local_unlock_irqrestore(irq_off_lock, flags); | |
28838 | return tag; | |
28839 | } | |
28840 | ||
28841 | @@ -173,6 +176,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
28842 | ||
28843 | if (!tags->nr_free) | |
28844 | alloc_global_tags(pool, tags); | |
28845 | + | |
28846 | if (!tags->nr_free) | |
28847 | steal_tags(pool, tags); | |
28848 | ||
28849 | @@ -184,7 +188,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
28850 | } | |
28851 | ||
28852 | spin_unlock(&pool->lock); | |
28853 | - local_irq_restore(flags); | |
28854 | + local_unlock_irqrestore(irq_off_lock, flags); | |
28855 | ||
28856 | if (tag >= 0 || state == TASK_RUNNING) | |
28857 | break; | |
28858 | @@ -196,7 +200,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) | |
28859 | ||
28860 | schedule(); | |
28861 | ||
28862 | - local_irq_save(flags); | |
28863 | + local_lock_irqsave(irq_off_lock, flags); | |
28864 | tags = this_cpu_ptr(pool->tag_cpu); | |
28865 | } | |
28866 | if (state != TASK_RUNNING) | |
28867 | @@ -221,7 +225,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
28868 | ||
28869 | BUG_ON(tag >= pool->nr_tags); | |
28870 | ||
28871 | - local_irq_save(flags); | |
28872 | + local_lock_irqsave(irq_off_lock, flags); | |
28873 | tags = this_cpu_ptr(pool->tag_cpu); | |
28874 | ||
28875 | spin_lock(&tags->lock); | |
28876 | @@ -253,7 +257,7 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |
28877 | spin_unlock(&pool->lock); | |
28878 | } | |
28879 | ||
28880 | - local_irq_restore(flags); | |
28881 | + local_unlock_irqrestore(irq_off_lock, flags); | |
28882 | } | |
28883 | EXPORT_SYMBOL_GPL(percpu_ida_free); | |
28884 | ||
28885 | @@ -345,7 +349,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
28886 | struct percpu_ida_cpu *remote; | |
28887 | unsigned cpu, i, err = 0; | |
28888 | ||
28889 | - local_irq_save(flags); | |
28890 | + local_lock_irqsave(irq_off_lock, flags); | |
28891 | for_each_possible_cpu(cpu) { | |
28892 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | |
28893 | spin_lock(&remote->lock); | |
28894 | @@ -367,7 +371,7 @@ int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | |
28895 | } | |
28896 | spin_unlock(&pool->lock); | |
28897 | out: | |
28898 | - local_irq_restore(flags); | |
28899 | + local_unlock_irqrestore(irq_off_lock, flags); | |
28900 | return err; | |
28901 | } | |
28902 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | |
28903 | diff --git a/lib/radix-tree.c b/lib/radix-tree.c | |
28904 | index 6b79e9026e24..f27e0bcb74f7 100644 | |
28905 | --- a/lib/radix-tree.c | |
28906 | +++ b/lib/radix-tree.c | |
28907 | @@ -196,13 +196,14 @@ radix_tree_node_alloc(struct radix_tree_root *root) | |
28908 | * succeed in getting a node here (and never reach | |
28909 | * kmem_cache_alloc) | |
28910 | */ | |
28911 | - rtp = this_cpu_ptr(&radix_tree_preloads); | |
28912 | + rtp = &get_cpu_var(radix_tree_preloads); | |
28913 | if (rtp->nr) { | |
28914 | ret = rtp->nodes; | |
28915 | rtp->nodes = ret->private_data; | |
28916 | ret->private_data = NULL; | |
28917 | rtp->nr--; | |
28918 | } | |
28919 | + put_cpu_var(radix_tree_preloads); | |
28920 | /* | |
28921 | * Update the allocation stack trace as this is more useful | |
28922 | * for debugging. | |
28923 | @@ -242,6 +243,7 @@ radix_tree_node_free(struct radix_tree_node *node) | |
28924 | call_rcu(&node->rcu_head, radix_tree_node_rcu_free); | |
28925 | } | |
28926 | ||
28927 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
28928 | /* | |
28929 | * Load up this CPU's radix_tree_node buffer with sufficient objects to | |
28930 | * ensure that the addition of a single element in the tree cannot fail. On | |
28931 | @@ -310,6 +312,7 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) | |
28932 | return 0; | |
28933 | } | |
28934 | EXPORT_SYMBOL(radix_tree_maybe_preload); | |
28935 | +#endif | |
28936 | ||
28937 | /* | |
28938 | * Return the maximum key which can be store into a | |
28939 | diff --git a/lib/rbtree.c b/lib/rbtree.c | |
28940 | index 1356454e36de..d15d6c4327f1 100644 | |
28941 | --- a/lib/rbtree.c | |
28942 | +++ b/lib/rbtree.c | |
28943 | @@ -23,6 +23,7 @@ | |
28944 | ||
28945 | #include <linux/rbtree_augmented.h> | |
28946 | #include <linux/export.h> | |
28947 | +#include <linux/rcupdate.h> | |
28948 | ||
28949 | /* | |
28950 | * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree | |
28951 | @@ -590,3 +591,13 @@ struct rb_node *rb_first_postorder(const struct rb_root *root) | |
28952 | return rb_left_deepest_node(root->rb_node); | |
28953 | } | |
28954 | EXPORT_SYMBOL(rb_first_postorder); | |
28955 | + | |
28956 | +void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, | |
28957 | + struct rb_node **rb_link) | |
28958 | +{ | |
28959 | + node->__rb_parent_color = (unsigned long)parent; | |
28960 | + node->rb_left = node->rb_right = NULL; | |
28961 | + | |
28962 | + rcu_assign_pointer(*rb_link, node); | |
28963 | +} | |
28964 | +EXPORT_SYMBOL(rb_link_node_rcu); | |
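rb_link_node_rcu() differs from plain rb_link_node() only in publishing the
node with rcu_assign_pointer(), so a lockless reader walking the tree under
rcu_read_lock() cannot observe a half-initialized node. A hypothetical
writer looks like the sketch below; mynode/key are illustrative, the update
side still needs its own lock, and readers must be able to tolerate (or be
shielded from) the rotations rb_insert_color() performs.

    struct mynode {
            struct rb_node rb;
            unsigned long key;
    };

    /* called with the tree's update-side lock held */
    static void mynode_insert(struct rb_root *root, struct mynode *new)
    {
            struct rb_node **link = &root->rb_node, *parent = NULL;

            while (*link) {
                    struct mynode *cur = rb_entry(*link, struct mynode, rb);

                    parent = *link;
                    link = new->key < cur->key ? &parent->rb_left
                                               : &parent->rb_right;
            }
            rb_link_node_rcu(&new->rb, parent, link);  /* RCU publish */
            rb_insert_color(&new->rb, root);
    }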
28965 | diff --git a/lib/scatterlist.c b/lib/scatterlist.c | |
28966 | index bafa9933fa76..ebe3b7edd086 100644 | |
28967 | --- a/lib/scatterlist.c | |
28968 | +++ b/lib/scatterlist.c | |
28969 | @@ -620,7 +620,7 @@ void sg_miter_stop(struct sg_mapping_iter *miter) | |
28970 | flush_kernel_dcache_page(miter->page); | |
28971 | ||
28972 | if (miter->__flags & SG_MITER_ATOMIC) { | |
28973 | - WARN_ON_ONCE(preemptible()); | |
28974 | + WARN_ON_ONCE(!pagefault_disabled()); | |
28975 | kunmap_atomic(miter->addr); | |
28976 | } else | |
28977 | kunmap(miter->page); | |
28978 | @@ -664,7 +664,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
28979 | if (!sg_miter_skip(&miter, skip)) | |
28980 | return false; | |
28981 | ||
28982 | - local_irq_save(flags); | |
28983 | + local_irq_save_nort(flags); | |
28984 | ||
28985 | while (sg_miter_next(&miter) && offset < buflen) { | |
28986 | unsigned int len; | |
28987 | @@ -681,7 +681,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, | |
28988 | ||
28989 | sg_miter_stop(&miter); | |
28990 | ||
28991 | - local_irq_restore(flags); | |
28992 | + local_irq_restore_nort(flags); | |
28993 | return offset; | |
28994 | } | |
28995 | EXPORT_SYMBOL(sg_copy_buffer); | |
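The _nort ("not on RT") variants come from this series' irqflags changes,
which are not shown here. Approximately, they really disable interrupts on
mainline and degrade to flag bookkeeping on RT, where the section is allowed
to be preempted; treat the exact definitions as an assumption:

    #ifdef CONFIG_PREEMPT_RT_FULL
    # define local_irq_save_nort(flags)     do { local_save_flags(flags); } while (0)
    # define local_irq_restore_nort(flags)  (void)(flags)
    #else
    # define local_irq_save_nort(flags)     local_irq_save(flags)
    # define local_irq_restore_nort(flags)  local_irq_restore(flags)
    #endif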
28996 | diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c | |
28997 | index 1afec32de6f2..11fa431046a8 100644 | |
28998 | --- a/lib/smp_processor_id.c | |
28999 | +++ b/lib/smp_processor_id.c | |
29000 | @@ -39,8 +39,9 @@ notrace static unsigned int check_preemption_disabled(const char *what1, | |
29001 | if (!printk_ratelimit()) | |
29002 | goto out_enable; | |
29003 | ||
29004 | - printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", | |
29005 | - what1, what2, preempt_count() - 1, current->comm, current->pid); | |
29006 | + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x %08x] code: %s/%d\n", | |
29007 | + what1, what2, preempt_count() - 1, __migrate_disabled(current), | |
29008 | + current->comm, current->pid); | |
29009 | ||
29010 | print_symbol("caller is %s\n", (long)__builtin_return_address(0)); | |
29011 | dump_stack(); | |
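With the extra field, the report prints the migrate-disable depth next to
the preempt count, which distinguishes "preemptible but pinned to this CPU"
from plain preemptible context. An illustrative line, with the values and
caller invented:

    BUG: using smp_processor_id() in preemptible [00000000 00000001] code: foo/1234
    caller is some_driver_fn+0x12/0x340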
29012 | diff --git a/localversion-rt b/localversion-rt | |
29013 | new file mode 100644 | |
cb95d48a | 29014 | index 000000000000..629e0b4384b8 |
29015 | --- /dev/null |
29016 | +++ b/localversion-rt | |
29017 | @@ -0,0 +1 @@ | |
cb95d48a | 29018 | +-rt41 |
29019 | diff --git a/mm/Kconfig b/mm/Kconfig |
29020 | index 97a4e06b15c0..9614351e68b8 100644 | |
29021 | --- a/mm/Kconfig | |
29022 | +++ b/mm/Kconfig | |
29023 | @@ -392,7 +392,7 @@ config NOMMU_INITIAL_TRIM_EXCESS | |
29024 | ||
29025 | config TRANSPARENT_HUGEPAGE | |
29026 | bool "Transparent Hugepage Support" | |
29027 | - depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE | |
29028 | + depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT_FULL | |
29029 | select COMPACTION | |
29030 | help | |
29031 | Transparent Hugepages allows the kernel to use huge pages and | |
29032 | diff --git a/mm/backing-dev.c b/mm/backing-dev.c | |
29033 | index 9ef80bf441b3..826fed55c1cc 100644 | |
29034 | --- a/mm/backing-dev.c | |
29035 | +++ b/mm/backing-dev.c | |
29036 | @@ -457,9 +457,9 @@ void wb_congested_put(struct bdi_writeback_congested *congested) | |
29037 | { | |
29038 | unsigned long flags; | |
29039 | ||
29040 | - local_irq_save(flags); | |
29041 | + local_irq_save_nort(flags); | |
29042 | if (!atomic_dec_and_lock(&congested->refcnt, &cgwb_lock)) { | |
29043 | - local_irq_restore(flags); | |
29044 | + local_irq_restore_nort(flags); | |
29045 | return; | |
29046 | } | |
29047 | ||
29048 | diff --git a/mm/compaction.c b/mm/compaction.c | |
29049 | index dba02dec7195..51963f58a29b 100644 | |
29050 | --- a/mm/compaction.c | |
29051 | +++ b/mm/compaction.c | |
29052 | @@ -1430,10 +1430,12 @@ check_drain: | |
29053 | cc->migrate_pfn & ~((1UL << cc->order) - 1); | |
29054 | ||
29055 | if (cc->last_migrated_pfn < current_block_start) { | |
29056 | - cpu = get_cpu(); | |
29057 | + cpu = get_cpu_light(); | |
29058 | + local_lock_irq(swapvec_lock); | |
29059 | lru_add_drain_cpu(cpu); | |
29060 | + local_unlock_irq(swapvec_lock); | |
29061 | drain_local_pages(zone); | |
29062 | - put_cpu(); | |
29063 | + put_cpu_light(); | |
29064 | /* No more flushing until we migrate again */ | |
29065 | cc->last_migrated_pfn = 0; | |
29066 | } | |
29067 | diff --git a/mm/filemap.c b/mm/filemap.c | |
cb95d48a | 29068 | index c588d1222b2a..da6a5fbfadd2 100644 |
29069 | --- a/mm/filemap.c |
29070 | +++ b/mm/filemap.c | |
cb95d48a | 29071 | @@ -144,9 +144,12 @@ static int page_cache_tree_insert(struct address_space *mapping, |
29072 | * node->private_list is protected by |
29073 | * mapping->tree_lock. | |
29074 | */ | |
29075 | - if (!list_empty(&node->private_list)) | |
29076 | - list_lru_del(&workingset_shadow_nodes, | |
29077 | + if (!list_empty(&node->private_list)) { | |
29078 | + local_lock(workingset_shadow_lock); | |
29079 | + list_lru_del(&__workingset_shadow_nodes, | |
29080 | &node->private_list); | |
29081 | + local_unlock(workingset_shadow_lock); | |
29082 | + } | |
29083 | } | |
29084 | return 0; | |
29085 | } | |
29086 | @@ -218,7 +221,9 @@ static void page_cache_tree_delete(struct address_space *mapping, |
29087 | if (!workingset_node_pages(node) && | |
29088 | list_empty(&node->private_list)) { | |
29089 | node->private_data = mapping; | |
29090 | - list_lru_add(&workingset_shadow_nodes, &node->private_list); | |
29091 | + local_lock(workingset_shadow_lock); | |
29092 | + list_lru_add(&__workingset_shadow_nodes, &node->private_list); | |
29093 | + local_unlock(workingset_shadow_lock); | |
29094 | } | |
29095 | } | |
29096 | ||
29097 | diff --git a/mm/highmem.c b/mm/highmem.c |
29098 | index 123bcd3ed4f2..16e8cf26d38a 100644 | |
29099 | --- a/mm/highmem.c | |
29100 | +++ b/mm/highmem.c | |
29101 | @@ -29,10 +29,11 @@ | |
29102 | #include <linux/kgdb.h> | |
29103 | #include <asm/tlbflush.h> | |
29104 | ||
29105 | - | |
29106 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
29107 | #if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32) | |
29108 | DEFINE_PER_CPU(int, __kmap_atomic_idx); | |
29109 | #endif | |
29110 | +#endif | |
29111 | ||
29112 | /* | |
29113 | * Virtual_count is not a pure "count". | |
29114 | @@ -107,8 +108,9 @@ static inline wait_queue_head_t *get_pkmap_wait_queue_head(unsigned int color) | |
29115 | unsigned long totalhigh_pages __read_mostly; | |
29116 | EXPORT_SYMBOL(totalhigh_pages); | |
29117 | ||
29118 | - | |
29119 | +#ifndef CONFIG_PREEMPT_RT_FULL | |
29120 | EXPORT_PER_CPU_SYMBOL(__kmap_atomic_idx); | |
29121 | +#endif | |
29122 | ||
29123 | unsigned int nr_free_highpages (void) | |
29124 | { | |
29125 | diff --git a/mm/memcontrol.c b/mm/memcontrol.c | |
29126 | index 6b90d184e9c0..ed7aa011ad70 100644 | |
29127 | --- a/mm/memcontrol.c | |
29128 | +++ b/mm/memcontrol.c | |
29129 | @@ -67,6 +67,8 @@ | |
29130 | #include <net/sock.h> | |
29131 | #include <net/ip.h> | |
29132 | #include <net/tcp_memcontrol.h> | |
29133 | +#include <linux/locallock.h> | |
29134 | + | |
29135 | #include "slab.h" | |
29136 | ||
29137 | #include <asm/uaccess.h> | |
29138 | @@ -87,6 +89,7 @@ int do_swap_account __read_mostly; | |
29139 | #define do_swap_account 0 | |
29140 | #endif | |
29141 | ||
29142 | +static DEFINE_LOCAL_IRQ_LOCK(event_lock); | |
29143 | static const char * const mem_cgroup_stat_names[] = { | |
29144 | "cache", | |
29145 | "rss", | |
29146 | @@ -1922,14 +1925,17 @@ static void drain_local_stock(struct work_struct *dummy) | |
29147 | */ | |
29148 | static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) | |
29149 | { | |
29150 | - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); | |
29151 | + struct memcg_stock_pcp *stock; | |
29152 | + int cpu = get_cpu_light(); | |
29153 | + | |
29154 | + stock = &per_cpu(memcg_stock, cpu); | |
29155 | ||
29156 | if (stock->cached != memcg) { /* reset if necessary */ | |
29157 | drain_stock(stock); | |
29158 | stock->cached = memcg; | |
29159 | } | |
29160 | stock->nr_pages += nr_pages; | |
29161 | - put_cpu_var(memcg_stock); | |
29162 | + put_cpu_light(); | |
29163 | } | |
29164 | ||
29165 | /* | |
29166 | @@ -1945,7 +1951,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
29167 | return; | |
29168 | /* Notify other cpus that system-wide "drain" is running */ | |
29169 | get_online_cpus(); | |
29170 | - curcpu = get_cpu(); | |
29171 | + curcpu = get_cpu_light(); | |
29172 | for_each_online_cpu(cpu) { | |
29173 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | |
29174 | struct mem_cgroup *memcg; | |
29175 | @@ -1962,7 +1968,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg) | |
29176 | schedule_work_on(cpu, &stock->work); | |
29177 | } | |
29178 | } | |
29179 | - put_cpu(); | |
29180 | + put_cpu_light(); | |
29181 | put_online_cpus(); | |
29182 | mutex_unlock(&percpu_charge_mutex); | |
29183 | } | |
29184 | @@ -4700,12 +4706,12 @@ static int mem_cgroup_move_account(struct page *page, | |
29185 | ||
29186 | ret = 0; | |
29187 | ||
29188 | - local_irq_disable(); | |
29189 | + local_lock_irq(event_lock); | |
29190 | mem_cgroup_charge_statistics(to, page, nr_pages); | |
29191 | memcg_check_events(to, page); | |
29192 | mem_cgroup_charge_statistics(from, page, -nr_pages); | |
29193 | memcg_check_events(from, page); | |
29194 | - local_irq_enable(); | |
29195 | + local_unlock_irq(event_lock); | |
29196 | out_unlock: | |
29197 | unlock_page(page); | |
29198 | out: | |
29199 | @@ -5495,10 +5501,10 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, | |
29200 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | |
29201 | } | |
29202 | ||
29203 | - local_irq_disable(); | |
29204 | + local_lock_irq(event_lock); | |
29205 | mem_cgroup_charge_statistics(memcg, page, nr_pages); | |
29206 | memcg_check_events(memcg, page); | |
29207 | - local_irq_enable(); | |
29208 | + local_unlock_irq(event_lock); | |
29209 | ||
29210 | if (do_swap_account && PageSwapCache(page)) { | |
29211 | swp_entry_t entry = { .val = page_private(page) }; | |
29212 | @@ -5554,14 +5560,14 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, | |
29213 | memcg_oom_recover(memcg); | |
29214 | } | |
29215 | ||
29216 | - local_irq_save(flags); | |
29217 | + local_lock_irqsave(event_lock, flags); | |
29218 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | |
29219 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file); | |
29220 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge); | |
29221 | __this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout); | |
29222 | __this_cpu_add(memcg->stat->nr_page_events, nr_pages); | |
29223 | memcg_check_events(memcg, dummy_page); | |
29224 | - local_irq_restore(flags); | |
29225 | + local_unlock_irqrestore(event_lock, flags); | |
29226 | ||
29227 | if (!mem_cgroup_is_root(memcg)) | |
29228 | css_put_many(&memcg->css, nr_pages); | |
29229 | @@ -5753,6 +5759,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
29230 | { | |
29231 | struct mem_cgroup *memcg, *swap_memcg; | |
29232 | unsigned short oldid; | |
29233 | + unsigned long flags; | |
29234 | ||
29235 | VM_BUG_ON_PAGE(PageLRU(page), page); | |
29236 | VM_BUG_ON_PAGE(page_count(page), page); | |
29237 | @@ -5793,12 +5800,16 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | |
29238 | * important here to have the interrupts disabled because it is the | |
29239 | * only synchronisation we have for updating the per-CPU variables. | 
29240 | */ | |
29241 | + local_lock_irqsave(event_lock, flags); | |
29242 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
29243 | VM_BUG_ON(!irqs_disabled()); | |
29244 | +#endif | |
29245 | mem_cgroup_charge_statistics(memcg, page, -1); | |
29246 | memcg_check_events(memcg, page); | |
29247 | ||
29248 | if (!mem_cgroup_is_root(memcg)) | |
29249 | css_put(&memcg->css); | |
29250 | + local_unlock_irqrestore(event_lock, flags); | |
29251 | } | |
29252 | ||
29253 | /** | |
29254 | diff --git a/mm/mmu_context.c b/mm/mmu_context.c | |
29255 | index f802c2d216a7..b1b6f238e42d 100644 | |
29256 | --- a/mm/mmu_context.c | |
29257 | +++ b/mm/mmu_context.c | |
29258 | @@ -23,6 +23,7 @@ void use_mm(struct mm_struct *mm) | |
29259 | struct task_struct *tsk = current; | |
29260 | ||
29261 | task_lock(tsk); | |
29262 | + preempt_disable_rt(); | |
29263 | active_mm = tsk->active_mm; | |
29264 | if (active_mm != mm) { | |
29265 | atomic_inc(&mm->mm_count); | |
29266 | @@ -30,6 +31,7 @@ void use_mm(struct mm_struct *mm) | |
29267 | } | |
29268 | tsk->mm = mm; | |
29269 | switch_mm(active_mm, mm, tsk); | |
29270 | + preempt_enable_rt(); | |
29271 | task_unlock(tsk); | |
29272 | #ifdef finish_arch_post_lock_switch | |
29273 | finish_arch_post_lock_switch(); | |
29274 | diff --git a/mm/page_alloc.c b/mm/page_alloc.c | |
29275 | index 2bcdfbf8c36d..a500c9e740dd 100644 | |
29276 | --- a/mm/page_alloc.c | |
29277 | +++ b/mm/page_alloc.c | |
29278 | @@ -60,6 +60,7 @@ | |
29279 | #include <linux/page_ext.h> | |
29280 | #include <linux/hugetlb.h> | |
29281 | #include <linux/sched/rt.h> | |
29282 | +#include <linux/locallock.h> | |
29283 | #include <linux/page_owner.h> | |
29284 | #include <linux/kthread.h> | |
29285 | ||
29286 | @@ -264,6 +265,18 @@ EXPORT_SYMBOL(nr_node_ids); | |
29287 | EXPORT_SYMBOL(nr_online_nodes); | |
29288 | #endif | |
29289 | ||
29290 | +static DEFINE_LOCAL_IRQ_LOCK(pa_lock); | |
29291 | + | |
29292 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
29293 | +# define cpu_lock_irqsave(cpu, flags) \ | |
29294 | + local_lock_irqsave_on(pa_lock, flags, cpu) | |
29295 | +# define cpu_unlock_irqrestore(cpu, flags) \ | |
29296 | + local_unlock_irqrestore_on(pa_lock, flags, cpu) | |
29297 | +#else | |
29298 | +# define cpu_lock_irqsave(cpu, flags) local_irq_save(flags) | |
29299 | +# define cpu_unlock_irqrestore(cpu, flags) local_irq_restore(flags) | |
29300 | +#endif | |
29301 | + | |
29302 | int page_group_by_mobility_disabled __read_mostly; | |
29303 | ||
29304 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | |
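cpu_lock_irqsave() relies on a local lock being a real per-CPU spinlock on
RT, so another CPU's instance can be acquired directly; on mainline it falls
back to local_irq_save() and is only meaningful for the local CPU. The rough
semantics, as a sketch of the locallock helper used above:

    /* RT: take @cpu's instance of the per-CPU lock, no IPI required */
    # define local_lock_irqsave_on(lvar, flags, cpu) \
            do { (void)(flags); spin_lock(&per_cpu(lvar, cpu).lock); } while (0)

This is what lets the RT branch of drain_all_pages() further down in this
file walk the CPUs and drain their pcp lists synchronously instead of
sending IPIs.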
29305 | @@ -786,7 +799,7 @@ static inline int free_pages_check(struct page *page) | |
29306 | } | |
29307 | ||
29308 | /* | |
29309 | - * Frees a number of pages from the PCP lists | |
29310 | + * Frees a number of pages which have been collected from the pcp lists. | |
29311 | * Assumes all pages on list are in same zone, and of same order. | |
29312 | * count is the number of pages to free. | |
29313 | * | |
29314 | @@ -797,18 +810,53 @@ static inline int free_pages_check(struct page *page) | |
29315 | * pinned" detection logic. | |
29316 | */ | |
29317 | static void free_pcppages_bulk(struct zone *zone, int count, | |
29318 | - struct per_cpu_pages *pcp) | |
29319 | + struct list_head *list) | |
29320 | { | |
29321 | - int migratetype = 0; | |
29322 | - int batch_free = 0; | |
29323 | int to_free = count; | |
29324 | unsigned long nr_scanned; | |
29325 | + unsigned long flags; | |
29326 | + | |
29327 | + spin_lock_irqsave(&zone->lock, flags); | |
29328 | ||
29329 | - spin_lock(&zone->lock); | |
29330 | nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); | |
29331 | if (nr_scanned) | |
29332 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); | |
29333 | ||
29334 | + while (!list_empty(list)) { | |
29335 | + struct page *page = list_first_entry(list, struct page, lru); | |
29336 | + int mt; /* migratetype of the to-be-freed page */ | |
29337 | + | |
29338 | + /* must delete as __free_one_page list manipulates */ | |
29339 | + list_del(&page->lru); | |
29340 | + | |
29341 | + mt = get_pcppage_migratetype(page); | |
29342 | + /* MIGRATE_ISOLATE page should not go to pcplists */ | |
29343 | + VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
29344 | + /* Pageblock could have been isolated meanwhile */ | |
29345 | + if (unlikely(has_isolate_pageblock(zone))) | |
29346 | + mt = get_pageblock_migratetype(page); | |
29347 | + | |
29348 | + __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
29349 | + trace_mm_page_pcpu_drain(page, 0, mt); | |
29350 | + to_free--; | |
29351 | + } | |
29352 | + WARN_ON(to_free != 0); | |
29353 | + spin_unlock_irqrestore(&zone->lock, flags); | |
29354 | +} | |
29355 | + | |
29356 | +/* | |
29357 | + * Moves a number of pages from the PCP lists to free list which | |
29358 | + * is freed outside of the locked region. | |
29359 | + * | |
29360 | + * Assumes all pages on list are in same zone, and of same order. | |
29361 | + * count is the number of pages to free. | |
29362 | + */ | |
29363 | +static void isolate_pcp_pages(int to_free, struct per_cpu_pages *src, | |
29364 | + struct list_head *dst) | |
29365 | +{ | |
29366 | + int migratetype = 0; | |
29367 | + int batch_free = 0; | |
29368 | + | |
29369 | while (to_free) { | |
29370 | struct page *page; | |
29371 | struct list_head *list; | |
29372 | @@ -824,7 +872,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
29373 | batch_free++; | |
29374 | if (++migratetype == MIGRATE_PCPTYPES) | |
29375 | migratetype = 0; | |
29376 | - list = &pcp->lists[migratetype]; | |
29377 | + list = &src->lists[migratetype]; | |
29378 | } while (list_empty(list)); | |
29379 | ||
29380 | /* This is the only non-empty list. Free them all. */ | |
29381 | @@ -832,24 +880,12 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |
29382 | batch_free = to_free; | |
29383 | ||
29384 | do { | |
29385 | - int mt; /* migratetype of the to-be-freed page */ | |
29386 | - | |
29387 | - page = list_entry(list->prev, struct page, lru); | |
29388 | - /* must delete as __free_one_page list manipulates */ | |
29389 | + page = list_last_entry(list, struct page, lru); | |
29390 | list_del(&page->lru); | |
29391 | ||
29392 | - mt = get_pcppage_migratetype(page); | |
29393 | - /* MIGRATE_ISOLATE page should not go to pcplists */ | |
29394 | - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); | |
29395 | - /* Pageblock could have been isolated meanwhile */ | |
29396 | - if (unlikely(has_isolate_pageblock(zone))) | |
29397 | - mt = get_pageblock_migratetype(page); | |
29398 | - | |
29399 | - __free_one_page(page, page_to_pfn(page), zone, 0, mt); | |
29400 | - trace_mm_page_pcpu_drain(page, 0, mt); | |
29401 | + list_add(&page->lru, dst); | |
29402 | } while (--to_free && --batch_free && !list_empty(list)); | |
29403 | } | |
29404 | - spin_unlock(&zone->lock); | |
29405 | } | |
29406 | ||
29407 | static void free_one_page(struct zone *zone, | |
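The net effect is a two-phase free in which neither lock section nests
inside the other: pages are unlinked from the per-CPU lists under pa_lock,
then handed to the buddy allocator with only zone->lock held. Schematically,
following the drain_zone_pages() flow above:

    LIST_HEAD(dst);

    local_lock_irqsave(pa_lock, flags);       /* protects the pcp lists */
    isolate_pcp_pages(to_drain, pcp, &dst);   /* unlink only, no buddy work */
    pcp->count -= to_drain;
    local_unlock_irqrestore(pa_lock, flags);

    free_pcppages_bulk(zone, to_drain, &dst); /* takes zone->lock itself */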
29408 | @@ -858,7 +894,9 @@ static void free_one_page(struct zone *zone, | |
29409 | int migratetype) | |
29410 | { | |
29411 | unsigned long nr_scanned; | |
29412 | - spin_lock(&zone->lock); | |
29413 | + unsigned long flags; | |
29414 | + | |
29415 | + spin_lock_irqsave(&zone->lock, flags); | |
29416 | nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); | |
29417 | if (nr_scanned) | |
29418 | __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); | |
29419 | @@ -868,7 +906,7 @@ static void free_one_page(struct zone *zone, | |
29420 | migratetype = get_pfnblock_migratetype(page, pfn); | |
29421 | } | |
29422 | __free_one_page(page, pfn, zone, order, migratetype); | |
29423 | - spin_unlock(&zone->lock); | |
29424 | + spin_unlock_irqrestore(&zone->lock, flags); | |
29425 | } | |
29426 | ||
29427 | static int free_tail_pages_check(struct page *head_page, struct page *page) | |
29428 | @@ -1019,10 +1057,10 @@ static void __free_pages_ok(struct page *page, unsigned int order) | |
29429 | return; | |
29430 | ||
29431 | migratetype = get_pfnblock_migratetype(page, pfn); | |
29432 | - local_irq_save(flags); | |
29433 | + local_lock_irqsave(pa_lock, flags); | |
29434 | __count_vm_events(PGFREE, 1 << order); | |
29435 | free_one_page(page_zone(page), page, pfn, order, migratetype); | |
29436 | - local_irq_restore(flags); | |
29437 | + local_unlock_irqrestore(pa_lock, flags); | |
29438 | } | |
29439 | ||
29440 | static void __init __free_pages_boot_core(struct page *page, | |
29441 | @@ -1879,16 +1917,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, | |
29442 | void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) | |
29443 | { | |
29444 | unsigned long flags; | |
29445 | + LIST_HEAD(dst); | |
29446 | int to_drain, batch; | |
29447 | ||
29448 | - local_irq_save(flags); | |
29449 | + local_lock_irqsave(pa_lock, flags); | |
29450 | batch = READ_ONCE(pcp->batch); | |
29451 | to_drain = min(pcp->count, batch); | |
29452 | if (to_drain > 0) { | |
29453 | - free_pcppages_bulk(zone, to_drain, pcp); | |
29454 | + isolate_pcp_pages(to_drain, pcp, &dst); | |
29455 | pcp->count -= to_drain; | |
29456 | } | |
29457 | - local_irq_restore(flags); | |
29458 | + local_unlock_irqrestore(pa_lock, flags); | |
29459 | + free_pcppages_bulk(zone, to_drain, &dst); | |
29460 | } | |
29461 | #endif | |
29462 | ||
29463 | @@ -1904,16 +1944,21 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone) | |
29464 | unsigned long flags; | |
29465 | struct per_cpu_pageset *pset; | |
29466 | struct per_cpu_pages *pcp; | |
29467 | + LIST_HEAD(dst); | |
29468 | + int count; | |
29469 | ||
29470 | - local_irq_save(flags); | |
29471 | + cpu_lock_irqsave(cpu, flags); | |
29472 | pset = per_cpu_ptr(zone->pageset, cpu); | |
29473 | ||
29474 | pcp = &pset->pcp; | |
29475 | - if (pcp->count) { | |
29476 | - free_pcppages_bulk(zone, pcp->count, pcp); | |
29477 | + count = pcp->count; | |
29478 | + if (count) { | |
29479 | + isolate_pcp_pages(count, pcp, &dst); | |
29480 | pcp->count = 0; | |
29481 | } | |
29482 | - local_irq_restore(flags); | |
29483 | + cpu_unlock_irqrestore(cpu, flags); | |
29484 | + if (count) | |
29485 | + free_pcppages_bulk(zone, count, &dst); | |
29486 | } | |
29487 | ||
29488 | /* | |
29489 | @@ -1999,8 +2044,17 @@ void drain_all_pages(struct zone *zone) | |
29490 | else | |
29491 | cpumask_clear_cpu(cpu, &cpus_with_pcps); | |
29492 | } | |
29493 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
29494 | on_each_cpu_mask(&cpus_with_pcps, (smp_call_func_t) drain_local_pages, | |
29495 | zone, 1); | |
29496 | +#else | |
29497 | + for_each_cpu(cpu, &cpus_with_pcps) { | |
29498 | + if (zone) | |
29499 | + drain_pages_zone(cpu, zone); | |
29500 | + else | |
29501 | + drain_pages(cpu); | |
29502 | + } | |
29503 | +#endif | |
29504 | } | |
29505 | ||
29506 | #ifdef CONFIG_HIBERNATION | |
29507 | @@ -2056,7 +2110,7 @@ void free_hot_cold_page(struct page *page, bool cold) | |
29508 | ||
29509 | migratetype = get_pfnblock_migratetype(page, pfn); | |
29510 | set_pcppage_migratetype(page, migratetype); | |
29511 | - local_irq_save(flags); | |
29512 | + local_lock_irqsave(pa_lock, flags); | |
29513 | __count_vm_event(PGFREE); | |
29514 | ||
29515 | /* | |
29516 | @@ -2082,12 +2136,17 @@ void free_hot_cold_page(struct page *page, bool cold) | |
29517 | pcp->count++; | |
29518 | if (pcp->count >= pcp->high) { | |
29519 | unsigned long batch = READ_ONCE(pcp->batch); | |
29520 | - free_pcppages_bulk(zone, batch, pcp); | |
29521 | + LIST_HEAD(dst); | |
29522 | + | |
29523 | + isolate_pcp_pages(batch, pcp, &dst); | |
29524 | pcp->count -= batch; | |
29525 | + local_unlock_irqrestore(pa_lock, flags); | |
29526 | + free_pcppages_bulk(zone, batch, &dst); | |
29527 | + return; | |
29528 | } | |
29529 | ||
29530 | out: | |
29531 | - local_irq_restore(flags); | |
29532 | + local_unlock_irqrestore(pa_lock, flags); | |
29533 | } | |
29534 | ||
29535 | /* | |
29536 | @@ -2222,7 +2281,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
29537 | struct per_cpu_pages *pcp; | |
29538 | struct list_head *list; | |
29539 | ||
29540 | - local_irq_save(flags); | |
29541 | + local_lock_irqsave(pa_lock, flags); | |
29542 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | |
29543 | list = &pcp->lists[migratetype]; | |
29544 | if (list_empty(list)) { | |
29545 | @@ -2254,7 +2313,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
29546 | */ | |
29547 | WARN_ON_ONCE(order > 1); | |
29548 | } | |
29549 | - spin_lock_irqsave(&zone->lock, flags); | |
29550 | + local_spin_lock_irqsave(pa_lock, &zone->lock, flags); | |
29551 | ||
29552 | page = NULL; | |
29553 | if (alloc_flags & ALLOC_HARDER) { | |
29554 | @@ -2264,11 +2323,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
29555 | } | |
29556 | if (!page) | |
29557 | page = __rmqueue(zone, order, migratetype, gfp_flags); | |
29558 | - spin_unlock(&zone->lock); | |
29559 | - if (!page) | |
29560 | + if (!page) { | |
29561 | + spin_unlock(&zone->lock); | |
29562 | goto failed; | |
29563 | + } | |
29564 | __mod_zone_freepage_state(zone, -(1 << order), | |
29565 | get_pcppage_migratetype(page)); | |
29566 | + spin_unlock(&zone->lock); | |
29567 | } | |
29568 | ||
29569 | __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); | |
29570 | @@ -2278,13 +2339,13 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |
29571 | ||
29572 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | |
29573 | zone_statistics(preferred_zone, zone, gfp_flags); | |
29574 | - local_irq_restore(flags); | |
29575 | + local_unlock_irqrestore(pa_lock, flags); | |
29576 | ||
29577 | VM_BUG_ON_PAGE(bad_range(zone, page), page); | |
29578 | return page; | |
29579 | ||
29580 | failed: | |
29581 | - local_irq_restore(flags); | |
29582 | + local_unlock_irqrestore(pa_lock, flags); | |
29583 | return NULL; | |
29584 | } | |
29585 | ||
29586 | @@ -5950,6 +6011,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, | |
29587 | void __init page_alloc_init(void) | |
29588 | { | |
29589 | hotcpu_notifier(page_alloc_cpu_notify, 0); | |
29590 | + local_irq_lock_init(pa_lock); | |
29591 | } | |
29592 | ||
29593 | /* | |
29594 | @@ -6844,7 +6906,7 @@ void zone_pcp_reset(struct zone *zone) | |
29595 | struct per_cpu_pageset *pset; | |
29596 | ||
29597 | /* avoid races with drain_pages() */ | |
29598 | - local_irq_save(flags); | |
29599 | + local_lock_irqsave(pa_lock, flags); | |
29600 | if (zone->pageset != &boot_pageset) { | |
29601 | for_each_online_cpu(cpu) { | |
29602 | pset = per_cpu_ptr(zone->pageset, cpu); | |
29603 | @@ -6853,7 +6915,7 @@ void zone_pcp_reset(struct zone *zone) | |
29604 | free_percpu(zone->pageset); | |
29605 | zone->pageset = &boot_pageset; | |
29606 | } | |
29607 | - local_irq_restore(flags); | |
29608 | + local_unlock_irqrestore(pa_lock, flags); | |
29609 | } | |
29610 | ||
29611 | #ifdef CONFIG_MEMORY_HOTREMOVE | |
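
Note: the page-allocator hunks above all apply one transformation. Every bare
local_irq_save()/local_irq_restore() section guarding per-CPU page lists
becomes the named local lock "pa_lock", and bulk frees are split so that pages
are isolated under the lock but only returned to the buddy allocator after it
is dropped. A minimal sketch of the combined idiom, assuming the locallock.h
helpers this series introduces (on !RT the local lock compiles down to plain
IRQ disabling; drain_pcp_sketch is an illustrative name):

    #include <linux/locallock.h>

    static DEFINE_LOCAL_IRQ_LOCK(pa_lock);

    static void drain_pcp_sketch(struct zone *zone, struct per_cpu_pages *pcp)
    {
        unsigned long flags;
        LIST_HEAD(dst);
        int count;

        local_lock_irqsave(pa_lock, flags);      /* was: local_irq_save() */
        count = pcp->count;
        if (count) {
            isolate_pcp_pages(count, pcp, &dst); /* detach under the lock */
            pcp->count = 0;
        }
        local_unlock_irqrestore(pa_lock, flags);
        if (count)                               /* free with IRQs enabled */
            free_pcppages_bulk(zone, count, &dst);
    }
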
29612 | diff --git a/mm/slab.h b/mm/slab.h | |
29613 | index 7b6087197997..afdc57941179 100644 | |
29614 | --- a/mm/slab.h | |
29615 | +++ b/mm/slab.h | |
29616 | @@ -324,7 +324,11 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x) | |
29617 | * The slab lists for all objects. | |
29618 | */ | |
29619 | struct kmem_cache_node { | |
29620 | +#ifdef CONFIG_SLUB | |
29621 | + raw_spinlock_t list_lock; | |
29622 | +#else | |
29623 | spinlock_t list_lock; | |
29624 | +#endif | |
29625 | ||
29626 | #ifdef CONFIG_SLAB | |
29627 | struct list_head slabs_partial; /* partial list first, better asm code */ | |
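
On PREEMPT_RT a plain spinlock_t turns into a sleeping rtmutex, while
raw_spinlock_t keeps genuine IRQ-off spinning semantics. SLUB takes list_lock
from contexts that must not sleep, hence the #ifdef above; the price is that
every section under the raw lock has to stay short and bounded. The mm/slub.c
hunks below convert all takers to this shape:

    struct kmem_cache_node *n = get_node(s, node);
    unsigned long flags;

    raw_spin_lock_irqsave(&n->list_lock, flags);
    /* bounded partial-list manipulation only: no allocation, no sleeping */
    raw_spin_unlock_irqrestore(&n->list_lock, flags);
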
29628 | diff --git a/mm/slub.c b/mm/slub.c | |
29629 | index 65d5f92d51d2..feb4a445a546 100644 | |
29630 | --- a/mm/slub.c | |
29631 | +++ b/mm/slub.c | |
29632 | @@ -1075,7 +1075,7 @@ static noinline struct kmem_cache_node *free_debug_processing( | |
29633 | void *object = head; | |
29634 | int cnt = 0; | |
29635 | ||
29636 | - spin_lock_irqsave(&n->list_lock, *flags); | |
29637 | + raw_spin_lock_irqsave(&n->list_lock, *flags); | |
29638 | slab_lock(page); | |
29639 | ||
29640 | if (!check_slab(s, page)) | |
29641 | @@ -1136,7 +1136,7 @@ out: | |
29642 | ||
29643 | fail: | |
29644 | slab_unlock(page); | |
29645 | - spin_unlock_irqrestore(&n->list_lock, *flags); | |
29646 | + raw_spin_unlock_irqrestore(&n->list_lock, *flags); | |
29647 | slab_fix(s, "Object at 0x%p not freed", object); | |
29648 | return NULL; | |
29649 | } | |
29650 | @@ -1263,6 +1263,12 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, | |
29651 | ||
29652 | #endif /* CONFIG_SLUB_DEBUG */ | |
29653 | ||
29654 | +struct slub_free_list { | |
29655 | + raw_spinlock_t lock; | |
29656 | + struct list_head list; | |
29657 | +}; | |
29658 | +static DEFINE_PER_CPU(struct slub_free_list, slub_free_list); | |
29659 | + | |
29660 | /* | |
29661 | * Hooks for other subsystems that check memory allocations. In a typical | |
29662 | * production configuration these hooks all should produce no code at all. | |
29663 | @@ -1399,10 +1405,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
29664 | gfp_t alloc_gfp; | |
29665 | void *start, *p; | |
29666 | int idx, order; | |
29667 | + bool enableirqs = false; | |
29668 | ||
29669 | flags &= gfp_allowed_mask; | |
29670 | ||
29671 | if (gfpflags_allow_blocking(flags)) | |
29672 | + enableirqs = true; | |
29673 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
29674 | + if (system_state == SYSTEM_RUNNING) | |
29675 | + enableirqs = true; | |
29676 | +#endif | |
29677 | + if (enableirqs) | |
29678 | local_irq_enable(); | |
29679 | ||
29680 | flags |= s->allocflags; | |
29681 | @@ -1473,7 +1486,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |
29682 | page->frozen = 1; | |
29683 | ||
29684 | out: | |
29685 | - if (gfpflags_allow_blocking(flags)) | |
29686 | + if (enableirqs) | |
29687 | local_irq_disable(); | |
29688 | if (!page) | |
29689 | return NULL; | |
29690 | @@ -1529,6 +1542,16 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |
29691 | __free_kmem_pages(page, order); | |
29692 | } | |
29693 | ||
29694 | +static void free_delayed(struct list_head *h) | |
29695 | +{ | |
29696 | + while (!list_empty(h)) { | |
29697 | + struct page *page = list_first_entry(h, struct page, lru); | |
29698 | + | |
29699 | + list_del(&page->lru); | |
29700 | + __free_slab(page->slab_cache, page); | |
29701 | + } | |
29702 | +} | |
29703 | + | |
29704 | #define need_reserve_slab_rcu \ | |
29705 | (sizeof(((struct page *)NULL)->lru) < sizeof(struct rcu_head)) | |
29706 | ||
29707 | @@ -1560,6 +1583,12 @@ static void free_slab(struct kmem_cache *s, struct page *page) | |
29708 | } | |
29709 | ||
29710 | call_rcu(head, rcu_free_slab); | |
29711 | + } else if (irqs_disabled()) { | |
29712 | + struct slub_free_list *f = this_cpu_ptr(&slub_free_list); | |
29713 | + | |
29714 | + raw_spin_lock(&f->lock); | |
29715 | + list_add(&page->lru, &f->list); | |
29716 | + raw_spin_unlock(&f->lock); | |
29717 | } else | |
29718 | __free_slab(s, page); | |
29719 | } | |
29720 | @@ -1673,7 +1702,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
29721 | if (!n || !n->nr_partial) | |
29722 | return NULL; | |
29723 | ||
29724 | - spin_lock(&n->list_lock); | |
29725 | + raw_spin_lock(&n->list_lock); | |
29726 | list_for_each_entry_safe(page, page2, &n->partial, lru) { | |
29727 | void *t; | |
29728 | ||
29729 | @@ -1698,7 +1727,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, | |
29730 | break; | |
29731 | ||
29732 | } | |
29733 | - spin_unlock(&n->list_lock); | |
29734 | + raw_spin_unlock(&n->list_lock); | |
29735 | return object; | |
29736 | } | |
29737 | ||
29738 | @@ -1944,7 +1973,7 @@ redo: | |
29739 | * that acquire_slab() will see a slab page that | |
29740 | * is frozen | |
29741 | */ | |
29742 | - spin_lock(&n->list_lock); | |
29743 | + raw_spin_lock(&n->list_lock); | |
29744 | } | |
29745 | } else { | |
29746 | m = M_FULL; | |
29747 | @@ -1955,7 +1984,7 @@ redo: | |
29748 | * slabs from diagnostic functions will not see | |
29749 | * any frozen slabs. | |
29750 | */ | |
29751 | - spin_lock(&n->list_lock); | |
29752 | + raw_spin_lock(&n->list_lock); | |
29753 | } | |
29754 | } | |
29755 | ||
29756 | @@ -1990,7 +2019,7 @@ redo: | |
29757 | goto redo; | |
29758 | ||
29759 | if (lock) | |
29760 | - spin_unlock(&n->list_lock); | |
29761 | + raw_spin_unlock(&n->list_lock); | |
29762 | ||
29763 | if (m == M_FREE) { | |
29764 | stat(s, DEACTIVATE_EMPTY); | |
29765 | @@ -2022,10 +2051,10 @@ static void unfreeze_partials(struct kmem_cache *s, | |
29766 | n2 = get_node(s, page_to_nid(page)); | |
29767 | if (n != n2) { | |
29768 | if (n) | |
29769 | - spin_unlock(&n->list_lock); | |
29770 | + raw_spin_unlock(&n->list_lock); | |
29771 | ||
29772 | n = n2; | |
29773 | - spin_lock(&n->list_lock); | |
29774 | + raw_spin_lock(&n->list_lock); | |
29775 | } | |
29776 | ||
29777 | do { | |
29778 | @@ -2054,7 +2083,7 @@ static void unfreeze_partials(struct kmem_cache *s, | |
29779 | } | |
29780 | ||
29781 | if (n) | |
29782 | - spin_unlock(&n->list_lock); | |
29783 | + raw_spin_unlock(&n->list_lock); | |
29784 | ||
29785 | while (discard_page) { | |
29786 | page = discard_page; | |
29787 | @@ -2093,14 +2122,21 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) | |
29788 | pobjects = oldpage->pobjects; | |
29789 | pages = oldpage->pages; | |
29790 | if (drain && pobjects > s->cpu_partial) { | |
29791 | + struct slub_free_list *f; | |
29792 | unsigned long flags; | |
29793 | + LIST_HEAD(tofree); | |
29794 | /* | |
29795 | * partial array is full. Move the existing | |
29796 | * set to the per node partial list. | |
29797 | */ | |
29798 | local_irq_save(flags); | |
29799 | unfreeze_partials(s, this_cpu_ptr(s->cpu_slab)); | |
29800 | + f = this_cpu_ptr(&slub_free_list); | |
29801 | + raw_spin_lock(&f->lock); | |
29802 | + list_splice_init(&f->list, &tofree); | |
29803 | + raw_spin_unlock(&f->lock); | |
29804 | local_irq_restore(flags); | |
29805 | + free_delayed(&tofree); | |
29806 | oldpage = NULL; | |
29807 | pobjects = 0; | |
29808 | pages = 0; | |
29809 | @@ -2172,7 +2208,22 @@ static bool has_cpu_slab(int cpu, void *info) | |
29810 | ||
29811 | static void flush_all(struct kmem_cache *s) | |
29812 | { | |
29813 | + LIST_HEAD(tofree); | |
29814 | + int cpu; | |
29815 | + | |
29816 | on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC); | |
29817 | + for_each_online_cpu(cpu) { | |
29818 | + struct slub_free_list *f; | |
29819 | + | |
29820 | + if (!has_cpu_slab(cpu, s)) | |
29821 | + continue; | |
29822 | + | |
29823 | + f = &per_cpu(slub_free_list, cpu); | |
29824 | + raw_spin_lock_irq(&f->lock); | |
29825 | + list_splice_init(&f->list, &tofree); | |
29826 | + raw_spin_unlock_irq(&f->lock); | |
29827 | + free_delayed(&tofree); | |
29828 | + } | |
29829 | } | |
29830 | ||
29831 | /* | |
29832 | @@ -2208,10 +2259,10 @@ static unsigned long count_partial(struct kmem_cache_node *n, | |
29833 | unsigned long x = 0; | |
29834 | struct page *page; | |
29835 | ||
29836 | - spin_lock_irqsave(&n->list_lock, flags); | |
29837 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
29838 | list_for_each_entry(page, &n->partial, lru) | |
29839 | x += get_count(page); | |
29840 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
29841 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
29842 | return x; | |
29843 | } | |
29844 | #endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ | |
29845 | @@ -2349,8 +2400,10 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) | |
29846 | * already disabled (which is the case for bulk allocation). | |
29847 | */ | |
29848 | static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
29849 | - unsigned long addr, struct kmem_cache_cpu *c) | |
29850 | + unsigned long addr, struct kmem_cache_cpu *c, | |
29851 | + struct list_head *to_free) | |
29852 | { | |
29853 | + struct slub_free_list *f; | |
29854 | void *freelist; | |
29855 | struct page *page; | |
29856 | ||
29857 | @@ -2410,6 +2463,13 @@ load_freelist: | |
29858 | VM_BUG_ON(!c->page->frozen); | |
29859 | c->freelist = get_freepointer(s, freelist); | |
29860 | c->tid = next_tid(c->tid); | |
29861 | + | |
29862 | +out: | |
29863 | + f = this_cpu_ptr(&slub_free_list); | |
29864 | + raw_spin_lock(&f->lock); | |
29865 | + list_splice_init(&f->list, to_free); | |
29866 | + raw_spin_unlock(&f->lock); | |
29867 | + | |
29868 | return freelist; | |
29869 | ||
29870 | new_slab: | |
29871 | @@ -2441,7 +2501,7 @@ new_slab: | |
29872 | deactivate_slab(s, page, get_freepointer(s, freelist)); | |
29873 | c->page = NULL; | |
29874 | c->freelist = NULL; | |
29875 | - return freelist; | |
29876 | + goto out; | |
29877 | } | |
29878 | ||
29879 | /* | |
29880 | @@ -2453,6 +2513,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
29881 | { | |
29882 | void *p; | |
29883 | unsigned long flags; | |
29884 | + LIST_HEAD(tofree); | |
29885 | ||
29886 | local_irq_save(flags); | |
29887 | #ifdef CONFIG_PREEMPT | |
29888 | @@ -2464,8 +2525,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |
29889 | c = this_cpu_ptr(s->cpu_slab); | |
29890 | #endif | |
29891 | ||
29892 | - p = ___slab_alloc(s, gfpflags, node, addr, c); | |
29893 | + p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree); | |
29894 | local_irq_restore(flags); | |
29895 | + free_delayed(&tofree); | |
29896 | return p; | |
29897 | } | |
29898 | ||
29899 | @@ -2652,7 +2714,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
29900 | ||
29901 | do { | |
29902 | if (unlikely(n)) { | |
29903 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
29904 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
29905 | n = NULL; | |
29906 | } | |
29907 | prior = page->freelist; | |
29908 | @@ -2684,7 +2746,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
29909 | * Otherwise the list_lock will synchronize with | |
29910 | * other processors updating the list of slabs. | |
29911 | */ | |
29912 | - spin_lock_irqsave(&n->list_lock, flags); | |
29913 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
29914 | ||
29915 | } | |
29916 | } | |
29917 | @@ -2726,7 +2788,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |
29918 | add_partial(n, page, DEACTIVATE_TO_TAIL); | |
29919 | stat(s, FREE_ADD_PARTIAL); | |
29920 | } | |
29921 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
29922 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
29923 | return; | |
29924 | ||
29925 | slab_empty: | |
29926 | @@ -2741,7 +2803,7 @@ slab_empty: | |
29927 | remove_full(s, n, page); | |
29928 | } | |
29929 | ||
29930 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
29931 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
29932 | stat(s, FREE_SLAB); | |
29933 | discard_slab(s, page); | |
29934 | } | |
29935 | @@ -2913,6 +2975,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
29936 | void **p) | |
29937 | { | |
29938 | struct kmem_cache_cpu *c; | |
29939 | + LIST_HEAD(to_free); | |
29940 | int i; | |
29941 | ||
29942 | /* memcg and kmem_cache debug support */ | |
29943 | @@ -2936,7 +2999,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
29944 | * of re-populating per CPU c->freelist | |
29945 | */ | |
29946 | p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE, | |
29947 | - _RET_IP_, c); | |
29948 | + _RET_IP_, c, &to_free); | |
29949 | if (unlikely(!p[i])) | |
29950 | goto error; | |
29951 | ||
29952 | @@ -2948,6 +3011,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, | |
29953 | } | |
29954 | c->tid = next_tid(c->tid); | |
29955 | local_irq_enable(); | |
29956 | + free_delayed(&to_free); | |
29957 | ||
29958 | /* Clear memory outside IRQ disabled fastpath loop */ | |
29959 | if (unlikely(flags & __GFP_ZERO)) { | |
29960 | @@ -3095,7 +3159,7 @@ static void | |
29961 | init_kmem_cache_node(struct kmem_cache_node *n) | |
29962 | { | |
29963 | n->nr_partial = 0; | |
29964 | - spin_lock_init(&n->list_lock); | |
29965 | + raw_spin_lock_init(&n->list_lock); | |
29966 | INIT_LIST_HEAD(&n->partial); | |
29967 | #ifdef CONFIG_SLUB_DEBUG | |
29968 | atomic_long_set(&n->nr_slabs, 0); | |
29969 | @@ -3677,7 +3741,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
29970 | for (i = 0; i < SHRINK_PROMOTE_MAX; i++) | |
29971 | INIT_LIST_HEAD(promote + i); | |
29972 | ||
29973 | - spin_lock_irqsave(&n->list_lock, flags); | |
29974 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
29975 | ||
29976 | /* | |
29977 | * Build lists of slabs to discard or promote. | |
29978 | @@ -3708,7 +3772,7 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate) | |
29979 | for (i = SHRINK_PROMOTE_MAX - 1; i >= 0; i--) | |
29980 | list_splice(promote + i, &n->partial); | |
29981 | ||
29982 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
29983 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
29984 | ||
29985 | /* Release empty slabs */ | |
29986 | list_for_each_entry_safe(page, t, &discard, lru) | |
29987 | @@ -3884,6 +3948,12 @@ void __init kmem_cache_init(void) | |
29988 | { | |
29989 | static __initdata struct kmem_cache boot_kmem_cache, | |
29990 | boot_kmem_cache_node; | |
29991 | + int cpu; | |
29992 | + | |
29993 | + for_each_possible_cpu(cpu) { | |
29994 | + raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock); | |
29995 | + INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list); | |
29996 | + } | |
29997 | ||
29998 | if (debug_guardpage_minorder()) | |
29999 | slub_max_order = 0; | |
30000 | @@ -4127,7 +4197,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
30001 | struct page *page; | |
30002 | unsigned long flags; | |
30003 | ||
30004 | - spin_lock_irqsave(&n->list_lock, flags); | |
30005 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
30006 | ||
30007 | list_for_each_entry(page, &n->partial, lru) { | |
30008 | validate_slab_slab(s, page, map); | |
30009 | @@ -4149,7 +4219,7 @@ static int validate_slab_node(struct kmem_cache *s, | |
30010 | s->name, count, atomic_long_read(&n->nr_slabs)); | |
30011 | ||
30012 | out: | |
30013 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
30014 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
30015 | return count; | |
30016 | } | |
30017 | ||
30018 | @@ -4337,12 +4407,12 @@ static int list_locations(struct kmem_cache *s, char *buf, | |
30019 | if (!atomic_long_read(&n->nr_slabs)) | |
30020 | continue; | |
30021 | ||
30022 | - spin_lock_irqsave(&n->list_lock, flags); | |
30023 | + raw_spin_lock_irqsave(&n->list_lock, flags); | |
30024 | list_for_each_entry(page, &n->partial, lru) | |
30025 | process_slab(&t, s, page, alloc, map); | |
30026 | list_for_each_entry(page, &n->full, lru) | |
30027 | process_slab(&t, s, page, alloc, map); | |
30028 | - spin_unlock_irqrestore(&n->list_lock, flags); | |
30029 | + raw_spin_unlock_irqrestore(&n->list_lock, flags); | |
30030 | } | |
30031 | ||
30032 | for (i = 0; i < t.count; i++) { | |
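
The slub_free_list machinery introduced above defers __free_slab() calls that
would otherwise run with interrupts disabled: free_slab() parks such pages on
a per-CPU raw-locked list, and free_delayed() drains that list once the caller
has re-enabled interrupts (see __slab_alloc() and flush_all() above). A
condensed sketch of the producer/consumer pair, using only helpers from these
hunks:

    /* producer - IRQs off, must not call into the page allocator */
    struct slub_free_list *f = this_cpu_ptr(&slub_free_list);

    raw_spin_lock(&f->lock);
    list_add(&page->lru, &f->list);
    raw_spin_unlock(&f->lock);

    /* consumer - runs after local_irq_restore() */
    LIST_HEAD(tofree);

    raw_spin_lock_irq(&f->lock);
    list_splice_init(&f->list, &tofree);
    raw_spin_unlock_irq(&f->lock);
    free_delayed(&tofree);
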
30033 | diff --git a/mm/swap.c b/mm/swap.c | |
30034 | index 39395fb549c0..ad16649221d7 100644 | |
30035 | --- a/mm/swap.c | |
30036 | +++ b/mm/swap.c | |
30037 | @@ -31,6 +31,7 @@ | |
30038 | #include <linux/memcontrol.h> | |
30039 | #include <linux/gfp.h> | |
30040 | #include <linux/uio.h> | |
30041 | +#include <linux/locallock.h> | |
30042 | #include <linux/hugetlb.h> | |
30043 | #include <linux/page_idle.h> | |
30044 | ||
30045 | @@ -46,6 +47,9 @@ static DEFINE_PER_CPU(struct pagevec, lru_add_pvec); | |
30046 | static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); | |
30047 | static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs); | |
30048 | ||
30049 | +static DEFINE_LOCAL_IRQ_LOCK(rotate_lock); | |
30050 | +DEFINE_LOCAL_IRQ_LOCK(swapvec_lock); | |
30051 | + | |
30052 | /* | |
30053 | * This path almost never happens for VM activity - pages are normally | |
30054 | * freed via pagevecs. But it gets used by networking. | |
30055 | @@ -481,11 +485,11 @@ void rotate_reclaimable_page(struct page *page) | |
30056 | unsigned long flags; | |
30057 | ||
30058 | page_cache_get(page); | |
30059 | - local_irq_save(flags); | |
30060 | + local_lock_irqsave(rotate_lock, flags); | |
30061 | pvec = this_cpu_ptr(&lru_rotate_pvecs); | |
30062 | if (!pagevec_add(pvec, page)) | |
30063 | pagevec_move_tail(pvec); | |
30064 | - local_irq_restore(flags); | |
30065 | + local_unlock_irqrestore(rotate_lock, flags); | |
30066 | } | |
30067 | } | |
30068 | ||
30069 | @@ -536,12 +540,13 @@ static bool need_activate_page_drain(int cpu) | |
30070 | void activate_page(struct page *page) | |
30071 | { | |
30072 | if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { | |
30073 | - struct pagevec *pvec = &get_cpu_var(activate_page_pvecs); | |
30074 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
30075 | + activate_page_pvecs); | |
30076 | ||
30077 | page_cache_get(page); | |
30078 | if (!pagevec_add(pvec, page)) | |
30079 | pagevec_lru_move_fn(pvec, __activate_page, NULL); | |
30080 | - put_cpu_var(activate_page_pvecs); | |
30081 | + put_locked_var(swapvec_lock, activate_page_pvecs); | |
30082 | } | |
30083 | } | |
30084 | ||
30085 | @@ -567,7 +572,7 @@ void activate_page(struct page *page) | |
30086 | ||
30087 | static void __lru_cache_activate_page(struct page *page) | |
30088 | { | |
30089 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
30090 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
30091 | int i; | |
30092 | ||
30093 | /* | |
30094 | @@ -589,7 +594,7 @@ static void __lru_cache_activate_page(struct page *page) | |
30095 | } | |
30096 | } | |
30097 | ||
30098 | - put_cpu_var(lru_add_pvec); | |
30099 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
30100 | } | |
30101 | ||
30102 | /* | |
30103 | @@ -630,13 +635,13 @@ EXPORT_SYMBOL(mark_page_accessed); | |
30104 | ||
30105 | static void __lru_cache_add(struct page *page) | |
30106 | { | |
30107 | - struct pagevec *pvec = &get_cpu_var(lru_add_pvec); | |
30108 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec); | |
30109 | ||
30110 | page_cache_get(page); | |
30111 | if (!pagevec_space(pvec)) | |
30112 | __pagevec_lru_add(pvec); | |
30113 | pagevec_add(pvec, page); | |
30114 | - put_cpu_var(lru_add_pvec); | |
30115 | + put_locked_var(swapvec_lock, lru_add_pvec); | |
30116 | } | |
30117 | ||
30118 | /** | |
30119 | @@ -816,9 +821,15 @@ void lru_add_drain_cpu(int cpu) | |
30120 | unsigned long flags; | |
30121 | ||
30122 | /* No harm done if a racing interrupt already did this */ | |
30123 | - local_irq_save(flags); | |
30124 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
30125 | + local_lock_irqsave_on(rotate_lock, flags, cpu); | |
30126 | + pagevec_move_tail(pvec); | |
30127 | + local_unlock_irqrestore_on(rotate_lock, flags, cpu); | |
30128 | +#else | |
30129 | + local_lock_irqsave(rotate_lock, flags); | |
30130 | pagevec_move_tail(pvec); | |
30131 | - local_irq_restore(flags); | |
30132 | + local_unlock_irqrestore(rotate_lock, flags); | |
30133 | +#endif | |
30134 | } | |
30135 | ||
30136 | pvec = &per_cpu(lru_deactivate_file_pvecs, cpu); | |
30137 | @@ -846,26 +857,47 @@ void deactivate_file_page(struct page *page) | |
30138 | return; | |
30139 | ||
30140 | if (likely(get_page_unless_zero(page))) { | |
30141 | - struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs); | |
30142 | + struct pagevec *pvec = &get_locked_var(swapvec_lock, | |
30143 | + lru_deactivate_file_pvecs); | |
30144 | ||
30145 | if (!pagevec_add(pvec, page)) | |
30146 | pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL); | |
30147 | - put_cpu_var(lru_deactivate_file_pvecs); | |
30148 | + put_locked_var(swapvec_lock, lru_deactivate_file_pvecs); | |
30149 | } | |
30150 | } | |
30151 | ||
30152 | void lru_add_drain(void) | |
30153 | { | |
30154 | - lru_add_drain_cpu(get_cpu()); | |
30155 | - put_cpu(); | |
30156 | + lru_add_drain_cpu(local_lock_cpu(swapvec_lock)); | |
30157 | + local_unlock_cpu(swapvec_lock); | |
30158 | } | |
30159 | ||
30160 | + | |
30161 | +#ifdef CONFIG_PREEMPT_RT_BASE | |
30162 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
30163 | +{ | |
30164 | + local_lock_on(swapvec_lock, cpu); | |
30165 | + lru_add_drain_cpu(cpu); | |
30166 | + local_unlock_on(swapvec_lock, cpu); | |
30167 | +} | |
30168 | + | |
30169 | +#else | |
30170 | + | |
30171 | static void lru_add_drain_per_cpu(struct work_struct *dummy) | |
30172 | { | |
30173 | lru_add_drain(); | |
30174 | } | |
30175 | ||
30176 | static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work); | |
30177 | +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work) | |
30178 | +{ | |
30179 | + struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
30180 | + | |
30181 | + INIT_WORK(work, lru_add_drain_per_cpu); | |
30182 | + schedule_work_on(cpu, work); | |
30183 | + cpumask_set_cpu(cpu, has_work); | |
30184 | +} | |
30185 | +#endif | |
30186 | ||
30187 | void lru_add_drain_all(void) | |
30188 | { | |
30189 | @@ -878,20 +910,17 @@ void lru_add_drain_all(void) | |
30190 | cpumask_clear(&has_work); | |
30191 | ||
30192 | for_each_online_cpu(cpu) { | |
30193 | - struct work_struct *work = &per_cpu(lru_add_drain_work, cpu); | |
30194 | - | |
30195 | if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) || | |
30196 | pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) || | |
30197 | pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) || | |
30198 | - need_activate_page_drain(cpu)) { | |
30199 | - INIT_WORK(work, lru_add_drain_per_cpu); | |
30200 | - schedule_work_on(cpu, work); | |
30201 | - cpumask_set_cpu(cpu, &has_work); | |
30202 | - } | |
30203 | + need_activate_page_drain(cpu)) | |
30204 | + remote_lru_add_drain(cpu, &has_work); | |
30205 | } | |
30206 | ||
30207 | +#ifndef CONFIG_PREEMPT_RT_BASE | |
30208 | for_each_cpu(cpu, &has_work) | |
30209 | flush_work(&per_cpu(lru_add_drain_work, cpu)); | |
30210 | +#endif | |
30211 | ||
30212 | put_online_cpus(); | |
30213 | mutex_unlock(&lock); | |
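
The pagevec conversions above trade get_cpu_var(), which disables preemption
outright, for get_locked_var() on the new swapvec_lock: on RT the section is
then covered by a per-CPU sleeping lock and stays preemptible, while on !RT it
still reduces to the old behaviour. The resulting idiom, per the
__lru_cache_add() hunk:

    struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);

    page_cache_get(page);
    if (!pagevec_space(pvec))
        __pagevec_lru_add(pvec);
    pagevec_add(pvec, page);
    put_locked_var(swapvec_lock, lru_add_pvec);
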
30214 | diff --git a/mm/truncate.c b/mm/truncate.c | |
30215 | index 76e35ad97102..5f196420020c 100644 | |
30216 | --- a/mm/truncate.c | |
30217 | +++ b/mm/truncate.c | |
30218 | @@ -56,8 +56,11 @@ static void clear_exceptional_entry(struct address_space *mapping, | |
30219 | * protected by mapping->tree_lock. | |
30220 | */ | |
30221 | if (!workingset_node_shadows(node) && | |
30222 | - !list_empty(&node->private_list)) | |
30223 | - list_lru_del(&workingset_shadow_nodes, &node->private_list); | |
30224 | + !list_empty(&node->private_list)) { | |
30225 | + local_lock(workingset_shadow_lock); | |
30226 | + list_lru_del(&__workingset_shadow_nodes, &node->private_list); | |
30227 | + local_unlock(workingset_shadow_lock); | |
30228 | + } | |
30229 | __radix_tree_delete_node(&mapping->page_tree, node); | |
30230 | unlock: | |
30231 | spin_unlock_irq(&mapping->tree_lock); | |
30232 | diff --git a/mm/vmalloc.c b/mm/vmalloc.c | |
30233 | index 8e3c9c5a3042..68740314ad54 100644 | |
30234 | --- a/mm/vmalloc.c | |
30235 | +++ b/mm/vmalloc.c | |
30236 | @@ -821,7 +821,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
30237 | struct vmap_block *vb; | |
30238 | struct vmap_area *va; | |
30239 | unsigned long vb_idx; | |
30240 | - int node, err; | |
30241 | + int node, err, cpu; | |
30242 | void *vaddr; | |
30243 | ||
30244 | node = numa_node_id(); | |
30245 | @@ -864,11 +864,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) | |
30246 | BUG_ON(err); | |
30247 | radix_tree_preload_end(); | |
30248 | ||
30249 | - vbq = &get_cpu_var(vmap_block_queue); | |
30250 | + cpu = get_cpu_light(); | |
30251 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
30252 | spin_lock(&vbq->lock); | |
30253 | list_add_tail_rcu(&vb->free_list, &vbq->free); | |
30254 | spin_unlock(&vbq->lock); | |
30255 | - put_cpu_var(vmap_block_queue); | |
30256 | + put_cpu_light(); | |
30257 | ||
30258 | return vaddr; | |
30259 | } | |
30260 | @@ -937,6 +938,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
30261 | struct vmap_block *vb; | |
30262 | void *vaddr = NULL; | |
30263 | unsigned int order; | |
30264 | + int cpu; | |
30265 | ||
30266 | BUG_ON(offset_in_page(size)); | |
30267 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | |
30268 | @@ -951,7 +953,8 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
30269 | order = get_order(size); | |
30270 | ||
30271 | rcu_read_lock(); | |
30272 | - vbq = &get_cpu_var(vmap_block_queue); | |
30273 | + cpu = get_cpu_light(); | |
30274 | + vbq = this_cpu_ptr(&vmap_block_queue); | |
30275 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | |
30276 | unsigned long pages_off; | |
30277 | ||
30278 | @@ -974,7 +977,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | |
30279 | break; | |
30280 | } | |
30281 | ||
30282 | - put_cpu_var(vmap_block_queue); | |
30283 | + put_cpu_light(); | |
30284 | rcu_read_unlock(); | |
30285 | ||
30286 | /* Allocate new block if nothing was found */ | |
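
get_cpu_var() would pin the task by disabling preemption, which on RT forbids
taking the (now sleeping) vbq->lock inside the section. get_cpu_light() only
disables migration, so the per-CPU pointer stays stable while sleeping locks
remain legal. A sketch of the converted sequence from new_vmap_block():

    int cpu = get_cpu_light();    /* RT: migrate_disable(); !RT: get_cpu() */
    struct vmap_block_queue *vbq = this_cpu_ptr(&vmap_block_queue);

    spin_lock(&vbq->lock);        /* may sleep on RT - now permitted */
    list_add_tail_rcu(&vb->free_list, &vbq->free);
    spin_unlock(&vbq->lock);
    put_cpu_light();
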
30287 | diff --git a/mm/vmstat.c b/mm/vmstat.c | |
30288 | index c54fd2924f25..64416fd7c209 100644 | |
30289 | --- a/mm/vmstat.c | |
30290 | +++ b/mm/vmstat.c | |
30291 | @@ -226,6 +226,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
30292 | long x; | |
30293 | long t; | |
30294 | ||
30295 | + preempt_disable_rt(); | |
30296 | x = delta + __this_cpu_read(*p); | |
30297 | ||
30298 | t = __this_cpu_read(pcp->stat_threshold); | |
30299 | @@ -235,6 +236,7 @@ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
30300 | x = 0; | |
30301 | } | |
30302 | __this_cpu_write(*p, x); | |
30303 | + preempt_enable_rt(); | |
30304 | } | |
30305 | EXPORT_SYMBOL(__mod_zone_page_state); | |
30306 | ||
30307 | @@ -267,6 +269,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
30308 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
30309 | s8 v, t; | |
30310 | ||
30311 | + preempt_disable_rt(); | |
30312 | v = __this_cpu_inc_return(*p); | |
30313 | t = __this_cpu_read(pcp->stat_threshold); | |
30314 | if (unlikely(v > t)) { | |
30315 | @@ -275,6 +278,7 @@ void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | |
30316 | zone_page_state_add(v + overstep, zone, item); | |
30317 | __this_cpu_write(*p, -overstep); | |
30318 | } | |
30319 | + preempt_enable_rt(); | |
30320 | } | |
30321 | ||
30322 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
30323 | @@ -289,6 +293,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
30324 | s8 __percpu *p = pcp->vm_stat_diff + item; | |
30325 | s8 v, t; | |
30326 | ||
30327 | + preempt_disable_rt(); | |
30328 | v = __this_cpu_dec_return(*p); | |
30329 | t = __this_cpu_read(pcp->stat_threshold); | |
30330 | if (unlikely(v < - t)) { | |
30331 | @@ -297,6 +302,7 @@ void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | |
30332 | zone_page_state_add(v - overstep, zone, item); | |
30333 | __this_cpu_write(*p, overstep); | |
30334 | } | |
30335 | + preempt_enable_rt(); | |
30336 | } | |
30337 | ||
30338 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
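
The vmstat counters use __this_cpu ops and therefore assume the caller cannot
migrate; on RT many call sites reach here holding only a sleeping lock, so
preempt_disable_rt() - a no-op on !RT kernels - restores that guarantee just
around the read-modify-write. The converted __mod_zone_page_state() body then
reads as below (the threshold test is reconstructed from the surrounding
context of the hunk):

    long x, t;

    preempt_disable_rt();                  /* compiles away on !RT */
    x = delta + __this_cpu_read(*p);
    t = __this_cpu_read(pcp->stat_threshold);
    if (unlikely(x > t || x < -t)) {
        zone_page_state_add(x, zone, item); /* fold into the zone counter */
        x = 0;
    }
    __this_cpu_write(*p, x);
    preempt_enable_rt();
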
30339 | diff --git a/mm/workingset.c b/mm/workingset.c | |
30340 | index df66f426fdcf..6db7b243fa0d 100644 |
30341 | --- a/mm/workingset.c |
30342 | +++ b/mm/workingset.c | |
30343 | @@ -264,7 +264,8 @@ void workingset_activation(struct page *page) | |
30344 | * point where they would still be useful. | |
30345 | */ | |
30346 | ||
30347 | -struct list_lru workingset_shadow_nodes; | |
30348 | +struct list_lru __workingset_shadow_nodes; | |
30349 | +DEFINE_LOCAL_IRQ_LOCK(workingset_shadow_lock); | |
30350 | ||
30351 | static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
30352 | struct shrink_control *sc) | |
30353 | @@ -274,9 +275,9 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker, | |
30354 | unsigned long pages; | |
30355 | ||
30356 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
30357 | - local_irq_disable(); | |
30358 | - shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc); | |
30359 | - local_irq_enable(); | |
30360 | + local_lock_irq(workingset_shadow_lock); | |
30361 | + shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc); | |
30362 | + local_unlock_irq(workingset_shadow_lock); | |
30363 | ||
30364 | pages = node_present_pages(sc->nid); | |
30365 | /* | |
30366 | @@ -361,9 +362,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, |
30367 | spin_unlock(&mapping->tree_lock); |
30368 | ret = LRU_REMOVED_RETRY; | |
30369 | out: | |
30370 | - local_irq_enable(); | |
30371 | + local_unlock_irq(workingset_shadow_lock); | |
30372 | cond_resched(); | |
30373 | - local_irq_disable(); | |
30374 | + local_lock_irq(workingset_shadow_lock); | |
30375 | spin_lock(lru_lock); | |
30376 | return ret; | |
30377 | } | |
30378 | @@ -374,10 +375,10 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker, |
30379 | unsigned long ret; |
30380 | ||
30381 | /* list_lru lock nests inside IRQ-safe mapping->tree_lock */ | |
30382 | - local_irq_disable(); | |
30383 | - ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc, | |
30384 | + local_lock_irq(workingset_shadow_lock); | |
30385 | + ret = list_lru_shrink_walk(&__workingset_shadow_nodes, sc, | |
30386 | shadow_lru_isolate, NULL); | |
30387 | - local_irq_enable(); | |
30388 | + local_unlock_irq(workingset_shadow_lock); | |
30389 | return ret; | |
30390 | } | |
30391 | ||
30392 | @@ -398,7 +399,7 @@ static int __init workingset_init(void) |
30393 | { |
30394 | int ret; | |
30395 | ||
30396 | - ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key); | |
30397 | + ret = list_lru_init_key(&__workingset_shadow_nodes, &shadow_nodes_key); | |
30398 | if (ret) | |
30399 | goto err; | |
30400 | ret = register_shrinker(&workingset_shadow_shrinker); | |
30401 | @@ -406,7 +407,7 @@ static int __init workingset_init(void) |
30402 | goto err_list_lru; |
30403 | return 0; | |
30404 | err_list_lru: | |
30405 | - list_lru_destroy(&workingset_shadow_nodes); | |
30406 | + list_lru_destroy(&__workingset_shadow_nodes); | |
30407 | err: | |
30408 | return ret; | |
30409 | } | |
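
Renaming the lru to __workingset_shadow_nodes forces every user through the
new workingset_shadow_lock: the list_lru's internal lock nests inside the
IRQ-safe mapping->tree_lock, so on RT a plain local_irq_disable() is no longer
enough to serialize against it. The mm/truncate.c hunk earlier takes the same
lock (with plain local_lock(), since tree_lock already has interrupts off);
the shrinker sites above reduce to:

    local_lock_irq(workingset_shadow_lock);
    shadow_nodes = list_lru_shrink_count(&__workingset_shadow_nodes, sc);
    local_unlock_irq(workingset_shadow_lock);
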
30410 | diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c | |
30411 | index c1ea19478119..529552c3716d 100644
30412 | --- a/mm/zsmalloc.c |
30413 | +++ b/mm/zsmalloc.c | |
30414 | @@ -64,6 +64,7 @@ |
30415 | #include <linux/debugfs.h> | |
30416 | #include <linux/zsmalloc.h> | |
30417 | #include <linux/zpool.h> | |
30418 | +#include <linux/locallock.h> | |
30419 | ||
30420 | /* | |
30421 | * This must be a power of 2 and greater than or equal to sizeof(link_free). |
30422 | @@ -403,6 +404,7 @@ static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) | |
30423 | ||
30424 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | |
30425 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | |
30426 | +static DEFINE_LOCAL_IRQ_LOCK(zs_map_area_lock); | |
30427 | ||
30428 | static int is_first_page(struct page *page) | |
30429 | { | |
30430 | @@ -1289,7 +1291,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |
30431 | class = pool->size_class[class_idx]; |
30432 | off = obj_idx_to_offset(page, obj_idx, class->size); | |
30433 | ||
30434 | - area = &get_cpu_var(zs_map_area); | |
30435 | + area = &get_locked_var(zs_map_area_lock, zs_map_area); |
30436 | area->vm_mm = mm; |
30437 | if (off + class->size <= PAGE_SIZE) { | |
30438 | /* this object is contained entirely within a page */ | |
30439 | @@ -1342,7 +1344,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) |
30440 | |
30441 | __zs_unmap_object(area, pages, off, class->size); | |
30442 | } | |
30443 | - put_cpu_var(zs_map_area); | |
30444 | + put_locked_var(zs_map_area_lock, zs_map_area); |
30445 | unpin_tag(handle); |
30446 | } | |
30447 | EXPORT_SYMBOL_GPL(zs_unmap_object); | |
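
Note that zs_map_area_lock is acquired in zs_map_object() and only released in
zs_unmap_object(), so the local lock is held across the caller's whole mapping
window and the per-CPU mapping area stays pinned to the task in between. From
the API user's side (a hedged sketch; ZS_MM_RW is assumed to be the usual
read/write mapping mode, and buf/len are illustrative):

    void *dst = zs_map_object(pool, handle, ZS_MM_RW); /* takes the lock */
    memcpy(dst, buf, len);                             /* area is pinned */
    zs_unmap_object(pool, handle);                     /* drops the lock */
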
30448 | diff --git a/net/core/dev.c b/net/core/dev.c | |
30449 | index 0989fea88c44..4d5f550f01f5 100644 |
30450 | --- a/net/core/dev.c |
30451 | +++ b/net/core/dev.c | |
30452 | @@ -186,6 +186,7 @@ static unsigned int napi_gen_id; | |
30453 | static DEFINE_HASHTABLE(napi_hash, 8); | |
30454 | ||
30455 | static seqcount_t devnet_rename_seq; | |
30456 | +static DEFINE_MUTEX(devnet_rename_mutex); | |
30457 | ||
30458 | static inline void dev_base_seq_inc(struct net *net) | |
30459 | { | |
30460 | @@ -207,14 +208,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) | |
30461 | static inline void rps_lock(struct softnet_data *sd) | |
30462 | { | |
30463 | #ifdef CONFIG_RPS | |
30464 | - spin_lock(&sd->input_pkt_queue.lock); | |
30465 | + raw_spin_lock(&sd->input_pkt_queue.raw_lock); | |
30466 | #endif | |
30467 | } | |
30468 | ||
30469 | static inline void rps_unlock(struct softnet_data *sd) | |
30470 | { | |
30471 | #ifdef CONFIG_RPS | |
30472 | - spin_unlock(&sd->input_pkt_queue.lock); | |
30473 | + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); | |
30474 | #endif | |
30475 | } | |
30476 | ||
30477 | @@ -884,7 +885,8 @@ retry: | |
30478 | strcpy(name, dev->name); | |
30479 | rcu_read_unlock(); | |
30480 | if (read_seqcount_retry(&devnet_rename_seq, seq)) { | |
30481 | - cond_resched(); | |
30482 | + mutex_lock(&devnet_rename_mutex); | |
30483 | + mutex_unlock(&devnet_rename_mutex); | |
30484 | goto retry; | |
30485 | } | |
30486 | ||
30487 | @@ -1153,20 +1155,17 @@ int dev_change_name(struct net_device *dev, const char *newname) | |
30488 | if (dev->flags & IFF_UP) | |
30489 | return -EBUSY; | |
30490 | ||
30491 | - write_seqcount_begin(&devnet_rename_seq); | |
30492 | + mutex_lock(&devnet_rename_mutex); | |
30493 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
30494 | ||
30495 | - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { | |
30496 | - write_seqcount_end(&devnet_rename_seq); | |
30497 | - return 0; | |
30498 | - } | |
30499 | + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) | |
30500 | + goto outunlock; | |
30501 | ||
30502 | memcpy(oldname, dev->name, IFNAMSIZ); | |
30503 | ||
30504 | err = dev_get_valid_name(net, dev, newname); | |
30505 | - if (err < 0) { | |
30506 | - write_seqcount_end(&devnet_rename_seq); | |
30507 | - return err; | |
30508 | - } | |
30509 | + if (err < 0) | |
30510 | + goto outunlock; | |
30511 | ||
30512 | if (oldname[0] && !strchr(oldname, '%')) | |
30513 | netdev_info(dev, "renamed from %s\n", oldname); | |
30514 | @@ -1179,11 +1178,12 @@ rollback: | |
30515 | if (ret) { | |
30516 | memcpy(dev->name, oldname, IFNAMSIZ); | |
30517 | dev->name_assign_type = old_assign_type; | |
30518 | - write_seqcount_end(&devnet_rename_seq); | |
30519 | - return ret; | |
30520 | + err = ret; | |
30521 | + goto outunlock; | |
30522 | } | |
30523 | ||
30524 | - write_seqcount_end(&devnet_rename_seq); | |
30525 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
30526 | + mutex_unlock(&devnet_rename_mutex); | |
30527 | ||
30528 | netdev_adjacent_rename_links(dev, oldname); | |
30529 | ||
30530 | @@ -1204,7 +1204,8 @@ rollback: | |
30531 | /* err >= 0 after dev_alloc_name() or stores the first errno */ | |
30532 | if (err >= 0) { | |
30533 | err = ret; | |
30534 | - write_seqcount_begin(&devnet_rename_seq); | |
30535 | + mutex_lock(&devnet_rename_mutex); | |
30536 | + __raw_write_seqcount_begin(&devnet_rename_seq); | |
30537 | memcpy(dev->name, oldname, IFNAMSIZ); | |
30538 | memcpy(oldname, newname, IFNAMSIZ); | |
30539 | dev->name_assign_type = old_assign_type; | |
30540 | @@ -1217,6 +1218,11 @@ rollback: | |
30541 | } | |
30542 | ||
30543 | return err; | |
30544 | + | |
30545 | +outunlock: | |
30546 | + __raw_write_seqcount_end(&devnet_rename_seq); | |
30547 | + mutex_unlock(&devnet_rename_mutex); | |
30548 | + return err; | |
30549 | } | |
30550 | ||
30551 | /** | |
30552 | @@ -2246,6 +2252,7 @@ static inline void __netif_reschedule(struct Qdisc *q) | |
30553 | sd->output_queue_tailp = &q->next_sched; | |
30554 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
30555 | local_irq_restore(flags); | |
30556 | + preempt_check_resched_rt(); | |
30557 | } | |
30558 | ||
30559 | void __netif_schedule(struct Qdisc *q) | |
30560 | @@ -2327,6 +2334,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) | |
30561 | __this_cpu_write(softnet_data.completion_queue, skb); | |
30562 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
30563 | local_irq_restore(flags); | |
30564 | + preempt_check_resched_rt(); | |
30565 | } | |
30566 | EXPORT_SYMBOL(__dev_kfree_skb_irq); | |
30567 | ||
30568 | @@ -2883,7 +2891,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, | |
30569 | * This permits __QDISC___STATE_RUNNING owner to get the lock more | |
30570 | * often and dequeue packets faster. | |
30571 | */ | |
30572 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
30573 | + contended = true; | |
30574 | +#else | |
30575 | contended = qdisc_is_running(q); | |
30576 | +#endif | |
30577 | if (unlikely(contended)) | |
30578 | spin_lock(&q->busylock); | |
30579 | ||
30580 | @@ -2943,9 +2955,44 @@ static void skb_update_prio(struct sk_buff *skb) | |
30581 | #define skb_update_prio(skb) | |
30582 | #endif | |
30583 | ||
30584 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
30585 | + | |
30586 | +static inline int xmit_rec_read(void) | |
30587 | +{ | |
30588 | + return current->xmit_recursion; | |
30589 | +} | |
30590 | + | |
30591 | +static inline void xmit_rec_inc(void) | |
30592 | +{ | |
30593 | + current->xmit_recursion++; | |
30594 | +} | |
30595 | + | |
30596 | +static inline void xmit_rec_dec(void) | |
30597 | +{ | |
30598 | + current->xmit_recursion--; | |
30599 | +} | |
30600 | + | |
30601 | +#else | |
30602 | + | |
30603 | DEFINE_PER_CPU(int, xmit_recursion); | |
30604 | EXPORT_SYMBOL(xmit_recursion); | |
30605 | ||
30606 | +static inline int xmit_rec_read(void) | |
30607 | +{ | |
30608 | + return __this_cpu_read(xmit_recursion); | |
30609 | +} | |
30610 | + | |
30611 | +static inline void xmit_rec_inc(void) | |
30612 | +{ | |
30613 | + __this_cpu_inc(xmit_recursion); | |
30614 | +} | |
30615 | + | |
30616 | +static inline void xmit_rec_dec(void) | |
30617 | +{ | |
30618 | + __this_cpu_dec(xmit_recursion); | |
30619 | +} | |
30620 | +#endif | |
30621 | + | |
30622 | #define RECURSION_LIMIT 10 | |
30623 | ||
30624 | /** | |
30625 | @@ -3138,7 +3185,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
30626 | ||
30627 | if (txq->xmit_lock_owner != cpu) { | |
30628 | ||
30629 | - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) | |
30630 | + if (xmit_rec_read() > RECURSION_LIMIT) | |
30631 | goto recursion_alert; | |
30632 | ||
30633 | skb = validate_xmit_skb(skb, dev); | |
30634 | @@ -3148,9 +3195,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) | |
30635 | HARD_TX_LOCK(dev, txq, cpu); | |
30636 | ||
30637 | if (!netif_xmit_stopped(txq)) { | |
30638 | - __this_cpu_inc(xmit_recursion); | |
30639 | + xmit_rec_inc(); | |
30640 | skb = dev_hard_start_xmit(skb, dev, txq, &rc); | |
30641 | - __this_cpu_dec(xmit_recursion); | |
30642 | + xmit_rec_dec(); | |
30643 | if (dev_xmit_complete(rc)) { | |
30644 | HARD_TX_UNLOCK(dev, txq); | |
30645 | goto out; | |
30646 | @@ -3524,6 +3571,7 @@ drop: | |
30647 | rps_unlock(sd); | |
30648 | ||
30649 | local_irq_restore(flags); | |
30650 | + preempt_check_resched_rt(); | |
30651 | ||
30652 | atomic_long_inc(&skb->dev->rx_dropped); | |
30653 | kfree_skb(skb); | |
30654 | @@ -3542,7 +3590,7 @@ static int netif_rx_internal(struct sk_buff *skb) | |
30655 | struct rps_dev_flow voidflow, *rflow = &voidflow; | |
30656 | int cpu; | |
30657 | ||
30658 | - preempt_disable(); | |
30659 | + migrate_disable(); | |
30660 | rcu_read_lock(); | |
30661 | ||
30662 | cpu = get_rps_cpu(skb->dev, skb, &rflow); | |
30663 | @@ -3552,13 +3600,13 @@ static int netif_rx_internal(struct sk_buff *skb) | |
30664 | ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); | |
30665 | ||
30666 | rcu_read_unlock(); | |
30667 | - preempt_enable(); | |
30668 | + migrate_enable(); | |
30669 | } else | |
30670 | #endif | |
30671 | { | |
30672 | unsigned int qtail; | |
30673 | - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); | |
30674 | - put_cpu(); | |
30675 | + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); | |
30676 | + put_cpu_light(); | |
30677 | } | |
30678 | return ret; | |
30679 | } | |
30680 | @@ -3592,16 +3640,44 @@ int netif_rx_ni(struct sk_buff *skb) | |
30681 | ||
30682 | trace_netif_rx_ni_entry(skb); | |
30683 | ||
30684 | - preempt_disable(); | |
30685 | + local_bh_disable(); | |
30686 | err = netif_rx_internal(skb); | |
30687 | - if (local_softirq_pending()) | |
30688 | - do_softirq(); | |
30689 | - preempt_enable(); | |
30690 | + local_bh_enable(); | |
30691 | ||
30692 | return err; | |
30693 | } | |
30694 | EXPORT_SYMBOL(netif_rx_ni); | |
30695 | ||
30696 | +#ifdef CONFIG_PREEMPT_RT_FULL | |
30697 | +/* | |
30698 | + * RT runs ksoftirqd as a real time thread and the root_lock is a | |
30699 | + * "sleeping spinlock". If the trylock fails then we can go into an | |
30700 | + * infinite loop when ksoftirqd preempted the task which actually | |
30701 | + * holds the lock, because we requeue q and raise NET_TX softirq | |
30702 | + * causing ksoftirqd to loop forever. | |
30703 | + * | |
30704 | + * It's safe to use spin_lock on RT here as softirqs run in thread | |
30705 | + * context and cannot deadlock against the thread which is holding | |
30706 | + * root_lock. | |
30707 | + * | |
30708 | + * On !RT the trylock might fail, but there we bail out from the | |
30709 | + * softirq loop after 10 attempts which we can't do on RT. And the | |
30710 | + * task holding root_lock cannot be preempted, so the only downside of | |
30711 | + * that trylock is that we need 10 loops to decide that we should have | |
30712 | + * given up in the first one :) | |
30713 | + */ | |
30714 | +static inline int take_root_lock(spinlock_t *lock) | |
30715 | +{ | |
30716 | + spin_lock(lock); | |
30717 | + return 1; | |
30718 | +} | |
30719 | +#else | |
30720 | +static inline int take_root_lock(spinlock_t *lock) | |
30721 | +{ | |
30722 | + return spin_trylock(lock); | |
30723 | +} | |
30724 | +#endif | |
30725 | + | |
30726 | static void net_tx_action(struct softirq_action *h) | |
30727 | { | |
30728 | struct softnet_data *sd = this_cpu_ptr(&softnet_data); | |
30729 | @@ -3643,7 +3719,7 @@ static void net_tx_action(struct softirq_action *h) | |
30730 | head = head->next_sched; | |
30731 | ||
30732 | root_lock = qdisc_lock(q); | |
30733 | - if (spin_trylock(root_lock)) { | |
30734 | + if (take_root_lock(root_lock)) { | |
30735 | smp_mb__before_atomic(); | |
30736 | clear_bit(__QDISC_STATE_SCHED, | |
30737 | &q->state); | |
30738 | @@ -4065,7 +4141,7 @@ static void flush_backlog(void *arg) | |
30739 | skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { | |
30740 | if (skb->dev == dev) { | |
30741 | __skb_unlink(skb, &sd->input_pkt_queue); | |
30742 | - kfree_skb(skb); | |
30743 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
30744 | input_queue_head_incr(sd); | |
30745 | } | |
30746 | } | |
30747 | @@ -4074,10 +4150,13 @@ static void flush_backlog(void *arg) | |
30748 | skb_queue_walk_safe(&sd->process_queue, skb, tmp) { | |
30749 | if (skb->dev == dev) { | |
30750 | __skb_unlink(skb, &sd->process_queue); | |
30751 | - kfree_skb(skb); | |
30752 | + __skb_queue_tail(&sd->tofree_queue, skb); | |
30753 | input_queue_head_incr(sd); | |
30754 | } | |
30755 | } | |
30756 | + | |
30757 | + if (!skb_queue_empty(&sd->tofree_queue)) | |
30758 | + raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
30759 | } | |
30760 | ||
30761 | static int napi_gro_complete(struct sk_buff *skb) | |
30762 | @@ -4531,6 +4610,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
30763 | sd->rps_ipi_list = NULL; | |
30764 | ||
30765 | local_irq_enable(); | |
30766 | + preempt_check_resched_rt(); | |
30767 | ||
30768 | /* Send pending IPI's to kick RPS processing on remote cpus. */ | |
30769 | while (remsd) { | |
30770 | @@ -4544,6 +4624,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) | |
30771 | } else | |
30772 | #endif | |
30773 | local_irq_enable(); | |
30774 | + preempt_check_resched_rt(); | |
30775 | } | |
30776 | ||
30777 | static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) | |
30778 | @@ -4625,6 +4706,7 @@ void __napi_schedule(struct napi_struct *n) | |
30779 | local_irq_save(flags); | |
30780 | ____napi_schedule(this_cpu_ptr(&softnet_data), n); | |
30781 | local_irq_restore(flags); | |
30782 | + preempt_check_resched_rt(); | |
30783 | } | |
30784 | EXPORT_SYMBOL(__napi_schedule); | |
30785 | ||
30786 | @@ -4901,7 +4983,7 @@ static void net_rx_action(struct softirq_action *h) | |
30787 | list_splice_tail(&repoll, &list); | |
30788 | list_splice(&list, &sd->poll_list); | |
30789 | if (!list_empty(&sd->poll_list)) | |
30790 | - __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
30791 | + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); | |
30792 | ||
30793 | net_rps_action_and_irq_enable(sd); | |
30794 | } | |
30795 | @@ -7234,7 +7316,7 @@ EXPORT_SYMBOL(free_netdev); | |
30796 | void synchronize_net(void) | |
30797 | { | |
30798 | might_sleep(); | |
30799 | - if (rtnl_is_locked()) | |
30800 | + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) | |
30801 | synchronize_rcu_expedited(); | |
30802 | else | |
30803 | synchronize_rcu(); | |
30804 | @@ -7475,16 +7557,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, | |
30805 | ||
30806 | raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
30807 | local_irq_enable(); | |
30808 | + preempt_check_resched_rt(); | |
30809 | ||
30810 | /* Process offline CPU's input_pkt_queue */ | |
30811 | while ((skb = __skb_dequeue(&oldsd->process_queue))) { | |
30812 | netif_rx_ni(skb); | |
30813 | input_queue_head_incr(oldsd); | |
30814 | } | |
30815 | - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { | |
30816 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { | |
30817 | netif_rx_ni(skb); | |
30818 | input_queue_head_incr(oldsd); | |
30819 | } | |
30820 | + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { | |
30821 | + kfree_skb(skb); | |
30822 | + } | |
30823 | ||
30824 | return NOTIFY_OK; | |
30825 | } | |
30826 | @@ -7786,8 +7872,9 @@ static int __init net_dev_init(void) | |
30827 | for_each_possible_cpu(i) { | |
30828 | struct softnet_data *sd = &per_cpu(softnet_data, i); | |
30829 | ||
30830 | - skb_queue_head_init(&sd->input_pkt_queue); | |
30831 | - skb_queue_head_init(&sd->process_queue); | |
30832 | + skb_queue_head_init_raw(&sd->input_pkt_queue); | |
30833 | + skb_queue_head_init_raw(&sd->process_queue); | |
30834 | + skb_queue_head_init_raw(&sd->tofree_queue); | |
30835 | INIT_LIST_HEAD(&sd->poll_list); | |
30836 | sd->output_queue_tailp = &sd->output_queue; | |
30837 | #ifdef CONFIG_RPS | |
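
Two of the net/core/dev.c conversions deserve a closer look. With softirqs
running in thread context a sender can be preempted, and migrate, in the
middle of a transmit, so the xmit recursion guard moves from a per-CPU counter
to current->xmit_recursion on RT; the guard logic itself is unchanged, as the
__dev_queue_xmit() hunk shows:

    if (xmit_rec_read() > RECURSION_LIMIT)
        goto recursion_alert;      /* refuse to recurse any deeper */

    HARD_TX_LOCK(dev, txq, cpu);
    if (!netif_xmit_stopped(txq)) {
        xmit_rec_inc();
        skb = dev_hard_start_xmit(skb, dev, txq, &rc);
        xmit_rec_dec();
    }

The preempt_check_resched_rt() calls sprinkled after local_irq_restore() and
local_irq_enable() exist because raising a softirq on RT may need to wake
ksoftirqd, a wakeup that cannot be acted on while interrupts are off; the
pending reschedule is therefore checked immediately afterwards.
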
30838 | diff --git a/net/core/skbuff.c b/net/core/skbuff.c | |
30839 | index 4968b5ddea69..c8d778f405dc 100644 | |
30840 | --- a/net/core/skbuff.c | |
30841 | +++ b/net/core/skbuff.c | |
30842 | @@ -63,6 +63,7 @@ | |
30843 | #include <linux/errqueue.h> | |
30844 | #include <linux/prefetch.h> | |
30845 | #include <linux/if_vlan.h> | |
30846 | +#include <linux/locallock.h> | |
30847 | ||
30848 | #include <net/protocol.h> | |
30849 | #include <net/dst.h> | |
30850 | @@ -351,6 +352,8 @@ EXPORT_SYMBOL(build_skb); | |
30851 | ||
30852 | static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); | |
30853 | static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); | |
30854 | +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); | |
30855 | +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); | |
30856 | ||
30857 | static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
30858 | { | |
30859 | @@ -358,10 +361,10 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
30860 | unsigned long flags; | |
30861 | void *data; | |
30862 | ||
30863 | - local_irq_save(flags); | |
30864 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
30865 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
30866 | data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
30867 | - local_irq_restore(flags); | |
30868 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
30869 | return data; | |
30870 | } | |
30871 | ||
30872 | @@ -380,9 +383,13 @@ EXPORT_SYMBOL(netdev_alloc_frag); | |
30873 | ||
30874 | static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) | |
30875 | { | |
30876 | - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
30877 | + struct page_frag_cache *nc; | |
30878 | + void *data; | |
30879 | ||
30880 | - return __alloc_page_frag(nc, fragsz, gfp_mask); | |
30881 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
30882 | + data = __alloc_page_frag(nc, fragsz, gfp_mask); | |
30883 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
30884 | + return data; | |
30885 | } | |
30886 | ||
30887 | void *napi_alloc_frag(unsigned int fragsz) | |
30888 | @@ -429,13 +436,13 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, | |
30889 | if (sk_memalloc_socks()) | |
30890 | gfp_mask |= __GFP_MEMALLOC; | |
30891 | ||
30892 | - local_irq_save(flags); | |
30893 | + local_lock_irqsave(netdev_alloc_lock, flags); | |
30894 | ||
30895 | nc = this_cpu_ptr(&netdev_alloc_cache); | |
30896 | data = __alloc_page_frag(nc, len, gfp_mask); | |
30897 | pfmemalloc = nc->pfmemalloc; | |
30898 | ||
30899 | - local_irq_restore(flags); | |
30900 | + local_unlock_irqrestore(netdev_alloc_lock, flags); | |
30901 | ||
30902 | if (unlikely(!data)) | |
30903 | return NULL; | |
30904 | @@ -476,9 +483,10 @@ EXPORT_SYMBOL(__netdev_alloc_skb); | |
30905 | struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
30906 | gfp_t gfp_mask) | |
30907 | { | |
30908 | - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); | |
30909 | + struct page_frag_cache *nc; | |
30910 | struct sk_buff *skb; | |
30911 | void *data; | |
30912 | + bool pfmemalloc; | |
30913 | ||
30914 | len += NET_SKB_PAD + NET_IP_ALIGN; | |
30915 | ||
30916 | @@ -496,7 +504,11 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
30917 | if (sk_memalloc_socks()) | |
30918 | gfp_mask |= __GFP_MEMALLOC; | |
30919 | ||
30920 | + nc = &get_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
30921 | data = __alloc_page_frag(nc, len, gfp_mask); | |
30922 | + pfmemalloc = nc->pfmemalloc; | |
30923 | + put_locked_var(napi_alloc_cache_lock, napi_alloc_cache); | |
30924 | + | |
30925 | if (unlikely(!data)) | |
30926 | return NULL; | |
30927 | ||
30928 | @@ -507,7 +519,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, | |
30929 | } | |
30930 | ||
30931 | /* use OR instead of assignment to avoid clearing of bits in mask */ | |
30932 | - if (nc->pfmemalloc) | |
30933 | + if (pfmemalloc) | |
30934 | skb->pfmemalloc = 1; | |
30935 | skb->head_frag = 1; | |
30936 | ||
30937 | diff --git a/net/core/sock.c b/net/core/sock.c | |
30938 | index 0d91f7dca751..9c3234299fc3 100644 | |
30939 | --- a/net/core/sock.c | |
30940 | +++ b/net/core/sock.c | |
30941 | @@ -2435,12 +2435,11 @@ void lock_sock_nested(struct sock *sk, int subclass) | |
30942 | if (sk->sk_lock.owned) | |
30943 | __lock_sock(sk); | |
30944 | sk->sk_lock.owned = 1; | |
30945 | - spin_unlock(&sk->sk_lock.slock); | |
30946 | + spin_unlock_bh(&sk->sk_lock.slock); | |
30947 | /* | |
30948 | * The sk_lock has mutex_lock() semantics here: | |
30949 | */ | |
30950 | mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); | |
30951 | - local_bh_enable(); | |
30952 | } | |
30953 | EXPORT_SYMBOL(lock_sock_nested); | |
30954 | ||
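
The lock_sock_nested() fix folds the unlock and the BH re-enable into a single
spin_unlock_bh(): on RT the BH-disable done by spin_lock_bh() must be undone
together with the unlock, not separately after it. A sketch of the resulting
slow path, reconstructed from the hunk above (the opening spin_lock_bh() and
the lockdep mutex_acquire() annotation are context not shown in the diff):

    spin_lock_bh(&sk->sk_lock.slock);
    if (sk->sk_lock.owned)
        __lock_sock(sk);
    sk->sk_lock.owned = 1;
    spin_unlock_bh(&sk->sk_lock.slock);  /* unlock + BH enable as one op */
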
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 36e26977c908..ff2593269089 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -69,6 +69,7 @@
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
+#include <linux/sysrq.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
@@ -77,6 +78,7 @@
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/slab.h>
+#include <linux/locallock.h>
 #include <net/snmp.h>
 #include <net/ip.h>
 #include <net/route.h>
@@ -204,6 +206,8 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];
 *
 *	On SMP we have one ICMP socket per-cpu.
 */
+static DEFINE_LOCAL_IRQ_LOCK(icmp_sk_lock);
+
 static struct sock *icmp_sk(struct net *net)
 {
 	return *this_cpu_ptr(net->ipv4.icmp_sk);
@@ -215,12 +219,14 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
 
 	local_bh_disable();
 
+	local_lock(icmp_sk_lock);
 	sk = icmp_sk(net);
 
 	if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
 		/* This can happen if the output path signals a
 		 * dst_link_failure() for an outgoing ICMP packet.
 		 */
+		local_unlock(icmp_sk_lock);
 		local_bh_enable();
 		return NULL;
 	}
@@ -230,6 +236,7 @@ static inline struct sock *icmp_xmit_lock(struct net *net)
 static inline void icmp_xmit_unlock(struct sock *sk)
 {
 	spin_unlock_bh(&sk->sk_lock.slock);
+	local_unlock(icmp_sk_lock);
 }
 
 int sysctl_icmp_msgs_per_sec __read_mostly = 1000;
@@ -358,6 +365,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	struct sock *sk;
 	struct sk_buff *skb;
 
+	local_lock(icmp_sk_lock);
 	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, fl4, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
@@ -380,6 +388,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 		skb->ip_summed = CHECKSUM_NONE;
 		ip_push_pending_frames(sk, fl4);
 	}
+	local_unlock(icmp_sk_lock);
 }
 
 /*
@@ -891,6 +900,30 @@ static bool icmp_redirect(struct sk_buff *skb)
 }
 
 /*
+ * 32bit and 64bit have different timestamp length, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0		20
+#define CO_POS1		50
+#define CO_SIZE		sizeof(int)
+#define ICMP_SYSRQ_SIZE	57
+
+/*
+ * We got an ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+	int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+	char *p = skb->data;
+
+	if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+	    !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+	    p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+		handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
 *	Handle ICMP_ECHO ("ping") requests.
 *
 *	RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -917,6 +950,11 @@ static bool icmp_echo(struct sk_buff *skb)
 		icmp_param.data_len = skb->len;
 		icmp_param.head_len = sizeof(struct icmphdr);
 		icmp_reply(&icmp_param, skb);
+
+		if (skb->len == ICMP_SYSRQ_SIZE &&
+		    net->ipv4.sysctl_icmp_echo_sysrq) {
+			icmp_check_sysrq(net, skb);
+		}
 	}
 	/* should there be an ICMP stat for ignored echos? */
 	return true;
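For illustration, a sender can construct the 57-byte echo payload that icmp_check_sysrq() matches without relying on ping's pattern option. The helper below is hypothetical userspace code (build_sysrq_payload() is invented here, everything else is the standard C sockets API); it mirrors the kernel's check by placing htonl(cookie) at offsets 20 and 50 with the trigger character immediately after each copy:

	#include <arpa/inet.h>
	#include <string.h>

	#define CO_POS0		20
	#define CO_POS1		50
	#define CO_SIZE		sizeof(int)
	#define ICMP_SYSRQ_SIZE	57

	/* `cookie` is the host-endian value configured via the
	 * icmp_echo_sysrq sysctl; `sysrq` is the trigger key,
	 * e.g. 'h' for the sysrq help output. */
	static void build_sysrq_payload(unsigned char buf[ICMP_SYSRQ_SIZE],
					int cookie, char sysrq)
	{
		int c = htonl(cookie);	/* kernel compares against htonl() */

		memset(buf, 0, ICMP_SYSRQ_SIZE);
		memcpy(buf + CO_POS0, &c, CO_SIZE);
		memcpy(buf + CO_POS1, &c, CO_SIZE);
		buf[CO_POS0 + CO_SIZE] = sysrq;
		buf[CO_POS1 + CO_SIZE] = sysrq;
	}

The two offsets are 30 bytes apart, a multiple of a 5-byte pattern (4 cookie bytes plus the trigger), which is presumably why the same payload can also be produced with a patterned ping regardless of whether the leading timestamp occupies 8 or 16 bytes.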
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a0bd7a55193e..1866f910263f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -818,6 +818,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
+		.procname	= "icmp_echo_sysrq",
+		.data		= &init_net.ipv4.sysctl_icmp_echo_sysrq,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "icmp_ignore_bogus_error_responses",
 		.data		= &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
 		.maxlen		= sizeof(int),
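The new entry appears as /proc/sys/net/ipv4/icmp_echo_sysrq, a plain integer handled by proc_dointvec. With its zero-initialized default, the sysctl check in icmp_echo() above never calls icmp_check_sysrq(), so the feature stays off until a non-zero cookie is written.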
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b5853cac3269..de922d86ba2c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -62,6 +62,7 @@
 #include <linux/init.h>
 #include <linux/times.h>
 #include <linux/slab.h>
+#include <linux/locallock.h>
 
 #include <net/net_namespace.h>
 #include <net/icmp.h>
@@ -566,6 +567,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_v4_send_check);
 
+static DEFINE_LOCAL_IRQ_LOCK(tcp_sk_lock);
 /*
 *	This routine will send an RST to the other tcp.
 *
@@ -687,10 +689,13 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 		arg.bound_dev_if = sk->sk_bound_dev_if;
 
 	arg.tos = ip_hdr(skb)->tos;
+
+	local_lock(tcp_sk_lock);
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
+	local_unlock(tcp_sk_lock);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
@@ -772,10 +777,12 @@ static void tcp_v4_send_ack(struct net *net,
 	if (oif)
 		arg.bound_dev_if = oif;
 	arg.tos = tos;
+	local_lock(tcp_sk_lock);
 	ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
 			      ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
 			      &arg, arg.iov[0].iov_len);
+	local_unlock(tcp_sk_lock);
 
 	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 }
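Both reply paths write through *this_cpu_ptr(net->ipv4.tcp_sk), a per-CPU kernel socket that mainline protects only by running in BH context. On RT, softirqs are preemptible threads, so tcp_sk_lock makes the "this CPU's socket is mine alone" assumption explicit for the duration of ip_send_unicast_reply(), following the same locallock pattern used for the ICMP sockets above.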
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a3bb8f7f5fc5..3be977394a80 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3574,7 +3574,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb,
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 
-	WARN_ON_ONCE(softirq_count() == 0);
+	WARN_ON_ONCE_NONRT(softirq_count() == 0);
 
 	if (WARN_ON(status->band >= IEEE80211_NUM_BANDS))
 		goto drop;
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index f39276d1c2d7..10880c89d62f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -22,11 +22,17 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/locallock.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
 #include "nf_internals.h"
 
+#ifdef CONFIG_PREEMPT_RT_BASE
+DEFINE_LOCAL_IRQ_LOCK(xt_write_lock);
+EXPORT_PER_CPU_SYMBOL(xt_write_lock);
+#endif
+
 static DEFINE_MUTEX(afinfo_mutex);
 
 const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index a86f26d05bc2..77276e3ff2a2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -63,6 +63,7 @@
 #include <linux/if_packet.h>
 #include <linux/wireless.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -695,7 +696,7 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data)
 	if (BLOCK_NUM_PKTS(pbd)) {
 		while (atomic_read(&pkc->blk_fill_in_prog)) {
 			/* Waiting for skb_copy_bits to finish... */
-			cpu_relax();
+			cpu_chill();
 		}
 	}
 
@@ -957,7 +958,7 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
 	if (!(status & TP_STATUS_BLK_TMO)) {
 		while (atomic_read(&pkc->blk_fill_in_prog)) {
 			/* Waiting for skb_copy_bits to finish... */
-			cpu_relax();
+			cpu_chill();
 		}
 	}
 	prb_close_block(pkc, pbd, po, status);
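cpu_chill() comes from earlier in this series (hence the new linux/delay.h include). A busy-wait with cpu_relax() can live-lock on RT when the spinning task has higher priority than the one that must clear blk_fill_in_prog; chilling sleeps briefly instead of spinning. Approximately, and not the verbatim patch text:

	/* On !RT builds cpu_chill() simply remains cpu_relax(). */
	void cpu_chill(void)
	{
		ktime_t chill_time = ktime_set(0, NSEC_PER_MSEC);

		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_hrtimeout(&chill_time, HRTIMER_MODE_REL);
	}

The same cpu_relax() to cpu_chill() substitution is applied to net/rds below.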
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a2340748ec86..19123a97b354 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/rculist.h>
 #include <linux/llist.h>
+#include <linux/delay.h>
 
 #include "rds.h"
 #include "ib.h"
@@ -313,7 +314,7 @@ static inline void wait_clean_list_grace(void)
 	for_each_online_cpu(cpu) {
 		flag = &per_cpu(clean_list_grace, cpu);
 		while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
-			cpu_relax();
+			cpu_chill();
 	}
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aa4725038f94..00b81cab28f3 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -893,7 +893,7 @@ void dev_deactivate_many(struct list_head *head)
 	/* Wait for outstanding qdisc_run calls. */
 	list_for_each_entry(dev, head, close_list)
 		while (some_qdisc_is_busy(dev))
-			yield();
+			msleep(1);
 }
 
 void dev_deactivate(struct net_device *dev)
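The yield() to msleep(1) change follows the same logic: if dev_deactivate_many() runs in a high-priority real-time task, yield() never lets a lower-priority softirq thread finish the outstanding qdisc_run, so the loop spins forever. msleep(1) genuinely blocks, giving the busy qdisc a chance to complete.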
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index a6cbb2104667..5b69bb580617 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -340,7 +340,7 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
 		goto out;
 	}
 
-	cpu = get_cpu();
+	cpu = get_cpu_light();
 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
 
 	atomic_long_inc(&pool->sp_stats.packets);
@@ -376,7 +376,7 @@ redo_search:
 
 		atomic_long_inc(&pool->sp_stats.threads_woken);
 		wake_up_process(rqstp->rq_task);
-		put_cpu();
+		put_cpu_light();
 		goto out;
 	}
 	rcu_read_unlock();
@@ -397,7 +397,7 @@ redo_search:
 		goto redo_search;
 	}
 	rqstp = NULL;
-	put_cpu();
+	put_cpu_light();
 out:
 	trace_svc_xprt_do_enqueue(xprt, rqstp);
 }
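get_cpu() disables preemption, which would make the locks taken while the pool is searched illegal on RT. The _light variants keep the task on its CPU without making it non-preemptible; a sketch of their assumed shape (the real definitions are added elsewhere in this series):

	#ifdef CONFIG_PREEMPT_RT_FULL
	# define get_cpu_light()	({ migrate_disable(); smp_processor_id(); })
	# define put_cpu_light()	migrate_enable()
	#else
	# define get_cpu_light()	get_cpu()
	# define put_cpu_light()	put_cpu()
	#endif

The CPU number stays valid because migration is disabled, while spinlock-protected sections (which may sleep on RT) remain allowed.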
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 6fdc97ef6023..523e0420d7f0 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -4,7 +4,8 @@ TARGET=$1
 ARCH=$2
 SMP=$3
 PREEMPT=$4
-CC=$5
+RT=$5
+CC=$6
 
 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
 
@@ -57,6 +58,7 @@ UTS_VERSION="#$VERSION"
 CONFIG_FLAGS=""
 if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
+if [ -n "$RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS RT"; fi
 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
 
 # Truncate to maximum length
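The caller (init/Makefile, patched elsewhere in this series) is expected to pass the RT flag as the new fifth argument. Given the CONFIG_FLAGS assembly shown above, an RT kernel then identifies itself in the version banner, along the lines of:

	# uname -v
	#1 SMP PREEMPT RT Fri Mar 11 10:21:04 CET 2016

(the exact string depends on the build configuration and timestamp).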
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 4ba64fd49759..34e50186885d 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -135,7 +135,7 @@ EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock);
 void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream)
 {
 	if (!substream->pcm->nonatomic)
-		local_irq_disable();
+		local_irq_disable_nort();
 	snd_pcm_stream_lock(substream);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_lock_irq);
@@ -150,7 +150,7 @@ void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream)
 {
 	snd_pcm_stream_unlock(substream);
 	if (!substream->pcm->nonatomic)
-		local_irq_enable();
+		local_irq_enable_nort();
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irq);
 
@@ -158,7 +158,7 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream)
 {
 	unsigned long flags = 0;
 	if (!substream->pcm->nonatomic)
-		local_irq_save(flags);
+		local_irq_save_nort(flags);
 	snd_pcm_stream_lock(substream);
 	return flags;
 }
@@ -176,7 +176,7 @@ void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream,
 {
 	snd_pcm_stream_unlock(substream);
 	if (!substream->pcm->nonatomic)
-		local_irq_restore(flags);
+		local_irq_restore_nort(flags);
 }
 EXPORT_SYMBOL_GPL(snd_pcm_stream_unlock_irqrestore);
 
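The _nort variants exist because hard-disabling interrupts around a lock that can sleep is not allowed on RT, and the stream lock itself already provides the required exclusion there. Their assumed shape, approximating the definitions added earlier in this series:

	#ifdef CONFIG_PREEMPT_RT_FULL
	# define local_irq_disable_nort()	do { } while (0)
	# define local_irq_enable_nort()	do { } while (0)
	# define local_irq_save_nort(flags)	local_save_flags(flags)
	# define local_irq_restore_nort(flags)	(void)(flags)
	#else
	# define local_irq_disable_nort()	local_irq_disable()
	# define local_irq_enable_nort()	local_irq_enable()
	# define local_irq_save_nort(flags)	local_irq_save(flags)
	# define local_irq_restore_nort(flags)	local_irq_restore(flags)
	#endif

On mainline builds the behavior is unchanged; on RT the calls become no-ops beyond keeping `flags` initialized.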
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 4f70d12e392d..9378d0919ed8 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -98,8 +98,8 @@ static void async_pf_execute(struct work_struct *work)
 	 * This memory barrier pairs with prepare_to_wait's set_current_state()
 	 */
 	smp_mb();
-	if (waitqueue_active(&vcpu->wq))
-		wake_up_interruptible(&vcpu->wq);
+	if (swait_active(&vcpu->wq))
+		swake_up(&vcpu->wq);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 336ed267c407..7748ca386e60 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -228,8 +228,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	vcpu->halt_poll_ns = 0;
-	init_waitqueue_head(&vcpu->wq);
+	init_swait_queue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -2005,7 +2004,7 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DEFINE_WAIT(wait);
+	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
 	u64 block_ns;
 
@@ -2030,7 +2029,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	kvm_arch_vcpu_blocking(vcpu);
 
 	for (;;) {
-		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_swait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2039,7 +2038,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		schedule();
 	}
 
-	finish_wait(&vcpu->wq, &wait);
+	finish_swait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2071,11 +2070,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
 	int me;
 	int cpu = vcpu->cpu;
-	wait_queue_head_t *wqp;
+	struct swait_queue_head *wqp;
 
 	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (waitqueue_active(wqp)) {
-		wake_up_interruptible(wqp);
+	if (swait_active(wqp)) {
+		swake_up(wqp);
 		++vcpu->stat.halt_wakeup;
 	}
 
@@ -2176,7 +2175,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
 			continue;
 		if (vcpu == me)
 			continue;
-		if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+		if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
 			continue;
 		if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
 			continue;
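The conversion to simple waitqueues matters because swake_up() runs under a raw spinlock and wakes at most one waiter, so it is safe from the hard-IRQ and preempt-disabled paths KVM uses, whereas the regular waitqueue lock becomes a sleeping lock on RT and a wakeup may walk an unbounded list of waiters. A minimal usage sketch, assuming the linux/swait.h API in the form used above (my_wq, my_cond and the two functions are invented for illustration):

	#include <linux/swait.h>

	static DECLARE_SWAIT_QUEUE_HEAD(my_wq);
	static bool my_cond;

	/* waiter side -- same shape as kvm_vcpu_block() above */
	static void wait_for_cond(void)
	{
		DECLARE_SWAITQUEUE(wait);

		for (;;) {
			prepare_to_swait(&my_wq, &wait, TASK_INTERRUPTIBLE);
			if (READ_ONCE(my_cond))
				break;
			schedule();
		}
		finish_swait(&my_wq, &wait);
	}

	/* waker side -- same shape as kvm_vcpu_kick(): wakes one waiter */
	static void signal_cond(void)
	{
		WRITE_ONCE(my_cond, true);
		if (swait_active(&my_wq))
			swake_up(&my_wq);
	}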