]>
Commit | Line | Data |
---|---|---|
5041df65 JR |
1 | From 8e52fd71e693619f7a58de2692e59f0c826e9988 Mon Sep 17 00:00:00 2001 |
2 | From: Michael Jeanson <mjeanson@efficios.com> | |
3 | Date: Mon, 4 Apr 2022 13:52:57 -0400 | |
4 | Subject: [PATCH 03/13] fix: sched/tracing: Don't re-read p->state when | |
5 | emitting sched_switch event (v5.18) | |
6 | ||
7 | See upstream commit: | |
8 | ||
9 | commit fa2c3254d7cfff5f7a916ab928a562d1165f17bb | |
10 | Author: Valentin Schneider <valentin.schneider@arm.com> | |
11 | Date: Thu Jan 20 16:25:19 2022 +0000 | |
12 | ||
13 | sched/tracing: Don't re-read p->state when emitting sched_switch event | |
14 | ||
15 | As of commit | |
16 | ||
17 | c6e7bd7afaeb ("sched/core: Optimize ttwu() spinning on p->on_cpu") | |
18 | ||
19 | the following sequence becomes possible: | |
20 | ||
21 | p->__state = TASK_INTERRUPTIBLE; | |
22 | __schedule() | |
23 | deactivate_task(p); | |
24 | ttwu() | |
25 | READ !p->on_rq | |
26 | p->__state=TASK_WAKING | |
27 | trace_sched_switch() | |
28 | __trace_sched_switch_state() | |
29 | task_state_index() | |
30 | return 0; | |
31 | ||
32 | TASK_WAKING isn't in TASK_REPORT, so the task appears as TASK_RUNNING in | |
33 | the trace event. | |
34 | ||
35 | Prevent this by pushing the value read from __schedule() down the trace | |
36 | event. | |
37 | ||
38 | Change-Id: I46743cd006be4b4d573cae2d77df7d6d16744d04 | |
39 | Signed-off-by: Michael Jeanson <mjeanson@efficios.com> | |
40 | Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | |
41 | --- | |
42 | include/instrumentation/events/sched.h | 88 +++++++++++++++++++++++--- | |
43 | 1 file changed, 78 insertions(+), 10 deletions(-) | |
44 | ||
45 | diff --git a/include/instrumentation/events/sched.h b/include/instrumentation/events/sched.h | |
46 | index 91953a6f..339bec94 100644 | |
47 | --- a/include/instrumentation/events/sched.h | |
48 | +++ b/include/instrumentation/events/sched.h | |
49 | @@ -20,7 +20,37 @@ | |
50 | #ifndef _TRACE_SCHED_DEF_ | |
51 | #define _TRACE_SCHED_DEF_ | |
52 | ||
53 | -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0)) | |
54 | +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0)) | |
55 | + | |
56 | +static inline long __trace_sched_switch_state(bool preempt, | |
57 | + unsigned int prev_state, | |
58 | + struct task_struct *p) | |
59 | +{ | |
60 | + unsigned int state; | |
61 | + | |
62 | +#ifdef CONFIG_SCHED_DEBUG | |
63 | + BUG_ON(p != current); | |
64 | +#endif /* CONFIG_SCHED_DEBUG */ | |
65 | + | |
66 | + /* | |
67 | + * Preemption ignores task state, therefore preempted tasks are always | |
68 | + * RUNNING (we will not have dequeued if state != RUNNING). | |
69 | + */ | |
70 | + if (preempt) | |
71 | + return TASK_REPORT_MAX; | |
72 | + | |
73 | + /* | |
74 | + * __task_state_index() uses fls() and returns a value in the 0-8 | |
75 | + * range. Decrement it by 1 (except for the TASK_RUNNING state, i.e. 0) | |
76 | + * before using it as a left-shift count to get the correct | |
77 | + * task->state mapping. | |
78 | + */ | |
79 | + state = __task_state_index(prev_state, p->exit_state); | |
80 | + | |
81 | + return state ? (1 << (state - 1)) : state; | |
82 | +} | |
83 | + | |
84 | +#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,15,0)) | |
85 | ||
86 | static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p) | |
87 | { | |
88 | @@ -321,43 +351,81 @@ LTTNG_TRACEPOINT_EVENT_INSTANCE(sched_wakeup_template, sched_wakeup_new, | |
89 | /* | |
90 | * Tracepoint for task switches, performed by the scheduler: | |
91 | */ | |
92 | + | |
93 | +#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(5,18,0)) | |
94 | LTTNG_TRACEPOINT_EVENT(sched_switch, | |
95 | ||
96 | -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) | |
97 | TP_PROTO(bool preempt, | |
98 | - struct task_struct *prev, | |
99 | - struct task_struct *next), | |
100 | + unsigned int prev_state, | |
101 | + struct task_struct *prev, | |
102 | + struct task_struct *next), | |
103 | ||
104 | - TP_ARGS(preempt, prev, next), | |
105 | + TP_ARGS(preempt, prev_state, prev, next), | |
106 | + | |
107 | + TP_FIELDS( | |
108 | + ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) | |
109 | + ctf_integer(pid_t, prev_tid, prev->pid) | |
110 | + ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) | |
111 | +#ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM | |
112 | + ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev)) | |
113 | #else | |
114 | - TP_PROTO(struct task_struct *prev, | |
115 | + ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev_state, prev)) | |
116 | +#endif | |
117 | + ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) | |
118 | + ctf_integer(pid_t, next_tid, next->pid) | |
119 | + ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) | |
120 | + ) | |
121 | +) | |
122 | + | |
123 | +#elif (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) | |
124 | + | |
125 | +LTTNG_TRACEPOINT_EVENT(sched_switch, | |
126 | + | |
127 | + TP_PROTO(bool preempt, | |
128 | + struct task_struct *prev, | |
129 | struct task_struct *next), | |
130 | ||
131 | - TP_ARGS(prev, next), | |
132 | -#endif /* #if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) */ | |
133 | + TP_ARGS(preempt, prev, next), | |
134 | ||
135 | TP_FIELDS( | |
136 | ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) | |
137 | ctf_integer(pid_t, prev_tid, prev->pid) | |
138 | ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) | |
139 | -#if (LTTNG_LINUX_VERSION_CODE >= LTTNG_KERNEL_VERSION(4,4,0)) | |
140 | #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM | |
141 | ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(preempt, prev)) | |
142 | #else | |
143 | ctf_integer(long, prev_state, __trace_sched_switch_state(preempt, prev)) | |
144 | #endif | |
145 | + ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) | |
146 | + ctf_integer(pid_t, next_tid, next->pid) | |
147 | + ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) | |
148 | + ) | |
149 | +) | |
150 | + | |
151 | #else | |
152 | + | |
153 | +LTTNG_TRACEPOINT_EVENT(sched_switch, | |
154 | + | |
155 | + TP_PROTO(struct task_struct *prev, | |
156 | + struct task_struct *next), | |
157 | + | |
158 | + TP_ARGS(prev, next), | |
159 | + | |
160 | + TP_FIELDS( | |
161 | + ctf_array_text(char, prev_comm, prev->comm, TASK_COMM_LEN) | |
162 | + ctf_integer(pid_t, prev_tid, prev->pid) | |
163 | + ctf_integer(int, prev_prio, prev->prio - MAX_RT_PRIO) | |
164 | #ifdef CONFIG_LTTNG_EXPERIMENTAL_BITWISE_ENUM | |
165 | ctf_enum(task_state, long, prev_state, __trace_sched_switch_state(prev)) | |
166 | #else | |
167 | ctf_integer(long, prev_state, __trace_sched_switch_state(prev)) | |
168 | -#endif | |
169 | #endif | |
170 | ctf_array_text(char, next_comm, next->comm, TASK_COMM_LEN) | |
171 | ctf_integer(pid_t, next_tid, next->pid) | |
172 | ctf_integer(int, next_prio, next->prio - MAX_RT_PRIO) | |
173 | ) | |
174 | ) | |
175 | +#endif | |
176 | ||
177 | /* | |
178 | * Tracepoint for a task being migrated: | |
179 | -- | |
180 | 2.36.1 | |
181 |