From 9fd83755cbe3722f2d161803731dbc8a3c574035 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Mon, 29 Jan 2018 21:21:13 +0100 Subject: [PATCH] - upstream fixes for kernel 4.15 - rel 3 --- kernel-4.15-atomics.patch | 568 ++++++++++++++++++++++++++++++++++++++ kernel-4.15-timers.patch | 328 ++++++++++++++++++++++ kmem-update.patch | 67 +++++ lttng-modules.spec | 10 +- sock-update.patch | 57 ++++ 5 files changed, 1029 insertions(+), 1 deletion(-) create mode 100644 kernel-4.15-atomics.patch create mode 100644 kernel-4.15-timers.patch create mode 100644 kmem-update.patch create mode 100644 sock-update.patch diff --git a/kernel-4.15-atomics.patch b/kernel-4.15-atomics.patch new file mode 100644 index 0000000..aa095d8 --- /dev/null +++ b/kernel-4.15-atomics.patch @@ -0,0 +1,568 @@ +From a8f2d0c75c9cc179fc9e7f7ca17ea3b3b3b5af41 Mon Sep 17 00:00:00 2001 +From: Michael Jeanson +Date: Tue, 19 Dec 2017 15:06:42 -0500 +Subject: [PATCH] Fix: ACCESS_ONCE() removed in kernel 4.15 + +The ACCESS_ONCE() macro was removed in kernel 4.15 and should be +replaced by READ_ONCE and WRITE_ONCE which were introduced in kernel +3.19. + +This commit replaces all calls to ACCESS_ONCE() with the appropriate +READ_ONCE or WRITE_ONCE and adds compatibility macros for kernels that +have them. + +See this upstream commit: + + commit b03a0fe0c5e4b46dcd400d27395b124499554a71 + Author: Paul E. McKenney + Date: Mon Oct 23 14:07:25 2017 -0700 + + locking/atomics, mm: Convert ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() + + For several reasons, it is desirable to use {READ,WRITE}_ONCE() in + preference to ACCESS_ONCE(), and new code is expected to use one of the + former. So far, there's been no reason to change most existing uses of + ACCESS_ONCE(), as these aren't currently harmful. + + However, for some features it is necessary to instrument reads and + writes separately, which is not possible with ACCESS_ONCE(). This + distinction is critical to correct operation. 
+ + It's possible to transform the bulk of kernel code using the Coccinelle + script below. However, this doesn't handle comments, leaving references + to ACCESS_ONCE() instances which have been removed. As a preparatory + step, this patch converts the mm code and comments to use + {READ,WRITE}_ONCE() consistently. + + ---- + virtual patch + + @ depends on patch @ + expression E1, E2; + @@ + + - ACCESS_ONCE(E1) = E2 + + WRITE_ONCE(E1, E2) + + @ depends on patch @ + expression E; + @@ + + - ACCESS_ONCE(E) + + READ_ONCE(E) + ---- + +Signed-off-by: Michael Jeanson +Signed-off-by: Mathieu Desnoyers +--- + instrumentation/events/lttng-module/i2c.h | 4 ++-- + lib/ringbuffer/backend.h | 2 +- + lib/ringbuffer/backend_internal.h | 7 ++++--- + lib/ringbuffer/frontend.h | 2 +- + lib/ringbuffer/ring_buffer_frontend.c | 10 +++++----- + lib/ringbuffer/ring_buffer_iterator.c | 2 +- + lttng-clock.c | 6 +++--- + lttng-events.c | 30 +++++++++++++++--------------- + probes/lttng-ftrace.c | 12 ++++++------ + probes/lttng-kprobes.c | 6 +++--- + probes/lttng-kretprobes.c | 10 +++++----- + probes/lttng-tracepoint-event-impl.h | 12 ++++++------ + wrapper/compiler.h | 13 +++++++++++++ + wrapper/trace-clock.h | 11 ++++++----- + 14 files changed, 71 insertions(+), 56 deletions(-) + +diff --git a/instrumentation/events/lttng-module/i2c.h b/instrumentation/events/lttng-module/i2c.h +index 4088b60..dd91c9b 100644 +--- a/instrumentation/events/lttng-module/i2c.h ++++ b/instrumentation/events/lttng-module/i2c.h +@@ -22,7 +22,7 @@ LTTNG_TRACEPOINT_EVENT_CODE(i2c_write, + + TP_code_pre( + tp_locvar->extract_sensitive_payload = +- ACCESS_ONCE(extract_sensitive_payload); ++ READ_ONCE(extract_sensitive_payload); + ), + + TP_FIELDS( +@@ -77,7 +77,7 @@ LTTNG_TRACEPOINT_EVENT_CODE(i2c_reply, + + TP_code_pre( + tp_locvar->extract_sensitive_payload = +- ACCESS_ONCE(extract_sensitive_payload); ++ READ_ONCE(extract_sensitive_payload); + ), + + TP_FIELDS( +diff --git a/lib/ringbuffer/backend.h 
b/lib/ringbuffer/backend.h +index 9db0095..0b75de8 100644 +--- a/lib/ringbuffer/backend.h ++++ b/lib/ringbuffer/backend.h +@@ -169,7 +169,7 @@ size_t lib_ring_buffer_do_strcpy(const struct lib_ring_buffer_config *config, + * Only read source character once, in case it is + * modified concurrently. + */ +- c = ACCESS_ONCE(src[count]); ++ c = READ_ONCE(src[count]); + if (!c) + break; + lib_ring_buffer_do_copy(config, &dest[count], &c, 1); +diff --git a/lib/ringbuffer/backend_internal.h b/lib/ringbuffer/backend_internal.h +index 2e59b68..dc69ecf 100644 +--- a/lib/ringbuffer/backend_internal.h ++++ b/lib/ringbuffer/backend_internal.h +@@ -23,6 +23,7 @@ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + ++#include + #include + #include + #include +@@ -171,7 +172,7 @@ void subbuffer_id_set_noref_offset(const struct lib_ring_buffer_config *config, + tmp |= offset << SB_ID_OFFSET_SHIFT; + tmp |= SB_ID_NOREF_MASK; + /* Volatile store, read concurrently by readers. */ +- ACCESS_ONCE(*id) = tmp; ++ WRITE_ONCE(*id, tmp); + } + } + +@@ -379,7 +380,7 @@ void lib_ring_buffer_clear_noref(const struct lib_ring_buffer_config *config, + * Performing a volatile access to read the sb_pages, because we want to + * read a coherent version of the pointer and the associated noref flag. + */ +- id = ACCESS_ONCE(bufb->buf_wsb[idx].id); ++ id = READ_ONCE(bufb->buf_wsb[idx].id); + for (;;) { + /* This check is called on the fast path for each record. */ + if (likely(!subbuffer_id_is_noref(config, id))) { +@@ -448,7 +449,7 @@ int update_read_sb_index(const struct lib_ring_buffer_config *config, + if (config->mode == RING_BUFFER_OVERWRITE) { + /* + * Exchange the target writer subbuffer with our own unused +- * subbuffer. No need to use ACCESS_ONCE() here to read the ++ * subbuffer. No need to use READ_ONCE() here to read the + * old_wpage, because the value read will be confirmed by the + * following cmpxchg(). 
+ */ +diff --git a/lib/ringbuffer/frontend.h b/lib/ringbuffer/frontend.h +index 909abc2..1450cb7 100644 +--- a/lib/ringbuffer/frontend.h ++++ b/lib/ringbuffer/frontend.h +@@ -168,7 +168,7 @@ static inline + int lib_ring_buffer_is_finalized(const struct lib_ring_buffer_config *config, + struct lib_ring_buffer *buf) + { +- int finalized = ACCESS_ONCE(buf->finalized); ++ int finalized = READ_ONCE(buf->finalized); + /* + * Read finalized before counters. + */ +diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c +index abd9757..0d8279b 100644 +--- a/lib/ringbuffer/ring_buffer_frontend.c ++++ b/lib/ringbuffer/ring_buffer_frontend.c +@@ -983,7 +983,7 @@ void *channel_destroy(struct channel *chan) + * Perform flush before writing to finalized. + */ + smp_wmb(); +- ACCESS_ONCE(buf->finalized) = 1; ++ WRITE_ONCE(buf->finalized, 1); + wake_up_interruptible(&buf->read_wait); + } + } else { +@@ -997,10 +997,10 @@ void *channel_destroy(struct channel *chan) + * Perform flush before writing to finalized. + */ + smp_wmb(); +- ACCESS_ONCE(buf->finalized) = 1; ++ WRITE_ONCE(buf->finalized, 1); + wake_up_interruptible(&buf->read_wait); + } +- ACCESS_ONCE(chan->finalized) = 1; ++ WRITE_ONCE(chan->finalized, 1); + wake_up_interruptible(&chan->hp_wait); + wake_up_interruptible(&chan->read_wait); + priv = chan->backend.priv; +@@ -1077,7 +1077,7 @@ int lib_ring_buffer_snapshot(struct lib_ring_buffer *buf, + int finalized; + + retry: +- finalized = ACCESS_ONCE(buf->finalized); ++ finalized = READ_ONCE(buf->finalized); + /* + * Read finalized before counters. + */ +@@ -1248,7 +1248,7 @@ int lib_ring_buffer_get_subbuf(struct lib_ring_buffer *buf, + return -EBUSY; + } + retry: +- finalized = ACCESS_ONCE(buf->finalized); ++ finalized = READ_ONCE(buf->finalized); + /* + * Read finalized before counters. 
+ */ +diff --git a/lib/ringbuffer/ring_buffer_iterator.c b/lib/ringbuffer/ring_buffer_iterator.c +index b6bec48..61eaa5b 100644 +--- a/lib/ringbuffer/ring_buffer_iterator.c ++++ b/lib/ringbuffer/ring_buffer_iterator.c +@@ -61,7 +61,7 @@ ssize_t lib_ring_buffer_get_next_record(struct channel *chan, + switch (iter->state) { + case ITER_GET_SUBBUF: + ret = lib_ring_buffer_get_next_subbuf(buf); +- if (ret && !ACCESS_ONCE(buf->finalized) ++ if (ret && !READ_ONCE(buf->finalized) + && config->alloc == RING_BUFFER_ALLOC_GLOBAL) { + /* + * Use "pull" scheme for global buffers. The reader +diff --git a/lttng-clock.c b/lttng-clock.c +index a5a7eaa..48b4be5 100644 +--- a/lttng-clock.c ++++ b/lttng-clock.c +@@ -48,7 +48,7 @@ int lttng_clock_register_plugin(struct lttng_trace_clock *ltc, + goto end; + } + /* set clock */ +- ACCESS_ONCE(lttng_trace_clock) = ltc; ++ WRITE_ONCE(lttng_trace_clock, ltc); + lttng_trace_clock_mod = mod; + end: + mutex_unlock(&clock_mutex); +@@ -66,7 +66,7 @@ void lttng_clock_unregister_plugin(struct lttng_trace_clock *ltc, + } + WARN_ON_ONCE(lttng_trace_clock_mod != mod); + +- ACCESS_ONCE(lttng_trace_clock) = NULL; ++ WRITE_ONCE(lttng_trace_clock, NULL); + lttng_trace_clock_mod = NULL; + end: + mutex_unlock(&clock_mutex); +@@ -83,7 +83,7 @@ void lttng_clock_ref(void) + ret = try_module_get(lttng_trace_clock_mod); + if (!ret) { + printk(KERN_ERR "LTTng-clock cannot get clock plugin module\n"); +- ACCESS_ONCE(lttng_trace_clock) = NULL; ++ WRITE_ONCE(lttng_trace_clock, NULL); + lttng_trace_clock_mod = NULL; + } + } +diff --git a/lttng-events.c b/lttng-events.c +index 21c4113..75c3fb1 100644 +--- a/lttng-events.c ++++ b/lttng-events.c +@@ -186,7 +186,7 @@ void lttng_session_destroy(struct lttng_session *session) + int ret; + + mutex_lock(&sessions_mutex); +- ACCESS_ONCE(session->active) = 0; ++ WRITE_ONCE(session->active, 0); + list_for_each_entry(chan, &session->chan, list) { + ret = lttng_syscalls_unregister(chan); + WARN_ON(ret); +@@ -261,16 +261,16 @@ 
int lttng_session_enable(struct lttng_session *session) + lib_ring_buffer_clear_quiescent_channel(chan->chan); + } + +- ACCESS_ONCE(session->active) = 1; +- ACCESS_ONCE(session->been_active) = 1; ++ WRITE_ONCE(session->active, 1); ++ WRITE_ONCE(session->been_active, 1); + ret = _lttng_session_metadata_statedump(session); + if (ret) { +- ACCESS_ONCE(session->active) = 0; ++ WRITE_ONCE(session->active, 0); + goto end; + } + ret = lttng_statedump_start(session); + if (ret) +- ACCESS_ONCE(session->active) = 0; ++ WRITE_ONCE(session->active, 0); + end: + mutex_unlock(&sessions_mutex); + return ret; +@@ -286,7 +286,7 @@ int lttng_session_disable(struct lttng_session *session) + ret = -EBUSY; + goto end; + } +- ACCESS_ONCE(session->active) = 0; ++ WRITE_ONCE(session->active, 0); + + /* Set transient enabler state to "disabled" */ + session->tstate = 0; +@@ -361,7 +361,7 @@ int lttng_channel_enable(struct lttng_channel *channel) + channel->tstate = 1; + lttng_session_sync_enablers(channel->session); + /* Set atomically the state to "enabled" */ +- ACCESS_ONCE(channel->enabled) = 1; ++ WRITE_ONCE(channel->enabled, 1); + end: + mutex_unlock(&sessions_mutex); + return ret; +@@ -381,7 +381,7 @@ int lttng_channel_disable(struct lttng_channel *channel) + goto end; + } + /* Set atomically the state to "disabled" */ +- ACCESS_ONCE(channel->enabled) = 0; ++ WRITE_ONCE(channel->enabled, 0); + /* Set transient enabler state to "enabled" */ + channel->tstate = 0; + lttng_session_sync_enablers(channel->session); +@@ -411,7 +411,7 @@ int lttng_event_enable(struct lttng_event *event) + case LTTNG_KERNEL_KPROBE: + case LTTNG_KERNEL_FUNCTION: + case LTTNG_KERNEL_NOOP: +- ACCESS_ONCE(event->enabled) = 1; ++ WRITE_ONCE(event->enabled, 1); + break; + case LTTNG_KERNEL_KRETPROBE: + ret = lttng_kretprobes_event_enable_state(event, 1); +@@ -446,7 +446,7 @@ int lttng_event_disable(struct lttng_event *event) + case LTTNG_KERNEL_KPROBE: + case LTTNG_KERNEL_FUNCTION: + case LTTNG_KERNEL_NOOP: +- 
ACCESS_ONCE(event->enabled) = 0; ++ WRITE_ONCE(event->enabled, 0); + break; + case LTTNG_KERNEL_KRETPROBE: + ret = lttng_kretprobes_event_enable_state(event, 0); +@@ -1517,7 +1517,7 @@ void lttng_session_sync_enablers(struct lttng_session *session) + */ + enabled = enabled && session->tstate && event->chan->tstate; + +- ACCESS_ONCE(event->enabled) = enabled; ++ WRITE_ONCE(event->enabled, enabled); + /* + * Sync tracepoint registration with event enabled + * state. +@@ -1643,7 +1643,7 @@ int lttng_metadata_printf(struct lttng_session *session, + va_list ap; + struct lttng_metadata_stream *stream; + +- WARN_ON_ONCE(!ACCESS_ONCE(session->active)); ++ WARN_ON_ONCE(!READ_ONCE(session->active)); + + va_start(ap, fmt); + str = kvasprintf(GFP_KERNEL, fmt, ap); +@@ -2230,7 +2230,7 @@ int _lttng_event_metadata_statedump(struct lttng_session *session, + { + int ret = 0; + +- if (event->metadata_dumped || !ACCESS_ONCE(session->active)) ++ if (event->metadata_dumped || !READ_ONCE(session->active)) + return 0; + if (chan->channel_type == METADATA_CHANNEL) + return 0; +@@ -2297,7 +2297,7 @@ int _lttng_channel_metadata_statedump(struct lttng_session *session, + { + int ret = 0; + +- if (chan->metadata_dumped || !ACCESS_ONCE(session->active)) ++ if (chan->metadata_dumped || !READ_ONCE(session->active)) + return 0; + + if (chan->channel_type == METADATA_CHANNEL) +@@ -2454,7 +2454,7 @@ int _lttng_session_metadata_statedump(struct lttng_session *session) + struct lttng_event *event; + int ret = 0; + +- if (!ACCESS_ONCE(session->active)) ++ if (!READ_ONCE(session->active)) + return 0; + if (session->metadata_dumped) + goto skip_session; +diff --git a/probes/lttng-ftrace.c b/probes/lttng-ftrace.c +index 9ec326e..50675a4 100644 +--- a/probes/lttng-ftrace.c ++++ b/probes/lttng-ftrace.c +@@ -58,11 +58,11 @@ void lttng_ftrace_handler(unsigned long ip, unsigned long parent_ip, + } payload; + int ret; + +- if (unlikely(!ACCESS_ONCE(chan->session->active))) ++ if 
(unlikely(!READ_ONCE(chan->session->active))) + return; +- if (unlikely(!ACCESS_ONCE(chan->enabled))) ++ if (unlikely(!READ_ONCE(chan->enabled))) + return; +- if (unlikely(!ACCESS_ONCE(event->enabled))) ++ if (unlikely(!READ_ONCE(event->enabled))) + return; + + lib_ring_buffer_ctx_init(&ctx, chan->chan, &lttng_probe_ctx, +@@ -94,11 +94,11 @@ void lttng_ftrace_handler(unsigned long ip, unsigned long parent_ip, void **data + } payload; + int ret; + +- if (unlikely(!ACCESS_ONCE(chan->session->active))) ++ if (unlikely(!READ_ONCE(chan->session->active))) + return; +- if (unlikely(!ACCESS_ONCE(chan->enabled))) ++ if (unlikely(!READ_ONCE(chan->enabled))) + return; +- if (unlikely(!ACCESS_ONCE(event->enabled))) ++ if (unlikely(!READ_ONCE(event->enabled))) + return; + + lib_ring_buffer_ctx_init(&ctx, chan->chan, &lttng_probe_ctx, +diff --git a/probes/lttng-kprobes.c b/probes/lttng-kprobes.c +index daf14ce..b58a09b 100644 +--- a/probes/lttng-kprobes.c ++++ b/probes/lttng-kprobes.c +@@ -43,11 +43,11 @@ int lttng_kprobes_handler_pre(struct kprobe *p, struct pt_regs *regs) + int ret; + unsigned long data = (unsigned long) p->addr; + +- if (unlikely(!ACCESS_ONCE(chan->session->active))) ++ if (unlikely(!READ_ONCE(chan->session->active))) + return 0; +- if (unlikely(!ACCESS_ONCE(chan->enabled))) ++ if (unlikely(!READ_ONCE(chan->enabled))) + return 0; +- if (unlikely(!ACCESS_ONCE(event->enabled))) ++ if (unlikely(!READ_ONCE(event->enabled))) + return 0; + + lib_ring_buffer_ctx_init(&ctx, chan->chan, &lttng_probe_ctx, sizeof(data), +diff --git a/probes/lttng-kretprobes.c b/probes/lttng-kretprobes.c +index 498df62..49b7de8 100644 +--- a/probes/lttng-kretprobes.c ++++ b/probes/lttng-kretprobes.c +@@ -63,11 +63,11 @@ int _lttng_kretprobes_handler(struct kretprobe_instance *krpi, + unsigned long parent_ip; + } payload; + +- if (unlikely(!ACCESS_ONCE(chan->session->active))) ++ if (unlikely(!READ_ONCE(chan->session->active))) + return 0; +- if (unlikely(!ACCESS_ONCE(chan->enabled))) ++ if
(unlikely(!READ_ONCE(chan->enabled))) + return 0; +- if (unlikely(!ACCESS_ONCE(event->enabled))) ++ if (unlikely(!READ_ONCE(event->enabled))) + return 0; + + payload.ip = (unsigned long) krpi->rp->kp.addr; +@@ -304,8 +304,8 @@ int lttng_kretprobes_event_enable_state(struct lttng_event *event, + } + lttng_krp = event->u.kretprobe.lttng_krp; + event_return = lttng_krp->event[EVENT_RETURN]; +- ACCESS_ONCE(event->enabled) = enable; +- ACCESS_ONCE(event_return->enabled) = enable; ++ WRITE_ONCE(event->enabled, enable); ++ WRITE_ONCE(event_return->enabled, enable); + return 0; + } + EXPORT_SYMBOL_GPL(lttng_kretprobes_event_enable_state); +diff --git a/probes/lttng-tracepoint-event-impl.h b/probes/lttng-tracepoint-event-impl.h +index 7ec0d75..61f1c2d 100644 +--- a/probes/lttng-tracepoint-event-impl.h ++++ b/probes/lttng-tracepoint-event-impl.h +@@ -1143,11 +1143,11 @@ static void __event_probe__##_name(void *__data, _proto) \ + \ + if (!_TP_SESSION_CHECK(session, __session)) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__session->active))) \ ++ if (unlikely(!READ_ONCE(__session->active))) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__chan->enabled))) \ ++ if (unlikely(!READ_ONCE(__chan->enabled))) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__event->enabled))) \ ++ if (unlikely(!READ_ONCE(__event->enabled))) \ + return; \ + __lpf = lttng_rcu_dereference(__session->pid_tracker); \ + if (__lpf && likely(!lttng_pid_tracker_lookup(__lpf, current->tgid))) \ +@@ -1217,11 +1217,11 @@ static void __event_probe__##_name(void *__data) \ + \ + if (!_TP_SESSION_CHECK(session, __session)) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__session->active))) \ ++ if (unlikely(!READ_ONCE(__session->active))) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__chan->enabled))) \ ++ if (unlikely(!READ_ONCE(__chan->enabled))) \ + return; \ +- if (unlikely(!ACCESS_ONCE(__event->enabled))) \ ++ if (unlikely(!READ_ONCE(__event->enabled))) \ + return; \ + __lpf = lttng_rcu_dereference(__session->pid_tracker); \ 
+ if (__lpf && likely(!lttng_pid_tracker_lookup(__lpf, current->pid))) \ +diff --git a/wrapper/compiler.h b/wrapper/compiler.h +index 0c01632..e4533d4 100644 +--- a/wrapper/compiler.h ++++ b/wrapper/compiler.h +@@ -39,4 +39,17 @@ + # endif + #endif + ++/* ++ * READ/WRITE_ONCE were introduced in kernel 3.19 and ACCESS_ONCE ++ * was removed in 4.15. Prefer READ/WRITE but fallback to ACCESS ++ * when they are not available. ++ */ ++#ifndef READ_ONCE ++# define READ_ONCE(x) ACCESS_ONCE(x) ++#endif ++ ++#ifndef WRITE_ONCE ++# define WRITE_ONCE(x, val) ({ ACCESS_ONCE(x) = val; }) ++#endif ++ + #endif /* _LTTNG_WRAPPER_COMPILER_H */ +diff --git a/wrapper/trace-clock.h b/wrapper/trace-clock.h +index 7f17ccd..08f9922 100644 +--- a/wrapper/trace-clock.h ++++ b/wrapper/trace-clock.h +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -176,7 +177,7 @@ static inline void put_trace_clock(void) + + static inline u64 trace_clock_read64(void) + { +- struct lttng_trace_clock *ltc = ACCESS_ONCE(lttng_trace_clock); ++ struct lttng_trace_clock *ltc = READ_ONCE(lttng_trace_clock); + + if (likely(!ltc)) { + return trace_clock_read64_monotonic(); +@@ -188,7 +189,7 @@ static inline u64 trace_clock_read64(void) + + static inline u64 trace_clock_freq(void) + { +- struct lttng_trace_clock *ltc = ACCESS_ONCE(lttng_trace_clock); ++ struct lttng_trace_clock *ltc = READ_ONCE(lttng_trace_clock); + + if (!ltc) { + return trace_clock_freq_monotonic(); +@@ -200,7 +201,7 @@ static inline u64 trace_clock_freq(void) + + static inline int trace_clock_uuid(char *uuid) + { +- struct lttng_trace_clock *ltc = ACCESS_ONCE(lttng_trace_clock); ++ struct lttng_trace_clock *ltc = READ_ONCE(lttng_trace_clock); + + read_barrier_depends(); /* load ltc before content */ + /* Use default UUID cb when NULL */ +@@ -213,7 +214,7 @@ static inline int trace_clock_uuid(char *uuid) + + static inline const char *trace_clock_name(void) + { +- struct lttng_trace_clock *ltc = 
ACCESS_ONCE(lttng_trace_clock); ++ struct lttng_trace_clock *ltc = READ_ONCE(lttng_trace_clock); + + if (!ltc) { + return trace_clock_name_monotonic(); +@@ -225,7 +226,7 @@ static inline const char *trace_clock_name(void) + + static inline const char *trace_clock_description(void) + { +- struct lttng_trace_clock *ltc = ACCESS_ONCE(lttng_trace_clock); ++ struct lttng_trace_clock *ltc = READ_ONCE(lttng_trace_clock); + + if (!ltc) { + return trace_clock_description_monotonic(); diff --git a/kernel-4.15-timers.patch b/kernel-4.15-timers.patch new file mode 100644 index 0000000..0e412ec --- /dev/null +++ b/kernel-4.15-timers.patch @@ -0,0 +1,328 @@ +From 1fd97f9f4f773e3e9dfc787e9c90b1418fa5a7d4 Mon Sep 17 00:00:00 2001 +From: Michael Jeanson +Date: Wed, 29 Nov 2017 17:03:21 -0500 +Subject: [PATCH] timer API transition for kernel 4.15 + +The timer API changes starting from kernel 4.15.0. + +There's an interesting LWN article on this subject: + + https://lwn.net/Articles/735887/ + +Check these upstream commits for more details: + + commit 686fef928bba6be13cabe639f154af7d72b63120 + Author: Kees Cook + Date: Thu Sep 28 06:38:17 2017 -0700 + + timer: Prepare to change timer callback argument type + + Modern kernel callback systems pass the structure associated with a + given callback to the callback function. The timer callback remains one + of the legacy cases where an arbitrary unsigned long argument continues + to be passed as the callback argument. This has several problems: + + - This bloats the timer_list structure with a normally redundant + .data field. + + - No type checking is being performed, forcing callbacks to do + explicit type casts of the unsigned long argument into the object + that was passed, rather than using container_of(), as done in most + of the other callback infrastructure.
+ + - Neighboring buffer overflows can overwrite both the .function and + the .data field, providing attackers with a way to elevate from a buffer + overflow into a simplistic ROP-like mechanism that allows calling + arbitrary functions with a controlled first argument. + + - For future Control Flow Integrity work, this creates a unique function + prototype for timer callbacks, instead of allowing them to continue to + be clustered with other void functions that take a single unsigned long + argument. + + This adds a new timer initialization API, which will ultimately replace + the existing setup_timer(), setup_{deferrable,pinned,etc}_timer() family, + named timer_setup() (to mirror hrtimer_setup(), making instances of its + use much easier to grep for). + + In order to support the migration of existing timers into the new + callback arguments, timer_setup() casts its arguments to the existing + legacy types, and explicitly passes the timer pointer as the legacy + data argument. Once all setup_*timer() callers have been replaced with + timer_setup(), the casts can be removed, and the data argument can be + dropped with the timer expiration code changed to just pass the timer + to the callback directly. + +: + Modern kernel callback systems pass the structure associated with a + given callback to the callback function. The timer callback remains one + of the legacy cases where an arbitrary unsigned long argument continues + to be passed as the callback argument. This has several problems: + + - This bloats the timer_list structure with a normally redundant + .data field. + + - No type checking is being performed, forcing callbacks to do + explicit type casts of the unsigned long argument into the object + that was passed, rather than using container_of(), as done in most + of the other callback infrastructure. 
+ + - Neighboring buffer overflows can overwrite both the .function and + the .data field, providing attackers with a way to elevate from a buffer + overflow into a simplistic ROP-like mechanism that allows calling + arbitrary functions with a controlled first argument. + + - For future Control Flow Integrity work, this creates a unique function + prototype for timer callbacks, instead of allowing them to continue to + be clustered with other void functions that take a single unsigned long + argument. + + This adds a new timer initialization API, which will ultimately replace + the existing setup_timer(), setup_{deferrable,pinned,etc}_timer() family, + named timer_setup() (to mirror hrtimer_setup(), making instances of its + use much easier to grep for). + + In order to support the migration of existing timers into the new + callback arguments, timer_setup() casts its arguments to the existing + legacy types, and explicitly passes the timer pointer as the legacy + data argument. Once all setup_*timer() callers have been replaced with + timer_setup(), the casts can be removed, and the data argument can be + dropped with the timer expiration code changed to just pass the timer + to the callback directly. + + Since the regular pattern of using container_of() during local variable + declaration repeats the need for the variable type declaration + to be included, this adds a helper modeled after other from_*() + helpers that wrap container_of(), named from_timer(). 
This helper uses + typeof(*variable), removing the type redundancy and minimizing the need + for line wraps in forthcoming conversions from "unsigned data long" to + "struct timer_list *" in the timer callbacks: + + -void callback(unsigned long data) + +void callback(struct timer_list *t) + { + - struct some_data_structure *local = (struct some_data_structure *)data; + + struct some_data_structure *local = from_timer(local, t, timer); + + Finally, in order to support the handful of timer users that perform + open-coded assignments of the .function (and .data) fields, provide + cast macros (TIMER_FUNC_TYPE and TIMER_DATA_TYPE) that can be used + temporarily. Once conversion has been completed, these can be globally + trivially removed. + + ... + + commit e99e88a9d2b067465adaa9c111ada99a041bef9a + Author: Kees Cook + Date: Mon Oct 16 14:43:17 2017 -0700 + + treewide: setup_timer() -> timer_setup() + + This converts all remaining cases of the old setup_timer() API into using + timer_setup(), where the callback argument is the structure already + holding the struct timer_list. These should have no behavioral changes, + since they just change which pointer is passed into the callback with + the same available pointers after conversion. It handles the following + examples, in addition to some other variations. + + ... + + commit 185981d54a60ae90942c6ba9006b250f3348cef2 + Author: Kees Cook + Date: Wed Oct 4 16:26:58 2017 -0700 + + timer: Remove init_timer_pinned() in favor of timer_setup() + + This refactors the only users of init_timer_pinned() to use + the new timer_setup() and from_timer(). Drops the definition of + init_timer_pinned(). + + ... 
+ +Signed-off-by: Michael Jeanson +Signed-off-by: Mathieu Desnoyers +--- + lib/ringbuffer/ring_buffer_frontend.c | 27 +++++++------ + wrapper/timer.h | 72 ++++++++++++++++++++++++++++------- + 2 files changed, 72 insertions(+), 27 deletions(-) + +diff --git a/lib/ringbuffer/ring_buffer_frontend.c b/lib/ringbuffer/ring_buffer_frontend.c +index bdd31ad..abd9757 100644 +--- a/lib/ringbuffer/ring_buffer_frontend.c ++++ b/lib/ringbuffer/ring_buffer_frontend.c +@@ -314,9 +314,9 @@ int lib_ring_buffer_create(struct lib_ring_buffer *buf, + return ret; + } + +-static void switch_buffer_timer(unsigned long data) ++static void switch_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t) + { +- struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data; ++ struct lib_ring_buffer *buf = lttng_from_timer(buf, t, switch_timer); + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; + +@@ -341,22 +341,22 @@ static void lib_ring_buffer_start_switch_timer(struct lib_ring_buffer *buf) + { + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; ++ unsigned int flags = 0; + + if (!chan->switch_timer_interval || buf->switch_timer_enabled) + return; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) +- lttng_init_timer_pinned(&buf->switch_timer); +- else +- init_timer(&buf->switch_timer); ++ flags = LTTNG_TIMER_PINNED; + +- buf->switch_timer.function = switch_buffer_timer; ++ lttng_timer_setup(&buf->switch_timer, switch_buffer_timer, flags, buf); + buf->switch_timer.expires = jiffies + chan->switch_timer_interval; +- buf->switch_timer.data = (unsigned long)buf; ++ + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + add_timer_on(&buf->switch_timer, buf->backend.cpu); + else + add_timer(&buf->switch_timer); ++ + buf->switch_timer_enabled = 1; + } + +@@ -377,9 +377,9 @@ static void lib_ring_buffer_stop_switch_timer(struct lib_ring_buffer *buf) + /* + * Polling timer to check the 
channels for data. + */ +-static void read_buffer_timer(unsigned long data) ++static void read_buffer_timer(LTTNG_TIMER_FUNC_ARG_TYPE t) + { +- struct lib_ring_buffer *buf = (struct lib_ring_buffer *)data; ++ struct lib_ring_buffer *buf = lttng_from_timer(buf, t, read_timer); + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; + +@@ -406,6 +406,7 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf) + { + struct channel *chan = buf->backend.chan; + const struct lib_ring_buffer_config *config = &chan->backend.config; ++ unsigned int flags; + + if (config->wakeup != RING_BUFFER_WAKEUP_BY_TIMER + || !chan->read_timer_interval +@@ -413,18 +414,16 @@ static void lib_ring_buffer_start_read_timer(struct lib_ring_buffer *buf) + return; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) +- lttng_init_timer_pinned(&buf->read_timer); +- else +- init_timer(&buf->read_timer); ++ flags = LTTNG_TIMER_PINNED; + +- buf->read_timer.function = read_buffer_timer; ++ lttng_timer_setup(&buf->read_timer, read_buffer_timer, flags, buf); + buf->read_timer.expires = jiffies + chan->read_timer_interval; +- buf->read_timer.data = (unsigned long)buf; + + if (config->alloc == RING_BUFFER_ALLOC_PER_CPU) + add_timer_on(&buf->read_timer, buf->backend.cpu); + else + add_timer(&buf->read_timer); ++ + buf->read_timer_enabled = 1; + } + +diff --git a/wrapper/timer.h b/wrapper/timer.h +index c1c0c95..4fc9828 100644 +--- a/wrapper/timer.h ++++ b/wrapper/timer.h +@@ -27,30 +27,76 @@ + #include + #include + ++/* ++ * In the olden days, pinned timers were initialized normally with init_timer() ++ * and then modified with mod_timer_pinned(). ++ * ++ * Then came kernel 4.8.0 and they had to be initialized as pinned with ++ * init_timer_pinned() and then modified as regular timers with mod_timer(). ++ * ++ * Then came kernel 4.15.0 with a new timer API where init_timer() is no more.
++ * It's replaced by timer_setup() where pinned is now part of timer flags. ++ */ ++ ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) ++ ++#define LTTNG_TIMER_PINNED TIMER_PINNED ++#define LTTNG_TIMER_FUNC_ARG_TYPE struct timer_list * ++ ++#define lttng_mod_timer_pinned(timer, expires) \ ++ mod_timer(timer, expires) ++ ++#define lttng_from_timer(var, callback_timer, timer_fieldname) \ ++ from_timer(var, callback_timer, timer_fieldname) ++ ++#define lttng_timer_setup(timer, callback, flags, unused) \ ++ timer_setup(timer, callback, flags) ++ + +-#if (LTTNG_RT_VERSION_CODE >= LTTNG_RT_KERNEL_VERSION(4,6,4,8) \ ++#else /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0) */ ++ ++ ++# if (LTTNG_RT_VERSION_CODE >= LTTNG_RT_KERNEL_VERSION(4,6,4,8) \ + || LINUX_VERSION_CODE >= KERNEL_VERSION(4,8,0)) + +-#define lttng_init_timer_pinned(timer) \ ++#define lttng_init_timer_pinned(timer) \ + init_timer_pinned(timer) + +-static inline int lttng_mod_timer_pinned(struct timer_list *timer, +- unsigned long expires) +-{ +- return mod_timer(timer, expires); +-} ++#define lttng_mod_timer_pinned(timer, expires) \ ++ mod_timer(timer, expires) + +-#else ++# else /* LTTNG_RT_VERSION_CODE >= LTTNG_RT_KERNEL_VERSION(4,6,4,8) */ + +-#define lttng_init_timer_pinned(timer) \ ++#define lttng_init_timer_pinned(timer) \ + init_timer(timer) + +-static inline int lttng_mod_timer_pinned(struct timer_list *timer, +- unsigned long expires) ++#define lttng_mod_timer_pinned(timer, expires) \ ++ mod_timer_pinned(timer, expires) ++ ++# endif /* LTTNG_RT_VERSION_CODE >= LTTNG_RT_KERNEL_VERSION(4,6,4,8) */ ++ ++ ++#define LTTNG_TIMER_PINNED TIMER_PINNED ++#define LTTNG_TIMER_FUNC_ARG_TYPE unsigned long ++ ++/* timer_fieldname is unused prior to 4.15. 
*/ ++#define lttng_from_timer(var, timer_data, timer_fieldname) \ ++ ((typeof(var))timer_data) ++ ++static inline void lttng_timer_setup(struct timer_list *timer, ++ void (*function)(LTTNG_TIMER_FUNC_ARG_TYPE), ++ unsigned int flags, void *data) + { +- return mod_timer_pinned(timer, expires); ++ if (flags & LTTNG_TIMER_PINNED) ++ lttng_init_timer_pinned(timer); ++ else ++ init_timer(timer); ++ ++ timer->function = function; ++ timer->data = (unsigned long)data; + } + +-#endif ++#endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0) */ + + #endif /* _LTTNG_WRAPPER_TIMER_H */ diff --git a/kmem-update.patch b/kmem-update.patch new file mode 100644 index 0000000..0465b2f --- /dev/null +++ b/kmem-update.patch @@ -0,0 +1,67 @@ +From 071c76b533ed92a4107cd162826a08a18b9ff91c Mon Sep 17 00:00:00 2001 +From: Michael Jeanson +Date: Tue, 28 Nov 2017 16:02:45 -0500 +Subject: [PATCH] Fix: update kmem instrumentation for kernel 4.15 + +See upstream commit: + + commit 2d4894b5d2ae0fe1725ea7abd57b33bfbbe45492 + Author: Mel Gorman + Date: Wed Nov 15 17:37:59 2017 -0800 + + mm: remove cold parameter from free_hot_cold_page* + +Signed-off-by: Michael Jeanson +Signed-off-by: Mathieu Desnoyers +--- + instrumentation/events/lttng-module/kmem.h | 27 +++++++++++++++++++++++++-- + 1 file changed, 25 insertions(+), 2 deletions(-) + +diff --git a/instrumentation/events/lttng-module/kmem.h b/instrumentation/events/lttng-module/kmem.h +index ad7bf77..c3fa25a 100644 +--- a/instrumentation/events/lttng-module/kmem.h ++++ b/instrumentation/events/lttng-module/kmem.h +@@ -132,11 +132,33 @@ LTTNG_TRACEPOINT_EVENT_MAP(mm_page_free_direct, kmem_mm_page_free_direct, + ) + ) + +-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)) ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) ++LTTNG_TRACEPOINT_EVENT_MAP(mm_page_free_batched, kmem_mm_page_free_batched, ++ ++ TP_PROTO(struct page *page), ++ ++ TP_ARGS(page), ++ ++ TP_FIELDS( ++ ctf_integer_hex(struct page *, page, page) ++ ctf_integer(unsigned long, 
pfn, page_to_pfn(page)) ++ ) ++) ++#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(3,3,0)) + LTTNG_TRACEPOINT_EVENT_MAP(mm_page_free_batched, kmem_mm_page_free_batched, ++ ++ TP_PROTO(struct page *page, int cold), ++ ++ TP_ARGS(page, cold), ++ ++ TP_FIELDS( ++ ctf_integer_hex(struct page *, page, page) ++ ctf_integer(unsigned long, pfn, page_to_pfn(page)) ++ ctf_integer(int, cold, cold) ++ ) ++) + #else + LTTNG_TRACEPOINT_EVENT_MAP(mm_pagevec_free, kmem_pagevec_free, +-#endif + + TP_PROTO(struct page *page, int cold), + +@@ -148,6 +170,7 @@ LTTNG_TRACEPOINT_EVENT_MAP(mm_pagevec_free, kmem_pagevec_free, + ctf_integer(int, cold, cold) + ) + ) ++#endif + + LTTNG_TRACEPOINT_EVENT_MAP(mm_page_alloc, kmem_mm_page_alloc, + diff --git a/lttng-modules.spec b/lttng-modules.spec index 65a7cb9..dac967b 100644 --- a/lttng-modules.spec +++ b/lttng-modules.spec @@ -7,7 +7,7 @@ # nothing to be placed to debuginfo package %define _enable_debug_packages 0 -%define rel 2 +%define rel 3 %define pname lttng-modules Summary: LTTng 2.x kernel modules Summary(pl.UTF-8): Moduły jądra LTTng 2.x @@ -20,6 +20,10 @@ Source0: http://lttng.org/files/lttng-modules/%{pname}-%{version}.tar.bz2 # Source0-md5: 832452b321a4df6836549e72d05b2ce9 Patch0: build.patch Patch1: kvm-update.patch +Patch2: kmem-update.patch +Patch3: kernel-4.15-timers.patch +Patch4: kernel-4.15-atomics.patch +Patch5: sock-update.patch URL: http://lttng.org/ %{expand:%buildrequires_kernel kernel%%{_alt_kernel}-module-build >= 3:2.6.38} %{?with_kernelsrc:%{expand:%buildrequires_kernel kernel%%{_alt_kernel}-source >= 3:2.6.38}} @@ -90,6 +94,10 @@ p=`pwd`\ %setup -q -n %{pname}-%{version} %patch0 -p1 %patch1 -p1 +%patch2 -p1 +%patch3 -p1 +%patch4 -p1 +%patch5 -p1 %build %{expand:%build_kernel_packages} diff --git a/sock-update.patch b/sock-update.patch new file mode 100644 index 0000000..668f4ff --- /dev/null +++ b/sock-update.patch @@ -0,0 +1,57 @@ +From f279893a432a5c5c5d4488f65a0719181a06da56 Mon Sep 17 00:00:00 2001 +From: Mathieu 
Desnoyers +Date: Tue, 26 Dec 2017 09:47:36 -0500 +Subject: [PATCH] Update sock instrumentation for 4.15 + +Signed-off-by: Mathieu Desnoyers +--- + instrumentation/events/lttng-module/sock.h | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/instrumentation/events/lttng-module/sock.h b/instrumentation/events/lttng-module/sock.h +index e79f8dc..5cd02ca 100644 +--- a/instrumentation/events/lttng-module/sock.h ++++ b/instrumentation/events/lttng-module/sock.h +@@ -5,6 +5,7 @@ + #define LTTNG_TRACE_SOCK_H + + #include ++#include + #include + + LTTNG_TRACEPOINT_EVENT(sock_rcvqueue_full, +@@ -20,6 +21,25 @@ LTTNG_TRACEPOINT_EVENT(sock_rcvqueue_full, + ) + ) + ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) ++ ++LTTNG_TRACEPOINT_EVENT(sock_exceed_buf_limit, ++ ++ TP_PROTO(struct sock *sk, struct proto *prot, long allocated), ++ ++ TP_ARGS(sk, prot, allocated), ++ ++ TP_FIELDS( ++ ctf_string(name, prot->name) ++ ctf_array(long, sysctl_mem, prot->sysctl_mem, 3) ++ ctf_integer(long, allocated, allocated) ++ ctf_integer(int, sysctl_rmem, sk_get_rmem0(sk, prot)) ++ ctf_integer(int, rmem_alloc, atomic_read(&sk->sk_rmem_alloc)) ++ ) ++) ++ ++#else /* #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++ + LTTNG_TRACEPOINT_EVENT(sock_exceed_buf_limit, + + TP_PROTO(struct sock *sk, struct proto *prot, long allocated), +@@ -35,6 +55,8 @@ LTTNG_TRACEPOINT_EVENT(sock_exceed_buf_limit, + ) + ) + ++#endif /* #else #if (LINUX_VERSION_CODE >= KERNEL_VERSION(4,15,0)) */ ++ + #endif /* LTTNG_TRACE_SOCK_H */ + + /* This part must be outside protection */ -- 2.44.0