]>
Commit | Line | Data |
---|---|---|
a5affa61 ER |
1 | https://resin.io/blog/building-arm-containers-on-any-x86-machine-even-dockerhub/ |
2 | https://github.com/resin-io/qemu/commit/782e5bb77014ff136f7bb6133a911e5f53e914a7 | |
3 | ||
102e18f7 ER |
4 | https://github.com/resin-io/qemu/commit/782e5bb77014ff136f7bb6133a911e5f53e914a7#commitcomment-17193923 |
5 | It has gone through review[1][2][3] and I'm waiting for the maintainer of the linux-user subsystem to accept it in his tree. | |
6 | ||
7 | [1] https://patchwork.ozlabs.org/patch/569452/ | |
8 | [2] https://patchwork.ozlabs.org/patch/573877/ | |
9 | [3] https://patchwork.ozlabs.org/patch/582756/ | |
10 | ||
11 | From patchwork Mon Feb 15 05:51:47 2016 | |
12 | Content-Type: text/plain; charset="utf-8" | |
13 | MIME-Version: 1.0 | |
14 | Content-Transfer-Encoding: 7bit | |
15 | Subject: [v3] linux-user: add option to intercept execve() syscalls | |
a5affa61 | 16 | From: Petros Angelatos <petrosagg@resin.io> |
102e18f7 ER |
17 | X-Patchwork-Id: 582756 |
18 | Message-Id: <1455515507-26877-1-git-send-email-petrosagg@resin.io> | |
19 | To: qemu-devel@nongnu.org | |
20 | Cc: lucas.kaldstrom@hotmail.co.uk, peter.maydell@linaro.org, | |
21 | riku.voipio@iki.fi, | |
22 | laurent@vivier.eu, Petros Angelatos <petrosagg@resin.io> | |
23 | Date: Sun, 14 Feb 2016 21:51:47 -0800 | |
a5affa61 ER |
24 | |
25 | In order for one to use QEMU user mode emulation under a chroot, it is | |
26 | required to use binfmt_misc. This can be avoided by QEMU never doing a | |
27 | raw execve() to the host system. | |
28 | ||
29 | Introduce a new option, -execve, that uses the current QEMU interpreter | |
30 | to intercept execve(). | |
31 | ||
32 | qemu_execve() will prepend the interpreter path, similar to what | |
33 | binfmt_misc would do, and then pass the modified execve() to the host. | |
34 | ||
35 | It is necessary to parse hashbang scripts in that function, otherwise | |
36 | the kernel will try to run the interpreter of a script without QEMU and | |
37 | get an invalid exec format error. | |
38 | ||
39 | Signed-off-by: Petros Angelatos <petrosagg@resin.io> | |
102e18f7 ER |
40 | Tested-by: Laurent Vivier <laurent@vivier.eu> |
41 | Reviewed-by: Laurent Vivier <laurent@vivier.eu> | |
a5affa61 | 42 | --- |
102e18f7 ER |
43 | v3 changes: |
44 | - rebase the patchset against current code | |
45 | ||
0fa62ee2 ER |
46 | --- qemu-2.7.0/linux-user/main.c~ 2016-09-26 12:07:20.000000000 +0300 |
47 | +++ qemu-2.7.0/linux-user/main.c 2016-09-26 12:09:24.258470304 +0300 | |
48 | @@ -18,6 +18,7 @@ | |
a5affa61 ER |
49 | */ |
50 | #include "qemu/osdep.h" | |
0fa62ee2 | 51 | #include "qemu-version.h" |
a5affa61 | 52 | +#include <sys/auxv.h> |
a5affa61 ER |
53 | #include <sys/syscall.h> |
54 | #include <sys/resource.h> | |
0fa62ee2 | 55 | |
a5affa61 ER |
56 | @@ -75,6 +76,7 @@ static void usage(int exitcode); |
57 | ||
58 | static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; | |
59 | const char *qemu_uname_release; | |
60 | +const char *qemu_execve_path; | |
61 | ||
62 | /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so | |
63 | we allocate a bigger stack. Need a better solution, for example | |
64 | @@ -3824,6 +3826,38 @@ static void handle_arg_guest_base(const char *arg) | |
65 | have_guest_base = 1; | |
66 | } | |
67 | ||
68 | +static void handle_arg_execve(const char *arg) | |
69 | +{ | |
70 | + const char *execfn; | |
71 | + char buf[PATH_MAX]; | |
72 | + char *ret; | |
73 | + int len; | |
74 | + | |
75 | + /* try getauxval() */ | |
76 | + execfn = (const char *) getauxval(AT_EXECFN); | |
77 | + | |
78 | + if (execfn != 0) { | |
79 | + ret = realpath(execfn, buf); | |
80 | + | |
81 | + if (ret != NULL) { | |
82 | + qemu_execve_path = strdup(buf); | |
83 | + return; | |
84 | + } | |
85 | + } | |
86 | + | |
87 | + /* try /proc/self/exe */ | |
88 | + len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); | |
89 | + | |
90 | + if (len != -1) { | |
91 | + buf[len] = '\0'; | |
92 | + qemu_execve_path = strdup(buf); | |
93 | + return; | |
94 | + } | |
95 | + | |
96 | + fprintf(stderr, "qemu_execve: unable to determine intepreter's path\n"); | |
97 | + exit(EXIT_FAILURE); | |
98 | +} | |
99 | + | |
100 | static void handle_arg_reserved_va(const char *arg) | |
101 | { | |
102 | char *p; | |
103 | @@ -3909,6 +3943,8 @@ static const struct qemu_argument arg_table[] = { | |
104 | "uname", "set qemu uname release string to 'uname'"}, | |
105 | {"B", "QEMU_GUEST_BASE", true, handle_arg_guest_base, | |
106 | "address", "set guest_base address to 'address'"}, | |
107 | + {"execve", "QEMU_EXECVE", false, handle_arg_execve, | |
108 | + "", "use this interpreter when a process calls execve()"}, | |
109 | {"R", "QEMU_RESERVED_VA", true, handle_arg_reserved_va, | |
110 | "size", "reserve 'size' bytes for guest virtual address space"}, | |
111 | {"d", "QEMU_LOG", true, handle_arg_log, | |
112 | diff --git a/linux-user/qemu.h b/linux-user/qemu.h | |
102e18f7 | 113 | index bd90cc3..0d9b058 100644 |
a5affa61 ER |
114 | --- a/linux-user/qemu.h |
115 | +++ b/linux-user/qemu.h | |
102e18f7 | 116 | @@ -140,6 +140,7 @@ void init_task_state(TaskState *ts); |
a5affa61 ER |
117 | void task_settid(TaskState *); |
118 | void stop_all_tasks(void); | |
119 | extern const char *qemu_uname_release; | |
120 | +extern const char *qemu_execve_path; | |
121 | extern unsigned long mmap_min_addr; | |
122 | ||
123 | /* ??? See if we can avoid exposing so much of the loader internals. */ | |
0fa62ee2 ER |
124 | --- qemu-2.7.0/linux-user/syscall.c~ 2016-09-26 12:10:36.000000000 +0300 |
125 | +++ qemu-2.7.0/linux-user/syscall.c 2016-09-26 12:13:54.312490312 +0300 | |
126 | @@ -99,6 +99,7 @@ | |
127 | #include <linux/reboot.h> | |
a5affa61 ER |
128 | #include <linux/route.h> |
129 | #include <linux/filter.h> | |
a5affa61 | 130 | +#include <linux/binfmts.h> |
0fa62ee2 ER |
131 | #include <linux/blkpg.h> |
132 | #include <netpacket/packet.h> | |
133 | #include <linux/netlink.h> | |
102e18f7 | 134 | @@ -5842,6 +5843,118 @@ static target_timer_t get_timer_id(abi_long arg) |
a5affa61 ER |
135 | return timerid; |
136 | } | |
137 | ||
138 | +/* qemu_execve() Must return target values and target errnos. */ | |
139 | +static abi_long qemu_execve(char *filename, char *argv[], | |
140 | + char *envp[]) | |
141 | +{ | |
142 | + char *i_arg = NULL, *i_name = NULL; | |
143 | + char **new_argp; | |
144 | + int argc, fd, ret, i, offset = 3; | |
145 | + char *cp; | |
146 | + char buf[BINPRM_BUF_SIZE]; | |
147 | + | |
148 | + /* normal execve case */ | |
149 | + if (qemu_execve_path == NULL || *qemu_execve_path == 0) { | |
150 | + return get_errno(execve(filename, argv, envp)); | |
151 | + } | |
152 | + | |
153 | + for (argc = 0; argv[argc] != NULL; argc++) { | |
154 | + /* nothing */ ; | |
155 | + } | |
156 | + | |
157 | + fd = open(filename, O_RDONLY); | |
158 | + if (fd == -1) { | |
159 | + return get_errno(fd); | |
160 | + } | |
161 | + | |
162 | + ret = read(fd, buf, BINPRM_BUF_SIZE); | |
163 | + if (ret == -1) { | |
164 | + close(fd); | |
165 | + return get_errno(ret); | |
166 | + } | |
167 | + | |
168 | + /* if we have less than 2 bytes, we can guess it is not executable */ | |
169 | + if (ret < 2) { | |
170 | + close(fd); | |
171 | + return -host_to_target_errno(ENOEXEC); | |
172 | + } | |
173 | + | |
174 | + close(fd); | |
175 | + | |
176 | + /* adapted from the kernel | |
177 | + * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c | |
178 | + */ | |
179 | + if ((buf[0] == '#') && (buf[1] == '!')) { | |
180 | + /* | |
181 | + * This section does the #! interpretation. | |
182 | + * Sorta complicated, but hopefully it will work. -TYT | |
183 | + */ | |
184 | + | |
185 | + buf[BINPRM_BUF_SIZE - 1] = '\0'; | |
186 | + cp = strchr(buf, '\n'); | |
187 | + if (cp == NULL) { | |
188 | + cp = buf + BINPRM_BUF_SIZE - 1; | |
189 | + } | |
190 | + *cp = '\0'; | |
191 | + while (cp > buf) { | |
192 | + cp--; | |
193 | + if ((*cp == ' ') || (*cp == '\t')) { | |
194 | + *cp = '\0'; | |
195 | + } else { | |
196 | + break; | |
197 | + } | |
198 | + } | |
199 | + for (cp = buf + 2; (*cp == ' ') || (*cp == '\t'); cp++) { | |
200 | + /* nothing */ ; | |
201 | + } | |
202 | + if (*cp == '\0') { | |
203 | + return -ENOEXEC; /* No interpreter name found */ | |
204 | + } | |
205 | + i_name = cp; | |
206 | + i_arg = NULL; | |
207 | + for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) { | |
208 | + /* nothing */ ; | |
209 | + } | |
210 | + while ((*cp == ' ') || (*cp == '\t')) { | |
211 | + *cp++ = '\0'; | |
212 | + } | |
213 | + if (*cp) { | |
214 | + i_arg = cp; | |
215 | + } | |
216 | + | |
217 | + if (i_arg) { | |
218 | + offset = 5; | |
219 | + } else { | |
220 | + offset = 4; | |
221 | + } | |
222 | + } | |
223 | + | |
224 | + new_argp = alloca((argc + offset + 1) * sizeof(void *)); | |
225 | + | |
226 | + /* Copy the original arguments with offset */ | |
227 | + for (i = 0; i < argc; i++) { | |
228 | + new_argp[i + offset] = argv[i]; | |
229 | + } | |
230 | + | |
231 | + new_argp[0] = strdup(qemu_execve_path); | |
232 | + new_argp[1] = strdup("-0"); | |
233 | + new_argp[offset] = filename; | |
234 | + new_argp[argc + offset] = NULL; | |
235 | + | |
236 | + if (i_name) { | |
237 | + new_argp[2] = i_name; | |
238 | + new_argp[3] = i_name; | |
239 | + | |
240 | + if (i_arg) { | |
241 | + new_argp[4] = i_arg; | |
242 | + } | |
243 | + } else { | |
244 | + new_argp[2] = argv[0]; | |
245 | + } | |
246 | + | |
0fa62ee2 | 247 | + return get_errno(safe_execve(qemu_execve_path, new_argp, envp)); |
a5affa61 ER |
248 | +} |
249 | + | |
250 | /* do_syscall() should always have a single exit point at the end so | |
251 | that actions, such as logging of syscall results, can be performed. | |
252 | All errnos that do_syscall() returns must be -TARGET_<errcode>. */ | |
0fa62ee2 ER |
253 | @@ -7703,7 +7703,7 @@ |
254 | * before the execve completes and makes it the other | |
255 | * program's problem. | |
256 | */ | |
257 | - ret = get_errno(safe_execve(p, argp, envp)); | |
a5affa61 | 258 | + ret = qemu_execve(p, argp, envp); |
a5affa61 ER |
259 | unlock_user(p, arg1, 0); |
260 | ||
261 | goto execve_end; |