]>
Commit | Line | Data |
---|---|---|
a5affa61 ER |
1 | https://resin.io/blog/building-arm-containers-on-any-x86-machine-even-dockerhub/ |
2 | https://github.com/resin-io/qemu/commit/782e5bb77014ff136f7bb6133a911e5f53e914a7 | |
3 | ||
4 | From 782e5bb77014ff136f7bb6133a911e5f53e914a7 Mon Sep 17 00:00:00 2001 | |
5 | From: Petros Angelatos <petrosagg@resin.io> | |
6 | Date: Thu, 24 Dec 2015 14:43:17 -0800 | |
7 | Subject: [PATCH] linux-user: add option to intercept execve() syscalls | |
8 | ||
9 | In order for one to use QEMU user mode emulation under a chroot, it is | |
10 | required to use binfmt_misc. This can be avoided by QEMU never doing a | |
11 | raw execve() to the host system. | |
12 | ||
13 | Introduce a new option, -execve, that uses the current QEMU interpreter | |
14 | to intercept execve(). | |
15 | ||
16 | qemu_execve() will prepend the interpreter path, similar to what | |
17 | binfmt_misc would do, and then pass the modified execve() to the host. | |
18 | ||
19 | It is necessary to parse hashbang scripts in that function; otherwise | |
20 | the kernel will try to run the interpreter of a script without QEMU and | |
21 | get an invalid exec format error. | |
22 | ||
23 | Signed-off-by: Petros Angelatos <petrosagg@resin.io> | |
24 | --- | |
25 | linux-user/main.c | 36 ++++++++++++++++ | |
26 | linux-user/qemu.h | 1 + | |
27 | linux-user/syscall.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++- | |
28 | 3 files changed, 153 insertions(+), 1 deletion(-) | |
29 | ||
30 | diff --git a/linux-user/main.c b/linux-user/main.c | |
31 | index 700724e..16cce85 100644 | |
32 | --- a/linux-user/main.c | |
33 | +++ b/linux-user/main.c | |
34 | @@ -17,6 +17,7 @@ | |
35 | * along with this program; if not, see <http://www.gnu.org/licenses/>. | |
36 | */ | |
37 | #include "qemu/osdep.h" | |
38 | +#include <sys/auxv.h> | |
39 | #include <sys/mman.h> | |
40 | #include <sys/syscall.h> | |
41 | #include <sys/resource.h> | |
42 | @@ -75,6 +76,7 @@ static void usage(int exitcode); | |
43 | ||
44 | static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; | |
45 | const char *qemu_uname_release; | |
46 | +const char *qemu_execve_path; | |
47 | ||
48 | /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so | |
49 | we allocate a bigger stack. Need a better solution, for example | |
50 | @@ -3824,6 +3826,38 @@ static void handle_arg_guest_base(const char *arg) | |
51 | have_guest_base = 1; | |
52 | } | |
53 | ||
54 | +static void handle_arg_execve(const char *arg) | |
55 | +{ | |
56 | + const char *execfn; | |
57 | + char buf[PATH_MAX]; | |
58 | + char *ret; | |
59 | + int len; | |
60 | + | |
61 | + /* try getauxval() */ | |
62 | + execfn = (const char *) getauxval(AT_EXECFN); | |
63 | + | |
64 | + if (execfn != 0) { | |
65 | + ret = realpath(execfn, buf); | |
66 | + | |
67 | + if (ret != NULL) { | |
68 | + qemu_execve_path = strdup(buf); | |
69 | + return; | |
70 | + } | |
71 | + } | |
72 | + | |
73 | + /* try /proc/self/exe */ | |
74 | + len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); | |
75 | + | |
76 | + if (len != -1) { | |
77 | + buf[len] = '\0'; | |
78 | + qemu_execve_path = strdup(buf); | |
79 | + return; | |
80 | + } | |
81 | + | |
82 | + fprintf(stderr, "qemu_execve: unable to determine intepreter's path\n"); | |
83 | + exit(EXIT_FAILURE); | |
84 | +} | |
85 | + | |
86 | static void handle_arg_reserved_va(const char *arg) | |
87 | { | |
88 | char *p; | |
89 | @@ -3909,6 +3943,8 @@ static const struct qemu_argument arg_table[] = { | |
90 | "uname", "set qemu uname release string to 'uname'"}, | |
91 | {"B", "QEMU_GUEST_BASE", true, handle_arg_guest_base, | |
92 | "address", "set guest_base address to 'address'"}, | |
93 | + {"execve", "QEMU_EXECVE", false, handle_arg_execve, | |
94 | + "", "use this interpreter when a process calls execve()"}, | |
95 | {"R", "QEMU_RESERVED_VA", true, handle_arg_reserved_va, | |
96 | "size", "reserve 'size' bytes for guest virtual address space"}, | |
97 | {"d", "QEMU_LOG", true, handle_arg_log, | |
98 | diff --git a/linux-user/qemu.h b/linux-user/qemu.h | |
99 | index 26b0ba2..8270268 100644 | |
100 | --- a/linux-user/qemu.h | |
101 | +++ b/linux-user/qemu.h | |
102 | @@ -137,6 +137,7 @@ void init_task_state(TaskState *ts); | |
103 | void task_settid(TaskState *); | |
104 | void stop_all_tasks(void); | |
105 | extern const char *qemu_uname_release; | |
106 | +extern const char *qemu_execve_path; | |
107 | extern unsigned long mmap_min_addr; | |
108 | ||
109 | /* ??? See if we can avoid exposing so much of the loader internals. */ | |
110 | diff --git a/linux-user/syscall.c b/linux-user/syscall.c | |
111 | index 9517531..66446f7 100644 | |
112 | --- a/linux-user/syscall.c | |
113 | +++ b/linux-user/syscall.c | |
114 | @@ -99,6 +99,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base, | |
115 | #include <linux/route.h> | |
116 | #include <linux/filter.h> | |
117 | #include <linux/blkpg.h> | |
118 | +#include <linux/binfmts.h> | |
119 | #include "linux_loop.h" | |
120 | #include "uname.h" | |
121 | ||
122 | @@ -5845,6 +5846,118 @@ static target_timer_t get_timer_id(abi_long arg) | |
123 | return timerid; | |
124 | } | |
125 | ||
126 | +/* qemu_execve() must return target values and target errnos. */ | |
127 | +static abi_long qemu_execve(char *filename, char *argv[], | |
128 | + char *envp[]) | |
129 | +{ | |
130 | + char *i_arg = NULL, *i_name = NULL; | |
131 | + char **new_argp; | |
132 | + int argc, fd, ret, i, offset = 3; | |
133 | + char *cp; | |
134 | + char buf[BINPRM_BUF_SIZE]; | |
135 | + | |
136 | + /* normal execve case */ | |
137 | + if (qemu_execve_path == NULL || *qemu_execve_path == 0) { | |
138 | + return get_errno(execve(filename, argv, envp)); | |
139 | + } | |
140 | + | |
141 | + for (argc = 0; argv[argc] != NULL; argc++) { | |
142 | + /* nothing */ ; | |
143 | + } | |
144 | + | |
145 | + fd = open(filename, O_RDONLY); | |
146 | + if (fd == -1) { | |
147 | + return get_errno(fd); | |
148 | + } | |
149 | + | |
150 | + ret = read(fd, buf, BINPRM_BUF_SIZE); | |
151 | + if (ret == -1) { | |
152 | + close(fd); | |
153 | + return get_errno(ret); | |
154 | + } | |
155 | + | |
156 | + /* if we have less than 2 bytes, we can guess it is not executable */ | |
157 | + if (ret < 2) { | |
158 | + close(fd); | |
159 | + return -host_to_target_errno(ENOEXEC); | |
160 | + } | |
161 | + | |
162 | + close(fd); | |
163 | + | |
164 | + /* adapted from the kernel | |
165 | + * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c | |
166 | + */ | |
167 | + if ((buf[0] == '#') && (buf[1] == '!')) { | |
168 | + /* | |
169 | + * This section does the #! interpretation. | |
170 | + * Sorta complicated, but hopefully it will work. -TYT | |
171 | + */ | |
172 | + | |
173 | + buf[BINPRM_BUF_SIZE - 1] = '\0'; | |
174 | + cp = strchr(buf, '\n'); | |
175 | + if (cp == NULL) { | |
176 | + cp = buf + BINPRM_BUF_SIZE - 1; | |
177 | + } | |
178 | + *cp = '\0'; | |
179 | + while (cp > buf) { | |
180 | + cp--; | |
181 | + if ((*cp == ' ') || (*cp == '\t')) { | |
182 | + *cp = '\0'; | |
183 | + } else { | |
184 | + break; | |
185 | + } | |
186 | + } | |
187 | + for (cp = buf + 2; (*cp == ' ') || (*cp == '\t'); cp++) { | |
188 | + /* nothing */ ; | |
189 | + } | |
190 | + if (*cp == '\0') { | |
191 | + return -ENOEXEC; /* No interpreter name found */ | |
192 | + } | |
193 | + i_name = cp; | |
194 | + i_arg = NULL; | |
195 | + for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) { | |
196 | + /* nothing */ ; | |
197 | + } | |
198 | + while ((*cp == ' ') || (*cp == '\t')) { | |
199 | + *cp++ = '\0'; | |
200 | + } | |
201 | + if (*cp) { | |
202 | + i_arg = cp; | |
203 | + } | |
204 | + | |
205 | + if (i_arg) { | |
206 | + offset = 5; | |
207 | + } else { | |
208 | + offset = 4; | |
209 | + } | |
210 | + } | |
211 | + | |
212 | + new_argp = alloca((argc + offset + 1) * sizeof(void *)); | |
213 | + | |
214 | + /* Copy the original arguments with offset */ | |
215 | + for (i = 0; i < argc; i++) { | |
216 | + new_argp[i + offset] = argv[i]; | |
217 | + } | |
218 | + | |
219 | + new_argp[0] = strdup(qemu_execve_path); | |
220 | + new_argp[1] = strdup("-0"); | |
221 | + new_argp[offset] = filename; | |
222 | + new_argp[argc + offset] = NULL; | |
223 | + | |
224 | + if (i_name) { | |
225 | + new_argp[2] = i_name; | |
226 | + new_argp[3] = i_name; | |
227 | + | |
228 | + if (i_arg) { | |
229 | + new_argp[4] = i_arg; | |
230 | + } | |
231 | + } else { | |
232 | + new_argp[2] = argv[0]; | |
233 | + } | |
234 | + | |
235 | + return get_errno(execve(qemu_execve_path, new_argp, envp)); | |
236 | +} | |
237 | + | |
238 | /* do_syscall() should always have a single exit point at the end so | |
239 | that actions, such as logging of syscall results, can be performed. | |
240 | All errnos that do_syscall() returns must be -TARGET_<errcode>. */ | |
241 | @@ -6104,7 +6217,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, | |
242 | ||
243 | if (!(p = lock_user_string(arg1))) | |
244 | goto execve_efault; | |
245 | - ret = get_errno(execve(p, argp, envp)); | |
246 | + | |
247 | + ret = qemu_execve(p, argp, envp); | |
248 | + | |
249 | unlock_user(p, arg1, 0); | |
250 | ||
251 | goto execve_end; |