Discussion: https://resin.io/blog/building-arm-containers-on-any-x86-machine-even-dockerhub/ https://github.com/resin-io/qemu/commit/782e5bb77014ff136f7bb6133a911e5f53e914a7 https://github.com/resin-io/qemu/commit/782e5bb77014ff136f7bb6133a911e5f53e914a7#commitcomment-17193923 It has gone through review[1][2][3] and I'm waiting for the maintainer of the linux-user subsystem to accept it in his tree. [1] https://patchwork.ozlabs.org/patch/569452/ [2] https://patchwork.ozlabs.org/patch/573877/ [3] https://patchwork.ozlabs.org/patch/582756/ From patchwork Mon Feb 15 05:51:47 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [v3] linux-user: add option to intercept execve() syscalls From: Petros Angelatos X-Patchwork-Id: 582756 Message-Id: <1455515507-26877-1-git-send-email-petrosagg@resin.io> To: qemu-devel@nongnu.org Cc: lucas.kaldstrom@hotmail.co.uk, peter.maydell@linaro.org, riku.voipio@iki.fi, laurent@vivier.eu, Petros Angelatos Date: Sun, 14 Feb 2016 21:51:47 -0800 In order for one to use QEMU user mode emulation under a chroot, it is required to use binfmt_misc. This can be avoided by QEMU never doing a raw execve() to the host system. Introduce a new option, -execve, that uses the current QEMU interpreter to intercept execve(). qemu_execve() will prepend the interpreter path , similar to what binfmt_misc would do, and then pass the modified execve() to the host. It is necessary to parse hashbang scripts in that function otherwise the kernel will try to run the interpreter of a script without QEMU and get an invalid exec format error. Signed-off-by: Petros Angelatos Tested-by: Laurent Vivier Reviewed-by: Laurent Vivier --- v3 changes: - rebase the patchset against current code diff --git a/linux-user/main.c b/linux-user/main.c index ee12035..5951279 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -79,6 +79,7 @@ static void usage(int exitcode); static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; +const char *qemu_execve_path; /* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so we allocate a bigger stack. Need a better solution, for example @@ -3828,6 +3829,11 @@ static void handle_arg_guest_base(const char *arg) have_guest_base = 1; } +static void handle_arg_execve(const char *arg) +{ + qemu_execve_path = strdup(arg); +} + static void handle_arg_reserved_va(const char *arg) { char *p; @@ -3913,6 +3919,8 @@ static const struct qemu_argument arg_table[] = { "uname", "set qemu uname release string to 'uname'"}, {"B", "QEMU_GUEST_BASE", true, handle_arg_guest_base, "address", "set guest_base address to 'address'"}, + {"execve", "QEMU_EXECVE", true, handle_arg_execve, + "path", "use interpreter at 'path' when a process calls execve()"}, {"R", "QEMU_RESERVED_VA", true, handle_arg_reserved_va, "size", "reserve 'size' bytes for guest virtual address space"}, {"d", "QEMU_LOG", true, handle_arg_log, diff --git a/linux-user/qemu.h b/linux-user/qemu.h index bd90cc3..0d9b058 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -140,6 +140,7 @@ void init_task_state(TaskState *ts); void task_settid(TaskState *); void stop_all_tasks(void); extern const char *qemu_uname_release; +extern const char *qemu_execve_path; extern unsigned long mmap_min_addr; /* ??? See if we can avoid exposing so much of the loader internals. */ --- qemu-2.12.0/linux-user/syscall.c~ 2018-04-30 21:43:39.000000000 +0300 +++ qemu-2.12.0/linux-user/syscall.c 2018-04-30 21:46:36.362935706 +0300 @@ -5854,6 +5854,109 @@ static target_timer_t get_timer_id(abi_long arg) return timerid; } +#define BINPRM_BUF_SIZE 128 + +/* qemu_execve() Must return target values and target errnos. */ +static abi_long qemu_execve(char *filename, char *argv[], + char *envp[]) +{ + char *i_arg = NULL, *i_name = NULL; + char **new_argp; + int argc, fd, ret, i, offset = 3; + char *cp; + char buf[BINPRM_BUF_SIZE]; + + for (argc = 0; argv[argc] != NULL; argc++) { + /* nothing */ ; + } + + fd = open(filename, O_RDONLY); + if (fd == -1) { + return -ENOENT; + } + + ret = read(fd, buf, BINPRM_BUF_SIZE); + if (ret == -1) { + close(fd); + return -ENOENT; + } + + close(fd); + + /* adapted from the kernel + * https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/fs/binfmt_script.c + */ + if ((buf[0] == '#') && (buf[1] == '!')) { + /* + * This section does the #! interpretation. + * Sorta complicated, but hopefully it will work. -TYT + */ + + buf[BINPRM_BUF_SIZE - 1] = '\0'; + cp = strchr(buf, '\n'); + if (cp == NULL) { + cp = buf+BINPRM_BUF_SIZE-1; + } + *cp = '\0'; + while (cp > buf) { + cp--; + if ((*cp == ' ') || (*cp == '\t')) { + *cp = '\0'; + } else { + break; + } + } + for (cp = buf+2; (*cp == ' ') || (*cp == '\t'); cp++) { + /* nothing */ ; + } + if (*cp == '\0') { + return -ENOEXEC; /* No interpreter name found */ + } + i_name = cp; + i_arg = NULL; + for ( ; *cp && (*cp != ' ') && (*cp != '\t'); cp++) { + /* nothing */ ; + } + while ((*cp == ' ') || (*cp == '\t')) { + *cp++ = '\0'; + } + if (*cp) { + i_arg = cp; + } + + if (i_arg) { + offset = 5; + } else { + offset = 4; + } + } + + new_argp = alloca((argc + offset + 1) * sizeof(void *)); + + /* Copy the original arguments with offset */ + for (i = 0; i < argc; i++) { + new_argp[i + offset] = argv[i]; + } + + new_argp[0] = strdup(qemu_execve_path); + new_argp[1] = strdup("-0"); + new_argp[offset] = filename; + new_argp[argc + offset] = NULL; + + if (i_name) { + new_argp[2] = i_name; + new_argp[3] = i_name; + + if (i_arg) { + new_argp[4] = i_arg; + } + } else { + new_argp[2] = argv[0]; + } + + return get_errno(safe_execve(qemu_execve_path, new_argp, envp)); +} + static int target_to_host_cpu_mask(unsigned long *host_mask, size_t host_size, abi_ulong target_addr, @@ -8257,7 +8257,12 @@ * before the execve completes and makes it the other * program's problem. */ - ret = get_errno(safe_execve(p, argp, envp)); + if (qemu_execve_path && *qemu_execve_path) { + ret = get_errno(qemu_execve(p, argp, envp)); + } else { + ret = get_errno(safe_execve(p, argp, envp)); + } + unlock_user(p, arg1, 0); goto execve_end;