Index: dyngen-exec.h =================================================================== RCS file: /cvsroot/qemu/qemu/dyngen-exec.h,v retrieving revision 1.25 diff -u -p -r1.25 dyngen-exec.h --- dyngen-exec.h 24 Apr 2005 18:01:56 -0000 1.25 +++ dyngen-exec.h 11 May 2005 20:38:33 -0000 @@ -155,7 +155,12 @@ extern int printf(const char *, ...); #endif /* force GCC to generate only one epilog at the end of the function */ +#if defined(__i386__) || defined(__x86_64__) +/* Also add 4 bytes of padding so that we can replace the ret with a jmp. */ +#define FORCE_RET() asm volatile ("nop;nop;nop;nop"); +#else #define FORCE_RET() asm volatile (""); +#endif #ifndef OPPROTO #define OPPROTO @@ -205,12 +210,19 @@ extern int __op_jmp0, __op_jmp1, __op_jm #endif #ifdef __i386__ -#define EXIT_TB() asm volatile ("ret") -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n) +/* Dyngen will replace hlt instructions with a ret instruction. Inserting a + ret directly would confuse dyngen. */ +#define EXIT_TB() asm volatile ("hlt") +/* Dyngen will replace cli with 0x9e (jmp). + We generate the offset manually. */ +#define GOTO_LABEL_PARAM(n) \ + asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:") #endif #ifdef __x86_64__ -#define EXIT_TB() asm volatile ("ret") -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n) +/* The same as i386. */ +#define EXIT_TB() asm volatile ("hlt") +#define GOTO_LABEL_PARAM(n) \ + asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:") #endif #ifdef __powerpc__ #define EXIT_TB() asm volatile ("blr") Index: dyngen.c =================================================================== RCS file: /cvsroot/qemu/qemu/dyngen.c,v retrieving revision 1.40 diff -u -p -r1.40 dyngen.c --- dyngen.c 27 Apr 2005 19:55:58 -0000 1.40 +++ dyngen.c 11 May 2005 20:38:33 -0000 @@ -32,6 +32,8 @@ #include "config-host.h" +//#define DEBUG_OP + /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross compilation */ #if defined(CONFIG_WIN32) @@ -1343,6 +1345,639 @@ int arm_emit_ldr_info(const char *name, #endif +#if defined(HOST_I386) || defined(HOST_X86_64) + +/* This byte is the first byte of an instruction. */ +#define FLAG_INSN (1 << 0) +/* This byte has been processed as part of an instruction. */ +#define FLAG_SCANNED (1 << 1) +/* This instruction is a return instruction. Gcc cometimes generates prefix + bytes, so may be more than one byte long. */ +#define FLAG_RET (1 << 2) +/* This is either the target of a jump, or the preceeding instruction uses + a pc-relative offset. */ +#define FLAG_TARGET (1 << 3) +/* This is a magic instruction that needs fixing up. */ +#define FLAG_EXIT (1 << 4) +#define MAX_EXITS 5 + +static void +bad_opcode(const char *name, uint32_t op) +{ + error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name); +} + +/* Mark len bytes as scanned, Returns insn_size + len. Reports an error + if these bytes have already been scanned. */ +static int +eat_bytes(const char *name, char *flags, int insn, int insn_size, int len) +{ + while (len > 0) { + /* This should never occur in sane code. */ + if (flags[insn + insn_size] & FLAG_SCANNED) + error ("Overlapping instructions in %s", name); + flags[insn + insn_size] |= FLAG_SCANNED; + insn_size++; + len--; + } + return insn_size; +} + +static void +trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn, + int len) +{ + uint8_t *ptr; + uint8_t op; + int modrm; + int is_prefix; + int op_size; + int addr_size; + int insn_size; + int is_ret; + int is_condjmp; + int is_jmp; + int is_exit; + int is_pcrel; + int immed; + int seen_rexw; + int32_t disp; + + ptr = start_p + insn; + /* nonzero if this insn has a ModR/M byte. */ + modrm = 1; + /* The size of the immediate value in this instruction. */ + immed = 0; + /* The operand size. */ + op_size = 4; + /* The address size */ + addr_size = 4; + /* The total length of this instruction. */ + insn_size = 0; + is_prefix = 1; + is_ret = 0; + is_condjmp = 0; + is_jmp = 0; + is_exit = 0; + seen_rexw = 0; + is_pcrel = 0; + + while (is_prefix) { + op = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + is_prefix = 0; + switch (op >> 4) { + case 0: + case 1: + case 2: + case 3: + if (op == 0x0f) { + /* two-byte opcode. */ + op = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + switch (op >> 4) { + case 0: + if ((op & 0xf) > 3) + modrm = 0; + break; + case 1: /* vector move or prefetch */ + case 2: /* various moves and vector compares. */ + case 4: /* cmov */ + case 5: /* vector instructions */ + case 6: + case 13: + case 14: + case 15: + break; + case 7: /* mmx */ + if (op & 0x77) /* emms */ + modrm = 0; + break; + case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */ + modrm = 0; + break; + case 8: /* long conditional jump */ + is_condjmp = 1; + immed = op_size; + modrm = 0; + break; + case 9: /* setcc */ + break; + case 10: + switch (op & 0x7) { + case 0: /* push fs/gs */ + case 1: /* pop fs/gs */ + case 2: /* cpuid/rsm */ + modrm = 0; + break; + case 4: /* shld/shrd immediate */ + immed = 1; + break; + default: /* Normal instructions with a ModR/M byte. */ + break; + } + break; + case 11: + switch (op & 0xf) { + case 10: /* bt, bts, btr, btc */ + immed = 1; + break; + default: + /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr + undefined, and movsx */ + break; + } + break; + case 12: + if (op & 8) { + /* bswap */ + modrm = 0; + } else { + switch (op & 0x7) { + case 2: + case 4: + case 5: + case 6: + immed = 1; + break; + default: + break; + } + } + break; + } + } else if ((op & 0x07) <= 0x3) { + /* General arithmentic ax. */ + } else if ((op & 0x07) <= 0x5) { + /* General arithmetic ax, immediate. */ + if (op & 0x01) + immed = op_size; + else + immed = 1; + modrm = 0; + } else if ((op & 0x23) == 0x22) { + /* Segment prefix. */ + is_prefix = 1; + } else { + /* Segment register push/pop or DAA/AAA/DAS/AAS. */ + modrm = 0; + } + break; + +#if defined(HOST_X86_64) + case 4: /* rex prefix. */ + is_prefix = 1; + /* The address/operand size is actually 64-bit, but the immediate + values in the instruction are still 32-bit. */ + op_size = 4; + addr_size = 4; + if (op & 8) + seen_rexw = 1; + break; +#else + case 4: /* inc/dec register. */ +#endif + case 5: /* push/pop general register. */ + modrm = 0; + break; + + case 6: + switch (op & 0x0f) { + case 0: /* pusha */ + case 1: /* popa */ + modrm = 0; + break; + case 2: /* bound */ + case 3: /* arpl */ + break; + case 4: /* FS */ + case 5: /* GS */ + is_prefix = 1; + break; + case 6: /* opcode size prefix. */ + op_size = 2; + is_prefix = 1; + break; + case 7: /* Address size prefix. */ + addr_size = 2; + is_prefix = 1; + break; + case 8: /* push immediate */ + case 10: /* pop immediate */ + immed = op_size; + modrm = 0; + break; + case 9: /* imul immediate */ + case 11: /* imul immediate */ + immed = op_size; + break; + case 12: /* insb */ + case 13: /* insw */ + case 14: /* outsb */ + case 15: /* outsw */ + modrm = 0; + break; + } + break; + + case 7: /* Short conditional jump. */ + is_condjmp = 1; + immed = 1; + modrm = 0; + break; + + case 8: + if ((op & 0xf) <= 3) { + /* arithmetic immediate. */ + if ((op & 3) == 1) + immed = op_size; + else + immed = 1; + } + /* else test, xchg, mov, lea or pop general. */ + break; + + case 9: + /* Various single-byte opcodes with no modrm byte. */ + modrm = 0; + if (op == 10) { + /* Call */ + immed = 4; + } + break; + + case 10: + switch ((op & 0xe) >> 1) { + case 0: /* mov absoliute immediate. */ + case 1: + if (seen_rexw) + immed = 8; + else + immed = addr_size; + break; + case 4: /* test immediate. */ + if (op & 1) + immed = op_size; + else + immed = 1; + break; + default: /* Various string ops. */ + break; + } + modrm = 0; + break; + + case 11: /* move immediate to register */ + if (op & 8) { + if (seen_rexw) + immed = 8; + else + immed = op_size; + } else { + immed = 1; + } + modrm = 0; + break; + + case 12: + switch (op & 0xf) { + case 0: /* shift immediate */ + case 1: + immed = 1; + break; + case 2: /* ret immediate */ + immed = 2; + modrm = 0; + bad_opcode(name, op); + break; + case 3: /* ret */ + modrm = 0; + is_ret = 1; + case 4: /* les */ + case 5: /* lds */ + break; + case 6: /* mov immediate byte */ + immed = 1; + break; + case 7: /* mov immediate */ + immed = op_size; + break; + case 8: /* enter */ + /* TODO: Is this right? */ + immed = 3; + modrm = 0; + break; + case 10: /* retf immediate */ + immed = 2; + modrm = 0; + bad_opcode(name, op); + break; + case 13: /* int */ + immed = 1; + modrm = 0; + break; + case 11: /* retf */ + case 15: /* iret */ + modrm = 0; + bad_opcode(name, op); + break; + default: /* leave, int3 or into */ + modrm = 0; + break; + } + break; + + case 13: + if ((op & 0xf) >= 8) { + /* Coprocessor escape. For our purposes this is just a normal + instruction with a ModR/M byte. */ + } else if ((op & 0xf) >= 4) { + /* AAM, AAD or XLAT */ + modrm = 0; + } + /* else shift instruction */ + break; + + case 14: + switch ((op & 0xc) >> 2) { + case 0: /* loop or jcxz */ + is_condjmp = 1; + immed = 1; + break; + case 1: /* in/out immed */ + immed = 1; + break; + case 2: /* call or jmp */ + switch (op & 3) { + case 0: /* call */ + immed = op_size; + break; + case 1: /* long jump */ + immed = 4; + is_jmp = 1; + break; + case 2: /* far jmp */ + bad_opcode(name, op); + break; + case 3: /* short jmp */ + immed = 1; + is_jmp = 1; + break; + } + break; + case 3: /* in/out register */ + break; + } + modrm = 0; + break; + + case 15: + switch ((op & 0xe) >> 1) { + case 0: + case 1: + is_prefix = 1; + break; + case 2: + case 4: + case 5: + case 6: + modrm = 0; + /* Some privileged insns are used as markers. */ + switch (op) { + case 0xf4: /* hlt: Exit translation block. */ + is_exit = 1; + break; + case 0xfa: /* cli: Jump to label. */ + is_exit = 1; + immed = 4; + break; + case 0xfb: /* sti: TB patch jump. */ + /* Mark the insn for patching, but continue sscanning. */ + flags[insn] |= FLAG_EXIT; + immed = 4; + break; + } + break; + case 3: /* unary grp3 */ + if ((ptr[insn_size] & 0x38) == 0) { + if (op == 0xf7) + immed = op_size; + else + immed = 1; /* test immediate */ + } + break; + case 7: /* inc/dec grp4/5 */ + /* TODO: This includes indirect jumps. We should fail if we + encounter one of these. */ + break; + } + break; + } + } + + if (modrm) { + if (addr_size != 4) + error("16-bit addressing mode used in %s", name); + + disp = 0; + modrm = ptr[insn_size]; + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + modrm &= 0xc7; + switch ((modrm & 0xc0) >> 6) { + case 0: + if (modrm == 5) + disp = 4; + break; + case 1: + disp = 1; + break; + case 2: + disp = 4; + break; + } + if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) { + /* SIB byte */ + if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) { + disp = 4; + is_pcrel = 1; + } + insn_size = eat_bytes(name, flags, insn, insn_size, 1); + } + insn_size = eat_bytes(name, flags, insn, insn_size, disp); + } + insn_size = eat_bytes(name, flags, insn, insn_size, immed); + if (is_condjmp || is_jmp) { + if (immed == 1) { + disp = (int8_t)*(ptr + insn_size - 1); + } else { + disp = (((int32_t)*(ptr + insn_size - 1)) << 24) + | (((int32_t)*(ptr + insn_size - 2)) << 16) + | (((int32_t)*(ptr + insn_size - 3)) << 8) + | *(ptr + insn_size - 4); + } + disp += insn_size; + /* Jumps to external symbols point to the address of the offset + before relocation. */ + /* ??? These are probably a tailcall. We could fix them up by + replacing them with jmp to EOB + call, but it's easier to just + prevent the compiler generating them. */ + if (disp == 1) + error("Unconditional jump (sibcall?) in %s", name); + disp += insn; + if (disp < 0 || disp > len) + error("Jump outside instruction in %s", name); + + if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED) + error("Overlapping instructions in %s", name); + + flags[disp] |= (FLAG_INSN | FLAG_TARGET); + is_pcrel = 1; + } + if (is_pcrel) { + /* Mark the following insn as a jump target. This will stop + this instruction being moved. */ + flags[insn + insn_size] |= FLAG_TARGET; + } + if (is_ret) + flags[insn] |= FLAG_RET; + + if (is_exit) + flags[insn] |= FLAG_EXIT; + + if (!(is_jmp || is_ret || is_exit)) + flags[insn + insn_size] |= FLAG_INSN; +} + +/* Scan a function body. Returns the position of the return sequence. + Sets *patch_bytes to the number of bytes that need to be copied from that + location. If no patching is required (ie. the return is the last insn) + *patch_bytes will be set to -1. *plen is the number of code bytes to copy. + */ +static int trace_i386_op(const char * name, uint8_t *start_p, int *plen, + int *patch_bytes, int *exit_addrs) +{ + char *flags; + int more; + int insn; + int retpos; + int bytes; + int num_exits; + int len; + int last_insn; + + len = *plen; + flags = malloc(len + 1); + memset(flags, 0, len + 1); + flags[0] |= FLAG_INSN; + more = 1; + while (more) { + more = 0; + for (insn = 0; insn < len; insn++) { + if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) { + trace_i386_insn(name, start_p, flags, insn, len); + more = 1; + } + } + } + + /* Strip any unused code at the end of the function. */ + while (len > 0 && flags[len - 1] == 0) + len--; + + retpos = -1; + num_exits = 0; + last_insn = 0; + for (insn = 0; insn < len; insn++) { + if (flags[insn] & FLAG_RET) { + /* ??? In theory it should be possible to handle multiple return + points. In practice it's not worth the effort. */ + if (retpos != -1) + error("Multiple return instructions in %s", name); + retpos = insn; + } + if (flags[insn] & FLAG_EXIT) { + if (num_exits == MAX_EXITS) + error("Too many block exits in %s", name); + exit_addrs[num_exits] = insn; + num_exits++; + } + if (flags[insn] & FLAG_INSN) + last_insn = insn; + } + + exit_addrs[num_exits] = -1; + if (retpos == -1) { + if (num_exits == 0) { + error ("No return instruction found in %s", name); + } else { + retpos = len; + last_insn = len; + } + } + + /* If the return instruction is the last instruction we can just + remove it. */ + if (retpos == last_insn) + *patch_bytes = -1; + else + *patch_bytes = 0; + + /* Back up over any nop instructions. */ + while (retpos > 0 + && (flags[retpos] & FLAG_TARGET) == 0 + && (flags[retpos - 1] & FLAG_INSN) != 0 + && start_p[retpos - 1] == 0x90) { + retpos--; + } + + if (*patch_bytes == -1) { + *plen = retpos; + free (flags); + return retpos; + } + *plen = len; + + /* The ret is in the middle of the function. Find four more bytes that + so the ret can be replaced by a jmp. */ + /* ??? Use a short jump where possible. */ + bytes = 4; + insn = retpos + 1; + /* We can clobber everything up to the next jump target. */ + while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) { + insn++; + bytes--; + } + if (bytes > 0) { + /* ???: Strip out nop blocks. */ + /* We can't do the replacement without clobbering anything important. + Copy preceeding instructions(s) to give us some space. */ + while (retpos > 0) { + /* If this byte is the target of a jmp we can't move it. */ + if (flags[retpos] & FLAG_TARGET) + break; + + (*patch_bytes)++; + bytes--; + retpos--; + + /* Break out of the loop if we have enough space and this is either + the first byte of an instruction or a pad byte. */ + if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED + && bytes <= 0) { + break; + } + } + } + + if (bytes > 0) + error("Unable to replace ret with jmp in %s\n", name); + + free(flags); + return retpos; +} + +#endif + #define MAX_ARGS 3 /* generate op code */ @@ -1356,6 +1991,11 @@ void gen_code(const char *name, host_ulo uint8_t args_present[MAX_ARGS]; const char *sym_name, *p; EXE_RELOC *rel; +#if defined(HOST_I386) || defined(HOST_X86_64) + int patch_bytes; + int retpos; + int exit_addrs[MAX_EXITS]; +#endif /* Compute exact size excluding prologue and epilogue instructions. * Increment start_offset to skip epilogue instructions, then compute @@ -1366,33 +2006,12 @@ void gen_code(const char *name, host_ulo p_end = p_start + size; start_offset = offset; #if defined(HOST_I386) || defined(HOST_X86_64) -#ifdef CONFIG_FORMAT_COFF - { - uint8_t *p; - p = p_end - 1; - if (p == p_start) - error("empty code for %s", name); - while (*p != 0xc3) { - p--; - if (p <= p_start) - error("ret or jmp expected at the end of %s", name); - } - copy_size = p - p_start; - } -#else { int len; len = p_end - p_start; - if (len == 0) - error("empty code for %s", name); - if (p_end[-1] == 0xc3) { - len--; - } else { - error("ret or jmp expected at the end of %s", name); - } + retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs); copy_size = len; } -#endif #elif defined(HOST_PPC) { uint8_t *p; @@ -1559,6 +2178,13 @@ void gen_code(const char *name, host_ulo } if (gen_switch == 2) { +#if defined(HOST_I386) || defined(HOST_X86_64) + if (patch_bytes != -1) + copy_size += patch_bytes; +#ifdef DEBUG_OP + copy_size += 2; +#endif +#endif fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size); } else if (gen_switch == 1) { @@ -1761,7 +2387,43 @@ void gen_code(const char *name, host_ulo #error unsupport object format #endif } + } + /* Replace the marker instructions with the actual opcodes. */ + for (i = 0; exit_addrs[i] != -1; i++) { + int op; + switch (p_start[exit_addrs[i]]) + { + case 0xf4: op = 0xc3; break; /* hlt -> ret */ + case 0xfa: op = 0xe9; break; /* cli -> jmp */ + case 0xfb: op = 0xe9; break; /* sti -> jmp */ + default: error("Internal error"); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n", + exit_addrs[i], op); + } + /* Fix up the return instruction. */ + if (patch_bytes != -1) { + if (patch_bytes) { + fprintf(outfile, " memcpy(gen_code_ptr + %d," + "gen_code_ptr + %d, %d);\n", + copy_size, retpos, patch_bytes); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n", + retpos); + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n", + retpos + 1, copy_size - (retpos + 5)); + + copy_size += patch_bytes; } +#ifdef DEBUG_OP + fprintf(outfile, + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n", + copy_size); + copy_size += 2; +#endif } #elif defined(HOST_X86_64) { @@ -1793,6 +2455,42 @@ void gen_code(const char *name, host_ulo } } } + /* Replace the marker instructions with the actual opcodes. */ + for (i = 0; exit_addrs[i] != -1; i++) { + int op; + switch (p_start[exit_addrs[i]]) + { + case 0xf4: op = 0xc3; break; /* hlt -> ret */ + case 0xfa: op = 0xe9; break; /* cli -> jmp */ + case 0xfb: op = 0xe9; break; /* sti -> jmp */ + default: error("Internal error"); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n", + exit_addrs[i], op); + } + /* Fix up the return instruction. */ + if (patch_bytes != -1) { + if (patch_bytes) { + fprintf(outfile, " memcpy(gen_code_ptr + %d," + "gen_code_ptr + %d, %d);\n", + copy_size, retpos, patch_bytes); + } + fprintf(outfile, + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n", + retpos); + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n", + retpos + 1, copy_size - (retpos + 5)); + + copy_size += patch_bytes; + } +#ifdef DEBUG_OP + fprintf(outfile, + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n", + copy_size); + copy_size += 2; +#endif } #elif defined(HOST_PPC) { Index: exec-all.h =================================================================== RCS file: /cvsroot/qemu/qemu/exec-all.h,v retrieving revision 1.31 diff -u -p -r1.31 exec-all.h --- exec-all.h 2005-09-04 19:11:31.000000000 +0200 +++ exec-all.h 2005-09-06 03:16:54.000000000 +0200 @@ -338,14 +338,15 @@ #elif defined(__i386__) && defined(USE_DIRECT_JUMP) -/* we patch the jump instruction directly */ +/* we patch the jump instruction directly. Use sti in place of the actual + jmp instruction so that dyngen can patch in the correct result. */ #define GOTO_TB(opname, tbparam, n)\ do {\ asm volatile (".section .data\n"\ ASM_OP_LABEL_NAME(n, opname) ":\n"\ ".long 1f\n"\ ASM_PREVIOUS_SECTION \ - "jmp " ASM_NAME(__op_jmp) #n "\n"\ + "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\ "1:\n");\ } while (0) Index: target-ppc/exec.h =================================================================== RCS file: /cvsroot/qemu/qemu/target-ppc/exec.h,v retrieving revision 1.10 diff -u -p -r1.10 exec.h --- target-ppc/exec.h 13 Mar 2005 17:01:22 -0000 1.10 +++ target-ppc/exec.h 11 May 2005 20:38:35 -0000 @@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3); #define FT1 (env->ft1) #define FT2 (env->ft2) -#if defined (DEBUG_OP) -#define RETURN() __asm__ __volatile__("nop"); -#else -#define RETURN() __asm__ __volatile__(""); -#endif +#define RETURN() FORCE_RET() #include "cpu.h" #include "exec-all.h"