1 2005-06-02 Gwenole Beauchesne <gbeauchesne@mandriva.com>
3 * dyngen.c (trace_i386_insn): Fix push/imul case with 8-bit
6 2005-05-11 Paul Brook <paul@codesourcery.com>
10 --- qemu-0.7.0/target-ppc/exec.h.gcc4 2005-04-27 22:52:05.000000000 +0200
11 +++ qemu-0.7.0/target-ppc/exec.h 2005-06-02 21:41:51.000000000 +0200
12 @@ -33,11 +33,7 @@ register uint32_t T2 asm(AREG3);
13 #define FT1 (env->ft1)
14 #define FT2 (env->ft2)
16 -#if defined (DEBUG_OP)
17 -#define RETURN() __asm__ __volatile__("nop");
19 -#define RETURN() __asm__ __volatile__("");
21 +#define RETURN() FORCE_RET()
25 --- qemu-0.7.0/dyngen-exec.h.gcc4 2005-04-27 22:52:05.000000000 +0200
26 +++ qemu-0.7.0/dyngen-exec.h 2005-06-02 21:41:51.000000000 +0200
27 @@ -155,7 +155,12 @@ extern int printf(const char *, ...);
30 /* force GCC to generate only one epilog at the end of the function */
31 +#if defined(__i386__) || defined(__x86_64__)
32 +/* Also add 4 bytes of padding so that we can replace the ret with a jmp. */
33 +#define FORCE_RET() asm volatile ("nop;nop;nop;nop");
35 #define FORCE_RET() asm volatile ("");
40 @@ -205,12 +210,19 @@ extern int __op_jmp0, __op_jmp1, __op_jm
44 -#define EXIT_TB() asm volatile ("ret")
45 -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
46 +/* Dyngen will replace hlt instructions with a ret instruction. Inserting a
47 + ret directly would confuse dyngen. */
48 +#define EXIT_TB() asm volatile ("hlt")
49 +/* Dyngen will replace cli with 0xe9 (jmp).
50 + We generate the offset manually. */
51 +#define GOTO_LABEL_PARAM(n) \
52 + asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
55 -#define EXIT_TB() asm volatile ("ret")
56 -#define GOTO_LABEL_PARAM(n) asm volatile ("jmp " ASM_NAME(__op_gen_label) #n)
57 +/* The same as i386. */
58 +#define EXIT_TB() asm volatile ("hlt")
59 +#define GOTO_LABEL_PARAM(n) \
60 + asm volatile ("cli;.long " ASM_NAME(__op_gen_label) #n " - 1f;1:")
63 #define EXIT_TB() asm volatile ("blr")
64 --- qemu-0.7.0/dyngen.c.gcc4 2005-04-27 22:52:05.000000000 +0200
65 +++ qemu-0.7.0/dyngen.c 2005-06-02 22:25:06.000000000 +0200
68 #include "config-host.h"
72 /* NOTE: we test CONFIG_WIN32 instead of _WIN32 to enabled cross
74 #if defined(CONFIG_WIN32)
75 @@ -1343,6 +1345,644 @@ int arm_emit_ldr_info(const char *name,
79 +#if defined(HOST_I386) || defined(HOST_X86_64)
81 +/* This byte is the first byte of an instruction. */
82 +#define FLAG_INSN (1 << 0)
83 +/* This byte has been processed as part of an instruction. */
84 +#define FLAG_SCANNED (1 << 1)
85 +/* This instruction is a return instruction. Gcc sometimes generates prefix
86 + bytes, so it may be more than one byte long. */
87 +#define FLAG_RET (1 << 2)
88 +/* This is either the target of a jump, or the preceding instruction uses
89 + a pc-relative offset. */
90 +#define FLAG_TARGET (1 << 3)
91 +/* This is a magic instruction that needs fixing up. */
92 +#define FLAG_EXIT (1 << 4)
96 +bad_opcode(const char *name, uint32_t op)
98 + error("Unsupported opcode %0*x in %s", (op > 0xff) ? 4 : 2, op, name);
101 +/* Mark len bytes as scanned. Returns insn_size + len. Reports an error
102 + if these bytes have already been scanned. */
104 +eat_bytes(const char *name, char *flags, int insn, int insn_size, int len)
107 + /* This should never occur in sane code. */
108 + if (flags[insn + insn_size] & FLAG_SCANNED)
109 + error ("Overlapping instructions in %s", name);
110 + flags[insn + insn_size] |= FLAG_SCANNED;
118 +trace_i386_insn (const char *name, uint8_t *start_p, char *flags, int insn,
137 + ptr = start_p + insn;
138 + /* nonzero if this insn has a ModR/M byte. */
140 + /* The size of the immediate value in this instruction. */
142 + /* The operand size. */
144 + /* The address size */
146 + /* The total length of this instruction. */
156 + while (is_prefix) {
157 + op = ptr[insn_size];
158 + insn_size = eat_bytes(name, flags, insn, insn_size, 1);
166 + /* two-byte opcode. */
167 + op = ptr[insn_size];
168 + insn_size = eat_bytes(name, flags, insn, insn_size, 1);
171 + if ((op & 0xf) > 3)
174 + case 1: /* vector move or prefetch */
175 + case 2: /* various moves and vector compares. */
177 + case 5: /* vector instructions */
184 + if (op & 0x77) /* emms */
187 + case 3: /* wrmsr, rdtsc, rdmsr, rdpmc, sysenter, sysexit */
190 + case 8: /* long conditional jump */
195 + case 9: /* setcc */
198 + switch (op & 0x7) {
199 + case 0: /* push fs/gs */
200 + case 1: /* pop fs/gs */
201 + case 2: /* cpuid/rsm */
204 + case 4: /* shld/shrd immediate */
207 + default: /* Normal instructions with a ModR/M byte. */
212 + switch (op & 0xf) {
213 + case 10: /* bt, bts, btr, btc */
217 + /* cmpxchg, lss, btr, lfs, lgs, movzx, btc, bsf, bsr
218 + undefined, and movsx */
227 + switch (op & 0x7) {
240 + } else if ((op & 0x07) <= 0x3) {
241 + /* General arithmetic ax. */
242 + } else if ((op & 0x07) <= 0x5) {
243 + /* General arithmetic ax, immediate. */
249 + } else if ((op & 0x23) == 0x22) {
250 + /* Segment prefix. */
253 + /* Segment register push/pop or DAA/AAA/DAS/AAS. */
258 +#if defined(HOST_X86_64)
259 + case 4: /* rex prefix. */
261 + /* The address/operand size is actually 64-bit, but the immediate
262 + values in the instruction are still 32-bit. */
269 + case 4: /* inc/dec register. */
271 + case 5: /* push/pop general register. */
276 + switch (op & 0x0f) {
277 + case 0: /* pusha */
281 + case 2: /* bound */
288 + case 6: /* opcode size prefix. */
292 + case 7: /* Address size prefix. */
296 + case 8: /* push immediate */
300 + case 10: /* push 8-bit immediate */
304 + case 9: /* imul immediate */
307 + case 11: /* imul 8-bit immediate */
310 + case 12: /* insb */
311 + case 13: /* insw */
312 + case 14: /* outsb */
313 + case 15: /* outsw */
319 + case 7: /* Short conditional jump. */
326 + if ((op & 0xf) <= 3) {
327 + /* arithmetic immediate. */
333 + /* else test, xchg, mov, lea or pop general. */
337 + /* Various single-byte opcodes with no modrm byte. */
346 + switch ((op & 0xe) >> 1) {
347 + case 0: /* mov absolute immediate. */
354 + case 4: /* test immediate. */
360 + default: /* Various string ops. */
366 + case 11: /* move immediate to register */
379 + switch (op & 0xf) {
380 + case 0: /* shift immediate */
384 + case 2: /* ret immediate */
387 + bad_opcode(name, op);
395 + case 6: /* mov immediate byte */
398 + case 7: /* mov immediate */
401 + case 8: /* enter */
402 + /* TODO: Is this right? */
406 + case 10: /* retf immediate */
409 + bad_opcode(name, op);
415 + case 11: /* retf */
416 + case 15: /* iret */
418 + bad_opcode(name, op);
420 + default: /* leave, int3 or into */
427 + if ((op & 0xf) >= 8) {
428 + /* Coprocessor escape. For our purposes this is just a normal
429 + instruction with a ModR/M byte. */
430 + } else if ((op & 0xf) >= 4) {
431 + /* AAM, AAD or XLAT */
434 + /* else shift instruction */
438 + switch ((op & 0xc) >> 2) {
439 + case 0: /* loop or jcxz */
443 + case 1: /* in/out immed */
446 + case 2: /* call or jmp */
451 + case 1: /* long jump */
455 + case 2: /* far jmp */
456 + bad_opcode(name, op);
458 + case 3: /* short jmp */
464 + case 3: /* in/out register */
471 + switch ((op & 0xe) >> 1) {
481 + /* Some privileged insns are used as markers. */
483 + case 0xf4: /* hlt: Exit translation block. */
486 + case 0xfa: /* cli: Jump to label. */
490 + case 0xfb: /* sti: TB patch jump. */
491 + /* Mark the insn for patching, but continue scanning. */
492 + flags[insn] |= FLAG_EXIT;
497 + case 3: /* unary grp3 */
498 + if ((ptr[insn_size] & 0x38) == 0) {
502 + immed = 1; /* test immediate */
505 + case 7: /* inc/dec grp4/5 */
506 + /* TODO: This includes indirect jumps. We should fail if we
507 + encounter one of these. */
515 + if (addr_size != 4)
516 + error("16-bit addressing mode used in %s", name);
519 + modrm = ptr[insn_size];
520 + insn_size = eat_bytes(name, flags, insn, insn_size, 1);
522 + switch ((modrm & 0xc0) >> 6) {
534 + if ((modrm & 0xc0) != 0xc0 && (modrm & 0x7) == 4) {
536 + if (modrm == 4 && (ptr[insn_size] & 0x7) == 5) {
540 + insn_size = eat_bytes(name, flags, insn, insn_size, 1);
542 + insn_size = eat_bytes(name, flags, insn, insn_size, disp);
544 + insn_size = eat_bytes(name, flags, insn, insn_size, immed);
545 + if (is_condjmp || is_jmp) {
547 + disp = (int8_t)*(ptr + insn_size - 1);
549 + disp = (((int32_t)*(ptr + insn_size - 1)) << 24)
550 + | (((int32_t)*(ptr + insn_size - 2)) << 16)
551 + | (((int32_t)*(ptr + insn_size - 3)) << 8)
552 + | *(ptr + insn_size - 4);
555 + /* Jumps to external symbols point to the address of the offset
556 + before relocation. */
557 + /* ??? These are probably a tailcall. We could fix them up by
558 + replacing them with jmp to EOB + call, but it's easier to just
559 + prevent the compiler generating them. */
561 + error("Unconditional jump (sibcall?) in %s", name);
563 + if (disp < 0 || disp > len)
564 + error("Jump outside instruction in %s", name);
566 + if ((flags[disp] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_SCANNED)
567 + error("Overlapping instructions in %s", name);
569 + flags[disp] |= (FLAG_INSN | FLAG_TARGET);
573 + /* Mark the following insn as a jump target. This will stop
574 + this instruction being moved. */
575 + flags[insn + insn_size] |= FLAG_TARGET;
578 + flags[insn] |= FLAG_RET;
581 + flags[insn] |= FLAG_EXIT;
583 + if (!(is_jmp || is_ret || is_exit))
584 + flags[insn + insn_size] |= FLAG_INSN;
587 +/* Scan a function body. Returns the position of the return sequence.
588 + Sets *patch_bytes to the number of bytes that need to be copied from that
589 + location. If no patching is required (ie. the return is the last insn)
590 + *patch_bytes will be set to -1. *plen is the number of code bytes to copy.
592 +static int trace_i386_op(const char * name, uint8_t *start_p, int *plen,
593 + int *patch_bytes, int *exit_addrs)
605 + flags = malloc(len + 1);
606 + memset(flags, 0, len + 1);
607 + flags[0] |= FLAG_INSN;
611 + for (insn = 0; insn < len; insn++) {
612 + if ((flags[insn] & (FLAG_INSN | FLAG_SCANNED)) == FLAG_INSN) {
613 + trace_i386_insn(name, start_p, flags, insn, len);
619 + /* Strip any unused code at the end of the function. */
620 + while (len > 0 && flags[len - 1] == 0)
626 + for (insn = 0; insn < len; insn++) {
627 + if (flags[insn] & FLAG_RET) {
628 + /* ??? In theory it should be possible to handle multiple return
629 + points. In practice it's not worth the effort. */
631 + error("Multiple return instructions in %s", name);
634 + if (flags[insn] & FLAG_EXIT) {
635 + if (num_exits == MAX_EXITS)
636 + error("Too many block exits in %s", name);
637 + exit_addrs[num_exits] = insn;
640 + if (flags[insn] & FLAG_INSN)
644 + exit_addrs[num_exits] = -1;
645 + if (retpos == -1) {
646 + if (num_exits == 0) {
647 + error ("No return instruction found in %s", name);
654 + /* If the return instruction is the last instruction we can just
656 + if (retpos == last_insn)
661 + /* Back up over any nop instructions. */
663 + && (flags[retpos] & FLAG_TARGET) == 0
664 + && (flags[retpos - 1] & FLAG_INSN) != 0
665 + && start_p[retpos - 1] == 0x90) {
669 + if (*patch_bytes == -1) {
676 + /* The ret is in the middle of the function. Find four more bytes so
677 + that the ret can be replaced by a jmp. */
678 + /* ??? Use a short jump where possible. */
681 + /* We can clobber everything up to the next jump target. */
682 + while (insn < len && bytes > 0 && (flags[insn] & FLAG_TARGET) == 0) {
687 + /* ???: Strip out nop blocks. */
688 + /* We can't do the replacement without clobbering anything important.
689 + Copy preceding instruction(s) to give us some space. */
690 + while (retpos > 0) {
691 + /* If this byte is the target of a jmp we can't move it. */
692 + if (flags[retpos] & FLAG_TARGET)
699 + /* Break out of the loop if we have enough space and this is either
700 + the first byte of an instruction or a pad byte. */
701 + if ((flags[retpos] & (FLAG_INSN | FLAG_SCANNED)) != FLAG_SCANNED
709 + error("Unable to replace ret with jmp in %s\n", name);
719 /* generate op code */
720 @@ -1356,6 +1996,11 @@ void gen_code(const char *name, host_ulo
721 uint8_t args_present[MAX_ARGS];
722 const char *sym_name, *p;
724 +#if defined(HOST_I386) || defined(HOST_X86_64)
727 + int exit_addrs[MAX_EXITS];
730 /* Compute exact size excluding prologue and epilogue instructions.
731 * Increment start_offset to skip epilogue instructions, then compute
732 @@ -1366,33 +2011,12 @@ void gen_code(const char *name, host_ulo
733 p_end = p_start + size;
734 start_offset = offset;
735 #if defined(HOST_I386) || defined(HOST_X86_64)
736 -#ifdef CONFIG_FORMAT_COFF
741 - error("empty code for %s", name);
742 - while (*p != 0xc3) {
745 - error("ret or jmp expected at the end of %s", name);
747 - copy_size = p - p_start;
752 len = p_end - p_start;
754 - error("empty code for %s", name);
755 - if (p_end[-1] == 0xc3) {
758 - error("ret or jmp expected at the end of %s", name);
760 + retpos = trace_i386_op(name, p_start, &len, &patch_bytes, exit_addrs);
764 #elif defined(HOST_PPC)
767 @@ -1559,6 +2183,13 @@ void gen_code(const char *name, host_ulo
770 if (gen_switch == 2) {
771 +#if defined(HOST_I386) || defined(HOST_X86_64)
772 + if (patch_bytes != -1)
773 + copy_size += patch_bytes;
778 fprintf(outfile, "DEF(%s, %d, %d)\n", name + 3, nb_args, copy_size);
779 } else if (gen_switch == 1) {
781 @@ -1761,7 +2392,43 @@ void gen_code(const char *name, host_ulo
782 #error unsupport object format
786 + /* Replace the marker instructions with the actual opcodes. */
787 + for (i = 0; exit_addrs[i] != -1; i++) {
789 + switch (p_start[exit_addrs[i]])
791 + case 0xf4: op = 0xc3; break; /* hlt -> ret */
792 + case 0xfa: op = 0xe9; break; /* cli -> jmp */
793 + case 0xfb: op = 0xe9; break; /* sti -> jmp */
794 + default: error("Internal error");
797 + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
798 + exit_addrs[i], op);
800 + /* Fix up the return instruction. */
801 + if (patch_bytes != -1) {
803 + fprintf(outfile, " memcpy(gen_code_ptr + %d,"
804 + "gen_code_ptr + %d, %d);\n",
805 + copy_size, retpos, patch_bytes);
808 + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
811 + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
812 + retpos + 1, copy_size - (retpos + 5));
814 + copy_size += patch_bytes;
818 + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
823 #elif defined(HOST_X86_64)
825 @@ -1793,6 +2460,42 @@ void gen_code(const char *name, host_ulo
829 + /* Replace the marker instructions with the actual opcodes. */
830 + for (i = 0; exit_addrs[i] != -1; i++) {
832 + switch (p_start[exit_addrs[i]])
834 + case 0xf4: op = 0xc3; break; /* hlt -> ret */
835 + case 0xfa: op = 0xe9; break; /* cli -> jmp */
836 + case 0xfb: op = 0xe9; break; /* sti -> jmp */
837 + default: error("Internal error");
840 + " *(uint8_t *)(gen_code_ptr + %d) = 0x%x;\n",
841 + exit_addrs[i], op);
843 + /* Fix up the return instruction. */
844 + if (patch_bytes != -1) {
846 + fprintf(outfile, " memcpy(gen_code_ptr + %d,"
847 + "gen_code_ptr + %d, %d);\n",
848 + copy_size, retpos, patch_bytes);
851 + " *(uint8_t *)(gen_code_ptr + %d) = 0xe9;\n",
854 + " *(uint32_t *)(gen_code_ptr + %d) = 0x%x;\n",
855 + retpos + 1, copy_size - (retpos + 5));
857 + copy_size += patch_bytes;
861 + " *(uint16_t *)(gen_code_ptr + %d) = 0x9090;\n",
866 #elif defined(HOST_PPC)
868 --- qemu-0.7.0/exec-all.h.gcc4 2005-04-27 22:52:05.000000000 +0200
869 +++ qemu-0.7.0/exec-all.h 2005-06-02 21:41:51.000000000 +0200
870 @@ -335,14 +335,15 @@ do {\
872 #elif defined(__i386__) && defined(USE_DIRECT_JUMP)
874 -/* we patch the jump instruction directly */
875 +/* we patch the jump instruction directly. Use sti in place of the actual
876 + jmp instruction so that dyngen can patch in the correct result. */
877 #define GOTO_TB(opname, tbparam, n)\
879 asm volatile (".section .data\n"\
880 ASM_OP_LABEL_NAME(n, opname) ":\n"\
882 ASM_PREVIOUS_SECTION \
883 - "jmp " ASM_NAME(__op_jmp) #n "\n"\
884 + "sti;.long " ASM_NAME(__op_jmp) #n " - 1f\n"\