From: Paweł Sikora Date: Sat, 23 Jul 2005 15:50:38 +0000 (+0000) Subject: - fix mmx/x87 interactions. X-Git-Tag: auto/th/gcc-4_1_0-0_20050724T0642UTC_0_1~2 X-Git-Url: http://git.pld-linux.org/?a=commitdiff_plain;h=e2ca1c07d21d161339240c0eac686214b1f88816;p=packages%2Fgcc.git - fix mmx/x87 interactions. Changed files: gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch -> 1.1 --- diff --git a/gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch b/gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch new file mode 100644 index 0000000..253d3a8 --- /dev/null +++ b/gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch @@ -0,0 +1,1558 @@ +--- gcc/gcc/builtins.c 2005-07-12 11:19:59.000000000 +0200 ++++ gcc/gcc/builtins.c 2005-07-18 06:14:15.000000000 +0200 +@@ -52,6 +52,14 @@ Software Foundation, 51 Franklin Street, + #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN + #endif + ++#ifndef FUNCTION_VALUE_REGNO_P_APPLY_RESULT ++#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT FUNCTION_VALUE_REGNO_P ++#endif ++ ++#ifndef FUNCTION_ARG_REGNO_P_APPLY_ARGS ++#define FUNCTION_ARG_REGNO_P_APPLY_ARGS FUNCTION_ARG_REGNO_P ++#endif ++ + /* Define the names of the builtin function types and codes. 
*/ + const char *const built_in_class_names[4] + = {"NOT_BUILT_IN", "BUILT_IN_FRONTEND", "BUILT_IN_MD", "BUILT_IN_NORMAL"}; +@@ -1079,7 +1087,7 @@ apply_args_size (void) + size += GET_MODE_SIZE (Pmode); + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (FUNCTION_ARG_REGNO_P (regno)) ++ if (FUNCTION_ARG_REGNO_P_APPLY_ARGS (regno)) + { + mode = reg_raw_mode[regno]; + +@@ -1117,7 +1125,7 @@ apply_result_size (void) + size = 0; + + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) +- if (FUNCTION_VALUE_REGNO_P (regno)) ++ if (FUNCTION_VALUE_REGNO_P_APPLY_RESULT (regno)) + { + mode = reg_raw_mode[regno]; + +--- gcc/gcc/caller-save.c 2005-06-25 03:59:25.000000000 +0200 ++++ gcc/gcc/caller-save.c 2005-07-18 06:14:15.000000000 +0200 +@@ -377,6 +377,7 @@ save_call_clobbered_regs (void) + { + rtx insn = chain->insn; + enum rtx_code code = GET_CODE (insn); ++ rtx reg ATTRIBUTE_UNUSED; + + next = chain->next; + +@@ -450,6 +451,12 @@ save_call_clobbered_regs (void) + CLEAR_HARD_REG_SET (this_insn_sets); + note_stores (PATTERN (insn), mark_set_regs, NULL); + ++#ifdef CALL_INSN_SETS ++ reg = CALL_INSN_SETS (insn); ++ ++ if (reg) ++ mark_set_regs (reg, NULL_RTX, NULL); ++#endif + /* Compute which hard regs must be saved before this call. */ + AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set); + AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets); +--- gcc/gcc/config/i386/i386.c 2005-07-14 09:46:16.000000000 +0200 ++++ gcc/gcc/config/i386/i386.c 2005-07-18 06:14:15.000000000 +0200 +@@ -2067,12 +2067,13 @@ ix86_return_pops_args (tree fundecl, tre + + /* Return true when register may be used to pass function parameters. 
*/ + bool +-ix86_function_arg_regno_p (int regno) ++ix86_function_arg_regno_p (int regno, bool from_builtin) + { + int i; + if (!TARGET_64BIT) + return (regno < REGPARM_MAX +- || (TARGET_MMX && MMX_REGNO_P (regno) ++ || (TARGET_MMX && !(TARGET_80387 && from_builtin) ++ && MMX_REGNO_P (regno) + && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) + || (TARGET_SSE && SSE_REGNO_P (regno) + && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); +@@ -3181,14 +3182,14 @@ ix86_function_arg_boundary (enum machine + + /* Return true if N is a possible register number of function value. */ + bool +-ix86_function_value_regno_p (int regno) ++ix86_function_value_regno_p (int regno, bool from_builtin) + { + if (regno == 0 + || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) + || (regno == FIRST_SSE_REG && TARGET_SSE)) + return true; + +- if (!TARGET_64BIT ++ if (!TARGET_64BIT && !(TARGET_80387 && from_builtin) + && (regno == FIRST_MMX_REG && TARGET_MMX)) + return true; + +@@ -7450,12 +7451,152 @@ output_387_binary_op (rtx insn, rtx *ope + return buf; + } + +-/* Return needed mode for entity in optimize_mode_switching pass. */ ++/* Return needed mode for entity in optimize_mode_switching pass. ++ Returned mode should match ix86_mode_entry () for function calls. */ + + int + ix86_mode_needed (int entity, rtx insn) + { +- enum attr_i387_cw mode; ++ int unit, mode; ++ ++ if (entity == I387_FPU_MODE) ++ { ++ /* If a function call uses MMX registers, select MMX FPU mode and ++ if function call uses x87 registers, select x87 FPU mode. */ ++ if (CALL_P (insn)) ++ { ++ rtx link; ++ rtx reg; ++ bool mmx = false; ++ bool x87 = false; ++ ++ for (link = CALL_INSN_FUNCTION_USAGE (insn); ++ link; ++ link = XEXP (link, 1)) ++ { ++ if (GET_CODE (XEXP (link, 0)) == USE) ++ { ++ reg = XEXP (XEXP (link, 0), 0); ++ ++ if (reg) ++ { ++ if (MMX_REG_P (reg)) ++ mmx = true; ++ ++ if (FP_REG_P (reg)) ++ x87 = true; ++ } ++ } ++ } ++ ++ /* Mixing of x87 and MMX registers is not allowed ++ in function call. 
*/ ++ gcc_assert (!mmx || !x87); ++ ++ if (mmx) ++ return FPU_MODE_MMX; ++ ++ /* Fall back to default mode. */ ++ return FPU_MODE_X87; ++ } ++ ++ /* Parse ASM operands to check input and output constraints. If ++ an ASM uses MMX registers, select MMX mode and if it uses x87 ++ registers, select x87 mode. Mixing of MMX and x87 constraints ++ is not allowed. If no MMX or x87 input and output registers ++ are used, switch to default mode. */ ++ if (NONJUMP_INSN_P (insn)) ++ { ++ rtx pat = PATTERN (insn); ++ int noperands = asm_noperands (pat); ++ ++ if (noperands >= 0) ++ { ++ const char **constraints; ++ int i; ++ bool mmx = false; ++ bool x87 = false; ++ ++ constraints = alloca (noperands * sizeof (char *)); ++ decode_asm_operands (pat, NULL, NULL, constraints, NULL); ++ ++ for (i = 0; i < noperands; i++) ++ { ++ const char *c = constraints[i]; ++ enum reg_class class; ++ ++ if (c[0] == '%') ++ c++; ++ if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0') ++ c = constraints[c[0] - '0']; ++ ++ while (*c) ++ { ++ char cc = *c; ++ int len; ++ switch (cc) ++ { ++ case ',': ++ c++; /* Step the scan pointer past the alternative separator; bumping the copy CC would never advance the loop. */ ++ continue; ++ case '=': ++ case '+': ++ case '*': ++ case '%': ++ case '!': ++ case '#': ++ case '&': ++ case '?': ++ break; ++ ++ default: ++ class = REG_CLASS_FROM_LETTER (cc); ++ ++ if (MMX_CLASS_P (class)) ++ mmx = true; ++ ++ if (FLOAT_CLASS_P (class)) ++ x87 = true; ++ } ++ ++ len = CONSTRAINT_LEN (cc, c); ++ do ++ c++; ++ while (--len && *c); ++ } ++ } ++ ++ /* Mixing x87 and MMX registers in ASM is not allowed. */ ++ if (mmx && x87) ++ error_for_asm (insn, "mixing of x87 and MMX registers " ++ "is not allowed in %<asm%>"); ++ ++ if (mmx) ++ return FPU_MODE_MMX; ++ ++ /* Fall back to default mode. 
*/ ++ return FPU_MODE_X87; ++ } ++ } ++ ++ if (recog_memoized (insn) < 0) ++ return FPU_MODE_ANY; ++ ++ unit = get_attr_unit (insn); ++ ++ switch (unit) ++ { ++ case UNIT_MMX: ++ return FPU_MODE_MMX; ++ ++ case UNIT_I387: ++ return FPU_MODE_X87; ++ ++ default: ++ return FPU_MODE_ANY; ++ ++ } ++ } + + /* The mode UNINITIALIZED is used to store control word after a + function call or ASM pattern. The mode ANY specify that function +@@ -7502,21 +7643,132 @@ ix86_mode_needed (int entity, rtx insn) + return I387_CW_ANY; + } + +-/* Output code to initialize control word copies used by trunc?f?i and +- rounding patterns. CURRENT_MODE is set to current control word, +- while NEW_MODE is set to new control word. */ ++ ++/* Switch FPU mode to appropriate mode after function call in ++ optimize_mode_switching pass. Returned mode should match ++ ix86_mode_exit (). */ ++ ++int ++ix86_mode_after (int entity, int mode, rtx insn) ++{ ++ if (entity == I387_FPU_MODE) ++ { ++ /* Switch FPU to MMX mode after function call if function value ++ is returned in MMX register and similar for x87 reg. ++ If no value is returned in MMX or x87 reg, fall back to ++ default mode. */ ++ if (CALL_P (insn)) ++ { ++ rtx reg = SET_DEST (PATTERN (insn)); ++ ++ int new_mode; ++ ++ if (reg && MMX_REG_P (reg)) ++ new_mode = FPU_MODE_MMX; ++ else ++ new_mode = FPU_MODE_X87; ++ ++ /* Call insn should never operate in FPU_MODE_ANY. */ ++ if ((mode != FPU_MODE_ANY) && (new_mode != mode)) ++ ix86_fpu_mode_changed = 1; ++ ++ return new_mode; ++ } ++ } ++ ++ return mode; ++} ++ ++/* Switch FPU mode of function entry to appropriate mode in ++ optimize_mode_switching pass. Returned mode should match ++ ix86_mode_needed () for function calls. */ ++ ++int ++ix86_mode_entry (int entity) ++{ ++ if (entity == I387_FPU_MODE) ++ { ++ if (! current_function_args_info.maybe_vaarg) ++ { ++ if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX) ++ return FPU_MODE_MMX; ++ ++ /* ??? Handle x87 registers for fpregparm. 
*/ ++ } ++ ++ /* Fall back to default mode. */ ++ return FPU_MODE_X87; ++ } ++ ++ return I387_CW_ANY; ++} ++ ++/* Switch FPU mode of function exit to appropriate mode in ++ optimize_mode_switching pass. Returned mode should match ++ ix86_mode_after () for function calls. */ ++ ++int ++ix86_mode_exit (int entity) ++{ ++ if (entity == I387_FPU_MODE) ++ { ++ rtx reg = current_function_return_rtx; ++ ++ /* If MMX output register is specified, switch FPU mode ++ of function exit to MMX mode. */ ++ if (reg && MMX_REG_P (reg)) ++ return FPU_MODE_MMX; ++ ++ /* Fall back to default mode. */ ++ return FPU_MODE_X87; ++ } ++ ++ return I387_CW_ANY; ++} ++ ++/* Emit mode switching instructions in optimize_mode_switching pass. */ ++ + void +-emit_i387_cw_initialization (int mode) ++ix86_emit_mode_set (int entity, int mode) + { +- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); +- rtx new_mode; ++ rtx stored_mode, new_mode; ++ rtx reg; + + int slot; + +- rtx reg = gen_reg_rtx (HImode); ++ if (entity == I387_FPU_MODE) ++ { ++ switch (mode) ++ { ++ case FPU_MODE_ANY: ++ return; ++ ++ case FPU_MODE_X87: ++ emit_insn (gen_emms ()); ++ ix86_fpu_mode_changed = 1; ++ return; ++ ++ case FPU_MODE_MMX: ++ emit_insn (gen_efpu ()); ++ ix86_fpu_mode_changed = 1; ++ return; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++ ++ /* Output code to initialize control word copies used by trunc?f?i ++ and rounding patterns. STORED_MODE is set to current control ++ word, while NEW_MODE is set to new control word. 
*/ + ++ if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY)) ++ return; ++ ++ stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); + emit_insn (gen_x86_fnstcw_1 (stored_mode)); ++ ++ reg = gen_reg_rtx (HImode); + emit_move_insn (reg, stored_mode); + + if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) +@@ -12279,6 +12531,7 @@ ix86_init_machine_status (void) + + f = ggc_alloc_cleared (sizeof (struct machine_function)); + f->use_fast_prologue_epilogue_nregs = -1; ++ f->optimize_mode_switching[I387_FPU_MODE] = TARGET_80387 && TARGET_MMX; + + return f; + } +@@ -12877,7 +13130,77 @@ ix86_local_alignment (tree type, int ali + } + return align; + } ++ + ++ ++/* Return true to prevent register allocator from allocating registers ++ from the unit that is not active. */ ++ ++bool ++ix86_epilogue_uses (int regno) ++{ ++ int mode; ++ ++ if (! ix86_fpu_mode_changed) ++ return false; ++ ++ mode = ix86_mode_exit (I387_FPU_MODE); ++ ++ if (mode == FPU_MODE_MMX) ++ return FP_REGNO_P (regno); ++ else ++ return MMX_REGNO_P (regno); ++} ++ ++/* Return RTX code of additional register that CALL_INSN uses. ++ This function is used to maintain correct register life ++ information before CALL_INSN in case of MMX/x87 switching. */ ++ ++rtx ++ix86_call_insn_uses (rtx insn) ++{ ++ int mode; ++ ++ if (! ix86_fpu_mode_changed) ++ return NULL_RTX; ++ ++ gcc_assert (CALL_P (insn)); ++ ++ mode = ix86_mode_needed (I387_FPU_MODE, insn); ++ if (mode == FPU_MODE_MMX) ++ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG); ++ else ++ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG); ++ ++ return NULL_RTX; ++} ++ ++/* Return RTX code of additional register that CALL_INSN sets. ++ This function is used to maintain correct register life ++ information after CALL_INSN in case of MMX/x87 switching. */ ++ ++rtx ++ix86_call_insn_sets (rtx insn) ++{ ++ int mode; ++ ++ if (! 
ix86_fpu_mode_changed) ++ return NULL_RTX; ++ ++ gcc_assert (CALL_P (insn)); ++ ++ /* Current mode in call to ix86_mode_after is set to FPU_MODE_ANY ++ to prevent setting of ix86_fpu_mode_changed variable. */ ++ mode = ix86_mode_after (I387_FPU_MODE, FPU_MODE_ANY, insn); ++ if (mode == FPU_MODE_MMX) ++ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG); ++ else ++ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG); ++ ++ return NULL_RTX; ++} ++ ++ + /* Emit RTL insns to initialize the variable parts of a trampoline. + FNADDR is an RTX for the address of the function's pure code. + CXT is an RTX for the static chain value for the function. */ +@@ -13357,9 +13680,11 @@ enum ix86_builtins + IX86_BUILTIN_MONITOR, + IX86_BUILTIN_MWAIT, + ++ IX86_BUILTIN_VEC_INIT_V2SF, + IX86_BUILTIN_VEC_INIT_V2SI, + IX86_BUILTIN_VEC_INIT_V4HI, + IX86_BUILTIN_VEC_INIT_V8QI, ++ IX86_BUILTIN_VEC_EXT_V2SF, + IX86_BUILTIN_VEC_EXT_V2DF, + IX86_BUILTIN_VEC_EXT_V2DI, + IX86_BUILTIN_VEC_EXT_V4SF, +@@ -13541,24 +13866,24 @@ static const struct builtin_description + { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, + { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, + +- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, + +- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 
0, IX86_BUILTIN_PSRLWI, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, ++ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, ++ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, + +- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, +- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, ++ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, + + { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, + { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, +@@ -14323,6 +14648,11 @@ ix86_init_mmx_sse_builtins (void) + v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); + + /* Access to the vec_init patterns. */ ++ ftype = build_function_type_list (V2SF_type_node, float_type_node, ++ float_type_node, NULL_TREE); ++ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_init_v2sf", ++ ftype, IX86_BUILTIN_VEC_INIT_V2SF); ++ + ftype = build_function_type_list (V2SI_type_node, integer_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", +@@ -14344,6 +14674,11 @@ ix86_init_mmx_sse_builtins (void) + ftype, IX86_BUILTIN_VEC_INIT_V8QI); + + /* Access to the vec_extract patterns. 
*/ ++ ftype = build_function_type_list (float_type_node, V2SF_type_node, ++ integer_type_node, NULL_TREE); ++ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_ext_v2sf", ++ ftype, IX86_BUILTIN_VEC_EXT_V2SF); ++ + ftype = build_function_type_list (double_type_node, V2DF_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df", +@@ -14818,7 +15153,7 @@ ix86_expand_builtin (tree exp, rtx targe + switch (fcode) + { + case IX86_BUILTIN_EMMS: +- emit_insn (gen_mmx_emms ()); ++ /* emms insn is emitted automatically. */ + return 0; + + case IX86_BUILTIN_SFENCE: +@@ -15035,7 +15370,7 @@ ix86_expand_builtin (tree exp, rtx targe + return target; + + case IX86_BUILTIN_FEMMS: +- emit_insn (gen_mmx_femms ()); ++ /* femms insn is emitted automatically. */ + return NULL_RTX; + + case IX86_BUILTIN_PAVGUSB: +@@ -15181,11 +15516,13 @@ ix86_expand_builtin (tree exp, rtx targe + return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, + target, 1); + ++ case IX86_BUILTIN_VEC_INIT_V2SF: + case IX86_BUILTIN_VEC_INIT_V2SI: + case IX86_BUILTIN_VEC_INIT_V4HI: + case IX86_BUILTIN_VEC_INIT_V8QI: + return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); + ++ case IX86_BUILTIN_VEC_EXT_V2SF: + case IX86_BUILTIN_VEC_EXT_V2DF: + case IX86_BUILTIN_VEC_EXT_V2DI: + case IX86_BUILTIN_VEC_EXT_V4SF: +--- gcc/gcc/config/i386/i386.h 2005-07-14 09:46:21.000000000 +0200 ++++ gcc/gcc/config/i386/i386.h 2005-07-18 06:14:15.000000000 +0200 +@@ -819,7 +819,9 @@ do { \ + + #define HARD_REGNO_NREGS(REGNO, MODE) \ + (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \ +- ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ ++ ? ((MODE) == ALLREGSmode \ ++ ? 8 \ ++ : (COMPLEX_MODE_P (MODE) ? 2 : 1)) \ + : ((MODE) == XFmode \ + ? (TARGET_64BIT ? 2 : 3) \ + : (MODE) == XCmode \ +@@ -841,9 +843,8 @@ do { \ + ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ + || (MODE) == V2SImode || (MODE) == SImode) + +-/* ??? 
No autovectorization into MMX or 3DNOW until we can reliably +- place emms and femms instructions. */ +-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD) ++#define UNITS_PER_SIMD_WORD \ ++ (TARGET_SSE ? 16 : TARGET_MMX ? 8 : UNITS_PER_WORD) + + #define VALID_FP_MODE_P(MODE) \ + ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ +@@ -1433,8 +1434,16 @@ enum reg_class + #define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \ + ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE)) + ++/* 1 if N is the number of a register in which the values of ++ called function may come back. */ + #define FUNCTION_VALUE_REGNO_P(N) \ +- ix86_function_value_regno_p (N) ++ ix86_function_value_regno_p ((N), false) ++ ++/* 1 if N is the number of a register in which the value of ++ __builtin_return builtin function may come back. */ ++ ++#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT(N) \ ++ ix86_function_value_regno_p ((N), true) + + /* Define how to find the value returned by a library function + assuming the value has mode MODE. */ +@@ -1449,7 +1458,13 @@ enum reg_class + #define APPLY_RESULT_SIZE (8+108) + + /* 1 if N is a possible register number for function argument passing. */ +-#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N) ++#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p ((N), false) ++ ++/* 1 if N is a possible register number for function argument passing ++ from __builtin_apply_args and __builtin_apply builtin functions. */ ++ ++#define FUNCTION_ARG_REGNO_P_APPLY_ARGS(N) \ ++ ix86_function_arg_regno_p ((N), true) + + /* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should +@@ -1531,6 +1546,23 @@ typedef struct ix86_args { + + #define EXIT_IGNORE_STACK 1 + ++/* Define this macro as a C expression that is nonzero for registers ++ that are used by the epilogue or the return' pattern. 
The stack ++ and frame pointer registers are already assumed to be used as ++ needed. */ ++ ++#define EPILOGUE_USES(REGNO) ix86_epilogue_uses (REGNO) ++ ++/* Define this macro as a C expression that returns RTL expression of ++ additional hard register set by call_insn. */ ++ ++#define CALL_INSN_SETS(INSN) ix86_call_insn_sets (INSN) ++ ++/* Define this macro as a C expression that returns RTL expression of ++ additional hard register used by call_insn. */ ++ ++#define CALL_INSN_USES(INSN) ix86_call_insn_uses (INSN) ++ + /* Output assembler code for a block containing the constant parts + of a trampoline, leaving space for the variable parts. */ + +@@ -2167,6 +2199,10 @@ extern rtx ix86_compare_op0; /* operand + extern rtx ix86_compare_op1; /* operand 1 for comparisons */ + extern rtx ix86_compare_emitted; + ++ ++/* x87 FPU modes for x87/MMX switching. */ ++enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY }; ++ + /* To properly truncate FP values into integers, we need to set i387 control + word. We can't emit proper mode switching code before reload, as spills + generated by reload may truncate values incorrectly, but we still can avoid +@@ -2188,6 +2224,7 @@ enum ix86_entity + I387_FLOOR, + I387_CEIL, + I387_MASK_PM, ++ I387_FPU_MODE, + MAX_386_ENTITIES + }; + +@@ -2217,7 +2254,12 @@ enum ix86_stack_slot + refer to the mode-switched entity in question. */ + + #define NUM_MODES_FOR_MODE_SWITCHING \ +- { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } ++ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY } ++ ++/* Define this macro if the port needs extra register life analysis ++ after mode switching. */ ++ ++#define LIFE_ANALYSIS_AFTER_MODE_SWITCHING ix86_fpu_mode_changed + + /* ENTITY is an integer specifying a mode-switched entity. 
If + `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to +@@ -2227,6 +2269,22 @@ enum ix86_stack_slot + + #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I)) + ++/* This macro determines the mode that an INSN results in (if different ++ from the incoming mode). */ ++ ++#define MODE_AFTER(ENTITY, MODE, I) \ ++ ix86_mode_after ((ENTITY), (MODE), (I)) ++ ++/* This macro specifies a mode that ENTITY is assumed to be ++ switched to at function entry. */ ++ ++#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY) ++ ++/* This macro specifies a mode that ENTITY is assumed to be ++ switched to at function exit. */ ++ ++#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY) ++ + /* This macro specifies the order in which modes for ENTITY are + processed. 0 is the highest priority. */ + +@@ -2236,10 +2294,8 @@ enum ix86_stack_slot + is the set of hard registers live at the point where the insn(s) + are to be inserted. */ + +-#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ +- ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \ +- ? emit_i387_cw_initialization (MODE), 0 \ +- : 0) ++#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ ++ ix86_emit_mode_set ((ENTITY), (MODE)) + + + /* Avoid renaming of stack registers, as doing so in combination with +@@ -2263,6 +2319,7 @@ struct machine_function GTY(()) + int save_varrargs_registers; + int accesses_prev_frame; + int optimize_mode_switching[MAX_386_ENTITIES]; ++ int fpu_mode_changed; + /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to + determine the style used. 
*/ + int use_fast_prologue_epilogue; +@@ -2274,6 +2331,7 @@ struct machine_function GTY(()) + #define ix86_stack_locals (cfun->machine->stack_locals) + #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) + #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching) ++#define ix86_fpu_mode_changed (cfun->machine->fpu_mode_changed) + + /* Control behavior of x86_file_start. */ + #define X86_FILE_START_VERSION_DIRECTIVE false +--- gcc/gcc/config/i386/i386.md 2005-07-12 11:20:12.000000000 +0200 ++++ gcc/gcc/config/i386/i386.md 2005-07-18 06:14:15.000000000 +0200 +@@ -152,7 +152,7 @@ + (UNSPECV_EMMS 2) + (UNSPECV_LDMXCSR 3) + (UNSPECV_STMXCSR 4) +- (UNSPECV_FEMMS 5) ++ (UNSPECV_EFPU 5) + (UNSPECV_CLFLUSH 6) + (UNSPECV_ALIGN 7) + (UNSPECV_MONITOR 8) +@@ -167,9 +167,11 @@ + (define_constants + [(BP_REG 6) + (SP_REG 7) ++ (FIRSTFP_REG 8) + (FLAGS_REG 17) + (FPSR_REG 18) + (DIRFLAG_REG 19) ++ (FIRSTMMX_REG 29) + ]) + + ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls +--- gcc/gcc/config/i386/i386-modes.def 2005-06-25 03:21:07.000000000 +0200 ++++ gcc/gcc/config/i386/i386-modes.def 2005-07-18 06:14:15.000000000 +0200 +@@ -62,6 +62,9 @@ CC_MODE (CCZ); + CC_MODE (CCFP); + CC_MODE (CCFPU); + ++/* This mode is used to cover all MMX and all x87 registers. */ ++RANDOM_MODE (ALLREGS); ++ + /* Vector modes. 
*/ + VECTOR_MODES (INT, 4); /* V4QI V2HI */ + VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ +--- gcc/gcc/config/i386/i386-protos.h 2005-07-14 09:46:16.000000000 +0200 ++++ gcc/gcc/config/i386/i386-protos.h 2005-07-18 06:14:15.000000000 +0200 +@@ -152,6 +152,9 @@ extern bool ix86_expand_fp_vcond (rtx[]) + extern bool ix86_expand_int_vcond (rtx[]); + extern int ix86_expand_int_addcc (rtx[]); + extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); ++extern bool ix86_epilogue_uses (int); ++extern rtx ix86_call_insn_sets (rtx); ++extern rtx ix86_call_insn_uses (rtx); + extern void x86_initialize_trampoline (rtx, rtx, rtx); + extern rtx ix86_zero_extend_to_Pmode (rtx); + extern void ix86_split_long_move (rtx[]); +@@ -168,8 +171,8 @@ extern int ix86_attr_length_address_defa + extern enum machine_mode ix86_fp_compare_mode (enum rtx_code); + + extern rtx ix86_libcall_value (enum machine_mode); +-extern bool ix86_function_value_regno_p (int); +-extern bool ix86_function_arg_regno_p (int); ++extern bool ix86_function_value_regno_p (int, bool); ++extern bool ix86_function_arg_regno_p (int, bool); + extern int ix86_function_arg_boundary (enum machine_mode, tree); + extern int ix86_return_in_memory (tree); + extern void ix86_va_start (tree, rtx); +@@ -190,7 +193,10 @@ extern bool ix86_cannot_change_mode_clas + extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class); + extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int); + extern int ix86_mode_needed (int, rtx); +-extern void emit_i387_cw_initialization (int); ++extern int ix86_mode_after (int, int, rtx); ++extern int ix86_mode_entry (int); ++extern int ix86_mode_exit (int); ++extern void ix86_emit_mode_set (int, int); + extern bool ix86_fp_jump_nontrivial_p (enum rtx_code); + extern void x86_order_regs_for_local_alloc (void); + extern void x86_function_profiler (FILE *, int); +--- gcc/gcc/config/i386/mm3dnow.h 2005-06-25 03:21:23.000000000 +0200 ++++ gcc/gcc/config/i386/mm3dnow.h 
2005-07-18 06:14:15.000000000 +0200 +@@ -172,14 +172,13 @@ _m_prefetchw (void *__P) + static __inline __m64 + _m_from_float (float __A) + { +- return (__m64)(__v2sf){ __A, 0 }; ++ return (__m64) __builtin_ia32_vec_init_v2sf (__A, 0); + } + + static __inline float + _m_to_float (__m64 __A) + { +- union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A }; +- return __tmp.a[0]; ++ return __builtin_ia32_vec_ext_v2sf ((__v2sf)__A, 0); + } + + #ifdef __3dNOW_A__ +--- gcc/gcc/config/i386/mmx.md 2005-06-25 03:21:23.000000000 +0200 ++++ gcc/gcc/config/i386/mmx.md 2005-07-18 06:14:15.000000000 +0200 +@@ -23,14 +23,6 @@ + ;; the same register file, and 3dNOW! adds a number of extensions to + ;; the base integer MMX isa. + +-;; Note! Except for the basic move instructions, *all* of these +-;; patterns are outside the normal optabs namespace. This is because +-;; use of these registers requires the insertion of emms or femms +-;; instructions to return to normal fpu mode. The compiler doesn't +-;; know how to do that itself, which means it's up to the user. Which +-;; means that we should never use any of these patterns except at the +-;; direction of the user via a builtin. 
+- ;; 8 byte integral modes handled by MMX (and by extension, SSE) + (define_mode_macro MMXMODEI [V8QI V4HI V2SI]) + +@@ -481,7 +473,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_set (false, operands[0], operands[1], ++ ix86_expand_vector_set (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -537,7 +529,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_extract (false, operands[0], operands[1], ++ ix86_expand_vector_extract (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -547,7 +539,7 @@ + (match_operand 1 "" "")] + "TARGET_SSE" + { +- ix86_expand_vector_init (false, operands[0], operands[1]); ++ ix86_expand_vector_init (true, operands[0], operands[1]); + DONE; + }) + +@@ -557,6 +549,21 @@ + ;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ++(define_expand "neg<mode>2" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (minus:MMXMODEI ++ (match_dup 2) ++ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") ++ ++(define_expand "add<mode>3" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (plus:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") ++ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") ++ + (define_insn "mmx_add<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (plus:MMXMODEI +@@ -598,6 +605,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "sub<mode>3" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (minus:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "") ++ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") ++ + (define_insn "mmx_sub<mode>3" + [(set (match_operand:MMXMODEI 
0 "register_operand" "=y") + (minus:MMXMODEI +@@ -639,6 +653,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "mulv4hi3" ++ [(set (match_operand:V4HI 0 "register_operand" "") ++ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") ++ (match_operand:V4HI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") ++ + (define_insn "mmx_mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") +@@ -735,6 +756,13 @@ + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) + ++(define_expand "umaxv8qi3" ++ [(set (match_operand:V8QI 0 "register_operand" "") ++ (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "") ++ (match_operand:V8QI 2 "nonimmediate_operand" "")))] ++ "(TARGET_SSE || TARGET_3DNOW_A)" ++ "ix86_fixup_binary_operands_no_copy (UMAX, V8QImode, operands);") ++ + (define_insn "mmx_umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") +@@ -745,6 +773,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "smaxv4hi3" ++ [(set (match_operand:V4HI 0 "register_operand" "") ++ (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") ++ (match_operand:V4HI 2 "nonimmediate_operand" "")))] ++ "(TARGET_SSE || TARGET_3DNOW_A)" ++ "ix86_fixup_binary_operands_no_copy (SMAX, V4HImode, operands);") ++ + (define_insn "mmx_smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") +@@ -755,6 +790,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "uminv8qi3" ++ [(set (match_operand:V8QI 0 "register_operand" "") ++ (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "") ++ (match_operand:V8QI 2 "nonimmediate_operand" "")))] ++ "(TARGET_SSE || TARGET_3DNOW_A)" ++ "ix86_fixup_binary_operands_no_copy 
(UMIN, V8QImode, operands);") ++ + (define_insn "mmx_uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "=y") + (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") +@@ -765,6 +807,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "sminv4hi3" ++ [(set (match_operand:V4HI 0 "register_operand" "") ++ (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") ++ (match_operand:V4HI 2 "nonimmediate_operand" "")))] ++ "(TARGET_SSE || TARGET_3DNOW_A)" ++ "ix86_fixup_binary_operands_no_copy (SMIN, V4HImode, operands);") ++ + (define_insn "mmx_sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "=y") + (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") +@@ -775,7 +824,7 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + +-(define_insn "mmx_ashr<mode>3" ++(define_insn "ashr<mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (ashiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") +@@ -785,7 +834,7 @@ + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +-(define_insn "mmx_lshr<mode>3" ++(define_insn "lshr<mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (lshiftrt:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") +@@ -806,7 +855,7 @@ + [(set_attr "type" "mmxshft") + (set_attr "mode" "DI")]) + +-(define_insn "mmx_ashl<mode>3" ++(define_insn "ashl<mode>3" + [(set (match_operand:MMXMODE24 0 "register_operand" "=y") + (ashift:MMXMODE24 + (match_operand:MMXMODE24 1 "register_operand" "0") +@@ -853,12 +902,66 @@ + [(set_attr "type" "mmxcmp") + (set_attr "mode" "DI")]) + ++(define_expand "vcond<mode>" ++ [(set (match_operand:MMXMODE12 0 "register_operand" "") ++ (if_then_else:MMXMODE12 ++ (match_operator 3 "" ++ [(match_operand:MMXMODE12 4 "nonimmediate_operand" "") ++ (match_operand:MMXMODE12 5 "nonimmediate_operand" "")]) ++ (match_operand:MMXMODE12 1 "general_operand" "") ++ (match_operand:MMXMODE12 2 "general_operand" "")))] ++ "TARGET_MMX" ++{ ++ if 
(ix86_expand_int_vcond (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "vconduv8qi" ++ [(set (match_operand:V8QI 0 "register_operand" "") ++ (if_then_else:V8QI ++ (match_operator 3 "" ++ [(match_operand:V8QI 4 "nonimmediate_operand" "") ++ (match_operand:V8QI 5 "nonimmediate_operand" "")]) ++ (match_operand:V8QI 1 "general_operand" "") ++ (match_operand:V8QI 2 "general_operand" "")))] ++ "TARGET_MMX" ++{ ++ if (ix86_expand_int_vcond (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; + ;; Parallel integral logical operations + ;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ++(define_expand "one_cmpl2" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") ++ (match_dup 2)))] ++ "TARGET_MMX" ++{ ++ int i, n = GET_MODE_NUNITS (mode); ++ rtvec v = rtvec_alloc (n); ++ ++ for (i = 0; i < n; ++i) ++ RTVEC_ELT (v, i) = constm1_rtx; ++ ++ operands[2] = force_reg (mode, gen_rtx_CONST_VECTOR (mode, v)); ++}) ++ ++(define_expand "and3" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (and:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") ++ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (AND, mode, operands);") ++ + (define_insn "mmx_and3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (and:MMXMODEI +@@ -879,6 +982,13 @@ + [(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "ior3" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (ior:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") ++ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (IOR, mode, operands);") ++ + (define_insn "mmx_ior3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (ior:MMXMODEI +@@ -889,6 +999,13 @@ + 
[(set_attr "type" "mmxadd") + (set_attr "mode" "DI")]) + ++(define_expand "xor3" ++ [(set (match_operand:MMXMODEI 0 "register_operand" "") ++ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") ++ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] ++ "TARGET_MMX" ++ "ix86_fixup_binary_operands_no_copy (XOR, mode, operands);") ++ + (define_insn "mmx_xor3" + [(set (match_operand:MMXMODEI 0 "register_operand" "=y") + (xor:MMXMODEI +@@ -1147,7 +1264,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_set (false, operands[0], operands[1], ++ ix86_expand_vector_set (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1205,7 +1322,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_extract (false, operands[0], operands[1], ++ ix86_expand_vector_extract (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1215,7 +1332,7 @@ + (match_operand 1 "" "")] + "TARGET_SSE" + { +- ix86_expand_vector_init (false, operands[0], operands[1]); ++ ix86_expand_vector_init (true, operands[0], operands[1]); + DONE; + }) + +@@ -1225,7 +1342,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_set (false, operands[0], operands[1], ++ ix86_expand_vector_set (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1236,7 +1353,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_extract (false, operands[0], operands[1], ++ ix86_expand_vector_extract (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1246,7 +1363,7 @@ + (match_operand 1 "" "")] + "TARGET_SSE" + { +- ix86_expand_vector_init (false, operands[0], operands[1]); ++ ix86_expand_vector_init (true, operands[0], operands[1]); + DONE; + }) + +@@ -1256,7 +1373,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_set (false, operands[0], operands[1], 
++ ix86_expand_vector_set (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1267,7 +1384,7 @@ + (match_operand 2 "const_int_operand" "")] + "TARGET_MMX" + { +- ix86_expand_vector_extract (false, operands[0], operands[1], ++ ix86_expand_vector_extract (true, operands[0], operands[1], + INTVAL (operands[2])); + DONE; + }) +@@ -1277,7 +1394,7 @@ + (match_operand 1 "" "")] + "TARGET_SSE" + { +- ix86_expand_vector_init (false, operands[0], operands[1]); ++ ix86_expand_vector_init (true, operands[0], operands[1]); + DONE; + }) + +@@ -1386,48 +1503,20 @@ + [(set_attr "type" "mmxcvt") + (set_attr "mode" "DI")]) + +-(define_insn "mmx_emms" +- [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) +- (clobber (reg:XF 8)) +- (clobber (reg:XF 9)) +- (clobber (reg:XF 10)) +- (clobber (reg:XF 11)) +- (clobber (reg:XF 12)) +- (clobber (reg:XF 13)) +- (clobber (reg:XF 14)) +- (clobber (reg:XF 15)) +- (clobber (reg:DI 29)) +- (clobber (reg:DI 30)) +- (clobber (reg:DI 31)) +- (clobber (reg:DI 32)) +- (clobber (reg:DI 33)) +- (clobber (reg:DI 34)) +- (clobber (reg:DI 35)) +- (clobber (reg:DI 36))] +- "TARGET_MMX" +- "emms" +- [(set_attr "type" "mmx") +- (set_attr "memory" "unknown")]) ++(define_insn "efpu" ++ [(set (reg:ALLREGS FIRSTFP_REG) ++ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTMMX_REG)] ++ UNSPECV_EFPU))] ++ "TARGET_80387 && TARGET_MMX" ++ "" ++ [(set_attr "length" "0")]) ++ ++(define_insn "emms" ++ [(set (reg:ALLREGS FIRSTMMX_REG) ++ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTFP_REG)] ++ UNSPECV_EMMS))] ++ "TARGET_80387 && TARGET_MMX" ++{ ++ return TARGET_3DNOW ? 
"femms" : "emms"; ++}) + +-(define_insn "mmx_femms" +- [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) +- (clobber (reg:XF 8)) +- (clobber (reg:XF 9)) +- (clobber (reg:XF 10)) +- (clobber (reg:XF 11)) +- (clobber (reg:XF 12)) +- (clobber (reg:XF 13)) +- (clobber (reg:XF 14)) +- (clobber (reg:XF 15)) +- (clobber (reg:DI 29)) +- (clobber (reg:DI 30)) +- (clobber (reg:DI 31)) +- (clobber (reg:DI 32)) +- (clobber (reg:DI 33)) +- (clobber (reg:DI 34)) +- (clobber (reg:DI 35)) +- (clobber (reg:DI 36))] +- "TARGET_3DNOW" +- "femms" +- [(set_attr "type" "mmx") +- (set_attr "memory" "none")]) +--- gcc/gcc/config/i386/sse.md 2005-06-29 19:27:19.000000000 +0200 ++++ gcc/gcc/config/i386/sse.md 2005-07-18 06:14:15.000000000 +0200 +@@ -881,6 +881,7 @@ + "TARGET_SSE" + "cvtpi2ps\t{%2, %0|%0, %2}" + [(set_attr "type" "ssecvt") ++ (set_attr "unit" "mmx") + (set_attr "mode" "V4SF")]) + + (define_insn "sse_cvtps2pi" +@@ -3508,6 +3509,7 @@ + movhps\t{%2, %0|%0, %2} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov") ++ (set_attr "unit" "*,mmx,*,*,*,*") + (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")]) + + (define_expand "vec_setv2di" +--- gcc/gcc/config/sh/sh.h 2005-07-03 23:08:07.000000000 +0200 ++++ gcc/gcc/config/sh/sh.h 2005-07-18 06:14:15.000000000 +0200 +@@ -3301,7 +3301,7 @@ extern struct rtx_def *sp_switch; + ? get_attr_fp_mode (INSN) \ + : FP_MODE_NONE) + +-#define MODE_AFTER(MODE, INSN) \ ++#define MODE_AFTER(ENTITY, MODE, INSN) \ + (TARGET_HITACHI \ + && recog_memoized (INSN) >= 0 \ + && get_attr_fp_set (INSN) != FP_SET_NONE \ +--- gcc/gcc/doc/tm.texi 2005-07-13 19:27:39.000000000 +0200 ++++ gcc/gcc/doc/tm.texi 2005-07-18 06:14:15.000000000 +0200 +@@ -4227,6 +4227,16 @@ stack adjustment in a function that has + compiler knows this regardless of @code{EXIT_IGNORE_STACK}. + @end defmac + ++@defmac CALL_INSN_SETS (@var{INSN}) ++Define this macro as a C expression that returns RTL expression of ++additional hard register set by call_insn. 
++@end defmac ++ ++@defmac CALL_INSN_USES (@var{INSN}) ++Define this macro as a C expression that returns RTL expression of ++additional hard register used by call_insn. ++@end defmac ++ + @defmac EPILOGUE_USES (@var{regno}) + Define this macro as a C expression that is nonzero for registers that are + used by the epilogue or the @samp{return} pattern. The stack and frame +@@ -8376,6 +8386,13 @@ represented as numbers 0 @dots{} N @minu + switch is needed / supplied. + @end defmac + ++@defmac LIFE_ANALYSIS_AFTER_MODE_SWITCHING ++Define this macro as a C expression that is nonzero if the port needs ++extra register life analysis after mode switching. It should be nonzero ++when mode switching inserts instructions that change global registers, so ++that global register life information stays consistent. ++@end defmac ++ + @defmac MODE_NEEDED (@var{entity}, @var{insn}) + @var{entity} is an integer specifying a mode-switched entity. If + @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro to +@@ -8384,9 +8401,9 @@ return an integer value not larger than + be switched into prior to the execution of @var{insn}. + @end defmac + +-@defmac MODE_AFTER (@var{mode}, @var{insn}) +-If this macro is defined, it is evaluated for every @var{insn} during +-mode switching. It determines the mode that an insn results in (if ++@defmac MODE_AFTER (@var{entity}, @var{mode}, @var{insn}) ++If this macro is defined, it is evaluated for every @var{entity} that needs ++mode switching. It determines the mode that an @var{insn} results in (if + different from the incoming mode). 
+ @end defmac + +--- gcc/gcc/flow.c 2005-07-05 18:19:55.000000000 +0200 ++++ gcc/gcc/flow.c 2005-07-18 06:14:15.000000000 +0200 +@@ -1830,10 +1830,11 @@ propagate_one_insn (struct propagate_blo + { + regset live_at_end; + bool sibcall_p; +- rtx note, cond; ++ rtx note; ++ rtx cond = NULL_RTX; ++ rtx reg ATTRIBUTE_UNUSED; + int i; + +- cond = NULL_RTX; + if (GET_CODE (PATTERN (insn)) == COND_EXEC) + cond = COND_EXEC_TEST (PATTERN (insn)); + +@@ -1856,6 +1857,13 @@ propagate_one_insn (struct propagate_blo + mark_set_1 (pbi, CLOBBER, XEXP (XEXP (note, 0), 0), + cond, insn, pbi->flags); + ++#ifdef CALL_INSN_SETS ++ reg = CALL_INSN_SETS (insn); ++ ++ if (reg) ++ mark_set_1 (pbi, SET, reg, cond, insn, pbi->flags); ++#endif ++ + /* Calls change all call-used and global registers; sibcalls do not + clobber anything that must be preserved at end-of-function, + except for return values. */ +@@ -1894,10 +1902,11 @@ propagate_one_insn (struct propagate_blo + + if (! insn_is_dead && CALL_P (insn)) + { ++ rtx note; ++ rtx cond = NULL_RTX; ++ rtx reg ATTRIBUTE_UNUSED; + int i; +- rtx note, cond; + +- cond = NULL_RTX; + if (GET_CODE (PATTERN (insn)) == COND_EXEC) + cond = COND_EXEC_TEST (PATTERN (insn)); + +@@ -1910,6 +1919,13 @@ propagate_one_insn (struct propagate_blo + of which mark_used_regs knows how to handle. */ + mark_used_regs (pbi, XEXP (XEXP (note, 0), 0), cond, insn); + ++#ifdef CALL_INSN_USES ++ reg = CALL_INSN_USES (insn); ++ ++ if (reg) ++ mark_used_reg (pbi, reg, cond, insn); ++#endif ++ + /* The stack ptr is used (honorarily) by a CALL insn. 
*/ + if ((flags & PROP_REG_INFO) + && !REGNO_REG_SET_P (pbi->reg_live, STACK_POINTER_REGNUM)) +--- gcc/gcc/mode-switching.c 2005-07-05 18:20:07.000000000 +0200 ++++ gcc/gcc/mode-switching.c 2005-07-18 06:14:15.000000000 +0200 +@@ -473,7 +473,7 @@ optimize_mode_switching (FILE *file) + RESET_BIT (transp[bb->index], j); + } + #ifdef MODE_AFTER +- last_mode = MODE_AFTER (last_mode, insn); ++ last_mode = MODE_AFTER (e, last_mode, insn); + #endif + /* Update LIVE_NOW. */ + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) +@@ -730,6 +730,14 @@ rest_of_handle_mode_switching (void) + no_new_pseudos = 0; + optimize_mode_switching (NULL); + no_new_pseudos = 1; ++ ++ /* Mode switching can insert instructions that ++ change global registers life data. */ ++#ifdef LIFE_ANALYSIS_AFTER_MODE_SWITCHING ++ if (LIFE_ANALYSIS_AFTER_MODE_SWITCHING) ++ life_analysis (NULL, PROP_REG_INFO); ++#endif ++ + #endif /* OPTIMIZE_MODE_SWITCHING */ + } + +--- gcc/gcc/reg-stack.c 2005-07-14 09:39:54.000000000 +0200 ++++ gcc/gcc/reg-stack.c 2005-07-18 06:14:15.000000000 +0200 +@@ -1579,6 +1579,41 @@ subst_stack_regs_pat (rtx insn, stack re + } + break; + ++ case UNSPEC_VOLATILE: ++ switch (XINT (pat_src, 1)) ++ { ++ int i; ++ ++ case UNSPECV_EFPU: ++ /* There should be no stack registers live ++ at this point. */ ++ gcc_assert (regstack->top == -1); ++ ++ /* Mark all x87 registers as used. */ ++ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--) ++ { ++ regstack->reg[++regstack->top] = i; ++ SET_HARD_REG_BIT (regstack->reg_set, i); ++ } ++ break; ++ ++ case UNSPECV_EMMS: ++ /* All stack registers should be alive ++ at this point. */ ++ gcc_assert (regstack->top == REG_STACK_SIZE - 1); ++ ++ /* Mark all x87 registers as empty. 
*/ ++ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--) ++ CLEAR_HARD_REG_BIT (regstack->reg_set, i); ++ ++ regstack->top = -1; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ + case UNSPEC: + switch (XINT (pat_src, 1)) + { +@@ -2269,6 +2304,25 @@ subst_stack_regs (rtx insn, stack regsta + if (NOTE_P (insn) || INSN_DELETED_P (insn)) + return control_flow_insn_deleted; + ++#ifdef CALL_INSN_SETS ++ if (CALL_P (insn)) ++ { ++ rtx reg = CALL_INSN_SETS (insn); ++ ++ if (reg && STACK_REG_P (reg)) ++ { ++ int count; ++ ++ for (count = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)]; ++ --count >= 0;) ++ { ++ regstack->reg[++regstack->top] = REGNO (reg) + count; ++ SET_HARD_REG_BIT (regstack->reg_set, REGNO (reg) + count); ++ } ++ } ++ } ++#endif ++ + /* If there is a REG_UNUSED note on a stack register on this insn, + the indicated reg must be popped. The REG_UNUSED note is removed, + since the form of the newly emitted pop insn references the reg, +@@ -2544,6 +2598,15 @@ convert_regs_entry (void) + basic_block block = e->dest; + block_info bi = BLOCK_INFO (block); + int reg, top = -1; ++ int numregs = 0; ++ ++ /* Check if all stack registers are live at function entry. ++ This is the case where stack registers are disabled and no ++ register initialization is needed. 
*/ ++ ++ for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg) ++ if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg)) ++ numregs++; + + for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg) + if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg)) +@@ -2552,11 +2615,14 @@ convert_regs_entry (void) + + bi->stack_in.reg[++top] = reg; + +- init = gen_rtx_SET (VOIDmode, +- FP_MODE_REG (FIRST_STACK_REG, SFmode), +- not_a_num); +- insert_insn_on_edge (init, e); +- inserted = 1; ++ if (numregs != REG_STACK_SIZE) ++ { ++ init = gen_rtx_SET (VOIDmode, ++ FP_MODE_REG (FIRST_STACK_REG, SFmode), ++ not_a_num); ++ insert_insn_on_edge (init, e); ++ inserted = 1; ++ } + } + + bi->stack_in.top = top; +@@ -2575,13 +2641,34 @@ convert_regs_exit (void) + stack output_stack; + rtx retvalue; + +- retvalue = stack_result (current_function_decl); + value_reg_low = value_reg_high = -1; +- if (retvalue) ++ ++#ifdef EPILOGUE_USES ++ { ++ int numregs = 0; ++ int i; ++ ++ for (i = FIRST_STACK_REG; i < LAST_STACK_REG + 1; i++) ++ if (EPILOGUE_USES (i)) ++ numregs++; ++ ++ if (numregs) ++ { ++ value_reg_low = FIRST_STACK_REG; ++ value_reg_high = value_reg_low + numregs - 1; ++ } ++ } ++#endif ++ ++ if (value_reg_low < 0) + { +- value_reg_low = REGNO (retvalue); +- value_reg_high = value_reg_low +- + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1; ++ retvalue = stack_result (current_function_decl); ++ if (retvalue) ++ { ++ value_reg_low = REGNO (retvalue); ++ value_reg_high = value_reg_low ++ + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1; ++ } + } + + output_stack = &BLOCK_INFO (EXIT_BLOCK_PTR)->stack_in;