diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/builtins.c gcc-4.1-20050818T1605UTC/gcc/builtins.c --- gcc-4.1-20050818T1605UTC/gcc.orig/builtins.c 2005-08-18 16:22:46.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/builtins.c 2005-08-18 16:25:02.000000000 +0000 @@ -52,6 +52,14 @@ #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN #endif +#ifndef FUNCTION_VALUE_REGNO_P_APPLY_RESULT +#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT FUNCTION_VALUE_REGNO_P +#endif + +#ifndef FUNCTION_ARG_REGNO_P_APPLY_ARGS +#define FUNCTION_ARG_REGNO_P_APPLY_ARGS FUNCTION_ARG_REGNO_P +#endif + /* Define the names of the builtin function types and codes. */ const char *const built_in_class_names[4] = {"NOT_BUILT_IN", "BUILT_IN_FRONTEND", "BUILT_IN_MD", "BUILT_IN_NORMAL"}; @@ -1079,7 +1087,7 @@ size += GET_MODE_SIZE (Pmode); for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (FUNCTION_ARG_REGNO_P (regno)) + if (FUNCTION_ARG_REGNO_P_APPLY_ARGS (regno)) { mode = reg_raw_mode[regno]; @@ -1117,7 +1125,7 @@ size = 0; for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (FUNCTION_VALUE_REGNO_P (regno)) + if (FUNCTION_VALUE_REGNO_P_APPLY_RESULT (regno)) { mode = reg_raw_mode[regno]; diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/caller-save.c gcc-4.1-20050818T1605UTC/gcc/caller-save.c --- gcc-4.1-20050818T1605UTC/gcc.orig/caller-save.c 2005-06-28 08:15:34.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/caller-save.c 2005-08-18 16:25:02.000000000 +0000 @@ -377,6 +377,7 @@ { rtx insn = chain->insn; enum rtx_code code = GET_CODE (insn); + rtx reg ATTRIBUTE_UNUSED; next = chain->next; @@ -450,6 +451,12 @@ CLEAR_HARD_REG_SET (this_insn_sets); note_stores (PATTERN (insn), mark_set_regs, NULL); +#ifdef CALL_INSN_SETS + reg = CALL_INSN_SETS (insn); + + if (reg) + mark_set_regs (reg, NULL_RTX, NULL); +#endif /* Compute which hard regs must be saved before this call. 
*/ AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set); AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets); diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-modes.def gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-modes.def --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-modes.def 2005-06-28 08:16:53.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-modes.def 2005-08-18 16:25:02.000000000 +0000 @@ -62,6 +62,9 @@ CC_MODE (CCFP); CC_MODE (CCFPU); +/* This mode is used to cover all MMX and all x87 registers. */ +RANDOM_MODE (ALLREGS); + /* Vector modes. */ VECTOR_MODES (INT, 4); /* V4QI V2HI */ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-protos.h gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-protos.h --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-protos.h 2005-08-18 16:01:16.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-protos.h 2005-08-18 16:25:02.000000000 +0000 @@ -152,6 +152,9 @@ extern bool ix86_expand_int_vcond (rtx[]); extern int ix86_expand_int_addcc (rtx[]); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); +extern bool ix86_epilogue_uses (int); +extern rtx ix86_call_insn_sets (rtx); +extern rtx ix86_call_insn_uses (rtx); extern void x86_initialize_trampoline (rtx, rtx, rtx); extern rtx ix86_zero_extend_to_Pmode (rtx); extern void ix86_split_long_move (rtx[]); @@ -168,8 +171,8 @@ extern enum machine_mode ix86_fp_compare_mode (enum rtx_code); extern rtx ix86_libcall_value (enum machine_mode); -extern bool ix86_function_value_regno_p (int); -extern bool ix86_function_arg_regno_p (int); +extern bool ix86_function_value_regno_p (int, bool); +extern bool ix86_function_arg_regno_p (int, bool); extern int ix86_function_arg_boundary (enum machine_mode, tree); extern int ix86_return_in_memory (tree); extern void ix86_va_start (tree, rtx); @@ -190,7 +193,10 @@ extern enum reg_class ix86_preferred_reload_class (rtx, 
enum reg_class); extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int); extern int ix86_mode_needed (int, rtx); -extern void emit_i387_cw_initialization (int); +extern int ix86_mode_after (int, int, rtx); +extern int ix86_mode_entry (int); +extern int ix86_mode_exit (int); +extern void ix86_emit_mode_set (int, int); extern bool ix86_fp_jump_nontrivial_p (enum rtx_code); extern void x86_order_regs_for_local_alloc (void); extern void x86_function_profiler (FILE *, int); diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.c gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.c --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.c 2005-08-18 16:01:16.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.c 2005-08-18 16:25:02.000000000 +0000 @@ -2273,12 +2273,13 @@ /* Return true when register may be used to pass function parameters. */ bool -ix86_function_arg_regno_p (int regno) +ix86_function_arg_regno_p (int regno, bool from_builtin) { int i; if (!TARGET_64BIT) return (regno < REGPARM_MAX - || (TARGET_MMX && MMX_REGNO_P (regno) + || (TARGET_MMX && !(TARGET_80387 && from_builtin) + && MMX_REGNO_P (regno) && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) || (TARGET_SSE && SSE_REGNO_P (regno) && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); @@ -3387,14 +3388,14 @@ /* Return true if N is a possible register number of function value. */ bool -ix86_function_value_regno_p (int regno) +ix86_function_value_regno_p (int regno, bool from_builtin) { if (regno == 0 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387) || (regno == FIRST_SSE_REG && TARGET_SSE)) return true; - if (!TARGET_64BIT + if (!TARGET_64BIT && !(TARGET_80387 && from_builtin) && (regno == FIRST_MMX_REG && TARGET_MMX)) return true; @@ -7714,12 +7715,152 @@ return buf; } -/* Return needed mode for entity in optimize_mode_switching pass. */ +/* Return needed mode for entity in optimize_mode_switching pass. 
+ Returned mode should match ix86_mode_entry () for function calls. */ int ix86_mode_needed (int entity, rtx insn) { - enum attr_i387_cw mode; + int unit, mode; + + if (entity == I387_FPU_MODE) + { + /* If a function call uses MMX registers, select MMX FPU mode and + if function call uses x87 registers, select x87 FPU mode. */ + if (CALL_P (insn)) + { + rtx link; + rtx reg; + bool mmx = false; + bool x87 = false; + + for (link = CALL_INSN_FUNCTION_USAGE (insn); + link; + link = XEXP (link, 1)) + { + if (GET_CODE (XEXP (link, 0)) == USE) + { + reg = XEXP (XEXP (link, 0), 0); + + if (reg) + { + if (MMX_REG_P (reg)) + mmx = true; + + if (FP_REG_P (reg)) + x87 = true; + } + } + } + + /* Mixing of x87 and MMX registers is not allowed + in function call. */ + gcc_assert (!mmx || !x87); + + if (mmx) + return FPU_MODE_MMX; + + /* Fall back to default mode. */ + return FPU_MODE_X87; + } + + /* Parse ASM operands to check input and output constraints. If + an ASM uses MMX registers, select MMX mode and if it uses x87 + registers, select x87 mode. Mixing of MMX and x87 constraints + is not allowed. If no MMX or x87 input and output registers + are used, switch to default mode. 
*/ + if (NONJUMP_INSN_P (insn)) + { + rtx pat = PATTERN (insn); + int noperands = asm_noperands (pat); + + if (noperands >= 0) + { + const char **constraints; + int i; + bool mmx = false; + bool x87 = false; + + constraints = alloca (noperands * sizeof (char *)); + decode_asm_operands (pat, NULL, NULL, constraints, NULL); + + for (i = 0; i < noperands; i++) + { + const char *c = constraints[i]; + enum reg_class class; + + if (c[0] == '%') + c++; + if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0') + c = constraints[c[0] - '0']; + + while (*c) + { + char cc = *c; + int len; + switch (cc) + { + case ',': /* Alternative separator: step the scan pointer past it. */ + c++; + continue; + case '=': + case '+': + case '*': + case '%': + case '!': + case '#': + case '&': + case '?': + break; + + default: + class = REG_CLASS_FROM_LETTER (cc); + + if (MMX_CLASS_P (class)) + mmx = true; + + if (FLOAT_CLASS_P (class)) + x87 = true; + } + + len = CONSTRAINT_LEN (cc, c); + do + c++; + while (--len && *c); + } + } + + /* Mixing x87 and MMX registers in ASM is not allowed. */ + if (mmx && x87) + error_for_asm (insn, "mixing of x87 and MMX registers " + "is not allowed in %<asm%>"); + + if (mmx) + return FPU_MODE_MMX; + + /* Fall back to default mode. */ + return FPU_MODE_X87; + } + } + + if (recog_memoized (insn) < 0) + return FPU_MODE_ANY; + + unit = get_attr_unit (insn); + + switch (unit) + { + case UNIT_MMX: + return FPU_MODE_MMX; + + case UNIT_I387: + return FPU_MODE_X87; + + default: + return FPU_MODE_ANY; + + } + } /* The mode UNINITIALIZED is used to store control word after a function call or ASM pattern. The mode ANY specify that function @@ -7766,21 +7907,132 @@ return I387_CW_ANY; } -/* Output code to initialize control word copies used by trunc?f?i and - rounding patterns. CURRENT_MODE is set to current control word, - while NEW_MODE is set to new control word. */ + +/* Switch FPU mode to appropriate mode after function call in + optimize_mode_switching pass. Returned mode should match + ix86_mode_exit (). 
*/ + +int +ix86_mode_after (int entity, int mode, rtx insn) +{ + if (entity == I387_FPU_MODE) + { + /* Switch FPU to MMX mode after function call if function value + is returned in MMX register and similar for x87 reg. + If no value is returned in MMX or x87 reg, fall back to + default mode. NOTE(review): SET_DEST below presumes PATTERN (insn) is a plain SET; confirm for void calls and PARALLEL call patterns. */ + if (CALL_P (insn)) + { + rtx reg = SET_DEST (PATTERN (insn)); + + int new_mode; + + if (reg && MMX_REG_P (reg)) + new_mode = FPU_MODE_MMX; + else + new_mode = FPU_MODE_X87; + + /* Call insn should never operate in FPU_MODE_ANY. */ + if ((mode != FPU_MODE_ANY) && (new_mode != mode)) + ix86_fpu_mode_changed = 1; + + return new_mode; + } + } + + return mode; +} + +/* Switch FPU mode of function entry to appropriate mode in + optimize_mode_switching pass. Returned mode should match + ix86_mode_needed () for function calls. */ + +int +ix86_mode_entry (int entity) +{ + if (entity == I387_FPU_MODE) + { + if (! current_function_args_info.maybe_vaarg) + { + if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX) + return FPU_MODE_MMX; + + /* ??? Handle x87 registers for fpregparm. */ + } + + /* Fall back to default mode. */ + return FPU_MODE_X87; + } + + return I387_CW_ANY; +} + +/* Switch FPU mode of function exit to appropriate mode in + optimize_mode_switching pass. Returned mode should match + ix86_mode_after () for function calls. */ + +int +ix86_mode_exit (int entity) +{ + if (entity == I387_FPU_MODE) + { + rtx reg = current_function_return_rtx; + + /* If MMX output register is specified, switch FPU mode + of function exit to MMX mode. */ + if (reg && MMX_REG_P (reg)) + return FPU_MODE_MMX; + + /* Fall back to default mode. */ + return FPU_MODE_X87; + } + + return I387_CW_ANY; +} + +/* Emit mode switching instructions in optimize_mode_switching pass. 
*/ void -emit_i387_cw_initialization (int mode) +ix86_emit_mode_set (int entity, int mode) { - rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); - rtx new_mode; + rtx stored_mode, new_mode; + rtx reg; int slot; - rtx reg = gen_reg_rtx (HImode); + if (entity == I387_FPU_MODE) + { + switch (mode) + { + case FPU_MODE_ANY: + return; + + case FPU_MODE_X87: + emit_insn (gen_emms ()); + ix86_fpu_mode_changed = 1; + return; + + case FPU_MODE_MMX: + emit_insn (gen_efpu ()); + ix86_fpu_mode_changed = 1; + return; + + default: + gcc_unreachable (); + } + } + + /* Output code to initialize control word copies used by trunc?f?i + and rounding patterns. STORED_MODE is set to current control + word, while NEW_MODE is set to new control word. */ + if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY)) + return; + + stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); emit_insn (gen_x86_fnstcw_1 (stored_mode)); + + reg = gen_reg_rtx (HImode); emit_move_insn (reg, stored_mode); if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size) @@ -12598,6 +12850,7 @@ f = ggc_alloc_cleared (sizeof (struct machine_function)); f->use_fast_prologue_epilogue_nregs = -1; + f->optimize_mode_switching[I387_FPU_MODE] = TARGET_80387 && TARGET_MMX; return f; } @@ -13196,7 +13449,77 @@ } return align; } + + +/* Return true to prevent register allocator from allocating registers + from the unit that is not active. */ + +bool +ix86_epilogue_uses (int regno) +{ + int mode; + + if (! ix86_fpu_mode_changed) + return false; + + mode = ix86_mode_exit (I387_FPU_MODE); + + if (mode == FPU_MODE_MMX) + return FP_REGNO_P (regno); + else + return MMX_REGNO_P (regno); +} + +/* Return RTX code of additional register that CALL_INSN uses. + This function is used to maintain correct register life + information before CALL_INSN in case of MMX/x87 switching. */ + +rtx +ix86_call_insn_uses (rtx insn) +{ + int mode; + + if (! 
ix86_fpu_mode_changed) + return NULL_RTX; + + gcc_assert (CALL_P (insn)); + + mode = ix86_mode_needed (I387_FPU_MODE, insn); + if (mode == FPU_MODE_MMX) + return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG); + else + return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG); + + return NULL_RTX; +} + +/* Return RTX code of additional register that CALL_INSN sets. + This function is used to maintain correct register life + information after CALL_INSN in case of MMX/x87 switching. */ + +rtx +ix86_call_insn_sets (rtx insn) +{ + int mode; + + if (! ix86_fpu_mode_changed) + return NULL_RTX; + + gcc_assert (CALL_P (insn)); + + /* Current mode in call to ix86_mode_after is set to FPU_MODE_ANY + to prevent setting of ix86_fpu_mode_changed variable. */ + mode = ix86_mode_after (I387_FPU_MODE, FPU_MODE_ANY, insn); + if (mode == FPU_MODE_MMX) + return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG); + else + return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG); + + return NULL_RTX; +} + + /* Emit RTL insns to initialize the variable parts of a trampoline. FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. 
*/ @@ -13676,9 +13999,11 @@ IX86_BUILTIN_MONITOR, IX86_BUILTIN_MWAIT, + IX86_BUILTIN_VEC_INIT_V2SF, IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI, IX86_BUILTIN_VEC_INIT_V8QI, + IX86_BUILTIN_VEC_EXT_V2SF, IX86_BUILTIN_VEC_EXT_V2DF, IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF, @@ -13860,24 +14185,24 @@ { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 }, { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 }, + { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 }, + { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, + { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 }, + { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 }, + { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 }, + { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, 
IX86_BUILTIN_PSRAD, 0, 0 }, - { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, + { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 }, + { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 }, + { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 }, + { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 }, { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 }, { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 }, @@ -14642,6 +14967,11 @@ v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU); /* Access to the vec_init patterns. */ + ftype = build_function_type_list (V2SF_type_node, float_type_node, + float_type_node, NULL_TREE); + def_builtin (MASK_3DNOW, "__builtin_ia32_vec_init_v2sf", + ftype, IX86_BUILTIN_VEC_INIT_V2SF); + ftype = build_function_type_list (V2SI_type_node, integer_type_node, integer_type_node, NULL_TREE); def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si", @@ -14663,6 +14993,11 @@ ftype, IX86_BUILTIN_VEC_INIT_V8QI); /* Access to the vec_extract patterns. */ + ftype = build_function_type_list (float_type_node, V2SF_type_node, + integer_type_node, NULL_TREE); + def_builtin (MASK_3DNOW, "__builtin_ia32_vec_ext_v2sf", + ftype, IX86_BUILTIN_VEC_EXT_V2SF); + ftype = build_function_type_list (double_type_node, V2DF_type_node, integer_type_node, NULL_TREE); def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df", @@ -15137,7 +15472,7 @@ switch (fcode) { case IX86_BUILTIN_EMMS: - emit_insn (gen_mmx_emms ()); + /* emms insn is emitted automatically. */ return 0; case IX86_BUILTIN_SFENCE: @@ -15354,7 +15689,7 @@ return target; case IX86_BUILTIN_FEMMS: - emit_insn (gen_mmx_femms ()); + /* femms insn is emitted automatically. 
*/ return NULL_RTX; case IX86_BUILTIN_PAVGUSB: @@ -15500,11 +15835,13 @@ return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist, target, 1); + case IX86_BUILTIN_VEC_INIT_V2SF: case IX86_BUILTIN_VEC_INIT_V2SI: case IX86_BUILTIN_VEC_INIT_V4HI: case IX86_BUILTIN_VEC_INIT_V8QI: return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); + case IX86_BUILTIN_VEC_EXT_V2SF: case IX86_BUILTIN_VEC_EXT_V2DF: case IX86_BUILTIN_VEC_EXT_V2DI: case IX86_BUILTIN_VEC_EXT_V4SF: diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.h gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.h --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.h 2005-08-18 16:01:16.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.h 2005-08-18 16:25:02.000000000 +0000 @@ -819,7 +819,9 @@ #define HARD_REGNO_NREGS(REGNO, MODE) \ (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \ - ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \ + ? ((MODE) == ALLREGSmode \ + ? 8 \ + : (COMPLEX_MODE_P (MODE) ? 2 : 1)) \ : ((MODE) == XFmode \ ? (TARGET_64BIT ? 2 : 3) \ : (MODE) == XCmode \ @@ -841,9 +843,8 @@ ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \ || (MODE) == V2SImode || (MODE) == SImode) -/* ??? No autovectorization into MMX or 3DNOW until we can reliably - place emms and femms instructions. */ -#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD) +#define UNITS_PER_SIMD_WORD \ + (TARGET_SSE ? 16 : TARGET_MMX ? 8 : UNITS_PER_WORD) #define VALID_FP_MODE_P(MODE) \ ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \ @@ -1436,8 +1437,16 @@ #define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \ ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE)) +/* 1 if N is the number of a register in which the values of + called function may come back. 
*/ #define FUNCTION_VALUE_REGNO_P(N) \ - ix86_function_value_regno_p (N) + ix86_function_value_regno_p ((N), false) + +/* 1 if N is the number of a register in which the value of + __builtin_return builtin function may come back. */ + +#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT(N) \ + ix86_function_value_regno_p ((N), true) /* Define how to find the value returned by a library function assuming the value has mode MODE. */ @@ -1452,7 +1461,13 @@ #define APPLY_RESULT_SIZE (8+108) /* 1 if N is a possible register number for function argument passing. */ -#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N) +#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p ((N), false) + +/* 1 if N is a possible register number for function argument passing + from __builtin_apply_args and __builtin_apply builtin functions. */ + +#define FUNCTION_ARG_REGNO_P_APPLY_ARGS(N) \ + ix86_function_arg_regno_p ((N), true) /* Define a data type for recording info about an argument list during the scan of that argument list. This data type should @@ -1534,6 +1549,23 @@ #define EXIT_IGNORE_STACK 1 +/* Define this macro as a C expression that is nonzero for registers + that are used by the epilogue or the return' pattern. The stack + and frame pointer registers are already be assumed to be used as + needed. */ + +#define EPILOGUE_USES(REGNO) ix86_epilogue_uses (REGNO) + +/* Define this macro as a C expression that returns RTL expression of + additional hard register set by call_insn. */ + +#define CALL_INSN_SETS(INSN) ix86_call_insn_sets (INSN) + +/* Define this macro as a C expression that returns RTL expression of + additional hard register used by call_insn. */ + +#define CALL_INSN_USES(INSN) ix86_call_insn_uses (INSN) + /* Output assembler code for a block containing the constant parts of a trampoline, leaving space for the variable parts. 
*/ @@ -2169,6 +2201,10 @@ extern rtx ix86_compare_op1; /* operand 1 for comparisons */ extern rtx ix86_compare_emitted; + +/* x87 FPU modes for x87/MMX switching. */ +enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY }; + /* To properly truncate FP values into integers, we need to set i387 control word. We can't emit proper mode switching code before reload, as spills generated by reload may truncate values incorrectly, but we still can avoid @@ -2190,6 +2226,7 @@ I387_FLOOR, I387_CEIL, I387_MASK_PM, + I387_FPU_MODE, MAX_386_ENTITIES }; @@ -2219,7 +2256,12 @@ refer to the mode-switched entity in question. */ #define NUM_MODES_FOR_MODE_SWITCHING \ - { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY } + { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY } + +/* Define this macro if the port needs extra register life analysis + after mode switching. */ + +#define LIFE_ANALYSIS_AFTER_MODE_SWITCHING ix86_fpu_mode_changed /* ENTITY is an integer specifying a mode-switched entity. If `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to @@ -2229,6 +2271,22 @@ #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I)) +/* This macro determines the mode that an INSN results in (if different + from the incoming mode). */ + +#define MODE_AFTER(ENTITY, MODE, I) \ + ix86_mode_after ((ENTITY), (MODE), (I)) + +/* This macro specifies a mode that ENTITY is assumed to be + switched to at function entry. */ + +#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY) + +/* This macro specifies a mode that ENTITY is assumed to be + switched to at function exit. */ + +#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY) + /* This macro specifies the order in which modes for ENTITY are processed. 0 is the highest priority. */ @@ -2238,10 +2296,8 @@ is the set of hard registers live at the point where the insn(s) are to be inserted. 
*/ -#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ - ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \ - ? emit_i387_cw_initialization (MODE), 0 \ - : 0) +#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \ + ix86_emit_mode_set ((ENTITY), (MODE)) /* Avoid renaming of stack registers, as doing so in combination with @@ -2265,6 +2321,7 @@ int save_varrargs_registers; int accesses_prev_frame; int optimize_mode_switching[MAX_386_ENTITIES]; + int fpu_mode_changed; /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to determine the style used. */ int use_fast_prologue_epilogue; @@ -2276,6 +2333,7 @@ #define ix86_stack_locals (cfun->machine->stack_locals) #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers) #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching) +#define ix86_fpu_mode_changed (cfun->machine->fpu_mode_changed) /* Control behavior of x86_file_start. */ #define X86_FILE_START_VERSION_DIRECTIVE false diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.md gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.md --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.md 2005-08-18 16:01:16.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.md 2005-08-18 16:25:02.000000000 +0000 @@ -152,7 +152,7 @@ (UNSPECV_EMMS 2) (UNSPECV_LDMXCSR 3) (UNSPECV_STMXCSR 4) - (UNSPECV_FEMMS 5) + (UNSPECV_EFPU 5) (UNSPECV_CLFLUSH 6) (UNSPECV_ALIGN 7) (UNSPECV_MONITOR 8) @@ -167,9 +167,11 @@ (define_constants [(BP_REG 6) (SP_REG 7) + (FIRSTFP_REG 8) (FLAGS_REG 17) (FPSR_REG 18) (DIRFLAG_REG 19) + (FIRSTMMX_REG 29) ]) ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mm3dnow.h gcc-4.1-20050818T1605UTC/gcc/config/i386/mm3dnow.h --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mm3dnow.h 2005-06-28 08:16:53.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/mm3dnow.h 2005-08-18 16:25:02.000000000 
+0000 @@ -172,14 +172,13 @@ static __inline __m64 _m_from_float (float __A) { - return (__m64)(__v2sf){ __A, 0 }; + return (__m64) __builtin_ia32_vec_init_v2sf (__A, 0); } static __inline float _m_to_float (__m64 __A) { - union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A }; - return __tmp.a[0]; + return __builtin_ia32_vec_ext_v2sf ((__v2sf)__A, 0); } #ifdef __3dNOW_A__ diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mmx.md gcc-4.1-20050818T1605UTC/gcc/config/i386/mmx.md --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mmx.md 2005-06-28 08:16:53.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/mmx.md 2005-08-18 16:25:02.000000000 +0000 @@ -23,14 +23,6 @@ ;; the same register file, and 3dNOW! adds a number of extensions to ;; the base integer MMX isa. -;; Note! Except for the basic move instructions, *all* of these -;; patterns are outside the normal optabs namespace. This is because -;; use of these registers requires the insertion of emms or femms -;; instructions to return to normal fpu mode. The compiler doesn't -;; know how to do that itself, which means it's up to the user. Which -;; means that we should never use any of these patterns except at the -;; direction of the user via a builtin. 
- ;; 8 byte integral modes handled by MMX (and by extension, SSE) (define_mode_macro MMXMODEI [V8QI V4HI V2SI]) @@ -481,7 +473,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_set (false, operands[0], operands[1], + ix86_expand_vector_set (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -537,7 +529,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_extract (false, operands[0], operands[1], + ix86_expand_vector_extract (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -547,7 +539,7 @@ (match_operand 1 "" "")] "TARGET_SSE" { - ix86_expand_vector_init (false, operands[0], operands[1]); + ix86_expand_vector_init (true, operands[0], operands[1]); DONE; }) @@ -557,6 +549,21 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_expand "neg<mode>2" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (minus:MMXMODEI + (match_dup 2) + (match_operand:MMXMODEI 1 "nonimmediate_operand" "")))] + "TARGET_MMX" + "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") + +(define_expand "add<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (plus:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") + (define_insn "mmx_add<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") (plus:MMXMODEI @@ -598,6 +605,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "sub<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (minus:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") + (define_insn "mmx_sub<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") (minus:MMXMODEI @@ -639,6 +653,13 @@ [(set_attr "type" "mmxadd") 
(set_attr "mode" "DI")]) +(define_expand "mulv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "") + (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:V4HI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") + (define_insn "mmx_mulv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") @@ -735,6 +756,13 @@ [(set_attr "type" "mmxmul") (set_attr "mode" "DI")]) +(define_expand "umaxv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "") + (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "") + (match_operand:V8QI 2 "nonimmediate_operand" "")))] + "(TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (UMAX, V8QImode, operands);") + (define_insn "mmx_umaxv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0") @@ -745,6 +773,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "smaxv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "") + (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:V4HI 2 "nonimmediate_operand" "")))] + "(TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (SMAX, V4HImode, operands);") + (define_insn "mmx_smaxv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") @@ -755,6 +790,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "uminv8qi3" + [(set (match_operand:V8QI 0 "register_operand" "") + (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "") + (match_operand:V8QI 2 "nonimmediate_operand" "")))] + "(TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (UMIN, V8QImode, operands);") + (define_insn "mmx_uminv8qi3" [(set (match_operand:V8QI 0 "register_operand" "=y") (umin:V8QI (match_operand:V8QI 1 
"nonimmediate_operand" "%0") @@ -765,6 +807,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "sminv4hi3" + [(set (match_operand:V4HI 0 "register_operand" "") + (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "") + (match_operand:V4HI 2 "nonimmediate_operand" "")))] + "(TARGET_SSE || TARGET_3DNOW_A)" + "ix86_fixup_binary_operands_no_copy (SMIN, V4HImode, operands);") + (define_insn "mmx_sminv4hi3" [(set (match_operand:V4HI 0 "register_operand" "=y") (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0") @@ -775,7 +824,7 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) -(define_insn "mmx_ashr<mode>3" +(define_insn "ashr<mode>3" [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (ashiftrt:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") @@ -785,7 +834,7 @@ [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_lshr<mode>3" +(define_insn "lshr<mode>3" [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (lshiftrt:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") @@ -806,7 +855,7 @@ [(set_attr "type" "mmxshft") (set_attr "mode" "DI")]) -(define_insn "mmx_ashl<mode>3" +(define_insn "ashl<mode>3" [(set (match_operand:MMXMODE24 0 "register_operand" "=y") (ashift:MMXMODE24 (match_operand:MMXMODE24 1 "register_operand" "0") @@ -853,12 +902,66 @@ [(set_attr "type" "mmxcmp") (set_attr "mode" "DI")]) +(define_expand "vcond<mode>" + [(set (match_operand:MMXMODE12 0 "register_operand" "") + (if_then_else:MMXMODE12 + (match_operator 3 "" + [(match_operand:MMXMODE12 4 "nonimmediate_operand" "") + (match_operand:MMXMODE12 5 "nonimmediate_operand" "")]) + (match_operand:MMXMODE12 1 "general_operand" "") + (match_operand:MMXMODE12 2 "general_operand" "")))] + "TARGET_MMX" +{ + if (ix86_expand_int_vcond (operands)) + DONE; + else + FAIL; +}) + +(define_expand "vconduv8qi" + [(set (match_operand:V8QI 0 "register_operand" "") + (if_then_else:V8QI + (match_operator 3 "" + [(match_operand:V8QI 4 "nonimmediate_operand" "") + 
(match_operand:V8QI 5 "nonimmediate_operand" "")]) + (match_operand:V8QI 1 "general_operand" "") + (match_operand:V8QI 2 "general_operand" "")))] + "TARGET_MMX" +{ + if (ix86_expand_int_vcond (operands)) + DONE; + else + FAIL; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Parallel integral logical operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_expand "one_cmpl<mode>2" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") + (match_dup 2)))] + "TARGET_MMX" +{ + int i, n = GET_MODE_NUNITS (<MODE>mode); + rtvec v = rtvec_alloc (n); + + for (i = 0; i < n; ++i) + RTVEC_ELT (v, i) = constm1_rtx; + + operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); +}) + +(define_expand "and<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (and:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);") + (define_insn "mmx_and<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") (and:MMXMODEI @@ -879,6 +982,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "ior<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (ior:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);") + (define_insn "mmx_ior<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") (ior:MMXMODEI @@ -889,6 +999,13 @@ [(set_attr "type" "mmxadd") (set_attr "mode" "DI")]) +(define_expand "xor<mode>3" + [(set (match_operand:MMXMODEI 0 "register_operand" "") + (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "") + (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))] + "TARGET_MMX" + "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, 
operands);") + (define_insn "mmx_xor<mode>3" [(set (match_operand:MMXMODEI 0 "register_operand" "=y") (xor:MMXMODEI @@ -1147,7 +1264,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_set (false, operands[0], operands[1], + ix86_expand_vector_set (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1205,7 +1322,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_extract (false, operands[0], operands[1], + ix86_expand_vector_extract (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1215,7 +1332,7 @@ (match_operand 1 "" "")] "TARGET_SSE" { - ix86_expand_vector_init (false, operands[0], operands[1]); + ix86_expand_vector_init (true, operands[0], operands[1]); DONE; }) @@ -1225,7 +1342,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_set (false, operands[0], operands[1], + ix86_expand_vector_set (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1236,7 +1353,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_extract (false, operands[0], operands[1], + ix86_expand_vector_extract (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1246,7 +1363,7 @@ (match_operand 1 "" "")] "TARGET_SSE" { - ix86_expand_vector_init (false, operands[0], operands[1]); + ix86_expand_vector_init (true, operands[0], operands[1]); DONE; }) @@ -1256,7 +1373,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_set (false, operands[0], operands[1], + ix86_expand_vector_set (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1267,7 +1384,7 @@ (match_operand 2 "const_int_operand" "")] "TARGET_MMX" { - ix86_expand_vector_extract (false, operands[0], operands[1], + ix86_expand_vector_extract (true, operands[0], operands[1], INTVAL (operands[2])); DONE; }) @@ -1277,7 +1394,7 @@ (match_operand 1 "" "")] "TARGET_SSE" { - ix86_expand_vector_init (false, operands[0], 
operands[1]); + ix86_expand_vector_init (true, operands[0], operands[1]); DONE; }) @@ -1386,48 +1503,20 @@ [(set_attr "type" "mmxcvt") (set_attr "mode" "DI")]) -(define_insn "mmx_emms" - [(unspec_volatile [(const_int 0)] UNSPECV_EMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_MMX" - "emms" - [(set_attr "type" "mmx") - (set_attr "memory" "unknown")]) +(define_insn "efpu" + [(set (reg:ALLREGS FIRSTFP_REG) + (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTMMX_REG)] + UNSPECV_EFPU))] + "TARGET_80387 && TARGET_MMX" + "" + [(set_attr "length" "0")]) + +(define_insn "emms" + [(set (reg:ALLREGS FIRSTMMX_REG) + (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTFP_REG)] + UNSPECV_EMMS))] + "TARGET_80387 && TARGET_MMX" +{ + return TARGET_3DNOW ? 
"femms" : "emms"; +}) -(define_insn "mmx_femms" - [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS) - (clobber (reg:XF 8)) - (clobber (reg:XF 9)) - (clobber (reg:XF 10)) - (clobber (reg:XF 11)) - (clobber (reg:XF 12)) - (clobber (reg:XF 13)) - (clobber (reg:XF 14)) - (clobber (reg:XF 15)) - (clobber (reg:DI 29)) - (clobber (reg:DI 30)) - (clobber (reg:DI 31)) - (clobber (reg:DI 32)) - (clobber (reg:DI 33)) - (clobber (reg:DI 34)) - (clobber (reg:DI 35)) - (clobber (reg:DI 36))] - "TARGET_3DNOW" - "femms" - [(set_attr "type" "mmx") - (set_attr "memory" "none")]) diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/sse.md gcc-4.1-20050818T1605UTC/gcc/config/i386/sse.md --- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/sse.md 2005-08-18 16:01:16.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/i386/sse.md 2005-08-18 16:25:02.000000000 +0000 @@ -881,6 +881,7 @@ "TARGET_SSE" "cvtpi2ps\t{%2, %0|%0, %2}" [(set_attr "type" "ssecvt") + (set_attr "unit" "mmx") (set_attr "mode" "V4SF")]) (define_insn "sse_cvtps2pi" @@ -3508,6 +3509,7 @@ movhps\t{%2, %0|%0, %2} movlps\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov") + (set_attr "unit" "*,mmx,*,*,*,*") (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")]) (define_expand "vec_setv2di" diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/sh/sh.h gcc-4.1-20050818T1605UTC/gcc/config/sh/sh.h --- gcc-4.1-20050818T1605UTC/gcc.orig/config/sh/sh.h 2005-08-18 16:01:22.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/config/sh/sh.h 2005-08-18 16:25:02.000000000 +0000 @@ -3311,7 +3311,7 @@ ? 
get_attr_fp_mode (INSN) \ : FP_MODE_NONE) -#define MODE_AFTER(MODE, INSN) \ +#define MODE_AFTER(ENTITY, MODE, INSN) \ (TARGET_HITACHI \ && recog_memoized (INSN) >= 0 \ && get_attr_fp_set (INSN) != FP_SET_NONE \ diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/doc/tm.texi gcc-4.1-20050818T1605UTC/gcc/doc/tm.texi --- gcc-4.1-20050818T1605UTC/gcc.orig/doc/tm.texi 2005-08-18 16:01:28.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/doc/tm.texi 2005-08-18 16:25:02.000000000 +0000 @@ -4227,6 +4227,16 @@ compiler knows this regardless of @code{EXIT_IGNORE_STACK}. @end defmac +@defmac CALL_INSN_SETS (@var{INSN}) +Define this macro as a C expression that returns an RTL expression for +an additional hard register set by the call insn @var{INSN}. +@end defmac + +@defmac CALL_INSN_USES (@var{INSN}) +Define this macro as a C expression that returns an RTL expression for +an additional hard register used by the call insn @var{INSN}. +@end defmac + @defmac EPILOGUE_USES (@var{regno}) Define this macro as a C expression that is nonzero for registers that are used by the epilogue or the @samp{return} pattern. The stack and frame @@ -8376,6 +8386,13 @@ switch is needed / supplied. @end defmac +@defmac LIFE_ANALYSIS_AFTER_MODE_SWITCHING +Define this macro if the port needs extra register life analysis after +mode switching. This macro should be defined if mode switching inserts +instructions that change global registers, so that global register life +information is kept consistent. +@end defmac + @defmac MODE_NEEDED (@var{entity}, @var{insn}) @var{entity} is an integer specifying a mode-switched entity. If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro to @@ -8384,9 +8401,9 @@ be switched into prior to the execution of @var{insn}. @end defmac -@defmac MODE_AFTER (@var{mode}, @var{insn}) -If this macro is defined, it is evaluated for every @var{insn} during -mode switching. 
It determines the mode that an insn results in (if +@defmac MODE_AFTER (@var{entity}, @var{mode}, @var{insn}) +If this macro is defined, it is evaluated for every @var{entity} that needs +mode switching. It determines the mode that an @var{insn} results in (if different from the incoming mode). @end defmac diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/flow.c gcc-4.1-20050818T1605UTC/gcc/flow.c --- gcc-4.1-20050818T1605UTC/gcc.orig/flow.c 2005-08-18 16:00:40.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/flow.c 2005-08-18 16:25:02.000000000 +0000 @@ -1830,10 +1830,11 @@ { regset live_at_end; bool sibcall_p; - rtx note, cond; + rtx note; + rtx cond = NULL_RTX; + rtx reg ATTRIBUTE_UNUSED; int i; - cond = NULL_RTX; if (GET_CODE (PATTERN (insn)) == COND_EXEC) cond = COND_EXEC_TEST (PATTERN (insn)); @@ -1856,6 +1857,13 @@ mark_set_1 (pbi, CLOBBER, XEXP (XEXP (note, 0), 0), cond, insn, pbi->flags); +#ifdef CALL_INSN_SETS + reg = CALL_INSN_SETS (insn); + + if (reg) + mark_set_1 (pbi, SET, reg, cond, insn, pbi->flags); +#endif + /* Calls change all call-used and global registers; sibcalls do not clobber anything that must be preserved at end-of-function, except for return values. */ @@ -1894,10 +1902,11 @@ if (! insn_is_dead && CALL_P (insn)) { + rtx note; + rtx cond = NULL_RTX; + rtx reg ATTRIBUTE_UNUSED; int i; - rtx note, cond; - cond = NULL_RTX; if (GET_CODE (PATTERN (insn)) == COND_EXEC) cond = COND_EXEC_TEST (PATTERN (insn)); @@ -1910,6 +1919,13 @@ of which mark_used_regs knows how to handle. */ mark_used_regs (pbi, XEXP (XEXP (note, 0), 0), cond, insn); +#ifdef CALL_INSN_USES + reg = CALL_INSN_USES (insn); + + if (reg) + mark_used_reg (pbi, reg, cond, insn); +#endif + /* The stack ptr is used (honorarily) by a CALL insn. 
*/ if ((flags & PROP_REG_INFO) && !REGNO_REG_SET_P (pbi->reg_live, STACK_POINTER_REGNUM)) diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/mode-switching.c gcc-4.1-20050818T1605UTC/gcc/mode-switching.c --- gcc-4.1-20050818T1605UTC/gcc.orig/mode-switching.c 2005-08-18 16:00:42.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/mode-switching.c 2005-08-18 16:25:02.000000000 +0000 @@ -473,7 +473,7 @@ RESET_BIT (transp[bb->index], j); } #ifdef MODE_AFTER - last_mode = MODE_AFTER (last_mode, insn); + last_mode = MODE_AFTER (e, last_mode, insn); #endif /* Update LIVE_NOW. */ for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) @@ -730,6 +730,14 @@ no_new_pseudos = 0; optimize_mode_switching (NULL); no_new_pseudos = 1; + + /* Mode switching can insert instructions that + change global registers life data. */ +#ifdef LIFE_ANALYSIS_AFTER_MODE_SWITCHING + if (LIFE_ANALYSIS_AFTER_MODE_SWITCHING) + life_analysis (NULL, PROP_REG_INFO); +#endif + #endif /* OPTIMIZE_MODE_SWITCHING */ } diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/reg-stack.c gcc-4.1-20050818T1605UTC/gcc/reg-stack.c --- gcc-4.1-20050818T1605UTC/gcc.orig/reg-stack.c 2005-08-18 16:00:43.000000000 +0000 +++ gcc-4.1-20050818T1605UTC/gcc/reg-stack.c 2005-08-18 16:25:02.000000000 +0000 @@ -1579,6 +1579,41 @@ } break; + case UNSPEC_VOLATILE: + switch (XINT (pat_src, 1)) + { + int i; + + case UNSPECV_EFPU: + /* There should be no stack registers live + at this point. */ + gcc_assert (regstack->top == -1); + + /* Mark all x87 registers as used. */ + for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--) + { + regstack->reg[++regstack->top] = i; + SET_HARD_REG_BIT (regstack->reg_set, i); + } + break; + + case UNSPECV_EMMS: + /* All stack registers should be alive + at this point. */ + gcc_assert (regstack->top == REG_STACK_SIZE - 1); + + /* Mark all x87 registers as empty. 
*/ + for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--) + CLEAR_HARD_REG_BIT (regstack->reg_set, i); + + regstack->top = -1; + break; + + default: + gcc_unreachable (); + } + break; + case UNSPEC: switch (XINT (pat_src, 1)) { @@ -2269,6 +2304,25 @@ if (NOTE_P (insn) || INSN_DELETED_P (insn)) return control_flow_insn_deleted; +#ifdef CALL_INSN_SETS + if (CALL_P (insn)) + { + rtx reg = CALL_INSN_SETS (insn); + + if (reg && STACK_REG_P (reg)) + { + int count; + + for (count = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)]; + --count >= 0;) + { + regstack->reg[++regstack->top] = REGNO (reg) + count; + SET_HARD_REG_BIT (regstack->reg_set, REGNO (reg) + count); + } + } + } +#endif + /* If there is a REG_UNUSED note on a stack register on this insn, the indicated reg must be popped. The REG_UNUSED note is removed, since the form of the newly emitted pop insn references the reg, @@ -2544,6 +2598,15 @@ basic_block block = e->dest; block_info bi = BLOCK_INFO (block); int reg, top = -1; + int numregs = 0; + + /* Check if all stack registers are live at function entry. + This is the case where stack registers are disabled and no + register initialization is needed. 
*/ + + for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg) + if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg)) + numregs++; for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg) if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg)) @@ -2552,11 +2615,14 @@ bi->stack_in.reg[++top] = reg; - init = gen_rtx_SET (VOIDmode, - FP_MODE_REG (FIRST_STACK_REG, SFmode), - not_a_num); - insert_insn_on_edge (init, e); - inserted = 1; + if (numregs != REG_STACK_SIZE) + { + init = gen_rtx_SET (VOIDmode, + FP_MODE_REG (FIRST_STACK_REG, SFmode), + not_a_num); + insert_insn_on_edge (init, e); + inserted = 1; + } } bi->stack_in.top = top; @@ -2575,13 +2641,34 @@ stack output_stack; rtx retvalue; - retvalue = stack_result (current_function_decl); value_reg_low = value_reg_high = -1; - if (retvalue) + +#ifdef EPILOGUE_USES + { + int numregs = 0; + int i; + + for (i = FIRST_STACK_REG; i < LAST_STACK_REG + 1; i++) + if (EPILOGUE_USES (i)) + numregs++; + + if (numregs) + { + value_reg_low = FIRST_STACK_REG; + value_reg_high = value_reg_low + numregs - 1; + } + } +#endif + + if (value_reg_low < 0) { - value_reg_low = REGNO (retvalue); - value_reg_high = value_reg_low - + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1; + retvalue = stack_result (current_function_decl); + if (retvalue) + { + value_reg_low = REGNO (retvalue); + value_reg_high = value_reg_low + + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1; + } } output_stack = &BLOCK_INFO (EXIT_BLOCK_PTR)->stack_in;