]> git.pld-linux.org Git - packages/gcc.git/blame - gcc-x87-mmx-switch.patch
- [x87] better floating point compare optimization.
[packages/gcc.git] / gcc-x87-mmx-switch.patch
CommitLineData
de9651ff
PS
1diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/builtins.c gcc-4.1-20050818T1605UTC/gcc/builtins.c
2--- gcc-4.1-20050818T1605UTC/gcc.orig/builtins.c 2005-08-18 16:22:46.000000000 +0000
3+++ gcc-4.1-20050818T1605UTC/gcc/builtins.c 2005-08-18 16:25:02.000000000 +0000
4@@ -52,6 +52,14 @@
5 #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
6 #endif
7
8+#ifndef FUNCTION_VALUE_REGNO_P_APPLY_RESULT
9+#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT FUNCTION_VALUE_REGNO_P
10+#endif
11+
12+#ifndef FUNCTION_ARG_REGNO_P_APPLY_ARGS
13+#define FUNCTION_ARG_REGNO_P_APPLY_ARGS FUNCTION_ARG_REGNO_P
14+#endif
15+
16 /* Define the names of the builtin function types and codes. */
17 const char *const built_in_class_names[4]
18 = {"NOT_BUILT_IN", "BUILT_IN_FRONTEND", "BUILT_IN_MD", "BUILT_IN_NORMAL"};
19@@ -1079,7 +1087,7 @@
20 size += GET_MODE_SIZE (Pmode);
21
22 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
23- if (FUNCTION_ARG_REGNO_P (regno))
24+ if (FUNCTION_ARG_REGNO_P_APPLY_ARGS (regno))
25 {
26 mode = reg_raw_mode[regno];
27
28@@ -1117,7 +1125,7 @@
29 size = 0;
30
31 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
32- if (FUNCTION_VALUE_REGNO_P (regno))
33+ if (FUNCTION_VALUE_REGNO_P_APPLY_RESULT (regno))
34 {
35 mode = reg_raw_mode[regno];
36
37diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/caller-save.c gcc-4.1-20050818T1605UTC/gcc/caller-save.c
38--- gcc-4.1-20050818T1605UTC/gcc.orig/caller-save.c 2005-06-28 08:15:34.000000000 +0000
39+++ gcc-4.1-20050818T1605UTC/gcc/caller-save.c 2005-08-18 16:25:02.000000000 +0000
40@@ -377,6 +377,7 @@
41 {
42 rtx insn = chain->insn;
43 enum rtx_code code = GET_CODE (insn);
44+ rtx reg ATTRIBUTE_UNUSED;
45
46 next = chain->next;
47
48@@ -450,6 +451,12 @@
49 CLEAR_HARD_REG_SET (this_insn_sets);
50 note_stores (PATTERN (insn), mark_set_regs, NULL);
51
52+#ifdef CALL_INSN_SETS
53+ reg = CALL_INSN_SETS (insn);
54+
55+ if (reg)
56+ mark_set_regs (reg, NULL_RTX, NULL);
57+#endif
58 /* Compute which hard regs must be saved before this call. */
59 AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set);
60 AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets);
61diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-modes.def gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-modes.def
62--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-modes.def 2005-06-28 08:16:53.000000000 +0000
63+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-modes.def 2005-08-18 16:25:02.000000000 +0000
64@@ -62,6 +62,9 @@
65 CC_MODE (CCFP);
66 CC_MODE (CCFPU);
67
68+/* This mode is used to cover all MMX and all x87 registers. */
69+RANDOM_MODE (ALLREGS);
70+
71 /* Vector modes. */
72 VECTOR_MODES (INT, 4); /* V4QI V2HI */
73 VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
74diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-protos.h gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-protos.h
75--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386-protos.h 2005-08-18 16:01:16.000000000 +0000
76+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386-protos.h 2005-08-18 16:25:02.000000000 +0000
77@@ -152,6 +152,9 @@
78 extern bool ix86_expand_int_vcond (rtx[]);
79 extern int ix86_expand_int_addcc (rtx[]);
80 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
81+extern bool ix86_epilogue_uses (int);
82+extern rtx ix86_call_insn_sets (rtx);
83+extern rtx ix86_call_insn_uses (rtx);
84 extern void x86_initialize_trampoline (rtx, rtx, rtx);
85 extern rtx ix86_zero_extend_to_Pmode (rtx);
86 extern void ix86_split_long_move (rtx[]);
87@@ -168,8 +171,8 @@
88 extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
89
90 extern rtx ix86_libcall_value (enum machine_mode);
91-extern bool ix86_function_value_regno_p (int);
92-extern bool ix86_function_arg_regno_p (int);
93+extern bool ix86_function_value_regno_p (int, bool);
94+extern bool ix86_function_arg_regno_p (int, bool);
95 extern int ix86_function_arg_boundary (enum machine_mode, tree);
96 extern int ix86_return_in_memory (tree);
97 extern void ix86_va_start (tree, rtx);
98@@ -190,7 +193,10 @@
99 extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
100 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
101 extern int ix86_mode_needed (int, rtx);
102-extern void emit_i387_cw_initialization (int);
103+extern int ix86_mode_after (int, int, rtx);
104+extern int ix86_mode_entry (int);
105+extern int ix86_mode_exit (int);
106+extern void ix86_emit_mode_set (int, int);
107 extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
108 extern void x86_order_regs_for_local_alloc (void);
109 extern void x86_function_profiler (FILE *, int);
110diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.c gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.c
111--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.c 2005-08-18 16:01:16.000000000 +0000
112+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.c 2005-08-18 16:25:02.000000000 +0000
113@@ -2273,12 +2273,13 @@
114
115 /* Return true when register may be used to pass function parameters. */
116 bool
117-ix86_function_arg_regno_p (int regno)
118+ix86_function_arg_regno_p (int regno, bool from_builtin)
119 {
120 int i;
121 if (!TARGET_64BIT)
122 return (regno < REGPARM_MAX
123- || (TARGET_MMX && MMX_REGNO_P (regno)
124+ || (TARGET_MMX && !(TARGET_80387 && from_builtin)
125+ && MMX_REGNO_P (regno)
126 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
127 || (TARGET_SSE && SSE_REGNO_P (regno)
128 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
129@@ -3387,14 +3388,14 @@
130
131 /* Return true if N is a possible register number of function value. */
132 bool
133-ix86_function_value_regno_p (int regno)
134+ix86_function_value_regno_p (int regno, bool from_builtin)
135 {
136 if (regno == 0
137 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
138 || (regno == FIRST_SSE_REG && TARGET_SSE))
139 return true;
140
141- if (!TARGET_64BIT
142+ if (!TARGET_64BIT && !(TARGET_80387 && from_builtin)
143 && (regno == FIRST_MMX_REG && TARGET_MMX))
144 return true;
145
146@@ -7714,12 +7715,152 @@
147 return buf;
148 }
149
150-/* Return needed mode for entity in optimize_mode_switching pass. */
151+/* Return needed mode for entity in optimize_mode_switching pass.
152+ Returned mode should match ix86_mode_entry () for function calls. */
153
154 int
155 ix86_mode_needed (int entity, rtx insn)
156 {
157- enum attr_i387_cw mode;
158+ int unit, mode;
159+
160+ if (entity == I387_FPU_MODE)
161+ {
162+ /* If a function call uses MMX registers, select MMX FPU mode and
163+ if function call uses x87 registers, select x87 FPU mode. */
164+ if (CALL_P (insn))
165+ {
166+ rtx link;
167+ rtx reg;
168+ bool mmx = false;
169+ bool x87 = false;
170+
171+ for (link = CALL_INSN_FUNCTION_USAGE (insn);
172+ link;
173+ link = XEXP (link, 1))
174+ {
175+ if (GET_CODE (XEXP (link, 0)) == USE)
176+ {
177+ reg = XEXP (XEXP (link, 0), 0);
178+
179+ if (reg)
180+ {
181+ if (MMX_REG_P (reg))
182+ mmx = true;
183+
184+ if (FP_REG_P (reg))
185+ x87 = true;
186+ }
187+ }
188+ }
189+
190+ /* Mixing of x87 and MMX registers is not allowed
191+ in function call. */
192+ gcc_assert (!mmx || !x87);
193+
194+ if (mmx)
195+ return FPU_MODE_MMX;
196+
197+ /* Fall back to default mode. */
198+ return FPU_MODE_X87;
199+ }
200+
201+ /* Parse ASM operands to check input and output constraints. If
202+ an ASM uses MMX registers, select MMX mode and if it uses x87
203+ registers, select x87 mode. Mixing of MMX and x87 constraints
204+ is not allowed. If no MMX or x87 input and output registers
205+ are used, switch to default mode. */
206+ if (NONJUMP_INSN_P (insn))
207+ {
208+ rtx pat = PATTERN (insn);
209+ int noperands = asm_noperands (pat);
210+
211+ if (noperands >= 0)
212+ {
213+ const char **constraints;
214+ int i;
215+ bool mmx = false;
216+ bool x87 = false;
217+
218+ constraints = alloca (noperands * sizeof (char *));
219+ decode_asm_operands (pat, NULL, NULL, constraints, NULL);
220+
221+ for (i = 0; i < noperands; i++)
222+ {
223+ const char *c = constraints[i];
224+ enum reg_class class;
225+
226+ if (c[0] == '%')
227+ c++;
228+ if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0')
229+ c = constraints[c[0] - '0'];
230+
231+ while (*c) /* Walk every constraint character of this operand. */
232+ {
233+ char cc = *c;
234+ int len;
235+ switch (cc)
236+ {
237+ case ',': /* Alternative separator. */
238+ c++; /* Advance the scan pointer itself; bumping the copy CC would never terminate. */
239+ continue;
240+ case '=':
241+ case '+':
242+ case '*':
243+ case '%':
244+ case '!':
245+ case '#':
246+ case '&':
247+ case '?':
248+ break; /* These characters are not looked up as register classes. */
249+
250+ default:
251+ class = REG_CLASS_FROM_LETTER (cc);
252+
253+ if (MMX_CLASS_P (class))
254+ mmx = true;
255+
256+ if (FLOAT_CLASS_P (class))
257+ x87 = true;
258+ }
259+
260+ len = CONSTRAINT_LEN (cc, c); /* Skip LEN characters, stopping early at the terminating NUL. */
261+ do
262+ c++;
263+ while (--len && *c);
264+ }
265+ }
266+
267+ /* Mixing x87 and MMX registers in ASM is not allowed. */
268+ if (mmx && x87)
269+ error_for_asm (insn, "mixing of x87 and MMX registers "
270+ "is not allowed in %<asm%>");
271+
272+ if (mmx)
273+ return FPU_MODE_MMX;
274+
275+ /* Fall back to default mode. */
276+ return FPU_MODE_X87;
277+ }
278+ }
279+
280+ if (recog_memoized (insn) < 0)
281+ return FPU_MODE_ANY;
282+
283+ unit = get_attr_unit (insn);
284+
285+ switch (unit)
286+ {
287+ case UNIT_MMX:
288+ return FPU_MODE_MMX;
289+
290+ case UNIT_I387:
291+ return FPU_MODE_X87;
292+
293+ default:
294+ return FPU_MODE_ANY;
295+
296+ }
297+ }
298
299 /* The mode UNINITIALIZED is used to store control word after a
300 function call or ASM pattern. The mode ANY specify that function
301@@ -7766,21 +7907,132 @@
302 return I387_CW_ANY;
303 }
304
305-/* Output code to initialize control word copies used by trunc?f?i and
306- rounding patterns. CURRENT_MODE is set to current control word,
307- while NEW_MODE is set to new control word. */
308+
309+/* Switch FPU mode to appropriate mode after function call in
310+ optimize_mode_switching pass. Returned mode should match
311+ ix86_mode_exit (). */
312+
313+int
314+ix86_mode_after (int entity, int mode, rtx insn)
315+{
316+ if (entity == I387_FPU_MODE)
317+ {
318+ /* Switch FPU to MMX mode after function call if function value
319+ is returned in MMX register and similar for x87 reg.
320+ If no value is returned in MMX or x87 reg, fall back to
321+ default mode. */
322+ if (CALL_P (insn))
323+ {
324+ rtx reg = SET_DEST (PATTERN (insn)); /* NOTE(review): assumes the call pattern is a plain SET -- confirm for calls that return no value. */
325+
326+ int new_mode;
327+
328+ if (reg && MMX_REG_P (reg))
329+ new_mode = FPU_MODE_MMX;
330+ else
331+ new_mode = FPU_MODE_X87;
332+
333+ /* Call insn should never operate in FPU_MODE_ANY. */
334+ if ((mode != FPU_MODE_ANY) && (new_mode != mode))
335+ ix86_fpu_mode_changed = 1;
336+
337+ return new_mode;
338+ }
339+ }
340+
341+ return mode; /* Other entities and non-call insns leave MODE unchanged. */
342+}
343+
344+/* Switch FPU mode of function entry to appropriate mode in
345+ optimize_mode_switching pass. Returned mode should match
346+ ix86_mode_needed () for function calls. */
347+
348+int
349+ix86_mode_entry (int entity)
350+{
351+ if (entity == I387_FPU_MODE)
352+ {
353+ if (! current_function_args_info.maybe_vaarg)
354+ {
355+ if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX) /* presumably: an MMX argument register was consumed -- TODO confirm */
356+ return FPU_MODE_MMX;
357+
358+ /* ??? Handle x87 registers for fpregparm. */
359+ }
360+
361+ /* Fall back to default mode. */
362+ return FPU_MODE_X87;
363+ }
364+
365+ return I387_CW_ANY; /* Every entity other than I387_FPU_MODE. */
366+}
367+
368+/* Switch FPU mode of function exit to appropriate mode in
369+ optimize_mode_switching pass. Returned mode should match
370+ ix86_mode_after () for function calls. */
371+
372+int
373+ix86_mode_exit (int entity)
374+{
375+ if (entity == I387_FPU_MODE)
376+ {
377+ rtx reg = current_function_return_rtx;
378+
379+ /* If MMX output register is specified, switch FPU mode
380+ of function exit to MMX mode. */
381+ if (reg && MMX_REG_P (reg))
382+ return FPU_MODE_MMX;
383+
384+ /* Fall back to default mode. */
385+ return FPU_MODE_X87;
386+ }
387+
388+ return I387_CW_ANY; /* Every entity other than I387_FPU_MODE. */
389+}
390+
391+/* Emit mode switching instructions in optimize_mode_switching pass. */
392
393 void
394-emit_i387_cw_initialization (int mode)
395+ix86_emit_mode_set (int entity, int mode)
396 {
397- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
398- rtx new_mode;
399+ rtx stored_mode, new_mode;
400+ rtx reg;
401
402 int slot;
403
404- rtx reg = gen_reg_rtx (HImode);
405+ if (entity == I387_FPU_MODE)
406+ {
407+ switch (mode)
408+ {
409+ case FPU_MODE_ANY:
410+ return;
411+
412+ case FPU_MODE_X87:
413+ emit_insn (gen_emms ());
414+ ix86_fpu_mode_changed = 1;
415+ return;
416+
417+ case FPU_MODE_MMX:
418+ emit_insn (gen_efpu ());
419+ ix86_fpu_mode_changed = 1;
420+ return;
421+
422+ default:
423+ gcc_unreachable ();
424+ }
425+ }
426+
427+ /* Output code to initialize control word copies used by trunc?f?i
428+ and rounding patterns. STORED_MODE is set to current control
429+ word, while NEW_MODE is set to new control word. */
430
431+ if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
432+ return;
433+
434+ stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
435 emit_insn (gen_x86_fnstcw_1 (stored_mode));
436+
437+ reg = gen_reg_rtx (HImode);
438 emit_move_insn (reg, stored_mode);
439
440 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
441@@ -12598,6 +12850,7 @@
442
443 f = ggc_alloc_cleared (sizeof (struct machine_function));
444 f->use_fast_prologue_epilogue_nregs = -1;
445+ f->optimize_mode_switching[I387_FPU_MODE] = TARGET_80387 && TARGET_MMX;
446
447 return f;
448 }
449@@ -13196,7 +13449,77 @@
450 }
451 return align;
452 }
453+
454 \f
455+
456+/* Return true if REGNO is in the unit (x87 or MMX) that is not active at
457+ function exit, to prevent register allocator from allocating it. */
458+
459+bool
460+ix86_epilogue_uses (int regno)
461+{
462+ int mode;
463+
464+ if (! ix86_fpu_mode_changed) /* Flag is clear: no register is reported as used. */
465+ return false;
466+
467+ mode = ix86_mode_exit (I387_FPU_MODE);
468+
469+ if (mode == FPU_MODE_MMX) /* Exit mode is MMX: report the x87 registers. */
470+ return FP_REGNO_P (regno);
471+ else /* Otherwise report the MMX registers. */
472+ return MMX_REGNO_P (regno);
473+}
474+
475+/* Return RTX code of additional register that CALL_INSN uses.
476+ This function is used to maintain correct register life
477+ information before CALL_INSN in case of MMX/x87 switching. */
478+
479+rtx
480+ix86_call_insn_uses (rtx insn)
481+{
482+ int mode;
483+
484+ if (! ix86_fpu_mode_changed) /* Flag is clear: no extra register to report. */
485+ return NULL_RTX;
486+
487+ gcc_assert (CALL_P (insn));
488+
489+ mode = ix86_mode_needed (I387_FPU_MODE, insn);
490+ if (mode == FPU_MODE_MMX) /* Call needs MMX mode: report the x87 registers. */
491+ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG);
492+ else /* Otherwise report the MMX registers. */
493+ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG);
494+
495+ return NULL_RTX; /* Not reached: both branches above return. */
496+}
497+
498+/* Return RTX code of additional register that CALL_INSN sets.
499+ This function is used to maintain correct register life
500+ information after CALL_INSN in case of MMX/x87 switching. */
501+
502+rtx
503+ix86_call_insn_sets (rtx insn)
504+{
505+ int mode;
506+
507+ if (! ix86_fpu_mode_changed) /* Flag is clear: no extra register to report. */
508+ return NULL_RTX;
509+
510+ gcc_assert (CALL_P (insn));
511+
512+ /* Current mode in call to ix86_mode_after is set to FPU_MODE_ANY
513+ to prevent setting of ix86_fpu_mode_changed variable. */
514+ mode = ix86_mode_after (I387_FPU_MODE, FPU_MODE_ANY, insn);
515+ if (mode == FPU_MODE_MMX) /* Mode after the call is MMX: report the x87 registers. */
516+ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG);
517+ else /* Otherwise report the MMX registers. */
518+ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG);
519+
520+ return NULL_RTX; /* Not reached: both branches above return. */
521+}
522+
523+
524 /* Emit RTL insns to initialize the variable parts of a trampoline.
525 FNADDR is an RTX for the address of the function's pure code.
526 CXT is an RTX for the static chain value for the function. */
527@@ -13676,9 +13999,11 @@
528 IX86_BUILTIN_MONITOR,
529 IX86_BUILTIN_MWAIT,
530
531+ IX86_BUILTIN_VEC_INIT_V2SF,
532 IX86_BUILTIN_VEC_INIT_V2SI,
533 IX86_BUILTIN_VEC_INIT_V4HI,
534 IX86_BUILTIN_VEC_INIT_V8QI,
535+ IX86_BUILTIN_VEC_EXT_V2SF,
536 IX86_BUILTIN_VEC_EXT_V2DF,
537 IX86_BUILTIN_VEC_EXT_V2DI,
538 IX86_BUILTIN_VEC_EXT_V4SF,
539@@ -13860,24 +14185,24 @@
540 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
541 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
542
543- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
544- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
545- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
546- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
547+ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
548+ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
549+ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
550+ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
551 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
552 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
553
554- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
555- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
556- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
557- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
558+ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
559+ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
560+ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
561+ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
562 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
563 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
564
565- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
566- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
567- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
568- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
569+ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
570+ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
571+ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
572+ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
573
574 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
575 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
576@@ -14642,6 +14967,11 @@
577 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
578
579 /* Access to the vec_init patterns. */
580+ ftype = build_function_type_list (V2SF_type_node, float_type_node,
581+ integer_type_node, NULL_TREE);
582+ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_init_v2sf",
583+ ftype, IX86_BUILTIN_VEC_INIT_V2SF);
584+
585 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
586 integer_type_node, NULL_TREE);
587 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
588@@ -14663,6 +14993,11 @@
589 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
590
591 /* Access to the vec_extract patterns. */
592+ ftype = build_function_type_list (float_type_node, V2SF_type_node,
593+ integer_type_node, NULL_TREE);
594+ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_ext_v2sf",
595+ ftype, IX86_BUILTIN_VEC_EXT_V2SF); /* Own V2SF function code, not the V2DF one. */
596+
597 ftype = build_function_type_list (double_type_node, V2DF_type_node,
598 integer_type_node, NULL_TREE);
599 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
600@@ -15137,7 +15472,7 @@
601 switch (fcode)
602 {
603 case IX86_BUILTIN_EMMS:
604- emit_insn (gen_mmx_emms ());
605+ /* emms insn is emitted automatically. */
606 return 0;
607
608 case IX86_BUILTIN_SFENCE:
609@@ -15354,7 +15689,7 @@
610 return target;
611
612 case IX86_BUILTIN_FEMMS:
613- emit_insn (gen_mmx_femms ());
614+ /* femms insn is emitted automatically. */
615 return NULL_RTX;
616
617 case IX86_BUILTIN_PAVGUSB:
618@@ -15500,11 +15835,13 @@
619 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
620 target, 1);
621
622+ case IX86_BUILTIN_VEC_INIT_V2SF:
623 case IX86_BUILTIN_VEC_INIT_V2SI:
624 case IX86_BUILTIN_VEC_INIT_V4HI:
625 case IX86_BUILTIN_VEC_INIT_V8QI:
626 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
627
628+ case IX86_BUILTIN_VEC_EXT_V2SF:
629 case IX86_BUILTIN_VEC_EXT_V2DF:
630 case IX86_BUILTIN_VEC_EXT_V2DI:
631 case IX86_BUILTIN_VEC_EXT_V4SF:
632diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.h gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.h
633--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.h 2005-08-18 16:01:16.000000000 +0000
634+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.h 2005-08-18 16:25:02.000000000 +0000
635@@ -819,7 +819,9 @@
636
637 #define HARD_REGNO_NREGS(REGNO, MODE) \
638 (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
639- ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
640+ ? ((MODE) == ALLREGSmode \
641+ ? 8 \
642+ : (COMPLEX_MODE_P (MODE) ? 2 : 1)) \
643 : ((MODE) == XFmode \
644 ? (TARGET_64BIT ? 2 : 3) \
645 : (MODE) == XCmode \
646@@ -841,9 +843,8 @@
647 ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
648 || (MODE) == V2SImode || (MODE) == SImode)
649
650-/* ??? No autovectorization into MMX or 3DNOW until we can reliably
651- place emms and femms instructions. */
652-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
653+#define UNITS_PER_SIMD_WORD \
654+ (TARGET_SSE ? 16 : TARGET_MMX ? 8 : UNITS_PER_WORD)
655
656 #define VALID_FP_MODE_P(MODE) \
657 ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
658@@ -1436,8 +1437,16 @@
659 #define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \
660 ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE))
661
662+/* 1 if N is the number of a register in which the values of
663+ called function may come back. */
664 #define FUNCTION_VALUE_REGNO_P(N) \
665- ix86_function_value_regno_p (N)
666+ ix86_function_value_regno_p ((N), false)
667+
668+/* 1 if N is the number of a register in which the value of
669+ __builtin_return builtin function may come back. */
670+
671+#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT(N) \
672+ ix86_function_value_regno_p ((N), true)
673
674 /* Define how to find the value returned by a library function
675 assuming the value has mode MODE. */
676@@ -1452,7 +1461,13 @@
677 #define APPLY_RESULT_SIZE (8+108)
678
679 /* 1 if N is a possible register number for function argument passing. */
680-#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
681+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p ((N), false)
682+
683+/* 1 if N is a possible register number for function argument passing
684+ from __builtin_apply_args and __builtin_apply builtin functions. */
685+
686+#define FUNCTION_ARG_REGNO_P_APPLY_ARGS(N) \
687+ ix86_function_arg_regno_p ((N), true)
688
689 /* Define a data type for recording info about an argument list
690 during the scan of that argument list. This data type should
691@@ -1534,6 +1549,23 @@
692
693 #define EXIT_IGNORE_STACK 1
694
695+/* Define this macro as a C expression that is nonzero for registers
696+ that are used by the epilogue or the `return' pattern. The stack
697+ and frame pointer registers are already assumed to be used as
698+ needed. */
699+
700+#define EPILOGUE_USES(REGNO) ix86_epilogue_uses (REGNO)
701+
702+/* Define this macro as a C expression that returns RTL expression of
703+ additional hard register set by call_insn. */
704+
705+#define CALL_INSN_SETS(INSN) ix86_call_insn_sets (INSN)
706+
707+/* Define this macro as a C expression that returns RTL expression of
708+ additional hard register used by call_insn. */
709+
710+#define CALL_INSN_USES(INSN) ix86_call_insn_uses (INSN)
711+
712 /* Output assembler code for a block containing the constant parts
713 of a trampoline, leaving space for the variable parts. */
714
715@@ -2169,6 +2201,10 @@
716 extern rtx ix86_compare_op1; /* operand 1 for comparisons */
717 extern rtx ix86_compare_emitted;
718 \f
719+
720+/* x87 FPU modes for x87/MMX switching. */
721+enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY };
722+
723 /* To properly truncate FP values into integers, we need to set i387 control
724 word. We can't emit proper mode switching code before reload, as spills
725 generated by reload may truncate values incorrectly, but we still can avoid
726@@ -2190,6 +2226,7 @@
727 I387_FLOOR,
728 I387_CEIL,
729 I387_MASK_PM,
730+ I387_FPU_MODE,
731 MAX_386_ENTITIES
732 };
733
734@@ -2219,7 +2256,12 @@
735 refer to the mode-switched entity in question. */
736
737 #define NUM_MODES_FOR_MODE_SWITCHING \
738- { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
739+ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY }
740+
741+/* Define this macro if the port needs extra register life analysis
742+ after mode switching. */
743+
744+#define LIFE_ANALYSIS_AFTER_MODE_SWITCHING ix86_fpu_mode_changed
745
746 /* ENTITY is an integer specifying a mode-switched entity. If
747 `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
748@@ -2229,6 +2271,22 @@
749
750 #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
751
752+/* This macro determines the mode that an INSN results in (if different
753+ from the incoming mode). */
754+
755+#define MODE_AFTER(ENTITY, MODE, I) \
756+ ix86_mode_after ((ENTITY), (MODE), (I))
757+
758+/* This macro specifies a mode that ENTITY is assumed to be
759+ switched to at function entry. */
760+
761+#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY)
762+
763+/* This macro specifies a mode that ENTITY is assumed to be
764+ switched to at function exit. */
765+
766+#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY)
767+
768 /* This macro specifies the order in which modes for ENTITY are
769 processed. 0 is the highest priority. */
770
771@@ -2238,10 +2296,8 @@
772 is the set of hard registers live at the point where the insn(s)
773 are to be inserted. */
774
775-#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
776- ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
777- ? emit_i387_cw_initialization (MODE), 0 \
778- : 0)
779+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
780+ ix86_emit_mode_set ((ENTITY), (MODE))
781
782 \f
783 /* Avoid renaming of stack registers, as doing so in combination with
784@@ -2265,6 +2321,7 @@
785 int save_varrargs_registers;
786 int accesses_prev_frame;
787 int optimize_mode_switching[MAX_386_ENTITIES];
788+ int fpu_mode_changed;
789 /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
790 determine the style used. */
791 int use_fast_prologue_epilogue;
792@@ -2276,6 +2333,7 @@
793 #define ix86_stack_locals (cfun->machine->stack_locals)
794 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
795 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
796+#define ix86_fpu_mode_changed (cfun->machine->fpu_mode_changed)
797
798 /* Control behavior of x86_file_start. */
799 #define X86_FILE_START_VERSION_DIRECTIVE false
800diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.md gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.md
801--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/i386.md 2005-08-18 16:01:16.000000000 +0000
802+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/i386.md 2005-08-18 16:25:02.000000000 +0000
803@@ -152,7 +152,7 @@
804 (UNSPECV_EMMS 2)
805 (UNSPECV_LDMXCSR 3)
806 (UNSPECV_STMXCSR 4)
807- (UNSPECV_FEMMS 5)
808+ (UNSPECV_EFPU 5)
809 (UNSPECV_CLFLUSH 6)
810 (UNSPECV_ALIGN 7)
811 (UNSPECV_MONITOR 8)
812@@ -167,9 +167,11 @@
813 (define_constants
814 [(BP_REG 6)
815 (SP_REG 7)
816+ (FIRSTFP_REG 8)
817 (FLAGS_REG 17)
818 (FPSR_REG 18)
819 (DIRFLAG_REG 19)
820+ (FIRSTMMX_REG 29)
821 ])
822
823 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
824diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mm3dnow.h gcc-4.1-20050818T1605UTC/gcc/config/i386/mm3dnow.h
825--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mm3dnow.h 2005-06-28 08:16:53.000000000 +0000
826+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/mm3dnow.h 2005-08-18 16:25:02.000000000 +0000
827@@ -172,14 +172,13 @@
828 static __inline __m64
829 _m_from_float (float __A)
830 {
831- return (__m64)(__v2sf){ __A, 0 };
832+ return (__m64) __builtin_ia32_vec_init_v2sf (__A, 0);
833 }
834
835 static __inline float
836 _m_to_float (__m64 __A)
837 {
838- union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A };
839- return __tmp.a[0];
840+ return __builtin_ia32_vec_ext_v2sf ((__v2sf)__A, 0);
841 }
842
843 #ifdef __3dNOW_A__
844diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mmx.md gcc-4.1-20050818T1605UTC/gcc/config/i386/mmx.md
845--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/mmx.md 2005-06-28 08:16:53.000000000 +0000
846+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/mmx.md 2005-08-18 16:25:02.000000000 +0000
847@@ -23,14 +23,6 @@
848 ;; the same register file, and 3dNOW! adds a number of extensions to
849 ;; the base integer MMX isa.
850
851-;; Note! Except for the basic move instructions, *all* of these
852-;; patterns are outside the normal optabs namespace. This is because
853-;; use of these registers requires the insertion of emms or femms
854-;; instructions to return to normal fpu mode. The compiler doesn't
855-;; know how to do that itself, which means it's up to the user. Which
856-;; means that we should never use any of these patterns except at the
857-;; direction of the user via a builtin.
858-
859 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
860 (define_mode_macro MMXMODEI [V8QI V4HI V2SI])
861
862@@ -481,7 +473,7 @@
863 (match_operand 2 "const_int_operand" "")]
864 "TARGET_MMX"
865 {
866- ix86_expand_vector_set (false, operands[0], operands[1],
867+ ix86_expand_vector_set (true, operands[0], operands[1],
868 INTVAL (operands[2]));
869 DONE;
870 })
871@@ -537,7 +529,7 @@
872 (match_operand 2 "const_int_operand" "")]
873 "TARGET_MMX"
874 {
875- ix86_expand_vector_extract (false, operands[0], operands[1],
876+ ix86_expand_vector_extract (true, operands[0], operands[1],
877 INTVAL (operands[2]));
878 DONE;
879 })
880@@ -547,7 +539,7 @@
881 (match_operand 1 "" "")]
882 "TARGET_SSE"
883 {
884- ix86_expand_vector_init (false, operands[0], operands[1]);
885+ ix86_expand_vector_init (true, operands[0], operands[1]);
886 DONE;
887 })
888
889@@ -557,6 +549,21 @@
890 ;;
891 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
892
893+(define_expand "neg<mode>2"
894+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
895+ (minus:MMXMODEI
896+ (match_dup 2)
897+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")))]
898+ "TARGET_MMX"
899+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
900+
901+(define_expand "add<mode>3"
902+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
903+ (plus:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
904+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
905+ "TARGET_MMX"
906+ "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
907+
908 (define_insn "mmx_add<mode>3"
909 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
910 (plus:MMXMODEI
911@@ -598,6 +605,13 @@
912 [(set_attr "type" "mmxadd")
913 (set_attr "mode" "DI")])
914
915+(define_expand "sub<mode>3"
916+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
917+ (minus:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "")
918+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
919+ "TARGET_MMX"
920+ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
921+
922 (define_insn "mmx_sub<mode>3"
923 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
924 (minus:MMXMODEI
925@@ -639,6 +653,13 @@
926 [(set_attr "type" "mmxadd")
927 (set_attr "mode" "DI")])
928
929+(define_expand "mulv4hi3"
930+ [(set (match_operand:V4HI 0 "register_operand" "")
931+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
932+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
933+ "TARGET_MMX"
934+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
935+
936 (define_insn "mmx_mulv4hi3"
937 [(set (match_operand:V4HI 0 "register_operand" "=y")
938 (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
939@@ -735,6 +756,13 @@
940 [(set_attr "type" "mmxmul")
941 (set_attr "mode" "DI")])
942
943+(define_expand "umaxv8qi3"
944+ [(set (match_operand:V8QI 0 "register_operand" "")
945+ (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
946+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
947+ "(TARGET_SSE || TARGET_3DNOW_A)"
948+ "ix86_fixup_binary_operands_no_copy (UMAX, V8QImode, operands);")
949+
950 (define_insn "mmx_umaxv8qi3"
951 [(set (match_operand:V8QI 0 "register_operand" "=y")
952 (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
953@@ -745,6 +773,13 @@
954 [(set_attr "type" "mmxadd")
955 (set_attr "mode" "DI")])
956
957+(define_expand "smaxv4hi3"
958+ [(set (match_operand:V4HI 0 "register_operand" "")
959+ (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
960+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
961+ "(TARGET_SSE || TARGET_3DNOW_A)"
962+ "ix86_fixup_binary_operands_no_copy (SMAX, V4HImode, operands);")
963+
964 (define_insn "mmx_smaxv4hi3"
965 [(set (match_operand:V4HI 0 "register_operand" "=y")
966 (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
967@@ -755,6 +790,13 @@
968 [(set_attr "type" "mmxadd")
969 (set_attr "mode" "DI")])
970
971+(define_expand "uminv8qi3"
972+ [(set (match_operand:V8QI 0 "register_operand" "")
973+ (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
974+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
975+ "(TARGET_SSE || TARGET_3DNOW_A)"
976+ "ix86_fixup_binary_operands_no_copy (UMIN, V8QImode, operands);")
977+
978 (define_insn "mmx_uminv8qi3"
979 [(set (match_operand:V8QI 0 "register_operand" "=y")
980 (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
981@@ -765,6 +807,13 @@
982 [(set_attr "type" "mmxadd")
983 (set_attr "mode" "DI")])
984
985+(define_expand "sminv4hi3"
986+ [(set (match_operand:V4HI 0 "register_operand" "")
987+ (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
988+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
989+ "(TARGET_SSE || TARGET_3DNOW_A)"
990+ "ix86_fixup_binary_operands_no_copy (SMIN, V4HImode, operands);")
991+
992 (define_insn "mmx_sminv4hi3"
993 [(set (match_operand:V4HI 0 "register_operand" "=y")
994 (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
995@@ -775,7 +824,7 @@
996 [(set_attr "type" "mmxadd")
997 (set_attr "mode" "DI")])
998
999-(define_insn "mmx_ashr<mode>3"
1000+(define_insn "ashr<mode>3"
1001 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
1002 (ashiftrt:MMXMODE24
1003 (match_operand:MMXMODE24 1 "register_operand" "0")
1004@@ -785,7 +834,7 @@
1005 [(set_attr "type" "mmxshft")
1006 (set_attr "mode" "DI")])
1007
1008-(define_insn "mmx_lshr<mode>3"
1009+(define_insn "lshr<mode>3"
1010 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
1011 (lshiftrt:MMXMODE24
1012 (match_operand:MMXMODE24 1 "register_operand" "0")
1013@@ -806,7 +855,7 @@
1014 [(set_attr "type" "mmxshft")
1015 (set_attr "mode" "DI")])
1016
1017-(define_insn "mmx_ashl<mode>3"
1018+(define_insn "ashl<mode>3"
1019 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
1020 (ashift:MMXMODE24
1021 (match_operand:MMXMODE24 1 "register_operand" "0")
1022@@ -853,12 +902,66 @@
1023 [(set_attr "type" "mmxcmp")
1024 (set_attr "mode" "DI")])
1025
1026+(define_expand "vcond<mode>"
1027+ [(set (match_operand:MMXMODE12 0 "register_operand" "")
1028+ (if_then_else:MMXMODE12
1029+ (match_operator 3 ""
1030+ [(match_operand:MMXMODE12 4 "nonimmediate_operand" "")
1031+ (match_operand:MMXMODE12 5 "nonimmediate_operand" "")])
1032+ (match_operand:MMXMODE12 1 "general_operand" "")
1033+ (match_operand:MMXMODE12 2 "general_operand" "")))]
1034+ "TARGET_MMX"
1035+{
1036+ if (ix86_expand_int_vcond (operands))
1037+ DONE;
1038+ else
1039+ FAIL;
1040+})
1041+
1042+(define_expand "vconduv8qi"
1043+ [(set (match_operand:V8QI 0 "register_operand" "")
1044+ (if_then_else:V8QI
1045+ (match_operator 3 ""
1046+ [(match_operand:V8QI 4 "nonimmediate_operand" "")
1047+ (match_operand:V8QI 5 "nonimmediate_operand" "")])
1048+ (match_operand:V8QI 1 "general_operand" "")
1049+ (match_operand:V8QI 2 "general_operand" "")))]
1050+ "TARGET_MMX"
1051+{
1052+ if (ix86_expand_int_vcond (operands))
1053+ DONE;
1054+ else
1055+ FAIL;
1056+})
1057+
1058 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1059 ;;
1060 ;; Parallel integral logical operations
1061 ;;
1062 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1063
1064+(define_expand "one_cmpl<mode>2"
1065+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1066+ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1067+ (match_dup 2)))]
1068+ "TARGET_MMX"
1069+{
1070+ int i, n = GET_MODE_NUNITS (<MODE>mode);
1071+ rtvec v = rtvec_alloc (n);
1072+
1073+ for (i = 0; i < n; ++i)
1074+ RTVEC_ELT (v, i) = constm1_rtx;
1075+
1076+ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
1077+})
1078+
1079+(define_expand "and<mode>3"
1080+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1081+ (and:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1082+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1083+ "TARGET_MMX"
1084+ "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
1085+
1086 (define_insn "mmx_and<mode>3"
1087 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1088 (and:MMXMODEI
1089@@ -879,6 +982,13 @@
1090 [(set_attr "type" "mmxadd")
1091 (set_attr "mode" "DI")])
1092
1093+(define_expand "ior<mode>3"
1094+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1095+ (ior:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1096+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1097+ "TARGET_MMX"
1098+ "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
1099+
1100 (define_insn "mmx_ior<mode>3"
1101 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1102 (ior:MMXMODEI
1103@@ -889,6 +999,13 @@
1104 [(set_attr "type" "mmxadd")
1105 (set_attr "mode" "DI")])
1106
1107+(define_expand "xor<mode>3"
1108+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1109+ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1110+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1111+ "TARGET_MMX"
1112+ "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
1113+
1114 (define_insn "mmx_xor<mode>3"
1115 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1116 (xor:MMXMODEI
1117@@ -1147,7 +1264,7 @@
1118 (match_operand 2 "const_int_operand" "")]
1119 "TARGET_MMX"
1120 {
1121- ix86_expand_vector_set (false, operands[0], operands[1],
1122+ ix86_expand_vector_set (true, operands[0], operands[1],
1123 INTVAL (operands[2]));
1124 DONE;
1125 })
1126@@ -1205,7 +1322,7 @@
1127 (match_operand 2 "const_int_operand" "")]
1128 "TARGET_MMX"
1129 {
1130- ix86_expand_vector_extract (false, operands[0], operands[1],
1131+ ix86_expand_vector_extract (true, operands[0], operands[1],
1132 INTVAL (operands[2]));
1133 DONE;
1134 })
1135@@ -1215,7 +1332,7 @@
1136 (match_operand 1 "" "")]
1137 "TARGET_SSE"
1138 {
1139- ix86_expand_vector_init (false, operands[0], operands[1]);
1140+ ix86_expand_vector_init (true, operands[0], operands[1]);
1141 DONE;
1142 })
1143
1144@@ -1225,7 +1342,7 @@
1145 (match_operand 2 "const_int_operand" "")]
1146 "TARGET_MMX"
1147 {
1148- ix86_expand_vector_set (false, operands[0], operands[1],
1149+ ix86_expand_vector_set (true, operands[0], operands[1],
1150 INTVAL (operands[2]));
1151 DONE;
1152 })
1153@@ -1236,7 +1353,7 @@
1154 (match_operand 2 "const_int_operand" "")]
1155 "TARGET_MMX"
1156 {
1157- ix86_expand_vector_extract (false, operands[0], operands[1],
1158+ ix86_expand_vector_extract (true, operands[0], operands[1],
1159 INTVAL (operands[2]));
1160 DONE;
1161 })
1162@@ -1246,7 +1363,7 @@
1163 (match_operand 1 "" "")]
1164 "TARGET_SSE"
1165 {
1166- ix86_expand_vector_init (false, operands[0], operands[1]);
1167+ ix86_expand_vector_init (true, operands[0], operands[1]);
1168 DONE;
1169 })
1170
1171@@ -1256,7 +1373,7 @@
1172 (match_operand 2 "const_int_operand" "")]
1173 "TARGET_MMX"
1174 {
1175- ix86_expand_vector_set (false, operands[0], operands[1],
1176+ ix86_expand_vector_set (true, operands[0], operands[1],
1177 INTVAL (operands[2]));
1178 DONE;
1179 })
1180@@ -1267,7 +1384,7 @@
1181 (match_operand 2 "const_int_operand" "")]
1182 "TARGET_MMX"
1183 {
1184- ix86_expand_vector_extract (false, operands[0], operands[1],
1185+ ix86_expand_vector_extract (true, operands[0], operands[1],
1186 INTVAL (operands[2]));
1187 DONE;
1188 })
1189@@ -1277,7 +1394,7 @@
1190 (match_operand 1 "" "")]
1191 "TARGET_SSE"
1192 {
1193- ix86_expand_vector_init (false, operands[0], operands[1]);
1194+ ix86_expand_vector_init (true, operands[0], operands[1]);
1195 DONE;
1196 })
1197
1198@@ -1386,48 +1503,20 @@
1199 [(set_attr "type" "mmxcvt")
1200 (set_attr "mode" "DI")])
1201
1202-(define_insn "mmx_emms"
1203- [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
1204- (clobber (reg:XF 8))
1205- (clobber (reg:XF 9))
1206- (clobber (reg:XF 10))
1207- (clobber (reg:XF 11))
1208- (clobber (reg:XF 12))
1209- (clobber (reg:XF 13))
1210- (clobber (reg:XF 14))
1211- (clobber (reg:XF 15))
1212- (clobber (reg:DI 29))
1213- (clobber (reg:DI 30))
1214- (clobber (reg:DI 31))
1215- (clobber (reg:DI 32))
1216- (clobber (reg:DI 33))
1217- (clobber (reg:DI 34))
1218- (clobber (reg:DI 35))
1219- (clobber (reg:DI 36))]
1220- "TARGET_MMX"
1221- "emms"
1222- [(set_attr "type" "mmx")
1223- (set_attr "memory" "unknown")])
1224+(define_insn "efpu"
1225+ [(set (reg:ALLREGS FIRSTFP_REG)
1226+ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTMMX_REG)]
1227+ UNSPECV_EFPU))]
1228+ "TARGET_80387 && TARGET_MMX"
1229+ ""
1230+ [(set_attr "length" "0")])
1231+
1232+(define_insn "emms"
1233+ [(set (reg:ALLREGS FIRSTMMX_REG)
1234+ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTFP_REG)]
1235+ UNSPECV_EMMS))]
1236+ "TARGET_80387 && TARGET_MMX"
1237+{
1238+ return TARGET_3DNOW ? "femms" : "emms";
1239+})
1240
1241-(define_insn "mmx_femms"
1242- [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
1243- (clobber (reg:XF 8))
1244- (clobber (reg:XF 9))
1245- (clobber (reg:XF 10))
1246- (clobber (reg:XF 11))
1247- (clobber (reg:XF 12))
1248- (clobber (reg:XF 13))
1249- (clobber (reg:XF 14))
1250- (clobber (reg:XF 15))
1251- (clobber (reg:DI 29))
1252- (clobber (reg:DI 30))
1253- (clobber (reg:DI 31))
1254- (clobber (reg:DI 32))
1255- (clobber (reg:DI 33))
1256- (clobber (reg:DI 34))
1257- (clobber (reg:DI 35))
1258- (clobber (reg:DI 36))]
1259- "TARGET_3DNOW"
1260- "femms"
1261- [(set_attr "type" "mmx")
1262- (set_attr "memory" "none")])
1263diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/sse.md gcc-4.1-20050818T1605UTC/gcc/config/i386/sse.md
1264--- gcc-4.1-20050818T1605UTC/gcc.orig/config/i386/sse.md 2005-08-18 16:01:16.000000000 +0000
1265+++ gcc-4.1-20050818T1605UTC/gcc/config/i386/sse.md 2005-08-18 16:25:02.000000000 +0000
1266@@ -881,6 +881,7 @@
1267 "TARGET_SSE"
1268 "cvtpi2ps\t{%2, %0|%0, %2}"
1269 [(set_attr "type" "ssecvt")
1270+ (set_attr "unit" "mmx")
1271 (set_attr "mode" "V4SF")])
1272
1273 (define_insn "sse_cvtps2pi"
1274@@ -3508,6 +3509,7 @@
1275 movhps\t{%2, %0|%0, %2}
1276 movlps\t{%1, %0|%0, %1}"
1277 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
1278+ (set_attr "unit" "*,mmx,*,*,*,*")
1279 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
1280
1281 (define_expand "vec_setv2di"
1282diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/config/sh/sh.h gcc-4.1-20050818T1605UTC/gcc/config/sh/sh.h
1283--- gcc-4.1-20050818T1605UTC/gcc.orig/config/sh/sh.h 2005-08-18 16:01:22.000000000 +0000
1284+++ gcc-4.1-20050818T1605UTC/gcc/config/sh/sh.h 2005-08-18 16:25:02.000000000 +0000
1285@@ -3311,7 +3311,7 @@
1286 ? get_attr_fp_mode (INSN) \
1287 : FP_MODE_NONE)
1288
1289-#define MODE_AFTER(MODE, INSN) \
1290+#define MODE_AFTER(ENTITY, MODE, INSN) \
1291 (TARGET_HITACHI \
1292 && recog_memoized (INSN) >= 0 \
1293 && get_attr_fp_set (INSN) != FP_SET_NONE \
1294diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/doc/tm.texi gcc-4.1-20050818T1605UTC/gcc/doc/tm.texi
1295--- gcc-4.1-20050818T1605UTC/gcc.orig/doc/tm.texi 2005-08-18 16:01:28.000000000 +0000
1296+++ gcc-4.1-20050818T1605UTC/gcc/doc/tm.texi 2005-08-18 16:25:02.000000000 +0000
1297@@ -4227,6 +4227,16 @@
1298 compiler knows this regardless of @code{EXIT_IGNORE_STACK}.
1299 @end defmac
1300
1301+@defmac CALL_INSN_SETS (@var{INSN})
1302+Define this macro as a C expression that returns an RTL expression for an
1303+additional hard register set by a call insn, or a null rtx if there is none.
1304+@end defmac
1305+
1306+@defmac CALL_INSN_USES (@var{INSN})
1307+Define this macro as a C expression that returns an RTL expression for an
1308+additional hard register used by a call insn, or a null rtx if there is none.
1309+@end defmac
1310+
1311 @defmac EPILOGUE_USES (@var{regno})
1312 Define this macro as a C expression that is nonzero for registers that are
1313 used by the epilogue or the @samp{return} pattern. The stack and frame
1314@@ -8376,6 +8386,13 @@
1315 switch is needed / supplied.
1316 @end defmac
1317
1318+@defmac LIFE_ANALYSIS_AFTER_MODE_SWITCHING
1319+Define this macro as a C expression that is nonzero if the port needs an
1320+extra register life analysis pass after mode switching. Define it when mode
1321+switching inserts instructions that change global registers, so that global
1322+register life information stays consistent.
1323+@end defmac
1324+
1325 @defmac MODE_NEEDED (@var{entity}, @var{insn})
1326 @var{entity} is an integer specifying a mode-switched entity. If
1327 @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro to
1328@@ -8384,9 +8401,9 @@
1329 be switched into prior to the execution of @var{insn}.
1330 @end defmac
1331
1332-@defmac MODE_AFTER (@var{mode}, @var{insn})
1333-If this macro is defined, it is evaluated for every @var{insn} during
1334-mode switching. It determines the mode that an insn results in (if
1335+@defmac MODE_AFTER (@var{entity}, @var{mode}, @var{insn})
1336+If this macro is defined, it is evaluated for every @var{entity} that needs
1337+mode switching. It determines the mode that an @var{insn} results in (if
1338 different from the incoming mode).
1339 @end defmac
1340
1341diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/flow.c gcc-4.1-20050818T1605UTC/gcc/flow.c
1342--- gcc-4.1-20050818T1605UTC/gcc.orig/flow.c 2005-08-18 16:00:40.000000000 +0000
1343+++ gcc-4.1-20050818T1605UTC/gcc/flow.c 2005-08-18 16:25:02.000000000 +0000
1344@@ -1830,10 +1830,11 @@
1345 {
1346 regset live_at_end;
1347 bool sibcall_p;
1348- rtx note, cond;
1349+ rtx note;
1350+ rtx cond = NULL_RTX;
1351+ rtx reg ATTRIBUTE_UNUSED;
1352 int i;
1353
1354- cond = NULL_RTX;
1355 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
1356 cond = COND_EXEC_TEST (PATTERN (insn));
1357
1358@@ -1856,6 +1857,13 @@
1359 mark_set_1 (pbi, CLOBBER, XEXP (XEXP (note, 0), 0),
1360 cond, insn, pbi->flags);
1361
1362+#ifdef CALL_INSN_SETS
1363+ reg = CALL_INSN_SETS (insn);
1364+
1365+ if (reg)
1366+ mark_set_1 (pbi, SET, reg, cond, insn, pbi->flags);
1367+#endif
1368+
1369 /* Calls change all call-used and global registers; sibcalls do not
1370 clobber anything that must be preserved at end-of-function,
1371 except for return values. */
1372@@ -1894,10 +1902,11 @@
1373
1374 if (! insn_is_dead && CALL_P (insn))
1375 {
1376+ rtx note;
1377+ rtx cond = NULL_RTX;
1378+ rtx reg ATTRIBUTE_UNUSED;
1379 int i;
1380- rtx note, cond;
1381
1382- cond = NULL_RTX;
1383 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
1384 cond = COND_EXEC_TEST (PATTERN (insn));
1385
1386@@ -1910,6 +1919,13 @@
1387 of which mark_used_regs knows how to handle. */
1388 mark_used_regs (pbi, XEXP (XEXP (note, 0), 0), cond, insn);
1389
1390+#ifdef CALL_INSN_USES
1391+ reg = CALL_INSN_USES (insn);
1392+
1393+ if (reg)
1394+ mark_used_reg (pbi, reg, cond, insn);
1395+#endif
1396+
1397 /* The stack ptr is used (honorarily) by a CALL insn. */
1398 if ((flags & PROP_REG_INFO)
1399 && !REGNO_REG_SET_P (pbi->reg_live, STACK_POINTER_REGNUM))
1400diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/mode-switching.c gcc-4.1-20050818T1605UTC/gcc/mode-switching.c
1401--- gcc-4.1-20050818T1605UTC/gcc.orig/mode-switching.c 2005-08-18 16:00:42.000000000 +0000
1402+++ gcc-4.1-20050818T1605UTC/gcc/mode-switching.c 2005-08-18 16:25:02.000000000 +0000
1403@@ -473,7 +473,7 @@
1404 RESET_BIT (transp[bb->index], j);
1405 }
1406 #ifdef MODE_AFTER
1407- last_mode = MODE_AFTER (last_mode, insn);
1408+ last_mode = MODE_AFTER (e, last_mode, insn);
1409 #endif
1410 /* Update LIVE_NOW. */
1411 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1412@@ -730,6 +730,14 @@
1413 no_new_pseudos = 0;
1414 optimize_mode_switching (NULL);
1415 no_new_pseudos = 1;
1416+
1417+ /* Mode switching can insert instructions that
1418+ change global registers life data. */
1419+#ifdef LIFE_ANALYSIS_AFTER_MODE_SWITCHING
1420+ if (LIFE_ANALYSIS_AFTER_MODE_SWITCHING)
1421+ life_analysis (NULL, PROP_REG_INFO);
1422+#endif
1423+
1424 #endif /* OPTIMIZE_MODE_SWITCHING */
1425 }
1426
1427diff -uNr gcc-4.1-20050818T1605UTC/gcc.orig/reg-stack.c gcc-4.1-20050818T1605UTC/gcc/reg-stack.c
1428--- gcc-4.1-20050818T1605UTC/gcc.orig/reg-stack.c 2005-08-18 16:00:43.000000000 +0000
1429+++ gcc-4.1-20050818T1605UTC/gcc/reg-stack.c 2005-08-18 16:25:02.000000000 +0000
1430@@ -1579,6 +1579,41 @@
1431 }
1432 break;
1433
1434+ case UNSPEC_VOLATILE:
1435+ switch (XINT (pat_src, 1))
1436+ {
1437+ int i;
1438+
1439+ case UNSPECV_EFPU:
1440+ /* There should be no stack registers live
1441+ at this point. */
1442+ gcc_assert (regstack->top == -1);
1443+
1444+ /* Mark all x87 registers as used. */
1445+ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--)
1446+ {
1447+ regstack->reg[++regstack->top] = i;
1448+ SET_HARD_REG_BIT (regstack->reg_set, i);
1449+ }
1450+ break;
1451+
1452+ case UNSPECV_EMMS:
1453+ /* All stack registers should be alive
1454+ at this point. */
1455+ gcc_assert (regstack->top == REG_STACK_SIZE - 1);
1456+
1457+ /* Mark all x87 registers as empty. */
1458+ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--)
1459+ CLEAR_HARD_REG_BIT (regstack->reg_set, i);
1460+
1461+ regstack->top = -1;
1462+ break;
1463+
1464+ default:
1465+ gcc_unreachable ();
1466+ }
1467+ break;
1468+
1469 case UNSPEC:
1470 switch (XINT (pat_src, 1))
1471 {
1472@@ -2269,6 +2304,25 @@
1473 if (NOTE_P (insn) || INSN_DELETED_P (insn))
1474 return control_flow_insn_deleted;
1475
1476+#ifdef CALL_INSN_SETS
1477+ if (CALL_P (insn))
1478+ {
1479+ rtx reg = CALL_INSN_SETS (insn);
1480+
1481+ if (reg && STACK_REG_P (reg))
1482+ {
1483+ int count;
1484+
1485+ for (count = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)];
1486+ --count >= 0;)
1487+ {
1488+ regstack->reg[++regstack->top] = REGNO (reg) + count;
1489+ SET_HARD_REG_BIT (regstack->reg_set, REGNO (reg) + count);
1490+ }
1491+ }
1492+ }
1493+#endif
1494+
1495 /* If there is a REG_UNUSED note on a stack register on this insn,
1496 the indicated reg must be popped. The REG_UNUSED note is removed,
1497 since the form of the newly emitted pop insn references the reg,
1498@@ -2544,6 +2598,15 @@
1499 basic_block block = e->dest;
1500 block_info bi = BLOCK_INFO (block);
1501 int reg, top = -1;
1502+ int numregs = 0;
1503+
1504+ /* Check if all stack registers are live at function entry.
1505+ This is the case where stack registers are disabled and no
1506+ register initialization is needed. */
1507+
1508+ for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
1509+ if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
1510+ numregs++;
1511
1512 for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
1513 if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
1514@@ -2552,11 +2615,14 @@
1515
1516 bi->stack_in.reg[++top] = reg;
1517
1518- init = gen_rtx_SET (VOIDmode,
1519- FP_MODE_REG (FIRST_STACK_REG, SFmode),
1520- not_a_num);
1521- insert_insn_on_edge (init, e);
1522- inserted = 1;
1523+ if (numregs != REG_STACK_SIZE)
1524+ {
1525+ init = gen_rtx_SET (VOIDmode,
1526+ FP_MODE_REG (FIRST_STACK_REG, SFmode),
1527+ not_a_num);
1528+ insert_insn_on_edge (init, e);
1529+ inserted = 1;
1530+ }
1531 }
1532
1533 bi->stack_in.top = top;
1534@@ -2575,13 +2641,34 @@
1535 stack output_stack;
1536 rtx retvalue;
1537
1538- retvalue = stack_result (current_function_decl);
1539 value_reg_low = value_reg_high = -1;
1540- if (retvalue)
1541+
1542+#ifdef EPILOGUE_USES
1543+ {
1544+ int numregs = 0;
1545+ int i;
1546+
1547+ for (i = FIRST_STACK_REG; i < LAST_STACK_REG + 1; i++)
1548+ if (EPILOGUE_USES (i))
1549+ numregs++;
1550+
1551+ if (numregs)
1552+ {
1553+ value_reg_low = FIRST_STACK_REG;
1554+ value_reg_high = value_reg_low + numregs - 1;
1555+ }
1556+ }
1557+#endif
1558+
1559+ if (value_reg_low < 0)
1560 {
1561- value_reg_low = REGNO (retvalue);
1562- value_reg_high = value_reg_low
1563- + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1;
1564+ retvalue = stack_result (current_function_decl);
1565+ if (retvalue)
1566+ {
1567+ value_reg_low = REGNO (retvalue);
1568+ value_reg_high = value_reg_low
1569+ + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1;
1570+ }
1571 }
1572
1573 output_stack = &BLOCK_INFO (EXIT_BLOCK_PTR)->stack_in;
This page took 0.233072 seconds and 4 git commands to generate.