]> git.pld-linux.org Git - packages/gcc.git/blame - gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch
- pr22493 patch updated, release 0.3.
[packages/gcc.git] / gcc-mmx-x87-fpu-mode-switching-and-mmx-vectorizer.patch
CommitLineData
e2ca1c07
PS
1--- gcc/gcc/builtins.c 2005-07-12 11:19:59.000000000 +0200
2+++ gcc/gcc/builtins.c 2005-07-18 06:14:15.000000000 +0200
3@@ -52,6 +52,14 @@ Software Foundation, 51 Franklin Street,
4 #define PAD_VARARGS_DOWN BYTES_BIG_ENDIAN
5 #endif
6
7+#ifndef FUNCTION_VALUE_REGNO_P_APPLY_RESULT
8+#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT FUNCTION_VALUE_REGNO_P
9+#endif
10+
11+#ifndef FUNCTION_ARG_REGNO_P_APPLY_ARGS
12+#define FUNCTION_ARG_REGNO_P_APPLY_ARGS FUNCTION_ARG_REGNO_P
13+#endif
14+
15 /* Define the names of the builtin function types and codes. */
16 const char *const built_in_class_names[4]
17 = {"NOT_BUILT_IN", "BUILT_IN_FRONTEND", "BUILT_IN_MD", "BUILT_IN_NORMAL"};
18@@ -1079,7 +1087,7 @@ apply_args_size (void)
19 size += GET_MODE_SIZE (Pmode);
20
21 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
22- if (FUNCTION_ARG_REGNO_P (regno))
23+ if (FUNCTION_ARG_REGNO_P_APPLY_ARGS (regno))
24 {
25 mode = reg_raw_mode[regno];
26
27@@ -1117,7 +1125,7 @@ apply_result_size (void)
28 size = 0;
29
30 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
31- if (FUNCTION_VALUE_REGNO_P (regno))
32+ if (FUNCTION_VALUE_REGNO_P_APPLY_RESULT (regno))
33 {
34 mode = reg_raw_mode[regno];
35
36--- gcc/gcc/caller-save.c 2005-06-25 03:59:25.000000000 +0200
37+++ gcc/gcc/caller-save.c 2005-07-18 06:14:15.000000000 +0200
38@@ -377,6 +377,7 @@ save_call_clobbered_regs (void)
39 {
40 rtx insn = chain->insn;
41 enum rtx_code code = GET_CODE (insn);
42+ rtx reg ATTRIBUTE_UNUSED;
43
44 next = chain->next;
45
46@@ -450,6 +451,12 @@ save_call_clobbered_regs (void)
47 CLEAR_HARD_REG_SET (this_insn_sets);
48 note_stores (PATTERN (insn), mark_set_regs, NULL);
49
50+#ifdef CALL_INSN_SETS
51+ reg = CALL_INSN_SETS (insn);
52+
53+ if (reg)
54+ mark_set_regs (reg, NULL_RTX, NULL);
55+#endif
56 /* Compute which hard regs must be saved before this call. */
57 AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set);
58 AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets);
59--- gcc/gcc/config/i386/i386.c 2005-07-14 09:46:16.000000000 +0200
60+++ gcc/gcc/config/i386/i386.c 2005-07-18 06:14:15.000000000 +0200
61@@ -2067,12 +2067,13 @@ ix86_return_pops_args (tree fundecl, tre
62
63 /* Return true when register may be used to pass function parameters. */
64 bool
65-ix86_function_arg_regno_p (int regno)
66+ix86_function_arg_regno_p (int regno, bool from_builtin)
67 {
68 int i;
69 if (!TARGET_64BIT)
70 return (regno < REGPARM_MAX
71- || (TARGET_MMX && MMX_REGNO_P (regno)
72+ || (TARGET_MMX && !(TARGET_80387 && from_builtin)
73+ && MMX_REGNO_P (regno)
74 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
75 || (TARGET_SSE && SSE_REGNO_P (regno)
76 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
77@@ -3181,14 +3182,14 @@ ix86_function_arg_boundary (enum machine
78
79 /* Return true if N is a possible register number of function value. */
80 bool
81-ix86_function_value_regno_p (int regno)
82+ix86_function_value_regno_p (int regno, bool from_builtin)
83 {
84 if (regno == 0
85 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
86 || (regno == FIRST_SSE_REG && TARGET_SSE))
87 return true;
88
89- if (!TARGET_64BIT
90+ if (!TARGET_64BIT && !(TARGET_80387 && from_builtin)
91 && (regno == FIRST_MMX_REG && TARGET_MMX))
92 return true;
93
94@@ -7450,12 +7451,152 @@ output_387_binary_op (rtx insn, rtx *ope
95 return buf;
96 }
97
98-/* Return needed mode for entity in optimize_mode_switching pass. */
99+/* Return needed mode for entity in optimize_mode_switching pass.
100+ Returned mode should match ix86_mode_entry () for function calls. */
101
102 int
103 ix86_mode_needed (int entity, rtx insn)
104 {
105- enum attr_i387_cw mode;
106+ int unit, mode;
107+
108+ if (entity == I387_FPU_MODE)
109+ {
110+ /* If a function call uses MMX registers, select MMX FPU mode and
111+ if function call uses x87 registers, select x87 FPU mode. */
112+ if (CALL_P (insn))
113+ {
114+ rtx link;
115+ rtx reg;
116+ bool mmx = false;
117+ bool x87 = false;
118+
119+ for (link = CALL_INSN_FUNCTION_USAGE (insn);
120+ link;
121+ link = XEXP (link, 1))
122+ {
123+ if (GET_CODE (XEXP (link, 0)) == USE)
124+ {
125+ reg = XEXP (XEXP (link, 0), 0);
126+
127+ if (reg)
128+ {
129+ if (MMX_REG_P (reg))
130+ mmx = true;
131+
132+ if (FP_REG_P (reg))
133+ x87 = true;
134+ }
135+ }
136+ }
137+
138+ /* Mixing of x87 and MMX registers is not allowed
139+ in function call. */
140+ gcc_assert (!mmx || !x87);
141+
142+ if (mmx)
143+ return FPU_MODE_MMX;
144+
145+ /* Fall back to default mode. */
146+ return FPU_MODE_X87;
147+ }
148+
149+ /* Parse ASM operands to check input and output constraints. If
150+ an ASM uses MMX registers, select MMX mode and if it uses x87
151+ registers, select x87 mode. Mixing of MMX and x87 constraints
152+ is not allowed. If no MMX or x87 input and output registers
153+ are used, switch to default mode. */
154+ if (NONJUMP_INSN_P (insn))
155+ {
156+ rtx pat = PATTERN (insn);
157+ int noperands = asm_noperands (pat);
158+
159+ if (noperands >= 0)
160+ {
161+ const char **constraints;
162+ int i;
163+ bool mmx = false;
164+ bool x87 = false;
165+
166+ constraints = alloca (noperands * sizeof (char *));
167+ decode_asm_operands (pat, NULL, NULL, constraints, NULL);
168+
169+ for (i = 0; i < noperands; i++)
170+ {
171+ const char *c = constraints[i];
172+ enum reg_class class;
173+
174+ if (c[0] == '%')
175+ c++;
176+ if (ISDIGIT ((unsigned char) c[0]) && c[1] == '\0')
177+ c = constraints[c[0] - '0'];
178+
179+ while (*c)
180+ {
181+ char cc = *c;
182+ int len;
183+ switch (cc)
184+ {
185+ case ',':
186+ cc++;
187+ continue;
188+ case '=':
189+ case '+':
190+ case '*':
191+ case '%':
192+ case '!':
193+ case '#':
194+ case '&':
195+ case '?':
196+ break;
197+
198+ default:
199+ class = REG_CLASS_FROM_LETTER (cc);
200+
201+ if (MMX_CLASS_P (class))
202+ mmx = true;
203+
204+ if (FLOAT_CLASS_P (class))
205+ x87 = true;
206+ }
207+
208+ len = CONSTRAINT_LEN (cc, c);
209+ do
210+ c++;
211+ while (--len && *c);
212+ }
213+ }
214+
215+ /* Mixing x87 and MMX registers in ASM is not allowed. */
216+ if (mmx && x87)
217+ error_for_asm (insn, "mixing of x87 and MMX registers "
218+ "is not allowed in %<asm%>");
219+
220+ if (mmx)
221+ return FPU_MODE_MMX;
222+
223+ /* Fall back to default mode. */
224+ return FPU_MODE_X87;
225+ }
226+ }
227+
228+ if (recog_memoized (insn) < 0)
229+ return FPU_MODE_ANY;
230+
231+ unit = get_attr_unit (insn);
232+
233+ switch (unit)
234+ {
235+ case UNIT_MMX:
236+ return FPU_MODE_MMX;
237+
238+ case UNIT_I387:
239+ return FPU_MODE_X87;
240+
241+ default:
242+ return FPU_MODE_ANY;
243+
244+ }
245+ }
246
247 /* The mode UNINITIALIZED is used to store control word after a
248 function call or ASM pattern. The mode ANY specify that function
249@@ -7502,21 +7643,132 @@ ix86_mode_needed (int entity, rtx insn)
250 return I387_CW_ANY;
251 }
252
253-/* Output code to initialize control word copies used by trunc?f?i and
254- rounding patterns. CURRENT_MODE is set to current control word,
255- while NEW_MODE is set to new control word. */
256+
257+/* Switch FPU mode to appropriate mode after function call in
258+ optimize_mode_switchig pass. Returned mode should match
259+ ix86_mode_exit (). */
260+
261+int
262+ix86_mode_after (int entity, int mode, rtx insn)
263+{
264+ if (entity == I387_FPU_MODE)
265+ {
266+ /* Switch FPU to MMX mode after funciton call if function value
267+ is returned in MMX register and similar for x87 reg.
268+ If no value is returned in MMX or x87 reg, fall back to
269+ default mode. */
270+ if (CALL_P (insn))
271+ {
272+ rtx reg = SET_DEST (PATTERN (insn));
273+
274+ int new_mode;
275+
276+ if (reg && MMX_REG_P (reg))
277+ new_mode = FPU_MODE_MMX;
278+ else
279+ new_mode = FPU_MODE_X87;
280+
281+ /* Call insn should never operate in FPU_MODE_ANY. */
282+ if ((mode != FPU_MODE_ANY) && (new_mode != mode))
283+ ix86_fpu_mode_changed = 1;
284+
285+ return new_mode;
286+ }
287+ }
288+
289+ return mode;
290+}
291+
292+/* Switch FPU mode of function entry to appropriate mode in
293+ optimize_mode_switchig pass. Returned mode should match
294+ ix86_mode_needed () for function calls. */
295+
296+int
297+ix86_mode_entry (int entity)
298+{
299+ if (entity == I387_FPU_MODE)
300+ {
301+ if (! current_function_args_info.maybe_vaarg)
302+ {
303+ if (current_function_args_info.mmx_nregs != MMX_REGPARM_MAX)
304+ return FPU_MODE_MMX;
305+
306+ /* ??? Handle x87 registers for fpregparm. */
307+ }
308+
309+ /* Fall back to default mode. */
310+ return FPU_MODE_X87;
311+ }
312+
313+ return I387_CW_ANY;
314+}
315+
316+/* Switch FPU mode of function exit to appropriate mode in
317+ optimize_mode_switchig pass. Returned mode should match
318+ ix86_mode_after () for function calls. */
319+
320+int
321+ix86_mode_exit (int entity)
322+{
323+ if (entity == I387_FPU_MODE)
324+ {
325+ rtx reg = current_function_return_rtx;
326+
327+ /* If MMX output register is specified, switch FPU mode
328+ of function exit to MMX mode. */
329+ if (reg && MMX_REG_P (reg))
330+ return FPU_MODE_MMX;
331+
332+ /* Fall back to default mode. */
333+ return FPU_MODE_X87;
334+ }
335+
336+ return I387_CW_ANY;
337+}
338+
339+/* Emit mode switching instructions in optimize_mode_switching pass. */
340
341 void
342-emit_i387_cw_initialization (int mode)
343+ix86_emit_mode_set (int entity, int mode)
344 {
345- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
346- rtx new_mode;
347+ rtx stored_mode, new_mode;
348+ rtx reg;
349
350 int slot;
351
352- rtx reg = gen_reg_rtx (HImode);
353+ if (entity == I387_FPU_MODE)
354+ {
355+ switch (mode)
356+ {
357+ case FPU_MODE_ANY:
358+ return;
359+
360+ case FPU_MODE_X87:
361+ emit_insn (gen_emms ());
362+ ix86_fpu_mode_changed = 1;
363+ return;
364+
365+ case FPU_MODE_MMX:
366+ emit_insn (gen_efpu ());
367+ ix86_fpu_mode_changed = 1;
368+ return;
369+
370+ default:
371+ gcc_unreachable ();
372+ }
373+ }
374+
375+ /* Output code to initialize control word copies used by trunc?f?i
376+ and rounding patterns. STORED_MODE is set to current control
377+ word, while NEW_MODE is set to new control word. */
378
379+ if ((mode == I387_CW_UNINITIALIZED) || (mode == I387_CW_ANY))
380+ return;
381+
382+ stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
383 emit_insn (gen_x86_fnstcw_1 (stored_mode));
384+
385+ reg = gen_reg_rtx (HImode);
386 emit_move_insn (reg, stored_mode);
387
388 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
389@@ -12279,6 +12531,7 @@ ix86_init_machine_status (void)
390
391 f = ggc_alloc_cleared (sizeof (struct machine_function));
392 f->use_fast_prologue_epilogue_nregs = -1;
393+ f->optimize_mode_switching[I387_FPU_MODE] = TARGET_80387 && TARGET_MMX;
394
395 return f;
396 }
397@@ -12877,7 +13130,77 @@ ix86_local_alignment (tree type, int ali
398 }
399 return align;
400 }
401+
402 \f
403+
404+/* Return true to prevent register allocator from allocating registers
405+ from the unit that is not active. */
406+
407+bool
408+ix86_epilogue_uses (int regno)
409+{
410+ int mode;
411+
412+ if (! ix86_fpu_mode_changed)
413+ return false;
414+
415+ mode = ix86_mode_exit (I387_FPU_MODE);
416+
417+ if (mode == FPU_MODE_MMX)
418+ return FP_REGNO_P (regno);
419+ else
420+ return MMX_REGNO_P (regno);
421+}
422+
423+/* Return RTX code of additional register that CALL_INSN uses.
424+ This function is used to maintain correct register life
425+ information before CALL_INSN in case of MMX/x87 switching. */
426+
427+rtx
428+ix86_call_insn_uses (rtx insn)
429+{
430+ int mode;
431+
432+ if (! ix86_fpu_mode_changed)
433+ return NULL_RTX;
434+
435+ gcc_assert (CALL_P (insn));
436+
437+ mode = ix86_mode_needed (I387_FPU_MODE, insn);
438+ if (mode == FPU_MODE_MMX)
439+ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG);
440+ else
441+ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG);
442+
443+ return NULL_RTX;
444+}
445+
446+/* Return RTX code of additional register that CALL_INSN sets.
447+ This function is used to maintain correct register life
448+ information after CALL_INSN in case of MMX/x87 switching. */
449+
450+rtx
451+ix86_call_insn_sets (rtx insn)
452+{
453+ int mode;
454+
455+ if (! ix86_fpu_mode_changed)
456+ return NULL_RTX;
457+
458+ gcc_assert (CALL_P (insn));
459+
460+ /* Current mode in call to ix86_mode_after is set to FPU_MODE_ANY
461+ to prevent setting of ix86_fpu_mode_changed variable. */
462+ mode = ix86_mode_after (I387_FPU_MODE, FPU_MODE_ANY, insn);
463+ if (mode == FPU_MODE_MMX)
464+ return gen_rtx_REG (ALLREGSmode, FIRST_FLOAT_REG);
465+ else
466+ return gen_rtx_REG (ALLREGSmode, FIRST_MMX_REG);
467+
468+ return NULL_RTX;
469+}
470+
471+
472 /* Emit RTL insns to initialize the variable parts of a trampoline.
473 FNADDR is an RTX for the address of the function's pure code.
474 CXT is an RTX for the static chain value for the function. */
475@@ -13357,9 +13680,11 @@ enum ix86_builtins
476 IX86_BUILTIN_MONITOR,
477 IX86_BUILTIN_MWAIT,
478
479+ IX86_BUILTIN_VEC_INIT_V2SF,
480 IX86_BUILTIN_VEC_INIT_V2SI,
481 IX86_BUILTIN_VEC_INIT_V4HI,
482 IX86_BUILTIN_VEC_INIT_V8QI,
483+ IX86_BUILTIN_VEC_EXT_V2SF,
484 IX86_BUILTIN_VEC_EXT_V2DF,
485 IX86_BUILTIN_VEC_EXT_V2DI,
486 IX86_BUILTIN_VEC_EXT_V4SF,
487@@ -13541,24 +13866,24 @@ static const struct builtin_description
488 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
489 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
490
491- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
492- { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
493- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
494- { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
495+ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
496+ { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
497+ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
498+ { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
499 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
500 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
501
502- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
503- { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
504- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
505- { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
506+ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
507+ { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
508+ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
509+ { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
510 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
511 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
512
513- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
514- { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
515- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
516- { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
517+ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
518+ { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
519+ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
520+ { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
521
522 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
523 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
524@@ -14323,6 +14648,11 @@ ix86_init_mmx_sse_builtins (void)
525 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
526
527 /* Access to the vec_init patterns. */
528+ ftype = build_function_type_list (V2SF_type_node, float_type_node,
529+ integer_type_node, NULL_TREE);
530+ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_init_v2sf",
531+ ftype, IX86_BUILTIN_VEC_INIT_V2SF);
532+
533 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
534 integer_type_node, NULL_TREE);
535 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
536@@ -14344,6 +14674,11 @@ ix86_init_mmx_sse_builtins (void)
537 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
538
539 /* Access to the vec_extract patterns. */
540+ ftype = build_function_type_list (float_type_node, V2SF_type_node,
541+ integer_type_node, NULL_TREE);
542+ def_builtin (MASK_3DNOW, "__builtin_ia32_vec_ext_v2sf",
543+ ftype, IX86_BUILTIN_VEC_EXT_V2DF);
544+
545 ftype = build_function_type_list (double_type_node, V2DF_type_node,
546 integer_type_node, NULL_TREE);
547 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
548@@ -14818,7 +15153,7 @@ ix86_expand_builtin (tree exp, rtx targe
549 switch (fcode)
550 {
551 case IX86_BUILTIN_EMMS:
552- emit_insn (gen_mmx_emms ());
553+ /* emms insn is emitted automatically. */
554 return 0;
555
556 case IX86_BUILTIN_SFENCE:
557@@ -15035,7 +15370,7 @@ ix86_expand_builtin (tree exp, rtx targe
558 return target;
559
560 case IX86_BUILTIN_FEMMS:
561- emit_insn (gen_mmx_femms ());
562+ /* femms insn is emitted automatically. */
563 return NULL_RTX;
564
565 case IX86_BUILTIN_PAVGUSB:
566@@ -15181,11 +15516,13 @@ ix86_expand_builtin (tree exp, rtx targe
567 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
568 target, 1);
569
570+ case IX86_BUILTIN_VEC_INIT_V2SF:
571 case IX86_BUILTIN_VEC_INIT_V2SI:
572 case IX86_BUILTIN_VEC_INIT_V4HI:
573 case IX86_BUILTIN_VEC_INIT_V8QI:
574 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
575
576+ case IX86_BUILTIN_VEC_EXT_V2SF:
577 case IX86_BUILTIN_VEC_EXT_V2DF:
578 case IX86_BUILTIN_VEC_EXT_V2DI:
579 case IX86_BUILTIN_VEC_EXT_V4SF:
580--- gcc/gcc/config/i386/i386.h 2005-07-14 09:46:21.000000000 +0200
581+++ gcc/gcc/config/i386/i386.h 2005-07-18 06:14:15.000000000 +0200
582@@ -819,7 +819,9 @@ do { \
583
584 #define HARD_REGNO_NREGS(REGNO, MODE) \
585 (FP_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
586- ? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
587+ ? ((MODE) == ALLREGSmode \
588+ ? 8 \
589+ : (COMPLEX_MODE_P (MODE) ? 2 : 1)) \
590 : ((MODE) == XFmode \
591 ? (TARGET_64BIT ? 2 : 3) \
592 : (MODE) == XCmode \
593@@ -841,9 +843,8 @@ do { \
594 ((MODE) == DImode || (MODE) == V8QImode || (MODE) == V4HImode \
595 || (MODE) == V2SImode || (MODE) == SImode)
596
597-/* ??? No autovectorization into MMX or 3DNOW until we can reliably
598- place emms and femms instructions. */
599-#define UNITS_PER_SIMD_WORD (TARGET_SSE ? 16 : UNITS_PER_WORD)
600+#define UNITS_PER_SIMD_WORD \
601+ (TARGET_SSE ? 16 : TARGET_MMX ? 8 : UNITS_PER_WORD)
602
603 #define VALID_FP_MODE_P(MODE) \
604 ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode \
605@@ -1433,8 +1434,16 @@ enum reg_class
606 #define RETURN_POPS_ARGS(FUNDECL, FUNTYPE, SIZE) \
607 ix86_return_pops_args ((FUNDECL), (FUNTYPE), (SIZE))
608
609+/* 1 if N is the number of a register in which the values of
610+ called function may come back. */
611 #define FUNCTION_VALUE_REGNO_P(N) \
612- ix86_function_value_regno_p (N)
613+ ix86_function_value_regno_p ((N), false)
614+
615+/* 1 if N is the number of a register in which the value of
616+ __builtin_return builtin function may come back. */
617+
618+#define FUNCTION_VALUE_REGNO_P_APPLY_RESULT(N) \
619+ ix86_function_value_regno_p ((N), true)
620
621 /* Define how to find the value returned by a library function
622 assuming the value has mode MODE. */
623@@ -1449,7 +1458,13 @@ enum reg_class
624 #define APPLY_RESULT_SIZE (8+108)
625
626 /* 1 if N is a possible register number for function argument passing. */
627-#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p (N)
628+#define FUNCTION_ARG_REGNO_P(N) ix86_function_arg_regno_p ((N), false)
629+
630+/* 1 if N is a possible register number for function argument passing
631+ from __builtin_apply_args and __builtin_apply builtin functions. */
632+
633+#define FUNCTION_ARG_REGNO_P_APPLY_ARGS(N) \
634+ ix86_function_arg_regno_p ((N), true)
635
636 /* Define a data type for recording info about an argument list
637 during the scan of that argument list. This data type should
638@@ -1531,6 +1546,23 @@ typedef struct ix86_args {
639
640 #define EXIT_IGNORE_STACK 1
641
642+/* Define this macro as a C expression that is nonzero for registers
643+ that are used by the epilogue or the return' pattern. The stack
644+ and frame pointer registers are already be assumed to be used as
645+ needed. */
646+
647+#define EPILOGUE_USES(REGNO) ix86_epilogue_uses (REGNO)
648+
649+/* Define this macro as a C expression that returns RTL expression of
650+ additional hard register set by call_insn. */
651+
652+#define CALL_INSN_SETS(INSN) ix86_call_insn_sets (INSN)
653+
654+/* Define this macro as a C expression that returns RTL expression of
655+ additional hard register used by call_insn. */
656+
657+#define CALL_INSN_USES(INSN) ix86_call_insn_uses (INSN)
658+
659 /* Output assembler code for a block containing the constant parts
660 of a trampoline, leaving space for the variable parts. */
661
662@@ -2167,6 +2199,10 @@ extern rtx ix86_compare_op0; /* operand
663 extern rtx ix86_compare_op1; /* operand 1 for comparisons */
664 extern rtx ix86_compare_emitted;
665 \f
666+
667+/* x87 FPU modes for x87/MMX switching. */
668+enum ix86_fpu_mode { FPU_MODE_X87, FPU_MODE_MMX, FPU_MODE_ANY };
669+
670 /* To properly truncate FP values into integers, we need to set i387 control
671 word. We can't emit proper mode switching code before reload, as spills
672 generated by reload may truncate values incorrectly, but we still can avoid
673@@ -2188,6 +2224,7 @@ enum ix86_entity
674 I387_FLOOR,
675 I387_CEIL,
676 I387_MASK_PM,
677+ I387_FPU_MODE,
678 MAX_386_ENTITIES
679 };
680
681@@ -2217,7 +2254,12 @@ enum ix86_stack_slot
682 refer to the mode-switched entity in question. */
683
684 #define NUM_MODES_FOR_MODE_SWITCHING \
685- { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY }
686+ { I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, I387_CW_ANY, FPU_MODE_ANY }
687+
688+/* Define this macro if the port needs extra register life analysis
689+ after mode switching. */
690+
691+#define LIFE_ANALYSIS_AFTER_MODE_SWITCHING ix86_fpu_mode_changed
692
693 /* ENTITY is an integer specifying a mode-switched entity. If
694 `OPTIMIZE_MODE_SWITCHING' is defined, you must define this macro to
695@@ -2227,6 +2269,22 @@ enum ix86_stack_slot
696
697 #define MODE_NEEDED(ENTITY, I) ix86_mode_needed ((ENTITY), (I))
698
699+/* This macro determines the mode that an INSN results in (if different
700+ from the incoming mode). */
701+
702+#define MODE_AFTER(ENTITY, MODE, I) \
703+ ix86_mode_after ((ENTITY), (MODE), (I))
704+
705+/* This macro specifies a mode that ENTITY is assumed to be
706+ switched to at function entry. */
707+
708+#define MODE_ENTRY(ENTITY) ix86_mode_entry (ENTITY)
709+
710+/* This macro specifies a mode that ENTITY is assumed to be
711+ switched to at function exit. */
712+
713+#define MODE_EXIT(ENTITY) ix86_mode_exit (ENTITY)
714+
715 /* This macro specifies the order in which modes for ENTITY are
716 processed. 0 is the highest priority. */
717
718@@ -2236,10 +2294,8 @@ enum ix86_stack_slot
719 is the set of hard registers live at the point where the insn(s)
720 are to be inserted. */
721
722-#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
723- ((MODE) != I387_CW_ANY && (MODE) != I387_CW_UNINITIALIZED \
724- ? emit_i387_cw_initialization (MODE), 0 \
725- : 0)
726+#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
727+ ix86_emit_mode_set ((ENTITY), (MODE))
728
729 \f
730 /* Avoid renaming of stack registers, as doing so in combination with
731@@ -2263,6 +2319,7 @@ struct machine_function GTY(())
732 int save_varrargs_registers;
733 int accesses_prev_frame;
734 int optimize_mode_switching[MAX_386_ENTITIES];
735+ int fpu_mode_changed;
736 /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
737 determine the style used. */
738 int use_fast_prologue_epilogue;
739@@ -2274,6 +2331,7 @@ struct machine_function GTY(())
740 #define ix86_stack_locals (cfun->machine->stack_locals)
741 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
742 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
743+#define ix86_fpu_mode_changed (cfun->machine->fpu_mode_changed)
744
745 /* Control behavior of x86_file_start. */
746 #define X86_FILE_START_VERSION_DIRECTIVE false
747--- gcc/gcc/config/i386/i386.md 2005-07-12 11:20:12.000000000 +0200
748+++ gcc/gcc/config/i386/i386.md 2005-07-18 06:14:15.000000000 +0200
749@@ -152,7 +152,7 @@
750 (UNSPECV_EMMS 2)
751 (UNSPECV_LDMXCSR 3)
752 (UNSPECV_STMXCSR 4)
753- (UNSPECV_FEMMS 5)
754+ (UNSPECV_EFPU 5)
755 (UNSPECV_CLFLUSH 6)
756 (UNSPECV_ALIGN 7)
757 (UNSPECV_MONITOR 8)
758@@ -167,9 +167,11 @@
759 (define_constants
760 [(BP_REG 6)
761 (SP_REG 7)
762+ (FIRSTFP_REG 8)
763 (FLAGS_REG 17)
764 (FPSR_REG 18)
765 (DIRFLAG_REG 19)
766+ (FIRSTMMX_REG 29)
767 ])
768
769 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
770--- gcc/gcc/config/i386/i386-modes.def 2005-06-25 03:21:07.000000000 +0200
771+++ gcc/gcc/config/i386/i386-modes.def 2005-07-18 06:14:15.000000000 +0200
772@@ -62,6 +62,9 @@ CC_MODE (CCZ);
773 CC_MODE (CCFP);
774 CC_MODE (CCFPU);
775
776+/* This mode is used to cover all MMX and all x87 registers. */
777+RANDOM_MODE (ALLREGS);
778+
779 /* Vector modes. */
780 VECTOR_MODES (INT, 4); /* V4QI V2HI */
781 VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
782--- gcc/gcc/config/i386/i386-protos.h 2005-07-14 09:46:16.000000000 +0200
783+++ gcc/gcc/config/i386/i386-protos.h 2005-07-18 06:14:15.000000000 +0200
784@@ -152,6 +152,9 @@ extern bool ix86_expand_fp_vcond (rtx[])
785 extern bool ix86_expand_int_vcond (rtx[]);
786 extern int ix86_expand_int_addcc (rtx[]);
787 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
788+extern bool ix86_epilogue_uses (int);
789+extern rtx ix86_call_insn_sets (rtx);
790+extern rtx ix86_call_insn_uses (rtx);
791 extern void x86_initialize_trampoline (rtx, rtx, rtx);
792 extern rtx ix86_zero_extend_to_Pmode (rtx);
793 extern void ix86_split_long_move (rtx[]);
794@@ -168,8 +171,8 @@ extern int ix86_attr_length_address_defa
795 extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
796
797 extern rtx ix86_libcall_value (enum machine_mode);
798-extern bool ix86_function_value_regno_p (int);
799-extern bool ix86_function_arg_regno_p (int);
800+extern bool ix86_function_value_regno_p (int, bool);
801+extern bool ix86_function_arg_regno_p (int, bool);
802 extern int ix86_function_arg_boundary (enum machine_mode, tree);
803 extern int ix86_return_in_memory (tree);
804 extern void ix86_va_start (tree, rtx);
805@@ -190,7 +193,10 @@ extern bool ix86_cannot_change_mode_clas
806 extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
807 extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
808 extern int ix86_mode_needed (int, rtx);
809-extern void emit_i387_cw_initialization (int);
810+extern int ix86_mode_after (int, int, rtx);
811+extern int ix86_mode_entry (int);
812+extern int ix86_mode_exit (int);
813+extern void ix86_emit_mode_set (int, int);
814 extern bool ix86_fp_jump_nontrivial_p (enum rtx_code);
815 extern void x86_order_regs_for_local_alloc (void);
816 extern void x86_function_profiler (FILE *, int);
817--- gcc/gcc/config/i386/mm3dnow.h 2005-06-25 03:21:23.000000000 +0200
818+++ gcc/gcc/config/i386/mm3dnow.h 2005-07-18 06:14:15.000000000 +0200
819@@ -172,14 +172,13 @@ _m_prefetchw (void *__P)
820 static __inline __m64
821 _m_from_float (float __A)
822 {
823- return (__m64)(__v2sf){ __A, 0 };
824+ return (__m64) __builtin_ia32_vec_init_v2sf (__A, 0);
825 }
826
827 static __inline float
828 _m_to_float (__m64 __A)
829 {
830- union { __v2sf v; float a[2]; } __tmp = { (__v2sf)__A };
831- return __tmp.a[0];
832+ return __builtin_ia32_vec_ext_v2sf ((__v2sf)__A, 0);
833 }
834
835 #ifdef __3dNOW_A__
836--- gcc/gcc/config/i386/mmx.md 2005-06-25 03:21:23.000000000 +0200
837+++ gcc/gcc/config/i386/mmx.md 2005-07-18 06:14:15.000000000 +0200
838@@ -23,14 +23,6 @@
839 ;; the same register file, and 3dNOW! adds a number of extensions to
840 ;; the base integer MMX isa.
841
842-;; Note! Except for the basic move instructions, *all* of these
843-;; patterns are outside the normal optabs namespace. This is because
844-;; use of these registers requires the insertion of emms or femms
845-;; instructions to return to normal fpu mode. The compiler doesn't
846-;; know how to do that itself, which means it's up to the user. Which
847-;; means that we should never use any of these patterns except at the
848-;; direction of the user via a builtin.
849-
850 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
851 (define_mode_macro MMXMODEI [V8QI V4HI V2SI])
852
853@@ -481,7 +473,7 @@
854 (match_operand 2 "const_int_operand" "")]
855 "TARGET_MMX"
856 {
857- ix86_expand_vector_set (false, operands[0], operands[1],
858+ ix86_expand_vector_set (true, operands[0], operands[1],
859 INTVAL (operands[2]));
860 DONE;
861 })
862@@ -537,7 +529,7 @@
863 (match_operand 2 "const_int_operand" "")]
864 "TARGET_MMX"
865 {
866- ix86_expand_vector_extract (false, operands[0], operands[1],
867+ ix86_expand_vector_extract (true, operands[0], operands[1],
868 INTVAL (operands[2]));
869 DONE;
870 })
871@@ -547,7 +539,7 @@
872 (match_operand 1 "" "")]
873 "TARGET_SSE"
874 {
875- ix86_expand_vector_init (false, operands[0], operands[1]);
876+ ix86_expand_vector_init (true, operands[0], operands[1]);
877 DONE;
878 })
879
880@@ -557,6 +549,21 @@
881 ;;
882 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
883
884+(define_expand "neg<mode>2"
885+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
886+ (minus:MMXMODEI
887+ (match_dup 2)
888+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "")))]
889+ "TARGET_MMX"
890+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
891+
892+(define_expand "add<mode>3"
893+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
894+ (plus:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
895+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
896+ "TARGET_MMX"
897+ "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
898+
899 (define_insn "mmx_add<mode>3"
900 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
901 (plus:MMXMODEI
902@@ -598,6 +605,13 @@
903 [(set_attr "type" "mmxadd")
904 (set_attr "mode" "DI")])
905
906+(define_expand "sub<mode>3"
907+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
908+ (minus:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "")
909+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
910+ "TARGET_MMX"
911+ "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
912+
913 (define_insn "mmx_sub<mode>3"
914 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
915 (minus:MMXMODEI
916@@ -639,6 +653,13 @@
917 [(set_attr "type" "mmxadd")
918 (set_attr "mode" "DI")])
919
920+(define_expand "mulv4hi3"
921+ [(set (match_operand:V4HI 0 "register_operand" "")
922+ (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
923+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
924+ "TARGET_MMX"
925+ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
926+
927 (define_insn "mmx_mulv4hi3"
928 [(set (match_operand:V4HI 0 "register_operand" "=y")
929 (mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
930@@ -735,6 +756,13 @@
931 [(set_attr "type" "mmxmul")
932 (set_attr "mode" "DI")])
933
934+(define_expand "umaxv8qi3"
935+ [(set (match_operand:V8QI 0 "register_operand" "")
936+ (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
937+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
938+ "(TARGET_SSE || TARGET_3DNOW_A)"
939+ "ix86_fixup_binary_operands_no_copy (UMAX, V8QImode, operands);")
940+
941 (define_insn "mmx_umaxv8qi3"
942 [(set (match_operand:V8QI 0 "register_operand" "=y")
943 (umax:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
944@@ -745,6 +773,13 @@
945 [(set_attr "type" "mmxadd")
946 (set_attr "mode" "DI")])
947
948+(define_expand "smaxv4hi3"
949+ [(set (match_operand:V4HI 0 "register_operand" "")
950+ (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
951+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
952+ "(TARGET_SSE || TARGET_3DNOW_A)"
953+ "ix86_fixup_binary_operands_no_copy (SMAX, V4HImode, operands);")
954+
955 (define_insn "mmx_smaxv4hi3"
956 [(set (match_operand:V4HI 0 "register_operand" "=y")
957 (smax:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
958@@ -755,6 +790,13 @@
959 [(set_attr "type" "mmxadd")
960 (set_attr "mode" "DI")])
961
962+(define_expand "uminv8qi3" ; standard-name expander: V8QI unsigned minimum
963+ [(set (match_operand:V8QI 0 "register_operand" "")
964+ (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
965+ (match_operand:V8QI 2 "nonimmediate_operand" "")))]
966+ "(TARGET_SSE || TARGET_3DNOW_A)"
967+ "ix86_fixup_binary_operands_no_copy (UMIN, V8QImode, operands);") ; rtx code matches the umin template
968+
969 (define_insn "mmx_uminv8qi3"
970 [(set (match_operand:V8QI 0 "register_operand" "=y")
971 (umin:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
972@@ -765,6 +807,13 @@
973 [(set_attr "type" "mmxadd")
974 (set_attr "mode" "DI")])
975
976+(define_expand "sminv4hi3"
977+ [(set (match_operand:V4HI 0 "register_operand" "")
978+ (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
979+ (match_operand:V4HI 2 "nonimmediate_operand" "")))]
980+ "(TARGET_SSE || TARGET_3DNOW_A)"
981+ "ix86_fixup_binary_operands_no_copy (SMIN, V4HImode, operands);")
982+
983 (define_insn "mmx_sminv4hi3"
984 [(set (match_operand:V4HI 0 "register_operand" "=y")
985 (smin:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
986@@ -775,7 +824,7 @@
987 [(set_attr "type" "mmxadd")
988 (set_attr "mode" "DI")])
989
990-(define_insn "mmx_ashr<mode>3"
991+(define_insn "ashr<mode>3"
992 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
993 (ashiftrt:MMXMODE24
994 (match_operand:MMXMODE24 1 "register_operand" "0")
995@@ -785,7 +834,7 @@
996 [(set_attr "type" "mmxshft")
997 (set_attr "mode" "DI")])
998
999-(define_insn "mmx_lshr<mode>3"
1000+(define_insn "lshr<mode>3"
1001 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
1002 (lshiftrt:MMXMODE24
1003 (match_operand:MMXMODE24 1 "register_operand" "0")
1004@@ -806,7 +855,7 @@
1005 [(set_attr "type" "mmxshft")
1006 (set_attr "mode" "DI")])
1007
1008-(define_insn "mmx_ashl<mode>3"
1009+(define_insn "ashl<mode>3"
1010 [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
1011 (ashift:MMXMODE24
1012 (match_operand:MMXMODE24 1 "register_operand" "0")
1013@@ -853,12 +902,66 @@
1014 [(set_attr "type" "mmxcmp")
1015 (set_attr "mode" "DI")])
1016
1017+(define_expand "vcond<mode>"
1018+ [(set (match_operand:MMXMODE12 0 "register_operand" "")
1019+ (if_then_else:MMXMODE12
1020+ (match_operator 3 ""
1021+ [(match_operand:MMXMODE12 4 "nonimmediate_operand" "")
1022+ (match_operand:MMXMODE12 5 "nonimmediate_operand" "")])
1023+ (match_operand:MMXMODE12 1 "general_operand" "")
1024+ (match_operand:MMXMODE12 2 "general_operand" "")))]
1025+ "TARGET_MMX"
1026+{
1027+ if (ix86_expand_int_vcond (operands))
1028+ DONE;
1029+ else
1030+ FAIL;
1031+})
1032+
1033+(define_expand "vconduv8qi"
1034+ [(set (match_operand:V8QI 0 "register_operand" "")
1035+ (if_then_else:V8QI
1036+ (match_operator 3 ""
1037+ [(match_operand:V8QI 4 "nonimmediate_operand" "")
1038+ (match_operand:V8QI 5 "nonimmediate_operand" "")])
1039+ (match_operand:V8QI 1 "general_operand" "")
1040+ (match_operand:V8QI 2 "general_operand" "")))]
1041+ "TARGET_MMX"
1042+{
1043+ if (ix86_expand_int_vcond (operands))
1044+ DONE;
1045+ else
1046+ FAIL;
1047+})
1048+
1049 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1050 ;;
1051 ;; Parallel integral logical operations
1052 ;;
1053 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1054
1055+(define_expand "one_cmpl<mode>2"
1056+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1057+ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1058+ (match_dup 2)))]
1059+ "TARGET_MMX"
1060+{ /* One's complement is expanded as XOR of operand 1 with an all-ones vector. */
1061+ int i, n = GET_MODE_NUNITS (<MODE>mode);
1062+ rtvec v = rtvec_alloc (n);
1063+
1064+ for (i = 0; i < n; ++i)
1065+ RTVEC_ELT (v, i) = constm1_rtx; /* every element is -1 */
1066+
1067+ operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); /* fills in the (match_dup 2) above */
1068+})
1069+
1070+(define_expand "and<mode>3"
1071+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1072+ (and:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1073+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1074+ "TARGET_MMX"
1075+ "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
1076+
1077 (define_insn "mmx_and<mode>3"
1078 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1079 (and:MMXMODEI
1080@@ -879,6 +982,13 @@
1081 [(set_attr "type" "mmxadd")
1082 (set_attr "mode" "DI")])
1083
1084+(define_expand "ior<mode>3"
1085+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1086+ (ior:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1087+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1088+ "TARGET_MMX"
1089+ "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
1090+
1091 (define_insn "mmx_ior<mode>3"
1092 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1093 (ior:MMXMODEI
1094@@ -889,6 +999,13 @@
1095 [(set_attr "type" "mmxadd")
1096 (set_attr "mode" "DI")])
1097
1098+(define_expand "xor<mode>3"
1099+ [(set (match_operand:MMXMODEI 0 "register_operand" "")
1100+ (xor:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "")
1101+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "")))]
1102+ "TARGET_MMX"
1103+ "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
1104+
1105 (define_insn "mmx_xor<mode>3"
1106 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
1107 (xor:MMXMODEI
1108@@ -1147,7 +1264,7 @@
1109 (match_operand 2 "const_int_operand" "")]
1110 "TARGET_MMX"
1111 {
1112- ix86_expand_vector_set (false, operands[0], operands[1],
1113+ ix86_expand_vector_set (true, operands[0], operands[1],
1114 INTVAL (operands[2]));
1115 DONE;
1116 })
1117@@ -1205,7 +1322,7 @@
1118 (match_operand 2 "const_int_operand" "")]
1119 "TARGET_MMX"
1120 {
1121- ix86_expand_vector_extract (false, operands[0], operands[1],
1122+ ix86_expand_vector_extract (true, operands[0], operands[1],
1123 INTVAL (operands[2]));
1124 DONE;
1125 })
1126@@ -1215,7 +1332,7 @@
1127 (match_operand 1 "" "")]
1128 "TARGET_SSE"
1129 {
1130- ix86_expand_vector_init (false, operands[0], operands[1]);
1131+ ix86_expand_vector_init (true, operands[0], operands[1]);
1132 DONE;
1133 })
1134
1135@@ -1225,7 +1342,7 @@
1136 (match_operand 2 "const_int_operand" "")]
1137 "TARGET_MMX"
1138 {
1139- ix86_expand_vector_set (false, operands[0], operands[1],
1140+ ix86_expand_vector_set (true, operands[0], operands[1],
1141 INTVAL (operands[2]));
1142 DONE;
1143 })
1144@@ -1236,7 +1353,7 @@
1145 (match_operand 2 "const_int_operand" "")]
1146 "TARGET_MMX"
1147 {
1148- ix86_expand_vector_extract (false, operands[0], operands[1],
1149+ ix86_expand_vector_extract (true, operands[0], operands[1],
1150 INTVAL (operands[2]));
1151 DONE;
1152 })
1153@@ -1246,7 +1363,7 @@
1154 (match_operand 1 "" "")]
1155 "TARGET_SSE"
1156 {
1157- ix86_expand_vector_init (false, operands[0], operands[1]);
1158+ ix86_expand_vector_init (true, operands[0], operands[1]);
1159 DONE;
1160 })
1161
1162@@ -1256,7 +1373,7 @@
1163 (match_operand 2 "const_int_operand" "")]
1164 "TARGET_MMX"
1165 {
1166- ix86_expand_vector_set (false, operands[0], operands[1],
1167+ ix86_expand_vector_set (true, operands[0], operands[1],
1168 INTVAL (operands[2]));
1169 DONE;
1170 })
1171@@ -1267,7 +1384,7 @@
1172 (match_operand 2 "const_int_operand" "")]
1173 "TARGET_MMX"
1174 {
1175- ix86_expand_vector_extract (false, operands[0], operands[1],
1176+ ix86_expand_vector_extract (true, operands[0], operands[1],
1177 INTVAL (operands[2]));
1178 DONE;
1179 })
1180@@ -1277,7 +1394,7 @@
1181 (match_operand 1 "" "")]
1182 "TARGET_SSE"
1183 {
1184- ix86_expand_vector_init (false, operands[0], operands[1]);
1185+ ix86_expand_vector_init (true, operands[0], operands[1]);
1186 DONE;
1187 })
1188
1189@@ -1386,48 +1503,20 @@
1190 [(set_attr "type" "mmxcvt")
1191 (set_attr "mode" "DI")])
1192
1193-(define_insn "mmx_emms"
1194- [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
1195- (clobber (reg:XF 8))
1196- (clobber (reg:XF 9))
1197- (clobber (reg:XF 10))
1198- (clobber (reg:XF 11))
1199- (clobber (reg:XF 12))
1200- (clobber (reg:XF 13))
1201- (clobber (reg:XF 14))
1202- (clobber (reg:XF 15))
1203- (clobber (reg:DI 29))
1204- (clobber (reg:DI 30))
1205- (clobber (reg:DI 31))
1206- (clobber (reg:DI 32))
1207- (clobber (reg:DI 33))
1208- (clobber (reg:DI 34))
1209- (clobber (reg:DI 35))
1210- (clobber (reg:DI 36))]
1211- "TARGET_MMX"
1212- "emms"
1213- [(set_attr "type" "mmx")
1214- (set_attr "memory" "unknown")])
1215+(define_insn "efpu"
1216+ [(set (reg:ALLREGS FIRSTFP_REG)
1217+ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTMMX_REG)]
1218+ UNSPECV_EFPU))]
1219+ "TARGET_80387 && TARGET_MMX"
1220+ "" ; empty output template
1221+ [(set_attr "length" "0")]) ; length attribute is zero
1222+
1223+(define_insn "emms"
1224+ [(set (reg:ALLREGS FIRSTMMX_REG)
1225+ (unspec_volatile:ALLREGS [(reg:ALLREGS FIRSTFP_REG)]
1226+ UNSPECV_EMMS))]
1227+ "TARGET_80387 && TARGET_MMX"
1228+{ /* Output "femms" when TARGET_3DNOW is set, plain "emms" otherwise. */
1229+ return TARGET_3DNOW ? "femms" : "emms";
1230+})
1231
1232-(define_insn "mmx_femms"
1233- [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
1234- (clobber (reg:XF 8))
1235- (clobber (reg:XF 9))
1236- (clobber (reg:XF 10))
1237- (clobber (reg:XF 11))
1238- (clobber (reg:XF 12))
1239- (clobber (reg:XF 13))
1240- (clobber (reg:XF 14))
1241- (clobber (reg:XF 15))
1242- (clobber (reg:DI 29))
1243- (clobber (reg:DI 30))
1244- (clobber (reg:DI 31))
1245- (clobber (reg:DI 32))
1246- (clobber (reg:DI 33))
1247- (clobber (reg:DI 34))
1248- (clobber (reg:DI 35))
1249- (clobber (reg:DI 36))]
1250- "TARGET_3DNOW"
1251- "femms"
1252- [(set_attr "type" "mmx")
1253- (set_attr "memory" "none")])
1254--- gcc/gcc/config/i386/sse.md 2005-06-29 19:27:19.000000000 +0200
1255+++ gcc/gcc/config/i386/sse.md 2005-07-18 06:14:15.000000000 +0200
1256@@ -881,6 +881,7 @@
1257 "TARGET_SSE"
1258 "cvtpi2ps\t{%2, %0|%0, %2}"
1259 [(set_attr "type" "ssecvt")
1260+ (set_attr "unit" "mmx")
1261 (set_attr "mode" "V4SF")])
1262
1263 (define_insn "sse_cvtps2pi"
1264@@ -3508,6 +3509,7 @@
1265 movhps\t{%2, %0|%0, %2}
1266 movlps\t{%1, %0|%0, %1}"
1267 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
1268+ (set_attr "unit" "*,mmx,*,*,*,*")
1269 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
1270
1271 (define_expand "vec_setv2di"
1272--- gcc/gcc/config/sh/sh.h 2005-07-03 23:08:07.000000000 +0200
1273+++ gcc/gcc/config/sh/sh.h 2005-07-18 06:14:15.000000000 +0200
1274@@ -3301,7 +3301,7 @@ extern struct rtx_def *sp_switch;
1275 ? get_attr_fp_mode (INSN) \
1276 : FP_MODE_NONE)
1277
1278-#define MODE_AFTER(MODE, INSN) \
1279+#define MODE_AFTER(ENTITY, MODE, INSN) \
1280 (TARGET_HITACHI \
1281 && recog_memoized (INSN) >= 0 \
1282 && get_attr_fp_set (INSN) != FP_SET_NONE \
1283--- gcc/gcc/doc/tm.texi 2005-07-13 19:27:39.000000000 +0200
1284+++ gcc/gcc/doc/tm.texi 2005-07-18 06:14:15.000000000 +0200
1285@@ -4227,6 +4227,16 @@ stack adjustment in a function that has
1286 compiler knows this regardless of @code{EXIT_IGNORE_STACK}.
1287 @end defmac
1288
1289+@defmac CALL_INSN_SETS (@var{INSN})
1290+Define this macro as a C expression that returns the RTL expression for
1291+an additional hard register set by the call insn @var{INSN}, if any.
1292+@end defmac
1293+
1294+@defmac CALL_INSN_USES (@var{INSN})
1295+Define this macro as a C expression that returns the RTL expression for
1296+an additional hard register used by the call insn @var{INSN}, if any.
1297+@end defmac
1298+
1299 @defmac EPILOGUE_USES (@var{regno})
1300 Define this macro as a C expression that is nonzero for registers that are
1301 used by the epilogue or the @samp{return} pattern. The stack and frame
1302@@ -8376,6 +8386,13 @@ represented as numbers 0 @dots{} N @minu
1303 switch is needed / supplied.
1304 @end defmac
1305
1306+@defmac LIFE_ANALYSIS_AFTER_MODE_SWITCHING
1307+Define this macro as a C expression that is nonzero if the port needs an
1308+extra register life analysis pass after mode switching. It should be
1309+nonzero when mode switching inserts instructions that change global
1310+registers, so that global register life information stays consistent.
1311+@end defmac
1312+
1313 @defmac MODE_NEEDED (@var{entity}, @var{insn})
1314 @var{entity} is an integer specifying a mode-switched entity. If
1315 @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro to
1316@@ -8384,9 +8401,9 @@ return an integer value not larger than
1317 be switched into prior to the execution of @var{insn}.
1318 @end defmac
1319
1320-@defmac MODE_AFTER (@var{mode}, @var{insn})
1321-If this macro is defined, it is evaluated for every @var{insn} during
1322-mode switching. It determines the mode that an insn results in (if
1323+@defmac MODE_AFTER (@var{entity}, @var{mode}, @var{insn})
1324+If this macro is defined, it is evaluated for every @var{entity} that needs
1325+mode switching. It determines the mode that an @var{insn} results in (if
1326 different from the incoming mode).
1327 @end defmac
1328
1329--- gcc/gcc/flow.c 2005-07-05 18:19:55.000000000 +0200
1330+++ gcc/gcc/flow.c 2005-07-18 06:14:15.000000000 +0200
1331@@ -1830,10 +1830,11 @@ propagate_one_insn (struct propagate_blo
1332 {
1333 regset live_at_end;
1334 bool sibcall_p;
1335- rtx note, cond;
1336+ rtx note;
1337+ rtx cond = NULL_RTX;
1338+ rtx reg ATTRIBUTE_UNUSED;
1339 int i;
1340
1341- cond = NULL_RTX;
1342 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
1343 cond = COND_EXEC_TEST (PATTERN (insn));
1344
1345@@ -1856,6 +1857,13 @@ propagate_one_insn (struct propagate_blo
1346 mark_set_1 (pbi, CLOBBER, XEXP (XEXP (note, 0), 0),
1347 cond, insn, pbi->flags);
1348
1349+#ifdef CALL_INSN_SETS
1350+ reg = CALL_INSN_SETS (insn);
1351+
1352+ if (reg)
1353+ mark_set_1 (pbi, SET, reg, cond, insn, pbi->flags);
1354+#endif
1355+
1356 /* Calls change all call-used and global registers; sibcalls do not
1357 clobber anything that must be preserved at end-of-function,
1358 except for return values. */
1359@@ -1894,10 +1902,11 @@ propagate_one_insn (struct propagate_blo
1360
1361 if (! insn_is_dead && CALL_P (insn))
1362 {
1363+ rtx note;
1364+ rtx cond = NULL_RTX;
1365+ rtx reg ATTRIBUTE_UNUSED;
1366 int i;
1367- rtx note, cond;
1368
1369- cond = NULL_RTX;
1370 if (GET_CODE (PATTERN (insn)) == COND_EXEC)
1371 cond = COND_EXEC_TEST (PATTERN (insn));
1372
1373@@ -1910,6 +1919,13 @@ propagate_one_insn (struct propagate_blo
1374 of which mark_used_regs knows how to handle. */
1375 mark_used_regs (pbi, XEXP (XEXP (note, 0), 0), cond, insn);
1376
1377+#ifdef CALL_INSN_USES
1378+ reg = CALL_INSN_USES (insn);
1379+
1380+ if (reg)
1381+ mark_used_reg (pbi, reg, cond, insn);
1382+#endif
1383+
1384 /* The stack ptr is used (honorarily) by a CALL insn. */
1385 if ((flags & PROP_REG_INFO)
1386 && !REGNO_REG_SET_P (pbi->reg_live, STACK_POINTER_REGNUM))
1387--- gcc/gcc/mode-switching.c 2005-07-05 18:20:07.000000000 +0200
1388+++ gcc/gcc/mode-switching.c 2005-07-18 06:14:15.000000000 +0200
1389@@ -473,7 +473,7 @@ optimize_mode_switching (FILE *file)
1390 RESET_BIT (transp[bb->index], j);
1391 }
1392 #ifdef MODE_AFTER
1393- last_mode = MODE_AFTER (last_mode, insn);
1394+ last_mode = MODE_AFTER (e, last_mode, insn);
1395 #endif
1396 /* Update LIVE_NOW. */
1397 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1398@@ -730,6 +730,14 @@ rest_of_handle_mode_switching (void)
1399 no_new_pseudos = 0;
1400 optimize_mode_switching (NULL);
1401 no_new_pseudos = 1;
1402+
1403+ /* Mode switching can insert instructions that
1404+ change global registers life data. */
1405+#ifdef LIFE_ANALYSIS_AFTER_MODE_SWITCHING
1406+ if (LIFE_ANALYSIS_AFTER_MODE_SWITCHING)
1407+ life_analysis (NULL, PROP_REG_INFO);
1408+#endif
1409+
1410 #endif /* OPTIMIZE_MODE_SWITCHING */
1411 }
1412
1413--- gcc/gcc/reg-stack.c 2005-07-14 09:39:54.000000000 +0200
1414+++ gcc/gcc/reg-stack.c 2005-07-18 06:14:15.000000000 +0200
1415@@ -1579,6 +1579,41 @@ subst_stack_regs_pat (rtx insn, stack re
1416 }
1417 break;
1418
1419+ case UNSPEC_VOLATILE:
1420+ switch (XINT (pat_src, 1))
1421+ {
1422+ int i;
1423+
1424+ case UNSPECV_EFPU:
1425+ /* There should be no stack registers live
1426+ at this point. */
1427+ gcc_assert (regstack->top == -1);
1428+
1429+ /* Mark all x87 registers as used. */
1430+ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--)
1431+ {
1432+ regstack->reg[++regstack->top] = i;
1433+ SET_HARD_REG_BIT (regstack->reg_set, i);
1434+ }
1435+ break;
1436+
1437+ case UNSPECV_EMMS:
1438+ /* All stack registers should be alive
1439+ at this point. */
1440+ gcc_assert (regstack->top == REG_STACK_SIZE - 1);
1441+
1442+ /* Mark all x87 registers as empty. */
1443+ for (i = LAST_STACK_REG; i >= FIRST_STACK_REG; i--)
1444+ CLEAR_HARD_REG_BIT (regstack->reg_set, i);
1445+
1446+ regstack->top = -1;
1447+ break;
1448+
1449+ default:
1450+ gcc_unreachable ();
1451+ }
1452+ break;
1453+
1454 case UNSPEC:
1455 switch (XINT (pat_src, 1))
1456 {
1457@@ -2269,6 +2304,25 @@ subst_stack_regs (rtx insn, stack regsta
1458 if (NOTE_P (insn) || INSN_DELETED_P (insn))
1459 return control_flow_insn_deleted;
1460
1461+#ifdef CALL_INSN_SETS
1462+ if (CALL_P (insn))
1463+ {
1464+ rtx reg = CALL_INSN_SETS (insn);
1465+
1466+ if (reg && STACK_REG_P (reg))
1467+ {
1468+ int count;
1469+
1470+ for (count = hard_regno_nregs[REGNO (reg)][GET_MODE (reg)];
1471+ --count >= 0;)
1472+ {
1473+ regstack->reg[++regstack->top] = REGNO (reg) + count;
1474+ SET_HARD_REG_BIT (regstack->reg_set, REGNO (reg) + count);
1475+ }
1476+ }
1477+ }
1478+#endif
1479+
1480 /* If there is a REG_UNUSED note on a stack register on this insn,
1481 the indicated reg must be popped. The REG_UNUSED note is removed,
1482 since the form of the newly emitted pop insn references the reg,
1483@@ -2544,6 +2598,15 @@ convert_regs_entry (void)
1484 basic_block block = e->dest;
1485 block_info bi = BLOCK_INFO (block);
1486 int reg, top = -1;
1487+ int numregs = 0;
1488+
1489+ /* Check if all stack registers are live at function entry.
1490+ This is the case where stack registers are disabled and no
1491+ register initialization is needed. */
1492+
1493+ for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
1494+ if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
1495+ numregs++;
1496
1497 for (reg = LAST_STACK_REG; reg >= FIRST_STACK_REG; --reg)
1498 if (TEST_HARD_REG_BIT (bi->stack_in.reg_set, reg))
1499@@ -2552,11 +2615,14 @@ convert_regs_entry (void)
1500
1501 bi->stack_in.reg[++top] = reg;
1502
1503- init = gen_rtx_SET (VOIDmode,
1504- FP_MODE_REG (FIRST_STACK_REG, SFmode),
1505- not_a_num);
1506- insert_insn_on_edge (init, e);
1507- inserted = 1;
1508+ if (numregs != REG_STACK_SIZE)
1509+ {
1510+ init = gen_rtx_SET (VOIDmode,
1511+ FP_MODE_REG (FIRST_STACK_REG, SFmode),
1512+ not_a_num);
1513+ insert_insn_on_edge (init, e);
1514+ inserted = 1;
1515+ }
1516 }
1517
1518 bi->stack_in.top = top;
1519@@ -2575,13 +2641,34 @@ convert_regs_exit (void)
1520 stack output_stack;
1521 rtx retvalue;
1522
1523- retvalue = stack_result (current_function_decl);
1524 value_reg_low = value_reg_high = -1;
1525- if (retvalue)
1526+
1527+#ifdef EPILOGUE_USES
1528+ {
1529+ int numregs = 0;
1530+ int i;
1531+
1532+ for (i = FIRST_STACK_REG; i < LAST_STACK_REG + 1; i++)
1533+ if (EPILOGUE_USES (i))
1534+ numregs++;
1535+
1536+ if (numregs)
1537+ {
1538+ value_reg_low = FIRST_STACK_REG;
1539+ value_reg_high = value_reg_low + numregs - 1;
1540+ }
1541+ }
1542+#endif
1543+
1544+ if (value_reg_low < 0)
1545 {
1546- value_reg_low = REGNO (retvalue);
1547- value_reg_high = value_reg_low
1548- + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1;
1549+ retvalue = stack_result (current_function_decl);
1550+ if (retvalue)
1551+ {
1552+ value_reg_low = REGNO (retvalue);
1553+ value_reg_high = value_reg_low
1554+ + hard_regno_nregs[value_reg_low][GET_MODE (retvalue)] - 1;
1555+ }
1556 }
1557
1558 output_stack = &BLOCK_INFO (EXIT_BLOCK_PTR)->stack_in;
This page took 0.197626 seconds and 4 git commands to generate.