Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/doc/invoke.texi (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -584,7 +584,7 @@ -m96bit-long-double -mregparm=@var{num} -msseregparm @gol -mveclibabi=@var{type} -mpc32 -mpc64 -mpc80 -mstackrealign @gol -momit-leaf-frame-pointer -mno-red-zone -mno-tls-direct-seg-refs @gol --mcmodel=@var{code-model} @gol +-mcmodel=@var{code-model} -mabi=@var{name} @gol -m32 -m64 -mlarge-data-threshold=@var{num} @gol -mfused-madd -mno-fused-madd -msse2avx} @@ -10959,6 +10959,9 @@ @item core2 Intel Core2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 instruction set support. +@item atom +Intel Atom CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3 and SSSE3 +instruction set support. @item k6 AMD K6 CPU with MMX instruction set support. @item k6-2, k6-3 @@ -11394,6 +11397,16 @@ @option{-funsafe-math-optimizations} have to be enabled. A SVML or ACML ABI compatible library will have to be specified at link time. +@item -mabi=@var{name} +@opindex mabi +Generate code for the specified calling convention. Permissible values +are: @samp{sysv} for the ABI used on GNU/Linux and other systems and +@samp{ms} for the Microsoft ABI. The default is to use the Microsoft +ABI when targeting Windows. On all other systems, the default is the +SYSV ABI. You can control this behavior for a specific function by +using the function attribute @samp{ms_abi}/@samp{sysv_abi}. +@xref{Function Attributes}. 
+ @item -mpush-args @itemx -mno-push-args @opindex mpush-args Index: gcc/doc/md.texi =================================================================== --- gcc/doc/md.texi (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/doc/md.texi (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -7506,6 +7506,11 @@ recognize complicated bypasses, e.g.@: when the consumer is only an address of insn @samp{store} (not a stored value). +If there is more than one bypass with the same output and input insns, the +chosen bypass is the first bypass with a guard in description whose +guard function returns nonzero. If there is no such bypass, then +bypass without the guard function is chosen. + @findex exclusion_set @findex presence_set @findex final_presence_set Index: gcc/genautomata.c =================================================================== --- gcc/genautomata.c (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/genautomata.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -1,5 +1,5 @@ /* Pipeline hazard description translator. - Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008 + Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. Written by Vladimir Makarov @@ -22,21 +22,25 @@ /* References: - 1. Detecting pipeline structural hazards quickly. T. Proebsting, + 1. The finite state automaton based pipeline hazard recognizer and + instruction scheduler in GCC. V. Makarov. Proceedings of GCC + summit, 2003. + + 2. Detecting pipeline structural hazards quickly. T. Proebsting, C. Fraser. Proceedings of ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages, pages 280--286, 1994. This article is a good start point to understand usage of finite state automata for pipeline hazard recognizers. But I'd - recommend the 2nd article for more deep understanding. + recommend the 1st and 3rd article for more deep understanding. - 2. Efficient Instruction Scheduling Using Finite State Automata: + 3. 
Efficient Instruction Scheduling Using Finite State Automata: V. Bala and N. Rubin, Proceedings of MICRO-28. This is the best article about usage of finite state automata for pipeline hazard recognizers. - The current implementation is different from the 2nd article in the - following: + The current implementation is described in the 1st article and it + is different from the 3rd article in the following: 1. New operator `|' (alternative) is permitted in functional unit reservation which can be treated deterministically and @@ -463,7 +467,10 @@ insn. */ int insn_num; /* The following field value is list of bypasses in which given insn - is output insn. */ + is output insn. Bypasses with the same input insn stay one after + another in the list in the same order as their occurrences in the + description but the bypass without a guard stays always the last + in a row of bypasses with the same input insn. */ struct bypass_decl *bypass_list; /* The following fields are defined by automaton generator. */ @@ -2367,18 +2374,67 @@ } -/* The function searches for bypass with given IN_INSN_RESERV in given - BYPASS_LIST. */ -static struct bypass_decl * -find_bypass (struct bypass_decl *bypass_list, - struct insn_reserv_decl *in_insn_reserv) +/* The function inserts BYPASS in the list of bypasses of the + corresponding output insn. The order of bypasses in the list is + described in a comment for member `bypass_list' (see above). If + there is already the same bypass in the list the function reports + this and does nothing. 
*/ +static void +insert_bypass (struct bypass_decl *bypass) { - struct bypass_decl *bypass; - - for (bypass = bypass_list; bypass != NULL; bypass = bypass->next) - if (bypass->in_insn_reserv == in_insn_reserv) - break; - return bypass; + struct bypass_decl *curr, *last; + struct insn_reserv_decl *out_insn_reserv = bypass->out_insn_reserv; + struct insn_reserv_decl *in_insn_reserv = bypass->in_insn_reserv; + + for (curr = out_insn_reserv->bypass_list, last = NULL; + curr != NULL; + last = curr, curr = curr->next) + if (curr->in_insn_reserv == in_insn_reserv) + { + if ((bypass->bypass_guard_name != NULL + && curr->bypass_guard_name != NULL + && ! strcmp (bypass->bypass_guard_name, curr->bypass_guard_name)) + || bypass->bypass_guard_name == curr->bypass_guard_name) + { + if (bypass->bypass_guard_name == NULL) + { + if (!w_flag) + error ("the same bypass `%s - %s' is already defined", + bypass->out_insn_name, bypass->in_insn_name); + else + warning (0, "the same bypass `%s - %s' is already defined", + bypass->out_insn_name, bypass->in_insn_name); + } + else if (!w_flag) + error ("the same bypass `%s - %s' (guard %s) is already defined", + bypass->out_insn_name, bypass->in_insn_name, + bypass->bypass_guard_name); + else + warning + (0, "the same bypass `%s - %s' (guard %s) is already defined", + bypass->out_insn_name, bypass->in_insn_name, + bypass->bypass_guard_name); + return; + } + if (curr->bypass_guard_name == NULL) + break; + if (curr->next == NULL || curr->next->in_insn_reserv != in_insn_reserv) + { + last = curr; + break; + } + + } + if (last == NULL) + { + bypass->next = out_insn_reserv->bypass_list; + out_insn_reserv->bypass_list = bypass; + } + else + { + bypass->next = last->next; + last->next = bypass; + } } /* The function processes pipeline description declarations, checks @@ -2391,7 +2447,6 @@ decl_t decl_in_table; decl_t out_insn_reserv; decl_t in_insn_reserv; - struct bypass_decl *bypass; int automaton_presence; int i; @@ -2514,36 +2569,7 @@ = 
DECL_INSN_RESERV (out_insn_reserv); DECL_BYPASS (decl)->in_insn_reserv = DECL_INSN_RESERV (in_insn_reserv); - bypass - = find_bypass (DECL_INSN_RESERV (out_insn_reserv)->bypass_list, - DECL_BYPASS (decl)->in_insn_reserv); - if (bypass != NULL) - { - if (DECL_BYPASS (decl)->latency == bypass->latency) - { - if (!w_flag) - error - ("the same bypass `%s - %s' is already defined", - DECL_BYPASS (decl)->out_insn_name, - DECL_BYPASS (decl)->in_insn_name); - else - warning - (0, "the same bypass `%s - %s' is already defined", - DECL_BYPASS (decl)->out_insn_name, - DECL_BYPASS (decl)->in_insn_name); - } - else - error ("bypass `%s - %s' is already defined", - DECL_BYPASS (decl)->out_insn_name, - DECL_BYPASS (decl)->in_insn_name); - } - else - { - DECL_BYPASS (decl)->next - = DECL_INSN_RESERV (out_insn_reserv)->bypass_list; - DECL_INSN_RESERV (out_insn_reserv)->bypass_list - = DECL_BYPASS (decl); - } + insert_bypass (DECL_BYPASS (decl)); } } } @@ -8159,19 +8185,32 @@ (advance_cycle_insn_decl)->insn_num)); fprintf (output_file, " case %d:\n", bypass->in_insn_reserv->insn_num); - if (bypass->bypass_guard_name == NULL) - fprintf (output_file, " return %d;\n", - bypass->latency); - else + for (;;) { - fprintf (output_file, - " if (%s (%s, %s))\n", - bypass->bypass_guard_name, INSN_PARAMETER_NAME, - INSN2_PARAMETER_NAME); - fprintf (output_file, - " return %d;\n break;\n", - bypass->latency); + if (bypass->bypass_guard_name == NULL) + { + gcc_assert (bypass->next == NULL + || (bypass->in_insn_reserv + != bypass->next->in_insn_reserv)); + fprintf (output_file, " return %d;\n", + bypass->latency); + } + else + { + fprintf (output_file, + " if (%s (%s, %s))\n", + bypass->bypass_guard_name, INSN_PARAMETER_NAME, + INSN2_PARAMETER_NAME); + fprintf (output_file, " return %d;\n", + bypass->latency); + } + if (bypass->next == NULL + || bypass->in_insn_reserv != bypass->next->in_insn_reserv) + break; + bypass = bypass->next; } + if (bypass->bypass_guard_name != NULL) + fprintf 
(output_file, " break;\n"); } fputs (" }\n break;\n", output_file); } Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4a.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4a.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4a.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,24 @@ +/* Test for cross x86_64<->w64 abi va_list calls. */ +/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */ +/* { dg-additional-sources "vaarg-4b.c" } */ + +extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *); +extern int __attribute__ ((sysv_abi)) sprintf (char *,const char *, ...); +extern void __attribute__ ((sysv_abi)) abort (void); + +extern void do_cpy (char *, ...); + +int __attribute__ ((sysv_abi)) +main () +{ + char s[256]; + + do_cpy (s, "1","2","3","4", "5", "6", "7", ""); + + if (s[0] != '1' || s[1] !='2' || s[2] != '3' || s[3] != '4' + || s[4] != '5' || s[5] != '6' || s[6] != '7' || s[7] != 0) + abort (); + + return 0; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4b.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4b.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-4b.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,31 @@ +/* Test for cross x86_64<->w64 abi va_list calls. 
*/ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */ + +#include <stdarg.h> + +extern __SIZE_TYPE__ __attribute__ ((sysv_abi)) strlen (const char *); +extern int __attribute__ ((sysv_abi)) sprintf (char *, const char *, ...); + +static void +vdo_cpy (char *s, va_list argp) +{ + __SIZE_TYPE__ len; + char *r = s; + char *e; + *r = 0; + for (;;) { + e = va_arg (argp, char *); + if (*e == 0) break; + sprintf (r,"%s", e); + r += strlen (r); + } +} + +void +do_cpy (char *s, ...) +{ + va_list argp; + va_start (argp, s); + vdo_cpy (s, argp); + va_end (argp); +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5a.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5a.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5a.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,17 @@ +/* Test for cross x86_64<->w64 abi va_list calls. */ +/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */ +/* { dg-additional-sources "vaarg-5b.c" } */ + +extern void __attribute__ ((sysv_abi)) abort (void); +extern int fct2 (int, ...); + +#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll + +int __attribute__ ((sysv_abi)) +main() +{ + if (fct2 (-1, SZ_ARGS) != 0) + abort (); + return 0; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5b.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5b.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/vaarg-5b.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,37 @@ +/* Test for cross x86_64<->w64 abi va_list calls. */ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -fno-builtin" } */ + +#include <stdarg.h> + +#define SZ_ARGS 1ll,2ll,3ll,4ll,5ll,6ll,7ll,0ll + +static int __attribute__ ((sysv_abi)) +fct1 (va_list argp, ...) 
+{ + long long p1,p2; + int ret = 1; + __builtin_sysv_va_list argp_2; + + __builtin_sysv_va_start (argp_2, argp); + do { + p1 = va_arg (argp_2, long long); + p2 = va_arg (argp, long long); + if (p1 != p2) + ret = 0; + } while (ret && p1 != 0); + __builtin_sysv_va_end (argp_2); + + return ret; +} + +int +fct2 (int dummy, ...) +{ + va_list argp; + int ret = dummy; + + va_start (argp, dummy); + ret += fct1 (argp, SZ_ARGS); + va_end (argp); + return ret; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2a.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2a.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2a.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,17 @@ +/* Test for cross x86_64<->w64 abi standard calls via variable. */ +/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */ +/* { dg-additional-sources "func-indirect-2b.c" } */ + +extern void __attribute__ ((sysv_abi)) abort (void); +typedef int (*func)(void *, char *, char *, short, long long); +extern func get_callback (void); + +int __attribute__ ((sysv_abi)) +main () +{ + func callme = get_callback (); + if (callme (0, 0, 0, 0x1234, 0x1234567890abcdefLL)) + abort (); + return 0; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2a.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2a.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2a.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,27 @@ +/* Test for cross x86_64<->w64 abi standard calls. 
*/ +/* { dg-do run { target i?86-*-linux* x86_64-*-linux* } } */ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */ +/* { dg-additional-sources "func-2b.c" } */ + +extern void __attribute__ ((sysv_abi)) abort (void); +long double func_cross (long double, double, float, long, int, char); + +long double __attribute__ ((sysv_abi)) +func_native (long double a, double b, float c, long d, int e, char f) +{ + long double ret; + ret = a + (long double) b + (long double) c; + ret *= (long double) (d + (long) e); + if (f>0) + ret += func_native (a,b,c,d,e,-f); + return ret; +} + +int __attribute__ ((sysv_abi)) +main () +{ + if (func_cross (1.0,2.0,3.0,1,2,3) + != func_native (1.0,2.0,3.0,1,2,3)) + abort (); + return 0; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2b.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2b.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/func-indirect-2b.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,24 @@ +/* Test for cross x86_64<->w64 abi standard calls via variable. 
*/ +/* { dg-options "-O2 -mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */ + +typedef int (*func)(void *, char *, char *, short, long long); + +static int +callback (void *ptr, char *string1, char *string2, short number, + long long rand) +{ + if (ptr != 0 + || string1 != 0 + || string2 != 0 + || number != 0x1234 + || rand != 0x1234567890abcdefLL) + return 1; + else + return 0; +} + +func +get_callback (void) +{ + return callback; +} Index: gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2b.c =================================================================== --- gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2b.c (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/gcc.target/x86_64/abi/callabi/func-2b.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,13 @@ +/* Test for cross x86_64<->w64 abi standard calls. */ +/* { dg-options "-mabi=ms -std=gnu99 -ffast-math -fno-builtin" } */ + +long double func_cross (long double a, double b, float c, long d, int e, + char f) +{ + long double ret; + ret = a + (long double) b + (long double) c; + ret *= (long double) (d + (long) e); + if (f>0) + ret += func_cross (a,b,c,d,e,-f); + return ret; +} Property changes on: gcc/testsuite/gcc.target/x86_64/abi/callabi ___________________________________________________________________ Deleted: svn:mergeinfo Index: gcc/testsuite/ChangeLog.ix86 =================================================================== --- gcc/testsuite/ChangeLog.ix86 (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/testsuite/ChangeLog.ix86 (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,14 @@ +2009-03-27 H.J. Lu + + Backport from mainline: + 2009-03-27 H.J. Lu + + PR target/39472 + * gcc.target/x86_64/abi/callabi/func-2a.c: New. + * gcc.target/x86_64/abi/callabi/func-2b.c: Likewise. + * gcc.target/x86_64/abi/callabi/func-indirect-2a.c: Likewise. + * gcc.target/x86_64/abi/callabi/func-indirect-2b.c: Likewise. + * gcc.target/x86_64/abi/callabi/vaarg-4a.c: Likewise. 
+ * gcc.target/x86_64/abi/callabi/vaarg-4b.c: Likewise. + * gcc.target/x86_64/abi/callabi/vaarg-5a.c: Likewise. + * gcc.target/x86_64/abi/callabi/vaarg-5b.c: Likewise. Property changes on: gcc/testsuite/gcc.dg/torture/pr36227.c ___________________________________________________________________ Deleted: svn:mergeinfo Property changes on: gcc/testsuite/g++.dg/cpp0x/decltype-38655.C ___________________________________________________________________ Deleted: svn:mergeinfo Property changes on: gcc/testsuite/ChangeLog-2008 ___________________________________________________________________ Deleted: svn:mergeinfo Property changes on: gcc/cp/ChangeLog-2007 ___________________________________________________________________ Deleted: svn:mergeinfo Property changes on: gcc/cp/ChangeLog-2008 ___________________________________________________________________ Deleted: svn:mergeinfo Index: gcc/rtl.def =================================================================== --- gcc/rtl.def (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/rtl.def (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -1088,7 +1088,11 @@ guard for the bypass. The function will get the two insns as parameters. If the function returns zero the bypass will be ignored for this case. Additional guard is necessary to recognize - complicated bypasses, e.g. when consumer is load address. */ + complicated bypasses, e.g. when consumer is load address. If there + is more than one bypass with the same output and input insns, the + chosen bypass is the first bypass with a guard in description whose + guard function returns nonzero. If there is no such bypass, then + bypass without the guard function is chosen. 
*/ DEF_RTL_EXPR(DEFINE_BYPASS, "define_bypass", "issS", RTX_EXTRA) /* (define_automaton string) describes names of automata generated and Property changes on: gcc/ChangeLog-2008 ___________________________________________________________________ Deleted: svn:mergeinfo Index: gcc/ChangeLog.ix86 =================================================================== --- gcc/ChangeLog.ix86 (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/ChangeLog.ix86 (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,171 @@ +2009-04-20 H.J. Lu + + Backport from mainline: + 2009-04-20 Joey Ye + Xuepeng Guo + H.J. Lu + + * config/i386/atom.md: Add bypasses with ix86_dep_by_shift_count. + + * config/i386/i386.c (LEA_SEARCH_THRESHOLD): New macro. + (IX86_LEA_PRIORITY): Likewise. + (distance_non_agu_define): New function. + (distance_agu_use): Likewise. + (ix86_lea_for_add_ok): Likewise. + (ix86_dep_by_shift_count): Likewise. + + * config/i386/i386.md: Call ix86_lea_for_add_ok to decide we + should split for LEA. + + * config/i386/i386-protos.h (ix86_lea_for_add_ok): Declare new + function. + (ix86_dep_by_shift_count): Likewise. + +2009-04-07 H.J. Lu + + Backport from mainline: + 2009-04-07 H.J. Lu + + * doc/invoke.texi: Document Atom support. + +2009-04-06 H.J. Lu + + * config/i386/i386.md: Revert 2 accidental checkins. + +2009-04-06 H.J. Lu + + Backport from mainline: + 2009-04-06 Joey Ye + Xuepeng Guo + H.J. Lu + + Atom pipeline model, tuning and insn selection. + * config.gcc (atom): Add atom config options and target. + + * config/i386/atom.md: New. + + * config/i386/i386.c (atom_cost): New cost. + (m_ATOM): New macro flag. + (initial_ix86_tune_features): Set m_ATOM. + (x86_accumulate_outgoing_args): Likewise. + (x86_arch_always_fancy_math_387): Likewise. + (processor_target): Add Atom cost. + (cpu_names): Add Atom cpu name. + (override_options): Set Atom ISA. + (ix86_issue_rate): New case PROCESSOR_ATOM. + (ix86_adjust_cost): Likewise. 
+ + * config/i386/i386.h (TARGET_ATOM): New target macro. + (ix86_tune_indices): Add X86_TUNE_OPT_AGU. + (TARGET_OPT_AGU): New target option. + (target_cpu_default): Add TARGET_CPU_DEFAULT_atom. + (processor_type): Add PROCESSOR_ATOM. + + * config/i386/i386.md (cpu): Add new value "atom". + (use_carry, movu): New attr. + (atom.md): Include atom.md. + (adddi3_carry_rex64): Set attr "use_carry". + (addqi3_carry): Likewise. + (addhi3_carry): Likewise. + (addsi3_carry): Likewise. + (*addsi3_carry_zext): Likewise. + (subdi3_carry_rex64): Likewise. + (subqi3_carry): Likewise. + (subhi3_carry): Likewise. + (subsi3_carry): Likewise. + (x86_movdicc_0_m1_rex64): Likewise. + (*x86_movdicc_0_m1_se): Likewise. + (x86_movsicc_0_m1): Likewise. + (*x86_movsicc_0_m1_se): Likewise. + (*adddi_1_rex64): Emit add insn as much as possible. + (*addsi_1): Likewise. + (return_internal): Set atom_unit. + (return_internal_long): Likewise. + (return_pop_internal): Likewise. + (*rcpsf2_sse): Set atom_sse_attr attr. + (*qrt2_sse): Likewise. + +2009-04-02 H.J. Lu + + Backport from mainline: + 2009-04-02 H.J. Lu + + * config/i386/i386.c (ix86_abi): Move initialization to ... + (override_options): Here. + +2009-03-29 H.J. Lu + + Backport from mainline: + 2009-03-29 H.J. Lu + + * config/i386/i386-protos.h (ix86_agi_dependent): New. + + * config/i386/i386.c (ix86_agi_dependent): Rewrite. + (ix86_adjust_cost): Updated. + +2009-03-27 H.J. Lu + + Backport from mainline: + 2009-03-27 H.J. Lu + + PR target/39472 + * config/i386/i386.c (ix86_abi): New. + (override_options): Handle -mabi=. + (ix86_function_arg_regno_p): Replace DEFAULT_ABI with + ix86_abi. + (ix86_call_abi_override): Likewise. + (init_cumulative_args): Likewise. + (function_arg_advance): Likewise. + (function_arg_64): Likewise. + (function_arg): Likewise. + (ix86_pass_by_reference): Likewise. + (ix86_function_value_regno_p): Likewise. + (ix86_build_builtin_va_list_abi): Likewise. + (setup_incoming_varargs_64): Likewise. 
+ (is_va_list_char_pointer): Likewise. + (ix86_init_machine_status): Likewise. + (ix86_reg_parm_stack_space): Use enum calling_abi on + call_abi. + (ix86_function_type_abi): Return enum calling_abi. Rewrite + for 64bit. Replace DEFAULT_ABI with ix86_abi. + (ix86_function_abi): Make it static and return enum + calling_abi. + (ix86_cfun_abi): Return enum calling_abi. Replace DEFAULT_ABI + with ix86_abi. + (ix86_fn_abi_va_list): Updated. + + * config/i386/i386.h (ix86_abi): New. + (STACK_BOUNDARY): Replace DEFAULT_ABI with ix86_abi. + (CONDITIONAL_REGISTER_USAGE): Likewise. + (CUMULATIVE_ARGS): Change call_abi type to enum calling_abi. + (machine_function): Likewise. + + * config/i386/i386.md (untyped_call): Replace DEFAULT_ABI + with ix86_abi. + * config/i386/cygming.h (TARGET_64BIT_MS_ABI): Likewise. + (STACK_BOUNDARY): Likewise. + * config/i386/mingw32.h (EXTRA_OS_CPP_BUILTINS): Likewise. + + * config/i386/i386.opt (mabi=): New. + + * config/i386/i386-protos.h (ix86_cfun_abi): Changed to + return enum calling_abi. + (ix86_function_type_abi): Likewise. + (ix86_function_abi): Removed. + +2009-03-27 H.J. Lu + + Backport from mainline: + 2009-03-27 Vladimir Makarov + + * genautomata.c: Add a new year to the copyright. Add a new + reference. + (struct insn_reserv_decl): Add comments for member bypass_list. + (find_bypass): Remove. + (insert_bypass): New. + (process_decls): Use insert_bypass. + (output_internal_insn_latency_func): Output all bypasses with the + same input insn in one switch case. + + * rtl.def (define_bypass): Describe bypass choice. + * doc/md.texi (define_bypass): Ditto. 
Index: gcc/config.gcc =================================================================== --- gcc/config.gcc (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config.gcc (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -1088,7 +1088,7 @@ tmake_file="${tmake_file} i386/t-linux64" need_64bit_hwint=yes case X"${with_cpu}" in - Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) + Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) ;; X) if test x$with_cpu_64 = x; then @@ -1097,7 +1097,7 @@ ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 + echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 exit 1 ;; esac @@ -1202,7 +1202,7 @@ # libgcc/configure.ac instead. need_64bit_hwint=yes case X"${with_cpu}" in - Xgeneric|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) + Xgeneric|Xatom|Xcore2|Xnocona|Xx86-64|Xamdfam10|Xbarcelona|Xk8|Xopteron|Xathlon64|Xathlon-fx) ;; X) if test x$with_cpu_64 = x; then @@ -1211,7 +1211,7 @@ ;; *) echo "Unsupported CPU used in --with-cpu=$with_cpu, supported values:" 1>&2 - echo "generic core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 + echo "generic atom core2 nocona x86-64 amdfam10 barcelona k8 opteron athlon64 athlon-fx" 1>&2 exit 1 ;; esac @@ -2805,7 +2805,7 @@ esac # OK ;; - "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic) + "" | amdfam10 | barcelona | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | atom | generic) # OK ;; *) Index: gcc/config/i386/i386.h =================================================================== --- gcc/config/i386/i386.h (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386.h (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -236,6 
+236,7 @@ #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64) #define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10) +#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM) /* Feature tests against the various tunings. */ enum ix86_tune_indices { @@ -300,6 +301,7 @@ X86_TUNE_USE_VECTOR_FP_CONVERTS, X86_TUNE_USE_VECTOR_CONVERTS, X86_TUNE_FUSE_CMP_AND_BRANCH, + X86_TUNE_OPT_AGU, X86_TUNE_LAST }; @@ -387,6 +389,7 @@ ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS] #define TARGET_FUSE_CMP_AND_BRANCH \ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH] +#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -470,7 +473,10 @@ MS_ABI = 1 }; -/* The default abi form used by target. */ +/* The abi used by target. */ +extern enum calling_abi ix86_abi; + +/* The default abi used by target. */ #define DEFAULT_ABI SYSV_ABI /* Subtargets may reset this to 1 in order to enable 96-bit long double @@ -569,6 +575,7 @@ TARGET_CPU_DEFAULT_prescott, TARGET_CPU_DEFAULT_nocona, TARGET_CPU_DEFAULT_core2, + TARGET_CPU_DEFAULT_atom, TARGET_CPU_DEFAULT_geode, TARGET_CPU_DEFAULT_k6, @@ -658,7 +665,7 @@ /* Boundary (in *bits*) on which stack pointer should be aligned. */ #define STACK_BOUNDARY \ - (TARGET_64BIT && DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD) + (TARGET_64BIT && ix86_abi == MS_ABI ? 128 : BITS_PER_WORD) /* Stack boundary of the main function guaranteed by OS. */ #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) @@ -954,7 +961,7 @@ fixed_regs[j] = call_used_regs[j] = 1; \ if (TARGET_64BIT \ && ((cfun && cfun->machine->call_abi == MS_ABI) \ - || (!cfun && DEFAULT_ABI == MS_ABI))) \ + || (!cfun && ix86_abi == MS_ABI))) \ { \ call_used_regs[SI_REG] = 0; \ call_used_regs[DI_REG] = 0; \ @@ -1614,7 +1621,7 @@ int maybe_vaarg; /* true for calls to possibly vardic fncts. 
*/ int float_in_sse; /* 1 if in 32-bit mode SFmode (2 for DFmode) should be passed in SSE registers. Otherwise 0. */ - int call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise + enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise MS_ABI for ms abi. */ } CUMULATIVE_ARGS; @@ -2260,6 +2267,7 @@ PROCESSOR_GENERIC32, PROCESSOR_GENERIC64, PROCESSOR_AMDFAM10, + PROCESSOR_ATOM, PROCESSOR_max }; @@ -2433,7 +2441,7 @@ int tls_descriptor_call_expanded_p; /* This value is used for amd64 targets and specifies the current abi to be used. MS_ABI means ms abi. Otherwise SYSV_ABI means sysv abi. */ - int call_abi; + enum calling_abi call_abi; }; #define ix86_stack_locals (cfun->machine->stack_locals) Index: gcc/config/i386/cygming.h =================================================================== --- gcc/config/i386/cygming.h (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/cygming.h (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -34,7 +34,7 @@ #endif #undef TARGET_64BIT_MS_ABI -#define TARGET_64BIT_MS_ABI (!cfun ? DEFAULT_ABI == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI) +#define TARGET_64BIT_MS_ABI (!cfun ? ix86_abi == MS_ABI : TARGET_64BIT && cfun->machine->call_abi == MS_ABI) #undef DEFAULT_ABI #define DEFAULT_ABI (TARGET_64BIT ? MS_ABI : SYSV_ABI) @@ -202,7 +202,7 @@ #define CHECK_STACK_LIMIT 4000 #undef STACK_BOUNDARY -#define STACK_BOUNDARY (DEFAULT_ABI == MS_ABI ? 128 : BITS_PER_WORD) +#define STACK_BOUNDARY (ix86_abi == MS_ABI ? 128 : BITS_PER_WORD) /* By default, target has a 80387, uses IEEE compatible arithmetic, returns float values in the 387 and needs stack probes. Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386.md (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -316,7 +316,7 @@ ;; Processor type. 
-(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2, +(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom, generic64,amdfam10" (const (symbol_ref "ix86_schedule"))) @@ -612,6 +612,12 @@ (define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any" (const_string "any")) +;; Define attribute to classify add/sub insns that consumes carry flag (CF) +(define_attr "use_carry" "0,1" (const_string "0")) + +;; Define attribute to indicate unaligned ssemov insns +(define_attr "movu" "0,1" (const_string "0")) + ;; Describe a user's asm statement. (define_asm_attributes [(set_attr "length" "128") @@ -727,6 +733,7 @@ (include "k6.md") (include "athlon.md") (include "geode.md") +(include "atom.md") ;; Operand and operator predicates and constraints @@ -5790,6 +5797,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" "adc{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "DI")]) @@ -5864,6 +5872,7 @@ "ix86_binary_operator_ok (PLUS, QImode, operands)" "adc{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "QI")]) @@ -5876,6 +5885,7 @@ "ix86_binary_operator_ok (PLUS, HImode, operands)" "adc{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "HI")]) @@ -5888,6 +5898,7 @@ "ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -5901,6 +5912,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)" "adc{l}\t{%2, %k0|%k0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -6130,9 +6142,9 @@ (set_attr "mode" "SI")]) (define_insn "*adddi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r") - 
(plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:DI 2 "x86_64_general_operand" "rme,re,le"))) + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:DI 2 "x86_64_general_operand" "rme,re,0,le"))) (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)" { @@ -6153,6 +6165,10 @@ } default: + /* Use add as much as possible to replace lea for AGU optimization. */ + if (which_alternative == 2 && TARGET_OPT_AGU) + return "add{q}\t{%1, %0|%0, %1}"; + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6171,8 +6187,11 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "2") + (cond [(and (eq_attr "alternative" "2") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "lea") + (eq_attr "alternative" "3") + (const_string "lea") ; Current assemblers are broken and do not allow @GOTOFF in ; ought but a memory context. 
(match_operand:DI 2 "pic_symbolic_operand" "") @@ -6189,8 +6208,8 @@ (plus:DI (match_operand:DI 1 "register_operand" "") (match_operand:DI 2 "x86_64_nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "TARGET_64BIT && reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "TARGET_64BIT && reload_completed + && ix86_lea_for_add_ok (PLUS, insn, operands)" [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))] @@ -6394,9 +6413,9 @@ (define_insn "*addsi_1" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r") - (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r") - (match_operand:SI 2 "general_operand" "g,ri,li"))) + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r,r") + (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r,r") + (match_operand:SI 2 "general_operand" "g,ri,0,li"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (PLUS, SImode, operands)" { @@ -6417,6 +6436,10 @@ } default: + /* Use add as much as possible to replace lea for AGU optimization. */ + if (which_alternative == 2 && TARGET_OPT_AGU) + return "add{l}\t{%1, %0|%0, %1}"; + gcc_assert (rtx_equal_p (operands[0], operands[1])); /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'. @@ -6433,7 +6456,10 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "2") + (cond [(and (eq_attr "alternative" "2") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) + (const_string "lea") + (eq_attr "alternative" "3") (const_string "lea") ; Current assemblers are broken and do not allow @GOTOFF in ; ought but a memory context. 
@@ -6451,8 +6477,7 @@ (plus (match_operand 1 "register_operand" "") (match_operand 2 "nonmemory_operand" ""))) (clobber (reg:CC FLAGS_REG))] - "reload_completed - && true_regnum (operands[0]) != true_regnum (operands[1])" + "reload_completed && ix86_lea_for_add_ok (PLUS, insn, operands)" [(const_int 0)] { rtx pat; @@ -7553,6 +7578,7 @@ "TARGET_64BIT && ix86_binary_operator_ok (MINUS, DImode, operands)" "sbb{q}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "DI")]) @@ -7601,6 +7627,7 @@ "ix86_binary_operator_ok (MINUS, QImode, operands)" "sbb{b}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "QI")]) @@ -7613,6 +7640,7 @@ "ix86_binary_operator_ok (MINUS, HImode, operands)" "sbb{w}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "HI")]) @@ -7625,6 +7653,7 @@ "ix86_binary_operator_ok (MINUS, SImode, operands)" "sbb{l}\t{%2, %0|%0, %2}" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "mode" "SI")]) @@ -15164,7 +15193,7 @@ ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL), operands[0], const0_rtx, GEN_INT ((TARGET_64BIT - ? (DEFAULT_ABI == SYSV_ABI + ? (ix86_abi == SYSV_ABI ? 
X86_64_SSE_REGPARM_MAX : X64_SSE_REGPARM_MAX) : X86_32_SSE_REGPARM_MAX) @@ -15244,6 +15273,7 @@ "reload_completed" "ret" [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) @@ -15256,6 +15286,7 @@ "reload_completed" "rep\;ret" [(set_attr "length" "1") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "0") (set_attr "prefix_rep" "1") (set_attr "modrm" "0")]) @@ -15266,6 +15297,7 @@ "reload_completed" "ret\t%0" [(set_attr "length" "3") + (set_attr "atom_unit" "jeu") (set_attr "length_immediate" "2") (set_attr "modrm" "0")]) @@ -16387,6 +16419,7 @@ "TARGET_SSE_MATH" "%vrcpss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) @@ -16738,6 +16771,7 @@ "TARGET_SSE_MATH" "%vrsqrtss\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SF")]) @@ -16758,6 +16792,7 @@ "SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH" "%vsqrts\t{%1, %d0|%d0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "") (set_attr "athlon_decode" "*") @@ -19811,6 +19846,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19826,6 +19862,7 @@ "" "sbb{q}\t%0, %0" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19869,6 +19906,7 @@ ; Since we don't have the proper number of operands for an alu insn, ; fill in all the blanks. 
[(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -19884,6 +19922,7 @@ "" "sbb{l}\t%0, %0" [(set_attr "type" "alu") + (set_attr "use_carry" "1") (set_attr "pent_pair" "pu") (set_attr "memory" "none") (set_attr "imm_disp" "false") @@ -20216,7 +20255,8 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "0") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "alu") (match_operand:SI 2 "const0_operand" "") (const_string "imov") @@ -20259,7 +20299,8 @@ } } [(set (attr "type") - (cond [(eq_attr "alternative" "0") + (cond [(and (eq_attr "alternative" "0") + (eq (symbol_ref "TARGET_OPT_AGU") (const_int 0))) (const_string "alu") (match_operand:DI 2 "const0_operand" "") (const_string "imov") @@ -21751,6 +21792,7 @@ return patterns[locality]; } [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") (set_attr "memory" "none")]) (define_insn "*prefetch_sse_rex" @@ -21769,6 +21811,7 @@ return patterns[locality]; } [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "prefetch") (set_attr "memory" "none")]) (define_insn "*prefetch_3dnow" Index: gcc/config/i386/atom.md =================================================================== --- gcc/config/i386/atom.md (.../tags/gcc_4_4_0_release) (revision 0) +++ gcc/config/i386/atom.md (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -0,0 +1,795 @@ +;; Atom Scheduling +;; Copyright (C) 2009 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. +;; +;; Atom is an in-order core with two integer pipelines. + + +(define_attr "atom_unit" "sishuf,simul,jeu,complex,other" + (const_string "other")) + +(define_attr "atom_sse_attr" "rcp,movdup,lfence,fence,prefetch,sqrt,mxcsr,other" + (const_string "other")) + +(define_automaton "atom") + +;; Atom has two ports: port 0 and port 1 connecting to all execution units +(define_cpu_unit "atom-port-0,atom-port-1" "atom") + +;; EU: Execution Unit +;; Atom EUs are connected by port 0 or port 1. + +(define_cpu_unit "atom-eu-0, atom-eu-1, + atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4" + "atom") + +;; Some EUs have duplicated copies and can be accessed via either +;; port 0 or port 1 +;; (define_reservation "atom-port-either" "(atom-port-0 | atom-port-1)") + +;;; Some instructions are dual-pipe executions and need both ports +;;; Complex multi-op macro-instructions need both ports and all EUs +(define_reservation "atom-port-dual" "(atom-port-0 + atom-port-1)") +(define_reservation "atom-all-eu" "(atom-eu-0 + atom-eu-1 + + atom-imul-1 + atom-imul-2 + atom-imul-3 + + atom-imul-4)") + +;;; Most simple instructions have 1 cycle latency. Some of them +;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "atom-simple-0" "(atom-port-0 + atom-eu-0)") +(define_reservation "atom-simple-1" "(atom-port-1 + atom-eu-1)") +(define_reservation "atom-simple-either" "(atom-simple-0 | atom-simple-1)") + +;;; Some insns issue in port 0 with 3 cycle latency and 1 cycle throughput +(define_reservation "atom-eu-0-3-1" "(atom-port-0 + atom-eu-0, nothing*2)") + +;;; fmul insn can have 4 or 5 cycles latency +(define_reservation "atom-fmul-5c" "(atom-port-0 + atom-eu-0), nothing*4") +(define_reservation "atom-fmul-4c" "(atom-port-0 + atom-eu-0), nothing*3") + +;;; fadd can have 5 cycles latency depending on instruction forms +(define_reservation "atom-fadd-5c" "(atom-port-1 + atom-eu-1), nothing*5") + +;;; imul insn has 5 cycles latency +(define_reservation "atom-imul-32" + "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4, + atom-port-0") +;;; imul instruction excludes other non-FP instructions. +(exclusion_set "atom-eu-0, atom-eu-1" + "atom-imul-1, atom-imul-2, atom-imul-3, atom-imul-4") + +;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on +;;; instruction forms +(define_reservation "atom-dual-1c" "(atom-port-dual + atom-eu-0 + atom-eu-1)") +(define_reservation "atom-dual-2c" + "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing)") +(define_reservation "atom-dual-5c" + "(atom-port-dual + atom-eu-0 + atom-eu-1, nothing*4)") + +;;; Complex macro-instructions have varying latency and use both ports.
+(define_reservation "atom-complex" "(atom-port-dual + atom-all-eu)") + +(define_insn_reservation "atom_other" 9 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "!jeu"))) + "atom-complex, atom-all-eu*8") + +;; return has type "other" with atom_unit "jeu" +(define_insn_reservation "atom_other_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "other") + (eq_attr "atom_unit" "jeu"))) + "atom-dual-1c") + +(define_insn_reservation "atom_multi" 9 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "multi")) + "atom-complex, atom-all-eu*8") + +;; Normal alu insns without carry, memory attr is none +(define_insn_reservation "atom_alu" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "0")))) + "atom-simple-either") + +;; Normal alu insns without carry, memory attr is not none +(define_insn_reservation "atom_alu_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "0")))) + "atom-simple-either") + +;; Alu insn consuming CF, such as adc/sbb, memory attr is none +(define_insn_reservation "atom_alu_carry" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "none") + (eq_attr "use_carry" "1")))) + "atom-simple-either") + +;; Alu insn consuming CF, such as adc/sbb, memory attr is not none +(define_insn_reservation "atom_alu_carry_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu") + (and (eq_attr "memory" "!none") + (eq_attr "use_carry" "1")))) + "atom-simple-either") + +(define_insn_reservation "atom_alu1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_alu1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "alu1") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_negnot" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "none"))) + "atom-simple-either") +
+(define_insn_reservation "atom_negnot_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "negnot") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_imov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_imov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; 16<-16, 32<-32 +(define_insn_reservation "atom_imovx" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "atom-simple-either") + +;; 16<-16, 32<-32, mem +(define_insn_reservation "atom_imovx_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (and (match_operand:HI 0 "register_operand") + (match_operand:HI 1 "general_operand")) + (and (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "general_operand")))))) + "atom-simple-either") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8 +(define_insn_reservation "atom_imovx_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 "register_operand")))))) + "atom-simple-0") + +;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem +(define_insn_reservation "atom_imovx_2_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (eq_attr "memory" "!none") + (ior (match_operand:QI 0 "register_operand") + (ior (and (match_operand:SI 0 "register_operand") + (not (match_operand:SI 1 "general_operand"))) + (match_operand:DI 0 
"register_operand")))))) + "atom-simple-0") + +;; 16<-8 +(define_insn_reservation "atom_imovx_3" 3 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imovx") + (and (match_operand:HI 0 "register_operand") + (match_operand:QI 1 "general_operand")))) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_lea" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "lea") + (eq_attr "mode" "!HI"))) + "atom-simple-either") + +;; lea 16bit address is complex insn +(define_insn_reservation "atom_lea_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "lea") + (eq_attr "mode" "HI"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_incdec" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_incdec_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "incdec") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; simple shift instruction use SHIFT eu, none memory +(define_insn_reservation "atom_ishift" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))) + "atom-simple-0") + +;; simple shift instruction use SHIFT eu, memory +(define_insn_reservation "atom_ishift_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0")))) + "atom-simple-0") + +;; DF shift (prefixed with 0f) is complex insn with latency of 7 cycles +(define_insn_reservation "atom_ishift_3" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift") + (eq_attr "prefix_0f" "1"))) + "atom-complex, atom-all-eu*6") + +(define_insn_reservation "atom_ishift1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_ishift1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ishift1") + (eq_attr "memory" "!none"))) + 
"atom-simple-0") + +(define_insn_reservation "atom_rotate" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_rotate1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "rotate1") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +(define_insn_reservation "atom_imul" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "none") (eq_attr "mode" "SI")))) + "atom-imul-32") + +(define_insn_reservation "atom_imul_mem" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (and (eq_attr "memory" "!none") (eq_attr "mode" "SI")))) + "atom-imul-32") + +;; latency set to 10 as common 64x64 imul +(define_insn_reservation "atom_imul_3" 10 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "imul") + (eq_attr "mode" "!SI"))) + "atom-complex, atom-all-eu*9") + +(define_insn_reservation "atom_idiv" 65 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "idiv")) + "atom-complex, atom-all-eu*32, nothing*32") + +(define_insn_reservation "atom_icmp" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_icmp_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmp") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_test" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "test") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_test_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "test") + (eq_attr "memory" "!none"))) + 
"atom-simple-either") + +(define_insn_reservation "atom_ibr" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "!load"))) + "atom-simple-1") + +;; complex if jump target is from address +(define_insn_reservation "atom_ibr_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ibr") + (eq_attr "memory" "load"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_setcc" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "!store"))) + "atom-simple-either") + +;; 2 cycles complex if target is in memory +(define_insn_reservation "atom_setcc_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "setcc") + (eq_attr "memory" "store"))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_icmov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_icmov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "icmov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; UCODE if segreg, ignored +(define_insn_reservation "atom_push" 2 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "push")) + "atom-dual-2c") + +;; pop r64 is 1 cycle. UCODE if segreg, ignored +(define_insn_reservation "atom_pop" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "pop") + (eq_attr "mode" "DI"))) + "atom-dual-1c") + +;; pop non-r64 is 2 cycles. 
UCODE if segreg, ignored +(define_insn_reservation "atom_pop_2" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "pop") + (eq_attr "mode" "!DI"))) + "atom-dual-2c") + +;; UCODE if segreg, ignored +(define_insn_reservation "atom_call" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "call")) + "atom-dual-1c") + +(define_insn_reservation "atom_callv" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "callv")) + "atom-dual-1c") + +(define_insn_reservation "atom_leave" 3 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "leave")) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_str" 3 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "str")) + "atom-complex, atom-all-eu*2") + +(define_insn_reservation "atom_sselog" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "none"))) + "atom-simple-either") + +(define_insn_reservation "atom_sselog_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +(define_insn_reservation "atom_sselog1" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "none"))) + "atom-simple-0") + +(define_insn_reservation "atom_sselog1_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sselog1") + (eq_attr "memory" "!none"))) + "atom-simple-0") + +;; not pmad, not psad +(define_insn_reservation "atom_sseiadd" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "!simul") + (eq_attr "atom_unit" "!complex"))))) + "atom-simple-either") + +;; pmad, psad and 64 +(define_insn_reservation "atom_sseiadd_2" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "DI"))))) + "atom-fmul-4c") + +;; pmad, psad and 128 +(define_insn_reservation "atom_sseiadd_3" 5 + (and (eq_attr "cpu" 
"atom") + (and (eq_attr "type" "sseiadd") + (and (not (match_operand:V2DI 0 "register_operand")) + (and (eq_attr "atom_unit" "simul" ) + (eq_attr "mode" "TI"))))) + "atom-fmul-5c") + +;; if paddq(64 bit op), phadd/phsub +(define_insn_reservation "atom_sseiadd_4" 6 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseiadd") + (ior (match_operand:V2DI 0 "register_operand") + (eq_attr "atom_unit" "complex")))) + "atom-complex, atom-all-eu*5") + +;; if immediate op. +(define_insn_reservation "atom_sseishft" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "!sishuf") + (match_operand 2 "immediate_operand")))) + "atom-simple-either") + +;; if palignr or psrldq +(define_insn_reservation "atom_sseishft_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (and (eq_attr "atom_unit" "sishuf") + (match_operand 2 "immediate_operand")))) + "atom-simple-0") + +;; if reg/mem op +(define_insn_reservation "atom_sseishft_3" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseishft") + (not (match_operand 2 "immediate_operand")))) + "atom-complex, atom-all-eu") + +(define_insn_reservation "atom_sseimul" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "sseimul")) + "atom-simple-0") + +;; rcpss or rsqrtss +(define_insn_reservation "atom_sse" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF")))) + "atom-fmul-4c") + +;; movshdup, movsldup. 
Suggest to type sseishft +(define_insn_reservation "atom_sse_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "movdup"))) + "atom-simple-0") + +;; lfence +(define_insn_reservation "atom_sse_3" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (eq_attr "atom_sse_attr" "lfence"))) + "atom-simple-either") + +;; sfence,clflush,mfence, prefetch +(define_insn_reservation "atom_sse_4" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (ior (eq_attr "atom_sse_attr" "fence") + (eq_attr "atom_sse_attr" "prefetch")))) + "atom-simple-0") + +;; rcpps, rsqrtss, sqrt, ldmxcsr +(define_insn_reservation "atom_sse_5" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sse") + (ior (ior (eq_attr "atom_sse_attr" "sqrt") + (eq_attr "atom_sse_attr" "mxcsr")) + (and (eq_attr "atom_sse_attr" "rcp") + (eq_attr "mode" "V4SF"))))) + "atom-complex, atom-all-eu*6") + +;; xmm->xmm +(define_insn_reservation "atom_ssemov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "xy")))) + "atom-simple-either") + +;; reg->xmm +(define_insn_reservation "atom_ssemov_2" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "xy") (match_operand 1 "register_operand" "r")))) + "atom-simple-0") + +;; xmm->reg +(define_insn_reservation "atom_ssemov_3" 3 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (match_operand 0 "register_operand" "r") (match_operand 1 "register_operand" "xy")))) + "atom-eu-0-3-1") + +;; mov mem +(define_insn_reservation "atom_ssemov_4" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (and (eq_attr "movu" "0") (eq_attr "memory" "!none")))) + "atom-simple-0") + +;; movu mem +(define_insn_reservation "atom_ssemov_5" 2 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemov") + (ior (eq_attr "movu" "1") (eq_attr "memory" 
"!none")))) + "atom-complex, atom-all-eu") + +;; no memory simple +(define_insn_reservation "atom_sseadd" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "atom-fadd-5c") + +;; memory simple +(define_insn_reservation "atom_sseadd_mem" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (and (eq_attr "memory" "!none") + (and (eq_attr "mode" "!V2DF") + (eq_attr "atom_unit" "!complex"))))) + "atom-dual-5c") + +;; maxps, minps, *pd, hadd, hsub +(define_insn_reservation "atom_sseadd_3" 8 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseadd") + (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex")))) + "atom-complex, atom-all-eu*7") + +;; Except dppd/dpps +(define_insn_reservation "atom_ssemul" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "!SF"))) + "atom-fmul-5c") + +;; Except dppd/dpps, 4 cycle if mulss +(define_insn_reservation "atom_ssemul_2" 4 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssemul") + (eq_attr "mode" "SF"))) + "atom-fmul-4c") + +(define_insn_reservation "atom_ssecmp" 1 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssecmp")) + "atom-simple-either") + +(define_insn_reservation "atom_ssecomi" 10 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssecomi")) + "atom-complex, atom-all-eu*9") + +;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "atom_ssecvt" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "register_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "register_operand"))))) + "atom-fadd-5c") + +;; memory and cvtpi2ps, cvtps2pi, cvttps2pi +(define_insn_reservation "atom_ssecvt_2" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssecvt") + (ior (and (match_operand:V2SI 0 "register_operand") + 
(match_operand:V4SF 1 "memory_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "memory_operand"))))) + "atom-dual-5c") + +;; otherwise. 7 cycles average for cvtss2sd +(define_insn_reservation "atom_ssecvt_3" 7 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "ssecvt") + (not (ior (and (match_operand:V2SI 0 "register_operand") + (match_operand:V4SF 1 "nonimmediate_operand")) + (and (match_operand:V4SF 0 "register_operand") + (match_operand:V2SI 1 "nonimmediate_operand")))))) + "atom-complex, atom-all-eu*6") + +;; memory and cvtsi2sd +(define_insn_reservation "atom_sseicvt" 5 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseicvt") + (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand")))) + "atom-dual-5c") + +;; otherwise. 8 cycles average for cvtsd2si +(define_insn_reservation "atom_sseicvt_2" 8 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "sseicvt") + (not (and (match_operand:V2DF 0 "register_operand") + (match_operand:SI 1 "memory_operand"))))) + "atom-complex, atom-all-eu*7") + +(define_insn_reservation "atom_ssediv" 62 + (and (eq_attr "cpu" "atom") + (eq_attr "type" "ssediv")) + "atom-complex, atom-all-eu*12, nothing*49") + +;; simple for fmov +(define_insn_reservation "atom_fmov" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "none"))) + "atom-simple-either") + +;; simple for fmov +(define_insn_reservation "atom_fmov_mem" 1 + (and (eq_attr "cpu" "atom") + (and (eq_attr "type" "fmov") + (eq_attr "memory" "!none"))) + "atom-simple-either") + +;; Define bypass here + +;; There will be no stall from lea to non-mem EX insns +(define_bypass 0 "atom_lea" + "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec, atom_setcc, atom_icmov, atom_pop") + +(define_bypass 0 "atom_lea" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + 
"!ix86_agi_dependent") + +;; There will be 3 cycles stall from EX insns to AGAN insns LEA +(define_bypass 4 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_lea") + +;; There will be 3 cycles stall from EX insns to insns need addr calculation +(define_bypass 4 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_imovx_mem, atom_imovx_2_mem, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_negnot_mem, atom_imov_mem, atom_incdec_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imul_mem, atom_icmp_mem, + atom_test_mem, atom_icmov_mem, atom_sselog_mem, + atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_agi_dependent") + +;; Stall from imul to lea is 8 cycles. +(define_bypass 9 "atom_imul, atom_imul_mem" "atom_lea") + +;; Stall from imul to memory address is 8 cycles. 
+(define_bypass 9 "atom_imul, atom_imul_mem" + "atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_negnot_mem, atom_imov_mem, atom_incdec_mem, + atom_ishift_mem, atom_ishift1_mem, atom_rotate_mem, + atom_rotate1_mem, atom_imul_mem, atom_icmp_mem, + atom_test_mem, atom_icmov_mem, atom_sselog_mem, + atom_sselog1_mem, atom_fmov_mem, atom_sseadd_mem" + "ix86_agi_dependent") + +;; There will be 0 cycle stall from cmp/test to jcc + +;; There will be 1 cycle stall from flag producer to cmov and adc/sbb +(define_bypass 2 "atom_icmp, atom_test, atom_alu, atom_alu_carry, + atom_alu1, atom_negnot, atom_incdec, atom_ishift, + atom_ishift1, atom_rotate, atom_rotate1" + "atom_icmov, atom_alu_carry") + +;; lea to shift count stall is 2 cycles +(define_bypass 3 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") + +;; lea to shift source stall is 1 cycle +(define_bypass 2 "atom_lea" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1" + "!ix86_dep_by_shift_count") + +;; non-lea to shift count stall is 1 cycle +(define_bypass 2 "atom_alu_carry, + atom_alu,atom_alu1,atom_negnot,atom_imov,atom_imovx, + atom_incdec,atom_ishift,atom_ishift1,atom_rotate, + atom_rotate1, atom_setcc, atom_icmov, atom_pop, + atom_alu_mem, atom_alu_carry_mem, atom_alu1_mem, + atom_imovx_mem, atom_imovx_2_mem, + atom_imov_mem, atom_icmov_mem, atom_fmov_mem" + "atom_ishift, atom_ishift1, atom_rotate, atom_rotate1, + atom_ishift_mem, atom_ishift1_mem, + atom_rotate_mem, atom_rotate1_mem" + "ix86_dep_by_shift_count") Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/sse.md (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -338,6 +338,7 @@ && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "vmovup\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") 
+ (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -363,6 +364,7 @@ && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "movup\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "mode" "")]) (define_insn "avx_movdqu" @@ -373,6 +375,7 @@ "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "vmovdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -383,6 +386,7 @@ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "movdqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -424,7 +428,7 @@ UNSPEC_MOVNT))] "TARGET_SSE2" "movntdq\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -434,7 +438,7 @@ UNSPEC_MOVNT))] "TARGET_SSE2" "movnti\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "mode" "V2DF")]) (define_insn "avx_lddqu" @@ -445,6 +449,7 @@ "TARGET_AVX" "vlddqu\t{%1, %0|%0, %1}" [(set_attr "type" "ssecvt") + (set_attr "movu" "1") (set_attr "prefix" "vex") (set_attr "mode" "")]) @@ -454,7 +459,8 @@ UNSPEC_LDDQU))] "TARGET_SSE3" "lddqu\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") + (set_attr "movu" "1") (set_attr "prefix_rep" "1") (set_attr "mode" "TI")]) @@ -761,6 +767,7 @@ "TARGET_SSE" "%vrcpps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) @@ -787,6 +794,7 @@ "TARGET_SSE" "rcpss\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "rcp") (set_attr "mode" "SF")]) (define_expand "sqrtv8sf2" @@ -832,6 +840,7 @@ "TARGET_SSE" "%vsqrtps\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "prefix" "maybe_vex") (set_attr "mode" "V4SF")]) @@ -876,6 +885,7 @@ 
"SSE_VEC_FLOAT_MODE_P (mode)" "sqrts\t{%1, %0|%0, %1}" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "sqrt") (set_attr "mode" "")]) (define_expand "rsqrtv8sf2" @@ -1039,7 +1049,7 @@ (const_int 1)))] "SSE_VEC_FLOAT_MODE_P (mode)" "s\t{%2, %0|%0, %2}" - [(set_attr "type" "sse") + [(set_attr "type" "sseadd") (set_attr "mode" "")]) ;; These versions of the min/max patterns implement exactly the operations @@ -1175,6 +1185,7 @@ "TARGET_SSE3" "addsubpd\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") (set_attr "mode" "V2DF")]) (define_insn "avx_hv4df3" @@ -1298,6 +1309,7 @@ "TARGET_SSE3" "hps\t{%2, %0|%0, %2}" [(set_attr "type" "sseadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_rep" "1") (set_attr "mode" "V4SF")]) @@ -5066,6 +5078,7 @@ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" "pmaddwd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -7025,6 +7038,7 @@ movq\t{%H1, %0|%0, %H1} mov{q}\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov,sseishft,ssemov,imov") + (set_attr "atom_unit" "*,sishuf,*,*") (set_attr "memory" "*,none,*,*") (set_attr "mode" "V2SF,TI,TI,DI")]) @@ -7057,6 +7071,7 @@ psrldq\t{$8, %0|%0, 8} movq\t{%H1, %0|%0, %H1}" [(set_attr "type" "ssemov,sseishft,ssemov") + (set_attr "atom_unit" "*,sishuf,*") (set_attr "memory" "*,none,*") (set_attr "mode" "V2SF,TI,TI")]) @@ -7614,6 +7629,7 @@ "TARGET_SSE2" "psadbw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "mode" "TI")]) @@ -7635,7 +7651,7 @@ UNSPEC_MOVMSK))] "SSE_VEC_FLOAT_MODE_P (mode)" "%vmovmskp\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") (set_attr "mode" "")]) @@ -7645,7 +7661,7 @@ UNSPEC_MOVMSK))] "TARGET_SSE2" "%vpmovmskb\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr 
"prefix_data16" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "SI")]) @@ -7668,7 +7684,7 @@ "TARGET_SSE2 && !TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -7682,7 +7698,7 @@ "TARGET_SSE2 && TARGET_64BIT" ;; @@@ check ordering of operands in intel/nonintel syntax "%vmaskmovdqu\t{%2, %1|%1, %2}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_data16" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) @@ -7693,6 +7709,7 @@ "TARGET_SSE" "%vldmxcsr\t%0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "mxcsr") (set_attr "prefix" "maybe_vex") (set_attr "memory" "load")]) @@ -7702,6 +7719,7 @@ "TARGET_SSE" "%vstmxcsr\t%0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "mxcsr") (set_attr "prefix" "maybe_vex") (set_attr "memory" "store")]) @@ -7720,6 +7738,7 @@ "TARGET_SSE || TARGET_3DNOW_A" "sfence" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_insn "sse2_clflush" @@ -7728,6 +7747,7 @@ "TARGET_SSE2" "clflush\t%a0" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_expand "sse2_mfence" @@ -7745,6 +7765,7 @@ "TARGET_64BIT || TARGET_SSE2" "mfence" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "fence") (set_attr "memory" "unknown")]) (define_expand "sse2_lfence" @@ -7762,6 +7783,7 @@ "TARGET_SSE2" "lfence" [(set_attr "type" "sse") + (set_attr "atom_sse_attr" "lfence") (set_attr "memory" "unknown")]) (define_insn "sse3_mwait" @@ -7885,6 +7907,7 @@ "TARGET_SSSE3" "phaddw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -7913,6 +7936,7 @@ "TARGET_SSSE3" "phaddw\t{%2, %0|%0, %2}" 
[(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -7967,6 +7991,7 @@ "TARGET_SSSE3" "phaddd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -7987,6 +8012,7 @@ "TARGET_SSSE3" "phaddd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8073,6 +8099,7 @@ "TARGET_SSSE3" "phaddsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8101,6 +8128,7 @@ "TARGET_SSSE3" "phaddsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8187,6 +8215,7 @@ "TARGET_SSSE3" "phsubw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8215,6 +8244,7 @@ "TARGET_SSSE3" "phsubw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8269,6 +8299,7 @@ "TARGET_SSSE3" "phsubd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8289,6 +8320,7 @@ "TARGET_SSSE3" "phsubd\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8375,6 +8407,7 @@ "TARGET_SSSE3" "phsubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8403,6 +8436,7 @@ "TARGET_SSSE3" "phsubsw\t{%2, %0|%0, %2}" [(set_attr "type" 
"sseiadd") + (set_attr "atom_unit" "complex") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8509,6 +8543,7 @@ "TARGET_SSSE3" "pmaddubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8547,6 +8582,7 @@ "TARGET_SSSE3" "pmaddubsw\t{%2, %0|%0, %2}" [(set_attr "type" "sseiadd") + (set_attr "atom_unit" "simul") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8754,6 +8790,7 @@ return "palignr\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sseishft") + (set_attr "atom_unit" "sishuf") (set_attr "prefix_data16" "1") (set_attr "prefix_extra" "1") (set_attr "mode" "TI")]) @@ -8770,6 +8807,7 @@ return "palignr\t{%3, %2, %0|%0, %2, %3}"; } [(set_attr "type" "sseishft") + (set_attr "atom_unit" "sishuf") (set_attr "prefix_extra" "1") (set_attr "mode" "DI")]) @@ -8956,7 +8994,7 @@ UNSPEC_MOVNTDQA))] "TARGET_SSE4_1" "%vmovntdqa\t{%1, %0|%0, %1}" - [(set_attr "type" "ssecvt") + [(set_attr "type" "ssemov") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) Index: gcc/config/i386/i386.opt =================================================================== --- gcc/config/i386/i386.opt (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386.opt (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -228,6 +228,10 @@ Target RejectNegative Joined Var(ix86_tune_string) Schedule code for given CPU +mabi= +Target RejectNegative Joined Var(ix86_abi_string) +Generate code that conforms to the given ABI + mveclibabi= Target RejectNegative Joined Var(ix86_veclibabi_string) Vector library ABI to use Index: gcc/config/i386/i386-c.c =================================================================== --- gcc/config/i386/i386-c.c (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386-c.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -119,6 +119,10 @@ def_or_undef (parse_in, 
"__core2"); def_or_undef (parse_in, "__core2__"); break; + case PROCESSOR_ATOM: + def_or_undef (parse_in, "__atom"); + def_or_undef (parse_in, "__atom__"); + break; /* use PROCESSOR_max to not set/unset the arch macro. */ case PROCESSOR_max: break; @@ -187,6 +191,9 @@ case PROCESSOR_CORE2: def_or_undef (parse_in, "__tune_core2__"); break; + case PROCESSOR_ATOM: + def_or_undef (parse_in, "__tune_atom__"); + break; case PROCESSOR_GENERIC32: case PROCESSOR_GENERIC64: break; Index: gcc/config/i386/mingw32.h =================================================================== --- gcc/config/i386/mingw32.h (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/mingw32.h (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -38,7 +38,7 @@ builtin_define_std ("WINNT"); \ builtin_define_with_int_value ("_INTEGRAL_MAX_BITS", \ TYPE_PRECISION (intmax_type_node));\ - if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) \ + if (TARGET_64BIT && ix86_abi == MS_ABI) \ { \ builtin_define ("__MINGW64__"); \ builtin_define_std ("WIN64"); \ Index: gcc/config/i386/i386-protos.h =================================================================== --- gcc/config/i386/i386-protos.h (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386-protos.h (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -85,6 +85,9 @@ extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode, rtx[]); extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]); +extern bool ix86_lea_for_add_ok (enum rtx_code, rtx, rtx[]); +extern bool ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn); +extern bool ix86_agi_dependent (rtx set_insn, rtx use_insn); extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode, rtx[]); extern rtx ix86_build_const_vector (enum machine_mode, bool, rtx); @@ -139,9 +142,8 @@ extern bool ix86_sol10_return_in_memory (const_tree,const_tree); extern rtx ix86_force_to_memory (enum machine_mode, rtx); extern void 
ix86_free_from_memory (enum machine_mode); -extern int ix86_cfun_abi (void); -extern int ix86_function_abi (const_tree); -extern int ix86_function_type_abi (const_tree); +extern enum calling_abi ix86_cfun_abi (void); +extern enum calling_abi ix86_function_type_abi (const_tree); extern void ix86_call_abi_override (const_tree); extern tree ix86_fn_abi_va_list (tree); extern tree ix86_canonical_va_list_type (tree); Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (.../tags/gcc_4_4_0_release) (revision 146537) +++ gcc/config/i386/i386.c (.../branches/ix86/gcc-4_4-branch) (revision 146537) @@ -1036,6 +1036,79 @@ 1, /* cond_not_taken_branch_cost. */ }; +static const +struct processor_costs atom_cost = { + COSTS_N_INSNS (1), /* cost of an add instruction */ + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ + COSTS_N_INSNS (1), /* variable shift costs */ + COSTS_N_INSNS (1), /* constant shift costs */ + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ + COSTS_N_INSNS (4), /* HI */ + COSTS_N_INSNS (3), /* SI */ + COSTS_N_INSNS (4), /* DI */ + COSTS_N_INSNS (2)}, /* other */ + 0, /* cost of multiply per each bit set */ + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ + COSTS_N_INSNS (26), /* HI */ + COSTS_N_INSNS (42), /* SI */ + COSTS_N_INSNS (74), /* DI */ + COSTS_N_INSNS (74)}, /* other */ + COSTS_N_INSNS (1), /* cost of movsx */ + COSTS_N_INSNS (1), /* cost of movzx */ + 8, /* "large" insn */ + 17, /* MOVE_RATIO */ + 2, /* cost for loading QImode using movzbl */ + {4, 4, 4}, /* cost of loading integer registers + in QImode, HImode and SImode. + Relative to reg-reg move (2). 
*/ + {4, 4, 4}, /* cost of storing integer registers */ + 4, /* cost of reg,reg fld/fst */ + {12, 12, 12}, /* cost of loading fp registers + in SFmode, DFmode and XFmode */ + {6, 6, 8}, /* cost of storing fp registers + in SFmode, DFmode and XFmode */ + 2, /* cost of moving MMX register */ + {8, 8}, /* cost of loading MMX registers + in SImode and DImode */ + {8, 8}, /* cost of storing MMX registers + in SImode and DImode */ + 2, /* cost of moving SSE register */ + {8, 8, 8}, /* cost of loading SSE registers + in SImode, DImode and TImode */ + {8, 8, 8}, /* cost of storing SSE registers + in SImode, DImode and TImode */ + 5, /* MMX or SSE register to integer */ + 32, /* size of l1 cache. */ + 256, /* size of l2 cache. */ + 64, /* size of prefetch block */ + 6, /* number of parallel prefetches */ + 3, /* Branch cost */ + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ + COSTS_N_INSNS (8), /* cost of FMUL instruction. */ + COSTS_N_INSNS (20), /* cost of FDIV instruction. */ + COSTS_N_INSNS (8), /* cost of FABS instruction. */ + COSTS_N_INSNS (8), /* cost of FCHS instruction. */ + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ + {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, + {libcall, {{32, loop}, {64, rep_prefix_4_byte}, + {8192, rep_prefix_8_byte}, {-1, libcall}}}}, + {{libcall, {{8, loop}, {15, unrolled_loop}, + {2048, rep_prefix_4_byte}, {-1, libcall}}}, + {libcall, {{24, loop}, {32, unrolled_loop}, + {8192, rep_prefix_8_byte}, {-1, libcall}}}}, + 1, /* scalar_stmt_cost. */ + 1, /* scalar load_cost. */ + 1, /* scalar_store_cost. */ + 1, /* vec_stmt_cost. */ + 1, /* vec_to_scalar_cost. */ + 1, /* scalar_to_vec_cost. */ + 1, /* vec_align_load_cost. */ + 2, /* vec_unalign_load_cost. */ + 1, /* vec_store_cost. */ + 3, /* cond_taken_branch_cost. */ + 1, /* cond_not_taken_branch_cost. */ +}; + /* Generic64 should produce code tuned for Nocona and K8. */ static const struct processor_costs generic64_cost = { @@ -1194,6 +1267,7 @@ #define m_PENT4 (1<