Index: doc/tm.texi
===================================================================
--- gcc/gcc/doc/tm.texi	(revision 109809)
+++ gcc/gcc/doc/tm.texi	(working copy)
@@ -9428,6 +9428,15 @@
 by the @code{IFCVT_INIT_EXTRA_FIELDS} macro.
 @end defmac
 
+@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_AFTER_RELOAD ()
+If non-null, this hook performs a target-specific pass over the
+instruction stream.  The compiler will run it at all optimization levels,
+after instructions have been split in the flow2 pass.
+
+You need not implement the hook if it has nothing to do.  The default
+definition is null.
+@end deftypefn
+
 @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG ()
 If non-null, this hook performs a target-specific pass over the
 instruction stream.  The compiler will run it at all optimization levels,
Index: postreload.c
===================================================================
--- gcc/gcc/postreload.c	(revision 109809)
+++ gcc/gcc/postreload.c	(working copy)
@@ -41,6 +41,7 @@
 #include "output.h"
 #include "cselib.h"
 #include "real.h"
+#include "target.h"
 #include "toplev.h"
 #include "except.h"
 #include "tree.h"
@@ -1600,3 +1601,37 @@
   'o'                                   /* letter */
 };
 
+/* Machine dependent postreload pass.  */
+
+/* Gate for the machine dependent postreload pass: run it only when the
+   target defines the machine_dependent_postreload hook.  */
+static bool
+gate_handle_machine_postreload (void)
+{
+  return targetm.machine_dependent_postreload != 0;
+}
+
+/* Execute the machine dependent postreload pass by calling the target
+   hook.  Relies on the gate above to have checked the hook is non-null.  */
+static void
+rest_of_handle_machine_postreload (void)
+{
+  targetm.machine_dependent_postreload ();
+}
+
+struct tree_opt_pass pass_machine_postreload =
+{
+  "mach-postreload",                    /* name */
+  gate_handle_machine_postreload,       /* gate */
+  rest_of_handle_machine_postreload,    /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_MACH_DEP_AFTER_RELOAD,             /* tv_id */
+  0,                                    /* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_dump_func,                       /* todo_flags_finish */
+  0                                     /* letter */
+};
Index: tree-pass.h
===================================================================
--- gcc/gcc/tree-pass.h	(revision 109809)
+++ gcc/gcc/tree-pass.h	(working copy)
@@ -356,6 +356,7 @@
 extern struct tree_opt_pass pass_postreload_cse;
 extern struct tree_opt_pass pass_gcse2;
 extern struct tree_opt_pass pass_flow2;
+extern struct tree_opt_pass pass_machine_postreload;
 extern struct tree_opt_pass pass_stack_adjustments;
 extern struct tree_opt_pass pass_peephole2;
 extern struct tree_opt_pass pass_if_after_reload;
Index: target.h
===================================================================
--- gcc/gcc/target.h	(revision 109809)
+++ gcc/gcc/target.h	(working copy)
@@ -501,6 +501,10 @@
   enum machine_mode (* cc_modes_compatible) (enum machine_mode,
 					     enum machine_mode);
 
+  /* Do the machine-dependent post-reload pass.  Called after the
+     flow2 pass.  */
+  void (* machine_dependent_postreload) (void);
+
   /* Do machine-dependent code transformations.  Called just before
      delayed-branch scheduling.  */
   void (* machine_dependent_reorg) (void);
Index: timevar.def
===================================================================
--- gcc/gcc/timevar.def	(revision 109809)
+++ gcc/gcc/timevar.def	(working copy)
@@ -150,8 +150,9 @@
 DEFTIMEVAR (TV_GLOBAL_ALLOC          , "global alloc")
 DEFTIMEVAR (TV_RELOAD_CSE_REGS       , "reload CSE regs")
 DEFTIMEVAR (TV_SEQABSTR              , "sequence abstraction")
-DEFTIMEVAR (TV_GCSE_AFTER_RELOAD      , "load CSE after reload")
+DEFTIMEVAR (TV_GCSE_AFTER_RELOAD     , "load CSE after reload")
 DEFTIMEVAR (TV_FLOW2                 , "flow 2")
+DEFTIMEVAR (TV_MACH_DEP_AFTER_RELOAD , "mach-dep after reload")
 DEFTIMEVAR (TV_IFCVT2		     , "if-conversion 2")
 DEFTIMEVAR (TV_PEEPHOLE2             , "peephole 2")
 DEFTIMEVAR (TV_RENAME_REGISTERS      , "rename registers")
Index: target-def.h
===================================================================
--- gcc/gcc/target-def.h	(revision 109809)
+++ gcc/gcc/target-def.h	(working copy)
@@ -403,6 +403,7 @@
 
 #define TARGET_CC_MODES_COMPATIBLE default_cc_modes_compatible
 
+#define TARGET_MACHINE_DEPENDENT_AFTER_RELOAD 0
 #define TARGET_MACHINE_DEPENDENT_REORG 0
 
 #define TARGET_BUILD_BUILTIN_VA_LIST std_build_builtin_va_list
@@ -603,6 +604,7 @@
   TARGET_DWARF_REGISTER_SPAN,			\
   TARGET_FIXED_CONDITION_CODE_REGS,		\
   TARGET_CC_MODES_COMPATIBLE,			\
+  TARGET_MACHINE_DEPENDENT_AFTER_RELOAD,	\
   TARGET_MACHINE_DEPENDENT_REORG,		\
   TARGET_BUILD_BUILTIN_VA_LIST,			\
   TARGET_GIMPLIFY_VA_ARG_EXPR,			\
Index: Makefile.in
===================================================================
--- gcc/gcc/Makefile.in	(revision 109809)
+++ gcc/gcc/Makefile.in	(working copy)
@@ -2448,7 +2448,7 @@
    $(RTL_H) real.h $(FLAGS_H) $(EXPR_H) $(OPTABS_H) reload.h $(REGS_H) \
    hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) output.h \
    $(FUNCTION_H) toplev.h cselib.h $(TM_P_H) except.h $(TREE_H) $(MACHMODE_H) \
-   $(OBSTACK_H) timevar.h tree-pass.h
+   $(TARGET_H) $(OBSTACK_H) timevar.h tree-pass.h
 postreload-gcse.o : postreload-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
    $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) real.h insn-config.h \
    $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \
Index: passes.c
===================================================================
--- gcc/gcc/passes.c	(revision 109809)
+++ gcc/gcc/passes.c	(working copy)
@@ -667,6 +667,7 @@
   NEXT_PASS (pass_postreload_cse);
   NEXT_PASS (pass_gcse2);
   NEXT_PASS (pass_flow2);
+  NEXT_PASS (pass_machine_postreload);
   NEXT_PASS (pass_rtl_seqabstr);
   NEXT_PASS (pass_stack_adjustments);
   NEXT_PASS (pass_peephole2);
Index: config/i386/i386.c
===================================================================
--- gcc/gcc/config/i386/i386.c	(revision 109809)
+++ gcc/gcc/config/i386/i386.c	(working copy)
@@ -943,6 +943,7 @@
 				 HOST_WIDE_INT, tree);
 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
 static void x86_file_start (void);
+static void ix86_postreload (void);
 static void ix86_reorg (void);
 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
 static tree ix86_build_builtin_va_list (void);
@@ -1147,6 +1148,9 @@
 #undef TARGET_CC_MODES_COMPATIBLE
 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
 
+#undef TARGET_MACHINE_DEPENDENT_AFTER_RELOAD
+#define TARGET_MACHINE_DEPENDENT_AFTER_RELOAD ix86_postreload
+
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
 
@@ -17149,6 +17153,246 @@
   return 2;
 }
 
+/* Non-fcomi 387 FP compare sequences cannot be CSE'd during the cse1 pass.
+   This function implements elimination of redundant 387 FP compare
+   sequences.  We look for a sequence of:
+
+   fucom(p), fcom(p), ficom(p), fcompp, ftst
+   fnstsw %ax
+   sahf or test %ax
+   j
+
+   After the FP compare sequence has been found, redundant instructions in
+   successor blocks are deleted:
+
+   a) fcom/fnstsw combination iff compare arguments
+      and AX reg were not modified.
+   b) sahf (test %ax) and fcom/fnstsw iff compare arguments up to compare
+      insn and CC reg were not modified.
+
+   Only successor blocks that have a single predecessor and are reached
+   through a non-complex edge are examined.
+
+   NOTE(review): in case b) the fcom/fnstsw is deleted even when AX was
+   overwritten after the original fnstsw; this assumes no later insn reads
+   the status word from AX -- TODO confirm.
+
+   This code is partially based on code from cse_condition_code_reg () and
+   cse_cc_succs () functions, as found in cse.c source file.  */
+
+static void
+ix86_cse_i387_compares (void)
+{
+  rtx cc_reg = gen_rtx_REG (CCmode, FLAGS_REG);
+  /* Hard register 0 is treated as AX.  The rtx is the same for every
+     block, so it is created once here.  */
+  rtx ax_reg = gen_rtx_REG (HImode, 0);
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      rtx last_insn;
+      rtx insn;
+      rtx cc_src_insn;
+      rtx cc_src;
+      rtx ax_src_insn;
+      rtx ax_src;
+
+      bool cc_src_clobbered_pred;
+
+      edge e;
+      edge_iterator ei;
+
+      last_insn = BB_END (bb);
+      if (!JUMP_P (last_insn))
+	continue;
+
+      /* Find CC setting insn.  */
+      if (! reg_referenced_p (cc_reg, PATTERN (last_insn)))
+	continue;
+
+      cc_src_insn = NULL_RTX;
+      cc_src = NULL_RTX;
+      for (insn = PREV_INSN (last_insn);
+	   insn && insn != PREV_INSN (BB_HEAD (bb));
+	   insn = PREV_INSN (insn))
+	{
+	  rtx set;
+
+	  if (! INSN_P (insn))
+	    continue;
+
+	  set = single_set (insn);
+	  if (set
+	      && REG_P (SET_DEST (set))
+	      && REGNO (SET_DEST (set)) == REGNO (cc_reg))
+	    {
+	      cc_src_insn = insn;
+	      cc_src = SET_SRC (set);
+	      break;
+	    }
+	  else if (reg_set_p (cc_reg, insn))
+	    break;
+	}
+
+      if (! cc_src_insn)
+	continue;
+
+      /* Check if argument to CC setting insn (AX reg) has been
+	 modified between CC setting insn and jump insn.  */
+      cc_src_clobbered_pred
+	= (modified_between_p (cc_src, cc_src_insn, NEXT_INSN (last_insn))
+	   != 0);
+
+      /* Find AX setting insn.  */
+      if (! reg_referenced_p (ax_reg, PATTERN (cc_src_insn)))
+	continue;
+
+      ax_src_insn = NULL_RTX;
+      ax_src = NULL_RTX;
+      for (insn = PREV_INSN (cc_src_insn);
+	   insn && insn != PREV_INSN (BB_HEAD (bb));
+	   insn = PREV_INSN (insn))
+	{
+	  rtx set;
+
+	  if (! INSN_P (insn))
+	    continue;
+
+	  set = single_set (insn);
+	  if (set
+	      && REG_P (SET_DEST (set))
+	      && REGNO (SET_DEST (set)) == REGNO (ax_reg))
+	    {
+	      ax_src_insn = insn;
+	      ax_src = SET_SRC (set);
+	      break;
+	    }
+	  else if (reg_set_p (ax_reg, insn))
+	    break;
+	}
+
+      if (! ax_src_insn)
+	continue;
+
+      /* The AX setting insn has to be an fnstsw.  */
+      if (! (GET_CODE (ax_src) == UNSPEC
+	     && XINT (ax_src, 1) == UNSPEC_FNSTSW))
+	continue;
+
+      /* Leave this BB if input arguments to AX setting insn (compare)
+	 have been modified between compare and jump insn.  */
+      if (modified_between_p (ax_src, ax_src_insn, NEXT_INSN (last_insn)))
+	continue;
+
+      /* FP compare sequence has been found.  Check successor blocks
+	 for redundant insns.  */
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  rtx end;
+
+	  rtx delete_cc_src_insn = NULL_RTX;
+	  rtx delete_ax_src_insn = NULL_RTX;
+	  rtx maybe_delete_ax_src_insn = NULL_RTX;
+
+	  bool cc_src_clobbered;
+	  bool cc_reg_clobbered;
+
+	  if (e->flags & EDGE_COMPLEX)
+	    continue;
+
+	  if (e->dest == EXIT_BLOCK_PTR
+	      || EDGE_COUNT (e->dest->preds) != 1)
+	    continue;
+
+	  end = NEXT_INSN (BB_END (e->dest));
+
+	  cc_src_clobbered = cc_src_clobbered_pred;
+	  cc_reg_clobbered = false;
+
+	  for (insn = BB_HEAD (e->dest); insn != end; insn = NEXT_INSN (insn))
+	    {
+	      rtx set;
+
+	      if (! INSN_P (insn))
+		continue;
+
+	      /* If compare arguments are modified, we have to
+		 stop looking for a compare which uses it.  */
+	      if (modified_in_p (ax_src, insn)
+		  && maybe_delete_ax_src_insn == NULL_RTX)
+		break;
+
+	      set = single_set (insn);
+
+	      /* A compare insn can be deleted if it sets AX_REG
+		 from AX_SRC and where CC_SRC is not clobbered yet.  */
+	      if (set
+		  && REG_P (SET_DEST (set))
+		  && REGNO (SET_DEST (set)) == REGNO (ax_reg)
+		  && rtx_equal_p (ax_src, SET_SRC (set)))
+		{
+		  maybe_delete_ax_src_insn = insn;
+		  if (!cc_src_clobbered)
+		    {
+		      delete_ax_src_insn = insn;
+		      continue;
+		    }
+		}
+
+	      /* A CC setting insn can be deleted if it sets
+		 CC_REG from CC_SRC, and CC is not clobbered yet.
+		 In this case, compare insn should also be deleted.  */
+	      if (set
+		  && REG_P (SET_DEST (set))
+		  && REGNO (SET_DEST (set)) == REGNO (cc_reg)
+		  && rtx_equal_p (cc_src, SET_SRC (set))
+		  && !cc_reg_clobbered
+		  /* There should be a compare insn present in front.  */
+		  && maybe_delete_ax_src_insn != NULL_RTX)
+		{
+		  delete_ax_src_insn = maybe_delete_ax_src_insn;
+		  delete_cc_src_insn = insn;
+		  break;
+		}
+
+	      if (modified_in_p (cc_src, insn))
+		cc_src_clobbered = true;
+
+	      if (modified_in_p (cc_reg, insn))
+		cc_reg_clobbered = true;
+
+	      /* No usable register remains unclobbered.  */
+	      if (cc_src_clobbered && cc_reg_clobbered)
+		break;
+	    }
+
+	  /* Delete comparison.  */
+	  if (delete_ax_src_insn)
+	    {
+	      gcc_assert (maybe_delete_ax_src_insn != NULL_RTX);
+	      delete_insn (delete_ax_src_insn);
+	    }
+
+	  /* Delete CC setting instruction.  */
+	  if (delete_cc_src_insn)
+	    delete_insn (delete_cc_src_insn);
+	}
+    }
+}
+
+/* Implement machine specific post-reload optimizations.  Redundant 387
+   FP compare elimination is only run when TARGET_80387 is set,
+   TARGET_CMOVE is not, trapping math is disabled and expensive
+   optimizations are enabled.  */
+static void
+ix86_postreload (void)
+{
+  if (TARGET_80387 && !TARGET_CMOVE
+      && !flag_trapping_math && flag_expensive_optimizations)
+    ix86_cse_i387_compares ();
+}
+
 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
    window.  */
 