]>
Commit | Line | Data |
---|---|---|
0539799a PS |
1 | Index: doc/tm.texi |
2 | =================================================================== | |
3 | --- gcc/gcc/doc/tm.texi (revision 109809) | |
4 | +++ gcc/gcc/doc/tm.texi (working copy) | |
5 | @@ -9428,6 +9428,15 @@ | |
6 | by the @code{IFCVT_INIT_EXTRA_FIELDS} macro. | |
7 | @end defmac | |
8 | ||
9 | +@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_AFTER_RELOAD () | |
10 | +If non-null, this hook performs a target-specific pass over the | |
11 | +instruction stream. The compiler will run it at all optimization levels, | |
12 | +after instructions have been split in the flow2 pass. | |
13 | + | |
14 | +You need not implement the hook if it has nothing to do. The default | |
15 | +definition is null. | |
16 | +@end deftypefn | |
17 | + | |
18 | @deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG () | |
19 | If non-null, this hook performs a target-specific pass over the | |
20 | instruction stream. The compiler will run it at all optimization levels, | |
21 | Index: postreload.c | |
22 | =================================================================== | |
23 | --- gcc/gcc/postreload.c (revision 109809) | |
24 | +++ gcc/gcc/postreload.c (working copy) | |
25 | @@ -41,6 +41,7 @@ | |
2163109e PS |
26 | #include "output.h" |
27 | #include "cselib.h" | |
28 | #include "real.h" | |
29 | +#include "target.h" | |
30 | #include "toplev.h" | |
31 | #include "except.h" | |
32 | #include "tree.h" | |
0539799a | 33 | @@ -1600,3 +1601,33 @@ |
2163109e PS |
34 | 'o' /* letter */ |
35 | }; | |
36 | ||
37 | +/* Machine dependent postreload pass. */ | |
38 | +static bool | |
39 | +gate_handle_machine_postreload (void) | |
40 | +{ | |
41 | + return targetm.machine_dependent_postreload != 0; | |
42 | +} | |
43 | + | |
44 | + | |
45 | +static void | |
46 | +rest_of_handle_machine_postreload (void) | |
47 | +{ | |
48 | + targetm.machine_dependent_postreload (); | |
49 | +} | |
50 | + | |
51 | +struct tree_opt_pass pass_machine_postreload = | |
52 | +{ | |
53 | + "mach-postreload", /* name */ | |
54 | + gate_handle_machine_postreload, /* gate */ | |
55 | + rest_of_handle_machine_postreload, /* execute */ | |
56 | + NULL, /* sub */ | |
57 | + NULL, /* next */ | |
58 | + 0, /* static_pass_number */ | |
59 | + TV_MACH_DEP_AFTER_RELOAD, /* tv_id */ | |
60 | + 0, /* properties_required */ | |
61 | + 0, /* properties_provided */ | |
62 | + 0, /* properties_destroyed */ | |
63 | + 0, /* todo_flags_start */ | |
64 | + TODO_dump_func, /* todo_flags_finish */ | |
65 | + 0 /* letter */ | |
66 | +}; | |
0539799a PS |
67 | Index: tree-pass.h |
68 | =================================================================== | |
69 | --- gcc/gcc/tree-pass.h (revision 109809) | |
70 | +++ gcc/gcc/tree-pass.h (working copy) | |
71 | @@ -356,6 +356,7 @@ | |
72 | extern struct tree_opt_pass pass_postreload_cse; | |
73 | extern struct tree_opt_pass pass_gcse2; | |
74 | extern struct tree_opt_pass pass_flow2; | |
75 | +extern struct tree_opt_pass pass_machine_postreload; | |
76 | extern struct tree_opt_pass pass_stack_adjustments; | |
77 | extern struct tree_opt_pass pass_peephole2; | |
78 | extern struct tree_opt_pass pass_if_after_reload; | |
79 | Index: target.h | |
80 | =================================================================== | |
81 | --- gcc/gcc/target.h (revision 109809) | |
82 | +++ gcc/gcc/target.h (working copy) | |
83 | @@ -501,6 +501,10 @@ | |
2163109e PS |
84 | enum machine_mode (* cc_modes_compatible) (enum machine_mode, |
85 | enum machine_mode); | |
86 | ||
87 | + /* Do the machine-dependent post-reload pass. Called after | |
88 | + the flow2 pass. */ | |
89 | + void (* machine_dependent_postreload) (void); | |
90 | + | |
91 | /* Do machine-dependent code transformations. Called just before | |
92 | delayed-branch scheduling. */ | |
93 | void (* machine_dependent_reorg) (void); | |
0539799a PS |
94 | Index: timevar.def |
95 | =================================================================== | |
96 | --- gcc/gcc/timevar.def (revision 109809) | |
97 | +++ gcc/gcc/timevar.def (working copy) | |
98 | @@ -150,8 +150,9 @@ | |
2163109e PS |
99 | DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc") |
100 | DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") | |
0539799a | 101 | DEFTIMEVAR (TV_SEQABSTR , "sequence abstraction") |
2163109e PS |
102 | -DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") |
103 | +DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") | |
104 | DEFTIMEVAR (TV_FLOW2 , "flow 2") | |
105 | +DEFTIMEVAR (TV_MACH_DEP_AFTER_RELOAD , "mach-dep after reload") | |
106 | DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") | |
107 | DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2") | |
108 | DEFTIMEVAR (TV_RENAME_REGISTERS , "rename registers") | |
0539799a PS |
109 | Index: target-def.h |
110 | =================================================================== | |
111 | --- gcc/gcc/target-def.h (revision 109809) | |
112 | +++ gcc/gcc/target-def.h (working copy) | |
113 | @@ -403,6 +403,7 @@ | |
114 | ||
115 | #define TARGET_CC_MODES_COMPATIBLE default_cc_modes_compatible | |
116 | ||
117 | +#define TARGET_MACHINE_DEPENDENT_AFTER_RELOAD 0 | |
118 | #define TARGET_MACHINE_DEPENDENT_REORG 0 | |
119 | ||
120 | #define TARGET_BUILD_BUILTIN_VA_LIST std_build_builtin_va_list | |
121 | @@ -603,6 +604,7 @@ | |
122 | TARGET_DWARF_REGISTER_SPAN, \ | |
123 | TARGET_FIXED_CONDITION_CODE_REGS, \ | |
124 | TARGET_CC_MODES_COMPATIBLE, \ | |
125 | + TARGET_MACHINE_DEPENDENT_AFTER_RELOAD, \ | |
126 | TARGET_MACHINE_DEPENDENT_REORG, \ | |
127 | TARGET_BUILD_BUILTIN_VA_LIST, \ | |
128 | TARGET_GIMPLIFY_VA_ARG_EXPR, \ | |
129 | Index: Makefile.in | |
130 | =================================================================== | |
131 | --- gcc/gcc/Makefile.in (revision 109809) | |
132 | +++ gcc/gcc/Makefile.in (working copy) | |
133 | @@ -2448,7 +2448,7 @@ | |
134 | $(RTL_H) real.h $(FLAGS_H) $(EXPR_H) $(OPTABS_H) reload.h $(REGS_H) \ | |
135 | hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) $(RECOG_H) output.h \ | |
136 | $(FUNCTION_H) toplev.h cselib.h $(TM_P_H) except.h $(TREE_H) $(MACHMODE_H) \ | |
137 | - $(OBSTACK_H) timevar.h tree-pass.h | |
138 | + $(TARGET_H) $(OBSTACK_H) timevar.h tree-pass.h | |
139 | postreload-gcse.o : postreload-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ | |
140 | $(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) real.h insn-config.h \ | |
141 | $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) output.h toplev.h \ | |
142 | Index: passes.c | |
143 | =================================================================== | |
144 | --- gcc/gcc/passes.c (revision 109809) | |
145 | +++ gcc/gcc/passes.c (working copy) | |
146 | @@ -667,6 +667,7 @@ | |
147 | NEXT_PASS (pass_postreload_cse); | |
148 | NEXT_PASS (pass_gcse2); | |
149 | NEXT_PASS (pass_flow2); | |
150 | + NEXT_PASS (pass_machine_postreload); | |
151 | NEXT_PASS (pass_rtl_seqabstr); | |
152 | NEXT_PASS (pass_stack_adjustments); | |
153 | NEXT_PASS (pass_peephole2); | |
154 | Index: config/i386/i386.c | |
155 | =================================================================== | |
156 | --- gcc/gcc/config/i386/i386.c (revision 109809) | |
157 | +++ gcc/gcc/config/i386/i386.c (working copy) | |
158 | @@ -943,6 +943,7 @@ | |
2163109e PS |
159 | HOST_WIDE_INT, tree); |
160 | static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); | |
161 | static void x86_file_start (void); | |
162 | +static void ix86_postreload (void); | |
163 | static void ix86_reorg (void); | |
164 | static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*); | |
165 | static tree ix86_build_builtin_va_list (void); | |
0539799a | 166 | @@ -1147,6 +1148,9 @@ |
2163109e PS |
167 | #undef TARGET_CC_MODES_COMPATIBLE |
168 | #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible | |
169 | ||
170 | +#undef TARGET_MACHINE_DEPENDENT_AFTER_RELOAD | |
171 | +#define TARGET_MACHINE_DEPENDENT_AFTER_RELOAD ix86_postreload | |
172 | + | |
173 | #undef TARGET_MACHINE_DEPENDENT_REORG | |
174 | #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg | |
175 | ||
0539799a | 176 | @@ -17149,6 +17153,236 @@ |
2163109e PS |
177 | return 2; |
178 | } | |
179 | ||
180 | +/* Non-fcomi 387 FP compare sequences cannot be CSE'd during the cse1 pass. | |
181 | + This function implements elimination of redundant 387 FP compare | |
182 | + sequences. We look for a sequence of: | |
183 | + | |
184 | + fucom(p), fcom(p), ficom(p), fcompp, ftst | |
185 | + fnstsw %ax | |
186 | + sahf or test %ax | |
187 | + j<cc> | |
188 | + | |
189 | + After the FP compare sequence has been found, redundant instructions in | |
190 | + successor blocks are deleted: | |
191 | + | |
192 | + a) an fcom/fnstsw combination, iff neither the compare arguments | |
193 | + nor the AX reg have been modified. | |
194 | + b) sahf (test %ax) together with fcom/fnstsw, iff the compare arguments | |
195 | + (up to the compare insn) and the CC reg have not been modified. | |
196 | + | |
197 | + This code is partially based on the cse_condition_code_reg () and | |
198 | + cse_cc_succs () functions found in the cse.c source file. */ | |
199 | + | |
200 | +static void | |
201 | +ix86_cse_i387_compares (void) | |
202 | +{ | |
203 | + rtx cc_reg = gen_rtx_REG (CCmode, FLAGS_REG); | |
204 | + rtx ax_reg; | |
205 | + basic_block bb; | |
206 | + | |
207 | + FOR_EACH_BB (bb) | |
208 | + { | |
209 | + rtx last_insn; | |
210 | + rtx insn; | |
211 | + rtx cc_src_insn; | |
212 | + rtx cc_src; | |
213 | + rtx ax_src_insn; | |
214 | + rtx ax_src; | |
215 | + | |
216 | + bool cc_src_clobbered_pred; | |
217 | + | |
218 | + edge e; | |
219 | + edge_iterator ei; | |
220 | + | |
221 | + last_insn = BB_END (bb); | |
222 | + if (!JUMP_P (last_insn)) | |
223 | + continue; | |
224 | + | |
225 | + /* Find CC setting insn. */ | |
226 | + if (! reg_referenced_p (cc_reg, PATTERN (last_insn))) | |
227 | + continue; | |
228 | + | |
229 | + cc_src_insn = NULL_RTX; | |
230 | + cc_src = NULL_RTX; | |
231 | + for (insn = PREV_INSN (last_insn); | |
232 | + insn && insn != PREV_INSN (BB_HEAD (bb)); | |
233 | + insn = PREV_INSN (insn)) | |
234 | + { | |
235 | + rtx set; | |
236 | + | |
237 | + if (! INSN_P (insn)) | |
238 | + continue; | |
239 | + | |
240 | + set = single_set (insn); | |
241 | + if (set | |
242 | + && REG_P (SET_DEST (set)) | |
243 | + && REGNO (SET_DEST (set)) == REGNO (cc_reg)) | |
244 | + { | |
245 | + cc_src_insn = insn; | |
246 | + cc_src = SET_SRC (set); | |
247 | + break; | |
248 | + } | |
249 | + else if (reg_set_p (cc_reg, insn)) | |
250 | + break; | |
251 | + } | |
252 | + | |
253 | + if (! cc_src_insn) | |
254 | + continue; | |
255 | + | |
256 | + /* Check if argument to CC setting insn (AX reg) has been | |
257 | + modified between CC setting insn and jump insn. */ | |
258 | + cc_src_clobbered_pred | |
259 | + = modified_between_p (cc_src, cc_src_insn, NEXT_INSN (last_insn)) | |
260 | + ? true : false; | |
261 | + | |
262 | + /* Find AX setting insn. */ | |
263 | + ax_reg = gen_rtx_REG (HImode, 0); | |
264 | + | |
265 | + if (! reg_referenced_p (ax_reg, PATTERN (cc_src_insn))) | |
266 | + continue; | |
267 | + | |
268 | + ax_src_insn = NULL_RTX; | |
269 | + ax_src = NULL_RTX; | |
270 | + for (insn = PREV_INSN (cc_src_insn); | |
271 | + insn && insn != PREV_INSN (BB_HEAD (bb)); | |
272 | + insn = PREV_INSN (insn)) | |
273 | + { | |
274 | + rtx set; | |
275 | + | |
276 | + if (! INSN_P (insn)) | |
277 | + continue; | |
278 | + | |
279 | + set = single_set (insn); | |
280 | + if (set | |
281 | + && REG_P (SET_DEST (set)) | |
282 | + && REGNO (SET_DEST (set)) == REGNO (ax_reg)) | |
283 | + { | |
284 | + ax_src_insn = insn; | |
285 | + ax_src = SET_SRC (set); | |
286 | + break; | |
287 | + } | |
288 | + else if (reg_set_p (ax_reg, insn)) | |
289 | + break; | |
290 | + } | |
291 | + | |
292 | + if (! ax_src_insn) | |
293 | + continue; | |
294 | + | |
295 | + if (! (GET_CODE (ax_src) == UNSPEC | |
296 | + && XINT (ax_src, 1) == UNSPEC_FNSTSW)) | |
297 | + continue; | |
298 | + | |
299 | + /* Leave this BB if input arguments to AX setting insn (compare) | |
300 | + have been modified between compare and jump insn. */ | |
301 | + if (modified_between_p (ax_src, ax_src_insn, NEXT_INSN (last_insn))) | |
302 | + continue; | |
303 | + | |
304 | + /* FP compare sequence has been found. Check successor blocks | |
305 | + for redundant insns. */ | |
306 | + FOR_EACH_EDGE (e, ei, bb->succs) | |
307 | + { | |
308 | + rtx insn; | |
309 | + rtx end; | |
310 | + | |
311 | + rtx delete_cc_src_insn = NULL_RTX; | |
312 | + rtx delete_ax_src_insn = NULL_RTX; | |
313 | + rtx maybe_delete_ax_src_insn = NULL_RTX; | |
314 | + | |
315 | + bool cc_src_clobbered; | |
316 | + bool cc_reg_clobbered; | |
317 | + | |
318 | + if (e->flags & EDGE_COMPLEX) | |
319 | + continue; | |
320 | + | |
321 | + if (EDGE_COUNT (e->dest->preds) != 1 | |
322 | + || e->dest == EXIT_BLOCK_PTR) | |
323 | + continue; | |
324 | + | |
325 | + end = NEXT_INSN (BB_END (e->dest)); | |
326 | + | |
327 | + cc_src_clobbered = cc_src_clobbered_pred; | |
328 | + cc_reg_clobbered = false; | |
329 | + | |
330 | + for (insn = BB_HEAD (e->dest); insn != end; insn = NEXT_INSN (insn)) | |
331 | + { | |
332 | + rtx set; | |
333 | + | |
334 | + if (! INSN_P (insn)) | |
335 | + continue; | |
336 | + | |
337 | + /* If compare arguments are modified, we have to | |
338 | + stop looking for a compare which uses them. */ | |
339 | + if (modified_in_p (ax_src, insn) | |
340 | + && maybe_delete_ax_src_insn == NULL_RTX) | |
341 | + break; | |
342 | + | |
343 | + set = single_set (insn); | |
344 | + | |
345 | + /* A compare insn can be deleted if it sets AX_REG | |
346 | + from AX_SRC and CC_SRC is not clobbered yet. */ | |
347 | + if (set | |
348 | + && REG_P (SET_DEST (set)) | |
349 | + && REGNO (SET_DEST (set)) == REGNO (ax_reg) | |
350 | + && rtx_equal_p (ax_src, SET_SRC (set))) | |
351 | + { | |
352 | + maybe_delete_ax_src_insn = insn; | |
353 | + if (!cc_src_clobbered) | |
354 | + { | |
355 | + delete_ax_src_insn = insn; | |
356 | + continue; | |
357 | + } | |
358 | + } | |
359 | + | |
360 | + /* A CC setting insn can be deleted if it sets | |
361 | + CC_REG from CC_SRC, and CC is not clobbered yet. | |
362 | + In this case, compare insn should also be deleted. */ | |
363 | + if (set | |
364 | + && REG_P (SET_DEST (set)) | |
365 | + && REGNO (SET_DEST (set)) == REGNO (cc_reg) | |
366 | + && rtx_equal_p (cc_src, SET_SRC (set)) | |
367 | + && !cc_reg_clobbered | |
368 | + /* There should be a compare insn present in front. */ | |
369 | + && maybe_delete_ax_src_insn != NULL_RTX) | |
370 | + { | |
371 | + delete_ax_src_insn = maybe_delete_ax_src_insn; | |
372 | + delete_cc_src_insn = insn; | |
373 | + break; | |
374 | + } | |
375 | + | |
376 | + if (modified_in_p (cc_src, insn)) | |
377 | + cc_src_clobbered = true; | |
378 | + | |
379 | + if (modified_in_p (cc_reg, insn)) | |
380 | + cc_reg_clobbered = true; | |
381 | + | |
382 | + /* No usable register remains unclobbered. */ | |
383 | + if (cc_src_clobbered && cc_reg_clobbered) | |
384 | + break; | |
385 | + } | |
386 | + | |
387 | + /* Delete comparison. */ | |
388 | + if (delete_ax_src_insn) | |
389 | + { | |
390 | + gcc_assert (maybe_delete_ax_src_insn != NULL_RTX); | |
391 | + delete_insn (delete_ax_src_insn); | |
392 | + } | |
393 | + | |
394 | + /* Delete CC setting instruction. */ | |
395 | + if (delete_cc_src_insn) | |
396 | + delete_insn (delete_cc_src_insn); | |
397 | + } | |
398 | + } | |
399 | +} | |
400 | + | |
401 | +/* Implement machine specific post-reload optimizations. */ | |
402 | +static void | |
403 | +ix86_postreload (void) | |
404 | +{ | |
405 | + if (TARGET_80387 && !TARGET_CMOVE && | |
406 | + !flag_trapping_math && flag_expensive_optimizations) | |
407 | + ix86_cse_i387_compares (); | |
408 | +} | |
409 | + | |
410 | /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte | |
411 | window. */ | |
412 |