1 This patch adds back generation of cld instructions when stringops are
2 used in the function. The cld insn is emitted as an unspec volatile at
3 the end of the function prologue. Currently, the patched gcc emits cld
4 when "-mcld" is added to the compile flags.
6 2008-03-06 Uros Bizjak <ubizjak@gmail.com>
8 * config/i386/i386.h (TARGET_CLD): New define.
9 (struct machine_function): Add needs_cld field.
10 (ix86_current_function_needs_cld): New define.
11 * config/i386/i386.md (UNSPECV_CLD): New unspec volatile constant.
12 ("cld"): New insn pattern.
13 ("strmov_singleop"): Set ix86_current_function_needs_cld flag.
15 ("strset_singleop"): Ditto.
17 ("cmpstrnqi_nz_1"): Ditto.
18 ("cmpstrnqi_1"): Ditto.
19 ("strlenqi_1"): Ditto.
20 * config/i386/i386.opt (mcld): New option.
21 * config/i386/i386.c (ix86_expand_prologue): Emit cld insn for
22 TARGET_CLD when ix86_current_function_needs_cld is set.
24 Index: gcc/config/i386/i386.h
25 ===================================================================
26 --- gcc/config/i386/i386.h (revision 132966)
27 +++ gcc/config/i386/i386.h (working copy)
28 @@ -388,6 +388,7 @@ extern unsigned int ix86_arch_features[X
30 extern int x86_prefetch_sse;
32 +#define TARGET_CLD x86_cld
33 #define TARGET_ABM x86_abm
34 #define TARGET_CMPXCHG16B x86_cmpxchg16b
35 #define TARGET_POPCNT x86_popcnt
36 @@ -2446,8 +2447,9 @@ struct machine_function GTY(())
37 int save_varrargs_registers;
38 int accesses_prev_frame;
39 int optimize_mode_switching[MAX_386_ENTITIES];
40 - /* Set by ix86_compute_frame_layout and used by prologue/epilogue expander to
41 - determine the style used. */
43 + /* Set by ix86_compute_frame_layout and used by prologue/epilogue
44 + expander to determine the style used. */
45 int use_fast_prologue_epilogue;
46 /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed
48 @@ -2467,6 +2469,7 @@ struct machine_function GTY(())
49 #define ix86_stack_locals (cfun->machine->stack_locals)
50 #define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)
51 #define ix86_optimize_mode_switching (cfun->machine->optimize_mode_switching)
52 +#define ix86_current_function_needs_cld (cfun->machine->needs_cld)
53 #define ix86_tls_descriptor_calls_expanded_in_cfun \
54 (cfun->machine->tls_descriptor_call_expanded_p)
55 /* Since tls_descriptor_call_expanded is not cleared, even if all TLS
56 Index: gcc/config/i386/i386.md
57 ===================================================================
58 --- gcc/config/i386/i386.md (revision 132966)
59 +++ gcc/config/i386/i386.md (working copy)
63 (UNSPECV_PROLOGUE_USE 14)
67 ;; Constants to represent pcomtrue/pcomfalse variants
68 @@ -18519,6 +18520,14 @@
70 ;; Block operation instructions
73 + [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
76 + [(set_attr "length" "1")
77 + (set_attr "length_immediate" "0")
78 + (set_attr "modrm" "0")])
80 (define_expand "movmemsi"
81 [(use (match_operand:BLK 0 "memory_operand" ""))
82 (use (match_operand:BLK 1 "memory_operand" ""))
83 @@ -18591,7 +18600,7 @@
84 (set (match_operand 2 "register_operand" "")
85 (match_operand 5 "" ""))])]
86 "TARGET_SINGLE_STRINGOP || optimize_size"
88 + "ix86_current_function_needs_cld = 1;")
90 (define_insn "*strmovdi_rex_1"
91 [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
92 @@ -18708,7 +18717,7 @@
93 (match_operand 3 "memory_operand" ""))
94 (use (match_dup 4))])]
97 + "ix86_current_function_needs_cld = 1;")
99 (define_insn "*rep_movdi_rex64"
100 [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
101 @@ -18868,7 +18877,7 @@
102 (set (match_operand 0 "register_operand" "")
103 (match_operand 3 "" ""))])]
104 "TARGET_SINGLE_STRINGOP || optimize_size"
106 + "ix86_current_function_needs_cld = 1;")
108 (define_insn "*strsetdi_rex_1"
109 [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
110 @@ -18962,7 +18971,7 @@
111 (use (match_operand 3 "register_operand" ""))
112 (use (match_dup 1))])]
115 + "ix86_current_function_needs_cld = 1;")
117 (define_insn "*rep_stosdi_rex64"
118 [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
119 @@ -19138,7 +19147,7 @@
120 (clobber (match_operand 1 "register_operand" ""))
121 (clobber (match_dup 2))])]
124 + "ix86_current_function_needs_cld = 1;")
126 (define_insn "*cmpstrnqi_nz_1"
127 [(set (reg:CC FLAGS_REG)
128 @@ -19185,7 +19194,7 @@
129 (clobber (match_operand 1 "register_operand" ""))
130 (clobber (match_dup 2))])]
133 + "ix86_current_function_needs_cld = 1;")
135 (define_insn "*cmpstrnqi_1"
136 [(set (reg:CC FLAGS_REG)
137 @@ -19254,7 +19263,7 @@
138 (clobber (match_operand 1 "register_operand" ""))
139 (clobber (reg:CC FLAGS_REG))])]
142 + "ix86_current_function_needs_cld = 1;")
144 (define_insn "*strlenqi_1"
145 [(set (match_operand:SI 0 "register_operand" "=&c")
146 Index: gcc/config/i386/i386.opt
147 ===================================================================
148 --- gcc/config/i386/i386.opt (revision 132966)
149 +++ gcc/config/i386/i386.opt (working copy)
150 @@ -250,6 +250,10 @@ Support SSE5 built-in functions and code
152 ;; Instruction support
155 +Target Report RejectNegative Var(x86_cld)
156 +Generate cld instruction in the function prologue.
159 Target Report RejectNegative Var(x86_abm)
160 Support code generation of Advanced Bit Manipulation (ABM) instructions.
161 Index: gcc/config/i386/i386.c
162 ===================================================================
163 --- gcc/config/i386/i386.c (revision 132966)
164 +++ gcc/config/i386/i386.c (working copy)
165 @@ -6498,6 +6498,10 @@ ix86_expand_prologue (void)
166 emit_insn (gen_prologue_use (pic_offset_table_rtx));
167 emit_insn (gen_blockage ());
170 + /* Emit cld instruction if stringops are used in the function. */
171 + if (TARGET_CLD && ix86_current_function_needs_cld)
172 + emit_insn (gen_cld ());
175 /* Emit code to restore saved registers using MOV insns. First register