]>
Commit | Line | Data |
---|---|---|
38a49943 PS |
1 | Index: gcc/doc/extend.texi |
2 | =================================================================== | |
3 | --- gcc/doc/extend.texi (.../gcc-4_3-branch) (revision 145062) | |
4 | +++ gcc/doc/extend.texi (.../ix86/gcc-4_3-branch) (revision 145364) | |
5 | @@ -7962,6 +7962,27 @@ | |
6 | Generates the @code{popcntq} machine instruction. | |
7 | @end table | |
8 | ||
9 | +The following built-in functions are available when @option{-maes} is | |
10 | +used. All of them generate the machine instruction that is part of the | |
11 | +name. | |
12 | + | |
13 | +@smallexample | |
14 | +v2di __builtin_ia32_aesenc128 (v2di, v2di) | |
15 | +v2di __builtin_ia32_aesenclast128 (v2di, v2di) | |
16 | +v2di __builtin_ia32_aesdec128 (v2di, v2di) | |
17 | +v2di __builtin_ia32_aesdeclast128 (v2di, v2di) | |
18 | +v2di __builtin_ia32_aeskeygenassist128 (v2di, const int) | |
19 | +v2di __builtin_ia32_aesimc128 (v2di) | |
20 | +@end smallexample | |
21 | + | |
22 | +The following built-in function is available when @option{-mpclmul} is | |
23 | +used. | |
24 | + | |
25 | +@table @code | |
26 | +@item v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int) | |
27 | +Generates the @code{pclmulqdq} machine instruction. | |
28 | +@end table | |
29 | + | |
30 | The following built-in functions are available when @option{-msse4a} is used. | |
31 | All of them generate the machine instruction that is part of the name. | |
32 | ||
33 | Index: gcc/doc/invoke.texi | |
34 | =================================================================== | |
35 | --- gcc/doc/invoke.texi (.../gcc-4_3-branch) (revision 145062) | |
36 | +++ gcc/doc/invoke.texi (.../ix86/gcc-4_3-branch) (revision 145364) | |
37 | @@ -551,6 +551,7 @@ | |
38 | -mno-wide-multiply -mrtd -malign-double @gol | |
39 | -mpreferred-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol | |
40 | -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol | |
41 | +-maes -mpclmul @gol | |
42 | -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol | |
43 | -mthreads -mno-align-stringops -minline-all-stringops @gol | |
44 | -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol | |
45 | @@ -10733,6 +10734,10 @@ | |
46 | @itemx -mno-sse4.2 | |
47 | @item -msse4 | |
48 | @itemx -mno-sse4 | |
49 | +@item -maes | |
50 | +@itemx -mno-aes | |
51 | +@item -mpclmul | |
52 | +@itemx -mno-pclmul | |
53 | @item -msse4a | |
54 | @item -mno-sse4a | |
55 | @item -msse5 | |
56 | @@ -10750,8 +10755,8 @@ | |
57 | @opindex m3dnow | |
58 | @opindex mno-3dnow | |
59 | These switches enable or disable the use of instructions in the MMX, | |
60 | -SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, SSE5, ABM or 3DNow!@: extended | |
61 | -instruction sets. | |
62 | +SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or | |
63 | +3DNow!@: extended instruction sets. | |
64 | These extensions are also available as built-in functions: see | |
65 | @ref{X86 Built-in Functions}, for details of the functions enabled and | |
66 | disabled by these switches. | |
67 | Index: gcc/testsuite/gcc.target/i386/sse-14.c | |
68 | =================================================================== | |
69 | --- gcc/testsuite/gcc.target/i386/sse-14.c (.../gcc-4_3-branch) (revision 145062) | |
70 | +++ gcc/testsuite/gcc.target/i386/sse-14.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
71 | @@ -1,14 +1,15 @@ | |
72 | /* { dg-do compile } */ | |
73 | -/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */ | |
74 | +/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ | |
75 | ||
76 | /* Test that the intrinsics compile without optimization. All of them are | |
77 | - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h | |
78 | + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h | |
79 | that reference the proper builtin functions. Defining away "extern" and | |
80 | "__inline" results in all of them being compiled as proper functions. */ | |
81 | ||
82 | #define extern | |
83 | #define __inline | |
84 | ||
85 | +#include <wmmintrin.h> | |
86 | #include <bmmintrin.h> | |
87 | #include <smmintrin.h> | |
88 | #include <mm3dnow.h> | |
89 | @@ -44,6 +45,10 @@ | |
90 | test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) | |
91 | test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) | |
92 | ||
93 | +/* wmmintrin.h */ | |
94 | +test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1) | |
95 | +test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) | |
96 | + | |
97 | /* smmintrin.h */ | |
98 | test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) | |
99 | test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) | |
100 | Index: gcc/testsuite/gcc.target/i386/i386.exp | |
101 | =================================================================== | |
102 | --- gcc/testsuite/gcc.target/i386/i386.exp (.../gcc-4_3-branch) (revision 145062) | |
103 | +++ gcc/testsuite/gcc.target/i386/i386.exp (.../ix86/gcc-4_3-branch) (revision 145364) | |
104 | @@ -51,6 +51,34 @@ | |
105 | } "-O2 -msse4.1" ] | |
106 | } | |
107 | ||
108 | +# Return 1 if a probe that calls __builtin_ia32_aesimc128 compiles with -O2 -maes. | |
109 | +proc check_effective_target_aes { } { | |
110 | + return [check_no_compiler_messages aes object { | |
111 | + typedef long long __m128i __attribute__ ((__vector_size__ (16))); | |
112 | + typedef long long __v2di __attribute__ ((__vector_size__ (16))); | |
113 | + | |
114 | + __m128i _mm_aesimc_si128 (__m128i __X) | |
115 | + { | |
116 | + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X); | |
117 | + } | |
118 | + } "-O2 -maes" ] | |
119 | +} | |
120 | + | |
121 | +# Return 1 if a probe that calls __builtin_ia32_pclmulqdq128 compiles with -O2 -mpclmul. | |
122 | +proc check_effective_target_pclmul { } { | |
123 | + return [check_no_compiler_messages pclmul object { | |
124 | + typedef long long __m128i __attribute__ ((__vector_size__ (16))); | |
125 | + typedef long long __v2di __attribute__ ((__vector_size__ (16))); | |
126 | + | |
127 | + __m128i pclmulqdq_test (__m128i __X, __m128i __Y) | |
128 | + { | |
129 | + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X, | |
130 | + (__v2di)__Y, | |
131 | + 1); | |
132 | + } | |
133 | + } "-O2 -mpclmul" ] | |
134 | +} | |
135 | + | |
136 | # Return 1 if sse4a instructions can be compiled. | |
137 | proc check_effective_target_sse4a { } { | |
138 | return [check_no_compiler_messages sse4a object { | |
139 | Index: gcc/testsuite/gcc.target/i386/aesdeclast.c | |
140 | =================================================================== | |
141 | --- gcc/testsuite/gcc.target/i386/aesdeclast.c (.../gcc-4_3-branch) (revision 0) | |
142 | +++ gcc/testsuite/gcc.target/i386/aesdeclast.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
143 | @@ -0,0 +1,69 @@ | |
144 | +/* { dg-do run } */ | |
145 | +/* { dg-require-effective-target aes } */ | |
146 | +/* { dg-options "-O2 -maes" } */ | |
147 | + | |
148 | +#include <wmmintrin.h> | |
149 | +#include <string.h> | |
150 | + | |
151 | +#include "aes-check.h" | |
152 | + | |
153 | +extern void abort (void); | |
154 | + | |
155 | +#define NUM 1024 | |
156 | + | |
157 | +static __m128i src1[NUM]; | |
158 | +static __m128i src2[NUM]; | |
159 | +static __m128i edst[NUM]; | |
160 | + | |
161 | +static __m128i resdst[NUM]; | |
162 | + | |
163 | +/* Fill all NUM entries of S1 and S2 with one fixed operand pair, and D | |
164 | + with the one result that aes_test checks resdst against. */ | |
165 | + | |
166 | +static void | |
167 | +init_data (__m128i *s1, __m128i *s2, __m128i *d) | |
168 | +{ | |
169 | + int i; | |
170 | + | |
171 | + for (i = 0; i < NUM; i++) | |
172 | + { | |
173 | + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, | |
174 | + 0x73745665, 0x7b5b5465); | |
175 | + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, | |
176 | + 0x68617929, 0x48692853); | |
177 | + d[i] = _mm_setr_epi32 (0x72a593d0, 0xd410637b, | |
178 | + 0x6b317f95, 0xc5a391ef); | |
179 | + } | |
180 | +} | |
181 | + | |
182 | +static void | |
183 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
184 | +{ | |
185 | + int i; | |
186 | + | |
187 | + init_data (src1, src2, edst); /* Load operands and expected results. */ | |
188 | + | |
189 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass. */ | |
190 | + { | |
191 | + resdst[i] = _mm_aesdeclast_si128 (src1[i], src2[i]); | |
192 | + resdst[i + 1] = _mm_aesdeclast_si128 (src1[i + 1], src2[i + 1]); | |
193 | + resdst[i + 2] = _mm_aesdeclast_si128 (src1[i + 2], src2[i + 2]); | |
194 | + resdst[i + 3] = _mm_aesdeclast_si128 (src1[i + 3], src2[i + 3]); | |
195 | + resdst[i + 4] = _mm_aesdeclast_si128 (src1[i + 4], src2[i + 4]); | |
196 | + resdst[i + 5] = _mm_aesdeclast_si128 (src1[i + 5], src2[i + 5]); | |
197 | + resdst[i + 6] = _mm_aesdeclast_si128 (src1[i + 6], src2[i + 6]); | |
198 | + resdst[i + 7] = _mm_aesdeclast_si128 (src1[i + 7], src2[i + 7]); | |
199 | + resdst[i + 8] = _mm_aesdeclast_si128 (src1[i + 8], src2[i + 8]); | |
200 | + resdst[i + 9] = _mm_aesdeclast_si128 (src1[i + 9], src2[i + 9]); | |
201 | + resdst[i + 10] = _mm_aesdeclast_si128 (src1[i + 10], src2[i + 10]); | |
202 | + resdst[i + 11] = _mm_aesdeclast_si128 (src1[i + 11], src2[i + 11]); | |
203 | + resdst[i + 12] = _mm_aesdeclast_si128 (src1[i + 12], src2[i + 12]); | |
204 | + resdst[i + 13] = _mm_aesdeclast_si128 (src1[i + 13], src2[i + 13]); | |
205 | + resdst[i + 14] = _mm_aesdeclast_si128 (src1[i + 14], src2[i + 14]); | |
206 | + resdst[i + 15] = _mm_aesdeclast_si128 (src1[i + 15], src2[i + 15]); | |
207 | + } | |
208 | + | |
209 | + for (i = 0; i < NUM; i++) | |
210 | + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) | |
211 | + abort (); /* resdst[i] differs from edst[i]. */ | |
212 | +} | |
213 | Index: gcc/testsuite/gcc.target/i386/pclmulqdq.c | |
214 | =================================================================== | |
215 | --- gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../gcc-4_3-branch) (revision 0) | |
216 | +++ gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
217 | @@ -0,0 +1,87 @@ | |
218 | +/* { dg-do run } */ | |
219 | +/* { dg-require-effective-target pclmul } */ | |
220 | +/* { dg-options "-O2 -mpclmul" } */ | |
221 | + | |
222 | +#include <wmmintrin.h> | |
223 | +#include <string.h> | |
224 | + | |
225 | +#include "pclmul-check.h" | |
226 | + | |
227 | +extern void abort (void); | |
228 | + | |
229 | +#define NUM 1024 | |
230 | + | |
231 | +static __m128i s1[NUM]; | |
232 | +static __m128i s2[NUM]; | |
233 | +/* Separate copy of s2, needed to generate the memory-operand form of the instruction. */ | |
234 | +static __m128i s2m[NUM]; | |
235 | + | |
236 | +static __m128i e_00[NUM]; | |
237 | +static __m128i e_01[NUM]; | |
238 | +static __m128i e_10[NUM]; | |
239 | +static __m128i e_11[NUM]; | |
240 | + | |
241 | +static __m128i d_00[NUM]; | |
242 | +static __m128i d_01[NUM]; | |
243 | +static __m128i d_10[NUM]; | |
244 | +static __m128i d_11[NUM]; | |
245 | + | |
246 | +/* Fill LS1, LS2 and the file-scope s2m with one fixed operand pair, and | |
247 | + LE_00 .. LE_11 with the result pclmul_test expects for each immediate. */ | |
248 | +static void | |
249 | +init_data (__m128i *ls1, __m128i *ls2, __m128i *le_00, __m128i *le_01, | |
250 | + __m128i *le_10, __m128i *le_11) | |
251 | +{ | |
252 | + int i; | |
253 | + | |
254 | + for (i = 0; i < NUM; i++) | |
255 | + { | |
256 | + ls1[i] = _mm_set_epi32 (0x7B5B5465, 0x73745665, | |
257 | + 0x63746F72, 0x5D53475D); | |
258 | + ls2[i] = _mm_set_epi32 (0x48692853, 0x68617929, | |
259 | + 0x5B477565, 0x726F6E5D); | |
260 | + s2m[i] = _mm_set_epi32 (0x48692853, 0x68617929, | |
261 | + 0x5B477565, 0x726F6E5D); | |
262 | + le_00[i] = _mm_set_epi32 (0x1D4D84C8, 0x5C3440C0, | |
263 | + 0x929633D5, 0xD36F0451); | |
264 | + le_01[i] = _mm_set_epi32 (0x1A2BF6DB, 0x3A30862F, | |
265 | + 0xBABF262D, 0xF4B7D5C9); | |
266 | + le_10[i] = _mm_set_epi32 (0x1BD17C8D, 0x556AB5A1, | |
267 | + 0x7FA540AC, 0x2A281315); | |
268 | + le_11[i] = _mm_set_epi32 (0x1D1E1F2C, 0x592E7C45, | |
269 | + 0xD66EE03E, 0x410FD4ED); | |
270 | + } | |
271 | +} | |
272 | + | |
273 | +static void | |
274 | +pclmul_test (void) /* Called by main () in pclmul-check.h. */ | |
275 | +{ | |
276 | + int i; | |
277 | + | |
278 | + init_data (s1, s2, e_00, e_01, e_10, e_11); /* Operands and expected results. */ | |
279 | + | |
280 | + for (i = 0; i < NUM; i += 2) /* Two elements per pass, four immediates each. */ | |
281 | + { | |
282 | + d_00[i] = _mm_clmulepi64_si128 (s1[i], s2m[i], 0x00); /* Second operand from s2m. */ | |
283 | + d_01[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x01); | |
284 | + d_10[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x10); | |
285 | + d_11[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x11); | |
286 | + | |
287 | + d_11[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x11); | |
288 | + d_00[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x00); | |
289 | + d_10[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2m[i + 1], 0x10); /* Second operand from s2m. */ | |
290 | + d_01[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x01); | |
291 | + } | |
292 | + | |
293 | + for (i = 0; i < NUM; i++) /* Abort on the first result that differs from its expected value. */ | |
294 | + { | |
295 | + if (memcmp (d_00 + i, e_00 + i, sizeof (__m128i))) | |
296 | + abort (); | |
297 | + if (memcmp (d_01 + i, e_01 + i, sizeof (__m128i))) | |
298 | + abort (); | |
299 | + if (memcmp (d_10 + i, e_10 + i, sizeof (__m128i))) | |
300 | + abort (); | |
301 | + if (memcmp(d_11 + i, e_11 + i, sizeof (__m128i))) | |
302 | + abort (); | |
303 | + } | |
304 | +} | |
305 | Index: gcc/testsuite/gcc.target/i386/aes-check.h | |
306 | =================================================================== | |
307 | --- gcc/testsuite/gcc.target/i386/aes-check.h (.../gcc-4_3-branch) (revision 0) | |
308 | +++ gcc/testsuite/gcc.target/i386/aes-check.h (.../ix86/gcc-4_3-branch) (revision 145364) | |
309 | @@ -0,0 +1,30 @@ | |
310 | +#include <stdio.h> | |
311 | +#include <stdlib.h> | |
312 | + | |
313 | +#include "cpuid.h" | |
314 | + | |
315 | +static void aes_test (void); | |
316 | + | |
317 | +int | |
318 | +main () /* Driver shared by the AES tests: runs aes_test only when ecx has bit_AES set. */ | |
319 | +{ | |
320 | + unsigned int eax, ebx, ecx, edx; | |
321 | + | |
322 | + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) | |
323 | + return 0; /* Query failed: exit with status 0 without running the test. */ | |
324 | + | |
325 | + /* Run AES test only if host has AES support. */ | |
326 | + if (ecx & bit_AES) | |
327 | + { | |
328 | + aes_test (); | |
329 | +#ifdef DEBUG | |
330 | + printf ("PASSED\n"); | |
331 | +#endif | |
332 | + } | |
333 | +#ifdef DEBUG | |
334 | + else | |
335 | + printf ("SKIPPED\n"); | |
336 | +#endif | |
337 | + | |
338 | + return 0; /* Status 0 whether the test ran or was skipped. */ | |
339 | +} | |
340 | Index: gcc/testsuite/gcc.target/i386/pclmul-check.h | |
341 | =================================================================== | |
342 | --- gcc/testsuite/gcc.target/i386/pclmul-check.h (.../gcc-4_3-branch) (revision 0) | |
343 | +++ gcc/testsuite/gcc.target/i386/pclmul-check.h (.../ix86/gcc-4_3-branch) (revision 145364) | |
344 | @@ -0,0 +1,30 @@ | |
345 | +#include <stdio.h> | |
346 | +#include <stdlib.h> | |
347 | + | |
348 | +#include "cpuid.h" | |
349 | + | |
350 | +static void pclmul_test (void); | |
351 | + | |
352 | +int | |
353 | +main () /* Driver for the PCLMUL test: runs pclmul_test only when ecx has bit_PCLMUL set. */ | |
354 | +{ | |
355 | + unsigned int eax, ebx, ecx, edx; | |
356 | + | |
357 | + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) | |
358 | + return 0; /* Query failed: exit with status 0 without running the test. */ | |
359 | + | |
360 | + /* Run PCLMULQDQ test only if host has PCLMULQDQ support. */ | |
361 | + if (ecx & bit_PCLMUL) | |
362 | + { | |
363 | + pclmul_test (); | |
364 | +#ifdef DEBUG | |
365 | + printf ("PASSED\n"); | |
366 | +#endif | |
367 | + } | |
368 | +#ifdef DEBUG | |
369 | + else | |
370 | + printf ("SKIPPED\n"); | |
371 | +#endif | |
372 | + | |
373 | + return 0; /* Status 0 whether the test ran or was skipped. */ | |
374 | +} | |
375 | Index: gcc/testsuite/gcc.target/i386/aeskeygenassist.c | |
376 | =================================================================== | |
377 | --- gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../gcc-4_3-branch) (revision 0) | |
378 | +++ gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
379 | @@ -0,0 +1,66 @@ | |
380 | +/* { dg-do run } */ | |
381 | +/* { dg-require-effective-target aes } */ | |
382 | +/* { dg-options "-O2 -maes" } */ | |
383 | + | |
384 | +#include <wmmintrin.h> | |
385 | +#include <string.h> | |
386 | + | |
387 | +#include "aes-check.h" | |
388 | + | |
389 | +extern void abort (void); | |
390 | + | |
391 | +#define NUM 1024 | |
392 | +#define IMM8 1 | |
393 | + | |
394 | +static __m128i src1[NUM]; | |
395 | +static __m128i edst[NUM]; | |
396 | + | |
397 | +static __m128i resdst[NUM]; | |
398 | + | |
399 | +/* Fill all NUM entries of S1 with one fixed operand, and D with the one | |
400 | + result that aes_test checks resdst against. */ | |
401 | + | |
402 | +static void | |
403 | +init_data (__m128i *s1, __m128i *d) | |
404 | +{ | |
405 | + int i; | |
406 | + for (i = 0; i < NUM; i++) | |
407 | + { | |
408 | + s1[i] = _mm_setr_epi32 (0x16157e2b, 0xa6d2ae28, | |
409 | + 0x8815f7ab, 0x3c4fcf09); | |
410 | + d[i] = _mm_setr_epi32 (0x24b5e434, 0x3424b5e5, | |
411 | + 0xeb848a01, 0x01eb848b); | |
412 | + } | |
413 | +} | |
414 | + | |
415 | +static void | |
416 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
417 | +{ | |
418 | + int i; | |
419 | + | |
420 | + init_data (src1, edst); /* Load operands and expected results. */ | |
421 | + | |
422 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass, all with IMM8. */ | |
423 | + { | |
424 | + resdst[i] = _mm_aeskeygenassist_si128 (src1[i], IMM8); | |
425 | + resdst[i + 1] = _mm_aeskeygenassist_si128 (src1[i + 1], IMM8); | |
426 | + resdst[i + 2] = _mm_aeskeygenassist_si128 (src1[i + 2], IMM8); | |
427 | + resdst[i + 3] = _mm_aeskeygenassist_si128 (src1[i + 3], IMM8); | |
428 | + resdst[i + 4] = _mm_aeskeygenassist_si128 (src1[i + 4], IMM8); | |
429 | + resdst[i + 5] = _mm_aeskeygenassist_si128 (src1[i + 5], IMM8); | |
430 | + resdst[i + 6] = _mm_aeskeygenassist_si128 (src1[i + 6], IMM8); | |
431 | + resdst[i + 7] = _mm_aeskeygenassist_si128 (src1[i + 7], IMM8); | |
432 | + resdst[i + 8] = _mm_aeskeygenassist_si128 (src1[i + 8], IMM8); | |
433 | + resdst[i + 9] = _mm_aeskeygenassist_si128 (src1[i + 9], IMM8); | |
434 | + resdst[i + 10] = _mm_aeskeygenassist_si128 (src1[i + 10], IMM8); | |
435 | + resdst[i + 11] = _mm_aeskeygenassist_si128 (src1[i + 11], IMM8); | |
436 | + resdst[i + 12] = _mm_aeskeygenassist_si128 (src1[i + 12], IMM8); | |
437 | + resdst[i + 13] = _mm_aeskeygenassist_si128 (src1[i + 13], IMM8); | |
438 | + resdst[i + 14] = _mm_aeskeygenassist_si128 (src1[i + 14], IMM8); | |
439 | + resdst[i + 15] = _mm_aeskeygenassist_si128 (src1[i + 15], IMM8); | |
440 | + } | |
441 | + | |
442 | + for (i = 0; i < NUM; i++) | |
443 | + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) | |
444 | + abort (); /* resdst[i] differs from edst[i]. */ | |
445 | +} | |
446 | Index: gcc/testsuite/gcc.target/i386/aesenclast.c | |
447 | =================================================================== | |
448 | --- gcc/testsuite/gcc.target/i386/aesenclast.c (.../gcc-4_3-branch) (revision 0) | |
449 | +++ gcc/testsuite/gcc.target/i386/aesenclast.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
450 | @@ -0,0 +1,68 @@ | |
451 | +/* { dg-do run } */ | |
452 | +/* { dg-require-effective-target aes } */ | |
453 | +/* { dg-options "-O2 -maes" } */ | |
454 | + | |
455 | +#include <wmmintrin.h> | |
456 | +#include <string.h> | |
457 | + | |
458 | +#include "aes-check.h" | |
459 | + | |
460 | +extern void abort (void); | |
461 | + | |
462 | +#define NUM 1024 | |
463 | + | |
464 | +static __m128i src1[NUM]; | |
465 | +static __m128i src2[NUM]; | |
466 | +static __m128i edst[NUM]; | |
467 | + | |
468 | +static __m128i resdst[NUM]; | |
469 | + | |
470 | +/* Fill all NUM entries of S1 and S2 with one fixed operand pair, and D | |
471 | + with the one result that aes_test checks resdst against. */ | |
472 | + | |
473 | +static void | |
474 | +init_data (__m128i *s1, __m128i *s2, __m128i *d) | |
475 | +{ | |
476 | + int i; | |
477 | + for (i = 0; i < NUM; i++) | |
478 | + { | |
479 | + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, | |
480 | + 0x73745665, 0x7b5b5465); | |
481 | + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, | |
482 | + 0x68617929, 0x48692853); | |
483 | + d[i] = _mm_setr_epi32 (0x53fdc611, 0x177ec425, | |
484 | + 0x938c5964, 0xc7fb881e); | |
485 | + } | |
486 | +} | |
487 | + | |
488 | +static void | |
489 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
490 | +{ | |
491 | + int i; | |
492 | + | |
493 | + init_data (src1, src2, edst); /* Load operands and expected results. */ | |
494 | + | |
495 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass. */ | |
496 | + { | |
497 | + resdst[i] = _mm_aesenclast_si128 (src1[i], src2[i]); | |
498 | + resdst[i + 1] = _mm_aesenclast_si128 (src1[i + 1], src2[i + 1]); | |
499 | + resdst[i + 2] = _mm_aesenclast_si128 (src1[i + 2], src2[i + 2]); | |
500 | + resdst[i + 3] = _mm_aesenclast_si128 (src1[i + 3], src2[i + 3]); | |
501 | + resdst[i + 4] = _mm_aesenclast_si128 (src1[i + 4], src2[i + 4]); | |
502 | + resdst[i + 5] = _mm_aesenclast_si128 (src1[i + 5], src2[i + 5]); | |
503 | + resdst[i + 6] = _mm_aesenclast_si128 (src1[i + 6], src2[i + 6]); | |
504 | + resdst[i + 7] = _mm_aesenclast_si128 (src1[i + 7], src2[i + 7]); | |
505 | + resdst[i + 8] = _mm_aesenclast_si128 (src1[i + 8], src2[i + 8]); | |
506 | + resdst[i + 9] = _mm_aesenclast_si128 (src1[i + 9], src2[i + 9]); | |
507 | + resdst[i + 10] = _mm_aesenclast_si128 (src1[i + 10], src2[i + 10]); | |
508 | + resdst[i + 11] = _mm_aesenclast_si128 (src1[i + 11], src2[i + 11]); | |
509 | + resdst[i + 12] = _mm_aesenclast_si128 (src1[i + 12], src2[i + 12]); | |
510 | + resdst[i + 13] = _mm_aesenclast_si128 (src1[i + 13], src2[i + 13]); | |
511 | + resdst[i + 14] = _mm_aesenclast_si128 (src1[i + 14], src2[i + 14]); | |
512 | + resdst[i + 15] = _mm_aesenclast_si128 (src1[i + 15], src2[i + 15]); | |
513 | + } | |
514 | + | |
515 | + for (i = 0; i < NUM; i++) | |
516 | + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) | |
517 | + abort (); /* resdst[i] differs from edst[i]. */ | |
518 | +} | |
519 | Index: gcc/testsuite/gcc.target/i386/aesimc.c | |
520 | =================================================================== | |
521 | --- gcc/testsuite/gcc.target/i386/aesimc.c (.../gcc-4_3-branch) (revision 0) | |
522 | +++ gcc/testsuite/gcc.target/i386/aesimc.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
523 | @@ -0,0 +1,66 @@ | |
524 | +/* { dg-do run } */ | |
525 | +/* { dg-require-effective-target aes } */ | |
526 | +/* { dg-options "-O2 -maes" } */ | |
527 | + | |
528 | +#include <wmmintrin.h> | |
529 | +#include <string.h> | |
530 | + | |
531 | +#include "aes-check.h" | |
532 | + | |
533 | +extern void abort (void); | |
534 | + | |
535 | +#define NUM 1024 | |
536 | + | |
537 | +static __m128i src1[NUM]; | |
538 | +static __m128i edst[NUM]; | |
539 | + | |
540 | +static __m128i resdst[NUM]; | |
541 | + | |
542 | +/* Fill all NUM entries of S1 with one fixed operand, and D with the one | |
543 | + result that aes_test checks resdst against. */ | |
544 | + | |
545 | +static void | |
546 | +init_data (__m128i *s1, __m128i *d) | |
547 | +{ | |
548 | + int i; | |
549 | + | |
550 | + for (i = 0; i < NUM; i++) | |
551 | + { | |
552 | + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, | |
553 | + 0x73745665, 0x7b5b5465); | |
554 | + d[i] = _mm_setr_epi32 (0x81c3b3e5, 0x2b18330a, | |
555 | + 0x44b109c8, 0x627a6f66); | |
556 | + } | |
557 | +} | |
558 | + | |
559 | +static void | |
560 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
561 | +{ | |
562 | + int i; | |
563 | + | |
564 | + init_data (src1, edst); /* Load operands and expected results. */ | |
565 | + | |
566 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass. */ | |
567 | + { | |
568 | + resdst[i] = _mm_aesimc_si128 (src1[i]); | |
569 | + resdst[i + 1] = _mm_aesimc_si128 (src1[i + 1]); | |
570 | + resdst[i + 2] = _mm_aesimc_si128 (src1[i + 2]); | |
571 | + resdst[i + 3] = _mm_aesimc_si128 (src1[i + 3]); | |
572 | + resdst[i + 4] = _mm_aesimc_si128 (src1[i + 4]); | |
573 | + resdst[i + 5] = _mm_aesimc_si128 (src1[i + 5]); | |
574 | + resdst[i + 6] = _mm_aesimc_si128 (src1[i + 6]); | |
575 | + resdst[i + 7] = _mm_aesimc_si128 (src1[i + 7]); | |
576 | + resdst[i + 8] = _mm_aesimc_si128 (src1[i + 8]); | |
577 | + resdst[i + 9] = _mm_aesimc_si128 (src1[i + 9]); | |
578 | + resdst[i + 10] = _mm_aesimc_si128 (src1[i + 10]); | |
579 | + resdst[i + 11] = _mm_aesimc_si128 (src1[i + 11]); | |
580 | + resdst[i + 12] = _mm_aesimc_si128 (src1[i + 12]); | |
581 | + resdst[i + 13] = _mm_aesimc_si128 (src1[i + 13]); | |
582 | + resdst[i + 14] = _mm_aesimc_si128 (src1[i + 14]); | |
583 | + resdst[i + 15] = _mm_aesimc_si128 (src1[i + 15]); | |
584 | + } | |
585 | + | |
586 | + for (i = 0; i < NUM; i++) | |
587 | + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) | |
588 | + abort (); /* resdst[i] differs from edst[i]. */ | |
589 | +} | |
590 | Index: gcc/testsuite/gcc.target/i386/aesenc.c | |
591 | =================================================================== | |
592 | --- gcc/testsuite/gcc.target/i386/aesenc.c (.../gcc-4_3-branch) (revision 0) | |
593 | +++ gcc/testsuite/gcc.target/i386/aesenc.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
594 | @@ -0,0 +1,68 @@ | |
595 | +/* { dg-do run } */ | |
596 | +/* { dg-require-effective-target aes } */ | |
597 | +/* { dg-options "-O2 -maes" } */ | |
598 | + | |
599 | +#include <wmmintrin.h> | |
600 | +#include <string.h> | |
601 | + | |
602 | +#include "aes-check.h" | |
603 | + | |
604 | +extern void abort (void); | |
605 | + | |
606 | +#define NUM 1024 | |
607 | + | |
608 | +static __m128i src1[NUM]; | |
609 | +static __m128i src2[NUM]; | |
610 | +static __m128i edst[NUM]; | |
611 | + | |
612 | +static __m128i resdst[NUM]; | |
613 | + | |
614 | +/* Fill all NUM entries of S1 and S2 with one fixed operand pair, and D | |
615 | + with the one result that aes_test checks resdst against. */ | |
616 | + | |
617 | +static void | |
618 | +init_data (__m128i *s1, __m128i *s2, __m128i *d) | |
619 | +{ | |
620 | + int i; | |
621 | + for (i = 0; i < NUM; i++) | |
622 | + { | |
623 | + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, | |
624 | + 0x73745665, 0x7b5b5465); | |
625 | + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, | |
626 | + 0x68617929, 0x48692853); | |
627 | + d[i] = _mm_setr_epi32 (0xded7e595, 0x8b104b58, | |
628 | + 0x9fdba3c5, 0xa8311c2f); | |
629 | + } | |
630 | +} | |
631 | + | |
632 | +static void | |
633 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
634 | +{ | |
635 | + int i; | |
636 | + | |
637 | + init_data (src1, src2, edst); /* Load operands and expected results. */ | |
638 | + | |
639 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass. */ | |
640 | + { | |
641 | + resdst[i] = _mm_aesenc_si128 (src1[i], src2[i]); | |
642 | + resdst[i + 1] = _mm_aesenc_si128 (src1[i + 1], src2[i + 1]); | |
643 | + resdst[i + 2] = _mm_aesenc_si128 (src1[i + 2], src2[i + 2]); | |
644 | + resdst[i + 3] = _mm_aesenc_si128 (src1[i + 3], src2[i + 3]); | |
645 | + resdst[i + 4] = _mm_aesenc_si128 (src1[i + 4], src2[i + 4]); | |
646 | + resdst[i + 5] = _mm_aesenc_si128 (src1[i + 5], src2[i + 5]); | |
647 | + resdst[i + 6] = _mm_aesenc_si128 (src1[i + 6], src2[i + 6]); | |
648 | + resdst[i + 7] = _mm_aesenc_si128 (src1[i + 7], src2[i + 7]); | |
649 | + resdst[i + 8] = _mm_aesenc_si128 (src1[i + 8], src2[i + 8]); | |
650 | + resdst[i + 9] = _mm_aesenc_si128 (src1[i + 9], src2[i + 9]); | |
651 | + resdst[i + 10] = _mm_aesenc_si128 (src1[i + 10], src2[i + 10]); | |
652 | + resdst[i + 11] = _mm_aesenc_si128 (src1[i + 11], src2[i + 11]); | |
653 | + resdst[i + 12] = _mm_aesenc_si128 (src1[i + 12], src2[i + 12]); | |
654 | + resdst[i + 13] = _mm_aesenc_si128 (src1[i + 13], src2[i + 13]); | |
655 | + resdst[i + 14] = _mm_aesenc_si128 (src1[i + 14], src2[i + 14]); | |
656 | + resdst[i + 15] = _mm_aesenc_si128 (src1[i + 15], src2[i + 15]); | |
657 | + } | |
658 | + | |
659 | + for (i = 0; i < NUM; i++) | |
660 | + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) | |
661 | + abort (); /* resdst[i] differs from edst[i]. */ | |
662 | +} | |
663 | Index: gcc/testsuite/gcc.target/i386/sse-13.c | |
664 | =================================================================== | |
665 | --- gcc/testsuite/gcc.target/i386/sse-13.c (.../gcc-4_3-branch) (revision 145062) | |
666 | +++ gcc/testsuite/gcc.target/i386/sse-13.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
667 | @@ -1,8 +1,8 @@ | |
668 | /* { dg-do compile } */ | |
669 | -/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5" } */ | |
670 | +/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ | |
671 | ||
672 | /* Test that the intrinsics compile with optimization. All of them are | |
673 | - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h | |
674 | + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h | |
675 | that reference the proper builtin functions. Defining away "extern" and | |
676 | "__inline" results in all of them being compiled as proper functions. */ | |
677 | ||
678 | @@ -15,6 +15,10 @@ | |
679 | #define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) | |
680 | #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) | |
681 | ||
682 | +/* wmmintrin.h */ | |
683 | +#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1) | |
684 | +#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1) | |
685 | + | |
686 | /* smmintrin.h */ | |
687 | #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1) | |
688 | #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1) | |
689 | @@ -92,6 +96,7 @@ | |
690 | #define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) | |
691 | #define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) | |
692 | ||
693 | +#include <wmmintrin.h> | |
694 | #include <bmmintrin.h> | |
695 | #include <smmintrin.h> | |
696 | #include <mm3dnow.h> | |
697 | Index: gcc/testsuite/gcc.target/i386/aesdec.c | |
698 | =================================================================== | |
699 | --- gcc/testsuite/gcc.target/i386/aesdec.c (.../gcc-4_3-branch) (revision 0) | |
700 | +++ gcc/testsuite/gcc.target/i386/aesdec.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
701 | @@ -0,0 +1,67 @@ | |
702 | +/* { dg-do run } */ | |
703 | +/* { dg-require-effective-target aes } */ | |
704 | +/* { dg-options "-O2 -maes" } */ | |
705 | + | |
706 | +#include <wmmintrin.h> | |
707 | +#include <string.h> | |
708 | + | |
709 | +#include "aes-check.h" | |
710 | + | |
711 | +extern void abort (void); | |
712 | + | |
713 | +#define NUM 1024 | |
714 | + | |
715 | +static __m128i src1[NUM]; | |
716 | +static __m128i src2[NUM]; | |
717 | +static __m128i edst[NUM]; | |
718 | + | |
719 | +static __m128i resdst[NUM]; | |
720 | + | |
721 | +/* Fill all NUM entries of S1 and S2 with one fixed operand pair, and D | |
722 | + with the one result that aes_test checks resdst against. */ | |
723 | +static void | |
724 | +init_data (__m128i *s1, __m128i *s2, __m128i *d) | |
725 | +{ | |
726 | + int i; | |
727 | + for (i = 0; i < NUM; i++) | |
728 | + { | |
729 | + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, | |
730 | + 0x73745665, 0x7b5b5465); | |
731 | + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, | |
732 | + 0x68617929, 0x48692853); | |
733 | + d[i] = _mm_setr_epi32 (0xb730392a, 0xb58eb95e, | |
734 | + 0xfaea2787, 0x138ac342); | |
735 | + } | |
736 | +} | |
737 | + | |
738 | +static void | |
739 | +aes_test (void) /* Called by main () in aes-check.h. */ | |
740 | +{ | |
741 | + int i; | |
742 | + | |
743 | + init_data (src1, src2, edst); /* Load operands and expected results. */ | |
744 | + | |
745 | + for (i = 0; i < NUM; i += 16) /* Sixteen intrinsic calls per pass. */ | |
746 | + { | |
747 | + resdst[i] = _mm_aesdec_si128 (src1[i], src2[i]); | |
748 | + resdst[i + 1] = _mm_aesdec_si128 (src1[i + 1], src2[i + 1]); | |
749 | + resdst[i + 2] = _mm_aesdec_si128 (src1[i + 2], src2[i + 2]); | |
750 | + resdst[i + 3] = _mm_aesdec_si128 (src1[i + 3], src2[i + 3]); | |
751 | + resdst[i + 4] = _mm_aesdec_si128 (src1[i + 4], src2[i + 4]); | |
752 | + resdst[i + 5] = _mm_aesdec_si128 (src1[i + 5], src2[i + 5]); | |
753 | + resdst[i + 6] = _mm_aesdec_si128 (src1[i + 6], src2[i + 6]); | |
754 | + resdst[i + 7] = _mm_aesdec_si128 (src1[i + 7], src2[i + 7]); | |
755 | + resdst[i + 8] = _mm_aesdec_si128 (src1[i + 8], src2[i + 8]); | |
756 | + resdst[i + 9] = _mm_aesdec_si128 (src1[i + 9], src2[i + 9]); | |
757 | + resdst[i + 10] = _mm_aesdec_si128 (src1[i + 10], src2[i + 10]); | |
758 | + resdst[i + 11] = _mm_aesdec_si128 (src1[i + 11], src2[i + 11]); | |
759 | + resdst[i + 12] = _mm_aesdec_si128 (src1[i + 12], src2[i + 12]); | |
760 | + resdst[i + 13] = _mm_aesdec_si128 (src1[i + 13], src2[i + 13]); | |
761 | + resdst[i + 14] = _mm_aesdec_si128 (src1[i + 14], src2[i + 14]); | |
762 | + resdst[i + 15] = _mm_aesdec_si128 (src1[i + 15], src2[i + 15]); | |
763 | + } | |
764 | + | |
765 | + for (i = 0; i < NUM; i++) | |
766 | + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) | |
767 | + abort (); /* resdst[i] differs from edst[i]. */ | |
768 | +} | |
769 | Index: gcc/testsuite/ChangeLog.ix86 | |
770 | =================================================================== | |
771 | --- gcc/testsuite/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0) | |
772 | +++ gcc/testsuite/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364) | |
773 | @@ -0,0 +1,22 @@ | |
774 | +2008-04-08 H.J. Lu <hongjiu.lu@intel.com> | |
775 | + | |
776 | + Backport from mainline: | |
777 | + 2008-04-04 H.J. Lu <hongjiu.lu@intel.com> | |
778 | + | |
779 | + * g++.dg/other/i386-2.C: Include <wmmintrin.h>. | |
780 | + * g++.dg/other/i386-3.C: Likewise. | |
781 | + * gcc.target/i386/sse-13.c: Likewise. | |
782 | + * gcc.target/i386/sse-14.c: Likewise. | |
783 | + | |
784 | + * gcc.target/i386/aes-check.h: New. | |
785 | + * gcc.target/i386/aesdec.c: Likewise. | |
786 | + * gcc.target/i386/aesdeclast.c: Likewise. | |
787 | + * gcc.target/i386/aesenc.c: Likewise. | |
788 | + * gcc.target/i386/aesenclast.c: Likewise. | |
789 | + * gcc.target/i386/aesimc.c: Likewise. | |
790 | + * gcc.target/i386/aeskeygenassist.c: Likewise. | |
791 | + * gcc.target/i386/pclmulqdq.c: Likewise. | |
792 | + * gcc.target/i386/pclmul-check.h: Likewise. | |
793 | + | |
794 | + * gcc.target/i386/i386.exp (check_effective_target_aes): New. | |
795 | + (check_effective_target_pclmul): Likewise. | |
796 | Index: gcc/testsuite/g++.dg/other/i386-2.C | |
797 | =================================================================== | |
798 | --- gcc/testsuite/g++.dg/other/i386-2.C (.../gcc-4_3-branch) (revision 145062) | |
799 | +++ gcc/testsuite/g++.dg/other/i386-2.C (.../ix86/gcc-4_3-branch) (revision 145364) | |
800 | @@ -1,8 +1,9 @@ | |
801 | -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are | |
802 | +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are | |
803 | usable with -O -pedantic-errors. */ | |
804 | /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ | |
805 | -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5" } */ | |
806 | +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ | |
807 | ||
808 | +#include <wmmintrin.h> | |
809 | #include <bmmintrin.h> | |
810 | #include <smmintrin.h> | |
811 | #include <mm3dnow.h> | |
812 | Index: gcc/testsuite/g++.dg/other/i386-3.C | |
813 | =================================================================== | |
814 | --- gcc/testsuite/g++.dg/other/i386-3.C (.../gcc-4_3-branch) (revision 145062) | |
815 | +++ gcc/testsuite/g++.dg/other/i386-3.C (.../ix86/gcc-4_3-branch) (revision 145364) | |
816 | @@ -1,8 +1,9 @@ | |
817 | -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are | |
818 | +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are | |
819 | usable with -O -fkeep-inline-functions. */ | |
820 | /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ | |
821 | -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */ | |
822 | +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -maes -mpclmul -msse4 -msse5" } */ | |
823 | ||
824 | +#include <wmmintrin.h> | |
825 | #include <bmmintrin.h> | |
826 | #include <smmintrin.h> | |
827 | #include <mm3dnow.h> | |
828 | Index: gcc/ChangeLog.ix86 | |
829 | =================================================================== | |
830 | --- gcc/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0) | |
831 | +++ gcc/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364) | |
832 | @@ -0,0 +1,59 @@ | |
833 | +2008-04-08 H.J. Lu <hongjiu.lu@intel.com> | |
834 | + | |
835 | + Backport from mainline: | |
836 | + 2008-04-04 H.J. Lu <hongjiu.lu@intel.com> | |
837 | + | |
838 | + * config.gcc (extra_headers): Add wmmintrin.h for x86 and x86-64. | |
839 | + | |
840 | + * config/i386/cpuid.h (bit_AES): New. | |
841 | + (bit_PCLMUL): Likewise. | |
842 | + | |
843 | + * config/i386/i386.c (pta_flags): Add PTA_AES and PTA_PCLMUL. | |
844 | + (override_options): Handle PTA_AES and PTA_PCLMUL. Enable | |
845 | + SSE2 if AES or PCLMUL is enabled. | |
846 | + (ix86_builtins): Add IX86_BUILTIN_AESENC128, | |
847 | + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128, | |
848 | + IX86_BUILTIN_AESDECLAST128, IX86_BUILTIN_AESIMC128, | |
849 | + IX86_BUILTIN_AESKEYGENASSIST128 and IX86_BUILTIN_PCLMULQDQ128. | |
850 | + (bdesc_sse_3arg): Add IX86_BUILTIN_PCLMULQDQ128. | |
851 | + (bdesc_2arg): Add IX86_BUILTIN_AESENC128, | |
852 | + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128, | |
853 | + IX86_BUILTIN_AESDECLAST128 and IX86_BUILTIN_AESKEYGENASSIST128. | |
854 | + (bdesc_1arg): Add IX86_BUILTIN_AESIMC128. | |
855 | + (ix86_init_mmx_sse_builtins): Define __builtin_ia32_aesenc128, | |
856 | + __builtin_ia32_aesenclast128, __builtin_ia32_aesdec128, | |
857 | + __builtin_ia32_aesdeclast128, __builtin_ia32_aesimc128, | |
858 | + __builtin_ia32_aeskeygenassist128 and | |
859 | + __builtin_ia32_pclmulqdq128. | |
860 | + * config/i386/i386.c (ix86_expand_binop_imm_builtin): New. | |
861 | + (ix86_expand_builtin): Use it for IX86_BUILTIN_PSLLDQI128 and | |
862 | + IX86_BUILTIN_PSRLDQI128. Handle IX86_BUILTIN_AESKEYGENASSIST128. | |
863 | + | |
864 | + * config/i386/i386.h (TARGET_AES): New. | |
865 | + (TARGET_PCLMUL): Likewise. | |
866 | + (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AES and TARGET_PCLMUL. | |
867 | + | |
868 | + * config/i386/i386.md (UNSPEC_AESENC): New. | |
869 | + (UNSPEC_AESENCLAST): Likewise. | |
870 | + (UNSPEC_AESDEC): Likewise. | |
871 | + (UNSPEC_AESDECLAST): Likewise. | |
872 | + (UNSPEC_AESIMC): Likewise. | |
873 | + (UNSPEC_AESKEYGENASSIST): Likewise. | |
874 | + (UNSPEC_PCLMUL): Likewise. | |
875 | + | |
876 | + * config/i386/i386.opt (maes): New. | |
877 | + (mpclmul): Likewise. | |
878 | + | |
879 | + * config/i386/sse.md (aesenc): New pattern. | |
880 | + (aesenclast): Likewise. | |
881 | + (aesdec): Likewise. | |
882 | + (aesdeclast): Likewise. | |
883 | + (aesimc): Likewise. | |
884 | + (aeskeygenassist): Likewise. | |
885 | + (pclmulqdq): Likewise. | |
886 | + | |
887 | + * config/i386/wmmintrin.h: New. | |
888 | + | |
889 | + * doc/extend.texi: Document AES and PCLMUL built-in functions. | |
890 | + | |
891 | + * doc/invoke.texi: Document -maes and -mpclmul. | |
892 | Index: gcc/config.gcc | |
893 | =================================================================== | |
894 | --- gcc/config.gcc (.../gcc-4_3-branch) (revision 145062) | |
895 | +++ gcc/config.gcc (.../ix86/gcc-4_3-branch) (revision 145364) | |
896 | @@ -308,13 +308,15 @@ | |
897 | cpu_type=i386 | |
898 | extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h | |
899 | pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h | |
900 | - nmmintrin.h bmmintrin.h mmintrin-common.h" | |
901 | + nmmintrin.h bmmintrin.h mmintrin-common.h | |
902 | + wmmintrin.h" | |
903 | ;; | |
904 | x86_64-*-*) | |
905 | cpu_type=i386 | |
906 | extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h | |
907 | pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h | |
908 | - nmmintrin.h bmmintrin.h mmintrin-common.h" | |
909 | + nmmintrin.h bmmintrin.h mmintrin-common.h | |
910 | + wmmintrin.h" | |
911 | need_64bit_hwint=yes | |
912 | ;; | |
913 | ia64-*-*) | |
914 | Index: gcc/config/i386/i386.h | |
915 | =================================================================== | |
916 | --- gcc/config/i386/i386.h (.../gcc-4_3-branch) (revision 145062) | |
917 | +++ gcc/config/i386/i386.h (.../ix86/gcc-4_3-branch) (revision 145364) | |
918 | @@ -395,6 +395,8 @@ | |
919 | #define TARGET_SAHF x86_sahf | |
920 | #define TARGET_RECIP x86_recip | |
921 | #define TARGET_FUSED_MADD x86_fused_muladd | |
922 | +#define TARGET_AES (TARGET_SSE2 && x86_aes) | |
923 | +#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul) | |
924 | ||
925 | #define ASSEMBLER_DIALECT (ix86_asm_dialect) | |
926 | ||
927 | @@ -683,6 +685,10 @@ | |
928 | builtin_define ("__SSE4_1__"); \ | |
929 | if (TARGET_SSE4_2) \ | |
930 | builtin_define ("__SSE4_2__"); \ | |
931 | + if (TARGET_AES) \ | |
932 | + builtin_define ("__AES__"); \ | |
933 | + if (TARGET_PCLMUL) \ | |
934 | + builtin_define ("__PCLMUL__"); \ | |
935 | if (TARGET_SSE4A) \ | |
936 | builtin_define ("__SSE4A__"); \ | |
937 | if (TARGET_SSE5) \ | |
938 | Index: gcc/config/i386/i386.md | |
939 | =================================================================== | |
940 | --- gcc/config/i386/i386.md (.../gcc-4_3-branch) (revision 145062) | |
941 | +++ gcc/config/i386/i386.md (.../ix86/gcc-4_3-branch) (revision 145364) | |
942 | @@ -189,6 +189,17 @@ | |
943 | (UNSPEC_FRCZ 156) | |
944 | (UNSPEC_CVTPH2PS 157) | |
945 | (UNSPEC_CVTPS2PH 158) | |
946 | + | |
947 | + ; For AES support | |
948 | + (UNSPEC_AESENC 159) | |
949 | + (UNSPEC_AESENCLAST 160) | |
950 | + (UNSPEC_AESDEC 161) | |
951 | + (UNSPEC_AESDECLAST 162) | |
952 | + (UNSPEC_AESIMC 163) | |
953 | + (UNSPEC_AESKEYGENASSIST 164) | |
954 | + | |
955 | + ; For PCLMUL support | |
956 | + (UNSPEC_PCLMUL 165) | |
957 | ]) | |
958 | ||
959 | (define_constants | |
960 | Index: gcc/config/i386/wmmintrin.h | |
961 | =================================================================== | |
962 | --- gcc/config/i386/wmmintrin.h (.../gcc-4_3-branch) (revision 0) | |
963 | +++ gcc/config/i386/wmmintrin.h (.../ix86/gcc-4_3-branch) (revision 145364) | |
964 | @@ -0,0 +1,123 @@ | |
965 | +/* Copyright (C) 2008 Free Software Foundation, Inc. | |
966 | + | |
967 | + This file is part of GCC. | |
968 | + | |
969 | + GCC is free software; you can redistribute it and/or modify | |
970 | + it under the terms of the GNU General Public License as published by | |
971 | + the Free Software Foundation; either version 2, or (at your option) | |
972 | + any later version. | |
973 | + | |
974 | + GCC is distributed in the hope that it will be useful, | |
975 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
976 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
977 | + GNU General Public License for more details. | |
978 | + | |
979 | + You should have received a copy of the GNU General Public License | |
980 | + along with GCC; see the file COPYING. If not, write to | |
981 | + the Free Software Foundation, 59 Temple Place - Suite 330, | |
982 | + Boston, MA 02111-1307, USA. */ | |
983 | + | |
984 | +/* As a special exception, if you include this header file into source | |
985 | + files compiled by GCC, this header file does not by itself cause | |
986 | + the resulting executable to be covered by the GNU General Public | |
987 | + License. This exception does not however invalidate any other | |
988 | + reasons why the executable file might be covered by the GNU General | |
989 | + Public License. */ | |
990 | + | |
991 | +/* Implemented from the specification included in the Intel C++ Compiler | |
992 | + User Guide and Reference, version 10.1. */ | |
993 | + | |
994 | +#ifndef _WMMINTRIN_H_INCLUDED | |
995 | +#define _WMMINTRIN_H_INCLUDED | |
996 | + | |
997 | +/* We need definitions from the SSE2 header file. */ | |
998 | +#include <emmintrin.h> | |
999 | + | |
1000 | +#if !defined (__AES__) && !defined (__PCLMUL__) | |
1001 | +# error "AES/PCLMUL instructions not enabled" | |
1002 | +#else | |
1003 | + | |
1004 | +/* AES */ | |
1005 | + | |
1006 | +#ifdef __AES__ | |
1007 | +/* Performs 1 round of AES decryption of the first m128i using | |
1008 | + the second m128i as a round key. */ | |
1009 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1010 | +_mm_aesdec_si128 (__m128i __X, __m128i __Y) | |
1011 | +{ | |
1012 | + return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y); | |
1013 | +} | |
1014 | + | |
1015 | +/* Performs the last round of AES decryption of the first m128i | |
1016 | + using the second m128i as a round key. */ | |
1017 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1018 | +_mm_aesdeclast_si128 (__m128i __X, __m128i __Y) | |
1019 | +{ | |
1020 | + return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X, | |
1021 | + (__v2di)__Y); | |
1022 | +} | |
1023 | + | |
1024 | +/* Performs 1 round of AES encryption of the first m128i using | |
1025 | + the second m128i as a round key. */ | |
1026 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1027 | +_mm_aesenc_si128 (__m128i __X, __m128i __Y) | |
1028 | +{ | |
1029 | + return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y); | |
1030 | +} | |
1031 | + | |
1032 | +/* Performs the last round of AES encryption of the first m128i | |
1033 | + using the second m128i as a round key. */ | |
1034 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1035 | +_mm_aesenclast_si128 (__m128i __X, __m128i __Y) | |
1036 | +{ | |
1037 | + return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y); | |
1038 | +} | |
1039 | + | |
1040 | +/* Performs the InverseMixColumn operation on the source m128i | |
1041 | + and stores the result into m128i destination. */ | |
1042 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1043 | +_mm_aesimc_si128 (__m128i __X) | |
1044 | +{ | |
1045 | + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X); | |
1046 | +} | |
1047 | + | |
1048 | +/* Generates a m128i round key for the input m128i AES cipher key and | |
1049 | + byte round constant. The second parameter must be a compile time | |
1050 | + constant. */ | |
1051 | +#ifdef __OPTIMIZE__ | |
1052 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1053 | +_mm_aeskeygenassist_si128 (__m128i __X, const int __C) | |
1054 | +{ | |
1055 | + return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C); | |
1056 | +} | |
1057 | +#else | |
1058 | +#define _mm_aeskeygenassist_si128(X, C) \ | |
1059 | + ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \ | |
1060 | + (int)(C))) | |
1061 | +#endif | |
1062 | +#endif /* __AES__ */ | |
1063 | + | |
1064 | +/* PCLMUL */ | |
1065 | + | |
1066 | +#ifdef __PCLMUL__ | |
1067 | +/* Performs carry-less integer multiplication of 64-bit halves of | |
1068 | + 128-bit input operands. The third parameter indicates which 64-bit | |
1069 | + halves of the first two parameters should be used. It must be | |
1070 | + a compile time constant. */ | |
1071 | +#ifdef __OPTIMIZE__ | |
1072 | +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) | |
1073 | +_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I) | |
1074 | +{ | |
1075 | + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X, | |
1076 | + (__v2di)__Y, __I); | |
1077 | +} | |
1078 | +#else | |
1079 | +#define _mm_clmulepi64_si128(X, Y, I) \ | |
1080 | + ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \ | |
1081 | + (__v2di)(__m128i)(Y), (int)(I))) | |
1082 | +#endif | |
1083 | +#endif /* __PCLMUL__ */ | |
1084 | + | |
1085 | +#endif /* __AES__/__PCLMUL__ */ | |
1086 | + | |
1087 | +#endif /* _WMMINTRIN_H_INCLUDED */ | |
1088 | Index: gcc/config/i386/cpuid.h | |
1089 | =================================================================== | |
1090 | --- gcc/config/i386/cpuid.h (.../gcc-4_3-branch) (revision 145062) | |
1091 | +++ gcc/config/i386/cpuid.h (.../ix86/gcc-4_3-branch) (revision 145364) | |
1092 | @@ -33,11 +33,13 @@ | |
1093 | ||
1094 | /* %ecx */ | |
1095 | #define bit_SSE3 (1 << 0) | |
1096 | +#define bit_PCLMUL (1 << 1) | |
1097 | #define bit_SSSE3 (1 << 9) | |
1098 | #define bit_CMPXCHG16B (1 << 13) | |
1099 | #define bit_SSE4_1 (1 << 19) | |
1100 | #define bit_SSE4_2 (1 << 20) | |
1101 | #define bit_POPCNT (1 << 23) | |
1102 | +#define bit_AES (1 << 25) | |
1103 | ||
1104 | /* %edx */ | |
1105 | #define bit_CMPXCHG8B (1 << 8) | |
1106 | Index: gcc/config/i386/sse.md | |
1107 | =================================================================== | |
1108 | --- gcc/config/i386/sse.md (.../gcc-4_3-branch) (revision 145062) | |
1109 | +++ gcc/config/i386/sse.md (.../ix86/gcc-4_3-branch) (revision 145364) | |
1110 | @@ -8700,3 +8700,80 @@ | |
1111 | } | |
1112 | [(set_attr "type" "ssecmp") | |
1113 | (set_attr "mode" "TI")]) | |
1114 | + | |
1115 | +(define_insn "aesenc" | |
1116 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1117 | + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
1118 | + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | |
1119 | + UNSPEC_AESENC))] | |
1120 | + "TARGET_AES" | |
1121 | + "aesenc\t{%2, %0|%0, %2}" | |
1122 | + [(set_attr "type" "sselog1") | |
1123 | + (set_attr "prefix_extra" "1") | |
1124 | + (set_attr "mode" "TI")]) | |
1125 | + | |
1126 | +(define_insn "aesenclast" | |
1127 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1128 | + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
1129 | + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | |
1130 | + UNSPEC_AESENCLAST))] | |
1131 | + "TARGET_AES" | |
1132 | + "aesenclast\t{%2, %0|%0, %2}" | |
1133 | + [(set_attr "type" "sselog1") | |
1134 | + (set_attr "prefix_extra" "1") | |
1135 | + (set_attr "mode" "TI")]) | |
1136 | + | |
1137 | +(define_insn "aesdec" | |
1138 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1139 | + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
1140 | + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | |
1141 | + UNSPEC_AESDEC))] | |
1142 | + "TARGET_AES" | |
1143 | + "aesdec\t{%2, %0|%0, %2}" | |
1144 | + [(set_attr "type" "sselog1") | |
1145 | + (set_attr "prefix_extra" "1") | |
1146 | + (set_attr "mode" "TI")]) | |
1147 | + | |
1148 | +(define_insn "aesdeclast" | |
1149 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1150 | + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
1151 | + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | |
1152 | + UNSPEC_AESDECLAST))] | |
1153 | + "TARGET_AES" | |
1154 | + "aesdeclast\t{%2, %0|%0, %2}" | |
1155 | + [(set_attr "type" "sselog1") | |
1156 | + (set_attr "prefix_extra" "1") | |
1157 | + (set_attr "mode" "TI")]) | |
1158 | + | |
1159 | +(define_insn "aesimc" | |
1160 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1161 | + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] | |
1162 | + UNSPEC_AESIMC))] | |
1163 | + "TARGET_AES" | |
1164 | + "aesimc\t{%1, %0|%0, %1}" | |
1165 | + [(set_attr "type" "sselog1") | |
1166 | + (set_attr "prefix_extra" "1") | |
1167 | + (set_attr "mode" "TI")]) | |
1168 | + | |
1169 | +(define_insn "aeskeygenassist" | |
1170 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1171 | + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") | |
1172 | + (match_operand:SI 2 "const_0_to_255_operand" "n")] | |
1173 | + UNSPEC_AESKEYGENASSIST))] | |
1174 | + "TARGET_AES" | |
1175 | + "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}" | |
1176 | + [(set_attr "type" "sselog1") | |
1177 | + (set_attr "prefix_extra" "1") | |
1178 | + (set_attr "mode" "TI")]) | |
1179 | + | |
1180 | +(define_insn "pclmulqdq" | |
1181 | + [(set (match_operand:V2DI 0 "register_operand" "=x") | |
1182 | + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
1183 | + (match_operand:V2DI 2 "nonimmediate_operand" "xm") | |
1184 | + (match_operand:SI 3 "const_0_to_255_operand" "n")] | |
1185 | + UNSPEC_PCLMUL))] | |
1186 | + "TARGET_PCLMUL" | |
1187 | + "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" | |
1188 | + [(set_attr "type" "sselog1") | |
1189 | + (set_attr "prefix_extra" "1") | |
1190 | + (set_attr "mode" "TI")]) | |
1191 | Index: gcc/config/i386/i386.opt | |
1192 | =================================================================== | |
1193 | --- gcc/config/i386/i386.opt (.../gcc-4_3-branch) (revision 145062) | |
1194 | +++ gcc/config/i386/i386.opt (.../ix86/gcc-4_3-branch) (revision 145364) | |
1195 | @@ -279,3 +279,11 @@ | |
1196 | Enable automatic generation of fused floating point multiply-add instructions | |
1197 | if the ISA supports such instructions. The -mfused-madd option is on by | |
1198 | default. | |
1199 | + | |
1200 | +maes | |
1201 | +Target Report RejectNegative Var(x86_aes) | |
1202 | +Support AES built-in functions and code generation | |
1203 | + | |
1204 | +mpclmul | |
1205 | +Target Report RejectNegative Var(x86_pclmul) | |
1206 | +Support PCLMUL built-in functions and code generation | |
1207 | Index: gcc/config/i386/i386.c | |
1208 | =================================================================== | |
1209 | --- gcc/config/i386/i386.c (.../gcc-4_3-branch) (revision 145062) | |
1210 | +++ gcc/config/i386/i386.c (.../ix86/gcc-4_3-branch) (revision 145364) | |
1211 | @@ -2077,7 +2077,9 @@ | |
1212 | PTA_NO_SAHF = 1 << 13, | |
1213 | PTA_SSE4_1 = 1 << 14, | |
1214 | PTA_SSE4_2 = 1 << 15, | |
1215 | - PTA_SSE5 = 1 << 16 | |
1216 | + PTA_SSE5 = 1 << 16, | |
1217 | + PTA_AES = 1 << 17, | |
1218 | + PTA_PCLMUL = 1 << 18 | |
1219 | }; | |
1220 | ||
1221 | static struct pta | |
1222 | @@ -2384,6 +2386,10 @@ | |
1223 | x86_prefetch_sse = true; | |
1224 | if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))) | |
1225 | x86_sahf = true; | |
1226 | + if (processor_alias_table[i].flags & PTA_AES) | |
1227 | + x86_aes = true; | |
1228 | + if (processor_alias_table[i].flags & PTA_PCLMUL) | |
1229 | + x86_pclmul = true; | |
1230 | ||
1231 | break; | |
1232 | } | |
1233 | @@ -2427,6 +2433,14 @@ | |
1234 | if (i == pta_size) | |
1235 | error ("bad value (%s) for -mtune= switch", ix86_tune_string); | |
1236 | ||
1237 | + /* Enable SSE2 if AES or PCLMUL is enabled. */ | |
1238 | + if ((x86_aes || x86_pclmul) | |
1239 | + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) | |
1240 | + { | |
1241 | + ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; | |
1242 | + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; | |
1243 | + } | |
1244 | + | |
1245 | ix86_tune_mask = 1u << ix86_tune; | |
1246 | for (i = 0; i < X86_TUNE_LAST; ++i) | |
1247 | ix86_tune_features[i] &= ix86_tune_mask; | |
1248 | @@ -17582,6 +17596,17 @@ | |
1249 | ||
1250 | IX86_BUILTIN_PCMPGTQ, | |
1251 | ||
1252 | + /* AES instructions */ | |
1253 | + IX86_BUILTIN_AESENC128, | |
1254 | + IX86_BUILTIN_AESENCLAST128, | |
1255 | + IX86_BUILTIN_AESDEC128, | |
1256 | + IX86_BUILTIN_AESDECLAST128, | |
1257 | + IX86_BUILTIN_AESIMC128, | |
1258 | + IX86_BUILTIN_AESKEYGENASSIST128, | |
1259 | + | |
1260 | + /* PCLMUL instruction */ | |
1261 | + IX86_BUILTIN_PCLMULQDQ128, | |
1262 | + | |
1263 | /* TFmode support builtins. */ | |
1264 | IX86_BUILTIN_INFQ, | |
1265 | IX86_BUILTIN_FABSQ, | |
1266 | @@ -17937,6 +17962,9 @@ | |
1267 | { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 }, | |
1268 | { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 }, | |
1269 | { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 }, | |
1270 | + | |
1271 | + /* PCLMUL */ | |
1272 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 }, | |
1273 | }; | |
1274 | ||
1275 | static const struct builtin_description bdesc_2arg[] = | |
1276 | @@ -18247,6 +18275,13 @@ | |
1277 | ||
1278 | /* SSE4.2 */ | |
1279 | { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 }, | |
1280 | + | |
1281 | + /* AES */ | |
1282 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 }, | |
1283 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 }, | |
1284 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 }, | |
1285 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 }, | |
1286 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 }, | |
1287 | }; | |
1288 | ||
1289 | static const struct builtin_description bdesc_1arg[] = | |
1290 | @@ -18322,6 +18357,9 @@ | |
1291 | /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */ | |
1292 | { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 }, | |
1293 | { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 }, | |
1294 | + | |
1295 | + /* AES */ | |
1296 | + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 }, | |
1297 | }; | |
1298 | ||
1299 | /* SSE5 */ | |
1300 | @@ -19555,6 +19593,25 @@ | |
1301 | NULL_TREE); | |
1302 | def_builtin_const (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI); | |
1303 | ||
1304 | + /* AES */ | |
1305 | + if (TARGET_AES) | |
1306 | + { | |
1307 | + /* Define AES built-in functions only if AES is enabled. */ | |
1308 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); | |
1309 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); | |
1310 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); | |
1311 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); | |
1312 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); | |
1313 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); | |
1314 | + } | |
1315 | + | |
1316 | + /* PCLMUL */ | |
1317 | + if (TARGET_PCLMUL) | |
1318 | + { | |
1319 | + /* Define PCLMUL built-in function only if PCLMUL is enabled. */ | |
1320 | + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); | |
1321 | + } | |
1322 | + | |
1323 | /* AMDFAM10 SSE4A New built-ins */ | |
1324 | def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); | |
1325 | def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); | |
1326 | @@ -19830,6 +19887,44 @@ | |
1327 | return target; | |
1328 | } | |
1329 | ||
1330 | +/* Subroutine of ix86_expand_builtin to take care of binop insns whose | |
1331 | + last operand must be an immediate; reports an error if it is not. */ | |
1332 | + | |
1333 | +static rtx | |
1334 | +ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp, | |
1335 | + rtx target) | |
1336 | +{ | |
1337 | + rtx pat; | |
1338 | + tree arg0 = CALL_EXPR_ARG (exp, 0); | |
1339 | + tree arg1 = CALL_EXPR_ARG (exp, 1); | |
1340 | + rtx op0 = expand_normal (arg0); | |
1341 | + rtx op1 = expand_normal (arg1); | |
1342 | + enum machine_mode tmode = insn_data[icode].operand[0].mode; | |
1343 | + enum machine_mode mode0 = insn_data[icode].operand[1].mode; | |
1344 | + enum machine_mode mode1 = insn_data[icode].operand[2].mode; | |
1345 | + | |
1346 | + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
1347 | + { | |
1348 | + op0 = copy_to_reg (op0); | |
1349 | + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); | |
1350 | + } | |
1351 | + | |
1352 | + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
1353 | + { | |
1354 | + error ("the last operand must be an immediate"); | |
1355 | + return const0_rtx; | |
1356 | + } | |
1357 | + | |
1358 | + target = gen_reg_rtx (V2DImode); | |
1359 | + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, | |
1360 | + V2DImode, 0), | |
1361 | + op0, op1); | |
1362 | + if (! pat) | |
1363 | + return 0; | |
1364 | + emit_insn (pat); | |
1365 | + return target; | |
1366 | +} | |
1367 | + | |
1368 | /* Subroutine of ix86_expand_builtin to take care of binop insns. */ | |
1369 | ||
1370 | static rtx | |
1371 | @@ -20926,34 +21021,18 @@ | |
1372 | return target; | |
1373 | ||
1374 | case IX86_BUILTIN_PSLLDQI128: | |
1375 | + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3, | |
1376 | + exp, target); | |
1377 | + break; | |
1378 | + | |
1379 | case IX86_BUILTIN_PSRLDQI128: | |
1380 | - icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 | |
1381 | - : CODE_FOR_sse2_lshrti3); | |
1382 | - arg0 = CALL_EXPR_ARG (exp, 0); | |
1383 | - arg1 = CALL_EXPR_ARG (exp, 1); | |
1384 | - op0 = expand_normal (arg0); | |
1385 | - op1 = expand_normal (arg1); | |
1386 | - tmode = insn_data[icode].operand[0].mode; | |
1387 | - mode1 = insn_data[icode].operand[1].mode; | |
1388 | - mode2 = insn_data[icode].operand[2].mode; | |
1389 | + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3, | |
1390 | + exp, target); | |
1391 | + break; | |
1392 | ||
1393 | - if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) | |
1394 | - { | |
1395 | - op0 = copy_to_reg (op0); | |
1396 | - op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); | |
1397 | - } | |
1398 | - if (! (*insn_data[icode].operand[2].predicate) (op1, mode2)) | |
1399 | - { | |
1400 | - error ("shift must be an immediate"); | |
1401 | - return const0_rtx; | |
1402 | - } | |
1403 | - target = gen_reg_rtx (V2DImode); | |
1404 | - pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), | |
1405 | - op0, op1); | |
1406 | - if (! pat) | |
1407 | - return 0; | |
1408 | - emit_insn (pat); | |
1409 | - return target; | |
1410 | + case IX86_BUILTIN_AESKEYGENASSIST128: | |
1411 | + return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist, | |
1412 | + exp, target); | |
1413 | ||
1414 | case IX86_BUILTIN_FEMMS: | |
1415 | emit_insn (gen_mmx_femms ()); | |
1416 | ||
1417 | Property changes on: . | |
1418 | ___________________________________________________________________ | |
1419 | Added: svn:mergeinfo | |
1420 | Merged /branches/gcc-4_3-branch:r139021-145062 | |
1421 |