1 Index: gcc/doc/extend.texi
2 ===================================================================
3 --- gcc/doc/extend.texi (.../gcc-4_3-branch) (revision 145062)
4 +++ gcc/doc/extend.texi (.../ix86/gcc-4_3-branch) (revision 145364)
6 Generates the @code{popcntq} machine instruction.
9 +The following built-in functions are available when @option{-maes} is
10 +used. All of them generate the machine instruction that is part of the
14 +v2di __builtin_ia32_aesenc128 (v2di, v2di)
15 +v2di __builtin_ia32_aesenclast128 (v2di, v2di)
16 +v2di __builtin_ia32_aesdec128 (v2di, v2di)
17 +v2di __builtin_ia32_aesdeclast128 (v2di, v2di)
18 +v2di __builtin_ia32_aeskeygenassist128 (v2di, const int)
19 +v2di __builtin_ia32_aesimc128 (v2di)
22 +The following built-in function is available when @option{-mpclmul} is
26 +@item v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int)
27 +Generates the @code{pclmulqdq} machine instruction.
30 The following built-in functions are available when @option{-msse4a} is used.
31 All of them generate the machine instruction that is part of the name.
33 Index: gcc/doc/invoke.texi
34 ===================================================================
35 --- gcc/doc/invoke.texi (.../gcc-4_3-branch) (revision 145062)
36 +++ gcc/doc/invoke.texi (.../ix86/gcc-4_3-branch) (revision 145364)
38 -mno-wide-multiply -mrtd -malign-double @gol
39 -mpreferred-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol
40 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
42 -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
43 -mthreads -mno-align-stringops -minline-all-stringops @gol
44 -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
45 @@ -10733,6 +10734,10 @@
56 @@ -10750,8 +10755,8 @@
59 These switches enable or disable the use of instructions in the MMX,
60 -SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, SSE5, ABM or 3DNow!@: extended
62 +SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
63 +3DNow!@: extended instruction sets.
64 These extensions are also available as built-in functions: see
65 @ref{X86 Built-in Functions}, for details of the functions enabled and
66 disabled by these switches.
67 Index: gcc/testsuite/gcc.target/i386/sse-14.c
68 ===================================================================
69 --- gcc/testsuite/gcc.target/i386/sse-14.c (.../gcc-4_3-branch) (revision 145062)
70 +++ gcc/testsuite/gcc.target/i386/sse-14.c (.../ix86/gcc-4_3-branch) (revision 145364)
72 /* { dg-do compile } */
73 -/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */
74 +/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
76 /* Test that the intrinsics compile without optimization. All of them are
77 - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
78 + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
79 that reference the proper builtin functions. Defining away "extern" and
80 "__inline" results in all of them being compiled as proper functions. */
85 +#include <wmmintrin.h>
86 #include <bmmintrin.h>
87 #include <smmintrin.h>
90 test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
91 test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
94 +test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
95 +test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
98 test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
99 test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
100 Index: gcc/testsuite/gcc.target/i386/i386.exp
101 ===================================================================
102 --- gcc/testsuite/gcc.target/i386/i386.exp (.../gcc-4_3-branch) (revision 145062)
103 +++ gcc/testsuite/gcc.target/i386/i386.exp (.../ix86/gcc-4_3-branch) (revision 145364)
108 +# Return 1 if aes instructions can be compiled.
109 +proc check_effective_target_aes { } {
110 + return [check_no_compiler_messages aes object {
111 + typedef long long __m128i __attribute__ ((__vector_size__ (16)));
112 + typedef long long __v2di __attribute__ ((__vector_size__ (16)));
114 + __m128i _mm_aesimc_si128 (__m128i __X)
116 + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
121 +# Return 1 if pclmul instructions can be compiled.
122 +proc check_effective_target_pclmul { } {
123 + return [check_no_compiler_messages pclmul object {
124 + typedef long long __m128i __attribute__ ((__vector_size__ (16)));
125 + typedef long long __v2di __attribute__ ((__vector_size__ (16)));
127 + __m128i pclmulqdq_test (__m128i __X, __m128i __Y)
129 + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
136 # Return 1 if sse4a instructions can be compiled.
137 proc check_effective_target_sse4a { } {
138 return [check_no_compiler_messages sse4a object {
139 Index: gcc/testsuite/gcc.target/i386/aesdeclast.c
140 ===================================================================
141 --- gcc/testsuite/gcc.target/i386/aesdeclast.c (.../gcc-4_3-branch) (revision 0)
142 +++ gcc/testsuite/gcc.target/i386/aesdeclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
145 +/* { dg-require-effective-target aes } */
146 +/* { dg-options "-O2 -maes" } */
148 +#include <wmmintrin.h>
151 +#include "aes-check.h"
153 +extern void abort (void);
157 +static __m128i src1[NUM];
158 +static __m128i src2[NUM];
159 +static __m128i edst[NUM];
161 +static __m128i resdst[NUM];
163 +/* Initialize input/output vectors. (Currently, there is only one set of
164 + input/output vectors). */
167 +init_data (__m128i *s1, __m128i *s2, __m128i *d)
171 + for (i = 0; i < NUM; i++)
173 + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
174 + 0x73745665, 0x7b5b5465);
175 + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
176 + 0x68617929, 0x48692853);
177 + d[i] = _mm_setr_epi32 (0x72a593d0, 0xd410637b,
178 + 0x6b317f95, 0xc5a391ef);
187 + init_data (src1, src2, edst);
189 + for (i = 0; i < NUM; i += 16)
191 + resdst[i] = _mm_aesdeclast_si128 (src1[i], src2[i]);
192 + resdst[i + 1] = _mm_aesdeclast_si128 (src1[i + 1], src2[i + 1]);
193 + resdst[i + 2] = _mm_aesdeclast_si128 (src1[i + 2], src2[i + 2]);
194 + resdst[i + 3] = _mm_aesdeclast_si128 (src1[i + 3], src2[i + 3]);
195 + resdst[i + 4] = _mm_aesdeclast_si128 (src1[i + 4], src2[i + 4]);
196 + resdst[i + 5] = _mm_aesdeclast_si128 (src1[i + 5], src2[i + 5]);
197 + resdst[i + 6] = _mm_aesdeclast_si128 (src1[i + 6], src2[i + 6]);
198 + resdst[i + 7] = _mm_aesdeclast_si128 (src1[i + 7], src2[i + 7]);
199 + resdst[i + 8] = _mm_aesdeclast_si128 (src1[i + 8], src2[i + 8]);
200 + resdst[i + 9] = _mm_aesdeclast_si128 (src1[i + 9], src2[i + 9]);
201 + resdst[i + 10] = _mm_aesdeclast_si128 (src1[i + 10], src2[i + 10]);
202 + resdst[i + 11] = _mm_aesdeclast_si128 (src1[i + 11], src2[i + 11]);
203 + resdst[i + 12] = _mm_aesdeclast_si128 (src1[i + 12], src2[i + 12]);
204 + resdst[i + 13] = _mm_aesdeclast_si128 (src1[i + 13], src2[i + 13]);
205 + resdst[i + 14] = _mm_aesdeclast_si128 (src1[i + 14], src2[i + 14]);
206 + resdst[i + 15] = _mm_aesdeclast_si128 (src1[i + 15], src2[i + 15]);
209 + for (i = 0; i < NUM; i++)
210 + if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
213 Index: gcc/testsuite/gcc.target/i386/pclmulqdq.c
214 ===================================================================
215 --- gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../gcc-4_3-branch) (revision 0)
216 +++ gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../ix86/gcc-4_3-branch) (revision 145364)
219 +/* { dg-require-effective-target pclmul } */
220 +/* { dg-options "-O2 -mpclmul" } */
222 +#include <wmmintrin.h>
225 +#include "pclmul-check.h"
227 +extern void abort (void);
231 +static __m128i s1[NUM];
232 +static __m128i s2[NUM];
233 +/* We need this array to generate the memory-operand form of the instruction. */
234 +static __m128i s2m[NUM];
236 +static __m128i e_00[NUM];
237 +static __m128i e_01[NUM];
238 +static __m128i e_10[NUM];
239 +static __m128i e_11[NUM];
241 +static __m128i d_00[NUM];
242 +static __m128i d_01[NUM];
243 +static __m128i d_10[NUM];
244 +static __m128i d_11[NUM];
246 +/* Initialize input/output vectors. (Currently, there is only one set
247 + of input/output vectors). */
249 +init_data (__m128i *ls1, __m128i *ls2, __m128i *le_00, __m128i *le_01,
250 + __m128i *le_10, __m128i *le_11)
254 + for (i = 0; i < NUM; i++)
256 + ls1[i] = _mm_set_epi32 (0x7B5B5465, 0x73745665,
257 + 0x63746F72, 0x5D53475D);
258 + ls2[i] = _mm_set_epi32 (0x48692853, 0x68617929,
259 + 0x5B477565, 0x726F6E5D);
260 + s2m[i] = _mm_set_epi32 (0x48692853, 0x68617929,
261 + 0x5B477565, 0x726F6E5D);
262 + le_00[i] = _mm_set_epi32 (0x1D4D84C8, 0x5C3440C0,
263 + 0x929633D5, 0xD36F0451);
264 + le_01[i] = _mm_set_epi32 (0x1A2BF6DB, 0x3A30862F,
265 + 0xBABF262D, 0xF4B7D5C9);
266 + le_10[i] = _mm_set_epi32 (0x1BD17C8D, 0x556AB5A1,
267 + 0x7FA540AC, 0x2A281315);
268 + le_11[i] = _mm_set_epi32 (0x1D1E1F2C, 0x592E7C45,
269 + 0xD66EE03E, 0x410FD4ED);
278 + init_data (s1, s2, e_00, e_01, e_10, e_11);
280 + for (i = 0; i < NUM; i += 2)
282 + d_00[i] = _mm_clmulepi64_si128 (s1[i], s2m[i], 0x00);
283 + d_01[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x01);
284 + d_10[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x10);
285 + d_11[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x11);
287 + d_11[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x11);
288 + d_00[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x00);
289 + d_10[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2m[i + 1], 0x10);
290 + d_01[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x01);
293 + for (i = 0; i < NUM; i++)
295 + if (memcmp (d_00 + i, e_00 + i, sizeof (__m128i)))
297 + if (memcmp (d_01 + i, e_01 + i, sizeof (__m128i)))
299 + if (memcmp (d_10 + i, e_10 + i, sizeof (__m128i)))
301 + if (memcmp(d_11 + i, e_11 + i, sizeof (__m128i)))
305 Index: gcc/testsuite/gcc.target/i386/aes-check.h
306 ===================================================================
307 --- gcc/testsuite/gcc.target/i386/aes-check.h (.../gcc-4_3-branch) (revision 0)
308 +++ gcc/testsuite/gcc.target/i386/aes-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
315 +static void aes_test (void);
320 + unsigned int eax, ebx, ecx, edx;
322 + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
325 + /* Run AES test only if host has AES support. */
330 + printf ("PASSED\n");
335 + printf ("SKIPPED\n");
340 Index: gcc/testsuite/gcc.target/i386/pclmul-check.h
341 ===================================================================
342 --- gcc/testsuite/gcc.target/i386/pclmul-check.h (.../gcc-4_3-branch) (revision 0)
343 +++ gcc/testsuite/gcc.target/i386/pclmul-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
350 +static void pclmul_test (void);
355 + unsigned int eax, ebx, ecx, edx;
357 + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
360 + /* Run PCLMULQDQ test only if host has PCLMULQDQ support. */
361 + if (ecx & bit_PCLMUL)
365 + printf ("PASSED\n");
370 + printf ("SKIPPED\n");
375 Index: gcc/testsuite/gcc.target/i386/aeskeygenassist.c
376 ===================================================================
377 --- gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../gcc-4_3-branch) (revision 0)
378 +++ gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../ix86/gcc-4_3-branch) (revision 145364)
381 +/* { dg-require-effective-target aes } */
382 +/* { dg-options "-O2 -maes" } */
384 +#include <wmmintrin.h>
387 +#include "aes-check.h"
389 +extern void abort (void);
394 +static __m128i src1[NUM];
395 +static __m128i edst[NUM];
397 +static __m128i resdst[NUM];
399 +/* Initialize input/output vectors. (Currently, there is only one set
400 + of input/output vectors). */
403 +init_data (__m128i *s1, __m128i *d)
406 + for (i = 0; i < NUM; i++)
408 + s1[i] = _mm_setr_epi32 (0x16157e2b, 0xa6d2ae28,
409 + 0x8815f7ab, 0x3c4fcf09);
410 + d[i] = _mm_setr_epi32 (0x24b5e434, 0x3424b5e5,
411 + 0xeb848a01, 0x01eb848b);
420 + init_data (src1, edst);
422 + for (i = 0; i < NUM; i += 16)
424 + resdst[i] = _mm_aeskeygenassist_si128 (src1[i], IMM8);
425 + resdst[i + 1] = _mm_aeskeygenassist_si128 (src1[i + 1], IMM8);
426 + resdst[i + 2] = _mm_aeskeygenassist_si128 (src1[i + 2], IMM8);
427 + resdst[i + 3] = _mm_aeskeygenassist_si128 (src1[i + 3], IMM8);
428 + resdst[i + 4] = _mm_aeskeygenassist_si128 (src1[i + 4], IMM8);
429 + resdst[i + 5] = _mm_aeskeygenassist_si128 (src1[i + 5], IMM8);
430 + resdst[i + 6] = _mm_aeskeygenassist_si128 (src1[i + 6], IMM8);
431 + resdst[i + 7] = _mm_aeskeygenassist_si128 (src1[i + 7], IMM8);
432 + resdst[i + 8] = _mm_aeskeygenassist_si128 (src1[i + 8], IMM8);
433 + resdst[i + 9] = _mm_aeskeygenassist_si128 (src1[i + 9], IMM8);
434 + resdst[i + 10] = _mm_aeskeygenassist_si128 (src1[i + 10], IMM8);
435 + resdst[i + 11] = _mm_aeskeygenassist_si128 (src1[i + 11], IMM8);
436 + resdst[i + 12] = _mm_aeskeygenassist_si128 (src1[i + 12], IMM8);
437 + resdst[i + 13] = _mm_aeskeygenassist_si128 (src1[i + 13], IMM8);
438 + resdst[i + 14] = _mm_aeskeygenassist_si128 (src1[i + 14], IMM8);
439 + resdst[i + 15] = _mm_aeskeygenassist_si128 (src1[i + 15], IMM8);
442 + for (i = 0; i < NUM; i++)
443 + if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
446 Index: gcc/testsuite/gcc.target/i386/aesenclast.c
447 ===================================================================
448 --- gcc/testsuite/gcc.target/i386/aesenclast.c (.../gcc-4_3-branch) (revision 0)
449 +++ gcc/testsuite/gcc.target/i386/aesenclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
452 +/* { dg-require-effective-target aes } */
453 +/* { dg-options "-O2 -maes" } */
455 +#include <wmmintrin.h>
458 +#include "aes-check.h"
460 +extern void abort (void);
464 +static __m128i src1[NUM];
465 +static __m128i src2[NUM];
466 +static __m128i edst[NUM];
468 +static __m128i resdst[NUM];
470 +/* Initialize input/output vectors. (Currently, there is only one
471 + set of input/output vectors). */
474 +init_data (__m128i *s1, __m128i *s2, __m128i *d)
477 + for (i = 0; i < NUM; i++)
479 + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
480 + 0x73745665, 0x7b5b5465);
481 + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
482 + 0x68617929, 0x48692853);
483 + d[i] = _mm_setr_epi32 (0x53fdc611, 0x177ec425,
484 + 0x938c5964, 0xc7fb881e);
493 + init_data (src1, src2, edst);
495 + for (i = 0; i < NUM; i += 16)
497 + resdst[i] = _mm_aesenclast_si128 (src1[i], src2[i]);
498 + resdst[i + 1] = _mm_aesenclast_si128 (src1[i + 1], src2[i + 1]);
499 + resdst[i + 2] = _mm_aesenclast_si128 (src1[i + 2], src2[i + 2]);
500 + resdst[i + 3] = _mm_aesenclast_si128 (src1[i + 3], src2[i + 3]);
501 + resdst[i + 4] = _mm_aesenclast_si128 (src1[i + 4], src2[i + 4]);
502 + resdst[i + 5] = _mm_aesenclast_si128 (src1[i + 5], src2[i + 5]);
503 + resdst[i + 6] = _mm_aesenclast_si128 (src1[i + 6], src2[i + 6]);
504 + resdst[i + 7] = _mm_aesenclast_si128 (src1[i + 7], src2[i + 7]);
505 + resdst[i + 8] = _mm_aesenclast_si128 (src1[i + 8], src2[i + 8]);
506 + resdst[i + 9] = _mm_aesenclast_si128 (src1[i + 9], src2[i + 9]);
507 + resdst[i + 10] = _mm_aesenclast_si128 (src1[i + 10], src2[i + 10]);
508 + resdst[i + 11] = _mm_aesenclast_si128 (src1[i + 11], src2[i + 11]);
509 + resdst[i + 12] = _mm_aesenclast_si128 (src1[i + 12], src2[i + 12]);
510 + resdst[i + 13] = _mm_aesenclast_si128 (src1[i + 13], src2[i + 13]);
511 + resdst[i + 14] = _mm_aesenclast_si128 (src1[i + 14], src2[i + 14]);
512 + resdst[i + 15] = _mm_aesenclast_si128 (src1[i + 15], src2[i + 15]);
515 + for (i = 0; i < NUM; i++)
516 + if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
519 Index: gcc/testsuite/gcc.target/i386/aesimc.c
520 ===================================================================
521 --- gcc/testsuite/gcc.target/i386/aesimc.c (.../gcc-4_3-branch) (revision 0)
522 +++ gcc/testsuite/gcc.target/i386/aesimc.c (.../ix86/gcc-4_3-branch) (revision 145364)
525 +/* { dg-require-effective-target aes } */
526 +/* { dg-options "-O2 -maes" } */
528 +#include <wmmintrin.h>
531 +#include "aes-check.h"
533 +extern void abort (void);
537 +static __m128i src1[NUM];
538 +static __m128i edst[NUM];
540 +static __m128i resdst[NUM];
542 +/* Initialize input/output vectors. (Currently, there is only one set
543 + of input/output vectors). */
546 +init_data (__m128i *s1, __m128i *d)
550 + for (i = 0; i < NUM; i++)
552 + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
553 + 0x73745665, 0x7b5b5465);
554 + d[i] = _mm_setr_epi32 (0x81c3b3e5, 0x2b18330a,
555 + 0x44b109c8, 0x627a6f66);
564 + init_data (src1, edst);
566 + for (i = 0; i < NUM; i += 16)
568 + resdst[i] = _mm_aesimc_si128 (src1[i]);
569 + resdst[i + 1] = _mm_aesimc_si128 (src1[i + 1]);
570 + resdst[i + 2] = _mm_aesimc_si128 (src1[i + 2]);
571 + resdst[i + 3] = _mm_aesimc_si128 (src1[i + 3]);
572 + resdst[i + 4] = _mm_aesimc_si128 (src1[i + 4]);
573 + resdst[i + 5] = _mm_aesimc_si128 (src1[i + 5]);
574 + resdst[i + 6] = _mm_aesimc_si128 (src1[i + 6]);
575 + resdst[i + 7] = _mm_aesimc_si128 (src1[i + 7]);
576 + resdst[i + 8] = _mm_aesimc_si128 (src1[i + 8]);
577 + resdst[i + 9] = _mm_aesimc_si128 (src1[i + 9]);
578 + resdst[i + 10] = _mm_aesimc_si128 (src1[i + 10]);
579 + resdst[i + 11] = _mm_aesimc_si128 (src1[i + 11]);
580 + resdst[i + 12] = _mm_aesimc_si128 (src1[i + 12]);
581 + resdst[i + 13] = _mm_aesimc_si128 (src1[i + 13]);
582 + resdst[i + 14] = _mm_aesimc_si128 (src1[i + 14]);
583 + resdst[i + 15] = _mm_aesimc_si128 (src1[i + 15]);
586 + for (i = 0; i < NUM; i++)
587 + if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
590 Index: gcc/testsuite/gcc.target/i386/aesenc.c
591 ===================================================================
592 --- gcc/testsuite/gcc.target/i386/aesenc.c (.../gcc-4_3-branch) (revision 0)
593 +++ gcc/testsuite/gcc.target/i386/aesenc.c (.../ix86/gcc-4_3-branch) (revision 145364)
596 +/* { dg-require-effective-target aes } */
597 +/* { dg-options "-O2 -maes" } */
599 +#include <wmmintrin.h>
602 +#include "aes-check.h"
604 +extern void abort (void);
608 +static __m128i src1[NUM];
609 +static __m128i src2[NUM];
610 +static __m128i edst[NUM];
612 +static __m128i resdst[NUM];
614 +/* Initialize input/output vectors. (Currently, there is only one set
615 + of input/output vectors). */
618 +init_data (__m128i *s1, __m128i *s2, __m128i *d)
621 + for (i = 0; i < NUM; i++)
623 + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
624 + 0x73745665, 0x7b5b5465);
625 + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
626 + 0x68617929, 0x48692853);
627 + d[i] = _mm_setr_epi32 (0xded7e595, 0x8b104b58,
628 + 0x9fdba3c5, 0xa8311c2f);
637 + init_data (src1, src2, edst);
639 + for (i = 0; i < NUM; i += 16)
641 + resdst[i] = _mm_aesenc_si128 (src1[i], src2[i]);
642 + resdst[i + 1] = _mm_aesenc_si128 (src1[i + 1], src2[i + 1]);
643 + resdst[i + 2] = _mm_aesenc_si128 (src1[i + 2], src2[i + 2]);
644 + resdst[i + 3] = _mm_aesenc_si128 (src1[i + 3], src2[i + 3]);
645 + resdst[i + 4] = _mm_aesenc_si128 (src1[i + 4], src2[i + 4]);
646 + resdst[i + 5] = _mm_aesenc_si128 (src1[i + 5], src2[i + 5]);
647 + resdst[i + 6] = _mm_aesenc_si128 (src1[i + 6], src2[i + 6]);
648 + resdst[i + 7] = _mm_aesenc_si128 (src1[i + 7], src2[i + 7]);
649 + resdst[i + 8] = _mm_aesenc_si128 (src1[i + 8], src2[i + 8]);
650 + resdst[i + 9] = _mm_aesenc_si128 (src1[i + 9], src2[i + 9]);
651 + resdst[i + 10] = _mm_aesenc_si128 (src1[i + 10], src2[i + 10]);
652 + resdst[i + 11] = _mm_aesenc_si128 (src1[i + 11], src2[i + 11]);
653 + resdst[i + 12] = _mm_aesenc_si128 (src1[i + 12], src2[i + 12]);
654 + resdst[i + 13] = _mm_aesenc_si128 (src1[i + 13], src2[i + 13]);
655 + resdst[i + 14] = _mm_aesenc_si128 (src1[i + 14], src2[i + 14]);
656 + resdst[i + 15] = _mm_aesenc_si128 (src1[i + 15], src2[i + 15]);
659 + for (i = 0; i < NUM; i++)
660 + if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
663 Index: gcc/testsuite/gcc.target/i386/sse-13.c
664 ===================================================================
665 --- gcc/testsuite/gcc.target/i386/sse-13.c (.../gcc-4_3-branch) (revision 145062)
666 +++ gcc/testsuite/gcc.target/i386/sse-13.c (.../ix86/gcc-4_3-branch) (revision 145364)
668 /* { dg-do compile } */
669 -/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5" } */
670 +/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
672 /* Test that the intrinsics compile with optimization. All of them are
673 - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
674 + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
675 that reference the proper builtin functions. Defining away "extern" and
676 "__inline" results in all of them being compiled as proper functions. */
679 #define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1)
680 #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
683 +#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
684 +#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
687 #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
688 #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
690 #define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
691 #define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
693 +#include <wmmintrin.h>
694 #include <bmmintrin.h>
695 #include <smmintrin.h>
697 Index: gcc/testsuite/gcc.target/i386/aesdec.c
698 ===================================================================
699 --- gcc/testsuite/gcc.target/i386/aesdec.c (.../gcc-4_3-branch) (revision 0)
700 +++ gcc/testsuite/gcc.target/i386/aesdec.c (.../ix86/gcc-4_3-branch) (revision 145364)
703 +/* { dg-require-effective-target aes } */
704 +/* { dg-options "-O2 -maes" } */
706 +#include <wmmintrin.h>
709 +#include "aes-check.h"
711 +extern void abort (void);
715 +static __m128i src1[NUM];
716 +static __m128i src2[NUM];
717 +static __m128i edst[NUM];
719 +static __m128i resdst[NUM];
721 +/* Initialize input/output vectors. (Currently, there is only one set
722 + of input/output vectors). */
724 +init_data (__m128i *s1, __m128i *s2, __m128i *d)
727 + for (i = 0; i < NUM; i++)
729 + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
730 + 0x73745665, 0x7b5b5465);
731 + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
732 + 0x68617929, 0x48692853);
733 + d[i] = _mm_setr_epi32 (0xb730392a, 0xb58eb95e,
734 + 0xfaea2787, 0x138ac342);
743 + init_data (src1, src2, edst);
745 + for (i = 0; i < NUM; i += 16)
747 + resdst[i] = _mm_aesdec_si128 (src1[i], src2[i]);
748 + resdst[i + 1] = _mm_aesdec_si128 (src1[i + 1], src2[i + 1]);
749 + resdst[i + 2] = _mm_aesdec_si128 (src1[i + 2], src2[i + 2]);
750 + resdst[i + 3] = _mm_aesdec_si128 (src1[i + 3], src2[i + 3]);
751 + resdst[i + 4] = _mm_aesdec_si128 (src1[i + 4], src2[i + 4]);
752 + resdst[i + 5] = _mm_aesdec_si128 (src1[i + 5], src2[i + 5]);
753 + resdst[i + 6] = _mm_aesdec_si128 (src1[i + 6], src2[i + 6]);
754 + resdst[i + 7] = _mm_aesdec_si128 (src1[i + 7], src2[i + 7]);
755 + resdst[i + 8] = _mm_aesdec_si128 (src1[i + 8], src2[i + 8]);
756 + resdst[i + 9] = _mm_aesdec_si128 (src1[i + 9], src2[i + 9]);
757 + resdst[i + 10] = _mm_aesdec_si128 (src1[i + 10], src2[i + 10]);
758 + resdst[i + 11] = _mm_aesdec_si128 (src1[i + 11], src2[i + 11]);
759 + resdst[i + 12] = _mm_aesdec_si128 (src1[i + 12], src2[i + 12]);
760 + resdst[i + 13] = _mm_aesdec_si128 (src1[i + 13], src2[i + 13]);
761 + resdst[i + 14] = _mm_aesdec_si128 (src1[i + 14], src2[i + 14]);
762 + resdst[i + 15] = _mm_aesdec_si128 (src1[i + 15], src2[i + 15]);
765 + for (i = 0; i < NUM; i++)
766 + if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
769 Index: gcc/testsuite/ChangeLog.ix86
770 ===================================================================
771 --- gcc/testsuite/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
772 +++ gcc/testsuite/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
774 +2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
776 + Backport from mainline:
777 + 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
779 + * g++.dg/other/i386-2.C: Include <wmmintrin.h>.
780 + * g++.dg/other/i386-3.C: Likewise.
781 + * gcc.target/i386/sse-13.c: Likewise.
782 + * gcc.target/i386/sse-14.c: Likewise.
784 + * gcc.target/i386/aes-check.h: New.
785 + * gcc.target/i386/aesdec.c: Likewise.
786 + * gcc.target/i386/aesdeclast.c: Likewise.
787 + * gcc.target/i386/aesenc.c: Likewise.
788 + * gcc.target/i386/aesenclast.c: Likewise.
789 + * gcc.target/i386/aesimc.c: Likewise.
790 + * gcc.target/i386/aeskeygenassist.c: Likewise.
791 + * gcc.target/i386/pclmulqdq.c: Likewise.
792 + * gcc.target/i386/pclmul-check.h: Likewise.
794 + * gcc.target/i386/i386.exp (check_effective_target_aes): New.
795 + (check_effective_target_pclmul): Likewise.
796 Index: gcc/testsuite/g++.dg/other/i386-2.C
797 ===================================================================
798 --- gcc/testsuite/g++.dg/other/i386-2.C (.../gcc-4_3-branch) (revision 145062)
799 +++ gcc/testsuite/g++.dg/other/i386-2.C (.../ix86/gcc-4_3-branch) (revision 145364)
801 -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
802 +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
803 usable with -O -pedantic-errors. */
804 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
805 -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5" } */
806 +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
808 +#include <wmmintrin.h>
809 #include <bmmintrin.h>
810 #include <smmintrin.h>
812 Index: gcc/testsuite/g++.dg/other/i386-3.C
813 ===================================================================
814 --- gcc/testsuite/g++.dg/other/i386-3.C (.../gcc-4_3-branch) (revision 145062)
815 +++ gcc/testsuite/g++.dg/other/i386-3.C (.../ix86/gcc-4_3-branch) (revision 145364)
817 -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
818 +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
819 usable with -O -fkeep-inline-functions. */
820 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
821 -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */
822 +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -maes -mpclmul -msse4 -msse5" } */
824 +#include <wmmintrin.h>
825 #include <bmmintrin.h>
826 #include <smmintrin.h>
828 Index: gcc/ChangeLog.ix86
829 ===================================================================
830 --- gcc/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
831 +++ gcc/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
833 +2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
835 + Backport from mainline:
836 + 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
838 + * config.gcc (extra_headers): Add wmmintrin.h for x86 and x86-64.
840 + * config/i386/cpuid.h (bit_AES): New.
841 + (bit_PCLMUL): Likewise.
843 + * config/i386/i386.c (pta_flags): Add PTA_AES and PTA_PCLMUL.
844 + (override_options): Handle PTA_AES and PTA_PCLMUL. Enable
845 + SSE2 if AES or PCLMUL is enabled.
846 + (ix86_builtins): Add IX86_BUILTIN_AESENC128,
847 + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
848 + IX86_BUILTIN_AESDECLAST128, IX86_BUILTIN_AESIMC128,
849 + IX86_BUILTIN_AESKEYGENASSIST128 and IX86_BUILTIN_PCLMULQDQ128.
850 + (bdesc_sse_3arg): Add IX86_BUILTIN_PCLMULQDQ128.
851 + (bdesc_2arg): Add IX86_BUILTIN_AESENC128,
852 + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
853 + IX86_BUILTIN_AESDECLAST128 and IX86_BUILTIN_AESKEYGENASSIST128.
854 + (bdesc_1arg): Add IX86_BUILTIN_AESIMC128.
855 + (ix86_init_mmx_sse_builtins): Define __builtin_ia32_aesenc128,
856 + __builtin_ia32_aesenclast128, __builtin_ia32_aesdec128,
857 + __builtin_ia32_aesdeclast128, __builtin_ia32_aesimc128,
858 + __builtin_ia32_aeskeygenassist128 and
859 + __builtin_ia32_pclmulqdq128.
860 + * config/i386/i386.c (ix86_expand_binop_imm_builtin): New.
861 + (ix86_expand_builtin): Use it for IX86_BUILTIN_PSLLDQI128 and
862 + IX86_BUILTIN_PSRLDQI128. Handle IX86_BUILTIN_AESKEYGENASSIST128.
864 + * config/i386/i386.h (TARGET_AES): New.
865 + (TARGET_PCLMUL): Likewise.
866 + (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AES and TARGET_PCLMUL.
868 + * config/i386/i386.md (UNSPEC_AESENC): New.
869 + (UNSPEC_AESENCLAST): Likewise.
870 + (UNSPEC_AESDEC): Likewise.
871 + (UNSPEC_AESDECLAST): Likewise.
872 + (UNSPEC_AESIMC): Likewise.
873 + (UNSPEC_AESKEYGENASSIST): Likewise.
874 + (UNSPEC_PCLMUL): Likewise.
876 + * config/i386/i386.opt (maes): New.
877 + (mpclmul): Likewise.
879 + * config/i386/sse.md (aesenc): New pattern.
880 + (aesenclast): Likewise.
881 + (aesdec): Likewise.
882 + (aesdeclast): Likewise.
883 + (aesimc): Likewise.
884 + (aeskeygenassist): Likewise.
885 + (pclmulqdq): Likewise.
887 + * config/i386/wmmintrin.h: New.
889 + * doc/extend.texi: Document AES and PCLMUL built-in functions.
891 + * doc/invoke.texi: Document -maes and -mpclmul.
892 Index: gcc/config.gcc
893 ===================================================================
894 --- gcc/config.gcc (.../gcc-4_3-branch) (revision 145062)
895 +++ gcc/config.gcc (.../ix86/gcc-4_3-branch) (revision 145364)
896 @@ -308,13 +308,15 @@
898 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
899 pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
900 - nmmintrin.h bmmintrin.h mmintrin-common.h"
901 + nmmintrin.h bmmintrin.h mmintrin-common.h
906 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
907 pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
908 - nmmintrin.h bmmintrin.h mmintrin-common.h"
909 + nmmintrin.h bmmintrin.h mmintrin-common.h
914 Index: gcc/config/i386/i386.h
915 ===================================================================
916 --- gcc/config/i386/i386.h (.../gcc-4_3-branch) (revision 145062)
917 +++ gcc/config/i386/i386.h (.../ix86/gcc-4_3-branch) (revision 145364)
919 #define TARGET_SAHF x86_sahf
920 #define TARGET_RECIP x86_recip
921 #define TARGET_FUSED_MADD x86_fused_muladd
922 +#define TARGET_AES (TARGET_SSE2 && x86_aes)
923 +#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul)
925 #define ASSEMBLER_DIALECT (ix86_asm_dialect)
928 builtin_define ("__SSE4_1__"); \
930 builtin_define ("__SSE4_2__"); \
932 + builtin_define ("__AES__"); \
933 + if (TARGET_PCLMUL) \
934 + builtin_define ("__PCLMUL__"); \
936 builtin_define ("__SSE4A__"); \
938 Index: gcc/config/i386/i386.md
939 ===================================================================
940 --- gcc/config/i386/i386.md (.../gcc-4_3-branch) (revision 145062)
941 +++ gcc/config/i386/i386.md (.../ix86/gcc-4_3-branch) (revision 145364)
944 (UNSPEC_CVTPH2PS 157)
945 (UNSPEC_CVTPS2PH 158)
948 + (UNSPEC_AESENC 159)
949 + (UNSPEC_AESENCLAST 160)
950 + (UNSPEC_AESDEC 161)
951 + (UNSPEC_AESDECLAST 162)
952 + (UNSPEC_AESIMC 163)
953 + (UNSPEC_AESKEYGENASSIST 164)
955 + ; For PCLMUL support
956 + (UNSPEC_PCLMUL 165)
960 Index: gcc/config/i386/wmmintrin.h
961 ===================================================================
962 --- gcc/config/i386/wmmintrin.h (.../gcc-4_3-branch) (revision 0)
963 +++ gcc/config/i386/wmmintrin.h (.../ix86/gcc-4_3-branch) (revision 145364)
965 +/* Copyright (C) 2008 Free Software Foundation, Inc.
967 + This file is part of GCC.
969 + GCC is free software; you can redistribute it and/or modify
970 + it under the terms of the GNU General Public License as published by
971 + the Free Software Foundation; either version 2, or (at your option)
974 + GCC is distributed in the hope that it will be useful,
975 + but WITHOUT ANY WARRANTY; without even the implied warranty of
976 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
977 + GNU General Public License for more details.
979 + You should have received a copy of the GNU General Public License
980 + along with GCC; see the file COPYING. If not, write to
981 + the Free Software Foundation, 59 Temple Place - Suite 330,
982 + Boston, MA 02111-1307, USA. */
984 +/* As a special exception, if you include this header file into source
985 + files compiled by GCC, this header file does not by itself cause
986 + the resulting executable to be covered by the GNU General Public
987 + License. This exception does not however invalidate any other
988 + reasons why the executable file might be covered by the GNU General
991 +/* Implemented from the specification included in the Intel C++ Compiler
992 + User Guide and Reference, version 10.1. */
994 +#ifndef _WMMINTRIN_H_INCLUDED
995 +#define _WMMINTRIN_H_INCLUDED
997 +/* We need definitions from the SSE2 header file. */
998 +#include <emmintrin.h>
1000 +#if !defined (__AES__) && !defined (__PCLMUL__)
1001 +# error "AES/PCLMUL instructions not enabled"
1007 +/* Performs 1 round of AES decryption of the first m128i using
1008 + the second m128i as a round key. */
1009 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1010 +_mm_aesdec_si128 (__m128i __X, __m128i __Y)
1012 + return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
1015 +/* Performs the last round of AES decryption of the first m128i
1016 + using the second m128i as a round key. */
1017 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1018 +_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
1020 + return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
1024 +/* Performs 1 round of AES encryption of the first m128i using
1025 + the second m128i as a round key. */
1026 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1027 +_mm_aesenc_si128 (__m128i __X, __m128i __Y)
1029 + return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
1032 +/* Performs the last round of AES encryption of the first m128i
1033 + using the second m128i as a round key. */
1034 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035 +_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
1037 + return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
1040 +/* Performs the InverseMixColumn operation on the source m128i
1041 + and stores the result into m128i destination. */
1042 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043 +_mm_aesimc_si128 (__m128i __X)
1045 + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
1048 +/* Generates a m128i round key for the input m128i AES cipher key and
1049 + byte round constant. The second parameter must be a compile time
1051 +#ifdef __OPTIMIZE__
1052 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1053 +_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
1055 + return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
1058 +#define _mm_aeskeygenassist_si128(X, C) \
1059 + ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
1062 +#endif /* __AES__ */
1067 +/* Performs carry-less integer multiplication of 64-bit halves of
1068 + 128-bit input operands. The third parameter indicates which 64-bit
1069 + halves of the input parameters v1 and v2 should be used. It must be
1070 + a compile time constant. */
1071 +#ifdef __OPTIMIZE__
1072 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1073 +_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
1075 + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
1076 + (__v2di)__Y, __I);
1079 +#define _mm_clmulepi64_si128(X, Y, I) \
1080 + ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
1081 + (__v2di)(__m128i)(Y), (int)(I)))
1083 +#endif /* __PCLMUL__ */
1085 +#endif /* __AES__/__PCLMUL__ */
1087 +#endif /* _WMMINTRIN_H_INCLUDED */
1088 Index: gcc/config/i386/cpuid.h
1089 ===================================================================
1090 --- gcc/config/i386/cpuid.h (.../gcc-4_3-branch) (revision 145062)
1091 +++ gcc/config/i386/cpuid.h (.../ix86/gcc-4_3-branch) (revision 145364)
1095 #define bit_SSE3 (1 << 0)
1096 +#define bit_PCLMUL (1 << 1)
1097 #define bit_SSSE3 (1 << 9)
1098 #define bit_CMPXCHG16B (1 << 13)
1099 #define bit_SSE4_1 (1 << 19)
1100 #define bit_SSE4_2 (1 << 20)
1101 #define bit_POPCNT (1 << 23)
1102 +#define bit_AES (1 << 25)
1105 #define bit_CMPXCHG8B (1 << 8)
1106 Index: gcc/config/i386/sse.md
1107 ===================================================================
1108 --- gcc/config/i386/sse.md (.../gcc-4_3-branch) (revision 145062)
1109 +++ gcc/config/i386/sse.md (.../ix86/gcc-4_3-branch) (revision 145364)
1110 @@ -8700,3 +8700,80 @@
1112 [(set_attr "type" "ssecmp")
1113 (set_attr "mode" "TI")])
1115 +(define_insn "aesenc"
1116 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1117 + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1118 + (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1121 + "aesenc\t{%2, %0|%0, %2}"
1122 + [(set_attr "type" "sselog1")
1123 + (set_attr "prefix_extra" "1")
1124 + (set_attr "mode" "TI")])
1126 +(define_insn "aesenclast"
1127 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1128 + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1129 + (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1130 + UNSPEC_AESENCLAST))]
1132 + "aesenclast\t{%2, %0|%0, %2}"
1133 + [(set_attr "type" "sselog1")
1134 + (set_attr "prefix_extra" "1")
1135 + (set_attr "mode" "TI")])
1137 +(define_insn "aesdec"
1138 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1139 + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1140 + (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1143 + "aesdec\t{%2, %0|%0, %2}"
1144 + [(set_attr "type" "sselog1")
1145 + (set_attr "prefix_extra" "1")
1146 + (set_attr "mode" "TI")])
1148 +(define_insn "aesdeclast"
1149 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1150 + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1151 + (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1152 + UNSPEC_AESDECLAST))]
1154 + "aesdeclast\t{%2, %0|%0, %2}"
1155 + [(set_attr "type" "sselog1")
1156 + (set_attr "prefix_extra" "1")
1157 + (set_attr "mode" "TI")])
1159 +(define_insn "aesimc"
1160 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1161 + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
1164 + "aesimc\t{%1, %0|%0, %1}"
1165 + [(set_attr "type" "sselog1")
1166 + (set_attr "prefix_extra" "1")
1167 + (set_attr "mode" "TI")])
1169 +(define_insn "aeskeygenassist"
1170 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1171 + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
1172 + (match_operand:SI 2 "const_0_to_255_operand" "n")]
1173 + UNSPEC_AESKEYGENASSIST))]
1175 + "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
1176 + [(set_attr "type" "sselog1")
1177 + (set_attr "prefix_extra" "1")
1178 + (set_attr "mode" "TI")])
1180 +(define_insn "pclmulqdq"
1181 + [(set (match_operand:V2DI 0 "register_operand" "=x")
1182 + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1183 + (match_operand:V2DI 2 "nonimmediate_operand" "xm")
1184 + (match_operand:SI 3 "const_0_to_255_operand" "n")]
1187 + "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
1188 + [(set_attr "type" "sselog1")
1189 + (set_attr "prefix_extra" "1")
1190 + (set_attr "mode" "TI")])
1191 Index: gcc/config/i386/i386.opt
1192 ===================================================================
1193 --- gcc/config/i386/i386.opt (.../gcc-4_3-branch) (revision 145062)
1194 +++ gcc/config/i386/i386.opt (.../ix86/gcc-4_3-branch) (revision 145364)
1195 @@ -279,3 +279,11 @@
1196 Enable automatic generation of fused floating point multiply-add instructions
1197 if the ISA supports such instructions. The -mfused-madd option is on by
1201 +Target Report RejectNegative Var(x86_aes)
1202 +Support AES built-in functions and code generation
1205 +Target Report RejectNegative Var(x86_pclmul)
1206 +Support PCLMUL built-in functions and code generation
1207 Index: gcc/config/i386/i386.c
1208 ===================================================================
1209 --- gcc/config/i386/i386.c (.../gcc-4_3-branch) (revision 145062)
1210 +++ gcc/config/i386/i386.c (.../ix86/gcc-4_3-branch) (revision 145364)
1211 @@ -2077,7 +2077,9 @@
1212 PTA_NO_SAHF = 1 << 13,
1213 PTA_SSE4_1 = 1 << 14,
1214 PTA_SSE4_2 = 1 << 15,
1215 - PTA_SSE5 = 1 << 16
1216 + PTA_SSE5 = 1 << 16,
1217 + PTA_AES = 1 << 17,
1218 + PTA_PCLMUL = 1 << 18
1222 @@ -2384,6 +2386,10 @@
1223 x86_prefetch_sse = true;
1224 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1226 + if (processor_alias_table[i].flags & PTA_AES)
1228 + if (processor_alias_table[i].flags & PTA_PCLMUL)
1229 + x86_pclmul = true;
1233 @@ -2427,6 +2433,14 @@
1235 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1237 + /* Enable SSE2 if AES or PCLMUL is enabled. */
1238 + if ((x86_aes || x86_pclmul)
1239 + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
1241 + ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1242 + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1245 ix86_tune_mask = 1u << ix86_tune;
1246 for (i = 0; i < X86_TUNE_LAST; ++i)
1247 ix86_tune_features[i] &= ix86_tune_mask;
1248 @@ -17582,6 +17596,17 @@
1250 IX86_BUILTIN_PCMPGTQ,
1252 + /* AES instructions */
1253 + IX86_BUILTIN_AESENC128,
1254 + IX86_BUILTIN_AESENCLAST128,
1255 + IX86_BUILTIN_AESDEC128,
1256 + IX86_BUILTIN_AESDECLAST128,
1257 + IX86_BUILTIN_AESIMC128,
1258 + IX86_BUILTIN_AESKEYGENASSIST128,
1260 + /* PCLMUL instruction */
1261 + IX86_BUILTIN_PCLMULQDQ128,
1263 /* TFmode support builtins. */
1266 @@ -17937,6 +17962,9 @@
1267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
1268 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
1269 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
1272 + { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 },
1275 static const struct builtin_description bdesc_2arg[] =
1276 @@ -18247,6 +18275,13 @@
1279 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
1282 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
1283 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
1284 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
1285 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
1286 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 },
1289 static const struct builtin_description bdesc_1arg[] =
1290 @@ -18322,6 +18357,9 @@
1291 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
1292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
1293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
1296 + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
1300 @@ -19555,6 +19593,25 @@
1302 def_builtin_const (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
1307 + /* Define AES built-in functions only if AES is enabled. */
1308 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
1309 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
1310 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
1311 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
1312 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
1313 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
1317 + if (TARGET_PCLMUL)
1319 + /* Define PCLMUL built-in function only if PCLMUL is enabled. */
1320 + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
1323 /* AMDFAM10 SSE4A New built-ins */
1324 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
1325 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
1326 @@ -19830,6 +19887,44 @@
1330 +/* Subroutine of ix86_expand_builtin to take care of binop insns
1331 + with an immediate. Operand 1 is checked in its own mode, mode0. */
1334 +ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
1338 + tree arg0 = CALL_EXPR_ARG (exp, 0);
1339 + tree arg1 = CALL_EXPR_ARG (exp, 1);
1340 + rtx op0 = expand_normal (arg0);
1341 + rtx op1 = expand_normal (arg1);
1342 + enum machine_mode tmode = insn_data[icode].operand[0].mode;
1343 + enum machine_mode mode0 = insn_data[icode].operand[1].mode;
1344 + enum machine_mode mode1 = insn_data[icode].operand[2].mode;
1346 + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
1348 + op0 = copy_to_reg (op0);
1349 + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
1352 + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
1354 + error ("the last operand must be an immediate");
1355 + return const0_rtx;
1358 + target = gen_reg_rtx (V2DImode);
1359 + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
1368 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
1371 @@ -20926,34 +21021,18 @@
1374 case IX86_BUILTIN_PSLLDQI128:
1375 + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
1379 case IX86_BUILTIN_PSRLDQI128:
1380 - icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
1381 - : CODE_FOR_sse2_lshrti3);
1382 - arg0 = CALL_EXPR_ARG (exp, 0);
1383 - arg1 = CALL_EXPR_ARG (exp, 1);
1384 - op0 = expand_normal (arg0);
1385 - op1 = expand_normal (arg1);
1386 - tmode = insn_data[icode].operand[0].mode;
1387 - mode1 = insn_data[icode].operand[1].mode;
1388 - mode2 = insn_data[icode].operand[2].mode;
1389 + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
1393 - if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
1395 - op0 = copy_to_reg (op0);
1396 - op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
1398 - if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
1400 - error ("shift must be an immediate");
1401 - return const0_rtx;
1403 - target = gen_reg_rtx (V2DImode);
1404 - pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
1410 + case IX86_BUILTIN_AESKEYGENASSIST128:
1411 + return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist,
1414 case IX86_BUILTIN_FEMMS:
1415 emit_insn (gen_mmx_femms ());
1417 Property changes on: .
1418 ___________________________________________________________________
1419 Added: svn:mergeinfo
1420 Merged /branches/gcc-4_3-branch:r139021-145062