]> git.pld-linux.org Git - packages/crossppc-gcc.git/blame - gcc-ix86-branch.diff
- regenerated against latest merge point.
[packages/crossppc-gcc.git] / gcc-ix86-branch.diff
CommitLineData
38a49943
PS
1Index: gcc/doc/extend.texi
2===================================================================
3--- gcc/doc/extend.texi (.../gcc-4_3-branch) (revision 145062)
4+++ gcc/doc/extend.texi (.../ix86/gcc-4_3-branch) (revision 145364)
5@@ -7962,6 +7962,27 @@
6 Generates the @code{popcntq} machine instruction.
7 @end table
8
9+The following built-in functions are available when @option{-maes} is
10+used. All of them generate the machine instruction that is part of the
11+name.
12+
13+@smallexample
14+v2di __builtin_ia32_aesenc128 (v2di, v2di)
15+v2di __builtin_ia32_aesenclast128 (v2di, v2di)
16+v2di __builtin_ia32_aesdec128 (v2di, v2di)
17+v2di __builtin_ia32_aesdeclast128 (v2di, v2di)
18+v2di __builtin_ia32_aeskeygenassist128 (v2di, const int)
19+v2di __builtin_ia32_aesimc128 (v2di)
20+@end smallexample
21+
22+The following built-in function is available when @option{-mpclmul} is
23+used.
24+
25+@table @code
26+@item v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int)
27+Generates the @code{pclmulqdq} machine instruction.
28+@end table
29+
30 The following built-in functions are available when @option{-msse4a} is used.
31 All of them generate the machine instruction that is part of the name.
32
33Index: gcc/doc/invoke.texi
34===================================================================
35--- gcc/doc/invoke.texi (.../gcc-4_3-branch) (revision 145062)
36+++ gcc/doc/invoke.texi (.../ix86/gcc-4_3-branch) (revision 145364)
37@@ -551,6 +551,7 @@
38 -mno-wide-multiply -mrtd -malign-double @gol
39 -mpreferred-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol
40 -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
41+-maes -mpclmul @gol
42 -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
43 -mthreads -mno-align-stringops -minline-all-stringops @gol
44 -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
45@@ -10733,6 +10734,10 @@
46 @itemx -mno-sse4.2
47 @item -msse4
48 @itemx -mno-sse4
49+@item -maes
50+@itemx -mno-aes
51+@item -mpclmul
52+@itemx -mno-pclmul
53 @item -msse4a
54 @item -mno-sse4a
55 @item -msse5
56@@ -10750,8 +10755,8 @@
57 @opindex m3dnow
58 @opindex mno-3dnow
59 These switches enable or disable the use of instructions in the MMX,
60-SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, SSE5, ABM or 3DNow!@: extended
61-instruction sets.
62+SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
63+3DNow!@: extended instruction sets.
64 These extensions are also available as built-in functions: see
65 @ref{X86 Built-in Functions}, for details of the functions enabled and
66 disabled by these switches.
67Index: gcc/testsuite/gcc.target/i386/sse-14.c
68===================================================================
69--- gcc/testsuite/gcc.target/i386/sse-14.c (.../gcc-4_3-branch) (revision 145062)
70+++ gcc/testsuite/gcc.target/i386/sse-14.c (.../ix86/gcc-4_3-branch) (revision 145364)
71@@ -1,14 +1,15 @@
72 /* { dg-do compile } */
73-/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */
74+/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
75
76 /* Test that the intrinsics compile without optimization. All of them are
77- defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
78+ defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
79 that reference the proper builtin functions. Defining away "extern" and
80 "__inline" results in all of them being compiled as proper functions. */
81
82 #define extern
83 #define __inline
84
85+#include <wmmintrin.h>
86 #include <bmmintrin.h>
87 #include <smmintrin.h>
88 #include <mm3dnow.h>
89@@ -44,6 +45,10 @@
90 test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
91 test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
92
93+/* wmmintrin.h */
94+test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
95+test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
96+
97 /* smmintrin.h */
98 test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
99 test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
100Index: gcc/testsuite/gcc.target/i386/i386.exp
101===================================================================
102--- gcc/testsuite/gcc.target/i386/i386.exp (.../gcc-4_3-branch) (revision 145062)
103+++ gcc/testsuite/gcc.target/i386/i386.exp (.../ix86/gcc-4_3-branch) (revision 145364)
104@@ -51,6 +51,34 @@
105 } "-O2 -msse4.1" ]
106 }
107
108+# Return 1 if aes instructions can be compiled.
109+proc check_effective_target_aes { } {
110+ return [check_no_compiler_messages aes object {
111+ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
112+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
113+
114+ __m128i _mm_aesimc_si128 (__m128i __X)
115+ {
116+ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
117+ }
118+ } "-O2 -maes" ]
119+}
120+
121+# Return 1 if pclmul instructions can be compiled.
122+proc check_effective_target_pclmul { } {
123+ return [check_no_compiler_messages pclmul object {
124+ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
125+ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
126+
127+ __m128i pclmulqdq_test (__m128i __X, __m128i __Y)
128+ {
129+ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
130+ (__v2di)__Y,
131+ 1);
132+ }
133+ } "-O2 -mpclmul" ]
134+}
135+
136 # Return 1 if sse4a instructions can be compiled.
137 proc check_effective_target_sse4a { } {
138 return [check_no_compiler_messages sse4a object {
139Index: gcc/testsuite/gcc.target/i386/aesdeclast.c
140===================================================================
141--- gcc/testsuite/gcc.target/i386/aesdeclast.c (.../gcc-4_3-branch) (revision 0)
142+++ gcc/testsuite/gcc.target/i386/aesdeclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
143@@ -0,0 +1,69 @@
144+/* { dg-do run } */
145+/* { dg-require-effective-target aes } */
146+/* { dg-options "-O2 -maes" } */
147+
148+#include <wmmintrin.h>
149+#include <string.h>
150+
151+#include "aes-check.h"
152+
153+extern void abort (void);
154+
155+#define NUM 1024
156+
157+static __m128i src1[NUM];
158+static __m128i src2[NUM];
159+static __m128i edst[NUM];
160+
161+static __m128i resdst[NUM];
162+
163+/* Initialize input/output vectors. (Currently, there is only one set of
164+ input/output vectors). */
165+
166+static void
167+init_data (__m128i *s1, __m128i *s2, __m128i *d)
168+{
169+ int i;
170+
171+ for (i = 0; i < NUM; i++)
172+ {
173+ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
174+ 0x73745665, 0x7b5b5465);
175+ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
176+ 0x68617929, 0x48692853);
177+ d[i] = _mm_setr_epi32 (0x72a593d0, 0xd410637b,
178+ 0x6b317f95, 0xc5a391ef);
179+ }
180+}
181+
182+static void
183+aes_test (void)
184+{
185+ int i;
186+
187+ init_data (src1, src2, edst);
188+
189+ for (i = 0; i < NUM; i += 16)
190+ {
191+ resdst[i] = _mm_aesdeclast_si128 (src1[i], src2[i]);
192+ resdst[i + 1] = _mm_aesdeclast_si128 (src1[i + 1], src2[i + 1]);
193+ resdst[i + 2] = _mm_aesdeclast_si128 (src1[i + 2], src2[i + 2]);
194+ resdst[i + 3] = _mm_aesdeclast_si128 (src1[i + 3], src2[i + 3]);
195+ resdst[i + 4] = _mm_aesdeclast_si128 (src1[i + 4], src2[i + 4]);
196+ resdst[i + 5] = _mm_aesdeclast_si128 (src1[i + 5], src2[i + 5]);
197+ resdst[i + 6] = _mm_aesdeclast_si128 (src1[i + 6], src2[i + 6]);
198+ resdst[i + 7] = _mm_aesdeclast_si128 (src1[i + 7], src2[i + 7]);
199+ resdst[i + 8] = _mm_aesdeclast_si128 (src1[i + 8], src2[i + 8]);
200+ resdst[i + 9] = _mm_aesdeclast_si128 (src1[i + 9], src2[i + 9]);
201+ resdst[i + 10] = _mm_aesdeclast_si128 (src1[i + 10], src2[i + 10]);
202+ resdst[i + 11] = _mm_aesdeclast_si128 (src1[i + 11], src2[i + 11]);
203+ resdst[i + 12] = _mm_aesdeclast_si128 (src1[i + 12], src2[i + 12]);
204+ resdst[i + 13] = _mm_aesdeclast_si128 (src1[i + 13], src2[i + 13]);
205+ resdst[i + 14] = _mm_aesdeclast_si128 (src1[i + 14], src2[i + 14]);
206+ resdst[i + 15] = _mm_aesdeclast_si128 (src1[i + 15], src2[i + 15]);
207+ }
208+
209+ for (i = 0; i < NUM; i++)
210+ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
211+ abort ();
212+}
213Index: gcc/testsuite/gcc.target/i386/pclmulqdq.c
214===================================================================
215--- gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../gcc-4_3-branch) (revision 0)
216+++ gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../ix86/gcc-4_3-branch) (revision 145364)
217@@ -0,0 +1,87 @@
218+/* { dg-do run } */
219+/* { dg-require-effective-target pclmul } */
220+/* { dg-options "-O2 -mpclmul" } */
221+
222+#include <wmmintrin.h>
223+#include <string.h>
224+
225+#include "pclmul-check.h"
226+
227+extern void abort (void);
228+
229+#define NUM 1024
230+
231+static __m128i s1[NUM];
232+static __m128i s2[NUM];
233+/* We need this array to generate the memory-operand form of the instruction. */
234+static __m128i s2m[NUM];
235+
236+static __m128i e_00[NUM];
237+static __m128i e_01[NUM];
238+static __m128i e_10[NUM];
239+static __m128i e_11[NUM];
240+
241+static __m128i d_00[NUM];
242+static __m128i d_01[NUM];
243+static __m128i d_10[NUM];
244+static __m128i d_11[NUM];
245+
246+/* Initialize input/output vectors. (Currently, there is only one set
247+ of input/output vectors). */
248+static void
249+init_data (__m128i *ls1, __m128i *ls2, __m128i *le_00, __m128i *le_01,
250+ __m128i *le_10, __m128i *le_11)
251+{
252+ int i;
253+
254+ for (i = 0; i < NUM; i++)
255+ {
256+ ls1[i] = _mm_set_epi32 (0x7B5B5465, 0x73745665,
257+ 0x63746F72, 0x5D53475D);
258+ ls2[i] = _mm_set_epi32 (0x48692853, 0x68617929,
259+ 0x5B477565, 0x726F6E5D);
260+ s2m[i] = _mm_set_epi32 (0x48692853, 0x68617929,
261+ 0x5B477565, 0x726F6E5D);
262+ le_00[i] = _mm_set_epi32 (0x1D4D84C8, 0x5C3440C0,
263+ 0x929633D5, 0xD36F0451);
264+ le_01[i] = _mm_set_epi32 (0x1A2BF6DB, 0x3A30862F,
265+ 0xBABF262D, 0xF4B7D5C9);
266+ le_10[i] = _mm_set_epi32 (0x1BD17C8D, 0x556AB5A1,
267+ 0x7FA540AC, 0x2A281315);
268+ le_11[i] = _mm_set_epi32 (0x1D1E1F2C, 0x592E7C45,
269+ 0xD66EE03E, 0x410FD4ED);
270+ }
271+}
272+
273+static void
274+pclmul_test (void)
275+{
276+ int i;
277+
278+ init_data (s1, s2, e_00, e_01, e_10, e_11);
279+
280+ for (i = 0; i < NUM; i += 2)
281+ {
282+ d_00[i] = _mm_clmulepi64_si128 (s1[i], s2m[i], 0x00);
283+ d_01[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x01);
284+ d_10[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x10);
285+ d_11[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x11);
286+
287+ d_11[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x11);
288+ d_00[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x00);
289+ d_10[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2m[i + 1], 0x10);
290+ d_01[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x01);
291+ }
292+
293+ for (i = 0; i < NUM; i++)
294+ {
295+ if (memcmp (d_00 + i, e_00 + i, sizeof (__m128i)))
296+ abort ();
297+ if (memcmp (d_01 + i, e_01 + i, sizeof (__m128i)))
298+ abort ();
299+ if (memcmp (d_10 + i, e_10 + i, sizeof (__m128i)))
300+ abort ();
301+ if (memcmp(d_11 + i, e_11 + i, sizeof (__m128i)))
302+ abort ();
303+ }
304+}
305Index: gcc/testsuite/gcc.target/i386/aes-check.h
306===================================================================
307--- gcc/testsuite/gcc.target/i386/aes-check.h (.../gcc-4_3-branch) (revision 0)
308+++ gcc/testsuite/gcc.target/i386/aes-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
309@@ -0,0 +1,30 @@
310+#include <stdio.h>
311+#include <stdlib.h>
312+
313+#include "cpuid.h"
314+
315+static void aes_test (void);
316+
317+int
318+main ()
319+{
320+ unsigned int eax, ebx, ecx, edx;
321+
322+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
323+ return 0;
324+
325+ /* Run AES test only if host has AES support. */
326+ if (ecx & bit_AES)
327+ {
328+ aes_test ();
329+#ifdef DEBUG
330+ printf ("PASSED\n");
331+#endif
332+ }
333+#ifdef DEBUG
334+ else
335+ printf ("SKIPPED\n");
336+#endif
337+
338+ return 0;
339+}
340Index: gcc/testsuite/gcc.target/i386/pclmul-check.h
341===================================================================
342--- gcc/testsuite/gcc.target/i386/pclmul-check.h (.../gcc-4_3-branch) (revision 0)
343+++ gcc/testsuite/gcc.target/i386/pclmul-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
344@@ -0,0 +1,30 @@
345+#include <stdio.h>
346+#include <stdlib.h>
347+
348+#include "cpuid.h"
349+
350+static void pclmul_test (void);
351+
352+int
353+main ()
354+{
355+ unsigned int eax, ebx, ecx, edx;
356+
357+ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
358+ return 0;
359+
360+ /* Run PCLMULQDQ test only if host has PCLMULQDQ support. */
361+ if (ecx & bit_PCLMUL)
362+ {
363+ pclmul_test ();
364+#ifdef DEBUG
365+ printf ("PASSED\n");
366+#endif
367+ }
368+#ifdef DEBUG
369+ else
370+ printf ("SKIPPED\n");
371+#endif
372+
373+ return 0;
374+}
375Index: gcc/testsuite/gcc.target/i386/aeskeygenassist.c
376===================================================================
377--- gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../gcc-4_3-branch) (revision 0)
378+++ gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../ix86/gcc-4_3-branch) (revision 145364)
379@@ -0,0 +1,66 @@
380+/* { dg-do run } */
381+/* { dg-require-effective-target aes } */
382+/* { dg-options "-O2 -maes" } */
383+
384+#include <wmmintrin.h>
385+#include <string.h>
386+
387+#include "aes-check.h"
388+
389+extern void abort (void);
390+
391+#define NUM 1024
392+#define IMM8 1
393+
394+static __m128i src1[NUM];
395+static __m128i edst[NUM];
396+
397+static __m128i resdst[NUM];
398+
399+/* Initialize input/output vectors. (Currently, there is only one set
400+ of input/output vectors). */
401+
402+static void
403+init_data (__m128i *s1, __m128i *d)
404+{
405+ int i;
406+ for (i = 0; i < NUM; i++)
407+ {
408+ s1[i] = _mm_setr_epi32 (0x16157e2b, 0xa6d2ae28,
409+ 0x8815f7ab, 0x3c4fcf09);
410+ d[i] = _mm_setr_epi32 (0x24b5e434, 0x3424b5e5,
411+ 0xeb848a01, 0x01eb848b);
412+ }
413+}
414+
415+static void
416+aes_test (void)
417+{
418+ int i;
419+
420+ init_data (src1, edst);
421+
422+ for (i = 0; i < NUM; i += 16)
423+ {
424+ resdst[i] = _mm_aeskeygenassist_si128 (src1[i], IMM8);
425+ resdst[i + 1] = _mm_aeskeygenassist_si128 (src1[i + 1], IMM8);
426+ resdst[i + 2] = _mm_aeskeygenassist_si128 (src1[i + 2], IMM8);
427+ resdst[i + 3] = _mm_aeskeygenassist_si128 (src1[i + 3], IMM8);
428+ resdst[i + 4] = _mm_aeskeygenassist_si128 (src1[i + 4], IMM8);
429+ resdst[i + 5] = _mm_aeskeygenassist_si128 (src1[i + 5], IMM8);
430+ resdst[i + 6] = _mm_aeskeygenassist_si128 (src1[i + 6], IMM8);
431+ resdst[i + 7] = _mm_aeskeygenassist_si128 (src1[i + 7], IMM8);
432+ resdst[i + 8] = _mm_aeskeygenassist_si128 (src1[i + 8], IMM8);
433+ resdst[i + 9] = _mm_aeskeygenassist_si128 (src1[i + 9], IMM8);
434+ resdst[i + 10] = _mm_aeskeygenassist_si128 (src1[i + 10], IMM8);
435+ resdst[i + 11] = _mm_aeskeygenassist_si128 (src1[i + 11], IMM8);
436+ resdst[i + 12] = _mm_aeskeygenassist_si128 (src1[i + 12], IMM8);
437+ resdst[i + 13] = _mm_aeskeygenassist_si128 (src1[i + 13], IMM8);
438+ resdst[i + 14] = _mm_aeskeygenassist_si128 (src1[i + 14], IMM8);
439+ resdst[i + 15] = _mm_aeskeygenassist_si128 (src1[i + 15], IMM8);
440+ }
441+
442+ for (i = 0; i < NUM; i++)
443+ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
444+ abort ();
445+}
446Index: gcc/testsuite/gcc.target/i386/aesenclast.c
447===================================================================
448--- gcc/testsuite/gcc.target/i386/aesenclast.c (.../gcc-4_3-branch) (revision 0)
449+++ gcc/testsuite/gcc.target/i386/aesenclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
450@@ -0,0 +1,68 @@
451+/* { dg-do run } */
452+/* { dg-require-effective-target aes } */
453+/* { dg-options "-O2 -maes" } */
454+
455+#include <wmmintrin.h>
456+#include <string.h>
457+
458+#include "aes-check.h"
459+
460+extern void abort (void);
461+
462+#define NUM 1024
463+
464+static __m128i src1[NUM];
465+static __m128i src2[NUM];
466+static __m128i edst[NUM];
467+
468+static __m128i resdst[NUM];
469+
470+/* Initialize input/output vectors. (Currently, there is only one
471+ set of input/output vectors). */
472+
473+static void
474+init_data (__m128i *s1, __m128i *s2, __m128i *d)
475+{
476+ int i;
477+ for (i = 0; i < NUM; i++)
478+ {
479+ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
480+ 0x73745665, 0x7b5b5465);
481+ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
482+ 0x68617929, 0x48692853);
483+ d[i] = _mm_setr_epi32 (0x53fdc611, 0x177ec425,
484+ 0x938c5964, 0xc7fb881e);
485+ }
486+}
487+
488+static void
489+aes_test (void)
490+{
491+ int i;
492+
493+ init_data (src1, src2, edst);
494+
495+ for (i = 0; i < NUM; i += 16)
496+ {
497+ resdst[i] = _mm_aesenclast_si128 (src1[i], src2[i]);
498+ resdst[i + 1] = _mm_aesenclast_si128 (src1[i + 1], src2[i + 1]);
499+ resdst[i + 2] = _mm_aesenclast_si128 (src1[i + 2], src2[i + 2]);
500+ resdst[i + 3] = _mm_aesenclast_si128 (src1[i + 3], src2[i + 3]);
501+ resdst[i + 4] = _mm_aesenclast_si128 (src1[i + 4], src2[i + 4]);
502+ resdst[i + 5] = _mm_aesenclast_si128 (src1[i + 5], src2[i + 5]);
503+ resdst[i + 6] = _mm_aesenclast_si128 (src1[i + 6], src2[i + 6]);
504+ resdst[i + 7] = _mm_aesenclast_si128 (src1[i + 7], src2[i + 7]);
505+ resdst[i + 8] = _mm_aesenclast_si128 (src1[i + 8], src2[i + 8]);
506+ resdst[i + 9] = _mm_aesenclast_si128 (src1[i + 9], src2[i + 9]);
507+ resdst[i + 10] = _mm_aesenclast_si128 (src1[i + 10], src2[i + 10]);
508+ resdst[i + 11] = _mm_aesenclast_si128 (src1[i + 11], src2[i + 11]);
509+ resdst[i + 12] = _mm_aesenclast_si128 (src1[i + 12], src2[i + 12]);
510+ resdst[i + 13] = _mm_aesenclast_si128 (src1[i + 13], src2[i + 13]);
511+ resdst[i + 14] = _mm_aesenclast_si128 (src1[i + 14], src2[i + 14]);
512+ resdst[i + 15] = _mm_aesenclast_si128 (src1[i + 15], src2[i + 15]);
513+ }
514+
515+ for (i = 0; i < NUM; i++)
516+ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
517+ abort ();
518+}
519Index: gcc/testsuite/gcc.target/i386/aesimc.c
520===================================================================
521--- gcc/testsuite/gcc.target/i386/aesimc.c (.../gcc-4_3-branch) (revision 0)
522+++ gcc/testsuite/gcc.target/i386/aesimc.c (.../ix86/gcc-4_3-branch) (revision 145364)
523@@ -0,0 +1,66 @@
524+/* { dg-do run } */
525+/* { dg-require-effective-target aes } */
526+/* { dg-options "-O2 -maes" } */
527+
528+#include <wmmintrin.h>
529+#include <string.h>
530+
531+#include "aes-check.h"
532+
533+extern void abort (void);
534+
535+#define NUM 1024
536+
537+static __m128i src1[NUM];
538+static __m128i edst[NUM];
539+
540+static __m128i resdst[NUM];
541+
542+/* Initialize input/output vectors. (Currently, there is only one set
543+ of input/output vectors). */
544+
545+static void
546+init_data (__m128i *s1, __m128i *d)
547+{
548+ int i;
549+
550+ for (i = 0; i < NUM; i++)
551+ {
552+ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
553+ 0x73745665, 0x7b5b5465);
554+ d[i] = _mm_setr_epi32 (0x81c3b3e5, 0x2b18330a,
555+ 0x44b109c8, 0x627a6f66);
556+ }
557+}
558+
559+static void
560+aes_test (void)
561+{
562+ int i;
563+
564+ init_data (src1, edst);
565+
566+ for (i = 0; i < NUM; i += 16)
567+ {
568+ resdst[i] = _mm_aesimc_si128 (src1[i]);
569+ resdst[i + 1] = _mm_aesimc_si128 (src1[i + 1]);
570+ resdst[i + 2] = _mm_aesimc_si128 (src1[i + 2]);
571+ resdst[i + 3] = _mm_aesimc_si128 (src1[i + 3]);
572+ resdst[i + 4] = _mm_aesimc_si128 (src1[i + 4]);
573+ resdst[i + 5] = _mm_aesimc_si128 (src1[i + 5]);
574+ resdst[i + 6] = _mm_aesimc_si128 (src1[i + 6]);
575+ resdst[i + 7] = _mm_aesimc_si128 (src1[i + 7]);
576+ resdst[i + 8] = _mm_aesimc_si128 (src1[i + 8]);
577+ resdst[i + 9] = _mm_aesimc_si128 (src1[i + 9]);
578+ resdst[i + 10] = _mm_aesimc_si128 (src1[i + 10]);
579+ resdst[i + 11] = _mm_aesimc_si128 (src1[i + 11]);
580+ resdst[i + 12] = _mm_aesimc_si128 (src1[i + 12]);
581+ resdst[i + 13] = _mm_aesimc_si128 (src1[i + 13]);
582+ resdst[i + 14] = _mm_aesimc_si128 (src1[i + 14]);
583+ resdst[i + 15] = _mm_aesimc_si128 (src1[i + 15]);
584+ }
585+
586+ for (i = 0; i < NUM; i++)
587+ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
588+ abort ();
589+}
590Index: gcc/testsuite/gcc.target/i386/aesenc.c
591===================================================================
592--- gcc/testsuite/gcc.target/i386/aesenc.c (.../gcc-4_3-branch) (revision 0)
593+++ gcc/testsuite/gcc.target/i386/aesenc.c (.../ix86/gcc-4_3-branch) (revision 145364)
594@@ -0,0 +1,68 @@
595+/* { dg-do run } */
596+/* { dg-require-effective-target aes } */
597+/* { dg-options "-O2 -maes" } */
598+
599+#include <wmmintrin.h>
600+#include <string.h>
601+
602+#include "aes-check.h"
603+
604+extern void abort (void);
605+
606+#define NUM 1024
607+
608+static __m128i src1[NUM];
609+static __m128i src2[NUM];
610+static __m128i edst[NUM];
611+
612+static __m128i resdst[NUM];
613+
614+/* Initialize input/output vectors. (Currently, there is only one set
615+ of input/output vectors). */
616+
617+static void
618+init_data (__m128i *s1, __m128i *s2, __m128i *d)
619+{
620+ int i;
621+ for (i = 0; i < NUM; i++)
622+ {
623+ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
624+ 0x73745665, 0x7b5b5465);
625+ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
626+ 0x68617929, 0x48692853);
627+ d[i] = _mm_setr_epi32 (0xded7e595, 0x8b104b58,
628+ 0x9fdba3c5, 0xa8311c2f);
629+ }
630+}
631+
632+static void
633+aes_test (void)
634+{
635+ int i;
636+
637+ init_data (src1, src2, edst);
638+
639+ for (i = 0; i < NUM; i += 16)
640+ {
641+ resdst[i] = _mm_aesenc_si128 (src1[i], src2[i]);
642+ resdst[i + 1] = _mm_aesenc_si128 (src1[i + 1], src2[i + 1]);
643+ resdst[i + 2] = _mm_aesenc_si128 (src1[i + 2], src2[i + 2]);
644+ resdst[i + 3] = _mm_aesenc_si128 (src1[i + 3], src2[i + 3]);
645+ resdst[i + 4] = _mm_aesenc_si128 (src1[i + 4], src2[i + 4]);
646+ resdst[i + 5] = _mm_aesenc_si128 (src1[i + 5], src2[i + 5]);
647+ resdst[i + 6] = _mm_aesenc_si128 (src1[i + 6], src2[i + 6]);
648+ resdst[i + 7] = _mm_aesenc_si128 (src1[i + 7], src2[i + 7]);
649+ resdst[i + 8] = _mm_aesenc_si128 (src1[i + 8], src2[i + 8]);
650+ resdst[i + 9] = _mm_aesenc_si128 (src1[i + 9], src2[i + 9]);
651+ resdst[i + 10] = _mm_aesenc_si128 (src1[i + 10], src2[i + 10]);
652+ resdst[i + 11] = _mm_aesenc_si128 (src1[i + 11], src2[i + 11]);
653+ resdst[i + 12] = _mm_aesenc_si128 (src1[i + 12], src2[i + 12]);
654+ resdst[i + 13] = _mm_aesenc_si128 (src1[i + 13], src2[i + 13]);
655+ resdst[i + 14] = _mm_aesenc_si128 (src1[i + 14], src2[i + 14]);
656+ resdst[i + 15] = _mm_aesenc_si128 (src1[i + 15], src2[i + 15]);
657+ }
658+
659+ for (i = 0; i < NUM; i++)
660+ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
661+ abort ();
662+}
663Index: gcc/testsuite/gcc.target/i386/sse-13.c
664===================================================================
665--- gcc/testsuite/gcc.target/i386/sse-13.c (.../gcc-4_3-branch) (revision 145062)
666+++ gcc/testsuite/gcc.target/i386/sse-13.c (.../ix86/gcc-4_3-branch) (revision 145364)
667@@ -1,8 +1,8 @@
668 /* { dg-do compile } */
669-/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5" } */
670+/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
671
672 /* Test that the intrinsics compile with optimization. All of them are
673- defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
674+ defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
675 that reference the proper builtin functions. Defining away "extern" and
676 "__inline" results in all of them being compiled as proper functions. */
677
678@@ -15,6 +15,10 @@
679 #define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1)
680 #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
681
682+/* wmmintrin.h */
683+#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
684+#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
685+
686 /* smmintrin.h */
687 #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
688 #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
689@@ -92,6 +96,7 @@
690 #define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
691 #define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
692
693+#include <wmmintrin.h>
694 #include <bmmintrin.h>
695 #include <smmintrin.h>
696 #include <mm3dnow.h>
697Index: gcc/testsuite/gcc.target/i386/aesdec.c
698===================================================================
699--- gcc/testsuite/gcc.target/i386/aesdec.c (.../gcc-4_3-branch) (revision 0)
700+++ gcc/testsuite/gcc.target/i386/aesdec.c (.../ix86/gcc-4_3-branch) (revision 145364)
701@@ -0,0 +1,67 @@
702+/* { dg-do run } */
703+/* { dg-require-effective-target aes } */
704+/* { dg-options "-O2 -maes" } */
705+
706+#include <wmmintrin.h>
707+#include <string.h>
708+
709+#include "aes-check.h"
710+
711+extern void abort (void);
712+
713+#define NUM 1024
714+
715+static __m128i src1[NUM];
716+static __m128i src2[NUM];
717+static __m128i edst[NUM];
718+
719+static __m128i resdst[NUM];
720+
721+/* Initialize input/output vectors. (Currently, there is only one set
722+ of input/output vectors). */
723+static void
724+init_data (__m128i *s1, __m128i *s2, __m128i *d)
725+{
726+ int i;
727+ for (i = 0; i < NUM; i++)
728+ {
729+ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
730+ 0x73745665, 0x7b5b5465);
731+ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
732+ 0x68617929, 0x48692853);
733+ d[i] = _mm_setr_epi32 (0xb730392a, 0xb58eb95e,
734+ 0xfaea2787, 0x138ac342);
735+ }
736+}
737+
738+static void
739+aes_test (void)
740+{
741+ int i;
742+
743+ init_data (src1, src2, edst);
744+
745+ for (i = 0; i < NUM; i += 16)
746+ {
747+ resdst[i] = _mm_aesdec_si128 (src1[i], src2[i]);
748+ resdst[i + 1] = _mm_aesdec_si128 (src1[i + 1], src2[i + 1]);
749+ resdst[i + 2] = _mm_aesdec_si128 (src1[i + 2], src2[i + 2]);
750+ resdst[i + 3] = _mm_aesdec_si128 (src1[i + 3], src2[i + 3]);
751+ resdst[i + 4] = _mm_aesdec_si128 (src1[i + 4], src2[i + 4]);
752+ resdst[i + 5] = _mm_aesdec_si128 (src1[i + 5], src2[i + 5]);
753+ resdst[i + 6] = _mm_aesdec_si128 (src1[i + 6], src2[i + 6]);
754+ resdst[i + 7] = _mm_aesdec_si128 (src1[i + 7], src2[i + 7]);
755+ resdst[i + 8] = _mm_aesdec_si128 (src1[i + 8], src2[i + 8]);
756+ resdst[i + 9] = _mm_aesdec_si128 (src1[i + 9], src2[i + 9]);
757+ resdst[i + 10] = _mm_aesdec_si128 (src1[i + 10], src2[i + 10]);
758+ resdst[i + 11] = _mm_aesdec_si128 (src1[i + 11], src2[i + 11]);
759+ resdst[i + 12] = _mm_aesdec_si128 (src1[i + 12], src2[i + 12]);
760+ resdst[i + 13] = _mm_aesdec_si128 (src1[i + 13], src2[i + 13]);
761+ resdst[i + 14] = _mm_aesdec_si128 (src1[i + 14], src2[i + 14]);
762+ resdst[i + 15] = _mm_aesdec_si128 (src1[i + 15], src2[i + 15]);
763+ }
764+
765+ for (i = 0; i < NUM; i++)
766+ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
767+ abort ();
768+}
769Index: gcc/testsuite/ChangeLog.ix86
770===================================================================
771--- gcc/testsuite/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
772+++ gcc/testsuite/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
773@@ -0,0 +1,22 @@
774+2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
775+
776+ Backport from mainline:
777+ 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
778+
779+ * g++.dg/other/i386-2.C: Include <wmmintrin.h>.
780+ * g++.dg/other/i386-3.C: Likewise.
781+ * gcc.target/i386/sse-13.c: Likewise.
782+ * gcc.target/i386/sse-14.c: Likewise.
783+
784+ * gcc.target/i386/aes-check.h: New.
785+ * gcc.target/i386/aesdec.c: Likewise.
786+ * gcc.target/i386/aesdeclast.c: Likewise.
787+ * gcc.target/i386/aesenc.c: Likewise.
788+ * gcc.target/i386/aesenclast.c: Likewise.
789+ * gcc.target/i386/aesimc.c: Likewise.
790+ * gcc.target/i386/aeskeygenassist.c: Likewise.
791+ * gcc.target/i386/pclmulqdq.c: Likewise.
792+ * gcc.target/i386/pclmul-check.h: Likewise.
793+
794+ * gcc.target/i386/i386.exp (check_effective_target_aes): New.
795+ (check_effective_target_pclmul): Likewise.
796Index: gcc/testsuite/g++.dg/other/i386-2.C
797===================================================================
798--- gcc/testsuite/g++.dg/other/i386-2.C (.../gcc-4_3-branch) (revision 145062)
799+++ gcc/testsuite/g++.dg/other/i386-2.C (.../ix86/gcc-4_3-branch) (revision 145364)
800@@ -1,8 +1,9 @@
801-/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
802+/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
803 usable with -O -pedantic-errors. */
804 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
805-/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5" } */
806+/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
807
808+#include <wmmintrin.h>
809 #include <bmmintrin.h>
810 #include <smmintrin.h>
811 #include <mm3dnow.h>
812Index: gcc/testsuite/g++.dg/other/i386-3.C
813===================================================================
814--- gcc/testsuite/g++.dg/other/i386-3.C (.../gcc-4_3-branch) (revision 145062)
815+++ gcc/testsuite/g++.dg/other/i386-3.C (.../ix86/gcc-4_3-branch) (revision 145364)
816@@ -1,8 +1,9 @@
817-/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
818+/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
819 usable with -O -fkeep-inline-functions. */
820 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
821-/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */
822+/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -maes -mpclmul -msse4 -msse5" } */
823
824+#include <wmmintrin.h>
825 #include <bmmintrin.h>
826 #include <smmintrin.h>
827 #include <mm3dnow.h>
828Index: gcc/ChangeLog.ix86
829===================================================================
830--- gcc/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
831+++ gcc/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
832@@ -0,0 +1,59 @@
833+2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
834+
835+ Backport from mainline:
836+ 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
837+
838+ * config.gcc (extra_headers): Add wmmintrin.h for x86 and x86-64.
839+
840+ * config/i386/cpuid.h (bit_AES): New.
841+ (bit_PCLMUL): Likewise.
842+
843+ * config/i386/i386.c (pta_flags): Add PTA_AES and PTA_PCLMUL.
844+ (override_options): Handle PTA_AES and PTA_PCLMUL. Enable
845+ SSE2 if AES or PCLMUL is enabled.
846+ (ix86_builtins): Add IX86_BUILTIN_AESENC128,
847+ IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
848+ IX86_BUILTIN_AESDECLAST128, IX86_BUILTIN_AESIMC128,
849+ IX86_BUILTIN_AESKEYGENASSIST128 and IX86_BUILTIN_PCLMULQDQ128.
850+ (bdesc_sse_3arg): Add IX86_BUILTIN_PCLMULQDQ128.
851+ (bdesc_2arg): Add IX86_BUILTIN_AESENC128,
852+ IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
853+ IX86_BUILTIN_AESDECLAST128 and IX86_BUILTIN_AESKEYGENASSIST128.
854+ (bdesc_1arg): Add IX86_BUILTIN_AESIMC128.
855+ (ix86_init_mmx_sse_builtins): Define __builtin_ia32_aesenc128,
856+ __builtin_ia32_aesenclast128, __builtin_ia32_aesdec128,
857+ __builtin_ia32_aesdeclast128, __builtin_ia32_aesimc128,
858+ __builtin_ia32_aeskeygenassist128 and
859+ __builtin_ia32_pclmulqdq128.
860+ * config/i386/i386.c (ix86_expand_binop_imm_builtin): New.
861+ (ix86_expand_builtin): Use it for IX86_BUILTIN_PSLLDQI128 and
862+ IX86_BUILTIN_PSRLDQI128. Handle IX86_BUILTIN_AESKEYGENASSIST128.
863+
864+ * config/i386/i386.h (TARGET_AES): New.
865+ (TARGET_PCLMUL): Likewise.
866+ (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AES and TARGET_PCLMUL.
867+
868+ * config/i386/i386.md (UNSPEC_AESENC): New.
869+ (UNSPEC_AESENCLAST): Likewise.
870+ (UNSPEC_AESDEC): Likewise.
871+ (UNSPEC_AESDECLAST): Likewise.
872+ (UNSPEC_AESIMC): Likewise.
873+ (UNSPEC_AESKEYGENASSIST): Likewise.
874+ (UNSPEC_PCLMULQDQ): Likewise.
875+
876+ * config/i386/i386.opt (maes): New.
877+ (mpclmul): Likewise.
878+
879+ * config/i386/sse.md (aesenc): New pattern.
880+ (aesenclast): Likewise.
881+ (aesdec): Likewise.
882+ (aesdeclast): Likewise.
883+ (aesimc): Likewise.
884+ (aeskeygenassist): Likewise.
885+ (pclmulqdq): Likewise.
886+
887+ * config/i386/wmmintrin.h: New.
888+
889+ * doc/extend.texi: Document AES and PCLMUL built-in functions.
890+
891+ * doc/invoke.texi: Document -maes and -mpclmul.
892Index: gcc/config.gcc
893===================================================================
894--- gcc/config.gcc (.../gcc-4_3-branch) (revision 145062)
895+++ gcc/config.gcc (.../ix86/gcc-4_3-branch) (revision 145364)
896@@ -308,13 +308,15 @@
897 cpu_type=i386
898 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
899 pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
900- nmmintrin.h bmmintrin.h mmintrin-common.h"
901+ nmmintrin.h bmmintrin.h mmintrin-common.h
902+ wmmintrin.h"
903 ;;
904 x86_64-*-*)
905 cpu_type=i386
906 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
907 pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
908- nmmintrin.h bmmintrin.h mmintrin-common.h"
909+ nmmintrin.h bmmintrin.h mmintrin-common.h
910+ wmmintrin.h"
911 need_64bit_hwint=yes
912 ;;
913 ia64-*-*)
914Index: gcc/config/i386/i386.h
915===================================================================
916--- gcc/config/i386/i386.h (.../gcc-4_3-branch) (revision 145062)
917+++ gcc/config/i386/i386.h (.../ix86/gcc-4_3-branch) (revision 145364)
918@@ -395,6 +395,8 @@
919 #define TARGET_SAHF x86_sahf
920 #define TARGET_RECIP x86_recip
921 #define TARGET_FUSED_MADD x86_fused_muladd
922+#define TARGET_AES (TARGET_SSE2 && x86_aes)
923+#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul)
924
925 #define ASSEMBLER_DIALECT (ix86_asm_dialect)
926
927@@ -683,6 +685,10 @@
928 builtin_define ("__SSE4_1__"); \
929 if (TARGET_SSE4_2) \
930 builtin_define ("__SSE4_2__"); \
931+ if (TARGET_AES) \
932+ builtin_define ("__AES__"); \
933+ if (TARGET_PCLMUL) \
934+ builtin_define ("__PCLMUL__"); \
935 if (TARGET_SSE4A) \
936 builtin_define ("__SSE4A__"); \
937 if (TARGET_SSE5) \
938Index: gcc/config/i386/i386.md
939===================================================================
940--- gcc/config/i386/i386.md (.../gcc-4_3-branch) (revision 145062)
941+++ gcc/config/i386/i386.md (.../ix86/gcc-4_3-branch) (revision 145364)
942@@ -189,6 +189,17 @@
943 (UNSPEC_FRCZ 156)
944 (UNSPEC_CVTPH2PS 157)
945 (UNSPEC_CVTPS2PH 158)
946+
947+ ; For AES support
948+ (UNSPEC_AESENC 159)
949+ (UNSPEC_AESENCLAST 160)
950+ (UNSPEC_AESDEC 161)
951+ (UNSPEC_AESDECLAST 162)
952+ (UNSPEC_AESIMC 163)
953+ (UNSPEC_AESKEYGENASSIST 164)
954+
955+ ; For PCLMUL support
956+ (UNSPEC_PCLMUL 165)
957 ])
958
959 (define_constants
960Index: gcc/config/i386/wmmintrin.h
961===================================================================
962--- gcc/config/i386/wmmintrin.h (.../gcc-4_3-branch) (revision 0)
963+++ gcc/config/i386/wmmintrin.h (.../ix86/gcc-4_3-branch) (revision 145364)
964@@ -0,0 +1,123 @@
965+/* Copyright (C) 2008 Free Software Foundation, Inc.
966+
967+ This file is part of GCC.
968+
969+ GCC is free software; you can redistribute it and/or modify
970+ it under the terms of the GNU General Public License as published by
971+ the Free Software Foundation; either version 2, or (at your option)
972+ any later version.
973+
974+ GCC is distributed in the hope that it will be useful,
975+ but WITHOUT ANY WARRANTY; without even the implied warranty of
976+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
977+ GNU General Public License for more details.
978+
979+ You should have received a copy of the GNU General Public License
980+ along with GCC; see the file COPYING. If not, write to
981+ the Free Software Foundation, 59 Temple Place - Suite 330,
982+ Boston, MA 02111-1307, USA. */
983+
984+/* As a special exception, if you include this header file into source
985+ files compiled by GCC, this header file does not by itself cause
986+ the resulting executable to be covered by the GNU General Public
987+ License. This exception does not however invalidate any other
988+ reasons why the executable file might be covered by the GNU General
989+ Public License. */
990+
991+/* Implemented from the specification included in the Intel C++ Compiler
992+ User Guide and Reference, version 10.1. */
993+
994+#ifndef _WMMINTRIN_H_INCLUDED
995+#define _WMMINTRIN_H_INCLUDED
996+
997+/* We need definitions from the SSE2 header file. */
998+#include <emmintrin.h>
999+
1000+#if !defined (__AES__) && !defined (__PCLMUL__)
1001+# error "AES/PCLMUL instructions not enabled"
1002+#else
1003+
1004+/* AES */
1005+
1006+#ifdef __AES__
1007+/* Performs 1 round of AES decryption of the first m128i using
1008+ the second m128i as a round key. */
1009+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1010+_mm_aesdec_si128 (__m128i __X, __m128i __Y)
1011+{
1012+ return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
1013+}
1014+
1015+/* Performs the last round of AES decryption of the first m128i
1016+ using the second m128i as a round key. */
1017+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1018+_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
1019+{
1020+ return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
1021+ (__v2di)__Y);
1022+}
1023+
1024+/* Performs 1 round of AES encryption of the first m128i using
1025+ the second m128i as a round key. */
1026+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1027+_mm_aesenc_si128 (__m128i __X, __m128i __Y)
1028+{
1029+ return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
1030+}
1031+
1032+/* Performs the last round of AES encryption of the first m128i
1033+ using the second m128i as a round key. */
1034+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035+_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
1036+{
1037+ return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
1038+}
1039+
1040+/* Performs the InverseMixColumn operation on the source m128i
1041+ and stores the result into m128i destination. */
1042+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1043+_mm_aesimc_si128 (__m128i __X)
1044+{
1045+ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
1046+}
1047+
1048+/* Generates a m128i round key for the input m128i AES cipher key and
1049+ byte round constant. The second parameter must be a compile time
1050+ constant. */
1051+#ifdef __OPTIMIZE__
1052+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1053+_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
1054+{
1055+ return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
1056+}
1057+#else
1058+#define _mm_aeskeygenassist_si128(X, C) \
1059+ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
1060+ (int)(C)))
1061+#endif
1062+#endif /* __AES__ */
1063+
1064+/* PCLMUL */
1065+
1066+#ifdef __PCLMUL__
1067+/* Performs carry-less integer multiplication of 64-bit halves of
1068+ 128-bit input operands. The third parameter indicates which 64-bit
1069+ halves of the first two parameters should be used. It must be
1070+ a compile time constant. */
1071+#ifdef __OPTIMIZE__
1072+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1073+_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
1074+{
1075+ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
1076+ (__v2di)__Y, __I);
1077+}
1078+#else
1079+#define _mm_clmulepi64_si128(X, Y, I) \
1080+ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
1081+ (__v2di)(__m128i)(Y), (int)(I)))
1082+#endif
1083+#endif /* __PCLMUL__ */
1084+
1085+#endif /* __AES__/__PCLMUL__ */
1086+
1087+#endif /* _WMMINTRIN_H_INCLUDED */
1088Index: gcc/config/i386/cpuid.h
1089===================================================================
1090--- gcc/config/i386/cpuid.h (.../gcc-4_3-branch) (revision 145062)
1091+++ gcc/config/i386/cpuid.h (.../ix86/gcc-4_3-branch) (revision 145364)
1092@@ -33,11 +33,13 @@
1093
1094 /* %ecx */
1095 #define bit_SSE3 (1 << 0)
1096+#define bit_PCLMUL (1 << 1)
1097 #define bit_SSSE3 (1 << 9)
1098 #define bit_CMPXCHG16B (1 << 13)
1099 #define bit_SSE4_1 (1 << 19)
1100 #define bit_SSE4_2 (1 << 20)
1101 #define bit_POPCNT (1 << 23)
1102+#define bit_AES (1 << 25)
1103
1104 /* %edx */
1105 #define bit_CMPXCHG8B (1 << 8)
1106Index: gcc/config/i386/sse.md
1107===================================================================
1108--- gcc/config/i386/sse.md (.../gcc-4_3-branch) (revision 145062)
1109+++ gcc/config/i386/sse.md (.../ix86/gcc-4_3-branch) (revision 145364)
1110@@ -8700,3 +8700,80 @@
1111 }
1112 [(set_attr "type" "ssecmp")
1113 (set_attr "mode" "TI")])
1114+
1115+(define_insn "aesenc"
1116+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1117+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1118+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1119+ UNSPEC_AESENC))]
1120+ "TARGET_AES"
1121+ "aesenc\t{%2, %0|%0, %2}"
1122+ [(set_attr "type" "sselog1")
1123+ (set_attr "prefix_extra" "1")
1124+ (set_attr "mode" "TI")])
1125+
1126+(define_insn "aesenclast"
1127+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1128+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1129+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1130+ UNSPEC_AESENCLAST))]
1131+ "TARGET_AES"
1132+ "aesenclast\t{%2, %0|%0, %2}"
1133+ [(set_attr "type" "sselog1")
1134+ (set_attr "prefix_extra" "1")
1135+ (set_attr "mode" "TI")])
1136+
1137+(define_insn "aesdec"
1138+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1139+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1140+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1141+ UNSPEC_AESDEC))]
1142+ "TARGET_AES"
1143+ "aesdec\t{%2, %0|%0, %2}"
1144+ [(set_attr "type" "sselog1")
1145+ (set_attr "prefix_extra" "1")
1146+ (set_attr "mode" "TI")])
1147+
1148+(define_insn "aesdeclast"
1149+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1150+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1151+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
1152+ UNSPEC_AESDECLAST))]
1153+ "TARGET_AES"
1154+ "aesdeclast\t{%2, %0|%0, %2}"
1155+ [(set_attr "type" "sselog1")
1156+ (set_attr "prefix_extra" "1")
1157+ (set_attr "mode" "TI")])
1158+
1159+(define_insn "aesimc"
1160+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1161+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
1162+ UNSPEC_AESIMC))]
1163+ "TARGET_AES"
1164+ "aesimc\t{%1, %0|%0, %1}"
1165+ [(set_attr "type" "sselog1")
1166+ (set_attr "prefix_extra" "1")
1167+ (set_attr "mode" "TI")])
1168+
1169+(define_insn "aeskeygenassist"
1170+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1171+ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
1172+ (match_operand:SI 2 "const_0_to_255_operand" "n")]
1173+ UNSPEC_AESKEYGENASSIST))]
1174+ "TARGET_AES"
1175+ "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
1176+ [(set_attr "type" "sselog1")
1177+ (set_attr "prefix_extra" "1")
1178+ (set_attr "mode" "TI")])
1179+
1180+(define_insn "pclmulqdq"
1181+ [(set (match_operand:V2DI 0 "register_operand" "=x")
1182+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
1183+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
1184+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
1185+ UNSPEC_PCLMUL))]
1186+ "TARGET_PCLMUL"
1187+ "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
1188+ [(set_attr "type" "sselog1")
1189+ (set_attr "prefix_extra" "1")
1190+ (set_attr "mode" "TI")])
1191Index: gcc/config/i386/i386.opt
1192===================================================================
1193--- gcc/config/i386/i386.opt (.../gcc-4_3-branch) (revision 145062)
1194+++ gcc/config/i386/i386.opt (.../ix86/gcc-4_3-branch) (revision 145364)
1195@@ -279,3 +279,11 @@
1196 Enable automatic generation of fused floating point multiply-add instructions
1197 if the ISA supports such instructions. The -mfused-madd option is on by
1198 default.
1199+
1200+maes
1201+Target Report RejectNegative Var(x86_aes)
1202+Support AES built-in functions and code generation
1203+
1204+mpclmul
1205+Target Report RejectNegative Var(x86_pclmul)
1206+Support PCLMUL built-in functions and code generation
1207Index: gcc/config/i386/i386.c
1208===================================================================
1209--- gcc/config/i386/i386.c (.../gcc-4_3-branch) (revision 145062)
1210+++ gcc/config/i386/i386.c (.../ix86/gcc-4_3-branch) (revision 145364)
1211@@ -2077,7 +2077,9 @@
1212 PTA_NO_SAHF = 1 << 13,
1213 PTA_SSE4_1 = 1 << 14,
1214 PTA_SSE4_2 = 1 << 15,
1215- PTA_SSE5 = 1 << 16
1216+ PTA_SSE5 = 1 << 16,
1217+ PTA_AES = 1 << 17,
1218+ PTA_PCLMUL = 1 << 18
1219 };
1220
1221 static struct pta
1222@@ -2384,6 +2386,10 @@
1223 x86_prefetch_sse = true;
1224 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1225 x86_sahf = true;
1226+ if (processor_alias_table[i].flags & PTA_AES)
1227+ x86_aes = true;
1228+ if (processor_alias_table[i].flags & PTA_PCLMUL)
1229+ x86_pclmul = true;
1230
1231 break;
1232 }
1233@@ -2427,6 +2433,14 @@
1234 if (i == pta_size)
1235 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1236
1237+ /* Enable SSE2 if AES or PCLMUL is enabled. */
1238+ if ((x86_aes || x86_pclmul)
1239+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
1240+ {
1241+ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1242+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1243+ }
1244+
1245 ix86_tune_mask = 1u << ix86_tune;
1246 for (i = 0; i < X86_TUNE_LAST; ++i)
1247 ix86_tune_features[i] &= ix86_tune_mask;
1248@@ -17582,6 +17596,17 @@
1249
1250 IX86_BUILTIN_PCMPGTQ,
1251
1252+ /* AES instructions */
1253+ IX86_BUILTIN_AESENC128,
1254+ IX86_BUILTIN_AESENCLAST128,
1255+ IX86_BUILTIN_AESDEC128,
1256+ IX86_BUILTIN_AESDECLAST128,
1257+ IX86_BUILTIN_AESIMC128,
1258+ IX86_BUILTIN_AESKEYGENASSIST128,
1259+
1260+ /* PCLMUL instruction */
1261+ IX86_BUILTIN_PCLMULQDQ128,
1262+
1263 /* TFmode support builtins. */
1264 IX86_BUILTIN_INFQ,
1265 IX86_BUILTIN_FABSQ,
1266@@ -17937,6 +17962,9 @@
1267 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
1268 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
1269 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
1270+
1271+ /* PCLMUL */
1272+ { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 },
1273 };
1274
1275 static const struct builtin_description bdesc_2arg[] =
1276@@ -18247,6 +18275,13 @@
1277
1278 /* SSE4.2 */
1279 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
1280+
1281+ /* AES */
1282+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
1283+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
1284+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
1285+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
1286+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 },
1287 };
1288
1289 static const struct builtin_description bdesc_1arg[] =
1290@@ -18322,6 +18357,9 @@
1291 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
1292 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
1293 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
1294+
1295+ /* AES */
1296+ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
1297 };
1298
1299 /* SSE5 */
1300@@ -19555,6 +19593,25 @@
1301 NULL_TREE);
1302 def_builtin_const (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
1303
1304+ /* AES */
1305+ if (TARGET_AES)
1306+ {
1307+ /* Define AES built-in functions only if AES is enabled. */
1308+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
1309+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
1310+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
1311+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
1312+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
1313+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
1314+ }
1315+
1316+ /* PCLMUL */
1317+ if (TARGET_PCLMUL)
1318+ {
1319+ /* Define PCLMUL built-in function only if PCLMUL is enabled. */
1320+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
1321+ }
1322+
1323 /* AMDFAM10 SSE4A New built-ins */
1324 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
1325 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
1326@@ -19830,6 +19887,44 @@
1327 return target;
1328 }
1329
1330+/* Subroutine of ix86_expand_builtin to take care of binop insns with an
1331+ immediate. Returns const0_rtx on error, 0 if no insn was generated. */
1332+
1333+static rtx
1334+ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
1335+ rtx target)
1336+{
1337+ rtx pat;
1338+ tree arg0 = CALL_EXPR_ARG (exp, 0);
1339+ tree arg1 = CALL_EXPR_ARG (exp, 1);
1340+ rtx op0 = expand_normal (arg0);
1341+ rtx op1 = expand_normal (arg1);
1342+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
1343+ enum machine_mode mode0 = insn_data[icode].operand[1].mode;
1344+ enum machine_mode mode1 = insn_data[icode].operand[2].mode;
1345+ /* Check OP0 against operand 1's own mode; copy to a register otherwise. */
1346+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
1347+ {
1348+ op0 = copy_to_reg (op0);
1349+ op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
1350+ }
1351+ /* Operand 2 must already satisfy its predicate; it is never forced. */
1352+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
1353+ {
1354+ error ("the last operand must be an immediate");
1355+ return const0_rtx;
1356+ }
1357+ /* The result always lives in a fresh V2DImode pseudo; TARGET is ignored. */
1358+ target = gen_reg_rtx (V2DImode);
1359+ pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
1360+ V2DImode, 0),
1361+ op0, op1);
1362+ if (! pat)
1363+ return 0;
1364+ emit_insn (pat);
1365+ return target;
1366+}
1367+
1368 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
1369
1370 static rtx
1371@@ -20926,34 +21021,18 @@
1372 return target;
1373
1374 case IX86_BUILTIN_PSLLDQI128:
1375+ return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
1376+ exp, target);
1377+ break;
1378+
1379 case IX86_BUILTIN_PSRLDQI128:
1380- icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
1381- : CODE_FOR_sse2_lshrti3);
1382- arg0 = CALL_EXPR_ARG (exp, 0);
1383- arg1 = CALL_EXPR_ARG (exp, 1);
1384- op0 = expand_normal (arg0);
1385- op1 = expand_normal (arg1);
1386- tmode = insn_data[icode].operand[0].mode;
1387- mode1 = insn_data[icode].operand[1].mode;
1388- mode2 = insn_data[icode].operand[2].mode;
1389+ return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
1390+ exp, target);
1391+ break;
1392
1393- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
1394- {
1395- op0 = copy_to_reg (op0);
1396- op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
1397- }
1398- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
1399- {
1400- error ("shift must be an immediate");
1401- return const0_rtx;
1402- }
1403- target = gen_reg_rtx (V2DImode);
1404- pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
1405- op0, op1);
1406- if (! pat)
1407- return 0;
1408- emit_insn (pat);
1409- return target;
1410+ case IX86_BUILTIN_AESKEYGENASSIST128:
1411+ return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist,
1412+ exp, target);
1413
1414 case IX86_BUILTIN_FEMMS:
1415 emit_insn (gen_mmx_femms ());
1416
1417Property changes on: .
1418___________________________________________________________________
1419Added: svn:mergeinfo
1420 Merged /branches/gcc-4_3-branch:r139021-145062
1421
This page took 0.286283 seconds and 4 git commands to generate.