Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (.../gcc-4_3-branch) (revision 145062) +++ gcc/doc/extend.texi (.../ix86/gcc-4_3-branch) (revision 145364) @@ -7962,6 +7962,27 @@ Generates the @code{popcntq} machine instruction. @end table +The following built-in functions are available when @option{-maes} is +used. All of them generate the machine instruction that is part of the +name. + +@smallexample +v2di __builtin_ia32_aesenc128 (v2di, v2di) +v2di __builtin_ia32_aesenclast128 (v2di, v2di) +v2di __builtin_ia32_aesdec128 (v2di, v2di) +v2di __builtin_ia32_aesdeclast128 (v2di, v2di) +v2di __builtin_ia32_aeskeygenassist128 (v2di, const int) +v2di __builtin_ia32_aesimc128 (v2di) +@end smallexample + +The following built-in function is available when @option{-mpclmul} is +used. + +@table @code +@item v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int) +Generates the @code{pclmulqdq} machine instruction. +@end table + The following built-in functions are available when @option{-msse4a} is used. All of them generate the machine instruction that is part of the name. 
Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (.../gcc-4_3-branch) (revision 145062) +++ gcc/doc/invoke.texi (.../ix86/gcc-4_3-branch) (revision 145364) @@ -551,6 +551,7 @@ -mno-wide-multiply -mrtd -malign-double @gol -mpreferred-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol +-maes -mpclmul @gol -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol -mthreads -mno-align-stringops -minline-all-stringops @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol @@ -10733,6 +10734,10 @@ @itemx -mno-sse4.2 @item -msse4 @itemx -mno-sse4 +@item -maes +@itemx -mno-aes +@item -mpclmul +@itemx -mno-pclmul @item -msse4a @item -mno-sse4a @item -msse5 @@ -10750,8 +10755,8 @@ @opindex m3dnow @opindex mno-3dnow These switches enable or disable the use of instructions in the MMX, -SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, SSE5, ABM or 3DNow!@: extended -instruction sets. +SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or +3DNow!@: extended instruction sets. These extensions are also available as built-in functions: see @ref{X86 Built-in Functions}, for details of the functions enabled and disabled by these switches. Index: gcc/testsuite/gcc.target/i386/sse-14.c =================================================================== --- gcc/testsuite/gcc.target/i386/sse-14.c (.../gcc-4_3-branch) (revision 145062) +++ gcc/testsuite/gcc.target/i386/sse-14.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -1,14 +1,15 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */ +/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ /* Test that the intrinsics compile without optimization. 
All of them are - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ #define extern #define __inline +#include #include #include #include @@ -44,6 +45,10 @@ test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) +/* wmmintrin.h */ +test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1) +test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1) + /* smmintrin.h */ test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) Index: gcc/testsuite/gcc.target/i386/i386.exp =================================================================== --- gcc/testsuite/gcc.target/i386/i386.exp (.../gcc-4_3-branch) (revision 145062) +++ gcc/testsuite/gcc.target/i386/i386.exp (.../ix86/gcc-4_3-branch) (revision 145364) @@ -51,6 +51,34 @@ } "-O2 -msse4.1" ] } +# Return 1 if aes instructions can be compiled. +proc check_effective_target_aes { } { + return [check_no_compiler_messages aes object { + typedef long long __m128i __attribute__ ((__vector_size__ (16))); + typedef long long __v2di __attribute__ ((__vector_size__ (16))); + + __m128i _mm_aesimc_si128 (__m128i __X) + { + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X); + } + } "-O2 -maes" ] +} + +# Return 1 if pclmul instructions can be compiled. 
+proc check_effective_target_pclmul { } { + return [check_no_compiler_messages pclmul object { + typedef long long __m128i __attribute__ ((__vector_size__ (16))); + typedef long long __v2di __attribute__ ((__vector_size__ (16))); + + __m128i pclmulqdq_test (__m128i __X, __m128i __Y) + { + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X, + (__v2di)__Y, + 1); + } + } "-O2 -mpclmul" ] +} + # Return 1 if sse4a instructions can be compiled. proc check_effective_target_sse4a { } { return [check_no_compiler_messages sse4a object { Index: gcc/testsuite/gcc.target/i386/aesdeclast.c =================================================================== --- gcc/testsuite/gcc.target/i386/aesdeclast.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aesdeclast.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,69 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes } */ +/* { dg-options "-O2 -maes" } */ + +#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i src1[NUM]; +static __m128i src2[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set of + input/output vectors). 
*/ + +static void +init_data (__m128i *s1, __m128i *s2, __m128i *d) +{ + int i; + + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, + 0x73745665, 0x7b5b5465); + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, + 0x68617929, 0x48692853); + d[i] = _mm_setr_epi32 (0x72a593d0, 0xd410637b, + 0x6b317f95, 0xc5a391ef); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, src2, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aesdeclast_si128 (src1[i], src2[i]); + resdst[i + 1] = _mm_aesdeclast_si128 (src1[i + 1], src2[i + 1]); + resdst[i + 2] = _mm_aesdeclast_si128 (src1[i + 2], src2[i + 2]); + resdst[i + 3] = _mm_aesdeclast_si128 (src1[i + 3], src2[i + 3]); + resdst[i + 4] = _mm_aesdeclast_si128 (src1[i + 4], src2[i + 4]); + resdst[i + 5] = _mm_aesdeclast_si128 (src1[i + 5], src2[i + 5]); + resdst[i + 6] = _mm_aesdeclast_si128 (src1[i + 6], src2[i + 6]); + resdst[i + 7] = _mm_aesdeclast_si128 (src1[i + 7], src2[i + 7]); + resdst[i + 8] = _mm_aesdeclast_si128 (src1[i + 8], src2[i + 8]); + resdst[i + 9] = _mm_aesdeclast_si128 (src1[i + 9], src2[i + 9]); + resdst[i + 10] = _mm_aesdeclast_si128 (src1[i + 10], src2[i + 10]); + resdst[i + 11] = _mm_aesdeclast_si128 (src1[i + 11], src2[i + 11]); + resdst[i + 12] = _mm_aesdeclast_si128 (src1[i + 12], src2[i + 12]); + resdst[i + 13] = _mm_aesdeclast_si128 (src1[i + 13], src2[i + 13]); + resdst[i + 14] = _mm_aesdeclast_si128 (src1[i + 14], src2[i + 14]); + resdst[i + 15] = _mm_aesdeclast_si128 (src1[i + 15], src2[i + 15]); + } + + for (i = 0; i < NUM; i++) + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/gcc.target/i386/pclmulqdq.c =================================================================== --- gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,87 @@ +/* { dg-do run } */ +/* { 
dg-require-effective-target pclmul } */ +/* { dg-options "-O2 -mpclmul" } */ + +#include +#include + +#include "pclmul-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i s1[NUM]; +static __m128i s2[NUM]; +/* We need this array to generate mem form of inst */ +static __m128i s2m[NUM]; + +static __m128i e_00[NUM]; +static __m128i e_01[NUM]; +static __m128i e_10[NUM]; +static __m128i e_11[NUM]; + +static __m128i d_00[NUM]; +static __m128i d_01[NUM]; +static __m128i d_10[NUM]; +static __m128i d_11[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set + of input/output vectors). */ +static void +init_data (__m128i *ls1, __m128i *ls2, __m128i *le_00, __m128i *le_01, + __m128i *le_10, __m128i *le_11) +{ + int i; + + for (i = 0; i < NUM; i++) + { + ls1[i] = _mm_set_epi32 (0x7B5B5465, 0x73745665, + 0x63746F72, 0x5D53475D); + ls2[i] = _mm_set_epi32 (0x48692853, 0x68617929, + 0x5B477565, 0x726F6E5D); + s2m[i] = _mm_set_epi32 (0x48692853, 0x68617929, + 0x5B477565, 0x726F6E5D); + le_00[i] = _mm_set_epi32 (0x1D4D84C8, 0x5C3440C0, + 0x929633D5, 0xD36F0451); + le_01[i] = _mm_set_epi32 (0x1A2BF6DB, 0x3A30862F, + 0xBABF262D, 0xF4B7D5C9); + le_10[i] = _mm_set_epi32 (0x1BD17C8D, 0x556AB5A1, + 0x7FA540AC, 0x2A281315); + le_11[i] = _mm_set_epi32 (0x1D1E1F2C, 0x592E7C45, + 0xD66EE03E, 0x410FD4ED); + } +} + +static void +pclmul_test (void) +{ + int i; + + init_data (s1, s2, e_00, e_01, e_10, e_11); + + for (i = 0; i < NUM; i += 2) + { + d_00[i] = _mm_clmulepi64_si128 (s1[i], s2m[i], 0x00); + d_01[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x01); + d_10[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x10); + d_11[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x11); + + d_11[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x11); + d_00[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x00); + d_10[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2m[i + 1], 0x10); + d_01[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x01); + } + + for (i = 0; i 
< NUM; i++) + { + if (memcmp (d_00 + i, e_00 + i, sizeof (__m128i))) + abort (); + if (memcmp (d_01 + i, e_01 + i, sizeof (__m128i))) + abort (); + if (memcmp (d_10 + i, e_10 + i, sizeof (__m128i))) + abort (); + if (memcmp(d_11 + i, e_11 + i, sizeof (__m128i))) + abort (); + } +} Index: gcc/testsuite/gcc.target/i386/aes-check.h =================================================================== --- gcc/testsuite/gcc.target/i386/aes-check.h (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aes-check.h (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,30 @@ +#include +#include + +#include "cpuid.h" + +static void aes_test (void); + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return 0; + + /* Run AES test only if host has AES support. */ + if (ecx & bit_AES) + { + aes_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} Index: gcc/testsuite/gcc.target/i386/pclmul-check.h =================================================================== --- gcc/testsuite/gcc.target/i386/pclmul-check.h (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/pclmul-check.h (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,30 @@ +#include +#include + +#include "cpuid.h" + +static void pclmul_test (void); + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return 0; + + /* Run PCLMULQDQ test only if host has PCLMULQDQ support. 
*/ + if (ecx & bit_PCLMUL) + { + pclmul_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} Index: gcc/testsuite/gcc.target/i386/aeskeygenassist.c =================================================================== --- gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes } */ +/* { dg-options "-O2 -maes" } */ + +#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 +#define IMM8 1 + +static __m128i src1[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set + of input/output vectors). */ + +static void +init_data (__m128i *s1, __m128i *d) +{ + int i; + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x16157e2b, 0xa6d2ae28, + 0x8815f7ab, 0x3c4fcf09); + d[i] = _mm_setr_epi32 (0x24b5e434, 0x3424b5e5, + 0xeb848a01, 0x01eb848b); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aeskeygenassist_si128 (src1[i], IMM8); + resdst[i + 1] = _mm_aeskeygenassist_si128 (src1[i + 1], IMM8); + resdst[i + 2] = _mm_aeskeygenassist_si128 (src1[i + 2], IMM8); + resdst[i + 3] = _mm_aeskeygenassist_si128 (src1[i + 3], IMM8); + resdst[i + 4] = _mm_aeskeygenassist_si128 (src1[i + 4], IMM8); + resdst[i + 5] = _mm_aeskeygenassist_si128 (src1[i + 5], IMM8); + resdst[i + 6] = _mm_aeskeygenassist_si128 (src1[i + 6], IMM8); + resdst[i + 7] = _mm_aeskeygenassist_si128 (src1[i + 7], IMM8); + resdst[i + 8] = _mm_aeskeygenassist_si128 (src1[i + 8], IMM8); + resdst[i + 9] = _mm_aeskeygenassist_si128 (src1[i + 9], IMM8); + resdst[i + 10] = _mm_aeskeygenassist_si128 (src1[i + 10], IMM8); + resdst[i + 11] = 
_mm_aeskeygenassist_si128 (src1[i + 11], IMM8); + resdst[i + 12] = _mm_aeskeygenassist_si128 (src1[i + 12], IMM8); + resdst[i + 13] = _mm_aeskeygenassist_si128 (src1[i + 13], IMM8); + resdst[i + 14] = _mm_aeskeygenassist_si128 (src1[i + 14], IMM8); + resdst[i + 15] = _mm_aeskeygenassist_si128 (src1[i + 15], IMM8); + } + + for (i = 0; i < NUM; i++) + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/gcc.target/i386/aesenclast.c =================================================================== --- gcc/testsuite/gcc.target/i386/aesenclast.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aesenclast.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,68 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes } */ +/* { dg-options "-O2 -maes" } */ + +#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i src1[NUM]; +static __m128i src2[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one + set of input/output vectors). 
*/ + +static void +init_data (__m128i *s1, __m128i *s2, __m128i *d) +{ + int i; + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, + 0x73745665, 0x7b5b5465); + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, + 0x68617929, 0x48692853); + d[i] = _mm_setr_epi32 (0x53fdc611, 0x177ec425, + 0x938c5964, 0xc7fb881e); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, src2, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aesenclast_si128 (src1[i], src2[i]); + resdst[i + 1] = _mm_aesenclast_si128 (src1[i + 1], src2[i + 1]); + resdst[i + 2] = _mm_aesenclast_si128 (src1[i + 2], src2[i + 2]); + resdst[i + 3] = _mm_aesenclast_si128 (src1[i + 3], src2[i + 3]); + resdst[i + 4] = _mm_aesenclast_si128 (src1[i + 4], src2[i + 4]); + resdst[i + 5] = _mm_aesenclast_si128 (src1[i + 5], src2[i + 5]); + resdst[i + 6] = _mm_aesenclast_si128 (src1[i + 6], src2[i + 6]); + resdst[i + 7] = _mm_aesenclast_si128 (src1[i + 7], src2[i + 7]); + resdst[i + 8] = _mm_aesenclast_si128 (src1[i + 8], src2[i + 8]); + resdst[i + 9] = _mm_aesenclast_si128 (src1[i + 9], src2[i + 9]); + resdst[i + 10] = _mm_aesenclast_si128 (src1[i + 10], src2[i + 10]); + resdst[i + 11] = _mm_aesenclast_si128 (src1[i + 11], src2[i + 11]); + resdst[i + 12] = _mm_aesenclast_si128 (src1[i + 12], src2[i + 12]); + resdst[i + 13] = _mm_aesenclast_si128 (src1[i + 13], src2[i + 13]); + resdst[i + 14] = _mm_aesenclast_si128 (src1[i + 14], src2[i + 14]); + resdst[i + 15] = _mm_aesenclast_si128 (src1[i + 15], src2[i + 15]); + } + + for (i = 0; i < NUM; i++) + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/gcc.target/i386/aesimc.c =================================================================== --- gcc/testsuite/gcc.target/i386/aesimc.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aesimc.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,66 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes 
} */ +/* { dg-options "-O2 -maes" } */ + +#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i src1[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set + of input/output vectors). */ + +static void +init_data (__m128i *s1, __m128i *d) +{ + int i; + + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, + 0x73745665, 0x7b5b5465); + d[i] = _mm_setr_epi32 (0x81c3b3e5, 0x2b18330a, + 0x44b109c8, 0x627a6f66); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aesimc_si128 (src1[i]); + resdst[i + 1] = _mm_aesimc_si128 (src1[i + 1]); + resdst[i + 2] = _mm_aesimc_si128 (src1[i + 2]); + resdst[i + 3] = _mm_aesimc_si128 (src1[i + 3]); + resdst[i + 4] = _mm_aesimc_si128 (src1[i + 4]); + resdst[i + 5] = _mm_aesimc_si128 (src1[i + 5]); + resdst[i + 6] = _mm_aesimc_si128 (src1[i + 6]); + resdst[i + 7] = _mm_aesimc_si128 (src1[i + 7]); + resdst[i + 8] = _mm_aesimc_si128 (src1[i + 8]); + resdst[i + 9] = _mm_aesimc_si128 (src1[i + 9]); + resdst[i + 10] = _mm_aesimc_si128 (src1[i + 10]); + resdst[i + 11] = _mm_aesimc_si128 (src1[i + 11]); + resdst[i + 12] = _mm_aesimc_si128 (src1[i + 12]); + resdst[i + 13] = _mm_aesimc_si128 (src1[i + 13]); + resdst[i + 14] = _mm_aesimc_si128 (src1[i + 14]); + resdst[i + 15] = _mm_aesimc_si128 (src1[i + 15]); + } + + for (i = 0; i < NUM; i++) + if (memcmp(edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/gcc.target/i386/aesenc.c =================================================================== --- gcc/testsuite/gcc.target/i386/aesenc.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aesenc.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,68 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes } */ +/* { dg-options "-O2 -maes" } */ + 
+#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i src1[NUM]; +static __m128i src2[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set + of input/output vectors). */ + +static void +init_data (__m128i *s1, __m128i *s2, __m128i *d) +{ + int i; + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, + 0x73745665, 0x7b5b5465); + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, + 0x68617929, 0x48692853); + d[i] = _mm_setr_epi32 (0xded7e595, 0x8b104b58, + 0x9fdba3c5, 0xa8311c2f); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, src2, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aesenc_si128 (src1[i], src2[i]); + resdst[i + 1] = _mm_aesenc_si128 (src1[i + 1], src2[i + 1]); + resdst[i + 2] = _mm_aesenc_si128 (src1[i + 2], src2[i + 2]); + resdst[i + 3] = _mm_aesenc_si128 (src1[i + 3], src2[i + 3]); + resdst[i + 4] = _mm_aesenc_si128 (src1[i + 4], src2[i + 4]); + resdst[i + 5] = _mm_aesenc_si128 (src1[i + 5], src2[i + 5]); + resdst[i + 6] = _mm_aesenc_si128 (src1[i + 6], src2[i + 6]); + resdst[i + 7] = _mm_aesenc_si128 (src1[i + 7], src2[i + 7]); + resdst[i + 8] = _mm_aesenc_si128 (src1[i + 8], src2[i + 8]); + resdst[i + 9] = _mm_aesenc_si128 (src1[i + 9], src2[i + 9]); + resdst[i + 10] = _mm_aesenc_si128 (src1[i + 10], src2[i + 10]); + resdst[i + 11] = _mm_aesenc_si128 (src1[i + 11], src2[i + 11]); + resdst[i + 12] = _mm_aesenc_si128 (src1[i + 12], src2[i + 12]); + resdst[i + 13] = _mm_aesenc_si128 (src1[i + 13], src2[i + 13]); + resdst[i + 14] = _mm_aesenc_si128 (src1[i + 14], src2[i + 14]); + resdst[i + 15] = _mm_aesenc_si128 (src1[i + 15], src2[i + 15]); + } + + for (i = 0; i < NUM; i++) + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/gcc.target/i386/sse-13.c 
=================================================================== --- gcc/testsuite/gcc.target/i386/sse-13.c (.../gcc-4_3-branch) (revision 145062) +++ gcc/testsuite/gcc.target/i386/sse-13.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -1,8 +1,8 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5" } */ +/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ /* Test that the intrinsics compile with optimization. All of them are - defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h + defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h that reference the proper builtin functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -15,6 +15,10 @@ #define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1) #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1) +/* wmmintrin.h */ +#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1) +#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1) + /* smmintrin.h */ #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1) #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1) @@ -92,6 +96,7 @@ #define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) #define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) +#include #include #include #include Index: gcc/testsuite/gcc.target/i386/aesdec.c =================================================================== --- gcc/testsuite/gcc.target/i386/aesdec.c (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/gcc.target/i386/aesdec.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,67 @@ +/* { dg-do run } */ +/* { dg-require-effective-target aes } */ +/* { dg-options "-O2 -maes" } */ + +#include +#include + +#include "aes-check.h" + +extern void abort (void); + +#define NUM 1024 + +static __m128i 
src1[NUM]; +static __m128i src2[NUM]; +static __m128i edst[NUM]; + +static __m128i resdst[NUM]; + +/* Initialize input/output vectors. (Currently, there is only one set + of input/output vectors). */ +static void +init_data (__m128i *s1, __m128i *s2, __m128i *d) +{ + int i; + for (i = 0; i < NUM; i++) + { + s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72, + 0x73745665, 0x7b5b5465); + s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565, + 0x68617929, 0x48692853); + d[i] = _mm_setr_epi32 (0xb730392a, 0xb58eb95e, + 0xfaea2787, 0x138ac342); + } +} + +static void +aes_test (void) +{ + int i; + + init_data (src1, src2, edst); + + for (i = 0; i < NUM; i += 16) + { + resdst[i] = _mm_aesdec_si128 (src1[i], src2[i]); + resdst[i + 1] = _mm_aesdec_si128 (src1[i + 1], src2[i + 1]); + resdst[i + 2] = _mm_aesdec_si128 (src1[i + 2], src2[i + 2]); + resdst[i + 3] = _mm_aesdec_si128 (src1[i + 3], src2[i + 3]); + resdst[i + 4] = _mm_aesdec_si128 (src1[i + 4], src2[i + 4]); + resdst[i + 5] = _mm_aesdec_si128 (src1[i + 5], src2[i + 5]); + resdst[i + 6] = _mm_aesdec_si128 (src1[i + 6], src2[i + 6]); + resdst[i + 7] = _mm_aesdec_si128 (src1[i + 7], src2[i + 7]); + resdst[i + 8] = _mm_aesdec_si128 (src1[i + 8], src2[i + 8]); + resdst[i + 9] = _mm_aesdec_si128 (src1[i + 9], src2[i + 9]); + resdst[i + 10] = _mm_aesdec_si128 (src1[i + 10], src2[i + 10]); + resdst[i + 11] = _mm_aesdec_si128 (src1[i + 11], src2[i + 11]); + resdst[i + 12] = _mm_aesdec_si128 (src1[i + 12], src2[i + 12]); + resdst[i + 13] = _mm_aesdec_si128 (src1[i + 13], src2[i + 13]); + resdst[i + 14] = _mm_aesdec_si128 (src1[i + 14], src2[i + 14]); + resdst[i + 15] = _mm_aesdec_si128 (src1[i + 15], src2[i + 15]); + } + + for (i = 0; i < NUM; i++) + if (memcmp (edst + i, resdst + i, sizeof (__m128i))) + abort (); +} Index: gcc/testsuite/ChangeLog.ix86 =================================================================== --- gcc/testsuite/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0) +++ gcc/testsuite/ChangeLog.ix86 
(.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,22 @@ +2008-04-08 H.J. Lu + + Backport from mainline: + 2008-04-04 H.J. Lu + + * g++.dg/other/i386-2.C: Include <wmmintrin.h>. + * g++.dg/other/i386-3.C: Likewise. + * gcc.target/i386/sse-13.c: Likewise. + * gcc.target/i386/sse-14.c: Likewise. + + * gcc.target/i386/aes-check.h: New. + * gcc.target/i386/aesdec.c: Likewise. + * gcc.target/i386/aesdeclast.c: Likewise. + * gcc.target/i386/aesenc.c: Likewise. + * gcc.target/i386/aesenclast.c: Likewise. + * gcc.target/i386/aesimc.c: Likewise. + * gcc.target/i386/aeskeygenassist.c: Likewise. + * gcc.target/i386/pclmulqdq.c: Likewise. + * gcc.target/i386/pclmul-check.h: Likewise. + + * gcc.target/i386/i386.exp (check_effective_target_aes): New. + (check_effective_target_pclmul): Likewise. Index: gcc/testsuite/g++.dg/other/i386-2.C =================================================================== --- gcc/testsuite/g++.dg/other/i386-2.C (.../gcc-4_3-branch) (revision 145062) +++ gcc/testsuite/g++.dg/other/i386-2.C (.../ix86/gcc-4_3-branch) (revision 145364) @@ -1,8 +1,9 @@ -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -pedantic-errors. */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */ +#include <wmmintrin.h> #include #include #include Index: gcc/testsuite/g++.dg/other/i386-3.C =================================================================== --- gcc/testsuite/g++.dg/other/i386-3.C (.../gcc-4_3-branch) (revision 145062) +++ gcc/testsuite/g++.dg/other/i386-3.C (.../ix86/gcc-4_3-branch) (revision 145364) @@ -1,8 +1,9 @@ -/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are +/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are usable with -O -fkeep-inline-functions. 
*/ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -maes -mpclmul -msse4 -msse5" } */ +#include #include #include #include Index: gcc/ChangeLog.ix86 =================================================================== --- gcc/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0) +++ gcc/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,59 @@ +2008-04-08 H.J. Lu + + Backport from mainline: + 2008-04-04 H.J. Lu + + * config.gcc (extra_headers): Add wmmintrin.h for x86 and x86-64. + + * config/i386/cpuid.h (bit_AES): New. + (bit_PCLMUL): Likewise. + + * config/i386/i386.c (pta_flags): Add PTA_AES and PTA_PCLMUL. + (override_options): Handle PTA_AES and PTA_PCLMUL. Enable + SSE2 if AES or PCLMUL is enabled. + (ix86_builtins): Add IX86_BUILTIN_AESENC128, + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128, + IX86_BUILTIN_AESDECLAST128, IX86_BUILTIN_AESIMC128, + IX86_BUILTIN_AESKEYGENASSIST128 and IX86_BUILTIN_PCLMULQDQ128. + (bdesc_sse_3arg): Add IX86_BUILTIN_PCLMULQDQ128. + (bdesc_2arg): Add IX86_BUILTIN_AESENC128, + IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128, + IX86_BUILTIN_AESDECLAST128 and IX86_BUILTIN_AESKEYGENASSIST128. + (bdesc_1arg): Add IX86_BUILTIN_AESIMC128. + (ix86_init_mmx_sse_builtins): Define __builtin_ia32_aesenc128, + __builtin_ia32_aesenclast128, __builtin_ia32_aesdec128, + __builtin_ia32_aesdeclast128,__builtin_ia32_aesimc128, + __builtin_ia32_aeskeygenassist128 and + __builtin_ia32_pclmulqdq128. + * config/i386/i386.c (ix86_expand_binop_imm_builtin): New. + (ix86_expand_builtin): Use it for IX86_BUILTIN_PSLLDQI128 and + IX86_BUILTIN_PSRLDQI128. Handle IX86_BUILTIN_AESKEYGENASSIST128. + + * config/i386/i386.h (TARGET_AES): New. + (TARGET_PCLMUL): Likewise. + (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AES and TARGET_PCLMUL. + + * config/i386/i386.md (UNSPEC_AESENC): New. 
+ (UNSPEC_AESENCLAST): Likewise. + (UNSPEC_AESDEC): Likewise. + (UNSPEC_AESDECLAST): Likewise. + (UNSPEC_AESIMC): Likewise. + (UNSPEC_AESKEYGENASSIST): Likewise. + (UNSPEC_PCLMUL): Likewise. + + * config/i386/i386.opt (maes): New. + (mpclmul): Likewise. + + * config/i386/sse.md (aesenc): New pattern. + (aesenclast): Likewise. + (aesdec): Likewise. + (aesdeclast): Likewise. + (aesimc): Likewise. + (aeskeygenassist): Likewise. + (pclmulqdq): Likewise. + + * config/i386/wmmintrin.h: New. + + * doc/extend.texi: Document AES and PCLMUL built-in functions. + + * doc/invoke.texi: Document -maes and -mpclmul. Index: gcc/config.gcc =================================================================== --- gcc/config.gcc (.../gcc-4_3-branch) (revision 145062) +++ gcc/config.gcc (.../ix86/gcc-4_3-branch) (revision 145364) @@ -308,13 +308,15 @@ cpu_type=i386 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h - nmmintrin.h bmmintrin.h mmintrin-common.h" + nmmintrin.h bmmintrin.h mmintrin-common.h + wmmintrin.h" ;; x86_64-*-*) cpu_type=i386 extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h - nmmintrin.h bmmintrin.h mmintrin-common.h" + nmmintrin.h bmmintrin.h mmintrin-common.h + wmmintrin.h" need_64bit_hwint=yes ;; ia64-*-*) Index: gcc/config/i386/i386.h =================================================================== --- gcc/config/i386/i386.h (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/i386.h (.../ix86/gcc-4_3-branch) (revision 145364) @@ -395,6 +395,8 @@ #define TARGET_SAHF x86_sahf #define TARGET_RECIP x86_recip #define TARGET_FUSED_MADD x86_fused_muladd +#define TARGET_AES (TARGET_SSE2 && x86_aes) +#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul) #define ASSEMBLER_DIALECT (ix86_asm_dialect) @@ -683,6 +685,10 @@ builtin_define ("__SSE4_1__"); \ if (TARGET_SSE4_2) \ builtin_define ("__SSE4_2__"); \ + if (TARGET_AES) \ + builtin_define ("__AES__"); \ + if (TARGET_PCLMUL) \ + 
builtin_define ("__AES__"); \ + if (TARGET_PCLMUL) \ + builtin_define ("__PCLMUL__"); \ if (TARGET_SSE4A) \ builtin_define ("__SSE4A__"); \ if (TARGET_SSE5) \ Index: gcc/config/i386/i386.md =================================================================== --- gcc/config/i386/i386.md (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/i386.md (.../ix86/gcc-4_3-branch) (revision 145364) @@ -189,6 +189,17 @@ (UNSPEC_FRCZ 156) (UNSPEC_CVTPH2PS 157) (UNSPEC_CVTPS2PH 158) + + ; For AES support + (UNSPEC_AESENC 159) + (UNSPEC_AESENCLAST 160) + (UNSPEC_AESDEC 161) + (UNSPEC_AESDECLAST 162) + (UNSPEC_AESIMC 163) + (UNSPEC_AESKEYGENASSIST 164) + + ; For PCLMUL support + (UNSPEC_PCLMUL 165) ]) (define_constants Index: gcc/config/i386/wmmintrin.h =================================================================== --- gcc/config/i386/wmmintrin.h (.../gcc-4_3-branch) (revision 0) +++ gcc/config/i386/wmmintrin.h (.../ix86/gcc-4_3-branch) (revision 145364) @@ -0,0 +1,123 @@ +/* Copyright (C) 2008 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING. If not, write to + the Free Software Foundation, 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* As a special exception, if you include this header file into source + files compiled by GCC, this header file does not by itself cause + the resulting executable to be covered by the GNU General Public + License. 
This exception does not however invalidate any other + reasons why the executable file might be covered by the GNU General + Public License. */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 10.1. */ + +#ifndef _WMMINTRIN_H_INCLUDED +#define _WMMINTRIN_H_INCLUDED + +/* We need definitions from the SSE2 header file. */ +#include <emmintrin.h> + +#if !defined (__AES__) && !defined (__PCLMUL__) +# error "AES/PCLMUL instructions not enabled" +#else + +/* AES */ + +#ifdef __AES__ +/* Performs 1 round of AES decryption of the first m128i using + the second m128i as a round key. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aesdec_si128 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y); +} + +/* Performs the last round of AES decryption of the first m128i + using the second m128i as a round key. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aesdeclast_si128 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X, + (__v2di)__Y); +} + +/* Performs 1 round of AES encryption of the first m128i using + the second m128i as a round key. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aesenc_si128 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y); +} + +/* Performs the last round of AES encryption of the first m128i + using the second m128i as a round key. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aesenclast_si128 (__m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y); +} + +/* Performs the InverseMixColumn operation on the source m128i + and stores the result into m128i destination. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aesimc_si128 (__m128i __X) +{ + return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X); +} + +/* Generates a m128i round key for the input m128i AES cipher key and + byte round constant. The second parameter must be a compile time + constant. */ +#ifdef __OPTIMIZE__ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_aeskeygenassist_si128 (__m128i __X, const int __C) +{ + return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C); +} +#else +#define _mm_aeskeygenassist_si128(X, C) \ + ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \ + (int)(C))) +#endif +#endif /* __AES__ */ + +/* PCLMUL */ + +#ifdef __PCLMUL__ +/* Performs carry-less integer multiplication of 64-bit halves of + 128-bit input operands. The third parameter indicates which 64-bit + halves of the first two input parameters should be used. It must be + a compile time constant. 
*/ +#ifdef __OPTIMIZE__ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I) +{ + return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X, + (__v2di)__Y, __I); +} +#else +#define _mm_clmulepi64_si128(X, Y, I) \ + ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \ + (__v2di)(__m128i)(Y), (int)(I))) +#endif +#endif /* __PCLMUL__ */ + +#endif /* __AES__/__PCLMUL__ */ + +#endif /* _WMMINTRIN_H_INCLUDED */ Index: gcc/config/i386/cpuid.h =================================================================== --- gcc/config/i386/cpuid.h (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/cpuid.h (.../ix86/gcc-4_3-branch) (revision 145364) @@ -33,11 +33,13 @@ /* %ecx */ #define bit_SSE3 (1 << 0) +#define bit_PCLMUL (1 << 1) #define bit_SSSE3 (1 << 9) #define bit_CMPXCHG16B (1 << 13) #define bit_SSE4_1 (1 << 19) #define bit_SSE4_2 (1 << 20) #define bit_POPCNT (1 << 23) +#define bit_AES (1 << 25) /* %edx */ #define bit_CMPXCHG8B (1 << 8) Index: gcc/config/i386/sse.md =================================================================== --- gcc/config/i386/sse.md (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/sse.md (.../ix86/gcc-4_3-branch) (revision 145364) @@ -8700,3 +8700,80 @@ } [(set_attr "type" "ssecmp") (set_attr "mode" "TI")]) + +(define_insn "aesenc" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + UNSPEC_AESENC))] + "TARGET_AES" + "aesenc\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "aesenclast" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + UNSPEC_AESENCLAST))] + "TARGET_AES" + "aesenclast\t{%2, %0|%0, %2}" + 
[(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "aesdec" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + UNSPEC_AESDEC))] + "TARGET_AES" + "aesdec\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "aesdeclast" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm")] + UNSPEC_AESDECLAST))] + "TARGET_AES" + "aesdeclast\t{%2, %0|%0, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "aesimc" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] + UNSPEC_AESIMC))] + "TARGET_AES" + "aesimc\t{%1, %0|%0, %1}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "aeskeygenassist" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") + (match_operand:SI 2 "const_0_to_255_operand" "n")] + UNSPEC_AESKEYGENASSIST))] + "TARGET_AES" + "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) + +(define_insn "pclmulqdq" + [(set (match_operand:V2DI 0 "register_operand" "=x") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") + (match_operand:V2DI 2 "nonimmediate_operand" "xm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_PCLMUL))] + "TARGET_PCLMUL" + "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "sselog1") + (set_attr "prefix_extra" "1") + (set_attr "mode" "TI")]) Index: gcc/config/i386/i386.opt =================================================================== 
--- gcc/config/i386/i386.opt (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/i386.opt (.../ix86/gcc-4_3-branch) (revision 145364) @@ -279,3 +279,11 @@ Enable automatic generation of fused floating point multiply-add instructions if the ISA supports such instructions. The -mfused-madd option is on by default. + +maes +Target Report RejectNegative Var(x86_aes) +Support AES built-in functions and code generation + +mpclmul +Target Report RejectNegative Var(x86_pclmul) +Support PCLMUL built-in functions and code generation Index: gcc/config/i386/i386.c =================================================================== --- gcc/config/i386/i386.c (.../gcc-4_3-branch) (revision 145062) +++ gcc/config/i386/i386.c (.../ix86/gcc-4_3-branch) (revision 145364) @@ -2077,7 +2077,9 @@ PTA_NO_SAHF = 1 << 13, PTA_SSE4_1 = 1 << 14, PTA_SSE4_2 = 1 << 15, - PTA_SSE5 = 1 << 16 + PTA_SSE5 = 1 << 16, + PTA_AES = 1 << 17, + PTA_PCLMUL = 1 << 18 }; static struct pta @@ -2384,6 +2386,10 @@ x86_prefetch_sse = true; if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))) x86_sahf = true; + if (processor_alias_table[i].flags & PTA_AES) + x86_aes = true; + if (processor_alias_table[i].flags & PTA_PCLMUL) + x86_pclmul = true; break; } @@ -2427,6 +2433,14 @@ if (i == pta_size) error ("bad value (%s) for -mtune= switch", ix86_tune_string); + /* Enable SSE2 if AES or PCLMUL is enabled. 
*/ + if ((x86_aes || x86_pclmul) + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) + { + ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; + ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; + } + ix86_tune_mask = 1u << ix86_tune; for (i = 0; i < X86_TUNE_LAST; ++i) ix86_tune_features[i] &= ix86_tune_mask; @@ -17582,6 +17596,17 @@ IX86_BUILTIN_PCMPGTQ, + /* AES instructions */ + IX86_BUILTIN_AESENC128, + IX86_BUILTIN_AESENCLAST128, + IX86_BUILTIN_AESDEC128, + IX86_BUILTIN_AESDECLAST128, + IX86_BUILTIN_AESIMC128, + IX86_BUILTIN_AESKEYGENASSIST128, + + /* PCLMUL instruction */ + IX86_BUILTIN_PCLMULQDQ128, + /* TFmode support builtins. */ IX86_BUILTIN_INFQ, IX86_BUILTIN_FABSQ, @@ -17937,6 +17962,9 @@ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 }, { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 }, + + /* PCLMUL */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 }, }; static const struct builtin_description bdesc_2arg[] = @@ -18247,6 +18275,13 @@ /* SSE4.2 */ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 }, + + /* AES */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 }, + { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 }, }; static const struct builtin_description bdesc_1arg[] = @@ -18322,6 +18357,9 @@ /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. 
*/ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 }, + + /* AES */ + { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 }, }; /* SSE5 */ @@ -19555,6 +19593,25 @@ NULL_TREE); def_builtin_const (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI); + /* AES */ + if (TARGET_AES) + { + /* Define AES built-in functions only if AES is enabled. */ + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); + } + + /* PCLMUL */ + if (TARGET_PCLMUL) + { + /* Define PCLMUL built-in function only if PCLMUL is enabled. */ + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); + } + /* AMDFAM10 SSE4A New built-ins */ def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD); def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS); @@ -19830,6 +19887,44 @@ return target; } +/* Subroutine of ix86_expand_builtin to take care of binop insns + with an immediate. 
*/ + +static rtx +ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp, + rtx target) +{ + rtx pat; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = insn_data[icode].operand[2].mode; + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + { + op0 = copy_to_reg (op0); + op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0); + } + + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + { + error ("the last operand must be an immediate"); + return const0_rtx; + } + + target = gen_reg_rtx (V2DImode); + pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, + V2DImode, 0), + op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Subroutine of ix86_expand_builtin to take care of binop insns. */ static rtx @@ -20926,34 +21021,18 @@ return target; case IX86_BUILTIN_PSLLDQI128: + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3, + exp, target); + break; + case IX86_BUILTIN_PSRLDQI128: - icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 - : CODE_FOR_sse2_lshrti3); - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - tmode = insn_data[icode].operand[0].mode; - mode1 = insn_data[icode].operand[1].mode; - mode2 = insn_data[icode].operand[2].mode; + return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3, + exp, target); + break; - if (! (*insn_data[icode].operand[1].predicate) (op0, mode1)) - { - op0 = copy_to_reg (op0); - op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0); - } - if (! 
(*insn_data[icode].operand[2].predicate) (op1, mode2)) - { - error ("shift must be an immediate"); - return const0_rtx; - } - target = gen_reg_rtx (V2DImode); - pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), - op0, op1); - if (! pat) - return 0; - emit_insn (pat); - return target; + case IX86_BUILTIN_AESKEYGENASSIST128: + return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist, + exp, target); case IX86_BUILTIN_FEMMS: emit_insn (gen_mmx_femms ()); Property changes on: . ___________________________________________________________________ Added: svn:mergeinfo Merged /branches/gcc-4_3-branch:r139021-145062