--- /dev/null
+Index: gcc/doc/extend.texi
+===================================================================
+--- gcc/doc/extend.texi (.../gcc-4_3-branch) (revision 145062)
++++ gcc/doc/extend.texi (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -7962,6 +7962,27 @@
+ Generates the @code{popcntq} machine instruction.
+ @end table
+
++The following built-in functions are available when @option{-maes} is
++used. All of them generate the machine instruction that is part of the
++name.
++
++@smallexample
++v2di __builtin_ia32_aesenc128 (v2di, v2di)
++v2di __builtin_ia32_aesenclast128 (v2di, v2di)
++v2di __builtin_ia32_aesdec128 (v2di, v2di)
++v2di __builtin_ia32_aesdeclast128 (v2di, v2di)
++v2di __builtin_ia32_aeskeygenassist128 (v2di, const int)
++v2di __builtin_ia32_aesimc128 (v2di)
++@end smallexample
++
++The following built-in function is available when @option{-mpclmul} is
++used.
++
++@table @code
++@item v2di __builtin_ia32_pclmulqdq128 (v2di, v2di, const int)
++Generates the @code{pclmulqdq} machine instruction.
++@end table
++
+ The following built-in functions are available when @option{-msse4a} is used.
+ All of them generate the machine instruction that is part of the name.
+
+Index: gcc/doc/invoke.texi
+===================================================================
+--- gcc/doc/invoke.texi (.../gcc-4_3-branch) (revision 145062)
++++ gcc/doc/invoke.texi (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -551,6 +551,7 @@
+ -mno-wide-multiply -mrtd -malign-double @gol
+ -mpreferred-stack-boundary=@var{num} -mcld -mcx16 -msahf -mrecip @gol
+ -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 @gol
++-maes -mpclmul @gol
+ -msse4a -m3dnow -mpopcnt -mabm -msse5 @gol
+ -mthreads -mno-align-stringops -minline-all-stringops @gol
+ -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
+@@ -10733,6 +10734,10 @@
+ @itemx -mno-sse4.2
+ @item -msse4
+ @itemx -mno-sse4
++@item -maes
++@itemx -mno-aes
++@item -mpclmul
++@itemx -mno-pclmul
+ @item -msse4a
+ @item -mno-sse4a
+ @item -msse5
+@@ -10750,8 +10755,8 @@
+ @opindex m3dnow
+ @opindex mno-3dnow
+ These switches enable or disable the use of instructions in the MMX,
+-SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, SSE5, ABM or 3DNow!@: extended
+-instruction sets.
++SSE, SSE2, SSE3, SSSE3, SSE4.1, AES, PCLMUL, SSE4A, SSE5, ABM or
++3DNow!@: extended instruction sets.
+ These extensions are also available as built-in functions: see
+ @ref{X86 Built-in Functions}, for details of the functions enabled and
+ disabled by these switches.
+Index: gcc/testsuite/gcc.target/i386/sse-14.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/sse-14.c (.../gcc-4_3-branch) (revision 145062)
++++ gcc/testsuite/gcc.target/i386/sse-14.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -1,14 +1,15 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */
++/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
+
+ /* Test that the intrinsics compile without optimization. All of them are
+- defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
++ defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
+ that reference the proper builtin functions. Defining away "extern" and
+ "__inline" results in all of them being compiled as proper functions. */
+
+ #define extern
+ #define __inline
+
++#include <wmmintrin.h>
+ #include <bmmintrin.h>
+ #include <smmintrin.h>
+ #include <mm3dnow.h>
+@@ -44,6 +45,10 @@
+ test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1)
+ test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1)
+
++/* wmmintrin.h */
++test_1 (_mm_aeskeygenassist_si128, __m128i, __m128i, 1)
++test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
++
+ /* smmintrin.h */
+ test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1)
+ test_2 (_mm_blend_ps, __m128, __m128, __m128, 1)
+Index: gcc/testsuite/gcc.target/i386/i386.exp
+===================================================================
+--- gcc/testsuite/gcc.target/i386/i386.exp (.../gcc-4_3-branch) (revision 145062)
++++ gcc/testsuite/gcc.target/i386/i386.exp (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -51,6 +51,34 @@
+ } "-O2 -msse4.1" ]
+ }
+
++# Return 1 if AES instructions can be compiled (probe: __builtin_ia32_aesimc128 built with "-O2 -maes").
++proc check_effective_target_aes { } {
++ return [check_no_compiler_messages aes object {
++ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
++ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
++
++ __m128i _mm_aesimc_si128 (__m128i __X)
++ {
++ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
++ }
++ } "-O2 -maes" ]
++}
++
++# Return 1 if PCLMUL instructions can be compiled (probe: __builtin_ia32_pclmulqdq128 built with "-O2 -mpclmul").
++proc check_effective_target_pclmul { } {
++ return [check_no_compiler_messages pclmul object {
++ typedef long long __m128i __attribute__ ((__vector_size__ (16)));
++ typedef long long __v2di __attribute__ ((__vector_size__ (16)));
++
++ __m128i pclmulqdq_test (__m128i __X, __m128i __Y)
++ {
++ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
++ (__v2di)__Y,
++ 1);
++ }
++ } "-O2 -mpclmul" ]
++}
++
+ # Return 1 if sse4a instructions can be compiled.
+ proc check_effective_target_sse4a { } {
+ return [check_no_compiler_messages sse4a object {
+Index: gcc/testsuite/gcc.target/i386/aesdeclast.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aesdeclast.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aesdeclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,69 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i src1[NUM];
++static __m128i src2[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1 and S2 (the two aesdeclast operands) and of
++ D (the expected result) with one fixed input/output vector set; every
++ element receives the same values.  */
++static void
++init_data (__m128i *s1, __m128i *s2, __m128i *d)
++{
++ int i;
++
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
++ 0x73745665, 0x7b5b5465);
++ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
++ 0x68617929, 0x48692853);
++ d[i] = _mm_setr_epi32 (0x72a593d0, 0xd410637b,
++ 0x6b317f95, 0xc5a391ef);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, src2, edst);
++ /* Apply _mm_aesdeclast_si128 to all NUM element pairs, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aesdeclast_si128 (src1[i], src2[i]);
++ resdst[i + 1] = _mm_aesdeclast_si128 (src1[i + 1], src2[i + 1]);
++ resdst[i + 2] = _mm_aesdeclast_si128 (src1[i + 2], src2[i + 2]);
++ resdst[i + 3] = _mm_aesdeclast_si128 (src1[i + 3], src2[i + 3]);
++ resdst[i + 4] = _mm_aesdeclast_si128 (src1[i + 4], src2[i + 4]);
++ resdst[i + 5] = _mm_aesdeclast_si128 (src1[i + 5], src2[i + 5]);
++ resdst[i + 6] = _mm_aesdeclast_si128 (src1[i + 6], src2[i + 6]);
++ resdst[i + 7] = _mm_aesdeclast_si128 (src1[i + 7], src2[i + 7]);
++ resdst[i + 8] = _mm_aesdeclast_si128 (src1[i + 8], src2[i + 8]);
++ resdst[i + 9] = _mm_aesdeclast_si128 (src1[i + 9], src2[i + 9]);
++ resdst[i + 10] = _mm_aesdeclast_si128 (src1[i + 10], src2[i + 10]);
++ resdst[i + 11] = _mm_aesdeclast_si128 (src1[i + 11], src2[i + 11]);
++ resdst[i + 12] = _mm_aesdeclast_si128 (src1[i + 12], src2[i + 12]);
++ resdst[i + 13] = _mm_aesdeclast_si128 (src1[i + 13], src2[i + 13]);
++ resdst[i + 14] = _mm_aesdeclast_si128 (src1[i + 14], src2[i + 14]);
++ resdst[i + 15] = _mm_aesdeclast_si128 (src1[i + 15], src2[i + 15]);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/gcc.target/i386/pclmulqdq.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/pclmulqdq.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,87 @@
++/* { dg-do run } */
++/* { dg-require-effective-target pclmul } */
++/* { dg-options "-O2 -mpclmul" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "pclmul-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i s1[NUM];
++static __m128i s2[NUM];
++/* We need this array to generate the memory-operand form of the instruction.  */
++static __m128i s2m[NUM];
++
++static __m128i e_00[NUM];
++static __m128i e_01[NUM];
++static __m128i e_10[NUM];
++static __m128i e_11[NUM];
++
++static __m128i d_00[NUM];
++static __m128i d_01[NUM];
++static __m128i d_10[NUM];
++static __m128i d_11[NUM];
++
++/* Fill the operands LS1/LS2, the global s2m (same values as LS2) and the
++ expected results LE_00..LE_11 with one fixed vector set, NUM times. */
++static void
++init_data (__m128i *ls1, __m128i *ls2, __m128i *le_00, __m128i *le_01,
++ __m128i *le_10, __m128i *le_11)
++{
++ int i;
++
++ for (i = 0; i < NUM; i++)
++ {
++ ls1[i] = _mm_set_epi32 (0x7B5B5465, 0x73745665,
++ 0x63746F72, 0x5D53475D);
++ ls2[i] = _mm_set_epi32 (0x48692853, 0x68617929,
++ 0x5B477565, 0x726F6E5D);
++ s2m[i] = _mm_set_epi32 (0x48692853, 0x68617929,
++ 0x5B477565, 0x726F6E5D);
++ le_00[i] = _mm_set_epi32 (0x1D4D84C8, 0x5C3440C0,
++ 0x929633D5, 0xD36F0451);
++ le_01[i] = _mm_set_epi32 (0x1A2BF6DB, 0x3A30862F,
++ 0xBABF262D, 0xF4B7D5C9);
++ le_10[i] = _mm_set_epi32 (0x1BD17C8D, 0x556AB5A1,
++ 0x7FA540AC, 0x2A281315);
++ le_11[i] = _mm_set_epi32 (0x1D1E1F2C, 0x592E7C45,
++ 0xD66EE03E, 0x410FD4ED);
++ }
++}
++
++static void
++pclmul_test (void)
++{
++ int i;
++
++ init_data (s1, s2, e_00, e_01, e_10, e_11);
++ /* Two elements per iteration, all four selectors each; s2m stands in for s2 in the even 0x00 and odd 0x10 calls.  */
++ for (i = 0; i < NUM; i += 2)
++ {
++ d_00[i] = _mm_clmulepi64_si128 (s1[i], s2m[i], 0x00);
++ d_01[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x01);
++ d_10[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x10);
++ d_11[i] = _mm_clmulepi64_si128 (s1[i], s2[i], 0x11);
++
++ d_11[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x11);
++ d_00[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x00);
++ d_10[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2m[i + 1], 0x10);
++ d_01[i + 1] = _mm_clmulepi64_si128 (s1[i + 1], s2[i + 1], 0x01);
++ }
++ /* Abort if any result for any selector differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ {
++ if (memcmp (d_00 + i, e_00 + i, sizeof (__m128i)))
++ abort ();
++ if (memcmp (d_01 + i, e_01 + i, sizeof (__m128i)))
++ abort ();
++ if (memcmp (d_10 + i, e_10 + i, sizeof (__m128i)))
++ abort ();
++ if (memcmp(d_11 + i, e_11 + i, sizeof (__m128i)))
++ abort ();
++ }
++}
+Index: gcc/testsuite/gcc.target/i386/aes-check.h
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aes-check.h (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aes-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,30 @@
++#include <stdio.h>
++#include <stdlib.h>
++
++#include "cpuid.h"
++
++static void aes_test (void);
++
++int
++main ()
++{
++ unsigned int eax, ebx, ecx, edx;
++ /* If the CPUID leaf 1 query fails there is nothing to test: exit 0.  */
++ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
++ return 0;
++
++ /* Run AES test only if host has AES support. */
++ if (ecx & bit_AES)
++ {
++ aes_test ();
++#ifdef DEBUG
++ printf ("PASSED\n");
++#endif
++ }
++#ifdef DEBUG
++ else
++ printf ("SKIPPED\n");
++#endif
++
++ return 0;
++}
+Index: gcc/testsuite/gcc.target/i386/pclmul-check.h
+===================================================================
+--- gcc/testsuite/gcc.target/i386/pclmul-check.h (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/pclmul-check.h (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,30 @@
++#include <stdio.h>
++#include <stdlib.h>
++
++#include "cpuid.h"
++
++static void pclmul_test (void);
++
++int
++main ()
++{
++ unsigned int eax, ebx, ecx, edx;
++ /* If the CPUID leaf 1 query fails there is nothing to test: exit 0.  */
++ if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
++ return 0;
++
++ /* Run PCLMULQDQ test only if host has PCLMULQDQ support. */
++ if (ecx & bit_PCLMUL)
++ {
++ pclmul_test ();
++#ifdef DEBUG
++ printf ("PASSED\n");
++#endif
++ }
++#ifdef DEBUG
++ else
++ printf ("SKIPPED\n");
++#endif
++
++ return 0;
++}
+Index: gcc/testsuite/gcc.target/i386/aeskeygenassist.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aeskeygenassist.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,66 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++#define IMM8 1
++
++static __m128i src1[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1 (the aeskeygenassist source operand) and
++ of D (the expected result for IMM8) with one fixed input/output vector
++ set; every element receives the same values.  */
++static void
++init_data (__m128i *s1, __m128i *d)
++{
++ int i;
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x16157e2b, 0xa6d2ae28,
++ 0x8815f7ab, 0x3c4fcf09);
++ d[i] = _mm_setr_epi32 (0x24b5e434, 0x3424b5e5,
++ 0xeb848a01, 0x01eb848b);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, edst);
++ /* Apply _mm_aeskeygenassist_si128 with IMM8 to all NUM elements, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aeskeygenassist_si128 (src1[i], IMM8);
++ resdst[i + 1] = _mm_aeskeygenassist_si128 (src1[i + 1], IMM8);
++ resdst[i + 2] = _mm_aeskeygenassist_si128 (src1[i + 2], IMM8);
++ resdst[i + 3] = _mm_aeskeygenassist_si128 (src1[i + 3], IMM8);
++ resdst[i + 4] = _mm_aeskeygenassist_si128 (src1[i + 4], IMM8);
++ resdst[i + 5] = _mm_aeskeygenassist_si128 (src1[i + 5], IMM8);
++ resdst[i + 6] = _mm_aeskeygenassist_si128 (src1[i + 6], IMM8);
++ resdst[i + 7] = _mm_aeskeygenassist_si128 (src1[i + 7], IMM8);
++ resdst[i + 8] = _mm_aeskeygenassist_si128 (src1[i + 8], IMM8);
++ resdst[i + 9] = _mm_aeskeygenassist_si128 (src1[i + 9], IMM8);
++ resdst[i + 10] = _mm_aeskeygenassist_si128 (src1[i + 10], IMM8);
++ resdst[i + 11] = _mm_aeskeygenassist_si128 (src1[i + 11], IMM8);
++ resdst[i + 12] = _mm_aeskeygenassist_si128 (src1[i + 12], IMM8);
++ resdst[i + 13] = _mm_aeskeygenassist_si128 (src1[i + 13], IMM8);
++ resdst[i + 14] = _mm_aeskeygenassist_si128 (src1[i + 14], IMM8);
++ resdst[i + 15] = _mm_aeskeygenassist_si128 (src1[i + 15], IMM8);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/gcc.target/i386/aesenclast.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aesenclast.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aesenclast.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,68 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i src1[NUM];
++static __m128i src2[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1 and S2 (the two aesenclast operands) and of
++ D (the expected result) with one fixed input/output vector set; every
++ element receives the same values.  */
++static void
++init_data (__m128i *s1, __m128i *s2, __m128i *d)
++{
++ int i;
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
++ 0x73745665, 0x7b5b5465);
++ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
++ 0x68617929, 0x48692853);
++ d[i] = _mm_setr_epi32 (0x53fdc611, 0x177ec425,
++ 0x938c5964, 0xc7fb881e);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, src2, edst);
++ /* Apply _mm_aesenclast_si128 to all NUM element pairs, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aesenclast_si128 (src1[i], src2[i]);
++ resdst[i + 1] = _mm_aesenclast_si128 (src1[i + 1], src2[i + 1]);
++ resdst[i + 2] = _mm_aesenclast_si128 (src1[i + 2], src2[i + 2]);
++ resdst[i + 3] = _mm_aesenclast_si128 (src1[i + 3], src2[i + 3]);
++ resdst[i + 4] = _mm_aesenclast_si128 (src1[i + 4], src2[i + 4]);
++ resdst[i + 5] = _mm_aesenclast_si128 (src1[i + 5], src2[i + 5]);
++ resdst[i + 6] = _mm_aesenclast_si128 (src1[i + 6], src2[i + 6]);
++ resdst[i + 7] = _mm_aesenclast_si128 (src1[i + 7], src2[i + 7]);
++ resdst[i + 8] = _mm_aesenclast_si128 (src1[i + 8], src2[i + 8]);
++ resdst[i + 9] = _mm_aesenclast_si128 (src1[i + 9], src2[i + 9]);
++ resdst[i + 10] = _mm_aesenclast_si128 (src1[i + 10], src2[i + 10]);
++ resdst[i + 11] = _mm_aesenclast_si128 (src1[i + 11], src2[i + 11]);
++ resdst[i + 12] = _mm_aesenclast_si128 (src1[i + 12], src2[i + 12]);
++ resdst[i + 13] = _mm_aesenclast_si128 (src1[i + 13], src2[i + 13]);
++ resdst[i + 14] = _mm_aesenclast_si128 (src1[i + 14], src2[i + 14]);
++ resdst[i + 15] = _mm_aesenclast_si128 (src1[i + 15], src2[i + 15]);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/gcc.target/i386/aesimc.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aesimc.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aesimc.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,66 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i src1[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1 (the aesimc source operand) and of D (the
++ expected result) with one fixed input/output vector set; every element
++ receives the same values.  */
++static void
++init_data (__m128i *s1, __m128i *d)
++{
++ int i;
++
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
++ 0x73745665, 0x7b5b5465);
++ d[i] = _mm_setr_epi32 (0x81c3b3e5, 0x2b18330a,
++ 0x44b109c8, 0x627a6f66);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, edst);
++ /* Apply _mm_aesimc_si128 to all NUM elements, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aesimc_si128 (src1[i]);
++ resdst[i + 1] = _mm_aesimc_si128 (src1[i + 1]);
++ resdst[i + 2] = _mm_aesimc_si128 (src1[i + 2]);
++ resdst[i + 3] = _mm_aesimc_si128 (src1[i + 3]);
++ resdst[i + 4] = _mm_aesimc_si128 (src1[i + 4]);
++ resdst[i + 5] = _mm_aesimc_si128 (src1[i + 5]);
++ resdst[i + 6] = _mm_aesimc_si128 (src1[i + 6]);
++ resdst[i + 7] = _mm_aesimc_si128 (src1[i + 7]);
++ resdst[i + 8] = _mm_aesimc_si128 (src1[i + 8]);
++ resdst[i + 9] = _mm_aesimc_si128 (src1[i + 9]);
++ resdst[i + 10] = _mm_aesimc_si128 (src1[i + 10]);
++ resdst[i + 11] = _mm_aesimc_si128 (src1[i + 11]);
++ resdst[i + 12] = _mm_aesimc_si128 (src1[i + 12]);
++ resdst[i + 13] = _mm_aesimc_si128 (src1[i + 13]);
++ resdst[i + 14] = _mm_aesimc_si128 (src1[i + 14]);
++ resdst[i + 15] = _mm_aesimc_si128 (src1[i + 15]);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp(edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/gcc.target/i386/aesenc.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aesenc.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aesenc.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,68 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i src1[NUM];
++static __m128i src2[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1 and S2 (the two aesenc operands) and of D
++ (the expected result) with one fixed input/output vector set; every
++ element receives the same values.  */
++static void
++init_data (__m128i *s1, __m128i *s2, __m128i *d)
++{
++ int i;
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
++ 0x73745665, 0x7b5b5465);
++ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
++ 0x68617929, 0x48692853);
++ d[i] = _mm_setr_epi32 (0xded7e595, 0x8b104b58,
++ 0x9fdba3c5, 0xa8311c2f);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, src2, edst);
++ /* Apply _mm_aesenc_si128 to all NUM element pairs, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aesenc_si128 (src1[i], src2[i]);
++ resdst[i + 1] = _mm_aesenc_si128 (src1[i + 1], src2[i + 1]);
++ resdst[i + 2] = _mm_aesenc_si128 (src1[i + 2], src2[i + 2]);
++ resdst[i + 3] = _mm_aesenc_si128 (src1[i + 3], src2[i + 3]);
++ resdst[i + 4] = _mm_aesenc_si128 (src1[i + 4], src2[i + 4]);
++ resdst[i + 5] = _mm_aesenc_si128 (src1[i + 5], src2[i + 5]);
++ resdst[i + 6] = _mm_aesenc_si128 (src1[i + 6], src2[i + 6]);
++ resdst[i + 7] = _mm_aesenc_si128 (src1[i + 7], src2[i + 7]);
++ resdst[i + 8] = _mm_aesenc_si128 (src1[i + 8], src2[i + 8]);
++ resdst[i + 9] = _mm_aesenc_si128 (src1[i + 9], src2[i + 9]);
++ resdst[i + 10] = _mm_aesenc_si128 (src1[i + 10], src2[i + 10]);
++ resdst[i + 11] = _mm_aesenc_si128 (src1[i + 11], src2[i + 11]);
++ resdst[i + 12] = _mm_aesenc_si128 (src1[i + 12], src2[i + 12]);
++ resdst[i + 13] = _mm_aesenc_si128 (src1[i + 13], src2[i + 13]);
++ resdst[i + 14] = _mm_aesenc_si128 (src1[i + 14], src2[i + 14]);
++ resdst[i + 15] = _mm_aesenc_si128 (src1[i + 15], src2[i + 15]);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/gcc.target/i386/sse-13.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/sse-13.c (.../gcc-4_3-branch) (revision 145062)
++++ gcc/testsuite/gcc.target/i386/sse-13.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -1,8 +1,8 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5" } */
++/* { dg-options "-O2 -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
+
+ /* Test that the intrinsics compile with optimization. All of them are
+- defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h
++ defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h
+ that reference the proper builtin functions. Defining away "extern" and
+ "__inline" results in all of them being compiled as proper functions. */
+
+@@ -15,6 +15,10 @@
+ #define __builtin_ia32_extrqi(X, I, L) __builtin_ia32_extrqi(X, 1, 1)
+ #define __builtin_ia32_insertqi(X, Y, I, L) __builtin_ia32_insertqi(X, Y, 1, 1)
+
++/* wmmintrin.h */
++#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
++#define __builtin_ia32_pclmulqdq128(X, Y, I) __builtin_ia32_pclmulqdq128(X, Y, 1)
++
+ /* smmintrin.h */
+ #define __builtin_ia32_pblendw128(X, Y, M) __builtin_ia32_pblendw128 (X, Y, 1)
+ #define __builtin_ia32_blendps(X, Y, M) __builtin_ia32_blendps(X, Y, 1)
+@@ -92,6 +96,7 @@
+ #define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1)
+ #define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1)
+
++#include <wmmintrin.h>
+ #include <bmmintrin.h>
+ #include <smmintrin.h>
+ #include <mm3dnow.h>
+Index: gcc/testsuite/gcc.target/i386/aesdec.c
+===================================================================
+--- gcc/testsuite/gcc.target/i386/aesdec.c (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/gcc.target/i386/aesdec.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,67 @@
++/* { dg-do run } */
++/* { dg-require-effective-target aes } */
++/* { dg-options "-O2 -maes" } */
++
++#include <wmmintrin.h>
++#include <string.h>
++
++#include "aes-check.h"
++
++extern void abort (void);
++
++#define NUM 1024
++
++static __m128i src1[NUM];
++static __m128i src2[NUM];
++static __m128i edst[NUM];
++
++static __m128i resdst[NUM];
++
++/* Fill all NUM elements of S1/S2 (the aesdec operands) and D (the expected
++ result) with one fixed input/output vector set, identical per element. */
++static void
++init_data (__m128i *s1, __m128i *s2, __m128i *d)
++{
++ int i;
++ for (i = 0; i < NUM; i++)
++ {
++ s1[i] = _mm_setr_epi32 (0x5d53475d, 0x63746f72,
++ 0x73745665, 0x7b5b5465);
++ s2[i] = _mm_setr_epi32 (0x726f6e5d, 0x5b477565,
++ 0x68617929, 0x48692853);
++ d[i] = _mm_setr_epi32 (0xb730392a, 0xb58eb95e,
++ 0xfaea2787, 0x138ac342);
++ }
++}
++
++static void
++aes_test (void)
++{
++ int i;
++
++ init_data (src1, src2, edst);
++ /* Apply _mm_aesdec_si128 to all NUM element pairs, 16 per iteration.  */
++ for (i = 0; i < NUM; i += 16)
++ {
++ resdst[i] = _mm_aesdec_si128 (src1[i], src2[i]);
++ resdst[i + 1] = _mm_aesdec_si128 (src1[i + 1], src2[i + 1]);
++ resdst[i + 2] = _mm_aesdec_si128 (src1[i + 2], src2[i + 2]);
++ resdst[i + 3] = _mm_aesdec_si128 (src1[i + 3], src2[i + 3]);
++ resdst[i + 4] = _mm_aesdec_si128 (src1[i + 4], src2[i + 4]);
++ resdst[i + 5] = _mm_aesdec_si128 (src1[i + 5], src2[i + 5]);
++ resdst[i + 6] = _mm_aesdec_si128 (src1[i + 6], src2[i + 6]);
++ resdst[i + 7] = _mm_aesdec_si128 (src1[i + 7], src2[i + 7]);
++ resdst[i + 8] = _mm_aesdec_si128 (src1[i + 8], src2[i + 8]);
++ resdst[i + 9] = _mm_aesdec_si128 (src1[i + 9], src2[i + 9]);
++ resdst[i + 10] = _mm_aesdec_si128 (src1[i + 10], src2[i + 10]);
++ resdst[i + 11] = _mm_aesdec_si128 (src1[i + 11], src2[i + 11]);
++ resdst[i + 12] = _mm_aesdec_si128 (src1[i + 12], src2[i + 12]);
++ resdst[i + 13] = _mm_aesdec_si128 (src1[i + 13], src2[i + 13]);
++ resdst[i + 14] = _mm_aesdec_si128 (src1[i + 14], src2[i + 14]);
++ resdst[i + 15] = _mm_aesdec_si128 (src1[i + 15], src2[i + 15]);
++ }
++ /* Abort if any computed element differs from its expected value.  */
++ for (i = 0; i < NUM; i++)
++ if (memcmp (edst + i, resdst + i, sizeof (__m128i)))
++ abort ();
++}
+Index: gcc/testsuite/ChangeLog.ix86
+===================================================================
+--- gcc/testsuite/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
++++ gcc/testsuite/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,22 @@
++2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
++
++ Backport from mainline:
++ 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
++
++ * g++.dg/other/i386-2.C: Include <wmmintrin.h>.
++ * g++.dg/other/i386-3.C: Likewise.
++ * gcc.target/i386/sse-13.c: Likewise.
++ * gcc.target/i386/sse-14.c: Likewise.
++
++ * gcc.target/i386/aes-check.h: New.
++ * gcc.target/i386/aesdec.c: Likewise.
++ * gcc.target/i386/aesdeclast.c: Likewise.
++ * gcc.target/i386/aesenc.c: Likewise.
++ * gcc.target/i386/aesenclast.c: Likewise.
++ * gcc.target/i386/aesimc.c: Likewise.
++ * gcc.target/i386/aeskeygenassist.c: Likewise.
++ * gcc.target/i386/pclmulqdq.c: Likewise.
++ * gcc.target/i386/pclmul-check.h: Likewise.
++
++ * gcc.target/i386/i386.exp (check_effective_target_aes): New.
++ (check_effective_target_pclmul): Likewise.
+Index: gcc/testsuite/g++.dg/other/i386-2.C
+===================================================================
+--- gcc/testsuite/g++.dg/other/i386-2.C (.../gcc-4_3-branch) (revision 145062)
++++ gcc/testsuite/g++.dg/other/i386-2.C (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -1,8 +1,9 @@
+-/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
++/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
+ usable with -O -pedantic-errors. */
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+-/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5" } */
++/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -msse4 -msse5 -maes -mpclmul" } */
+
++#include <wmmintrin.h>
+ #include <bmmintrin.h>
+ #include <smmintrin.h>
+ #include <mm3dnow.h>
+Index: gcc/testsuite/g++.dg/other/i386-3.C
+===================================================================
+--- gcc/testsuite/g++.dg/other/i386-3.C (.../gcc-4_3-branch) (revision 145062)
++++ gcc/testsuite/g++.dg/other/i386-3.C (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -1,8 +1,9 @@
+-/* Test that {,x,e,p,t,s,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
++/* Test that {,x,e,p,t,s,w,a,b}mmintrin.h, mm3dnow.h and mm_malloc.h are
+ usable with -O -fkeep-inline-functions. */
+ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+-/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -msse4 -msse5" } */
++/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -maes -mpclmul -msse4 -msse5" } */
+
++#include <wmmintrin.h>
+ #include <bmmintrin.h>
+ #include <smmintrin.h>
+ #include <mm3dnow.h>
+Index: gcc/ChangeLog.ix86
+===================================================================
+--- gcc/ChangeLog.ix86 (.../gcc-4_3-branch) (revision 0)
++++ gcc/ChangeLog.ix86 (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,59 @@
++2008-04-08 H.J. Lu <hongjiu.lu@intel.com>
++
++ Backport from mainline:
++ 2008-04-04 H.J. Lu <hongjiu.lu@intel.com>
++
++ * config.gcc (extra_headers): Add wmmintrin.h for x86 and x86-64.
++
++ * config/i386/cpuid.h (bit_AES): New.
++ (bit_PCLMUL): Likewise.
++
++ * config/i386/i386.c (pta_flags): Add PTA_AES and PTA_PCLMUL.
++ (override_options): Handle PTA_AES and PTA_PCLMUL. Enable
++ SSE2 if AES or PCLMUL is enabled.
++ (ix86_builtins): Add IX86_BUILTIN_AESENC128,
++ IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
++ IX86_BUILTIN_AESDECLAST128, IX86_BUILTIN_AESIMC128,
++ IX86_BUILTIN_AESKEYGENASSIST128 and IX86_BUILTIN_PCLMULQDQ128.
++ (bdesc_sse_3arg): Add IX86_BUILTIN_PCLMULQDQ128.
++ (bdesc_2arg): Add IX86_BUILTIN_AESENC128,
++ IX86_BUILTIN_AESENCLAST128, IX86_BUILTIN_AESDEC128,
++ IX86_BUILTIN_AESDECLAST128 and IX86_BUILTIN_AESKEYGENASSIST128.
++ (bdesc_1arg): Add IX86_BUILTIN_AESIMC128.
++ (ix86_init_mmx_sse_builtins): Define __builtin_ia32_aesenc128,
++ __builtin_ia32_aesenclast128, __builtin_ia32_aesdec128,
++ __builtin_ia32_aesdeclast128, __builtin_ia32_aesimc128,
++ __builtin_ia32_aeskeygenassist128 and
++ __builtin_ia32_pclmulqdq128.
++ * config/i386/i386.c (ix86_expand_binop_imm_builtin): New.
++ (ix86_expand_builtin): Use it for IX86_BUILTIN_PSLLDQI128 and
++ IX86_BUILTIN_PSRLDQI128. Handle IX86_BUILTIN_AESKEYGENASSIST128.
++
++ * config/i386/i386.h (TARGET_AES): New.
++ (TARGET_PCLMUL): Likewise.
++ (TARGET_CPU_CPP_BUILTINS): Handle TARGET_AES and TARGET_PCLMUL.
++
++ * config/i386/i386.md (UNSPEC_AESENC): New.
++ (UNSPEC_AESENCLAST): Likewise.
++ (UNSPEC_AESDEC): Likewise.
++ (UNSPEC_AESDECLAST): Likewise.
++ (UNSPEC_AESIMC): Likewise.
++ (UNSPEC_AESKEYGENASSIST): Likewise.
++ (UNSPEC_PCLMUL): Likewise.
++
++ * config/i386/i386.opt (maes): New.
++ (mpclmul): Likewise.
++
++ * config/i386/sse.md (aesenc): New pattern.
++ (aesenclast): Likewise.
++ (aesdec): Likewise.
++ (aesdeclast): Likewise.
++ (aesimc): Likewise.
++ (aeskeygenassist): Likewise.
++ (pclmulqdq): Likewise.
++
++ * config/i386/wmmintrin.h: New.
++
++ * doc/extend.texi: Document AES and PCLMUL built-in functions.
++
++ * doc/invoke.texi: Document -maes and -mpclmul.
+Index: gcc/config.gcc
+===================================================================
+--- gcc/config.gcc (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config.gcc (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -308,13 +308,15 @@
+ cpu_type=i386
+ extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
+ pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
+- nmmintrin.h bmmintrin.h mmintrin-common.h"
++ nmmintrin.h bmmintrin.h mmintrin-common.h
++ wmmintrin.h"
+ ;;
+ x86_64-*-*)
+ cpu_type=i386
+ extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
+ pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
+- nmmintrin.h bmmintrin.h mmintrin-common.h"
++ nmmintrin.h bmmintrin.h mmintrin-common.h
++ wmmintrin.h"
+ need_64bit_hwint=yes
+ ;;
+ ia64-*-*)
+Index: gcc/config/i386/i386.h
+===================================================================
+--- gcc/config/i386/i386.h (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/i386.h (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -395,6 +395,8 @@
+ #define TARGET_SAHF x86_sahf
+ #define TARGET_RECIP x86_recip
+ #define TARGET_FUSED_MADD x86_fused_muladd
++#define TARGET_AES (TARGET_SSE2 && x86_aes)
++#define TARGET_PCLMUL (TARGET_SSE2 && x86_pclmul)
+
+ #define ASSEMBLER_DIALECT (ix86_asm_dialect)
+
+@@ -683,6 +685,10 @@
+ builtin_define ("__SSE4_1__"); \
+ if (TARGET_SSE4_2) \
+ builtin_define ("__SSE4_2__"); \
++ if (TARGET_AES) \
++ builtin_define ("__AES__"); \
++ if (TARGET_PCLMUL) \
++ builtin_define ("__PCLMUL__"); \
+ if (TARGET_SSE4A) \
+ builtin_define ("__SSE4A__"); \
+ if (TARGET_SSE5) \
+Index: gcc/config/i386/i386.md
+===================================================================
+--- gcc/config/i386/i386.md (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/i386.md (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -189,6 +189,17 @@
+ (UNSPEC_FRCZ 156)
+ (UNSPEC_CVTPH2PS 157)
+ (UNSPEC_CVTPS2PH 158)
++
++ ; For AES support
++ (UNSPEC_AESENC 159)
++ (UNSPEC_AESENCLAST 160)
++ (UNSPEC_AESDEC 161)
++ (UNSPEC_AESDECLAST 162)
++ (UNSPEC_AESIMC 163)
++ (UNSPEC_AESKEYGENASSIST 164)
++
++ ; For PCLMUL support
++ (UNSPEC_PCLMUL 165)
+ ])
+
+ (define_constants
+Index: gcc/config/i386/wmmintrin.h
+===================================================================
+--- gcc/config/i386/wmmintrin.h (.../gcc-4_3-branch) (revision 0)
++++ gcc/config/i386/wmmintrin.h (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -0,0 +1,123 @@
++/* Copyright (C) 2008 Free Software Foundation, Inc.
++
++ This file is part of GCC.
++
++ GCC is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; either version 2, or (at your option)
++ any later version.
++
++ GCC is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with GCC; see the file COPYING. If not, write to
++ the Free Software Foundation, 59 Temple Place - Suite 330,
++ Boston, MA 02111-1307, USA. */
++
++/* As a special exception, if you include this header file into source
++ files compiled by GCC, this header file does not by itself cause
++ the resulting executable to be covered by the GNU General Public
++ License. This exception does not however invalidate any other
++ reasons why the executable file might be covered by the GNU General
++ Public License. */
++
++/* Implemented from the specification included in the Intel C++ Compiler
++ User Guide and Reference, version 10.1. */
++
++#ifndef _WMMINTRIN_H_INCLUDED
++#define _WMMINTRIN_H_INCLUDED
++
++/* We need definitions from the SSE2 header file. */
++#include <emmintrin.h>
++
++#if !defined (__AES__) && !defined (__PCLMUL__)
++# error "AES/PCLMUL instructions not enabled"
++#else
++
++/* AES */
++
++#ifdef __AES__
++/* Performs 1 round of AES decryption of the first m128i using
++ the second m128i as a round key. */
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aesdec_si128 (__m128i __X, __m128i __Y)
++{
++ return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
++}
++
++/* Performs the last round of AES decryption of the first m128i
++ using the second m128i as a round key. */
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
++{
++ return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
++ (__v2di)__Y);
++}
++
++/* Performs 1 round of AES encryption of the first m128i using
++ the second m128i as a round key. */
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aesenc_si128 (__m128i __X, __m128i __Y)
++{
++ return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
++}
++
++/* Performs the last round of AES encryption of the first m128i
++ using the second m128i as a round key. */
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
++{
++ return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
++}
++
++/* Performs the InverseMixColumn operation on the source m128i
++ and stores the result into m128i destination. */
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aesimc_si128 (__m128i __X)
++{
++ return (__m128i) __builtin_ia32_aesimc128 ((__v2di)__X);
++}
++
++/* Generates a m128i round key for the input m128i AES cipher key and
++ byte round constant. The second parameter must be a compile time
++ constant. */
++#ifdef __OPTIMIZE__
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_aeskeygenassist_si128 (__m128i __X, const int __C)
++{
++ return (__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)__X, __C);
++}
++#else
++#define _mm_aeskeygenassist_si128(X, C) \
++ ((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
++ (int)(C)))
++#endif
++#endif /* __AES__ */
++
++/* PCLMUL */
++
++#ifdef __PCLMUL__
++/* Performs carry-less integer multiplication of 64-bit halves of
++ 128-bit input operands. The third parameter indicates which 64-bit
++ halves of the input parameters v1 and v2 should be used. It must be
++ a compile time constant. */
++#ifdef __OPTIMIZE__
++extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
++_mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
++{
++ return (__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)__X,
++ (__v2di)__Y, __I);
++}
++#else
++#define _mm_clmulepi64_si128(X, Y, I) \
++ ((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
++ (__v2di)(__m128i)(Y), (int)(I)))
++#endif
++#endif /* __PCLMUL__ */
++
++#endif /* __AES__/__PCLMUL__ */
++
++#endif /* _WMMINTRIN_H_INCLUDED */
+Index: gcc/config/i386/cpuid.h
+===================================================================
+--- gcc/config/i386/cpuid.h (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/cpuid.h (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -33,11 +33,13 @@
+
+ /* %ecx */
+ #define bit_SSE3 (1 << 0)
++#define bit_PCLMUL (1 << 1)
+ #define bit_SSSE3 (1 << 9)
+ #define bit_CMPXCHG16B (1 << 13)
+ #define bit_SSE4_1 (1 << 19)
+ #define bit_SSE4_2 (1 << 20)
+ #define bit_POPCNT (1 << 23)
++#define bit_AES (1 << 25)
+
+ /* %edx */
+ #define bit_CMPXCHG8B (1 << 8)
+Index: gcc/config/i386/sse.md
+===================================================================
+--- gcc/config/i386/sse.md (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/sse.md (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -8700,3 +8700,80 @@
+ }
+ [(set_attr "type" "ssecmp")
+ (set_attr "mode" "TI")])
++
++(define_insn "aesenc"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
++ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
++ UNSPEC_AESENC))]
++ "TARGET_AES"
++ "aesenc\t{%2, %0|%0, %2}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "aesenclast"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
++ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
++ UNSPEC_AESENCLAST))]
++ "TARGET_AES"
++ "aesenclast\t{%2, %0|%0, %2}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "aesdec"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
++ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
++ UNSPEC_AESDEC))]
++ "TARGET_AES"
++ "aesdec\t{%2, %0|%0, %2}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "aesdeclast"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
++ (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
++ UNSPEC_AESDECLAST))]
++ "TARGET_AES"
++ "aesdeclast\t{%2, %0|%0, %2}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "aesimc"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
++ UNSPEC_AESIMC))]
++ "TARGET_AES"
++ "aesimc\t{%1, %0|%0, %1}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "aeskeygenassist"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
++ (match_operand:SI 2 "const_0_to_255_operand" "n")]
++ UNSPEC_AESKEYGENASSIST))]
++ "TARGET_AES"
++ "aeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
++
++(define_insn "pclmulqdq"
++ [(set (match_operand:V2DI 0 "register_operand" "=x")
++ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
++ (match_operand:V2DI 2 "nonimmediate_operand" "xm")
++ (match_operand:SI 3 "const_0_to_255_operand" "n")]
++ UNSPEC_PCLMUL))]
++ "TARGET_PCLMUL"
++ "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
++ [(set_attr "type" "sselog1")
++ (set_attr "prefix_extra" "1")
++ (set_attr "mode" "TI")])
+Index: gcc/config/i386/i386.opt
+===================================================================
+--- gcc/config/i386/i386.opt (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/i386.opt (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -279,3 +279,11 @@
+ Enable automatic generation of fused floating point multiply-add instructions
+ if the ISA supports such instructions. The -mfused-madd option is on by
+ default.
++
++maes
++Target Report RejectNegative Var(x86_aes)
++Support AES built-in functions and code generation
++
++mpclmul
++Target Report RejectNegative Var(x86_pclmul)
++Support PCLMUL built-in functions and code generation
+Index: gcc/config/i386/i386.c
+===================================================================
+--- gcc/config/i386/i386.c (.../gcc-4_3-branch) (revision 145062)
++++ gcc/config/i386/i386.c (.../ix86/gcc-4_3-branch) (revision 145364)
+@@ -2077,7 +2077,9 @@
+ PTA_NO_SAHF = 1 << 13,
+ PTA_SSE4_1 = 1 << 14,
+ PTA_SSE4_2 = 1 << 15,
+- PTA_SSE5 = 1 << 16
++ PTA_SSE5 = 1 << 16,
++ PTA_AES = 1 << 17,
++ PTA_PCLMUL = 1 << 18
+ };
+
+ static struct pta
+@@ -2384,6 +2386,10 @@
+ x86_prefetch_sse = true;
+ if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
+ x86_sahf = true;
++ if (processor_alias_table[i].flags & PTA_AES)
++ x86_aes = true;
++ if (processor_alias_table[i].flags & PTA_PCLMUL)
++ x86_pclmul = true;
+
+ break;
+ }
+@@ -2427,6 +2433,14 @@
+ if (i == pta_size)
+ error ("bad value (%s) for -mtune= switch", ix86_tune_string);
+
++ /* Enable SSE2 if AES or PCLMUL is enabled. */
++ if ((x86_aes || x86_pclmul)
++ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
++ {
++ ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
++ ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
++ }
++
+ ix86_tune_mask = 1u << ix86_tune;
+ for (i = 0; i < X86_TUNE_LAST; ++i)
+ ix86_tune_features[i] &= ix86_tune_mask;
+@@ -17582,6 +17596,17 @@
+
+ IX86_BUILTIN_PCMPGTQ,
+
++ /* AES instructions */
++ IX86_BUILTIN_AESENC128,
++ IX86_BUILTIN_AESENCLAST128,
++ IX86_BUILTIN_AESDEC128,
++ IX86_BUILTIN_AESDECLAST128,
++ IX86_BUILTIN_AESIMC128,
++ IX86_BUILTIN_AESKEYGENASSIST128,
++
++ /* PCLMUL instruction */
++ IX86_BUILTIN_PCLMULQDQ128,
++
+ /* TFmode support builtins. */
+ IX86_BUILTIN_INFQ,
+ IX86_BUILTIN_FABSQ,
+@@ -17937,6 +17962,9 @@
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
++
++ /* PCLMUL */
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 },
+ };
+
+ static const struct builtin_description bdesc_2arg[] =
+@@ -18247,6 +18275,13 @@
+
+ /* SSE4.2 */
+ { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
++
++ /* AES */
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 },
+ };
+
+ static const struct builtin_description bdesc_1arg[] =
+@@ -18322,6 +18357,9 @@
+ /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
+ { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
++
++ /* AES */
++ { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
+ };
+
+ /* SSE5 */
+@@ -19555,6 +19593,25 @@
+ NULL_TREE);
+ def_builtin_const (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
+
++ /* AES */
++ if (TARGET_AES)
++ {
++ /* Define AES built-in functions only if AES is enabled. */
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
++ }
++
++ /* PCLMUL */
++ if (TARGET_PCLMUL)
++ {
++ /* Define PCLMUL built-in function only if PCLMUL is enabled. */
++ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
++ }
++
+ /* AMDFAM10 SSE4A New built-ins */
+ def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+ def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+@@ -19830,6 +19887,44 @@
+ return target;
+ }
+
++/* Subroutine of ix86_expand_builtin to take care of binop insns with an
++   immediate: EXP's last argument must satisfy ICODE's operand 2 predicate.  */
++
++static rtx
++ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
++                               rtx target)
++{
++  rtx pat;
++  tree arg0 = CALL_EXPR_ARG (exp, 0);
++  tree arg1 = CALL_EXPR_ARG (exp, 1);
++  rtx op0 = expand_normal (arg0);
++  rtx op1 = expand_normal (arg1);
++  enum machine_mode tmode = insn_data[icode].operand[0].mode;
++  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
++  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
++
++  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
++    {
++      op0 = copy_to_reg (op0);  /* Check uses operand 1's own mode (MODE0).  */
++      op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
++    }
++
++  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
++    {
++      error ("the last operand must be an immediate");
++      return const0_rtx;
++    }
++
++  target = gen_reg_rtx (V2DImode);  /* Incoming TARGET is not reused.  */
++  pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
++                                              V2DImode, 0),
++                         op0, op1);
++  if (! pat)
++    return 0;
++  emit_insn (pat);
++  return target;
++}
++
+ /* Subroutine of ix86_expand_builtin to take care of binop insns. */
+
+ static rtx
+@@ -20926,34 +21021,18 @@
+ return target;
+
+ case IX86_BUILTIN_PSLLDQI128:
++ return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
++ exp, target);
++ break;
++
+ case IX86_BUILTIN_PSRLDQI128:
+- icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
+- : CODE_FOR_sse2_lshrti3);
+- arg0 = CALL_EXPR_ARG (exp, 0);
+- arg1 = CALL_EXPR_ARG (exp, 1);
+- op0 = expand_normal (arg0);
+- op1 = expand_normal (arg1);
+- tmode = insn_data[icode].operand[0].mode;
+- mode1 = insn_data[icode].operand[1].mode;
+- mode2 = insn_data[icode].operand[2].mode;
++ return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
++ exp, target);
++ break;
+
+- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+- {
+- op0 = copy_to_reg (op0);
+- op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
+- }
+- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+- {
+- error ("shift must be an immediate");
+- return const0_rtx;
+- }
+- target = gen_reg_rtx (V2DImode);
+- pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
+- op0, op1);
+- if (! pat)
+- return 0;
+- emit_insn (pat);
+- return target;
++ case IX86_BUILTIN_AESKEYGENASSIST128:
++ return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist,
++ exp, target);
+
+ case IX86_BUILTIN_FEMMS:
+ emit_insn (gen_mmx_femms ());
+
+Property changes on: .
+___________________________________________________________________
+Added: svn:mergeinfo
+ Merged /branches/gcc-4_3-branch:r139021-145062
+