+diff -uNr valgrind-3.6.0.orig/coregrind/m_main.c valgrind-3.6.0/coregrind/m_main.c
+--- valgrind-3.6.0.orig/coregrind/m_main.c 2010-10-20 22:19:45.000000000 +0200
++++ valgrind-3.6.0/coregrind/m_main.c 2011-01-17 20:38:26.676472616 +0100
+@@ -519,6 +519,8 @@
+ VG_(clo_vex_control).guest_chase_thresh, 0, 99) {}
+ else if VG_BOOL_CLO(arg, "--vex-guest-chase-cond",
+ VG_(clo_vex_control).guest_chase_cond) {}
++ else if VG_BOOL_CLO(arg, "--vex-native-cpuid",
++ VG_(clo_vex_control).iropt_native_cpuid) {}
+
+ else if VG_INT_CLO(arg, "--log-fd", tmp_log_fd) {
+ log_to = VgLogTo_Fd;
+diff -uNr valgrind-3.6.0.orig/VEX/priv/guest_amd64_defs.h valgrind-3.6.0/VEX/priv/guest_amd64_defs.h
+--- valgrind-3.6.0.orig/VEX/priv/guest_amd64_defs.h 2010-10-20 22:19:51.000000000 +0200
++++ valgrind-3.6.0/VEX/priv/guest_amd64_defs.h 2011-01-17 20:38:57.815124615 +0100
+@@ -147,6 +147,7 @@
+ extern void amd64g_dirtyhelper_CPUID_baseline ( VexGuestAMD64State* st );
+ extern void amd64g_dirtyhelper_CPUID_sse3_and_cx16 ( VexGuestAMD64State* st );
+ extern void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st );
++extern void amd64g_dirtyhelper_CPUID_native ( VexGuestAMD64State* st );
+
+ extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
+
+diff -uNr valgrind-3.6.0.orig/VEX/priv/guest_amd64_helpers.c valgrind-3.6.0/VEX/priv/guest_amd64_helpers.c
+--- valgrind-3.6.0.orig/VEX/priv/guest_amd64_helpers.c 2010-10-20 22:19:51.000000000 +0200
++++ valgrind-3.6.0/VEX/priv/guest_amd64_helpers.c 2011-01-17 20:36:00.884903903 +0100
+@@ -2170,6 +2170,20 @@
+ }
+
+
++void amd64g_dirtyhelper_CPUID_native ( VexGuestAMD64State* st )
++{
++# if defined(__x86_64__)
++ __asm__ __volatile__ ("cpuid" : "=a" (st->guest_RAX),
++ "=b" (st->guest_RBX),
++ "=c" (st->guest_RCX),
++ "=d" (st->guest_RDX)
++ : "0" (st->guest_RAX), "2" (st->guest_RCX));
++# else
++   /* Not an amd64 host: leave the guest registers unchanged. */
++# endif
++}
++
++
+ ULong amd64g_calculate_RCR ( ULong arg,
+ ULong rot_amt,
+ ULong rflags_in,
+diff -uNr valgrind-3.6.0.orig/VEX/priv/guest_amd64_toIR.c valgrind-3.6.0/VEX/priv/guest_amd64_toIR.c
+--- valgrind-3.6.0.orig/VEX/priv/guest_amd64_toIR.c 2011-01-17 20:35:34.380376775 +0100
++++ valgrind-3.6.0/VEX/priv/guest_amd64_toIR.c 2011-01-17 20:36:00.891571709 +0100
+@@ -17620,7 +17620,11 @@
+ HChar* fName = NULL;
+ void* fAddr = NULL;
+ if (haveF2orF3(pfx)) goto decode_failure;
+- if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
++ if (vex_control.iropt_native_cpuid) {
++ fName = "amd64g_dirtyhelper_CPUID_native";
++ fAddr = &amd64g_dirtyhelper_CPUID_native;
++ }
++ else if (archinfo->hwcaps == (VEX_HWCAPS_AMD64_SSE3
+ |VEX_HWCAPS_AMD64_CX16)) {
+ //fName = "amd64g_dirtyhelper_CPUID_sse3_and_cx16";
+ //fAddr = &amd64g_dirtyhelper_CPUID_sse3_and_cx16;
+diff -uNr valgrind-3.6.0.orig/VEX/pub/libvex.h valgrind-3.6.0/VEX/pub/libvex.h
+--- valgrind-3.6.0.orig/VEX/pub/libvex.h 2010-10-20 22:19:52.000000000 +0200
++++ valgrind-3.6.0/VEX/pub/libvex.h 2011-01-17 20:41:02.906490947 +0100
+@@ -60,7 +60,6 @@
+ }
+ VexArch;
+
+-
+ /* For a given architecture, these specify extra capabilities beyond
+ the minimum supported (baseline) capabilities. They may be OR'd
+ together, although some combinations don't make sense. (eg, SSE2
+@@ -270,6 +269,8 @@
+ /* EXPERIMENTAL: chase across conditional branches? Not all
+ front ends honour this. Default: NO. */
+ Bool guest_chase_cond;
++      /* For x86 and amd64, allow use of the host's native CPUID
++         instruction instead of the emulated one. */
++ Int iropt_native_cpuid;
+ }
+ VexControl;
+