From e6af3fbcac15a29e5960c129b1b50a499d3258ad Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Wed, 17 May 2006 10:48:07 +0000 Subject: [PATCH] - Fixes for SSE2 code Changed files: liboil-sse2.patch -> 1.1 --- liboil-sse2.patch | 80 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 liboil-sse2.patch diff --git a/liboil-sse2.patch b/liboil-sse2.patch new file mode 100644 index 0000000..4bb8150 --- /dev/null +++ b/liboil-sse2.patch @@ -0,0 +1,80 @@ +--- liboil-0.3.6/liboil/i386/composite_i386.c 2005-08-01 12:07:01.000000000 +0200 ++++ liboil-0.3.8/liboil/i386/composite_i386.c 2006-01-29 03:54:58.000000000 +0100 +@@ -445,8 +563,6 @@ + static void + composite_over_argb_sse2_2 (uint32_t *dest, uint32_t *src, int n) + { +- int end; +- + __asm__ __volatile__ (" pxor %%xmm7, %%xmm7\n" // mm7 = { 0, 0, 0, 0 } + " movl $0x80808080, %%eax\n" + " movd %%eax, %%xmm6\n" // mm6 = { 128, 128, 128, 128 } +@@ -465,7 +581,7 @@ + : + :"eax"); + +- if (n&1 && ((unsigned long)dest & 0xf)) { ++ if (n&1) { + __asm__ __volatile__ ( + " movl (%1), %%eax\n" + " testl $0xff000000, %%eax\n" +@@ -489,20 +605,21 @@ + "1:\n" + " addl $4, %0\n" + " addl $4, %1\n" +- :"+r" (dest), "+r" (src), "+r" (n) ++ :"+r" (dest), "+r" (src) + : + :"eax"); + } +- end = n&1; + n>>=1; + + if (n>0){ + __asm__ __volatile__ ("\n" + "3:\n" ++#if 0 + " movl (%1), %%eax\n" + " orl 4(%1), %%eax\n" + " testl $0xff000000, %%eax\n" + " jz 4f\n" ++#endif + + " movq (%1), %%xmm1\n" + " punpcklbw %%xmm7, %%xmm1\n" +@@ -527,35 +644,6 @@ + : + :"eax"); + } +- if (end) { +- __asm__ __volatile__ ( +- " movl (%1), %%eax\n" +- " testl $0xff000000, %%eax\n" +- " jz 1f\n" +- +- " movd (%1), %%xmm1\n" +- " punpcklbw %%xmm7, %%xmm1\n" +- " pshuflw $0xff, %%xmm1, %%xmm0\n" +- " pxor %%xmm5, %%xmm0\n" +- +- " movd (%0), %%xmm3\n" +- " punpcklbw %%xmm7, %%xmm3\n" +- " pmullw %%xmm0, %%xmm3\n" +- " paddw %%xmm6, %%xmm3\n" +- " pmulhuw %%xmm4, %%xmm3\n" +- +- " paddw %%xmm1, %%xmm3\n" +- " packuswb %%xmm3, %%xmm3\n" +- " movd %%xmm3, (%0)\n" +- +- "1:\n" +- " addl $4, %0\n" +- " addl $4, %1\n" +- :"+r" (dest), "+r" (src), "+r" (n) +- : +- :"eax"); +- } +- + } + OIL_DEFINE_IMPL_FULL (composite_over_argb_sse2_2, composite_over_argb, OIL_IMPL_FLAG_SSE2); + -- 2.44.0