[packages/ffmpeg.git] / ffmpeg-gcc4.patch

--- ffmpeg/libavcodec/i386/dsputil_mmx.c.orig	2006-01-12 22:43:17.000000000 +0000
+++ ffmpeg/libavcodec/i386/dsputil_mmx.c	2006-01-30 01:42:21.087254880 +0000
@@ -20,6 +20,7 @@
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  */
 
+#include <mmintrin.h>
 #include "../dsputil.h"
 #include "../simple_idct.h"
 #include "../mpegvideo.h"
@@ -617,6 +618,22 @@
     );
 }
 
+#if (__GNUC__ >= 4)
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) { /* GCC >= 4 intrinsics variant: writes the transpose of the 4x4 byte block at src to dst; each stride is the byte distance between consecutive rows */
+    __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride))); /* source row 0 (bytes a0 a1 a2 a3) into the low half of an MMX value */
+    __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride))); /* source row 1 (bytes b0 b1 b2 b3) */
+    __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride))); /* source row 2 (bytes c0 c1 c2 c3) */
+    __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride))); /* source row 3 (bytes d0 d1 d2 d3) */
+    __m64 tmp0 = _mm_unpacklo_pi8(row0, row1); /* interleave bytes of rows 0 and 1: a0 b0 a1 b1 a2 b2 a3 b3 */
+    __m64 tmp1 = _mm_unpacklo_pi8(row2, row3); /* interleave bytes of rows 2 and 3: c0 d0 c1 d1 c2 d2 c3 d3 */
+    __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1); /* low 16-bit pairs interleaved: a0 b0 c0 d0 | a1 b1 c1 d1, i.e. source columns 0 and 1 */
+    __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1); /* high 16-bit pairs interleaved: a2 b2 c2 d2 | a3 b3 c3 d3, i.e. source columns 2 and 3 */
+    *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01); /* low 32 bits: source column 0 becomes destination row 0 */
+    *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01)); /* move the high 32 bits (column 1) down, then store as destination row 1 */
+    *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23); /* low 32 bits: source column 2 becomes destination row 2 */
+    *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23)); /* move the high 32 bits (column 3) down, then store as destination row 3 */
+} /* NOTE(review): no _mm_empty() is issued here and the 32-bit accesses go through casts of uint8_t pointers; presumably callers reset MMX state and unaligned access is acceptable on the target; confirm */
+#else
 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
     asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
         "movd  %4, %%mm0                \n\t"
@@ -645,6 +662,7 @@
            "m" (*(uint32_t*)(src + 3*src_stride))
     );
 }
+#endif
 
 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
     const int strength= ff_h263_loop_filter_strength[qscale];
Commit	Line	Data
e881c8b2 PS	1	--- ffmpeg/libavcodec/i386/dsputil_mmx.c.orig 2006-01-12 22:43:17.000000000 +0000
	2	+++ ffmpeg/libavcodec/i386/dsputil_mmx.c 2006-01-30 01:42:21.087254880 +0000
	3	@@ -20,6 +20,7 @@
	4	* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
	5	*/
	6
	7	+#include <mmintrin.h>
	8	#include "../dsputil.h"
	9	#include "../simple_idct.h"
	10	#include "../mpegvideo.h"
c083e2e9	11	@@ -617,6 +618,22 @@
e881c8b2 PS	12	);
	13	}
	14
66bab21c PS	15	+#if (__GNUC__ >= 4)
66bab21c PS	16	+static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) {
e881c8b2 PS	17	+ __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride)));
	18	+ __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride)));
	19	+ __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride)));
	20	+ __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride)));
	21	+ __m64 tmp0 = _mm_unpacklo_pi8(row0, row1);
	22	+ __m64 tmp1 = _mm_unpacklo_pi8(row2, row3);
	23	+ __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1);
	24	+ __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1);
	25	+ *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01);
	26	+ *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01));
	27	+ *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23);
	28	+ *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23));
	29	+}
	30	+#else
59aba68d	31	static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
e881c8b2 PS	32	asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
e881c8b2 PS	33	"movd %4, %%mm0 \n\t"
e881c8b2 PS	34	@@ -645,6 +662,7 @@
	35	"m" (*(uint32_t*)(src + 3*src_stride))
	36	);
	37	}
	38	+#endif
	39
	40	static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
	41	const int strength= ff_h263_loop_filter_strength[qscale];