]>
Commit | Line | Data |
---|---|---|
e881c8b2 PS |
1 | --- ffmpeg/libavcodec/i386/dsputil_mmx.c.orig 2006-01-12 22:43:17.000000000 +0000 |
2 | +++ ffmpeg/libavcodec/i386/dsputil_mmx.c 2006-01-30 01:42:21.087254880 +0000 | |
3 | @@ -20,6 +20,7 @@ | |
4 | * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
5 | */ | |
6 | ||
7 | +#include <mmintrin.h> | |
8 | #include "../dsputil.h" | |
9 | #include "../simple_idct.h" | |
10 | #include "../mpegvideo.h" | |
c083e2e9 | 11 | @@ -617,6 +618,22 @@ |
e881c8b2 PS |
12 | ); |
13 | } | |
14 | ||
66bab21c PS |
15 | +#if (__GNUC__ >= 4) |
16 | +static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) { | |
e881c8b2 PS |
17 | + __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride))); |
18 | + __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride))); | |
19 | + __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride))); | |
20 | + __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride))); | |
21 | + __m64 tmp0 = _mm_unpacklo_pi8(row0, row1); | |
22 | + __m64 tmp1 = _mm_unpacklo_pi8(row2, row3); | |
23 | + __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1); | |
24 | + __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1); | |
25 | + *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01); | |
26 | + *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01)); | |
27 | + *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23); | |
28 | + *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23)); | |
29 | +} | |
30 | +#else | |
59aba68d | 31 | static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ |
e881c8b2 PS |
32 | asm volatile( //FIXME could save 1 instruction if done as 8x4 ... |
33 | "movd %4, %%mm0 \n\t" | |
e881c8b2 PS |
34 | @@ -645,6 +662,7 @@ |
35 | "m" (*(uint32_t*)(src + 3*src_stride)) | |
36 | ); | |
37 | } | |
38 | +#endif | |
39 | ||
40 | static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | |
41 | const int strength= ff_h263_loop_filter_strength[qscale]; |