ffmpeg-gcc4.patch

   1 --- ffmpeg/libavcodec/i386/dsputil_mmx.c.orig   2006-01-12 22:43:17.000000000 +0000
   2 +++ ffmpeg/libavcodec/i386/dsputil_mmx.c        2006-01-30 01:42:21.087254880 +0000
   3 @@ -20,6 +20,7 @@
   4   * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
   5   */
   6
   7 +#include <mmintrin.h>
   8  #include "../dsputil.h"
   9  #include "../simple_idct.h"
  10  #include "../mpegvideo.h"
  11 @@ -617,6 +618,22 @@
  12      );
  13  }
  14
  15 +#if (__GNUC__ >= 4)
   16 +static inline void transpose4x4(uint8_t *dst, uint8_t *src, ptrdiff_t dst_stride, ptrdiff_t src_stride) { /* MMX-intrinsics variant: stores the transpose of the 4x4 byte block at src into dst; both strides are byte offsets between rows */
   17 +    __m64 row0 = _mm_cvtsi32_si64(*(unsigned*)(src + (0 * src_stride))); /* source row 0: four bytes read as one word into the low half of an MMX value */
   18 +    __m64 row1 = _mm_cvtsi32_si64(*(unsigned*)(src + (1 * src_stride))); /* source row 1 */
   19 +    __m64 row2 = _mm_cvtsi32_si64(*(unsigned*)(src + (2 * src_stride))); /* source row 2 */
   20 +    __m64 row3 = _mm_cvtsi32_si64(*(unsigned*)(src + (3 * src_stride))); /* source row 3 */
   21 +    __m64 tmp0 = _mm_unpacklo_pi8(row0, row1); /* interleave the bytes of rows 0 and 1 */
   22 +    __m64 tmp1 = _mm_unpacklo_pi8(row2, row3); /* interleave the bytes of rows 2 and 3 */
   23 +    __m64 row01 = _mm_unpacklo_pi16(tmp0, tmp1); /* low 16-bit pairs combined: output row 0 in the low dword, output row 1 in the high dword */
   24 +    __m64 row23 = _mm_unpackhi_pi16(tmp0, tmp1); /* high 16-bit pairs combined: output row 2 in the low dword, output row 3 in the high dword */
   25 +    *((unsigned*)(dst + (0 * dst_stride))) = _mm_cvtsi64_si32(row01); /* store low dword as output row 0 */
   26 +    *((unsigned*)(dst + (1 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row01, row01)); /* bring the high dword down to the low half, then store it as output row 1 */
   27 +    *((unsigned*)(dst + (2 * dst_stride))) = _mm_cvtsi64_si32(row23); /* store low dword as output row 2 */
   28 +    *((unsigned*)(dst + (3 * dst_stride))) = _mm_cvtsi64_si32(_mm_unpackhi_pi32(row23, row23)); /* bring the high dword down to the low half, then store it as output row 3 */
   29 +} /* NOTE(review): no _mm_empty() is issued here; presumably MMX state is cleared elsewhere - confirm against callers */
  30 +#else
  31  static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
  32      asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
  33          "movd  %4, %%mm0                \n\t"
  34 @@ -645,6 +662,7 @@
  35             "m" (*(uint32_t*)(src + 3*src_stride))
  36      );
  37  }
  38 +#endif
  39
  40  static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
  41      const int strength= ff_h263_loop_filter_strength[qscale];