diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/asm.h avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/asm.h
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/asm.h 2002-10-16 09:26:12.000000000 +0200
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/asm.h 2003-09-28 17:26:39.000000000 +0200
 #define AMASK_CIX (1 << 2)
 #define AMASK_MVI (1 << 8)
-inline static uint64_t BYTE_VEC(uint64_t x)
+static inline uint64_t BYTE_VEC(uint64_t x)
-inline static uint64_t WORD_VEC(uint64_t x)
+static inline uint64_t WORD_VEC(uint64_t x)
 #define sextw(x) ((int16_t) (x))
-#define ASM_ACCEPT_MVI asm (".arch pca56")
 struct unaligned_long { uint64_t l; } __attribute__((packed));
 #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
 #define uldq(a) (((const struct unaligned_long *) (a))->l)
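+/* ldq_u() mirrors the hardware instruction: it loads the aligned
+   quadword containing p, ignoring the low three address bits; uldq()
+   makes gcc emit an unaligned load through the packed struct. */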
-/* Unfortunately, __builtin_prefetch is slightly buggy on Alpha. The
-   defines here are kludged so we still get the right
-   instruction. This needs to be adapted as soon as gcc is fixed. */
-# define prefetch(p)     __builtin_prefetch((p), 0, 1)
-# define prefetch_en(p)  __builtin_prefetch((p), 1, 1)
-# define prefetch_m(p)   __builtin_prefetch((p), 0, 0)
-# define prefetch_men(p) __builtin_prefetch((p), 1, 0)
-# define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
-# define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
-# define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
-# define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch(p)     __builtin_prefetch((p), 0, 1)
+#define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
+#define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
+#define prefetch_men(p) __builtin_prefetch((p), 1, 0)
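+/* __builtin_prefetch(addr, rw, locality): the _m variants request the
+   cache line with modify (store) intent, the _en variants mark it
+   evict-next (low temporal locality). */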
 #define cmpbge __builtin_alpha_cmpbge
 #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b))
 #define amask __builtin_alpha_amask
 #define implver __builtin_alpha_implver
 #define rpcc __builtin_alpha_rpcc
+#define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
+#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extql(a, b)  ({ uint64_t __r; asm ("extql %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extwl(a, b)  ({ uint64_t __r; asm ("extwl %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define extqh(a, b)  ({ uint64_t __r; asm ("extqh %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define zap(a, b)    ({ uint64_t __r; asm ("zap %r1,%2,%0"    : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
+#define amask(a)     ({ uint64_t __r; asm ("amask %1,%0"      : "=r" (__r) : "rI" (a)); __r; })
+#define implver()    ({ uint64_t __r; asm ("implver %0"       : "=r" (__r)); __r; })
+#define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
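+/* In the fallbacks above, constraint "rJ" also accepts the constant
+   zero (printed as $31 by the %r modifier) and "rI" accepts an 8-bit
+   literal, matching the operand forms of these instructions. */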
+#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
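+/* wh64 is a write hint: the whole 64-byte block at p is about to be
+   overwritten, so the CPU need not fetch its current contents. */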
+#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
 #define minub8 __builtin_alpha_minub8
 #define minsb8 __builtin_alpha_minsb8
 #define minuw4 __builtin_alpha_minuw4
 #define unpkbl __builtin_alpha_unpkbl
 #define unpkbw __builtin_alpha_unpkbw
-#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extql(a, b)  ({ uint64_t __r; asm ("extql %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extwl(a, b)  ({ uint64_t __r; asm ("extwl %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define extqh(a, b)  ({ uint64_t __r; asm ("extqh %r1,%2,%0"  : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zap(a, b)    ({ uint64_t __r; asm ("zap %r1,%2,%0"    : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; })
-#define amask(a)     ({ uint64_t __r; asm ("amask %1,%0"      : "=r" (__r) : "rI" (a)); __r; })
-#define implver()    ({ uint64_t __r; asm ("implver %0"       : "=r" (__r)); __r; })
-#define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; })
-#define minub8(a, b) ({ uint64_t __r; asm ("minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsb8(a, b) ({ uint64_t __r; asm ("minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minuw4(a, b) ({ uint64_t __r; asm ("minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define minsw4(a, b) ({ uint64_t __r; asm ("minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxub8(a, b) ({ uint64_t __r; asm ("maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsb8(a, b) ({ uint64_t __r; asm ("maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxuw4(a, b) ({ uint64_t __r; asm ("maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define maxsw4(a, b) ({ uint64_t __r; asm ("maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
-#define perr(a, b)   ({ uint64_t __r; asm ("perr %r1,%r2,%0"  : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
-#define pklb(a)      ({ uint64_t __r; asm ("pklb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
-#define pkwb(a)      ({ uint64_t __r; asm ("pkwb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbl(a)    ({ uint64_t __r; asm ("unpkbl %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
-#define unpkbw(a)    ({ uint64_t __r; asm ("unpkbw %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
+#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
+#define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0"  : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
+#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
+#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0"      : "=r" (__r) : "rJ" (a)); __r; })
+#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
+#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0"    : "=r" (__r) : "rJ" (a)); __r; })
 #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
-#define ASM_ACCEPT_MVI
 #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
 #define uldq(a) (*(const __unaligned uint64_t *) (a))
 #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
 #define pkwb(a) asm ("pkwb %a0,%v0", a)
 #define unpkbl(a) asm ("unpkbl %a0,%v0", a)
 #define unpkbw(a) asm ("unpkbw %a0,%v0", a)
+#define wh64(a) asm ("wh64 %a0", a)
 #error "Unknown compiler!"
diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/dsputil_alpha.c avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/dsputil_alpha.c
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/dsputil_alpha.c 1970-01-01 01:00:00.000000000 +0100
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/dsputil_alpha.c 2003-09-28 17:26:39.000000000 +0200
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#include "../dsputil.h"
+extern void simple_idct_axp(DCTELEM *block);
+extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
+extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+                        int line_size, int h);
+void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+void get_pixels_mvi(DCTELEM *restrict block,
+                    const uint8_t *restrict pixels, int line_size);
+void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
+int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
+/* These functions were the base for the optimized assembler routines,
+   and remain here for documentation purposes. */
+static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+        uint64_t shorts0, shorts1;
+        shorts0 = ldq(block);
+        shorts0 = maxsw4(shorts0, 0);
+        shorts0 = minsw4(shorts0, clampmask);
+        stl(pkwb(shorts0), pixels);
+        shorts1 = ldq(block + 4);
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+        stl(pkwb(shorts1), pixels + 4);
+        pixels += line_size;
+void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
+    /* Keep this function a leaf function by generating the constants
+       manually (mainly for the hack value ;-). */
+    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
+    uint64_t signmask = zap(-1, 0x33);
+    signmask ^= signmask >> 1; /* 0x8000800080008000 */
+        uint64_t shorts0, pix0, signs0;
+        uint64_t shorts1, pix1, signs1;
+        shorts0 = ldq(block);
+        shorts1 = ldq(block + 4);
+        pix0 = unpkbw(ldl(pixels));
+        /* Signed subword add (MMX paddw). */
+        signs0 = shorts0 & signmask;
+        shorts0 &= ~signmask;
+        shorts0 = maxsw4(shorts0, 0);
+        shorts0 = minsw4(shorts0, clampmask);
+        pix1 = unpkbw(ldl(pixels + 4));
+        signs1 = shorts1 & signmask;
+        shorts1 &= ~signmask;
+        shorts1 = maxsw4(shorts1, 0);
+        shorts1 = minsw4(shorts1, clampmask);
+        stl(pkwb(shorts0), pixels);
+        stl(pkwb(shorts1), pixels + 4);
+        pixels += line_size;
+static void clear_blocks_axp(DCTELEM *blocks) {
+    uint64_t *p = (uint64_t *) blocks;
+    int n = sizeof(DCTELEM) * 6 * 64;
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
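+/* a + b == 2 * (a & b) + (a ^ b), so avg2_no_rnd() is the per-byte
+   floor average and avg2() the ceiling average; masking with
+   BYTE_VEC(0xfe) before the shift keeps bits from leaking across
+   byte lanes. */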
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
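+/* The high six bits of each byte are averaged exactly by the >> 2
+   sums in r1; the low two bits are summed separately in r2 with the
+   rounder so that carries cannot spill into the neighboring byte. */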
+#define OP(LOAD, STORE) \
+        STORE(LOAD(pixels), block); \
+        pixels += line_size; \
+        block += line_size; \
+#define OP_X2(LOAD, STORE) \
+        uint64_t pix1, pix2; \
+        pix1 = LOAD(pixels); \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+        STORE(AVG2(pix1, pix2), block); \
+        pixels += line_size; \
+        block += line_size; \
+#define OP_Y2(LOAD, STORE) \
+    uint64_t pix = LOAD(pixels); \
+        uint64_t next_pix; \
+        pixels += line_size; \
+        next_pix = LOAD(pixels); \
+        STORE(AVG2(pix, next_pix), block); \
+        block += line_size; \
+#define OP_XY2(LOAD, STORE) \
+    uint64_t pix1 = LOAD(pixels); \
+    uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+    uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
+                   + (pix2 & BYTE_VEC(0x03)); \
+    uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
+                   + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
+        uint64_t npix1, npix2; \
+        uint64_t npix_l, npix_h; \
+        pixels += line_size; \
+        npix1 = LOAD(pixels); \
+        npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+        npix_l = (npix1 & BYTE_VEC(0x03)) \
+               + (npix2 & BYTE_VEC(0x03)); \
+        npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
+               + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
+        avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+              + pix_h + npix_h; \
+        STORE(avg, block); \
+        block += line_size; \
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
+static void OPNAME ## _pixels ## SUFF ## _axp \
+        (uint8_t *restrict block, const uint8_t *restrict pixels, \
+         int line_size, int h) \
+    if ((size_t) pixels & 0x7) { \
+        OPKIND(uldq, STORE); \
+        OPKIND(ldq, STORE); \
+static void OPNAME ## _pixels16 ## SUFF ## _axp \
+        (uint8_t *restrict block, const uint8_t *restrict pixels, \
+         int line_size, int h) \
+    OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
+    OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+#define PIXOP(OPNAME, STORE) \
+    MAKE_OP(OPNAME, ,     OP,     STORE) \
+    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE) \
+    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE) \
+    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
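+/* Each PIXOP() expansion instantiates plain, x2, y2 and xy2 kernels;
+   each MAKE_OP() body picks uldq() or ldq() at runtime depending on
+   the source alignment and builds the 16-pixel-wide version from two
+   8-pixel calls. */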
+/* Rounding primitives. */
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+/* Non-rounding primitives. */
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);
+void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+                          int line_size, int h)
+    put_pixels_axp_asm(block, pixels, line_size, h);
+    put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
+static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
+    return pix_abs16x16_mvi_asm(a, b, stride);
+static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride)
+    return pix_abs8x8_mvi(a, b, stride);
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
+    c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+    c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+    c->avg_pixels_tab[0][0] = avg_pixels16_axp;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_axp;
+    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_axp;
+    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_axp;
+    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_axp;
+    c->put_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+    c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+    c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+    c->avg_pixels_tab[1][0] = avg_pixels_axp;
+    c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+    c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+    c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp;
+    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp;
+    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp;
+    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp;
+    c->clear_blocks = clear_blocks_axp;
+    /* amask clears all bits that correspond to present features. */
+    if (amask(AMASK_MVI) == 0) {
+        c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
+        c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
+        c->get_pixels = get_pixels_mvi;
+        c->diff_pixels = diff_pixels_mvi;
+        c->sad[0] = sad16x16_mvi;
+        c->sad[1] = sad8x8_mvi;
+        c->pix_abs8x8 = pix_abs8x8_mvi;
+        c->pix_abs16x16 = pix_abs16x16_mvi_asm;
+        c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi;
+        c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi;
+        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;
+    put_pixels_clamped_axp_p = c->put_pixels_clamped;
+    add_pixels_clamped_axp_p = c->add_pixels_clamped;
+    c->idct_put = simple_idct_put_axp;
+    c->idct_add = simple_idct_add_axp;
+    c->idct = simple_idct_axp;
diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/Makefile.am avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/Makefile.am
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/Makefile.am 2003-05-25 23:07:42.000000000 +0200
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/Makefile.am 2003-11-13 23:51:25.426454176 +0100
-	simple_idct_alpha.c
+	simple_idct_alpha.c \
+	dsputil_alpha_asm.S \
+	motion_est_mvi_asm.S
-noinst_HEADERS = asm.h dsputil_alpha_asm.S regdef.h motion_est_mvi_asm.S
+noinst_HEADERS = asm.h regdef.h
 libavcodecalpha_la_SOURCES = $(ALPHA_SRC)
diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/motion_est_alpha.c avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/motion_est_alpha.c
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/motion_est_alpha.c 1970-01-01 01:00:00.000000000 +0100
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/motion_est_alpha.c 2003-09-28 17:26:39.000000000 +0200
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#include "../dsputil.h"
+void get_pixels_mvi(DCTELEM *restrict block,
+                    const uint8_t *restrict pixels, int line_size)
+        stq(unpkbw(p), block);
+        stq(unpkbw(p >> 32), block + 4);
+        pixels += line_size;
+void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
+    uint64_t mask = 0x4040;
+    mask |= mask << 16;
+    mask |= mask << 32;
+        uint64_t x, y, c, d, a;
+        a = zap(mask, c); /* We use 0x4040404040404040 here... */
+        d += 4 * a;       /* ...so we can use s4addq here.     */
+        signs = zap(-1, c);
+        stq(unpkbw(d) | (unpkbw(signs) << 8), block);
+        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
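+        /* unpkbw() widens each byte to a 16-bit word; OR-ing in the
+           unpacked sign bytes shifted up by 8 sign-extends negative
+           differences to full words. */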
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+    if ((size_t) pix2 & 0x7) {
+        /* works only when pix2 is actually unaligned */
+        do { /* do 8 pixels at a time */
+            result += perr(p1, p2);
+            result += perr(p1, p2);
+#if 0 /* now done in assembly */
+int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+    if ((size_t) pix2 & 0x7) {
+        /* works only when pix2 is actually unaligned */
+        do { /* do 16 pixels at a time */
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            p1_r = ldq(pix1 + 8);
+            t = ldq_u(pix2 + 8);
+            p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+            p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            p1_r = ldq(pix1 + 8);
+            p2_r = ldq(pix2 + 8);
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+    uint64_t disalign = (size_t) pix2 & 0x7;
+    switch (disalign) {
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            p1_r = ldq(pix1 + 8);
+            p2_l = avg2(l, (l >> 8) | ((uint64_t) r << 56));
+            p2_r = avg2(r, (r >> 8) | ((uint64_t) pix2[16] << 56));
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        /* |.......l|lllllllr|rrrrrrr*|
+           This case is special because disalign1 would be 8, which
+           gets treated as 0 by extqh. At least it is a bit faster
+            uint64_t p1_l, p1_r, p2_l, p2_r;
+            p1_r = ldq(pix1 + 8);
+            m = ldq_u(pix2 + 8);
+            r = ldq_u(pix2 + 16);
+            p2_l = avg2(extql(l, disalign) | extqh(m, disalign), m);
+            p2_r = avg2(extql(m, disalign) | extqh(r, disalign), r);
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+        uint64_t disalign1 = disalign + 1;
+        uint64_t p1_l, p1_r, p2_l, p2_r;
+            p1_r = ldq(pix1 + 8);
+            m = ldq_u(pix2 + 8);
+            r = ldq_u(pix2 + 16);
+            p2_l = avg2(extql(l, disalign) | extqh(m, disalign),
+                        extql(l, disalign1) | extqh(m, disalign1));
+            p2_r = avg2(extql(m, disalign) | extqh(r, disalign),
+                        extql(m, disalign1) | extqh(r, disalign1));
+            result += perr(p1_l, p2_l)
+                    + perr(p1_r, p2_r);
+int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+    if ((size_t) pix2 & 0x7) {
+        uint64_t t, p2_l, p2_r;
+        t = ldq_u(pix2 + 8);
+        p2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+        p2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+            uint64_t p1_l, p1_r, np2_l, np2_r;
+            p1_r = ldq(pix1 + 8);
+            t = ldq_u(pix2 + 8);
+            np2_l = extql(ldq_u(pix2), pix2) | extqh(t, pix2);
+            np2_r = extql(t, pix2) | extqh(ldq_u(pix2 + 16), pix2);
+            result += perr(p1_l, avg2(p2_l, np2_l))
+                    + perr(p1_r, avg2(p2_r, np2_r));
+        uint64_t p2_l, p2_r;
+        p2_r = ldq(pix2 + 8);
+            uint64_t p1_l, p1_r, np2_l, np2_r;
+            p1_r = ldq(pix1 + 8);
+            np2_r = ldq(pix2 + 8);
+            result += perr(p1_l, avg2(p2_l, np2_l))
+                    + perr(p1_r, avg2(p2_r, np2_r));
+int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
+    uint64_t p1_l, p1_r;
+    uint64_t p2_l, p2_r, p2_x;
+    p1_r = ldq(pix1 + 8);
+    if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+        p2_r = uldq(pix2 + 8);
+        p2_x = (uint64_t) pix2[16] << 56;
+        p2_r = ldq(pix2 + 8);
+        p2_x = ldq(pix2 + 16) << 56;
+        uint64_t np1_l, np1_r;
+        uint64_t np2_l, np2_r, np2_x;
+        np1_r = ldq(pix1 + 8);
+        if ((size_t) pix2 & 0x7) { /* could be optimized a lot */
+            np2_l = uldq(pix2);
+            np2_r = uldq(pix2 + 8);
+            np2_x = (uint64_t) pix2[16] << 56;
+            np2_r = ldq(pix2 + 8);
+            np2_x = ldq(pix2 + 16) << 56;
+        result += perr(p1_l,
+                       avg4( p2_l, ( p2_l >> 8) | ((uint64_t)  p2_r << 56),
+                            np2_l, (np2_l >> 8) | ((uint64_t) np2_r << 56)))
+                       avg4( p2_r, ( p2_r >> 8) | ((uint64_t)  p2_x),
+                            np2_r, (np2_r >> 8) | ((uint64_t) np2_x)));
diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/mpegvideo_alpha.c avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/mpegvideo_alpha.c
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/mpegvideo_alpha.c 1970-01-01 01:00:00.000000000 +0100
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/mpegvideo_alpha.c 2003-09-28 17:26:39.000000000 +0200
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#include "../dsputil.h"
+#include "../mpegvideo.h"
+static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block,
+    uint64_t qmul, qadd;
+    uint64_t correction;
+    DCTELEM *orig_block = block;
+    qadd = WORD_VEC((qscale - 1) | 1);
+    qmul = qscale << 1;
+    /* This mask kills spill from negative subwords to the next subword. */
+    correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
+    if (!s->h263_aic) {
+            block0 = block[0] * s->y_dc_scale;
+            block0 = block[0] * s->c_dc_scale;
+        n_coeffs = 63; // does not always use zigzag table
+        n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
+    for (i = 0; i <= n_coeffs; block += 4, i += 4) {
+        uint64_t levels, negmask, zeros, add;
+        levels = ldq(block);
+#ifdef __alpha_max__
+        /* I don't think the speed difference justifies runtime
+        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
+        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+        negmask = cmpbge(WORD_VEC(0x7fff), levels);
+        negmask &= (negmask >> 1) | (1 << 7);
+        negmask = zap(-1, negmask);
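+        /* cmpbge() compares bytewise unsigned: the high byte of a
+           word passes 0x7f >= byte exactly when the level is
+           non-negative. Spreading that bit onto the low byte and
+           zapping the set bytes leaves 0xffff in negative words and
+           0x0000 elsewhere. */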
+        zeros = cmpbge(0, levels);
+        zeros &= zeros >> 1;
+        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
+           zapping the lower byte suffices. */
+        levels -= correction & (negmask << 16);
+        /* Negate qadd for negative levels. */
+        add = qadd ^ negmask;
+        add += WORD_VEC(0x0001) & negmask;
+        /* Set qadd to 0 for levels == 0. */
+        add = zap(add, zeros);
+        stq(levels, block);
+    if (s->mb_intra && !s->h263_aic)
+        orig_block[0] = block0;
+void MPV_common_init_axp(MpegEncContext *s)
+    s->dct_unquantize_h263 = dct_unquantize_h263_axp;
diff -Nur avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/simple_idct_alpha.c avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/simple_idct_alpha.c
--- avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha.orig/simple_idct_alpha.c 1970-01-01 01:00:00.000000000 +0100
+++ avifile-0.7-0.7.38/ffmpeg/libavcodec/alpha/simple_idct_alpha.c 2003-09-28 17:26:39.000000000 +0200
+ * Simple IDCT (Alpha optimized)
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * based upon some commented-out C code from mpeg2dec (idct_mmx.c
+ * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
+ * Alpha optimizations by Måns Rullgård <mru@users.sourceforge.net>
+ * and Falk Hueffner <falk@debian.org>
+#include "../dsputil.h"
+extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
+// cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
+// W4 is actually exactly 16384, but using 16383 works around
+// accumulating rounding errors for some encoders
+#define W1 ((int_fast32_t) 22725)
+#define W2 ((int_fast32_t) 21407)
+#define W3 ((int_fast32_t) 19266)
+#define W4 ((int_fast32_t) 16383)
+#define W5 ((int_fast32_t) 12873)
+#define W6 ((int_fast32_t)  8867)
+#define W7 ((int_fast32_t)  4520)
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
+static inline int idct_row(DCTELEM *row)
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
+    uint64_t l, r, t2;
+    if (l == 0 && r == 0)
+    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
+    if (((l & ~0xffffUL) | r) == 0) {
+        t2 = (uint16_t) a0;
+    t = extwl(l, 4); /* row[2] */
+    t = extwl(r, 0); /* row[4] */
+    t = extwl(r, 4); /* row[6] */
+    t = extwl(l, 2); /* row[1] */
+    t = extwl(l, 6); /* row[3] */
+    t = extwl(r, 2); /* row[5] */
+    t = extwl(r, 6); /* row[7] */
+    row[0] = (a0 + b0) >> ROW_SHIFT;
+    row[1] = (a1 + b1) >> ROW_SHIFT;
+    row[2] = (a2 + b2) >> ROW_SHIFT;
+    row[3] = (a3 + b3) >> ROW_SHIFT;
+    row[4] = (a3 - b3) >> ROW_SHIFT;
+    row[5] = (a2 - b2) >> ROW_SHIFT;
+    row[6] = (a1 - b1) >> ROW_SHIFT;
+    row[7] = (a0 - b0) >> ROW_SHIFT;
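+    /* Even input coefficients contribute to a0..a3, odd ones to
+       b0..b3; the a +/- b butterfly produces the mirrored output
+       order above. */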
+static inline void idct_col(DCTELEM *col)
+    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
+    col[0] += (1 << (COL_SHIFT - 1)) / W4;
+    a0 = W4 * col[8 * 0];
+    a1 = W4 * col[8 * 0];
+    a2 = W4 * col[8 * 0];
+    a3 = W4 * col[8 * 0];
+        a0 += W2 * col[8 * 2];
+        a1 += W6 * col[8 * 2];
+        a2 -= W6 * col[8 * 2];
+        a3 -= W2 * col[8 * 2];
+        a0 += W4 * col[8 * 4];
+        a1 -= W4 * col[8 * 4];
+        a2 -= W4 * col[8 * 4];
+        a3 += W4 * col[8 * 4];
+        a0 += W6 * col[8 * 6];
+        a1 -= W2 * col[8 * 6];
+        a2 += W2 * col[8 * 6];
+        a3 -= W6 * col[8 * 6];
+        b0 = W1 * col[8 * 1];
+        b1 = W3 * col[8 * 1];
+        b2 = W5 * col[8 * 1];
+        b3 = W7 * col[8 * 1];
+        b0 += W3 * col[8 * 3];
+        b1 -= W7 * col[8 * 3];
+        b2 -= W1 * col[8 * 3];
+        b3 -= W5 * col[8 * 3];
+        b0 += W5 * col[8 * 5];
+        b1 -= W1 * col[8 * 5];
+        b2 += W7 * col[8 * 5];
+        b3 += W3 * col[8 * 5];
+        b0 += W7 * col[8 * 7];
+        b1 -= W5 * col[8 * 7];
+        b2 += W3 * col[8 * 7];
+        b3 -= W1 * col[8 * 7];
+    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
+    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
+    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
+    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
+    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
+    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
+    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
+    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
+/* If all rows but the first one are zero after row transformation,
+   all rows will be identical after column transformation. */
+static inline void idct_col2(DCTELEM *col)
+    uint64_t *lcol = (uint64_t *) col;
+    for (i = 0; i < 8; ++i) {
+        int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
+        col[0] = a0 >> COL_SHIFT;
+    lcol[ 2] = l; lcol[ 3] = r;
+    lcol[ 4] = l; lcol[ 5] = r;
+    lcol[ 6] = l; lcol[ 7] = r;
+    lcol[ 8] = l; lcol[ 9] = r;
+    lcol[10] = l; lcol[11] = r;
+    lcol[12] = l; lcol[13] = r;
+    lcol[14] = l; lcol[15] = r;
+void simple_idct_axp(DCTELEM *block)
+    int rowsZero = 1;     /* all rows except row 0 zero */
+    int rowsConstant = 1; /* all rows consist of a constant value */
+    for (i = 0; i < 8; i++) {
+        int sparseness = idct_row(block + 8 * i);
+        if (i > 0 && sparseness > 0)
+        if (sparseness == 2)
+    } else if (rowsConstant) {
+        uint64_t *lblock = (uint64_t *) block;
+        for (i = 0; i < 8; i += 2) {
+            uint64_t v = (uint16_t) block[i * 8];
+            uint64_t w = (uint16_t) block[i * 8 + 8];
+        for (i = 0; i < 8; i++)
+            idct_col(block + i);
+void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
+    simple_idct_axp(block);
+    put_pixels_clamped_axp_p(block, dest, line_size);
+void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
+    simple_idct_axp(block);
+    add_pixels_clamped_axp_p(block, dest, line_size);
--- avifile-0.7-0.7.38/configure.in.orig 2003-07-10 13:15:54.000000000 +0200
+++ avifile-0.7-0.7.38/configure.in 2003-11-14 00:09:16.019699264 +0100
--- avifile-0.7-0.7.38/acinclude.m4.orig 2003-07-10 15:40:57.000000000 +0200
+++ avifile-0.7-0.7.38/acinclude.m4 2003-11-14 00:17:33.678043696 +0100
-# Figure out how to run the assembler.
-AC_DEFUN([AM_PROG_AS],
-[# By default we simply use the C compiler to build assembly code.
-AC_REQUIRE([AC_PROG_CC])
-# Set ASFLAGS if not already set.
-: ${ASFLAGS='$(CFLAGS)'}
 dnl check for compiler version
 dnl sets COMPILER_VERSION and GCC_VERSION