1 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/Makefile.am dosbox-0.61+hq2x/src/gui/Makefile.am
2 --- dosbox-0.61/src/gui/Makefile.am 2004-07-05 02:44:22.000000000 +0200
3 +++ dosbox-0.61+hq2x/src/gui/Makefile.am 2004-07-04 23:25:07.000000000 +0200
5 noinst_LIBRARIES = libgui.a
6 libgui_a_SOURCES = sdlmain.cpp sdl_mapper.cpp \
7 render.cpp render_scalers.cpp render_scalers.h render_templates.h \
8 - midi.cpp midi_win32.h midi_oss.h midi_coreaudio.h midi_alsa.h
9 + midi.cpp midi_win32.h midi_oss.h midi_coreaudio.h midi_alsa.h \
10 + render_hq2x.cpp render_hq2x.h
12 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render.cpp dosbox-0.61+hq2x/src/gui/render.cpp
13 --- dosbox-0.61/src/gui/render.cpp 2004-08-05 00:12:58.732847304 +0200
14 +++ dosbox-0.61+hq2x/src/gui/render.cpp 2004-08-04 23:50:12.000000000 +0200
18 #include "render_scalers.h"
19 +#include "render_hq2x.h"
27 + if (render.op.type == OP_Hq2x) {
28 + Hq2x_InitLUTs((void*)render.pal.rgb,render.pal.last,render.pal.first);
30 /* Setup pal index to startup values */
34 case OP_Interp2x:block=&Interp2x_8;break;
35 case OP_AdvInterp2x:block=&AdvInterp2x_8;break;
36 case OP_TV2x:block=&TV2x_8;break;
37 + case OP_Hq2x:block=&Hq2x_8;break;
39 gfx_flags=GFX_GetBestMode(block->flags);
43 extern void GFX_SetTitle(Bits cycles, Bits frameskip,bool paused);
44 static void IncreaseFrameSkip(void) {
45 - if (render.frameskip.max<10) render.frameskip.max++;
46 + if (render.frameskip.max<25) render.frameskip.max++;
47 LOG_MSG("Frame Skip at %d",render.frameskip.max);
48 GFX_SetTitle(-1,render.frameskip.max,false);
51 void RENDER_Init(Section * sec) {
52 Section_prop * section=static_cast<Section_prop *>(sec);
54 + Hq2x_colourTrigger=section->Get_int("hq2x_threshold");
55 + if (Hq2x_colourTrigger > 255) Hq2x_colourTrigger = 255;
56 + if (Hq2x_colourTrigger < 0) Hq2x_colourTrigger = 0;
57 + Hq2x_colourTrigger_adaptive=section->Get_int("hq2x_threshold_adaptive");
58 + if (Hq2x_colourTrigger_adaptive > 255) Hq2x_colourTrigger_adaptive = 255;
59 + if (Hq2x_colourTrigger_adaptive <= 0) Hq2x_colourTrigger_adaptive = 75;
62 render.aspect=section->Get_bool("aspect");
64 else if (!strcasecmp(scaler,"advinterp2x")) render.op.want_type=OP_AdvInterp2x;
65 else if (!strcasecmp(scaler,"interp2x")) render.op.want_type=OP_Interp2x;
66 else if (!strcasecmp(scaler,"tv2x")) render.op.want_type=OP_TV2x;
67 + else if (!strcasecmp(scaler,"hq2x")) render.op.want_type=OP_Hq2x;
69 render.op.want_type=OP_Normal;
70 LOG_MSG("Illegal scaler type %s,falling back to normal.",scaler);
72 MAPPER_AddHandler(DecreaseFrameSkip,MK_f7,MMOD1,"decfskip","Dec Fskip");
73 MAPPER_AddHandler(IncreaseFrameSkip,MK_f8,MMOD1,"incfskip","Inc Fskip");
74 + MAPPER_AddHandler(Hq2x_DecreaseThreshold,MK_f3,MMOD1|MMOD2,"dechq2xthreshold","Dec Hq2x Static Threshold");
75 + MAPPER_AddHandler(Hq2x_IncreaseThreshold,MK_f4,MMOD1|MMOD2,"inchq2xthreshold","Inc Hq2x Static Threshold");
76 + MAPPER_AddHandler(Hq2x_DecreaseThresholdAdaptive,MK_f5,MMOD1|MMOD2,"dechq2xadapthreshold","Dec Hq2x Adaptive Threshold");
77 + MAPPER_AddHandler(Hq2x_IncreaseThresholdAdaptive,MK_f6,MMOD1|MMOD2,"inchq2xadapthreshold","Inc Hq2x Adaptive Threshold");
78 GFX_SetTitle(-1,render.frameskip.max,false);
81 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x.cpp dosbox-0.61+hq2x/src/gui/render_hq2x.cpp
82 --- dosbox-0.61/src/gui/render_hq2x.cpp 1970-01-01 01:00:00.000000000 +0100
83 +++ dosbox-0.61+hq2x/src/gui/render_hq2x.cpp 2004-08-04 23:43:53.000000000 +0200
85 +//hq2x filter demo program
86 +//----------------------------------------------------------
87 +//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
88 +// Speed optimization and mmx code Copyright (c) 2004 Jörg Walter (jwalt@garni.ch)
90 +//This program is free software; you can redistribute it and/or
91 +//modify it under the terms of the GNU Lesser General Public
92 +//License as published by the Free Software Foundation; either
93 +//version 2.1 of the License, or (at your option) any later version.
95 +//This program is distributed in the hope that it will be useful,
96 +//but WITHOUT ANY WARRANTY; without even the implied warranty of
97 +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
98 +//Lesser General Public License for more details.
100 +//You should have received a copy of the GNU Lesser General Public
101 +//License along with this program; if not, write to the Free Software
102 +//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
105 + This code comes in three variants:
106 + 1. plain C code with live difference calculation
107 + 2. C code with live difference calculation in MMX
108 + 3. lookup-table based difference calculation
110 + Which one is fastest depends on your CPU speed and cache size. The table based algorithm
111 + should be fastest if you have 32kb L1 data cache or more. Packing diff values into fewer
112 + bytes is possible: define DIFF_TABLE to the number of bits per int.
114 + Speed: 22fps/27fps(MMX)/32fps(table) on a pentium2/333MHz
117 + Currently only does 32bpp/16bpp BGRA output, and (theoretically) RGB output. YUV
118 + isn't needed anymore, it seems, so this code should now work in all setups.
119 + MMX code only does RGB, thus isn't really usable (but useful for benchmarking).
120 + This code should use the intel compiler functions for mmx, as GCC emulates
121 + them more or less completely.
123 + further optimization ideas:
124 + - fix gcc bugs (shift), so Diff_mmx can run without register spilling
125 + - manual unrolling of Diff loop to get decent memory prefetch for
127 + - add mmxext support to Diff for faster unpacking
128 + - test if sse's movntq in interpolation loop improves things
129 + - find a way to mmxify the interpolation loop sensibly
130 + (currently runs slower than non-mmx code)
131 + - find a way to save (cache-)memory in the factors table
132 + (tighter packing and double indirection are both slower on p2)
133 + - find a way for 16bpp not to suck that hard (speed-wise)
146 +#include "render_scalers.h"
147 +#include "render_hq2x.h"
150 +#define __attribute__(x)
151 +#define __builtin_expect(x,y) x
156 +// #define DIFF_TABLE 32
157 +#define DIFF_TABLE 1
159 +// Gathered experimentally, values from 0x08-0x80 are useful, depending on graphics
160 +// and your personal preference.
161 +long Hq2x_colourTrigger = 0;
162 +long Hq2x_colourTrigger_adaptive = 75;
166 +#define DIFF_TABLE 1
170 +#define bits DIFF_TABLE
177 +difftable[65536/bits];
179 +inline static unsigned int Diff1_calc(int r1, int g1, int b1, int r2, int g2, int b2)
191 + ret = (unsigned int)(((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g);
192 + return (ret < Hq2x_colourTrigger?0:ret-Hq2x_colourTrigger > 255?255:ret-Hq2x_colourTrigger);
194 + return ((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g > Hq2x_colourTrigger;
199 +#define Diff1(x,y) (difftable[(*(x)) * (256/bits) + ((*(y)) / bits)] >> ((*(y))%bits))
201 +#define Diff1(x,y) ((difftable[(*(x)) * (256/bits) + ((*(y)) / bits)] >> ((*(y))%bits)) & 1)
203 +inline static int Diff(const unsigned char *l2, const unsigned char *l3)
206 + unsigned int max = Diff1(l2,l3+1);
207 + unsigned int min = max;
208 + unsigned int dynthres = Diff1(l2+1,l3);
209 + if (dynthres > max) max = dynthres;
210 + if (dynthres < min) min = dynthres;
211 + dynthres = Diff1(l2+1,l3+1);
212 + if (dynthres > max) max = dynthres;
213 + if (dynthres < min) min = dynthres;
214 + dynthres = Diff1(l3,l3+1);
215 + if (dynthres > max) max = dynthres;
216 + if (dynthres < min) min = dynthres;
217 + dynthres = (Hq2x_colourTrigger_adaptive*max+(100-Hq2x_colourTrigger_adaptive)*min)/200;
219 + return ((Diff1(l2,l3+1)>dynthres)*0x00aa0055) | ((Diff1(l2+1,l3)>dynthres)*0x005500aa) | ((Diff1(l2+1,l3+1)>dynthres)*0x03000300) | ((Diff1(l3,l3+1)>dynthres)*0x0c000c00);
221 + return (Diff1(l2,l3+1)*0x00aa0055) | (Diff1(l2+1,l3)*0x005500aa) | (Diff1(l2+1,l3+1)*0x03000300) | (Diff1(l3,l3+1)*0x0c000c00);
227 +/* always on for gcc for now */
229 +/* this is safe for -march=..., but not if someone specifies -mmmx manually */
232 +# define Diff_mmx Diff
235 +# define Diff(a,b) (has_mmx?Diff_mmx((a),(b)):Diff_any((a),(b)))
238 +# define Diff_any Diff
241 +// A better colour distance function, adapted from http://www.compuphase.com/cmetric.htm
242 +#if !defined(MMX_ONLY) || defined(DEBUG)
244 +inline static int Diff1(const unsigned char *e1, const unsigned char* e2)
249 + rmean = e1[0]+e2[0];
254 + return ((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g > Hq2x_colourTrigger;
257 +inline static int Diff_any(const unsigned long *l2, const unsigned long *l3)
259 + return (Diff1((unsigned char *)l2,(unsigned char *)(l3+1))*0x00aa0055) | (Diff1((unsigned char *)(l2+1),(unsigned char *)l3)*0x005500aa) | (Diff1((unsigned char *)(l2+1),(unsigned char *)(l3+1))*0x03000300) | (Diff1((unsigned char *)l3,(unsigned char *)(l3+1))*0x0c000c00);
264 +typedef int mmx_1_64 __attribute__((mode(DI)));
265 +typedef int mmx_2_32 __attribute__((mode(V2SI)));
266 +typedef int mmx_4_16 __attribute__((mode(V4HI)));
267 +typedef int mmx_8_8 __attribute__((mode(V8QI)));
269 +static mmx_4_16 mmx_trigger;
271 +/* Note: this needs BGRA pixel layout, with the A component replaced by (-R)+32 */
272 +inline static int Diff_mmx(const unsigned long *e1, const unsigned long *e2)
274 + mmx_4_16 mm0, mm1, mm2, mm3, mm4;
276 + mmx_4_16 t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18, m0, m1;
281 + const mmx_8_8 zero = (mmx_8_8)0x0ULL;
282 + const mmx_4_16 rmean_off = (mmx_4_16)(0x0a000a000a000a00ULL);
283 + const mmx_4_16 factors = (mmx_4_16)0xfffdfff4aa5655abULL;
285 + /* -1 * aa56 = 0101 0101 1010 1010 */
286 + /* -1 * 55ab = 1010 1010 0101 0101 */
291 + mm1 = *(mmx_4_16 *)e2;
292 + mm0 = *(mmx_4_16 *)e1;
294 + /* (high ................................ low)
295 + -p2r+32, p2b, p2g, p2r, -p1r+32, p1b, p1g, p1r = mm0
296 + -p5r+32, p5b, p5g, p5r, -p4r+32, p4b, p4g, p4r = mm1
298 + Shuffle dwords so we get 4 registers with pixel
299 + arrangement ready for difference calculation:
301 + (2, 5, 4, 1) - (5, 4, 2, 5)
303 + We choose (rrrr, gggg) + (bbbb, rrrr) layout. This
304 + is quite expensive, given that difference calculation
305 + in (rgbr, rgbr) form would need just two unpacks, but
306 + the unpacking has to be done sooner or later, and
307 + this pixel layout makes later calculations cheaper.
308 + TODO: sse/mmxext version of this unpacking should be
311 + -p4r+32, -p1r+32, p4b, p1b, p4g, p1g, p4r, p1r = mm0
312 + -p5r+32, p5b, p5g, p5r, -p5r+32, p5b, p5g, p5r = mm3 (temp)
313 + -p2r+32, -p5r+32, p2b, p5b, p2g, p5g, p2r, p5r = mm2
314 + -p5r+32, -p4r+32, p5b, p4b, p5g, p4g, p5r, p4r = mm1
317 + mm3 = (mmx_4_16)__builtin_ia32_punpckhdq((mmx_2_32)mm1,(mmx_2_32)mm1);
318 + mm2 = (mmx_4_16)__builtin_ia32_punpckhbw((mmx_8_8)mm1,(mmx_8_8)mm0);
319 + mm0 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm0,(mmx_8_8)mm1);
320 + mm1 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm1,(mmx_8_8)mm3);
325 + -p2r+32, -p5r+32, -p4r+32, -p1r+32, p2b, p5b, p4b, p1b = mm0
326 + p2g, p5g, p4g, p1g, p2r, p5r, p4r, p1r = mm3
328 + -p5r+32, -p4r+32, -p2r+32, -p5r+32, p5b, p4b, p2b, p5b = mm4
329 + p5g, p4g, p2g, p5g, p5r, p4r, p2r, p5r = mm2
333 + mm3 = (mmx_4_16)__builtin_ia32_punpcklwd(mm0,mm2);
334 + mm0 = (mmx_4_16)__builtin_ia32_punpckhwd(mm0,mm2);
335 + mm4 = (mmx_4_16)__builtin_ia32_punpckhwd(mm2,mm1);
336 + mm2 = (mmx_4_16)__builtin_ia32_punpcklwd(mm2,mm1);
339 + Put mm2 with negated red component into mm1. Negation is done
340 + in the lookup table.
342 + -p2r+32, -p5r+32, -p4r+32, -p1r+32, p2b, p5b, p4b, p1b = mm0
343 + p2g, p5g, p4g, p1g, p2r, p5r, p4r, p1r = mm3
345 + -p5r+32, -p4r+32, -p2r+32, -p5r+32, p5b, p4b, p2b, p5b = mm4
346 + p5g, p4g, p2g, p5g, -p5r+32, -p4r+32, -p2r+32, -p5r+32 = mm2
350 + mm2 = (mmx_4_16)__builtin_ia32_punpckhdq((mmx_2_32)mm4,(mmx_2_32)mm2);
354 + Calculate the differences (and rmean)
355 + mm0-mm4, mm3-mm2 (signed saturation)
357 + d1r/8, d3r/8, d8r/8, d0r/8, d1b/8, d3b/8, d8b/8, d0b/8 = mm0
358 + d1g/8, d3g/8, d8g/8, d0g/8, d1rmean/4-32, d3rmean/4-32, d8rmean/4-32, d0rmean/4-32 = mm3
362 + mm0 = (mmx_4_16)__builtin_ia32_psubsb((mmx_8_8)mm0,(mmx_8_8)mm4);
363 + mm3 = (mmx_4_16)__builtin_ia32_psubsb((mmx_8_8)mm3,(mmx_8_8)mm2);
369 + char *cmm0 = (void*)&mm0, *cmm3 = (void*)&mm3;
370 + if (((int)cmm3[0]) != ((int)(p1&Rmask) + (int)(p5&Rmask) - 32)) abort();
371 + if (((int)cmm0[4]) != -((int)(p1&Rmask) - (int)(p5&Rmask))) abort();
375 + /* Intermediate stats:
377 + (rough) code equivalent:
378 + rmean = (((int)(e1&Rmask) + (int)(e2&Rmask)) >> 16) - 32;
379 + r = ((int)(e2&Rmask) - (int)(e1&Rmask)) >> 16;
380 + g = ((int)(e1&Gmask) - (int)(e2&Gmask)) >> 8;
381 + b = ((int)(e1&Bmask) - (int)(e2&Bmask));
384 + 1 distance w/o mmx = 16 ops
385 + 4 distances w/ mmx = 11 ops
386 + (possible parallelism left to the compiler)
389 + ((160+rmean)*r/8*r/8) + 256*g/8*g/8 + ((160-rmean)*b/8*b/8)
391 + (slightly incorrect: the result is the true difference plus (b/8)^2, but
392 + this eliminates a constant, making the algorithm fit into the available
395 + d1r/8, d3r/8, d8r/8, d0r/8, d1b/8, d3b/8, d8b/8, d0b/8 = mm0
396 + d1g/8, d3g/8, d8g/8, d0g/8, d1rmean/4-32, d3rmean/4-32, d8rmean/4-32, d0rmean/4-32 = mm3
400 + prepare differences for final calculation:
402 + 00 d1r/2 00 d3r/2 00 d8r/2 00 d0r/2 = mm0
403 + 00 d1b/2 00 d3b/2 00 d8b/2 00 d0b/2 = mm1
404 + 00 d1g/8 00 d3g/8 00 d8g/8 00 d0g/8 = mm2
405 + 00 (d1rmean/4-32)*16 00 (d3rmean/4-32)*16 00 (d8rmean/4-32)*16 00 (d0rmean/4-32)*16 = mm3
413 + // TODO: compiler error at __builtin_ia32_psllb(mm0,2);
414 + d(t0) mm0 = __builtin_ia32_pmullw(mm0,(mmx_4_16)(0x0004000400040004ULL));
415 + d(t1) mm1 = (mmx_4_16)__builtin_ia32_punpcklbw(zero, (mmx_8_8)mm0);
416 + d(t2) mm0 = (mmx_4_16)__builtin_ia32_punpckhbw(zero, (mmx_8_8)mm0);
417 + d(t3) mm2 = (mmx_4_16)__builtin_ia32_punpckhbw(zero, (mmx_8_8)mm3);
418 + d(t4) mm3 = (mmx_4_16)__builtin_ia32_punpcklbw(zero, (mmx_8_8)mm3);
419 + // TODO: compiler error at __builtin_ia32_psraw(mm3,4);
420 + d(t5) mm3 = __builtin_ia32_pmulhw(mm3,(mmx_4_16)(0x1000100010001000ULL));
423 + intermediate results: squares and rmean factors
425 + 00 (d1r/2)^2 00 (d3r/2)^2 00 (d8r/2)^2 00 (d0r/2)^2 = mm0
426 + 00 (d1b/2)^2 00 (d3b/2)^2 00 (d8b/2)^2 00 (d0b/2)^2 = mm1
427 + 00 256*(d1g/8)^2 00 256*(d3g/8)^2 00 256*(d8g/8)^2 00 256*(d0g/8)^2 = mm2
428 + 00 128+d1rmean/4 00 128+d3rmean/4 00 128+d8rmean/4 00 128+d0rmean/4 = mm3
429 + 00 192-d1rmean/4 00 192-d3rmean/4 00 192-d8rmean/4 00 192-d0rmean/4 = mm4
433 + d(t9) mm0 = __builtin_ia32_pmulhw(mm0,mm0);
434 + d(t10) mm1 = __builtin_ia32_pmulhw(mm1,mm1);
435 + d(t11) mm2 = __builtin_ia32_pmulhw(mm2,mm2);
436 + d(t12) mm4 = __builtin_ia32_psubsw(rmean_off,mm3);
437 + d(t13) mm3 = __builtin_ia32_paddsw(mm3,rmean_off);
440 + intermediate results: finish red and blue components
442 + 00 (128+d1rmean/4)*(d1r/8)^2 00 (128+d3rmean/4)*(d3r/8)^2 00 (128+d8rmean/4)*(d8r/8)^2 00 (128+d0rmean/4)*(d0r/8)^2 = mm0
443 + 00 (192-d1rmean/4)*(d1b/8)^2 00 (192-d3rmean/4)*(d3b/8)^2 00 (192-d8rmean/4)*(d8b/8)^2 00 (192-d0rmean/4)*(d0b/8)^2 = mm1
444 + 00 (d1g/8)^2 00 (d3g/8)^2 00 (d8g/8)^2 00 (d0g/8)^2 = mm2
448 + d(t15) mm1 = __builtin_ia32_pmulhw(mm1, mm4);
449 + d(t16) mm0 = __builtin_ia32_pmulhw(mm0, mm3);
452 + calculate final visual difference
454 + (128+rmean/4)*(r/8)^2+(192-rmean/4)*(b/8)^2+256*(g/8)^2 = mm0 (order: 1 3 8 0)
457 + d(t17) mm0 = __builtin_ia32_paddw(mm0,mm1);
458 + d(t18) mm0 = __builtin_ia32_paddw(mm0,mm2);
464 + short *smm0 = (void*)&mm0;
467 + rmean = (((p1+p5)&Rmask)-32);
468 + r = (p1&Rmask)-(p5&Rmask);
469 + g = ((p1&Gmask) - (p5&Gmask)) >> 8;
470 + b = ((p1&Bmask) - (p5&Bmask)) >> 16;
472 + diff = ((160+rmean)*r*r + 256*g*g + (160-rmean)*b*b)/256;
473 + if (diff > smm0[0]+1 || diff < smm0[0]-1) abort();
479 + ((((512+rmean)>>8)*r*r) + 4*g*g + (((768-rmean)>>8)*b*b))
481 + test against threshold
483 + (diff1?0xffff:0x0000) (diff3?0xffff:0x0000) (diff8?0xffff:0x0000) (diff0?0xffff:0x0000) = mm0
486 + mm0 = __builtin_ia32_pcmpgtw(mm0,mmx_trigger);
489 + create final bit patterns
491 + 0000 0000 (diff1*0x03000300)|(diff3*0x0c000c00)|(diff8*0x005500aa)|(diff0*0x00aa0055)
495 + mm0 = (mmx_4_16)__builtin_ia32_pmaddwd(mm0,factors);
496 + mm0 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm0,(mmx_8_8)__builtin_ia32_punpckhbw((mmx_8_8)mm0,(mmx_8_8)mm0));
498 + return (unsigned long)(unsigned long long)mm0;
500 + Total: 11+16+3 = 30 ops for 4 distances vs. 16+13+7 = 36 ops for 1 distance
506 +static int LUTPAL8to32[256] __attribute__((aligned(32)));
507 +//#define factors(a,b,c,d) (((a)-1) | ((b)<<2) | ((c)<<4) | ((d)<<6))
508 +#define P0 {8,0,0,0} /* factor sets P*: four weights that always sum to 8; presumably per-pixel blend weights in eighths -- confirm against render_hq2x_template.h */
509 +#define P10 {6,2,0,0}
510 +#define P11 {6,0,0,2}
511 +#define P12 {6,0,2,0}
512 +#define P20 {4,0,2,2}
513 +#define P21 {4,2,2,0}
514 +#define P22 {4,2,0,2}
515 +#define P60 {5,0,2,1}
516 +#define P61 {5,0,1,2}
517 +#define P70 {6,0,1,1}
518 +#define P90 {2,0,3,3}
519 +#define P100 {7,0,0,1} /* approximation: the comment block below the table says 7 0 0.5 0.5 would match the original exactly */
521 +#define UNUSED X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, \
522 + X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X
524 +/* sparse table: only 2k entries are used */
525 +static unsigned char factors[4096][4] __attribute__((aligned(32))) = {
526 +/* 0000 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
527 +/* 0040 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
528 +/* 0080 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
529 +/* 00c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
530 +/* 0100 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
531 +/* 0140 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
532 +/* 0180 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
533 +/* 01c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
535 +/* 0400 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
536 +/* 0440 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
537 +/* 0480 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
538 +/* 04c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
539 +/* 0500 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
540 +/* 0540 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
541 +/* 0580 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
542 +/* 05c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
544 +/* 0800 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
545 +/* 0840 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
546 +/* 0880 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
547 +/* 08c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
548 +/* 0900 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
549 +/* 0940 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
550 +/* 0980 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
551 +/* 09c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
553 +/* 0c00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
554 +/* 0c40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
555 +/* 0c80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
556 +/* 0cc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
557 +/* 0d00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
558 +/* 0d40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
559 +/* 0d80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
560 +/* 0dc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
562 +/* 1000 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
563 +/* 1040 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
564 +/* 1080 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
565 +/* 10c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
566 +/* 1100 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
567 +/* 1140 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
568 +/* 1180 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
569 +/* 11c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
571 +/* 1400 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0,
572 +/* 1440 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0,
573 +/* 1480 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0,
574 +/* 14c0 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0,
575 +/* 1500 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0,
576 +/* 1540 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0,
577 +/* 1580 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0,
578 +/* 15c0 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0,
580 +/* 1800 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
581 +/* 1840 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
582 +/* 1880 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
583 +/* 18c0 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
584 +/* 1900 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
585 +/* 1940 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
586 +/* 1980 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
587 +/* 19c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
589 +/* 1c00 */ P20, P20, P0, P0, P20, P20, P0, P0, P10, P20, P10, P0, P10, P20, P10, P0,
590 +/* 1c40 */ P20, P20, P0, P0, P20, P20, P0, P0, P10, P20, P10, P0, P10, P20, P10, P0,
591 +/* 1c80 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
592 +/* 1cc0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
593 +/* 1d00 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0,
594 +/* 1d40 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0,
595 +/* 1d80 */ P70, P90, P10, P0, P70, P90, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
596 +/* 1dc0 */ P70, P90, P10, P0, P70, P90, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
598 +/* 2000 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
599 +/* 2040 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
600 +/* 2080 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
601 +/* 20c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
602 +/* 2100 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
603 +/* 2140 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
604 +/* 2180 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
605 +/* 21c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
607 +/* 2400 */ P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, /* above */
608 +/* 2440 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
609 +/* 2480 */ P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60,
610 +/* 24c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
611 +/* 2500 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
612 +/* 2540 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
613 +/* 2580 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
614 +/* 25c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
616 +/* 2800 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
617 +/* 2840 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
618 +/* 2880 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
619 +/* 28c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
620 +/* 2900 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
621 +/* 2940 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
622 +/* 2980 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
623 +/* 29c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20,
625 +/* 2c00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, /* rot */
626 +/* 2c40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
627 +/* 2c80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P60, P22, P60, P22, P60, P22, P60,
628 +/* 2cc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
629 +/* 2d00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
630 +/* 2d40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
631 +/* 2d80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
632 +/* 2dc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11,
634 +/* 3000 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
635 +/* 3040 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
636 +/* 3080 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
637 +/* 30c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
638 +/* 3100 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
639 +/* 3140 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
640 +/* 3180 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
641 +/* 31c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
643 +/* 3400 */ P20, P20, P0, P0, P20, P20, P0, P0, P70, P20, P10, P0, P70, P20, P10, P0,
644 +/* 3440 */ P20, P20, P0, P0, P20, P20, P0, P0, P70, P20, P10, P0, P70, P20, P10, P0,
645 +/* 3480 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
646 +/* 34c0 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
647 +/* 3500 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P90, P10, P0, P70, P90, P10, P0,
648 +/* 3540 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P90, P10, P0, P70, P90, P10, P0,
649 +/* 3580 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
650 +/* 35c0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
652 +/* 3800 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
653 +/* 3840 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
654 +/* 3880 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
655 +/* 38c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12,
656 +/* 3900 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
657 +/* 3940 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
658 +/* 3980 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
659 +/* 39c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12,
661 +/* 3c00 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0,
662 +/* 3c40 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0,
663 +/* 3c80 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
664 +/* 3cc0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
665 +/* 3d00 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P20, P10, P0, P10, P20, P10, P0,
666 +/* 3d40 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P20, P10, P0, P10, P20, P10, P0,
667 +/* 3d80 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
668 +/* 3dc0 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0,
672 +/* Memory usage at 320 pixels width:
675 + ~2.5k pattern buffer
676 + 8k factor table (can be packed into as little as 2k, but unpacking takes more time than we lose due
677 + to cache thrashing, at least on a pentium2)
682 +If the diff table is used, another 8k (packed) / 64k (unpacked) are used.
687 +Pixel/Pattern layout:
711 +bits: 3 1 2 2 = 8 bit
713 +Factor set 100 is wrong: 7 0 0.5 0.5 would be 100% like the original, but
714 +since pixel 2 and 4 are visually close for all patterns where this set is
715 +used, this important simplification should not be visible.
720 +0 8 1 3 10 5 6 (a) 9 2 (b) 4 7 (11) (c) (d) -> 1 2 4 5
722 +0 8 10 5 9 2 7 x 1 6 3 4
725 +2 9 1 4 11 7 6 (d) 8 0 (x) 3 5 (10) (y) (a) -> 3 2 6 5
728 +5 10 6 3 8 0 1 (m) 11 7 (c) 4 2 (9) (b) (n) -> 7 8 4 5
731 +7 11 6 4 9 2 1 (n) 10 5 (y) 3 0 (8) (x) (m) -> 9 8 6 5
744 + unsigned short p[2];
745 + unsigned long value;
748 +static unsigned long lines0[Hq2x_MAXWIDTH+2] __attribute__((aligned(32)));
749 +static unsigned long lines1[Hq2x_MAXWIDTH+2] __attribute__((aligned(32)));
750 +static unsigned long lines2[Hq2x_MAXWIDTH+2] __attribute__((aligned(32)));
751 +static unsigned long *l1, *l2, *l3, *tmp;
752 +static union pattern p0[Hq2x_MAXWIDTH+2] __attribute__((aligned(32)));
753 +static union pattern p1[Hq2x_MAXWIDTH+2] __attribute__((aligned(32)));
754 +static union pattern *top, *bot, *ptmp;
755 +static unsigned char prev[Hq2x_MAXWIDTH+2];
758 +#define diffcall bot[i].value = Diff(prev+i,pIn+i); memcpy(prev,pIn,Scaler_SrcWidth+1);
760 +#define diffcall bot[i].value = Diff(l2+i,l3+i);
764 +#define __builtin_ia32_emms()
767 +#define CONSTCHECK if (Scaler_SrcWidth == 320) RENDER_DrawLine =
769 +#define store(out,index,x,y) do{((unsigned long*)out)[index*2] = 0xff000000|(x); ((unsigned long*)out)[index*2+1] = 0xff000000|(y);}while(0)
772 +#define FUNC Hq2x_long_320_line
773 +#define Scaler_SrcWidth 320
775 +#include "render_hq2x_template.h"
777 +#undef Scaler_SrcWidth
780 +#define FUNC Hq2x_long_Scaler_SrcWidth_line
781 +#define CHECK_CONST CONSTCHECK Hq2x_long_320_line;
782 +#include "render_hq2x_template.h"
788 +/* 16 bit support */
789 +#ifdef WORDS_BIGENDIAN
790 +#define store(out,index,y,x) ((unsigned long *)out)[index] = (((((x)>>3)&0x1f)|(((y)<<13))&0x1f0000)|((((x)>>5)&0x7e0)|(((y)<<11))&0x7e00000)|((((x)>>8)&0xf800)|(((y)<<8))&0xf8000000))
792 +#define store(out,index,x,y) ((unsigned long *)out)[index] = (((((x)>>3)&0x1f)|(((y)<<13))&0x1f0000)|((((x)>>5)&0x7e0)|(((y)<<11))&0x7e00000)|((((x)>>8)&0xf800)|(((y)<<8))&0xf8000000))
796 +#define FUNC Hq2x_short_320_line
797 +#define Scaler_SrcWidth 320
799 +#include "render_hq2x_template.h"
801 +#undef Scaler_SrcWidth
804 +#define FUNC Hq2x_short_Scaler_SrcWidth_line
805 +#define CHECK_CONST CONSTCHECK Hq2x_short_320_line;
806 +#include "render_hq2x_template.h"
812 +ScalerBlock Hq2x_8={
813 + CAN_16|CAN_32|LOVE_32|NEED_RGB,
815 + 0,Hq2x_short_Scaler_SrcWidth_line,Hq2x_short_Scaler_SrcWidth_line,Hq2x_long_Scaler_SrcWidth_line
818 +void Hq2x_InitLUTs(const void *pal, int palette_end, int palette_start)
821 + struct GFX_PalEntry *palette = (struct GFX_PalEntry *)pal;
823 + // All components are reduced to 5 bit (VGA palette has 6 bit)
824 + // for simpler multiplication and storage (divided by 8)
825 + for (i=palette_start; i<=palette_end; i++) {
826 + // 5 significant bits with 3 bit multiplier fit into 8 bit, thus
827 + // plain int multiplication can be used without tricks
828 + // R is duplicated into A, negated and increased by 32 for some
829 + // nice mmx distance calculation tricks
830 + LUTPAL8to32[i] = ((palette[i].r&0xf8) << 13) | ((palette[i].g&0xf8) << 5) | ((palette[i].b&0xf8) >> 3) | ((32*8-(palette[i].r&0xf8)) << 21);
835 + memset(difftable,0,sizeof(difftable));
837 + for (i = 0; i < 256; i++) {
838 + for (j = 0; j < 256; j++) {
839 + difftable[(i) * (256/bits) + ((j) / bits)]
845 + Diff1_calc((LUTPAL8to32[i]>>16)&0x1f,(LUTPAL8to32[i]>>8)&0x1f,(LUTPAL8to32[i])&0x1f, (LUTPAL8to32[j]>>16)&0x1f,(LUTPAL8to32[j]>>8)&0x1f,(LUTPAL8to32[j])&0x1f) << (j%bits);
851 + *((short *)(&mmx_trigger)) = Hq2x_colourTrigger;
852 + *(((short *)(&mmx_trigger))+1) = Hq2x_colourTrigger;
853 + *(((short *)(&mmx_trigger))+2) = Hq2x_colourTrigger;
854 + *(((short *)(&mmx_trigger))+3) = Hq2x_colourTrigger;
858 +void Hq2x_IncreaseThreshold(void)
860 + if (Hq2x_colourTrigger < 255) Hq2x_colourTrigger++;
861 + Hq2x_InitLUTs(0,0,1);
862 + LOG_MSG("Hq2x threshold at %li",Hq2x_colourTrigger); /* %li: Hq2x_colourTrigger is declared long */
865 +void Hq2x_DecreaseThreshold(void)
867 + if (Hq2x_colourTrigger > 0) Hq2x_colourTrigger--;
868 + Hq2x_InitLUTs(0,0,1);
869 + LOG_MSG("Hq2x threshold at %li",Hq2x_colourTrigger); /* %li: Hq2x_colourTrigger is declared long */
872 +void Hq2x_IncreaseThresholdAdaptive(void)
874 + if (Hq2x_colourTrigger_adaptive < 100) Hq2x_colourTrigger_adaptive++;
875 + LOG_MSG("Hq2x adaptive threshold at %li",Hq2x_colourTrigger_adaptive); /* %li: Hq2x_colourTrigger_adaptive is declared long */
878 +void Hq2x_DecreaseThresholdAdaptive(void)
880 + if (Hq2x_colourTrigger_adaptive > 0) Hq2x_colourTrigger_adaptive--;
881 + LOG_MSG("Hq2x adaptive threshold at %li",Hq2x_colourTrigger_adaptive); /* %li: Hq2x_colourTrigger_adaptive is declared long */
884 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x.h dosbox-0.61+hq2x/src/gui/render_hq2x.h
885 --- dosbox-0.61/src/gui/render_hq2x.h 1970-01-01 01:00:00.000000000 +0100
886 +++ dosbox-0.61+hq2x/src/gui/render_hq2x.h 2004-08-02 20:34:46.000000000 +0200
888 +//derived from the hq2x filter demo program
889 +//----------------------------------------------------------
890 +//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
891 +// Speed optimization and mmx code Copyright (c) 2004 Jörg Walter (jwalt@garni.ch)
893 +//This program is free software; you can redistribute it and/or
894 +//modify it under the terms of the GNU Lesser General Public
895 +//License as published by the Free Software Foundation; either
896 +//version 2.1 of the License, or (at your option) any later version.
898 +//This program is distributed in the hope that it will be useful,
899 +//but WITHOUT ANY WARRANTY; without even the implied warranty of
900 +//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
901 +//Lesser General Public License for more details.
903 +//You should have received a copy of the GNU Lesser General Public
904 +//License along with this program; if not, write to the Free Software
905 +//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
910 +#define Hq2x_MAXWIDTH (640-2) /* parenthesized so the value survives use inside larger expressions */
911 +extern long Hq2x_colourTrigger;
912 +extern long Hq2x_colourTrigger_adaptive;
913 +extern void Hq2x_InitLUTs(const void *palette, int palette_end, int palette_start);
914 +extern void Hq2x_IncreaseThreshold(void);
915 +extern void Hq2x_DecreaseThreshold(void);
916 +extern void Hq2x_IncreaseThresholdAdaptive(void);
917 +extern void Hq2x_DecreaseThresholdAdaptive(void);
919 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x_template.h dosbox-0.61+hq2x/src/gui/render_hq2x_template.h
920 --- dosbox-0.61/src/gui/render_hq2x_template.h 1970-01-01 01:00:00.000000000 +0100
921 +++ dosbox-0.61+hq2x/src/gui/render_hq2x_template.h 2004-08-02 17:27:01.000000000 +0200
923 +static void FUNC(const unsigned char *pIn)
926 + unsigned int factor, value1, value2, linesa = (*Scaler_Index++)+1, linesb = linesa/2;
930 + if (__builtin_expect(Scaler_Line++==0,0)) {
934 + Scaler_DstWrite -= 2*sizeof(type);
938 + for (i=0; i <= Scaler_SrcWidth+1; i++) l2[i] = 0x20000000;
939 + l3[0] = 0x20000000;
940 + l3[1] = LUTPAL8to32[pIn[1]];
942 + for (i=2; i<=Scaler_SrcWidth+1; i++)
943 + l3[i] = LUTPAL8to32[pIn[i]];
947 + memcpy(prev,pIn,Scaler_SrcWidth+1);
948 + for (i=1; i <= Scaler_SrcWidth; i++) diffcall
952 + tmp = l1; l1 = l2; l2 = l3; l3 = tmp;
953 + ptmp = top; top = bot; bot = ptmp;
954 + bot[0].value = 0x07ff07ff;
956 + l3[0] = 0x20000000;
957 + l3[1] = LUTPAL8to32[pIn[1]];
959 + for (i=2; i<=Scaler_SrcWidth+1; i++)
960 + l3[i] = LUTPAL8to32[pIn[i]];
962 + for (i=1; i<=Scaler_SrcWidth; i++) diffcall
965 + for (i=1; i<=Scaler_SrcWidth; i++) {
966 + factor = (top[i-1].p[0]&0x503)|(bot[i-1].p[0]&0x20c)|(top[i].p[0]&0x830)|(bot[i].p[0]&0x040);
967 + value1 = (l1[i-1]*factors[factor][1]+l1[i]*factors[factor][2]+l2[i-1]*factors[factor][3]+l2[i]*factors[factor][0]);
969 + factor = (top[i-1].p[1]&0x930)|(bot[i-1].p[1]&0x240)|(top[i].p[1]&0x403)|(bot[i].p[1]&0x00c);
970 + value2 = (l1[i+1]*factors[factor][1]+l1[i]*factors[factor][2]+l2[i+1]*factors[factor][3]+l2[i]*factors[factor][0]);
971 + store(Scaler_DstWrite,i,value1,value2);
974 + memcpy(Scaler_DstWrite+Scaler_DstPitch,Scaler_DstWrite,Scaler_DstPitch);
975 + Scaler_DstWrite += Scaler_DstPitch;
977 + Scaler_DstWrite += Scaler_DstPitch;
981 + for (i=1; i <= Scaler_SrcWidth; i++) {
982 + factor = (top[i-1].p[1]&0x60c)|(bot[i-1].p[1]&0x103)|(top[i].p[1]&0x840)|(bot[i].p[1]&0x030);
983 + value1 = (l3[i-1]*factors[factor][1]+l3[i]*factors[factor][2]+l2[i-1]*factors[factor][3]+l2[i]*factors[factor][0]);
985 + factor = (top[i-1].p[0]&0xa40)|(bot[i-1].p[0]&0x130)|(top[i].p[0]&0x40c)|(bot[i].p[0]&0x003);
986 + value2 = (l3[i+1]*factors[factor][1]+l3[i]*factors[factor][2]+l2[i+1]*factors[factor][3]+l2[i]*factors[factor][0]);
987 + store(Scaler_DstWrite,i,value1,value2);
990 + memcpy(Scaler_DstWrite+Scaler_DstPitch,Scaler_DstWrite,Scaler_DstPitch);
991 + Scaler_DstWrite += Scaler_DstPitch;
993 + Scaler_DstWrite += Scaler_DstPitch;
996 + if (__builtin_expect(Scaler_Line==Scaler_SrcHeight,0)) {
998 + __builtin_ia32_emms();
1001 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_scalers.h dosbox-0.61+hq2x/src/gui/render_scalers.h
1002 --- dosbox-0.61/src/gui/render_scalers.h 2004-06-10 09:18:19.000000000 +0200
1003 +++ dosbox-0.61+hq2x/src/gui/render_scalers.h 2004-07-04 23:29:49.000000000 +0200
1011 struct ScalerBlock {
1013 extern ScalerBlock AdvInterp2x_8;
1014 extern ScalerBlock Interp2x_8;
1015 extern ScalerBlock TV2x_8;
1016 +extern ScalerBlock Hq2x_8;
1020 diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/hardware/ymf262.c dosbox-0.61+hq2x/src/hardware/ymf262.c
1021 --- dosbox-0.61/src/hardware/ymf262.c 2004-03-28 15:04:45.000000000 +0200
1022 +++ dosbox-0.61+hq2x/src/hardware/ymf262.c 2004-06-20 03:54:47.000000000 +0200
1023 @@ -844,23 +844,52 @@
1024 INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm, unsigned int wave_tab)
1027 + int pos = (((signed int)((phase & ~FREQ_MASK) + (pm<<16))) >> FREQ_SH );
1029 + if ((wave_tab == 1*SIN_LEN) && (pos & (SIN_LEN>>1))) pos = 0;
1030 + if ((wave_tab == 3*SIN_LEN) && (pos & (SIN_LEN>>2))) pos = 0;
1031 + if (wave_tab == 2*SIN_LEN || wave_tab == 3*SIN_LEN) pos &= SIN_MASK>>1;
1032 + if (wave_tab == 4*SIN_LEN || wave_tab == 5*SIN_LEN) {
1033 + if (wave_tab == 5*SIN_LEN) pos &= SIN_MASK>>1;
1035 + if (pos & (SIN_LEN>>1)) pos = 0;
1037 + if (wave_tab != 6*SIN_LEN && wave_tab != 7*SIN_LEN) wave_tab = 0;
1039 + p = (env<<4) + sin_tab[wave_tab + (pos & SIN_MASK)];
1041 - p = (env<<4) + sin_tab[wave_tab + ((((signed int)((phase & ~FREQ_MASK) + (pm<<16))) >> FREQ_SH ) & SIN_MASK) ];
1043 - if (p >= TL_TAB_LEN)
1046 + return tl_tab[p&(TL_TAB_LEN/13-1)] >> (p/(TL_TAB_LEN/13));
1048 + if (p >= TL_TAB_LEN) return 0; /* same >= bound as the check this patch removes */
1053 INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm, unsigned int wave_tab)
1056 + int pos = (((signed int)((phase & ~FREQ_MASK) + pm)) >> FREQ_SH );
1058 + if ((wave_tab == 1*SIN_LEN) && (pos & (SIN_LEN>>1))) pos = 0;
1059 + if ((wave_tab == 3*SIN_LEN) && (pos & (SIN_LEN>>2))) pos = 0;
1060 + if (wave_tab == 2*SIN_LEN || wave_tab == 3*SIN_LEN) pos &= SIN_MASK>>1;
1061 + if (wave_tab == 4*SIN_LEN || wave_tab == 5*SIN_LEN) {
1062 + if (wave_tab == 5*SIN_LEN) pos &= SIN_MASK>>1;
1064 + if (pos & (SIN_LEN>>1)) pos = 0;
1066 + if (wave_tab != 6*SIN_LEN && wave_tab != 7*SIN_LEN) wave_tab = 0;
1069 - p = (env<<4) + sin_tab[wave_tab + ((((signed int)((phase & ~FREQ_MASK) + pm))>>FREQ_SH) & SIN_MASK)];
1070 + p = (env<<4) + sin_tab[wave_tab + (pos & SIN_MASK)];
1072 - if (p >= TL_TAB_LEN)
1075 + return tl_tab[p&(TL_TAB_LEN/13-1)] >> (p/(TL_TAB_LEN/13));
1077 + if (p >= TL_TAB_LEN) return 0; /* same >= bound as the check this patch removes */
1083 diff -ruN src./dosbox.cpp src/dosbox.cpp
1084 --- dupa/src./dosbox.cpp 2004-09-30 15:15:59.000000000 +0200
1085 +++ dupa/src/dosbox.cpp 2004-09-30 15:18:48.301932384 +0200
1086 @@ -231,11 +231,17 @@
1087 secprop->Add_int("frameskip",0);
1088 secprop->Add_bool("aspect",false);
1089 secprop->Add_string("scaler","normal2x");
1090 + secprop->Add_int("hq2x_threshold_adaptive",75);
1091 + secprop->Add_int("hq2x_threshold",0);
1092 MSG_Add("RENDER_CONFIGFILE_HELP",
1093 "frameskip -- How many frames dosbox skips before drawing one.\n"
1094 "aspect -- Do aspect correction.\n"
1095 "scaler -- Scaler used to enlarge/enhance low resolution modes.\n"
1096 - " Supported are none,normal2x,advmame2x,advmame3x,advinterp2x,interp2x,tv2x.\n"
1097 + " Supported are none,normal2x,advmame2x,advmame3x,advinterp2x,interp2x,tv2x,hq2x.\n"
1098 + "hq2x_threshold_adaptive -- The adaptive threshold used to detect edges in hq2x\n"
1099 + " Possible values are 0-100, can be modified with Ctrl+Alt+F5/F6\n"
1100 + "hq2x_threshold -- The static threshold used to detect edges in hq2x\n"
1101 + " Possible values are 0-255, can be modified with Ctrl+Alt+F3/F4\n"
1104 secprop=control->AddSection_prop("cpu",&CPU_Init);