From 456fb30250fc8ea3f5befdbaa47820025c1517df Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jan=20R=C4=99korajski?= Date: Fri, 21 Apr 2006 23:40:32 +0000 Subject: [PATCH] - orphaned, outdated Changed files: dosbox-hq2x.patch -> 1.2 dosbox_coreswitch.patch -> 1.2 --- dosbox-hq2x.patch | 1104 --------------------------------------- dosbox_coreswitch.patch | 332 ------------ 2 files changed, 1436 deletions(-) delete mode 100644 dosbox-hq2x.patch delete mode 100644 dosbox_coreswitch.patch diff --git a/dosbox-hq2x.patch b/dosbox-hq2x.patch deleted file mode 100644 index 2b0e2ad..0000000 --- a/dosbox-hq2x.patch +++ /dev/null @@ -1,1104 +0,0 @@ -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/Makefile.am dosbox-0.61+hq2x/src/gui/Makefile.am ---- dosbox-0.61/src/gui/Makefile.am 2004-07-05 02:44:22.000000000 +0200 -+++ dosbox-0.61+hq2x/src/gui/Makefile.am 2004-07-04 23:25:07.000000000 +0200 -@@ -3,5 +3,6 @@ - noinst_LIBRARIES = libgui.a - libgui_a_SOURCES = sdlmain.cpp sdl_mapper.cpp \ - render.cpp render_scalers.cpp render_scalers.h render_templates.h \ -- midi.cpp midi_win32.h midi_oss.h midi_coreaudio.h midi_alsa.h -+ midi.cpp midi_win32.h midi_oss.h midi_coreaudio.h midi_alsa.h \ -+ render_hq2x.cpp render_hq2x.h - -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render.cpp dosbox-0.61+hq2x/src/gui/render.cpp ---- dosbox-0.61/src/gui/render.cpp 2004-08-05 00:12:58.732847304 +0200 -+++ dosbox-0.61+hq2x/src/gui/render.cpp 2004-08-04 23:50:12.000000000 +0200 -@@ -33,6 +33,7 @@ - #include "support.h" - - #include "render_scalers.h" -+#include "render_hq2x.h" - - struct PalData { - struct { -@@ -190,6 +191,9 @@ - } - break; - } -+ if (render.op.type == OP_Hq2x) { -+ Hq2x_InitLUTs((void*)render.pal.rgb,render.pal.last,render.pal.first); -+ } - /* Setup pal index to startup values */ - render.pal.first=256; - render.pal.last=0; -@@ -314,6 +318,7 @@ - case OP_Interp2x:block=&Interp2x_8;break; - case OP_AdvInterp2x:block=&AdvInterp2x_8;break; - case OP_TV2x:block=&TV2x_8;break; -+ case OP_Hq2x:block=&Hq2x_8;break; - } - gfx_flags=GFX_GetBestMode(block->flags); - if (!gfx_flags) { -@@ -362,7 +367,7 @@ - - extern void GFX_SetTitle(Bits cycles, Bits frameskip,bool paused); - static void IncreaseFrameSkip(void) { -- if (render.frameskip.max<10) render.frameskip.max++; -+ if (render.frameskip.max<25) render.frameskip.max++; - LOG_MSG("Frame Skip at %d",render.frameskip.max); - GFX_SetTitle(-1,render.frameskip.max,false); - } -@@ -376,6 +381,12 @@ - void RENDER_Init(Section * sec) { - Section_prop * section=static_cast(sec); - -+ Hq2x_colourTrigger=section->Get_int("hq2x_threshold"); -+ if (Hq2x_colourTrigger > 255) Hq2x_colourTrigger = 255; -+ if (Hq2x_colourTrigger < 0) Hq2x_colourTrigger = 0; -+ Hq2x_colourTrigger_adaptive=section->Get_int("hq2x_threshold_adaptive"); -+ if (Hq2x_colourTrigger_adaptive > 255) Hq2x_colourTrigger_adaptive = 255; -+ if (Hq2x_colourTrigger_adaptive <= 0) Hq2x_colourTrigger_adaptive = 75; - render.pal.first=256; - render.pal.last=0; - render.aspect=section->Get_bool("aspect"); -@@ -398,12 +409,17 @@ - else if (!strcasecmp(scaler,"advinterp2x")) render.op.want_type=OP_AdvInterp2x; - else if (!strcasecmp(scaler,"interp2x")) render.op.want_type=OP_Interp2x; - else if (!strcasecmp(scaler,"tv2x")) render.op.want_type=OP_TV2x; -+ else if (!strcasecmp(scaler,"hq2x")) render.op.want_type=OP_Hq2x; - else { - render.op.want_type=OP_Normal; - LOG_MSG("Illegal scaler type %s,falling back to normal.",scaler); - } - MAPPER_AddHandler(DecreaseFrameSkip,MK_f7,MMOD1,"decfskip","Dec Fskip"); - MAPPER_AddHandler(IncreaseFrameSkip,MK_f8,MMOD1,"incfskip","Inc Fskip"); -+ MAPPER_AddHandler(Hq2x_DecreaseThreshold,MK_f3,MMOD1|MMOD2,"dechq2xthreshold","Dec Hq2x Static Threshold"); -+ MAPPER_AddHandler(Hq2x_IncreaseThreshold,MK_f4,MMOD1|MMOD2,"inchq2xthreshold","Inc Hq2x Static Threshold"); -+ MAPPER_AddHandler(Hq2x_DecreaseThresholdAdaptive,MK_f5,MMOD1|MMOD2,"dechq2xadapthreshold","Dec Hq2x Adaptive Threshold"); -+ MAPPER_AddHandler(Hq2x_IncreaseThresholdAdaptive,MK_f6,MMOD1|MMOD2,"inchq2xadapthreshold","Inc Hq2x Adaptive Threshold"); - GFX_SetTitle(-1,render.frameskip.max,false); - } - -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x.cpp dosbox-0.61+hq2x/src/gui/render_hq2x.cpp ---- dosbox-0.61/src/gui/render_hq2x.cpp 1970-01-01 01:00:00.000000000 +0100 -+++ dosbox-0.61+hq2x/src/gui/render_hq2x.cpp 2004-08-04 23:43:53.000000000 +0200 -@@ -0,0 +1,799 @@ -+//hq2x filter demo program -+//---------------------------------------------------------- -+//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) -+// Speed optimization and mmx code Copyright (c) 2004 Jörg Walter (jwalt@garni.ch) -+ -+//This program is free software; you can redistribute it and/or -+//modify it under the terms of the GNU Lesser General Public -+//License as published by the Free Software Foundation; either -+//version 2.1 of the License, or (at your option) any later version. -+// -+//This program is distributed in the hope that it will be useful, -+//but WITHOUT ANY WARRANTY; without even the implied warranty of -+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+//Lesser General Public License for more details. -+// -+//You should have received a copy of the GNU Lesser General Public -+//License along with this program; if not, write to the Free Software -+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+/* -+ This code comes in three variants: -+ 1. plain C code with live difference calculation -+ 2. C code with live difference calculation in MMX -+ 3. lookup-table based difference calculation -+ -+ Which one is fastest depends on your CPU speed and cache size. The table based algorithm -+ should be fastest if you have 32kb L1 data cache or more. Packing diff values into less -+ bytes is possible, define DIFF_TABLE to the number of bits per int. -+ -+ Speed: 22fps/27fps(MMX)/32fps(table) on a pentium2/333MHz -+ -+ TODO: -+ Currently only does 32bpp/16bpp BGRA output, and (theoretically) RGB output. YUV -+ isn't needed anymore, it seems, so this code should now work in all setups. -+ MMX code only does RGB, thus isn't really usable (but useful for benchmarking). -+ This code should use the intel compiler functions for mmx, as GCC emulates -+ them more or less completely. -+ -+ further optimization ideas: -+ - fix gcc bugs (shift), so Diff_mmx can run without register spilling -+ - manual unrolling of Diff loop to get decent memory prefetch for -+ recent CPUs -+ - add mmxext support to Diff for faster unpacking -+ - test if sse's movntq in interpolation loop improves things -+ - find a way to mmxify the interpolation loop sensibly -+ (currently runs slower than non-mmx code) -+ - find a way to save (cache-)memory in the factors table -+ (tighter packing and double indirection are both slower on p2) -+ - find a way for 16bpp not to suck that hard (speed-wise) -+*/ -+ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "config.h" -+#include "dosbox.h" -+#include "video.h" -+#include "render_scalers.h" -+#include "render_hq2x.h" -+ -+#ifndef __GCC__ -+#define __attribute__(x) -+#define __builtin_expect(x,y) x -+#endif -+ -+#define ADAPTIVE -+/* #define DEBUG */ -+// #define DIFF_TABLE 32 -+#define DIFF_TABLE 1 -+ -+// Gathered experimentally, values from 0x08-0x80 are useful, depending on graphics -+// and your personal preference. -+long Hq2x_colourTrigger = 0; -+long Hq2x_colourTrigger_adaptive = 75; -+ -+#ifdef ADAPTIVE -+#undef DIFF_TABLE -+#define DIFF_TABLE 1 -+#endif -+ -+#ifdef DIFF_TABLE -+#define bits DIFF_TABLE -+static -+#if DIFF_TABLE == 1 -+unsigned char -+#else -+int -+#endif -+difftable[65536/bits]; -+ -+inline static unsigned int Diff1_calc(int r1, int g1, int b1, int r2, int g2, int b2) -+{ -+ long r,g,b; -+ long rmean; -+ long ret; -+ -+ rmean = r1+r2; -+ b = b1-b2; -+ g = g1-g2; -+ r = r1-r2; -+ -+#ifdef ADAPTIVE -+ ret = (unsigned int)(((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g); -+ return (ret < Hq2x_colourTrigger?0:ret-Hq2x_colourTrigger > 255?255:ret-Hq2x_colourTrigger); -+#else -+ return ((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g > Hq2x_colourTrigger; -+#endif -+} -+ -+#if DIFF_TABLE == 1 -+#define Diff1(x,y) (difftable[(*(x)) * (256/bits) + ((*(y)) / bits)] >> ((*(y))%bits)) -+#else -+#define Diff1(x,y) ((difftable[(*(x)) * (256/bits) + ((*(y)) / bits)] >> ((*(y))%bits)) & 1) -+#endif -+inline static int Diff(const unsigned char *l2, const unsigned char *l3) -+{ -+#ifdef ADAPTIVE -+ unsigned int max = Diff1(l2,l3+1); -+ unsigned int min = max; -+ unsigned int dynthres = Diff1(l2+1,l3); -+ if (dynthres > max) max = dynthres; -+ if (dynthres < min) min = dynthres; -+ dynthres = Diff1(l2+1,l3+1); -+ if (dynthres > max) max = dynthres; -+ if (dynthres < min) min = dynthres; -+ dynthres = Diff1(l3,l3+1); -+ if (dynthres > max) max = dynthres; -+ if (dynthres < min) min = dynthres; -+ dynthres = (Hq2x_colourTrigger_adaptive*max+(100-Hq2x_colourTrigger_adaptive)*min)/200; -+ -+ return ((Diff1(l2,l3+1)>dynthres)*0x00aa0055) | ((Diff1(l2+1,l3)>dynthres)*0x005500aa) | ((Diff1(l2+1,l3+1)>dynthres)*0x03000300) | ((Diff1(l3,l3+1)>dynthres)*0x0c000c00); -+#else -+ return (Diff1(l2,l3+1)*0x00aa0055) | (Diff1(l2+1,l3)*0x005500aa) | (Diff1(l2+1,l3+1)*0x03000300) | (Diff1(l3,l3+1)*0x0c000c00); -+#endif -+} -+#undef __MMX__ -+#else -+#ifdef __MMX__ -+/* always on for gcc for now */ -+#define MMX_ONLY -+/* this is safe for -march=..., but not if someone specifies -mmmx manually */ -+ -+# ifdef MMX_ONLY -+# define Diff_mmx Diff -+# else -+ int has_mmx = 0; -+# define Diff(a,b) (has_mmx?Diff_mmx((a),(b)):Diff_any((a),(b))) -+# endif -+#else -+# define Diff_any Diff -+#endif -+ -+// A better colour distance function, adapted from http://www.compuphase.com/cmetric.htm -+#if !defined(MMX_ONLY) || defined(DEBUG) -+ -+inline static int Diff1(const unsigned char *e1, const unsigned char* e2) -+{ -+ long r,g,b; -+ long rmean; -+ -+ rmean = e1[0]+e2[0]; -+ b = e1[0]-e2[0]; -+ g = e1[1]-e2[1]; -+ r = e1[2]-e2[2]; -+ -+ return ((128+rmean)*r*r + (192-rmean)*b*b)/256 + g*g > Hq2x_colourTrigger; -+} -+ -+inline static int Diff_any(const unsigned long *l2, const unsigned long *l3) -+{ -+ return (Diff1((unsigned char *)l2,(unsigned char *)(l3+1))*0x00aa0055) | (Diff1((unsigned char *)(l2+1),(unsigned char *)l3)*0x005500aa) | (Diff1((unsigned char *)(l2+1),(unsigned char *)(l3+1))*0x03000300) | (Diff1((unsigned char *)l3,(unsigned char *)(l3+1))*0x0c000c00); -+} -+#endif -+ -+#ifdef __MMX__ -+typedef int mmx_1_64 __attribute__((mode(DI))); -+typedef int mmx_2_32 __attribute__((mode(V2SI))); -+typedef int mmx_4_16 __attribute__((mode(V4HI))); -+typedef int mmx_8_8 __attribute__((mode(V8QI))); -+ -+static mmx_4_16 mmx_trigger; -+ -+/* Note: this needs BGRA pixel layout, with the A component replaced by (-R)+32 */ -+inline static int Diff_mmx(const unsigned long *e1, const unsigned long *e2) -+{ -+ mmx_4_16 mm0, mm1, mm2, mm3, mm4; -+#ifdef DEBUG -+ mmx_4_16 t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18, m0, m1; -+ #define d(x) x = -+#else -+#define d(x) -+#endif -+ const mmx_8_8 zero = (mmx_8_8)0x0ULL; -+ const mmx_4_16 rmean_off = (mmx_4_16)(0x0a000a000a000a00ULL); -+ const mmx_4_16 factors = (mmx_4_16)0xfffdfff4aa5655abULL; -+ -+ /* -1 * aa56 = 0101 0101 1010 1010 */ -+ /* -1 * 55ab = 1010 1010 0101 0101 */ -+ -+ /* -+ Read from memory: -+ */ -+ mm1 = *(mmx_4_16 *)e2; -+ mm0 = *(mmx_4_16 *)e1; -+ -+ /* (high ................................ low) -+ -p2r+32, p2b, p2g, p2r, -p1r+32, p1b, p1g, p1r = mm0 -+ -p5r+32, p5b, p5g, p5r, -p4r+32, p4b, p4g, p4r = mm1 -+ -+ Shuffle dwords so we get 4 registers with pixel -+ arrangement ready for difference calculation: -+ -+ (2, 5, 4, 1) - (5, 4, 2, 5) -+ -+ We choose (rrrr, gggg) + (bbbb, rrrr) layout. This -+ is quite expensive, given that difference calculation -+ in (rgbr, rgbr) form would need just two unpacks, but -+ the unpacking has to be done sooner or later, and -+ this pixel layout makes later calculations cheaper. -+ TODO: sse/mmxext version of this unpacking should be -+ much cheaper. -+ -+ -p4r+32, -p1r+32, p4b, p1b, p4g, p1g, p4r, p1r = mm0 -+ -p5r+32, p5b, p5g, p5r, -p5r+32, p5b, p5g, p5r = mm3 (temp) -+ -p2r+32, -p5r+32, p2b, p5b, p2g, p5g, p2r, p5r = mm2 -+ -p5r+32, -p4r+32, p5b, p4b, p5g, p4g, p5r, p4r = mm1 -+*/ -+ -+ mm3 = (mmx_4_16)__builtin_ia32_punpckhdq((mmx_2_32)mm1,(mmx_2_32)mm1); -+ mm2 = (mmx_4_16)__builtin_ia32_punpckhbw((mmx_8_8)mm1,(mmx_8_8)mm0); -+ mm0 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm0,(mmx_8_8)mm1); -+ mm1 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm1,(mmx_8_8)mm3); -+ -+/* -+ ... continued ... -+ -+ -p2r+32, -p5r+32, -p4r+32, -p1r+32, p2b, p5b, p4b, p1b = mm0 -+ p2g, p5g, p4g, p1g, p2r, p5r, p4r, p1r = mm3 -+ -+ -p5r+32, -p4r+32, -p2r+32, -p5r+32, p5b, p4b, p2b, p5b = mm4 -+ p5g, p4g, p2g, p5g, p5r, p4r, p2r, p5r = mm2 -+ -+*/ -+ -+ mm3 = (mmx_4_16)__builtin_ia32_punpcklwd(mm0,mm2); -+ mm0 = (mmx_4_16)__builtin_ia32_punpckhwd(mm0,mm2); -+ mm4 = (mmx_4_16)__builtin_ia32_punpckhwd(mm2,mm1); -+ mm2 = (mmx_4_16)__builtin_ia32_punpcklwd(mm2,mm1); -+ -+ /* -+ Put mm2 with negated red component into mm1. Negation is done -+ in the lookup table. -+ -+ -p2r+32, -p5r+32, -p4r+32, -p1r+32, p2b, p5b, p4b, p1b = mm0 -+ p2g, p5g, p4g, p1g, p2r, p5r, p4r, p1r = mm3 -+ -+ -p5r+32, -p4r+32, -p2r+32, -p5r+32, p5b, p4b, p2b, p5b = mm4 -+ p5g, p4g, p2g, p5g, -p5r+32, -p4r+32, -p2r+32, -p5r+32 = mm2 -+ -+ */ -+ -+ mm2 = (mmx_4_16)__builtin_ia32_punpckhdq((mmx_2_32)mm4,(mmx_2_32)mm2); -+ -+ /* -+ -+ Calculate the differences (and rmean) -+ mm0-mm4, mm3-mm2 (signed saturation) -+ -+ d1r/8, d3r/8, d8r/8, d0r/8, d1b/8, d3b/8, d8b/8, d0b/8 = mm0 -+ d1g/8, d3g/8, d8g/8, d0g/8, d1rmean/4-32, d3rmean/4-32, d8rmean/4-32, d0rmean/4-32 = mm3 -+ -+ */ -+ -+ mm0 = (mmx_4_16)__builtin_ia32_psubsb((mmx_8_8)mm0,(mmx_8_8)mm4); -+ mm3 = (mmx_4_16)__builtin_ia32_psubsb((mmx_8_8)mm3,(mmx_8_8)mm2); -+ -+#ifdef DEBUG -+{ -+ int p1 = e1[0]; -+ int p5 = e2[1]; -+ char *cmm0 = (void*)&mm0, *cmm3 = (void*)&mm3; -+ if (((int)cmm3[0]) != ((int)(p1&Rmask) + (int)(p5&Rmask) - 32)) abort(); -+ if (((int)cmm0[4]) != -((int)(p1&Rmask) - (int)(p5&Rmask))) abort(); -+} -+#endif -+ -+ /* Intermediate stats: -+ -+ (rough) code equivalent: -+ rmean = (((int)(e1&Rmask) + (int)(e2&Rmask)) >> 16) - 32; -+ r = ((int)(e2&Rmask) - (int)(e1&Rmask)) >> 16; -+ g = ((int)(e1&Gmask) - (int)(e2&Gmask)) >> 8; -+ b = ((int)(e1&Bmask) - (int)(e2&Bmask)); -+ -+ Gain: -+ 1 distance w/o mmx = 16 ops -+ 4 distances w/ mmx = 11 ops -+ (possible parallelism left to the compiler) -+ -+ Todo: -+ ((160+rmean)*r/8*r/8) + 256*g/8*g/8 + ((160-rmean)*b/8*b/8) -+ -+ (slightly incorrect: the result is the true difference plus (b/8)^2, but -+ this eliminates a constant, making the algorithm fit into the available -+ 8 registers) -+ -+ d1r/8, d3r/8, d8r/8, d0r/8, d1b/8, d3b/8, d8b/8, d0b/8 = mm0 -+ d1g/8, d3g/8, d8g/8, d0g/8, d1rmean/4-32, d3rmean/4-32, d8rmean/4-32, d0rmean/4-32 = mm3 -+ */ -+ -+ /* -+ prepare differences for final calculation: -+ -+ 00 d1r/2 00 d3r/2 00 d8r/2 00 d0r/2 = mm0 -+ 00 d1b/2 00 d3b/2 00 d8b/2 00 d0b/2 = mm1 -+ 00 d1g/8 00 d3g/8 00 d8g/8 00 d0g/8 = mm2 -+ 00 (d1rmean/4-32)*16 00 (d3rmean/4-32)*16 00 (d8rmean/4-32)*16 00 (d0rmean/4-32)*16 = mm3 -+ -+ */ -+ -+#ifdef DEBUG -+ m0 = mm0; -+ m1 = mm3; -+#endif -+ // TODO: compiler error at __builtin_ia32_psllb(mm0,2); -+ d(t0) mm0 = __builtin_ia32_pmullw(mm0,(mmx_4_16)(0x0004000400040004ULL)); -+ d(t1) mm1 = (mmx_4_16)__builtin_ia32_punpcklbw(zero, (mmx_8_8)mm0); -+ d(t2) mm0 = (mmx_4_16)__builtin_ia32_punpckhbw(zero, (mmx_8_8)mm0); -+ d(t3) mm2 = (mmx_4_16)__builtin_ia32_punpckhbw(zero, (mmx_8_8)mm3); -+ d(t4) mm3 = (mmx_4_16)__builtin_ia32_punpcklbw(zero, (mmx_8_8)mm3); -+ // TODO: compiler error at __builtin_ia32_psraw(mm3,4); -+ d(t5) mm3 = __builtin_ia32_pmulhw(mm3,(mmx_4_16)(0x1000100010001000ULL)); -+ -+ /* -+ intermediate results: squares and rmean factors -+ -+ 00 (d1r/2)^2 00 (d3r/2)^2 00 (d8r/2)^2 00 (d0r/2)^2 = mm0 -+ 00 (d1b/2)^2 00 (d3b/2)^2 00 (d8b/2)^2 00 (d0b/2)^2 = mm1 -+ 00 256*(d1g/8)^2 00 256*(d3g/8)^2 00 256*(d8g/8)^2 00 256*(d0g/8)^2 = mm2 -+ 00 128+d1rmean/4 00 128+d3rmean/4 00 128+d8rmean/4 00 128+d0rmean/4 = mm3 -+ 00 192-d1rmean/4 00 192-d3rmean/4 00 192-d8rmean/4 00 192-d0rmean/4 = mm4 -+ -+ */ -+ -+ d(t9) mm0 = __builtin_ia32_pmulhw(mm0,mm0); -+ d(t10) mm1 = __builtin_ia32_pmulhw(mm1,mm1); -+ d(t11) mm2 = __builtin_ia32_pmulhw(mm2,mm2); -+ d(t12) mm4 = __builtin_ia32_psubsw(rmean_off,mm3); -+ d(t13) mm3 = __builtin_ia32_paddsw(mm3,rmean_off); -+ -+ /* -+ intermediate results: finish red and blue components -+ -+ 00 (128+d1rmean/4)*(d1r/8)^2 00 (128+d3rmean/4)*(d3r/8)^2 00 (128+d8rmean/4)*(d8r/8)^2 00 (128+d0rmean/4)*(d0r/8)^2 = mm0 -+ 00 (192-d1rmean/4)*(d1b/8)^2 00 (192-d3rmean/4)*(d3b/8)^2 00 (192-d8rmean/4)*(d8b/8)^2 00 (192-d0rmean/4)*(d0b/8)^2 = mm1 -+ 00 (d1g/8)^2 00 (d3g/8)^2 00 (d8g/8)^2 00 (d0g/8)^2 = mm2 -+ -+ */ -+ -+ d(t15) mm1 = __builtin_ia32_pmulhw(mm1, mm4); -+ d(t16) mm0 = __builtin_ia32_pmulhw(mm0, mm3); -+ -+ /* -+ calculate final visual difference -+ -+ (128+rmean/4)*(r/8)^2+(192-rmean/4)*(b/8)^2+256*(g/8)^2 = mm0 (order: 1 3 8 0) -+ */ -+ -+ d(t17) mm0 = __builtin_ia32_paddw(mm0,mm1); -+ d(t18) mm0 = __builtin_ia32_paddw(mm0,mm2); -+ -+#ifdef DEBUG -+{ -+ int p1 = e1[0]; -+ int p5 = e2[1]; -+ short *smm0 = (void*)&mm0; -+ long r,g,b; -+ long rmean, diff; -+ rmean = (((p1+p5)&Rmask)-32); -+ r = (p1&Rmask)-(p5&Rmask); -+ g = ((p1&Gmask) - (p5&Gmask)) >> 8; -+ b = ((p1&Bmask) - (p5&Bmask)) >> 16; -+ -+ diff = ((160+rmean)*r*r + 256*g*g + (160-rmean)*b*b)/256; -+ if (diff > smm0[0]+1 || diff < smm0[0]-1) abort(); -+} -+#endif -+ -+ /* -+ Code equivalent: -+ ((((512+rmean)>>8)*r*r) + 4*g*g + (((768-rmean)>>8)*b*b)) -+ -+ test against threshold -+ -+ (diff1?0xffff:0x0000) (diff3?0xffff:0x0000) (diff8?0xffff:0x0000) (diff0?0x0xffff:0x0000) = mm0 -+ */ -+ -+ mm0 = __builtin_ia32_pcmpgtw(mm0,mmx_trigger); -+ -+ /* -+ create final bit patterns -+ -+ 0000 0000 (diff1*0x03000300)|(diff3*0x0c000c00)|(diff8*0x005500aa)|(diff0*0x00aa0055) -+ -+ */ -+ -+ mm0 = (mmx_4_16)__builtin_ia32_pmaddwd(mm0,factors); -+ mm0 = (mmx_4_16)__builtin_ia32_punpcklbw((mmx_8_8)mm0,(mmx_8_8)__builtin_ia32_punpckhbw((mmx_8_8)mm0,(mmx_8_8)mm0)); -+ -+ return (unsigned long)(unsigned long long)mm0; -+ /* -+ Total: 11+16+3 = 30 ops for 4 distances vs. 16+13+7 = 36 ops for 1 distance -+ */ -+} -+#endif -+#endif -+ -+static int LUTPAL8to32[256] __attribute__((aligned(32))); -+//#define factors(a,b,c,d) (((a)-1) | ((b)<<2) | ((c)<<4) | ((d)<<6)) -+#define P0 {8,0,0,0} -+#define P10 {6,2,0,0} -+#define P11 {6,0,0,2} -+#define P12 {6,0,2,0} -+#define P20 {4,0,2,2} -+#define P21 {4,2,2,0} -+#define P22 {4,2,0,2} -+#define P60 {5,0,2,1} -+#define P61 {5,0,1,2} -+#define P70 {6,0,1,1} -+#define P90 {2,0,3,3} -+#define P100 {7,0,0,1} -+#define X {0,0,0,0} -+#define UNUSED X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, \ -+ X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X, X,X,X,X,X,X,X,X -+ -+/* sparse table: only 2k entries are used */ -+static unsigned char factors[4096][4] __attribute__((aligned(32))) = { -+/* 0000 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0040 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0080 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 00c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0100 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0140 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0180 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 01c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0200 */ UNUSED, -+/* 0400 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0440 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0480 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 04c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0500 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0540 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0580 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 05c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0600 */ UNUSED, -+/* 0800 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0840 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0880 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 08c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0900 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0940 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0980 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 09c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 0a00 */ UNUSED, -+/* 0c00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0c40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0c80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0cc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0d00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0d40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0d80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0dc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 0e00 */ UNUSED, -+/* 1000 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1040 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1080 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 10c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1100 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1140 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1180 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 11c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1200 */ UNUSED, -+/* 1400 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0, -+/* 1440 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0, -+/* 1480 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0, -+/* 14c0 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0, -+/* 1500 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0, -+/* 1540 */ P20, P20, P10, P0, P20, P20, P10, P0, P90, P90, P10, P0, P90, P90, P10, P0, -+/* 1580 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0, -+/* 15c0 */ P90, P90, P10, P0, P90, P90, P10, P0, P70, P100,P10, P0, P70, P100,P10, P0, -+/* 1600 */ UNUSED, -+/* 1800 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 1840 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 1880 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 18c0 */ P21, P61, P21, P61, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 1900 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1940 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1980 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 19c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 1a00 */ UNUSED, -+/* 1c00 */ P20, P20, P0, P0, P20, P20, P0, P0, P10, P20, P10, P0, P10, P20, P10, P0, -+/* 1c40 */ P20, P20, P0, P0, P20, P20, P0, P0, P10, P20, P10, P0, P10, P20, P10, P0, -+/* 1c80 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 1cc0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 1d00 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 1d40 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 1d80 */ P70, P90, P10, P0, P70, P90, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 1dc0 */ P70, P90, P10, P0, P70, P90, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 1e00 */ UNUSED, -+/* 2000 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2040 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2080 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 20c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2100 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2140 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2180 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 21c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2200 */ UNUSED, -+/* 2400 */ P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, /* above */ -+/* 2440 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2480 */ P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, P22, P60, -+/* 24c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2500 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2540 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2580 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 25c0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2600 */ UNUSED, -+/* 2800 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2840 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2880 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 28c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2900 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2940 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2980 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 29c0 */ P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, P20, -+/* 2a00 */ UNUSED, -+/* 2c00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, /* rot */ -+/* 2c40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2c80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P60, P22, P60, P22, P60, P22, P60, -+/* 2cc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2d00 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2d40 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2d80 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2dc0 */ P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, P22, P11, -+/* 2e00 */ UNUSED, -+/* 3000 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3040 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3080 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 30c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3100 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3140 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3180 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 31c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3200 */ UNUSED, -+/* 3400 */ P20, P20, P0, P0, P20, P20, P0, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 3440 */ P20, P20, P0, P0, P20, P20, P0, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 3480 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 34c0 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3500 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P90, P10, P0, P70, P90, P10, P0, -+/* 3540 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P90, P10, P0, P70, P90, P10, P0, -+/* 3580 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 35c0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3600 */ UNUSED, -+/* 3800 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3840 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3880 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 38c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P61, P21, P61, P21, P12, P21, P12, -+/* 3900 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3940 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3980 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 39c0 */ P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, P21, P12, -+/* 3a00 */ UNUSED, -+/* 3c00 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 3c40 */ P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, P70, P20, P10, P0, -+/* 3c80 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3cc0 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3d00 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P20, P10, P0, P10, P20, P10, P0, -+/* 3d40 */ P70, P20, P10, P0, P70, P20, P10, P0, P10, P20, P10, P0, P10, P20, P10, P0, -+/* 3d80 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3dc0 */ P10, P20, P10, P0, P10, P20, P10, P0, P10, P100,P10, P0, P10, P100,P10, P0, -+/* 3e00 */ UNUSED -+}; -+ -+/* Memory usage at 320 pixels width: -+ -+ ~4k line buffer -+ ~2.5k pattern buffer -+ 8k factor table (can be packed into as less as 2k, but unpacking takes more time than we lose due -+ to cache trashing, at least on a pentium2) -+ 1k palette table -+------ -+~15.5k data -+ -+If the diff table is used, another 8k (packed) / 64k (unpacked) are used. -+*/ -+ -+/* Optimizations: -+ -+Pixel/Pattern layout: -+ -+1 2 3 -+ a c -+4 5 6 -+ b d -+7 8 9 -+ -+Factor storage: -+Pixel: 5 1 2 4 -+ -+ 0: 8 0 0 0 -+ 10: 6 2 0 0 -+ 11: 6 0 0 2 -+ 12: 6 0 2 0 -+ 20: 4 0 2 2 -+ 21: 4 2 2 0 -+ 22: 4 2 0 2 -+ 60: 5 0 2 1 -+ 61: 5 0 1 2 -+ 70: 6 0 1 1 -+ 90: 2 0 3 3 -+100: 7 0 1 0 -+ -+bits: 3 1 2 2 = 8 bit -+ -+Factor set 100 is wrong: 7 0 0.5 0.5 would be 100% like the original, but -+since pixel 2 and 4 are visually close for all patterns where this set is -+used, this important simplification should not be visible. -+ -+Pattern usage: -+ -+a b c d -+0 8 1 3 10 5 6 (a) 9 2 (b) 4 7 (11) (c) (d) -> 1 2 4 5 -+ -+0 8 10 5 9 2 7 x 1 6 3 4 -+ -+cr+a2 dr+b2 ar br -+2 9 1 4 11 7 6 (d) 8 0 (x) 3 5 (10) (y) (a) -> 3 2 6 5 -+ -+br+a3 ar dr+c3 cr -+5 10 6 3 8 0 1 (m) 11 7 (c) 4 2 (9) (b) (n) -> 7 8 4 5 -+ -+d+b2+c3 c+a2 b+a3 a -+7 11 6 4 9 2 1 (n) 10 5 (y) 3 0 (8) (x) (m) -> 9 8 6 5 -+ -+ -+Pattern storage: -+ -+0123 = \/|_ -+ -+0101 010x 2323 -+1010 101x 2323 -+ -+*/ -+ -+union pattern { -+ unsigned short p[2]; -+ unsigned long value; -+}; -+ -+static unsigned long lines0[Hq2x_MAXWIDTH+2] __attribute__((aligned(32))); -+static unsigned long lines1[Hq2x_MAXWIDTH+2] __attribute__((aligned(32))); -+static unsigned long lines2[Hq2x_MAXWIDTH+2] __attribute__((aligned(32))); -+static unsigned long *l1, *l2, *l3, *tmp; -+static union pattern p0[Hq2x_MAXWIDTH+2] __attribute__((aligned(32))); -+static union pattern p1[Hq2x_MAXWIDTH+2] __attribute__((aligned(32))); -+static union pattern *top, *bot, *ptmp; -+static unsigned char prev[Hq2x_MAXWIDTH+2]; -+ -+#ifdef DIFF_TABLE -+#define diffcall bot[i].value = Diff(prev+i,pIn+i); memcpy(prev,pIn,Scaler_SrcWidth+1); -+#else -+#define diffcall bot[i].value = Diff(l2+i,l3+i); -+#endif -+ -+#ifndef __MMX__ -+#define __builtin_ia32_emms() -+#endif -+ -+#define CONSTCHECK if (Scaler_SrcWidth == 320) RENDER_DrawLine = -+ -+#define store(out,index,x,y) do{((unsigned long*)out)[index*2] = 0xff000000|(x); ((unsigned long*)out)[index*2+1] = 0xff000000|(y);}while(0) -+#define type long -+ -+#define FUNC Hq2x_long_320_line -+#define Scaler_SrcWidth 320 -+#define CHECK_CONST -+#include "render_hq2x_template.h" -+#undef CHECK_CONST -+#undef Scaler_SrcWidth -+#undef FUNC -+ -+#define FUNC Hq2x_long_Scaler_SrcWidth_line -+#define CHECK_CONST CONSTCHECK Hq2x_long_320_line; -+#include "render_hq2x_template.h" -+#undef store -+#undef type -+#undef CHECK_CONST -+#undef FUNC -+ -+/* 16 bit support */ -+#ifdef WORDS_BIGENDIAN -+#define store(out,index,y,x) ((unsigned long *)out)[index] = (((((x)>>3)&0x1f)|(((y)<<13))&0x1f0000)|((((x)>>5)&0x7e0)|(((y)<<11))&0x7e00000)|((((x)>>8)&0xf800)|(((y)<<8))&0xf8000000)) -+#else -+#define store(out,index,x,y) ((unsigned long *)out)[index] = (((((x)>>3)&0x1f)|(((y)<<13))&0x1f0000)|((((x)>>5)&0x7e0)|(((y)<<11))&0x7e00000)|((((x)>>8)&0xf800)|(((y)<<8))&0xf8000000)) -+#endif -+#define type short -+ -+#define FUNC Hq2x_short_320_line -+#define Scaler_SrcWidth 320 -+#define CHECK_CONST -+#include "render_hq2x_template.h" -+#undef CHECK_CONST -+#undef Scaler_SrcWidth -+#undef FUNC -+ -+#define FUNC Hq2x_short_Scaler_SrcWidth_line -+#define CHECK_CONST CONSTCHECK Hq2x_short_320_line; -+#include "render_hq2x_template.h" -+#undef store -+#undef type -+#undef CHECK_CONST -+#undef FUNC -+ -+ScalerBlock Hq2x_8={ -+ CAN_16|CAN_32|LOVE_32|NEED_RGB, -+ 2,2,1, -+ 0,Hq2x_short_Scaler_SrcWidth_line,Hq2x_short_Scaler_SrcWidth_line,Hq2x_long_Scaler_SrcWidth_line -+}; -+ -+void Hq2x_InitLUTs(const void *pal, int palette_end, int palette_start) -+{ -+ int i, j; -+ struct GFX_PalEntry *palette = (struct GFX_PalEntry *)pal; -+ -+ // All componets are reduced to 5 bit (VGA palette has 6 bit) -+ // for simpler multiplication and storage (divided by 8) -+ for (i=palette_start; i<=palette_end; i++) { -+ // 5 significant bits with 3 bit multiplier fit into 8 bit, thus -+ // plain int multiplication can be used without tricks -+ // R is duplicated into A, negated and increased by 32 for some -+ // nice mmx distance calculation tricks -+ LUTPAL8to32[i] = ((palette[i].r&0xf8) << 13) | ((palette[i].g&0xf8) << 5) | ((palette[i].b&0xf8) >> 3) | ((32*8-(palette[i].r&0xf8)) << 21); -+ } -+ -+#ifdef DIFF_TABLE -+#if DIFF_TABLE != 1 -+ memset(difftable,0,sizeof(difftable)); -+#endif -+ for (i = 0; i < 256; i++) { -+ for (j = 0; j < 256; j++) { -+ difftable[(i) * (256/bits) + ((j) / bits)] -+#if DIFF_TABLE == 1 -+ = -+#else -+ |= -+#endif -+ Diff1_calc((LUTPAL8to32[i]>>16)&0x1f,(LUTPAL8to32[i]>>8)&0x1f,(LUTPAL8to32[i])&0x1f, (LUTPAL8to32[j]>>16)&0x1f,(LUTPAL8to32[j]>>8)&0x1f,(LUTPAL8to32[j])&0x1f) << (j%bits); -+ } -+ } -+#endif -+ -+#ifdef __MMX__ -+ *((short *)(&mmx_trigger)) = Hq2x_colourTrigger; -+ *(((short *)(&mmx_trigger))+1) = Hq2x_colourTrigger; -+ *(((short *)(&mmx_trigger))+2) = Hq2x_colourTrigger; -+ *(((short *)(&mmx_trigger))+3) = Hq2x_colourTrigger; -+#endif -+} -+ -+void Hq2x_IncreaseThreshold(void) -+{ -+ if (Hq2x_colourTrigger < 255) Hq2x_colourTrigger++; -+ Hq2x_InitLUTs(0,0,1); -+ LOG_MSG("Hq2x threshold at %i",Hq2x_colourTrigger); -+} -+ -+void Hq2x_DecreaseThreshold(void) -+{ -+ if (Hq2x_colourTrigger > 0) Hq2x_colourTrigger--; -+ Hq2x_InitLUTs(0,0,1); -+ LOG_MSG("Hq2x threshold at %i",Hq2x_colourTrigger); -+} -+ -+void Hq2x_IncreaseThresholdAdaptive(void) -+{ -+ if (Hq2x_colourTrigger_adaptive < 100) Hq2x_colourTrigger_adaptive++; -+ LOG_MSG("Hq2x adaptive threshold at %i",Hq2x_colourTrigger_adaptive); -+} -+ -+void Hq2x_DecreaseThresholdAdaptive(void) -+{ -+ if (Hq2x_colourTrigger_adaptive > 0) Hq2x_colourTrigger_adaptive--; -+ LOG_MSG("Hq2x adaptive threshold at %i",Hq2x_colourTrigger_adaptive); -+} -+ -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x.h dosbox-0.61+hq2x/src/gui/render_hq2x.h ---- dosbox-0.61/src/gui/render_hq2x.h 1970-01-01 01:00:00.000000000 +0100 -+++ dosbox-0.61+hq2x/src/gui/render_hq2x.h 2004-08-02 20:34:46.000000000 +0200 -@@ -0,0 +1,31 @@ -+//derived from the hq2x filter demo program -+//---------------------------------------------------------- -+//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) -+// Speed optimization and mmx code Copyright (c) 2004 Jörg Walter (jwalt@garni.ch) -+ -+//This program is free software; you can redistribute it and/or -+//modify it under the terms of the GNU Lesser General Public -+//License as published by the Free Software Foundation; either -+//version 2.1 of the License, or (at your option) any later version. -+// -+//This program is distributed in the hope that it will be useful, -+//but WITHOUT ANY WARRANTY; without even the implied warranty of -+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+//Lesser General Public License for more details. -+// -+//You should have received a copy of the GNU Lesser General Public -+//License along with this program; if not, write to the Free Software -+//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -+ -+#ifndef __HQ2X_H -+#define __HQ2X_H -+ -+#define Hq2x_MAXWIDTH 640-2 -+extern long Hq2x_colourTrigger; -+extern long Hq2x_colourTrigger_adaptive; -+extern void Hq2x_InitLUTs(const void *palette, int palette_end, int palette_start); -+extern void Hq2x_IncreaseThreshold(void); -+extern void Hq2x_DecreaseThreshold(void); -+extern void Hq2x_IncreaseThresholdAdaptive(void); -+extern void Hq2x_DecreaseThresholdAdaptive(void); -+#endif -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_hq2x_template.h dosbox-0.61+hq2x/src/gui/render_hq2x_template.h ---- dosbox-0.61/src/gui/render_hq2x_template.h 1970-01-01 01:00:00.000000000 +0100 -+++ dosbox-0.61+hq2x/src/gui/render_hq2x_template.h 2004-08-02 17:27:01.000000000 +0200 -@@ -0,0 +1,78 @@ -+static void FUNC(const unsigned char *pIn) -+{ -+ int i, j; -+ unsigned int factor, value1, value2, linesa = (*Scaler_Index++)+1, linesb = linesa/2; -+ linesa -= linesb; -+ -+ pIn--; -+ if (__builtin_expect(Scaler_Line++==0,0)) { -+ int i; -+ CHECK_CONST -+ -+ Scaler_DstWrite -= 2*sizeof(type); -+ l1 = lines0; -+ l2 = lines1; -+ l3 = lines2; -+ for (i=0; i <= Scaler_SrcWidth+1; i++) l2[i] = 0x20000000; -+ l3[0] = 0x20000000; -+ l3[1] = LUTPAL8to32[pIn[1]]; -+ -+ for (i=2; i<=Scaler_SrcWidth+1; i++) -+ l3[i] = LUTPAL8to32[pIn[i]]; -+ -+ top = p0; -+ bot = p1; -+ memcpy(prev,pIn,Scaler_SrcWidth+1); -+ for (i=1; i <= Scaler_SrcWidth; i++) diffcall -+ return; -+ } -+ -+ tmp = l1; l1 = l2; l2 = l3; l3 = tmp; -+ ptmp = top; top = bot; bot = ptmp; -+ bot[0].value = 0x07ff07ff; -+ -+ l3[0] = 0x20000000; -+ l3[1] = LUTPAL8to32[pIn[1]]; -+ -+ for (i=2; i<=Scaler_SrcWidth+1; i++) -+ l3[i] = LUTPAL8to32[pIn[i]]; -+ -+ for (i=1; i<=Scaler_SrcWidth; i++) diffcall -+ -+ if (linesa > 0) { -+ for (i=1; i<=Scaler_SrcWidth; i++) { -+ factor = (top[i-1].p[0]&0x503)|(bot[i-1].p[0]&0x20c)|(top[i].p[0]&0x830)|(bot[i].p[0]&0x040); -+ value1 = (l1[i-1]*factors[factor][1]+l1[i]*factors[factor][2]+l2[i-1]*factors[factor][3]+l2[i]*factors[factor][0]); -+ -+ factor = (top[i-1].p[1]&0x930)|(bot[i-1].p[1]&0x240)|(top[i].p[1]&0x403)|(bot[i].p[1]&0x00c); -+ value2 = (l1[i+1]*factors[factor][1]+l1[i]*factors[factor][2]+l2[i+1]*factors[factor][3]+l2[i]*factors[factor][0]); -+ store(Scaler_DstWrite,i,value1,value2); -+ } -+ while (--linesa) { -+ memcpy(Scaler_DstWrite+Scaler_DstPitch,Scaler_DstWrite,Scaler_DstPitch); -+ Scaler_DstWrite += Scaler_DstPitch; -+ } -+ Scaler_DstWrite += Scaler_DstPitch; -+ } -+ -+ if (linesb > 0) { -+ for (i=1; i <= Scaler_SrcWidth; i++) { -+ factor = (top[i-1].p[1]&0x60c)|(bot[i-1].p[1]&0x103)|(top[i].p[1]&0x840)|(bot[i].p[1]&0x030); -+ value1 = (l3[i-1]*factors[factor][1]+l3[i]*factors[factor][2]+l2[i-1]*factors[factor][3]+l2[i]*factors[factor][0]); -+ -+ factor = (top[i-1].p[0]&0xa40)|(bot[i-1].p[0]&0x130)|(top[i].p[0]&0x40c)|(bot[i].p[0]&0x003); -+ value2 = (l3[i+1]*factors[factor][1]+l3[i]*factors[factor][2]+l2[i+1]*factors[factor][3]+l2[i]*factors[factor][0]); -+ store(Scaler_DstWrite,i,value1,value2); -+ } -+ while (--linesb) { -+ memcpy(Scaler_DstWrite+Scaler_DstPitch,Scaler_DstWrite,Scaler_DstPitch); -+ Scaler_DstWrite += Scaler_DstPitch; -+ } -+ Scaler_DstWrite += Scaler_DstPitch; -+ } -+ -+ if (__builtin_expect(Scaler_Line==Scaler_SrcHeight,0)) { -+ FUNC(pIn+1); -+ __builtin_ia32_emms(); -+ } -+} -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/gui/render_scalers.h dosbox-0.61+hq2x/src/gui/render_scalers.h ---- dosbox-0.61/src/gui/render_scalers.h 2004-06-10 09:18:19.000000000 +0200 -+++ dosbox-0.61+hq2x/src/gui/render_scalers.h 2004-07-04 23:29:49.000000000 +0200 -@@ -30,6 +30,7 @@ - OP_AdvInterp2x, - OP_Interp2x, - OP_TV2x, -+ OP_Hq2x, - }; - - struct ScalerBlock { -@@ -46,6 +47,7 @@ - extern ScalerBlock AdvInterp2x_8; - extern ScalerBlock Interp2x_8; - extern ScalerBlock TV2x_8; -+extern ScalerBlock Hq2x_8; - - - #endif -diff -x aclocal.m4 -x CVS -x configure -x '*.in' -x '*~' -x '*.o' -x '*.a' -x Makefile -x config.h -x config.status -x config.log -x 'stamp-h*' -x '*.Po' -x autom4te.cache -x config.guess -x '.#*' -ruN dosbox-0.61/src/hardware/ymf262.c dosbox-0.61+hq2x/src/hardware/ymf262.c ---- dosbox-0.61/src/hardware/ymf262.c 2004-03-28 15:04:45.000000000 +0200 -+++ dosbox-0.61+hq2x/src/hardware/ymf262.c 2004-06-20 03:54:47.000000000 +0200 -@@ -844,23 +844,52 @@ - INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm, unsigned int wave_tab) - { - UINT32 p; -+ int pos = (((signed int)((phase & ~FREQ_MASK) + (pm<<16))) >> FREQ_SH ); -+#ifdef SMALL_CACHE -+ if ((wave_tab == 1*SIN_LEN) && (pos & (SIN_LEN>>1))) pos = 0; -+ if ((wave_tab == 3*SIN_LEN) && (pos & (SIN_LEN>>2))) pos = 0; -+ if (wave_tab == 2*SIN_LEN || wave_tab == 3*SIN_LEN) pos &= SIN_MASK>>1; -+ if (wave_tab == 4*SIN_LEN || wave_tab == 5*SIN_LEN) { -+ if (wave_tab == 5*SIN_LEN) pos &= SIN_MASK>>1; -+ pos *= 2; -+ if (pos & (SIN_LEN>>1)) pos = 0; -+ } -+ if (wave_tab != 6*SIN_LEN && wave_tab != 7*SIN_LEN) wave_tab = 0; -+#endif -+ p = (env<<4) + sin_tab[wave_tab + (pos & SIN_MASK)]; - -- p = (env<<4) + sin_tab[wave_tab + ((((signed int)((phase & ~FREQ_MASK) + (pm<<16))) >> FREQ_SH ) & SIN_MASK) ]; -- -- if (p >= TL_TAB_LEN) -- return 0; -+#if 1 -+ return tl_tab[p&(TL_TAB_LEN/13-1)] >> (p/(TL_TAB_LEN/13)); -+#else -+ if (p > TL_TAB_LEN) return 0; - return tl_tab[p]; -+#endif - } - - INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm, unsigned int wave_tab) - { - UINT32 p; -+ int pos = (((signed int)((phase & ~FREQ_MASK) + pm)) >> FREQ_SH ); -+#ifdef SMALL_CACHE -+ if ((wave_tab == 1*SIN_LEN) && (pos & (SIN_LEN>>1))) pos = 0; -+ if ((wave_tab == 3*SIN_LEN) && (pos & (SIN_LEN>>2))) pos = 0; -+ if (wave_tab == 2*SIN_LEN || wave_tab == 3*SIN_LEN) pos &= SIN_MASK>>1; -+ if (wave_tab == 4*SIN_LEN || wave_tab == 5*SIN_LEN) { -+ if (wave_tab == 5*SIN_LEN) pos &= SIN_MASK>>1; -+ pos *= 2; -+ if (pos & (SIN_LEN>>1)) pos = 0; -+ } -+ if (wave_tab != 6*SIN_LEN && wave_tab != 7*SIN_LEN) wave_tab = 0; -+#endif - -- p = (env<<4) + sin_tab[wave_tab + ((((signed int)((phase & ~FREQ_MASK) + pm))>>FREQ_SH) & SIN_MASK)]; -+ p = (env<<4) + sin_tab[wave_tab + (pos & SIN_MASK)]; - -- if (p >= TL_TAB_LEN) -- return 0; -+#if 1 -+ return tl_tab[p&(TL_TAB_LEN/13-1)] >> (p/(TL_TAB_LEN/13)); -+#else -+ if (p > TL_TAB_LEN) return 0; - return tl_tab[p]; -+#endif - } - - -diff -ruN src./dosbox.cpp src/dosbox.cpp ---- dupa/src./dosbox.cpp 2004-09-30 15:15:59.000000000 +0200 -+++ dupa/src/dosbox.cpp 2004-09-30 15:18:48.301932384 +0200 -@@ -231,11 +231,17 @@ - secprop->Add_int("frameskip",0); - secprop->Add_bool("aspect",false); - secprop->Add_string("scaler","normal2x"); -+ secprop->Add_int("hq2x_threshold_adaptive",75); -+ secprop->Add_int("hq2x_threshold",0); - MSG_Add("RENDER_CONFIGFILE_HELP", - "frameskip -- How many frames dosbox skips before drawing one.\n" - "aspect -- Do aspect correction.\n" - "scaler -- Scaler used to enlarge/enhance low resolution modes.\n" -- " Supported are none,normal2x,advmame2x,advmame3x,advinterp2x,interp2x,tv2x.\n" -+ " Supported are none,normal2x,advmame2x,advmame3x,advinterp2x,interp2x,tv2x,hq2x.\n" -+ "hq2x_threshold_adaptive -- The adaptive threshold used to detect edges in hq2x\n" -+ " Possible values are 0-100, can be modified with Ctrl+Alt+F5/F6\n" -+ "hq2x_threshold -- The static threshold used to detect edges in hq2x\n" -+ " Possible values are 0-255, can be modified with Ctrl+Alt+F3/F4\n" - ); - - secprop=control->AddSection_prop("cpu",&CPU_Init); diff --git a/dosbox_coreswitch.patch b/dosbox_coreswitch.patch deleted file mode 100644 index 745afe3..0000000 --- a/dosbox_coreswitch.patch +++ /dev/null @@ -1,332 +0,0 @@ -diff -urN dosbox_org/include/cpu.h dosbox/include/cpu.h ---- dosbox_org/include/cpu.h 2005-02-10 11:20:47.000000000 +0100 -+++ dosbox/include/cpu.h 2005-02-21 14:52:24.000000000 +0100 -@@ -27,6 +27,9 @@ - extern Bits CPU_Cycles; - extern Bits CPU_CycleLeft; - extern Bits CPU_CycleMax; -+extern bool CPU_TimeSynched; -+extern Bitu CPU_CyclesCur; -+extern char core_mode[10]; - - /* Some common Defines */ - /* A CPU Handler */ -diff -urN dosbox_org/include/mapper.h dosbox/include/mapper.h ---- dosbox_org/include/mapper.h 2005-02-10 11:20:47.000000000 +0100 -+++ dosbox/include/mapper.h 2005-02-21 14:52:24.000000000 +0100 -@@ -21,7 +21,8 @@ - - enum MapKeys { - MK_f1,MK_f2,MK_f3,MK_f4,MK_f5,MK_f6,MK_f7,MK_f8,MK_f9,MK_f10,MK_f11,MK_f12, -- MK_return,MK_kpminus,MK_scrolllock,MK_printscreen,MK_pause, -+ MK_return,MK_kpminus,MK_equals,MK_scrolllock,MK_printscreen,MK_pause, -+ MK_1, MK_2, MK_3, MK_4, - - }; - -diff -urN dosbox_org/src/cpu/core_dyn_x86.cpp dosbox/src/cpu/core_dyn_x86.cpp ---- dosbox_org/src/cpu/core_dyn_x86.cpp 2005-02-10 11:20:48.000000000 +0100 -+++ dosbox/src/cpu/core_dyn_x86.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -188,6 +188,8 @@ - } - #include "core_dyn_x86/decoder.h" - -+Bitu CPU_CyclesOld; -+ - Bits CPU_Core_Dyn_X86_Run(void) { - /* Determine the linear address of CS:EIP */ - restart_core: -@@ -204,8 +206,10 @@ - block=CreateCacheBlock(chandler,ip_point,32); - } - run_block: -+ CPU_CyclesOld = CPU_Cycles; - cache.block.running=0; - BlockReturn ret=gen_runcode(block->cache.start); -+ cycle_count += CPU_CyclesOld - CPU_Cycles; - switch (ret) { - case BR_Normal: - /* Maybe check if we staying in the same page? */ -diff -urN dosbox_org/src/cpu/core_full.cpp dosbox/src/cpu/core_full.cpp ---- dosbox_org/src/cpu/core_full.cpp 2005-02-10 11:20:48.000000000 +0100 -+++ dosbox/src/cpu/core_full.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -64,8 +64,8 @@ - Bits CPU_Core_Full_Run(void) { - FullData inst; - while (CPU_Cycles-->0) { --#if C_DEBUG - cycle_count++; -+#if C_DEBUG - #if C_HEAVY_DEBUG - if (DEBUG_HeavyIsBreakpoint()) { - FillFlags(); -diff -urN dosbox_org/src/cpu/core_normal.cpp dosbox/src/cpu/core_normal.cpp ---- dosbox_org/src/cpu/core_normal.cpp 2005-02-10 11:20:48.000000000 +0100 -+++ dosbox/src/cpu/core_normal.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -48,7 +48,7 @@ - #define SaveMd(off,val) mem_writed_inline(off,val) - #endif - --extern Bitu cycle_count; -+Bitu cycle_count; - - #if C_FPU - #define CPU_FPU 1 //Enable FPU escape instructions -@@ -151,8 +151,8 @@ - return debugCallback; - }; - #endif -- cycle_count++; - #endif -+ cycle_count++; - restart_opcode: - switch (core.opcode_index+Fetchb()) { - #include "core_normal/prefix_none.h" -diff -urN dosbox_org/src/cpu/core_simple.cpp dosbox/src/cpu/core_simple.cpp ---- dosbox_org/src/cpu/core_simple.cpp 2005-02-10 11:20:48.000000000 +0100 -+++ dosbox/src/cpu/core_simple.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -144,8 +144,8 @@ - return debugCallback; - }; - #endif -- cycle_count++; - #endif -+ cycle_count++; - restart_opcode: - switch (core.opcode_index+Fetchb()) { - -diff -urN dosbox_org/src/cpu/cpu.cpp dosbox/src/cpu/cpu.cpp ---- dosbox_org/src/cpu/cpu.cpp 2005-02-10 11:20:48.000000000 +0100 -+++ dosbox/src/cpu/cpu.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -44,6 +44,9 @@ - Bits CPU_CycleMax = 2500; - Bits CPU_CycleUp = 0; - Bits CPU_CycleDown = 0; -+bool CPU_TimeSynched = false; -+Bitu CPU_CyclesCur = 0; -+char core_mode[10]; - CPU_Decoder * cpudecoder; - - void CPU_Core_Full_Init(void); -@@ -1793,6 +1796,37 @@ - GFX_SetTitle(CPU_CycleMax,-1,false); - } - -+static void CPU_ToggleTimeSynch(void) { -+ CPU_TimeSynched = ! CPU_TimeSynched; -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+} -+ -+static void CPU_ToggleFullCore(void) { -+ strcpy(core_mode, "Full"); -+ cpudecoder=&CPU_Core_Full_Run; -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+} -+ -+static void CPU_ToggleNormalCore(void) { -+ strcpy(core_mode, "Normal"); -+ cpudecoder=&CPU_Core_Normal_Run; -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+} -+ -+static void CPU_ToggleDynamicCore(void) { -+ strcpy(core_mode, "Dynamic"); -+ cpudecoder=&CPU_Core_Dyn_X86_Run; -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+} -+ -+static void CPU_ToggleSimpleCore(void) { -+ strcpy(core_mode, "Simple"); -+ cpudecoder=&CPU_Core_Simple_Run;; -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+} -+ -+extern bool showcycles; -+ - void CPU_Init(Section* sec) { - Section_prop * section=static_cast(sec); - reg_eax=0; -@@ -1829,22 +1863,33 @@ - #endif - MAPPER_AddHandler(CPU_CycleDecrease,MK_f11,MMOD1,"cycledown","Dec Cycles"); - MAPPER_AddHandler(CPU_CycleIncrease,MK_f12,MMOD1,"cycleup" ,"Inc Cycles"); -+ MAPPER_AddHandler(CPU_ToggleTimeSynch,MK_equals,MMOD1,"timesynched" ,"Tog. TimeSynch"); -+ MAPPER_AddHandler(CPU_ToggleNormalCore,MK_1,MMOD1,"normal" ,"Tog. Normal Core"); -+ MAPPER_AddHandler(CPU_ToggleFullCore,MK_2,MMOD1,"full" ,"Tog. Full Core"); -+ MAPPER_AddHandler(CPU_ToggleDynamicCore,MK_3,MMOD1,"dynamic" ,"Tog. Dynamic Core"); -+ MAPPER_AddHandler(CPU_ToggleSimpleCore,MK_4,MMOD1,"simple" ,"Tog. Simple Core"); - CPU_Cycles=0; - CPU_CycleMax=section->Get_int("cycles");; - CPU_CycleUp=section->Get_int("cycleup"); - CPU_CycleDown=section->Get_int("cycledown"); -+ CPU_TimeSynched=section->Get_bool("timesynched"); -+ showcycles=section->Get_bool("showcycles"); - const char * core=section->Get_string("core"); -+ strcpy(core_mode, "Normal"); - cpudecoder=&CPU_Core_Normal_Run; - if (!strcasecmp(core,"normal")) { - cpudecoder=&CPU_Core_Normal_Run; - } else if (!strcasecmp(core,"simple")) { - cpudecoder=&CPU_Core_Simple_Run; -+ strcpy(core_mode, "Simple"); - } else if (!strcasecmp(core,"full")) { - cpudecoder=&CPU_Core_Full_Run; -+ strcpy(core_mode, "Full"); - } - #if (C_DYNAMIC_X86) - else if (!strcasecmp(core,"dynamic")) { - cpudecoder=&CPU_Core_Dyn_X86_Run; -+ strcpy(core_mode, "Dynamic"); - } - #endif - else { -diff -urN dosbox_org/src/debug/debug.cpp dosbox/src/debug/debug.cpp ---- dosbox_org/src/debug/debug.cpp 2005-02-10 11:20:50.000000000 +0100 -+++ dosbox/src/debug/debug.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -92,7 +92,7 @@ - static Bitu oldflags; - DBGBlock dbg; - static Bitu input_count; --Bitu cycle_count; -+extern Bitu cycle_count; - static bool debugging; - - -diff -urN dosbox_org/src/dosbox.cpp dosbox/src/dosbox.cpp ---- dosbox_org/src/dosbox.cpp 2005-02-10 11:20:47.000000000 +0100 -+++ dosbox/src/dosbox.cpp 2005-02-21 14:58:12.000000000 +0100 -@@ -116,11 +116,34 @@ - - Bits RemainTicks; - Bits LastTicks; -+Bits Ticks = 0; -+ -+bool showcycles; -+ -+extern void GFX_SetTitle(Bits cycles, Bits frameskip,bool paused); -+extern Bitu cycle_count; -+extern Bitu frames; - - static Bitu Normal_Loop(void) { - Bits ret,NewTicks; - while (1) { - if (PIC_RunQueue()) { -+ if((CPU_TimeSynched) || (showcycles)) { -+ NewTicks=GetTicks(); -+ -+ if((CPU_TimeSynched) && (NewTicks!=LastTicks)) -+ CPU_Cycles=0; -+ -+ if((showcycles) && (NewTicks>=Ticks)) { -+ CPU_CyclesCur=(cycle_count-CPU_CyclesCur) >> 9; -+ Ticks=NewTicks + 512; // next update in 512ms -+ frames*=1.95; // compensate for 512ms interval -+ GFX_SetTitle(CPU_CycleMax,-1,false); -+ CPU_CyclesCur=cycle_count; -+ frames=0; -+ } -+ } -+ - ret=(*cpudecoder)(); - if (ret<0) return 1; - if (ret>0) { -@@ -243,6 +266,8 @@ - secprop->Add_int("cycles",3000); - secprop->Add_int("cycleup",500); - secprop->Add_int("cycledown",20); -+ secprop->Add_bool("timesynched",false); -+ secprop->Add_bool("showcycles",false); - MSG_Add("CPU_CONFIGFILE_HELP", - "core -- CPU Core used in emulation: simple,normal,full" - #if (C_DYNAMIC_X86) -@@ -250,9 +275,11 @@ - #endif - ".\n" - "cycles -- Amount of instructions dosbox tries to emulate each millisecond.\n" -- " Setting this higher than your machine can handle is bad!\n" -+ " Setting this higher than your machine can handle is bad! (unless timesynched is set)\n" - "cycleup -- Amount of cycles to increase/decrease with keycombo.\n" - "cycledown Setting it lower than 100 will be a percentage.\n" -+ "timesynched -- Do not emulate more cycles than possible.\n" -+ "showcycles -- Display the number of emulated cycles in the titlebar (uses some CPU).\n" - ); - #if C_FPU - secprop->AddInitFunction(&FPU_Init); -diff -urN dosbox_org/src/gui/sdl_mapper.cpp dosbox/src/gui/sdl_mapper.cpp ---- dosbox_org/src/gui/sdl_mapper.cpp 2005-02-10 11:21:07.000000000 +0100 -+++ dosbox/src/gui/sdl_mapper.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -748,6 +748,9 @@ - case MK_kpminus: - key=SDLK_KP_MINUS; - break; -+ case MK_equals: -+ key=SDLK_EQUALS; -+ break; - case MK_scrolllock: - key=SDLK_SCROLLOCK; - break; -@@ -757,6 +760,18 @@ - case MK_printscreen: - key=SDLK_PRINT; - break; -+ case MK_1: -+ key=SDLK_1; -+ break; -+ case MK_2: -+ key=SDLK_2; -+ break; -+ case MK_3: -+ key=SDLK_3; -+ break; -+ case MK_4: -+ key=SDLK_4; -+ break; - } - sprintf(buf,"%s \"key %d%s%s%s\"", - entry, -@@ -1133,7 +1148,7 @@ - } - - void MAPPER_AddHandler(MAPPER_Handler * handler,MapKeys key,Bitu mods,char * eventname,char * buttonname) { -- char tempname[17]; -+ char tempname[99]; - strcpy(tempname,"hand_"); - strcat(tempname,eventname); - new CHandlerEvent(tempname,handler,key,mods,buttonname); -diff -urN dosbox_org/src/gui/sdlmain.cpp dosbox/src/gui/sdlmain.cpp ---- dosbox_org/src/gui/sdlmain.cpp 2005-02-10 11:21:07.000000000 +0100 -+++ dosbox/src/gui/sdlmain.cpp 2005-02-21 14:52:24.000000000 +0100 -@@ -197,16 +197,23 @@ - //Globals for keyboard initialisation - bool startup_state_numlock=false; - bool startup_state_capslock=false; -+ -+Bitu frames = 0; -+extern bool showcycles; -+ -+#include "cpu.h" -+ - void GFX_SetTitle(Bits cycles,Bits frameskip,bool paused){ - char title[200]={0}; - static Bits internal_cycles=0; - static Bits internal_frameskip=0; - if(cycles != -1) internal_cycles = cycles; - if(frameskip != -1) internal_frameskip = frameskip; -+ if(!showcycles) frames = 0; - if(paused) -- sprintf(title,"DOSBox %s,Cpu Cycles: %8d, Frameskip %2d, Program: %8s PAUSED",VERSION,internal_cycles,internal_frameskip,RunningProgram); -+ sprintf(title,"Core: %s, Cpu Cycles: %8d %c %8d, FPS: %d, skip %2d, Program: %8s PAUSED",core_mode,CPU_CyclesCur,CPU_TimeSynched ? '<' : '=',internal_cycles,frames,internal_frameskip,RunningProgram); - else -- sprintf(title,"DOSBox %s,Cpu Cycles: %8d, Frameskip %2d, Program: %8s",VERSION,internal_cycles,internal_frameskip,RunningProgram); -+ sprintf(title,"Core: %s, Cpu Cycles: %8d %c %8d, FPS: %d, skip %2d, Program: %8s",core_mode,CPU_CyclesCur,CPU_TimeSynched ? '<' : '=',internal_cycles,frames,internal_frameskip,RunningProgram); - SDL_WM_SetCaption(title,VERSION); - } - -@@ -597,6 +604,7 @@ - int ret; - if (!sdl.updating) return; - sdl.updating=false; -+ frames++; - switch (sdl.desktop.type) { - case SCREEN_SURFACE: - if (SDL_MUSTLOCK(sdl.surface)) { -- 2.43.0