1 --- SDL_gfx-2.0.13/SDL_imageFilter.c.orig 2004-11-29 20:53:35.000000000 +0100
2 +++ SDL_gfx-2.0.13/SDL_imageFilter.c 2005-01-16 00:19:22.272596920 +0100
4 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
5 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
6 ".align 16 \n\t" // 16 byte allignment of the loop entry
7 - ".L1010: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
8 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
9 "paddusb (%%ebx), %%mm1 \n\t" // mm1=Src1+Src2 (add 8 bytes with saturation)
10 "movq %%mm1, (%%edi) \n\t" // store result in Dest
11 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
12 "add $8, %%ebx \n\t" // register pointers by 8
13 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
14 - "jnz .L1010 \n\t" // check loop termination, proceed if required
15 + "jnz 1b \n\t" // check loop termination, proceed if required
16 "emms \n\t" // exit MMX state
17 "popa \n\t":"=m" (Dest) // %0
20 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
21 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
22 ".align 16 \n\t" // 16 byte allignment of the loop entry
25 "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
26 "movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
27 // --- Byte shift via Word shift ---
29 "add $8, %%ebx \n\t" // register pointers by 8
31 "dec %%ecx \n\t" // decrease loop counter
32 - "jnz .L21011 \n\t" // check loop termination, proceed if required
33 + "jnz 1b \n\t" // check loop termination, proceed if required
34 "emms \n\t" // exit MMX state
35 "popa \n\t":"=m" (Dest) // %0
38 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
39 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
40 ".align 16 \n\t" // 16 byte allignment of the loop entry
41 - ".L1012: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
42 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
43 "psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
44 "movq %%mm1, (%%edi) \n\t" // store result in Dest
45 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
46 "add $8, %%ebx \n\t" // register pointers by 8
47 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
48 - "jnz .L1012 \n\t" // check loop termination, proceed if required
49 + "jnz 1b \n\t" // check loop termination, proceed if required
50 "emms \n\t" // exit MMX state
51 "popa \n\t":"=m" (Dest) // %0
54 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
55 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
56 ".align 16 \n\t" // 16 byte allignment of the loop entry
57 - ".L1013: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
58 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
59 "movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
60 "psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
61 "psubusb (%%eax), %%mm2 \n\t" // mm2=Src2-Src1 (sub 8 bytes with saturation)
63 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
64 "add $8, %%ebx \n\t" // register pointers by 8
65 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
66 - "jnz .L1013 \n\t" // check loop termination, proceed if required
67 + "jnz 1b \n\t" // check loop termination, proceed if required
68 "emms \n\t" // exit MMX state
69 "popa \n\t":"=m" (Dest) // %0
72 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
73 "pxor %%mm0, %%mm0 \n\t" // zero mm0 register
74 ".align 16 \n\t" // 16 byte allignment of the loop entry
75 - ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
76 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
77 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
78 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
79 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
81 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
82 "add $8, %%ebx \n\t" // register pointers by 8
83 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
84 - "jnz .L1014 \n\t" // check loop termination, proceed if required
85 + "jnz 1b \n\t" // check loop termination, proceed if required
86 "emms \n\t" // exit MMX state
87 "popa \n\t":"=m" (Dest) // %0
90 "mov %0, %%edi \n\t" // load Dest address into edi
91 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
92 ".align 16 \n\t" // 16 byte allignment of the loop entry
93 - ".L10141: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1
94 + "1: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1
95 "mulb (%%esi) \n\t" // mul with a byte from Src2
96 - ".L10142: \n\t" "mov %%al, (%%edi) \n\t" // move a byte result to Dest
97 + "mov %%al, (%%edi) \n\t" // move a byte result to Dest
98 "inc %%edx \n\t" // increment Src1, Src2, Dest
99 "inc %%esi \n\t" // pointer registers by one
100 "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
101 - "jnz .L10141 \n\t" // check loop termination, proceed if required
102 + "jnz 1b \n\t" // check loop termination, proceed if required
103 "popa \n\t":"=m" (Dest) // %0
107 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
108 "pxor %%mm0, %%mm0 \n\t" // zero mm0 register
109 ".align 16 \n\t" // 16 byte allignment of the loop entry
110 - ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
111 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
112 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
113 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
114 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
116 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
117 "add $8, %%ebx \n\t" // register pointers by 8
118 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
119 - "jnz .L1015 \n\t" // check loop termination, proceed if required
120 + "jnz 1b \n\t" // check loop termination, proceed if required
121 "emms \n\t" // exit MMX state
122 "popa \n\t":"=m" (Dest) // %0
125 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
126 "pxor %%mm0, %%mm0 \n\t" // zero mm0 register
127 ".align 16 \n\t" // 16 byte allignment of the loop entry
128 - ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
129 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
130 "movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
131 "movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
132 "movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
134 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
135 "add $8, %%ebx \n\t" // register pointers by 8
136 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
137 - "jnz .L1016 \n\t" // check loop termination, proceed if required
138 + "jnz 1b \n\t" // check loop termination, proceed if required
139 "emms \n\t" // exit MMX state
140 "popa \n\t":"=m" (Dest) // %0
142 @@ -720,13 +720,13 @@
143 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
144 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
145 ".align 16 \n\t" // 16 byte allignment of the loop entry
146 - ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
147 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
148 "pand (%%ebx), %%mm1 \n\t" // mm1=Src1&Src2
149 "movq %%mm1, (%%edi) \n\t" // store result in Dest
150 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
151 "add $8, %%ebx \n\t" // register pointers by 8
152 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
153 - "jnz .L1017 \n\t" // check loop termination, proceed if required
154 + "jnz 1b \n\t" // check loop termination, proceed if required
155 "emms \n\t" // exit MMX state
156 "popa \n\t":"=m" (Dest) // %0
158 @@ -792,13 +792,13 @@
159 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
160 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
161 ".align 16 \n\t" // 16 byte allignment of the loop entry
162 - ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
163 + "1: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
164 "por (%%ebx), %%mm1 \n\t" // mm1=Src1|Src2
165 "movq %%mm1, (%%edi) \n\t" // store result in Dest
166 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
167 "add $8, %%ebx \n\t" // register pointers by 8
168 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
169 - "jnz .L91017 \n\t" // check loop termination, proceed if required
170 + "jnz 1b \n\t" // check loop termination, proceed if required
171 "emms \n\t" // exit MMX state
172 "popa \n\t":"=m" (Dest) // %0
174 @@ -860,17 +860,17 @@
175 "mov %0, %%edi \n\t" // load Dest address into edi
176 "mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
177 ".align 16 \n\t" // 16 byte allignment of the loop entry
178 - ".L10191: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2
179 + "1: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2
180 "cmp $0, %%bl \n\t" // check if it zero
181 - "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
182 - "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
183 + "jnz 2f \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
184 + "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
185 "mov (%%edx), %%al \n\t" // load a byte from Src1 into AL
186 "div %%bl \n\t" // divide AL by BL
187 "mov %%al, (%%edi) \n\t" // move a byte result to Dest
188 - ".L10193: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest
189 + "3: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest
190 "inc %%esi \n\t" // pointer registers by one
191 "inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
192 - "jnz .L10191 \n\t" // check loop termination, proceed if required
193 + "jnz 1b \n\t" // check loop termination, proceed if required
194 "popa \n\t":"=m" (Dest) // %0
197 @@ -907,12 +907,12 @@
198 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
199 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
200 ".align 16 \n\t" // 16 byte allignment of the loop entry
201 - ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1
202 + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm0
203 "pxor %%mm1, %%mm0 \n\t" // negate mm0 by xoring with mm1
204 "movq %%mm0, (%%edi) \n\t" // store result in Dest
205 "add $8, %%eax \n\t" // increase Src1, Src2 and Dest
206 "add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
207 - "jnz .L91117 \n\t" // check loop termination, proceed if required
208 + "jnz 1b \n\t" // check loop termination, proceed if required
209 "emms \n\t" // exit MMX state
210 "popa \n\t":"=m" (Dest) // %0
212 @@ -980,14 +980,14 @@
213 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
214 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
215 ".align 16 \n\t" // 16 byte allignment of the loop entry
218 "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
219 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
220 "movq %%mm0, (%%edi) \n\t" // store result in Dest
221 "add $8, %%eax \n\t" // increase Dest register pointer by 8
222 "add $8, %%edi \n\t" // increase Dest register pointer by 8
223 "dec %%ecx \n\t" // decrease loop counter
224 - "jnz .L1021 \n\t" // check loop termination, proceed if required
225 + "jnz 1b \n\t" // check loop termination, proceed if required
226 "emms \n\t" // exit MMX state
227 "popa \n\t":"=m" (Dest) // %0
229 @@ -1059,14 +1059,14 @@
230 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
231 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
232 ".align 16 \n\t" // 16 byte allignment of the loop entry
235 "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
236 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
237 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest
238 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
239 "add $8, %%edi \n\t" // increase Dest register pointer by 8
240 "dec %%ecx \n\t" // decrease loop counter
241 - "jnz .L11023 \n\t" // check loop termination, proceed if required
242 + "jnz 1b \n\t" // check loop termination, proceed if required
243 "emms \n\t" // exit MMX state
244 "popa \n\t":"=m" (Dest) // %0
246 @@ -1154,7 +1154,7 @@
247 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
248 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
249 ".align 16 \n\t" // 16 byte allignment of the loop entry
252 "movq (%%eax), %%mm2 \n\t" // load 8 bytes from Src1 into MM2
253 "psrlw $1, %%mm2 \n\t" // shift 4 WORDS of MM2 1 bit to the right
254 // "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2
255 @@ -1164,7 +1164,7 @@
256 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
257 "add $8, %%edi \n\t" // increase Dest register pointer by 8
258 "dec %%ecx \n\t" // decrease loop counter
259 - "jnz .L1022 \n\t" // check loop termination, proceed if required
260 + "jnz 1b \n\t" // check loop termination, proceed if required
261 "emms \n\t" // exit MMX state
262 "popa \n\t":"=m" (Dest) // %0
264 @@ -1243,13 +1243,13 @@
265 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
266 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
267 ".align 16 \n\t" // 16 byte allignment of the loop entry
268 - ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
269 + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
270 "psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
271 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest
272 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
273 "add $8, %%edi \n\t" // increase Dest register pointer by 8
274 "dec %%ecx \n\t" // decrease loop counter
275 - "jnz .L1023 \n\t" // check loop termination, proceed if required
276 + "jnz 1b \n\t" // check loop termination, proceed if required
277 "emms \n\t" // exit MMX state
278 "popa \n\t":"=m" (Dest) // %0
280 @@ -1322,13 +1322,13 @@
281 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
282 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
283 ".align 16 \n\t" // 16 byte allignment of the loop entry
284 - ".L11024: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
285 + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
286 "psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
287 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest
288 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
289 "add $8, %%edi \n\t" // increase Dest register pointer by 8
290 "dec %%ecx \n\t" // decrease loop counter
291 - "jnz .L11024 \n\t" // check loop termination, proceed if required
292 + "jnz 1b \n\t" // check loop termination, proceed if required
293 "emms \n\t" // exit MMX state
294 "popa \n\t":"=m" (Dest) // %0
296 @@ -1405,19 +1405,19 @@
297 "mov %3, %%cl \n\t" // load loop counter (N) into CL
298 "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
299 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
300 - ".L10240: \n\t" // ** Prepare proper bit-Mask in MM1 **
301 + "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
302 "psrlw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the right
303 // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
304 ".byte 0x0f, 0xdb, 0xc8 \n\t"
305 "dec %%cl \n\t" // decrease loop counter
306 - "jnz .L10240 \n\t" // check loop termination, proceed if required
307 + "jnz 1b \n\t" // check loop termination, proceed if required
308 // ** Shift all bytes of the image **
309 "mov %1, %%eax \n\t" // load Src1 address into eax
310 "mov %0, %%edi \n\t" // load Dest address into edi
311 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
312 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
313 ".align 16 \n\t" // 16 byte allignment of the loop entry
316 "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
317 "psrlw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the right
318 // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
319 @@ -1426,7 +1426,7 @@
320 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
321 "add $8, %%edi \n\t" // increase Dest register pointer by 8
322 "dec %%ecx \n\t" // decrease loop counter
323 - "jnz .L10241 \n\t" // check loop termination, proceed if required
324 + "jnz 2b \n\t" // check loop termination, proceed if required
325 "emms \n\t" // exit MMX state
326 "popa \n\t":"=m" (Dest) // %0
328 @@ -1495,13 +1495,13 @@
329 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
330 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
331 ".align 16 \n\t" // 16 byte allignment of the loop entry
332 - ".L13023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
333 + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
334 "psrld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
335 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest
336 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
337 "add $8, %%edi \n\t" // increase Dest register pointer by 8
338 "dec %%ecx \n\t" // decrease loop counter
339 - "jnz .L13023 \n\t" // check loop termination, proceed if required
340 + "jnz 1b \n\t" // check loop termination, proceed if required
341 "emms \n\t" // exit MMX state
342 "popa \n\t":"=m" (Dest) // %0
344 @@ -1581,8 +1581,8 @@
345 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
346 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
347 "cmp $128, %%al \n\t" // if (C <= 128) execute more efficient code
348 - "jg .L10251 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
349 - ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
350 + "jg 1f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
351 + "2: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
352 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
353 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
354 "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
355 @@ -1593,9 +1593,9 @@
356 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
357 "add $8, %%edi \n\t" // increase Dest register pointer by 8
358 "dec %%ecx \n\t" // decrease loop counter
359 - "jnz .L10250 \n\t" // check loop termination, proceed if required
360 - "jmp .L10252 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
361 - ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
362 + "jnz 2b \n\t" // check loop termination, proceed if required
363 + "jmp 3f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
364 + "1: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
365 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
366 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
367 "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
368 @@ -1615,8 +1615,8 @@
369 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
370 "add $8, %%edi \n\t" // increase Dest register pointer by 8
371 "dec %%ecx \n\t" // decrease loop counter
372 - "jnz .L10251 \n\t" // check loop termination, proceed if required
373 - ".L10252: \n\t" "emms \n\t" // exit MMX state
374 + "jnz 1b \n\t" // check loop termination, proceed if required
375 + "3: \n\t" "emms \n\t" // exit MMX state
376 "popa \n\t":"=m" (Dest) // %0
379 @@ -1696,7 +1696,7 @@
380 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
381 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
382 ".align 16 \n\t" // 16 byte allignment of the loop entry
383 - ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
384 + "1: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
385 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
386 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
387 "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
388 @@ -1709,7 +1709,7 @@
389 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
390 "add $8, %%edi \n\t" // increase Dest register pointer by 8
391 "dec %%ecx \n\t" // decrease loop counter
392 - "jnz .L1026 \n\t" // check loop termination, proceed if required
393 + "jnz 1b \n\t" // check loop termination, proceed if required
394 "emms \n\t" // exit MMX state
395 "popa \n\t":"=m" (Dest) // %0
397 @@ -1784,25 +1784,25 @@
398 "mov %3, %%cl \n\t" // load loop counter (N) into CL
399 "movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
400 "pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
401 - ".L10270: \n\t" // ** Prepare proper bit-Mask in MM1 **
402 + "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
403 "psllw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the left
404 // "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
405 ".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter
406 - "jnz .L10270 \n\t" // check loop termination, proceed if required
407 + "jnz 1b \n\t" // check loop termination, proceed if required
408 // ** Shift all bytes of the image **
409 "mov %1, %%eax \n\t" // load Src1 address into eax
410 "mov %0, %%edi \n\t" // load SrcDest address into edi
411 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
412 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
413 ".align 16 \n\t" // 16 byte allignment of the loop entry
414 - ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
415 + "2: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
416 "psllw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the left
417 // "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
418 ".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in Dest
419 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
420 "add $8, %%edi \n\t" // increase Dest register pointer by 8
421 "dec %%ecx \n\t" // decrease loop counter
422 - "jnz .L10271 \n\t" // check loop termination, proceed if required
423 + "jnz 2b \n\t" // check loop termination, proceed if required
424 "emms \n\t" // exit MMX state
425 "popa \n\t":"=m" (Dest) // %0
427 @@ -1870,13 +1870,13 @@
428 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
429 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
430 ".align 16 \n\t" // 16 byte allignment of the loop entry
431 - ".L12023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
432 + "1: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
433 "pslld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
434 "movq %%mm0, (%%edi) \n\t" // store result in SrcDest
435 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
436 "add $8, %%edi \n\t" // increase Dest register pointer by 8
437 "dec %%ecx \n\t" // decrease loop counter
438 - "jnz .L12023 \n\t" // check loop termination, proceed if required
439 + "jnz 1b \n\t" // check loop termination, proceed if required
440 "emms \n\t" // exit MMX state
441 "popa \n\t":"=m" (Dest) // %0
443 @@ -1949,8 +1949,8 @@
444 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
445 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
446 "cmp $7, %%al \n\t" // if (N <= 7) execute more efficient code
447 - "jg .L10281 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
448 - ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
449 + "jg 1f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
450 + "2: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
451 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
452 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
453 "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
454 @@ -1961,9 +1961,9 @@
455 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
456 "add $8, %%edi \n\t" // increase Dest register pointer by 8
457 "dec %%ecx \n\t" // decrease loop counter
458 - "jnz .L10280 \n\t" // check loop termination, proceed if required
459 - "jmp .L10282 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
460 - ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
461 + "jnz 2b \n\t" // check loop termination, proceed if required
462 + "jmp 3f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
463 + "1: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
464 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
465 "punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
466 "punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
467 @@ -1983,8 +1983,8 @@
468 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
469 "add $8, %%edi \n\t" // increase Dest register pointer by 8
470 "dec %%ecx \n\t" // decrease loop counter
471 - "jnz .L10281 \n\t" // check loop termination, proceed if required
472 - ".L10282: \n\t" "emms \n\t" // exit MMX state
473 + "jnz 1b \n\t" // check loop termination, proceed if required
474 + "3: \n\t" "emms \n\t" // exit MMX state
475 "popa \n\t":"=m" (Dest) // %0
478 @@ -2063,7 +2063,7 @@
479 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
480 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
481 ".align 16 \n\t" // 16 byte alignment of the loop entry
484 "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
485 "paddusb %%mm2, %%mm0 \n\t" // MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation)
486 "pcmpeqb %%mm1, %%mm0 \n\t" // binarize 255:0, comparing to 255
487 @@ -2071,7 +2071,7 @@
488 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
489 "add $8, %%edi \n\t" // increase Dest register pointer by 8
490 "dec %%ecx \n\t" // decrease loop counter
491 - "jnz .L1029 \n\t" // check loop termination, proceed if required
492 + "jnz 1b \n\t" // check loop termination, proceed if required
493 "emms \n\t" // exit MMX state
494 "popa \n\t":"=m" (Dest) // %0
496 @@ -2154,7 +2154,7 @@
497 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
498 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
499 ".align 16 \n\t" // 16 byte allignment of the loop entry
502 "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
503 "paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+(0xFF-Tmax)
504 "psubusb %%mm7, %%mm0 \n\t" // MM0=MM0-(0xFF-Tmax+Tmin)
505 @@ -2163,7 +2163,7 @@
506 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
507 "add $8, %%edi \n\t" // increase Dest register pointer by 8
508 "dec %%ecx \n\t" // decrease loop counter
509 - "jnz .L1030 \n\t" // check loop termination, proceed if required
510 + "jnz 1b \n\t" // check loop termination, proceed if required
511 "emms \n\t" // exit MMX state
512 "popa \n\t":"=m" (Dest) // %0
514 @@ -2231,11 +2231,11 @@
515 "mov %4, %%bx \n\t" // load Cmax in BX
516 "sub %5, %%ax \n\t" // AX = Nmax - Nmin
517 "sub %3, %%bx \n\t" // BX = Cmax - Cmin
518 - "jz .L10311 \n\t" // check division by zero
519 + "jz 1f \n\t" // check division by zero
520 "xor %%dx, %%dx \n\t" // prepare for division, zero DX
521 "div %%bx \n\t" // AX = AX/BX
522 - "jmp .L10312 \n\t" ".L10311: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
523 - ".L10312: \n\t" // ** Duplicate AX in 4 words of MM0 **
524 + "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
525 + "2: \n\t" // ** Duplicate AX in 4 words of MM0 **
526 "mov %%ax, %%bx \n\t" // copy AX into BX
527 "shl $16, %%eax \n\t" // shift 2 bytes of EAX left
528 "mov %%bx, %%ax \n\t" // copy BX into AX
529 @@ -2264,7 +2264,7 @@
530 "mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
531 "shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
532 ".align 16 \n\t" // 16 byte allignment of the loop entry
535 "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
536 "movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
537 "punpcklbw %%mm7, %%mm3 \n\t" // unpack low bytes of SrcDest into words
538 @@ -2289,7 +2289,7 @@
539 "add $8, %%eax \n\t" // increase Src1 register pointer by 8
540 "add $8, %%edi \n\t" // increase Dest register pointer by 8
541 "dec %%ecx \n\t" // decrease loop counter
542 - "jnz .L1031 \n\t" // check loop termination, proceed if required
543 + "jnz 3b \n\t" // check loop termination, proceed if required
544 "emms \n\t" // exit MMX state
545 "popa \n\t":"=m" (Dest) // %0
547 @@ -2383,10 +2383,10 @@
548 "mov %2, %%edx \n\t" // initialize ROWS counter
549 "sub $2, %%edx \n\t" // do not use first and last row
551 - ".L10320: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMS counter
552 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
553 "sub $2, %%ecx \n\t" // do not use first and last column
554 ".align 16 \n\t" // 16 byte allignment of the loop entry
558 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the image first row
559 "add %%eax, %%esi \n\t" // move one row below
560 @@ -2427,11 +2427,11 @@
561 "inc %%edi \n\t" // move Dest pointer to the next pixel
563 "dec %%ecx \n\t" // decrease loop counter COLUMNS
564 - "jnz .L10322 \n\t" // check loop termination, proceed if required
565 + "jnz 2b \n\t" // check loop termination, proceed if required
566 "add $2, %%esi \n\t" // move to the next row in Src
567 "add $2, %%edi \n\t" // move to the next row in Dest
568 "dec %%edx \n\t" // decrease loop counter ROWS
569 - "jnz .L10320 \n\t" // check loop termination, proceed if required
570 + "jnz 1b \n\t" // check loop termination, proceed if required
572 "emms \n\t" // exit MMX state
573 "popa \n\t":"=m" (Dest) // %0
574 @@ -2474,10 +2474,10 @@
575 "mov %2, %%ebx \n\t" // initialize ROWS counter
576 "sub $4, %%ebx \n\t" // do not use first 2 and last 2 rows
578 - ".L10330: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
579 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
580 "sub $4, %%ecx \n\t" // do not use first 2 and last 2 columns
581 ".align 16 \n\t" // 16 byte allignment of the loop entry
582 - ".L10332: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
583 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
584 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
586 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
587 @@ -2577,11 +2577,11 @@
588 "inc %%edi \n\t" // move Dest pointer to the next pixel
590 "dec %%ecx \n\t" // decrease loop counter COLUMNS
591 - "jnz .L10332 \n\t" // check loop termination, proceed if required
592 + "jnz 2b \n\t" // check loop termination, proceed if required
593 "add $4, %%esi \n\t" // move to the next row in Src
594 "add $4, %%edi \n\t" // move to the next row in Dest
595 "dec %%ebx \n\t" // decrease loop counter ROWS
596 - "jnz .L10330 \n\t" // check loop termination, proceed if required
597 + "jnz 1b \n\t" // check loop termination, proceed if required
599 "emms \n\t" // exit MMX state
600 "popa \n\t":"=m" (Dest) // %0
601 @@ -2622,10 +2622,10 @@
602 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" // initialize ROWS counter
603 "sub $6, %%ebx \n\t" // do not use first 3 and last 3 rows
605 - ".L10340: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
606 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
607 "sub $6, %%ecx \n\t" // do not use first 3 and last 3 columns
608 ".align 16 \n\t" // 16 byte allignment of the loop entry
609 - ".L10342: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
610 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
611 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
613 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
614 @@ -2753,11 +2753,11 @@
615 "inc %%edi \n\t" // move Dest pointer to the next pixel
617 "dec %%ecx \n\t" // decrease loop counter COLUMNS
618 - "jnz .L10342 \n\t" // check loop termination, proceed if required
619 + "jnz 2b \n\t" // check loop termination, proceed if required
620 "add $6, %%esi \n\t" // move to the next row in Src
621 "add $6, %%edi \n\t" // move to the next row in Dest
622 "dec %%ebx \n\t" // decrease loop counter ROWS
623 - "jnz .L10340 \n\t" // check loop termination, proceed if required
624 + "jnz 1b \n\t" // check loop termination, proceed if required
626 "emms \n\t" // exit MMX state
627 "popa \n\t":"=m" (Dest) // %0
628 @@ -2798,10 +2798,10 @@
629 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" // initialize ROWS counter
630 "sub $8, %%ebx \n\t" // do not use first 4 and last 4 rows
632 - ".L10350: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
633 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
634 "sub $8, %%ecx \n\t" // do not use first 4 and last 4 columns
635 ".align 16 \n\t" // 16 byte allignment of the loop entry
636 - ".L10352: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
637 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
638 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
640 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
641 @@ -3020,11 +3020,11 @@
642 "inc %%edi \n\t" // move Dest pointer to the next pixel
644 "dec %%ecx \n\t" // decrease loop counter COLUMNS
645 - "jnz .L10352 \n\t" // check loop termination, proceed if required
646 + "jnz 2b \n\t" // check loop termination, proceed if required
647 "add $8, %%esi \n\t" // move to the next row in Src
648 "add $8, %%edi \n\t" // move to the next row in Dest
649 "dec %%ebx \n\t" // decrease loop counter ROWS
650 - "jnz .L10350 \n\t" // check loop termination, proceed if required
651 + "jnz 1b \n\t" // check loop termination, proceed if required
653 "emms \n\t" // exit MMX state
654 "popa \n\t":"=m" (Dest) // %0
655 @@ -3071,10 +3071,10 @@
656 "mov %2, %%edx \n\t" // initialize ROWS counter
657 "sub $2, %%edx \n\t" // do not use first and last row
659 - ".L10360: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMS counter
660 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
661 "sub $2, %%ecx \n\t" // do not use first and last column
662 ".align 16 \n\t" // 16 byte allignment of the loop entry
666 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the image first row
667 "add %%eax, %%esi \n\t" // move one row below
668 @@ -3107,11 +3107,11 @@
669 "inc %%edi \n\t" // move Dest pointer to the next pixel
671 "dec %%ecx \n\t" // decrease loop counter COLUMNS
672 - "jnz .L10362 \n\t" // check loop termination, proceed if required
673 + "jnz 2b \n\t" // check loop termination, proceed if required
674 "add $2, %%esi \n\t" // move to the next row in Src
675 "add $2, %%edi \n\t" // move to the next row in Dest
676 "dec %%edx \n\t" // decrease loop counter ROWS
677 - "jnz .L10360 \n\t" // check loop termination, proceed if required
678 + "jnz 1b \n\t" // check loop termination, proceed if required
680 "emms \n\t" // exit MMX state
681 "popa \n\t":"=m" (Dest) // %0
682 @@ -3154,10 +3154,10 @@
683 "mov %2, %%ebx \n\t" // initialize ROWS counter
684 "sub $4, %%ebx \n\t" // do not use first 2 and last 2 rows
686 - ".L10370: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
687 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
688 "sub $4, %%ecx \n\t" // do not use first 2 and last 2 columns
689 ".align 16 \n\t" // 16 byte allignment of the loop entry
690 - ".L10372: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
691 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
692 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
694 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
695 @@ -3256,11 +3256,11 @@
696 "inc %%edi \n\t" // move Dest pointer to the next pixel
698 "dec %%ecx \n\t" // decrease loop counter COLUMNS
699 - "jnz .L10372 \n\t" // check loop termination, proceed if required
700 + "jnz 2b \n\t" // check loop termination, proceed if required
701 "add $4, %%esi \n\t" // move to the next row in Src
702 "add $4, %%edi \n\t" // move to the next row in Dest
703 "dec %%ebx \n\t" // decrease loop counter ROWS
704 - "jnz .L10370 \n\t" // check loop termination, proceed if required
705 + "jnz 1b \n\t" // check loop termination, proceed if required
707 "emms \n\t" // exit MMX state
708 "popa \n\t":"=m" (Dest) // %0
709 @@ -3301,10 +3301,10 @@
710 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" // initialize ROWS counter
711 "sub $6, %%ebx \n\t" // do not use first 3 and last 3 rows
713 - ".L10380: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
714 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
715 "sub $6, %%ecx \n\t" // do not use first 3 and last 3 columns
716 ".align 16 \n\t" // 16 byte allignment of the loop entry
717 - ".L10382: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
718 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
719 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
721 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
722 @@ -3435,11 +3435,11 @@
723 "inc %%edi \n\t" // move Dest pointer to the next pixel
725 "dec %%ecx \n\t" // decrease loop counter COLUMNS
726 - "jnz .L10382 \n\t" // check loop termination, proceed if required
727 + "jnz 2b \n\t" // check loop termination, proceed if required
728 "add $6, %%esi \n\t" // move to the next row in Src
729 "add $6, %%edi \n\t" // move to the next row in Dest
730 "dec %%ebx \n\t" // decrease loop counter ROWS
731 - "jnz .L10380 \n\t" // check loop termination, proceed if required
732 + "jnz 1b \n\t" // check loop termination, proceed if required
734 "emms \n\t" // exit MMX state
735 "popa \n\t":"=m" (Dest) // %0
736 @@ -3480,10 +3480,10 @@
737 "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "add %%eax, %%edi \n\t" "mov %2, %%ebx \n\t" // initialize ROWS counter
738 "sub $8, %%ebx \n\t" // do not use first 4 and last 4 rows
740 - ".L10390: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
741 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
742 "sub $8, %%ecx \n\t" // do not use first 4 and last 4 columns
743 ".align 16 \n\t" // 16 byte allignment of the loop entry
744 - ".L10392: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
745 + "2: \n\t" "pxor %%mm7, %%mm7 \n\t" // zero MM7 (accumulator)
746 "movd %%esi, %%mm6 \n\t" // save ESI in MM6
748 "movq (%%esi), %%mm1 \n\t" // load 8 bytes of the Src
749 @@ -3718,11 +3718,11 @@
750 "inc %%edi \n\t" // move Dest pointer to the next pixel
752 "dec %%ecx \n\t" // decrease loop counter COLUMNS
753 - "jnz .L10392 \n\t" // check loop termination, proceed if required
754 + "jnz 2b \n\t" // check loop termination, proceed if required
755 "add $8, %%esi \n\t" // move to the next row in Src
756 "add $8, %%edi \n\t" // move to the next row in Dest
757 "dec %%ebx \n\t" // decrease loop counter ROWS
758 - "jnz .L10390 \n\t" // check loop termination, proceed if required
759 + "jnz 1b \n\t" // check loop termination, proceed if required
761 "emms \n\t" // exit MMX state
762 "popa \n\t":"=m" (Dest) // %0
763 @@ -3761,12 +3761,12 @@
764 "mov %2, %%edx \n\t" // initialize ROWS counter
765 "sub $2, %%edx \n\t" // do not use first and last rows
767 - ".L10400: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMS counter
768 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
769 "shr $3, %%ecx \n\t" // EBX/8 (MMX loads 8 bytes at a time)
770 "mov %%esi, %%ebx \n\t" // save ESI in EBX
771 "movd %%edi, %%mm1 \n\t" // save EDI in MM1
772 ".align 16 \n\t" // 16 byte allignment of the loop entry
776 "movq (%%esi), %%mm4 \n\t" // load 8 bytes from Src
777 "movq %%mm4, %%mm5 \n\t" // save MM4 in MM5
778 @@ -3844,13 +3844,13 @@
779 "add $8, %%edi \n\t" // move Dest pointer to the next 8 pixels
781 "dec %%ecx \n\t" // decrease loop counter COLUMNS
782 - "jnz .L10402 \n\t" // check loop termination, proceed if required
783 + "jnz 2b \n\t" // check loop termination, proceed if required
784 "mov %%ebx, %%esi \n\t" // restore most left current row Src address
785 "movd %%mm1, %%edi \n\t" // restore most left current row Dest address
786 "add %%eax, %%esi \n\t" // move to the next row in Src
787 "add %%eax, %%edi \n\t" // move to the next row in Dest
788 "dec %%edx \n\t" // decrease loop counter ROWS
789 - "jnz .L10400 \n\t" // check loop termination, proceed if required
790 + "jnz 1b \n\t" // check loop termination, proceed if required
792 "emms \n\t" // exit MMX state
793 "popa \n\t":"=m" (Dest) // %0
794 @@ -3889,12 +3889,12 @@
795 // initialize ROWS counter
796 "subl $2, %2 \n\t" // do not use first and last rows
798 - ".L10410: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMS counter
799 + "1: \n\t" "mov %%eax, %%ecx \n\t" // initialize COLUMNS counter
800 "shr $3, %%ecx \n\t" // EBX/8 (MMX loads 8 bytes at a time)
801 "mov %%esi, %%ebx \n\t" // save ESI in EBX
802 "mov %%edi, %%edx \n\t" // save EDI in EDX
803 ".align 16 \n\t" // 16 byte allignment of the loop entry
807 "movq (%%esi), %%mm4 \n\t" // load 8 bytes from Src
808 "movq %%mm4, %%mm5 \n\t" // save MM4 in MM5
809 @@ -3984,13 +3984,13 @@
810 "add $8, %%edi \n\t" // move Dest pointer to the next 8 pixels
812 "dec %%ecx \n\t" // decrease loop counter COLUMNS
813 - "jnz .L10412 \n\t" // check loop termination, proceed if required
814 + "jnz 2b \n\t" // check loop termination, proceed if required
815 "mov %%ebx, %%esi \n\t" // restore most left current row Src address
816 "mov %%edx, %%edi \n\t" // restore most left current row Dest address
817 "add %%eax, %%esi \n\t" // move to the next row in Src
818 "add %%eax, %%edi \n\t" // move to the next row in Dest
819 "decl %2 \n\t" // decrease loop counter ROWS
820 - "jnz .L10410 \n\t" // check loop termination, proceed if required
821 + "jnz 1b \n\t" // check loop termination, proceed if required
823 "emms \n\t" // exit MMX state
824 "popa \n\t":"=m" (Dest) // %0